Compare commits
7 Commits
7223073afc ... v0.9.81
| Author | SHA1 | Date |
| --- | --- | --- |
|  | 3e001b08b6 |  |
|  | 7f6b3ebb6b |  |
|  | 91858fbd20 |  |
|  | b66cfb07d8 |  |
|  | 9885914abd |  |
|  | e8da3714c0 |  |
|  | 4d23d0419c |  |
CHANGELOG.md (16 additions)
@@ -1,5 +1,21 @@
 ## PVC Changelog
 
+###### [v0.9.81](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.81)
+
+**Breaking Changes:** This large release features a number of major changes. While these should all be a seamless transition, the behaviour of several commands and the backend system for handling them has changed significantly, along with new dependencies from PVC Ansible. A full cluster configuration update via `pvc.yml` is recommended after installing this version. Redis is replaced with KeyDB on coordinator nodes as a Celery backend; this transition will be handled gracefully by the `pvc-ansible` playbooks, though note that KeyDB will be exposed on the Upstream interface. The Celery worker system is renamed `pvcworkerd`, is now active on all nodes (coordinator and non-coordinator), and is expanded to encompass several commands that previously used a similar, custom setup within the node daemons, including "pvc vm flush-locks" and all "pvc storage osd" tasks. The previously-mentioned CLI commands now all feature "--wait"/"--no-wait" flags, with wait showing a progress bar and status output of the task run. The "pvc cluster task" command can now be used for viewing all task types, replacing the previously custom and specific "pvc provisioner status" command. All example provisioner scripts have been updated to leverage new helper functions in the Celery system; while updating these is optional, administrators are recommended to do so for optimal log output behaviour (a sketch of these helpers follows this diff).
+
+* [CLI Client] Fixes "--live" argument handling and duplicate restart prompts.
+* [All] Adds support for multiple OSDs on individual disks (NVMe workloads).
+* [All] Corrects and updates OSD replace, refresh, remove, and add functionality; replace no longer purges.
+* [All] Switches to KeyDB (multi-master) instead of Redis and adds node monitoring plugin.
+* [All] Replaces Zookeeper/Node Daemon-based message passing and task handling with pvcworkerd Celery workers on all nodes; increases worker concurrency to 3 (per node).
+* [All] Moves all task-like functions to Celery and updates existing Celery tasks to use new helpers and ID system.
+* [CLI Client] Adds "--wait/--no-wait" options with progress bars to all Celery-based tasks, "--wait" default; adds a standardized task interface under "pvc cluster task".
+* [Node Daemon] Cleans up the fencing handler and related functions.
+* [Node Daemon] Fixes bugs with VM memory reporting during keepalives.
+* [Node Daemon] Fixes a potential race condition during primary/secondary transition by backgrounding systemctl commands.
+* [API Daemon] Updates example provisioner plugins to use new Celery functions.
+
 ###### [v0.9.80](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.80)
 
 * [CLI] Improves CLI performance by not loading "pkg_resources" until needed
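The helper functions behind these changes are imported in the diffs below as `from daemon_lib.celery import start, fail, log_info, update, finish`. As a minimal, hedged sketch of the call pattern only — the task body, its argument, and its stage breakdown are invented for illustration; the helper signatures are those visible in the diffs that follow:

```python
# Illustrative sketch only: the start/update/fail/finish/log_info call
# pattern is taken from the diffs below; this task itself is hypothetical.
from daemon_lib.celery import start, update, fail, finish, log_info


def example_task(self, arg=None):
    current_stage = 0
    total_stages = 2

    # Announce the task; current/total drive the CLI "--wait" progress bar
    start(self, f"Running example task '{arg}'", current=current_stage, total=total_stages)

    current_stage += 1
    update(self, "Doing the work", current=current_stage, total=total_stages)
    log_info(None, "Informational output, replacing bare print() calls")

    if arg is None:
        # fail() replaces the old raise-an-exception-with-side-effects pattern
        fail(self, "No argument provided")

    current_stage += 1
    return finish(self, "Example task completed successfully", current=current_stage, total=total_stages)
```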
@@ -27,7 +27,7 @@ from ssl import SSLContext, TLSVersion
 from distutils.util import strtobool as dustrtobool
 
 # Daemon version
-version = "0.9.80"
+version = "0.9.81"
 
 # API version
 API_VERSION = 1.0
@@ -155,6 +155,7 @@ def entrypoint():
     print("|----------------------------------------------------------|")
     print("")
 
+    pvc_api.celery_startup()
     pvc_api.app.run(
         config["listen_address"],
         config["listen_port"],
@@ -22,16 +22,22 @@
 import psycopg2
 import psycopg2.extras
 
+from datetime import datetime
 from json import loads, dumps
 
 from pvcapid.Daemon import config
 
 from daemon_lib.zkhandler import ZKHandler
+from daemon_lib.celery import start, fail, log_info, update, finish
 
 import daemon_lib.common as pvc_common
 import daemon_lib.ceph as pvc_ceph
 
 
+# Define the current test format
+TEST_FORMAT = 1
+
+
 # We run a total of 8 tests, to give a generalized idea of performance on the cluster:
 # 1. A sequential read test of 8GB with a 4M block size
 # 2. A sequential write test of 8GB with a 4M block size
@ -104,27 +110,7 @@ benchmark_volume_size = "8G"
|
|||||||
# Exceptions (used by Celery tasks)
|
# Exceptions (used by Celery tasks)
|
||||||
#
|
#
|
||||||
class BenchmarkError(Exception):
|
class BenchmarkError(Exception):
|
||||||
"""
|
pass
|
||||||
An exception that results from the Benchmark job.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self, message, job_name=None, db_conn=None, db_cur=None, zkhandler=None
|
|
||||||
):
|
|
||||||
self.message = message
|
|
||||||
if job_name is not None and db_conn is not None and db_cur is not None:
|
|
||||||
# Clean up our dangling result
|
|
||||||
query = "DELETE FROM storage_benchmarks WHERE job = %s;"
|
|
||||||
args = (job_name,)
|
|
||||||
db_cur.execute(query, args)
|
|
||||||
db_conn.commit()
|
|
||||||
# Close the database connections cleanly
|
|
||||||
close_database(db_conn, db_cur)
|
|
||||||
if job_name is not None and zkhandler is not None:
|
|
||||||
zkhandler.disconnect()
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return str(self.message)
|
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
@@ -132,6 +118,20 @@ class BenchmarkError(Exception):
 #
 
 
+def cleanup(job_name, db_conn=None, db_cur=None, zkhandler=None):
+    if db_conn is not None and db_cur is not None:
+        # Clean up our dangling result
+        query = "DELETE FROM storage_benchmarks WHERE job = %s;"
+        args = (job_name,)
+        db_cur.execute(query, args)
+        db_conn.commit()
+        # Close the database connections cleanly
+        close_database(db_conn, db_cur)
+    if zkhandler is not None:
+        zkhandler.disconnect()
+        del zkhandler
+
+
 # Database connections
 def open_database(config):
     conn = psycopg2.connect(
@@ -193,17 +193,18 @@ def prepare_benchmark_volume(
         zkhandler, pool, benchmark_volume_name, benchmark_volume_size
     )
     if not retcode:
-        raise BenchmarkError(
-            'Failed to create volume "{}" on pool "{}": {}'.format(
-                benchmark_volume_name, pool, retmsg
-            ),
-            job_name=job_name,
+        cleanup(
+            job_name,
             db_conn=db_conn,
             db_cur=db_cur,
             zkhandler=zkhandler,
         )
+        fail(
+            None,
+            f'Failed to create volume "{benchmark_volume_name}" on pool "{pool}": {retmsg}',
+        )
     else:
-        print(retmsg)
+        log_info(None, retmsg)
 
 
 def cleanup_benchmark_volume(
@@ -212,24 +213,25 @@ def cleanup_benchmark_volume(
     # Remove the RBD volume
     retcode, retmsg = pvc_ceph.remove_volume(zkhandler, pool, benchmark_volume_name)
     if not retcode:
-        raise BenchmarkError(
-            'Failed to remove volume "{}" on pool "{}": {}'.format(
-                benchmark_volume_name, pool, retmsg
-            ),
-            job_name=job_name,
+        cleanup(
+            job_name,
             db_conn=db_conn,
             db_cur=db_cur,
             zkhandler=zkhandler,
         )
+        fail(
+            None,
+            f'Failed to remove volume "{benchmark_volume_name}" from pool "{pool}": {retmsg}',
+        )
     else:
-        print(retmsg)
+        log_info(None, retmsg)
 
 
 def run_benchmark_job(
     test, pool, job_name=None, db_conn=None, db_cur=None, zkhandler=None
 ):
     test_spec = test_matrix[test]
-    print("Running test '{}'".format(test))
+    log_info(None, f"Running test '{test}'")
     fio_cmd = """
         fio \
             --name={test} \
@@ -255,51 +257,73 @@ def run_benchmark_job(
         rw=test_spec["rw"],
     )
 
-    print("Running fio job: {}".format(" ".join(fio_cmd.split())))
+    log_info(None, "Running fio job: {}".format(" ".join(fio_cmd.split())))
     retcode, stdout, stderr = pvc_common.run_os_command(fio_cmd)
-    if retcode:
-        raise BenchmarkError(
-            "Failed to run fio test: {}".format(stderr),
-            job_name=job_name,
+    try:
+        jstdout = loads(stdout)
+        if retcode:
+            raise
+    except Exception:
+        cleanup(
+            job_name,
             db_conn=db_conn,
             db_cur=db_cur,
             zkhandler=zkhandler,
         )
+        fail(
+            None,
+            f"Failed to run fio test '{test}': {stderr}",
+        )
 
-    return loads(stdout)
+    return jstdout
 
 
 def run_benchmark(self, pool):
-    # Runtime imports
-    import time
-    from datetime import datetime
-
-    # Define the current test format
-    TEST_FORMAT = 1
-
-    time.sleep(2)
-
     # Phase 0 - connect to databases
-    try:
-        db_conn, db_cur = open_database(config)
-    except Exception:
-        print("FATAL - failed to connect to Postgres")
-        raise Exception
-
     try:
         zkhandler = ZKHandler(config)
         zkhandler.connect()
     except Exception:
-        print("FATAL - failed to connect to Zookeeper")
-        raise Exception
+        fail(
+            self,
+            "Failed to connect to Zookeeper",
+        )
 
     cur_time = datetime.now().isoformat(timespec="seconds")
     cur_primary = zkhandler.read("base.config.primary_node")
-    job_name = "{}_{}".format(cur_time, cur_primary)
+    job_name = f"{cur_time}_{cur_primary}"
 
-    print("Starting storage benchmark '{}' on pool '{}'".format(job_name, pool))
+    current_stage = 0
+    total_stages = 13
+    start(
+        self,
+        f"Running storage benchmark '{job_name}' on pool '{pool}'",
+        current=current_stage,
+        total=total_stages,
+    )
+
+    try:
+        db_conn, db_cur = open_database(config)
+    except Exception:
+        cleanup(
+            job_name,
+            db_conn=None,
+            db_cur=None,
+            zkhandler=zkhandler,
+        )
+        fail(
+            self,
+            "Failed to connect to Postgres",
+        )
+
+    current_stage += 1
+    update(
+        self,
+        "Storing running status in database",
+        current=current_stage,
+        total=total_stages,
+    )
 
-    print("Storing running status for job '{}' in database".format(job_name))
     try:
         query = "INSERT INTO storage_benchmarks (job, test_format, result) VALUES (%s, %s, %s);"
         args = (
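One subtlety in the new fio error path above: the bare `raise` runs with no active exception, so Python raises a `RuntimeError`, which the enclosing `except Exception` still catches. A failed JSON parse and a nonzero fio return code therefore funnel into the same cleanup-and-fail branch. A self-contained sketch of just that control flow:

```python
# Self-contained illustration of the control flow above: a bare `raise`
# with no active exception raises RuntimeError, so the enclosing
# `except Exception` catches it just like a JSON parse failure would be.
from json import loads


def parse_result(retcode, stdout):
    try:
        jstdout = loads(stdout)  # raises on malformed JSON
        if retcode:
            raise  # RuntimeError: no active exception to re-raise
    except Exception:
        return None  # both failure modes land here
    return jstdout


print(parse_result(0, '{"ok": true}'))  # {'ok': True}
print(parse_result(1, '{"ok": true}'))  # None (nonzero return code)
print(parse_result(0, "not json"))      # None (parse failure)
```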
@@ -310,20 +334,21 @@ def run_benchmark(self, pool):
         db_cur.execute(query, args)
         db_conn.commit()
     except Exception as e:
-        raise BenchmarkError(
-            "Failed to store running status: {}".format(e),
-            job_name=job_name,
+        cleanup(
+            job_name,
             db_conn=db_conn,
             db_cur=db_cur,
             zkhandler=zkhandler,
         )
+        fail(self, f"Failed to store running status: {e}", exception=BenchmarkError)
 
-    # Phase 1 - volume preparation
-    self.update_state(
-        state="RUNNING",
-        meta={"current": 1, "total": 3, "status": "Creating benchmark volume"},
+    current_stage += 1
+    update(
+        self,
+        "Creating benchmark volume",
+        current=current_stage,
+        total=total_stages,
     )
-    time.sleep(1)
 
     prepare_benchmark_volume(
         pool,
@@ -334,14 +359,16 @@ def run_benchmark(self, pool):
     )
 
     # Phase 2 - benchmark run
-    self.update_state(
-        state="RUNNING",
-        meta={"current": 2, "total": 3, "status": "Running fio benchmarks on volume"},
-    )
-    time.sleep(1)
-
     results = dict()
     for test in test_matrix:
+        current_stage += 1
+        update(
+            self,
+            f"Running benchmark job '{test}'",
+            current=current_stage,
+            total=total_stages,
+        )
+
         results[test] = run_benchmark_job(
             test,
             pool,
@@ -352,11 +379,13 @@ def run_benchmark(self, pool):
     )
 
     # Phase 3 - cleanup
-    self.update_state(
-        state="RUNNING",
-        meta={"current": 3, "total": 3, "status": "Cleaning up and storing results"},
+    current_stage += 1
+    update(
+        self,
+        "Cleaning up benchmark volume",
+        current=current_stage,
+        total=total_stages,
     )
-    time.sleep(1)
 
     cleanup_benchmark_volume(
         pool,
@@ -366,27 +395,39 @@ def run_benchmark(self, pool):
         zkhandler=zkhandler,
     )
 
-    print("Storing result of tests for job '{}' in database".format(job_name))
+    current_stage += 1
+    update(
+        self,
+        "Storing results in database",
+        current=current_stage,
+        total=total_stages,
+    )
+
     try:
         query = "UPDATE storage_benchmarks SET result = %s WHERE job = %s;"
         args = (dumps(results), job_name)
         db_cur.execute(query, args)
         db_conn.commit()
     except Exception as e:
-        raise BenchmarkError(
-            "Failed to store test results: {}".format(e),
-            job_name=job_name,
+        cleanup(
+            job_name,
             db_conn=db_conn,
             db_cur=db_cur,
             zkhandler=zkhandler,
         )
+        fail(self, f"Failed to store test results: {e}", exception=BenchmarkError)
 
-    close_database(db_conn, db_cur)
-    zkhandler.disconnect()
-    del zkhandler
+    cleanup(
+        job_name,
+        db_conn=db_conn,
+        db_cur=db_cur,
+        zkhandler=zkhandler,
+    )
 
-    return {
-        "status": "Storage benchmark '{}' completed successfully.",
-        "current": 3,
-        "total": 3,
-    }
+    current_stage += 1
+    return finish(
+        self,
+        f"Storage benchmark {job_name} completed successfully",
+        current=current_stage,
+        total=total_stages,
+    )
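On the consumer side, the `--wait` progress bars presumably read back the `current`/`total`/`status` metadata that `update()` publishes; the field names match the `meta` dict of the `self.update_state()` calls removed above. A hedged sketch of such a watcher (illustrative only, not the actual `pvc cluster task` implementation):

```python
# Hypothetical watcher for a task driven by start()/update()/finish(),
# assuming those helpers wrap Celery's update_state(state="RUNNING",
# meta={"current": ..., "total": ..., "status": ...}) as the removed
# code here did.
import time

from celery.result import AsyncResult


def watch_task(celery_app, task_id):
    result = AsyncResult(task_id, app=celery_app)
    while not result.ready():
        if result.state == "RUNNING" and isinstance(result.info, dict):
            meta = result.info
            print(f"[{meta.get('current')}/{meta.get('total')}] {meta.get('status')}")
        time.sleep(0.5)
    return result.get()  # re-raises on task failure
```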
@@ -57,64 +57,6 @@ from flask_sqlalchemy import SQLAlchemy
 
 # Create Flask app and set config values
 app = flask.Flask(__name__)
-celery_task_uri = "redis://{}:{}{}".format(
-    config["queue_host"], config["queue_port"], config["queue_path"]
-)
-app.config["CELERY_BROKER_URL"] = celery_task_uri
-app.config["CELERY_RESULT_BACKEND"] = celery_task_uri
-
-
-# Set up Celery queues
-@ZKConnection(config)
-def get_all_nodes(zkhandler):
-    _, all_nodes = get_node_list(zkhandler, None)
-    return [n["name"] for n in all_nodes]
-
-
-@ZKConnection(config)
-def get_primary_node(zkhandler):
-    return getPrimaryNode(zkhandler)
-
-
-app.config["CELERY_QUEUES"] = tuple(
-    [Queue(h, routing_key=f"{h}.#") for h in get_all_nodes()]
-)
-
-
-# Set up Celery queue routing
-def route_task(name, args, kwargs, options, task=None, **kw):
-    print("----")
-    print(f"Incoming Celery task: '{name}' with args {args}, kwargs {kwargs}")
-
-    # If an explicit routing_key is set and it's in the kwargs of the function, use it to set the queue
-    if options["routing_key"] != "default" and options["routing_key"] in kwargs.keys():
-        run_on = kwargs[options["routing_key"]]
-        if run_on == "primary":
-            run_on = get_primary_node()
-    # Otherwise, use the primary node
-    else:
-        run_on = get_primary_node()
-
-    print(f"Selected Celery worker: {run_on}")
-    print("----")
-
-    return run_on
-
-
-app.config["CELERY_ROUTES"] = (route_task,)
-
-
-# Set up Celery task ID generator
-# WHY? We don't want to use UUIDs; they're too long and cumbersome. Instead, use a shorter partial UUID.
-def run_celery_task(task_def, **kwargs):
-    task_id = str(uuid4()).split("-")[0]
-    task = task_def.apply_async(
-        (),
-        kwargs,
-        task_id=task_id,
-    )
-    return task
-
 
 # Set up SQLAlchemy backend
 app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False
@@ -144,14 +86,72 @@ blueprint = flask.Blueprint("api", __name__, url_prefix="/api/v1")
 api = Api(blueprint)
 app.register_blueprint(blueprint)
 
+
+# Set up Celery queues
+@ZKConnection(config)
+def get_all_nodes(zkhandler):
+    _, all_nodes = get_node_list(zkhandler, None)
+    return [n["name"] for n in all_nodes]
+
+
+@ZKConnection(config)
+def get_primary_node(zkhandler):
+    return getPrimaryNode(zkhandler)
+
+
+# Set up Celery queue routing
+def route_task(name, args, kwargs, options, task=None, **kw):
+    print("----")
+    print(f"Incoming Celery task: '{name}' with args {args}, kwargs {kwargs}")
+
+    # If an explicit routing_key is set and it's in the kwargs of the function, use it to set the queue
+    if options["routing_key"] != "default" and options["routing_key"] in kwargs.keys():
+        run_on = kwargs[options["routing_key"]]
+        if run_on == "primary":
+            run_on = get_primary_node()
+    # Otherwise, use the primary node
+    else:
+        run_on = get_primary_node()
+
+    print(f"Selected Celery worker: {run_on}")
+    print("----")
+
+    return run_on
+
+
+# Set up Celery task ID generator
+# WHY? We don't want to use UUIDs; they're too long and cumbersome. Instead, use a shorter partial UUID.
+def run_celery_task(task_def, **kwargs):
+    task_id = str(uuid4()).split("-")[0]
+    task = task_def.apply_async(
+        (),
+        kwargs,
+        task_id=task_id,
+    )
+    return task
+
+
 # Create celery definition
+celery_task_uri = "redis://{}:{}{}".format(
+    config["queue_host"], config["queue_port"], config["queue_path"]
+)
 celery = Celery(
     app.name,
     broker=celery_task_uri,
     result_backend=celery_task_uri,
     result_extended=True,
 )
-celery.conf.update(app.config)
+
+
+def celery_startup():
+    app.config["CELERY_broker_url"] = celery_task_uri
+    app.config["result_backend"] = celery_task_uri
+    app.config["task_queues"] = tuple(
+        [Queue(h, routing_key=f"{h}.#") for h in get_all_nodes()]
+    )
+    app.config["task_routes"] = (route_task,)
+    celery.conf.update(app.config)
 
 
 #
 # Custom decorators
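Two things are worth noting in the relocated setup above. First, building `task_queues` requires a Zookeeper lookup via `get_all_nodes()`, which is presumably why it now happens inside `celery_startup()` at daemon start (called from `entrypoint()` in the earlier hunk) rather than at module import time. Second, dispatch goes through `run_celery_task()`, which generates the short partial-UUID task IDs. A hedged usage sketch — the task object and its kwargs are hypothetical; only `run_celery_task()` and the routing behaviour come from this diff:

```python
# Hypothetical dispatch helper using run_celery_task() above. route_task()
# reads options["routing_key"]; when that names a kwarg of the task, its
# value selects the destination node queue, with "primary" resolved to the
# current primary coordinator via get_primary_node().
def submit_osd_add(osd_add_task, device):
    # osd_add_task is an assumed registered Celery task object
    task = run_celery_task(osd_add_task, run_on="primary", device=device)
    print(f"Submitted task {task.id}")  # short partial-UUID ID, e.g. "3f1b2c4d"
    return task
```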
@@ -220,7 +220,7 @@ def create_vm(
     celery, vm_name, vm_profile, define_vm=True, start_vm=True, script_run_args=[]
 ):
     current_stage = 0
-    total_stages = 10
+    total_stages = 11
     start(
         celery,
         f"Provisioning new VM '{vm_name}' with profile '{vm_profile}'",
@@ -373,7 +373,7 @@ def create_vm(
     if pvc_vm.searchClusterByName(zkhandler, vm_name):
         fail(
             celery,
-            f"A VM with the name '{vm_name}' already exists in the cluster.",
+            f"A VM with the name '{vm_name}' already exists in the cluster",
             exception=ClusterError,
         )
 
@@ -416,7 +416,7 @@ def create_vm(
     ]:
         fail(
             celery,
-            f'The network VNI "{vni}" is not present on the cluster.',
+            f'The network VNI "{vni}" is not present on the cluster',
             exception=ClusterError,
         )
 
@@ -432,7 +432,7 @@ def create_vm(
     if not volume_data:
         fail(
             celery,
-            f"The source volume {volume['pool']}/{volume['source_volume']} could not be found.",
+            f"The source volume {volume['pool']}/{volume['source_volume']} could not be found",
             exception=ClusterError,
         )
     if not volume["pool"] in pools:
@@ -463,7 +463,7 @@ def create_vm(
     except Exception:
         fail(
             celery,
-            f'Pool "{pool}" is not present on the cluster.',
+            f'Pool "{pool}" is not present on the cluster',
             exception=ClusterError,
         )
     pool_free_space_gb = int(
@@ -474,7 +474,7 @@ def create_vm(
     if pool_vm_usage_gb >= pool_free_space_gb:
         fail(
             celery,
-            f'Pool "{pool}" has only {pool_free_space_gb} GB free but VM requires {pool_vm_usage_gb} GB.',
+            f'Pool "{pool}" has only {pool_free_space_gb} GB free but VM requires {pool_vm_usage_gb} GB',
             exception=ClusterError,
         )
 
@@ -2,7 +2,7 @@ from setuptools import setup
 
 setup(
     name="pvc",
-    version="0.9.80",
+    version="0.9.81",
     packages=["pvc.cli", "pvc.lib"],
     install_requires=[
         "Click",
debian/changelog (vendored, 18 additions)
@@ -1,3 +1,21 @@
+pvc (0.9.81-0) unstable; urgency=high
+
+  **Breaking Changes:** This large release features a number of major changes. While these should all be a seamless transition, the behaviour of several commands and the backend system for handling them has changed significantly, along with new dependencies from PVC Ansible. A full cluster configuration update via `pvc.yml` is recommended after installing this version. Redis is replaced with KeyDB on coordinator nodes as a Celery backend; this transition will be handled gracefully by the `pvc-ansible` playbooks, though note that KeyDB will be exposed on the Upstream interface. The Celery worker system is renamed `pvcworkerd`, is now active on all nodes (coordinator and non-coordinator), and is expanded to encompass several commands that previously used a similar, custom setup within the node daemons, including "pvc vm flush-locks" and all "pvc storage osd" tasks. The previously-mentioned CLI commands now all feature "--wait"/"--no-wait" flags, with wait showing a progress bar and status output of the task run. The "pvc cluster task" command can now be used for viewing all task types, replacing the previously custom and specific "pvc provisioner status" command. All example provisioner scripts have been updated to leverage new helper functions in the Celery system; while updating these is optional, administrators are recommended to do so for optimal log output behaviour.
+
+  * [CLI Client] Fixes "--live" argument handling and duplicate restart prompts.
+  * [All] Adds support for multiple OSDs on individual disks (NVMe workloads).
+  * [All] Corrects and updates OSD replace, refresh, remove, and add functionality; replace no longer purges.
+  * [All] Switches to KeyDB (multi-master) instead of Redis and adds node monitoring plugin.
+  * [All] Replaces Zookeeper/Node Daemon-based message passing and task handling with pvcworkerd Celery workers on all nodes; increases worker concurrency to 3 (per node).
+  * [All] Moves all task-like functions to Celery and updates existing Celery tasks to use new helpers and ID system.
+  * [CLI Client] Adds "--wait/--no-wait" options with progress bars to all Celery-based tasks, "--wait" default; adds a standardized task interface under "pvc cluster task".
+  * [Node Daemon] Cleans up the fencing handler and related functions.
+  * [Node Daemon] Fixes bugs with VM memory reporting during keepalives.
+  * [Node Daemon] Fixes a potential race condition during primary/secondary transition by backgrounding systemctl commands.
+  * [API Daemon] Updates example provisioner plugins to use new Celery functions.
+
+ -- Joshua M. Boniface <joshua@boniface.me>  Fri, 17 Nov 2023 01:29:41 -0500
+
 pvc (0.9.80-0) unstable; urgency=high
 
   * [CLI] Improves CLI performance by not loading "pkg_resources" until needed
@@ -7,7 +7,7 @@ VERSION="$( head -1 debian/changelog | awk -F'[()-]' '{ print $2 }' )"
 pushd $( git rev-parse --show-toplevel ) &>/dev/null
 pushd api-daemon &>/dev/null
 export PVC_CONFIG_FILE="./pvcapid.sample.yaml"
-./pvcapid-manage.py db migrate -m "PVC version ${VERSION}"
-./pvcapid-manage.py db upgrade
+./pvcapid-manage_flask.py db migrate -m "PVC version ${VERSION}"
+./pvcapid-manage_flask.py db upgrade
 popd &>/dev/null
 popd &>/dev/null
@@ -49,7 +49,7 @@ import re
 import json
 
 # Daemon version
-version = "0.9.80"
+version = "0.9.81"
 
 
 ##########################################################
@@ -78,9 +78,9 @@ _pvc vm tag get testx
 _pvc vm list --tag mytag
 _pvc vm tag remove testx mytag
 _pvc vm network get testx
-_pvc vm vcpu set --no-restart testx 4
+_pvc vm vcpu set --no-restart testx 1
 _pvc vm vcpu get testx
-_pvc vm memory set --no-restart testx 4096
+_pvc vm memory set --no-restart testx 1024
 _pvc vm memory get testx
 _pvc vm vcpu set --no-restart testx 2
 _pvc vm memory set testx 2048 --restart --yes