Compare commits
51 Commits
v0.9.80
...
2c15036f86
| Author | SHA1 | Date | |
|---|---|---|---|
| 2c15036f86 | |||
| 42ed6f6420 | |||
| 3dc1f57de2 | |||
| b99b4e64b2 | |||
| 91af1175ef | |||
| af8a8d969e | |||
| a6caac1b78 | |||
| 30d7e49401 | |||
| ab629f6b51 | |||
| 54215bab6c | |||
| 7490f13b7c | |||
| d1602f35de | |||
| 7cdedde2fb | |||
| ab156b14b7 | |||
| a016337f57 | |||
| e32054be81 | |||
| 18d32fede3 | |||
| b3d13fe9be | |||
| 48b2ccbd95 | |||
| 1535078842 | |||
| 0e45613634 | |||
| 75135f6d5f | |||
| 7f5dd385b5 | |||
| befce62925 | |||
| b0909aed61 | |||
| f418b40527 | |||
| ec42b19d0e | |||
| dd0177ce10 | |||
| ed5bc9fb43 | |||
| 94d8d2cf75 | |||
| 20497cf89d | |||
| 64e37ae963 | |||
| 3cb8a70f04 | |||
| 44d2f98e75 | |||
| cb91bf18a7 | |||
| a3e3fe829a | |||
| f53af510c1 | |||
| d5d783fad3 | |||
| 8b8957547a | |||
| 980ea6a9e9 | |||
| 0f433bd5eb | |||
| 8780044be6 | |||
| f08c654f22 | |||
| 80a7fd6195 | |||
| 8b93f9a80e | |||
| 526a5f4a74 | |||
| aa0b1f504f | |||
| bc425b9224 | |||
| 79e5c098cd | |||
| 3c61a3ac03 | |||
| 988c777912 |
@@ -25,10 +25,10 @@ CELERY_BIN="$( which celery )"
|
||||
# app arguments work in a non-backwards-compatible way with Celery 5.
|
||||
case "$( cat /etc/debian_version )" in
|
||||
10.*)
|
||||
CELERY_ARGS="worker --app pvcapid.flaskapi.celery --concurrency 1 --loglevel INFO"
|
||||
CELERY_ARGS="worker --app pvcapid.flaskapi.celery --concurrency 1 --hostname $(hostname -s) --queues $(hostname -s) --loglevel INFO"
|
||||
;;
|
||||
*)
|
||||
CELERY_ARGS="--app pvcapid.flaskapi.celery worker --concurrency 1 --loglevel INFO"
|
||||
CELERY_ARGS="--app pvcapid.flaskapi.celery worker --concurrency 1 --hostname $(hostname -s) --queues $(hostname -s) --loglevel INFO"
|
||||
;;
|
||||
esac
|
||||
|
||||
|
||||
@@ -24,6 +24,11 @@ import flask
|
||||
from functools import wraps
|
||||
from flask_restful import Resource, Api, reqparse, abort
|
||||
from celery import Celery
|
||||
from kombu import Queue
|
||||
|
||||
from daemon_lib.common import getPrimaryNode
|
||||
from daemon_lib.zkhandler import ZKConnection
|
||||
from daemon_lib.node import get_list as get_node_list
|
||||
|
||||
from pvcapid.Daemon import config, strtobool, API_VERSION
|
||||
|
||||
@@ -44,6 +49,47 @@ app.config["CELERY_BROKER_URL"] = "redis://{}:{}{}".format(
|
||||
app.config["CELERY_RESULT_BACKEND"] = "redis://{}:{}{}".format(
|
||||
config["queue_host"], config["queue_port"], config["queue_path"]
|
||||
)
|
||||
|
||||
# Set up Celery queues
|
||||
app.config["CELERY_DATABASE_ENGINE_OPTIONS"] = {"echo": True}
|
||||
|
||||
|
||||
@ZKConnection(config)
|
||||
def get_all_nodes(zkhandler):
|
||||
_, all_nodes = get_node_list(zkhandler, None)
|
||||
return [n["name"] for n in all_nodes]
|
||||
|
||||
|
||||
app.config["CELERY_QUEUES"] = tuple(
|
||||
[Queue(h, routing_key=f"{h}.#") for h in get_all_nodes()]
|
||||
)
|
||||
|
||||
|
||||
# Set up Celery queue routing
|
||||
def route_task(name, args, kwargs, options, task=None, **kw):
|
||||
@ZKConnection(config)
|
||||
def get_primary_node(zkhandler):
|
||||
return getPrimaryNode(zkhandler)
|
||||
|
||||
print("----")
|
||||
print(f"Incoming Celery task: '{name}' with args {args}, kwargs {kwargs}")
|
||||
|
||||
# If an explicit routing_key is set and it's in the kwargs of the function, use it to set the queue
|
||||
if options["routing_key"] != "default" and options["routing_key"] in kwargs.keys():
|
||||
run_on = kwargs[options["routing_key"]]
|
||||
# Otherwise, use the primary node
|
||||
else:
|
||||
run_on = get_primary_node()
|
||||
|
||||
print(f"Selected Celery worker: {run_on}")
|
||||
print("----")
|
||||
|
||||
return run_on
|
||||
|
||||
|
||||
app.config["CELERY_ROUTES"] = (route_task,)
|
||||
|
||||
# Set up SQLAlchemy backend
|
||||
app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False
|
||||
app.config["SQLALCHEMY_DATABASE_URI"] = "postgresql://{}:{}@{}:{}/{}".format(
|
||||
config["database_user"],
|
||||
@@ -75,7 +121,6 @@ app.register_blueprint(blueprint)
|
||||
celery = Celery(app.name, broker=app.config["CELERY_BROKER_URL"])
|
||||
celery.conf.update(app.config)
|
||||
|
||||
|
||||
#
|
||||
# Custom decorators
|
||||
#
|
||||
@@ -142,7 +187,7 @@ def Authenticator(function):
|
||||
#
|
||||
# Job functions
|
||||
#
|
||||
@celery.task(bind=True)
|
||||
@celery.task(name="provisioner.create", bind=True)
|
||||
def create_vm(
|
||||
self, vm_name, profile_name, define_vm=True, start_vm=True, script_run_args=[]
|
||||
):
|
||||
@@ -156,7 +201,7 @@ def create_vm(
|
||||
)
|
||||
|
||||
|
||||
@celery.task(bind=True)
|
||||
@celery.task(name="storage.benchmark", bind=True)
|
||||
def run_benchmark(self, pool):
|
||||
return api_benchmark.run_benchmark(self, pool)
|
||||
|
||||
@@ -4281,15 +4326,17 @@ class API_Storage_Ceph_OSD_Root(Resource):
|
||||
"required": True,
|
||||
"helptext": "An OSD weight must be specified.",
|
||||
},
|
||||
{
|
||||
"name": "ext_db",
|
||||
"required": False,
|
||||
"helptext": "Whether to use an external OSD DB LV device.",
|
||||
},
|
||||
{
|
||||
"name": "ext_db_ratio",
|
||||
"required": False,
|
||||
"helptext": "Decimal size ratio of the external OSD DB LV device.",
|
||||
},
|
||||
{
|
||||
"name": "ext_db_size",
|
||||
"required": False,
|
||||
},
|
||||
{
|
||||
"name": "osd_count",
|
||||
"required": False,
|
||||
},
|
||||
]
|
||||
)
|
||||
@@ -4297,7 +4344,7 @@ class API_Storage_Ceph_OSD_Root(Resource):
|
||||
def post(self, reqargs):
|
||||
"""
|
||||
Add a Ceph OSD to the cluster
|
||||
Note: This task may take up to 30s to complete and return
|
||||
Note: This task may take up to 60s to complete and return
|
||||
---
|
||||
tags:
|
||||
- storage / ceph
|
||||
@@ -4317,16 +4364,21 @@ class API_Storage_Ceph_OSD_Root(Resource):
|
||||
type: number
|
||||
required: true
|
||||
description: The Ceph CRUSH weight for the OSD
|
||||
- in: query
|
||||
name: ext_db
|
||||
type: boolean
|
||||
required: false
|
||||
description: Whether to use an external OSD DB LV device
|
||||
- in: query
|
||||
name: ext_db_ratio
|
||||
type: float
|
||||
required: false
|
||||
description: Decimal ratio of total OSD size for the external OSD DB LV device, default 0.05 (5%)
|
||||
description: If set, creates an OSD DB LV with this decimal ratio of DB to total OSD size (usually 0.05 i.e. 5%); mutually exclusive with ext_db_size
|
||||
- in: query
|
||||
name: ext_db_size
|
||||
type: float
|
||||
required: false
|
||||
description: If set, creates an OSD DB LV with this explicit size in human units (e.g. 1024M, 20G); mutually exclusive with ext_db_ratio
|
||||
- in: query
|
||||
name: osd_count
|
||||
type: integer
|
||||
required: false
|
||||
description: If set, create this many OSDs on the block device instead of 1; usually 2 or 4 depending on size
|
||||
responses:
|
||||
200:
|
||||
description: OK
|
||||
@@ -4343,8 +4395,9 @@ class API_Storage_Ceph_OSD_Root(Resource):
|
||||
reqargs.get("node", None),
|
||||
reqargs.get("device", None),
|
||||
reqargs.get("weight", None),
|
||||
reqargs.get("ext_db", False),
|
||||
float(reqargs.get("ext_db_ratio", 0.05)),
|
||||
reqargs.get("ext_db_ratio", None),
|
||||
reqargs.get("ext_db_size", None),
|
||||
reqargs.get("osd_count", None),
|
||||
)
|
||||
|
||||
|
||||
@@ -4371,14 +4424,25 @@ class API_Storage_Ceph_OSD_Element(Resource):
|
||||
@RequestParser(
|
||||
[
|
||||
{
|
||||
"name": "device",
|
||||
"name": "new_device",
|
||||
"required": True,
|
||||
"helptext": "A valid device or detect string must be specified.",
|
||||
},
|
||||
{
|
||||
"name": "old_device",
|
||||
"required": False,
|
||||
},
|
||||
{
|
||||
"name": "weight",
|
||||
"required": True,
|
||||
"helptext": "An OSD weight must be specified.",
|
||||
"required": False,
|
||||
},
|
||||
{
|
||||
"name": "ext_db_ratio",
|
||||
"required": False,
|
||||
},
|
||||
{
|
||||
"name": "ext_db_size",
|
||||
"required": False,
|
||||
},
|
||||
{
|
||||
"name": "yes-i-really-mean-it",
|
||||
@@ -4397,15 +4461,30 @@ class API_Storage_Ceph_OSD_Element(Resource):
|
||||
- storage / ceph
|
||||
parameters:
|
||||
- in: query
|
||||
name: device
|
||||
name: new_device
|
||||
type: string
|
||||
required: true
|
||||
description: The block device (e.g. "/dev/sdb", "/dev/disk/by-path/...", etc.) or detect string ("detect:NAME:SIZE:ID") to replace the OSD onto
|
||||
- in: query
|
||||
name: old_device
|
||||
type: string
|
||||
required: false
|
||||
description: The block device (e.g. "/dev/sdb", "/dev/disk/by-path/...", etc.) or detect string ("detect:NAME:SIZE:ID") of the original OSD
|
||||
- in: query
|
||||
name: weight
|
||||
type: number
|
||||
required: true
|
||||
description: The Ceph CRUSH weight for the replaced OSD
|
||||
required: false
|
||||
description: The Ceph CRUSH weight for the replacement OSD
|
||||
- in: query
|
||||
name: ext_db_ratio
|
||||
type: float
|
||||
required: false
|
||||
description: If set, creates an OSD DB LV for the replacement OSD with this decimal ratio of DB to total OSD size (usually 0.05 i.e. 5%); if unset, use existing ext_db_size
|
||||
- in: query
|
||||
name: ext_db_size
|
||||
type: float
|
||||
required: false
|
||||
description: If set, creates an OSD DB LV for the replacement OSD with this explicit size in human units (e.g. 1024M, 20G); if unset, use existing ext_db_size
|
||||
responses:
|
||||
200:
|
||||
description: OK
|
||||
@@ -4420,8 +4499,11 @@ class API_Storage_Ceph_OSD_Element(Resource):
|
||||
"""
|
||||
return api_helper.ceph_osd_replace(
|
||||
osdid,
|
||||
reqargs.get("device", None),
|
||||
reqargs.get("new_device"),
|
||||
reqargs.get("old_device", None),
|
||||
reqargs.get("weight", None),
|
||||
reqargs.get("ext_db_ratio", None),
|
||||
reqargs.get("ext_db_size", None),
|
||||
)
|
||||
|
||||
@RequestParser(
|
||||
|
||||
@@ -1366,12 +1366,26 @@ def ceph_osd_db_vg_add(zkhandler, node, device):
|
||||
|
||||
|
||||
@ZKConnection(config)
|
||||
def ceph_osd_add(zkhandler, node, device, weight, ext_db_flag=False, ext_db_ratio=0.05):
|
||||
def ceph_osd_add(
|
||||
zkhandler,
|
||||
node,
|
||||
device,
|
||||
weight,
|
||||
ext_db_ratio=None,
|
||||
ext_db_size=None,
|
||||
split_count=None,
|
||||
):
|
||||
"""
|
||||
Add a Ceph OSD to the PVC Ceph storage cluster.
|
||||
"""
|
||||
retflag, retdata = pvc_ceph.add_osd(
|
||||
zkhandler, node, device, weight, ext_db_flag, ext_db_ratio
|
||||
zkhandler,
|
||||
node,
|
||||
device,
|
||||
weight,
|
||||
ext_db_ratio,
|
||||
ext_db_size,
|
||||
split_count,
|
||||
)
|
||||
|
||||
if retflag:
|
||||
@@ -1384,11 +1398,21 @@ def ceph_osd_add(zkhandler, node, device, weight, ext_db_flag=False, ext_db_rati
|
||||
|
||||
|
||||
@ZKConnection(config)
|
||||
def ceph_osd_replace(zkhandler, osd_id, device, weight):
|
||||
def ceph_osd_replace(
|
||||
zkhandler,
|
||||
osd_id,
|
||||
new_device,
|
||||
old_device=None,
|
||||
weight=None,
|
||||
ext_db_ratio=None,
|
||||
ext_db_size=None,
|
||||
):
|
||||
"""
|
||||
Replace a Ceph OSD in the PVC Ceph storage cluster.
|
||||
"""
|
||||
retflag, retdata = pvc_ceph.replace_osd(zkhandler, osd_id, device, weight)
|
||||
retflag, retdata = pvc_ceph.replace_osd(
|
||||
zkhandler, osd_id, new_device, old_device, weight, ext_db_ratio, ext_db_size
|
||||
)
|
||||
|
||||
if retflag:
|
||||
retcode = 200
|
||||
|
||||
@@ -169,9 +169,10 @@ def restart_opt(function):
|
||||
@wraps(function)
|
||||
def confirm_action(*args, **kwargs):
|
||||
restart_state = kwargs.get("restart_flag", None)
|
||||
live_state = kwargs.get("live_flag", False)
|
||||
|
||||
if restart_state is None:
|
||||
# Neither "--restart" or "--no-restart" was passed: prompt for restart or restart if "--unsafe"
|
||||
if restart_state is None and not live_state:
|
||||
# Neither "--restart" or "--no-restart" was passed, and "--no-live" was passed: prompt for restart or restart if "--unsafe"
|
||||
try:
|
||||
click.confirm(
|
||||
f"Restart VM {kwargs.get('domain')} to apply changes",
|
||||
@@ -179,6 +180,7 @@ def restart_opt(function):
|
||||
abort=True,
|
||||
)
|
||||
kwargs["restart_flag"] = True
|
||||
kwargs["confirm_flag"] = True
|
||||
except Exception:
|
||||
echo(CLI_CONFIG, "Changes will be applied on next VM start/restart.")
|
||||
kwargs["restart_flag"] = False
|
||||
@@ -3362,9 +3364,11 @@ def cli_storage_osd():
|
||||
)
|
||||
def cli_storage_osd_create_db_vg(node, device):
|
||||
"""
|
||||
Create a new Ceph OSD database volume group on node NODE with block device DEVICE. DEVICE must be a valid block device path (e.g. '/dev/nvme0n1', '/dev/disk/by-path/...') or a "detect" string. Using partitions is not supported.
|
||||
Create a new Ceph OSD database volume group on node NODE with block device DEVICE.
|
||||
|
||||
This volume group will be used for Ceph OSD database and WAL functionality if the '--ext-db' flag is passed to newly-created OSDs during 'pvc storage osd add'. DEVICE should be an extremely fast SSD device (NVMe, Intel Optane, etc.) which is significantly faster than the normal OSD disks and with very high write endurance.
|
||||
DEVICE must be a valid block device path (e.g. '/dev/nvme0n1', '/dev/disk/by-path/...') or a "detect" string. Partitions are NOT supported. A "detect" string is a string in the form "detect:<NAME>:<HUMAN-SIZE>:<ID>". For details, see 'pvc storage osd add --help'. The path or detect string must be valid on the current node housing the OSD.
|
||||
|
||||
This volume group will be used for Ceph OSD database and WAL functionality if an '--ext-db-*' flag is passed to newly-created OSDs during 'pvc storage osd add'. DEVICE should be an extremely fast SSD device (NVMe, Intel Optane, etc.) which is significantly faster than the normal OSD disks and with very high write endurance. For more details, see the "pvc storage osd add" command help.
|
||||
|
||||
Only one OSD database volume group on a single physical device, named "osd-db", is supported per node, so it must be fast and large enough to act as an effective OSD database device for all OSDs on the node. Attempting to add additional database volume groups after the first will result in an error.
|
||||
|
||||
@@ -3390,42 +3394,70 @@ def cli_storage_osd_create_db_vg(node, device):
|
||||
"weight",
|
||||
default=1.0,
|
||||
show_default=True,
|
||||
help="Weight of the OSD within the CRUSH map.",
|
||||
)
|
||||
@click.option(
|
||||
"-d",
|
||||
"--ext-db",
|
||||
"ext_db_flag",
|
||||
is_flag=True,
|
||||
default=False,
|
||||
help="Use an external database logical volume for this OSD.",
|
||||
help="Weight of the OSD(s) within the CRUSH map.",
|
||||
)
|
||||
@click.option(
|
||||
"-r",
|
||||
"--ext-db-ratio",
|
||||
"ext_db_ratio",
|
||||
default=0.05,
|
||||
show_default=True,
|
||||
default=None,
|
||||
type=float,
|
||||
help="Decimal ratio of the external database logical volume to the OSD size.",
|
||||
help="Create an external database logical volume for the OSD(s) with this decimal ratio of the DB LV to the OSD size.",
|
||||
)
|
||||
@confirm_opt("Destroy all data on and create new OSD on node {node} device {device}")
|
||||
def cli_storage_osd_add(node, device, weight, ext_db_flag, ext_db_ratio):
|
||||
@click.option(
|
||||
"-s",
|
||||
"--ext-db-size",
|
||||
"ext_db_size",
|
||||
default=None,
|
||||
show_default=True,
|
||||
help="Create an external database logical volume for the OSD(s) with this human-unit size.",
|
||||
)
|
||||
@click.option(
|
||||
"-c",
|
||||
"--osd-count",
|
||||
"osd_count",
|
||||
default=None,
|
||||
show_default=False,
|
||||
type=int,
|
||||
help="Split (an NVMe) disk into this many OSDs.",
|
||||
)
|
||||
@confirm_opt("Destroy all data on and create new OSD(s) on node {node} device {device}")
|
||||
def cli_storage_osd_add(node, device, weight, ext_db_ratio, ext_db_size, osd_count):
|
||||
"""
|
||||
Add a new Ceph OSD on node NODE with block device DEVICE. DEVICE must be a valid block device path (e.g. '/dev/sda', '/dev/nvme0n1', '/dev/disk/by-path/...', '/dev/disk/by-id/...') or a "detect" string. Using partitions is not supported.
|
||||
Add a new Ceph OSD on node NODE with block device DEVICE.
|
||||
|
||||
DEVICE must be a valid block device path (e.g. '/dev/nvme0n1', '/dev/disk/by-path/...') or a "detect" string. Partitions are NOT supported. A "detect" string is a string in the form "detect:<NAME>:<HUMAN-SIZE>:<ID>". The path or detect string must be valid on the current node housing the OSD.
|
||||
|
||||
A "detect" string is a string in the form "detect:<NAME>:<HUMAN-SIZE>:<ID>". Detect strings allow for automatic determination of Linux block device paths from known basic information about disks by leveraging "lsscsi" on the target host. The "NAME" should be some descriptive identifier, for instance the manufacturer (e.g. "INTEL"), the "HUMAN-SIZE" should be the labeled human-readable size of the device (e.g. "480GB", "1.92TB"), and "ID" specifies the Nth 0-indexed device which matches the "NAME" and "HUMAN-SIZE" values (e.g. "2" would match the third device with the corresponding "NAME" and "HUMAN-SIZE"). When matching against sizes, there is +/- 3% flexibility to account for base-1000 vs. base-1024 differences and rounding errors. The "NAME" may contain whitespace but if so the entire detect string should be quoted, and is case-insensitive. More information about detect strings can be found in the pvcbootstrapd manual.
|
||||
|
||||
The weight of an OSD should reflect the ratio of the OSD to other OSDs in the storage cluster. For example, if all OSDs are the same size as recommended for PVC, 1 (the default) is a valid weight so that all are treated identically. If a new OSD is added later which is 4x the size of the existing OSDs, the new OSD's weight should then be 4 to tell the cluster that 4x the data can be stored on the OSD. Weights can also be tweaked for performance reasons, since OSDs with more data will incur more I/O load. For more information about CRUSH weights, please see the Ceph documentation.
|
||||
The weight of an OSD should reflect the ratio of the size of the OSD to the other OSDs in the storage cluster. For example, with a 200GB disk and a 400GB disk in each node, the 400GB disk should have twice the weight as the 200GB disk. For more information about CRUSH weights, please see the Ceph documentation.
|
||||
|
||||
If '--ext-db' is specified, the OSD database and WAL will be placed on a new logical volume in NODE's OSD database volume group. An OSD database volume group must exist on the node or the OSD creation will fail. See the 'pvc storage osd create-db-vg' command for more details.
|
||||
The "-r"/"--ext-db-ratio" or "-s"/"--ext-db-size" options, if specified, and if a OSD DB VG exists on the node (see "pvc storage osd create-db-vg"), will instruct the OSD to locate its RocksDB database and WAL on a new logical volume on that OSD DB VG. If "-r"/"--ext-db-ratio" is specified, the sizing of this DB LV will be the given ratio (specified as a decimal percentage e.g. 0.05 for 5%) of the size of the OSD (e.g. 0.05 on a 1TB SSD will create a 50GB LV). If "-s"/"--ext-db-size" is specified, the sizing of this DB LV will be the given human-unit size (e.g. 1024M, 20GB, etc.). An 0.05 ratio is recommended; at least 0.02 is required, and more than 0.05 can potentially increase performance in write-heavy workloads.
|
||||
|
||||
The default '--ext-db-ratio' of 0.05 (5%) is sufficient for most RBD workloads and OSD sizes, though this can be adjusted based on the sizes of the OSD(s) and the underlying database device. Ceph documentation recommends at least 0.02 (2%) for RBD use-cases, and higher values may improve WAL performance under write-heavy workloads with fewer OSDs per node.
|
||||
WARNING: An external DB carries important caveats. An external DB is only suggested for relatively slow OSD devices (e.g. SATA SSDs) when there is also a much faster, more robust, but smaller storage device in the system (e.g. an NVMe or 3DXPoint SSD) which can accelerate the OSD. An external DB is NOT recommended for NVMe OSDs as this will hamper performance and reliability. Additionally, it is important to note that the OSD will depend entirely on this external DB device; they cannot be separated without destroying the OSD, and the OSD cannot function without the external DB device, thus introducing a single point of failure. Use this feature with extreme care.
|
||||
|
||||
The "-c"/"--osd-count" option allows the splitting of a single block device into multiple logical OSDs. This is recommended in the Ceph literature for extremely fast OSD block devices (i.e. NVMe or 3DXPoint) which can saturate a single OSD process. Usually, 2 or 4 OSDs is recommended, based on the size and performance of the OSD disk; more than 4 OSDs per volume is not recommended, and this option is not recommended for SATA SSDs.
|
||||
|
||||
Note that, if "-c"/"--osd-count" is specified, the provided "-w"/"--weight" will be the weight of EACH created OSD, not the block device as a whole. Ensure you take this into account if mixing and matching OSD block devices. Additionally, if "-r"/"--ext-db-ratio" or "-s"/"--ext-db-size" is specified, one DB LV will be created for EACH created OSD, of the given ratio/size per OSD; ratios are calculated from the OSD size, not the underlying device.
|
||||
|
||||
NOTE: This command may take a long time to complete. Observe the node logs of the hosting OSD node for detailed status.
|
||||
"""
|
||||
|
||||
retcode, retmsg = pvc.lib.storage.ceph_osd_add(
|
||||
CLI_CONFIG, node, device, weight, ext_db_flag, ext_db_ratio
|
||||
echo(
|
||||
CLI_CONFIG,
|
||||
"Waiting for node task to complete, this may take some time... ",
|
||||
newline=False,
|
||||
)
|
||||
retcode, retmsg = pvc.lib.storage.ceph_osd_add(
|
||||
CLI_CONFIG,
|
||||
node,
|
||||
device,
|
||||
weight,
|
||||
ext_db_ratio,
|
||||
ext_db_size,
|
||||
osd_count,
|
||||
)
|
||||
echo(CLI_CONFIG, "done.")
|
||||
finish(retcode, retmsg)
|
||||
|
||||
|
||||
@@ -3435,30 +3467,68 @@ def cli_storage_osd_add(node, device, weight, ext_db_flag, ext_db_ratio):
|
||||
@click.command(name="replace", short_help="Replace OSD block device.")
|
||||
@connection_req
|
||||
@click.argument("osdid")
|
||||
@click.argument("device")
|
||||
@click.argument("new_device")
|
||||
@click.option(
|
||||
"-o",
|
||||
"--old-device",
|
||||
"old_device",
|
||||
default=None,
|
||||
help="The old OSD block device, if known and valid",
|
||||
)
|
||||
@click.option(
|
||||
"-w",
|
||||
"--weight",
|
||||
"weight",
|
||||
default=1.0,
|
||||
show_default=True,
|
||||
help="New weight of the OSD within the CRUSH map.",
|
||||
default=None,
|
||||
help="New weight of the OSD(s) within the CRUSH map; if unset, old weight is used",
|
||||
)
|
||||
@confirm_opt("Replace OSD {osdid} with block device {device} weight {weight}")
|
||||
def cli_storage_osd_replace(osdid, device, weight):
|
||||
@click.option(
|
||||
"-r",
|
||||
"--ext-db-ratio",
|
||||
"ext_db_ratio",
|
||||
default=None,
|
||||
help="Create a new external database logical volume for the OSD(s) with this decimal ratio of the DB LV to the OSD size; if unset, old ext_db_size is used",
|
||||
)
|
||||
@click.option(
|
||||
"-s",
|
||||
"--ext-db-size",
|
||||
"ext_db_size",
|
||||
default=None,
|
||||
help="Create a new external database logical volume for the OSD(s) with this human-unit size; if unset, old ext_db_size is used",
|
||||
)
|
||||
@confirm_opt(
|
||||
"Destroy all data on and replace OSD {osdid} (and peer split OSDs) with new device {new_device}"
|
||||
)
|
||||
def cli_storage_osd_replace(
|
||||
osdid, new_device, old_device, weight, ext_db_ratio, ext_db_size
|
||||
):
|
||||
"""
|
||||
Replace the block device of an existing OSD with ID OSDID with DEVICE. Use this command to replace a failed or smaller OSD block device with a new one.
|
||||
Replace the block device of an existing OSD with ID OSDID, and any peer split OSDs with the same block device, with NEW_DEVICE. Use this command to replace a failed or smaller OSD block device with a new one in one command.
|
||||
|
||||
DEVICE must be a valid block device path (e.g. '/dev/sda', '/dev/nvme0n1', '/dev/disk/by-path/...', '/dev/disk/by-id/...') or a "detect" string. Using partitions is not supported. A "detect" string is a string in the form "detect:<NAME>:<HUMAN-SIZE>:<ID>". For details, see 'pvc storage osd add --help'.
|
||||
DEVICE must be a valid block device path (e.g. '/dev/nvme0n1', '/dev/disk/by-path/...') or a "detect" string. Partitions are NOT supported. A "detect" string is a string in the form "detect:<NAME>:<HUMAN-SIZE>:<ID>". For details, see 'pvc storage osd add --help'. The path or detect string must be valid on the current node housing the OSD.
|
||||
|
||||
The weight of an OSD should reflect the ratio of the OSD to other OSDs in the storage cluster. For details, see 'pvc storage osd add --help'. Note that the current weight must be explicitly specified if it differs from the default.
|
||||
If OSDID is part of a split OSD set, any peer split OSDs with the same configured block device will be replaced as well. The split count will be retained and cannot be changed with this command; to do so, all OSDs in the split OSD set must be removed and new OSD(s) created.
|
||||
|
||||
Existing IDs, external DB devices, etc. of the OSD will be preserved; data will be lost and rebuilt from the remaining healthy OSDs.
|
||||
WARNING: This operation entails (and is functionally equivalent to) a removal and recreation of the specified OSD and, if applicable, all peer split OSDs. This is an intensive and potentially destructive action. Ensure that the cluster is otherwise healthy before proceeding, and ensure the subsequent rebuild completes successfully. Do not attempt this operation on a severely degraded cluster without first considering the possible data loss implications.
|
||||
|
||||
If the "-o"/"--old-device" option is specified, is a valid block device on the node, is readable/accessible, and contains the metadata for the specified OSD, it will be zapped. If this option is not specified, the system will try to find the old block device automatically to zap it. If it can't be found, the OSD will simply be removed from the CRUSH map and PVC database before recreating. This option can provide a cleaner deletion when replacing a working device that has a different block path, but is otherwise unnecessary.
|
||||
|
||||
The "-w"/"--weight", "-r"/"--ext-db-ratio", and "-s"/"--ext-db-size" allow overriding the existing weight and external DB LV for the OSD(s), if desired. If unset, the existing weight and external DB LV size (if applicable) will be used for the replacement OSD(s) instead.
|
||||
|
||||
NOTE: If neither the "-r"/"--ext-db-ratio" nor "-s"/"--ext-db-size" option is specified, and the OSD(s) had an external DB LV, it cannot be removed and a new DB LV will be created for the replacement OSD(s); this cannot be avoided. However, if the OSD(s) did not have an external DB LV, and one of these options is specified, a new DB LV will be added to the new OSD.
|
||||
|
||||
NOTE: This command may take a long time to complete. Observe the node logs of the hosting OSD node for detailed status.
|
||||
"""
|
||||
|
||||
retcode, retmsg = pvc.lib.storage.ceph_osd_replace(
|
||||
CLI_CONFIG, osdid, device, weight
|
||||
echo(
|
||||
CLI_CONFIG,
|
||||
"Waiting for node task to complete, this may take some time... ",
|
||||
newline=False,
|
||||
)
|
||||
retcode, retmsg = pvc.lib.storage.ceph_osd_replace(
|
||||
CLI_CONFIG, osdid, new_device, old_device, weight, ext_db_ratio, ext_db_size
|
||||
)
|
||||
echo(CLI_CONFIG, "done.")
|
||||
finish(retcode, retmsg)
|
||||
|
||||
|
||||
@@ -3474,13 +3544,22 @@ def cli_storage_osd_refresh(osdid, device):
|
||||
"""
|
||||
Refresh (reimport) the block DEVICE of an existing OSD with ID OSDID. Use this command to reimport a working OSD into a rebuilt/replaced node.
|
||||
|
||||
DEVICE must be a valid block device path (e.g. '/dev/sda', '/dev/nvme0n1', '/dev/disk/by-path/...', '/dev/disk/by-id/...') or a "detect" string. Using partitions is not supported. A "detect" string is a string in the form "detect:<NAME>:<HUMAN-SIZE>:<ID>". For details, see 'pvc storage osd add --help'.
|
||||
DEVICE must be a valid block device path (e.g. '/dev/nvme0n1', '/dev/disk/by-path/...') or a "detect" string. Partitions are NOT supported. A "detect" string is a string in the form "detect:<NAME>:<HUMAN-SIZE>:<ID>". For details, see 'pvc storage osd add --help'. The path or detect string must be valid on the current node housing the OSD.
|
||||
|
||||
Existing data, IDs, weights, etc. of the OSD will be preserved.
|
||||
Existing data, IDs, weights, DB LVs, etc. of the OSD will be preserved. Any split peer OSD(s) on the same block device will also be automatically refreshed.
|
||||
|
||||
NOTE: If a device had an external DB device, this is not automatically handled at this time. It is best to remove and re-add the OSD instead.
|
||||
NOTE: If the OSD(s) had an external DB device, it must exist before refreshing the OSD. If it can't be found, the OSD cannot be reimported and must be recreated.
|
||||
|
||||
NOTE: This command may take a long time to complete. Observe the node logs of the hosting OSD node for detailed status.
|
||||
"""
|
||||
|
||||
echo(
|
||||
CLI_CONFIG,
|
||||
"Waiting for node task to complete, this may take some time... ",
|
||||
newline=False,
|
||||
)
|
||||
retcode, retmsg = pvc.lib.storage.ceph_osd_refresh(CLI_CONFIG, osdid, device)
|
||||
echo(CLI_CONFIG, "done.")
|
||||
finish(retcode, retmsg)
|
||||
|
||||
|
||||
@@ -3506,9 +3585,17 @@ def cli_storage_osd_remove(osdid, force_flag):
|
||||
DANGER: This will completely remove the OSD from the cluster. OSDs will rebalance which will negatively affect performance and available space. It is STRONGLY RECOMMENDED to set an OSD out (using 'pvc storage osd out') and allow the cluster to fully rebalance, verified with 'pvc storage status', before removing an OSD.
|
||||
|
||||
NOTE: The "-f"/"--force" option is useful after replacing a failed node, to ensure the OSD is removed even if the OSD in question does not properly exist on the node after a rebuild.
|
||||
|
||||
NOTE: This command may take a long time to complete. Observe the node logs of the hosting OSD node for detailed status.
|
||||
"""
|
||||
|
||||
echo(
|
||||
CLI_CONFIG,
|
||||
"Waiting for node task to complete, this may take some time... ",
|
||||
newline=False,
|
||||
)
|
||||
retcode, retmsg = pvc.lib.storage.ceph_osd_remove(CLI_CONFIG, osdid, force_flag)
|
||||
echo(CLI_CONFIG, "done.")
|
||||
finish(retcode, retmsg)
|
||||
|
||||
|
||||
@@ -5577,7 +5664,7 @@ def cli_connection_add(
|
||||
scheme = "https" if ssl_flag else "http"
|
||||
|
||||
# Get the store data
|
||||
connections_config = get_store(store_path)
|
||||
connections_config = get_store(CLI_CONFIG["store_path"])
|
||||
|
||||
# Add (or update) the new connection details
|
||||
connections_config[name] = {
|
||||
@@ -5589,7 +5676,7 @@ def cli_connection_add(
|
||||
}
|
||||
|
||||
# Update the store data
|
||||
update_store(store_path, connections_config)
|
||||
update_store(CLI_CONFIG["store_path"], connections_config)
|
||||
|
||||
finish(
|
||||
True,
|
||||
@@ -5613,7 +5700,7 @@ def cli_connection_remove(
|
||||
"""
|
||||
|
||||
# Get the store data
|
||||
connections_config = get_store(store_path)
|
||||
connections_config = get_store(CLI_CONFIG["store_path"])
|
||||
|
||||
# Remove the entry matching the name
|
||||
try:
|
||||
@@ -5622,7 +5709,7 @@ def cli_connection_remove(
|
||||
finish(False, f"""No connection found with name "{name}" in local database""")
|
||||
|
||||
# Update the store data
|
||||
update_store(store_path, connections_config)
|
||||
update_store(CLI_CONFIG["store_path"], connections_config)
|
||||
|
||||
finish(True, f"""Removed connection "{name}" from client database""")
|
||||
|
||||
@@ -5665,7 +5752,7 @@ def cli_connection_list(
|
||||
"json-pretty": Output in formatted JSON.
|
||||
"""
|
||||
|
||||
connections_config = get_store(store_path)
|
||||
connections_config = get_store(CLI_CONFIG["store_path"])
|
||||
connections_data = cli_connection_list_parser(connections_config, show_keys_flag)
|
||||
finish(True, connections_data, format_function)
|
||||
|
||||
@@ -5703,7 +5790,7 @@ def cli_connection_detail(
|
||||
newline=False,
|
||||
stderr=True,
|
||||
)
|
||||
connections_config = get_store(store_path)
|
||||
connections_config = get_store(CLI_CONFIG["store_path"])
|
||||
connections_data = cli_connection_detail_parser(connections_config)
|
||||
echo(CLI_CONFIG, "done.", stderr=True)
|
||||
echo(CLI_CONFIG, "", stderr=True)
|
||||
@@ -5843,6 +5930,7 @@ def cli(
|
||||
CLI_CONFIG["colour"] = _colour
|
||||
CLI_CONFIG["quiet"] = _quiet
|
||||
CLI_CONFIG["silent"] = _silent
|
||||
CLI_CONFIG["store_path"] = store_path
|
||||
|
||||
audit()
|
||||
|
||||
|
||||
@@ -208,7 +208,7 @@ def wait_for_provisioner(CLI_CONFIG, task_id):
|
||||
)
|
||||
if task_status.get("state") != "PENDING":
|
||||
break
|
||||
echo(".", newline=False)
|
||||
echo(CLI_CONFIG, ".", newline=False)
|
||||
echo(CLI_CONFIG, " done.")
|
||||
echo(CLI_CONFIG, "")
|
||||
|
||||
|
||||
@@ -231,21 +231,27 @@ def ceph_osd_list(config, limit):
|
||||
return False, response.json().get("message", "")
|
||||
|
||||
|
||||
def ceph_osd_add(config, node, device, weight, ext_db_flag, ext_db_ratio):
|
||||
def ceph_osd_add(config, node, device, weight, ext_db_ratio, ext_db_size, osd_count):
|
||||
"""
|
||||
Add new Ceph OSD
|
||||
|
||||
API endpoint: POST /api/v1/storage/ceph/osd
|
||||
API arguments: node={node}, device={device}, weight={weight}, ext_db={ext_db_flag}, ext_db_ratio={ext_db_ratio}
|
||||
API arguments: node={node}, device={device}, weight={weight}, [ext_db_ratio={ext_db_ratio}, ext_db_size={ext_db_size}, osd_count={osd_count}]
|
||||
API schema: {"message":"{data}"}
|
||||
"""
|
||||
params = {
|
||||
"node": node,
|
||||
"device": device,
|
||||
"weight": weight,
|
||||
"ext_db": ext_db_flag,
|
||||
"ext_db_ratio": ext_db_ratio,
|
||||
}
|
||||
|
||||
if ext_db_ratio is not None:
|
||||
params["ext_db_ratio"] = ext_db_ratio
|
||||
if ext_db_size is not None:
|
||||
params["ext_db_size"] = ext_db_size
|
||||
if osd_count is not None:
|
||||
params["osd_count"] = osd_count
|
||||
|
||||
response = call_api(config, "post", "/storage/ceph/osd", params=params)
|
||||
|
||||
if response.status_code == 200:
|
||||
@@ -256,15 +262,30 @@ def ceph_osd_add(config, node, device, weight, ext_db_flag, ext_db_ratio):
|
||||
return retstatus, response.json().get("message", "")
|
||||
|
||||
|
||||
def ceph_osd_replace(config, osdid, device, weight):
|
||||
def ceph_osd_replace(
|
||||
config, osdid, new_device, old_device, weight, ext_db_ratio, ext_db_size
|
||||
):
|
||||
"""
|
||||
Replace an existing Ceph OSD with a new device
|
||||
|
||||
API endpoint: POST /api/v1/storage/ceph/osd/{osdid}
|
||||
API arguments: device={device}, weight={weight}
|
||||
API arguments: new_device, [old_device={old_device}, weight={weight}, ext_db_ratio={ext_db_ratio}, ext_db_size={ext_db_size}]
|
||||
API schema: {"message":"{data}"}
|
||||
"""
|
||||
params = {"device": device, "weight": weight, "yes-i-really-mean-it": "yes"}
|
||||
params = {
|
||||
"new_device": new_device,
|
||||
"yes-i-really-mean-it": "yes",
|
||||
}
|
||||
|
||||
if old_device is not None:
|
||||
params["old_device"] = old_device
|
||||
if weight is not None:
|
||||
params["weight"] = weight
|
||||
if ext_db_ratio is not None:
|
||||
params["ext_db_ratio"] = ext_db_ratio
|
||||
if ext_db_size is not None:
|
||||
params["ext_db_size"] = ext_db_size
|
||||
|
||||
response = call_api(config, "post", f"/storage/ceph/osd/{osdid}", params=params)
|
||||
|
||||
if response.status_code == 200:
|
||||
@@ -400,7 +421,6 @@ def format_list_osd(config, osd_list):
|
||||
osd_used_length = 5
|
||||
osd_free_length = 6
|
||||
osd_util_length = 6
|
||||
osd_var_length = 5
|
||||
osd_wrops_length = 4
|
||||
osd_wrdata_length = 5
|
||||
osd_rdops_length = 4
|
||||
@@ -433,8 +453,14 @@ def format_list_osd(config, osd_list):
|
||||
)
|
||||
continue
|
||||
|
||||
if osd_information["is_split"]:
|
||||
osd_information["device"] = f"{osd_information['device']} [s]"
|
||||
|
||||
# Deal with the size to human readable
|
||||
osd_information["stats"]["size"] = osd_information["stats"]["kb"] * 1024
|
||||
if isinstance(osd_information["stats"]["kb"], int):
|
||||
osd_information["stats"]["size"] = osd_information["stats"]["kb"] * 1024
|
||||
else:
|
||||
osd_information["stats"]["size"] = "N/A"
|
||||
for datatype in "size", "wr_data", "rd_data":
|
||||
databytes = osd_information["stats"][datatype]
|
||||
if isinstance(databytes, int):
|
||||
@@ -503,10 +529,6 @@ def format_list_osd(config, osd_list):
|
||||
if _osd_util_length > osd_util_length:
|
||||
osd_util_length = _osd_util_length
|
||||
|
||||
_osd_var_length = len(str(osd_information["stats"]["var"])) + 1
|
||||
if _osd_var_length > osd_var_length:
|
||||
osd_var_length = _osd_var_length
|
||||
|
||||
# Set the read/write IOPS/data and length
|
||||
_osd_wrops_length = len(osd_information["stats"]["wr_ops"]) + 1
|
||||
if _osd_wrops_length > osd_wrops_length:
|
||||
@@ -542,8 +564,7 @@ def format_list_osd(config, osd_list):
|
||||
+ osd_used_length
|
||||
+ osd_free_length
|
||||
+ osd_util_length
|
||||
+ osd_var_length
|
||||
+ 7,
|
||||
+ 6,
|
||||
read_header_length=osd_rdops_length + osd_rddata_length + 1,
|
||||
write_header_length=osd_wrops_length + osd_wrdata_length + 1,
|
||||
osd_header="OSDs "
|
||||
@@ -575,8 +596,7 @@ def format_list_osd(config, osd_list):
|
||||
+ osd_used_length
|
||||
+ osd_free_length
|
||||
+ osd_util_length
|
||||
+ osd_var_length
|
||||
+ 6,
|
||||
+ 5,
|
||||
)
|
||||
]
|
||||
),
|
||||
@@ -602,7 +622,6 @@ def format_list_osd(config, osd_list):
|
||||
{osd_used: <{osd_used_length}} \
|
||||
{osd_free: <{osd_free_length}} \
|
||||
{osd_util: <{osd_util_length}} \
|
||||
{osd_var: <{osd_var_length}} \
|
||||
{osd_rdops: <{osd_rdops_length}} \
|
||||
{osd_rddata: <{osd_rddata_length}} \
|
||||
{osd_wrops: <{osd_wrops_length}} \
|
||||
@@ -623,7 +642,6 @@ def format_list_osd(config, osd_list):
|
||||
osd_used_length=osd_used_length,
|
||||
osd_free_length=osd_free_length,
|
||||
osd_util_length=osd_util_length,
|
||||
osd_var_length=osd_var_length,
|
||||
osd_wrops_length=osd_wrops_length,
|
||||
osd_wrdata_length=osd_wrdata_length,
|
||||
osd_rdops_length=osd_rdops_length,
|
||||
@@ -641,7 +659,6 @@ def format_list_osd(config, osd_list):
|
||||
osd_used="Used",
|
||||
osd_free="Free",
|
||||
osd_util="Util%",
|
||||
osd_var="Var",
|
||||
osd_wrops="OPS",
|
||||
osd_wrdata="Data",
|
||||
osd_rdops="OPS",
|
||||
@@ -674,7 +691,6 @@ def format_list_osd(config, osd_list):
|
||||
{osd_used: <{osd_used_length}} \
|
||||
{osd_free: <{osd_free_length}} \
|
||||
{osd_util: <{osd_util_length}} \
|
||||
{osd_var: <{osd_var_length}} \
|
||||
{osd_rdops: <{osd_rdops_length}} \
|
||||
{osd_rddata: <{osd_rddata_length}} \
|
||||
{osd_wrops: <{osd_wrops_length}} \
|
||||
@@ -696,7 +712,6 @@ def format_list_osd(config, osd_list):
|
||||
osd_used_length=osd_used_length,
|
||||
osd_free_length=osd_free_length,
|
||||
osd_util_length=osd_util_length,
|
||||
osd_var_length=osd_var_length,
|
||||
osd_wrops_length=osd_wrops_length,
|
||||
osd_wrdata_length=osd_wrdata_length,
|
||||
osd_rdops_length=osd_rdops_length,
|
||||
@@ -716,7 +731,6 @@ def format_list_osd(config, osd_list):
|
||||
osd_used=osd_information["stats"]["used"],
|
||||
osd_free=osd_information["stats"]["avail"],
|
||||
osd_util=osd_information["stats"]["utilization"],
|
||||
osd_var=osd_information["stats"]["var"],
|
||||
osd_wrops=osd_information["stats"]["wr_ops"],
|
||||
osd_wrdata=osd_information["stats"]["wr_data"],
|
||||
osd_rdops=osd_information["stats"]["rd_ops"],
|
||||
|
||||
@@ -26,6 +26,7 @@ import time
|
||||
import math
|
||||
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from distutils.util import strtobool
|
||||
|
||||
import daemon_lib.vm as vm
|
||||
import daemon_lib.common as common
|
||||
@@ -209,8 +210,10 @@ def getClusterOSDList(zkhandler):
|
||||
|
||||
def getOSDInformation(zkhandler, osd_id):
|
||||
# Get the devices
|
||||
osd_fsid = zkhandler.read(("osd.ofsid", osd_id))
|
||||
osd_node = zkhandler.read(("osd.node", osd_id))
|
||||
osd_device = zkhandler.read(("osd.device", osd_id))
|
||||
osd_is_split = bool(strtobool(zkhandler.read(("osd.is_split", osd_id))))
|
||||
osd_db_device = zkhandler.read(("osd.db_device", osd_id))
|
||||
# Parse the stats data
|
||||
osd_stats_raw = zkhandler.read(("osd.stats", osd_id))
|
||||
@@ -218,8 +221,10 @@ def getOSDInformation(zkhandler, osd_id):
|
||||
|
||||
osd_information = {
|
||||
"id": osd_id,
|
||||
"fsid": osd_fsid,
|
||||
"node": osd_node,
|
||||
"device": osd_device,
|
||||
"is_split": osd_is_split,
|
||||
"db_device": osd_db_device,
|
||||
"stats": osd_stats,
|
||||
}
|
||||
@@ -266,7 +271,22 @@ def add_osd_db_vg(zkhandler, node, device):
|
||||
|
||||
# OSD actions use the /cmd/ceph pipe
|
||||
# These actions must occur on the specific node they reference
|
||||
def add_osd(zkhandler, node, device, weight, ext_db_flag=False, ext_db_ratio=0.05):
|
||||
def add_osd(
|
||||
zkhandler,
|
||||
node,
|
||||
device,
|
||||
weight,
|
||||
ext_db_ratio=None,
|
||||
ext_db_size=None,
|
||||
split_count=None,
|
||||
):
|
||||
# Verify that options are valid
|
||||
if ext_db_ratio is not None and ext_db_size is not None:
|
||||
return (
|
||||
False,
|
||||
"ERROR: Both an ext_db_ratio and ext_db_size were specified; choose only one.",
|
||||
)
|
||||
|
||||
# Verify the target node exists
|
||||
if not common.verifyNode(zkhandler, node):
|
||||
return False, 'ERROR: No node named "{}" is present in the cluster.'.format(
|
||||
@@ -284,8 +304,8 @@ def add_osd(zkhandler, node, device, weight, ext_db_flag=False, ext_db_ratio=0.0
|
||||
)
|
||||
|
||||
# Tell the cluster to create a new OSD for the host
|
||||
add_osd_string = "osd_add {},{},{},{},{}".format(
|
||||
node, device, weight, ext_db_flag, ext_db_ratio
|
||||
add_osd_string = "osd_add {},{},{},{},{},{}".format(
|
||||
node, device, weight, ext_db_ratio, ext_db_size, split_count
|
||||
)
|
||||
zkhandler.write([("base.cmd.ceph", add_osd_string)])
|
||||
# Wait 1/2 second for the cluster to get the message and start working
|
||||
@@ -295,14 +315,10 @@ def add_osd(zkhandler, node, device, weight, ext_db_flag=False, ext_db_ratio=0.0
|
||||
try:
|
||||
result = zkhandler.read("base.cmd.ceph").split()[0]
|
||||
if result == "success-osd_add":
|
||||
message = 'Created new OSD with block device "{}" on node "{}".'.format(
|
||||
device, node
|
||||
)
|
||||
message = f'Created {split_count} new OSD(s) on node "{node}" block device "{device}"'
|
||||
success = True
|
||||
else:
|
||||
message = (
|
||||
"ERROR: Failed to create new OSD; check node logs for details."
|
||||
)
|
||||
message = "ERROR: Failed to create OSD(s); check node logs for details."
|
||||
success = False
|
||||
except Exception:
|
||||
message = "ERROR: Command ignored by node."
|
||||
@@ -316,12 +332,18 @@ def add_osd(zkhandler, node, device, weight, ext_db_flag=False, ext_db_ratio=0.0
|
||||
return success, message
|
||||
|
||||
|
||||
def replace_osd(zkhandler, osd_id, new_device, weight):
|
||||
def replace_osd(
|
||||
zkhandler,
|
||||
osd_id,
|
||||
new_device,
|
||||
old_device=None,
|
||||
weight=None,
|
||||
ext_db_ratio=None,
|
||||
ext_db_size=None,
|
||||
):
|
||||
# Get current OSD information
|
||||
osd_information = getOSDInformation(zkhandler, osd_id)
|
||||
node = osd_information["node"]
|
||||
old_device = osd_information["device"]
|
||||
ext_db_flag = True if osd_information["db_device"] else False
|
||||
|
||||
# Verify target block device isn't in use
|
||||
block_osd = verifyOSDBlock(zkhandler, node, new_device)
|
||||
@@ -334,8 +356,8 @@ def replace_osd(zkhandler, osd_id, new_device, weight):
|
||||
)
|
||||
|
||||
# Tell the cluster to create a new OSD for the host
|
||||
replace_osd_string = "osd_replace {},{},{},{},{},{}".format(
|
||||
node, osd_id, old_device, new_device, weight, ext_db_flag
|
||||
replace_osd_string = "osd_replace {},{},{},{},{},{},{}".format(
|
||||
node, osd_id, new_device, old_device, weight, ext_db_ratio, ext_db_size
|
||||
)
|
||||
zkhandler.write([("base.cmd.ceph", replace_osd_string)])
|
||||
# Wait 1/2 second for the cluster to get the message and start working
|
||||
@@ -370,16 +392,6 @@ def refresh_osd(zkhandler, osd_id, device):
|
||||
node = osd_information["node"]
|
||||
ext_db_flag = True if osd_information["db_device"] else False
|
||||
|
||||
# Verify target block device isn't in use
|
||||
block_osd = verifyOSDBlock(zkhandler, node, device)
|
||||
if not block_osd or block_osd != osd_id:
|
||||
return (
|
||||
False,
|
||||
'ERROR: Block device "{}" on node "{}" is not used by OSD "{}"; use replace instead'.format(
|
||||
device, node, osd_id
|
||||
),
|
||||
)
|
||||
|
||||
# Tell the cluster to create a new OSD for the host
|
||||
refresh_osd_string = "osd_refresh {},{},{},{}".format(
|
||||
node, osd_id, device, ext_db_flag
|
||||
|
||||
1
daemon-common/migrations/versions/10.json
Normal file
1
daemon-common/migrations/versions/10.json
Normal file
@@ -0,0 +1 @@
|
||||
{"version": "10", "root": "", "base": {"root": "", "schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "cmd": "/cmd", "cmd.node": "/cmd/nodes", "cmd.domain": "/cmd/domains", "cmd.ceph": "/cmd/ceph", "logs": "/logs", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.health": "/ceph/health", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "logs": {"node": "", "messages": "/messages"}, "node": {"name": "", "keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "data.pvc_version": "/pvcversion", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword", "sriov": "/sriov", "sriov.pf": "/sriov/pf", "sriov.vf": "/sriov/vf", "monitoring.plugins": "/monitoring_plugins", "monitoring.data": "/monitoring_data", "monitoring.health": "/monitoring_health"}, "monitoring_plugin": {"name": "", "last_run": "/last_run", "health_delta": "/health_delta", "message": "/message", "data": "/data", "runtime": "/runtime"}, "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"}, "sriov_vf": {"phy": "", 
"pf": "/pf", "mtu": "/mtu", "mac": "/mac", "phy_mac": "/phy_mac", "config": "/config", "config.vlan_id": "/config/vlan_id", "config.vlan_qos": "/config/vlan_qos", "config.tx_rate_min": "/config/tx_rate_min", "config.tx_rate_max": "/config/tx_rate_max", "config.spoof_check": "/config/spoof_check", "config.link_state": "/config/link_state", "config.trust": "/config/trust", "config.query_rss": "/config/query_rss", "pci": "/pci", "pci.domain": "/pci/domain", "pci.bus": "/pci/bus", "pci.slot": "/pci/slot", "pci.function": "/pci/function", "used": "/used", "used_by": "/used_by"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "meta.tags": "/tags", "migrate.sync_lock": "/migrate_sync_lock"}, "tag": {"name": "", "type": "/type", "protected": "/protected"}, "network": {"vni": "", "type": "/nettype", "mtu": "/mtu", "rule": "/firewall_rules", "rule.in": "/firewall_rules/in", "rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": "/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname", "expiry": "/expiry", "client_id": "/clientid"}, "rule": {"description": "", "rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "db_device": "/db_device", "fsid": "/fsid", "ofsid": 
"/fsid/osd", "cfsid": "/fsid/cluster", "lvm": "/lvm", "vg": "/lvm/vg", "lv": "/lvm/lv", "is_split": "/is_split", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "tier": "/tier", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}}
|
||||
@@ -540,7 +540,7 @@ class ZKHandler(object):
|
||||
#
|
||||
class ZKSchema(object):
|
||||
# Current version
|
||||
_version = 9
|
||||
_version = 10
|
||||
|
||||
# Root for doing nested keys
|
||||
_schema_root = ""
|
||||
@@ -719,6 +719,7 @@ class ZKSchema(object):
|
||||
"lvm": "/lvm",
|
||||
"vg": "/lvm/vg",
|
||||
"lv": "/lvm/lv",
|
||||
"is_split": "/is_split",
|
||||
"stats": "/stats",
|
||||
},
|
||||
# The schema of an individual pool entry (/ceph/pools/{pool_name})
|
||||
@@ -963,7 +964,9 @@ class ZKSchema(object):
|
||||
kpath = f"{elem}.{ikey}"
|
||||
# Validate that the key exists for that child
|
||||
if not zkhandler.zk_conn.exists(self.path(kpath, child)):
|
||||
if elem == "pool" and ikey == "tier":
|
||||
if elem == "osd" and ikey == "is_split":
|
||||
default_data = "False"
|
||||
elif elem == "pool" and ikey == "tier":
|
||||
default_data = "default"
|
||||
else:
|
||||
default_data = ""
|
||||
|
||||
2
debian/control
vendored
2
debian/control
vendored
@@ -16,7 +16,7 @@ Description: Parallel Virtual Cluster node daemon (Python 3)
|
||||
|
||||
Package: pvc-daemon-api
|
||||
Architecture: all
|
||||
Depends: systemd, pvc-daemon-common, python3-yaml, python3-flask, python3-flask-restful, python3-celery, python-celery-common, python3-distutils, redis, python3-redis, python3-lxml, python3-flask-migrate, fio
|
||||
Depends: systemd, pvc-daemon-common, python3-yaml, python3-flask, python3-flask-restful, python3-celery, python-celery-common, python3-distutils, python3-redis, python3-lxml, python3-flask-migrate, fio
|
||||
Description: Parallel Virtual Cluster API daemon (Python 3)
|
||||
A KVM/Zookeeper/Ceph-based VM and private cloud manager
|
||||
.
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -350,32 +350,35 @@ def collect_ceph_stats(logger, config, zkhandler, this_node, queue):
|
||||
elif line[0] == "+":
|
||||
continue
|
||||
|
||||
# If line begins with | and second entry is a digit (i.e. OSD ID)
|
||||
if line[0] == "|" and line[1].isdigit():
|
||||
# Parse the line in Ceph 14 format
|
||||
osd_id = line[1]
|
||||
node = line[3].split(".")[0]
|
||||
used = line[5]
|
||||
avail = line[7]
|
||||
wr_ops = line[9]
|
||||
wr_data = line[11]
|
||||
rd_ops = line[13]
|
||||
rd_data = line[15]
|
||||
state = line[17]
|
||||
# If first entry is a digit (i.e. OSD ID)
|
||||
elif line[0].isdigit():
|
||||
# Parse the line in Ceph 16 format
|
||||
osd_id = line[0]
|
||||
node = line[1].split(".")[0]
|
||||
used = line[2]
|
||||
avail = line[3]
|
||||
wr_ops = line[4]
|
||||
wr_data = line[5]
|
||||
rd_ops = line[6]
|
||||
rd_data = line[7]
|
||||
state = line[8]
|
||||
# Otherwise, it's the header line and is ignored
|
||||
else:
|
||||
try:
|
||||
# If line begins with | and second entry is a digit (i.e. OSD ID)
|
||||
if line[0] == "|" and line[1].isdigit():
|
||||
# Parse the line in Ceph 14 format
|
||||
osd_id = line[1]
|
||||
node = line[3].split(".")[0]
|
||||
used = line[5]
|
||||
avail = line[7]
|
||||
wr_ops = line[9]
|
||||
wr_data = line[11]
|
||||
rd_ops = line[13]
|
||||
rd_data = line[15]
|
||||
state = line[17]
|
||||
# If first entry is a digit (i.e. OSD ID)
|
||||
elif line[0].isdigit():
|
||||
# Parse the line in Ceph 16 format
|
||||
osd_id = line[0]
|
||||
node = line[1].split(".")[0]
|
||||
used = line[2]
|
||||
avail = line[3]
|
||||
wr_ops = line[4]
|
||||
wr_data = line[5]
|
||||
rd_ops = line[6]
|
||||
rd_data = line[7]
|
||||
state = line[8]
|
||||
# Otherwise, it's the header line and is ignored
|
||||
else:
|
||||
continue
|
||||
except IndexError:
|
||||
continue
|
||||
|
||||
# I don't know why 2018 me used this construct instead of a normal
|
||||
|
||||
@@ -69,6 +69,20 @@ def start_ceph_mgr(logger, config):
|
||||
)
|
||||
|
||||
|
||||
def start_keydb(logger, config):
|
||||
if config["enable_api"] and config["daemon_mode"] == "coordinator":
|
||||
logger.out("Starting KeyDB daemon", state="i")
|
||||
# TODO: Move our handling out of Systemd and integrate it directly as a subprocess?
|
||||
common.run_os_command("systemctl start keydb-server.service")
|
||||
|
||||
|
||||
def start_api_worker(logger, config):
|
||||
if config["enable_api"]:
|
||||
logger.out("Starting API worker daemon", state="i")
|
||||
# TODO: Move our handling out of Systemd and integrate it directly as a subprocess?
|
||||
common.run_os_command("systemctl start pvcapid-worker.service")
|
||||
|
||||
|
||||
def start_system_services(logger, config):
|
||||
start_zookeeper(logger, config)
|
||||
start_libvirtd(logger, config)
|
||||
@@ -76,6 +90,8 @@ def start_system_services(logger, config):
|
||||
start_frrouting(logger, config)
|
||||
start_ceph_mon(logger, config)
|
||||
start_ceph_mgr(logger, config)
|
||||
start_keydb(logger, config)
|
||||
start_api_worker(logger, config)
|
||||
|
||||
logger.out("Waiting 10 seconds for daemons to start", state="s")
|
||||
sleep(10)
|
||||
|
||||
Reference in New Issue
Block a user