From f4c7fdffb821b829c3cb876b7a68cbc78894c2f0 Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Fri, 24 Dec 2021 15:18:38 -0500 Subject: [PATCH] Handle detect strings as arguments for blockdevs Allows specifying blockdevs in the OSD and OSD-DB addition commands as detect strings rather than actual block device paths. This provides greater flexibility for automation with pvcbootstrapd (which originates the concept of detect strings) and in general usage as well. --- api-daemon/pvcapid/flaskapi.py | 8 +- client-cli/pvc/pvc.py | 8 +- docs/manuals/swagger.json | 4 +- node-daemon/pvcnoded/objects/CephInstance.py | 115 +++++++++++++++++-- 4 files changed, 120 insertions(+), 15 deletions(-) diff --git a/api-daemon/pvcapid/flaskapi.py b/api-daemon/pvcapid/flaskapi.py index 8ced00de..b62721a5 100755 --- a/api-daemon/pvcapid/flaskapi.py +++ b/api-daemon/pvcapid/flaskapi.py @@ -3849,7 +3849,7 @@ class API_Storage_Ceph_OSDDB_Root(Resource): { "name": "device", "required": True, - "helptext": "A valid device must be specified.", + "helptext": "A valid device or detect string must be specified.", }, ] ) @@ -3871,7 +3871,7 @@ class API_Storage_Ceph_OSDDB_Root(Resource): name: device type: string required: true - description: The block device (e.g. "/dev/sdb", "/dev/disk/by-path/...", etc.) to create the OSD DB volume group on + description: The block device (e.g. "/dev/sdb", "/dev/disk/by-path/...", etc.) or detect string ("detect:NAME:SIZE:ID") to create the OSD DB volume group on responses: 200: description: OK @@ -4003,7 +4003,7 @@ class API_Storage_Ceph_OSD_Root(Resource): { "name": "device", "required": True, - "helptext": "A valid device must be specified.", + "helptext": "A valid device or detect string must be specified.", }, { "name": "weight", @@ -4040,7 +4040,7 @@ class API_Storage_Ceph_OSD_Root(Resource): name: device type: string required: true - description: The block device (e.g. "/dev/sdb", "/dev/disk/by-path/...", etc.) 
to create the OSD on + description: The block device (e.g. "/dev/sdb", "/dev/disk/by-path/...", etc.) or detect string ("detect:NAME:SIZE:ID") to create the OSD on - in: query name: weight type: number diff --git a/client-cli/pvc/pvc.py b/client-cli/pvc/pvc.py index e542377a..604f0ba4 100755 --- a/client-cli/pvc/pvc.py +++ b/client-cli/pvc/pvc.py @@ -3281,7 +3281,9 @@ def ceph_osd(): @cluster_req def ceph_osd_create_db_vg(node, device, confirm_flag): """ - Create a new Ceph OSD database volume group on node NODE with block device DEVICE. DEVICE must be a valid raw block device, one of e.g. '/dev/sda', '/dev/nvme0n1', '/dev/disk/by-path/...', '/dev/disk/by-id/...', etc. Using partitions is not supported. + Create a new Ceph OSD database volume group on node NODE with block device DEVICE. DEVICE must be a valid raw block device (e.g. '/dev/nvme0n1', '/dev/disk/by-path/...') or a "detect" string. Using partitions is not supported. + + A "detect" string is a string in the form "detect:<NAME>:<HUMAN-SIZE>:<ID>". Detect strings allow for automatic determination of Linux block device paths from known basic information about disks by leveraging "lsscsi" on the target host. The "NAME" should be some descriptive identifier, for instance the manufacturer (e.g. "INTEL"), the "HUMAN-SIZE" should be the labeled human-readable size of the device (e.g. "480GB", "1.92TB"), and "ID" specifies the Nth 0-indexed device which matches the "NAME" and "HUMAN-SIZE" values (e.g. "2" would match the third device with the corresponding "NAME" and "HUMAN-SIZE"). When matching against sizes, there is +/- 3% flexibility to account for base-1000 vs. base-1024 differences and rounding errors. The "NAME" may contain whitespace but if so the entire detect string should be quoted, and is case-insensitive. More information about detect strings can be found in the pvcbootstrapd manual. 
This volume group will be used for Ceph OSD database and WAL functionality if the '--ext-db' flag is passed to newly-created OSDs during 'pvc storage osd add'. DEVICE should be an extremely fast SSD device (NVMe, Intel Optane, etc.) which is significantly faster than the normal OSD disks and with very high write endurance. Only one OSD database volume group on a single physical device is supported per node, so it must be fast and large enough to act as an effective OSD database device for all OSDs on the node. Attempting to add additional database volume groups after the first will fail. """ @@ -3343,7 +3345,9 @@ def ceph_osd_create_db_vg(node, device, confirm_flag): @cluster_req def ceph_osd_add(node, device, weight, ext_db_flag, ext_db_ratio, confirm_flag): """ - Add a new Ceph OSD on node NODE with block device DEVICE. DEVICE must be a valid raw block device, one of e.g. '/dev/sda', '/dev/nvme0n1', '/dev/disk/by-path/...', '/dev/disk/by-id/...', etc. Using partitions is not supported. + Add a new Ceph OSD on node NODE with block device DEVICE. DEVICE must be a valid raw block device (e.g. '/dev/sda', '/dev/nvme0n1', '/dev/disk/by-path/...', '/dev/disk/by-id/...') or a "detect" string. Using partitions is not supported. + + A "detect" string is a string in the form "detect:<NAME>:<HUMAN-SIZE>:<ID>". Detect strings allow for automatic determination of Linux block device paths from known basic information about disks by leveraging "lsscsi" on the target host. The "NAME" should be some descriptive identifier, for instance the manufacturer (e.g. "INTEL"), the "HUMAN-SIZE" should be the labeled human-readable size of the device (e.g. "480GB", "1.92TB"), and "ID" specifies the Nth 0-indexed device which matches the "NAME" and "HUMAN-SIZE" values (e.g. "2" would match the third device with the corresponding "NAME" and "HUMAN-SIZE"). When matching against sizes, there is +/- 3% flexibility to account for base-1000 vs. base-1024 differences and rounding errors. 
The "NAME" may contain whitespace but if so the entire detect string should be quoted, and is case-insensitive. More information about detect strings can be found in the pvcbootstrapd manual. The weight of an OSD should reflect the ratio of the OSD to other OSDs in the storage cluster. For example, if all OSDs are the same size as recommended for PVC, 1 (the default) is a valid weight so that all are treated identically. If a new OSD is added later which is 4x the size of the existing OSDs, the new OSD's weight should then be 4 to tell the cluster that 4x the data can be stored on the OSD. Weights can also be tweaked for performance reasons, since OSDs with more data will incur more I/O load. For more information about CRUSH weights, please see the Ceph documentation. diff --git a/docs/manuals/swagger.json b/docs/manuals/swagger.json index cc6320f5..d7d8bd0e 100644 --- a/docs/manuals/swagger.json +++ b/docs/manuals/swagger.json @@ -5034,7 +5034,7 @@ "type": "string" }, { - "description": "The block device (e.g. \"/dev/sdb\", \"/dev/disk/by-path/...\", etc.) to create the OSD on", + "description": "The block device (e.g. \"/dev/sdb\", \"/dev/disk/by-path/...\", etc.) or detect string (\"detect:NAME:SIZE:ID\") to create the OSD on", "in": "query", "name": "device", "required": true, @@ -5194,7 +5194,7 @@ "type": "string" }, { - "description": "The block device (e.g. \"/dev/sdb\", \"/dev/disk/by-path/...\", etc.) to create the OSD DB volume group on", + "description": "The block device (e.g. \"/dev/sdb\", \"/dev/disk/by-path/...\", etc.) 
def _parse_detect_size(size_text):
    """
    Split a human-readable size such as "480GB" or "1.92TiB" into a tuple of
    (magnitude, multiplier).

    The multiplier is None when the text carries no unit suffix, 1.0 for a
    bare "B", 1000**n for the prefixes k/M/G/T/P/E and 1024**n when the
    prefix is followed by "i". Matching is case-insensitive and the trailing
    "B" is optional. Returns None when the text is not a size at all (for
    example "-").
    """
    # Imported locally so this helper does not rely on which names the
    # module-level "from re import ..." line happens to provide.
    import re

    parsed = re.fullmatch(
        r"\s*(\d+(?:\.\d*)?|\.\d+)\s*(?:([kmgtpe])(i?)b?|(b))?\s*",
        size_text,
        re.IGNORECASE,
    )
    if parsed is None:
        return None

    magnitude = float(parsed.group(1))
    prefix, iec, plain_bytes = parsed.group(2), parsed.group(3), parsed.group(4)
    if prefix:
        base = 1024 if iec else 1000
        exponent = "kmgtpe".index(prefix.lower()) + 1
        return magnitude, float(base**exponent)
    if plain_bytes:
        return magnitude, 1.0
    return magnitude, None


def _size_deviation(wanted, candidate, tolerance):
    """
    Return the relative difference between two parsed sizes, or None when
    the wanted size is not strictly within +/- tolerance of the candidate.

    Both arguments are (magnitude, multiplier) tuples as produced by
    _parse_detect_size. If either side has no unit, only the bare magnitudes
    are compared.
    """
    wanted_value, wanted_scale = wanted
    candidate_value, candidate_scale = candidate
    if wanted_scale is not None and candidate_scale is not None:
        wanted_value *= wanted_scale
        candidate_value *= candidate_scale

    lower = candidate_value * (1 - tolerance)
    upper = candidate_value * (1 + tolerance)
    # Strict bounds; this also rejects a zero-sized candidate before dividing.
    if not lower < wanted_value < upper:
        return None
    return abs(wanted_value - candidate_value) / candidate_value


def _select_detect_device(lsscsi_output, name, size, index, tolerance=0.03):
    """
    Pick the index-th disk in lsscsi_output whose line contains name
    (case-insensitive) and whose size matches size.

    Only lines whose second whitespace-separated field is "disk" are
    considered; the last field is taken as the size and the one before it as
    the block device path. Returns the device path, or None if there is no
    such entry.
    """
    needle = name.lower()
    candidates = []
    for line in lsscsi_output.splitlines():
        fields = line.split()
        # Need at least: address, type, device node, size.
        if len(fields) < 4 or fields[1] != "disk":
            continue
        if needle not in line.lower():
            continue
        candidates.append((fields[-2], fields[-1]))

    # Resolve the requested size to the closest reported size that lies
    # within the tolerance. When nothing qualifies (or the requested size
    # cannot be parsed) the requested text is compared verbatim instead.
    wanted = _parse_detect_size(size)
    if wanted is not None:
        closest = None
        for _, reported_size in candidates:
            reported = _parse_detect_size(reported_size)
            if reported is None:
                continue
            deviation = _size_deviation(wanted, reported, tolerance)
            if deviation is None:
                continue
            if closest is None or deviation < closest[0]:
                closest = (deviation, reported_size)
        if closest is not None:
            size = closest[1]

    matches = [device for device, reported_size in candidates if reported_size == size]
    if 0 <= index < len(matches):
        return matches[index]
    return None


def get_detect_device(detect_string, lsscsi_output=None):
    """
    Parses a "detect:" string into a normalized block device path using lsscsi.

    A detect string is formatted "detect:<NAME>:<SIZE>:<ID>", where
    NAME is some unique identifier in lsscsi, SIZE is a human-readable
    size value to within +/- 3% of the real size of the device, and
    ID is the Nth (0-indexed) matching entry of that NAME and SIZE.

    detect_string: the detect string to resolve.
    lsscsi_output: optional pre-captured output of "lsscsi -s"; when None
        (the default) the command is run via common.run_os_command.

    Returns the block device path as reported by lsscsi, or None if the
    detect string is malformed, lsscsi fails, or no entry matches.
    """
    fields = detect_string.split(":")
    if len(fields) != 4 or fields[0] != "detect":
        return None
    _, name, size, idd = fields

    # Validate the index before doing any work on the host.
    try:
        index = int(idd)
    except ValueError:
        return None
    if index < 0:
        return None

    if lsscsi_output is None:
        retcode, stdout, stderr = common.run_os_command("lsscsi -s")
        if retcode:
            print(f"Failed to run lsscsi: {stderr}")
            return None
        lsscsi_output = stdout

    return _select_detect_device(lsscsi_output, name, size, index)
Check if an existsing volume group exists - retcode, stdout, stderr = common.run_os_command("vgdisplay osd-db") - if retcode != 5: - logger.out('Ceph OSD database VG "osd-db" already exists', state="e") - return False - # 1. Create an empty partition table logger.out( "Creating partitions on block device {}".format(device), state="i"