From a438a4155ac460404ed430df3bbc0f312903aa9b Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Sun, 26 Sep 2021 00:08:54 -0400 Subject: [PATCH] Fix OSD creation for partition paths and fix gdisk The previous implementation did not work with /dev/nvme devices or any /dev/disk/by-* devices due to some logical failures in the partition naming scheme, so fix these, and be explicit about what is supported in the PVC CLI command output. The 'echo | gdisk' implementation of partition creation also did not work due to limitations of subprocess.run; instead, use sgdisk which allows these commands to be written out explicitly and is included in the same package as gdisk. --- client-cli/pvc/pvc.py | 4 +-- node-daemon/pvcnoded/objects/CephInstance.py | 32 +++++++++++++++++--- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/client-cli/pvc/pvc.py b/client-cli/pvc/pvc.py index 2b55751c..2d1f7209 100755 --- a/client-cli/pvc/pvc.py +++ b/client-cli/pvc/pvc.py @@ -2601,7 +2601,7 @@ def ceph_osd(): @cluster_req def ceph_osd_create_db_vg(node, device, confirm_flag): """ - Create a new Ceph OSD database volume group on node NODE with block device DEVICE. + Create a new Ceph OSD database volume group on node NODE with block device DEVICE. DEVICE must be a valid raw block device, one of e.g. '/dev/sda', '/dev/nvme0n1', '/dev/disk/by-path/...', '/dev/disk/by-id/...', etc. Using partitions is not supported. This volume group will be used for Ceph OSD database functionality if the '--ext-db' flag is passed to newly-created OSDs during 'pvc storage osd add'. DEVICE should be an extremely fast SSD device (NVMe, Intel Optane, etc.) which is significantly faster than the normal OSD disks and with very high write endurance. Only one OSD database volume group on a single physical device is supported per node, so it must be fast and large enough to act as an effective OSD database device for all OSDs on the node; the database volume for each OSD is fixed to 5% of the OSD's size. Attempting to add additional database volume groups after the first will fail. """ @@ -2648,7 +2648,7 @@ def ceph_osd_create_db_vg(node, device, confirm_flag): @cluster_req def ceph_osd_add(node, device, weight, ext_db_flag, ext_db_ratio, confirm_flag): """ - Add a new Ceph OSD on node NODE with block device DEVICE. + Add a new Ceph OSD on node NODE with block device DEVICE. DEVICE must be a valid raw block device, one of e.g. '/dev/sda', '/dev/nvme0n1', '/dev/disk/by-path/...', '/dev/disk/by-id/...', etc. Using partitions is not supported. If '--ext-db' is specified, the existing OSD database volume group on NODE will be used; it must exist first or OSD creation will fail. See the 'pvc storage osd create-db-vg' command for more details. diff --git a/node-daemon/pvcnoded/objects/CephInstance.py b/node-daemon/pvcnoded/objects/CephInstance.py index 5831e4ff..b6ff4b0b 100644 --- a/node-daemon/pvcnoded/objects/CephInstance.py +++ b/node-daemon/pvcnoded/objects/CephInstance.py @@ -26,6 +26,7 @@ import psutil import daemon_lib.common as common from distutils.util import strtobool +from re import match class CephOSDInstance(object): @@ -318,20 +319,41 @@ class CephOSDInstance(object): return False # 1. Create an empty partition table - logger.out('Creating empty partiton table on block device {}'.format(device), state='i') + logger.out('Creating partitons on block device {}'.format(device), state='i') retcode, stdout, stderr = common.run_os_command( - 'echo -e "o\ny\nn\n\n\n\n8e00\nw\ny\n" | sudo gdisk {}'.format(device) + 'sgdisk --clear {}'.format(device) ) if retcode: - print('gdisk partitioning') + print('sgdisk create partition table') print(stdout) print(stderr) raise + retcode, stdout, stderr = common.run_os_command( + 'sgdisk --new 1:: --typecore 1:8e00 {}'.format(device) + ) + if retcode: + print('sgdisk create pv partition') + print(stdout) + print(stderr) + raise + + # Handle the partition ID portion + if match(r'by-path', device) or match(r'by-id', device): + # /dev/disk/by-path/pci-0000:03:00.0-scsi-0:1:0:0 -> pci-0000:03:00.0-scsi-0:1:0:0-part1 + partition = '{}-part1'.format(device) + elif match(r'nvme', device): + # /dev/nvme0n1 -> nvme0n1p1 + partition = '{}p1'.format(device) + else: + # /dev/sda -> sda1 + # No other '/dev/disk/by-*' types are valid for raw block devices anyways + partition = '{}1'.format(device) + # 2. Create the PV logger.out('Creating PV on block device {}1'.format(device), state='i') retcode, stdout, stderr = common.run_os_command( - 'pvcreate --force {}1'.format(device) + 'pvcreate --force {}'.format(partition) ) if retcode: print('pv creation') @@ -342,7 +364,7 @@ class CephOSDInstance(object): # 2. Create the VG (named 'osd-db') logger.out('Creating VG "osd-db" on block device {}1'.format(device), state='i') retcode, stdout, stderr = common.run_os_command( - 'vgcreate --force osd-db {}1'.format(device) + 'vgcreate --force osd-db {}'.format(partition) ) if retcode: print('vg creation')