Handle detect strings as arguments for blockdevs

Allows specifying blockdevs in the OSD and OSD-DB addition commands as
detect strings rather than actual block device paths. This provides
greater flexibility for automation with pvcbootstrapd (which originates
the concept of detect strings) and in general usage as well.
This commit is contained in:
Joshua Boniface 2021-12-24 15:18:38 -05:00
parent 9f122e916f
commit abc23ebb18
4 changed files with 120 additions and 15 deletions

View File

@ -3849,7 +3849,7 @@ class API_Storage_Ceph_OSDDB_Root(Resource):
{ {
"name": "device", "name": "device",
"required": True, "required": True,
"helptext": "A valid device must be specified.", "helptext": "A valid device or detect string must be specified.",
}, },
] ]
) )
@ -3871,7 +3871,7 @@ class API_Storage_Ceph_OSDDB_Root(Resource):
name: device name: device
type: string type: string
required: true required: true
description: The block device (e.g. "/dev/sdb", "/dev/disk/by-path/...", etc.) to create the OSD DB volume group on description: The block device (e.g. "/dev/sdb", "/dev/disk/by-path/...", etc.) or detect string ("detect:NAME:SIZE:ID") to create the OSD DB volume group on
responses: responses:
200: 200:
description: OK description: OK
@ -4003,7 +4003,7 @@ class API_Storage_Ceph_OSD_Root(Resource):
{ {
"name": "device", "name": "device",
"required": True, "required": True,
"helptext": "A valid device must be specified.", "helptext": "A valid device or detect string must be specified.",
}, },
{ {
"name": "weight", "name": "weight",
@ -4040,7 +4040,7 @@ class API_Storage_Ceph_OSD_Root(Resource):
name: device name: device
type: string type: string
required: true required: true
description: The block device (e.g. "/dev/sdb", "/dev/disk/by-path/...", etc.) to create the OSD on description: The block device (e.g. "/dev/sdb", "/dev/disk/by-path/...", etc.) or detect string ("detect:NAME:SIZE:ID") to create the OSD on
- in: query - in: query
name: weight name: weight
type: number type: number

View File

@ -3281,7 +3281,9 @@ def ceph_osd():
@cluster_req @cluster_req
def ceph_osd_create_db_vg(node, device, confirm_flag): def ceph_osd_create_db_vg(node, device, confirm_flag):
""" """
Create a new Ceph OSD database volume group on node NODE with block device DEVICE. DEVICE must be a valid raw block device, one of e.g. '/dev/sda', '/dev/nvme0n1', '/dev/disk/by-path/...', '/dev/disk/by-id/...', etc. Using partitions is not supported. Create a new Ceph OSD database volume group on node NODE with block device DEVICE. DEVICE must be a valid raw block device (e.g. '/dev/nvme0n1', '/dev/disk/by-path/...') or a "detect" string. Using partitions is not supported.
A "detect" string is a string in the form "detect:<NAME>:<HUMAN-SIZE>:<ID>". Detect strings allow for automatic determination of Linux block device paths from known basic information about disks by leveraging "lsscsi" on the target host. The "NAME" should be some descriptive identifier, for instance the manufacturer (e.g. "INTEL"), the "HUMAN-SIZE" should be the labeled human-readable size of the device (e.g. "480GB", "1.92TB"), and "ID" specifies the Nth 0-indexed device which matches the "NAME" and "HUMAN-SIZE" values (e.g. "2" would match the third device with the corresponding "NAME" and "HUMAN-SIZE"). When matching against sizes, there is +/- 3% flexibility to account for base-1000 vs. base-1024 differences and rounding errors. The "NAME" is case-insensitive and may contain whitespace; if it does, the entire detect string should be quoted. More information about detect strings can be found in the pvcbootstrapd manual.
This volume group will be used for Ceph OSD database and WAL functionality if the '--ext-db' flag is passed to newly-created OSDs during 'pvc storage osd add'. DEVICE should be an extremely fast SSD device (NVMe, Intel Optane, etc.) which is significantly faster than the normal OSD disks and with very high write endurance. Only one OSD database volume group on a single physical device is supported per node, so it must be fast and large enough to act as an effective OSD database device for all OSDs on the node. Attempting to add additional database volume groups after the first will fail. This volume group will be used for Ceph OSD database and WAL functionality if the '--ext-db' flag is passed to newly-created OSDs during 'pvc storage osd add'. DEVICE should be an extremely fast SSD device (NVMe, Intel Optane, etc.) which is significantly faster than the normal OSD disks and with very high write endurance. Only one OSD database volume group on a single physical device is supported per node, so it must be fast and large enough to act as an effective OSD database device for all OSDs on the node. Attempting to add additional database volume groups after the first will fail.
""" """
@ -3343,7 +3345,9 @@ def ceph_osd_create_db_vg(node, device, confirm_flag):
@cluster_req @cluster_req
def ceph_osd_add(node, device, weight, ext_db_flag, ext_db_ratio, confirm_flag): def ceph_osd_add(node, device, weight, ext_db_flag, ext_db_ratio, confirm_flag):
""" """
Add a new Ceph OSD on node NODE with block device DEVICE. DEVICE must be a valid raw block device, one of e.g. '/dev/sda', '/dev/nvme0n1', '/dev/disk/by-path/...', '/dev/disk/by-id/...', etc. Using partitions is not supported. Add a new Ceph OSD on node NODE with block device DEVICE. DEVICE must be a valid raw block device (e.g. '/dev/sda', '/dev/nvme0n1', '/dev/disk/by-path/...', '/dev/disk/by-id/...') or a "detect" string. Using partitions is not supported.
A "detect" string is a string in the form "detect:<NAME>:<HUMAN-SIZE>:<ID>". Detect strings allow for automatic determination of Linux block device paths from known basic information about disks by leveraging "lsscsi" on the target host. The "NAME" should be some descriptive identifier, for instance the manufacturer (e.g. "INTEL"), the "HUMAN-SIZE" should be the labeled human-readable size of the device (e.g. "480GB", "1.92TB"), and "ID" specifies the Nth 0-indexed device which matches the "NAME" and "HUMAN-SIZE" values (e.g. "2" would match the third device with the corresponding "NAME" and "HUMAN-SIZE"). When matching against sizes, there is +/- 3% flexibility to account for base-1000 vs. base-1024 differences and rounding errors. The "NAME" is case-insensitive and may contain whitespace; if it does, the entire detect string should be quoted. More information about detect strings can be found in the pvcbootstrapd manual.
The weight of an OSD should reflect the ratio of the OSD to other OSDs in the storage cluster. For example, if all OSDs are the same size as recommended for PVC, 1 (the default) is a valid weight so that all are treated identically. If a new OSD is added later which is 4x the size of the existing OSDs, the new OSD's weight should then be 4 to tell the cluster that 4x the data can be stored on the OSD. Weights can also be tweaked for performance reasons, since OSDs with more data will incur more I/O load. For more information about CRUSH weights, please see the Ceph documentation. The weight of an OSD should reflect the ratio of the OSD to other OSDs in the storage cluster. For example, if all OSDs are the same size as recommended for PVC, 1 (the default) is a valid weight so that all are treated identically. If a new OSD is added later which is 4x the size of the existing OSDs, the new OSD's weight should then be 4 to tell the cluster that 4x the data can be stored on the OSD. Weights can also be tweaked for performance reasons, since OSDs with more data will incur more I/O load. For more information about CRUSH weights, please see the Ceph documentation.

View File

@ -5034,7 +5034,7 @@
"type": "string" "type": "string"
}, },
{ {
"description": "The block device (e.g. \"/dev/sdb\", \"/dev/disk/by-path/...\", etc.) to create the OSD on", "description": "The block device (e.g. \"/dev/sdb\", \"/dev/disk/by-path/...\", etc.) or detect string (\"detect:NAME:SIZE:ID\") to create the OSD on",
"in": "query", "in": "query",
"name": "device", "name": "device",
"required": true, "required": true,
@ -5194,7 +5194,7 @@
"type": "string" "type": "string"
}, },
{ {
"description": "The block device (e.g. \"/dev/sdb\", \"/dev/disk/by-path/...\", etc.) to create the OSD DB volume group on", "description": "The block device (e.g. \"/dev/sdb\", \"/dev/disk/by-path/...\", etc.) or detect string (\"detect:NAME:SIZE:ID\") to create the OSD DB volume group on",
"in": "query", "in": "query",
"name": "device", "name": "device",
"required": true, "required": true,

View File

@ -26,7 +26,76 @@ import psutil
import daemon_lib.common as common import daemon_lib.common as common
from distutils.util import strtobool from distutils.util import strtobool
from re import search from re import search, match, sub
def _parse_human_size(human_size):
    """
    Convert a human-readable size such as "480GB" or "1.92TB" into bytes.

    Decimal prefixes (K, M, G, T, P, E) are powers of 1000; the binary forms
    (KiB, MiB, ...) are powers of 1024. A trailing "B" is optional and the
    comparison is case-insensitive. Returns a float number of bytes, or None
    if the value cannot be parsed (for example a "-" placeholder).
    """
    parsed = match(
        r"(\d+(?:\.\d+)?)\s*(?:([KMGTPE])(I?))?B?$", human_size.strip().upper()
    )
    if parsed is None:
        return None

    value, prefix, binary = parsed.groups()
    base = 1024 if binary else 1000
    exponent = "KMGTPE".index(prefix) + 1 if prefix else 0
    return float(value) * base**exponent


def get_detect_device(detect_string):
    """
    Parses a "detect:" string into a normalized block device path using lsscsi.

    A detect string is formatted "detect:<NAME>:<SIZE>:<ID>", where
    NAME is some unique identifier in lsscsi, SIZE is a human-readable
    size value to within +/- 3% of the real size of the device, and
    ID is the Nth (0-indexed) matching entry of that NAME and SIZE.

    Returns the block device path (the second-to-last column of the matching
    "lsscsi -s" line), or None when the detect string is malformed, lsscsi
    fails, or fewer than ID + 1 devices match.
    """
    # Validate the detect string up front so that a malformed string yields
    # None (which callers already handle) instead of an unhandled exception.
    fields = detect_string.split(":")
    if len(fields) != 4 or fields[0] != "detect":
        return None
    _, name, size, idd = fields

    wanted_size = _parse_human_size(size)
    try:
        wanted_index = int(idd)
    except ValueError:
        return None
    if wanted_size is None or wanted_index < 0:
        return None

    retcode, stdout, stderr = common.run_os_command("lsscsi -s")
    if retcode:
        print(f"Failed to run lsscsi: {stderr}")
        return None

    wanted_name = name.lower()
    matches = list()
    for line in stdout.split("\n"):
        split_line = line.split()
        # A usable entry has at least an address, a type, a device node and a
        # size column; only "disk" entries are candidates.
        if len(split_line) < 4 or split_line[1] != "disk":
            continue
        # Skip if name is not contained in the line (case-insensitive)
        if wanted_name not in line.lower():
            continue
        # Skip if this device's own size column is unparseable or is not
        # within +/- 3% of the requested size (units are taken into account).
        device_size = _parse_human_size(split_line[-1])
        if device_size is None:
            continue
        if not device_size * 0.97 < wanted_size < device_size * 1.03:
            continue
        # The device node is the column immediately before the size
        matches.append(split_line[-2])

    # Return the blockdev at index {idd}, if that many devices matched
    if wanted_index < len(matches):
        return matches[wanted_index]
    return None
class CephOSDInstance(object): class CephOSDInstance(object):
@ -76,6 +145,22 @@ class CephOSDInstance(object):
def add_osd( def add_osd(
zkhandler, logger, node, device, weight, ext_db_flag=False, ext_db_ratio=0.05 zkhandler, logger, node, device, weight, ext_db_flag=False, ext_db_ratio=0.05
): ):
# Handle a detect device if that is passed
if match(r"detect:", device):
ddevice = get_detect_device(device)
if ddevice is None:
logger.out(
f"Failed to determine block device from detect string {device}",
state="e",
)
return False
else:
logger.out(
f"Determined block device {ddevice} from detect string {device}",
state="i",
)
device = ddevice
# We are ready to create a new OSD on this node # We are ready to create a new OSD on this node
logger.out("Creating new OSD disk on block device {}".format(device), state="i") logger.out("Creating new OSD disk on block device {}".format(device), state="i")
try: try:
@ -354,17 +439,33 @@ class CephOSDInstance(object):
@staticmethod @staticmethod
def add_db_vg(zkhandler, logger, device): def add_db_vg(zkhandler, logger, device):
# Check if an existing volume group exists
retcode, stdout, stderr = common.run_os_command("vgdisplay osd-db")
if retcode != 5:
logger.out('Ceph OSD database VG "osd-db" already exists', state="e")
return False
# Handle a detect device if that is passed
if match(r"detect:", device):
ddevice = get_detect_device(device)
if ddevice is None:
logger.out(
f"Failed to determine block device from detect string {device}",
state="e",
)
return False
else:
logger.out(
f"Determined block device {ddevice} from detect string {device}",
state="i",
)
device = ddevice
logger.out( logger.out(
"Creating new OSD database volume group on block device {}".format(device), "Creating new OSD database volume group on block device {}".format(device),
state="i", state="i",
) )
try: try:
# 0. Check if an existsing volume group exists
retcode, stdout, stderr = common.run_os_command("vgdisplay osd-db")
if retcode != 5:
logger.out('Ceph OSD database VG "osd-db" already exists', state="e")
return False
# 1. Create an empty partition table # 1. Create an empty partition table
logger.out( logger.out(
"Creating partitions on block device {}".format(device), state="i" "Creating partitions on block device {}".format(device), state="i"