Add support for split OSD adds

Allows creating multiple OSDs on a single (NVMe) block device,
leveraging the "ceph-volume lvm batch" command. Replaces the previous
method of creating OSDs.

Also adds a new Zookeeper key ("is_split") for each OSD indicating whether it is split, and bumps the Zookeeper schema version to 10.
This commit is contained in:
Joshua Boniface 2023-11-01 21:17:38 -04:00
parent aa0b1f504f
commit 526a5f4a74
8 changed files with 281 additions and 197 deletions

View File

@ -4284,12 +4284,18 @@ class API_Storage_Ceph_OSD_Root(Resource):
{
"name": "ext_db",
"required": False,
"helptext": "Whether to use an external OSD DB LV device.",
},
{
"name": "ext_db_ratio",
"required": False,
"helptext": "Decimal size ratio of the external OSD DB LV device.",
},
{
"name": "split",
"required": False,
},
{
"name": "count",
"required": False,
},
]
)
@ -4327,6 +4333,16 @@ class API_Storage_Ceph_OSD_Root(Resource):
type: float
required: false
description: Decimal ratio of total OSD size for the external OSD DB LV device, default 0.05 (5%)
- in: query
name: split
type: boolean
required: false
description: Whether to split the block device into multiple OSDs (recommended for NVMe devices)
- in: query
name: count
type: integer
required: false
description: If {split}, how many OSDs to create on the block device; usually 2 or 4 depending on size
responses:
200:
description: OK
@ -4345,6 +4361,8 @@ class API_Storage_Ceph_OSD_Root(Resource):
reqargs.get("weight", None),
reqargs.get("ext_db", False),
float(reqargs.get("ext_db_ratio", 0.05)),
reqargs.get("split", False),
reqargs.get("count", 1),
)

View File

@ -1366,12 +1366,28 @@ def ceph_osd_db_vg_add(zkhandler, node, device):
@ZKConnection(config)
def ceph_osd_add(zkhandler, node, device, weight, ext_db_flag=False, ext_db_ratio=0.05):
def ceph_osd_add(
zkhandler,
node,
device,
weight,
ext_db_flag=False,
ext_db_ratio=0.05,
split_flag=False,
split_count=1,
):
"""
Add a Ceph OSD to the PVC Ceph storage cluster.
"""
retflag, retdata = pvc_ceph.add_osd(
zkhandler, node, device, weight, ext_db_flag, ext_db_ratio
zkhandler,
node,
device,
weight,
ext_db_flag,
ext_db_ratio,
split_flag,
split_count,
)
if retflag:

View File

@ -3411,8 +3411,17 @@ def cli_storage_osd_create_db_vg(node, device):
type=float,
help="Decimal ratio of the external database logical volume to the OSD size.",
)
@confirm_opt("Destroy all data on and create new OSD on node {node} device {device}")
def cli_storage_osd_add(node, device, weight, ext_db_flag, ext_db_ratio):
@click.option(
"-s",
"--split",
"split_count",
default=None,
show_default=False,
type=int,
help="Split an NVMe disk into this many OSDs",
)
@confirm_opt("Destroy all data on and create new OSD(s) on node {node} device {device}")
def cli_storage_osd_add(node, device, weight, ext_db_flag, ext_db_ratio, split_count):
"""
Add a new Ceph OSD on node NODE with block device DEVICE. DEVICE must be a valid block device path (e.g. '/dev/sda', '/dev/nvme0n1', '/dev/disk/by-path/...', '/dev/disk/by-id/...') or a "detect" string. Using partitions is not supported.
@ -3423,10 +3432,22 @@ def cli_storage_osd_add(node, device, weight, ext_db_flag, ext_db_ratio):
If '--ext-db' is specified, the OSD database and WAL will be placed on a new logical volume in NODE's OSD database volume group. An OSD database volume group must exist on the node or the OSD creation will fail. See the 'pvc storage osd create-db-vg' command for more details.
The default '--ext-db-ratio' of 0.05 (5%) is sufficient for most RBD workloads and OSD sizes, though this can be adjusted based on the sizes of the OSD(s) and the underlying database device. Ceph documentation recommends at least 0.02 (2%) for RBD use-cases, and higher values may improve WAL performance under write-heavy workloads with fewer OSDs per node.
For NVMe devices, it is recommended to split the block device into multiple OSDs to provide better processing throughput. To do this, specify "-s"/"--split" and the number of OSDs to create on the block device. For most NVMe devices, the recommended value is 2 or 4, such that each OSD is at least 500GB. Numbers higher than 4 are not recommended. This is NOT RECOMMENDED for SATA SSDs.
"""
if split_count is not None:
split_flag = True
retcode, retmsg = pvc.lib.storage.ceph_osd_add(
CLI_CONFIG, node, device, weight, ext_db_flag, ext_db_ratio
CLI_CONFIG,
node,
device,
weight,
ext_db_flag,
ext_db_ratio,
split_flag,
split_count,
)
finish(retcode, retmsg)

View File

@ -231,12 +231,14 @@ def ceph_osd_list(config, limit):
return False, response.json().get("message", "")
def ceph_osd_add(config, node, device, weight, ext_db_flag, ext_db_ratio):
def ceph_osd_add(
config, node, device, weight, ext_db_flag, ext_db_ratio, split_flag, split_count
):
"""
Add new Ceph OSD
API endpoint: POST /api/v1/storage/ceph/osd
API arguments: node={node}, device={device}, weight={weight}, ext_db={ext_db_flag}, ext_db_ratio={ext_db_ratio}
API arguments: node={node}, device={device}, weight={weight}, ext_db={ext_db_flag}, ext_db_ratio={ext_db_ratio}, split={split_flag}, count={split_count}
API schema: {"message":"{data}"}
"""
params = {
@ -245,6 +247,8 @@ def ceph_osd_add(config, node, device, weight, ext_db_flag, ext_db_ratio):
"weight": weight,
"ext_db": ext_db_flag,
"ext_db_ratio": ext_db_ratio,
"split": split_flag,
"count": split_count,
}
response = call_api(config, "post", "/storage/ceph/osd", params=params)

View File

@ -211,6 +211,7 @@ def getOSDInformation(zkhandler, osd_id):
# Get the devices
osd_node = zkhandler.read(("osd.node", osd_id))
osd_device = zkhandler.read(("osd.device", osd_id))
osd_is_split = zkhandler.read(("osd.is_split", osd_id))
osd_db_device = zkhandler.read(("osd.db_device", osd_id))
# Parse the stats data
osd_stats_raw = zkhandler.read(("osd.stats", osd_id))
@ -220,6 +221,7 @@ def getOSDInformation(zkhandler, osd_id):
"id": osd_id,
"node": osd_node,
"device": osd_device,
"is_split": osd_is_split,
"db_device": osd_db_device,
"stats": osd_stats,
}
@ -266,7 +268,16 @@ def add_osd_db_vg(zkhandler, node, device):
# OSD actions use the /cmd/ceph pipe
# These actions must occur on the specific node they reference
def add_osd(zkhandler, node, device, weight, ext_db_flag=False, ext_db_ratio=0.05):
def add_osd(
zkhandler,
node,
device,
weight,
ext_db_flag=False,
ext_db_ratio=0.05,
split_flag=False,
split_count=1,
):
# Verify the target node exists
if not common.verifyNode(zkhandler, node):
return False, 'ERROR: No node named "{}" is present in the cluster.'.format(
@ -284,8 +295,8 @@ def add_osd(zkhandler, node, device, weight, ext_db_flag=False, ext_db_ratio=0.0
)
# Tell the cluster to create a new OSD for the host
add_osd_string = "osd_add {},{},{},{},{}".format(
node, device, weight, ext_db_flag, ext_db_ratio
add_osd_string = "osd_add {},{},{},{},{},{},{}".format(
node, device, weight, ext_db_flag, ext_db_ratio, split_flag, split_count
)
zkhandler.write([("base.cmd.ceph", add_osd_string)])
# Wait 1/2 second for the cluster to get the message and start working
@ -295,14 +306,10 @@ def add_osd(zkhandler, node, device, weight, ext_db_flag=False, ext_db_ratio=0.0
try:
result = zkhandler.read("base.cmd.ceph").split()[0]
if result == "success-osd_add":
message = 'Created new OSD with block device "{}" on node "{}".'.format(
device, node
)
message = f'Created {split_count} new OSD(s) on node "{node}" block device "{device}"'
success = True
else:
message = (
"ERROR: Failed to create new OSD; check node logs for details."
)
message = "ERROR: Failed to create OSD(s); check node logs for details."
success = False
except Exception:
message = "ERROR: Command ignored by node."

View File

@ -0,0 +1 @@
{"version": "10", "root": "", "base": {"root": "", "schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "cmd": "/cmd", "cmd.node": "/cmd/nodes", "cmd.domain": "/cmd/domains", "cmd.ceph": "/cmd/ceph", "logs": "/logs", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.health": "/ceph/health", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "logs": {"node": "", "messages": "/messages"}, "node": {"name": "", "keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "data.pvc_version": "/pvcversion", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword", "sriov": "/sriov", "sriov.pf": "/sriov/pf", "sriov.vf": "/sriov/vf", "monitoring.plugins": "/monitoring_plugins", "monitoring.data": "/monitoring_data", "monitoring.health": "/monitoring_health"}, "monitoring_plugin": {"name": "", "last_run": "/last_run", "health_delta": "/health_delta", "message": "/message", "data": "/data", "runtime": "/runtime"}, "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"}, "sriov_vf": {"phy": "", 
"pf": "/pf", "mtu": "/mtu", "mac": "/mac", "phy_mac": "/phy_mac", "config": "/config", "config.vlan_id": "/config/vlan_id", "config.vlan_qos": "/config/vlan_qos", "config.tx_rate_min": "/config/tx_rate_min", "config.tx_rate_max": "/config/tx_rate_max", "config.spoof_check": "/config/spoof_check", "config.link_state": "/config/link_state", "config.trust": "/config/trust", "config.query_rss": "/config/query_rss", "pci": "/pci", "pci.domain": "/pci/domain", "pci.bus": "/pci/bus", "pci.slot": "/pci/slot", "pci.function": "/pci/function", "used": "/used", "used_by": "/used_by"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "meta.tags": "/tags", "migrate.sync_lock": "/migrate_sync_lock"}, "tag": {"name": "", "type": "/type", "protected": "/protected"}, "network": {"vni": "", "type": "/nettype", "mtu": "/mtu", "rule": "/firewall_rules", "rule.in": "/firewall_rules/in", "rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": "/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname", "expiry": "/expiry", "client_id": "/clientid"}, "rule": {"description": "", "rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "db_device": "/db_device", "fsid": "/fsid", "ofsid": 
"/fsid/osd", "cfsid": "/fsid/cluster", "lvm": "/lvm", "vg": "/lvm/vg", "lv": "/lvm/lv", "is_split": "/is_split", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "tier": "/tier", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}}

View File

@ -540,7 +540,7 @@ class ZKHandler(object):
#
class ZKSchema(object):
# Current version
_version = 9
_version = 10
# Root for doing nested keys
_schema_root = ""
@ -719,6 +719,7 @@ class ZKSchema(object):
"lvm": "/lvm",
"vg": "/lvm/vg",
"lv": "/lvm/lv",
"is_split": "/is_split",
"stats": "/stats",
},
# The schema of an individual pool entry (/ceph/pools/{pool_name})
@ -963,7 +964,9 @@ class ZKSchema(object):
kpath = f"{elem}.{ikey}"
# Validate that the key exists for that child
if not zkhandler.zk_conn.exists(self.path(kpath, child)):
if elem == "pool" and ikey == "tier":
if elem == "osd" and ikey == "is_split":
default_data = "False"
elif elem == "pool" and ikey == "tier":
default_data = "default"
else:
default_data = ""

View File

@ -26,6 +26,7 @@ import daemon_lib.common as common
from distutils.util import strtobool
from re import search, match, sub
from json import loads as jloads
def get_detect_device(detect_string):
@ -260,7 +261,15 @@ class CephOSDInstance(object):
@staticmethod
def add_osd(
zkhandler, logger, node, device, weight, ext_db_flag=False, ext_db_ratio=0.05
zkhandler,
logger,
node,
device,
weight,
ext_db_flag=False,
ext_db_ratio=0.05,
split_device=False,
split_count=1,
):
# Handle a detect device if that is passed
if match(r"detect:", device):
@ -278,177 +287,185 @@ class CephOSDInstance(object):
)
device = ddevice
# We are ready to create a new OSD on this node
logger.out("Creating new OSD disk on block device {}".format(device), state="i")
if split_device and split_count > 1:
split_flag = f"--osds-per-device {split_count}"
logger.out(
f"Creating {split_count} new OSD disks on block device {device}",
state="i",
)
else:
split_flag = ""
logger.out(f"Creating 1 new OSD disk on block device {device}", state="i")
if "nvme" in device:
class_flag = "--crush-device-class nvme"
else:
class_flag = "--crush-device-class ssd"
try:
# 1. Create an OSD; we do this so we know what ID will be gen'd
retcode, stdout, stderr = common.run_os_command("ceph osd create")
if retcode:
print("ceph osd create")
print(stdout)
print(stderr)
raise Exception
osd_id = stdout.rstrip()
# 2. Remove that newly-created OSD
# 1. Zap the block device
logger.out(f"Zapping disk {device}", state="i")
retcode, stdout, stderr = common.run_os_command(
"ceph osd rm {}".format(osd_id)
f"ceph-volume lvm zap --destroy {device}"
)
if retcode:
print("ceph osd rm")
print(stdout)
print(stderr)
logger.out("Failed: ceph-volume lvm zap", state="e")
logger.out(stdout, state="d")
logger.out(stderr, state="d")
raise Exception
# 3a. Zap the disk to ensure it is ready to go
logger.out("Zapping disk {}".format(device), state="i")
# 2. Prepare the OSD(s)
logger.out(f"Preparing OSD(s) on disk {device}", state="i")
retcode, stdout, stderr = common.run_os_command(
"ceph-volume lvm zap --destroy {}".format(device)
f"ceph-volume lvm batch --yes --prepare --bluestore {split_flag} {class_flag} {device}"
)
if retcode:
print("ceph-volume lvm zap")
print(stdout)
print(stderr)
logger.out("Failed: ceph-volume lvm batch", state="e")
logger.out(stdout, state="d")
logger.out(stderr, state="d")
raise Exception
dev_flags = "--data {}".format(device)
# 3. Get the list of created OSDs on the device (initial pass)
logger.out(f"Querying OSD(s) on disk {device}", state="i")
retcode, stdout, stderr = common.run_os_command(
f"ceph-volume lvm list --format json {device}"
)
if retcode:
logger.out("Failed: ceph-volume lvm list", state="e")
logger.out(stdout, state="d")
logger.out(stderr, state="d")
raise Exception
# 3b. Prepare the logical volume if ext_db_flag
created_osds = jloads(stdout)
# 4. Prepare the WAL and DB devices
if ext_db_flag:
_, osd_size_bytes, _ = common.run_os_command(
"blockdev --getsize64 {}".format(device)
for created_osd in created_osds:
# 4a. Get the OSD FSID and ID from the details
osd_details = created_osds[created_osd][0]
osd_fsid = osd_details["tags"]["ceph.osd_fsid"]
osd_id = osd_details["tags"]["ceph.osd_id"]
osd_lv = osd_details["lv_path"]
logger.out(
f"Creating Bluestore DB volume for OSD {osd_id}", state="i"
)
# 4b. Prepare the logical volume if ext_db_flag
_, osd_size_bytes, _ = common.run_os_command(
f"blockdev --getsize64 {osd_lv}"
)
osd_size_bytes = int(osd_size_bytes)
result = CephOSDInstance.create_osd_db_lv(
zkhandler, logger, osd_id, ext_db_ratio, osd_size_bytes
)
if not result:
raise Exception
db_device = "osd-db/osd-{}".format(osd_id)
# 4c. Attach the new DB device to the OSD
retcode, stdout, stderr = common.run_os_command(
f"ceph-volume lvm new-db --osd-id {osd_id} --osd-fsid {osd_fsid} --target {db_device}"
)
if retcode:
logger.out("Failed: ceph-volume lvm new-db", state="e")
logger.out(stdout, state="d")
logger.out(stderr, state="d")
raise Exception
# 4d. Get the list of created OSDs on the device (final pass)
logger.out(f"(Requerying OSD(s) on disk {device}", state="i")
retcode, stdout, stderr = common.run_os_command(
f"ceph-volume lvm list --format json {device}"
)
osd_size_bytes = int(osd_size_bytes)
result = CephOSDInstance.create_osd_db_lv(
zkhandler, logger, osd_id, ext_db_ratio, osd_size_bytes
)
if not result:
if retcode:
logger.out("Failed: ceph-volume lvm list", state="e")
logger.out(stdout, state="d")
logger.out(stderr, state="d")
raise Exception
db_device = "osd-db/osd-{}".format(osd_id)
dev_flags += " --block.db {}".format(db_device)
else:
db_device = ""
# 3c. Create the OSD for real
logger.out(
"Preparing LVM for new OSD disk with ID {} on {}".format(
osd_id, device
),
state="i",
)
retcode, stdout, stderr = common.run_os_command(
"ceph-volume lvm prepare --bluestore {devices}".format(
devices=dev_flags
created_osds = jloads(stdout)
# 5. Activate the OSDs
logger.out(f"Activating OSD(s) on disk {device}", state="i")
for created_osd in created_osds:
# 5a. Get the OSD FSID and ID from the details
osd_details = created_osds[created_osd][0]
osd_clusterfsid = osd_details["tags"]["ceph.cluster_fsid"]
osd_fsid = osd_details["tags"]["ceph.osd_fsid"]
osd_id = osd_details["tags"]["ceph.osd_id"]
db_device = osd_details["tags"].get("ceph.db_device", None)
osd_vg = osd_details["vg_name"]
osd_lv = osd_details["lv_name"]
# 5b. Activate the OSD
logger.out(f"Activating OSD {osd_id}", state="i")
retcode, stdout, stderr = common.run_os_command(
f"ceph-volume lvm activate --bluestore {osd_id} {osd_fsid}"
)
)
if retcode:
print("ceph-volume lvm prepare")
print(stdout)
print(stderr)
raise Exception
if retcode:
logger.out("Failed: ceph-volume lvm activate", state="e")
logger.out(stdout, state="d")
logger.out(stderr, state="d")
raise Exception
# 4a. Get OSD information
logger.out(
"Getting OSD information for ID {} on {}".format(osd_id, device),
state="i",
)
retcode, stdout, stderr = common.run_os_command(
"ceph-volume lvm list {device}".format(device=device)
)
for line in stdout.split("\n"):
if "block device" in line:
osd_blockdev = line.split()[-1]
if "osd fsid" in line:
osd_fsid = line.split()[-1]
if "cluster fsid" in line:
osd_clusterfsid = line.split()[-1]
if "devices" in line:
osd_device = line.split()[-1]
if not osd_fsid:
print("ceph-volume lvm list")
print("Could not find OSD information in data:")
print(stdout)
print(stderr)
raise Exception
# Split OSD blockdev into VG and LV components
# osd_blockdev = /dev/ceph-<uuid>/osd-block-<uuid>
_, _, osd_vg, osd_lv = osd_blockdev.split("/")
# Reset whatever we were given to Ceph's /dev/xdX naming
if device != osd_device:
device = osd_device
# 4b. Activate the OSD
logger.out("Activating new OSD disk with ID {}".format(osd_id), state="i")
retcode, stdout, stderr = common.run_os_command(
"ceph-volume lvm activate --bluestore {osdid} {osdfsid}".format(
osdid=osd_id, osdfsid=osd_fsid
# 5c. Add it to the crush map
logger.out(f"Adding OSD {osd_id} to CRUSH map", state="i")
retcode, stdout, stderr = common.run_os_command(
f"ceph osd crush add osd.{osd_id} {weight} root=default host={node}"
)
)
if retcode:
print("ceph-volume lvm activate")
print(stdout)
print(stderr)
raise Exception
if retcode:
logger.out("Failed: ceph osd crush add", state="e")
logger.out(stdout, state="d")
logger.out(stderr, state="d")
raise Exception
# 5. Add it to the crush map
logger.out(
"Adding new OSD disk with ID {} to CRUSH map".format(osd_id), state="i"
)
retcode, stdout, stderr = common.run_os_command(
"ceph osd crush add osd.{osdid} {weight} root=default host={node}".format(
osdid=osd_id, weight=weight, node=node
# 5d. Wait half a second for it to activate
time.sleep(0.5)
# 5e. Verify it started
retcode, stdout, stderr = common.run_os_command(
"systemctl status ceph-osd@{osdid}".format(osdid=osd_id)
)
)
if retcode:
print("ceph osd crush add")
print(stdout)
print(stderr)
raise Exception
if retcode:
logger.out(f"Failed: OSD {osd_id} unit is not active", state="e")
logger.out(stdout, state="d")
logger.out(stderr, state="d")
raise Exception
time.sleep(0.5)
# 5f. Add the new OSD to PVC
logger.out(f"Adding OSD {osd_id} to PVC", state="i")
zkhandler.write(
[
(("osd", osd_id), ""),
(("osd.node", osd_id), node),
(("osd.device", osd_id), device),
(("osd.db_device", osd_id), db_device),
(("osd.fsid", osd_id), osd_fsid),
(("osd.ofsid", osd_id), osd_fsid),
(("osd.cfsid", osd_id), osd_clusterfsid),
(("osd.lvm", osd_id), ""),
(("osd.vg", osd_id), osd_vg),
(("osd.lv", osd_id), osd_lv),
(("osd.is_split", osd_id), split_flag),
(
("osd.stats", osd_id),
'{"uuid": "|", "up": 0, "in": 0, "primary_affinity": "|", "utilization": "|", "var": "|", "pgs": "|", "kb": "|", "weight": "|", "reweight": "|", "node": "|", "used": "|", "avail": "|", "wr_ops": "|", "wr_data": "|", "rd_ops": "|", "rd_data": "|", "state": "|"}',
),
]
)
# 6. Verify it started
retcode, stdout, stderr = common.run_os_command(
"systemctl status ceph-osd@{osdid}".format(osdid=osd_id)
)
if retcode:
print("systemctl status")
print(stdout)
print(stderr)
raise Exception
# 7. Add the new OSD to the list
# 6. Log it
logger.out(
"Adding new OSD disk with ID {} to Zookeeper".format(osd_id), state="i"
f"Successfully created {split_count} new OSD(s) {','.join(created_osds.keys())} on disk {device}",
state="o",
)
zkhandler.write(
[
(("osd", osd_id), ""),
(("osd.node", osd_id), node),
(("osd.device", osd_id), device),
(("osd.db_device", osd_id), db_device),
(("osd.fsid", osd_id), ""),
(("osd.ofsid", osd_id), osd_fsid),
(("osd.cfsid", osd_id), osd_clusterfsid),
(("osd.lvm", osd_id), ""),
(("osd.vg", osd_id), osd_vg),
(("osd.lv", osd_id), osd_lv),
(
("osd.stats", osd_id),
'{"uuid": "|", "up": 0, "in": 0, "primary_affinity": "|", "utilization": "|", "var": "|", "pgs": "|", "kb": "|", "weight": "|", "reweight": "|", "node": "|", "used": "|", "avail": "|", "wr_ops": "|", "wr_data": "|", "rd_ops": "|", "rd_data": "|", "state": "|"}',
),
]
)
# Log it
logger.out("Created new OSD disk with ID {}".format(osd_id), state="o")
return True
except Exception as e:
# Log it
logger.out("Failed to create new OSD disk: {}".format(e), state="e")
logger.out(
f"Failed to create {split_count} new OSD(s) on disk {device}: {e}",
state="e",
)
return False
@staticmethod
@ -828,7 +845,7 @@ class CephOSDInstance(object):
@staticmethod
def remove_osd(zkhandler, logger, osd_id, osd_obj, force_flag):
logger.out("Removing OSD disk {}".format(osd_id), state="i")
logger.out("Removing OSD {}".format(osd_id), state="i")
try:
# Verify the OSD is present
retcode, stdout, stderr = common.run_os_command("ceph osd ls")
@ -843,7 +860,7 @@ class CephOSDInstance(object):
return True
# 1. Set the OSD down and out so it will flush
logger.out("Setting down OSD disk with ID {}".format(osd_id), state="i")
logger.out("Setting down OSD {}".format(osd_id), state="i")
retcode, stdout, stderr = common.run_os_command(
"ceph osd down {}".format(osd_id)
)
@ -856,7 +873,7 @@ class CephOSDInstance(object):
else:
raise Exception
logger.out("Setting out OSD disk with ID {}".format(osd_id), state="i")
logger.out("Setting out OSD {}".format(osd_id), state="i")
retcode, stdout, stderr = common.run_os_command(
"ceph osd out {}".format(osd_id)
)
@ -881,7 +898,7 @@ class CephOSDInstance(object):
time.sleep(5)
# 3. Stop the OSD process and wait for it to be terminated
logger.out("Stopping OSD disk with ID {}".format(osd_id), state="i")
logger.out("Stopping OSD {}".format(osd_id), state="i")
retcode, stdout, stderr = common.run_os_command(
"systemctl stop ceph-osd@{}".format(osd_id)
)
@ -922,25 +939,8 @@ class CephOSDInstance(object):
else:
raise Exception
# 5. Zap the volumes
logger.out(
"Zapping OSD {} disk on {}".format(osd_id, osd_device),
state="i",
)
retcode, stdout, stderr = common.run_os_command(
"ceph-volume lvm zap --destroy {}".format(osd_device)
)
if retcode:
print("ceph-volume lvm zap")
print(stdout)
print(stderr)
if force_flag:
logger.out("Ignoring error due to force flag", state="i")
else:
raise Exception
# 6. Purge the OSD from Ceph
logger.out("Purging OSD disk with ID {}".format(osd_id), state="i")
# 5. Purge the OSD from Ceph
logger.out("Purging OSD {}".format(osd_id), state="i")
retcode, stdout, stderr = common.run_os_command(
"ceph osd purge {} --yes-i-really-mean-it".format(osd_id)
)
@ -964,19 +964,15 @@ class CephOSDInstance(object):
)
# 8. Delete OSD from ZK
logger.out(
"Deleting OSD disk with ID {} from Zookeeper".format(osd_id), state="i"
)
logger.out("Deleting OSD {} from Zookeeper".format(osd_id), state="i")
zkhandler.delete(("osd", osd_id), recursive=True)
# Log it
logger.out("Removed OSD disk with ID {}".format(osd_id), state="o")
logger.out("Successfully removed OSD {}".format(osd_id), state="o")
return True
except Exception as e:
# Log it
logger.out(
"Failed to purge OSD disk with ID {}: {}".format(osd_id, e), state="e"
)
logger.out("Failed to remove OSD {}: {}".format(osd_id, e), state="e")
return False
@staticmethod
@ -1245,16 +1241,34 @@ def ceph_command(zkhandler, logger, this_node, data, d_osd):
# Adding a new OSD
if command == "osd_add":
node, device, weight, ext_db_flag, ext_db_ratio = args.split(",")
(
node,
device,
weight,
ext_db_flag,
ext_db_ratio,
split_flag,
split_count,
) = args.split(",")
ext_db_flag = bool(strtobool(ext_db_flag))
ext_db_ratio = float(ext_db_ratio)
split_flag = bool(strtobool(split_flag))
split_count = int(split_count)
if node == this_node.name:
# Lock the command queue
zk_lock = zkhandler.writelock("base.cmd.ceph")
with zk_lock:
# Add the OSD
result = CephOSDInstance.add_osd(
zkhandler, logger, node, device, weight, ext_db_flag, ext_db_ratio
zkhandler,
logger,
node,
device,
weight,
ext_db_flag,
ext_db_ratio,
split_flag,
split_count,
)
# Command succeeded
if result: