Add support for split OSD adds

Allows creating multiple OSDs on a single (NVMe) block device by leveraging the "ceph-volume lvm batch" command, which replaces the previous method of creating OSDs. Also adds a new Zookeeper (ZK) item for each OSD indicating whether or not it is split.
2023-11-01 21:17:38 -04:00
parent aa0b1f504f
commit 526a5f4a74
8 changed files with 281 additions and 197 deletions
--- a/api-daemon/pvcapid/flaskapi.py
+++ b/api-daemon/pvcapid/flaskapi.py
@ -4284,12 +4284,18 @@ class API_Storage_Ceph_OSD_Root(Resource):
            {
                "name": "ext_db",
                "required": False,
-                "helptext": "Whether to use an external OSD DB LV device.",
            },
            {
                "name": "ext_db_ratio",
                "required": False,
-                "helptext": "Decimal size ratio of the external OSD DB LV device.",
+            },
+            {
+                "name": "split",
+                "required": False,
+            },
+            {
+                "name": "count",
+                "required": False,
            },
        ]
    )
@ -4327,6 +4333,16 @@ class API_Storage_Ceph_OSD_Root(Resource):
            type: float
            required: false
            description: Decimal ratio of total OSD size for the external OSD DB LV device, default 0.05 (5%)
+          - in: query
+            name: split
+            type: boolean
+            required: false
+            description: Whether to split the block device into multiple OSDs (recommended for NVMe devices)
+          - in: query
+            name: count
+            type: integer
+            required: false
+            description: If {split}, how many OSDs to create on the block device; usually 2 or 4 depending on size
        responses:
          200:
            description: OK
@ -4345,6 +4361,8 @@ class API_Storage_Ceph_OSD_Root(Resource):
            reqargs.get("weight", None),
            reqargs.get("ext_db", False),
            float(reqargs.get("ext_db_ratio", 0.05)),
+            reqargs.get("split", False),
+            reqargs.get("count", 1),
        )


--- a/api-daemon/pvcapid/helper.py
+++ b/api-daemon/pvcapid/helper.py
@ -1366,12 +1366,28 @@ def ceph_osd_db_vg_add(zkhandler, node, device):


@ZKConnection(config)
-def ceph_osd_add(zkhandler, node, device, weight, ext_db_flag=False, ext_db_ratio=0.05):
+def ceph_osd_add(
+    zkhandler,
+    node,
+    device,
+    weight,
+    ext_db_flag=False,
+    ext_db_ratio=0.05,
+    split_flag=False,
+    split_count=1,
+):
    """
    Add a Ceph OSD to the PVC Ceph storage cluster.
    """
    retflag, retdata = pvc_ceph.add_osd(
-        zkhandler, node, device, weight, ext_db_flag, ext_db_ratio
+        zkhandler,
+        node,
+        device,
+        weight,
+        ext_db_flag,
+        ext_db_ratio,
+        split_flag,
+        split_count,
    )

    if retflag:
--- a/client-cli/pvc/cli/cli.py
+++ b/client-cli/pvc/cli/cli.py
@ -3411,8 +3411,17 @@ def cli_storage_osd_create_db_vg(node, device):
    type=float,
    help="Decimal ratio of the external database logical volume to the OSD size.",
 )
-@confirm_opt("Destroy all data on and create new OSD on node {node} device {device}")
-def cli_storage_osd_add(node, device, weight, ext_db_flag, ext_db_ratio):
+@click.option(
+    "-s",
+    "--split",
+    "split_count",
+    default=None,
+    show_default=False,
+    type=int,
+    help="Split an NVMe disk into this many OSDs",
+)
+@confirm_opt("Destroy all data on and create new OSD(s) on node {node} device {device}")
+def cli_storage_osd_add(node, device, weight, ext_db_flag, ext_db_ratio, split_count):
    """
    Add a new Ceph OSD on node NODE with block device DEVICE. DEVICE must be a valid block device path (e.g. '/dev/sda', '/dev/nvme0n1', '/dev/disk/by-path/...', '/dev/disk/by-id/...') or a "detect" string. Using partitions is not supported.

@ -3423,10 +3432,22 @@ def cli_storage_osd_add(node, device, weight, ext_db_flag, ext_db_ratio):
    If '--ext-db' is specified, the OSD database and WAL will be placed on a new logical volume in NODE's OSD database volume group. An OSD database volume group must exist on the node or the OSD creation will fail. See the 'pvc storage osd create-db-vg' command for more details.

    The default '--ext-db-ratio' of 0.05 (5%) is sufficient for most RBD workloads and OSD sizes, though this can be adjusted based on the sizes of the OSD(s) and the underlying database device. Ceph documentation recommends at least 0.02 (2%) for RBD use-cases, and higher values may improve WAL performance under write-heavy workloads with fewer OSDs per node.
+
+    For NVMe devices, it is recommended to split the block device into multiple OSDs to provide better processing throughput. To do this, specify "-s"/"--split" and the number of OSDs to create on the block device. For most NVMe devices, the recommended value is 2 or 4, such that each OSD is at least 500GB. Numbers higher than 4 are not recommended. Splitting is NOT RECOMMENDED for SATA SSDs.
    """

+    if split_count is not None:
+        split_flag = True
+
    retcode, retmsg = pvc.lib.storage.ceph_osd_add(
-        CLI_CONFIG, node, device, weight, ext_db_flag, ext_db_ratio
+        CLI_CONFIG,
+        node,
+        device,
+        weight,
+        ext_db_flag,
+        ext_db_ratio,
+        split_flag,
+        split_count,
    )
    finish(retcode, retmsg)

--- a/client-cli/pvc/lib/storage.py
+++ b/client-cli/pvc/lib/storage.py
@ -231,12 +231,14 @@ def ceph_osd_list(config, limit):
        return False, response.json().get("message", "")


-def ceph_osd_add(config, node, device, weight, ext_db_flag, ext_db_ratio):
+def ceph_osd_add(
+    config, node, device, weight, ext_db_flag, ext_db_ratio, split_flag, split_count
+):
    """
    Add new Ceph OSD

    API endpoint: POST /api/v1/storage/ceph/osd
-    API arguments: node={node}, device={device}, weight={weight}, ext_db={ext_db_flag}, ext_db_ratio={ext_db_ratio}
+    API arguments: node={node}, device={device}, weight={weight}, ext_db={ext_db_flag}, ext_db_ratio={ext_db_ratio}, split={split_flag}, count={split_count}
    API schema: {"message":"{data}"}
    """
    params = {
@ -245,6 +247,8 @@ def ceph_osd_add(config, node, device, weight, ext_db_flag, ext_db_ratio):
        "weight": weight,
        "ext_db": ext_db_flag,
        "ext_db_ratio": ext_db_ratio,
+        "split": split_flag,
+        "count": split_count,
    }
    response = call_api(config, "post", "/storage/ceph/osd", params=params)

--- a/daemon-common/ceph.py
+++ b/daemon-common/ceph.py
@ -211,6 +211,7 @@ def getOSDInformation(zkhandler, osd_id):
    # Get the devices
    osd_node = zkhandler.read(("osd.node", osd_id))
    osd_device = zkhandler.read(("osd.device", osd_id))
+    osd_is_split = zkhandler.read(("osd.is_split", osd_id))
    osd_db_device = zkhandler.read(("osd.db_device", osd_id))
    # Parse the stats data
    osd_stats_raw = zkhandler.read(("osd.stats", osd_id))
@ -220,6 +221,7 @@ def getOSDInformation(zkhandler, osd_id):
        "id": osd_id,
        "node": osd_node,
        "device": osd_device,
+        "is_split": osd_is_split,
        "db_device": osd_db_device,
        "stats": osd_stats,
    }
@ -266,7 +268,16 @@ def add_osd_db_vg(zkhandler, node, device):

 # OSD actions use the /cmd/ceph pipe
 # These actions must occur on the specific node they reference
-def add_osd(zkhandler, node, device, weight, ext_db_flag=False, ext_db_ratio=0.05):
+def add_osd(
+    zkhandler,
+    node,
+    device,
+    weight,
+    ext_db_flag=False,
+    ext_db_ratio=0.05,
+    split_flag=False,
+    split_count=1,
+):
    # Verify the target node exists
    if not common.verifyNode(zkhandler, node):
        return False, 'ERROR: No node named "{}" is present in the cluster.'.format(
@ -284,8 +295,8 @@ def add_osd(zkhandler, node, device, weight, ext_db_flag=False, ext_db_ratio=0.0
        )

    # Tell the cluster to create a new OSD for the host
-    add_osd_string = "osd_add {},{},{},{},{}".format(
-        node, device, weight, ext_db_flag, ext_db_ratio
+    add_osd_string = "osd_add {},{},{},{},{},{},{}".format(
+        node, device, weight, ext_db_flag, ext_db_ratio, split_flag, split_count
    )
    zkhandler.write([("base.cmd.ceph", add_osd_string)])
    # Wait 1/2 second for the cluster to get the message and start working
@ -295,14 +306,10 @@ def add_osd(zkhandler, node, device, weight, ext_db_flag=False, ext_db_ratio=0.0
        try:
            result = zkhandler.read("base.cmd.ceph").split()[0]
            if result == "success-osd_add":
-                message = 'Created new OSD with block device "{}" on node "{}".'.format(
-                    device, node
-                )
+                message = f'Created {split_count} new OSD(s) on node "{node}" block device "{device}"'
                success = True
            else:
-                message = (
-                    "ERROR: Failed to create new OSD; check node logs for details."
-                )
+                message = "ERROR: Failed to create OSD(s); check node logs for details."
                success = False
        except Exception:
            message = "ERROR: Command ignored by node."
--- a/daemon-common/migrations/versions/10.json
+++ b/daemon-common/migrations/versions/10.json
@ -0,0 +1 @@
+{"version": "10", "root": "", "base": {"root": "", "schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "cmd": "/cmd", "cmd.node": "/cmd/nodes", "cmd.domain": "/cmd/domains", "cmd.ceph": "/cmd/ceph", "logs": "/logs", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.health": "/ceph/health", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "logs": {"node": "", "messages": "/messages"}, "node": {"name": "", "keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "data.pvc_version": "/pvcversion", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword", "sriov": "/sriov", "sriov.pf": "/sriov/pf", "sriov.vf": "/sriov/vf", "monitoring.plugins": "/monitoring_plugins", "monitoring.data": "/monitoring_data", "monitoring.health": "/monitoring_health"}, "monitoring_plugin": {"name": "", "last_run": "/last_run", "health_delta": "/health_delta", "message": "/message", "data": "/data", "runtime": "/runtime"}, "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"}, "sriov_vf": {"phy": "", 
"pf": "/pf", "mtu": "/mtu", "mac": "/mac", "phy_mac": "/phy_mac", "config": "/config", "config.vlan_id": "/config/vlan_id", "config.vlan_qos": "/config/vlan_qos", "config.tx_rate_min": "/config/tx_rate_min", "config.tx_rate_max": "/config/tx_rate_max", "config.spoof_check": "/config/spoof_check", "config.link_state": "/config/link_state", "config.trust": "/config/trust", "config.query_rss": "/config/query_rss", "pci": "/pci", "pci.domain": "/pci/domain", "pci.bus": "/pci/bus", "pci.slot": "/pci/slot", "pci.function": "/pci/function", "used": "/used", "used_by": "/used_by"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "meta.tags": "/tags", "migrate.sync_lock": "/migrate_sync_lock"}, "tag": {"name": "", "type": "/type", "protected": "/protected"}, "network": {"vni": "", "type": "/nettype", "mtu": "/mtu", "rule": "/firewall_rules", "rule.in": "/firewall_rules/in", "rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": "/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname", "expiry": "/expiry", "client_id": "/clientid"}, "rule": {"description": "", "rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "db_device": "/db_device", "fsid": "/fsid", "ofsid": 
"/fsid/osd", "cfsid": "/fsid/cluster", "lvm": "/lvm", "vg": "/lvm/vg", "lv": "/lvm/lv", "is_split": "/is_split", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "tier": "/tier", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}}
--- a/daemon-common/zkhandler.py
+++ b/daemon-common/zkhandler.py
@ -540,7 +540,7 @@ class ZKHandler(object):
 #
 class ZKSchema(object):
    # Current version
-    _version = 9
+    _version = 10

    # Root for doing nested keys
    _schema_root = ""
@ -719,6 +719,7 @@ class ZKSchema(object):
            "lvm": "/lvm",
            "vg": "/lvm/vg",
            "lv": "/lvm/lv",
+            "is_split": "/is_split",
            "stats": "/stats",
        },
        # The schema of an individual pool entry (/ceph/pools/{pool_name})
@ -963,7 +964,9 @@ class ZKSchema(object):
                    kpath = f"{elem}.{ikey}"
                    # Validate that the key exists for that child
                    if not zkhandler.zk_conn.exists(self.path(kpath, child)):
-                        if elem == "pool" and ikey == "tier":
+                        if elem == "osd" and ikey == "is_split":
+                            default_data = "False"
+                        elif elem == "pool" and ikey == "tier":
                            default_data = "default"
                        else:
                            default_data = ""
--- a/node-daemon/pvcnoded/objects/CephInstance.py
+++ b/node-daemon/pvcnoded/objects/CephInstance.py
@ -26,6 +26,7 @@ import daemon_lib.common as common

 from distutils.util import strtobool
 from re import search, match, sub
+from json import loads as jloads


 def get_detect_device(detect_string):
@ -260,7 +261,15 @@ class CephOSDInstance(object):

    @staticmethod
    def add_osd(
-        zkhandler, logger, node, device, weight, ext_db_flag=False, ext_db_ratio=0.05
+        zkhandler,
+        logger,
+        node,
+        device,
+        weight,
+        ext_db_flag=False,
+        ext_db_ratio=0.05,
+        split_device=False,
+        split_count=1,
    ):
        # Handle a detect device if that is passed
        if match(r"detect:", device):
@ -278,177 +287,185 @@ class CephOSDInstance(object):
                )
                device = ddevice

-        # We are ready to create a new OSD on this node
-        logger.out("Creating new OSD disk on block device {}".format(device), state="i")
+        if split_device and split_count > 1:
+            split_flag = f"--osds-per-device {split_count}"
+            logger.out(
+                f"Creating {split_count} new OSD disks on block device {device}",
+                state="i",
+            )
+        else:
+            split_flag = ""
+            logger.out(f"Creating 1 new OSD disk on block device {device}", state="i")
+
+        if "nvme" in device:
+            class_flag = "--crush-device-class nvme"
+        else:
+            class_flag = "--crush-device-class ssd"
+
        try:
-            # 1. Create an OSD; we do this so we know what ID will be gen'd
-            retcode, stdout, stderr = common.run_os_command("ceph osd create")
-            if retcode:
-                print("ceph osd create")
-                print(stdout)
-                print(stderr)
-                raise Exception
-            osd_id = stdout.rstrip()
-
-            # 2. Remove that newly-created OSD
+            # 1. Zap the block device
+            logger.out(f"Zapping disk {device}", state="i")
            retcode, stdout, stderr = common.run_os_command(
-                "ceph osd rm {}".format(osd_id)
+                f"ceph-volume lvm zap --destroy {device}"
            )
            if retcode:
-                print("ceph osd rm")
-                print(stdout)
-                print(stderr)
+                logger.out("Failed: ceph-volume lvm zap", state="e")
+                logger.out(stdout, state="d")
+                logger.out(stderr, state="d")
                raise Exception

-            # 3a. Zap the disk to ensure it is ready to go
-            logger.out("Zapping disk {}".format(device), state="i")
+            # 2. Prepare the OSD(s)
+            logger.out(f"Preparing OSD(s) on disk {device}", state="i")
            retcode, stdout, stderr = common.run_os_command(
-                "ceph-volume lvm zap --destroy {}".format(device)
+                f"ceph-volume lvm batch --yes --prepare --bluestore {split_flag} {class_flag} {device}"
            )
            if retcode:
-                print("ceph-volume lvm zap")
-                print(stdout)
-                print(stderr)
+                logger.out("Failed: ceph-volume lvm batch", state="e")
+                logger.out(stdout, state="d")
+                logger.out(stderr, state="d")
                raise Exception

-            dev_flags = "--data {}".format(device)
+            # 3. Get the list of created OSDs on the device (initial pass)
+            logger.out(f"Querying OSD(s) on disk {device}", state="i")
+            retcode, stdout, stderr = common.run_os_command(
+                f"ceph-volume lvm list --format json {device}"
+            )
+            if retcode:
+                logger.out("Failed: ceph-volume lvm list", state="e")
+                logger.out(stdout, state="d")
+                logger.out(stderr, state="d")
+                raise Exception

-            # 3b. Prepare the logical volume if ext_db_flag
+            created_osds = jloads(stdout)
+
+            # 4. Prepare the WAL and DB devices
            if ext_db_flag:
-                _, osd_size_bytes, _ = common.run_os_command(
-                    "blockdev --getsize64 {}".format(device)
+                for created_osd in created_osds:
+                    # 4a. Get the OSD FSID and ID from the details
+                    osd_details = created_osds[created_osd][0]
+                    osd_fsid = osd_details["tags"]["ceph.osd_fsid"]
+                    osd_id = osd_details["tags"]["ceph.osd_id"]
+                    osd_lv = osd_details["lv_path"]
+
+                    logger.out(
+                        f"Creating Bluestore DB volume for OSD {osd_id}", state="i"
+                    )
+
+                    # 4b. Prepare the logical volume if ext_db_flag
+                    _, osd_size_bytes, _ = common.run_os_command(
+                        f"blockdev --getsize64 {osd_lv}"
+                    )
+                    osd_size_bytes = int(osd_size_bytes)
+                    result = CephOSDInstance.create_osd_db_lv(
+                        zkhandler, logger, osd_id, ext_db_ratio, osd_size_bytes
+                    )
+                    if not result:
+                        raise Exception
+                    db_device = "osd-db/osd-{}".format(osd_id)
+
+                    # 4c. Attach the new DB device to the OSD
+                    retcode, stdout, stderr = common.run_os_command(
+                        f"ceph-volume lvm new-db --osd-id {osd_id} --osd-fsid {osd_fsid} --target {db_device}"
+                    )
+                    if retcode:
+                        logger.out("Failed: ceph-volume lvm new-db", state="e")
+                        logger.out(stdout, state="d")
+                        logger.out(stderr, state="d")
+                        raise Exception
+
+                # 4d. Get the list of created OSDs on the device (final pass)
+                logger.out(f"(Requerying OSD(s) on disk {device}", state="i")
+                retcode, stdout, stderr = common.run_os_command(
+                    f"ceph-volume lvm list --format json {device}"
                )
-                osd_size_bytes = int(osd_size_bytes)
-                result = CephOSDInstance.create_osd_db_lv(
-                    zkhandler, logger, osd_id, ext_db_ratio, osd_size_bytes
-                )
-                if not result:
+                if retcode:
+                    logger.out("Failed: ceph-volume lvm list", state="e")
+                    logger.out(stdout, state="d")
+                    logger.out(stderr, state="d")
                    raise Exception
-                db_device = "osd-db/osd-{}".format(osd_id)
-                dev_flags += " --block.db {}".format(db_device)
-            else:
-                db_device = ""

-            # 3c. Create the OSD for real
-            logger.out(
-                "Preparing LVM for new OSD disk with ID {} on {}".format(
-                    osd_id, device
-                ),
-                state="i",
-            )
-            retcode, stdout, stderr = common.run_os_command(
-                "ceph-volume lvm prepare --bluestore {devices}".format(
-                    devices=dev_flags
+                created_osds = jloads(stdout)
+
+            # 5. Activate the OSDs
+            logger.out(f"Activating OSD(s) on disk {device}", state="i")
+            for created_osd in created_osds:
+                # 5a. Get the OSD FSID and ID from the details
+                osd_details = created_osds[created_osd][0]
+                osd_clusterfsid = osd_details["tags"]["ceph.cluster_fsid"]
+                osd_fsid = osd_details["tags"]["ceph.osd_fsid"]
+                osd_id = osd_details["tags"]["ceph.osd_id"]
+                db_device = osd_details["tags"].get("ceph.db_device", None)
+                osd_vg = osd_details["vg_name"]
+                osd_lv = osd_details["lv_name"]
+
+                # 5b. Activate the OSD
+                logger.out(f"Activating OSD {osd_id}", state="i")
+                retcode, stdout, stderr = common.run_os_command(
+                    f"ceph-volume lvm activate --bluestore {osd_id} {osd_fsid}"
                )
-            )
-            if retcode:
-                print("ceph-volume lvm prepare")
-                print(stdout)
-                print(stderr)
-                raise Exception
+                if retcode:
+                    logger.out("Failed: ceph-volume lvm activate", state="e")
+                    logger.out(stdout, state="d")
+                    logger.out(stderr, state="d")
+                    raise Exception

-            # 4a. Get OSD information
-            logger.out(
-                "Getting OSD information for ID {} on {}".format(osd_id, device),
-                state="i",
-            )
-            retcode, stdout, stderr = common.run_os_command(
-                "ceph-volume lvm list {device}".format(device=device)
-            )
-            for line in stdout.split("\n"):
-                if "block device" in line:
-                    osd_blockdev = line.split()[-1]
-                if "osd fsid" in line:
-                    osd_fsid = line.split()[-1]
-                if "cluster fsid" in line:
-                    osd_clusterfsid = line.split()[-1]
-                if "devices" in line:
-                    osd_device = line.split()[-1]
-
-            if not osd_fsid:
-                print("ceph-volume lvm list")
-                print("Could not find OSD information in data:")
-                print(stdout)
-                print(stderr)
-                raise Exception
-
-            # Split OSD blockdev into VG and LV components
-            # osd_blockdev = /dev/ceph-<uuid>/osd-block-<uuid>
-            _, _, osd_vg, osd_lv = osd_blockdev.split("/")
-
-            # Reset whatever we were given to Ceph's /dev/xdX naming
-            if device != osd_device:
-                device = osd_device
-
-            # 4b. Activate the OSD
-            logger.out("Activating new OSD disk with ID {}".format(osd_id), state="i")
-            retcode, stdout, stderr = common.run_os_command(
-                "ceph-volume lvm activate --bluestore {osdid} {osdfsid}".format(
-                    osdid=osd_id, osdfsid=osd_fsid
+                # 5c. Add it to the crush map
+                logger.out(f"Adding OSD {osd_id} to CRUSH map", state="i")
+                retcode, stdout, stderr = common.run_os_command(
+                    f"ceph osd crush add osd.{osd_id} {weight} root=default host={node}"
                )
-            )
-            if retcode:
-                print("ceph-volume lvm activate")
-                print(stdout)
-                print(stderr)
-                raise Exception
+                if retcode:
+                    logger.out("Failed: ceph osd crush add", state="e")
+                    logger.out(stdout, state="d")
+                    logger.out(stderr, state="d")
+                    raise Exception

-            # 5. Add it to the crush map
-            logger.out(
-                "Adding new OSD disk with ID {} to CRUSH map".format(osd_id), state="i"
-            )
-            retcode, stdout, stderr = common.run_os_command(
-                "ceph osd crush add osd.{osdid} {weight} root=default host={node}".format(
-                    osdid=osd_id, weight=weight, node=node
+                # 5d. Wait half a second for it to activate
+                time.sleep(0.5)
+
+                # 5e. Verify it started
+                retcode, stdout, stderr = common.run_os_command(
+                    "systemctl status ceph-osd@{osdid}".format(osdid=osd_id)
                )
-            )
-            if retcode:
-                print("ceph osd crush add")
-                print(stdout)
-                print(stderr)
-                raise Exception
+                if retcode:
+                    logger.out(f"Failed: OSD {osd_id} unit is not active", state="e")
+                    logger.out(stdout, state="d")
+                    logger.out(stderr, state="d")
+                    raise Exception

-            time.sleep(0.5)
+                # 5f. Add the new OSD to PVC
+                logger.out(f"Adding OSD {osd_id} to PVC", state="i")
+                zkhandler.write(
+                    [
+                        (("osd", osd_id), ""),
+                        (("osd.node", osd_id), node),
+                        (("osd.device", osd_id), device),
+                        (("osd.db_device", osd_id), db_device),
+                        (("osd.fsid", osd_id), osd_fsid),
+                        (("osd.ofsid", osd_id), osd_fsid),
+                        (("osd.cfsid", osd_id), osd_clusterfsid),
+                        (("osd.lvm", osd_id), ""),
+                        (("osd.vg", osd_id), osd_vg),
+                        (("osd.lv", osd_id), osd_lv),
+                        (("osd.is_split", osd_id), split_flag),
+                        (
+                            ("osd.stats", osd_id),
+                            '{"uuid": "|", "up": 0, "in": 0, "primary_affinity": "|", "utilization": "|", "var": "|", "pgs": "|", "kb": "|", "weight": "|", "reweight": "|", "node": "|", "used": "|", "avail": "|", "wr_ops": "|", "wr_data": "|", "rd_ops": "|", "rd_data": "|", "state": "|"}',
+                        ),
+                    ]
+                )

-            # 6. Verify it started
-            retcode, stdout, stderr = common.run_os_command(
-                "systemctl status ceph-osd@{osdid}".format(osdid=osd_id)
-            )
-            if retcode:
-                print("systemctl status")
-                print(stdout)
-                print(stderr)
-                raise Exception
-
-            # 7. Add the new OSD to the list
+            # 6. Log it
            logger.out(
-                "Adding new OSD disk with ID {} to Zookeeper".format(osd_id), state="i"
+                f"Successfully created {split_count} new OSD(s) {','.join(created_osds.keys())} on disk {device}",
+                state="o",
            )
-            zkhandler.write(
-                [
-                    (("osd", osd_id), ""),
-                    (("osd.node", osd_id), node),
-                    (("osd.device", osd_id), device),
-                    (("osd.db_device", osd_id), db_device),
-                    (("osd.fsid", osd_id), ""),
-                    (("osd.ofsid", osd_id), osd_fsid),
-                    (("osd.cfsid", osd_id), osd_clusterfsid),
-                    (("osd.lvm", osd_id), ""),
-                    (("osd.vg", osd_id), osd_vg),
-                    (("osd.lv", osd_id), osd_lv),
-                    (
-                        ("osd.stats", osd_id),
-                        '{"uuid": "|", "up": 0, "in": 0, "primary_affinity": "|", "utilization": "|", "var": "|", "pgs": "|", "kb": "|", "weight": "|", "reweight": "|", "node": "|", "used": "|", "avail": "|", "wr_ops": "|", "wr_data": "|", "rd_ops": "|", "rd_data": "|", "state": "|"}',
-                    ),
-                ]
-            )
-
-            # Log it
-            logger.out("Created new OSD disk with ID {}".format(osd_id), state="o")
            return True
        except Exception as e:
-            # Log it
-            logger.out("Failed to create new OSD disk: {}".format(e), state="e")
+            logger.out(
+                f"Failed to create {split_count} new OSD(s) on disk {device}: {e}",
+                state="e",
+            )
            return False

    @staticmethod
@ -828,7 +845,7 @@ class CephOSDInstance(object):

    @staticmethod
    def remove_osd(zkhandler, logger, osd_id, osd_obj, force_flag):
-        logger.out("Removing OSD disk {}".format(osd_id), state="i")
+        logger.out("Removing OSD {}".format(osd_id), state="i")
        try:
            # Verify the OSD is present
            retcode, stdout, stderr = common.run_os_command("ceph osd ls")
@ -843,7 +860,7 @@ class CephOSDInstance(object):
                    return True

            # 1. Set the OSD down and out so it will flush
-            logger.out("Setting down OSD disk with ID {}".format(osd_id), state="i")
+            logger.out("Setting down OSD {}".format(osd_id), state="i")
            retcode, stdout, stderr = common.run_os_command(
                "ceph osd down {}".format(osd_id)
            )
@ -856,7 +873,7 @@ class CephOSDInstance(object):
                else:
                    raise Exception

-            logger.out("Setting out OSD disk with ID {}".format(osd_id), state="i")
+            logger.out("Setting out OSD {}".format(osd_id), state="i")
            retcode, stdout, stderr = common.run_os_command(
                "ceph osd out {}".format(osd_id)
            )
@ -881,7 +898,7 @@ class CephOSDInstance(object):
                    time.sleep(5)

            # 3. Stop the OSD process and wait for it to be terminated
-            logger.out("Stopping OSD disk with ID {}".format(osd_id), state="i")
+            logger.out("Stopping OSD {}".format(osd_id), state="i")
            retcode, stdout, stderr = common.run_os_command(
                "systemctl stop ceph-osd@{}".format(osd_id)
            )
@ -922,25 +939,8 @@ class CephOSDInstance(object):
                else:
                    raise Exception

-            # 5. Zap the volumes
-            logger.out(
-                "Zapping OSD {} disk on {}".format(osd_id, osd_device),
-                state="i",
-            )
-            retcode, stdout, stderr = common.run_os_command(
-                "ceph-volume lvm zap --destroy {}".format(osd_device)
-            )
-            if retcode:
-                print("ceph-volume lvm zap")
-                print(stdout)
-                print(stderr)
-                if force_flag:
-                    logger.out("Ignoring error due to force flag", state="i")
-                else:
-                    raise Exception
-
-            # 6. Purge the OSD from Ceph
-            logger.out("Purging OSD disk with ID {}".format(osd_id), state="i")
+            # 5. Purge the OSD from Ceph
+            logger.out("Purging OSD {}".format(osd_id), state="i")
            retcode, stdout, stderr = common.run_os_command(
                "ceph osd purge {} --yes-i-really-mean-it".format(osd_id)
            )
@ -964,19 +964,15 @@ class CephOSDInstance(object):
                )

            # 8. Delete OSD from ZK
-            logger.out(
-                "Deleting OSD disk with ID {} from Zookeeper".format(osd_id), state="i"
-            )
+            logger.out("Deleting OSD {} from Zookeeper".format(osd_id), state="i")
            zkhandler.delete(("osd", osd_id), recursive=True)

            # Log it
-            logger.out("Removed OSD disk with ID {}".format(osd_id), state="o")
+            logger.out("Successfully removed OSD {}".format(osd_id), state="o")
            return True
        except Exception as e:
            # Log it
-            logger.out(
-                "Failed to purge OSD disk with ID {}: {}".format(osd_id, e), state="e"
-            )
+            logger.out("Failed to remove OSD {}: {}".format(osd_id, e), state="e")
            return False

    @staticmethod
@ -1245,16 +1241,34 @@ def ceph_command(zkhandler, logger, this_node, data, d_osd):

    # Adding a new OSD
    if command == "osd_add":
-        node, device, weight, ext_db_flag, ext_db_ratio = args.split(",")
+        (
+            node,
+            device,
+            weight,
+            ext_db_flag,
+            ext_db_ratio,
+            split_flag,
+            split_count,
+        ) = args.split(",")
        ext_db_flag = bool(strtobool(ext_db_flag))
        ext_db_ratio = float(ext_db_ratio)
+        split_flag = bool(strtobool(split_flag))
+        split_count = int(split_count)
        if node == this_node.name:
            # Lock the command queue
            zk_lock = zkhandler.writelock("base.cmd.ceph")
            with zk_lock:
                # Add the OSD
                result = CephOSDInstance.add_osd(
-                    zkhandler, logger, node, device, weight, ext_db_flag, ext_db_ratio
+                    zkhandler,
+                    logger,
+                    node,
+                    device,
+                    weight,
+                    ext_db_flag,
+                    ext_db_ratio,
+                    split_flag,
+                    split_count,
                )
                # Command succeeded
                if result:
				`@ -0,0 +1 @@`
				{"version": "10", "root": "", "base": {"root": "", "schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "cmd": "/cmd", "cmd.node": "/cmd/nodes", "cmd.domain": "/cmd/domains", "cmd.ceph": "/cmd/ceph", "logs": "/logs", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.health": "/ceph/health", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "logs": {"node": "", "messages": "/messages"}, "node": {"name": "", "keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "data.pvc_version": "/pvcversion", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword", "sriov": "/sriov", "sriov.pf": "/sriov/pf", "sriov.vf": "/sriov/vf", "monitoring.plugins": "/monitoring_plugins", "monitoring.data": "/monitoring_data", "monitoring.health": "/monitoring_health"}, "monitoring_plugin": {"name": "", "last_run": "/last_run", "health_delta": "/health_delta", "message": "/message", "data": "/data", "runtime": "/runtime"}, "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"}, "sriov_vf": {"phy": "", 
"pf": "/pf", "mtu": "/mtu", "mac": "/mac", "phy_mac": "/phy_mac", "config": "/config", "config.vlan_id": "/config/vlan_id", "config.vlan_qos": "/config/vlan_qos", "config.tx_rate_min": "/config/tx_rate_min", "config.tx_rate_max": "/config/tx_rate_max", "config.spoof_check": "/config/spoof_check", "config.link_state": "/config/link_state", "config.trust": "/config/trust", "config.query_rss": "/config/query_rss", "pci": "/pci", "pci.domain": "/pci/domain", "pci.bus": "/pci/bus", "pci.slot": "/pci/slot", "pci.function": "/pci/function", "used": "/used", "used_by": "/used_by"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "meta.tags": "/tags", "migrate.sync_lock": "/migrate_sync_lock"}, "tag": {"name": "", "type": "/type", "protected": "/protected"}, "network": {"vni": "", "type": "/nettype", "mtu": "/mtu", "rule": "/firewall_rules", "rule.in": "/firewall_rules/in", "rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": "/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname", "expiry": "/expiry", "client_id": "/clientid"}, "rule": {"description": "", "rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "db_device": "/db_device", "fsid": "/fsid", "ofsid": 
"/fsid/osd", "cfsid": "/fsid/cluster", "lvm": "/lvm", "vg": "/lvm/vg", "lv": "/lvm/lv", "is_split": "/is_split", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "tier": "/tier", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}}