Add support for split OSD adds
Allows creating multiple OSDs on a single (NVMe) block device, leveraging the "ceph-volume lvm batch" command. This replaces the previous method of creating OSDs. Also adds a new ZK key to each OSD indicating whether or not it is split.
parent aa0b1f504f
commit 526a5f4a74
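For orientation before the individual hunks, here is a minimal sketch of the ceph-volume invocation the new code path assembles (see the CephOSDInstance.add_osd rewrite further down); the device path and split count are illustrative assumptions, not values from this commit:

    # Illustrative values; the command shape mirrors the new add_osd flow below.
    split_count = 4
    device = "/dev/nvme0n1"
    split_flag = f"--osds-per-device {split_count}" if split_count > 1 else ""
    class_flag = "--crush-device-class nvme" if "nvme" in device else "--crush-device-class ssd"
    command = f"ceph-volume lvm batch --yes --prepare --bluestore {split_flag} {class_flag} {device}"
    print(command)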
@@ -4284,12 +4284,18 @@ class API_Storage_Ceph_OSD_Root(Resource):
             {
                 "name": "ext_db",
                 "required": False,
                 "helptext": "Whether to use an external OSD DB LV device.",
             },
             {
                 "name": "ext_db_ratio",
                 "required": False,
                 "helptext": "Decimal size ratio of the external OSD DB LV device.",
             },
+            {
+                "name": "split",
+                "required": False,
+            },
+            {
+                "name": "count",
+                "required": False,
+            },
         ]
     )
@@ -4327,6 +4333,16 @@ class API_Storage_Ceph_OSD_Root(Resource):
              type: float
              required: false
              description: Decimal ratio of total OSD size for the external OSD DB LV device, default 0.05 (5%)
+            - in: query
+              name: split
+              type: boolean
+              required: false
+              description: Whether to split the block device into multiple OSDs (recommended for NVMe devices)
+            - in: query
+              name: count
+              type: integer
+              required: false
+              description: If {split}, how many OSDs to create on the block device; usually 2 or 4 depending on size
        responses:
          200:
            description: OK
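For reference, the two new query parameters can be exercised directly against the existing endpoint. A minimal sketch using Python requests; the base URL, API key header, node name, and device path are all illustrative assumptions, not taken from this diff:

    import requests

    # Illustrative values; only the parameter names come from the API definition above.
    params = {
        "node": "hv1",
        "device": "/dev/nvme0n1",
        "weight": 1.0,
        "ext_db": False,
        "split": True,
        "count": 4,
    }
    response = requests.post(
        "http://pvc-api.local:7370/api/v1/storage/ceph/osd",  # illustrative URL
        headers={"X-Api-Key": "secret"},                      # illustrative auth
        params=params,
    )
    print(response.json().get("message"))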
@@ -4345,6 +4361,8 @@ class API_Storage_Ceph_OSD_Root(Resource):
             reqargs.get("weight", None),
             reqargs.get("ext_db", False),
             float(reqargs.get("ext_db_ratio", 0.05)),
+            reqargs.get("split", False),
+            reqargs.get("count", 1),
         )
@@ -1366,12 +1366,28 @@ def ceph_osd_db_vg_add(zkhandler, node, device):


 @ZKConnection(config)
-def ceph_osd_add(zkhandler, node, device, weight, ext_db_flag=False, ext_db_ratio=0.05):
+def ceph_osd_add(
+    zkhandler,
+    node,
+    device,
+    weight,
+    ext_db_flag=False,
+    ext_db_ratio=0.05,
+    split_flag=False,
+    split_count=1,
+):
     """
     Add a Ceph OSD to the PVC Ceph storage cluster.
     """
     retflag, retdata = pvc_ceph.add_osd(
-        zkhandler, node, device, weight, ext_db_flag, ext_db_ratio
+        zkhandler,
+        node,
+        device,
+        weight,
+        ext_db_flag,
+        ext_db_ratio,
+        split_flag,
+        split_count,
     )

     if retflag:
@@ -3411,8 +3411,17 @@ def cli_storage_osd_create_db_vg(node, device):
     type=float,
     help="Decimal ratio of the external database logical volume to the OSD size.",
 )
-@confirm_opt("Destroy all data on and create new OSD on node {node} device {device}")
-def cli_storage_osd_add(node, device, weight, ext_db_flag, ext_db_ratio):
+@click.option(
+    "-s",
+    "--split",
+    "split_count",
+    default=None,
+    show_default=False,
+    type=int,
+    help="Split an NVMe disk into this many OSDs",
+)
+@confirm_opt("Destroy all data on and create new OSD(s) on node {node} device {device}")
+def cli_storage_osd_add(node, device, weight, ext_db_flag, ext_db_ratio, split_count):
     """
     Add a new Ceph OSD on node NODE with block device DEVICE. DEVICE must be a valid block device path (e.g. '/dev/sda', '/dev/nvme0n1', '/dev/disk/by-path/...', '/dev/disk/by-id/...') or a "detect" string. Using partitions is not supported.
@@ -3423,10 +3432,22 @@ def cli_storage_osd_add(node, device, weight, ext_db_flag, ext_db_ratio):
     If '--ext-db' is specified, the OSD database and WAL will be placed on a new logical volume in NODE's OSD database volume group. An OSD database volume group must exist on the node or the OSD creation will fail. See the 'pvc storage osd create-db-vg' command for more details.

     The default '--ext-db-ratio' of 0.05 (5%) is sufficient for most RBD workloads and OSD sizes, though this can be adjusted based on the sizes of the OSD(s) and the underlying database device. Ceph documentation recommends at least 0.02 (2%) for RBD use-cases, and higher values may improve WAL performance under write-heavy workloads with fewer OSDs per node.
+
+    For NVMe devices, it is recommended to split the block device into multiple OSDs to provide better processing throughput. To do this, specify "-s"/"--split" and the number of OSDs to create on the block device. For most NVMe devices, the recommended value is 2 or 4, such that each OSD is at least 500GB. Numbers higher than 4 are not recommended. This is NOT RECOMMENDED for SATA SSDs.
     """

+    if split_count is not None:
+        split_flag = True
+    else:
+        split_flag = False
+
     retcode, retmsg = pvc.lib.storage.ceph_osd_add(
-        CLI_CONFIG, node, device, weight, ext_db_flag, ext_db_ratio
+        CLI_CONFIG,
+        node,
+        device,
+        weight,
+        ext_db_flag,
+        ext_db_ratio,
+        split_flag,
+        split_count,
     )
     finish(retcode, retmsg)
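As a usage example, assuming a node named hv1 and an NVMe block device at /dev/nvme0n1 (both illustrative), splitting that device into four OSDs would be invoked as:

    pvc storage osd add hv1 /dev/nvme0n1 --split 4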
@@ -231,12 +231,14 @@ def ceph_osd_list(config, limit):
         return False, response.json().get("message", "")


-def ceph_osd_add(config, node, device, weight, ext_db_flag, ext_db_ratio):
+def ceph_osd_add(
+    config, node, device, weight, ext_db_flag, ext_db_ratio, split_flag, split_count
+):
     """
     Add new Ceph OSD

     API endpoint: POST /api/v1/storage/ceph/osd
-    API arguments: node={node}, device={device}, weight={weight}, ext_db={ext_db_flag}, ext_db_ratio={ext_db_ratio}
+    API arguments: node={node}, device={device}, weight={weight}, ext_db={ext_db_flag}, ext_db_ratio={ext_db_ratio}, split={split_flag}, count={split_count}
     API schema: {"message":"{data}"}
     """
     params = {
@@ -245,6 +247,8 @@ def ceph_osd_add(config, node, device, weight, ext_db_flag, ext_db_ratio):
         "weight": weight,
         "ext_db": ext_db_flag,
         "ext_db_ratio": ext_db_ratio,
+        "split": split_flag,
+        "count": split_count,
     }
     response = call_api(config, "post", "/storage/ceph/osd", params=params)
@@ -211,6 +211,7 @@ def getOSDInformation(zkhandler, osd_id):
     # Get the devices
     osd_node = zkhandler.read(("osd.node", osd_id))
     osd_device = zkhandler.read(("osd.device", osd_id))
+    osd_is_split = zkhandler.read(("osd.is_split", osd_id))
     osd_db_device = zkhandler.read(("osd.db_device", osd_id))
     # Parse the stats data
     osd_stats_raw = zkhandler.read(("osd.stats", osd_id))
@@ -220,6 +221,7 @@ def getOSDInformation(zkhandler, osd_id):
         "id": osd_id,
         "node": osd_node,
         "device": osd_device,
+        "is_split": osd_is_split,
         "db_device": osd_db_device,
         "stats": osd_stats,
     }
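For reference, a sketch of the per-OSD information dictionary after this change; every value below is illustrative and made up, and for OSDs created before the schema migration the new field is backfilled as "False" (see the validation default later in this commit):

    osd_information = {
        "id": "12",                # illustrative OSD ID
        "node": "hv1",             # illustrative node name
        "device": "/dev/nvme0n1",  # illustrative device path
        "is_split": "False",       # value of the new osd.is_split key
        "db_device": "",
        "stats": {},               # parsed osd.stats data in the real function
    }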
@@ -266,7 +268,16 @@ def add_osd_db_vg(zkhandler, node, device):

 # OSD actions use the /cmd/ceph pipe
 # These actions must occur on the specific node they reference
-def add_osd(zkhandler, node, device, weight, ext_db_flag=False, ext_db_ratio=0.05):
+def add_osd(
+    zkhandler,
+    node,
+    device,
+    weight,
+    ext_db_flag=False,
+    ext_db_ratio=0.05,
+    split_flag=False,
+    split_count=1,
+):
     # Verify the target node exists
     if not common.verifyNode(zkhandler, node):
         return False, 'ERROR: No node named "{}" is present in the cluster.'.format(
@@ -284,8 +295,8 @@ def add_osd(zkhandler, node, device, weight, ext_db_flag=False, ext_db_ratio=0.05):
     )

     # Tell the cluster to create a new OSD for the host
-    add_osd_string = "osd_add {},{},{},{},{}".format(
-        node, device, weight, ext_db_flag, ext_db_ratio
+    add_osd_string = "osd_add {},{},{},{},{},{},{}".format(
+        node, device, weight, ext_db_flag, ext_db_ratio, split_flag, split_count
     )
     zkhandler.write([("base.cmd.ceph", add_osd_string)])
     # Wait 1/2 second for the cluster to get the message and start working
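To make the wire format concrete, here is a sketch of the string written to base.cmd.ceph when split is enabled; the node name and device path are illustrative, and only the format string comes from the code above:

    add_osd_string = "osd_add {},{},{},{},{},{},{}".format(
        "hv1", "/dev/nvme0n1", "1.0", False, 0.05, True, 4
    )
    print(add_osd_string)
    # osd_add hv1,/dev/nvme0n1,1.0,False,0.05,True,4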
@@ -295,14 +306,10 @@ def add_osd(zkhandler, node, device, weight, ext_db_flag=False, ext_db_ratio=0.05):
     try:
         result = zkhandler.read("base.cmd.ceph").split()[0]
         if result == "success-osd_add":
-            message = 'Created new OSD with block device "{}" on node "{}".'.format(
-                device, node
-            )
+            message = f'Created {split_count} new OSD(s) on node "{node}" block device "{device}"'
             success = True
         else:
-            message = (
-                "ERROR: Failed to create new OSD; check node logs for details."
-            )
+            message = "ERROR: Failed to create OSD(s); check node logs for details."
             success = False
     except Exception:
         message = "ERROR: Command ignored by node."
@@ -0,0 +1 @@
+{"version": "10", "root": "", "base": {"root": "", "schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "cmd": "/cmd", "cmd.node": "/cmd/nodes", "cmd.domain": "/cmd/domains", "cmd.ceph": "/cmd/ceph", "logs": "/logs", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.health": "/ceph/health", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "logs": {"node": "", "messages": "/messages"}, "node": {"name": "", "keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "data.pvc_version": "/pvcversion", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword", "sriov": "/sriov", "sriov.pf": "/sriov/pf", "sriov.vf": "/sriov/vf", "monitoring.plugins": "/monitoring_plugins", "monitoring.data": "/monitoring_data", "monitoring.health": "/monitoring_health"}, "monitoring_plugin": {"name": "", "last_run": "/last_run", "health_delta": "/health_delta", "message": "/message", "data": "/data", "runtime": "/runtime"}, "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"}, "sriov_vf": {"phy": "", "pf": "/pf", "mtu": "/mtu", "mac": "/mac", "phy_mac": "/phy_mac", "config": "/config", "config.vlan_id": "/config/vlan_id", "config.vlan_qos": "/config/vlan_qos", "config.tx_rate_min": "/config/tx_rate_min", "config.tx_rate_max": "/config/tx_rate_max", "config.spoof_check": "/config/spoof_check", "config.link_state": "/config/link_state", "config.trust": "/config/trust", "config.query_rss": "/config/query_rss", "pci": "/pci", "pci.domain": "/pci/domain", "pci.bus": "/pci/bus", "pci.slot": "/pci/slot", "pci.function": "/pci/function", "used": "/used", "used_by": "/used_by"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "meta.tags": "/tags", "migrate.sync_lock": "/migrate_sync_lock"}, "tag": {"name": "", "type": "/type", "protected": "/protected"}, "network": {"vni": "", "type": "/nettype", "mtu": "/mtu", "rule": "/firewall_rules", "rule.in": "/firewall_rules/in", "rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": "/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname", "expiry": "/expiry", "client_id": "/clientid"}, "rule": {"description": "", "rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "db_device": "/db_device", "fsid": "/fsid", "ofsid": "/fsid/osd", "cfsid": "/fsid/cluster", "lvm": "/lvm", "vg": "/lvm/vg", "lv": "/lvm/lv", "is_split": "/is_split", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "tier": "/tier", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}}
@@ -540,7 +540,7 @@ class ZKHandler(object):
 #
 class ZKSchema(object):
     # Current version
-    _version = 9
+    _version = 10

     # Root for doing nested keys
     _schema_root = ""
@@ -719,6 +719,7 @@ class ZKSchema(object):
             "lvm": "/lvm",
             "vg": "/lvm/vg",
             "lv": "/lvm/lv",
+            "is_split": "/is_split",
             "stats": "/stats",
         },
         # The schema of an individual pool entry (/ceph/pools/{pool_name})
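As a quick illustration of what the new schema key resolves to: the "osd" base maps to /ceph/osds in the version-10 migration above, so each OSD gains a child key as sketched below. The OSD ID is illustrative, and the backfill value comes from the validation default added in the next hunk.

    # Illustrative OSD ID; the path layout follows the version-10 schema above.
    osd_id = "3"
    is_split_path = f"/ceph/osds/{osd_id}/is_split"
    # zkhandler.read(("osd.is_split", osd_id)) resolves to this path; OSDs that
    # existed before the migration have it backfilled with the string "False".
    print(is_split_path)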
@@ -963,7 +964,9 @@ class ZKSchema(object):
                     kpath = f"{elem}.{ikey}"
                     # Validate that the key exists for that child
                     if not zkhandler.zk_conn.exists(self.path(kpath, child)):
-                        if elem == "pool" and ikey == "tier":
+                        if elem == "osd" and ikey == "is_split":
+                            default_data = "False"
+                        elif elem == "pool" and ikey == "tier":
                             default_data = "default"
                         else:
                             default_data = ""
@@ -26,6 +26,7 @@ import daemon_lib.common as common

 from distutils.util import strtobool
 from re import search, match, sub
+from json import loads as jloads


 def get_detect_device(detect_string):
@@ -260,7 +261,15 @@ class CephOSDInstance(object):

     @staticmethod
     def add_osd(
-        zkhandler, logger, node, device, weight, ext_db_flag=False, ext_db_ratio=0.05
+        zkhandler,
+        logger,
+        node,
+        device,
+        weight,
+        ext_db_flag=False,
+        ext_db_ratio=0.05,
+        split_device=False,
+        split_count=1,
     ):
         # Handle a detect device if that is passed
         if match(r"detect:", device):
@@ -278,177 +287,185 @@ class CephOSDInstance(object):
             )
             device = ddevice

-        # We are ready to create a new OSD on this node
-        logger.out("Creating new OSD disk on block device {}".format(device), state="i")
+        if split_device and split_count > 1:
+            split_flag = f"--osds-per-device {split_count}"
+            logger.out(
+                f"Creating {split_count} new OSD disks on block device {device}",
+                state="i",
+            )
+        else:
+            split_flag = ""
+            logger.out(f"Creating 1 new OSD disk on block device {device}", state="i")
+
+        if "nvme" in device:
+            class_flag = "--crush-device-class nvme"
+        else:
+            class_flag = "--crush-device-class ssd"
+
         try:
-            # 1. Create an OSD; we do this so we know what ID will be gen'd
-            retcode, stdout, stderr = common.run_os_command("ceph osd create")
-            if retcode:
-                print("ceph osd create")
-                print(stdout)
-                print(stderr)
-                raise Exception
-            osd_id = stdout.rstrip()
-
-            # 2. Remove that newly-created OSD
-            retcode, stdout, stderr = common.run_os_command(
-                "ceph osd rm {}".format(osd_id)
-            )
-            if retcode:
-                print("ceph osd rm")
-                print(stdout)
-                print(stderr)
-                raise Exception
-
-            # 3a. Zap the disk to ensure it is ready to go
-            logger.out("Zapping disk {}".format(device), state="i")
-            retcode, stdout, stderr = common.run_os_command(
-                "ceph-volume lvm zap --destroy {}".format(device)
-            )
-            if retcode:
-                print("ceph-volume lvm zap")
-                print(stdout)
-                print(stderr)
-                raise Exception
-
-            dev_flags = "--data {}".format(device)
-
-            # 3b. Prepare the logical volume if ext_db_flag
-            if ext_db_flag:
-                _, osd_size_bytes, _ = common.run_os_command(
-                    "blockdev --getsize64 {}".format(device)
-                )
-                osd_size_bytes = int(osd_size_bytes)
-                result = CephOSDInstance.create_osd_db_lv(
-                    zkhandler, logger, osd_id, ext_db_ratio, osd_size_bytes
-                )
-                if not result:
-                    raise Exception
-                db_device = "osd-db/osd-{}".format(osd_id)
-                dev_flags += " --block.db {}".format(db_device)
-            else:
-                db_device = ""
-
-            # 3c. Create the OSD for real
-            logger.out(
-                "Preparing LVM for new OSD disk with ID {} on {}".format(
-                    osd_id, device
-                ),
-                state="i",
-            )
-            retcode, stdout, stderr = common.run_os_command(
-                "ceph-volume lvm prepare --bluestore {devices}".format(
-                    devices=dev_flags
-                )
-            )
-            if retcode:
-                print("ceph-volume lvm prepare")
-                print(stdout)
-                print(stderr)
-                raise Exception
-
-            # 4a. Get OSD information
-            logger.out(
-                "Getting OSD information for ID {} on {}".format(osd_id, device),
-                state="i",
-            )
-            retcode, stdout, stderr = common.run_os_command(
-                "ceph-volume lvm list {device}".format(device=device)
-            )
-            for line in stdout.split("\n"):
-                if "block device" in line:
-                    osd_blockdev = line.split()[-1]
-                if "osd fsid" in line:
-                    osd_fsid = line.split()[-1]
-                if "cluster fsid" in line:
-                    osd_clusterfsid = line.split()[-1]
-                if "devices" in line:
-                    osd_device = line.split()[-1]
-
-            if not osd_fsid:
-                print("ceph-volume lvm list")
-                print("Could not find OSD information in data:")
-                print(stdout)
-                print(stderr)
-                raise Exception
-
-            # Split OSD blockdev into VG and LV components
-            # osd_blockdev = /dev/ceph-<uuid>/osd-block-<uuid>
-            _, _, osd_vg, osd_lv = osd_blockdev.split("/")
-
-            # Reset whatever we were given to Ceph's /dev/xdX naming
-            if device != osd_device:
-                device = osd_device
-
-            # 4b. Activate the OSD
-            logger.out("Activating new OSD disk with ID {}".format(osd_id), state="i")
-            retcode, stdout, stderr = common.run_os_command(
-                "ceph-volume lvm activate --bluestore {osdid} {osdfsid}".format(
-                    osdid=osd_id, osdfsid=osd_fsid
-                )
-            )
-            if retcode:
-                print("ceph-volume lvm activate")
-                print(stdout)
-                print(stderr)
-                raise Exception
-
-            # 5. Add it to the crush map
-            logger.out(
-                "Adding new OSD disk with ID {} to CRUSH map".format(osd_id), state="i"
-            )
-            retcode, stdout, stderr = common.run_os_command(
-                "ceph osd crush add osd.{osdid} {weight} root=default host={node}".format(
-                    osdid=osd_id, weight=weight, node=node
-                )
-            )
-            if retcode:
-                print("ceph osd crush add")
-                print(stdout)
-                print(stderr)
-                raise Exception
-
-            time.sleep(0.5)
-
-            # 6. Verify it started
-            retcode, stdout, stderr = common.run_os_command(
-                "systemctl status ceph-osd@{osdid}".format(osdid=osd_id)
-            )
-            if retcode:
-                print("systemctl status")
-                print(stdout)
-                print(stderr)
-                raise Exception
-
-            # 7. Add the new OSD to the list
-            logger.out(
-                "Adding new OSD disk with ID {} to Zookeeper".format(osd_id), state="i"
-            )
-            zkhandler.write(
-                [
-                    (("osd", osd_id), ""),
-                    (("osd.node", osd_id), node),
-                    (("osd.device", osd_id), device),
-                    (("osd.db_device", osd_id), db_device),
-                    (("osd.fsid", osd_id), ""),
-                    (("osd.ofsid", osd_id), osd_fsid),
-                    (("osd.cfsid", osd_id), osd_clusterfsid),
-                    (("osd.lvm", osd_id), ""),
-                    (("osd.vg", osd_id), osd_vg),
-                    (("osd.lv", osd_id), osd_lv),
-                    (
-                        ("osd.stats", osd_id),
-                        '{"uuid": "|", "up": 0, "in": 0, "primary_affinity": "|", "utilization": "|", "var": "|", "pgs": "|", "kb": "|", "weight": "|", "reweight": "|", "node": "|", "used": "|", "avail": "|", "wr_ops": "|", "wr_data": "|", "rd_ops": "|", "rd_data": "|", "state": "|"}',
-                    ),
-                ]
-            )
-
-            # Log it
-            logger.out("Created new OSD disk with ID {}".format(osd_id), state="o")
+            # 1. Zap the block device
+            logger.out(f"Zapping disk {device}", state="i")
+            retcode, stdout, stderr = common.run_os_command(
+                f"ceph-volume lvm zap --destroy {device}"
+            )
+            if retcode:
+                logger.out("Failed: ceph-volume lvm zap", state="e")
+                logger.out(stdout, state="d")
+                logger.out(stderr, state="d")
+                raise Exception
+
+            # 2. Prepare the OSD(s)
+            logger.out(f"Preparing OSD(s) on disk {device}", state="i")
+            retcode, stdout, stderr = common.run_os_command(
+                f"ceph-volume lvm batch --yes --prepare --bluestore {split_flag} {class_flag} {device}"
+            )
+            if retcode:
+                logger.out("Failed: ceph-volume lvm batch", state="e")
+                logger.out(stdout, state="d")
+                logger.out(stderr, state="d")
+                raise Exception
+
+            # 3. Get the list of created OSDs on the device (initial pass)
+            logger.out(f"Querying OSD(s) on disk {device}", state="i")
+            retcode, stdout, stderr = common.run_os_command(
+                f"ceph-volume lvm list --format json {device}"
+            )
+            if retcode:
+                logger.out("Failed: ceph-volume lvm list", state="e")
+                logger.out(stdout, state="d")
+                logger.out(stderr, state="d")
+                raise Exception
+
+            created_osds = jloads(stdout)
+
+            # 4. Prepare the WAL and DB devices
+            if ext_db_flag:
+                for created_osd in created_osds:
+                    # 4a. Get the OSD FSID and ID from the details
+                    osd_details = created_osds[created_osd][0]
+                    osd_fsid = osd_details["tags"]["ceph.osd_fsid"]
+                    osd_id = osd_details["tags"]["ceph.osd_id"]
+                    osd_lv = osd_details["lv_path"]
+
+                    logger.out(
+                        f"Creating Bluestore DB volume for OSD {osd_id}", state="i"
+                    )
+
+                    # 4b. Prepare the logical volume if ext_db_flag
+                    _, osd_size_bytes, _ = common.run_os_command(
+                        f"blockdev --getsize64 {osd_lv}"
+                    )
+                    osd_size_bytes = int(osd_size_bytes)
+                    result = CephOSDInstance.create_osd_db_lv(
+                        zkhandler, logger, osd_id, ext_db_ratio, osd_size_bytes
+                    )
+                    if not result:
+                        raise Exception
+                    db_device = "osd-db/osd-{}".format(osd_id)
+
+                    # 4c. Attach the new DB device to the OSD
+                    retcode, stdout, stderr = common.run_os_command(
+                        f"ceph-volume lvm new-db --osd-id {osd_id} --osd-fsid {osd_fsid} --target {db_device}"
+                    )
+                    if retcode:
+                        logger.out("Failed: ceph-volume lvm new-db", state="e")
+                        logger.out(stdout, state="d")
+                        logger.out(stderr, state="d")
+                        raise Exception
+
+                # 4d. Get the list of created OSDs on the device (final pass)
+                logger.out(f"Requerying OSD(s) on disk {device}", state="i")
+                retcode, stdout, stderr = common.run_os_command(
+                    f"ceph-volume lvm list --format json {device}"
+                )
+                if retcode:
+                    logger.out("Failed: ceph-volume lvm list", state="e")
+                    logger.out(stdout, state="d")
+                    logger.out(stderr, state="d")
+                    raise Exception
+
+                created_osds = jloads(stdout)
+
+            # 5. Activate the OSDs
+            logger.out(f"Activating OSD(s) on disk {device}", state="i")
+            for created_osd in created_osds:
+                # 5a. Get the OSD FSID and ID from the details
+                osd_details = created_osds[created_osd][0]
+                osd_clusterfsid = osd_details["tags"]["ceph.cluster_fsid"]
+                osd_fsid = osd_details["tags"]["ceph.osd_fsid"]
+                osd_id = osd_details["tags"]["ceph.osd_id"]
+                db_device = osd_details["tags"].get("ceph.db_device", None)
+                osd_vg = osd_details["vg_name"]
+                osd_lv = osd_details["lv_name"]
+
+                # 5b. Activate the OSD
+                logger.out(f"Activating OSD {osd_id}", state="i")
+                retcode, stdout, stderr = common.run_os_command(
+                    f"ceph-volume lvm activate --bluestore {osd_id} {osd_fsid}"
+                )
+                if retcode:
+                    logger.out("Failed: ceph-volume lvm activate", state="e")
+                    logger.out(stdout, state="d")
+                    logger.out(stderr, state="d")
+                    raise Exception
+
+                # 5c. Add it to the crush map
+                logger.out(f"Adding OSD {osd_id} to CRUSH map", state="i")
+                retcode, stdout, stderr = common.run_os_command(
+                    f"ceph osd crush add osd.{osd_id} {weight} root=default host={node}"
+                )
+                if retcode:
+                    logger.out("Failed: ceph osd crush add", state="e")
+                    logger.out(stdout, state="d")
+                    logger.out(stderr, state="d")
+                    raise Exception
+
+                # 5d. Wait half a second for it to activate
+                time.sleep(0.5)
+
+                # 5e. Verify it started
+                retcode, stdout, stderr = common.run_os_command(
+                    "systemctl status ceph-osd@{osdid}".format(osdid=osd_id)
+                )
+                if retcode:
+                    logger.out(f"Failed: OSD {osd_id} unit is not active", state="e")
+                    logger.out(stdout, state="d")
+                    logger.out(stderr, state="d")
+                    raise Exception
+
+                # 5f. Add the new OSD to PVC
+                logger.out(f"Adding OSD {osd_id} to PVC", state="i")
+                zkhandler.write(
+                    [
+                        (("osd", osd_id), ""),
+                        (("osd.node", osd_id), node),
+                        (("osd.device", osd_id), device),
+                        (("osd.db_device", osd_id), db_device),
+                        (("osd.fsid", osd_id), osd_fsid),
+                        (("osd.ofsid", osd_id), osd_fsid),
+                        (("osd.cfsid", osd_id), osd_clusterfsid),
+                        (("osd.lvm", osd_id), ""),
+                        (("osd.vg", osd_id), osd_vg),
+                        (("osd.lv", osd_id), osd_lv),
+                        (("osd.is_split", osd_id), split_flag),
+                        (
+                            ("osd.stats", osd_id),
+                            '{"uuid": "|", "up": 0, "in": 0, "primary_affinity": "|", "utilization": "|", "var": "|", "pgs": "|", "kb": "|", "weight": "|", "reweight": "|", "node": "|", "used": "|", "avail": "|", "wr_ops": "|", "wr_data": "|", "rd_ops": "|", "rd_data": "|", "state": "|"}',
                        ),
+                    ]
+                )
+
+            # 6. Log it
+            logger.out(
+                f"Successfully created {split_count} new OSD(s) {','.join(created_osds.keys())} on disk {device}",
+                state="o",
+            )
             return True
         except Exception as e:
-            # Log it
-            logger.out("Failed to create new OSD disk: {}".format(e), state="e")
+            logger.out(
+                f"Failed to create {split_count} new OSD(s) on disk {device}: {e}",
+                state="e",
+            )
             return False

     @staticmethod
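For context on the parsing above, this is a hedged sketch of the shape of "ceph-volume lvm list --format json" output as the new code consumes it: only the fields read above are shown, and every value is made up.

    from json import loads as jloads

    # Abbreviated, illustrative JSON for one device; real output has many more fields.
    stdout = """
    {
        "12": [
            {
                "lv_path": "/dev/ceph-aaaa/osd-block-bbbb",
                "vg_name": "ceph-aaaa",
                "lv_name": "osd-block-bbbb",
                "tags": {
                    "ceph.osd_id": "12",
                    "ceph.osd_fsid": "bbbb",
                    "ceph.cluster_fsid": "cccc"
                }
            }
        ]
    }
    """
    created_osds = jloads(stdout)
    for created_osd in created_osds:
        osd_details = created_osds[created_osd][0]
        print(osd_details["tags"]["ceph.osd_id"], osd_details["lv_path"])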
@@ -828,7 +845,7 @@ class CephOSDInstance(object):

     @staticmethod
     def remove_osd(zkhandler, logger, osd_id, osd_obj, force_flag):
-        logger.out("Removing OSD disk {}".format(osd_id), state="i")
+        logger.out("Removing OSD {}".format(osd_id), state="i")
         try:
             # Verify the OSD is present
             retcode, stdout, stderr = common.run_os_command("ceph osd ls")
@@ -843,7 +860,7 @@ class CephOSDInstance(object):
                 return True

             # 1. Set the OSD down and out so it will flush
-            logger.out("Setting down OSD disk with ID {}".format(osd_id), state="i")
+            logger.out("Setting down OSD {}".format(osd_id), state="i")
             retcode, stdout, stderr = common.run_os_command(
                 "ceph osd down {}".format(osd_id)
             )
@@ -856,7 +873,7 @@ class CephOSDInstance(object):
             else:
                 raise Exception

-            logger.out("Setting out OSD disk with ID {}".format(osd_id), state="i")
+            logger.out("Setting out OSD {}".format(osd_id), state="i")
             retcode, stdout, stderr = common.run_os_command(
                 "ceph osd out {}".format(osd_id)
             )
@@ -881,7 +898,7 @@ class CephOSDInstance(object):
             time.sleep(5)

             # 3. Stop the OSD process and wait for it to be terminated
-            logger.out("Stopping OSD disk with ID {}".format(osd_id), state="i")
+            logger.out("Stopping OSD {}".format(osd_id), state="i")
             retcode, stdout, stderr = common.run_os_command(
                 "systemctl stop ceph-osd@{}".format(osd_id)
             )
@@ -922,25 +939,8 @@ class CephOSDInstance(object):
             else:
                 raise Exception

-            # 5. Zap the volumes
-            logger.out(
-                "Zapping OSD {} disk on {}".format(osd_id, osd_device),
-                state="i",
-            )
-            retcode, stdout, stderr = common.run_os_command(
-                "ceph-volume lvm zap --destroy {}".format(osd_device)
-            )
-            if retcode:
-                print("ceph-volume lvm zap")
-                print(stdout)
-                print(stderr)
-                if force_flag:
-                    logger.out("Ignoring error due to force flag", state="i")
-                else:
-                    raise Exception
-
-            # 6. Purge the OSD from Ceph
-            logger.out("Purging OSD disk with ID {}".format(osd_id), state="i")
+            # 5. Purge the OSD from Ceph
+            logger.out("Purging OSD {}".format(osd_id), state="i")
             retcode, stdout, stderr = common.run_os_command(
                 "ceph osd purge {} --yes-i-really-mean-it".format(osd_id)
             )
@@ -964,19 +964,15 @@ class CephOSDInstance(object):
             )

             # 8. Delete OSD from ZK
-            logger.out(
-                "Deleting OSD disk with ID {} from Zookeeper".format(osd_id), state="i"
-            )
+            logger.out("Deleting OSD {} from Zookeeper".format(osd_id), state="i")
             zkhandler.delete(("osd", osd_id), recursive=True)

             # Log it
-            logger.out("Removed OSD disk with ID {}".format(osd_id), state="o")
+            logger.out("Successfully removed OSD {}".format(osd_id), state="o")
             return True
         except Exception as e:
             # Log it
-            logger.out(
-                "Failed to purge OSD disk with ID {}: {}".format(osd_id, e), state="e"
-            )
+            logger.out("Failed to remove OSD {}: {}".format(osd_id, e), state="e")
             return False

     @staticmethod
@@ -1245,16 +1241,34 @@ def ceph_command(zkhandler, logger, this_node, data, d_osd):

     # Adding a new OSD
     if command == "osd_add":
-        node, device, weight, ext_db_flag, ext_db_ratio = args.split(",")
+        (
+            node,
+            device,
+            weight,
+            ext_db_flag,
+            ext_db_ratio,
+            split_flag,
+            split_count,
+        ) = args.split(",")
         ext_db_flag = bool(strtobool(ext_db_flag))
         ext_db_ratio = float(ext_db_ratio)
+        split_flag = bool(strtobool(split_flag))
+        split_count = int(split_count)
         if node == this_node.name:
             # Lock the command queue
             zk_lock = zkhandler.writelock("base.cmd.ceph")
             with zk_lock:
                 # Add the OSD
                 result = CephOSDInstance.add_osd(
-                    zkhandler, logger, node, device, weight, ext_db_flag, ext_db_ratio
+                    zkhandler,
+                    logger,
+                    node,
+                    device,
+                    weight,
+                    ext_db_flag,
+                    ext_db_ratio,
+                    split_flag,
+                    split_count,
                 )
                 # Command succeeded
                 if result:
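On the receiving side shown above, the node daemon splits the comma-joined payload and coerces the two new fields back to native types. A small self-contained sketch; the payload values are illustrative:

    from distutils.util import strtobool

    # Illustrative payload in the same format produced by add_osd() in the API daemon.
    args = "hv1,/dev/nvme0n1,1.0,False,0.05,True,4"
    node, device, weight, ext_db_flag, ext_db_ratio, split_flag, split_count = args.split(",")
    split_flag = bool(strtobool(split_flag))  # "True" -> True
    split_count = int(split_count)            # "4"    -> 4
    print(node, device, split_flag, split_count)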