Add support for replacing/refreshing OSDs
Adds commands to both replace an OSD disk, and refresh (reimport) an existing OSD disk on a new node. This handles the cases where an OSD disk should be replaced (either due to upgrades or failures) or where a node is rebuilt in-place and an existing OSD must be re-imported to it. This should avoid the need to do a full remove/add sequence for either case. Also cleans up some aspects of OSD removal that are identical between methods (e.g. using safe-to-destroy and sleeping after stopping) and fixes a bug if an OSD does not truly exist when the daemon starts up.
This commit is contained in:
parent
8027a6efdc
commit
7a40c7a55b
|
@ -4097,6 +4097,102 @@ class API_Storage_Ceph_OSD_Element(Resource):
|
||||||
"""
|
"""
|
||||||
return api_helper.ceph_osd_list(osdid)
|
return api_helper.ceph_osd_list(osdid)
|
||||||
|
|
||||||
|
@RequestParser(
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"name": "device",
|
||||||
|
"required": True,
|
||||||
|
"helptext": "A valid device or detect string must be specified.",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "weight",
|
||||||
|
"required": True,
|
||||||
|
"helptext": "An OSD weight must be specified.",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "yes-i-really-mean-it",
|
||||||
|
"required": True,
|
||||||
|
"helptext": "Please confirm that 'yes-i-really-mean-it'.",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
)
|
||||||
|
@Authenticator
|
||||||
|
def post(self, osdid, reqargs):
|
||||||
|
"""
|
||||||
|
Replace a Ceph OSD in the cluster
|
||||||
|
Note: This task may take up to 30s to complete and return
|
||||||
|
---
|
||||||
|
tags:
|
||||||
|
- storage / ceph
|
||||||
|
parameters:
|
||||||
|
- in: query
|
||||||
|
name: device
|
||||||
|
type: string
|
||||||
|
required: true
|
||||||
|
description: The block device (e.g. "/dev/sdb", "/dev/disk/by-path/...", etc.) or detect string ("detect:NAME:SIZE:ID") to replace the OSD onto
|
||||||
|
- in: query
|
||||||
|
name: weight
|
||||||
|
type: number
|
||||||
|
required: true
|
||||||
|
description: The Ceph CRUSH weight for the replaced OSD
|
||||||
|
responses:
|
||||||
|
200:
|
||||||
|
description: OK
|
||||||
|
schema:
|
||||||
|
type: object
|
||||||
|
id: Message
|
||||||
|
400:
|
||||||
|
description: Bad request
|
||||||
|
schema:
|
||||||
|
type: object
|
||||||
|
id: Message
|
||||||
|
"""
|
||||||
|
return api_helper.ceph_osd_replace(
|
||||||
|
osdid,
|
||||||
|
reqargs.get("device", None),
|
||||||
|
reqargs.get("weight", None),
|
||||||
|
)
|
||||||
|
|
||||||
|
@RequestParser(
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"name": "device",
|
||||||
|
"required": True,
|
||||||
|
"helptext": "A valid device or detect string must be specified.",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
)
|
||||||
|
@Authenticator
|
||||||
|
def put(self, osdid, reqargs):
|
||||||
|
"""
|
||||||
|
Refresh (reimport) a Ceph OSD in the cluster
|
||||||
|
Note: This task may take up to 30s to complete and return
|
||||||
|
---
|
||||||
|
tags:
|
||||||
|
- storage / ceph
|
||||||
|
parameters:
|
||||||
|
- in: query
|
||||||
|
name: device
|
||||||
|
type: string
|
||||||
|
required: true
|
||||||
|
description: The block device (e.g. "/dev/sdb", "/dev/disk/by-path/...", etc.) or detect string ("detect:NAME:SIZE:ID") that the OSD should be using
|
||||||
|
responses:
|
||||||
|
200:
|
||||||
|
description: OK
|
||||||
|
schema:
|
||||||
|
type: object
|
||||||
|
id: Message
|
||||||
|
400:
|
||||||
|
description: Bad request
|
||||||
|
schema:
|
||||||
|
type: object
|
||||||
|
id: Message
|
||||||
|
"""
|
||||||
|
return api_helper.ceph_osd_refresh(
|
||||||
|
osdid,
|
||||||
|
reqargs.get("device", None),
|
||||||
|
)
|
||||||
|
|
||||||
@RequestParser(
|
@RequestParser(
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
|
|
|
@ -1301,6 +1301,38 @@ def ceph_osd_add(zkhandler, node, device, weight, ext_db_flag=False, ext_db_rati
|
||||||
return output, retcode
|
return output, retcode
|
||||||
|
|
||||||
|
|
||||||
|
@ZKConnection(config)
|
||||||
|
def ceph_osd_replace(zkhandler, osd_id, device, weight):
|
||||||
|
"""
|
||||||
|
Replace a Ceph OSD in the PVC Ceph storage cluster.
|
||||||
|
"""
|
||||||
|
retflag, retdata = pvc_ceph.replace_osd(zkhandler, osd_id, device, weight)
|
||||||
|
|
||||||
|
if retflag:
|
||||||
|
retcode = 200
|
||||||
|
else:
|
||||||
|
retcode = 400
|
||||||
|
|
||||||
|
output = {"message": retdata.replace('"', "'")}
|
||||||
|
return output, retcode
|
||||||
|
|
||||||
|
|
||||||
|
@ZKConnection(config)
|
||||||
|
def ceph_osd_refresh(zkhandler, osd_id, device):
|
||||||
|
"""
|
||||||
|
Refresh (reimport) a Ceph OSD in the PVC Ceph storage cluster.
|
||||||
|
"""
|
||||||
|
retflag, retdata = pvc_ceph.refresh_osd(zkhandler, osd_id, device)
|
||||||
|
|
||||||
|
if retflag:
|
||||||
|
retcode = 200
|
||||||
|
else:
|
||||||
|
retcode = 400
|
||||||
|
|
||||||
|
output = {"message": retdata.replace('"', "'")}
|
||||||
|
return output, retcode
|
||||||
|
|
||||||
|
|
||||||
@ZKConnection(config)
|
@ZKConnection(config)
|
||||||
def ceph_osd_remove(zkhandler, osd_id, force_flag):
|
def ceph_osd_remove(zkhandler, osd_id, force_flag):
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -255,6 +255,46 @@ def ceph_osd_add(config, node, device, weight, ext_db_flag, ext_db_ratio):
|
||||||
return retstatus, response.json().get("message", "")
|
return retstatus, response.json().get("message", "")
|
||||||
|
|
||||||
|
|
||||||
|
def ceph_osd_replace(config, osdid, device, weight):
|
||||||
|
"""
|
||||||
|
Replace an existing Ceph OSD with a new device
|
||||||
|
|
||||||
|
API endpoint: POST /api/v1/storage/ceph/osd/{osdid}
|
||||||
|
API arguments: device={device}, weight={weight}
|
||||||
|
API schema: {"message":"{data}"}
|
||||||
|
"""
|
||||||
|
params = {"device": device, "weight": weight, "yes-i-really-mean-it": "yes"}
|
||||||
|
response = call_api(config, "post", f"/storage/ceph/osd/{osdid}", params=params)
|
||||||
|
|
||||||
|
if response.status_code == 200:
|
||||||
|
retstatus = True
|
||||||
|
else:
|
||||||
|
retstatus = False
|
||||||
|
|
||||||
|
return retstatus, response.json().get("message", "")
|
||||||
|
|
||||||
|
|
||||||
|
def ceph_osd_refresh(config, osdid, device):
|
||||||
|
"""
|
||||||
|
Refresh (reimport) an existing Ceph OSD with device {device}
|
||||||
|
|
||||||
|
API endpoint: PUT /api/v1/storage/ceph/osd/{osdid}
|
||||||
|
API arguments: device={device}
|
||||||
|
API schema: {"message":"{data}"}
|
||||||
|
"""
|
||||||
|
params = {
|
||||||
|
"device": device,
|
||||||
|
}
|
||||||
|
response = call_api(config, "put", f"/storage/ceph/osd/{osdid}", params=params)
|
||||||
|
|
||||||
|
if response.status_code == 200:
|
||||||
|
retstatus = True
|
||||||
|
else:
|
||||||
|
retstatus = False
|
||||||
|
|
||||||
|
return retstatus, response.json().get("message", "")
|
||||||
|
|
||||||
|
|
||||||
def ceph_osd_remove(config, osdid, force_flag):
|
def ceph_osd_remove(config, osdid, force_flag):
|
||||||
"""
|
"""
|
||||||
Remove Ceph OSD
|
Remove Ceph OSD
|
||||||
|
|
|
@ -3371,6 +3371,74 @@ def ceph_osd_add(node, device, weight, ext_db_flag, ext_db_ratio, confirm_flag):
|
||||||
cleanup(retcode, retmsg)
|
cleanup(retcode, retmsg)
|
||||||
|
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
# pvc storage osd replace
|
||||||
|
###############################################################################
|
||||||
|
@click.command(name="replace", short_help="Replace OSD block device.")
|
||||||
|
@click.argument("osdid")
|
||||||
|
@click.argument("device")
|
||||||
|
@click.option(
|
||||||
|
"-w",
|
||||||
|
"--weight",
|
||||||
|
"weight",
|
||||||
|
default=1.0,
|
||||||
|
show_default=True,
|
||||||
|
help="New weight of the OSD within the CRUSH map.",
|
||||||
|
)
|
||||||
|
@click.option(
|
||||||
|
"-y",
|
||||||
|
"--yes",
|
||||||
|
"confirm_flag",
|
||||||
|
is_flag=True,
|
||||||
|
default=False,
|
||||||
|
help="Confirm the removal",
|
||||||
|
)
|
||||||
|
@cluster_req
|
||||||
|
def ceph_osd_replace(osdid, device, weight, confirm_flag):
|
||||||
|
"""
|
||||||
|
Replace the block device of an existing OSD with ID OSDID with DEVICE. Use this command to replace a failed or smaller OSD block device with a new one.
|
||||||
|
|
||||||
|
DEVICE must be a valid raw block device (e.g. '/dev/sda', '/dev/nvme0n1', '/dev/disk/by-path/...', '/dev/disk/by-id/...') or a "detect" string. Using partitions is not supported. A "detect" string is a string in the form "detect:<NAME>:<HUMAN-SIZE>:<ID>". For details, see 'pvc storage osd add --help'.
|
||||||
|
|
||||||
|
The weight of an OSD should reflect the ratio of the OSD to other OSDs in the storage cluster. For details, see 'pvc storage osd add --help'. Note that the current weight must be explicitly specified if it differs from the default.
|
||||||
|
|
||||||
|
Existing IDs, external DB devices, etc. of the OSD will be preserved; data will be lost and rebuilt from the remaining healthy OSDs.
|
||||||
|
"""
|
||||||
|
if not confirm_flag and not config["unsafe"]:
|
||||||
|
try:
|
||||||
|
click.confirm(
|
||||||
|
"Replace OSD {} with block device {}".format(osdid, device),
|
||||||
|
prompt_suffix="? ",
|
||||||
|
abort=True,
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
exit(0)
|
||||||
|
|
||||||
|
retcode, retmsg = pvc_ceph.ceph_osd_replace(config, osdid, device, weight)
|
||||||
|
cleanup(retcode, retmsg)
|
||||||
|
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
# pvc storage osd refresh
|
||||||
|
###############################################################################
|
||||||
|
@click.command(name="refresh", short_help="Refresh (reimport) OSD device.")
|
||||||
|
@click.argument("osdid")
|
||||||
|
@click.argument("device")
|
||||||
|
@cluster_req
|
||||||
|
def ceph_osd_refresh(osdid, device):
|
||||||
|
"""
|
||||||
|
Refresh (reimport) the block DEVICE of an existing OSD with ID OSDID. Use this command to reimport a working OSD into a rebuilt/replaced node.
|
||||||
|
|
||||||
|
DEVICE must be a valid raw block device (e.g. '/dev/sda', '/dev/nvme0n1', '/dev/disk/by-path/...', '/dev/disk/by-id/...') or a "detect" string. Using partitions is not supported. A "detect" string is a string in the form "detect:<NAME>:<HUMAN-SIZE>:<ID>". For details, see 'pvc storage osd add --help'.
|
||||||
|
|
||||||
|
Existing data, IDs, weights, etc. of the OSD will be preserved.
|
||||||
|
|
||||||
|
NOTE: If a device had an external DB device, this is not automatically handled at this time. It is best to remove and re-add the OSD instead.
|
||||||
|
"""
|
||||||
|
retcode, retmsg = pvc_ceph.ceph_osd_refresh(config, osdid, device)
|
||||||
|
cleanup(retcode, retmsg)
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
# pvc storage osd remove
|
# pvc storage osd remove
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
@ -5872,6 +5940,8 @@ ceph_benchmark.add_command(ceph_benchmark_list)
|
||||||
|
|
||||||
ceph_osd.add_command(ceph_osd_create_db_vg)
|
ceph_osd.add_command(ceph_osd_create_db_vg)
|
||||||
ceph_osd.add_command(ceph_osd_add)
|
ceph_osd.add_command(ceph_osd_add)
|
||||||
|
ceph_osd.add_command(ceph_osd_replace)
|
||||||
|
ceph_osd.add_command(ceph_osd_refresh)
|
||||||
ceph_osd.add_command(ceph_osd_remove)
|
ceph_osd.add_command(ceph_osd_remove)
|
||||||
ceph_osd.add_command(ceph_osd_in)
|
ceph_osd.add_command(ceph_osd_in)
|
||||||
ceph_osd.add_command(ceph_osd_out)
|
ceph_osd.add_command(ceph_osd_out)
|
||||||
|
|
|
@ -236,7 +236,7 @@ def add_osd_db_vg(zkhandler, node, device):
|
||||||
return success, message
|
return success, message
|
||||||
|
|
||||||
|
|
||||||
# OSD addition and removal uses the /cmd/ceph pipe
|
# OSD actions use the /cmd/ceph pipe
|
||||||
# These actions must occur on the specific node they reference
|
# These actions must occur on the specific node they reference
|
||||||
def add_osd(zkhandler, node, device, weight, ext_db_flag=False, ext_db_ratio=0.05):
|
def add_osd(zkhandler, node, device, weight, ext_db_flag=False, ext_db_ratio=0.05):
|
||||||
# Verify the target node exists
|
# Verify the target node exists
|
||||||
|
@ -288,6 +288,103 @@ def add_osd(zkhandler, node, device, weight, ext_db_flag=False, ext_db_ratio=0.0
|
||||||
return success, message
|
return success, message
|
||||||
|
|
||||||
|
|
||||||
|
def replace_osd(zkhandler, osd_id, new_device, weight):
|
||||||
|
# Get current OSD information
|
||||||
|
osd_information = getOSDInformation(zkhandler, osd_id)
|
||||||
|
node = osd_information["node"]
|
||||||
|
old_device = osd_information["device"]
|
||||||
|
ext_db_flag = True if osd_information["db_device"] else False
|
||||||
|
|
||||||
|
# Verify target block device isn't in use
|
||||||
|
block_osd = verifyOSDBlock(zkhandler, node, new_device)
|
||||||
|
if block_osd and block_osd != osd_id:
|
||||||
|
return (
|
||||||
|
False,
|
||||||
|
'ERROR: Block device "{}" on node "{}" is used by OSD "{}"'.format(
|
||||||
|
new_device, node, block_osd
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Tell the cluster to create a new OSD for the host
|
||||||
|
replace_osd_string = "osd_replace {},{},{},{},{},{}".format(
|
||||||
|
node, osd_id, old_device, new_device, weight, ext_db_flag
|
||||||
|
)
|
||||||
|
zkhandler.write([("base.cmd.ceph", replace_osd_string)])
|
||||||
|
# Wait 1/2 second for the cluster to get the message and start working
|
||||||
|
time.sleep(0.5)
|
||||||
|
# Acquire a read lock, so we get the return exclusively
|
||||||
|
with zkhandler.readlock("base.cmd.ceph"):
|
||||||
|
try:
|
||||||
|
result = zkhandler.read("base.cmd.ceph").split()[0]
|
||||||
|
if result == "success-osd_replace":
|
||||||
|
message = 'Replaced OSD {} with block device "{}" on node "{}".'.format(
|
||||||
|
osd_id, new_device, node
|
||||||
|
)
|
||||||
|
success = True
|
||||||
|
else:
|
||||||
|
message = "ERROR: Failed to replace OSD; check node logs for details."
|
||||||
|
success = False
|
||||||
|
except Exception:
|
||||||
|
message = "ERROR: Command ignored by node."
|
||||||
|
success = False
|
||||||
|
|
||||||
|
# Acquire a write lock to ensure things go smoothly
|
||||||
|
with zkhandler.writelock("base.cmd.ceph"):
|
||||||
|
time.sleep(0.5)
|
||||||
|
zkhandler.write([("base.cmd.ceph", "")])
|
||||||
|
|
||||||
|
return success, message
|
||||||
|
|
||||||
|
|
||||||
|
def refresh_osd(zkhandler, osd_id, device):
|
||||||
|
# Get current OSD information
|
||||||
|
osd_information = getOSDInformation(zkhandler, osd_id)
|
||||||
|
node = osd_information["node"]
|
||||||
|
ext_db_flag = True if osd_information["db_device"] else False
|
||||||
|
|
||||||
|
# Verify target block device isn't in use
|
||||||
|
block_osd = verifyOSDBlock(zkhandler, node, device)
|
||||||
|
if not block_osd or block_osd != osd_id:
|
||||||
|
return (
|
||||||
|
False,
|
||||||
|
'ERROR: Block device "{}" on node "{}" is not used by OSD "{}"; use replace instead'.format(
|
||||||
|
device, node, osd_id
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Tell the cluster to create a new OSD for the host
|
||||||
|
refresh_osd_string = "osd_refresh {},{},{},{}".format(
|
||||||
|
node, osd_id, device, ext_db_flag
|
||||||
|
)
|
||||||
|
zkhandler.write([("base.cmd.ceph", refresh_osd_string)])
|
||||||
|
# Wait 1/2 second for the cluster to get the message and start working
|
||||||
|
time.sleep(0.5)
|
||||||
|
# Acquire a read lock, so we get the return exclusively
|
||||||
|
with zkhandler.readlock("base.cmd.ceph"):
|
||||||
|
try:
|
||||||
|
result = zkhandler.read("base.cmd.ceph").split()[0]
|
||||||
|
if result == "success-osd_refresh":
|
||||||
|
message = (
|
||||||
|
'Refreshed OSD {} with block device "{}" on node "{}".'.format(
|
||||||
|
osd_id, device, node
|
||||||
|
)
|
||||||
|
)
|
||||||
|
success = True
|
||||||
|
else:
|
||||||
|
message = "ERROR: Failed to refresh OSD; check node logs for details."
|
||||||
|
success = False
|
||||||
|
except Exception:
|
||||||
|
message = "ERROR: Command ignored by node."
|
||||||
|
success = False
|
||||||
|
|
||||||
|
# Acquire a write lock to ensure things go smoothly
|
||||||
|
with zkhandler.writelock("base.cmd.ceph"):
|
||||||
|
time.sleep(0.5)
|
||||||
|
zkhandler.write([("base.cmd.ceph", "")])
|
||||||
|
|
||||||
|
return success, message
|
||||||
|
|
||||||
|
|
||||||
def remove_osd(zkhandler, osd_id, force_flag):
|
def remove_osd(zkhandler, osd_id, force_flag):
|
||||||
if not verifyOSD(zkhandler, osd_id):
|
if not verifyOSD(zkhandler, osd_id):
|
||||||
return False, 'ERROR: No OSD with ID "{}" is present in the cluster.'.format(
|
return False, 'ERROR: No OSD with ID "{}" is present in the cluster.'.format(
|
||||||
|
|
|
@ -5094,6 +5094,13 @@
|
||||||
"delete": {
|
"delete": {
|
||||||
"description": "Note: This task may take up to 30s to complete and return<br/>Warning: This operation may have unintended consequences for the storage cluster; ensure the cluster can support removing the OSD before proceeding",
|
"description": "Note: This task may take up to 30s to complete and return<br/>Warning: This operation may have unintended consequences for the storage cluster; ensure the cluster can support removing the OSD before proceeding",
|
||||||
"parameters": [
|
"parameters": [
|
||||||
|
{
|
||||||
|
"description": "Force removal even if some step(s) fail",
|
||||||
|
"in": "query",
|
||||||
|
"name": "force",
|
||||||
|
"required": "flase",
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"description": "A confirmation string to ensure that the API consumer really means it",
|
"description": "A confirmation string to ensure that the API consumer really means it",
|
||||||
"in": "query",
|
"in": "query",
|
||||||
|
@ -5141,6 +5148,73 @@
|
||||||
"tags": [
|
"tags": [
|
||||||
"storage / ceph"
|
"storage / ceph"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
"post": {
|
||||||
|
"description": "Note: This task may take up to 30s to complete and return",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"description": "The block device (e.g. \"/dev/sdb\", \"/dev/disk/by-path/...\", etc.) or detect string (\"detect:NAME:SIZE:ID\") to replace the OSD onto",
|
||||||
|
"in": "query",
|
||||||
|
"name": "device",
|
||||||
|
"required": true,
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "The Ceph CRUSH weight for the replaced OSD",
|
||||||
|
"in": "query",
|
||||||
|
"name": "weight",
|
||||||
|
"required": true,
|
||||||
|
"type": "number"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "OK",
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/Message"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"400": {
|
||||||
|
"description": "Bad request",
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/Message"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"summary": "Replace a Ceph OSD in the cluster",
|
||||||
|
"tags": [
|
||||||
|
"storage / ceph"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"put": {
|
||||||
|
"description": "Note: This task may take up to 30s to complete and return",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"description": "The block device (e.g. \"/dev/sdb\", \"/dev/disk/by-path/...\", etc.) or detect string (\"detect:NAME:SIZE:ID\") that the OSD should be using",
|
||||||
|
"in": "query",
|
||||||
|
"name": "device",
|
||||||
|
"required": true,
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "OK",
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/Message"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"400": {
|
||||||
|
"description": "Bad request",
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/definitions/Message"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"summary": "Refresh (reimport) a Ceph OSD in the cluster",
|
||||||
|
"tags": [
|
||||||
|
"storage / ceph"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"/api/v1/storage/ceph/osd/{osdid}/state": {
|
"/api/v1/storage/ceph/osd/{osdid}/state": {
|
||||||
|
|
|
@ -21,7 +21,6 @@
|
||||||
|
|
||||||
import time
|
import time
|
||||||
import json
|
import json
|
||||||
import psutil
|
|
||||||
|
|
||||||
import daemon_lib.common as common
|
import daemon_lib.common as common
|
||||||
|
|
||||||
|
@ -217,6 +216,10 @@ class CephOSDInstance(object):
|
||||||
retcode, stdout, stderr = common.run_os_command(
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
f"ceph-volume lvm list {find_device}"
|
f"ceph-volume lvm list {find_device}"
|
||||||
)
|
)
|
||||||
|
osd_blockdev = None
|
||||||
|
osd_fsid = None
|
||||||
|
osd_clusterfsid = None
|
||||||
|
osd_device = None
|
||||||
for line in stdout.split("\n"):
|
for line in stdout.split("\n"):
|
||||||
if "block device" in line:
|
if "block device" in line:
|
||||||
osd_blockdev = line.split()[-1]
|
osd_blockdev = line.split()[-1]
|
||||||
|
@ -227,7 +230,7 @@ class CephOSDInstance(object):
|
||||||
if "devices" in line:
|
if "devices" in line:
|
||||||
osd_device = line.split()[-1]
|
osd_device = line.split()[-1]
|
||||||
|
|
||||||
if not osd_fsid:
|
if not osd_blockdev or not osd_fsid or not osd_clusterfsid or not osd_device:
|
||||||
self.logger.out(
|
self.logger.out(
|
||||||
f"Failed to find updated OSD information via ceph-volume for {find_device}",
|
f"Failed to find updated OSD information via ceph-volume for {find_device}",
|
||||||
state="e",
|
state="e",
|
||||||
|
@ -404,6 +407,7 @@ class CephOSDInstance(object):
|
||||||
print(stdout)
|
print(stdout)
|
||||||
print(stderr)
|
print(stderr)
|
||||||
raise Exception
|
raise Exception
|
||||||
|
|
||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
|
|
||||||
# 6. Verify it started
|
# 6. Verify it started
|
||||||
|
@ -447,6 +451,381 @@ class CephOSDInstance(object):
|
||||||
logger.out("Failed to create new OSD disk: {}".format(e), state="e")
|
logger.out("Failed to create new OSD disk: {}".format(e), state="e")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def replace_osd(
|
||||||
|
zkhandler,
|
||||||
|
logger,
|
||||||
|
node,
|
||||||
|
osd_id,
|
||||||
|
old_device,
|
||||||
|
new_device,
|
||||||
|
weight,
|
||||||
|
ext_db_flag=False,
|
||||||
|
):
|
||||||
|
# Handle a detect device if that is passed
|
||||||
|
if match(r"detect:", new_device):
|
||||||
|
ddevice = get_detect_device(new_device)
|
||||||
|
if ddevice is None:
|
||||||
|
logger.out(
|
||||||
|
f"Failed to determine block device from detect string {new_device}",
|
||||||
|
state="e",
|
||||||
|
)
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
logger.out(
|
||||||
|
f"Determined block device {ddevice} from detect string {new_device}",
|
||||||
|
state="i",
|
||||||
|
)
|
||||||
|
new_device = ddevice
|
||||||
|
|
||||||
|
# We are ready to create a new OSD on this node
|
||||||
|
logger.out(
|
||||||
|
"Replacing OSD {} disk with block device {}".format(osd_id, new_device),
|
||||||
|
state="i",
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
# Verify the OSD is present
|
||||||
|
retcode, stdout, stderr = common.run_os_command("ceph osd ls")
|
||||||
|
osd_list = stdout.split("\n")
|
||||||
|
if osd_id not in osd_list:
|
||||||
|
logger.out(
|
||||||
|
"Could not find OSD {} in the cluster".format(osd_id), state="e"
|
||||||
|
)
|
||||||
|
return True
|
||||||
|
|
||||||
|
# 1. Set the OSD down and out so it will flush
|
||||||
|
logger.out("Setting down OSD disk with ID {}".format(osd_id), state="i")
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
"ceph osd down {}".format(osd_id)
|
||||||
|
)
|
||||||
|
if retcode:
|
||||||
|
print("ceph osd down")
|
||||||
|
print(stdout)
|
||||||
|
print(stderr)
|
||||||
|
raise Exception
|
||||||
|
|
||||||
|
logger.out("Setting out OSD disk with ID {}".format(osd_id), state="i")
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
"ceph osd out {}".format(osd_id)
|
||||||
|
)
|
||||||
|
if retcode:
|
||||||
|
print("ceph osd out")
|
||||||
|
print(stdout)
|
||||||
|
print(stderr)
|
||||||
|
raise Exception
|
||||||
|
|
||||||
|
# 2. Wait for the OSD to be safe to remove (but don't wait for rebalancing to complete)
|
||||||
|
logger.out("Waiting for OSD {osd_id} to be safe to remove", state="i")
|
||||||
|
while True:
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
f"ceph osd safe-to-destroy osd.{osd_id}"
|
||||||
|
)
|
||||||
|
if retcode in [0, 11]:
|
||||||
|
# Code 0 = success
|
||||||
|
# Code 11 = "Error EAGAIN: OSD(s) 5 have no reported stats, and not all PGs are active+clean; we cannot draw any conclusions." which means all PGs have been remappped but backfill is still occurring
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
time.sleep(5)
|
||||||
|
|
||||||
|
# 3. Stop the OSD process
|
||||||
|
logger.out("Stopping OSD disk with ID {}".format(osd_id), state="i")
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
"systemctl stop ceph-osd@{}".format(osd_id)
|
||||||
|
)
|
||||||
|
if retcode:
|
||||||
|
print("systemctl stop")
|
||||||
|
print(stdout)
|
||||||
|
print(stderr)
|
||||||
|
raise Exception
|
||||||
|
time.sleep(2)
|
||||||
|
|
||||||
|
# 4. Destroy the OSD
|
||||||
|
logger.out("Destroying OSD with ID {osd_id}", state="i")
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
f"ceph osd destroy {osd_id} --yes-i-really-mean-it"
|
||||||
|
)
|
||||||
|
if retcode:
|
||||||
|
print("ceph osd destroy")
|
||||||
|
print(stdout)
|
||||||
|
print(stderr)
|
||||||
|
raise Exception
|
||||||
|
|
||||||
|
# 5. Adjust the weight
|
||||||
|
logger.out(
|
||||||
|
"Adjusting weight of OSD disk with ID {} in CRUSH map".format(osd_id),
|
||||||
|
state="i",
|
||||||
|
)
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
"ceph osd crush reweight osd.{osdid} {weight}".format(
|
||||||
|
osdid=osd_id, weight=weight
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if retcode:
|
||||||
|
print("ceph osd crush reweight")
|
||||||
|
print(stdout)
|
||||||
|
print(stderr)
|
||||||
|
raise Exception
|
||||||
|
|
||||||
|
# 6a. Zap the new disk to ensure it is ready to go
|
||||||
|
logger.out("Zapping disk {}".format(new_device), state="i")
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
"ceph-volume lvm zap --destroy {}".format(new_device)
|
||||||
|
)
|
||||||
|
if retcode:
|
||||||
|
print("ceph-volume lvm zap")
|
||||||
|
print(stdout)
|
||||||
|
print(stderr)
|
||||||
|
raise Exception
|
||||||
|
|
||||||
|
dev_flags = "--data {}".format(new_device)
|
||||||
|
|
||||||
|
# 6b. Prepare the logical volume if ext_db_flag
|
||||||
|
if ext_db_flag:
|
||||||
|
db_device = "osd-db/osd-{}".format(osd_id)
|
||||||
|
dev_flags += " --block.db {}".format(db_device)
|
||||||
|
else:
|
||||||
|
db_device = ""
|
||||||
|
|
||||||
|
# 6c. Replace the OSD
|
||||||
|
logger.out(
|
||||||
|
"Preparing LVM for replaced OSD {} disk on {}".format(
|
||||||
|
osd_id, new_device
|
||||||
|
),
|
||||||
|
state="i",
|
||||||
|
)
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
"ceph-volume lvm prepare --osd-id {osdid} --bluestore {devices}".format(
|
||||||
|
osdid=osd_id, devices=dev_flags
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if retcode:
|
||||||
|
print("ceph-volume lvm prepare")
|
||||||
|
print(stdout)
|
||||||
|
print(stderr)
|
||||||
|
raise Exception
|
||||||
|
|
||||||
|
# 7a. Get OSD information
|
||||||
|
logger.out(
|
||||||
|
"Getting OSD information for ID {} on {}".format(osd_id, new_device),
|
||||||
|
state="i",
|
||||||
|
)
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
"ceph-volume lvm list {device}".format(device=new_device)
|
||||||
|
)
|
||||||
|
for line in stdout.split("\n"):
|
||||||
|
if "block device" in line:
|
||||||
|
osd_blockdev = line.split()[-1]
|
||||||
|
if "osd fsid" in line:
|
||||||
|
osd_fsid = line.split()[-1]
|
||||||
|
if "cluster fsid" in line:
|
||||||
|
osd_clusterfsid = line.split()[-1]
|
||||||
|
if "devices" in line:
|
||||||
|
osd_device = line.split()[-1]
|
||||||
|
|
||||||
|
if not osd_fsid:
|
||||||
|
print("ceph-volume lvm list")
|
||||||
|
print("Could not find OSD information in data:")
|
||||||
|
print(stdout)
|
||||||
|
print(stderr)
|
||||||
|
raise Exception
|
||||||
|
|
||||||
|
# Split OSD blockdev into VG and LV components
|
||||||
|
# osd_blockdev = /dev/ceph-<uuid>/osd-block-<uuid>
|
||||||
|
_, _, osd_vg, osd_lv = osd_blockdev.split("/")
|
||||||
|
|
||||||
|
# Reset whatever we were given to Ceph's /dev/xdX naming
|
||||||
|
if new_device != osd_device:
|
||||||
|
new_device = osd_device
|
||||||
|
|
||||||
|
# 7b. Activate the OSD
|
||||||
|
logger.out("Activating new OSD disk with ID {}".format(osd_id), state="i")
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
"ceph-volume lvm activate --bluestore {osdid} {osdfsid}".format(
|
||||||
|
osdid=osd_id, osdfsid=osd_fsid
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if retcode:
|
||||||
|
print("ceph-volume lvm activate")
|
||||||
|
print(stdout)
|
||||||
|
print(stderr)
|
||||||
|
raise Exception
|
||||||
|
|
||||||
|
time.sleep(0.5)
|
||||||
|
|
||||||
|
# 8. Verify it started
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
"systemctl status ceph-osd@{osdid}".format(osdid=osd_id)
|
||||||
|
)
|
||||||
|
if retcode:
|
||||||
|
print("systemctl status")
|
||||||
|
print(stdout)
|
||||||
|
print(stderr)
|
||||||
|
raise Exception
|
||||||
|
|
||||||
|
# 9. Update Zookeeper information
|
||||||
|
logger.out(
|
||||||
|
"Adding new OSD disk with ID {} to Zookeeper".format(osd_id), state="i"
|
||||||
|
)
|
||||||
|
zkhandler.write(
|
||||||
|
[
|
||||||
|
(("osd", osd_id), ""),
|
||||||
|
(("osd.node", osd_id), node),
|
||||||
|
(("osd.device", osd_id), new_device),
|
||||||
|
(("osd.db_device", osd_id), db_device),
|
||||||
|
(("osd.fsid", osd_id), ""),
|
||||||
|
(("osd.ofsid", osd_id), osd_fsid),
|
||||||
|
(("osd.cfsid", osd_id), osd_clusterfsid),
|
||||||
|
(("osd.lvm", osd_id), ""),
|
||||||
|
(("osd.vg", osd_id), osd_vg),
|
||||||
|
(("osd.lv", osd_id), osd_lv),
|
||||||
|
(
|
||||||
|
("osd.stats", osd_id),
|
||||||
|
'{"uuid": "|", "up": 0, "in": 0, "primary_affinity": "|", "utilization": "|", "var": "|", "pgs": "|", "kb": "|", "weight": "|", "reweight": "|", "node": "|", "used": "|", "avail": "|", "wr_ops": "|", "wr_data": "|", "rd_ops": "|", "rd_data": "|", "state": "|"}',
|
||||||
|
),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
# Log it
|
||||||
|
logger.out(
|
||||||
|
"Replaced OSD {} disk with device {}".format(osd_id, new_device),
|
||||||
|
state="o",
|
||||||
|
)
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
# Log it
|
||||||
|
logger.out("Failed to replace OSD {} disk: {}".format(osd_id, e), state="e")
|
||||||
|
return False
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def refresh_osd(zkhandler, logger, node, osd_id, device, ext_db_flag):
|
||||||
|
# Handle a detect device if that is passed
|
||||||
|
if match(r"detect:", device):
|
||||||
|
ddevice = get_detect_device(device)
|
||||||
|
if ddevice is None:
|
||||||
|
logger.out(
|
||||||
|
f"Failed to determine block device from detect string {device}",
|
||||||
|
state="e",
|
||||||
|
)
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
logger.out(
|
||||||
|
f"Determined block device {ddevice} from detect string {device}",
|
||||||
|
state="i",
|
||||||
|
)
|
||||||
|
device = ddevice
|
||||||
|
|
||||||
|
# We are ready to create a new OSD on this node
|
||||||
|
logger.out(
|
||||||
|
"Refreshing OSD {} disk on block device {}".format(osd_id, device),
|
||||||
|
state="i",
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
# 1. Verify the OSD is present
|
||||||
|
retcode, stdout, stderr = common.run_os_command("ceph osd ls")
|
||||||
|
osd_list = stdout.split("\n")
|
||||||
|
if osd_id not in osd_list:
|
||||||
|
logger.out(
|
||||||
|
"Could not find OSD {} in the cluster".format(osd_id), state="e"
|
||||||
|
)
|
||||||
|
return True
|
||||||
|
|
||||||
|
dev_flags = "--data {}".format(device)
|
||||||
|
|
||||||
|
if ext_db_flag:
|
||||||
|
db_device = "osd-db/osd-{}".format(osd_id)
|
||||||
|
dev_flags += " --block.db {}".format(db_device)
|
||||||
|
else:
|
||||||
|
db_device = ""
|
||||||
|
|
||||||
|
# 2. Get OSD information
|
||||||
|
logger.out(
|
||||||
|
"Getting OSD information for ID {} on {}".format(osd_id, device),
|
||||||
|
state="i",
|
||||||
|
)
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
"ceph-volume lvm list {device}".format(device=device)
|
||||||
|
)
|
||||||
|
for line in stdout.split("\n"):
|
||||||
|
if "block device" in line:
|
||||||
|
osd_blockdev = line.split()[-1]
|
||||||
|
if "osd fsid" in line:
|
||||||
|
osd_fsid = line.split()[-1]
|
||||||
|
if "cluster fsid" in line:
|
||||||
|
osd_clusterfsid = line.split()[-1]
|
||||||
|
if "devices" in line:
|
||||||
|
osd_device = line.split()[-1]
|
||||||
|
|
||||||
|
if not osd_fsid:
|
||||||
|
print("ceph-volume lvm list")
|
||||||
|
print("Could not find OSD information in data:")
|
||||||
|
print(stdout)
|
||||||
|
print(stderr)
|
||||||
|
raise Exception
|
||||||
|
|
||||||
|
# Split OSD blockdev into VG and LV components
|
||||||
|
# osd_blockdev = /dev/ceph-<uuid>/osd-block-<uuid>
|
||||||
|
_, _, osd_vg, osd_lv = osd_blockdev.split("/")
|
||||||
|
|
||||||
|
# Reset whatever we were given to Ceph's /dev/xdX naming
|
||||||
|
if device != osd_device:
|
||||||
|
device = osd_device
|
||||||
|
|
||||||
|
# 3. Activate the OSD
|
||||||
|
logger.out("Activating new OSD disk with ID {}".format(osd_id), state="i")
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
"ceph-volume lvm activate --bluestore {osdid} {osdfsid}".format(
|
||||||
|
osdid=osd_id, osdfsid=osd_fsid
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if retcode:
|
||||||
|
print("ceph-volume lvm activate")
|
||||||
|
print(stdout)
|
||||||
|
print(stderr)
|
||||||
|
raise Exception
|
||||||
|
|
||||||
|
time.sleep(0.5)
|
||||||
|
|
||||||
|
# 4. Verify it started
|
||||||
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
|
"systemctl status ceph-osd@{osdid}".format(osdid=osd_id)
|
||||||
|
)
|
||||||
|
if retcode:
|
||||||
|
print("systemctl status")
|
||||||
|
print(stdout)
|
||||||
|
print(stderr)
|
||||||
|
raise Exception
|
||||||
|
|
||||||
|
# 5. Update Zookeeper information
|
||||||
|
logger.out(
|
||||||
|
"Adding new OSD disk with ID {} to Zookeeper".format(osd_id), state="i"
|
||||||
|
)
|
||||||
|
zkhandler.write(
|
||||||
|
[
|
||||||
|
(("osd", osd_id), ""),
|
||||||
|
(("osd.node", osd_id), node),
|
||||||
|
(("osd.device", osd_id), device),
|
||||||
|
(("osd.db_device", osd_id), db_device),
|
||||||
|
(("osd.fsid", osd_id), ""),
|
||||||
|
(("osd.ofsid", osd_id), osd_fsid),
|
||||||
|
(("osd.cfsid", osd_id), osd_clusterfsid),
|
||||||
|
(("osd.lvm", osd_id), ""),
|
||||||
|
(("osd.vg", osd_id), osd_vg),
|
||||||
|
(("osd.lv", osd_id), osd_lv),
|
||||||
|
(
|
||||||
|
("osd.stats", osd_id),
|
||||||
|
'{"uuid": "|", "up": 0, "in": 0, "primary_affinity": "|", "utilization": "|", "var": "|", "pgs": "|", "kb": "|", "weight": "|", "reweight": "|", "node": "|", "used": "|", "avail": "|", "wr_ops": "|", "wr_data": "|", "rd_ops": "|", "rd_data": "|", "state": "|"}',
|
||||||
|
),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
# Log it
|
||||||
|
logger.out("Refreshed OSD {} disk on {}".format(osd_id, device), state="o")
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
# Log it
|
||||||
|
logger.out("Failed to refresh OSD {} disk: {}".format(osd_id, e), state="e")
|
||||||
|
return False
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def remove_osd(zkhandler, logger, osd_id, osd_obj, force_flag):
|
def remove_osd(zkhandler, logger, osd_id, osd_obj, force_flag):
|
||||||
logger.out("Removing OSD disk {}".format(osd_id), state="i")
|
logger.out("Removing OSD disk {}".format(osd_id), state="i")
|
||||||
|
@ -490,29 +869,16 @@ class CephOSDInstance(object):
|
||||||
else:
|
else:
|
||||||
raise Exception
|
raise Exception
|
||||||
|
|
||||||
# 2. Wait for the OSD to flush
|
# 2. Wait for the OSD to be safe to remove (but don't wait for rebalancing to complete)
|
||||||
logger.out("Flushing OSD disk with ID {}".format(osd_id), state="i")
|
logger.out("Waiting for OSD {osd_id} to be safe to remove", state="i")
|
||||||
osd_string = str()
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
|
||||||
retcode, stdout, stderr = common.run_os_command(
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
"ceph pg dump osds --format json"
|
f"ceph osd safe-to-destroy osd.{osd_id}"
|
||||||
)
|
)
|
||||||
dump_string = json.loads(stdout)
|
if int(retcode) in [0, 11]:
|
||||||
for osd in dump_string:
|
|
||||||
if str(osd["osd"]) == osd_id:
|
|
||||||
osd_string = osd
|
|
||||||
num_pgs = osd_string["num_pgs"]
|
|
||||||
if num_pgs > 0:
|
|
||||||
time.sleep(5)
|
|
||||||
else:
|
|
||||||
if force_flag:
|
|
||||||
logger.out("Ignoring error due to force flag", state="i")
|
|
||||||
else:
|
|
||||||
raise Exception
|
|
||||||
|
|
||||||
except Exception:
|
|
||||||
break
|
break
|
||||||
|
else:
|
||||||
|
time.sleep(5)
|
||||||
|
|
||||||
# 3. Stop the OSD process and wait for it to be terminated
|
# 3. Stop the OSD process and wait for it to be terminated
|
||||||
logger.out("Stopping OSD disk with ID {}".format(osd_id), state="i")
|
logger.out("Stopping OSD disk with ID {}".format(osd_id), state="i")
|
||||||
|
@ -527,19 +893,7 @@ class CephOSDInstance(object):
|
||||||
logger.out("Ignoring error due to force flag", state="i")
|
logger.out("Ignoring error due to force flag", state="i")
|
||||||
else:
|
else:
|
||||||
raise Exception
|
raise Exception
|
||||||
|
time.sleep(2)
|
||||||
# FIXME: There has to be a better way to do this /shrug
|
|
||||||
while True:
|
|
||||||
is_osd_up = False
|
|
||||||
# Find if there is a process named ceph-osd with arg '--id {id}'
|
|
||||||
for p in psutil.process_iter(attrs=["name", "cmdline"]):
|
|
||||||
if "ceph-osd" == p.info["name"] and "--id {}".format(
|
|
||||||
osd_id
|
|
||||||
) in " ".join(p.info["cmdline"]):
|
|
||||||
is_osd_up = True
|
|
||||||
# If there isn't, continue
|
|
||||||
if not is_osd_up:
|
|
||||||
break
|
|
||||||
|
|
||||||
# 4. Determine the block devices
|
# 4. Determine the block devices
|
||||||
osd_vg = zkhandler.read(("osd.vg", osd_id))
|
osd_vg = zkhandler.read(("osd.vg", osd_id))
|
||||||
|
@ -912,6 +1266,59 @@ def ceph_command(zkhandler, logger, this_node, data, d_osd):
|
||||||
# Wait 1 seconds before we free the lock, to ensure the client hits the lock
|
# Wait 1 seconds before we free the lock, to ensure the client hits the lock
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|
||||||
|
# Replacing an OSD
|
||||||
|
if command == "osd_replace":
|
||||||
|
node, osd_id, old_device, new_device, weight, ext_db_flag = args.split(",")
|
||||||
|
ext_db_flag = bool(strtobool(ext_db_flag))
|
||||||
|
if node == this_node.name:
|
||||||
|
# Lock the command queue
|
||||||
|
zk_lock = zkhandler.writelock("base.cmd.ceph")
|
||||||
|
with zk_lock:
|
||||||
|
# Add the OSD
|
||||||
|
result = CephOSDInstance.replace_osd(
|
||||||
|
zkhandler,
|
||||||
|
logger,
|
||||||
|
node,
|
||||||
|
osd_id,
|
||||||
|
old_device,
|
||||||
|
new_device,
|
||||||
|
weight,
|
||||||
|
ext_db_flag,
|
||||||
|
)
|
||||||
|
# Command succeeded
|
||||||
|
if result:
|
||||||
|
# Update the command queue
|
||||||
|
zkhandler.write([("base.cmd.ceph", "success-{}".format(data))])
|
||||||
|
# Command failed
|
||||||
|
else:
|
||||||
|
# Update the command queue
|
||||||
|
zkhandler.write([("base.cmd.ceph", "failure-{}".format(data))])
|
||||||
|
# Wait 1 seconds before we free the lock, to ensure the client hits the lock
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
# Refreshing an OSD
|
||||||
|
if command == "osd_refresh":
|
||||||
|
node, osd_id, device, ext_db_flag = args.split(",")
|
||||||
|
ext_db_flag = bool(strtobool(ext_db_flag))
|
||||||
|
if node == this_node.name:
|
||||||
|
# Lock the command queue
|
||||||
|
zk_lock = zkhandler.writelock("base.cmd.ceph")
|
||||||
|
with zk_lock:
|
||||||
|
# Add the OSD
|
||||||
|
result = CephOSDInstance.refresh_osd(
|
||||||
|
zkhandler, logger, node, osd_id, device, ext_db_flag
|
||||||
|
)
|
||||||
|
# Command succeeded
|
||||||
|
if result:
|
||||||
|
# Update the command queue
|
||||||
|
zkhandler.write([("base.cmd.ceph", "success-{}".format(data))])
|
||||||
|
# Command failed
|
||||||
|
else:
|
||||||
|
# Update the command queue
|
||||||
|
zkhandler.write([("base.cmd.ceph", "failure-{}".format(data))])
|
||||||
|
# Wait 1 seconds before we free the lock, to ensure the client hits the lock
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
# Removing an OSD
|
# Removing an OSD
|
||||||
elif command == "osd_remove":
|
elif command == "osd_remove":
|
||||||
osd_id, force = args.split(",")
|
osd_id, force = args.split(",")
|
||||||
|
|
Loading…
Reference in New Issue