Readd RBD lock detection and clearing on startup
This is still needed due to the nature of the locks and how they are freed on startup, and to preserve the lock=fail behaviour on VM startup. Also fixes the fencing lock flush to use the client library directly, outside of Celery. I don't like this hack, but it seems prudent until we move fencing to the workers as well.
parent 2a9bc632fa
commit 83c4c6633d
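
The "hack" described in the commit message is the last hunk of this diff: fence_migrate_vm() now calls vm_worker_flush_locks() directly rather than dispatching it as a Celery task. A minimal sketch of the two invocation styles; both wrapper functions are hypothetical illustrations, and only the arguments to vm_worker_flush_locks() are taken from this commit:

# Sketch only: the wrappers below do not exist in the codebase.
from daemon_lib.vm import vm_worker_flush_locks

def flush_locks_from_worker(celery_task, zkhandler, domain):
    # In the worker, the bound Celery task object is passed through so the
    # start/update/finish helpers can report progress via update_state().
    return vm_worker_flush_locks(zkhandler, celery_task, domain)

def flush_locks_from_fencing(zkhandler, domain):
    # The fencing path has no task context, so it passes None and relies on
    # the guards added below; force_unlock=True matches fence_migrate_vm().
    return vm_worker_flush_locks(zkhandler, None, domain, force_unlock=True)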
@@ -33,6 +33,8 @@ class TaskFailure(Exception):
 def start(celery, msg, current=0, total=1):
     logger = getLogger(__name__)
     logger.info(f"Starting {current}/{total}: {msg}")
+    if celery is None:
+        return
     celery.update_state(
         state="RUNNING", meta={"current": current, "total": total, "status": msg}
     )

@@ -64,6 +66,8 @@ def log_err(celery, msg):
 def update(celery, msg, current=1, total=2):
     logger = getLogger(__name__)
     logger.info(f"Task update {current}/{total}: {msg}")
+    if celery is None:
+        return
     celery.update_state(
         state="RUNNING", meta={"current": current, "total": total, "status": msg}
     )

@@ -73,6 +77,8 @@ def update(celery, msg, current=1, total=2):
 def finish(celery, msg, current=2, total=2):
     logger = getLogger(__name__)
     logger.info(f"Task update {current}/{total}: Finishing up")
+    if celery is None:
+        return
     celery.update_state(
         state="RUNNING",
         meta={"current": current, "total": total, "status": "Finishing up"},
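
The three hunks above add the same early return to start(), update() and finish(). A hedged usage sketch of the effect (the helpers' module path is not shown in this diff): called with celery=None they now degrade to plain logging instead of failing on update_state().

# Called from a non-Celery context, e.g. the direct fencing flush:
start(None, "Flushing RBD locks")                      # logs, then returns early
update(None, "Freeing RBD locks", current=1, total=2)  # same
finish(None, "Locks flushed")                          # same
# Without the guard, each call would raise AttributeError, since
# None.update_state() does not exist.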
@@ -24,8 +24,8 @@ import time
 import libvirt
 
 from threading import Thread
 
 from xml.etree import ElementTree
-
+from json import loads as jloads
 
 import daemon_lib.common as common
@@ -283,9 +283,49 @@ class VMInstance(object):
             self.logger.out(
                 "Flushing RBD locks", state="i", prefix="Domain {}".format(self.domuuid)
             )
-            VMInstance.flush_locks(
-                self.zkhandler, self.logger, self.domuuid, self.this_node
-            )
+            rbd_list = self.zkhandler.read(
+                ("domain.storage.volumes", self.domuuid)
+            ).split(",")
+
+            locks = list()
+            for rbd in rbd_list:
+                retcode, stdout, stderr = common.run_os_command(
+                    f"rbd lock list --format json {rbd}"
+                )
+                if retcode == 0:
+                    _locks = jloads(stdout)
+                    for lock in _locks:
+                        lock["rbd"] = rbd
+                        locks.append(lock)
+
+            for lock in locks:
+                lockid = lock["id"]
+                locker = lock["locker"]
+                owner = lock["address"].split(":")[0]
+                rbd = lock["rbd"]
+
+                if owner == self.this_node.storage_ipaddr:
+                    retcode, stdout, stderr = common.run_os_command(
+                        f'rbd lock remove {rbd} "{lockid}" "{locker}"'
+                    )
+                else:
+                    self.logger.out(
+                        f"RBD lock does not belong to this host (owner {owner}) so freeing this lock is dangerous; aborting VM start",
+                        state="e",
+                        prefix="Domain {}".format(self.domuuid),
+                    )
+                    self.zkhandler.write(
+                        [
+                            (("domain.state", self.domuuid), "fail"),
+                            (
+                                ("domain.failed_reason", self.domuuid),
+                                f"Could not safely free RBD lock {lockid} ({owner}) on volume {rbd}; stop VM and flush locks manually",
+                            ),
+                        ]
+                    )
+                    break
+
             if self.zkhandler.read(("domain.state", self.domuuid)) == "fail":
                 lv_conn.close()
                 self.dom = None
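
The lock-clearing loop above parses the JSON emitted by rbd lock list. A standalone sketch of that parsing and the ownership check, where the example output is an assumption about the format (a list of objects with "id", "locker" and "address" fields) and every value is invented for illustration:

from json import loads as jloads

# Assumed shape of `rbd lock list --format json <volume>`; values are fake.
example_stdout = (
    '[{"id": "auto 140234552434944", '
    '"locker": "client.4123", '
    '"address": "10.0.0.1:0/2956423757"}]'
)
this_node_storage_ip = "10.0.0.1"  # stands in for self.this_node.storage_ipaddr

for lock in jloads(example_stdout):
    owner = lock["address"].split(":")[0]  # strip the ":port/nonce" suffix
    if owner == this_node_storage_ip:
        # The lock was taken by this node (e.g. before an unclean shutdown),
        # so it is safe to free, as the code above does via "rbd lock remove".
        print(f'would run: rbd lock remove <volume> "{lock["id"]}" "{lock["locker"]}"')
    else:
        # A foreign owner may still be running the VM; freeing the lock could
        # corrupt the volume, so the code above sets domain.state to "fail".
        print(f"lock owned by {owner}; VM start would be aborted")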
@@ -23,7 +23,7 @@ import time
 
 import daemon_lib.common as common
 
-from pvcnoded.objects.VMInstance import VMInstance
+from daemon_lib.vm import vm_worker_flush_locks
 
 
 #

@@ -121,7 +121,7 @@ def migrateFromFencedNode(zkhandler, node_name, config, logger):
 
     # Migrate a VM after a flush
     def fence_migrate_vm(dom_uuid):
-        VMInstance.flush_locks(zkhandler, logger, dom_uuid)
+        vm_worker_flush_locks(zkhandler, None, dom_uuid, force_unlock=True)
 
         target_node = common.findTargetNode(zkhandler, dom_uuid)
 