Better handle failing RBD lock frees
If the VM is not in the stop state, failing to free an RBD lock is now considered a fatal error: the domain is put into the fail state and the start is aborted. This is better than freeing a lock unsafely, or trying to start a VM that will fail to boot because its volumes are read-only.
This commit is contained in:
parent
7c99a7bda7
commit
3705daff43
|
@ -56,14 +56,18 @@ def flush_locks(zk_conn, logger, dom_uuid, this_node=None):
|
||||||
if lock_list:
|
if lock_list:
|
||||||
# Loop through the locks
|
# Loop through the locks
|
||||||
for lock in lock_list:
|
for lock in lock_list:
|
||||||
if this_node is not None and lock['address'].split(':')[0] != this_node.storage_ipaddr:
|
if this_node is not None and zkhandler.readdata(zk_conn, '/domains/{}/state'.format(dom_uuid)) != 'stop' and lock['address'].split(':')[0] != this_node.storage_ipaddr:
|
||||||
logger.out('RBD lock does not belong to this host (lock owner: {}): freeing this lock would be unsafe, aborting'.format(lock['address'].split(':')[0], state='e'))
|
logger.out('RBD lock does not belong to this host (lock owner: {}): freeing this lock would be unsafe, aborting'.format(lock['address'].split(':')[0], state='e'))
|
||||||
continue
|
zkhandler.writedata(zk_conn, {'/domains/{}/state'.format(dom_uuid): 'fail'})
|
||||||
|
zkhandler.writedata(zk_conn, {'/domains/{}/failedreason'.format(dom_uuid): 'Could not safely free RBD lock {} ({}) on volume {}; stop VM and flush locks manually'.format(lock['id'], lock['address'], rbd)})
|
||||||
|
break
|
||||||
# Free the lock
|
# Free the lock
|
||||||
lock_remove_retcode, lock_remove_stdout, lock_remove_stderr = common.run_os_command('rbd lock remove {} "{}" "{}"'.format(rbd, lock['id'], lock['locker']))
|
lock_remove_retcode, lock_remove_stdout, lock_remove_stderr = common.run_os_command('rbd lock remove {} "{}" "{}"'.format(rbd, lock['id'], lock['locker']))
|
||||||
if lock_remove_retcode != 0:
|
if lock_remove_retcode != 0:
|
||||||
logger.out('Failed to free RBD lock "{}" on volume "{}": {}'.format(lock['id'], rbd, lock_remove_stderr), state='e')
|
logger.out('Failed to free RBD lock "{}" on volume "{}": {}'.format(lock['id'], rbd, lock_remove_stderr), state='e')
|
||||||
continue
|
zkhandler.writedata(zk_conn, {'/domains/{}/state'.format(dom_uuid): 'fail'})
|
||||||
|
zkhandler.writedata(zk_conn, {'/domains/{}/failedreason'.format(dom_uuid): 'Could not free RBD lock {} ({}) on volume {}: {}'.format(lock['id'], lock['address'], rbd, lock_remove_stderr)})
|
||||||
|
break
|
||||||
logger.out('Freed RBD lock "{}" on volume "{}"'.format(lock['id'], rbd), state='o')
|
logger.out('Freed RBD lock "{}" on volume "{}"'.format(lock['id'], rbd), state='o')
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
@ -232,6 +236,11 @@ class VMInstance(object):
|
||||||
# Flush locks
|
# Flush locks
|
||||||
self.logger.out('Flushing RBD locks', state='i', prefix='Domain {}'.format(self.domuuid))
|
self.logger.out('Flushing RBD locks', state='i', prefix='Domain {}'.format(self.domuuid))
|
||||||
flush_locks(self.zk_conn, self.logger, self.domuuid, self.this_node)
|
flush_locks(self.zk_conn, self.logger, self.domuuid, self.this_node)
|
||||||
|
if zkhandler.readdata(self.zk_conn, '/domains/{}/state'.format(self.domuuid)) == 'fail':
|
||||||
|
lv_conn.close()
|
||||||
|
self.dom = None
|
||||||
|
self.instart = False
|
||||||
|
return
|
||||||
|
|
||||||
if curstate == libvirt.VIR_DOMAIN_RUNNING:
|
if curstate == libvirt.VIR_DOMAIN_RUNNING:
|
||||||
# If it is running just update the model
|
# If it is running just update the model
|
||||||
|
@ -251,7 +260,10 @@ class VMInstance(object):
|
||||||
self.logger.out('Failed to create VM', state='e', prefix='Domain {}'.format(self.domuuid))
|
self.logger.out('Failed to create VM', state='e', prefix='Domain {}'.format(self.domuuid))
|
||||||
zkhandler.writedata(self.zk_conn, {'/domains/{}/state'.format(self.domuuid): 'fail'})
|
zkhandler.writedata(self.zk_conn, {'/domains/{}/state'.format(self.domuuid): 'fail'})
|
||||||
zkhandler.writedata(self.zk_conn, {'/domains/{}/failedreason'.format(self.domuuid): str(e)})
|
zkhandler.writedata(self.zk_conn, {'/domains/{}/failedreason'.format(self.domuuid): str(e)})
|
||||||
|
lv_conn.close()
|
||||||
self.dom = None
|
self.dom = None
|
||||||
|
self.instart = False
|
||||||
|
return
|
||||||
|
|
||||||
lv_conn.close()
|
lv_conn.close()
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue