Better handle failing RBD lock frees
If the VM is not in a stop state, failing to free the lock is now considered a fatal error and will put the domain into fail state, aborting the start. This is better than being unsafe or trying to start a VM which will fail to boot due to read-only volumes.
This commit is contained in:
parent
7c99a7bda7
commit
3705daff43
|
@ -56,14 +56,18 @@ def flush_locks(zk_conn, logger, dom_uuid, this_node=None):
|
|||
if lock_list:
|
||||
# Loop through the locks
|
||||
for lock in lock_list:
|
||||
if this_node is not None and lock['address'].split(':')[0] != this_node.storage_ipaddr:
|
||||
if this_node is not None and zkhandler.readdata(zk_conn, '/domains/{}/state'.format(dom_uuid)) != 'stop' and lock['address'].split(':')[0] != this_node.storage_ipaddr:
|
||||
logger.out('RBD lock does not belong to this host (lock owner: {}): freeing this lock would be unsafe, aborting'.format(lock['address'].split(':')[0], state='e'))
|
||||
continue
|
||||
zkhandler.writedata(zk_conn, {'/domains/{}/state'.format(dom_uuid): 'fail'})
|
||||
zkhandler.writedata(zk_conn, {'/domains/{}/failedreason'.format(dom_uuid): 'Could not safely free RBD lock {} ({}) on volume {}; stop VM and flush locks manually'.format(lock['id'], lock['address'], rbd)})
|
||||
break
|
||||
# Free the lock
|
||||
lock_remove_retcode, lock_remove_stdout, lock_remove_stderr = common.run_os_command('rbd lock remove {} "{}" "{}"'.format(rbd, lock['id'], lock['locker']))
|
||||
if lock_remove_retcode != 0:
|
||||
logger.out('Failed to free RBD lock "{}" on volume "{}": {}'.format(lock['id'], rbd, lock_remove_stderr), state='e')
|
||||
continue
|
||||
zkhandler.writedata(zk_conn, {'/domains/{}/state'.format(dom_uuid): 'fail'})
|
||||
zkhandler.writedata(zk_conn, {'/domains/{}/failedreason'.format(dom_uuid): 'Could not free RBD lock {} ({}) on volume {}: {}'.format(lock['id'], lock['address'], rbd, lock_remove_stderr)})
|
||||
break
|
||||
logger.out('Freed RBD lock "{}" on volume "{}"'.format(lock['id'], rbd), state='o')
|
||||
|
||||
return True
|
||||
|
@ -232,6 +236,11 @@ class VMInstance(object):
|
|||
# Flush locks
|
||||
self.logger.out('Flushing RBD locks', state='i', prefix='Domain {}'.format(self.domuuid))
|
||||
flush_locks(self.zk_conn, self.logger, self.domuuid, self.this_node)
|
||||
if zkhandler.readdata(self.zk_conn, '/domains/{}/state'.format(self.domuuid)) == 'fail':
|
||||
lv_conn.close()
|
||||
self.dom = None
|
||||
self.instart = False
|
||||
return
|
||||
|
||||
if curstate == libvirt.VIR_DOMAIN_RUNNING:
|
||||
# If it is running just update the model
|
||||
|
@ -251,7 +260,10 @@ class VMInstance(object):
|
|||
self.logger.out('Failed to create VM', state='e', prefix='Domain {}'.format(self.domuuid))
|
||||
zkhandler.writedata(self.zk_conn, {'/domains/{}/state'.format(self.domuuid): 'fail'})
|
||||
zkhandler.writedata(self.zk_conn, {'/domains/{}/failedreason'.format(self.domuuid): str(e)})
|
||||
lv_conn.close()
|
||||
self.dom = None
|
||||
self.instart = False
|
||||
return
|
||||
|
||||
lv_conn.close()
|
||||
|
||||
|
|
Loading…
Reference in New Issue