Fail VM removal on disk removal failures

Prevents bad states where the VM is "removed" but some of its disks
remain due to e.g. stuck watchers.

Rearrange the sequence so it goes stop, delete disks, then delete VM,
and return a failure if any disk fails to remove, allowing the task
to be rerun after the underlying problem is fixed.
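
In condensed form, the new ordering is (a sketch distilled from the diff
below; lookup/validation code and the state-settling sleeps are elided):

    # Sketch only: names (change_state, ceph.remove_volume, zkhandler)
    # are those used in the diff below; sleeps and validation omitted.
    def remove_vm_sketch(zkhandler, dom_uuid, disk_list):
        change_state(zkhandler, dom_uuid, 'stop')    # 1. stop the VM
        for disk in disk_list:                       # 2. delete disks first
            disk_pool, disk_name = disk.split('/')
            retcode, message = ceph.remove_volume(zkhandler, disk_pool, disk_name)
            if not retcode:
                return False, message                # fail the whole task
        change_state(zkhandler, dom_uuid, 'delete')  # 3. terminate instances
        zkhandler.delete([('domain', dom_uuid)])     # 4. delete VM config
        return True, 'Removed VM and its disks.'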
Joshua Boniface 2021-07-09 15:39:06 -04:00
parent d1d355a96b
commit 2138f2f59f
1 changed file with 16 additions and 14 deletions

@@ -449,27 +449,29 @@ def remove_vm(zkhandler, domain):
     if current_vm_state != 'stop':
         change_state(zkhandler, dom_uuid, 'stop')
 
-    # Gracefully terminate the class instances
-    change_state(zkhandler, dom_uuid, 'delete')
-
-    # Delete the configurations
-    zkhandler.delete([
-        ('domain', dom_uuid)
-    ])
-
     # Wait for 1 second to allow state to flow to all nodes
     time.sleep(1)
 
     # Remove disks
     for disk in disk_list:
         # vmpool/vmname_volume
-        try:
-            disk_pool, disk_name = disk.split('/')
-            retcode, message = ceph.remove_volume(zkhandler, disk_pool, disk_name)
-        except ValueError:
-            continue
+        disk_pool, disk_name = disk.split('/')
+        retcode, message = ceph.remove_volume(zkhandler, disk_pool, disk_name)
+        if not retcode:
+            return False, message
 
-    return True, 'Removed VM "{}" and disks from the cluster.'.format(domain)
+    # Gracefully terminate the class instances
+    change_state(zkhandler, dom_uuid, 'delete')
+
+    # Wait for 1/2 second to allow state to flow to all nodes
+    time.sleep(0.5)
+
+    # Delete the VM configuration from Zookeeper
+    zkhandler.delete([
+        ('domain', dom_uuid)
+    ])
+
+    return True, 'Removed VM "{}" and its disks from the cluster.'.format(domain)
 
 
 def start_vm(zkhandler, domain):
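
With failures now propagated, the task can simply be rerun once the
underlying problem is fixed. A hypothetical caller (the domain name and
volume path are illustrative only):

    # remove_vm returns a (success, message) tuple per the diff above.
    retcode, message = remove_vm(zkhandler, 'testvm')
    if not retcode:
        # e.g. a stuck RBD watcher held a volume open; inspect it with
        # `rbd status <pool>/<volume>`, clear it, then rerun remove_vm().
        print('VM removal failed: {}'.format(message))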