From 2138f2f59f70291b6c4029f675e75511c38bd008 Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Fri, 9 Jul 2021 15:39:06 -0400 Subject: [PATCH] Fail VM removal on disk removal failures Prevents bad states where the VM is "removed" but some of its disks remain due to e.g. stuck watchers. Rearranges the sequence so it goes stop, delete disks, then delete VM, and returns a failure if any disk fails to remove, allowing the task to be rerun after fixing the problem. --- daemon-common/vm.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/daemon-common/vm.py b/daemon-common/vm.py index 45038cb3..dad92a3d 100644 --- a/daemon-common/vm.py +++ b/daemon-common/vm.py @@ -449,27 +449,29 @@ def remove_vm(zkhandler, domain): if current_vm_state != 'stop': change_state(zkhandler, dom_uuid, 'stop') - # Gracefully terminate the class instances - change_state(zkhandler, dom_uuid, 'delete') - - # Delete the configurations - zkhandler.delete([ - ('domain', dom_uuid) - ]) - # Wait for 1 second to allow state to flow to all nodes time.sleep(1) # Remove disks for disk in disk_list: # vmpool/vmname_volume - try: - disk_pool, disk_name = disk.split('/') - retcode, message = ceph.remove_volume(zkhandler, disk_pool, disk_name) - except ValueError: - continue + disk_pool, disk_name = disk.split('/') + retcode, message = ceph.remove_volume(zkhandler, disk_pool, disk_name) + if not retcode: + return False, message - return True, 'Removed VM "{}" and disks from the cluster.'.format(domain) + # Gracefully terminate the class instances + change_state(zkhandler, dom_uuid, 'delete') + + # Wait for 1/2 second to allow state to flow to all nodes + time.sleep(0.5) + + # Delete the VM configuration from Zookeeper + zkhandler.delete([ + ('domain', dom_uuid) + ]) + + return True, 'Removed VM "{}" and its disks from the cluster.'.format(domain) def start_vm(zkhandler, domain):