Fail VM removal on disk removal failures

Prevents bad states where the VM is "removed" but some of its disks
remain due to e.g. stuck watchers.

Rearrange the sequence so it goes stop, delete disks, then delete VM,
and return a failure if any disk fails to remove, allowing the task
to be rerun after the underlying problem is fixed.
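
In condensed form, the new ordering is (a sketch distilled from the diff
below; lookup/validation code and the state-settling sleeps are elided):

    # Sketch only: names (change_state, ceph.remove_volume, zkhandler)
    # are those used in the diff below; sleeps and validation omitted.
    def remove_vm_sketch(zkhandler, dom_uuid, disk_list):
        change_state(zkhandler, dom_uuid, 'stop')    # 1. stop the VM
        for disk in disk_list:                       # 2. delete disks first
            disk_pool, disk_name = disk.split('/')
            retcode, message = ceph.remove_volume(zkhandler, disk_pool, disk_name)
            if not retcode:
                return False, message                # fail the whole task
        change_state(zkhandler, dom_uuid, 'delete')  # 3. terminate instances
        zkhandler.delete([('domain', dom_uuid)])     # 4. delete VM config
        return True, 'Removed VM and its disks.'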
Joshua Boniface 2021-07-09 15:39:06 -04:00
parent d1d355a96b
commit 2138f2f59f
1 changed file with 16 additions and 14 deletions

@@ -449,27 +449,29 @@ def remove_vm(zkhandler, domain):
     if current_vm_state != 'stop':
         change_state(zkhandler, dom_uuid, 'stop')
 
-    # Gracefully terminate the class instances
-    change_state(zkhandler, dom_uuid, 'delete')
-
-    # Delete the configurations
-    zkhandler.delete([
-        ('domain', dom_uuid)
-    ])
-
     # Wait for 1 second to allow state to flow to all nodes
     time.sleep(1)
 
     # Remove disks
     for disk in disk_list:
         # vmpool/vmname_volume
-        try:
-            disk_pool, disk_name = disk.split('/')
-            retcode, message = ceph.remove_volume(zkhandler, disk_pool, disk_name)
-        except ValueError:
-            continue
+        disk_pool, disk_name = disk.split('/')
+        retcode, message = ceph.remove_volume(zkhandler, disk_pool, disk_name)
+        if not retcode:
+            return False, message
 
-    return True, 'Removed VM "{}" and disks from the cluster.'.format(domain)
+    # Gracefully terminate the class instances
+    change_state(zkhandler, dom_uuid, 'delete')
+
+    # Wait for 1/2 second to allow state to flow to all nodes
+    time.sleep(0.5)
+
+    # Delete the VM configuration from Zookeeper
+    zkhandler.delete([
+        ('domain', dom_uuid)
+    ])
+
+    return True, 'Removed VM "{}" and its disks from the cluster.'.format(domain)
 
 
 def start_vm(zkhandler, domain):
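
With failures now propagated, the task can simply be rerun once the
underlying problem is fixed. A hypothetical caller (the domain name and
volume path are illustrative only):

    # remove_vm returns a (success, message) tuple per the diff above.
    retcode, message = remove_vm(zkhandler, 'testvm')
    if not retcode:
        # e.g. a stuck RBD watcher held a volume open; inspect it with
        # `rbd status <pool>/<volume>`, clear it, then rerun remove_vm().
        print('VM removal failed: {}'.format(message))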