From 60e1da09dd793466f33c9da206edd6cc3b2b4746 Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Tue, 22 Jun 2021 00:00:50 -0400 Subject: [PATCH] Don't try any shenanigans when updating NICs Trying to do this on the VMInstance side had problems because we can't differentiate the 3 types of migration there. So, just update this on the API side and hope everything goes well. This introduces an edge bug: if a VM is using a macvtap SR-IOV device, and then tries to migrate, and the migrate is aborted, the NIC lists will be inconsistent. When I revamp the VMInstance in the future, I should be able to correct this, but for now we'll have to live with that edge case. --- daemon-common/vm.py | 23 +++++++++++------------ node-daemon/pvcnoded/VMInstance.py | 7 ------- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/daemon-common/vm.py b/daemon-common/vm.py index 4b69b2c1..aaba3758 100644 --- a/daemon-common/vm.py +++ b/daemon-common/vm.py @@ -635,10 +635,6 @@ def move_vm(zkhandler, domain, target_node, wait=False, force_live=False): retmsg = 'Permanently migrating VM "{}" to node "{}".'.format(domain, target_node) - if target_state not in ['migrate', 'migrate-live']: - # Update any SR-IOV NICs - with online migrations this is done by pvcnoded, but offline we must do it here - update_vm_sriov_nics(zkhandler, dom_uuid, zkhandler.read(('domain.node', dom_uuid)), target_node) - lock = zkhandler.exclusivelock(('domain.state', dom_uuid)) with lock: zkhandler.write([ @@ -650,6 +646,9 @@ def move_vm(zkhandler, domain, target_node, wait=False, force_live=False): # Wait for 1/2 second for migration to start time.sleep(0.5) + # Update any SR-IOV NICs + update_vm_sriov_nics(zkhandler, dom_uuid, current_node, target_node) + if wait: while zkhandler.read(('domain.state', dom_uuid)) == target_state: time.sleep(0.5) @@ -702,15 +701,12 @@ def migrate_vm(zkhandler, domain, target_node, force_migrate, wait=False, force_ return False, 'ERROR: Could not find a valid 
migration target for VM "{}".'.format(domain) # Don't overwrite an existing last_node when using force_migrate + real_current_node = current_node # Used for the SR-IOV update if last_node and force_migrate: current_node = last_node retmsg = 'Migrating VM "{}" to node "{}".'.format(domain, target_node) - if target_state not in ['migrate', 'migrate-live']: - # Update any SR-IOV NICs - with online migrations this is done by pvcnoded, but offline we must do it here - update_vm_sriov_nics(zkhandler, dom_uuid, zkhandler.read(('domain.node', dom_uuid)), target_node) - lock = zkhandler.exclusivelock(('domain.state', dom_uuid)) with lock: zkhandler.write([ @@ -722,6 +718,9 @@ def migrate_vm(zkhandler, domain, target_node, force_migrate, wait=False, force_ # Wait for 1/2 second for migration to start time.sleep(0.5) + # Update any SR-IOV NICs + update_vm_sriov_nics(zkhandler, dom_uuid, real_current_node, target_node) + if wait: while zkhandler.read(('domain.state', dom_uuid)) == target_state: time.sleep(0.5) @@ -747,6 +746,7 @@ def unmigrate_vm(zkhandler, domain, wait=False, force_live=False): else: target_state = 'migrate' + current_node = zkhandler.read(('domain.node', dom_uuid)) target_node = zkhandler.read(('domain.last_node', dom_uuid)) if target_node == '': @@ -754,10 +754,6 @@ def unmigrate_vm(zkhandler, domain, wait=False, force_live=False): retmsg = 'Unmigrating VM "{}" back to node "{}".'.format(domain, target_node) - if target_state not in ['migrate', 'migrate-live']: - # Update any SR-IOV NICs - with online migrations this is done by pvcnoded, but offline we must do it here - update_vm_sriov_nics(zkhandler, dom_uuid, zkhandler.read(('domain.node', dom_uuid)), target_node) - lock = zkhandler.exclusivelock(('domain.state', dom_uuid)) with lock: zkhandler.write([ @@ -769,6 +765,9 @@ def unmigrate_vm(zkhandler, domain, wait=False, force_live=False): # Wait for 1/2 second for migration to start time.sleep(0.5) + # Update any SR-IOV NICs + 
update_vm_sriov_nics(zkhandler, dom_uuid, current_node, target_node) + if wait: while zkhandler.read(('domain.state', dom_uuid)) == target_state: time.sleep(0.5) diff --git a/node-daemon/pvcnoded/VMInstance.py b/node-daemon/pvcnoded/VMInstance.py index 759aaa66..dd0f3d0e 100644 --- a/node-daemon/pvcnoded/VMInstance.py +++ b/node-daemon/pvcnoded/VMInstance.py @@ -34,8 +34,6 @@ import pvcnoded.VMConsoleWatcherInstance as VMConsoleWatcherInstance import daemon_lib.common as daemon_common -from daemon_lib.vm import update_vm_sriov_nics - def flush_locks(zkhandler, logger, dom_uuid, this_node=None): logger.out('Flushing RBD locks for VM "{}"'.format(dom_uuid), state='i') @@ -674,11 +672,6 @@ class VMInstance(object): self.logger.out('Acquired write lock for synchronization phase D', state='o', prefix='Domain {}'.format(self.domuuid)) time.sleep(0.5) # Time for reader to acquire the lock - # Update any SR-IOV NIC states now - sriov_update_result, sriov_update_error = update_vm_sriov_nics(self.zkhandler, self.domuuid, self.last_currentnode, self.node) - if not sriov_update_result: - self.logger.out('{}; VM will likely fail to start.'.format(sriov_update_error), state='w', prefix='Domain {}'.format(self.domuuid)) - self.state = self.zkhandler.read(('domain.state', self.domuuid)) self.dom = self.lookupByUUID(self.domuuid) if self.dom: