From 24ce361a04aa931d59ffc013c900066d74e3bcd2 Mon Sep 17 00:00:00 2001
From: "Joshua M. Boniface"
Date: Mon, 21 Jun 2021 23:18:34 -0400
Subject: [PATCH] Ensure SR-IOV NIC states are updated on migration

---
 daemon-common/vm.py                | 27 +++++++++++++++++++++++++++
 node-daemon/pvcnoded/VMInstance.py |  7 +++++++
 2 files changed, 34 insertions(+)

diff --git a/daemon-common/vm.py b/daemon-common/vm.py
index 6bba03e0..7ab78a7f 100644
--- a/daemon-common/vm.py
+++ b/daemon-common/vm.py
@@ -555,6 +555,33 @@ def disable_vm(zkhandler, domain):
     return True, 'Marked VM "{}" as disable.'.format(domain)
 
 
+def update_vm_sriov_nics(zkhandler, dom_uuid, source_node, target_node):
+    # Update all the SR-IOV device states on both nodes, used during migrations but called by the node-side
+    vm_config = zkhandler.read(('domain.xml', dom_uuid))
+    parsed_xml = lxml.objectify.fromstring(vm_config)
+    dnetworks = common.getDomainNetworks(parsed_xml, {})
+    retcode = True
+    retmsg = ''
+    for network in dnetworks:
+        if network['type'] in ['direct', 'hostdev']:
+            # Check if the network is already in use
+            is_used = zkhandler.read(('node.sriov.vf', target_node, 'sriov_vf.used', network['source']))
+            if is_used == 'True':
+                used_by_name = searchClusterByUUID(zkhandler, zkhandler.read(('node.sriov.vf', target_node, 'sriov_vf.used_by', network['source'])))
+                if retcode:
+                    retcode = False
+                    retmsg = 'Attempting to use SR-IOV network "{}" which is already used by VM "{}"'.format(network['source'], used_by_name)
+
+            # We must update the "used" section
+            if retcode:
+                # This conditional ensures that, if we failed the is_used check, we don't try to overwrite the information of a VF that belongs to another VM
+                set_sriov_vf_vm(zkhandler, dom_uuid, target_node, network['source'], network['mac'], network['type'])
+                # ... but we still want to free the old node in any case
+                unset_sriov_vf_vm(zkhandler, source_node, network['source'])
+
+    return retcode, retmsg
+
+
 def move_vm(zkhandler, domain, target_node, wait=False, force_live=False):
     # Validate that VM exists in cluster
     dom_uuid = getDomainUUID(zkhandler, domain)
diff --git a/node-daemon/pvcnoded/VMInstance.py b/node-daemon/pvcnoded/VMInstance.py
index 634f72dd..3fdd72b6 100644
--- a/node-daemon/pvcnoded/VMInstance.py
+++ b/node-daemon/pvcnoded/VMInstance.py
@@ -34,6 +34,8 @@ import pvcnoded.VMConsoleWatcherInstance as VMConsoleWatcherInstance
 
 import daemon_lib.common as daemon_common
 
+from daemon_lib.vm import update_vm_sriov_nics
+
 
 def flush_locks(zkhandler, logger, dom_uuid, this_node=None):
     logger.out('Flushing RBD locks for VM "{}"'.format(dom_uuid), state='i')
@@ -672,6 +674,11 @@ class VMInstance(object):
             self.logger.out('Acquired write lock for synchronization phase D', state='o', prefix='Domain {}'.format(self.domuuid))
             time.sleep(0.5)  # Time for reader to acquire the lock
 
+            # Update any SR-IOV NIC states now
+            sriov_update_result, sriov_update_error = update_vm_sriov_nics(self.zkhandler, self.domuuid, self.last_currentnode, self.node)
+            if not sriov_update_result:
+                self.logger.out('{}; VM will likely fail to start.'.format(sriov_update_error), state='w', prefix='Domain {}'.format(self.domuuid))
+
             self.state = self.zkhandler.read(('domain.state', self.domuuid))
             self.dom = self.lookupByUUID(self.domuuid)
             if self.dom: