From 5995353597578649cfa2837a0d68391af1831ac1 Mon Sep 17 00:00:00 2001 From: Joshua Boniface Date: Sat, 12 Oct 2019 01:17:39 -0400 Subject: [PATCH] Implement VM metadata and use it Implements the storing of three VM metadata attributes: 1. Node limits - allows specifying a list of hosts on which the VM must run. This limit influences the migration behaviour of VMs. 2. Per-VM node selectors - allows each VM to have its migration autoselection method specified, to automatically allow different methods per VM based on the administrator's preferences. 3. VM autorestart - allows a VM to be automatically restarted from a stopped state, presumably due to a failure to find a target node (either due to limits or otherwise) during a flush/fence recovery, on the next node unflush/ready state of its home hypervisor. Useful mostly in conjunction with limits to ensure that VMs which were shut down due to there being no valid migration targets are started back up when their node becomes ready again. Includes the full client interaction with these metadata options, including printing, as well as defining a new function to modify this metadata. For the CLI it is set/modified either on `vm define` or via the `vm meta` command. For the API it is set/modified either on a POST to the `/vm` endpoint (during VM definition) or on POST to the `/vm/<vm>` endpoint. For the API this replaces the previous reserved word for VM creation from scratch as this will no longer be implemented in-daemon (see #22). 
Closes #52 --- client-api/api_lib/pvcapi.py | 24 ++++++++++----- client-api/pvc-api.py | 39 +++++++++++++++++++++--- client-cli/pvc.py | 51 ++++++++++++++++++++++++++++---- client-common/common.py | 7 +++++ client-common/vm.py | 32 +++++++++++++++++++- docs/manuals/api.md | 32 +++++++++++++++----- node-daemon/pvcd/NodeInstance.py | 20 +++++++++++-- node-daemon/pvcd/VMInstance.py | 4 +++ node-daemon/pvcd/common.py | 48 +++++++++++++++++++++--------- node-daemon/pvcd/fencing.py | 27 ++++++++++------- 10 files changed, 233 insertions(+), 51 deletions(-) diff --git a/client-api/api_lib/pvcapi.py b/client-api/api_lib/pvcapi.py index 2ab428bf..7bccd95a 100755 --- a/client-api/api_lib/pvcapi.py +++ b/client-api/api_lib/pvcapi.py @@ -272,13 +272,6 @@ def vm_list(node=None, state=None, limit=None, is_fuzzy=True): pvc_common.stopZKConnection(zk_conn) return flask.jsonify(retdata), retcode -# TODO: #22 -#def vm_add(): -# """ -# Add a VM named NAME to the PVC cluster. -# """ -# return '', 200 - def vm_define(name, xml, node, selector): """ Define a VM from Libvirt XML in the PVC cluster. @@ -296,6 +289,23 @@ def vm_define(name, xml, node, selector): } return flask.jsonify(output), retcode +def vm_meta(vm, limit, selector, autostart): + """ + Update metadata of a VM. + """ + zk_conn = pvc_common.startZKConnection(config['coordinators']) + retflag, retdata = pvc_vm.modify_vm_metadata(zk_conn, vm, limit, selector, autostart) + if retflag: + retcode = 200 + else: + retcode = 400 + + pvc_common.stopZKConnection(zk_conn) + output = { + 'message': retdata.replace('\"', '\'') + } + return flask.jsonify(output), retcode + def vm_modify(name, restart, xml): """ Modify a VM Libvirt XML in the PVC cluster. 
diff --git a/client-api/pvc-api.py b/client-api/pvc-api.py index 0254adeb..6d7d8be9 100755 --- a/client-api/pvc-api.py +++ b/client-api/pvc-api.py @@ -235,11 +235,23 @@ def api_vm_root(): else: node = None - # Get target selector + # Set target limit metadata + if 'limit' in flask.request.values: + limit = flask.request.values['limit'] + else: + limit = None + + # Set target selector metadata if 'selector' in flask.request.values: selector = flask.request.values['selector'] else: - selector = None + selector = 'mem' + + # Set target autostart metadata + if 'autostart' in flask.request.values: + autostart = True + else: + autostart = False return pvcapi.vm_define(vm, libvirt_xml, node, selector) @@ -251,8 +263,27 @@ def api_vm_element(vm): return pvcapi.vm_list(None, None, vm, is_fuzzy=False) if flask.request.method == 'POST': - # TODO: #22 - flask.abort(501) + # Set target limit metadata + if 'limit' in flask.request.values: + limit = flask.request.values['limit'] + else: + limit = None + + # Set target selector metadata + if 'selector' in flask.request.values: + selector = flask.request.values['selector'] + else: + selector = None + + # Set target autostart metadata + if 'no-autostart' in flask.request.values: + autostart = False + elif 'autostart' in flask.request.values: + autostart = True + else: + autostart = None + + return pvcapi.vm_meta(vm, limit, selector, autostart) if flask.request.method == 'PUT': libvirt_xml = flask.request.data diff --git a/client-cli/pvc.py b/client-cli/pvc.py index 88cb8288..37b7719e 100755 --- a/client-cli/pvc.py +++ b/client-cli/pvc.py @@ -217,17 +217,25 @@ def cli_vm(): @click.command(name='define', short_help='Define a new virtual machine from a Libvirt XML file.') @click.option( '-t', '--target', 'target_node', - help='Home node for this domain; autodetect if unspecified.' + help='Home node for this domain; autoselect if unspecified.' 
) @click.option( - '-s', '--selector', 'selector', default='mem', show_default=True, + '-l', '--limit', 'node_limit', default=None, show_default=False, + help='Comma-separated list of nodes to limit VM operation to; saved with VM.' +) +@click.option( + '-s', '--selector', 'node_selector', default='mem', show_default=True, type=click.Choice(['mem','load','vcpus','vms']), - help='Method to determine optimal target node during autodetect.' + help='Method to determine optimal target node during autoselect; saved with VM.' +) +@click.option( + '-a/-A', '--autostart/--no-autostart', 'node_autostart', is_flag=True, default=False, + help='Start VM automatically on next unflush/ready state of home node; unset by daemon once used.' ) @click.argument( 'config', type=click.File() ) -def vm_define(config, target_node, selector): +def vm_define(config, target_node, node_limit, node_selector, node_autostart): """ Define a new virtual machine from Libvirt XML configuration file CONFIG. """ @@ -237,7 +245,39 @@ def vm_define(config, target_node, selector): config.close() zk_conn = pvc_common.startZKConnection(zk_host) - retcode, retmsg = pvc_vm.define_vm(zk_conn, config_data, target_node, selector) + retcode, retmsg = pvc_vm.define_vm(zk_conn, config_data, target_node, node_limit, node_selector, node_autostart) cleanup(retcode, retmsg, zk_conn) +############################################################################### +# pvc vm meta +############################################################################### +@click.command(name='meta', short_help='Modify PVC metadata of an existing VM.') +@click.option( + '-l', '--limit', 'node_limit', default=None, show_default=False, + help='Comma-separated list of nodes to limit VM operation to.' +) +@click.option( + '-s', '--selector', 'node_selector', default=None, show_default=False, + type=click.Choice(['mem','load','vcpus','vms']), + help='Method to determine optimal target node during autoselect; saved with VM.' 
+) +@click.option( + '-a/-A', '--autostart/--no-autostart', 'node_autostart', is_flag=True, default=None, + help='Start VM automatically on next unflush/ready state of home node; unset by daemon once used.' +) +@click.argument( + 'domain' +) +def vm_meta(domain, node_limit, node_selector, node_autostart): + """ + Modify the PVC metadata of existing virtual machine DOMAIN. At least one option to update must be specified. DOMAIN may be a UUID or name. + """ + + if node_limit is None and node_selector is None and node_autostart is None: + cleanup(False, 'At least one metadata option must be specified to update.') + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_vm.modify_vm_metadata(zk_conn, domain, node_limit, node_selector, node_autostart) cleanup(retcode, retmsg, zk_conn) ############################################################################### @@ -1774,6 +1814,7 @@ cli_node.add_command(node_info) cli_node.add_command(node_list) cli_vm.add_command(vm_define) +cli_vm.add_command(vm_meta) cli_vm.add_command(vm_modify) cli_vm.add_command(vm_undefine) cli_vm.add_command(vm_remove) diff --git a/client-common/common.py b/client-common/common.py index 86db2b09..397ad72c 100644 --- a/client-common/common.py +++ b/client-common/common.py @@ -164,6 +164,10 @@ def getInformationFromXML(zk_conn, uuid): domain_lastnode = zkhandler.readdata(zk_conn, '/domains/{}/lastnode'.format(uuid)) domain_failedreason = zkhandler.readdata(zk_conn, '/domains/{}/failedreason'.format(uuid)) + domain_node_limit = zkhandler.readdata(zk_conn, '/domains/{}/node_limit'.format(uuid)) + domain_node_selector = zkhandler.readdata(zk_conn, '/domains/{}/node_selector'.format(uuid)) + domain_node_autostart = zkhandler.readdata(zk_conn, '/domains/{}/node_autostart'.format(uuid)) + parsed_xml = getDomainXML(zk_conn, uuid) domain_uuid, domain_name, domain_description, domain_memory, domain_vcpu, domain_vcputopo = getDomainMainDetails(parsed_xml) @@ -188,6 +192,9 @@ def 
getInformationFromXML(zk_conn, uuid): 'last_node': domain_lastnode, 'migrated': domain_migrated, 'failed_reason': domain_failedreason, + 'node_limit': domain_node_limit, + 'node_selector': domain_node_selector, + 'node_autostart': domain_node_autostart, 'description': domain_description, 'memory': domain_memory, 'vcpu': domain_vcpu, diff --git a/client-common/vm.py b/client-common/vm.py index 95b7952f..e56cb84a 100644 --- a/client-common/vm.py +++ b/client-common/vm.py @@ -160,7 +160,7 @@ def flush_locks(zk_conn, domain): return success, message -def define_vm(zk_conn, config_data, target_node, selector): +def define_vm(zk_conn, config_data, target_node, node_limit, node_selector, node_autostart): # Parse the XML data try: parsed_xml = lxml.objectify.fromstring(config_data) @@ -190,6 +190,9 @@ def define_vm(zk_conn, config_data, target_node, selector): '/domains/{}/state'.format(dom_uuid): 'stop', '/domains/{}/node'.format(dom_uuid): target_node, '/domains/{}/lastnode'.format(dom_uuid): '', + '/domains/{}/node_limit'.format(dom_uuid): node_limit, + '/domains/{}/node_selector'.format(dom_uuid): node_selector, + '/domains/{}/node_autostart'.format(dom_uuid): node_autostart, '/domains/{}/failedreason'.format(dom_uuid): '', '/domains/{}/consolelog'.format(dom_uuid): '', '/domains/{}/rbdlist'.format(dom_uuid): ','.join(rbd_list), @@ -198,6 +201,28 @@ def define_vm(zk_conn, config_data, target_node, selector): return True, 'Added new VM with Name "{}" and UUID "{}" to database.'.format(dom_name, dom_uuid) +def modify_vm_metadata(zk_conn, domain, node_limit, node_selector, node_autostart): + dom_uuid = getDomainUUID(zk_conn, domain) + if not dom_uuid: + return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain) + + if node_limit is not None: + zkhandler.writedata(zk_conn, { + '/domains/{}/node_limit'.format(dom_uuid): node_limit + }) + + if node_selector is not None: + zkhandler.writedata(zk_conn, { + '/domains/{}/node_selector'.format(dom_uuid): 
node_selector + }) + + if node_autostart is not None: + zkhandler.writedata(zk_conn, { + '/domains/{}/node_autostart'.format(dom_uuid): node_autostart + }) + + return True, 'Successfully modified PVC metadata of VM "{}".'.format(domain) + def modify_vm(zk_conn, domain, restart, new_vm_config): dom_uuid = getDomainUUID(zk_conn, domain) if not dom_uuid: @@ -681,6 +706,11 @@ def format_info(zk_conn, domain_information, long_output): ainformation.append('') ainformation.append('{}Failure reason:{} {}'.format(ansiprint.purple(), ansiprint.end(), domain_information['failed_reason'])) + ainformation.append('') + ainformation.append('{}Node limit:{} {}'.format(ansiprint.purple(), ansiprint.end(), domain_information['node_limit'])) + ainformation.append('{}Migration selector:{} {}'.format(ansiprint.purple(), ansiprint.end(), domain_information['node_selector'])) + ainformation.append('{}Autostart:{} {}'.format(ansiprint.purple(), ansiprint.end(), domain_information['node_autostart'])) + # Network list net_list = [] for net in domain_information['networks']: diff --git a/docs/manuals/api.md b/docs/manuals/api.md index 18937ddb..4e4b1fdb 100644 --- a/docs/manuals/api.md +++ b/docs/manuals/api.md @@ -256,20 +256,26 @@ Return a JSON document containing information about all cluster VMs. If `limit` ###### `POST` * Mandatory values: `xml` - * Optional values: `node`, `selector` + * Optional values: `node`, `limit`, `selector`, `autostart` Define a new VM with Libvirt XML configuration `xml` (either single-line or human-readable multi-line). If `node` is specified and is valid, the VM will be assigned to `node` instead of automatically determining the target node. If `node` is specified and not valid, auto-selection occurs instead. -If `selector` is specified and no specific and valid `node` is specified, the automatic node determination will use `selector` to determine the optimal node instead of the default for the cluster. 
+If `limit` is specified, the VM will not be allowed to run on nodes not specified in the limit. + +The `limit` value must be a comma-separated list of nodes; invalid nodes are ignored. + +If `selector` is specified and no specific and valid `node` is specified, the automatic node determination will use `selector` to determine the optimal node instead of the default for the cluster. This value is stored as PVC metadata for this VM and is used in subsequent migrate (including node flush) and fence recovery operations. Valid `selector` values are: `mem`: the node with the least allocated VM memory; `vcpus`: the node with the least allocated VM vCPUs; `load`: the node with the least current load average; `vms`: the node with the least number of provisioned VMs. +If `autostart` is specified, the VM will be set to autostart on the next node unflush/ready operation of the home node. This metadata value is reset to False by the node daemon on each successful use. + **NOTE:** The `POST` operation assumes that the VM resources (i.e. disks, operating system, etc.) are already created. This is equivalent to the `pvc vm define` command in the PVC CLI client. *[todo v0.6]* Creating a new VM using the provisioner uses the `POST /api/vm/<vm>` endpoint instead. #### `/api/v1/vm/<vm>` - * Methods: `GET`, *[todo v0.6]* `POST`, `PUT`, `DELETE` + * Methods: `GET`, `POST`, `PUT`, `DELETE` ###### `GET` * Mandatory values: N/A @@ -277,11 +283,23 @@ Valid `selector` values are: `mem`: the node with the least allocated VM memory; Return a JSON document containing information about `<vm>`. The output is identical to `GET /api/v1/vm?limit=<vm>` without fuzzy regex matching. 
-###### *[todo v0.6]* `POST` - * Mandatory values: `vm_template` - * Optional values: `description` +###### `POST` + * Mandatory values: At least one of the optional values must be specified + * Optional values: `limit`, `selector`, `autostart`/`no-autostart` -Create a new virtual machine `<vm>` with the specified VM template `vm_template` and optional text `description`. +Update the PVC metadata of `<vm>` with the specified values. + +If `limit` is specified, the VM will not be allowed to run on nodes not specified in the limit. + +The `limit` value must be a comma-separated list of nodes; invalid nodes are ignored. + +If `selector` is specified and no specific and valid `node` is specified, the automatic node determination will use `selector` to determine the optimal node instead of the default for the cluster. This value is stored as PVC metadata for this VM and is used in subsequent migrate (including node flush) and fence recovery operations. + +Valid `selector` values are: `mem`: the node with the least allocated VM memory; `vcpus`: the node with the least allocated VM vCPUs; `load`: the node with the least current load average; `vms`: the node with the least number of provisioned VMs. + +If `autostart` is specified, the VM will be set to autostart on the next node unflush/ready operation of the home node. This metadata value is reset to False by the node daemon on each successful use. + +If `no-autostart` is specified, an existing autostart will be disabled if applicable. 
###### `PUT` * Mandatory values: `xml` diff --git a/node-daemon/pvcd/NodeInstance.py b/node-daemon/pvcd/NodeInstance.py index bc5c15eb..3d2127b1 100644 --- a/node-daemon/pvcd/NodeInstance.py +++ b/node-daemon/pvcd/NodeInstance.py @@ -382,7 +382,7 @@ class NodeInstance(object): self.logger.out('Selecting target to migrate VM "{}"'.format(dom_uuid), state='i') - target_node = common.findTargetHypervisor(self.zk_conn, 'mem', dom_uuid) + target_node = common.findTargetHypervisor(self.zk_conn, self.config, dom_uuid) # Don't replace the previous node if the VM is already migrated if zkhandler.readdata(self.zk_conn, '/domains/{}/lastnode'.format(dom_uuid)): @@ -390,9 +390,10 @@ class NodeInstance(object): else: current_node = zkhandler.readdata(self.zk_conn, '/domains/{}/node'.format(dom_uuid)) - if target_node == None: - self.logger.out('Failed to find migration target for VM "{}"; shutting down'.format(dom_uuid), state='e') + if target_node is None: + self.logger.out('Failed to find migration target for VM "{}"; shutting down and setting autostart flag'.format(dom_uuid), state='e') zkhandler.writedata(self.zk_conn, { '/domains/{}/state'.format(dom_uuid): 'shutdown' }) + zkhandler.writedata(self.zk_conn, { '/domains/{}/node_autostart'.format(dom_uuid): 'True' }) # Wait for the VM to shut down while zkhandler.readdata(self.zk_conn, '/domains/{}/state'.format(dom_uuid)) != 'stop': @@ -427,6 +428,19 @@ class NodeInstance(object): self.flush_stopper = False return + # Handle autostarts + autostart = zkhandler.readdata(self.zk_conn, '/domains/{}/node_autostart'.format(dom_uuid)) + node = zkhandler.readdata(self.zk_conn, '/domains/{}/node'.format(dom_uuid)) + if autostart == 'True' and node == self.name: + self.logger.out('Starting autostart VM "{}"'.format(dom_uuid), state='i') + zkhandler.writedata(self.zk_conn, { + '/domains/{}/state'.format(dom_uuid): 'start', + '/domains/{}/node'.format(dom_uuid): self.name, + '/domains/{}/lastnode'.format(dom_uuid): '', + 
'/domains/{}/node_autostart'.format(dom_uuid): 'False' + }) + continue + try: last_node = zkhandler.readdata(self.zk_conn, '/domains/{}/lastnode'.format(dom_uuid)) except: diff --git a/node-daemon/pvcd/VMInstance.py b/node-daemon/pvcd/VMInstance.py index 6cd017bf..646de629 100644 --- a/node-daemon/pvcd/VMInstance.py +++ b/node-daemon/pvcd/VMInstance.py @@ -105,6 +105,10 @@ class VMInstance(object): self.domname = zkhandler.readdata(zk_conn, '/domains/{}'.format(domuuid)) self.state = zkhandler.readdata(self.zk_conn, '/domains/{}/state'.format(self.domuuid)) self.node = zkhandler.readdata(self.zk_conn, '/domains/{}/node'.format(self.domuuid)) + try: + self.pinpolicy = zkhandler.readdata(self.zk_conn, '/domains/{}/pinpolicy'.format(self.domuuid)) + except: + self.pinpolicy = "None" # These will all be set later self.instart = False diff --git a/node-daemon/pvcd/common.py b/node-daemon/pvcd/common.py index f7840c2d..9325645f 100644 --- a/node-daemon/pvcd/common.py +++ b/node-daemon/pvcd/common.py @@ -139,24 +139,44 @@ def removeIPAddress(ipaddr, cidrnetmask, dev): # # Find a migration target # -def findTargetHypervisor(zk_conn, search_field, dom_uuid): +def findTargetHypervisor(zk_conn, config, dom_uuid): + # Determine VM node limits ('' or 'None' entries mean no limit); set config value if read fails + try: + node_limit = [n for n in zkhandler.readdata(zk_conn, '/domains/{}/node_limit'.format(dom_uuid)).split(',') if n not in ('', 'None')] + except: + node_limit = None + zkhandler.writedata(zk_conn, { '/domains/{}/node_limit'.format(dom_uuid): 'None' }) + + # Determine VM search field (a single selector string) or use default; set config value if read fails + try: + search_field = zkhandler.readdata(zk_conn, '/domains/{}/node_selector'.format(dom_uuid)) + except: + search_field = config['migration_target_selector'] + zkhandler.writedata(zk_conn, { '/domains/{}/node_selector'.format(dom_uuid): config['migration_target_selector'] }) + + # Execute the search if search_field == 'mem': - return findTargetHypervisorMem(zk_conn, dom_uuid) + return findTargetHypervisorMem(zk_conn, 
node_limit, dom_uuid) if search_field == 'load': - return findTargetHypervisorLoad(zk_conn, dom_uuid) + return findTargetHypervisorLoad(zk_conn, node_limit, dom_uuid) if search_field == 'vcpus': - return findTargetHypervisorVCPUs(zk_conn, dom_uuid) + return findTargetHypervisorVCPUs(zk_conn, node_limit, dom_uuid) if search_field == 'vms': - return findTargetHypervisorVMs(zk_conn, dom_uuid) + return findTargetHypervisorVMs(zk_conn, node_limit, dom_uuid) + + # Nothing was found return None # Get the list of valid target nodes -def getHypervisors(zk_conn, dom_uuid): +def getHypervisors(zk_conn, node_limit, dom_uuid): valid_node_list = [] full_node_list = zkhandler.listchildren(zk_conn, '/nodes') current_node = zkhandler.readdata(zk_conn, '/domains/{}/node'.format(dom_uuid)) for node in full_node_list: + if node_limit and node not in node_limit: + continue + daemon_state = zkhandler.readdata(zk_conn, '/nodes/{}/daemonstate'.format(node)) domain_state = zkhandler.readdata(zk_conn, '/nodes/{}/domainstate'.format(node)) @@ -171,11 +191,11 @@ def getHypervisors(zk_conn, dom_uuid): return valid_node_list # via free memory (relative to allocated memory) -def findTargetHypervisorMem(zk_conn, dom_uuid): +def findTargetHypervisorMem(zk_conn, node_limit, dom_uuid): most_allocfree = 0 target_node = None - node_list = getHypervisors(zk_conn, dom_uuid) + node_list = getHypervisors(zk_conn, node_limit, dom_uuid) for node in node_list: memalloc = int(zkhandler.readdata(zk_conn, '/nodes/{}/memalloc'.format(node))) memused = int(zkhandler.readdata(zk_conn, '/nodes/{}/memused'.format(node))) @@ -190,11 +210,11 @@ def findTargetHypervisorMem(zk_conn, dom_uuid): return target_node # via load average -def findTargetHypervisorLoad(zk_conn, dom_uuid): +def findTargetHypervisorLoad(zk_conn, node_limit, dom_uuid): least_load = 9999 target_node = None - node_list = getHypervisors(zk_conn, dom_uuid) + node_list = getHypervisors(zk_conn, node_limit, dom_uuid) for node in node_list: load = 
int(zkhandler.readdata(zk_conn, '/nodes/{}/load'.format(node))) @@ -205,11 +225,11 @@ def findTargetHypervisorLoad(zk_conn, dom_uuid): return target_node # via total vCPUs -def findTargetHypervisorVCPUs(zk_conn, dom_uuid): +def findTargetHypervisorVCPUs(zk_conn, node_limit, dom_uuid): least_vcpus = 9999 target_node = None - node_list = getHypervisors(zk_conn, dom_uuid) + node_list = getHypervisors(zk_conn, node_limit, dom_uuid) for node in node_list: vcpus = int(zkhandler.readdata(zk_conn, '/nodes/{}/vcpualloc'.format(node))) @@ -220,11 +240,11 @@ def findTargetHypervisorVCPUs(zk_conn, dom_uuid): return target_node # via total VMs -def findTargetHypervisorVMs(zk_conn, dom_uuid): +def findTargetHypervisorVMs(zk_conn, node_limit, dom_uuid): least_vms = 9999 target_node = None - node_list = getHypervisors(zk_conn, dom_uuid) + node_list = getHypervisors(zk_conn, node_limit, dom_uuid) for node in node_list: vms = int(zkhandler.readdata(zk_conn, '/nodes/{}/domainscount'.format(node))) diff --git a/node-daemon/pvcd/fencing.py b/node-daemon/pvcd/fencing.py index 9e074281..afecd284 100644 --- a/node-daemon/pvcd/fencing.py +++ b/node-daemon/pvcd/fencing.py @@ -67,27 +67,34 @@ def fenceNode(node_name, zk_conn, config, logger): # If the fence succeeded and successful_fence is migrate if fence_status == True and config['successful_fence'] == 'migrate': - migrateFromFencedNode(zk_conn, node_name, logger) + migrateFromFencedNode(zk_conn, node_name, config, logger) # If the fence failed and failed_fence is migrate if fence_status == False and config['failed_fence'] == 'migrate' and config['suicide_intervals'] != '0': - migrateFromFencedNode(zk_conn, node_name, logger) + migrateFromFencedNode(zk_conn, node_name, config, logger) # Migrate hosts away from a fenced node -def migrateFromFencedNode(zk_conn, node_name, logger): +def migrateFromFencedNode(zk_conn, node_name, config, logger): logger.out('Migrating VMs from dead node "{}" to new hosts'.format(node_name), state='i') 
dead_node_running_domains = zkhandler.readdata(zk_conn, '/nodes/{}/runningdomains'.format(node_name)).split() for dom_uuid in dead_node_running_domains: VMInstance.flush_locks(zk_conn, logger, dom_uuid) - target_node = common.findTargetHypervisor(zk_conn, 'mem', dom_uuid) + target_node = common.findTargetHypervisor(zk_conn, config, dom_uuid) - logger.out('Migrating VM "{}" to node "{}"'.format(dom_uuid, target_node), state='i') - zkhandler.writedata(zk_conn, { - '/domains/{}/state'.format(dom_uuid): 'start', - '/domains/{}/node'.format(dom_uuid): target_node, - '/domains/{}/lastnode'.format(dom_uuid): node_name - }) + if target_node is not None: + logger.out('Migrating VM "{}" to node "{}"'.format(dom_uuid, target_node), state='i') + zkhandler.writedata(zk_conn, { + '/domains/{}/state'.format(dom_uuid): 'start', + '/domains/{}/node'.format(dom_uuid): target_node, + '/domains/{}/lastnode'.format(dom_uuid): node_name + }) + else: + logger.out('No target node found for VM "{}"; VM will autostart on next unflush/ready of current node'.format(dom_uuid), state='i') + zkhandler.writedata(zk_conn, { + '/domains/{}/state'.format(dom_uuid): 'stopped', + '/domains/{}/node_autostart'.format(dom_uuid): 'True' + }) # Set node in flushed state for easy remigrating when it comes back zkhandler.writedata(zk_conn, { '/nodes/{}/domainstate'.format(node_name): 'flushed' })