Allow enforcement of live migration

Provides a CLI and API argument to force live migration, which sets a
new VM state, "migrate-live". During a migration, the node daemon's
VMInstance reads this flag from the state and, if live migration is
enforced, does not fall back to a shutdown-based migration.

Closes #95
This commit is contained in:
Joshua Boniface 2020-06-06 11:49:21 -04:00
parent b5434ba744
commit ce60836c34
6 changed files with 72 additions and 32 deletions

View File

@ -1365,7 +1365,8 @@ class API_VM_Node(Resource):
{ 'name': 'action', 'choices': ('migrate', 'unmigrate', 'move'), 'helptext': "A valid action must be specified", 'required': True },
{ 'name': 'node' },
{ 'name': 'force' },
{ 'name': 'wait' }
{ 'name': 'wait' },
{ 'name': 'force_live' }
])
@Authenticator
def post(self, vm, reqargs):
@ -1396,6 +1397,10 @@ class API_VM_Node(Resource):
name: wait
type: boolean
description: Whether to block waiting for the migration to complete
- in: query
name: force_live
type: boolean
description: Whether to enforce live migration and disable shutdown-based fallback migration
responses:
200:
description: OK
@ -1412,13 +1417,14 @@ class API_VM_Node(Resource):
node = reqargs.get('node', None)
force = bool(strtobool(reqargs.get('force', 'false')))
wait = bool(strtobool(reqargs.get('wait', 'false')))
force_live = bool(strtobool(reqargs.get('force_live', 'false')))
if action == 'move':
return api_helper.vm_move(vm, node, wait)
return api_helper.vm_move(vm, node, wait, force_live)
if action == 'migrate':
return api_helper.vm_migrate(vm, node, force, wait)
return api_helper.vm_migrate(vm, node, force, wait, force_live)
if action == 'unmigrate':
return api_helper.vm_unmigrate(vm, wait)
return api_helper.vm_unmigrate(vm, wait, force_live)
abort(400)
api.add_resource(API_VM_Node, '/vm/<vm>/node')

View File

@ -661,12 +661,12 @@ def vm_disable(name):
}
return output, retcode
def vm_move(name, node, wait):
def vm_move(name, node, wait, force_live):
"""
Move a VM to another node.
"""
zk_conn = pvc_common.startZKConnection(config['coordinators'])
retflag, retdata = pvc_vm.move_vm(zk_conn, name, node, wait)
retflag, retdata = pvc_vm.move_vm(zk_conn, name, node, wait, force_live)
pvc_common.stopZKConnection(zk_conn)
if retflag:
@ -679,12 +679,12 @@ def vm_move(name, node, wait):
}
return output, retcode
def vm_migrate(name, node, flag_force, wait):
def vm_migrate(name, node, flag_force, wait, force_live):
"""
Temporarily migrate a VM to another node.
"""
zk_conn = pvc_common.startZKConnection(config['coordinators'])
retflag, retdata = pvc_vm.migrate_vm(zk_conn, name, node, flag_force, wait)
retflag, retdata = pvc_vm.migrate_vm(zk_conn, name, node, flag_force, wait, force_live)
pvc_common.stopZKConnection(zk_conn)
if retflag:
@ -697,12 +697,12 @@ def vm_migrate(name, node, flag_force, wait):
}
return output, retcode
def vm_unmigrate(name, wait):
def vm_unmigrate(name, wait, force_live):
"""
Unmigrate a migrated VM.
"""
zk_conn = pvc_common.startZKConnection(config['coordinators'])
retflag, retdata = pvc_vm.unmigrate_vm(zk_conn, name, wait)
retflag, retdata = pvc_vm.unmigrate_vm(zk_conn, name, wait, force_live)
pvc_common.stopZKConnection(zk_conn)
if retflag:

View File

@ -203,19 +203,20 @@ def vm_state(config, vm, target_state, wait=False):
return retstatus, response.json()['message']
def vm_node(config, vm, target_node, action, force=False, wait=False):
def vm_node(config, vm, target_node, action, force=False, wait=False, force_live=False):
"""
Modify the current node of VM via {action}
API endpoint: POST /vm/{vm}/node
API arguments: node={target_node}, action={action}, force={force}, wait={wait}
API arguments: node={target_node}, action={action}, force={force}, wait={wait}, force_live={force_live}
API schema: {"message":"{data}"}
"""
params={
'node': target_node,
'action': action,
'force': str(force).lower(),
'wait': str(wait).lower()
'wait': str(wait).lower(),
'force_live': str(force_live).lower()
}
response = call_api(config, 'post', '/vm/{vm}/node'.format(vm=vm), params=params)

View File

@ -868,13 +868,17 @@ def vm_disable(domain):
'-w', '--wait', 'wait', is_flag=True, default=False,
help='Wait for migration to complete before returning.'
)
@click.option(
'--force-live', 'force_live', is_flag=True, default=False,
help='Do not fall back to shutdown-based migration if live migration fails.'
)
@cluster_req
def vm_move(domain, target_node, wait):
def vm_move(domain, target_node, wait, force_live):
"""
Permanently move virtual machine DOMAIN, via live migration if running and possible, to another node. DOMAIN may be a UUID or name.
"""
retcode, retmsg = pvc_vm.vm_node(config, domain, target_node, 'move', force=False, wait=wait)
retcode, retmsg = pvc_vm.vm_node(config, domain, target_node, 'move', force=False, wait=wait, force_live=force_live)
cleanup(retcode, retmsg)
###############################################################################
@ -896,13 +900,17 @@ def vm_move(domain, target_node, wait):
'-w', '--wait', 'wait', is_flag=True, default=False,
help='Wait for migration to complete before returning.'
)
@click.option(
'--force-live', 'force_live', is_flag=True, default=False,
help='Do not fall back to shutdown-based migration if live migration fails.'
)
@cluster_req
def vm_migrate(domain, target_node, force_migrate, wait):
def vm_migrate(domain, target_node, force_migrate, wait, force_live):
"""
Temporarily migrate running virtual machine DOMAIN, via live migration if possible, to another node. DOMAIN may be a UUID or name. If DOMAIN is not running, it will be started on the target node.
"""
retcode, retmsg = pvc_vm.vm_node(config, domain, target_node, 'migrate', force=force_migrate, wait=wait)
retcode, retmsg = pvc_vm.vm_node(config, domain, target_node, 'migrate', force=force_migrate, wait=wait, force_live=force_live)
cleanup(retcode, retmsg)
###############################################################################
@ -916,13 +924,17 @@ def vm_migrate(domain, target_node, force_migrate, wait):
'-w', '--wait', 'wait', is_flag=True, default=False,
help='Wait for migration to complete before returning.'
)
@click.option(
'--force-live', 'force_live', is_flag=True, default=False,
help='Do not fall back to shutdown-based migration if live migration fails.'
)
@cluster_req
def vm_unmigrate(domain, wait):
def vm_unmigrate(domain, wait, force_live):
"""
Restore previously migrated virtual machine DOMAIN, via live migration if possible, to its original node. DOMAIN may be a UUID or name. If DOMAIN is not running, it will be started on the target node.
"""
retcode, retmsg = pvc_vm.vm_node(config, domain, None, 'unmigrate', force=False, wait=wait)
retcode, retmsg = pvc_vm.vm_node(config, domain, None, 'unmigrate', force=False, wait=wait, force_live=force_live)
cleanup(retcode, retmsg)
###############################################################################

View File

@ -441,7 +441,7 @@ def disable_vm(zk_conn, domain):
return True, 'Marked VM "{}" as disable.'.format(domain)
def move_vm(zk_conn, domain, target_node, wait=False):
def move_vm(zk_conn, domain, target_node, wait=False, force_live=False):
# Validate that VM exists in cluster
dom_uuid = getDomainUUID(zk_conn, domain)
if not dom_uuid:
@ -452,6 +452,9 @@ def move_vm(zk_conn, domain, target_node, wait=False):
if current_state != 'start':
# If the current state isn't start, preserve it; we're not doing live migration
target_state = current_state
else:
if force_live:
target_state = 'migrate-live'
else:
target_state = 'migrate'
@ -497,7 +500,7 @@ def move_vm(zk_conn, domain, target_node, wait=False):
return True, retmsg
def migrate_vm(zk_conn, domain, target_node, force_migrate, wait=False):
def migrate_vm(zk_conn, domain, target_node, force_migrate, wait=False, force_live=False):
# Validate that VM exists in cluster
dom_uuid = getDomainUUID(zk_conn, domain)
if not dom_uuid:
@ -508,6 +511,9 @@ def migrate_vm(zk_conn, domain, target_node, force_migrate, wait=False):
if current_state != 'start':
# If the current state isn't start, preserve it; we're not doing live migration
target_state = current_state
else:
if force_live:
target_state = 'migrate-live'
else:
target_state = 'migrate'
@ -556,7 +562,7 @@ def migrate_vm(zk_conn, domain, target_node, force_migrate, wait=False):
return True, retmsg
def unmigrate_vm(zk_conn, domain, wait=False):
def unmigrate_vm(zk_conn, domain, wait=False, force_live=False):
# Validate that VM exists in cluster
dom_uuid = getDomainUUID(zk_conn, domain)
if not dom_uuid:
@ -567,6 +573,9 @@ def unmigrate_vm(zk_conn, domain, wait=False):
if current_state != 'start':
# If the current state isn't start, preserve it; we're not doing live migration
target_state = current_state
else:
if force_live:
target_state = 'migrate-live'
else:
target_state = 'migrate'

View File

@ -371,7 +371,7 @@ class VMInstance(object):
return True
# Migrate the VM to a target host
def migrate_vm(self):
def migrate_vm(self, force_live=False):
# Don't try to migrate a node to itself, set back to start
if self.node == self.lastnode:
zkhandler.writedata(self.zk_conn, { '/domains/{}/state'.format(self.domuuid): 'start' })
@ -383,6 +383,14 @@ class VMInstance(object):
migrate_ret = self.live_migrate_vm()
if not migrate_ret:
if force_live:
self.logger.out('Could not live migrate VM; live migration enforced, aborting', state='e', prefix='Domain {}:'.format(self.domuuid))
zkhandler.writedata(self.zk_conn, {
'/domains/{}/state'.format(self.domuuid): 'start',
'/domains/{}/node'.format(self.domuuid): self.this_node.name,
'/domains/{}/lastnode'.format(self.domuuid): ''
})
else:
self.logger.out('Could not live migrate VM; shutting down to migrate instead', state='e', prefix='Domain {}:'.format(self.domuuid))
zkhandler.writedata(self.zk_conn, { '/domains/{}/state'.format(self.domuuid): 'shutdown' })
else:
@ -418,7 +426,7 @@ class VMInstance(object):
break
else:
# If the state is no longer migrate
if self.state != 'migrate':
if self.state not in ['migrate', 'migrate-live']:
# The receive was aborted before it timed out or was completed
self.logger.out('Receive aborted via state change', state='w', prefix='Domain {}:'.format(self.domuuid))
break
@ -498,6 +506,7 @@ class VMInstance(object):
# Valid states are:
# start
# migrate
# migrate-live
# restart
# shutdown
# stop
@ -523,7 +532,7 @@ class VMInstance(object):
# Add domain to running list
self.addDomainToList()
# VM is already running and should be but stuck in migrate state
elif self.state == "migrate":
elif self.state == "migrate" or self.state == "migrate-live":
# Start the log watcher
self.console_log_instance.start()
zkhandler.writedata(self.zk_conn, { '/domains/{}/state'.format(self.domuuid): 'start' })
@ -544,7 +553,7 @@ class VMInstance(object):
# Start the domain
self.start_vm()
# VM should be migrated to this node
elif self.state == "migrate":
elif self.state == "migrate" or self.state == "migrate-live":
# Receive the migration
self.receive_migrate()
# VM should be restarted (i.e. started since it isn't running)
@ -566,7 +575,10 @@ class VMInstance(object):
if running == libvirt.VIR_DOMAIN_RUNNING:
# VM should be migrated away from this node
if self.state == "migrate":
self.migrate_vm()
self.migrate_vm(force_live=False)
# VM should be migrated away from this node, live only (no shutdown fallback)
elif self.state == "migrate-live":
self.migrate_vm(force_live=True)
# VM should be shutdown gracefully
elif self.state == 'shutdown':
self.shutdown_vm()