Better handle aborting migrations
This commit is contained in:
parent 567fe8f36b
commit abfe0108ab
|
@ -364,6 +364,15 @@ class VMInstance(object):
|
||||||
self.inmigrate = True
|
self.inmigrate = True
|
||||||
self.logger.out('Migrating VM to node "{}"'.format(self.node), state='i', prefix='Domain {}'.format(self.domuuid))
|
self.logger.out('Migrating VM to node "{}"'.format(self.node), state='i', prefix='Domain {}'.format(self.domuuid))
|
||||||
|
|
||||||
|
aborted = False
|
||||||
|
|
||||||
|
def abort_migrate():
|
||||||
|
zkhandler.writedata(self.zk_conn, {
|
||||||
|
'/domains/{}/state'.format(self.domuuid): 'start',
|
||||||
|
'/domains/{}/node'.format(self.domuuid): self.this_node.name,
|
||||||
|
'/domains/{}/lastnode'.format(self.domuuid): ''
|
||||||
|
})
|
||||||
|
|
||||||
# Acquire exclusive lock on the domain node key
|
# Acquire exclusive lock on the domain node key
|
||||||
migrate_lock = zkhandler.exclusivelock(self.zk_conn, '/domains/{}/node'.format(self.domuuid))
|
migrate_lock = zkhandler.exclusivelock(self.zk_conn, '/domains/{}/node'.format(self.domuuid))
|
||||||
migrate_lock.acquire()
|
migrate_lock.acquire()
|
||||||
|
@ -375,10 +384,23 @@ class VMInstance(object):
|
||||||
self.logger.out('Acquiring read lock for synchronization phase A', state='i', prefix='Domain {}'.format(self.domuuid))
|
self.logger.out('Acquiring read lock for synchronization phase A', state='i', prefix='Domain {}'.format(self.domuuid))
|
||||||
lock.acquire()
|
lock.acquire()
|
||||||
self.logger.out('Acquired read lock for synchronization phase A', state='o', prefix='Domain {}'.format(self.domuuid))
|
self.logger.out('Acquired read lock for synchronization phase A', state='o', prefix='Domain {}'.format(self.domuuid))
|
||||||
|
if zkhandler.readdata(self.zk_conn, '/locks/domain_migrate') == '':
|
||||||
|
self.logger.out('Waiting for peer', state='i', prefix='Domain {}'.format(self.domuuid))
|
||||||
|
ticks = 0
|
||||||
|
while zkhandler.readdata(self.zk_conn, '/locks/domain_migrate') == '':
|
||||||
|
time.sleep(0.1)
|
||||||
|
ticks += 1
|
||||||
|
if ticks > 300:
|
||||||
|
self.logger.out('Timed out waiting 30s for peer, aborting migration', state='e', prefix='Domain {}'.format(self.domuuid))
|
||||||
|
abort_migrate()
|
||||||
|
aborted = True
|
||||||
self.logger.out('Releasing read lock for synchronization phase A', state='i', prefix='Domain {}'.format(self.domuuid))
|
self.logger.out('Releasing read lock for synchronization phase A', state='i', prefix='Domain {}'.format(self.domuuid))
|
||||||
lock.release()
|
lock.release()
|
||||||
self.logger.out('Released read lock for synchronization phase A', state='o', prefix='Domain {}'.format(self.domuuid))
|
self.logger.out('Released read lock for synchronization phase A', state='o', prefix='Domain {}'.format(self.domuuid))
|
||||||
|
|
||||||
|
if aborted:
|
||||||
|
return
|
||||||
|
|
||||||
# Synchronize nodes B (I am writer)
|
# Synchronize nodes B (I am writer)
|
||||||
lock = zkhandler.writelock(self.zk_conn, '/locks/domain_migrate')
|
lock = zkhandler.writelock(self.zk_conn, '/locks/domain_migrate')
|
||||||
self.logger.out('Acquiring write lock for synchronization phase B', state='i', prefix='Domain {}'.format(self.domuuid))
|
self.logger.out('Acquiring write lock for synchronization phase B', state='i', prefix='Domain {}'.format(self.domuuid))
|
||||||
|
@ -431,11 +453,8 @@ class VMInstance(object):
|
||||||
if not migrate_live_result:
|
if not migrate_live_result:
|
||||||
if force_live:
|
if force_live:
|
||||||
self.logger.out('Could not live migrate VM; live migration enforced, aborting', state='e', prefix='Domain {}'.format(self.domuuid))
|
self.logger.out('Could not live migrate VM; live migration enforced, aborting', state='e', prefix='Domain {}'.format(self.domuuid))
|
||||||
zkhandler.writedata(self.zk_conn, {
|
abort_migrate()
|
||||||
'/domains/{}/state'.format(self.domuuid): 'start',
|
aborted = True
|
||||||
'/domains/{}/node'.format(self.domuuid): self.this_node.name,
|
|
||||||
'/domains/{}/lastnode'.format(self.domuuid): ''
|
|
||||||
})
|
|
||||||
else:
|
else:
|
||||||
do_migrate_shutdown = True
|
do_migrate_shutdown = True
|
||||||
|
|
||||||
|
@ -444,6 +463,9 @@ class VMInstance(object):
|
||||||
lock.release()
|
lock.release()
|
||||||
self.logger.out('Released write lock for synchronization phase B', state='o')
|
self.logger.out('Released write lock for synchronization phase B', state='o')
|
||||||
|
|
||||||
|
if aborted:
|
||||||
|
return
|
||||||
|
|
||||||
# Synchronize nodes C (I am writer)
|
# Synchronize nodes C (I am writer)
|
||||||
lock = zkhandler.writelock(self.zk_conn, '/locks/domain_migrate')
|
lock = zkhandler.writelock(self.zk_conn, '/locks/domain_migrate')
|
||||||
self.logger.out('Acquiring write lock for synchronization phase C', state='i', prefix='Domain {}'.format(self.domuuid))
|
self.logger.out('Acquiring write lock for synchronization phase C', state='i', prefix='Domain {}'.format(self.domuuid))
|
||||||
|
@ -554,7 +576,7 @@ class VMInstance(object):
|
||||||
zkhandler.writedata(self.zk_conn, { '/domains/{}/state'.format(self.domuuid): 'start' })
|
zkhandler.writedata(self.zk_conn, { '/domains/{}/state'.format(self.domuuid): 'start' })
|
||||||
else:
|
else:
|
||||||
# The send failed catastrophically
|
# The send failed catastrophically
|
||||||
self.logger.out('VM in undefined state: {}'.format(self.state), state='e', prefix='Domain {}'.format(self.domuuid))
|
self.logger.out('Migrate aborted or failed; VM in state {}'.format(self.state), state='w', prefix='Domain {}'.format(self.domuuid))
|
||||||
|
|
||||||
self.logger.out('Releasing write lock for synchronization phase D', state='i', prefix='Domain {}'.format(self.domuuid))
|
self.logger.out('Releasing write lock for synchronization phase D', state='i', prefix='Domain {}'.format(self.domuuid))
|
||||||
zkhandler.writedata(self.zk_conn, { '/locks/domain_migrate': '' })
|
zkhandler.writedata(self.zk_conn, { '/locks/domain_migrate': '' })
|
||||||
|
|
Loading…
Reference in New Issue