Adjust timing to avoid migrating to self quickly
Add a second, separate lock on the VM state key, release it earlier than the node lock, and adjust the timings to avoid double-migrating a single VM.
This commit is contained in:
parent
398d33778f
commit
ef762359f4
|
@ -350,12 +350,6 @@ class VMInstance(object):
|
||||||
|
|
||||||
# Migrate the VM to a target host
|
# Migrate the VM to a target host
|
||||||
def migrate_vm(self, force_live=False):
|
def migrate_vm(self, force_live=False):
|
||||||
# Don't try to migrate a node to itself, set back to start
|
|
||||||
if self.node == self.lastnode:
|
|
||||||
zkhandler.writedata(self.zk_conn, { '/domains/{}/state'.format(self.domuuid): 'start' })
|
|
||||||
zkhandler.writedata(self.zk_conn, { '/domains/{}/lastnode'.format(self.domuuid): '' })
|
|
||||||
return
|
|
||||||
|
|
||||||
self.inmigrate = True
|
self.inmigrate = True
|
||||||
self.logger.out('Migrating VM to node "{}"'.format(self.node), state='i', prefix='Domain {}'.format(self.domuuid))
|
self.logger.out('Migrating VM to node "{}"'.format(self.node), state='i', prefix='Domain {}'.format(self.domuuid))
|
||||||
|
|
||||||
|
@ -367,13 +361,22 @@ class VMInstance(object):
|
||||||
'/domains/{}/node'.format(self.domuuid): self.this_node.name,
|
'/domains/{}/node'.format(self.domuuid): self.this_node.name,
|
||||||
'/domains/{}/lastnode'.format(self.domuuid): self.last_lastnode
|
'/domains/{}/lastnode'.format(self.domuuid): self.last_lastnode
|
||||||
})
|
})
|
||||||
|
migrate_lock_node.release()
|
||||||
|
migrate_lock_state.release()
|
||||||
|
|
||||||
# Acquire exclusive lock on the domain node key
|
# Acquire exclusive lock on the domain node key
|
||||||
migrate_lock = zkhandler.exclusivelock(self.zk_conn, '/domains/{}/node'.format(self.domuuid))
|
migrate_lock_node = zkhandler.exclusivelock(self.zk_conn, '/domains/{}/node'.format(self.domuuid))
|
||||||
migrate_lock.acquire()
|
migrate_lock_state = zkhandler.exclusivelock(self.zk_conn, '/domains/{}/state'.format(self.domuuid))
|
||||||
|
migrate_lock_node.acquire()
|
||||||
|
migrate_lock_state.acquire()
|
||||||
|
|
||||||
time.sleep(0.2) # Initial delay for the first writer to grab the lock
|
time.sleep(0.2) # Initial delay for the first writer to grab the lock
|
||||||
|
|
||||||
|
# Don't try to migrate a node to itself, set back to start
|
||||||
|
if self.node == self.lastnode or self.node == self.this_node.name:
|
||||||
|
abort_migrate()
|
||||||
|
return
|
||||||
|
|
||||||
# Synchronize nodes A (I am reader)
|
# Synchronize nodes A (I am reader)
|
||||||
lock = zkhandler.readlock(self.zk_conn, '/locks/domain_migrate/{}'.format(self.domuuid))
|
lock = zkhandler.readlock(self.zk_conn, '/locks/domain_migrate/{}'.format(self.domuuid))
|
||||||
self.logger.out('Acquiring read lock for synchronization phase A', state='i', prefix='Domain {}'.format(self.domuuid))
|
self.logger.out('Acquiring read lock for synchronization phase A', state='i', prefix='Domain {}'.format(self.domuuid))
|
||||||
|
@ -387,7 +390,6 @@ class VMInstance(object):
|
||||||
ticks += 1
|
ticks += 1
|
||||||
if ticks > 300:
|
if ticks > 300:
|
||||||
self.logger.out('Timed out waiting 30s for peer, aborting migration', state='e', prefix='Domain {}'.format(self.domuuid))
|
self.logger.out('Timed out waiting 30s for peer, aborting migration', state='e', prefix='Domain {}'.format(self.domuuid))
|
||||||
abort_migrate()
|
|
||||||
aborted = True
|
aborted = True
|
||||||
break
|
break
|
||||||
self.logger.out('Releasing read lock for synchronization phase A', state='i', prefix='Domain {}'.format(self.domuuid))
|
self.logger.out('Releasing read lock for synchronization phase A', state='i', prefix='Domain {}'.format(self.domuuid))
|
||||||
|
@ -395,6 +397,7 @@ class VMInstance(object):
|
||||||
self.logger.out('Released read lock for synchronization phase A', state='o', prefix='Domain {}'.format(self.domuuid))
|
self.logger.out('Released read lock for synchronization phase A', state='o', prefix='Domain {}'.format(self.domuuid))
|
||||||
|
|
||||||
if aborted:
|
if aborted:
|
||||||
|
abort_migrate()
|
||||||
return
|
return
|
||||||
|
|
||||||
# Synchronize nodes B (I am writer)
|
# Synchronize nodes B (I am writer)
|
||||||
|
@ -450,6 +453,11 @@ class VMInstance(object):
|
||||||
do_migrate_shutdown = False
|
do_migrate_shutdown = False
|
||||||
migrate_live_result = False
|
migrate_live_result = False
|
||||||
|
|
||||||
|
# Do a final verification
|
||||||
|
if self.node == self.lastnode or self.node == self.this_node.name:
|
||||||
|
abort_migrate()
|
||||||
|
return
|
||||||
|
|
||||||
# A live migrate is attempted 3 times in succession
|
# A live migrate is attempted 3 times in succession
|
||||||
ticks = 0
|
ticks = 0
|
||||||
while True:
|
while True:
|
||||||
|
@ -464,7 +472,6 @@ class VMInstance(object):
|
||||||
if not migrate_live_result:
|
if not migrate_live_result:
|
||||||
if force_live:
|
if force_live:
|
||||||
self.logger.out('Could not live migrate VM; live migration enforced, aborting', state='e', prefix='Domain {}'.format(self.domuuid))
|
self.logger.out('Could not live migrate VM; live migration enforced, aborting', state='e', prefix='Domain {}'.format(self.domuuid))
|
||||||
abort_migrate()
|
|
||||||
aborted = True
|
aborted = True
|
||||||
else:
|
else:
|
||||||
do_migrate_shutdown = True
|
do_migrate_shutdown = True
|
||||||
|
@ -474,6 +481,7 @@ class VMInstance(object):
|
||||||
self.logger.out('Released write lock for synchronization phase B', state='o')
|
self.logger.out('Released write lock for synchronization phase B', state='o')
|
||||||
|
|
||||||
if aborted:
|
if aborted:
|
||||||
|
abort_migrate()
|
||||||
return
|
return
|
||||||
|
|
||||||
# Synchronize nodes C (I am writer)
|
# Synchronize nodes C (I am writer)
|
||||||
|
@ -486,6 +494,8 @@ class VMInstance(object):
|
||||||
if do_migrate_shutdown:
|
if do_migrate_shutdown:
|
||||||
migrate_shutdown_result = migrate_shutdown()
|
migrate_shutdown_result = migrate_shutdown()
|
||||||
|
|
||||||
|
migrate_lock_state.release()
|
||||||
|
|
||||||
self.logger.out('Releasing write lock for synchronization phase C', state='i', prefix='Domain {}'.format(self.domuuid))
|
self.logger.out('Releasing write lock for synchronization phase C', state='i', prefix='Domain {}'.format(self.domuuid))
|
||||||
lock.release()
|
lock.release()
|
||||||
self.logger.out('Released write lock for synchronization phase C', state='o')
|
self.logger.out('Released write lock for synchronization phase C', state='o')
|
||||||
|
@ -501,7 +511,7 @@ class VMInstance(object):
|
||||||
|
|
||||||
# Wait 0.5 seconds for everything to stabilize before we declare all-done
|
# Wait 0.5 seconds for everything to stabilize before we declare all-done
|
||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
migrate_lock.release()
|
migrate_lock_node.release()
|
||||||
self.inmigrate = False
|
self.inmigrate = False
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue