Simplify locking process for VM migration

Rather than using a cumbersome and overly complex ping-pong of read and
write locks, instead move to a much simpler process using exclusive
locks.

Describing the process in ASCII or narrative is cumbersome, but the
process ping-pongs via a set of exclusive locks and wait timers, so that
the two sides are able to synchronize via blocking the exclusive lock.
The end result is a much more streamlined migration (takes about half
the time all things considered) which should be less error-prone.
This commit is contained in:
Joshua Boniface 2021-09-26 22:01:37 -04:00
parent 3b805cdc34
commit 2a4f38e933
1 changed files with 30 additions and 109 deletions

View File

@ -425,42 +425,12 @@ class VMInstance(object):
migrate_lock_node.acquire() migrate_lock_node.acquire()
migrate_lock_state.acquire() migrate_lock_state.acquire()
time.sleep(0.2) # Initial delay for the first writer to grab the lock
# Don't try to migrate a node to itself, set back to start # Don't try to migrate a node to itself, set back to start
if self.node == self.lastnode or self.node == self.this_node.name: if self.node == self.lastnode or self.node == self.this_node.name:
abort_migrate('Target node matches the current active node during initial check') abort_migrate('Target node matches the current active node during initial check')
return return
# Synchronize nodes A (I am reader) time.sleep(0.5) # Initial delay for the first writer to grab the lock
lock = self.zkhandler.readlock(('domain.migrate.sync_lock', self.domuuid))
self.logger.out('Acquiring read lock for synchronization phase A', state='i', prefix='Domain {}'.format(self.domuuid))
lock.acquire()
self.logger.out('Acquired read lock for synchronization phase A', state='o', prefix='Domain {}'.format(self.domuuid))
if self.zkhandler.read(('domain.migrate.sync_lock', self.domuuid)) == '':
self.logger.out('Waiting for peer', state='i', prefix='Domain {}'.format(self.domuuid))
ticks = 0
while self.zkhandler.read(('domain.migrate.sync_lock', self.domuuid)) == '':
time.sleep(0.1)
ticks += 1
if ticks > 300:
self.logger.out('Timed out waiting 30s for peer', state='e', prefix='Domain {}'.format(self.domuuid))
aborted = True
break
self.logger.out('Releasing read lock for synchronization phase A', state='i', prefix='Domain {}'.format(self.domuuid))
lock.release()
self.logger.out('Released read lock for synchronization phase A', state='o', prefix='Domain {}'.format(self.domuuid))
if aborted:
abort_migrate('Timed out waiting for peer')
return
# Synchronize nodes B (I am writer)
lock = self.zkhandler.writelock(('domain.migrate.sync_lock', self.domuuid))
self.logger.out('Acquiring write lock for synchronization phase B', state='i', prefix='Domain {}'.format(self.domuuid))
lock.acquire()
self.logger.out('Acquired write lock for synchronization phase B', state='o', prefix='Domain {}'.format(self.domuuid))
time.sleep(0.5) # Time for reader to acquire the lock
def migrate_live(): def migrate_live():
self.logger.out('Setting up live migration', state='i', prefix='Domain {}'.format(self.domuuid)) self.logger.out('Setting up live migration', state='i', prefix='Domain {}'.format(self.domuuid))
@ -500,9 +470,14 @@ class VMInstance(object):
self.shutdown_vm() self.shutdown_vm()
return True return True
do_migrate_shutdown = False self.logger.out('Acquiring lock for phase B', state='i')
lock = self.zkhandler.exclusivelock(('domain.migrate.sync_lock', self.domuuid))
try:
lock.acquire(timeout=30.0)
except Exception:
abort_migrate('Timed out waiting for peer')
return
migrate_live_result = False migrate_live_result = False
# Do a final verification # Do a final verification
if self.node == self.lastnode or self.node == self.this_node.name: if self.node == self.lastnode or self.node == self.this_node.name:
abort_migrate('Target node matches the current active node during final check') abort_migrate('Target node matches the current active node during final check')
@ -510,7 +485,6 @@ class VMInstance(object):
if self.node != target_node: if self.node != target_node:
abort_migrate('Target node changed during preparation') abort_migrate('Target node changed during preparation')
return return
if not force_shutdown: if not force_shutdown:
# A live migrate is attemped 3 times in succession # A live migrate is attemped 3 times in succession
ticks = 0 ticks = 0
@ -525,59 +499,27 @@ class VMInstance(object):
break break
else: else:
migrate_live_result = False migrate_live_result = False
if not migrate_live_result: if not migrate_live_result:
if force_live: if force_live:
self.logger.out('Could not live migrate VM while live migration enforced', state='e', prefix='Domain {}'.format(self.domuuid)) self.logger.out('Could not live migrate VM while live migration enforced', state='e', prefix='Domain {}'.format(self.domuuid))
aborted = True aborted = True
else: else:
do_migrate_shutdown = True migrate_shutdown()
self.logger.out('Releasing write lock for synchronization phase B', state='i', prefix='Domain {}'.format(self.domuuid))
lock.release() lock.release()
self.logger.out('Released write lock for synchronization phase B', state='o', prefix='Domain {}'.format(self.domuuid))
if aborted: if aborted:
abort_migrate('Live migration failed and is required') abort_migrate('Live migration failed and is required')
return return
# Synchronize nodes C (I am writer) time.sleep(0.5)
lock = self.zkhandler.writelock(('domain.migrate.sync_lock', self.domuuid))
self.logger.out('Acquiring write lock for synchronization phase C', state='i', prefix='Domain {}'.format(self.domuuid)) self.logger.out('Acquiring lock for phase D', state='i')
lock.acquire() lock.acquire()
self.logger.out('Acquired write lock for synchronization phase C', state='o', prefix='Domain {}'.format(self.domuuid))
time.sleep(0.5) # Time for reader to acquire the lock
if do_migrate_shutdown:
migrate_shutdown()
self.logger.out('Releasing write lock for synchronization phase C', state='i', prefix='Domain {}'.format(self.domuuid))
lock.release()
self.logger.out('Released write lock for synchronization phase C', state='o', prefix='Domain {}'.format(self.domuuid))
# Synchronize nodes D (I am reader)
lock = self.zkhandler.readlock(('domain.migrate.sync_lock', self.domuuid))
self.logger.out('Acquiring read lock for synchronization phase D', state='i', prefix='Domain {}'.format(self.domuuid))
lock.acquire()
self.logger.out('Acquired read lock for synchronization phase D', state='o', prefix='Domain {}'.format(self.domuuid))
self.last_currentnode = self.zkhandler.read(('domain.node', self.domuuid)) self.last_currentnode = self.zkhandler.read(('domain.node', self.domuuid))
self.last_lastnode = self.zkhandler.read(('domain.last_node', self.domuuid)) self.last_lastnode = self.zkhandler.read(('domain.last_node', self.domuuid))
self.logger.out('Releasing read lock for synchronization phase D', state='i', prefix='Domain {}'.format(self.domuuid))
lock.release()
self.logger.out('Released read lock for synchronization phase D', state='o', prefix='Domain {}'.format(self.domuuid))
# Wait for the receive side to complete before we declare all-done and release locks
ticks = 0
while self.zkhandler.read(('domain.migrate.sync_lock', self.domuuid)) != '':
time.sleep(0.1)
ticks += 1
if ticks > 100:
self.logger.out('Sync lock clear exceeded 10s timeout, continuing', state='w', prefix='Domain {}'.format(self.domuuid))
break
migrate_lock_node.release() migrate_lock_node.release()
migrate_lock_state.release() migrate_lock_state.release()
lock.release()
self.inmigrate = False self.inmigrate = False
return return
@ -592,55 +534,37 @@ class VMInstance(object):
self.logger.out('Receiving VM migration from node "{}"'.format(self.last_currentnode), state='i', prefix='Domain {}'.format(self.domuuid)) self.logger.out('Receiving VM migration from node "{}"'.format(self.last_currentnode), state='i', prefix='Domain {}'.format(self.domuuid))
# Short delay to ensure sender is in sync
time.sleep(0.5)
# Ensure our lock key is populated # Ensure our lock key is populated
self.zkhandler.write([ self.zkhandler.write([
(('domain.migrate.sync_lock', self.domuuid), self.domuuid) (('domain.migrate.sync_lock', self.domuuid), self.domuuid)
]) ])
# Synchronize nodes A (I am writer) self.logger.out('Acquiring lock for phase A', state='i')
lock = self.zkhandler.writelock(('domain.migrate.sync_lock', self.domuuid)) lock = self.zkhandler.exclusivelock(('domain.migrate.sync_lock', self.domuuid))
self.logger.out('Acquiring write lock for synchronization phase A', state='i', prefix='Domain {}'.format(self.domuuid)) try:
lock.acquire() lock.acquire(timeout=30.0)
self.logger.out('Acquired write lock for synchronization phase A', state='o', prefix='Domain {}'.format(self.domuuid)) except Exception:
time.sleep(1) # Time for reader to acquire the lock self.logger.out('Failed to acquire exclusive lock for VM', state='w')
self.logger.out('Releasing write lock for synchronization phase A', state='i', prefix='Domain {}'.format(self.domuuid)) return
# Exactly twice the amount of time that the other side is waiting
time.sleep(1)
lock.release() lock.release()
self.logger.out('Released write lock for synchronization phase A', state='o', prefix='Domain {}'.format(self.domuuid))
time.sleep(0.1) # Time for new writer to acquire the lock
# Synchronize nodes B (I am reader) time.sleep(0.5)
lock = self.zkhandler.readlock(('domain.migrate.sync_lock', self.domuuid))
self.logger.out('Acquiring read lock for synchronization phase B', state='i', prefix='Domain {}'.format(self.domuuid)) self.logger.out('Acquiring lock for phase C', state='i')
lock.acquire() lock.acquire()
self.logger.out('Acquired read lock for synchronization phase B', state='o', prefix='Domain {}'.format(self.domuuid)) # This is strictly a synchrozing step
self.logger.out('Releasing read lock for synchronization phase B', state='i', prefix='Domain {}'.format(self.domuuid)) time.sleep(0.1)
lock.release() lock.release()
self.logger.out('Released read lock for synchronization phase B', state='o', prefix='Domain {}'.format(self.domuuid))
# Synchronize nodes C (I am reader) time.sleep(0.5)
lock = self.zkhandler.readlock(('domain.migrate.sync_lock', self.domuuid))
self.logger.out('Acquiring read lock for synchronization phase C', state='i', prefix='Domain {}'.format(self.domuuid)) self.logger.out('Acquiring lock for phase E', state='i')
lock.acquire() lock.acquire()
self.logger.out('Acquired read lock for synchronization phase C', state='o', prefix='Domain {}'.format(self.domuuid))
# Set the updated data # Set the updated data
self.last_currentnode = self.zkhandler.read(('domain.node', self.domuuid)) self.last_currentnode = self.zkhandler.read(('domain.node', self.domuuid))
self.last_lastnode = self.zkhandler.read(('domain.last_node', self.domuuid)) self.last_lastnode = self.zkhandler.read(('domain.last_node', self.domuuid))
self.logger.out('Releasing read lock for synchronization phase C', state='i', prefix='Domain {}'.format(self.domuuid))
lock.release()
self.logger.out('Released read lock for synchronization phase C', state='o', prefix='Domain {}'.format(self.domuuid))
# Synchronize nodes D (I am writer)
lock = self.zkhandler.writelock(('domain.migrate.sync_lock', self.domuuid))
self.logger.out('Acquiring write lock for synchronization phase D', state='i', prefix='Domain {}'.format(self.domuuid))
lock.acquire()
self.logger.out('Acquired write lock for synchronization phase D', state='o', prefix='Domain {}'.format(self.domuuid))
time.sleep(0.5) # Time for reader to acquire the lock
self.state = self.zkhandler.read(('domain.state', self.domuuid)) self.state = self.zkhandler.read(('domain.state', self.domuuid))
self.dom = self.lookupByUUID(self.domuuid) self.dom = self.lookupByUUID(self.domuuid)
if self.dom: if self.dom:
@ -672,10 +596,7 @@ class VMInstance(object):
else: else:
# The send failed or was aborted # The send failed or was aborted
self.logger.out('Migrate aborted or failed; VM in state {}'.format(self.state), state='w', prefix='Domain {}'.format(self.domuuid)) self.logger.out('Migrate aborted or failed; VM in state {}'.format(self.state), state='w', prefix='Domain {}'.format(self.domuuid))
self.logger.out('Releasing write lock for synchronization phase D', state='i', prefix='Domain {}'.format(self.domuuid))
lock.release() lock.release()
self.logger.out('Released write lock for synchronization phase D', state='o', prefix='Domain {}'.format(self.domuuid))
self.zkhandler.write([ self.zkhandler.write([
(('domain.migrate.sync_lock', self.domuuid), '') (('domain.migrate.sync_lock', self.domuuid), '')