Simplify VM migration down to 3 steps
Remove two superfluous synchronization steps which are not needed here, since the exclusive lock handles that situation anyway. This still does not fix the weird flush->unflush lock timeout bug, but it is better worked around now, because cancelling the other wait frees this up and lets it continue.
This commit is contained in:
parent
0d72798814
commit
55221b3d97
|
@ -507,16 +507,9 @@ class VMInstance(object):
|
||||||
aborted = True
|
aborted = True
|
||||||
else:
|
else:
|
||||||
migrate_shutdown()
|
migrate_shutdown()
|
||||||
lock.release()
|
|
||||||
|
|
||||||
if aborted:
|
if aborted:
|
||||||
abort_migrate('Live migration failed and is required')
|
abort_migrate('Live migration failed and is required')
|
||||||
return
|
return
|
||||||
|
|
||||||
time.sleep(0.5)
|
|
||||||
|
|
||||||
self.logger.out('Acquiring lock for phase D', state='i', prefix='Domain {}'.format(self.domuuid))
|
|
||||||
lock.acquire()
|
|
||||||
self.last_currentnode = self.zkhandler.read(('domain.node', self.domuuid))
|
self.last_currentnode = self.zkhandler.read(('domain.node', self.domuuid))
|
||||||
self.last_lastnode = self.zkhandler.read(('domain.last_node', self.domuuid))
|
self.last_lastnode = self.zkhandler.read(('domain.last_node', self.domuuid))
|
||||||
migrate_lock_node.release()
|
migrate_lock_node.release()
|
||||||
|
@ -541,7 +534,7 @@ class VMInstance(object):
|
||||||
(('domain.migrate.sync_lock', self.domuuid), self.domuuid)
|
(('domain.migrate.sync_lock', self.domuuid), self.domuuid)
|
||||||
])
|
])
|
||||||
|
|
||||||
self.logger.out('Acquiring lock for phase A', state='i', prefix='Domain {}'.format(self.domuuid))
|
self.logger.out('Acquiring lock for migrate synchronization', state='i', prefix='Domain {}'.format(self.domuuid))
|
||||||
lock = self.zkhandler.exclusivelock(('domain.migrate.sync_lock', self.domuuid))
|
lock = self.zkhandler.exclusivelock(('domain.migrate.sync_lock', self.domuuid))
|
||||||
try:
|
try:
|
||||||
lock.acquire(timeout=30.0)
|
lock.acquire(timeout=30.0)
|
||||||
|
@ -552,23 +545,7 @@ class VMInstance(object):
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
lock.release()
|
lock.release()
|
||||||
|
|
||||||
time.sleep(0.5)
|
|
||||||
|
|
||||||
self.logger.out('Acquiring lock for phase C', state='i', prefix='Domain {}'.format(self.domuuid))
|
self.logger.out('Acquiring lock for phase C', state='i', prefix='Domain {}'.format(self.domuuid))
|
||||||
try:
|
|
||||||
# Wait for only 900 seconds on this step since we don't do anything and it can fail
|
|
||||||
# if a flush or unflush is cancelled. 900 seconds should be plenty for real long
|
|
||||||
# migrations while still avoiding an indefinite blocking here.
|
|
||||||
# TODO: Really dig into why
|
|
||||||
lock.acquire(timeout=900)
|
|
||||||
# This is strictly a synchronizing step
|
|
||||||
lock.release()
|
|
||||||
except Exception:
|
|
||||||
self.logger.out('Failed to acquire lock for phase C within 15 minutes, continuing', state='w', prefix='Domain {}'.format(self.domuuid))
|
|
||||||
|
|
||||||
time.sleep(0.5)
|
|
||||||
|
|
||||||
self.logger.out('Acquiring lock for phase E', state='i', prefix='Domain {}'.format(self.domuuid))
|
|
||||||
lock.acquire()
|
lock.acquire()
|
||||||
# Set the updated data
|
# Set the updated data
|
||||||
self.last_currentnode = self.zkhandler.read(('domain.node', self.domuuid))
|
self.last_currentnode = self.zkhandler.read(('domain.node', self.domuuid))
|
||||||
|
|
Loading…
Reference in New Issue