Adjust timing to avoid migrating to self quickly
Add another separate state lock, release it earlier, and ensure timings are good to avoid double-migrating one VM.
This commit is contained in:
		| @@ -350,12 +350,6 @@ class VMInstance(object): | |||||||
|  |  | ||||||
|     # Migrate the VM to a target host |     # Migrate the VM to a target host | ||||||
|     def migrate_vm(self, force_live=False): |     def migrate_vm(self, force_live=False): | ||||||
|         # Don't try to migrate a node to itself, set back to start |  | ||||||
|         if self.node == self.lastnode: |  | ||||||
|             zkhandler.writedata(self.zk_conn, { '/domains/{}/state'.format(self.domuuid): 'start' }) |  | ||||||
|             zkhandler.writedata(self.zk_conn, { '/domains/{}/lastnode'.format(self.domuuid): '' }) |  | ||||||
|             return |  | ||||||
|  |  | ||||||
|         self.inmigrate = True |         self.inmigrate = True | ||||||
|         self.logger.out('Migrating VM to node "{}"'.format(self.node), state='i', prefix='Domain {}'.format(self.domuuid)) |         self.logger.out('Migrating VM to node "{}"'.format(self.node), state='i', prefix='Domain {}'.format(self.domuuid)) | ||||||
|  |  | ||||||
| @@ -367,13 +361,22 @@ class VMInstance(object): | |||||||
|                 '/domains/{}/node'.format(self.domuuid): self.this_node.name, |                 '/domains/{}/node'.format(self.domuuid): self.this_node.name, | ||||||
|                 '/domains/{}/lastnode'.format(self.domuuid): self.last_lastnode |                 '/domains/{}/lastnode'.format(self.domuuid): self.last_lastnode | ||||||
|             }) |             }) | ||||||
|  |             migrate_lock_node.release() | ||||||
|  |             migrate_lock_state.release() | ||||||
|  |  | ||||||
|         # Acquire exclusive lock on the domain node key |         # Acquire exclusive lock on the domain node key | ||||||
|         migrate_lock = zkhandler.exclusivelock(self.zk_conn, '/domains/{}/node'.format(self.domuuid)) |         migrate_lock_node = zkhandler.exclusivelock(self.zk_conn, '/domains/{}/node'.format(self.domuuid)) | ||||||
|         migrate_lock.acquire() |         migrate_lock_state = zkhandler.exclusivelock(self.zk_conn, '/domains/{}/state'.format(self.domuuid)) | ||||||
|  |         migrate_lock_node.acquire() | ||||||
|  |         migrate_lock_state.acquire() | ||||||
|  |  | ||||||
|         time.sleep(0.2) # Initial delay for the first writer to grab the lock |         time.sleep(0.2) # Initial delay for the first writer to grab the lock | ||||||
|  |  | ||||||
|  |         # Don't try to migrate a node to itself, set back to start | ||||||
|  |         if self.node == self.lastnode or self.node == self.this_node.name: | ||||||
|  |             abort_migrate() | ||||||
|  |             return | ||||||
|  |  | ||||||
|         # Synchronize nodes A (I am reader) |         # Synchronize nodes A (I am reader) | ||||||
|         lock = zkhandler.readlock(self.zk_conn, '/locks/domain_migrate/{}'.format(self.domuuid)) |         lock = zkhandler.readlock(self.zk_conn, '/locks/domain_migrate/{}'.format(self.domuuid)) | ||||||
|         self.logger.out('Acquiring read lock for synchronization phase A', state='i', prefix='Domain {}'.format(self.domuuid)) |         self.logger.out('Acquiring read lock for synchronization phase A', state='i', prefix='Domain {}'.format(self.domuuid)) | ||||||
| @@ -387,7 +390,6 @@ class VMInstance(object): | |||||||
|                 ticks += 1 |                 ticks += 1 | ||||||
|                 if ticks > 300: |                 if ticks > 300: | ||||||
|                     self.logger.out('Timed out waiting 30s for peer, aborting migration', state='e', prefix='Domain {}'.format(self.domuuid)) |                     self.logger.out('Timed out waiting 30s for peer, aborting migration', state='e', prefix='Domain {}'.format(self.domuuid)) | ||||||
|                     abort_migrate() |  | ||||||
|                     aborted = True |                     aborted = True | ||||||
|                     break |                     break | ||||||
|         self.logger.out('Releasing read lock for synchronization phase A', state='i', prefix='Domain {}'.format(self.domuuid)) |         self.logger.out('Releasing read lock for synchronization phase A', state='i', prefix='Domain {}'.format(self.domuuid)) | ||||||
| @@ -395,6 +397,7 @@ class VMInstance(object): | |||||||
|         self.logger.out('Released read lock for synchronization phase A', state='o', prefix='Domain {}'.format(self.domuuid)) |         self.logger.out('Released read lock for synchronization phase A', state='o', prefix='Domain {}'.format(self.domuuid)) | ||||||
|  |  | ||||||
|         if aborted: |         if aborted: | ||||||
|  |             abort_migrate() | ||||||
|             return |             return | ||||||
|  |  | ||||||
|         # Synchronize nodes B (I am writer) |         # Synchronize nodes B (I am writer) | ||||||
| @@ -450,6 +453,11 @@ class VMInstance(object): | |||||||
|         do_migrate_shutdown = False |         do_migrate_shutdown = False | ||||||
|         migrate_live_result = False |         migrate_live_result = False | ||||||
|  |  | ||||||
|  |         # Do a final verification | ||||||
|  |         if self.node == self.lastnode or self.node == self.this_node.name: | ||||||
|  |             abort_migrate() | ||||||
|  |             return | ||||||
|  |  | ||||||
|         # A live migrate is attemped 3 times in succession |         # A live migrate is attemped 3 times in succession | ||||||
|         ticks = 0 |         ticks = 0 | ||||||
|         while True: |         while True: | ||||||
| @@ -464,7 +472,6 @@ class VMInstance(object): | |||||||
|         if not migrate_live_result: |         if not migrate_live_result: | ||||||
|             if force_live: |             if force_live: | ||||||
|                 self.logger.out('Could not live migrate VM; live migration enforced, aborting', state='e', prefix='Domain {}'.format(self.domuuid)) |                 self.logger.out('Could not live migrate VM; live migration enforced, aborting', state='e', prefix='Domain {}'.format(self.domuuid)) | ||||||
|                 abort_migrate() |  | ||||||
|                 aborted = True |                 aborted = True | ||||||
|             else: |             else: | ||||||
|                 do_migrate_shutdown = True |                 do_migrate_shutdown = True | ||||||
| @@ -474,6 +481,7 @@ class VMInstance(object): | |||||||
|         self.logger.out('Released write lock for synchronization phase B', state='o') |         self.logger.out('Released write lock for synchronization phase B', state='o') | ||||||
|  |  | ||||||
|         if aborted: |         if aborted: | ||||||
|  |             abort_migrate() | ||||||
|             return |             return | ||||||
|  |  | ||||||
|         # Synchronize nodes C (I am writer) |         # Synchronize nodes C (I am writer) | ||||||
| @@ -486,6 +494,8 @@ class VMInstance(object): | |||||||
|         if do_migrate_shutdown: |         if do_migrate_shutdown: | ||||||
|             migrate_shutdown_result = migrate_shutdown() |             migrate_shutdown_result = migrate_shutdown() | ||||||
|  |  | ||||||
|  |         migrate_lock_state.release() | ||||||
|  |  | ||||||
|         self.logger.out('Releasing write lock for synchronization phase C', state='i', prefix='Domain {}'.format(self.domuuid)) |         self.logger.out('Releasing write lock for synchronization phase C', state='i', prefix='Domain {}'.format(self.domuuid)) | ||||||
|         lock.release() |         lock.release() | ||||||
|         self.logger.out('Released write lock for synchronization phase C', state='o') |         self.logger.out('Released write lock for synchronization phase C', state='o') | ||||||
| @@ -501,7 +511,7 @@ class VMInstance(object): | |||||||
|  |  | ||||||
|         # Wait 0.5 seconds for everything to stabilize before we declare all-done |         # Wait 0.5 seconds for everything to stabilize before we declare all-done | ||||||
|         time.sleep(0.5) |         time.sleep(0.5) | ||||||
|         migrate_lock.release() |         migrate_lock_node.release() | ||||||
|         self.inmigrate = False |         self.inmigrate = False | ||||||
|         return |         return | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user