diff --git a/client-common/node.py b/client-common/node.py index 7b0f44a4..cdd8cf19 100644 --- a/client-common/node.py +++ b/client-common/node.py @@ -169,13 +169,34 @@ def ready_node(zk_conn, node): if not common.verifyNode(zk_conn, node): return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node) - retmsg = 'Restoring hypervisor {} to active service.'.format(node) + if zkhandler.readdata(zk_conn, '/locks/flush_lock') == 'True': + retmsg = 'Restoring hypervisor {} to active service. A flush lock currently exists; unflush will continue once the lock is freed.'.format(node) + lock_wait = True + else: + retmsg = 'Restoring hypervisor {} to active service.'.format(node) + lock_wait = False + + # Wait cannot be triggered from the API + if wait: + click.echo(retmsg) + retmsg = "" + if lock_wait: + time.sleep(1) + while zkhandler.readdata(zk_conn, '/locks/flush_lock') == 'True': + time.sleep(1) + click.echo('Previous flush completed. Proceeding with unflush.') # Add the new domain to Zookeeper zkhandler.writedata(zk_conn, { '/nodes/{}/domainstate'.format(node): 'unflush' }) + # Wait cannot be triggered from the API + if wait: + time.sleep(1) + while zkhandler.readdata(zk_conn, '/locks/flush_lock') == 'True': + time.sleep(1) + return True, retmsg def get_info(zk_conn, node): diff --git a/node-daemon/pvcd/NodeInstance.py b/node-daemon/pvcd/NodeInstance.py index 20939436..a0509a36 100644 --- a/node-daemon/pvcd/NodeInstance.py +++ b/node-daemon/pvcd/NodeInstance.py @@ -364,6 +364,16 @@ class NodeInstance(object): }) def unflush(self): + # Wait indefinitely for the flush_lock to be freed + time.sleep(0.5) + while zkhandler.readdata(self.zk_conn, '/locks/flush_lock') == 'True': + time.sleep(2) + + # Acquire the flush lock + zkhandler.writedata(self.zk_conn, { + '/locks/flush_lock': 'True' + }) + self.inflush = True self.logger.out('Restoring node {} to active service.'.format(self.name), state='i') zkhandler.writedata(self.zk_conn, { '/nodes/{}/domainstate'.format(self.name): 'ready' }) @@ -386,6 +396,11 @@ class NodeInstance(object): self.inflush = False + # Release the flush lock + zkhandler.writedata(self.zk_conn, { + '/locks/flush_lock': 'False' + }) + # # Find a migration target #