From 7a8aee9fe748d2f6af25fbacb105059e0f86d5ba Mon Sep 17 00:00:00 2001
From: "Joshua M. Boniface"
Date: Tue, 9 Jul 2019 23:15:43 -0400
Subject: [PATCH] Remove flush locking functionality

This just seemed like more trouble than it was worth. Flush locks were
originally intended as a way to counteract the weird issues around
flushing that were mostly fixed by the code refactoring, so this will
help test if those issues are truly gone. If not, will look into a
cleaner solution that doesn't result in unchangeable states.
---
 client-cli/pvc.py                |  2 --
 client-common/node.py            | 48 ++++----------------
 node-daemon/pvcd/NodeInstance.py | 30 --------------------
 3 files changed, 5 insertions(+), 75 deletions(-)

diff --git a/client-cli/pvc.py b/client-cli/pvc.py
index 56d337fa..ce7b0ea1 100755
--- a/client-cli/pvc.py
+++ b/client-cli/pvc.py
@@ -1648,8 +1648,6 @@ def init_cluster(yes):
     transaction.create('/ceph/pools', ''.encode('ascii'))
     transaction.create('/ceph/volumes', ''.encode('ascii'))
     transaction.create('/ceph/snapshots', ''.encode('ascii'))
-    transaction.create('/locks', ''.encode('ascii'))
-    transaction.create('/locks/flush_lock', 'False'.encode('ascii'))
     transaction.commit()
 
     # Close the Zookeeper connection
diff --git a/client-common/node.py b/client-common/node.py
index 0f3184b5..732c4959 100644
--- a/client-common/node.py
+++ b/client-common/node.py
@@ -148,25 +148,7 @@ def flush_node(zk_conn, node, wait):
     if not common.verifyNode(zk_conn, node):
         return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node)
 
-    if zkhandler.readdata(zk_conn, '/locks/flush_lock') == 'True':
-        if not wait:
-            retmsg = 'A lock currently exists; use --wait to wait for it, or try again later.'.format(node)
-            return False, retmsg
-        retmsg = 'A lock currently exists; waiting for it to complete... '
-        lock_wait = True
-    else:
-        retmsg = 'Flushing hypervisor {} of running VMs.'.format(node)
-        lock_wait = False
-
-    # Wait cannot be triggered from the API
-    if wait:
-        click.echo(retmsg)
-        retmsg = ""
-    if lock_wait:
-        time.sleep(2)
-        while zkhandler.readdata(zk_conn, '/locks/flush_lock') == 'True':
-            time.sleep(2)
-        click.echo('Previous flush completed. Proceeding with flush.')
+    retmsg = 'Flushing hypervisor {} of running VMs.'.format(node)
 
     # Add the new domain to Zookeeper
     zkhandler.writedata(zk_conn, {
@@ -175,9 +157,8 @@ def flush_node(zk_conn, node, wait):
 
     # Wait cannot be triggered from the API
     if wait:
-        time.sleep(2)
-        while zkhandler.readdata(zk_conn, '/locks/flush_lock') == 'True':
-            time.sleep(2)
+        while zkhandler.readdata(zk_conn, '/nodes/{}/domainstate'.format(node)) == 'flush':
+            time.sleep(1)
 
     return True, retmsg
 
@@ -186,26 +167,8 @@ def ready_node(zk_conn, node, wait):
     if not common.verifyNode(zk_conn, node):
         return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node)
 
-    if zkhandler.readdata(zk_conn, '/locks/flush_lock') == 'True':
-        if not wait:
-            retmsg = 'A lock currently exists; use --wait to wait for it, or try again later.'.format(node)
-            return False, retmsg
-        retmsg = 'A lock currently exists; waiting for it to complete... '
-        lock_wait = True
-    else:
-        retmsg = 'Restoring hypervisor {} to active service.'.format(node)
-        lock_wait = False
+    retmsg = 'Restoring hypervisor {} to active service.'.format(node)
 
-    # Wait cannot be triggered from the API
-    if wait:
-        click.echo(retmsg)
-        retmsg = ""
-    if lock_wait:
-        time.sleep(1)
-        while zkhandler.readdata(zk_conn, '/locks/flush_lock') == 'True':
-            time.sleep(1)
-        click.echo('Previous flush completed. Proceeding with unflush.')
-
     # Add the new domain to Zookeeper
     zkhandler.writedata(zk_conn, {
         '/nodes/{}/domainstate'.format(node): 'unflush'
@@ -213,8 +176,7 @@ def ready_node(zk_conn, node, wait):
 
     # Wait cannot be triggered from the API
     if wait:
-        time.sleep(1)
-        while zkhandler.readdata(zk_conn, '/locks/flush_lock') == 'True':
+        while zkhandler.readdata(zk_conn, '/nodes/{}/domainstate'.format(node)) == 'unflush':
             time.sleep(1)
 
     return True, retmsg
diff --git a/node-daemon/pvcd/NodeInstance.py b/node-daemon/pvcd/NodeInstance.py
index 8aacc289..dfc04a27 100644
--- a/node-daemon/pvcd/NodeInstance.py
+++ b/node-daemon/pvcd/NodeInstance.py
@@ -343,16 +343,6 @@ class NodeInstance(object):
 
     # Flush all VMs on the host
    def flush(self):
-        # Wait indefinitely for the flush_lock to be freed
-        time.sleep(0.5)
-        while zkhandler.readdata(self.zk_conn, '/locks/flush_lock') == 'True':
-            time.sleep(2)
-
-        # Acquire the flush lock
-        zkhandler.writedata(self.zk_conn, {
-            '/locks/flush_lock': 'True'
-        })
-
         # Begin flush
         self.inflush = True
         self.logger.out('Flushing node "{}" of running VMs'.format(self.name), state='i')
@@ -394,22 +384,7 @@ class NodeInstance(object):
 
         zkhandler.writedata(self.zk_conn, { '/nodes/{}/domainstate'.format(self.name): 'flushed' })
         self.inflush = False
-        # Release the flush lock
-        zkhandler.writedata(self.zk_conn, {
-            '/locks/flush_lock': 'False'
-        })
-
     def unflush(self):
-        # Wait indefinitely for the flush_lock to be freed
-        time.sleep(0.5)
-        while zkhandler.readdata(self.zk_conn, '/locks/flush_lock') == 'True':
-            time.sleep(2)
-
-        # Acquire the flush lock
-        zkhandler.writedata(self.zk_conn, {
-            '/locks/flush_lock': 'True'
-        })
-
         self.inflush = True
         self.logger.out('Restoring node {} to active service.'.format(self.name), state='i')
         fixed_domain_list = self.d_domain.copy()
@@ -436,8 +411,3 @@ class NodeInstance(object):
 
         zkhandler.writedata(self.zk_conn, { '/nodes/{}/domainstate'.format(self.name): 'ready' })
         self.inflush = False
-        # Release the flush lock
-        zkhandler.writedata(self.zk_conn, {
-            '/locks/flush_lock': 'False'
-        })
-
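
The patch replaces the cluster-wide /locks/flush_lock with a simpler convention: the client writes the desired transient state ('flush' or 'unflush') to the node's /nodes/<node>/domainstate key and, when --wait is given, polls that same key until the daemon moves the node out of the transient state. Below is a minimal standalone sketch of that polling pattern, talking to Zookeeper directly with kazoo instead of the project's zkhandler wrapper; the helper name wait_for_domainstate, the node name hv1, the Zookeeper address, and the 1-second interval are illustrative assumptions, not part of the patch.

# Sketch only: poll a node's domainstate key until its transient state clears.
# Assumes a reachable Zookeeper and an existing /nodes/<node>/domainstate key.
import time

from kazoo.client import KazooClient


def wait_for_domainstate(zk, node, transient_state, interval=1):
    # Block while /nodes/<node>/domainstate still reports the transient state
    # ('flush' while a flush is in progress, 'unflush' while restoring).
    path = '/nodes/{}/domainstate'.format(node)
    while zk.get(path)[0].decode('ascii') == transient_state:
        time.sleep(interval)


if __name__ == '__main__':
    zk = KazooClient(hosts='127.0.0.1:2181')
    zk.start()
    # Request a flush the same way the CLI does, then wait for it to finish.
    zk.set('/nodes/hv1/domainstate', 'flush'.encode('ascii'))
    wait_for_domainstate(zk, 'hv1', 'flush')
    zk.stop()

Because both flush_node and ready_node now key off the per-node domainstate rather than a shared lock, a crashed or interrupted flush can no longer strand the whole cluster behind a stale lock, which is the "unchangeable states" problem the commit message refers to.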