Remove flush locking functionality

This just seemed like more trouble than it was worth. Flush locks were
originally intended to counteract the odd issues around flushing that
were mostly fixed by the code refactoring, so removing them will help
test whether those issues are truly gone. If they are not, a cleaner
solution that doesn't leave things in an unchangeable state will be
investigated.
Joshua Boniface 2019-07-09 23:15:43 -04:00
parent ad284b13bc
commit 7a8aee9fe7
3 changed files with 5 additions and 75 deletions
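For reference, the removed lock was a plain Zookeeper key, /locks/flush_lock, holding the string 'True' or 'False'. Nothing tied that value to the liveness of the daemon that set it, so a crash between acquire and release left the key stuck at 'True' and every later flush waited forever; that is the "unchangeable state" the message refers to. If locking is ever reintroduced, one candidate for the cleaner solution is kazoo's Lock recipe. A minimal sketch, assuming the underlying Zookeeper client is kazoo; the lock path and identifier are illustrative and not part of this commit:

# Sketch only: kazoo's Lock recipe in place of a plain-value lock. The lock
# is backed by an ephemeral znode, so if the holder's session dies it is
# released automatically instead of staying held forever.
from kazoo.client import KazooClient

zk = KazooClient(hosts='127.0.0.1:2181')
zk.start()

lock = zk.Lock('/locks/flush', 'node1')  # hypothetical path and identifier
with lock:
    pass  # ... perform the flush; released on exit or on session loss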


@@ -1648,8 +1648,6 @@ def init_cluster(yes):
     transaction.create('/ceph/pools', ''.encode('ascii'))
     transaction.create('/ceph/volumes', ''.encode('ascii'))
     transaction.create('/ceph/snapshots', ''.encode('ascii'))
-    transaction.create('/locks', ''.encode('ascii'))
-    transaction.create('/locks/flush_lock', 'False'.encode('ascii'))
     transaction.commit()
 
     # Close the Zookeeper connection


@@ -148,25 +148,7 @@ def flush_node(zk_conn, node, wait):
     if not common.verifyNode(zk_conn, node):
         return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node)
 
-    if zkhandler.readdata(zk_conn, '/locks/flush_lock') == 'True':
-        if not wait:
-            retmsg = 'A lock currently exists; use --wait to wait for it, or try again later.'.format(node)
-            return False, retmsg
-        retmsg = 'A lock currently exists; waiting for it to complete... '
-        lock_wait = True
-    else:
-        retmsg = 'Flushing hypervisor {} of running VMs.'.format(node)
-        lock_wait = False
-
-    # Wait cannot be triggered from the API
-    if wait:
-        click.echo(retmsg)
-        retmsg = ""
-
-    if lock_wait:
-        time.sleep(2)
-        while zkhandler.readdata(zk_conn, '/locks/flush_lock') == 'True':
-            time.sleep(2)
-        click.echo('Previous flush completed. Proceeding with flush.')
+    retmsg = 'Flushing hypervisor {} of running VMs.'.format(node)
 
     # Add the new domain to Zookeeper
     zkhandler.writedata(zk_conn, {
@@ -175,9 +157,8 @@ def flush_node(zk_conn, node, wait):
     # Wait cannot be triggered from the API
     if wait:
-        time.sleep(2)
-        while zkhandler.readdata(zk_conn, '/locks/flush_lock') == 'True':
-            time.sleep(2)
+        while zkhandler.readdata(zk_conn, '/nodes/{}/domainstate'.format(node)) == 'flush':
+            time.sleep(1)
 
     return True, retmsg
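The reworked --wait path polls the node's own domainstate once per second. Should polling ever become a concern, a data watch gives an event-driven equivalent. A sketch, assuming zk_conn is a kazoo KazooClient; the helper name is hypothetical and not part of this commit:

import threading

# Sketch only: block until the flush finishes, using a watch instead of a
# polling loop.
def wait_until_flushed(zk_conn, node):
    done = threading.Event()

    @zk_conn.DataWatch('/nodes/{}/domainstate'.format(node))
    def _watch(data, stat):
        # Called once immediately, then on every change to the key.
        if data is not None and data.decode('ascii') != 'flush':
            done.set()
            return False  # returning False deregisters the watch

    done.wait()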
@@ -186,25 +167,7 @@ def ready_node(zk_conn, node, wait):
     if not common.verifyNode(zk_conn, node):
         return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node)
 
-    if zkhandler.readdata(zk_conn, '/locks/flush_lock') == 'True':
-        if not wait:
-            retmsg = 'A lock currently exists; use --wait to wait for it, or try again later.'.format(node)
-            return False, retmsg
-        retmsg = 'A lock currently exists; waiting for it to complete... '
-        lock_wait = True
-    else:
-        retmsg = 'Restoring hypervisor {} to active service.'.format(node)
-        lock_wait = False
-
-    # Wait cannot be triggered from the API
-    if wait:
-        click.echo(retmsg)
-        retmsg = ""
-
-    if lock_wait:
-        time.sleep(1)
-        while zkhandler.readdata(zk_conn, '/locks/flush_lock') == 'True':
-            time.sleep(1)
-        click.echo('Previous flush completed. Proceeding with unflush.')
+    retmsg = 'Restoring hypervisor {} to active service.'.format(node)
 
     # Add the new domain to Zookeeper
     zkhandler.writedata(zk_conn, {
@@ -213,8 +176,7 @@ def ready_node(zk_conn, node, wait):
     # Wait cannot be triggered from the API
     if wait:
-        time.sleep(1)
-        while zkhandler.readdata(zk_conn, '/locks/flush_lock') == 'True':
-            time.sleep(1)
+        while zkhandler.readdata(zk_conn, '/nodes/{}/domainstate'.format(node)) == 'unflush':
+            time.sleep(1)
 
     return True, retmsg
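After this change, flush_node() and ready_node() end with the same wait loop and differ only in the transitional state they poll ('flush' versus 'unflush'). The duplication could be folded into one helper; a hypothetical sketch, not part of this commit:

import time

# Hypothetical helper; zkhandler is the same module used in the diffs above.
def wait_for_domainstate(zk_conn, node, transitional_state, interval=1):
    # Block while the node is still in the given transitional state.
    while zkhandler.readdata(zk_conn, '/nodes/{}/domainstate'.format(node)) == transitional_state:
        time.sleep(interval)

flush_node() would then call wait_for_domainstate(zk_conn, node, 'flush'), and ready_node() the same with 'unflush'.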


@@ -343,16 +343,6 @@ class NodeInstance(object):
     # Flush all VMs on the host
     def flush(self):
-        # Wait indefinitely for the flush_lock to be freed
-        time.sleep(0.5)
-        while zkhandler.readdata(self.zk_conn, '/locks/flush_lock') == 'True':
-            time.sleep(2)
-
-        # Acquire the flush lock
-        zkhandler.writedata(self.zk_conn, {
-            '/locks/flush_lock': 'True'
-        })
-
         # Begin flush
         self.inflush = True
         self.logger.out('Flushing node "{}" of running VMs'.format(self.name), state='i')
@@ -394,22 +384,7 @@ class NodeInstance(object):
         zkhandler.writedata(self.zk_conn, { '/nodes/{}/domainstate'.format(self.name): 'flushed' })
         self.inflush = False
 
-        # Release the flush lock
-        zkhandler.writedata(self.zk_conn, {
-            '/locks/flush_lock': 'False'
-        })
-
     def unflush(self):
-        # Wait indefinitely for the flush_lock to be freed
-        time.sleep(0.5)
-        while zkhandler.readdata(self.zk_conn, '/locks/flush_lock') == 'True':
-            time.sleep(2)
-
-        # Acquire the flush lock
-        zkhandler.writedata(self.zk_conn, {
-            '/locks/flush_lock': 'True'
-        })
-
         self.inflush = True
         self.logger.out('Restoring node {} to active service.'.format(self.name), state='i')
         fixed_domain_list = self.d_domain.copy()
@@ -436,8 +411,3 @@ class NodeInstance(object):
         zkhandler.writedata(self.zk_conn, { '/nodes/{}/domainstate'.format(self.name): 'ready' })
         self.inflush = False
-
-        # Release the flush lock
-        zkhandler.writedata(self.zk_conn, {
-            '/locks/flush_lock': 'False'
-        })
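With the lock gone, nothing serializes flushes across nodes: flush() calls on two different NodeInstance objects may now migrate VMs concurrently, which is exactly the situation this commit wants to re-test. A quick way to exercise that path from the common client code, as a sketch (connection setup elided; the node names are hypothetical):

import threading

# Sketch only: flush two hypervisors at once to exercise the now-concurrent
# path; flush_node() is the function changed in the diff above.
t1 = threading.Thread(target=flush_node, args=(zk_conn, 'hv1', True))
t2 = threading.Thread(target=flush_node, args=(zk_conn, 'hv2', True))
t1.start()
t2.start()
t1.join()
t2.join()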