From f9b126a10616bb88e4ffa2ece71c6bb05a64a00a Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Mon, 17 Aug 2020 12:58:26 -0400 Subject: [PATCH] Make zkhandler accept failures more robustly Most of these would silently fail if there was e.g. an issue with the ZK connection. Instead, encase things in try blocks and handle the exceptions in a more graceful way, returning None or False if applicable. Except for locks, which should retry 5 times before aborting. --- node-daemon/pvcnoded/zkhandler.py | 187 ++++++++++++++++++------------ 1 file changed, 116 insertions(+), 71 deletions(-) diff --git a/node-daemon/pvcnoded/zkhandler.py b/node-daemon/pvcnoded/zkhandler.py index 199ff5eb..1e2c1710 100644 --- a/node-daemon/pvcnoded/zkhandler.py +++ b/node-daemon/pvcnoded/zkhandler.py @@ -24,55 +24,65 @@ import uuid # Child list function def listchildren(zk_conn, key): - children = zk_conn.get_children(key) - return children + try: + children = zk_conn.get_children(key) + return children + except Exception: + return None # Key deletion function def deletekey(zk_conn, key, recursive=True): - zk_conn.delete(key, recursive=recursive) + try: + zk_conn.delete(key, recursive=recursive) + return True + except Exception: + return False # Data read function def readdata(zk_conn, key): - data_raw = zk_conn.get(key) - data = data_raw[0].decode('utf8') - meta = data_raw[1] - return data + try: + data_raw = zk_conn.get(key) + data = data_raw[0].decode('utf8') + meta = data_raw[1] + return data + except Exception: + return None # Data write function def writedata(zk_conn, kv): - # Start up a transaction - zk_transaction = zk_conn.transaction() - - # Proceed one KV pair at a time - for key in sorted(kv): - data = kv[key] - if not data: - data = '' - - # Check if this key already exists or not - if not zk_conn.exists(key): - # We're creating a new key - zk_transaction.create(key, str(data).encode('utf8')) - else: - # We're updating a key with version validation - orig_data = zk_conn.get(key) - version = orig_data[1].version - - # Set what we expect the new version to be - new_version = version + 1 - - # Update the data - zk_transaction.set_data(key, str(data).encode('utf8')) - - # Set up the check - try: - zk_transaction.check(key, new_version) - except TypeError: - print('Zookeeper key "{}" does not match expected version'.format(key)) - return False - # Commit the transaction try: + # Start up a transaction + zk_transaction = zk_conn.transaction() + + # Proceed one KV pair at a time + for key in sorted(kv): + data = kv[key] + if not data: + data = '' + + # Check if this key already exists or not + if not zk_conn.exists(key): + # We're creating a new key + zk_transaction.create(key, str(data).encode('utf8')) + else: + # We're updating a key with version validation + orig_data = zk_conn.get(key) + version = orig_data[1].version + + # Set what we expect the new version to be + new_version = version + 1 + + # Update the data + zk_transaction.set_data(key, str(data).encode('utf8')) + + # Set up the check + try: + zk_transaction.check(key, new_version) + except TypeError: + print('Zookeeper key "{}" does not match expected version'.format(key)) + return False + zk_transaction.commit() return True except Exception: @@ -84,54 +94,89 @@ def renamekey(zk_conn, kv): # support either the recursive delete or recursive create operations that # we need. Why? No explanation in the docs that I can find. - # Proceed one KV pair at a time - for key in sorted(kv): - old_name = key - new_name = kv[key] + try: + # Proceed one KV pair at a time + for key in sorted(kv): + old_name = key + new_name = kv[key] - old_data = zk_conn.get(old_name)[0] + old_data = zk_conn.get(old_name)[0] - # Find the children of old_name recursively - child_keys = list() - def get_children(key): - children = zk_conn.get_children(key) - if not children: - child_keys.append(key) - else: - for ckey in children: - get_children('{}/{}'.format(key, ckey)) - get_children(old_name) + # Find the children of old_name recursively + child_keys = list() + def get_children(key): + children = zk_conn.get_children(key) + if not children: + child_keys.append(key) + else: + for ckey in children: + get_children('{}/{}'.format(key, ckey)) + get_children(old_name) - # Get the data out of each of the child keys - child_data = dict() - for ckey in child_keys: - child_data[ckey] = zk_conn.get(ckey)[0] + # Get the data out of each of the child keys + child_data = dict() + for ckey in child_keys: + child_data[ckey] = zk_conn.get(ckey)[0] - # Create the new parent key - zk_conn.create(new_name, old_data, makepath=True) + # Create the new parent key + zk_conn.create(new_name, old_data, makepath=True) - # For each child key, create the key and add the data - for ckey in child_keys: - new_ckey_name = ckey.replace(old_name, new_name) - zk_conn.create(new_ckey_name, child_data[ckey], makepath=True) + # For each child key, create the key and add the data + for ckey in child_keys: + new_ckey_name = ckey.replace(old_name, new_name) + zk_conn.create(new_ckey_name, child_data[ckey], makepath=True) - # Remove recursively the old key - zk_conn.delete(old_name, recursive=True) + # Remove recursively the old key + zk_conn.delete(old_name, recursive=True) + + return True + except Exception: + return False # Write lock function def writelock(zk_conn, key): - lock_id = str(uuid.uuid1()) - lock = zk_conn.WriteLock('{}'.format(key), lock_id) + count = 1 + while True: + try: + lock_id = str(uuid.uuid1()) + lock = zk_conn.WriteLock('{}'.format(key), lock_id) + break + except Exception: + count += 1 + if count > 5: + break + else: + continue return lock # Read lock function def readlock(zk_conn, key): - lock_id = str(uuid.uuid1()) - lock = zk_conn.ReadLock('{}'.format(key), lock_id) + count = 1 + while True: + try: + lock_id = str(uuid.uuid1()) + lock = zk_conn.ReadLock('{}'.format(key), lock_id) + break + except Exception: + count += 1 + if count > 5: + break + else: + continue return lock # Exclusive lock function def exclusivelock(zk_conn, key): - lock_id = str(uuid.uuid1()) - lock = zk_conn.Lock('{}'.format(key), lock_id) + count = 1 + while True: + try: + lock_id = str(uuid.uuid1()) + lock = zk_conn.Lock('{}'.format(key), lock_id) + break + except Exception: + count += 1 + if count > 5: + break + else: + continue return lock