#!/usr/bin/env python3

# zkhandler.py - Secure versioned ZooKeeper updates
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################

import uuid


# Child list function
def listchildren(zk_conn, key):
    try:
        children = zk_conn.get_children(key)
        return children
    except Exception:
        return None
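
# Example (illustrative; 'zk_conn' is a connected kazoo client and the path is
# a placeholder). Returns None rather than raising if the key is absent:
#   for child in listchildren(zk_conn, '/nodes') or []:
#       print(child)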


# Key deletion function
def deletekey(zk_conn, key, recursive=True):
    try:
        zk_conn.delete(key, recursive=recursive)
        return True
    except Exception:
        return False
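
# Example (illustrative; the path is a placeholder). Deletion is recursive by
# default, so any child keys are removed as well:
#   if not deletekey(zk_conn, '/nodes/oldnode'):
#       print('Failed to delete key')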


# Data read function
def readdata(zk_conn, key):
    try:
        data_raw = zk_conn.get(key)
        data = data_raw[0].decode('utf8')
        return data
    except Exception:
        return None
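
# Example (illustrative; the path is a placeholder). The stored bytes are
# returned as a UTF-8 string, or None if the key cannot be read:
#   state = readdata(zk_conn, '/nodes/node1/daemonstate')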


# Data write function
def writedata(zk_conn, kv):
    try:
        # Start up a transaction
        zk_transaction = zk_conn.transaction()

        # Proceed one KV pair at a time
        for key in sorted(kv):
            data = kv[key]
            if not data:
                data = ''

            # Check if this key already exists or not
            if not zk_conn.exists(key):
                # We're creating a new key
                zk_transaction.create(key, str(data).encode('utf8'))
            else:
                # We're updating a key with version validation
                orig_data = zk_conn.get(key)
                version = orig_data[1].version

                # Set what we expect the new version to be
                new_version = version + 1

                # Update the data
                zk_transaction.set_data(key, str(data).encode('utf8'))

                # Set up the check
                try:
                    zk_transaction.check(key, new_version)
                except TypeError:
                    print('Zookeeper key "{}" does not match expected version'.format(key))
                    return False

        # Commit the transaction
        zk_transaction.commit()
        return True
    except Exception:
        return False
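
# Example (illustrative; paths and values are placeholders). Every key in the
# dict is written within a single ZooKeeper transaction, so either all of the
# updates apply or none of them do:
#   writedata(zk_conn, {
#       '/nodes/node1/daemonstate': 'run',
#       '/nodes/node1/routerstate': 'secondary',
#   })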


# Key rename function
def renamekey(zk_conn, kv):
    # This one is not transactional because, inexplicably, transactions don't
    # support either the recursive delete or recursive create operations that
    # we need. Why? No explanation in the docs that I can find.
    try:
        # Proceed one KV pair at a time
        for key in sorted(kv):
            old_name = key
            new_name = kv[key]

            old_data = zk_conn.get(old_name)[0]

            child_keys = list()

            # Find the children of old_name recursively
            def get_children(key):
                children = zk_conn.get_children(key)
                if not children:
                    child_keys.append(key)
                else:
                    for ckey in children:
                        get_children('{}/{}'.format(key, ckey))
            get_children(old_name)

            # Get the data out of each of the child keys
            child_data = dict()
            for ckey in child_keys:
                child_data[ckey] = zk_conn.get(ckey)[0]

            # Create the new parent key
            zk_conn.create(new_name, old_data, makepath=True)

            # For each child key, create the key and add the data
            for ckey in child_keys:
                new_ckey_name = ckey.replace(old_name, new_name)
                zk_conn.create(new_ckey_name, child_data[ckey], makepath=True)

            # Remove the old key recursively
            zk_conn.delete(old_name, recursive=True)

        return True
    except Exception:
        return False
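
# Example (illustrative; paths are placeholders). The old key and its entire
# child tree are copied to the new name, then the old tree is deleted:
#   renamekey(zk_conn, {'/domains/oldname': '/domains/newname'})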


# Write lock function
def writelock(zk_conn, key):
    count = 1
    while True:
        try:
            lock_id = str(uuid.uuid1())
            lock = zk_conn.WriteLock('{}'.format(key), lock_id)
            break
        except Exception:
            count += 1
            if count > 5:
                break
            else:
                continue
    return lock
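
# Example (illustrative; paths and values are placeholders). The returned
# kazoo lock object can be used as a context manager around the protected
# update:
#   lock = writelock(zk_conn, '/locks/flush_lock')
#   with lock:
#       writedata(zk_conn, {'/nodes/node1/domainstate': 'flush'})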


# Read lock function
def readlock(zk_conn, key):
    count = 1
    while True:
        try:
            lock_id = str(uuid.uuid1())
            lock = zk_conn.ReadLock('{}'.format(key), lock_id)
            break
        except Exception:
            count += 1
            if count > 5:
                break
            else:
                continue
    return lock
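
# Example (illustrative; paths are placeholders). This is kazoo's shared lock
# recipe, so multiple readers may hold it concurrently while writers wait:
#   with readlock(zk_conn, '/locks/flush_lock'):
#       current = readdata(zk_conn, '/nodes/node1/domainstate')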


# Exclusive lock function
def exclusivelock(zk_conn, key):
    count = 1
    while True:
        try:
            lock_id = str(uuid.uuid1())
            lock = zk_conn.Lock('{}'.format(key), lock_id)
            break
        except Exception:
            count += 1
            if count > 5:
                break
            else:
                continue
    return lock
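

# A minimal smoke test for this module; illustrative only. It assumes a
# ZooKeeper server reachable at 127.0.0.1:2181, and the test key is a
# throwaway placeholder rather than a real PVC path.
if __name__ == '__main__':
    from kazoo.client import KazooClient

    zk_conn = KazooClient(hosts='127.0.0.1:2181')
    zk_conn.start()

    # Write a key, read it back, then clean it up
    writedata(zk_conn, {'/zkhandler-selftest': 'hello'})
    print(readdata(zk_conn, '/zkhandler-selftest'))
    deletekey(zk_conn, '/zkhandler-selftest')

    zk_conn.stop()
    zk_conn.close()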