Move configuration keys to /config tree

Joshua Boniface 2021-06-01 10:40:32 -04:00
parent d6a8cf9780
commit 33a54cf7f2
9 changed files with 67 additions and 32 deletions


@@ -268,7 +268,7 @@ class API_Initialize(Resource):
     """
     Initialize a new PVC cluster
-    If the 'overwrite' option is not True, the cluster will return 400 if the `/primary_node` key is found. If 'overwrite' is True, the existing cluster
+    If the 'overwrite' option is not True, the cluster will return 400 if the `/config/primary_node` key is found. If 'overwrite' is True, the existing cluster
     data will be erased and new, empty data written in its place.
     All node daemons should be stopped before running this command, and the API daemon started manually to avoid undefined behavior.
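
For reference, the guard this docstring describes reduces to one existence
check against the new key path. A minimal sketch, assuming a raw kazoo
client rather than PVC's zkhandler wrapper (the host and port below are
placeholders):

    from kazoo.client import KazooClient

    zk = KazooClient(hosts='127.0.0.1:2181')  # assumed ZooKeeper address
    zk.start()

    def may_initialize(overwrite=False):
        # Refuse to re-initialize an existing cluster unless the caller
        # explicitly passed overwrite; the API maps False to HTTP 400.
        if zk.exists('/config/primary_node') and not overwrite:
            return False
        return True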


@@ -46,15 +46,16 @@ def initialize_cluster(zkhandler, overwrite=False):
     Initialize a new cluster
     """
     # Abort if we've initialized the cluster before
-    if zkhandler.exists('/primary_node') and not overwrite:
+    if zkhandler.exists('/config/primary_node') and not overwrite:
         return False
     if overwrite:
         # Delete the existing keys; ignore any errors
         status = zkhandler.delete([
-            '/primary_node',
-            '/upstream_ip',
-            '/maintenance',
+            '/config',
+            '/config/primary_node',
+            '/config/upstream_ip',
+            '/config/maintenance',
             '/nodes',
             '/domains',
             '/networks',
@@ -76,9 +77,10 @@ def initialize_cluster(zkhandler, overwrite=False):
     # Create the root keys
     status = zkhandler.write([
-        ('/primary_node', 'none'),
-        ('/upstream_ip', 'none'),
-        ('/maintenance', 'False'),
+        ('/config', ''),
+        ('/config/primary_node', 'none'),
+        ('/config/upstream_ip', 'none'),
+        ('/config/maintenance', 'False'),
         ('/nodes', ''),
         ('/domains', ''),
         ('/networks', ''),
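
The ordering of the new write list matters: ZooKeeper requires a parent
znode to exist before children can be created under it, which is why the
empty '/config' key is written ahead of '/config/primary_node' and its
siblings. A sketch of the same idempotent pattern with a raw kazoo client
(paths taken from this commit; the client setup is assumed):

    from kazoo.client import KazooClient
    from kazoo.exceptions import NodeExistsError

    zk = KazooClient(hosts='127.0.0.1:2181')  # assumed ZooKeeper address
    zk.start()

    # Parent first, then children; zk.create(..., makepath=True) would
    # also create missing parents automatically.
    for path, value in [
        ('/config', b''),
        ('/config/primary_node', b'none'),
        ('/config/upstream_ip', b'none'),
        ('/config/maintenance', b'False'),
    ]:
        try:
            zk.create(path, value)
        except NodeExistsError:
            zk.set(path, value)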


@@ -144,7 +144,7 @@ def format_pct_tohuman(datapct):
 # Status functions
 #
 def get_status(zkhandler):
-    primary_node = zkhandler.read('/primary_node')
+    primary_node = zkhandler.read('/config/primary_node')
     ceph_status = zkhandler.read('/ceph').rstrip()
     # Create a data structure for the information
@@ -157,7 +157,7 @@ def get_status(zkhandler):
 def get_util(zkhandler):
-    primary_node = zkhandler.read('/primary_node')
+    primary_node = zkhandler.read('/config/primary_node')
     ceph_df = zkhandler.read('/ceph/util').rstrip()
     # Create a data structure for the information


@@ -32,12 +32,12 @@ def set_maintenance(zkhandler, maint_state):
     try:
         if maint_state == 'true':
             zkhandler.write([
-                ('/maintenance', 'true')
+                ('/config/maintenance', 'true')
             ])
             return True, 'Successfully set cluster in maintenance mode'
         else:
             zkhandler.write([
-                ('/maintenance', 'false')
+                ('/config/maintenance', 'false')
             ])
             return True, 'Successfully set cluster in normal mode'
     except Exception:
@@ -47,7 +47,7 @@ def set_maintenance(zkhandler, maint_state):
 def getClusterInformation(zkhandler):
     # Get cluster maintenance state
     try:
-        maint_state = zkhandler.read('/maintenance')
+        maint_state = zkhandler.read('/config/maintenance')
     except Exception:
         maint_state = 'false'
@@ -238,7 +238,7 @@ def getClusterInformation(zkhandler):
         'storage_health': storage_health,
         'storage_health_msg': storage_health_msg,
         'primary_node': common.getPrimaryNode(zkhandler),
-        'upstream_ip': zkhandler.read('/upstream_ip'),
+        'upstream_ip': zkhandler.read('/config/upstream_ip'),
         'nodes': formatted_node_states,
         'vms': formatted_vm_states,
         'networks': network_count,
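
The try/except around the maintenance read doubles as an upgrade guard: if
the new '/config/maintenance' key does not exist yet, the cluster reports
normal mode instead of erroring. A sketch of that read-with-fallback,
assuming a raw kazoo client:

    from kazoo.exceptions import NoNodeError

    def read_with_default(zk, path, default):
        # Return the key's value, or the default if the key is absent.
        try:
            data, _stat = zk.get(path)
            return data.decode('ascii')
        except NoNodeError:
            return default

    maint_state = read_with_default(zk, '/config/maintenance', 'false')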


@@ -396,7 +396,7 @@ def getPrimaryNode(zkhandler):
     failcount = 0
     while True:
         try:
-            primary_node = zkhandler.read('/primary_node')
+            primary_node = zkhandler.read('/config/primary_node')
         except Exception:
             primary_node = 'none'


@@ -98,7 +98,7 @@ def secondary_node(zkhandler, node):
     if current_state == 'primary':
         retmsg = 'Setting node {} in secondary router mode.'.format(node)
         zkhandler.write([
-            ('/primary_node', 'none')
+            ('/config/primary_node', 'none')
         ])
     else:
         return False, 'Node "{}" is already in secondary router mode.'.format(node)
@@ -126,7 +126,7 @@ def primary_node(zkhandler, node):
     if current_state == 'secondary':
         retmsg = 'Setting node {} in primary router mode.'.format(node)
         zkhandler.write([
-            ('/primary_node', node)
+            ('/config/primary_node', node)
         ])
     else:
         return False, 'Node "{}" is already in primary router mode.'.format(node)


@@ -529,6 +529,39 @@ except Exception as e:
     logger.out('ERROR: Failed to connect to Zookeeper cluster: {}'.format(e), state='e')
     exit(1)
+# Create the /config key if it does not exist
+try:
+    zkhandler.read('/config')
+except Exception:
+    zkhandler.write([
+        ('/config', ''),
+        ('/config/primary_node', 'none'),
+        ('/config/upstream_ip', 'none'),
+        ('/config/maintenance', 'False'),
+    ])
+# MIGRATION - populate the keys from their old values
+try:
+    primary_node = zkhandler.read('/primary_node')
+    zkhandler.write([
+        ('/config/primary_node', primary_node)
+    ])
+except Exception:
+    pass
+try:
+    upstream_ip = zkhandler.read('/upstream_ip')
+    zkhandler.write([
+        ('/config/upstream_ip', upstream_ip)
+    ])
+except Exception:
+    pass
+try:
+    maintenance = zkhandler.read('/maintenance')
+    zkhandler.write([
+        ('/config/maintenance', maintenance)
+    ])
+except Exception:
+    pass
 ###############################################################################
 # PHASE 5 - Gracefully handle termination
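
The migration block added above copies each legacy flat key into its new
home under /config, ignoring keys that are already gone, so it is safe to
run on every daemon startup; note that the old keys are not deleted here.
The same copy-forward step as a small loop, assuming a raw kazoo client
(the path pairs are the ones from this commit):

    from kazoo.exceptions import NoNodeError

    MIGRATIONS = [
        ('/primary_node', '/config/primary_node'),
        ('/upstream_ip', '/config/upstream_ip'),
        ('/maintenance', '/config/maintenance'),
    ]

    def migrate_keys(zk):
        for old_path, new_path in MIGRATIONS:
            try:
                data, _stat = zk.get(old_path)
            except NoNodeError:
                continue  # never existed or already migrated
            # The /config children were created just before this runs,
            # so a plain set() is sufficient here.
            zk.set(new_path, data)
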
@@ -566,7 +599,7 @@ def cleanup():
     try:
         if this_node.router_state == 'primary':
             zkhandler.write([
-                ('/primary_node', 'none')
+                ('/config/primary_node', 'none')
             ])
             logger.out('Waiting for primary migration', state='s')
             while this_node.router_state != 'secondary':
@@ -673,7 +706,7 @@ else:
     # Check that the primary key exists, and create it with us as master if not
     try:
-        current_primary = zkhandler.read('/primary_node')
+        current_primary = zkhandler.read('/config/primary_node')
     except kazoo.exceptions.NoNodeError:
         current_primary = 'none'
@@ -683,7 +716,7 @@ else:
     if config['daemon_mode'] == 'coordinator':
         logger.out('No primary node found; creating with us as primary.', state='i')
         zkhandler.write([
-            ('/primary_node', myhostname)
+            ('/config/primary_node', myhostname)
         ])
 ###############################################################################
@@ -819,7 +852,7 @@ this_node = d_node[myhostname]
 # Maintenance mode
-@zkhandler.zk_conn.DataWatch('/maintenance')
+@zkhandler.zk_conn.DataWatch('/config/maintenance')
 def set_maintenance(_maintenance, stat, event=''):
     global maintenance
     try:
@@ -829,7 +862,7 @@ def set_maintenance(_maintenance, stat, event=''):
 # Primary node
-@zkhandler.zk_conn.DataWatch('/primary_node')
+@zkhandler.zk_conn.DataWatch('/config/primary_node')
 def update_primary(new_primary, stat, event=''):
     try:
         new_primary = new_primary.decode('ascii')
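
Both watches above use kazoo's DataWatch recipe, which fires once at
registration and again on every change to the key, so relocating the
watched path is a one-line edit. A standalone sketch of the same pattern
(client setup assumed):

    from kazoo.client import KazooClient

    zk = KazooClient(hosts='127.0.0.1:2181')  # assumed ZooKeeper address
    zk.start()

    @zk.DataWatch('/config/primary_node')
    def on_primary_change(data, stat, event=None):
        # data is None if the key was deleted; decode otherwise.
        new_primary = data.decode('ascii') if data else 'none'
        print('primary is now', new_primary)
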
@@ -844,7 +877,7 @@ def update_primary(new_primary, stat, event=''):
         if this_node.daemon_state == 'run' and this_node.router_state not in ['primary', 'takeover', 'relinquish']:
             logger.out('Contending for primary coordinator state', state='i')
             # Acquire an exclusive lock on the primary_node key
-            primary_lock = zkhandler.exclusivelock('/primary_node')
+            primary_lock = zkhandler.exclusivelock('/config/primary_node')
             try:
                 # This lock times out after 0.4s, which is 0.1s less than the pre-takeover
                 # timeout below, thus ensuring that a primary takeover will not deadlock
@@ -852,9 +885,9 @@ def update_primary(new_primary, stat, event=''):
                 primary_lock.acquire(timeout=0.4)
                 # Ensure when we get the lock that the versions are still consistent and that
                 # another node hasn't already acquired primary state
-                if key_version == zkhandler.zk_conn.get('/primary_node')[1].version:
+                if key_version == zkhandler.zk_conn.get('/config/primary_node')[1].version:
                     zkhandler.write([
-                        ('/primary_node', myhostname)
+                        ('/config/primary_node', myhostname)
                     ])
                 # Cleanly release the lock
                 primary_lock.release()
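
The contention logic pairs a lock with a znode version check: the short
acquire timeout keeps a takeover from deadlocking against a relinquish in
progress, and re-reading the version under the lock ensures another node
did not win the race first. A condensed sketch of that pattern with a raw
kazoo client (the hostname argument is assumed):

    from kazoo.exceptions import LockTimeout

    def contend_for_primary(zk, myhostname):
        # Remember the key version seen before contending.
        _data, stat = zk.get('/config/primary_node')
        key_version = stat.version

        lock = zk.Lock('/config/primary_node', myhostname)
        try:
            # Must stay shorter than the pre-takeover timeout to avoid
            # deadlocking a takeover already in flight.
            lock.acquire(timeout=0.4)
        except LockTimeout:
            return False  # another node is contending; back off

        try:
            # Only claim primary if nobody changed the key meanwhile.
            if zk.get('/config/primary_node')[1].version == key_version:
                zk.set('/config/primary_node', myhostname.encode('ascii'))
                return True
            return False
        finally:
            lock.release()
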
@@ -1475,11 +1508,11 @@ def node_keepalive():
     if config['enable_networking']:
         if this_node.router_state == 'primary':
             try:
-                if zkhandler.read('/upstream_ip') != config['upstream_floating_ip']:
+                if zkhandler.read('/config/upstream_ip') != config['upstream_floating_ip']:
                     raise
             except Exception:
                 zkhandler.write([
-                    ('/upstream_ip', config['upstream_floating_ip'])
+                    ('/config/upstream_ip', config['upstream_floating_ip'])
                 ])
     # Get past state and update if needed
@@ -1498,9 +1531,9 @@ def node_keepalive():
     if debug:
         logger.out("Ensure the primary key is properly set", state='d', prefix='main-thread')
     if this_node.router_state == 'primary':
-        if zkhandler.read('/primary_node') != this_node.name:
+        if zkhandler.read('/config/primary_node') != this_node.name:
             zkhandler.write([
-                ('/primary_node', this_node.name)
+                ('/config/primary_node', this_node.name)
             ])
     # Run VM statistics collection in separate thread for parallelization


@@ -323,7 +323,7 @@ class NodeInstance(object):
         Acquire primary coordinator status from a peer node
         """
         # Lock the primary node until transition is complete
-        primary_lock = zkhandler.exclusivelock(self.zk_conn, '/primary_node')
+        primary_lock = zkhandler.exclusivelock(self.zk_conn, '/config/primary_node')
         primary_lock.acquire()
         # Ensure our lock key is populated


@@ -63,8 +63,8 @@ def fenceNode(node_name, zk_conn, config, logger):
     if node_name in config['coordinators']:
         logger.out('Forcing secondary status for node "{}"'.format(node_name), state='i')
         zkhandler.writedata(zk_conn, {'/nodes/{}/routerstate'.format(node_name): 'secondary'})
-        if zkhandler.readdata(zk_conn, '/primary_node') == node_name:
-            zkhandler.writedata(zk_conn, {'/primary_node': 'none'})
+        if zkhandler.readdata(zk_conn, '/config/primary_node') == node_name:
+            zkhandler.writedata(zk_conn, {'/config/primary_node': 'none'})
     # If the fence succeeded and successful_fence is migrate
     if fence_status and config['successful_fence'] == 'migrate':
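
Clearing the primary pointer during a fence follows the same
read-compare-write shape as the keepalive self-heal above: reset
'/config/primary_node' only if the fenced node actually holds it. A
minimal sketch, assuming a raw kazoo client:

    from kazoo.exceptions import NoNodeError

    def demote_if_primary(zk, node_name):
        # Clear the primary pointer only if the fenced node holds it,
        # so a surviving coordinator can contend for primary state.
        try:
            data, _stat = zk.get('/config/primary_node')
        except NoNodeError:
            return
        if data.decode('ascii') == node_name:
            zk.set('/config/primary_node', b'none')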