Move configuration keys to /config tree

Joshua Boniface 2021-06-01 10:40:32 -04:00
parent d6a8cf9780
commit 33a54cf7f2
9 changed files with 67 additions and 32 deletions
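
In brief: the cluster-wide state keys /primary_node, /upstream_ip, and /maintenance move under a new /config parent znode, and the node daemon gains a one-time startup migration that copies any old values into the new tree. As a reading aid only (this summary mapping is not code from the commit, and the name OLD_TO_NEW is hypothetical):

# Summary of the key renames made by this commit; OLD_TO_NEW is a hypothetical name.
OLD_TO_NEW = {
    '/primary_node': '/config/primary_node',
    '/upstream_ip': '/config/upstream_ip',
    '/maintenance': '/config/maintenance',
}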

View File

@@ -268,7 +268,7 @@ class API_Initialize(Resource):
         """
         Initialize a new PVC cluster
-        If the 'overwrite' option is not True, the cluster will return 400 if the `/primary_node` key is found. If 'overwrite' is True, the existing cluster
+        If the 'overwrite' option is not True, the cluster will return 400 if the `/config/primary_node` key is found. If 'overwrite' is True, the existing cluster
         data will be erased and new, empty data written in its place.
         All node daemons should be stopped before running this command, and the API daemon started manually to avoid undefined behavior.

View File

@@ -46,15 +46,16 @@ def initialize_cluster(zkhandler, overwrite=False):
     Initialize a new cluster
     """
     # Abort if we've initialized the cluster before
-    if zkhandler.exists('/primary_node') and not overwrite:
+    if zkhandler.exists('/config/primary_node') and not overwrite:
         return False
 
     if overwrite:
         # Delete the existing keys; ignore any errors
         status = zkhandler.delete([
-            '/primary_node',
-            '/upstream_ip',
-            '/maintenance',
+            '/config',
+            '/config/primary_node',
+            '/config/upstream_ip',
+            '/config/maintenance',
             '/nodes',
             '/domains',
             '/networks',
@@ -76,9 +77,10 @@ def initialize_cluster(zkhandler, overwrite=False):
 
     # Create the root keys
     status = zkhandler.write([
-        ('/primary_node', 'none'),
-        ('/upstream_ip', 'none'),
-        ('/maintenance', 'False'),
+        ('/config', ''),
+        ('/config/primary_node', 'none'),
+        ('/config/upstream_ip', 'none'),
+        ('/config/maintenance', 'False'),
         ('/nodes', ''),
         ('/domains', ''),
         ('/networks', ''),
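
For orientation, zkhandler.write() takes a list of (path, value) tuples and creates or updates each key. A rough equivalent of the new initialization against a bare kazoo client might look like the sketch below; the connection string is a placeholder, and the create-or-set behavior is an assumption about ZKHandler's semantics, not taken from this commit.

from kazoo.client import KazooClient

zk = KazooClient(hosts='127.0.0.1:2181')  # placeholder connection string
zk.start()
for path, value in [
    ('/config', ''),
    ('/config/primary_node', 'none'),
    ('/config/upstream_ip', 'none'),
    ('/config/maintenance', 'False'),
]:
    # Create the key if it is missing, otherwise overwrite its value
    if zk.exists(path):
        zk.set(path, value.encode('ascii'))
    else:
        zk.create(path, value.encode('ascii'), makepath=True)
zk.stop()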

View File

@@ -144,7 +144,7 @@ def format_pct_tohuman(datapct):
 # Status functions
 #
 def get_status(zkhandler):
-    primary_node = zkhandler.read('/primary_node')
+    primary_node = zkhandler.read('/config/primary_node')
     ceph_status = zkhandler.read('/ceph').rstrip()
 
     # Create a data structure for the information
@@ -157,7 +157,7 @@ def get_status(zkhandler):
 
 def get_util(zkhandler):
-    primary_node = zkhandler.read('/primary_node')
+    primary_node = zkhandler.read('/config/primary_node')
     ceph_df = zkhandler.read('/ceph/util').rstrip()
 
     # Create a data structure for the information

View File

@@ -32,12 +32,12 @@ def set_maintenance(zkhandler, maint_state):
     try:
         if maint_state == 'true':
             zkhandler.write([
-                ('/maintenance', 'true')
+                ('/config/maintenance', 'true')
             ])
             return True, 'Successfully set cluster in maintenance mode'
         else:
             zkhandler.write([
-                ('/maintenance', 'false')
+                ('/config/maintenance', 'false')
             ])
             return True, 'Successfully set cluster in normal mode'
     except Exception:
@@ -47,7 +47,7 @@ def set_maintenance(zkhandler, maint_state):
 
 def getClusterInformation(zkhandler):
     # Get cluster maintenance state
     try:
-        maint_state = zkhandler.read('/maintenance')
+        maint_state = zkhandler.read('/config/maintenance')
     except Exception:
         maint_state = 'false'
@@ -238,7 +238,7 @@ def getClusterInformation(zkhandler):
         'storage_health': storage_health,
         'storage_health_msg': storage_health_msg,
         'primary_node': common.getPrimaryNode(zkhandler),
-        'upstream_ip': zkhandler.read('/upstream_ip'),
+        'upstream_ip': zkhandler.read('/config/upstream_ip'),
         'nodes': formatted_node_states,
         'vms': formatted_vm_states,
         'networks': network_count,
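
The maintenance flag is stored as the literal strings 'true'/'false', and the reader above falls back to 'false' when the key cannot be read, which keeps clusters that predate the /config tree working until the daemon migration later in this commit runs. A minimal sketch of that read-with-fallback pattern, using kazoo directly with a placeholder connection string:

from kazoo.client import KazooClient

zk = KazooClient(hosts='127.0.0.1:2181')  # placeholder connection string
zk.start()
try:
    maint_state = zk.get('/config/maintenance')[0].decode('ascii')
except Exception:
    # Key may be absent on a pre-migration cluster; assume normal mode
    maint_state = 'false'
print('cluster maintenance:', maint_state)
zk.stop()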

View File

@@ -396,7 +396,7 @@ def getPrimaryNode(zkhandler):
     failcount = 0
     while True:
         try:
-            primary_node = zkhandler.read('/primary_node')
+            primary_node = zkhandler.read('/config/primary_node')
         except Exception:
             primary_node == 'none'

View File

@@ -98,7 +98,7 @@ def secondary_node(zkhandler, node):
     if current_state == 'primary':
         retmsg = 'Setting node {} in secondary router mode.'.format(node)
         zkhandler.write([
-            ('/primary_node', 'none')
+            ('/config/primary_node', 'none')
         ])
     else:
         return False, 'Node "{}" is already in secondary router mode.'.format(node)
@@ -126,7 +126,7 @@ def primary_node(zkhandler, node):
     if current_state == 'secondary':
         retmsg = 'Setting node {} in primary router mode.'.format(node)
         zkhandler.write([
-            ('/primary_node', node)
+            ('/config/primary_node', node)
         ])
     else:
         return False, 'Node "{}" is already in primary router mode.'.format(node)

View File

@@ -529,6 +529,39 @@ except Exception as e:
     logger.out('ERROR: Failed to connect to Zookeeper cluster: {}'.format(e), state='e')
     exit(1)
 
+# Create the /config key if it does not exist
+try:
+    zkhandler.read('/config')
+except Exception:
+    zkhandler.write([
+        ('/config', ''),
+        ('/config/primary_node', 'none'),
+        ('/config/upstream_ip', 'none'),
+        ('/config/maintenance', 'False'),
+    ])
+
+# MIGRATION - populate the keys from their old values
+try:
+    primary_node = zkhandler.read('/primary_node')
+    zkhandler.write([
+        ('/config/primary_node', primary_node)
+    ])
+except Exception:
+    pass
+
+try:
+    upstream_ip = zkhandler.read('/upstream_ip')
+    zkhandler.write([
+        ('/config/upstream_ip', upstream_ip)
+    ])
+except Exception:
+    pass
+
+try:
+    maintenance = zkhandler.read('/maintenance')
+    zkhandler.write([
+        ('/config/maintenance', maintenance)
+    ])
+except Exception:
+    pass
+
 ###############################################################################
 # PHASE 5 - Gracefully handle termination
@@ -566,7 +599,7 @@ def cleanup():
     try:
         if this_node.router_state == 'primary':
             zkhandler.write([
-                ('/primary_node', 'none')
+                ('/config/primary_node', 'none')
             ])
             logger.out('Waiting for primary migration', state='s')
             while this_node.router_state != 'secondary':
@@ -673,7 +706,7 @@ else:
 
 # Check that the primary key exists, and create it with us as master if not
 try:
-    current_primary = zkhandler.read('/primary_node')
+    current_primary = zkhandler.read('/config/primary_node')
 except kazoo.exceptions.NoNodeError:
     current_primary = 'none'
@@ -683,7 +716,7 @@ else:
     if config['daemon_mode'] == 'coordinator':
         logger.out('No primary node found; creating with us as primary.', state='i')
         zkhandler.write([
-            ('/primary_node', myhostname)
+            ('/config/primary_node', myhostname)
         ])
 
 ###############################################################################
@@ -819,7 +852,7 @@ this_node = d_node[myhostname]
 
 # Maintenance mode
-@zkhandler.zk_conn.DataWatch('/maintenance')
+@zkhandler.zk_conn.DataWatch('/config/maintenance')
 def set_maintenance(_maintenance, stat, event=''):
     global maintenance
     try:
@@ -829,7 +862,7 @@ def set_maintenance(_maintenance, stat, event=''):
 
 # Primary node
-@zkhandler.zk_conn.DataWatch('/primary_node')
+@zkhandler.zk_conn.DataWatch('/config/primary_node')
 def update_primary(new_primary, stat, event=''):
     try:
         new_primary = new_primary.decode('ascii')
@@ -844,7 +877,7 @@ def update_primary(new_primary, stat, event=''):
             if this_node.daemon_state == 'run' and this_node.router_state not in ['primary', 'takeover', 'relinquish']:
                 logger.out('Contending for primary coordinator state', state='i')
                 # Acquire an exclusive lock on the primary_node key
-                primary_lock = zkhandler.exclusivelock('/primary_node')
+                primary_lock = zkhandler.exclusivelock('/config/primary_node')
                 try:
                     # This lock times out after 0.4s, which is 0.1s less than the pre-takeover
                     # timeout below, thus ensuring that a primary takeover will not deadlock
@@ -852,9 +885,9 @@ def update_primary(new_primary, stat, event=''):
                     primary_lock.acquire(timeout=0.4)
                     # Ensure when we get the lock that the versions are still consistent and that
                     # another node hasn't already acquired primary state
-                    if key_version == zkhandler.zk_conn.get('/primary_node')[1].version:
+                    if key_version == zkhandler.zk_conn.get('/config/primary_node')[1].version:
                         zkhandler.write([
-                            ('/primary_node', myhostname)
+                            ('/config/primary_node', myhostname)
                         ])
                     # Cleanly release the lock
                     primary_lock.release()
@@ -1475,11 +1508,11 @@ def node_keepalive():
     if config['enable_networking']:
         if this_node.router_state == 'primary':
             try:
-                if zkhandler.read('/upstream_ip') != config['upstream_floating_ip']:
+                if zkhandler.read('/config/upstream_ip') != config['upstream_floating_ip']:
                     raise
             except Exception:
                 zkhandler.write([
-                    ('/upstream_ip', config['upstream_floating_ip'])
+                    ('/config/upstream_ip', config['upstream_floating_ip'])
                 ])
 
     # Get past state and update if needed
@@ -1498,9 +1531,9 @@ def node_keepalive():
     if debug:
         logger.out("Ensure the primary key is properly set", state='d', prefix='main-thread')
     if this_node.router_state == 'primary':
-        if zkhandler.read('/primary_node') != this_node.name:
+        if zkhandler.read('/config/primary_node') != this_node.name:
             zkhandler.write([
-                ('/primary_node', this_node.name)
+                ('/config/primary_node', this_node.name)
             ])
 
     # Run VM statistics collection in separate thread for parallelization
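
The contention logic above follows a check-lock-recheck pattern: a node snapshots the znode version of /config/primary_node, takes a short-timeout exclusive lock, and writes itself as primary only if the version is still unchanged, so a node that lost the race backs off cleanly. A hedged standalone sketch of the same pattern, assuming ZKHandler's exclusivelock() wraps kazoo's Lock recipe; the hostname and connection string are placeholders:

from kazoo.client import KazooClient
from kazoo.exceptions import LockTimeout

zk = KazooClient(hosts='127.0.0.1:2181')  # placeholder connection string
zk.start()

myhostname = 'pvchv1'  # placeholder node name
# Snapshot the key version before contending
key_version = zk.get('/config/primary_node')[1].version

lock = zk.Lock('/config/primary_node')
try:
    # Short timeout so a concurrent takeover cannot deadlock on this lock
    lock.acquire(timeout=0.4)
    # Only write if no other node changed the key while we waited
    if key_version == zk.get('/config/primary_node')[1].version:
        zk.set('/config/primary_node', myhostname.encode('ascii'))
    lock.release()
except LockTimeout:
    pass  # another node holds the lock; defer to it
zk.stop()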

View File

@@ -323,7 +323,7 @@ class NodeInstance(object):
         Acquire primary coordinator status from a peer node
         """
         # Lock the primary node until transition is complete
-        primary_lock = zkhandler.exclusivelock(self.zk_conn, '/primary_node')
+        primary_lock = zkhandler.exclusivelock(self.zk_conn, '/config/primary_node')
         primary_lock.acquire()
 
         # Ensure our lock key is populated

View File

@@ -63,8 +63,8 @@ def fenceNode(node_name, zk_conn, config, logger):
     if node_name in config['coordinators']:
         logger.out('Forcing secondary status for node "{}"'.format(node_name), state='i')
         zkhandler.writedata(zk_conn, {'/nodes/{}/routerstate'.format(node_name): 'secondary'})
-        if zkhandler.readdata(zk_conn, '/primary_node') == node_name:
-            zkhandler.writedata(zk_conn, {'/primary_node': 'none'})
+        if zkhandler.readdata(zk_conn, '/config/primary_node') == node_name:
+            zkhandler.writedata(zk_conn, {'/config/primary_node': 'none'})
 
     # If the fence succeeded and successful_fence is migrate
     if fence_status and config['successful_fence'] == 'migrate':