Lock primary_node key during primary switchover

Also implements a retry loop around the Patroni leader switchover to ensure
the Patroni leader always follows the primary node, and cleans up the surrounding code a bit.
This commit is contained in:
Joshua Boniface 2019-08-04 16:42:06 -04:00
parent 710d2cf9c2
commit a329376d33
1 changed file with 42 additions and 34 deletions

View File

@ -115,10 +115,11 @@ class NodeInstance(object):
# We're a coordinator so we care about networking # We're a coordinator so we care about networking
if data != self.router_state: if data != self.router_state:
self.router_state = data self.router_state = data
if self.router_state == 'primary': if self.config['enable_networking']:
self.become_primary() if self.router_state == 'primary':
else: self.become_primary()
self.become_secondary() else:
self.become_secondary()
@self.zk_conn.DataWatch('/nodes/{}/domainstate'.format(self.name)) @self.zk_conn.DataWatch('/nodes/{}/domainstate'.format(self.name))
def watch_node_domainstate(data, stat, event=''): def watch_node_domainstate(data, stat, event=''):
@ -259,22 +260,24 @@ class NodeInstance(object):
# Routing primary/secondary states # Routing primary/secondary states
def become_secondary(self): def become_secondary(self):
if self.config['enable_networking']: self.logger.out('Setting router {} to secondary state'.format(self.name), state='i')
self.logger.out('Setting router {} to secondary state'.format(self.name), state='i') self.logger.out('Network list: {}'.format(', '.join(self.network_list)), state='i')
self.logger.out('Network list: {}'.format(', '.join(self.network_list)), state='i') time.sleep(2)
time.sleep(2) if self.config['enable_api']:
if self.config['enable_api']: self.logger.out('Stopping PVC API client service', state='i')
self.logger.out('Stopping PVC API client service', state='i') common.run_os_command("systemctl stop pvc-api.service")
common.run_os_command("systemctl stop pvc-api.service") for network in self.d_network:
for network in self.d_network: self.d_network[network].stopDHCPServer()
self.d_network[network].stopDHCPServer() self.d_network[network].removeGateways()
self.d_network[network].removeGateways() self.removeFloatingAddresses()
self.removeFloatingAddresses() self.dns_aggregator.stop_aggregator()
self.dns_aggregator.stop_aggregator()
def become_primary(self): def become_primary(self):
if self.config['enable_networking']: # Establish a lock
with zkhandler.writelock(self.zk_conn, '/primary_node'):
self.logger.out('Setting router {} to primary state'.format(self.name), state='i') self.logger.out('Setting router {} to primary state'.format(self.name), state='i')
# Create floating addresses
self.logger.out('Network list: {}'.format(', '.join(self.network_list)), state='i') self.logger.out('Network list: {}'.format(', '.join(self.network_list)), state='i')
self.createFloatingAddresses() self.createFloatingAddresses()
# Start up the gateways and DHCP servers # Start up the gateways and DHCP servers
@ -285,25 +288,30 @@ class NodeInstance(object):
self.logger.out('Starting PVC API client service', state='i') self.logger.out('Starting PVC API client service', state='i')
common.run_os_command("systemctl start pvc-api.service") common.run_os_command("systemctl start pvc-api.service")
time.sleep(1) time.sleep(1)
# Force Patroni to switch to the local instance
# Switch Patroni leader to the local instance
self.logger.out('Setting Patroni leader to this node', state='i') self.logger.out('Setting Patroni leader to this node', state='i')
retcode, stdout, stderr = common.run_os_command( while True:
""" retcode, stdout, stderr = common.run_os_command(
patronictl """
-c /etc/patroni/config.yml patronictl
-d zookeeper://localhost:2181 -c /etc/patroni/config.yml
switchover -d zookeeper://localhost:2181
--candidate {} switchover
--force --candidate {}
pvcdns --force
""".format(self.name) pvcdns
) """.format(self.name)
if stdout: )
self.logger.out('Successfully switched Patroni leader\n{}'.format(stdout), state='o') if stdout:
else: self.logger.out('Successfully switched Patroni leader\n{}'.format(stdout), state='o')
self.logger.out('Failed to switch Patroni leader\n{}'.format(stderr), state='e') break
time.sleep(1) else:
self.logger.out('Failed to switch Patroni leader; retrying\n{}'.format(stderr), state='e')
time.sleep(2)
# Start the DNS aggregator instance # Start the DNS aggregator instance
time.sleep(1)
self.dns_aggregator.start_aggregator() self.dns_aggregator.start_aggregator()
def createFloatingAddresses(self): def createFloatingAddresses(self):