Improve handling of primary contention

Previously, contention could occasionally cause a flap/dual primary
contention state due to the lack of checking within this function. This
could cause a state where a node transitions to primary, then is almost
immediately shifted away, which could cause undefined behaviour in the
cluster.

The solution includes several elements:
    * Implement an exclusive lock operation in zkhandler
    * Switch the become_primary function to use this exclusive lock
    * Implement exclusive locking during the contention process
    * As a failsafe, check stat versions before setting the node as the
      primary node, in case another node already has
    * Delay the start of takeover/relinquish operations by slightly
      longer than the lock timeout
    * Make the current router_state conditions more explicit (positive
      conditionals rather than negative conditionals)

The new scenario ensures that during contention, only one secondary will
ever succeed at acquiring the lock. Ideally, the other would then grab
the lock and pass, but in testing this does not seem to be the case -
the lock always times out, so the failsafe check is technically not
needed but has been left as an added safety mechanism. With this setup,
the node that fails the contention will never block the switchover nor
will it try to force itself onto the cluster after another node has
successfully won contention.

Timeouts may need to be adjusted in the future, but the base timeout of
0.4 seconds (and transition delay of 0.5 seconds) seems to work reliably
during preliminary tests.
This commit is contained in:
Joshua Boniface 2020-04-12 03:40:17 -04:00
parent e672d799a6
commit b413e042a6
3 changed files with 28 additions and 4 deletions

View File

@ -824,18 +824,36 @@ def update_primary(new_primary, stat, event=''):
new_primary = new_primary.decode('ascii') new_primary = new_primary.decode('ascii')
except AttributeError: except AttributeError:
new_primary = 'none' new_primary = 'none'
key_version = stat.version
if new_primary != this_node.primary_node: if new_primary != this_node.primary_node:
if config['daemon_mode'] == 'coordinator': if config['daemon_mode'] == 'coordinator':
# We're a coordinator and there is no primary # We're a coordinator and there is no primary
if new_primary == 'none': if new_primary == 'none':
if this_node.daemon_state == 'run' and this_node.router_state != 'primary': if this_node.daemon_state == 'run' and this_node.router_state == 'secondary':
logger.out('Contending for primary coordinator state', state='i') logger.out('Contending for primary coordinator state', state='i')
# Acquire an exclusive lock on the primary_node key
primary_lock = zkhandler.exclusivelock(zk_conn, '/primary_node')
try:
# This lock times out after 0.4s, which is 0.1s less than the pre-takeover
# timeout below, thus ensuring that a primary takeover will not deadlock
# against a node that failed the contention
primary_lock.acquire(timeout=0.4)
# Ensure when we get the lock that the versions are still consistent and that
# another node hasn't already acquired primary state
if key_version == zk_conn.get('/primary_node')[1].version:
zkhandler.writedata(zk_conn, {'/primary_node': myhostname}) zkhandler.writedata(zk_conn, {'/primary_node': myhostname})
# Cleanly release the lock
primary_lock.release()
# We timed out acquiring a lock, which means we failed contention, so just pass
except kazoo.exceptions.LockTimeout:
pass
elif new_primary == myhostname: elif new_primary == myhostname:
time.sleep(0.5)
zkhandler.writedata(zk_conn, {'/nodes/{}/routerstate'.format(myhostname): 'takeover'}) zkhandler.writedata(zk_conn, {'/nodes/{}/routerstate'.format(myhostname): 'takeover'})
else: else:
if this_node.router_state != 'secondary': if this_node.router_state == 'primary':
time.sleep(0.5)
zkhandler.writedata(zk_conn, {'/nodes/{}/routerstate'.format(myhostname): 'relinquish'}) zkhandler.writedata(zk_conn, {'/nodes/{}/routerstate'.format(myhostname): 'relinquish'})
else: else:
zkhandler.writedata(zk_conn, {'/nodes/{}/routerstate'.format(myhostname): 'client'}) zkhandler.writedata(zk_conn, {'/nodes/{}/routerstate'.format(myhostname): 'client'})

View File

@ -317,7 +317,7 @@ class NodeInstance(object):
Acquire primary coordinator status from a peer node Acquire primary coordinator status from a peer node
""" """
# Lock the primary node until transition is complete # Lock the primary node until transition is complete
primary_lock = zkhandler.writelock(self.zk_conn, '/primary_node') primary_lock = zkhandler.exclusivelock(self.zk_conn, '/primary_node')
primary_lock.acquire() primary_lock.acquire()
# Ensure our lock key is populated # Ensure our lock key is populated

View File

@ -130,3 +130,9 @@ def readlock(zk_conn, key):
lock_id = str(uuid.uuid1()) lock_id = str(uuid.uuid1())
lock = zk_conn.ReadLock('{}'.format(key), lock_id) lock = zk_conn.ReadLock('{}'.format(key), lock_id)
return lock return lock
# Exclusive lock function
def exclusivelock(zk_conn, key):
    """Return an exclusive (mutex) lock object for *key* on the given ZK connection.

    A fresh UUID is used as the lock contender identifier so each caller
    is distinguishable in ZooKeeper's lock queue.
    """
    contender_id = str(uuid.uuid1())
    return zk_conn.Lock('{}'.format(key), contender_id)