Adjust lock schema in NodeInstance and VMInstance

Removes a superfluous lock and puts the sync_lock keys in more usable
places.
This commit is contained in:
Joshua Boniface 2021-06-09 22:49:58 -04:00
parent 24663a3333
commit 7e42118e6f
3 changed files with 26 additions and 28 deletions

View File

@ -1 +1 @@
{"version": "0", "root": "", "base": {"schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "lock": "/locks", "lock.primary_node": "/locks/primary_node", "lock.flush_lock": "/locks/flush_lock", "lock.domain_migrate": "/locks/domain_migrate", "cmd": "/cmd", "cmd.node": "/cmd/nodes", "cmd.domain": "/cmd/domains", "cmd.ceph": "/cmd/ceph", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "node": {"keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit"}, "network": {"type": "/nettype", "rules": "/firewall_rules", "nameservers": "/name_servers", "domain": 
"/domain", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.reservations": "/dhcp4_reservations", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "osd": {"node": "/node", "device": "/device", "stats": "/stats"}, "pool": {"pgs": "/pgs", "stats": "/stats"}, "volume": {"stats": "/stats"}, "snapshot": {"stats": "/stats"}}
{"version": "0", "root": "", "base": {"schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "cmd": "/cmd", "cmd.node": "/cmd/nodes", "cmd.domain": "/cmd/domains", "cmd.ceph": "/cmd/ceph", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "node": {"keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "migrate.sync_lock": "/migrate_sync_lock"}, "network": {"type": "/nettype", "rules": "/firewall_rules", "nameservers": "/name_servers", "domain": "/domain", "ip4.gateway": 
"/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.reservations": "/dhcp4_reservations", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "osd": {"node": "/node", "device": "/device", "stats": "/stats"}, "pool": {"pgs": "/pgs", "stats": "/stats"}, "volume": {"stats": "/stats"}, "snapshot": {"stats": "/stats"}}

View File

@ -420,12 +420,9 @@ class ZKSchema(object):
'config': f'{_schema_root}/config',
'config.maintenance': f'{_schema_root}/config/maintenance',
'config.primary_node': f'{_schema_root}/config/primary_node',
'config.primary_node.sync_lock': f'{_schema_root}/config/primary_node/sync_lock',
'config.upstream_ip': f'{_schema_root}/config/upstream_ip',
'config.migration_target_selector': f'{_schema_root}/config/migration_target_selector',
'lock': f'{_schema_root}/locks',
'lock.primary_node': f'{_schema_root}/locks/primary_node',
'lock.flush_lock': f'{_schema_root}/locks/flush_lock',
'lock.domain_migrate': f'{_schema_root}/locks/domain_migrate',
'cmd': f'{_schema_root}/cmd',
'cmd.node': f'{_schema_root}/cmd/nodes',
'cmd.domain': f'{_schema_root}/cmd/domains',
@ -480,7 +477,8 @@ class ZKSchema(object):
'meta.autostart': '/node_autostart',
'meta.migrate_method': '/migration_method',
'meta.node_selector': '/node_selector',
'meta.node_limit': '/node_limit'
'meta.node_limit': '/node_limit',
'migrate.sync_lock': '/migrate_sync_lock'
},
# The schema of an individual network entry (/networks/{vni})
'network': {

View File

@ -329,25 +329,25 @@ class NodeInstance(object):
# Ensure our lock key is populated
self.zkhandler.write([
('base.lock.primary_node', '')
('base.config.primary_node.sync_lock', '')
])
# Synchronize nodes A (I am writer)
lock = self.zkhandler.writelock('base.lock.primary_node')
lock = self.zkhandler.writelock('base.config.primary_node.sync_lock')
self.logger.out('Acquiring write lock for synchronization phase A', state='i')
lock.acquire()
self.logger.out('Acquired write lock for synchronization phase A', state='o')
time.sleep(1) # Time for reader to acquire the lock
self.logger.out('Releasing write lock for synchronization phase A', state='i')
self.zkhandler.write([
('base.lock.primary_node', '')
('base.config.primary_node.sync_lock', '')
])
lock.release()
self.logger.out('Released write lock for synchronization phase A', state='o')
time.sleep(0.1) # Time for new writer to acquire the lock
# Synchronize nodes B (I am reader)
lock = self.zkhandler.readlock('base.lock.primary_node')
lock = self.zkhandler.readlock('base.config.primary_node.sync_lock')
self.logger.out('Acquiring read lock for synchronization phase B', state='i')
lock.acquire()
self.logger.out('Acquired read lock for synchronization phase B', state='o')
@ -356,7 +356,7 @@ class NodeInstance(object):
self.logger.out('Released read lock for synchronization phase B', state='o')
# Synchronize nodes C (I am writer)
lock = self.zkhandler.writelock('base.lock.primary_node')
lock = self.zkhandler.writelock('base.config.primary_node.sync_lock')
self.logger.out('Acquiring write lock for synchronization phase C', state='i')
lock.acquire()
self.logger.out('Acquired write lock for synchronization phase C', state='o')
@ -373,13 +373,13 @@ class NodeInstance(object):
common.createIPAddress(self.upstream_floatingipaddr, self.upstream_cidrnetmask, 'brupstream')
self.logger.out('Releasing write lock for synchronization phase C', state='i')
self.zkhandler.write([
('base.lock.primary_node', '')
('base.config.primary_node.sync_lock', '')
])
lock.release()
self.logger.out('Released write lock for synchronization phase C', state='o')
# Synchronize nodes D (I am writer)
lock = self.zkhandler.writelock('base.lock.primary_node')
lock = self.zkhandler.writelock('base.config.primary_node.sync_lock')
self.logger.out('Acquiring write lock for synchronization phase D', state='i')
lock.acquire()
self.logger.out('Acquired write lock for synchronization phase D', state='o')
@ -405,13 +405,13 @@ class NodeInstance(object):
common.createIPAddress(self.storage_floatingipaddr, self.storage_cidrnetmask, 'brstorage')
self.logger.out('Releasing write lock for synchronization phase D', state='i')
self.zkhandler.write([
('base.lock.primary_node', '')
('base.config.primary_node.sync_lock', '')
])
lock.release()
self.logger.out('Released write lock for synchronization phase D', state='o')
# Synchronize nodes E (I am writer)
lock = self.zkhandler.writelock('base.lock.primary_node')
lock = self.zkhandler.writelock('base.config.primary_node.sync_lock')
self.logger.out('Acquiring write lock for synchronization phase E', state='i')
lock.acquire()
self.logger.out('Acquired write lock for synchronization phase E', state='o')
@ -428,13 +428,13 @@ class NodeInstance(object):
common.createIPAddress('169.254.169.254', '32', 'lo')
self.logger.out('Releasing write lock for synchronization phase E', state='i')
self.zkhandler.write([
('base.lock.primary_node', '')
('base.config.primary_node.sync_lock', '')
])
lock.release()
self.logger.out('Released write lock for synchronization phase E', state='o')
# Synchronize nodes F (I am writer)
lock = self.zkhandler.writelock('base.lock.primary_node')
lock = self.zkhandler.writelock('base.config.primary_node.sync_lock')
self.logger.out('Acquiring write lock for synchronization phase F', state='i')
lock.acquire()
self.logger.out('Acquired write lock for synchronization phase F', state='o')
@ -444,13 +444,13 @@ class NodeInstance(object):
self.d_network[network].createGateways()
self.logger.out('Releasing write lock for synchronization phase F', state='i')
self.zkhandler.write([
('base.lock.primary_node', '')
('base.config.primary_node.sync_lock', '')
])
lock.release()
self.logger.out('Released write lock for synchronization phase F', state='o')
# Synchronize nodes G (I am writer)
lock = self.zkhandler.writelock('base.lock.primary_node')
lock = self.zkhandler.writelock('base.config.primary_node.sync_lock')
self.logger.out('Acquiring write lock for synchronization phase G', state='i')
lock.acquire()
self.logger.out('Acquired write lock for synchronization phase G', state='o')
@ -518,7 +518,7 @@ class NodeInstance(object):
self.logger.out('Not starting DNS aggregator due to Patroni failures', state='e')
self.logger.out('Releasing write lock for synchronization phase G', state='i')
self.zkhandler.write([
('base.lock.primary_node', '')
('base.config.primary_node.sync_lock', '')
])
lock.release()
self.logger.out('Released write lock for synchronization phase G', state='o')
@ -538,7 +538,7 @@ class NodeInstance(object):
time.sleep(0.2) # Initial delay for the first writer to grab the lock
# Synchronize nodes A (I am reader)
lock = self.zkhandler.readlock('base.lock.primary_node')
lock = self.zkhandler.readlock('base.config.primary_node.sync_lock')
self.logger.out('Acquiring read lock for synchronization phase A', state='i')
lock.acquire()
self.logger.out('Acquired read lock for synchronization phase A', state='o')
@ -547,7 +547,7 @@ class NodeInstance(object):
self.logger.out('Released read lock for synchronization phase A', state='o')
# Synchronize nodes B (I am writer)
lock = self.zkhandler.writelock('base.lock.primary_node')
lock = self.zkhandler.writelock('base.config.primary_node.sync_lock')
self.logger.out('Acquiring write lock for synchronization phase B', state='i')
lock.acquire()
self.logger.out('Acquired write lock for synchronization phase B', state='o')
@ -559,7 +559,7 @@ class NodeInstance(object):
self.d_network[network].stopDHCPServer()
self.logger.out('Releasing write lock for synchronization phase B', state='i')
self.zkhandler.write([
('base.lock.primary_node', '')
('base.config.primary_node.sync_lock', '')
])
lock.release()
self.logger.out('Released write lock for synchronization phase B', state='o')
@ -572,7 +572,7 @@ class NodeInstance(object):
time.sleep(0.1) # Time for new writer to acquire the lock
# Synchronize nodes C (I am reader)
lock = self.zkhandler.readlock('base.lock.primary_node')
lock = self.zkhandler.readlock('base.config.primary_node.sync_lock')
self.logger.out('Acquiring read lock for synchronization phase C', state='i')
lock.acquire()
self.logger.out('Acquired read lock for synchronization phase C', state='o')
@ -591,7 +591,7 @@ class NodeInstance(object):
self.logger.out('Released read lock for synchronization phase C', state='o')
# Synchronize nodes D (I am reader)
lock = self.zkhandler.readlock('base.lock.primary_node')
lock = self.zkhandler.readlock('base.config.primary_node.sync_lock')
self.logger.out('Acquiring read lock for synchronization phase D', state='i')
lock.acquire()
self.logger.out('Acquired read lock for synchronization phase D', state='o')
@ -619,7 +619,7 @@ class NodeInstance(object):
self.logger.out('Released read lock for synchronization phase D', state='o')
# Synchronize nodes E (I am reader)
lock = self.zkhandler.readlock('base.lock.primary_node')
lock = self.zkhandler.readlock('base.config.primary_node.sync_lock')
self.logger.out('Acquiring read lock for synchronization phase E', state='i')
lock.acquire()
self.logger.out('Acquired read lock for synchronization phase E', state='o')
@ -638,7 +638,7 @@ class NodeInstance(object):
self.logger.out('Released read lock for synchronization phase E', state='o')
# Synchronize nodes F (I am reader)
lock = self.zkhandler.readlock('base.lock.primary_node')
lock = self.zkhandler.readlock('base.config.primary_node.sync_lock')
self.logger.out('Acquiring read lock for synchronization phase F', state='i')
lock.acquire()
self.logger.out('Acquired read lock for synchronization phase F', state='o')
@ -650,7 +650,7 @@ class NodeInstance(object):
self.logger.out('Released read lock for synchronization phase F', state='o')
# Synchronize nodes G (I am reader)
lock = self.zkhandler.readlock('base.lock.primary_node')
lock = self.zkhandler.readlock('base.config.primary_node.sync_lock')
self.logger.out('Acquiring read lock for synchronization phase G', state='i')
try:
lock.acquire(timeout=60) # Don't wait forever and completely block us