diff --git a/daemon-common/migrations/versions/0.json b/daemon-common/migrations/versions/0.json index 0f517d4b..4ee20264 100644 --- a/daemon-common/migrations/versions/0.json +++ b/daemon-common/migrations/versions/0.json @@ -1 +1 @@ -{"version": "0", "root": "", "base": {"schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "lock": "/locks", "lock.primary_node": "/locks/primary_node", "lock.flush_lock": "/locks/flush_lock", "lock.domain_migrate": "/locks/domain_migrate", "cmd": "/cmd", "cmd.node": "/cmd/nodes", "cmd.domain": "/cmd/domains", "cmd.ceph": "/cmd/ceph", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "node": {"keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit"}, "network": {"type": "/nettype", "rules": "/firewall_rules", "nameservers": "/name_servers", "domain": "/domain", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.reservations": "/dhcp4_reservations", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "osd": {"node": "/node", "device": "/device", "stats": "/stats"}, "pool": {"pgs": "/pgs", "stats": "/stats"}, "volume": {"stats": "/stats"}, "snapshot": {"stats": "/stats"}} \ No newline at end of file +{"version": "0", "root": "", "base": {"schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "cmd": "/cmd", "cmd.node": "/cmd/nodes", "cmd.domain": "/cmd/domains", "cmd.ceph": "/cmd/ceph", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "node": {"keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "migrate.sync_lock": "/migrate_sync_lock"}, "network": {"type": "/nettype", "rules": "/firewall_rules", "nameservers": "/name_servers", "domain": "/domain", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.reservations": "/dhcp4_reservations", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "osd": {"node": "/node", "device": "/device", "stats": "/stats"}, "pool": {"pgs": "/pgs", "stats": "/stats"}, "volume": {"stats": "/stats"}, "snapshot": {"stats": "/stats"}} \ No newline at end of file diff --git a/daemon-common/zkhandler.py b/daemon-common/zkhandler.py index 9ab8863a..51a58437 100644 --- a/daemon-common/zkhandler.py +++ b/daemon-common/zkhandler.py @@ -420,12 +420,9 @@ class ZKSchema(object): 'config': f'{_schema_root}/config', 'config.maintenance': f'{_schema_root}/config/maintenance', 'config.primary_node': f'{_schema_root}/config/primary_node', + 'config.primary_node.sync_lock': f'{_schema_root}/config/primary_node/sync_lock', 'config.upstream_ip': f'{_schema_root}/config/upstream_ip', 'config.migration_target_selector': f'{_schema_root}/config/migration_target_selector', - 'lock': f'{_schema_root}/locks', - 'lock.primary_node': f'{_schema_root}/locks/primary_node', - 'lock.flush_lock': f'{_schema_root}/locks/flush_lock', - 'lock.domain_migrate': f'{_schema_root}/locks/domain_migrate', 'cmd': f'{_schema_root}/cmd', 'cmd.node': f'{_schema_root}/cmd/nodes', 'cmd.domain': f'{_schema_root}/cmd/domains', @@ -480,7 +477,8 @@ class ZKSchema(object): 'meta.autostart': '/node_autostart', 'meta.migrate_method': '/migration_method', 'meta.node_selector': '/node_selector', - 'meta.node_limit': '/node_limit' + 'meta.node_limit': '/node_limit', + 'migrate.sync_lock': '/migrate_sync_lock' }, # The schema of an individual network entry (/networks/{vni}) 'network': { diff --git a/node-daemon/pvcnoded/NodeInstance.py b/node-daemon/pvcnoded/NodeInstance.py index 2fc689a2..4172f6fb 100644 --- a/node-daemon/pvcnoded/NodeInstance.py +++ b/node-daemon/pvcnoded/NodeInstance.py @@ -329,25 +329,25 @@ class NodeInstance(object): # Ensure our lock key is populated self.zkhandler.write([ - ('base.lock.primary_node', '') + ('base.config.primary_node.sync_lock', '') ]) # Synchronize nodes A (I am writer) - lock = self.zkhandler.writelock('base.lock.primary_node') + lock = self.zkhandler.writelock('base.config.primary_node.sync_lock') self.logger.out('Acquiring write lock for synchronization phase A', state='i') lock.acquire() self.logger.out('Acquired write lock for synchronization phase A', state='o') time.sleep(1) # Time fir reader to acquire the lock self.logger.out('Releasing write lock for synchronization phase A', state='i') self.zkhandler.write([ - ('base.lock.primary_node', '') + ('base.config.primary_node.sync_lock', '') ]) lock.release() self.logger.out('Released write lock for synchronization phase A', state='o') time.sleep(0.1) # Time fir new writer to acquire the lock # Synchronize nodes B (I am reader) - lock = self.zkhandler.readlock('base.lock.primary_node') + lock = self.zkhandler.readlock('base.config.primary_node.sync_lock') self.logger.out('Acquiring read lock for synchronization phase B', state='i') lock.acquire() self.logger.out('Acquired read lock for synchronization phase B', state='o') @@ -356,7 +356,7 @@ class NodeInstance(object): self.logger.out('Released read lock for synchronization phase B', state='o') # Synchronize nodes C (I am writer) - lock = self.zkhandler.writelock('base.lock.primary_node') + lock = self.zkhandler.writelock('base.config.primary_node.sync_lock') self.logger.out('Acquiring write lock for synchronization phase C', state='i') lock.acquire() self.logger.out('Acquired write lock for synchronization phase C', state='o') @@ -373,13 +373,13 @@ class NodeInstance(object): common.createIPAddress(self.upstream_floatingipaddr, self.upstream_cidrnetmask, 'brupstream') self.logger.out('Releasing write lock for synchronization phase C', state='i') self.zkhandler.write([ - ('base.lock.primary_node', '') + ('base.config.primary_node.sync_lock', '') ]) lock.release() self.logger.out('Released write lock for synchronization phase C', state='o') # Synchronize nodes D (I am writer) - lock = self.zkhandler.writelock('base.lock.primary_node') + lock = self.zkhandler.writelock('base.config.primary_node.sync_lock') self.logger.out('Acquiring write lock for synchronization phase D', state='i') lock.acquire() self.logger.out('Acquired write lock for synchronization phase D', state='o') @@ -405,13 +405,13 @@ class NodeInstance(object): common.createIPAddress(self.storage_floatingipaddr, self.storage_cidrnetmask, 'brstorage') self.logger.out('Releasing write lock for synchronization phase D', state='i') self.zkhandler.write([ - ('base.lock.primary_node', '') + ('base.config.primary_node.sync_lock', '') ]) lock.release() self.logger.out('Released write lock for synchronization phase D', state='o') # Synchronize nodes E (I am writer) - lock = self.zkhandler.writelock('base.lock.primary_node') + lock = self.zkhandler.writelock('base.config.primary_node.sync_lock') self.logger.out('Acquiring write lock for synchronization phase E', state='i') lock.acquire() self.logger.out('Acquired write lock for synchronization phase E', state='o') @@ -428,13 +428,13 @@ class NodeInstance(object): common.createIPAddress('169.254.169.254', '32', 'lo') self.logger.out('Releasing write lock for synchronization phase E', state='i') self.zkhandler.write([ - ('base.lock.primary_node', '') + ('base.config.primary_node.sync_lock', '') ]) lock.release() self.logger.out('Released write lock for synchronization phase E', state='o') # Synchronize nodes F (I am writer) - lock = self.zkhandler.writelock('base.lock.primary_node') + lock = self.zkhandler.writelock('base.config.primary_node.sync_lock') self.logger.out('Acquiring write lock for synchronization phase F', state='i') lock.acquire() self.logger.out('Acquired write lock for synchronization phase F', state='o') @@ -444,13 +444,13 @@ class NodeInstance(object): self.d_network[network].createGateways() self.logger.out('Releasing write lock for synchronization phase F', state='i') self.zkhandler.write([ - ('base.lock.primary_node', '') + ('base.config.primary_node.sync_lock', '') ]) lock.release() self.logger.out('Released write lock for synchronization phase F', state='o') # Synchronize nodes G (I am writer) - lock = self.zkhandler.writelock('base.lock.primary_node') + lock = self.zkhandler.writelock('base.config.primary_node.sync_lock') self.logger.out('Acquiring write lock for synchronization phase G', state='i') lock.acquire() self.logger.out('Acquired write lock for synchronization phase G', state='o') @@ -518,7 +518,7 @@ class NodeInstance(object): self.logger.out('Not starting DNS aggregator due to Patroni failures', state='e') self.logger.out('Releasing write lock for synchronization phase G', state='i') self.zkhandler.write([ - ('base.lock.primary_node', '') + ('base.config.primary_node.sync_lock', '') ]) lock.release() self.logger.out('Released write lock for synchronization phase G', state='o') @@ -538,7 +538,7 @@ class NodeInstance(object): time.sleep(0.2) # Initial delay for the first writer to grab the lock # Synchronize nodes A (I am reader) - lock = self.zkhandler.readlock('base.lock.primary_node') + lock = self.zkhandler.readlock('base.config.primary_node.sync_lock') self.logger.out('Acquiring read lock for synchronization phase A', state='i') lock.acquire() self.logger.out('Acquired read lock for synchronization phase A', state='o') @@ -547,7 +547,7 @@ class NodeInstance(object): self.logger.out('Released read lock for synchronization phase A', state='o') # Synchronize nodes B (I am writer) - lock = self.zkhandler.writelock('base.lock.primary_node') + lock = self.zkhandler.writelock('base.config.primary_node.sync_lock') self.logger.out('Acquiring write lock for synchronization phase B', state='i') lock.acquire() self.logger.out('Acquired write lock for synchronization phase B', state='o') @@ -559,7 +559,7 @@ class NodeInstance(object): self.d_network[network].stopDHCPServer() self.logger.out('Releasing write lock for synchronization phase B', state='i') self.zkhandler.write([ - ('base.lock.primary_node', '') + ('base.config.primary_node.sync_lock', '') ]) lock.release() self.logger.out('Released write lock for synchronization phase B', state='o') @@ -572,7 +572,7 @@ class NodeInstance(object): time.sleep(0.1) # Time fir new writer to acquire the lock # Synchronize nodes C (I am reader) - lock = self.zkhandler.readlock('base.lock.primary_node') + lock = self.zkhandler.readlock('base.config.primary_node.sync_lock') self.logger.out('Acquiring read lock for synchronization phase C', state='i') lock.acquire() self.logger.out('Acquired read lock for synchronization phase C', state='o') @@ -591,7 +591,7 @@ class NodeInstance(object): self.logger.out('Released read lock for synchronization phase C', state='o') # Synchronize nodes D (I am reader) - lock = self.zkhandler.readlock('base.lock.primary_node') + lock = self.zkhandler.readlock('base.config.primary_node.sync_lock') self.logger.out('Acquiring read lock for synchronization phase D', state='i') lock.acquire() self.logger.out('Acquired read lock for synchronization phase D', state='o') @@ -619,7 +619,7 @@ class NodeInstance(object): self.logger.out('Released read lock for synchronization phase D', state='o') # Synchronize nodes E (I am reader) - lock = self.zkhandler.readlock('base.lock.primary_node') + lock = self.zkhandler.readlock('base.config.primary_node.sync_lock') self.logger.out('Acquiring read lock for synchronization phase E', state='i') lock.acquire() self.logger.out('Acquired read lock for synchronization phase E', state='o') @@ -638,7 +638,7 @@ class NodeInstance(object): self.logger.out('Released read lock for synchronization phase E', state='o') # Synchronize nodes F (I am reader) - lock = self.zkhandler.readlock('base.lock.primary_node') + lock = self.zkhandler.readlock('base.config.primary_node.sync_lock') self.logger.out('Acquiring read lock for synchronization phase F', state='i') lock.acquire() self.logger.out('Acquired read lock for synchronization phase F', state='o') @@ -650,7 +650,7 @@ class NodeInstance(object): self.logger.out('Released read lock for synchronization phase F', state='o') # Synchronize nodes G (I am reader) - lock = self.zkhandler.readlock('base.lock.primary_node') + lock = self.zkhandler.readlock('base.config.primary_node.sync_lock') self.logger.out('Acquiring read lock for synchronization phase G', state='i') try: lock.acquire(timeout=60) # Don't wait forever and completely block us