diff --git a/daemon-common/migrations/versions/10.json b/daemon-common/migrations/versions/10.json
index 9f825c62..fb70d561 100644
--- a/daemon-common/migrations/versions/10.json
+++ b/daemon-common/migrations/versions/10.json
@@ -1 +1 @@
-{"version": "10", "root": "", "base": {"root": "", "schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "cmd": "/cmd", "cmd.node": "/cmd/nodes", "cmd.domain": "/cmd/domains", "cmd.ceph": "/cmd/ceph", "logs": "/logs", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.health": "/ceph/health", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "logs": {"node": "", "messages": "/messages"}, "node": {"name": "", "keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "data.pvc_version": "/pvcversion", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword", "sriov": "/sriov", "sriov.pf": "/sriov/pf", "sriov.vf": "/sriov/vf", "monitoring.plugins": "/monitoring_plugins", "monitoring.data": "/monitoring_data", "monitoring.health": "/monitoring_health"}, "monitoring_plugin": {"name": "", "last_run": "/last_run", "health_delta": "/health_delta", "message": "/message", "data": "/data", "runtime": "/runtime"}, "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"}, "sriov_vf": {"phy": "", "pf": "/pf", "mtu": "/mtu", "mac": "/mac", "phy_mac": "/phy_mac", "config": "/config", "config.vlan_id": "/config/vlan_id", "config.vlan_qos": "/config/vlan_qos", "config.tx_rate_min": "/config/tx_rate_min", "config.tx_rate_max": "/config/tx_rate_max", "config.spoof_check": "/config/spoof_check", "config.link_state": "/config/link_state", "config.trust": "/config/trust", "config.query_rss": "/config/query_rss", "pci": "/pci", "pci.domain": "/pci/domain", "pci.bus": "/pci/bus", "pci.slot": "/pci/slot", "pci.function": "/pci/function", "used": "/used", "used_by": "/used_by"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "meta.tags": "/tags", "migrate.sync_lock": "/migrate_sync_lock"}, "tag": {"name": "", "type": "/type", "protected": "/protected"}, "network": {"vni": "", "type": "/nettype", "mtu": "/mtu", "rule": "/firewall_rules", "rule.in": "/firewall_rules/in", "rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": "/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname", "expiry": "/expiry", "client_id": "/clientid"}, "rule": {"description": "", "rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "db_device": "/db_device", "fsid": "/fsid", "ofsid": "/fsid/osd", "cfsid": "/fsid/cluster", "lvm": "/lvm", "vg": "/lvm/vg", "lv": "/lvm/lv", "is_split": "/is_split", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "tier": "/tier", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}}
\ No newline at end of file
+{"version": "10", "root": "", "base": {"root": "", "schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "logs": "/logs", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.health": "/ceph/health", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "logs": {"node": "", "messages": "/messages"}, "node": {"name": "", "keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "data.pvc_version": "/pvcversion", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword", "sriov": "/sriov", "sriov.pf": "/sriov/pf", "sriov.vf": "/sriov/vf", "monitoring.plugins": "/monitoring_plugins", "monitoring.data": "/monitoring_data", "monitoring.health": "/monitoring_health"}, "monitoring_plugin": {"name": "", "last_run": "/last_run", "health_delta": "/health_delta", "message": "/message", "data": "/data", "runtime": "/runtime"}, "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"}, "sriov_vf": {"phy": "", "pf": "/pf", "mtu": "/mtu", "mac": "/mac", "phy_mac": "/phy_mac", "config": "/config", "config.vlan_id": "/config/vlan_id", "config.vlan_qos": "/config/vlan_qos", "config.tx_rate_min": "/config/tx_rate_min", "config.tx_rate_max": "/config/tx_rate_max", "config.spoof_check": "/config/spoof_check", "config.link_state": "/config/link_state", "config.trust": "/config/trust", "config.query_rss": "/config/query_rss", "pci": "/pci", "pci.domain": "/pci/domain", "pci.bus": "/pci/bus", "pci.slot": "/pci/slot", "pci.function": "/pci/function", "used": "/used", "used_by": "/used_by"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "meta.tags": "/tags", "migrate.sync_lock": "/migrate_sync_lock"}, "tag": {"name": "", "type": "/type", "protected": "/protected"}, "network": {"vni": "", "type": "/nettype", "mtu": "/mtu", "rule": "/firewall_rules", "rule.in": "/firewall_rules/in", "rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": "/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname", "expiry": "/expiry", "client_id": "/clientid"}, "rule": {"description": "", "rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "db_device": "/db_device", "fsid": "/fsid", "ofsid": "/fsid/osd", "cfsid": "/fsid/cluster", "lvm": "/lvm", "vg": "/lvm/vg", "lv": "/lvm/lv", "is_split": "/is_split", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "tier": "/tier", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}}
\ No newline at end of file
diff --git a/daemon-common/vm.py b/daemon-common/vm.py
index b74d834c..870f9465 100644
--- a/daemon-common/vm.py
+++ b/daemon-common/vm.py
@@ -138,53 +138,6 @@ def is_migrated(zkhandler, domain):
         return False
 
 
-def flush_locks(zkhandler, domain):
-    # Validate that VM exists in cluster
-    dom_uuid = getDomainUUID(zkhandler, domain)
-    if not dom_uuid:
-        return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)
-
-    # Verify that the VM is in a stopped state; freeing locks is not safe otherwise
-    state = zkhandler.read(("domain.state", dom_uuid))
-    if state != "stop":
-        return (
-            False,
-            'ERROR: VM "{}" is not in stopped state; flushing RBD locks on a running VM is dangerous.'.format(
-                domain
-            ),
-        )
-
-    # Tell the cluster to create a new OSD for the host
-    flush_locks_string = "flush_locks {}".format(dom_uuid)
-    zkhandler.write([("base.cmd.domain", flush_locks_string)])
-    # Wait 1/2 second for the cluster to get the message and start working
-    time.sleep(0.5)
-    # Acquire a read lock, so we get the return exclusively
-    lock = zkhandler.readlock("base.cmd.domain")
-    with lock:
-        try:
-            result = zkhandler.read("base.cmd.domain").split()[0]
-            if result == "success-flush_locks":
-                message = 'Flushed locks on VM "{}"'.format(domain)
-                success = True
-            else:
-                message = 'ERROR: Failed to flush locks on VM "{}"; check node logs for details.'.format(
-                    domain
-                )
-                success = False
-        except Exception:
-            message = "ERROR: Command ignored by node."
-            success = False
-
-    # Acquire a write lock to ensure things go smoothly
-    lock = zkhandler.writelock("base.cmd.domain")
-    with lock:
-        time.sleep(0.5)
-        zkhandler.write([("base.cmd.domain", "")])
-
-    return success, message
-
-
 def define_vm(
     zkhandler,
     config_data,
diff --git a/daemon-common/zkhandler.py b/daemon-common/zkhandler.py
index 3d5e288f..76ff5557 100644
--- a/daemon-common/zkhandler.py
+++ b/daemon-common/zkhandler.py
@@ -560,10 +560,6 @@ class ZKSchema(object):
             "config.primary_node.sync_lock": f"{_schema_root}/config/primary_node/sync_lock",
             "config.upstream_ip": f"{_schema_root}/config/upstream_ip",
             "config.migration_target_selector": f"{_schema_root}/config/migration_target_selector",
-            "cmd": f"{_schema_root}/cmd",
-            "cmd.node": f"{_schema_root}/cmd/nodes",
-            "cmd.domain": f"{_schema_root}/cmd/domains",
-            "cmd.ceph": f"{_schema_root}/cmd/ceph",
             "logs": "/logs",
             "node": f"{_schema_root}/nodes",
             "domain": f"{_schema_root}/domains",
diff --git a/node-daemon/pvcnoded/Daemon.py b/node-daemon/pvcnoded/Daemon.py
index ea068e3e..f5be1075 100644
--- a/node-daemon/pvcnoded/Daemon.py
+++ b/node-daemon/pvcnoded/Daemon.py
@@ -903,14 +903,6 @@ def entrypoint():
     )
 
     if config["enable_hypervisor"]:
-        # VM command pipeline key
-        @zkhandler.zk_conn.DataWatch(zkhandler.schema.path("base.cmd.domain"))
-        def run_domain_command(data, stat, event=""):
-            if data:
-                VMInstance.vm_command(
-                    zkhandler, logger, this_node, data.decode("ascii")
-                )
-
         # VM domain objects
         @zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path("base.domain"))
        def update_domains(new_domain_list):
@@ -942,14 +934,6 @@ def entrypoint():
                 d_node[node].update_domain_list(d_domain)
 
     if config["enable_storage"]:
-        # Ceph command pipeline key
-        @zkhandler.zk_conn.DataWatch(zkhandler.schema.path("base.cmd.ceph"))
-        def run_ceph_command(data, stat, event=""):
-            if data:
-                CephInstance.ceph_command(
-                    zkhandler, logger, this_node, data.decode("ascii"), d_osd
-                )
-
         # OSD objects
         @zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path("base.osd"))
         def update_osds(new_osd_list):
diff --git a/node-daemon/pvcnoded/objects/VMInstance.py b/node-daemon/pvcnoded/objects/VMInstance.py
index 0d3e4d4f..218ac2b6 100644
--- a/node-daemon/pvcnoded/objects/VMInstance.py
+++ b/node-daemon/pvcnoded/objects/VMInstance.py
@@ -22,7 +22,6 @@
 import uuid
 import time
 import libvirt
-import json
 
 from threading import Thread
 
@@ -32,8 +31,6 @@ import daemon_lib.common as common
 
 import pvcnoded.objects.VMConsoleWatcherInstance as VMConsoleWatcherInstance
 
-import daemon_lib.common as daemon_common
-
 
 class VMInstance(object):
     # Initialization function
@@ -116,7 +113,7 @@ class VMInstance(object):
         if self.dom is not None:
             memory = int(self.dom.info()[2] / 1024)
         else:
-            domain_information = daemon_common.getInformationFromXML(
+            domain_information = common.getInformationFromXML(
                 self.zkhandler, self.domuuid
             )
             memory = int(domain_information["memory"])
@@ -961,98 +958,3 @@ class VMInstance(object):
 
         # Return the dom object (or None)
         return dom
-
-    # Flush the locks of a VM based on UUID
-    @staticmethod
-    def flush_locks(zkhandler, logger, dom_uuid, this_node=None):
-        logger.out('Flushing RBD locks for VM "{}"'.format(dom_uuid), state="i")
-        # Get the list of RBD images
-        rbd_list = zkhandler.read(("domain.storage.volumes", dom_uuid)).split(",")
-
-        for rbd in rbd_list:
-            # Check if a lock exists
-            (
-                lock_list_retcode,
-                lock_list_stdout,
-                lock_list_stderr,
-            ) = common.run_os_command("rbd lock list --format json {}".format(rbd))
-            if lock_list_retcode != 0:
-                logger.out(
-                    'Failed to obtain lock list for volume "{}"'.format(rbd), state="e"
-                )
-                continue
-
-            try:
-                lock_list = json.loads(lock_list_stdout)
-            except Exception as e:
-                logger.out(
-                    'Failed to parse lock list for volume "{}": {}'.format(rbd, e),
-                    state="e",
-                )
-                continue
-
-            # If there's at least one lock
-            if lock_list:
-                # Loop through the locks
-                for lock in lock_list:
-                    if (
-                        this_node is not None
-                        and zkhandler.read(("domain.state", dom_uuid)) != "stop"
-                        and lock["address"].split(":")[0] != this_node.storage_ipaddr
-                    ):
-                        logger.out(
-                            "RBD lock does not belong to this host (lock owner: {}): freeing this lock would be unsafe, aborting".format(
-                                lock["address"].split(":")[0]
-                            ),
-                            state="e",
-                        )
-                        zkhandler.write(
-                            [
-                                (("domain.state", dom_uuid), "fail"),
-                                (
-                                    ("domain.failed_reason", dom_uuid),
-                                    "Could not safely free RBD lock {} ({}) on volume {}; stop VM and flush locks manually".format(
-                                        lock["id"], lock["address"], rbd
-                                    ),
-                                ),
-                            ]
-                        )
-                        break
-                    # Free the lock
-                    (
-                        lock_remove_retcode,
-                        lock_remove_stdout,
-                        lock_remove_stderr,
-                    ) = common.run_os_command(
-                        'rbd lock remove {} "{}" "{}"'.format(
-                            rbd, lock["id"], lock["locker"]
-                        )
-                    )
-                    if lock_remove_retcode != 0:
-                        logger.out(
-                            'Failed to free RBD lock "{}" on volume "{}": {}'.format(
-                                lock["id"], rbd, lock_remove_stderr
-                            ),
-                            state="e",
-                        )
-                        zkhandler.write(
-                            [
-                                (("domain.state", dom_uuid), "fail"),
-                                (
-                                    ("domain.failed_reason", dom_uuid),
-                                    "Could not free RBD lock {} ({}) on volume {}: {}".format(
-                                        lock["id"],
-                                        lock["address"],
-                                        rbd,
-                                        lock_remove_stderr,
-                                    ),
-                                ),
-                            ]
-                        )
-                        break
-                    logger.out(
-                        'Freed RBD lock "{}" on volume "{}"'.format(lock["id"], rbd),
-                        state="o",
-                    )
-
-        return True