353 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			353 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #!/usr/bin/env python3
 | |
| 
 | |
| # cluster.py - PVC client function library, cluster management
 | |
| # Part of the Parallel Virtual Cluster (PVC) system
 | |
| #
 | |
| #    Copyright (C) 2018-2022 Joshua M. Boniface <joshua@boniface.me>
 | |
| #
 | |
| #    This program is free software: you can redistribute it and/or modify
 | |
| #    it under the terms of the GNU General Public License as published by
 | |
| #    the Free Software Foundation, version 3.
 | |
| #
 | |
| #    This program is distributed in the hope that it will be useful,
 | |
| #    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
| #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
| #    GNU General Public License for more details.
 | |
| #
 | |
| #    You should have received a copy of the GNU General Public License
 | |
| #    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 | |
| #
 | |
| ###############################################################################
 | |
| 
 | |
| from json import loads
 | |
| 
 | |
| import daemon_lib.common as common
 | |
| import daemon_lib.vm as pvc_vm
 | |
| import daemon_lib.node as pvc_node
 | |
| import daemon_lib.network as pvc_network
 | |
| import daemon_lib.ceph as pvc_ceph
 | |
| 
 | |
| 
 | |
def set_maintenance(zkhandler, maint_state):
    """
    Switch the cluster between maintenance mode and normal mode.

    maint_state is compared against the string "true": that value selects
    maintenance mode, and any other value selects normal mode. The stored
    value is read from "base.config.maintenance" first; when it already
    equals maint_state nothing is written.

    Returns a (True, message) tuple in every case.
    """
    # Work out the target value and both possible messages up front
    if maint_state == "true":
        target_value = "true"
        unchanged_message = "Cluster is already in maintenance mode"
        changed_message = "Successfully set cluster in maintenance mode"
    else:
        target_value = "false"
        unchanged_message = "Cluster is already in normal mode"
        changed_message = "Successfully set cluster in normal mode"

    # Nothing to do when the stored state already matches the request
    if maint_state == zkhandler.read("base.config.maintenance"):
        return True, unchanged_message

    zkhandler.write([("base.config.maintenance", target_value)])
    return True, changed_message
 | |
| 
 | |
| 
 | |
def getClusterHealth(zkhandler, node_list, vm_list, ceph_osd_list):
    """
    Compute the overall cluster health score and its explanatory messages.

    The score starts at 100 and is reduced by:
      * each node's own health deficit (100 - node["health"]);
      * a fixed penalty for each node whose daemon state is not "run", or
        otherwise whose domain state is not "ready";
      * a fixed penalty for each VM in a state other than start, disable,
        migrate, unmigrate or provision;
      * a fixed penalty for each OSD that is out, or otherwise down;
      * a fixed penalty when the memory allocated across all nodes exceeds
        the total memory of all nodes except the largest one (n-1);
      * a fixed penalty when the Ceph health status stored under
        "base.storage.health" is HEALTH_ERR or HEALTH_WARN.

    The penalties stack, so the raw result can fall far below zero; the
    returned score is floored at 0.

    Arguments:
      zkhandler: handler used to read the "base.storage.health" key, which
                 must hold a JSON document with "status" and "checks" keys
      node_list: list of node information dictionaries
      vm_list: list of VM information dictionaries
      ceph_osd_list: list of Ceph OSD information dictionaries

    Returns a (cluster_health, messages) tuple, where messages is a list of
    strings describing each condition found.
    """
    health_delta_map = {
        "node_stopped": 50,
        "node_flushed": 10,
        "vm_stopped": 10,
        "osd_out": 50,
        "osd_down": 10,
        "memory_overprovisioned": 50,
        "ceph_err": 50,
        "ceph_warn": 10,
    }
    healthy_vm_states = ("start", "disable", "migrate", "unmigrate", "provision")

    # Text forms of the numeric OSD flags (loop-invariant, so defined once)
    in_texts = {1: "in", 0: "out"}
    up_texts = {1: "up", 0: "down"}

    # Generate total cluster health numbers
    cluster_health = 100
    messages = []

    for node in node_list:
        # Apply node health values to total health number
        cluster_health -= 100 - node["health"]
        for entry in node["health_details"]:
            if entry["health_delta"] > 0:
                messages.append(
                    f"{node['name']}: plugin {entry['plugin_name']}: {entry['message']}"
                )

        # Handle unhealthy node states; a bad daemon state takes precedence
        # over a bad domain state so a node is only penalized once here
        if node["daemon_state"] != "run":
            cluster_health -= health_delta_map["node_stopped"]
            messages.append(
                f"cluster: {node['name']} in {node['daemon_state']} daemon state"
            )
        elif node["domain_state"] != "ready":
            cluster_health -= health_delta_map["node_flushed"]
            messages.append(
                f"cluster: {node['name']} in {node['domain_state']} domain state"
            )

    for vm in vm_list:
        # Handle unhealthy VM states
        if vm["state"] not in healthy_vm_states:
            cluster_health -= health_delta_map["vm_stopped"]
            messages.append(f"cluster: {vm['name']} in {vm['state']} state")

    for ceph_osd in ceph_osd_list:
        # Handle unhealthy OSD states; "out" takes precedence over "down",
        # and the up flag is only examined for OSDs that are in
        osd_in_text = in_texts[ceph_osd["stats"]["in"]]
        if osd_in_text != "in":
            cluster_health -= health_delta_map["osd_out"]
            messages.append(f"cluster: OSD {ceph_osd['id']} in {osd_in_text} state")
            continue

        osd_up_text = up_texts[ceph_osd["stats"]["up"]]
        if osd_up_text != "up":
            cluster_health -= health_delta_map["osd_down"]
            messages.append(f"cluster: OSD {ceph_osd['id']} in {osd_up_text} state")

    # Check for (n-1) overprovisioning
    #   Assume X nodes. If the total memory allocation reported by all nodes is
    #   greater than the total memory of the (n-1) smallest nodes, i.e. every
    #   node except the single largest one, trigger this warning.
    alloc_total = 0
    node_largest_index = None
    node_largest_count = 0
    for index, node in enumerate(node_list):
        alloc_total += node["memory"]["allocated"]
        # Determine if this node is the largest seen so far (first one wins ties)
        if node["memory"]["total"] > node_largest_count:
            node_largest_index = index
            node_largest_count = node["memory"]["total"]

    n_minus_1_total = sum(
        node["memory"]["total"]
        for index, node in enumerate(node_list)
        if index != node_largest_index
    )
    if alloc_total > n_minus_1_total:
        cluster_health -= health_delta_map["memory_overprovisioned"]
        messages.append(
            f"cluster: Total VM memory is overprovisioned ({alloc_total} > {n_minus_1_total} n-1)"
        )

    # Check Ceph cluster health
    ceph_health = loads(zkhandler.read("base.storage.health"))
    ceph_health_status = ceph_health["status"]
    ceph_health_entries = ceph_health["checks"].keys()

    if ceph_health_status == "HEALTH_ERR":
        cluster_health -= health_delta_map["ceph_err"]
        messages.append(
            f"cluster: Ceph cluster in ERROR state: {', '.join(ceph_health_entries)}"
        )
    elif ceph_health_status == "HEALTH_WARN":
        cluster_health -= health_delta_map["ceph_warn"]
        messages.append(
            f"cluster: Ceph cluster in WARNING state: {', '.join(ceph_health_entries)}"
        )

    # The score is a percentage: stacked penalties must not push it below zero
    cluster_health = max(cluster_health, 0)

    return cluster_health, messages
 | |
| 
 | |
| 
 | |
def getClusterInformation(zkhandler):
    """
    Assemble a summary dictionary describing the current cluster.

    Reads the maintenance flag, fetches the node, VM, network, OSD, pool,
    volume and snapshot lists, tallies nodes/VMs/OSDs per known state, and
    combines those with the result of getClusterHealth(), the primary node
    and the upstream IP.

    Returns the resulting dictionary. The return codes of the list calls are
    not inspected.
    """
    # Get cluster maintenance state
    maintenance_state = zkhandler.read("base.config.maintenance")

    # Get the information object lists (nodes, VMs, networks, then storage)
    _, node_list = pvc_node.get_list(zkhandler, None)
    _, vm_list = pvc_vm.get_list(zkhandler, None, None, None, None)
    _, network_list = pvc_network.get_list(zkhandler, None, None)
    _, ceph_osd_list = pvc_ceph.get_list_osd(zkhandler, None)
    _, ceph_pool_list = pvc_ceph.get_list_pool(zkhandler, None)
    _, ceph_volume_list = pvc_ceph.get_list_volume(zkhandler, None, None)
    _, ceph_snapshot_list = pvc_ceph.get_list_snapshot(zkhandler, None, None, None)

    # Determine, for each subsection, the total count
    node_count = len(node_list)
    vm_count = len(vm_list)
    network_count = len(network_list)
    ceph_osd_count = len(ceph_osd_list)
    ceph_pool_count = len(ceph_pool_list)
    ceph_volume_count = len(ceph_volume_list)
    ceph_snapshot_count = len(ceph_snapshot_list)

    # Known states, in the order they are reported
    node_state_combinations = [
        "run,ready",
        "run,flush",
        "run,flushed",
        "run,unflush",
        "init,ready",
        "init,flush",
        "init,flushed",
        "init,unflush",
        "stop,ready",
        "stop,flush",
        "stop,flushed",
        "stop,unflush",
        "dead,ready",
        "dead,flush",
        "dead,flushed",
        "dead,unflush",
    ]
    vm_state_combinations = [
        "start",
        "restart",
        "shutdown",
        "stop",
        "disable",
        "fail",
        "migrate",
        "unmigrate",
        "provision",
    ]
    ceph_osd_state_combinations = [
        "up,in",
        "up,out",
        "down,in",
        "down,out",
    ]

    def tally_states(total, known_states, observed_states):
        # Build {"total": N, state: count, ...}, listing only known states
        # that occur at least once, in the order of known_states
        summary = {"total": total}
        for known in known_states:
            occurrences = observed_states.count(known)
            if occurrences > 0:
                summary[known] = occurrences
        return summary

    # Format the Node states ("<daemon_state>,<domain_state>")
    observed_node_states = [
        f"{node['daemon_state']},{node['domain_state']}" for node in node_list
    ]
    formatted_node_states = tally_states(
        node_count, node_state_combinations, observed_node_states
    )

    # Format the VM states
    observed_vm_states = [vm["state"] for vm in vm_list]
    formatted_vm_states = tally_states(
        vm_count, vm_state_combinations, observed_vm_states
    )

    # Format the OSD states ("<up|down>,<in|out>")
    up_texts = {1: "up", 0: "down"}
    in_texts = {1: "in", 0: "out"}
    observed_osd_states = [
        f"{up_texts[osd['stats']['up']]},{in_texts[osd['stats']['in']]}"
        for osd in ceph_osd_list
    ]
    formatted_osd_states = tally_states(
        ceph_osd_count, ceph_osd_state_combinations, observed_osd_states
    )

    # Get cluster health data
    cluster_health, cluster_health_messages = getClusterHealth(
        zkhandler, node_list, vm_list, ceph_osd_list
    )

    # Format the status data
    return {
        "health": cluster_health,
        "health_messages": cluster_health_messages,
        "maintenance": maintenance_state,
        "primary_node": common.getPrimaryNode(zkhandler),
        "upstream_ip": zkhandler.read("base.config.upstream_ip"),
        "nodes": formatted_node_states,
        "vms": formatted_vm_states,
        "networks": network_count,
        "osds": formatted_osd_states,
        "pools": ceph_pool_count,
        "volumes": ceph_volume_count,
        "snapshots": ceph_snapshot_count,
    }
 | |
| 
 | |
| 
 | |
def get_info(zkhandler):
    """
    Thin wrapper around getClusterInformation(), kept for naming purposes.

    Returns (True, cluster_information) when the information is truthy,
    otherwise (False, error message).
    """
    info = getClusterInformation(zkhandler)
    if not info:
        return False, "ERROR: Failed to obtain cluster information!"
    return True, info
 | |
| 
 | |
| 
 | |
def cluster_initialize(zkhandler, overwrite=False):
    """
    Initialize the cluster by applying the schema through zkhandler.

    If "base.config.primary_node" already exists and overwrite is not set,
    nothing is changed and an error is returned. With overwrite set, every
    key listed by the schema under "base" except "root" is deleted
    recursively before the schema is applied.

    Returns a (success, message) tuple.
    """
    # Abort if we've initialized the cluster before
    previously_initialized = zkhandler.exists("base.config.primary_node")
    if previously_initialized and not overwrite:
        return False, "ERROR: Cluster contains data and overwrite not set."

    if overwrite:
        # Delete the existing keys, but never the root key
        doomed_keys = (key for key in zkhandler.schema.keys("base") if key != "root")
        for key in doomed_keys:
            if not zkhandler.delete(f"base.{key}", recursive=True):
                return (
                    False,
                    "ERROR: Failed to delete data in cluster; running nodes perhaps?",
                )

    # Create the root keys
    zkhandler.schema.apply(zkhandler)

    return True, "Successfully initialized cluster"
 | |
| 
 | |
| 
 | |
def cluster_backup(zkhandler):
    """
    Collect the value of every key reachable from "/" into a dictionary.

    The tree is walked depth-first starting at "/"; each path's value is
    read and stored under its full path. The top-level "/zookeeper" and
    "/patroni" trees are skipped.

    Returns (True, {path: value, ...}) on success, or (False, error message)
    if any exception is raised while walking the tree.
    """
    # Trees which must never be part of a backup: the built-in /zookeeper
    # tree and the /patroni tree
    skipped_trees = ("/zookeeper", "/patroni")

    # Dictionary of values to come
    collected = {}

    def walk(node_path):
        value = zkhandler.read(node_path)
        child_names = zkhandler.children(node_path)

        collected[node_path] = value

        if not child_names:
            return

        # The root already ends in a slash; every other path needs one added
        prefix = "/" if node_path == "/" else node_path + "/"

        for child_name in child_names:
            child_path = prefix + child_name
            if child_path in skipped_trees:
                continue
            walk(child_path)

    try:
        walk("/")
    except Exception as e:
        return False, "ERROR: Failed to obtain backup: {}".format(e)

    return True, collected
 | |
| 
 | |
| 
 | |
def cluster_restore(zkhandler, cluster_data):
    """
    Restore a cluster backup by writing all of its keys through zkhandler.

    The backup must contain the schema version key (the path returned by
    zkhandler.schema.path("base.schema.version")), and its value must equal
    the cluster's own schema version when both are converted to integers.
    Nothing is written unless that check passes.

    Arguments:
      zkhandler: handler providing schema.path(), schema.version and write()
      cluster_data: mapping of key path to value, as produced by a backup

    Returns a (success, message) tuple. A backup with a missing or
    non-integer schema version is reported as a failure rather than raising.
    """
    schema_version_key = zkhandler.schema.path("base.schema.version")

    # A backup without a schema version cannot be checked for compatibility
    if schema_version_key not in cluster_data:
        return (
            False,
            "ERROR: Backup does not contain a schema version; refusing to restore.",
        )

    schema_version = cluster_data[schema_version_key]
    try:
        backup_version = int(schema_version)
    except (TypeError, ValueError):
        return (
            False,
            "ERROR: Schema version of backup ({}) is not a valid integer.".format(
                schema_version
            ),
        )

    if backup_version != int(zkhandler.schema.version):
        return (
            False,
            "ERROR: Schema version of backup ({}) does not match cluster schema version ({}).".format(
                schema_version, zkhandler.schema.version
            ),
        )

    # Build a key+value list, preserving the backup's key order
    kv = [(key, cluster_data[key]) for key in cluster_data]

    # Write the whole backup in a single call
    result = zkhandler.write(kv)

    if result:
        return True, "Restore completed successfully."
    else:
        return False, "Restore failed."
 |