353 lines
12 KiB
Python
353 lines
12 KiB
Python
#!/usr/bin/env python3
|
|
|
|
# cluster.py - PVC client function library, cluster management
|
|
# Part of the Parallel Virtual Cluster (PVC) system
|
|
#
|
|
# Copyright (C) 2018-2022 Joshua M. Boniface <joshua@boniface.me>
|
|
#
|
|
# This program is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation, version 3.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
#
|
|
###############################################################################
|
|
|
|
from json import loads
|
|
|
|
import daemon_lib.common as common
|
|
import daemon_lib.vm as pvc_vm
|
|
import daemon_lib.node as pvc_node
|
|
import daemon_lib.network as pvc_network
|
|
import daemon_lib.ceph as pvc_ceph
|
|
|
|
|
|
def set_maintenance(zkhandler, maint_state):
    """
    Switch the cluster between maintenance mode and normal mode.

    The current value of "base.config.maintenance" is read first; when it
    already equals maint_state nothing is written. Otherwise the key is set
    to "true" when maint_state is the string "true", and to "false" for any
    other value.

    Returns a (True, message) tuple in every case.
    """
    # Anything other than the literal string "true" is treated as normal mode
    mode = "maintenance" if maint_state == "true" else "normal"

    unchanged_messages = {
        "maintenance": "Cluster is already in maintenance mode",
        "normal": "Cluster is already in normal mode",
    }
    changed_messages = {
        "maintenance": "Successfully set cluster in maintenance mode",
        "normal": "Successfully set cluster in normal mode",
    }
    stored_values = {"maintenance": "true", "normal": "false"}

    # Nothing to do when the requested state is already the stored one
    if maint_state == zkhandler.read("base.config.maintenance"):
        return True, unchanged_messages[mode]

    zkhandler.write([("base.config.maintenance", stored_values[mode])])
    return True, changed_messages[mode]
|
|
|
|
|
|
def getClusterHealth(zkhandler, node_list, vm_list, ceph_osd_list):
    """
    Compute an overall cluster health score and the list of fault messages.

    The score starts at 100 and is reduced by:
      * each node's own health shortfall (100 - node['health']);
      * a fixed penalty (see health_delta_map) for every node not in the
        'run' daemon state, or otherwise not in the 'ready' domain state;
      * a fixed penalty for every VM whose state is not one of start,
        disable, migrate, unmigrate or provision;
      * a fixed penalty for every OSD that is out, or otherwise down;
      * a fixed penalty when the total allocated VM memory exceeds the
        combined memory of all nodes except the largest one (n-1 check);
      * a fixed penalty when the Ceph health status stored under
        "base.storage.health" is HEALTH_ERR or HEALTH_WARN.

    The result is clamped so that it never drops below 0, however many
    faults are present at once.

    Returns a (cluster_health, messages) tuple where messages is a list of
    human-readable strings, one per detected fault.
    """
    health_delta_map = {
        'node_stopped': 50,
        'node_flushed': 10,
        'vm_stopped': 10,
        'osd_out': 50,
        'osd_down': 10,
        'memory_overprovisioned': 50,
        'ceph_err': 50,
        'ceph_warn': 10,
    }

    # VM states that do not count against cluster health
    healthy_vm_states = ["start", "disable", "migrate", "unmigrate", "provision"]

    # Text forms of the numeric OSD in/up flags
    in_texts = {1: "in", 0: "out"}
    up_texts = {1: "up", 0: "down"}

    # Generate total cluster health numbers
    cluster_health = 100
    messages = list()

    for node in node_list:
        # Apply node health values to total health number
        cluster_health -= 100 - node['health']
        for entry in node['health_details']:
            if entry['health_delta'] > 0:
                messages.append(f"{node['name']}: plugin {entry['plugin_name']}: {entry['message']}")

        # Handle unhealthy node states; a bad daemon state takes precedence
        # over a bad domain state so a node is only penalized once here
        if node['daemon_state'] != 'run':
            cluster_health -= health_delta_map['node_stopped']
            messages.append(f"cluster: {node['name']} in {node['daemon_state']} daemon state")
        elif node['domain_state'] != 'ready':
            cluster_health -= health_delta_map['node_flushed']
            messages.append(f"cluster: {node['name']} in {node['domain_state']} domain state")

    for vm in vm_list:
        # Handle unhealthy VM states
        if vm['state'] not in healthy_vm_states:
            cluster_health -= health_delta_map['vm_stopped']
            messages.append(f"cluster: {vm['name']} in {vm['state']} state")

    for ceph_osd in ceph_osd_list:
        # Handle unhealthy OSD states; "out" takes precedence over "down"
        if in_texts[ceph_osd["stats"]["in"]] != "in":
            cluster_health -= health_delta_map['osd_out']
            messages.append(f"cluster: OSD {ceph_osd['id']} in {in_texts[ceph_osd['stats']['in']]} state")
        elif up_texts[ceph_osd["stats"]["up"]] != "up":
            cluster_health -= health_delta_map['osd_down']
            messages.append(f"cluster: OSD {ceph_osd['id']} in {up_texts[ceph_osd['stats']['up']]} state")

    # Check for (n-1) overprovisioning
    # Assume X nodes. If the total VM memory allocation (as reported by each
    # node's "allocated" figure) is greater than the total memory of the
    # (n-1) smallest nodes, trigger this warning.
    alloc_total = 0
    node_largest_index = None
    node_largest_count = 0
    for index, node in enumerate(node_list):
        node_mem_total = node["memory"]["total"]
        alloc_total += node["memory"]["allocated"]
        # Determine if this node is the largest seen so far; on a tie the
        # first node seen is kept
        if node_mem_total > node_largest_count:
            node_largest_index = index
            node_largest_count = node_mem_total

    # Sum the memory of every node except the single largest one
    n_minus_1_total = sum(
        node["memory"]["total"]
        for index, node in enumerate(node_list)
        if index != node_largest_index
    )

    if alloc_total > n_minus_1_total:
        cluster_health -= health_delta_map['memory_overprovisioned']
        messages.append(f"cluster: Total VM memory is overprovisioned ({alloc_total} > {n_minus_1_total} n-1)")

    # Check Ceph cluster health
    ceph_health = loads(zkhandler.read("base.storage.health"))
    ceph_health_status = ceph_health["status"]
    ceph_health_entries = ceph_health["checks"].keys()

    if ceph_health_status == 'HEALTH_ERR':
        cluster_health -= health_delta_map['ceph_err']
        messages.append(f"cluster: Ceph cluster in ERROR state: {', '.join(ceph_health_entries)}")
    elif ceph_health_status == 'HEALTH_WARN':
        cluster_health -= health_delta_map['ceph_warn']
        messages.append(f"cluster: Ceph cluster in WARNING state: {', '.join(ceph_health_entries)}")

    # Several simultaneous faults can subtract more than 100 in total; never
    # report a negative health value
    cluster_health = max(cluster_health, 0)

    return cluster_health, messages
|
|
|
|
|
|
def getClusterInformation(zkhandler):
    """
    Gather a summary of the cluster's state into a single dictionary.

    Reads the maintenance flag, fetches the node, VM, network, OSD, pool,
    volume and snapshot lists, tallies nodes/VMs/OSDs per state, obtains the
    health score and messages from getClusterHealth, and returns everything
    as a dictionary. The return codes of the list calls are not inspected.
    """
    # Get cluster maintenance state
    maintenance_state = zkhandler.read("base.config.maintenance")

    # Fetch the information object lists; only the data half of each
    # (retcode, data) pair is used
    _, node_list = pvc_node.get_list(zkhandler, None)
    _, vm_list = pvc_vm.get_list(zkhandler, None, None, None, None)
    _, network_list = pvc_network.get_list(zkhandler, None, None)
    _, ceph_osd_list = pvc_ceph.get_list_osd(zkhandler, None)
    _, ceph_pool_list = pvc_ceph.get_list_pool(zkhandler, None)
    _, ceph_volume_list = pvc_ceph.get_list_volume(zkhandler, None, None)
    _, ceph_snapshot_list = pvc_ceph.get_list_snapshot(zkhandler, None, None, None)

    # Determine, for each subsection, the total count
    node_count = len(node_list)
    vm_count = len(vm_list)
    network_count = len(network_list)
    ceph_osd_count = len(ceph_osd_list)
    ceph_pool_count = len(ceph_pool_list)
    ceph_volume_count = len(ceph_volume_list)
    ceph_snapshot_count = len(ceph_snapshot_list)

    # Known states, in the order they are reported
    node_state_combinations = [
        f"{daemon_state},{domain_state}"
        for daemon_state in ("run", "init", "stop", "dead")
        for domain_state in ("ready", "flush", "flushed", "unflush")
    ]
    vm_state_combinations = [
        "start",
        "restart",
        "shutdown",
        "stop",
        "disable",
        "fail",
        "migrate",
        "unmigrate",
        "provision",
    ]
    ceph_osd_state_combinations = [
        f"{up_state},{in_state}"
        for up_state in ("up", "down")
        for in_state in ("in", "out")
    ]

    def tally_states(total, known_states, observed_states):
        # Build {"total": N, "<state>": count, ...}, listing only the known
        # states that occur at least once, in known_states order
        summary = {"total": total}
        for known_state in known_states:
            occurrences = observed_states.count(known_state)
            if occurrences > 0:
                summary[known_state] = occurrences
        return summary

    # Format the Node states
    observed_node_states = [
        f"{node['daemon_state']},{node['domain_state']}" for node in node_list
    ]
    formatted_node_states = tally_states(
        node_count, node_state_combinations, observed_node_states
    )

    # Format the VM states
    observed_vm_states = [vm["state"] for vm in vm_list]
    formatted_vm_states = tally_states(
        vm_count, vm_state_combinations, observed_vm_states
    )

    # Format the OSD states
    up_texts = {1: "up", 0: "down"}
    in_texts = {1: "in", 0: "out"}
    observed_osd_states = [
        f"{up_texts[ceph_osd['stats']['up']]},{in_texts[ceph_osd['stats']['in']]}"
        for ceph_osd in ceph_osd_list
    ]
    formatted_osd_states = tally_states(
        ceph_osd_count, ceph_osd_state_combinations, observed_osd_states
    )

    # Get cluster health data
    cluster_health, cluster_health_messages = getClusterHealth(zkhandler, node_list, vm_list, ceph_osd_list)

    # Format the status data
    cluster_information = {
        "health": cluster_health,
        "health_messages": cluster_health_messages,
        "maintenance": maintenance_state,
        "primary_node": common.getPrimaryNode(zkhandler),
        "upstream_ip": zkhandler.read("base.config.upstream_ip"),
        "nodes": formatted_node_states,
        "vms": formatted_vm_states,
        "networks": network_count,
        "osds": formatted_osd_states,
        "pools": ceph_pool_count,
        "volumes": ceph_volume_count,
        "snapshots": ceph_snapshot_count,
    }

    return cluster_information
|
|
|
|
|
|
def get_info(zkhandler):
    """
    Thin wrapper around getClusterInformation, provided for naming purposes.

    Returns (True, cluster_information) when getClusterInformation yields a
    truthy result, otherwise (False, error message).
    """
    cluster_information = getClusterInformation(zkhandler)

    # Guard clause: an empty/falsy result is reported as a failure
    if not cluster_information:
        return False, "ERROR: Failed to obtain cluster information!"

    return True, cluster_information
|
|
|
|
|
|
def cluster_initialize(zkhandler, overwrite=False):
    """
    Initialize the cluster by applying the schema's keys.

    When "base.config.primary_node" already exists the cluster is treated as
    initialized and the call is refused unless overwrite is set. With
    overwrite set, every key under the "base" schema section except "root" is
    deleted recursively before the schema is applied again.

    Returns a (success, message) tuple.
    """
    # Abort if we've initialized the cluster before
    already_initialized = zkhandler.exists("base.config.primary_node")
    if already_initialized and not overwrite:
        return False, "ERROR: Cluster contains data and overwrite not set."

    if overwrite:
        # Delete the existing keys, leaving the root key in place
        removable_keys = (
            key for key in zkhandler.schema.keys("base") if key != "root"
        )
        for key in removable_keys:
            if not zkhandler.delete(f"base.{key}", recursive=True):
                return (
                    False,
                    "ERROR: Failed to delete data in cluster; running nodes perhaps?",
                )

    # Create the root keys
    zkhandler.schema.apply(zkhandler)

    return True, "Successfully initialized cluster"
|
|
|
|
|
|
def cluster_backup(zkhandler):
    """
    Dump the whole key tree, starting at "/", into a flat dictionary.

    Every visited path becomes a key of the result, mapped to the data read
    at that path. The top-level "/zookeeper" and "/patroni" trees are left
    out. Paths are visited parent-first, children in the order returned by
    the handler.

    Returns (True, cluster_data) on success, or (False, error message) when
    any read raises an exception.
    """
    # Top-level trees that must not be part of a backup: the built-in
    # /zookeeper tree and the /patroni tree
    skipped_trees = ("/zookeeper", "/patroni")

    # Dictionary of values to come
    cluster_data = dict()

    try:
        # Explicit stack instead of recursion; children are pushed in reverse
        # so they are popped (and visited) in their original order
        pending_paths = ["/"]
        while pending_paths:
            path = pending_paths.pop()

            data = zkhandler.read(path)
            children = zkhandler.children(path)
            cluster_data[path] = data

            if not children:
                continue

            # Avoid a double slash when descending from the root
            child_prefix = "/" if path == "/" else path + "/"
            child_paths = [child_prefix + child for child in children]
            pending_paths.extend(
                child_path
                for child_path in reversed(child_paths)
                if child_path not in skipped_trees
            )
    except Exception as e:
        return False, "ERROR: Failed to obtain backup: {}".format(e)

    return True, cluster_data
|
|
|
|
|
|
def cluster_restore(zkhandler, cluster_data):
    """
    Restore a backup dictionary (path -> data) into the cluster.

    The backup's schema version, stored under the path that the schema maps
    "base.schema.version" to, must be present, must be an integer, and must
    equal the cluster's own schema version. If any of these checks fails
    nothing is written. Otherwise every key/value pair of cluster_data is
    passed to the handler in a single write call.

    Returns a (success, message) tuple.
    """
    # The path is the same for every key, so resolve it once
    schema_version_key = zkhandler.schema.path("base.schema.version")
    schema_version = cluster_data.get(schema_version_key)

    # A backup without a usable schema version cannot be validated; report
    # this as a failure instead of letting int() raise
    try:
        backup_schema_version = int(schema_version)
    except (TypeError, ValueError):
        return (
            False,
            "ERROR: Backup does not contain a valid schema version (found: {}).".format(
                schema_version
            ),
        )

    if backup_schema_version != int(zkhandler.schema.version):
        return (
            False,
            "ERROR: Schema version of backup ({}) does not match cluster schema version ({}).".format(
                schema_version, zkhandler.schema.version
            ),
        )

    # Build a key+value list and write it in one operation
    kv = list(cluster_data.items())
    result = zkhandler.write(kv)

    if result:
        return True, "Restore completed successfully."
    else:
        return False, "Restore failed."
|