Compare commits: 746416b8ed...v0.9.86 (24 commits)
Commits (SHA1):

- c64e888d30
- f1249452e5
- 0a93f526e0
- 7c9512fb22
- e88b97f3a9
- 709c9cb73e
- f41c5176be
- 38e43b46c3
- ed9c37982a
- 0f24184b78
- 1ba37fe33d
- 1a05077b10
- 57c28376a6
- e781d742e6
- 6c6d1508a1
- 741dafb26b
- 032d3ebf18
- 5d9e83e8ed
- ad0bd8649f
- 9b5e53e4b6
- 9617660342
- ab0a1e0946
- 7c116b2fbc
- 1023c55087

@@ -1,5 +1,14 @@
 ## PVC Changelog
 
+###### [v0.9.86](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.86)
+
+  * [API Daemon] Significantly improves the performance of several commands via async Zookeeper calls and removal of superfluous backend calls.
+  * [Docs] Improves the project README and updates screenshot images to show the current output and more functionality.
+  * [API Daemon/CLI] Corrects some bugs in VM metainformation output.
+  * [Node Daemon] Fixes resource reporting bugs from 0.9.81 and properly clears node resource numbers on a fence.
+  * [Health Daemon] Adds a wait during pvchealthd startup until the node is in run state, to avoid erroneous faults during node bootup.
+  * [API Daemon] Fixes an incorrect reference to legacy pvcapid.yaml file in migration script.
+
 ###### [v0.9.85](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.85)
 
   * [Packaging] Fixes a dependency bug introduced in 0.9.84
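
The "async Zookeeper calls" credited above for the performance work appear throughout this diff as `zkhandler.read_many()` call sites, which batch many znode reads into one dispatch instead of issuing them one at a time. Below is a minimal sketch of that batching idea, assuming only a plain kazoo `KazooClient`; the `read_many()` helper and the znode paths are illustrative, not PVC's actual implementation:

```python
from kazoo.client import KazooClient


def read_many(zk, paths):
    # Dispatch every read at once; get_async() returns an IAsyncResult
    # immediately rather than blocking on a network round-trip per key.
    futures = [zk.get_async(path) for path in paths]

    # Collect results in the order they were requested. Each .get() blocks
    # only until that particular read completes, so the total wall time is
    # roughly one burst, not len(paths) sequential round-trips.
    results = []
    for future in futures:
        data, _stat = future.get()
        results.append(data.decode() if data is not None else None)
    return results


# Hypothetical usage; these paths are placeholders, not PVC's schema.
zk = KazooClient(hosts="127.0.0.1:2181")
zk.start()
state, node = read_many(zk, ["/example/state", "/example/node"])
zk.stop()
```

The win comes from dispatching all reads before waiting on any of them, which is exactly the pattern the `read_many()` call sites later in this diff rely on.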

@@ -52,7 +52,7 @@ These screenshots show some of the available functionality of the PVC system and
 </p>
 
 <p><img alt="4. VM information" src="images/4-vm-information.png"/><br/>
-<i>PVC can show details about the VMs in the cluster, including their state, resource allocations</i>
+<i>PVC can show details about the VMs in the cluster, including their state, resource allocations, current hosting node, and metadata.</i>
 </p>
 
 <p><img alt="5. VM details" src="images/5-vm-details.png"/><br/>

@@ -78,3 +78,7 @@ These screenshots show some of the available functionality of the PVC system and
 <p><img alt="10. Provisioner" src="images/10-provisioner.png"/><br/>
 <i>PVC features an extensively customizable and configurable VM provisioner system, including EC2-compatible CloudInit support, allowing you to define flexible VM profiles and provision new VMs with a single command.</i>
 </p>
+
+<p><img alt="11. Prometheus and Grafana dashboard" src="images/11-prometheus-grafana.png"/><br/>
+<i>PVC features several monitoring integration examples under "node-daemon/monitoring", including CheckMK, Munin, and, most recently, Prometheus, including an example Grafana dashboard for cluster monitoring and alerting.</i>
+</p>

@@ -3,7 +3,7 @@
 # Apply PVC database migrations
 # Part of the Parallel Virtual Cluster (PVC) system
 
-export PVC_CONFIG_FILE="/etc/pvc/pvcapid.yaml"
+export PVC_CONFIG_FILE="/etc/pvc/pvc.conf"
 
 if [[ ! -f ${PVC_CONFIG_FILE} ]]; then
     echo "Create a configuration file at ${PVC_CONFIG_FILE} before upgrading the database."

@@ -27,7 +27,7 @@ from distutils.util import strtobool as dustrtobool
 import daemon_lib.config as cfg
 
 # Daemon version
-version = "0.9.85"
+version = "0.9.86"
 
 # API version
 API_VERSION = 1.0

@@ -131,154 +131,12 @@ def cluster_metrics(zkhandler):
     Format status data from cluster_status into Prometheus-compatible metrics
     """
 
-    # Get general cluster information
-    status_retflag, status_data = pvc_cluster.get_info(zkhandler)
-    if not status_retflag:
-        return "Error: Status data threw error", 400
-
-    faults_data = status_data["detail"]["faults"]
-    node_data = status_data["detail"]["node"]
-    vm_data = status_data["detail"]["vm"]
-    osd_data = status_data["detail"]["osd"]
-
-    output_lines = list()
-
-    output_lines.append("# HELP pvc_info PVC cluster information")
-    output_lines.append("# TYPE pvc_info gauge")
-    output_lines.append(
-        f"pvc_info{{primary_node=\"{status_data['primary_node']}\", version=\"{status_data['pvc_version']}\", upstream_ip=\"{status_data['upstream_ip']}\"}} 1"
-    )
-
-    output_lines.append("# HELP pvc_cluster_maintenance PVC cluster maintenance state")
-    output_lines.append("# TYPE pvc_cluster_maintenance gauge")
-    output_lines.append(
-        f"pvc_cluster_maintenance {1 if bool(strtobool(status_data['maintenance'])) else 0}"
-    )
-
-    output_lines.append("# HELP pvc_cluster_health PVC cluster health status")
-    output_lines.append("# TYPE pvc_cluster_health gauge")
-    output_lines.append(f"pvc_cluster_health {status_data['cluster_health']['health']}")
-
-    output_lines.append("# HELP pvc_cluster_faults PVC cluster new faults")
-    output_lines.append("# TYPE pvc_cluster_faults gauge")
-    fault_map = dict()
-    for fault_type in pvc_common.fault_state_combinations:
-        fault_map[fault_type] = 0
-    for fault in faults_data:
-        fault_map[fault["status"]] += 1
-    for fault_type in fault_map:
-        output_lines.append(
-            f'pvc_cluster_faults{{status="{fault_type}"}} {fault_map[fault_type]}'
-        )
-
-    # output_lines.append("# HELP pvc_cluster_faults PVC cluster health faults")
-    # output_lines.append("# TYPE pvc_cluster_faults gauge")
-    # for fault_msg in status_data["cluster_health"]["messages"]:
-    #     output_lines.append(
-    #         f"pvc_cluster_faults{{id=\"{fault_msg['id']}\", message=\"{fault_msg['text']}\"}} {fault_msg['health_delta']}"
-    #     )
-
-    output_lines.append("# HELP pvc_node_health PVC cluster node health status")
-    output_lines.append("# TYPE pvc_node_health gauge")
-    for node in status_data["node_health"]:
-        if isinstance(status_data["node_health"][node]["health"], int):
-            output_lines.append(
-                f"pvc_node_health{{node=\"{node}\"}} {status_data['node_health'][node]['health']}"
-            )
-
-    output_lines.append("# HELP pvc_node_daemon_states PVC Node daemon state counts")
-    output_lines.append("# TYPE pvc_node_daemon_states gauge")
-    node_daemon_state_map = dict()
-    for state in set([s.split(",")[0] for s in pvc_common.node_state_combinations]):
-        node_daemon_state_map[state] = 0
-    for node in node_data:
-        node_daemon_state_map[node["daemon_state"]] += 1
-    for state in node_daemon_state_map:
-        output_lines.append(
-            f'pvc_node_daemon_states{{state="{state}"}} {node_daemon_state_map[state]}'
-        )
-
-    output_lines.append("# HELP pvc_node_domain_states PVC Node domain state counts")
-    output_lines.append("# TYPE pvc_node_domain_states gauge")
-    node_domain_state_map = dict()
-    for state in set([s.split(",")[1] for s in pvc_common.node_state_combinations]):
-        node_domain_state_map[state] = 0
-    for node in node_data:
-        node_domain_state_map[node["domain_state"]] += 1
-    for state in node_domain_state_map:
-        output_lines.append(
-            f'pvc_node_domain_states{{state="{state}"}} {node_domain_state_map[state]}'
-        )
-
-    output_lines.append("# HELP pvc_vm_states PVC VM state counts")
-    output_lines.append("# TYPE pvc_vm_states gauge")
-    vm_state_map = dict()
-    for state in set(pvc_common.vm_state_combinations):
-        vm_state_map[state] = 0
-    for vm in vm_data:
-        vm_state_map[vm["state"]] += 1
-    for state in vm_state_map:
-        output_lines.append(f'pvc_vm_states{{state="{state}"}} {vm_state_map[state]}')
-
-    output_lines.append("# HELP pvc_osd_up_states PVC OSD up state counts")
-    output_lines.append("# TYPE pvc_osd_up_states gauge")
-    osd_up_state_map = dict()
-    for state in set([s.split(",")[0] for s in pvc_common.ceph_osd_state_combinations]):
-        osd_up_state_map[state] = 0
-    for osd in osd_data:
-        if osd["up"] == "up":
-            osd_up_state_map["up"] += 1
-        else:
-            osd_up_state_map["down"] += 1
-    for state in osd_up_state_map:
-        output_lines.append(
-            f'pvc_osd_up_states{{state="{state}"}} {osd_up_state_map[state]}'
-        )
-
-    output_lines.append("# HELP pvc_osd_in_states PVC OSD in state counts")
-    output_lines.append("# TYPE pvc_osd_in_states gauge")
-    osd_in_state_map = dict()
-    for state in set([s.split(",")[1] for s in pvc_common.ceph_osd_state_combinations]):
-        osd_in_state_map[state] = 0
-    for osd in osd_data:
-        if osd["in"] == "in":
-            osd_in_state_map["in"] += 1
-        else:
-            osd_in_state_map["out"] += 1
-    for state in osd_in_state_map:
-        output_lines.append(
-            f'pvc_osd_in_states{{state="{state}"}} {osd_in_state_map[state]}'
-        )
-
-    output_lines.append("# HELP pvc_nodes PVC Node count")
-    output_lines.append("# TYPE pvc_nodes gauge")
-    output_lines.append(f"pvc_nodes {status_data['nodes']['total']}")
-
-    output_lines.append("# HELP pvc_vms PVC VM count")
-    output_lines.append("# TYPE pvc_vms gauge")
-    output_lines.append(f"pvc_vms {status_data['vms']['total']}")
-
-    output_lines.append("# HELP pvc_osds PVC OSD count")
-    output_lines.append("# TYPE pvc_osds gauge")
-    output_lines.append(f"pvc_osds {status_data['osds']['total']}")
-
-    output_lines.append("# HELP pvc_networks PVC Network count")
-    output_lines.append("# TYPE pvc_networks gauge")
-    output_lines.append(f"pvc_networks {status_data['networks']}")
-
-    output_lines.append("# HELP pvc_pools PVC Storage Pool count")
-    output_lines.append("# TYPE pvc_pools gauge")
-    output_lines.append(f"pvc_pools {status_data['pools']}")
-
-    output_lines.append("# HELP pvc_volumes PVC Storage Volume count")
-    output_lines.append("# TYPE pvc_volumes gauge")
-    output_lines.append(f"pvc_volumes {status_data['volumes']}")
-
-    output_lines.append("# HELP pvc_snapshots PVC Storage Snapshot count")
-    output_lines.append("# TYPE pvc_snapshots gauge")
-    output_lines.append(f"pvc_snapshots {status_data['snapshots']}")
-
-    return "\n".join(output_lines) + "\n", 200
+    retflag, retdata = pvc_cluster.get_metrics(zkhandler)
+    if retflag:
+        retcode = 200
+    else:
+        retcode = 400
+    return retdata, retcode
 
 
 @pvc_common.Profiler(config)
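
For reference, the relocated metrics code (re-added later in this diff as `get_metrics()` in the common library) returns plain Prometheus exposition text: `# HELP`/`# TYPE` pairs followed by gauge samples. The snippet below only illustrates the shape of that output; the values are invented, not taken from a real cluster:

```
# HELP pvc_cluster_health PVC cluster health status
# TYPE pvc_cluster_health gauge
pvc_cluster_health 100
# HELP pvc_node_daemon_states PVC Node daemon state counts
# TYPE pvc_node_daemon_states gauge
pvc_node_daemon_states{state="run"} 3
pvc_node_daemon_states{state="stop"} 0
```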

@@ -249,6 +249,8 @@ def getOutputColours(node_information):
         daemon_state_colour = ansiprint.yellow()
     elif node_information["daemon_state"] == "dead":
         daemon_state_colour = ansiprint.red() + ansiprint.bold()
+    elif node_information["daemon_state"] == "fenced":
+        daemon_state_colour = ansiprint.red()
     else:
         daemon_state_colour = ansiprint.blue()
 

@@ -1659,24 +1659,26 @@ def format_info(config, domain_information, long_output):
     )
 
     if not domain_information.get("node_selector"):
-        formatted_node_selector = "False"
+        formatted_node_selector = "Default"
     else:
-        formatted_node_selector = domain_information["node_selector"]
+        formatted_node_selector = str(domain_information["node_selector"]).title()
 
     if not domain_information.get("node_limit"):
-        formatted_node_limit = "False"
+        formatted_node_limit = "Any"
     else:
         formatted_node_limit = ", ".join(domain_information["node_limit"])
 
     if not domain_information.get("node_autostart"):
+        autostart_colour = ansiprint.blue()
         formatted_node_autostart = "False"
     else:
-        formatted_node_autostart = domain_information["node_autostart"]
+        autostart_colour = ansiprint.green()
+        formatted_node_autostart = "True"
 
     if not domain_information.get("migration_method"):
-        formatted_migration_method = "any"
+        formatted_migration_method = "Any"
     else:
-        formatted_migration_method = domain_information["migration_method"]
+        formatted_migration_method = str(domain_information["migration_method"]).title()
 
     ainformation.append(
         "{}Migration selector:{} {}".format(

@@ -1689,8 +1691,12 @@ def format_info(config, domain_information, long_output):
         )
     )
     ainformation.append(
-        "{}Autostart:{} {}".format(
-            ansiprint.purple(), ansiprint.end(), formatted_node_autostart
+        "{}Autostart:{} {}{}{}".format(
+            ansiprint.purple(),
+            ansiprint.end(),
+            autostart_colour,
+            formatted_node_autostart,
+            ansiprint.end(),
         )
     )
     ainformation.append(

@@ -1736,13 +1742,17 @@ def format_info(config, domain_information, long_output):
             domain_information["tags"], key=lambda t: t["type"] + t["name"]
         ):
             ainformation.append(
-                " {tags_name: <{tags_name_length}} {tags_type: <{tags_type_length}} {tags_protected: <{tags_protected_length}}".format(
+                " {tags_name: <{tags_name_length}} {tags_type: <{tags_type_length}} {tags_protected_colour}{tags_protected: <{tags_protected_length}}{end}".format(
                     tags_name_length=tags_name_length,
                     tags_type_length=tags_type_length,
                     tags_protected_length=tags_protected_length,
                     tags_name=tag["name"],
                     tags_type=tag["type"],
                     tags_protected=str(tag["protected"]),
+                    tags_protected_colour=ansiprint.green()
+                    if tag["protected"]
+                    else ansiprint.blue(),
+                    end=ansiprint.end(),
                 )
             )
     else:

@@ -2,7 +2,7 @@ from setuptools import setup
 
 setup(
     name="pvc",
-    version="0.9.85",
+    version="0.9.86",
     packages=["pvc.cli", "pvc.lib"],
     install_requires=[
         "Click",

@@ -320,13 +320,18 @@ def get_list_osd(zkhandler, limit=None, is_fuzzy=True):
 #
 def getPoolInformation(zkhandler, pool):
     # Parse the stats data
-    pool_stats_raw = zkhandler.read(("pool.stats", pool))
+    (pool_stats_raw, tier, pgs,) = zkhandler.read_many(
+        [
+            ("pool.stats", pool),
+            ("pool.tier", pool),
+            ("pool.pgs", pool),
+        ]
+    )
+
     pool_stats = dict(json.loads(pool_stats_raw))
     volume_count = len(getCephVolumes(zkhandler, pool))
-    tier = zkhandler.read(("pool.tier", pool))
     if tier is None:
         tier = "default"
-    pgs = zkhandler.read(("pool.pgs", pool))
 
     pool_information = {
         "name": pool,

@@ -19,6 +19,7 @@
 #
 ###############################################################################
 
+from distutils.util import strtobool
 from json import loads
 
 import daemon_lib.common as common

@@ -240,7 +241,9 @@ def getNodeHealth(zkhandler, node_list):
             node_health_messages.append(f"'{entry['name']}': {entry['message']}")
 
         node_health_entry = {
-            "health": node_health_value,
+            "health": int(node_health_value)
+            if isinstance(node_health_value, int)
+            else node_health_value,
             "messages": node_health_messages,
         }
         node_health[node] = node_health_entry

@@ -315,8 +318,8 @@ def getClusterInformation(zkhandler):
     for vidx, vm in enumerate(vm_list):
         # Split the large list of return values by the IDX of this VM
         # Each VM result is 2 field long
-        pos_start = nidx * 2
-        pos_end = nidx * 2 + 2
+        pos_start = vidx * 2
+        pos_end = vidx * 2 + 2
         vm_name, vm_state = tuple(all_vm_states[pos_start:pos_end])
         vm_data.append(
             {

@@ -379,11 +382,21 @@ def getClusterInformation(zkhandler):
     ceph_pool_count = len(ceph_pool_list)
 
     # Get the list of Ceph volumes
-    ceph_volume_list = zkhandler.children("base.volume")
+    ceph_volume_list = list()
+    for pool in ceph_pool_list:
+        ceph_volume_list_pool = zkhandler.children(("volume", pool))
+        if ceph_volume_list_pool is not None:
+            ceph_volume_list += [f"{pool}/{volume}" for volume in ceph_volume_list_pool]
     ceph_volume_count = len(ceph_volume_list)
 
     # Get the list of Ceph snapshots
-    ceph_snapshot_list = zkhandler.children("base.snapshot")
+    ceph_snapshot_list = list()
+    for volume in ceph_volume_list:
+        ceph_snapshot_list_volume = zkhandler.children(("snapshot", volume))
+        if ceph_snapshot_list_volume is not None:
+            ceph_snapshot_list += [
+                f"{volume}@{snapshot}" for snapshot in ceph_snapshot_list_volume
+            ]
     ceph_snapshot_count = len(ceph_snapshot_list)
 
     # Get the list of faults

@@ -424,6 +437,157 @@ def get_info(zkhandler):
         return False, "ERROR: Failed to obtain cluster information!"
 
 
+def get_metrics(zkhandler):
+    # Get general cluster information
+    status_retflag, status_data = get_info(zkhandler)
+    if not status_retflag:
+        return False, "Error: Status data threw error"
+
+    faults_data = status_data["detail"]["faults"]
+    node_data = status_data["detail"]["node"]
+    vm_data = status_data["detail"]["vm"]
+    osd_data = status_data["detail"]["osd"]
+
+    output_lines = list()
+
+    output_lines.append("# HELP pvc_info PVC cluster information")
+    output_lines.append("# TYPE pvc_info gauge")
+    output_lines.append(
+        f"pvc_info{{primary_node=\"{status_data['primary_node']}\", version=\"{status_data['pvc_version']}\", upstream_ip=\"{status_data['upstream_ip']}\"}} 1"
+    )
+
+    output_lines.append("# HELP pvc_cluster_maintenance PVC cluster maintenance state")
+    output_lines.append("# TYPE pvc_cluster_maintenance gauge")
+    output_lines.append(
+        f"pvc_cluster_maintenance {1 if bool(strtobool(status_data['maintenance'])) else 0}"
+    )
+
+    output_lines.append("# HELP pvc_cluster_health PVC cluster health status")
+    output_lines.append("# TYPE pvc_cluster_health gauge")
+    output_lines.append(f"pvc_cluster_health {status_data['cluster_health']['health']}")
+
+    output_lines.append("# HELP pvc_cluster_faults PVC cluster new faults")
+    output_lines.append("# TYPE pvc_cluster_faults gauge")
+    fault_map = dict()
+    for fault_type in common.fault_state_combinations:
+        fault_map[fault_type] = 0
+    for fault in faults_data:
+        fault_map[fault["status"]] += 1
+    for fault_type in fault_map:
+        output_lines.append(
+            f'pvc_cluster_faults{{status="{fault_type}"}} {fault_map[fault_type]}'
+        )
+
+    # output_lines.append("# HELP pvc_cluster_faults PVC cluster health faults")
+    # output_lines.append("# TYPE pvc_cluster_faults gauge")
+    # for fault_msg in status_data["cluster_health"]["messages"]:
+    #     output_lines.append(
+    #         f"pvc_cluster_faults{{id=\"{fault_msg['id']}\", message=\"{fault_msg['text']}\"}} {fault_msg['health_delta']}"
+    #     )
+
+    output_lines.append("# HELP pvc_node_health PVC cluster node health status")
+    output_lines.append("# TYPE pvc_node_health gauge")
+    for node in status_data["node_health"]:
+        if isinstance(status_data["node_health"][node]["health"], int):
+            output_lines.append(
+                f"pvc_node_health{{node=\"{node}\"}} {status_data['node_health'][node]['health']}"
+            )
+
+    output_lines.append("# HELP pvc_node_daemon_states PVC Node daemon state counts")
+    output_lines.append("# TYPE pvc_node_daemon_states gauge")
+    node_daemon_state_map = dict()
+    for state in set([s.split(",")[0] for s in common.node_state_combinations]):
+        node_daemon_state_map[state] = 0
+    for node in node_data:
+        node_daemon_state_map[node["daemon_state"]] += 1
+    for state in node_daemon_state_map:
+        output_lines.append(
+            f'pvc_node_daemon_states{{state="{state}"}} {node_daemon_state_map[state]}'
+        )
+
+    output_lines.append("# HELP pvc_node_domain_states PVC Node domain state counts")
+    output_lines.append("# TYPE pvc_node_domain_states gauge")
+    node_domain_state_map = dict()
+    for state in set([s.split(",")[1] for s in common.node_state_combinations]):
+        node_domain_state_map[state] = 0
+    for node in node_data:
+        node_domain_state_map[node["domain_state"]] += 1
+    for state in node_domain_state_map:
+        output_lines.append(
+            f'pvc_node_domain_states{{state="{state}"}} {node_domain_state_map[state]}'
+        )
+
+    output_lines.append("# HELP pvc_vm_states PVC VM state counts")
+    output_lines.append("# TYPE pvc_vm_states gauge")
+    vm_state_map = dict()
+    for state in set(common.vm_state_combinations):
+        vm_state_map[state] = 0
+    for vm in vm_data:
+        vm_state_map[vm["state"]] += 1
+    for state in vm_state_map:
+        output_lines.append(f'pvc_vm_states{{state="{state}"}} {vm_state_map[state]}')
+
+    output_lines.append("# HELP pvc_osd_up_states PVC OSD up state counts")
+    output_lines.append("# TYPE pvc_osd_up_states gauge")
+    osd_up_state_map = dict()
+    for state in set([s.split(",")[0] for s in common.ceph_osd_state_combinations]):
+        osd_up_state_map[state] = 0
+    for osd in osd_data:
+        if osd["up"] == "up":
+            osd_up_state_map["up"] += 1
+        else:
+            osd_up_state_map["down"] += 1
+    for state in osd_up_state_map:
+        output_lines.append(
+            f'pvc_osd_up_states{{state="{state}"}} {osd_up_state_map[state]}'
+        )
+
+    output_lines.append("# HELP pvc_osd_in_states PVC OSD in state counts")
+    output_lines.append("# TYPE pvc_osd_in_states gauge")
+    osd_in_state_map = dict()
+    for state in set([s.split(",")[1] for s in common.ceph_osd_state_combinations]):
+        osd_in_state_map[state] = 0
+    for osd in osd_data:
+        if osd["in"] == "in":
+            osd_in_state_map["in"] += 1
+        else:
+            osd_in_state_map["out"] += 1
+    for state in osd_in_state_map:
+        output_lines.append(
+            f'pvc_osd_in_states{{state="{state}"}} {osd_in_state_map[state]}'
+        )
+
+    output_lines.append("# HELP pvc_nodes PVC Node count")
+    output_lines.append("# TYPE pvc_nodes gauge")
+    output_lines.append(f"pvc_nodes {status_data['nodes']['total']}")
+
+    output_lines.append("# HELP pvc_vms PVC VM count")
+    output_lines.append("# TYPE pvc_vms gauge")
+    output_lines.append(f"pvc_vms {status_data['vms']['total']}")
+
+    output_lines.append("# HELP pvc_osds PVC OSD count")
+    output_lines.append("# TYPE pvc_osds gauge")
+    output_lines.append(f"pvc_osds {status_data['osds']['total']}")
+
+    output_lines.append("# HELP pvc_networks PVC Network count")
+    output_lines.append("# TYPE pvc_networks gauge")
+    output_lines.append(f"pvc_networks {status_data['networks']}")
+
+    output_lines.append("# HELP pvc_pools PVC Storage Pool count")
+    output_lines.append("# TYPE pvc_pools gauge")
+    output_lines.append(f"pvc_pools {status_data['pools']}")
+
+    output_lines.append("# HELP pvc_volumes PVC Storage Volume count")
+    output_lines.append("# TYPE pvc_volumes gauge")
+    output_lines.append(f"pvc_volumes {status_data['volumes']}")
+
+    output_lines.append("# HELP pvc_snapshots PVC Storage Snapshot count")
+    output_lines.append("# TYPE pvc_snapshots gauge")
+    output_lines.append(f"pvc_snapshots {status_data['snapshots']}")
+
+    return True, "\n".join(output_lines) + "\n"
+
+
 def cluster_initialize(zkhandler, overwrite=False):
     # Abort if we've initialized the cluster before
     if zkhandler.exists("base.config.primary_node") and not overwrite:

@@ -401,13 +401,23 @@ def getDomainTags(zkhandler, dom_uuid):
    """
    tags = list()
 
-    for tag in zkhandler.children(("domain.meta.tags", dom_uuid)):
-        tag_type = zkhandler.read(("domain.meta.tags", dom_uuid, "tag.type", tag))
-        protected = bool(
-            strtobool(
-                zkhandler.read(("domain.meta.tags", dom_uuid, "tag.protected", tag))
-            )
-        )
+    all_tags = zkhandler.children(("domain.meta.tags", dom_uuid))
+
+    tag_reads = list()
+    for tag in all_tags:
+        tag_reads += [
+            ("domain.meta.tags", dom_uuid, "tag.type", tag),
+            ("domain.meta.tags", dom_uuid, "tag.protected", tag),
+        ]
+    all_tag_data = zkhandler.read_many(tag_reads)
+
+    for tidx, tag in enumerate(all_tags):
+        # Split the large list of return values by the IDX of this tag
+        # Each tag result is 2 fields long
+        pos_start = tidx * 2
+        pos_end = tidx * 2 + 2
+        tag_type, protected = tuple(all_tag_data[pos_start:pos_end])
+        protected = bool(strtobool(protected))
         tags.append({"name": tag, "type": tag_type, "protected": protected})
 
    return tags
|
|||||||
|
|
||||||
The UUID must be validated before calling this function!
|
The UUID must be validated before calling this function!
|
||||||
"""
|
"""
|
||||||
domain_node_limit = zkhandler.read(("domain.meta.node_limit", dom_uuid))
|
(
|
||||||
domain_node_selector = zkhandler.read(("domain.meta.node_selector", dom_uuid))
|
domain_node_limit,
|
||||||
domain_node_autostart = zkhandler.read(("domain.meta.autostart", dom_uuid))
|
domain_node_selector,
|
||||||
domain_migration_method = zkhandler.read(("domain.meta.migrate_method", dom_uuid))
|
domain_node_autostart,
|
||||||
|
domain_migration_method,
|
||||||
|
) = zkhandler.read_many(
|
||||||
|
[
|
||||||
|
("domain.meta.node_limit", dom_uuid),
|
||||||
|
("domain.meta.node_selector", dom_uuid),
|
||||||
|
("domain.meta.autostart", dom_uuid),
|
||||||
|
("domain.meta.migrate_method", dom_uuid),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
if not domain_node_limit:
|
if not domain_node_limit:
|
||||||
domain_node_limit = None
|
domain_node_limit = None
|
||||||
else:
|
else:
|
||||||
domain_node_limit = domain_node_limit.split(",")
|
domain_node_limit = domain_node_limit.split(",")
|
||||||
|
|
||||||
|
if not domain_node_selector or domain_node_selector == "none":
|
||||||
|
domain_node_selector = None
|
||||||
|
|
||||||
if not domain_node_autostart:
|
if not domain_node_autostart:
|
||||||
domain_node_autostart = None
|
domain_node_autostart = None
|
||||||
|
|
||||||
|
if not domain_migration_method or domain_migration_method == "none":
|
||||||
|
domain_migration_method = None
|
||||||
|
|
||||||
return (
|
return (
|
||||||
domain_node_limit,
|
domain_node_limit,
|
||||||
domain_node_selector,
|
domain_node_selector,
|
||||||
@ -451,10 +476,25 @@ def getInformationFromXML(zkhandler, uuid):
|
|||||||
Gather information about a VM from the Libvirt XML configuration in the Zookeper database
|
Gather information about a VM from the Libvirt XML configuration in the Zookeper database
|
||||||
and return a dict() containing it.
|
and return a dict() containing it.
|
||||||
"""
|
"""
|
||||||
domain_state = zkhandler.read(("domain.state", uuid))
|
(
|
||||||
domain_node = zkhandler.read(("domain.node", uuid))
|
domain_state,
|
||||||
domain_lastnode = zkhandler.read(("domain.last_node", uuid))
|
domain_node,
|
||||||
domain_failedreason = zkhandler.read(("domain.failed_reason", uuid))
|
domain_lastnode,
|
||||||
|
domain_failedreason,
|
||||||
|
domain_profile,
|
||||||
|
domain_vnc,
|
||||||
|
stats_data,
|
||||||
|
) = zkhandler.read_many(
|
||||||
|
[
|
||||||
|
("domain.state", uuid),
|
||||||
|
("domain.node", uuid),
|
||||||
|
("domain.last_node", uuid),
|
||||||
|
("domain.failed_reason", uuid),
|
||||||
|
("domain.profile", uuid),
|
||||||
|
("domain.console.vnc", uuid),
|
||||||
|
("domain.stats", uuid),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
(
|
(
|
||||||
domain_node_limit,
|
domain_node_limit,
|
||||||
@ -462,19 +502,17 @@ def getInformationFromXML(zkhandler, uuid):
|
|||||||
domain_node_autostart,
|
domain_node_autostart,
|
||||||
domain_migration_method,
|
domain_migration_method,
|
||||||
) = getDomainMetadata(zkhandler, uuid)
|
) = getDomainMetadata(zkhandler, uuid)
|
||||||
domain_tags = getDomainTags(zkhandler, uuid)
|
|
||||||
domain_profile = zkhandler.read(("domain.profile", uuid))
|
|
||||||
|
|
||||||
domain_vnc = zkhandler.read(("domain.console.vnc", uuid))
|
domain_tags = getDomainTags(zkhandler, uuid)
|
||||||
|
|
||||||
if domain_vnc:
|
if domain_vnc:
|
||||||
domain_vnc_listen, domain_vnc_port = domain_vnc.split(":")
|
domain_vnc_listen, domain_vnc_port = domain_vnc.split(":")
|
||||||
else:
|
else:
|
||||||
domain_vnc_listen = "None"
|
domain_vnc_listen = None
|
||||||
domain_vnc_port = "None"
|
domain_vnc_port = None
|
||||||
|
|
||||||
parsed_xml = getDomainXML(zkhandler, uuid)
|
parsed_xml = getDomainXML(zkhandler, uuid)
|
||||||
|
|
||||||
stats_data = zkhandler.read(("domain.stats", uuid))
|
|
||||||
if stats_data is not None:
|
if stats_data is not None:
|
||||||
try:
|
try:
|
||||||
stats_data = loads(stats_data)
|
stats_data = loads(stats_data)
|
||||||

@@ -491,6 +529,7 @@ def getInformationFromXML(zkhandler, uuid):
         domain_vcpu,
         domain_vcputopo,
     ) = getDomainMainDetails(parsed_xml)
+
     domain_networks = getDomainNetworks(parsed_xml, stats_data)
 
     (

@@ -71,7 +71,7 @@ def getNodeHealthDetails(zkhandler, node_name, node_health_plugins):
         ) = tuple(all_plugin_data[pos_start:pos_end])
         plugin_output = {
             "name": plugin,
-            "last_run": int(plugin_last_run),
+            "last_run": int(plugin_last_run) if plugin_last_run is not None else None,
             "health_delta": int(plugin_health_delta),
             "message": plugin_message,
             "data": json.loads(plugin_data),

@@ -334,6 +334,8 @@ def get_list(
 ):
     node_list = []
     full_node_list = zkhandler.children("base.node")
+    if full_node_list is None:
+        full_node_list = list()
     full_node_list.sort()
 
     if is_fuzzy and limit:

debian/changelog (vendored; 11 lines changed)
@@ -1,3 +1,14 @@
+pvc (0.9.86-0) unstable; urgency=high
+
+  * [API Daemon] Significantly improves the performance of several commands via async Zookeeper calls and removal of superfluous backend calls.
+  * [Docs] Improves the project README and updates screenshot images to show the current output and more functionality.
+  * [API Daemon/CLI] Corrects some bugs in VM metainformation output.
+  * [Node Daemon] Fixes resource reporting bugs from 0.9.81 and properly clears node resource numbers on a fence.
+  * [Health Daemon] Adds a wait during pvchealthd startup until the node is in run state, to avoid erroneous faults during node bootup.
+  * [API Daemon] Fixes an incorrect reference to legacy pvcapid.yaml file in migration script.
+
+ -- Joshua M. Boniface <joshua@boniface.me>  Thu, 14 Dec 2023 14:46:29 -0500
+
 pvc (0.9.85-0) unstable; urgency=high
 
   * [Packaging] Fixes a dependency bug introduced in 0.9.84

@@ -33,7 +33,7 @@ import os
 import signal
 
 # Daemon version
-version = "0.9.85"
+version = "0.9.86"
 
 
 ##########################################################
|
|||||||
# Connect to Zookeeper and return our handler and current schema version
|
# Connect to Zookeeper and return our handler and current schema version
|
||||||
zkhandler, _ = pvchealthd.util.zookeeper.connect(logger, config)
|
zkhandler, _ = pvchealthd.util.zookeeper.connect(logger, config)
|
||||||
|
|
||||||
|
logger.out("Waiting for node daemon to be operating", state="s")
|
||||||
|
while zkhandler.read(("node.state.daemon", config["node_hostname"])) != "run":
|
||||||
|
sleep(5)
|
||||||
|
logger.out("Node daemon in run state, continuing health daemon startup", state="s")
|
||||||
|
|
||||||
# Define a cleanup function
|
# Define a cleanup function
|
||||||
def cleanup(failure=False):
|
def cleanup(failure=False):
|
||||||
nonlocal logger, zkhandler, monitoring_instance
|
nonlocal logger, zkhandler, monitoring_instance
|
||||||
|

BIN: images/11-prometheus-grafana.png (new file; 168 KiB)
BIN: three existing screenshot images updated (115 KiB → 140 KiB, 94 KiB → 109 KiB, 126 KiB → 136 KiB)

@@ -70,7 +70,7 @@ def check_pvc(item, params, section):
     summary = f"Cluster health is {cluster_health}% (maintenance {maintenance})"
 
     if len(cluster_messages) > 0:
-        details = ", ".join(cluster_messages)
+        details = ", ".join([m["text"] for m in cluster_messages])
 
     if cluster_health <= 50 and maintenance == "off":
         state = State.CRIT

@@ -2555,7 +2555,9 @@
     ],
     "refresh": "5s",
     "schemaVersion": 38,
-    "tags": [],
+    "tags": [
+        "pvc"
+    ],
     "templating": {
         "list": [
             {

@@ -2592,6 +2594,6 @@
     "timezone": "",
     "title": "PVC Cluster",
     "uid": "fbddd9f9-aadb-4c97-8aea-57c29e5de234",
-    "version": 56,
+    "version": 57,
     "weekStart": ""
 }

@@ -48,7 +48,7 @@ import re
 import json
 
 # Daemon version
-version = "0.9.85"
+version = "0.9.86"
 
 
 ##########################################################

@@ -115,6 +115,27 @@ def fence_node(node_name, zkhandler, config, logger):
     ):
         migrateFromFencedNode(zkhandler, node_name, config, logger)
 
+    # Reset all node resource values
+    logger.out(
+        f"Resetting all resource values for dead node {node_name} to zero",
+        state="i",
+        prefix=f"fencing {node_name}",
+    )
+    zkhandler.write(
+        [
+            (("node.running_domains", node_name), "0"),
+            (("node.count.provisioned_domains", node_name), "0"),
+            (("node.cpu.load", node_name), "0"),
+            (("node.vcpu.allocated", node_name), "0"),
+            (("node.memory.total", node_name), "0"),
+            (("node.memory.used", node_name), "0"),
+            (("node.memory.free", node_name), "0"),
+            (("node.memory.allocated", node_name), "0"),
+            (("node.memory.provisioned", node_name), "0"),
+            (("node.monitoring.health", node_name), None),
+        ]
+    )
+
 
 # Migrate hosts away from a fenced node
 def migrateFromFencedNode(zkhandler, node_name, config, logger):

@@ -477,6 +477,10 @@ def collect_vm_stats(logger, config, zkhandler, this_node, queue):
     fixed_d_domain = this_node.d_domain.copy()
     for domain, instance in fixed_d_domain.items():
         if domain in this_node.domain_list:
+            # Add the allocated memory to our memalloc value
+            memalloc += instance.getmemory()
+            memprov += instance.getmemory()
+            vcpualloc += instance.getvcpus()
             if instance.getstate() == "start" and instance.getnode() == this_node.name:
                 if instance.getdom() is not None:
                     try:
|
|||||||
continue
|
continue
|
||||||
domain_memory_stats = domain.memoryStats()
|
domain_memory_stats = domain.memoryStats()
|
||||||
domain_cpu_stats = domain.getCPUStats(True)[0]
|
domain_cpu_stats = domain.getCPUStats(True)[0]
|
||||||
|
|
||||||
# Add the allocated memory to our memalloc value
|
|
||||||
memalloc += instance.getmemory()
|
|
||||||
memprov += instance.getmemory()
|
|
||||||
vcpualloc += instance.getvcpus()
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if debug:
|
if debug:
|
||||||
try:
|
try:
|
||||||
@ -701,7 +700,7 @@ def node_keepalive(logger, config, zkhandler, this_node):
|
|||||||
|
|
||||||
runtime_start = datetime.now()
|
runtime_start = datetime.now()
|
||||||
logger.out(
|
logger.out(
|
||||||
"Starting node keepalive run at {datetime.now()}",
|
f"Starting node keepalive run at {datetime.now()}",
|
||||||
state="t",
|
state="t",
|
||||||
)
|
)
|
||||||
|
|
||||||
|

@@ -44,7 +44,7 @@ from daemon_lib.vmbuilder import (
 )
 
 # Daemon version
-version = "0.9.85"
+version = "0.9.86"
 
 
 config = cfg.get_configuration()