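Add per-node CPU, network, and memory utilization metrics; fix metric type checks (accept floats), float averaging, cluster CPU/memory capacity totals, and remove a stray debug print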
This commit is contained in:
parent
3e4cc53fdd
commit
9604f655d0
|
@ -498,8 +498,7 @@ def get_health_metrics(zkhandler):
|
||||||
output_lines.append("# TYPE pvc_node_health gauge")
|
output_lines.append("# TYPE pvc_node_health gauge")
|
||||||
for node in status_data["node_health"]:
|
for node in status_data["node_health"]:
|
||||||
node_health = status_data["node_health"][node]["health"]
|
node_health = status_data["node_health"][node]["health"]
|
||||||
print(node_health)
|
if isinstance(node_health, (int, float)):
|
||||||
if isinstance(node_health, int):
|
|
||||||
output_lines.append(f'pvc_node_health{{node="{node}"}} {node_health}')
|
output_lines.append(f'pvc_node_health{{node="{node}"}} {node_health}')
|
||||||
|
|
||||||
output_lines.append("# HELP pvc_node_daemon_states PVC Node daemon state counts")
|
output_lines.append("# HELP pvc_node_daemon_states PVC Node daemon state counts")
|
||||||
|
@ -630,6 +629,7 @@ def get_resource_metrics(zkhandler):
|
||||||
all_total_speed = 0
|
all_total_speed = 0
|
||||||
all_total_util = 0
|
all_total_util = 0
|
||||||
all_total_count = 0
|
all_total_count = 0
|
||||||
|
per_node_network_utilization = dict()
|
||||||
for node in node_data:
|
for node in node_data:
|
||||||
if node["daemon_state"] != "run":
|
if node["daemon_state"] != "run":
|
||||||
continue
|
continue
|
||||||
|
@ -652,13 +652,17 @@ def get_resource_metrics(zkhandler):
|
||||||
|
|
||||||
if total_count > 0:
|
if total_count > 0:
|
||||||
# Average the speed and util by the count
|
# Average the speed and util by the count
|
||||||
avg_speed = int(total_speed / total_count)
|
avg_speed = float(total_speed / total_count)
|
||||||
all_total_speed += avg_speed
|
all_total_speed += avg_speed
|
||||||
avg_util = int(total_util / total_count)
|
avg_util = float(total_util / total_count)
|
||||||
all_total_util += avg_util
|
all_total_util += avg_util
|
||||||
|
|
||||||
all_total_count += 1
|
all_total_count += 1
|
||||||
|
|
||||||
|
per_node_network_utilization[node["name"]] = avg_util / avg_speed * 100
|
||||||
|
else:
|
||||||
|
per_node_network_utilization[node["name"]] = 0.0
|
||||||
|
|
||||||
if all_total_count > 0:
|
if all_total_count > 0:
|
||||||
all_avg_speed = all_total_speed / all_total_count
|
all_avg_speed = all_total_speed / all_total_count
|
||||||
all_avg_util = all_total_util / all_total_count
|
all_avg_util = all_total_util / all_total_count
|
||||||
|
@ -678,7 +682,7 @@ def get_resource_metrics(zkhandler):
|
||||||
n["cpu_count"]
|
n["cpu_count"]
|
||||||
for n in sorted(node_data, key=lambda n: n["cpu_count"], reverse=False)
|
for n in sorted(node_data, key=lambda n: n["cpu_count"], reverse=False)
|
||||||
]
|
]
|
||||||
total_cpu = sum(node_sorted_cpu[:-2])
|
total_cpu = sum(node_sorted_cpu[:-1])
|
||||||
used_cpu = sum([n["load"] for n in node_data])
|
used_cpu = sum([n["load"] for n in node_data])
|
||||||
used_cpu_percentage = used_cpu / total_cpu * 100
|
used_cpu_percentage = used_cpu / total_cpu * 100
|
||||||
output_lines.append(f"pvc_cluster_cpu_utilization {used_cpu_percentage:2.2f}")
|
output_lines.append(f"pvc_cluster_cpu_utilization {used_cpu_percentage:2.2f}")
|
||||||
|
@ -695,7 +699,7 @@ def get_resource_metrics(zkhandler):
|
||||||
n["memory"]["total"]
|
n["memory"]["total"]
|
||||||
for n in sorted(node_data, key=lambda n: n["memory"]["total"], reverse=False)
|
for n in sorted(node_data, key=lambda n: n["memory"]["total"], reverse=False)
|
||||||
]
|
]
|
||||||
total_memory = sum(node_sorted_memory[:-2])
|
total_memory = sum(node_sorted_memory[:-1])
|
||||||
|
|
||||||
used_memory = sum([n["memory"]["used"] for n in node_data])
|
used_memory = sum([n["memory"]["used"] for n in node_data])
|
||||||
used_memory_percentage = used_memory / total_memory * 100
|
used_memory_percentage = used_memory / total_memory * 100
|
||||||
|
@ -750,7 +754,9 @@ def get_resource_metrics(zkhandler):
|
||||||
output_lines.append("# TYPE pvc_node_host_cpus gauge")
|
output_lines.append("# TYPE pvc_node_host_cpus gauge")
|
||||||
for node in node_data:
|
for node in node_data:
|
||||||
total_cpus = (
|
total_cpus = (
|
||||||
node["vcpu"]["total"] if isinstance(node["vcpu"]["total"], int) else 0
|
node["vcpu"]["total"]
|
||||||
|
if isinstance(node["vcpu"]["total"], (int, float))
|
||||||
|
else 0
|
||||||
)
|
)
|
||||||
output_lines.append(
|
output_lines.append(
|
||||||
f"pvc_node_host_cpus{{node=\"{node['name']}\"}} {total_cpus}"
|
f"pvc_node_host_cpus{{node=\"{node['name']}\"}} {total_cpus}"
|
||||||
|
@ -761,7 +767,7 @@ def get_resource_metrics(zkhandler):
|
||||||
for node in node_data:
|
for node in node_data:
|
||||||
allocated_cpus = (
|
allocated_cpus = (
|
||||||
node["vcpu"]["allocated"]
|
node["vcpu"]["allocated"]
|
||||||
if isinstance(node["vcpu"]["allocated"], int)
|
if isinstance(node["vcpu"]["allocated"], (int, float))
|
||||||
else 0
|
else 0
|
||||||
)
|
)
|
||||||
output_lines.append(
|
output_lines.append(
|
||||||
|
@ -771,16 +777,33 @@ def get_resource_metrics(zkhandler):
|
||||||
output_lines.append("# HELP pvc_node_load PVC node 1 minute load average")
|
output_lines.append("# HELP pvc_node_load PVC node 1 minute load average")
|
||||||
output_lines.append("# TYPE pvc_node_load gauge")
|
output_lines.append("# TYPE pvc_node_load gauge")
|
||||||
for node in node_data:
|
for node in node_data:
|
||||||
load_average = node["load"] if isinstance(node["load"], float) else 0.0
|
load_average = node["load"] if isinstance(node["load"], (int, float)) else 0.0
|
||||||
output_lines.append(
|
output_lines.append(
|
||||||
f"pvc_node_load_average{{node=\"{node['name']}\"}} {load_average}"
|
f"pvc_node_load_average{{node=\"{node['name']}\"}} {load_average}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
output_lines.append("# HELP pvc_node_cpu_utilization PVC node CPU utilization")
|
||||||
|
output_lines.append("# TYPE pvc_node_cpu_utilization gauge")
|
||||||
|
for node in node_data:
|
||||||
|
load_average = node["load"] if isinstance(node["load"], (int, float)) else 0.0
|
||||||
|
cpu_count = (
|
||||||
|
node["cpu_count"] if isinstance(node["cpu_count"], (int, float)) else 0
|
||||||
|
)
|
||||||
|
if cpu_count > 0:
|
||||||
|
used_cpu_percentage = load_average / cpu_count * 100
|
||||||
|
else:
|
||||||
|
used_cpu_percentage = 0.0
|
||||||
|
output_lines.append(
|
||||||
|
f"pvc_node_cpu_utilization{{node=\"{node['name']}\"}} {used_cpu_percentage:2.2f}"
|
||||||
|
)
|
||||||
|
|
||||||
output_lines.append("# HELP pvc_node_domains_count PVC node running domain count")
|
output_lines.append("# HELP pvc_node_domains_count PVC node running domain count")
|
||||||
output_lines.append("# TYPE pvc_node_domains_count gauge")
|
output_lines.append("# TYPE pvc_node_domains_count gauge")
|
||||||
for node in node_data:
|
for node in node_data:
|
||||||
running_domains_count = (
|
running_domains_count = (
|
||||||
node["domains_count"] if isinstance(node["domains_count"], int) else 0
|
node["domains_count"]
|
||||||
|
if isinstance(node["domains_count"], (int, float))
|
||||||
|
else 0
|
||||||
)
|
)
|
||||||
output_lines.append(
|
output_lines.append(
|
||||||
f"pvc_node_domains_count{{node=\"{node['name']}\"}} {running_domains_count}"
|
f"pvc_node_domains_count{{node=\"{node['name']}\"}} {running_domains_count}"
|
||||||
|
@ -802,11 +825,71 @@ def get_resource_metrics(zkhandler):
|
||||||
f"pvc_node_kernel{{node=\"{node['name']}\",kernel=\"{kernel}\"}} 1"
|
f"pvc_node_kernel{{node=\"{node['name']}\",kernel=\"{kernel}\"}} 1"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
output_lines.append(
|
||||||
|
"# HELP pvc_node_network_traffic_rx PVC node received network traffic"
|
||||||
|
)
|
||||||
|
output_lines.append("# TYPE pvc_node_network_traffic_rx gauge")
|
||||||
|
for node in node_data:
|
||||||
|
rx_bps = 0
|
||||||
|
for interface in node["interfaces"].keys():
|
||||||
|
rx_bps += node["interfaces"][interface]["rx_bps"]
|
||||||
|
output_lines.append(
|
||||||
|
f"pvc_node_network_traffic_rx{{node=\"{node['name']}\"}} {rx_bps:2.2f}"
|
||||||
|
)
|
||||||
|
|
||||||
|
output_lines.append(
|
||||||
|
"# HELP pvc_node_network_traffic_tx PVC node transmitted network traffic"
|
||||||
|
)
|
||||||
|
output_lines.append("# TYPE pvc_node_network_traffic_tx gauge")
|
||||||
|
for node in node_data:
|
||||||
|
tx_bps = 0
|
||||||
|
for interface in node["interfaces"].keys():
|
||||||
|
tx_bps += node["interfaces"][interface]["tx_bps"]
|
||||||
|
output_lines.append(
|
||||||
|
f"pvc_node_network_traffic_tx{{node=\"{node['name']}\"}} {tx_bps:2.2f}"
|
||||||
|
)
|
||||||
|
|
||||||
|
output_lines.append(
|
||||||
|
"# HELP pvc_node_network_packets_rx PVC node received network packets"
|
||||||
|
)
|
||||||
|
output_lines.append("# TYPE pvc_node_network_packets_rx gauge")
|
||||||
|
for node in node_data:
|
||||||
|
rx_pps = 0
|
||||||
|
for interface in node["interfaces"].keys():
|
||||||
|
rx_pps += node["interfaces"][interface]["rx_pps"]
|
||||||
|
output_lines.append(
|
||||||
|
f"pvc_node_network_packets_rx{{node=\"{node['name']}\"}} {rx_pps:2.2f}"
|
||||||
|
)
|
||||||
|
|
||||||
|
output_lines.append(
|
||||||
|
"# HELP pvc_node_network_packets_tx PVC node transmitted network packets"
|
||||||
|
)
|
||||||
|
output_lines.append("# TYPE pvc_node_network_packets_tx gauge")
|
||||||
|
for node in node_data:
|
||||||
|
tx_pps = 0
|
||||||
|
for interface in node["interfaces"].keys():
|
||||||
|
tx_pps += node["interfaces"][interface]["tx_pps"]
|
||||||
|
output_lines.append(
|
||||||
|
f"pvc_node_network_packets_tx{{node=\"{node['name']}\"}} {tx_pps:2.2f}"
|
||||||
|
)
|
||||||
|
|
||||||
|
output_lines.append(
|
||||||
|
"# HELP pvc_node_network_utilization PVC node network utilization percentage"
|
||||||
|
)
|
||||||
|
output_lines.append("# TYPE pvc_node_network_utilization gauge")
|
||||||
|
for node in node_data:
|
||||||
|
used_network_percentage = per_node_network_utilization[node["name"]]
|
||||||
|
output_lines.append(
|
||||||
|
f"pvc_node_network_utilization{{node=\"{node['name']}\"}} {used_network_percentage:2.2f}"
|
||||||
|
)
|
||||||
|
|
||||||
output_lines.append("# HELP pvc_node_total_memory PVC node total memory in MB")
|
output_lines.append("# HELP pvc_node_total_memory PVC node total memory in MB")
|
||||||
output_lines.append("# TYPE pvc_node_total_memory gauge")
|
output_lines.append("# TYPE pvc_node_total_memory gauge")
|
||||||
for node in node_data:
|
for node in node_data:
|
||||||
total_memory = (
|
total_memory = (
|
||||||
node["memory"]["total"] if isinstance(node["memory"]["total"], int) else 0
|
node["memory"]["total"]
|
||||||
|
if isinstance(node["memory"]["total"], (int, float))
|
||||||
|
else 0
|
||||||
)
|
)
|
||||||
output_lines.append(
|
output_lines.append(
|
||||||
f"pvc_node_total_memory{{node=\"{node['name']}\"}} {total_memory}"
|
f"pvc_node_total_memory{{node=\"{node['name']}\"}} {total_memory}"
|
||||||
|
@ -819,13 +902,35 @@ def get_resource_metrics(zkhandler):
|
||||||
for node in node_data:
|
for node in node_data:
|
||||||
allocated_memory = (
|
allocated_memory = (
|
||||||
node["memory"]["allocated"]
|
node["memory"]["allocated"]
|
||||||
if isinstance(node["memory"]["allocated"], int)
|
if isinstance(node["memory"]["allocated"], (int, float))
|
||||||
else 0
|
else 0
|
||||||
)
|
)
|
||||||
output_lines.append(
|
output_lines.append(
|
||||||
f"pvc_node_allocated_memory{{node=\"{node['name']}\"}} {allocated_memory}"
|
f"pvc_node_allocated_memory{{node=\"{node['name']}\"}} {allocated_memory}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
output_lines.append(
|
||||||
|
"# HELP pvc_node_allocated_memory_utilization PVC node allocated memory utilization"
|
||||||
|
)
|
||||||
|
output_lines.append("# TYPE pvc_node_allocated_memory_utilization gauge")
|
||||||
|
for node in node_data:
|
||||||
|
allocated_memory = (
|
||||||
|
node["memory"]["allocated"]
|
||||||
|
if isinstance(node["memory"]["allocated"], (int, float))
|
||||||
|
else 0
|
||||||
|
)
|
||||||
|
total_memory = (
|
||||||
|
node["memory"]["total"]
|
||||||
|
if isinstance(node["memory"]["total"], (int, float))
|
||||||
|
else 0
|
||||||
|
)
|
||||||
|
allocated_memory_utilization = (
|
||||||
|
(allocated_memory / total_memory * 100) if total_memory > 0 else 0.0
|
||||||
|
)
|
||||||
|
output_lines.append(
|
||||||
|
f"pvc_node_allocated_memory_utilization{{node=\"{node['name']}\"}} {allocated_memory_utilization}"
|
||||||
|
)
|
||||||
|
|
||||||
output_lines.append(
|
output_lines.append(
|
||||||
"# HELP pvc_node_provisioned_memory PVC node provisioned memory in MB"
|
"# HELP pvc_node_provisioned_memory PVC node provisioned memory in MB"
|
||||||
)
|
)
|
||||||
|
@ -833,28 +938,76 @@ def get_resource_metrics(zkhandler):
|
||||||
for node in node_data:
|
for node in node_data:
|
||||||
provisioned_memory = (
|
provisioned_memory = (
|
||||||
node["memory"]["provisioned"]
|
node["memory"]["provisioned"]
|
||||||
if isinstance(node["memory"]["provisioned"], int)
|
if isinstance(node["memory"]["provisioned"], (int, float))
|
||||||
else 0
|
else 0
|
||||||
)
|
)
|
||||||
output_lines.append(
|
output_lines.append(
|
||||||
f"pvc_node_provisioned_memory{{node=\"{node['name']}\"}} {provisioned_memory}"
|
f"pvc_node_provisioned_memory{{node=\"{node['name']}\"}} {provisioned_memory}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
output_lines.append(
|
||||||
|
"# HELP pvc_node_provisioned_memory_utilization PVC node provisioned memory utilization"
|
||||||
|
)
|
||||||
|
output_lines.append("# TYPE pvc_node_provisioned_memory_utilization gauge")
|
||||||
|
for node in node_data:
|
||||||
|
provisioned_memory = (
|
||||||
|
node["memory"]["provisioned"]
|
||||||
|
if isinstance(node["memory"]["provisioned"], (int, float))
|
||||||
|
else 0
|
||||||
|
)
|
||||||
|
total_memory = (
|
||||||
|
node["memory"]["total"]
|
||||||
|
if isinstance(node["memory"]["total"], (int, float))
|
||||||
|
else 0
|
||||||
|
)
|
||||||
|
provisioned_memory_utilization = (
|
||||||
|
(provisioned_memory / total_memory * 100) if total_memory > 0 else 0.0
|
||||||
|
)
|
||||||
|
output_lines.append(
|
||||||
|
f"pvc_node_provisioned_memory_utilization{{node=\"{node['name']}\"}} {provisioned_memory_utilization}"
|
||||||
|
)
|
||||||
|
|
||||||
output_lines.append("# HELP pvc_node_used_memory PVC node used memory in MB")
|
output_lines.append("# HELP pvc_node_used_memory PVC node used memory in MB")
|
||||||
output_lines.append("# TYPE pvc_node_used_memory gauge")
|
output_lines.append("# TYPE pvc_node_used_memory gauge")
|
||||||
for node in node_data:
|
for node in node_data:
|
||||||
used_memory = (
|
used_memory = (
|
||||||
node["memory"]["used"] if isinstance(node["memory"]["used"], int) else 0
|
node["memory"]["used"]
|
||||||
|
if isinstance(node["memory"]["used"], (int, float))
|
||||||
|
else 0
|
||||||
)
|
)
|
||||||
output_lines.append(
|
output_lines.append(
|
||||||
f"pvc_node_used_memory{{node=\"{node['name']}\"}} {used_memory}"
|
f"pvc_node_used_memory{{node=\"{node['name']}\"}} {used_memory}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
output_lines.append(
|
||||||
|
"# HELP pvc_node_used_memory_utilization PVC node used memory utilization"
|
||||||
|
)
|
||||||
|
output_lines.append("# TYPE pvc_node_used_memory_utilization gauge")
|
||||||
|
for node in node_data:
|
||||||
|
used_memory = (
|
||||||
|
node["memory"]["used"]
|
||||||
|
if isinstance(node["memory"]["used"], (int, float))
|
||||||
|
else 0
|
||||||
|
)
|
||||||
|
total_memory = (
|
||||||
|
node["memory"]["total"]
|
||||||
|
if isinstance(node["memory"]["total"], (int, float))
|
||||||
|
else 0
|
||||||
|
)
|
||||||
|
used_memory_utilization = (
|
||||||
|
(used_memory / total_memory * 100) if total_memory > 0 else 0.0
|
||||||
|
)
|
||||||
|
output_lines.append(
|
||||||
|
f"pvc_node_used_memory_utilization{{node=\"{node['name']}\"}} {used_memory_utilization}"
|
||||||
|
)
|
||||||
|
|
||||||
output_lines.append("# HELP pvc_node_free_memory PVC node free memory in MB")
|
output_lines.append("# HELP pvc_node_free_memory PVC node free memory in MB")
|
||||||
output_lines.append("# TYPE pvc_node_free_memory gauge")
|
output_lines.append("# TYPE pvc_node_free_memory gauge")
|
||||||
for node in node_data:
|
for node in node_data:
|
||||||
free_memory = (
|
free_memory = (
|
||||||
node["memory"]["free"] if isinstance(node["memory"]["free"], int) else 0
|
node["memory"]["free"]
|
||||||
|
if isinstance(node["memory"]["free"], (int, float))
|
||||||
|
else 0
|
||||||
)
|
)
|
||||||
output_lines.append(
|
output_lines.append(
|
||||||
f"pvc_node_free_memory{{node=\"{node['name']}\"}} {free_memory}"
|
f"pvc_node_free_memory{{node=\"{node['name']}\"}} {free_memory}"
|
||||||
|
|
Loading…
Reference in New Issue