Compare commits

...

6 Commits

Author SHA1 Message Date
d0de4f1825 Update Grafana dashboard to overview
Adds resource utilization in addition to health.
2023-12-27 11:38:39 -05:00
494c20263d Move monitoring folder to top level 2023-12-27 11:37:49 -05:00
431ee69620 Use proper percentage for pool util 2023-12-27 10:03:00 -05:00
88f4d79d5a Handle invalid values on older Libvirt versions 2023-12-27 09:51:24 -05:00
84d22751d8 Fix bad JSON data handler 2023-12-27 09:43:37 -05:00
40ff005a09 Fix handling of Ceph OSD bytes 2023-12-26 12:43:51 -05:00
14 changed files with 6658 additions and 2646 deletions

View File

@@ -8,7 +8,7 @@
ignore = W503, E501, F403, F405
extend-ignore = E203
# We exclude the Debian, migrations, and provisioner examples
exclude = debian,api-daemon/migrations/versions,api-daemon/provisioner/examples,node-daemon/monitoring
exclude = debian,monitoring,api-daemon/migrations/versions,api-daemon/provisioner/examples
# Set the max line length to 88 for Black
max-line-length = 88

View File

@@ -123,13 +123,13 @@ def format_bytes_tohuman(databytes):
def format_bytes_fromhuman(datahuman):
if not re.search(r"[A-Za-z]+", datahuman):
dataunit = "B"
datasize = int(datahuman)
datasize = float(datahuman)
else:
dataunit = str(re.match(r"[0-9]+([A-Za-z])[iBb]*", datahuman).group(1))
datasize = int(re.match(r"([0-9]+)[A-Za-z]+", datahuman).group(1))
dataunit = str(re.match(r"[0-9\.]+([A-Za-z])[iBb]*", datahuman).group(1))
datasize = float(re.match(r"([0-9\.]+)[A-Za-z]+", datahuman).group(1))
if byte_unit_matrix.get(dataunit):
databytes = datasize * byte_unit_matrix[dataunit]
if byte_unit_matrix.get(dataunit.upper()):
databytes = int(datasize * byte_unit_matrix[dataunit.upper()])
return databytes
else:
return None
@@ -155,7 +155,7 @@ def format_ops_fromhuman(datahuman):
# Trim off human-readable character
dataunit = datahuman[-1]
datasize = int(datahuman[:-1])
dataops = datasize * ops_unit_matrix[dataunit]
dataops = datasize * ops_unit_matrix[dataunit.upper()]
return "{}".format(dataops)

View File

@@ -1106,30 +1106,6 @@ def get_resource_metrics(zkhandler):
f"pvc_vm_memory_stats_actual{{vm=\"{vm['name']}\"}} {actual_memory}"
)
output_lines.append("# HELP pvc_vm_memory_stats_unused PVC VM unused memory")
output_lines.append("# TYPE pvc_vm_memory_stats_unused gauge")
for vm in vm_data:
unused_memory = vm["memory_stats"]["unused"]
output_lines.append(
f"pvc_vm_memory_stats_unused{{vm=\"{vm['name']}\"}} {unused_memory}"
)
output_lines.append("# HELP pvc_vm_memory_stats_available PVC VM available memory")
output_lines.append("# TYPE pvc_vm_memory_stats_available gauge")
for vm in vm_data:
available_memory = vm["memory_stats"]["available"]
output_lines.append(
f"pvc_vm_memory_stats_available{{vm=\"{vm['name']}\"}} {available_memory}"
)
output_lines.append("# HELP pvc_vm_memory_stats_usable PVC VM usable memory")
output_lines.append("# TYPE pvc_vm_memory_stats_usable gauge")
for vm in vm_data:
usable_memory = vm["memory_stats"]["usable"]
output_lines.append(
f"pvc_vm_memory_stats_usable{{vm=\"{vm['name']}\"}} {usable_memory}"
)
output_lines.append("# HELP pvc_vm_memory_stats_rss PVC VM RSS memory")
output_lines.append("# TYPE pvc_vm_memory_stats_rss gauge")
for vm in vm_data:
@@ -1138,12 +1114,52 @@ def get_resource_metrics(zkhandler):
f"pvc_vm_memory_stats_rss{{vm=\"{vm['name']}\"}} {rss_memory}"
)
output_lines.append("# HELP pvc_vm_memory_stats_unused PVC VM unused memory")
output_lines.append("# TYPE pvc_vm_memory_stats_unused gauge")
for vm in vm_data:
unused_memory = (
vm["memory_stats"]["unused"]
if vm["memory_stats"].get("unused") is not None
else 0
)
output_lines.append(
f"pvc_vm_memory_stats_unused{{vm=\"{vm['name']}\"}} {unused_memory}"
)
output_lines.append("# HELP pvc_vm_memory_stats_available PVC VM available memory")
output_lines.append("# TYPE pvc_vm_memory_stats_available gauge")
for vm in vm_data:
available_memory = (
vm["memory_stats"]["available"]
if vm["memory_stats"].get("available") is not None
else 0
)
output_lines.append(
f"pvc_vm_memory_stats_available{{vm=\"{vm['name']}\"}} {available_memory}"
)
output_lines.append("# HELP pvc_vm_memory_stats_usable PVC VM usable memory")
output_lines.append("# TYPE pvc_vm_memory_stats_usable gauge")
for vm in vm_data:
usable_memory = (
vm["memory_stats"]["usable"]
if vm["memory_stats"].get("usable") is not None
else 0
)
output_lines.append(
f"pvc_vm_memory_stats_usable{{vm=\"{vm['name']}\"}} {usable_memory}"
)
output_lines.append(
"# HELP pvc_vm_memory_stats_disk_caches PVC VM disk cache memory"
)
output_lines.append("# TYPE pvc_vm_memory_stats_disk_caches gauge")
for vm in vm_data:
disk_caches_memory = vm["memory_stats"]["disk_caches"]
disk_caches_memory = (
vm["memory_stats"]["disk_caches"]
if vm["memory_stats"].get("disk_caches") is not None
else 0
)
output_lines.append(
f"pvc_vm_memory_stats_disk_caches{{vm=\"{vm['name']}\"}} {disk_caches_memory}"
)
@@ -1151,7 +1167,11 @@ def get_resource_metrics(zkhandler):
output_lines.append("# HELP pvc_vm_memory_swap_in PVC VM memory swap in")
output_lines.append("# TYPE pvc_vm_memory_swap_in gauge")
for vm in vm_data:
swap_in_memory = vm["memory_stats"]["swap_in"]
swap_in_memory = (
vm["memory_stats"]["swap_in"]
if vm["memory_stats"].get("swap_in") is not None
else 0
)
output_lines.append(
f"pvc_vm_memory_stats_swap_in{{vm=\"{vm['name']}\"}} {swap_in_memory}"
)
@@ -1159,7 +1179,11 @@ def get_resource_metrics(zkhandler):
output_lines.append("# HELP pvc_vm_memory_swap_out PVC VM memory swap out")
output_lines.append("# TYPE pvc_vm_memory_swap_out gauge")
for vm in vm_data:
swap_out_memory = vm["memory_stats"]["swap_out"]
swap_out_memory = (
vm["memory_stats"]["swap_out"]
if vm["memory_stats"].get("swap_out") is not None
else 0
)
output_lines.append(
f"pvc_vm_memory_stats_swap_out{{vm=\"{vm['name']}\"}} {swap_out_memory}"
)
@@ -1167,7 +1191,11 @@ def get_resource_metrics(zkhandler):
output_lines.append("# HELP pvc_vm_memory_major_fault PVC VM memory major faults")
output_lines.append("# TYPE pvc_vm_memory_major_fault gauge")
for vm in vm_data:
major_fault_memory = vm["memory_stats"]["major_fault"]
major_fault_memory = (
vm["memory_stats"]["major_fault"]
if vm["memory_stats"].get("major_fault") is not None
else 0
)
output_lines.append(
f"pvc_vm_memory_stats_major_fault{{vm=\"{vm['name']}\"}} {major_fault_memory}"
)
@@ -1175,7 +1203,11 @@ def get_resource_metrics(zkhandler):
output_lines.append("# HELP pvc_vm_memory_minor_fault PVC VM memory minor faults")
output_lines.append("# TYPE pvc_vm_memory_minor_fault gauge")
for vm in vm_data:
minor_fault_memory = vm["memory_stats"]["minor_fault"]
minor_fault_memory = (
vm["memory_stats"]["minor_fault"]
if vm["memory_stats"].get("minor_fault") is not None
else 0
)
output_lines.append(
f"pvc_vm_memory_stats_minor_fault{{vm=\"{vm['name']}\"}} {minor_fault_memory}"
)
@@ -1185,7 +1217,11 @@ def get_resource_metrics(zkhandler):
)
output_lines.append("# TYPE pvc_vm_memory_hugetlb_pgalloc gauge")
for vm in vm_data:
hugetlb_pgalloc_memory = vm["memory_stats"]["hugetlb_pgalloc"]
hugetlb_pgalloc_memory = (
vm["memory_stats"]["hugetlb_pgalloc"]
if vm["memory_stats"].get("hugetlb_pgalloc") is not None
else 0
)
output_lines.append(
f"pvc_vm_memory_stats_hugetlb_pgalloc{{vm=\"{vm['name']}\"}} {hugetlb_pgalloc_memory}"
)
@@ -1195,7 +1231,11 @@ def get_resource_metrics(zkhandler):
)
output_lines.append("# TYPE pvc_vm_memory_hugetlb_pgfail gauge")
for vm in vm_data:
hugetlb_pgfail_memory = vm["memory_stats"]["hugetlb_pgfail"]
hugetlb_pgfail_memory = (
vm["memory_stats"]["hugetlb_pgfail"]
if vm["memory_stats"].get("hugetlb_pgfail") is not None
else 0
)
output_lines.append(
f"pvc_vm_memory_stats_hugetlb_pgfail{{vm=\"{vm['name']}\"}} {hugetlb_pgfail_memory}"
)
@@ -1496,11 +1536,11 @@ def get_resource_metrics(zkhandler):
continue
output_lines.append(f"pvc_ceph_osd_wr_ops{{osd=\"{osd['id']}\"}} {osd_wr_ops}")
output_lines.append("# HELP pvc_ceph_osd_wr_data PVC OSD write KB per second")
output_lines.append("# HELP pvc_ceph_osd_wr_data PVC OSD write bytes per second")
output_lines.append("# TYPE pvc_ceph_osd_wr_data gauge")
for osd in osd_data:
try:
osd_wr_data = osd["stats"]["wr_data"]
osd_wr_data = pvc_ceph.format_bytes_fromhuman(osd["stats"]["wr_data"])
except Exception:
continue
output_lines.append(
@@ -1516,11 +1556,11 @@ def get_resource_metrics(zkhandler):
continue
output_lines.append(f"pvc_ceph_osd_rd_ops{{osd=\"{osd['id']}\"}} {osd_rd_ops}")
output_lines.append("# HELP pvc_ceph_osd_rd_data PVC OSD read KB per second")
output_lines.append("# HELP pvc_ceph_osd_rd_data PVC OSD read bytes per second")
output_lines.append("# TYPE pvc_ceph_osd_rd_data gauge")
for osd in osd_data:
try:
osd_rd_data = osd["stats"]["rd_data"]
osd_rd_data = pvc_ceph.format_bytes_fromhuman(osd["stats"]["rd_data"])
except Exception:
continue
output_lines.append(
@@ -1589,11 +1629,11 @@ def get_resource_metrics(zkhandler):
output_lines.append("# TYPE pvc_ceph_pool_used_percent gauge")
for pool in pool_data:
try:
pool_used_percent = pool["stats"]["used_percent"]
pool_used_percent = pool["stats"]["used_percent"] * 100
except Exception:
continue
output_lines.append(
f"pvc_ceph_pool_used_percent{{pool=\"{pool['name']}\"}} {pool_used_percent}"
f"pvc_ceph_pool_used_percent{{pool=\"{pool['name']}\"}} {pool_used_percent:2.2f}"
)
output_lines.append("# HELP pvc_ceph_pool_num_objects PVC Pool total objects")

View File

@@ -156,7 +156,10 @@ def getNodeInformation(zkhandler, node_name):
zkhandler, node_name, node_health_plugins
)
if _node_network_stats is not None:
node_network_stats = json.loads(_node_network_stats)
else:
node_network_stats = dict()
# Construct a data structure to represent the data
node_information = {

View File

@@ -3,4 +3,4 @@ node-daemon/pvcnoded usr/share/pvc
node-daemon/pvcnoded.service lib/systemd/system
node-daemon/pvc.target lib/systemd/system
node-daemon/pvcautoready.service lib/systemd/system
node-daemon/monitoring usr/share/pvc
monitoring usr/share/pvc

File diff suppressed because it is too large Load Diff