Add Prometheus metric exporter

Adds a "fake" Prometheus metrics endpoint which returns cluster status
information in Prometheus format.
This commit is contained in:
Joshua Boniface 2023-12-08 11:45:52 -05:00
parent d0e7c19602
commit 7373bfed3f
2 changed files with 114 additions and 0 deletions

View File

@ -622,6 +622,30 @@ class API_Status(Resource):
api.add_resource(API_Status, "/status")
# /metrics
class API_Metrics(Resource):
def get(self):
"""
Return the current PVC cluster status in Prometheus-compatible metrics format
Endpoint is unauthenticated to allow metrics exfiltration without having to deal
with the Prometheus compatibility later.
---
tags:
- root
responses:
200:
description: OK
400:
description: Bad request
"""
status_data, status_retcode = api_helper.cluster_status()
return api_helper.cluster_format_metrics(status_data, status_retcode)
api.add_resource(API_Metrics, "/metrics")
# /faults
class API_Faults(Resource):
@RequestParser(

View File

@ -119,6 +119,96 @@ def cluster_maintenance(zkhandler, maint_state="false"):
return retdata, retcode
#
# Metrics functions
#
@pvc_common.Profiler(config)
@ZKConnection(config)
def cluster_format_metrics(zkhandler, status_data, status_retcode):
"""
Format status data from cluster_status into Prometheus-compatible metrics
"""
from flask import make_response
if status_retcode != 200:
return "Error: Status data threw error", status_retcode
retcode = 200
output_lines = list()
print(status_data)
output_lines.append("# HELP pvc_info PVC cluster information")
output_lines.append("# TYPE pvc_info gauge")
output_lines.append(
f"pvc_info{{primary_node=\"{status_data['primary_node']}\", version=\"{status_data['pvc_version']}\", upstream_ip=\"{status_data['upstream_ip']}\"}} 1"
)
output_lines.append("# HELP pvc_cluster_maintenance PVC cluster maintenance state")
output_lines.append("# TYPE pvc_cluster_maintenance gauge")
output_lines.append(
f"pvc_cluster_maintenance {1 if bool(strtobool(status_data['maintenance'])) else 0}"
)
output_lines.append("# HELP pvc_cluster_health PVC cluster health status")
output_lines.append("# TYPE pvc_cluster_health gauge")
output_lines.append(f"pvc_cluster_health {status_data['cluster_health']['health']}")
# output_lines.append("# HELP pvc_cluster_faults PVC cluster health faults")
# output_lines.append("# TYPE pvc_cluster_faults gauge")
# for fault_msg in status_data["cluster_health"]["messages"]:
# output_lines.append(
# f"pvc_cluster_faults{{id=\"{fault_msg['id']}\", message=\"{fault_msg['text']}\"}} {fault_msg['health_delta']}"
# )
output_lines.append("# HELP pvc_node_health PVC cluster node health status")
output_lines.append("# TYPE pvc_node_health gauge")
for node in status_data["node_health"]:
if isinstance(status_data["node_health"][node]["health"], int):
output_lines.append(
f"pvc_node_health{{node=\"{node}\"}} {status_data['node_health'][node]['health']}"
)
output_lines.append("# HELP pvc_nodes PVC node state counts")
output_lines.append("# TYPE pvc_nodes gauge")
for state in status_data["nodes"]:
output_lines.append(
f"pvc_nodes{{state=\"{state}\"}} {status_data['nodes'][state]}"
)
output_lines.append("# HELP pvc_vms PVC VM state counts")
output_lines.append("# TYPE pvc_vms gauge")
for state in status_data["vms"]:
output_lines.append(f"pvc_vms{{state=\"{state}\"}} {status_data['vms'][state]}")
output_lines.append("# HELP pvc_osds PVC OSD state counts")
output_lines.append("# TYPE pvc_osds gauge")
for state in status_data["osds"]:
output_lines.append(
f"pvc_osds{{state=\"{state}\"}} {status_data['osds'][state]}"
)
output_lines.append("# HELP pvc_networks PVC network count")
output_lines.append("# TYPE pvc_networks gauge")
output_lines.append(f"pvc_networks {status_data['networks']}")
output_lines.append("# HELP pvc_pools PVC storage pool count")
output_lines.append("# TYPE pvc_pools gauge")
output_lines.append(f"pvc_pools {status_data['pools']}")
output_lines.append("# HELP pvc_volumes PVC storage volume count")
output_lines.append("# TYPE pvc_volumes gauge")
output_lines.append(f"pvc_volumes {status_data['volumes']}")
output_lines.append("# HELP pvc_snapshots PVC storage snapshot count")
output_lines.append("# TYPE pvc_snapshots gauge")
output_lines.append(f"pvc_snapshots {status_data['snapshots']}")
# We manually make the Flask response here so the output format is correct.
response = make_response("\n".join(output_lines) + "\n", retcode)
response.mimetype = "text/plain"
return response
#
# Fault functions
#