Add Prometheus metric exporter
Adds a "fake" Prometheus metrics endpoint which returns cluster status information in Prometheus format.
This commit is contained in:
parent
d0e7c19602
commit
7373bfed3f
|
@ -622,6 +622,30 @@ class API_Status(Resource):
|
||||||
api.add_resource(API_Status, "/status")
|
api.add_resource(API_Status, "/status")
|
||||||
|
|
||||||
|
|
||||||
|
# /metrics
class API_Metrics(Resource):
    def get(self):
        """
        Return the current PVC cluster status in Prometheus-compatible metrics format

        Endpoint is unauthenticated to allow metrics exfiltration without having to deal
        with the Prometheus compatibility later.
        ---
        tags:
          - root
        responses:
          200:
            description: OK
          400:
            description: Bad request
        """
        # Collect the cluster status first, then hand both the payload and its
        # return code to the formatter, which produces the final response.
        cluster_data, cluster_retcode = api_helper.cluster_status()
        return api_helper.cluster_format_metrics(cluster_data, cluster_retcode)


api.add_resource(API_Metrics, "/metrics")
|
||||||
|
|
||||||
|
|
||||||
# /faults
|
# /faults
|
||||||
class API_Faults(Resource):
|
class API_Faults(Resource):
|
||||||
@RequestParser(
|
@RequestParser(
|
||||||
|
|
|
@ -119,6 +119,96 @@ def cluster_maintenance(zkhandler, maint_state="false"):
|
||||||
return retdata, retcode
|
return retdata, retcode
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# Metrics functions
|
||||||
|
#
|
||||||
|
def _prometheus_label_value(value):
    """
    Escape a value for use inside a double-quoted Prometheus label

    The Prometheus text exposition format requires backslash, double-quote and
    line feed characters to be escaped within label values; anything else is
    passed through unchanged.
    """
    return (
        str(value)
        .replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
    )


@pvc_common.Profiler(config)
@ZKConnection(config)
def cluster_format_metrics(zkhandler, status_data, status_retcode):
    """
    Format status data from cluster_status into Prometheus-compatible metrics

    zkhandler is accepted as the first argument but is not used by this function.

    status_data is read as a dictionary with the keys "primary_node",
    "pvc_version", "upstream_ip", "maintenance", "cluster_health" (with a
    "health" entry), "node_health" (per-node dictionaries with a "health"
    entry), "nodes", "vms", "osds" (state -> count dictionaries), and
    "networks", "pools", "volumes", "snapshots" (plain values).

    If status_retcode is not 200, an (error message, status_retcode) tuple is
    returned and no metrics are generated. Otherwise a Flask response with
    mimetype "text/plain" and code 200 containing the metrics text is returned.
    """
    from flask import make_response

    if status_retcode != 200:
        return "Error: Status data threw error", status_retcode

    output_lines = []

    def add_gauge(name, description, samples):
        # Emit the HELP/TYPE header followed by one line per (labels, value)
        # sample. The header is written even when there are no samples.
        output_lines.append(f"# HELP {name} {description}")
        output_lines.append(f"# TYPE {name} gauge")
        for labels, value in samples:
            if labels:
                label_text = ", ".join(
                    f'{key}="{_prometheus_label_value(val)}"'
                    for key, val in labels.items()
                )
                output_lines.append(f"{name}{{{label_text}}} {value}")
            else:
                output_lines.append(f"{name} {value}")

    add_gauge(
        "pvc_info",
        "PVC cluster information",
        [
            (
                {
                    "primary_node": status_data["primary_node"],
                    "version": status_data["pvc_version"],
                    "upstream_ip": status_data["upstream_ip"],
                },
                1,
            )
        ],
    )

    add_gauge(
        "pvc_cluster_maintenance",
        "PVC cluster maintenance state",
        [({}, 1 if strtobool(status_data["maintenance"]) else 0)],
    )

    add_gauge(
        "pvc_cluster_health",
        "PVC cluster health status",
        [({}, status_data["cluster_health"]["health"])],
    )

    # TODO: per-fault metrics (pvc_cluster_faults, built from
    # status_data["cluster_health"]["messages"]) are not exported yet.

    # Only nodes reporting an integer health value get a sample line.
    add_gauge(
        "pvc_node_health",
        "PVC cluster node health status",
        [
            ({"node": node}, node_data["health"])
            for node, node_data in status_data["node_health"].items()
            if isinstance(node_data["health"], int)
        ],
    )

    for name, description, key in (
        ("pvc_nodes", "PVC node state counts", "nodes"),
        ("pvc_vms", "PVC VM state counts", "vms"),
        ("pvc_osds", "PVC OSD state counts", "osds"),
    ):
        add_gauge(
            name,
            description,
            [({"state": state}, count) for state, count in status_data[key].items()],
        )

    for name, description, key in (
        ("pvc_networks", "PVC network count", "networks"),
        ("pvc_pools", "PVC storage pool count", "pools"),
        ("pvc_volumes", "PVC storage volume count", "volumes"),
        ("pvc_snapshots", "PVC storage snapshot count", "snapshots"),
    ):
        add_gauge(name, description, [({}, status_data[key])])

    # We manually make the Flask response here so the output format is correct.
    response = make_response("\n".join(output_lines) + "\n", 200)
    response.mimetype = "text/plain"
    return response
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# Fault functions
|
# Fault functions
|
||||||
#
|
#
|
||||||
|
|
Loading…
Reference in New Issue