Add Prometheus metric exporter
Adds a "fake" Prometheus metrics endpoint which returns cluster status information in Prometheus format.
This commit is contained in:
		| @@ -622,6 +622,30 @@ class API_Status(Resource): | ||||
| api.add_resource(API_Status, "/status") | ||||
|  | ||||
|  | ||||
| # /metrics | ||||
| class API_Metrics(Resource): | ||||
|     def get(self): | ||||
|         """ | ||||
|         Return the current PVC cluster status in Prometheus-compatible metrics format | ||||
|  | ||||
|         Endpoint is unauthenticated to allow metrics exfiltration without having to deal | ||||
|         with the Prometheus compatibility later. | ||||
|         --- | ||||
|         tags: | ||||
|           - root | ||||
|         responses: | ||||
|           200: | ||||
|             description: OK | ||||
|           400: | ||||
|             description: Bad request | ||||
|         """ | ||||
|         status_data, status_retcode = api_helper.cluster_status() | ||||
|         return api_helper.cluster_format_metrics(status_data, status_retcode) | ||||
|  | ||||
|  | ||||
| api.add_resource(API_Metrics, "/metrics") | ||||
|  | ||||
|  | ||||
| # /faults | ||||
| class API_Faults(Resource): | ||||
|     @RequestParser( | ||||
|   | ||||
| @@ -119,6 +119,96 @@ def cluster_maintenance(zkhandler, maint_state="false"): | ||||
|     return retdata, retcode | ||||
|  | ||||
|  | ||||
| # | ||||
| # Metrics functions | ||||
| # | ||||
| @pvc_common.Profiler(config) | ||||
| @ZKConnection(config) | ||||
| def cluster_format_metrics(zkhandler, status_data, status_retcode): | ||||
|     """ | ||||
|     Format status data from cluster_status into Prometheus-compatible metrics | ||||
|     """ | ||||
|     from flask import make_response | ||||
|  | ||||
|     if status_retcode != 200: | ||||
|         return "Error: Status data threw error", status_retcode | ||||
|  | ||||
|     retcode = 200 | ||||
|     output_lines = list() | ||||
|     print(status_data) | ||||
|  | ||||
|     output_lines.append("# HELP pvc_info PVC cluster information") | ||||
|     output_lines.append("# TYPE pvc_info gauge") | ||||
|     output_lines.append( | ||||
|         f"pvc_info{{primary_node=\"{status_data['primary_node']}\", version=\"{status_data['pvc_version']}\", upstream_ip=\"{status_data['upstream_ip']}\"}} 1" | ||||
|     ) | ||||
|  | ||||
|     output_lines.append("# HELP pvc_cluster_maintenance PVC cluster maintenance state") | ||||
|     output_lines.append("# TYPE pvc_cluster_maintenance gauge") | ||||
|     output_lines.append( | ||||
|         f"pvc_cluster_maintenance {1 if bool(strtobool(status_data['maintenance'])) else 0}" | ||||
|     ) | ||||
|  | ||||
|     output_lines.append("# HELP pvc_cluster_health PVC cluster health status") | ||||
|     output_lines.append("# TYPE pvc_cluster_health gauge") | ||||
|     output_lines.append(f"pvc_cluster_health {status_data['cluster_health']['health']}") | ||||
|  | ||||
|     # output_lines.append("# HELP pvc_cluster_faults PVC cluster health faults") | ||||
|     # output_lines.append("# TYPE pvc_cluster_faults gauge") | ||||
|     # for fault_msg in status_data["cluster_health"]["messages"]: | ||||
|     #     output_lines.append( | ||||
|     #         f"pvc_cluster_faults{{id=\"{fault_msg['id']}\", message=\"{fault_msg['text']}\"}} {fault_msg['health_delta']}" | ||||
|     #     ) | ||||
|  | ||||
|     output_lines.append("# HELP pvc_node_health PVC cluster node health status") | ||||
|     output_lines.append("# TYPE pvc_node_health gauge") | ||||
|     for node in status_data["node_health"]: | ||||
|         if isinstance(status_data["node_health"][node]["health"], int): | ||||
|             output_lines.append( | ||||
|                 f"pvc_node_health{{node=\"{node}\"}} {status_data['node_health'][node]['health']}" | ||||
|             ) | ||||
|  | ||||
|     output_lines.append("# HELP pvc_nodes PVC node state counts") | ||||
|     output_lines.append("# TYPE pvc_nodes gauge") | ||||
|     for state in status_data["nodes"]: | ||||
|         output_lines.append( | ||||
|             f"pvc_nodes{{state=\"{state}\"}} {status_data['nodes'][state]}" | ||||
|         ) | ||||
|  | ||||
|     output_lines.append("# HELP pvc_vms PVC VM state counts") | ||||
|     output_lines.append("# TYPE pvc_vms gauge") | ||||
|     for state in status_data["vms"]: | ||||
|         output_lines.append(f"pvc_vms{{state=\"{state}\"}} {status_data['vms'][state]}") | ||||
|  | ||||
|     output_lines.append("# HELP pvc_osds PVC OSD state counts") | ||||
|     output_lines.append("# TYPE pvc_osds gauge") | ||||
|     for state in status_data["osds"]: | ||||
|         output_lines.append( | ||||
|             f"pvc_osds{{state=\"{state}\"}} {status_data['osds'][state]}" | ||||
|         ) | ||||
|  | ||||
|     output_lines.append("# HELP pvc_networks PVC network count") | ||||
|     output_lines.append("# TYPE pvc_networks gauge") | ||||
|     output_lines.append(f"pvc_networks {status_data['networks']}") | ||||
|  | ||||
|     output_lines.append("# HELP pvc_pools PVC storage pool count") | ||||
|     output_lines.append("# TYPE pvc_pools gauge") | ||||
|     output_lines.append(f"pvc_pools {status_data['pools']}") | ||||
|  | ||||
|     output_lines.append("# HELP pvc_volumes PVC storage volume count") | ||||
|     output_lines.append("# TYPE pvc_volumes gauge") | ||||
|     output_lines.append(f"pvc_volumes {status_data['volumes']}") | ||||
|  | ||||
|     output_lines.append("# HELP pvc_snapshots PVC storage snapshot count") | ||||
|     output_lines.append("# TYPE pvc_snapshots gauge") | ||||
|     output_lines.append(f"pvc_snapshots {status_data['snapshots']}") | ||||
|  | ||||
|     # We manually make the Flask response here so the output format is correct. | ||||
|     response = make_response("\n".join(output_lines) + "\n", retcode) | ||||
|     response.mimetype = "text/plain" | ||||
|     return response | ||||
|  | ||||
|  | ||||
| # | ||||
| # Fault functions | ||||
| # | ||||
|   | ||||
		Reference in New Issue
	
	Block a user