Rework metrics output and add combined endpoint

This commit is contained in:
Joshua Boniface 2023-12-09 15:47:40 -05:00
parent 4003204f14
commit 4ca2381077
2 changed files with 67 additions and 16 deletions

View File

@ -624,6 +624,42 @@ api.add_resource(API_Status, "/status")
# /metrics # /metrics
class API_Metrics(Resource):
    def get(self):
        """
        Return the current PVC cluster status in Prometheus-compatible metrics format and
        the Ceph cluster metrics as one document.

        The endpoint is unauthenticated to allow metrics exfiltration without having to
        deal with Prometheus compatibility later.
        ---
        tags:
          - root
        responses:
          200:
            description: OK
          400:
            description: Bad request
        """
        # Each helper hands back a (text, status code) pair.
        pvc_text, pvc_code = api_helper.cluster_metrics()
        ceph_text, ceph_code = api_helper.ceph_metrics()

        # The combined document is only served when both sources succeeded;
        # otherwise a single generic error is reported.
        if pvc_code == 200 and ceph_code == 200:
            body, status = pvc_text + ceph_text, 200
        else:
            body, status = "Error: Failed to obtain data", 400

        # Build the Flask response by hand so the output is served as plain text.
        reply = flask.make_response(body, status)
        reply.mimetype = "text/plain"
        return reply
api.add_resource(API_Metrics, "/metrics")
# /metrics/pvc
class API_Metrics_PVC(Resource):
def get(self): def get(self):
""" """
Return the current PVC cluster status in Prometheus-compatible metrics format Return the current PVC cluster status in Prometheus-compatible metrics format
@ -639,10 +675,21 @@ class API_Metrics(Resource):
400: 400:
description: Bad request description: Bad request
""" """
return api_helper.cluster_metrics() cluster_output, cluster_retcode = api_helper.cluster_metrics()
if cluster_retcode != 200:
output = "Error: Failed to obtain data"
retcode = 400
else:
output = cluster_output
retcode = 200
response = flask.make_response(output, retcode)
response.mimetype = "text/plain"
return response
api.add_resource(API_Metrics, "/metrics") api.add_resource(API_Metrics_PVC, "/metrics/pvc")
# /metrics/ceph # /metrics/ceph
@ -662,7 +709,18 @@ class API_Metrics_Ceph(Resource):
400: 400:
description: Bad request description: Bad request
""" """
return api_helper.cluster_ceph_metrics_proxy() ceph_output, ceph_retcode = api_helper.ceph_metrics()
if ceph_retcode != 200:
output = "Error: Failed to obtain data"
retcode = 400
else:
output = ceph_output
retcode = 200
response = flask.make_response(output, retcode)
response.mimetype = "text/plain"
return response
api.add_resource(API_Metrics_Ceph, "/metrics/ceph") api.add_resource(API_Metrics_Ceph, "/metrics/ceph")

View File

@ -152,7 +152,6 @@ def cluster_metrics(zkhandler):
if not osd_retflag: if not osd_retflag:
return "Error: OSD data threw error", 400 return "Error: OSD data threw error", 400
retcode = 200
output_lines = list() output_lines = list()
output_lines.append("# HELP pvc_info PVC cluster information") output_lines.append("# HELP pvc_info PVC cluster information")
@ -290,15 +289,12 @@ def cluster_metrics(zkhandler):
output_lines.append("# TYPE pvc_snapshots gauge") output_lines.append("# TYPE pvc_snapshots gauge")
output_lines.append(f"pvc_snapshots {status_data['snapshots']}") output_lines.append(f"pvc_snapshots {status_data['snapshots']}")
# We manually make the Flask response here so the output format is correct. return "\n".join(output_lines) + "\n", 200
response = flask.make_response("\n".join(output_lines) + "\n", retcode)
response.mimetype = "text/plain"
return response
@pvc_common.Profiler(config) @pvc_common.Profiler(config)
@ZKConnection(config) @ZKConnection(config)
def cluster_ceph_metrics_proxy(zkhandler): def ceph_metrics(zkhandler):
""" """
Obtain current Ceph Prometheus metrics from the active MGR Obtain current Ceph Prometheus metrics from the active MGR
""" """
@ -327,21 +323,18 @@ def cluster_ceph_metrics_proxy(zkhandler):
response = get(ceph_prometheus_uri) response = get(ceph_prometheus_uri)
if response.status_code == 200: if response.status_code == 200:
status_code = 200
output = response.text output = response.text
status_code = 200
else: else:
status_code = 400
output = ( output = (
f"Error: Failed to obtain metric data from {ceph_mgr_node} MGR daemon\n" f"Error: Failed to obtain metric data from {ceph_mgr_node} MGR daemon\n"
) )
status_code = 400
else: else:
status_code = 400
output = "Error: Failed to find an active MGR node\n" output = "Error: Failed to find an active MGR node\n"
status_code = 400
# We manually make the Flask response here so the output format is correct. return output, status_code
response = flask.make_response(output, status_code)
response.mimetype = "text/plain"
return response
# #