Rework metrics output and add combined endpoint

This commit is contained in:
Joshua Boniface 2023-12-09 15:47:40 -05:00
parent 4003204f14
commit 4ca2381077
2 changed files with 67 additions and 16 deletions

View File

@ -624,6 +624,42 @@ api.add_resource(API_Status, "/status")
# /metrics
class API_Metrics(Resource):
    # Combined metrics endpoint: serves the PVC cluster metrics followed by the
    # Ceph metrics as a single plain-text document.
    def get(self):
        """
        Return the current PVC cluster status in Prometheus-compatible metrics format and
        the Ceph cluster metrics as one document.

        Endpoint is unauthenticated to allow metrics exfiltration without having to deal
        with the Prometheus compatibility later.
        ---
        tags:
          - root
        responses:
          200:
            description: OK
          400:
            description: Bad request
        """
        # Each helper hands back an (output, status code) pair; both are always
        # queried before the results are inspected.
        pvc_text, pvc_status = api_helper.cluster_metrics()
        ceph_text, ceph_status = api_helper.ceph_metrics()

        if pvc_status == 200 and ceph_status == 200:
            # Both sources succeeded: serve them back-to-back as one document.
            body, status = pvc_text + ceph_text, 200
        else:
            # Either source failing fails the whole request with a generic error.
            body, status = "Error: Failed to obtain data", 400

        # Build the Flask response by hand so the payload goes out as plain text.
        reply = flask.make_response(body, status)
        reply.mimetype = "text/plain"
        return reply
api.add_resource(API_Metrics, "/metrics")
# /metrics/pvc
class API_Metrics_PVC(Resource):
def get(self):
"""
Return the current PVC cluster status in Prometheus-compatible metrics format
@ -639,10 +675,21 @@ class API_Metrics(Resource):
400:
description: Bad request
"""
return api_helper.cluster_metrics()
cluster_output, cluster_retcode = api_helper.cluster_metrics()
if cluster_retcode != 200:
output = "Error: Failed to obtain data"
retcode = 400
else:
output = cluster_output
retcode = 200
response = flask.make_response(output, retcode)
response.mimetype = "text/plain"
return response
api.add_resource(API_Metrics, "/metrics")
api.add_resource(API_Metrics_PVC, "/metrics/pvc")
# /metrics/ceph
@ -662,7 +709,18 @@ class API_Metrics_Ceph(Resource):
400:
description: Bad request
"""
return api_helper.cluster_ceph_metrics_proxy()
ceph_output, ceph_retcode = api_helper.ceph_metrics()
if ceph_retcode != 200:
output = "Error: Failed to obtain data"
retcode = 400
else:
output = ceph_output
retcode = 200
response = flask.make_response(output, retcode)
response.mimetype = "text/plain"
return response
api.add_resource(API_Metrics_Ceph, "/metrics/ceph")

View File

@ -152,7 +152,6 @@ def cluster_metrics(zkhandler):
if not osd_retflag:
return "Error: OSD data threw error", 400
retcode = 200
output_lines = list()
output_lines.append("# HELP pvc_info PVC cluster information")
@ -290,15 +289,12 @@ def cluster_metrics(zkhandler):
output_lines.append("# TYPE pvc_snapshots gauge")
output_lines.append(f"pvc_snapshots {status_data['snapshots']}")
# We manually make the Flask response here so the output format is correct.
response = flask.make_response("\n".join(output_lines) + "\n", retcode)
response.mimetype = "text/plain"
return response
return "\n".join(output_lines) + "\n", 200
@pvc_common.Profiler(config)
@ZKConnection(config)
def cluster_ceph_metrics_proxy(zkhandler):
def ceph_metrics(zkhandler):
"""
Obtain current Ceph Prometheus metrics from the active MGR
"""
@ -327,21 +323,18 @@ def cluster_ceph_metrics_proxy(zkhandler):
response = get(ceph_prometheus_uri)
if response.status_code == 200:
status_code = 200
output = response.text
status_code = 200
else:
status_code = 400
output = (
f"Error: Failed to obtain metric data from {ceph_mgr_node} MGR daemon\n"
)
status_code = 400
else:
status_code = 400
output = "Error: Failed to find an active MGR node\n"
status_code = 400
# We manually make the Flask response here so the output format is correct.
response = flask.make_response(output, status_code)
response.mimetype = "text/plain"
return response
return output, status_code
#