Rename health metrics and add resource metrics

This commit is contained in:
Joshua Boniface 2023-12-20 16:14:28 -05:00
parent c64e888d30
commit 39f9f3640c
4 changed files with 1016 additions and 35 deletions

View File

@ -640,14 +640,15 @@ class API_Metrics(Resource):
400: 400:
description: Bad request description: Bad request
""" """
cluster_output, cluster_retcode = api_helper.cluster_metrics() health_output, health_retcode = api_helper.cluster_health_metrics()
resource_output, resource_retcode = api_helper.cluster_resource_metrics()
ceph_output, ceph_retcode = api_helper.ceph_metrics() ceph_output, ceph_retcode = api_helper.ceph_metrics()
if cluster_retcode != 200 or ceph_retcode != 200: if health_retcode != 200 or resource_retcode != 200 or ceph_retcode != 200:
output = "Error: Failed to obtain data" output = "Error: Failed to obtain data"
retcode = 400 retcode = 400
else: else:
output = cluster_output + ceph_output output = health_output + resource_output + ceph_output
retcode = 200 retcode = 200
response = flask.make_response(output, retcode) response = flask.make_response(output, retcode)
@ -658,11 +659,11 @@ class API_Metrics(Resource):
api.add_resource(API_Metrics, "/metrics") api.add_resource(API_Metrics, "/metrics")
# /metrics/pvc # /metrics/health
class API_Metrics_PVC(Resource): class API_Metrics_Health(Resource):
def get(self): def get(self):
""" """
Return the current PVC cluster status in Prometheus-compatible metrics format Return the current PVC cluster health status in Prometheus-compatible metrics format
Endpoint is unauthenticated to allow metrics exfiltration without having to deal Endpoint is unauthenticated to allow metrics exfiltration without having to deal
with the Prometheus compatibility later. with the Prometheus compatibility later.
@ -675,13 +676,13 @@ class API_Metrics_PVC(Resource):
400: 400:
description: Bad request description: Bad request
""" """
cluster_output, cluster_retcode = api_helper.cluster_metrics() health_output, health_retcode = api_helper.cluster_health_metrics()
if cluster_retcode != 200: if health_retcode != 200:
output = "Error: Failed to obtain data" output = "Error: Failed to obtain data"
retcode = 400 retcode = 400
else: else:
output = cluster_output output = health_output
retcode = 200 retcode = 200
response = flask.make_response(output, retcode) response = flask.make_response(output, retcode)
@ -689,7 +690,41 @@ class API_Metrics_PVC(Resource):
return response return response
api.add_resource(API_Metrics_PVC, "/metrics/pvc") api.add_resource(API_Metrics_Health, "/metrics/health")
# /metrics/resource
class API_Metrics_Resource(Resource):
    def get(self):
        """
        Return the current PVC cluster resource utilizations in Prometheus-compatible metrics format
        Endpoint is unauthenticated to allow metrics exfiltration without having to deal
        with the Prometheus compatibility later.
        ---
        tags:
          - root
        responses:
          200:
            description: OK
          400:
            description: Bad request
        """
        # The helper hands back a (payload, HTTP-style status code) pair.
        metrics_text, status = api_helper.cluster_resource_metrics()

        # Any status other than 200 is reported to the client as a generic
        # 400 with a fixed error string; the helper's payload is only
        # forwarded on success.
        if status == 200:
            body, code = metrics_text, 200
        else:
            body, code = "Error: Failed to obtain data", 400

        # Serve the body as plain text rather than Flask's default mimetype.
        reply = flask.make_response(body, code)
        reply.mimetype = "text/plain"
        return reply
api.add_resource(API_Metrics_Resource, "/metrics/resource")
# /metrics/ceph # /metrics/ceph

View File

@ -126,12 +126,27 @@ def cluster_maintenance(zkhandler, maint_state="false"):
# #
@pvc_common.Profiler(config) @pvc_common.Profiler(config)
@ZKConnection(config) @ZKConnection(config)
def cluster_metrics(zkhandler): def cluster_health_metrics(zkhandler):
""" """
Format status data from cluster_status into Prometheus-compatible metrics Get cluster-wide Prometheus metrics for health
""" """
retflag, retdata = pvc_cluster.get_metrics(zkhandler) retflag, retdata = pvc_cluster.get_health_metrics(zkhandler)
if retflag:
retcode = 200
else:
retcode = 400
return retdata, retcode
@pvc_common.Profiler(config)
@ZKConnection(config)
def cluster_resource_metrics(zkhandler):
"""
Get cluster-wide Prometheus metrics for resource utilization
"""
retflag, retdata = pvc_cluster.get_resource_metrics(zkhandler)
if retflag: if retflag:
retcode = 200 retcode = 200
else: else:

File diff suppressed because it is too large Load Diff

View File

@ -1510,7 +1510,7 @@
{ {
"id": "color", "id": "color",
"value": { "value": {
"fixedColor": "light-red", "fixedColor": "super-light-red",
"mode": "fixed" "mode": "fixed"
} }
} }
@ -1525,7 +1525,7 @@
{ {
"id": "color", "id": "color",
"value": { "value": {
"fixedColor": "semi-dark-red", "fixedColor": "red",
"mode": "fixed" "mode": "fixed"
} }
} }
@ -1540,7 +1540,7 @@
{ {
"id": "color", "id": "color",
"value": { "value": {
"fixedColor": "semi-dark-blue", "fixedColor": "dark-red",
"mode": "fixed" "mode": "fixed"
} }
} }
@ -2556,7 +2556,8 @@
"refresh": "5s", "refresh": "5s",
"schemaVersion": 38, "schemaVersion": 38,
"tags": [ "tags": [
"pvc" "pvc",
"health"
], ],
"templating": { "templating": {
"list": [ "list": [
@ -2592,8 +2593,8 @@
}, },
"timepicker": {}, "timepicker": {},
"timezone": "", "timezone": "",
"title": "PVC Cluster", "title": "PVC Cluster Health",
"uid": "fbddd9f9-aadb-4c97-8aea-57c29e5de234", "uid": "fbddd9f9-aadb-4c97-8aea-57c29e5de234",
"version": 57, "version": 1,
"weekStart": "" "weekStart": ""
} }