Rename health metrics and add resource metrics

This commit is contained in:
Joshua Boniface 2023-12-20 16:14:28 -05:00
parent c64e888d30
commit 39f9f3640c
4 changed files with 1016 additions and 35 deletions

View File

@ -640,14 +640,15 @@ class API_Metrics(Resource):
400:
description: Bad request
"""
cluster_output, cluster_retcode = api_helper.cluster_metrics()
health_output, health_retcode = api_helper.cluster_health_metrics()
resource_output, resource_retcode = api_helper.cluster_resource_metrics()
ceph_output, ceph_retcode = api_helper.ceph_metrics()
if cluster_retcode != 200 or ceph_retcode != 200:
if health_retcode != 200 or resource_retcode != 200 or ceph_retcode != 200:
output = "Error: Failed to obtain data"
retcode = 400
else:
output = cluster_output + ceph_output
output = health_output + resource_output + ceph_output
retcode = 200
response = flask.make_response(output, retcode)
@ -658,11 +659,11 @@ class API_Metrics(Resource):
api.add_resource(API_Metrics, "/metrics")
# /metrics/pvc
class API_Metrics_PVC(Resource):
# /metrics/health
class API_Metrics_Health(Resource):
def get(self):
"""
Return the current PVC cluster status in Prometheus-compatible metrics format
Return the current PVC cluster health status in Prometheus-compatible metrics format
Endpoint is unauthenticated to allow metrics exfiltration without having to deal
with the Prometheus compatibility later.
@ -675,13 +676,13 @@ class API_Metrics_PVC(Resource):
400:
description: Bad request
"""
cluster_output, cluster_retcode = api_helper.cluster_metrics()
health_output, health_retcode = api_helper.cluster_health_metrics()
if cluster_retcode != 200:
if health_retcode != 200:
output = "Error: Failed to obtain data"
retcode = 400
else:
output = cluster_output
output = health_output
retcode = 200
response = flask.make_response(output, retcode)
@ -689,7 +690,41 @@ class API_Metrics_PVC(Resource):
return response
api.add_resource(API_Metrics_PVC, "/metrics/pvc")
api.add_resource(API_Metrics_Health, "/metrics/health")
# /metrics/resource
class API_Metrics_Resource(Resource):
    def get(self):
        """
        Return the current PVC cluster resource utilizations in Prometheus-compatible metrics format

        The endpoint is unauthenticated so that metrics can be scraped without
        supplying API credentials.
        ---
        tags:
          - root
        responses:
          200:
            description: OK
          400:
            description: Bad request
        """
        # The helper hands back the rendered metrics text plus an HTTP-style code.
        metrics_text, helper_code = api_helper.cluster_resource_metrics()

        # Anything other than a clean 200 from the helper becomes a generic 400.
        if helper_code == 200:
            body, status = metrics_text, 200
        else:
            body, status = "Error: Failed to obtain data", 400

        # Serve as plain text rather than Flask's default mimetype.
        response = flask.make_response(body, status)
        response.mimetype = "text/plain"
        return response
api.add_resource(API_Metrics_Resource, "/metrics/resource")
# /metrics/ceph

View File

@ -126,12 +126,27 @@ def cluster_maintenance(zkhandler, maint_state="false"):
#
@pvc_common.Profiler(config)
@ZKConnection(config)
def cluster_metrics(zkhandler):
def cluster_health_metrics(zkhandler):
"""
Format status data from cluster_status into Prometheus-compatible metrics
Get cluster-wide Prometheus metrics for health
"""
retflag, retdata = pvc_cluster.get_metrics(zkhandler)
retflag, retdata = pvc_cluster.get_health_metrics(zkhandler)
if retflag:
retcode = 200
else:
retcode = 400
return retdata, retcode
@pvc_common.Profiler(config)
@ZKConnection(config)
def cluster_resource_metrics(zkhandler):
"""
Get cluster-wide Prometheus metrics for resource utilization
"""
retflag, retdata = pvc_cluster.get_resource_metrics(zkhandler)
if retflag:
retcode = 200
else:

File diff suppressed because it is too large Load Diff

View File

@ -1510,7 +1510,7 @@
{
"id": "color",
"value": {
"fixedColor": "light-red",
"fixedColor": "super-light-red",
"mode": "fixed"
}
}
@ -1525,7 +1525,7 @@
{
"id": "color",
"value": {
"fixedColor": "semi-dark-red",
"fixedColor": "red",
"mode": "fixed"
}
}
@ -1540,7 +1540,7 @@
{
"id": "color",
"value": {
"fixedColor": "semi-dark-blue",
"fixedColor": "dark-red",
"mode": "fixed"
}
}
@ -2556,7 +2556,8 @@
"refresh": "5s",
"schemaVersion": 38,
"tags": [
"pvc"
"pvc",
"health"
],
"templating": {
"list": [
@ -2592,8 +2593,8 @@
},
"timepicker": {},
"timezone": "",
"title": "PVC Cluster",
"title": "PVC Cluster Health",
"uid": "fbddd9f9-aadb-4c97-8aea-57c29e5de234",
"version": 57,
"version": 1,
"weekStart": ""
}