Add Zookeeper metrics proxy

This commit is contained in:
Joshua Boniface 2023-12-28 12:02:31 -05:00
parent 2bb24d3b57
commit 0bcf8cfe19
2 changed files with 68 additions and 1 deletions

View File

@ -643,12 +643,13 @@ class API_Metrics(Resource):
health_output, health_retcode = api_helper.cluster_health_metrics()
resource_output, resource_retcode = api_helper.cluster_resource_metrics()
ceph_output, ceph_retcode = api_helper.ceph_metrics()
zookeeper_output, zookeeper_retcode = api_helper.zookeeper_metrics()
if health_retcode != 200 or resource_retcode != 200 or ceph_retcode != 200:
output = "Error: Failed to obtain data"
retcode = 400
else:
output = health_output + resource_output + ceph_output
output = health_output + resource_output + ceph_output + zookeeper_output
retcode = 200
response = flask.make_response(output, retcode)
@ -761,6 +762,40 @@ class API_Metrics_Ceph(Resource):
api.add_resource(API_Metrics_Ceph, "/metrics/ceph")
# /metrics/zookeeper
class API_Metrics_Zookeeper(Resource):
    # NOTE(review): the docstring below looks like a Swagger spec that is read
    # at runtime (the part after "---"), so its text is kept as-is - confirm
    # before rewording it.
    def get(self):
        """
        Return the current PVC Zookeeper Prometheus metrics
        Proxies a metrics request to the current primary node, since all coordinators
        run an active Zookeeper instance and we want one central location.
        ---
        tags:
          - root
        responses:
          200:
            description: OK
          400:
            description: Bad request
        """
        metrics_text, metrics_status = api_helper.zookeeper_metrics()

        # Any non-200 result from the helper is collapsed into one generic
        # 400 response; the helper's own error text is not passed through.
        succeeded = metrics_status == 200
        body = metrics_text if succeeded else "Error: Failed to obtain data"
        http_status = 200 if succeeded else 400

        reply = flask.make_response(body, http_status)
        reply.mimetype = "text/plain"
        return reply


api.add_resource(API_Metrics_Zookeeper, "/metrics/zookeeper")
# /faults
class API_Faults(Resource):
@RequestParser(

View File

@ -199,6 +199,38 @@ def ceph_metrics(zkhandler):
return output, status_code
@pvc_common.Profiler(config)
@ZKConnection(config)
def zookeeper_metrics(zkhandler):
    """
    Obtain current Zookeeper Prometheus metrics from the active coordinator node

    Reads the primary node name from Zookeeper, fetches the metrics page on
    port 9141 of that node, strips the ":2181" port suffixes from the text and
    sorts the resulting lines.

    Callers invoke this function without arguments, so zkhandler is presumably
    supplied by the ZKConnection decorator (confirm against its definition).

    Returns a tuple of (output, status_code): the processed metrics text and
    200 on success, or an "Error: ..." message and 400 when no primary node is
    known, the request fails or times out, or the endpoint answers with a
    non-200 status.
    """
    primary_node = zkhandler.read("base.config.primary_node")
    if primary_node is None:
        return "Error: Failed to find an active primary node\n", 400

    fetch_failure = (
        f"Error: Failed to obtain metric data from {primary_node} primary node daemon\n",
        400,
    )

    # Get the data from the endpoint
    # We use the default port of 9141
    zookeeper_prometheus_uri = f"http://{primary_node}:9141/metrics"
    try:
        # Bound the request so that an unreachable or stalled primary node
        # cannot block this API call indefinitely.
        # NOTE(review): assumes `get` is requests.get (it is used that way
        # here: .status_code/.text) - confirm against the module imports.
        response = get(zookeeper_prometheus_uri, timeout=5)
    except OSError:
        # requests' RequestException (connection errors, timeouts, ...) derives
        # from IOError/OSError; report these like any other fetch failure
        # instead of letting them propagate out of the API handler.
        return fetch_failure

    if response.status_code != 200:
        return fetch_failure

    # Parse the text to remove annoying ports (":2181")
    cleaned = response.text.replace(":2181", "")
    # Sort the output text
    output = "\n".join(sorted(cleaned.split("\n"))) + "\n"
    return output, 200
#
# Fault functions
#