Compare commits

16 commits: 5a7ea25266 ... 4ca2381077

- 4ca2381077
- 4003204f14
- a70c1d63b0
- 2bea78d25e
- fd717b702d
- 132cde5591
- ba565ead4c
- 317ca4b98c
- 2b8abea8df
- 9b3c9f1be5
- 7373bfed3f
- d0e7c19602
- f01c12c86b
- 0bda095571
- 7976e1d2d0
- 813aef1463
@@ -147,7 +147,7 @@


 # This import is always required here, as VMBuilder is used by the VMBuilderScript class.
-from pvcapid.vmbuilder import VMBuilder
+from daemon_lib.vmbuilder import VMBuilder


 # The VMBuilderScript class must be named as such, and extend VMBuilder.
@@ -174,7 +174,7 @@ class VMBuilderScript(VMBuilder):
         """

         # Run any imports first
-        import pvcapid.libvirt_schema as libvirt_schema
+        import daemon_lib.libvirt_schema as libvirt_schema
         import datetime
         import random

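Taken together, these import swaps mean a provisioner script now pulls its helpers from `daemon_lib` (and the worker daemon's config) rather than `pvcapid`. A minimal sketch of the resulting script skeleton; only the import path, the class name/parent, and the `prepare()` hook appear in the diffs here, the rest is a placeholder:

```python
# Hypothetical provisioner script skeleton under the new module layout.

# This import is always required here, as VMBuilder is used by the VMBuilderScript class.
from daemon_lib.vmbuilder import VMBuilder


# The VMBuilderScript class must be named as such, and extend VMBuilder.
class VMBuilderScript(VMBuilder):
    def prepare(self):
        # Run any imports first, as the shipped examples do
        import daemon_lib.libvirt_schema as libvirt_schema  # noqa: F401
```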
@@ -148,7 +148,7 @@


 # This import is always required here, as VMBuilder is used by the VMBuilderScript class.
-from pvcapid.vmbuilder import VMBuilder
+from daemon_lib.vmbuilder import VMBuilder


 # The VMBuilderScript class must be named as such, and extend VMBuilder.
@@ -177,7 +177,7 @@ class VMBuilderScript(VMBuilder):
         """

         # Run any imports first
-        import pvcapid.libvirt_schema as libvirt_schema
+        import daemon_lib.libvirt_schema as libvirt_schema
         import datetime
         import random

@@ -289,8 +289,8 @@ class VMBuilderScript(VMBuilder):
         """

         # Run any imports first
-        from pvcapid.vmbuilder import open_zk
-        from pvcapid.Daemon import config
+        from daemon_lib.vmbuilder import open_zk
+        from pvcworkerd.Daemon import config
         import daemon_lib.common as pvc_common
         import daemon_lib.ceph as pvc_ceph
         import os
@@ -383,8 +383,8 @@ class VMBuilderScript(VMBuilder):
         """

         # Run any imports first
-        from pvcapid.vmbuilder import open_zk
-        from pvcapid.Daemon import config
+        from daemon_lib.vmbuilder import open_zk
+        from pvcworkerd.Daemon import config
         import daemon_lib.ceph as pvc_ceph

         for volume in list(reversed(self.vm_data["volumes"])):
@@ -147,7 +147,7 @@


 # This import is always required here, as VMBuilder is used by the VMBuilderScript class.
-from pvcapid.vmbuilder import VMBuilder
+from daemon_lib.vmbuilder import VMBuilder


 # The VMBuilderScript class must be named as such, and extend VMBuilder.
@@ -186,7 +186,7 @@ class VMBuilderScript(VMBuilder):
         """

         # Run any imports first
-        import pvcapid.libvirt_schema as libvirt_schema
+        import daemon_lib.libvirt_schema as libvirt_schema
         import datetime
         import random

@@ -301,16 +301,16 @@ class VMBuilderScript(VMBuilder):
         This function should use the various exposed PVC commands as indicated to create
         RBD block devices and map them to the host as required.

-        open_zk is exposed from pvcapid.vmbuilder to provide a context manager for opening
+        open_zk is exposed from daemon_lib.vmbuilder to provide a context manager for opening
         connections to the PVC Zookeeper cluster; ensure you also import (and pass it)
-        the config object from pvcapid.Daemon as well. This context manager then allows
+        the config object from pvcworkerd.Daemon as well. This context manager then allows
         the use of various common daemon library functions, without going through the API.
         """

         # Run any imports first
         import os
-        from pvcapid.vmbuilder import open_zk
-        from pvcapid.Daemon import config
+        from daemon_lib.vmbuilder import open_zk
+        from pvcworkerd.Daemon import config
         import daemon_lib.common as pvc_common
         import daemon_lib.ceph as pvc_ceph

@@ -446,7 +446,7 @@ class VMBuilderScript(VMBuilder):

         # Run any imports first
         import os
-        from pvcapid.vmbuilder import chroot
+        from daemon_lib.vmbuilder import chroot

         # The directory we mounted things on earlier during prepare(); this could very well
         # be exposed as a module-level variable if you so choose
@@ -718,8 +718,8 @@ GRUB_DISABLE_LINUX_UUID=false

         # Run any imports first
         import os
-        from pvcapid.vmbuilder import open_zk
-        from pvcapid.Daemon import config
+        from daemon_lib.vmbuilder import open_zk
+        from pvcworkerd.Daemon import config
         import daemon_lib.common as pvc_common
         import daemon_lib.ceph as pvc_ceph

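The `chroot` helper referenced above also moves to `daemon_lib.vmbuilder`. A sketch of how an install step uses it, assuming the usual context-manager pattern; the directory path and the command run inside it are illustrative, not from this diff:

```python
import os

from daemon_lib.vmbuilder import chroot

# The directory we mounted things on earlier during prepare()
temporary_directory = "/tmp/target"  # hypothetical mount point

with chroot(temporary_directory):
    # Inside this block, commands operate against the target root
    os.system("update-grub")  # illustrative only
```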
@@ -147,7 +147,7 @@


 # This import is always required here, as VMBuilder is used by the VMBuilderScript class.
-from pvcapid.vmbuilder import VMBuilder
+from daemon_lib.vmbuilder import VMBuilder


 # The VMBuilderScript class must be named as such, and extend VMBuilder.
@@ -186,7 +186,7 @@ class VMBuilderScript(VMBuilder):
         """

         # Run any imports first
-        import pvcapid.libvirt_schema as libvirt_schema
+        import daemon_lib.libvirt_schema as libvirt_schema
         import datetime
         import random

@@ -301,16 +301,16 @@ class VMBuilderScript(VMBuilder):
         This function should use the various exposed PVC commands as indicated to create
         RBD block devices and map them to the host as required.

-        open_zk is exposed from pvcapid.vmbuilder to provide a context manager for opening
+        open_zk is exposed from daemon_lib.vmbuilder to provide a context manager for opening
         connections to the PVC Zookeeper cluster; ensure you also import (and pass it)
-        the config object from pvcapid.Daemon as well. This context manager then allows
+        the config object from pvcworkerd.Daemon as well. This context manager then allows
         the use of various common daemon library functions, without going through the API.
         """

         # Run any imports first
         import os
-        from pvcapid.vmbuilder import open_zk
-        from pvcapid.Daemon import config
+        from daemon_lib.vmbuilder import open_zk
+        from pvcworkerd.Daemon import config
         import daemon_lib.common as pvc_common
         import daemon_lib.ceph as pvc_ceph

@@ -446,7 +446,7 @@ class VMBuilderScript(VMBuilder):

         # Run any imports first
         import os
-        from pvcapid.vmbuilder import chroot
+        from daemon_lib.vmbuilder import chroot
         import daemon_lib.common as pvc_common

         # The directory we mounted things on earlier during prepare(); this could very well
@@ -692,8 +692,8 @@ GRUB_SERIAL_COMMAND="serial --speed=115200 --unit=0 --word=8 --parity=no --stop=

         # Run any imports first
         import os
-        from pvcapid.vmbuilder import open_zk
-        from pvcapid.Daemon import config
+        from daemon_lib.vmbuilder import open_zk
+        from pvcworkerd.Daemon import config
         import daemon_lib.common as pvc_common
         import daemon_lib.ceph as pvc_ceph

@@ -173,7 +173,7 @@


 # This import is always required here, as VMBuilder is used by the VMBuilderScript class.
-from pvcapid.vmbuilder import VMBuilder
+from daemon_lib.vmbuilder import VMBuilder


 # Set up some variables for later; if you frequently use these tools, you might benefit from
@@ -243,7 +243,7 @@ class VMBuilderScript(VMBuilder):
         """

         # Run any imports first
-        import pvcapid.libvirt_schema as libvirt_schema
+        import daemon_lib.libvirt_schema as libvirt_schema
         import datetime
         import random

@@ -358,8 +358,8 @@ class VMBuilderScript(VMBuilder):

         # Run any imports first; as shown here, you can import anything from the PVC
         # namespace, as well as (of course) the main Python namespaces
-        from pvcapid.vmbuilder import open_zk
-        from pvcapid.Daemon import config
+        from daemon_lib.vmbuilder import open_zk
+        from pvcworkerd.Daemon import config
         import daemon_lib.common as pvc_common
         import daemon_lib.ceph as pvc_ceph
         import json
@@ -902,8 +902,8 @@ class VMBuilderScript(VMBuilder):
         """

         # Run any imports first
-        from pvcapid.vmbuilder import open_zk
-        from pvcapid.Daemon import config
+        from daemon_lib.vmbuilder import open_zk
+        from pvcworkerd.Daemon import config
         import daemon_lib.ceph as pvc_ceph

         # Use this construct for reversing the list, as the normal reverse() messes with the list
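The updated docstrings above spell out the pattern: import `open_zk` from `daemon_lib.vmbuilder` and the `config` object from `pvcworkerd.Daemon`, then use the context manager to call daemon library functions directly, without going through the API. A minimal sketch; the `pvc_ceph.add_volume` call and its arguments are assumptions for illustration:

```python
from daemon_lib.vmbuilder import open_zk
from pvcworkerd.Daemon import config

import daemon_lib.ceph as pvc_ceph

# Open a Zookeeper connection for the duration of the block; the config
# object from pvcworkerd.Daemon must be passed in.
with open_zk(config) as zkhandler:
    # Hypothetical call: create an RBD volume via the daemon library
    success, message = pvc_ceph.add_volume(zkhandler, "vms", "test-vm_root", "8G")
```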
@@ -622,6 +622,110 @@ class API_Status(Resource):
 api.add_resource(API_Status, "/status")


+# /metrics
+class API_Metrics(Resource):
+    def get(self):
+        """
+        Return the current PVC cluster status in Prometheus-compatible metrics format and
+        the Ceph cluster metrics as one document.
+
+        Endpoint is unauthenticated to allow metrics exfiltration without having to deal
+        with the Prometheus compatibility later.
+        ---
+        tags:
+          - root
+        responses:
+          200:
+            description: OK
+          400:
+            description: Bad request
+        """
+        cluster_output, cluster_retcode = api_helper.cluster_metrics()
+        ceph_output, ceph_retcode = api_helper.ceph_metrics()
+
+        if cluster_retcode != 200 or ceph_retcode != 200:
+            output = "Error: Failed to obtain data"
+            retcode = 400
+        else:
+            output = cluster_output + ceph_output
+            retcode = 200
+
+        response = flask.make_response(output, retcode)
+        response.mimetype = "text/plain"
+        return response
+
+
+api.add_resource(API_Metrics, "/metrics")
+
+
+# /metrics/pvc
+class API_Metrics_PVC(Resource):
+    def get(self):
+        """
+        Return the current PVC cluster status in Prometheus-compatible metrics format
+
+        Endpoint is unauthenticated to allow metrics exfiltration without having to deal
+        with the Prometheus compatibility later.
+        ---
+        tags:
+          - root
+        responses:
+          200:
+            description: OK
+          400:
+            description: Bad request
+        """
+        cluster_output, cluster_retcode = api_helper.cluster_metrics()
+
+        if cluster_retcode != 200:
+            output = "Error: Failed to obtain data"
+            retcode = 400
+        else:
+            output = cluster_output
+            retcode = 200
+
+        response = flask.make_response(output, retcode)
+        response.mimetype = "text/plain"
+        return response
+
+
+api.add_resource(API_Metrics_PVC, "/metrics/pvc")
+
+
+# /metrics/ceph
+class API_Metrics_Ceph(Resource):
+    def get(self):
+        """
+        Return the current PVC Ceph Prometheus metrics
+
+        Proxies a metrics request to the current active MGR, since this is dynamic
+        and can't be controlled by PVC easily.
+        ---
+        tags:
+          - root
+        responses:
+          200:
+            description: OK
+          400:
+            description: Bad request
+        """
+        ceph_output, ceph_retcode = api_helper.ceph_metrics()
+
+        if ceph_retcode != 200:
+            output = "Error: Failed to obtain data"
+            retcode = 400
+        else:
+            output = ceph_output
+            retcode = 200
+
+        response = flask.make_response(output, retcode)
+        response.mimetype = "text/plain"
+        return response
+
+
+api.add_resource(API_Metrics_Ceph, "/metrics/ceph")
+
+
 # /faults
 class API_Faults(Resource):
     @RequestParser(
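Since all three new endpoints are unauthenticated and return `text/plain`, scraping them needs no API token. A hypothetical probe; the host, port, and `/api/v1` prefix are assumptions about a typical PVC API deployment, not taken from this diff:

```python
from requests import get

base = "http://pvc-api.local:7370/api/v1"  # placeholder address
for path in ("/metrics", "/metrics/pvc", "/metrics/ceph"):
    resp = get(base + path)
    print(path, resp.status_code, resp.headers.get("Content-Type"))
```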
@@ -23,6 +23,8 @@ import flask
 import json
 import lxml.etree as etree

+from re import match
+from requests import get
 from werkzeug.formparser import parse_form_data

 from pvcapid.Daemon import config, strtobool
@@ -119,6 +121,222 @@ def cluster_maintenance(zkhandler, maint_state="false"):
     return retdata, retcode


+#
+# Metrics functions
+#
+@pvc_common.Profiler(config)
+@ZKConnection(config)
+def cluster_metrics(zkhandler):
+    """
+    Format status data from cluster_status into Prometheus-compatible metrics
+    """
+
+    # Get general cluster information
+    status_retflag, status_data = pvc_cluster.get_info(zkhandler)
+    if not status_retflag:
+        return "Error: Status data threw error", 400
+
+    faults_retflag, faults_data = pvc_faults.get_list(zkhandler)
+    if not faults_retflag:
+        return "Error: Faults data threw error", 400
+
+    node_retflag, node_data = pvc_node.get_list(zkhandler)
+    if not node_retflag:
+        return "Error: Node data threw error", 400
+
+    vm_retflag, vm_data = pvc_vm.get_list(zkhandler)
+    if not vm_retflag:
+        return "Error: VM data threw error", 400
+
+    osd_retflag, osd_data = pvc_ceph.get_list_osd(zkhandler)
+    if not osd_retflag:
+        return "Error: OSD data threw error", 400
+
+    output_lines = list()
+
+    output_lines.append("# HELP pvc_info PVC cluster information")
+    output_lines.append("# TYPE pvc_info gauge")
+    output_lines.append(
+        f"pvc_info{{primary_node=\"{status_data['primary_node']}\", version=\"{status_data['pvc_version']}\", upstream_ip=\"{status_data['upstream_ip']}\"}} 1"
+    )
+
+    output_lines.append("# HELP pvc_cluster_maintenance PVC cluster maintenance state")
+    output_lines.append("# TYPE pvc_cluster_maintenance gauge")
+    output_lines.append(
+        f"pvc_cluster_maintenance {1 if bool(strtobool(status_data['maintenance'])) else 0}"
+    )
+
+    output_lines.append("# HELP pvc_cluster_health PVC cluster health status")
+    output_lines.append("# TYPE pvc_cluster_health gauge")
+    output_lines.append(f"pvc_cluster_health {status_data['cluster_health']['health']}")
+
+    output_lines.append("# HELP pvc_cluster_faults PVC cluster new faults")
+    output_lines.append("# TYPE pvc_cluster_faults gauge")
+    fault_map = dict()
+    for fault_type in pvc_common.fault_state_combinations:
+        fault_map[fault_type] = 0
+    for fault in faults_data:
+        fault_map[fault["status"]] += 1
+    for fault_type in fault_map:
+        output_lines.append(
+            f'pvc_cluster_faults{{status="{fault_type}"}} {fault_map[fault_type]}'
+        )
+
+    # output_lines.append("# HELP pvc_cluster_faults PVC cluster health faults")
+    # output_lines.append("# TYPE pvc_cluster_faults gauge")
+    # for fault_msg in status_data["cluster_health"]["messages"]:
+    #     output_lines.append(
+    #         f"pvc_cluster_faults{{id=\"{fault_msg['id']}\", message=\"{fault_msg['text']}\"}} {fault_msg['health_delta']}"
+    #     )
+
+    output_lines.append("# HELP pvc_node_health PVC cluster node health status")
+    output_lines.append("# TYPE pvc_node_health gauge")
+    for node in status_data["node_health"]:
+        if isinstance(status_data["node_health"][node]["health"], int):
+            output_lines.append(
+                f"pvc_node_health{{node=\"{node}\"}} {status_data['node_health'][node]['health']}"
+            )
+
+    output_lines.append("# HELP pvc_node_daemon_states PVC Node daemon state counts")
+    output_lines.append("# TYPE pvc_node_daemon_states gauge")
+    node_daemon_state_map = dict()
+    for state in set([s.split(",")[0] for s in pvc_common.node_state_combinations]):
+        node_daemon_state_map[state] = 0
+    for node in node_data:
+        node_daemon_state_map[node["daemon_state"]] += 1
+    for state in node_daemon_state_map:
+        output_lines.append(
+            f'pvc_node_daemon_states{{state="{state}"}} {node_daemon_state_map[state]}'
+        )
+
+    output_lines.append("# HELP pvc_node_domain_states PVC Node domain state counts")
+    output_lines.append("# TYPE pvc_node_domain_states gauge")
+    node_domain_state_map = dict()
+    for state in set([s.split(",")[1] for s in pvc_common.node_state_combinations]):
+        node_domain_state_map[state] = 0
+    for node in node_data:
+        node_domain_state_map[node["domain_state"]] += 1
+    for state in node_domain_state_map:
+        output_lines.append(
+            f'pvc_node_domain_states{{state="{state}"}} {node_domain_state_map[state]}'
+        )
+
+    output_lines.append("# HELP pvc_vm_states PVC VM state counts")
+    output_lines.append("# TYPE pvc_vm_states gauge")
+    vm_state_map = dict()
+    for state in set(pvc_common.vm_state_combinations):
+        vm_state_map[state] = 0
+    for vm in vm_data:
+        vm_state_map[vm["state"]] += 1
+    for state in vm_state_map:
+        output_lines.append(f'pvc_vm_states{{state="{state}"}} {vm_state_map[state]}')
+
+    output_lines.append("# HELP pvc_osd_up_states PVC OSD up state counts")
+    output_lines.append("# TYPE pvc_osd_up_states gauge")
+    osd_up_state_map = dict()
+    for state in set([s.split(",")[0] for s in pvc_common.ceph_osd_state_combinations]):
+        osd_up_state_map[state] = 0
+    for osd in osd_data:
+        if osd["stats"]["up"] > 0:
+            osd_up_state_map["up"] += 1
+        else:
+            osd_up_state_map["down"] += 1
+    for state in osd_up_state_map:
+        output_lines.append(
+            f'pvc_osd_up_states{{state="{state}"}} {osd_up_state_map[state]}'
+        )
+
+    output_lines.append("# HELP pvc_osd_in_states PVC OSD in state counts")
+    output_lines.append("# TYPE pvc_osd_in_states gauge")
+    osd_in_state_map = dict()
+    for state in set([s.split(",")[1] for s in pvc_common.ceph_osd_state_combinations]):
+        osd_in_state_map[state] = 0
+    for osd in osd_data:
+        if osd["stats"]["in"] > 0:
+            osd_in_state_map["in"] += 1
+        else:
+            osd_in_state_map["out"] += 1
+    for state in osd_in_state_map:
+        output_lines.append(
+            f'pvc_osd_in_states{{state="{state}"}} {osd_in_state_map[state]}'
+        )
+
+    output_lines.append("# HELP pvc_nodes PVC Node count")
+    output_lines.append("# TYPE pvc_nodes gauge")
+    output_lines.append(f"pvc_nodes {status_data['nodes']['total']}")
+
+    output_lines.append("# HELP pvc_vms PVC VM count")
+    output_lines.append("# TYPE pvc_vms gauge")
+    output_lines.append(f"pvc_vms {status_data['vms']['total']}")
+
+    output_lines.append("# HELP pvc_osds PVC OSD count")
+    output_lines.append("# TYPE pvc_osds gauge")
+    output_lines.append(f"pvc_osds {status_data['osds']['total']}")
+
+    output_lines.append("# HELP pvc_networks PVC Network count")
+    output_lines.append("# TYPE pvc_networks gauge")
+    output_lines.append(f"pvc_networks {status_data['networks']}")
+
+    output_lines.append("# HELP pvc_pools PVC Storage Pool count")
+    output_lines.append("# TYPE pvc_pools gauge")
+    output_lines.append(f"pvc_pools {status_data['pools']}")
+
+    output_lines.append("# HELP pvc_volumes PVC Storage Volume count")
+    output_lines.append("# TYPE pvc_volumes gauge")
+    output_lines.append(f"pvc_volumes {status_data['volumes']}")
+
+    output_lines.append("# HELP pvc_snapshots PVC Storage Snapshot count")
+    output_lines.append("# TYPE pvc_snapshots gauge")
+    output_lines.append(f"pvc_snapshots {status_data['snapshots']}")
+
+    return "\n".join(output_lines) + "\n", 200
+
+
+@pvc_common.Profiler(config)
+@ZKConnection(config)
+def ceph_metrics(zkhandler):
+    """
+    Obtain current Ceph Prometheus metrics from the active MGR
+    """
+    # We have to parse out the *name* of the currently active MGR
+    # While the JSON version of the "ceph status" output provides a
+    # URL, this URL is in the backend (i.e. storage) network, which
+    # the API might not have access to. This way, we can connect to
+    # the node name which can be handled however.
+    retcode, retdata = pvc_ceph.get_status(zkhandler)
+    if not retcode:
+        ceph_mgr_node = None
+    else:
+        ceph_data = retdata["ceph_data"]
+        try:
+            ceph_mgr_line = [
+                n for n in ceph_data.split("\n") if match(r"^mgr:", n.strip())
+            ][0]
+            ceph_mgr_node = ceph_mgr_line.split()[1].split("(")[0]
+        except Exception:
+            ceph_mgr_node = None
+
+    if ceph_mgr_node is not None:
+        # Get the data from the endpoint
+        # We use the default port of 9283
+        ceph_prometheus_uri = f"http://{ceph_mgr_node}:9283/metrics"
+        response = get(ceph_prometheus_uri)
+
+        if response.status_code == 200:
+            output = response.text
+            status_code = 200
+        else:
+            output = (
+                f"Error: Failed to obtain metric data from {ceph_mgr_node} MGR daemon\n"
+            )
+            status_code = 400
+    else:
+        output = "Error: Failed to find an active MGR node\n"
+        status_code = 400
+
+    return output, status_code
+
+
 #
 # Fault functions
 #
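For reference, the exposition format `cluster_metrics()` builds is plain Prometheus text: a `# HELP`/`# TYPE` pair per metric, then one sample per line. The label values below are invented for illustration:

```
# HELP pvc_info PVC cluster information
# TYPE pvc_info gauge
pvc_info{primary_node="hv1", version="0.9.x", upstream_ip="10.0.0.254"} 1
# HELP pvc_cluster_maintenance PVC cluster maintenance state
# TYPE pvc_cluster_maintenance gauge
pvc_cluster_maintenance 0
# HELP pvc_vm_states PVC VM state counts
# TYPE pvc_vm_states gauge
pvc_vm_states{state="start"} 12
```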
@@ -279,7 +279,7 @@ def unset_osd(zkhandler, option):
     return True, 'Unset OSD property "{}".'.format(option)


-def get_list_osd(zkhandler, limit, is_fuzzy=True):
+def get_list_osd(zkhandler, limit=None, is_fuzzy=True):
     osd_list = []
     full_osd_list = zkhandler.children("base.osd")

@@ -472,7 +472,7 @@ def set_pgs_pool(zkhandler, name, pgs):
     return True, f'Set PGs count to {pgs} for RBD pool "{name}".'


-def get_list_pool(zkhandler, limit, is_fuzzy=True):
+def get_list_pool(zkhandler, limit=None, is_fuzzy=True):
     full_pool_list = zkhandler.children("base.pool")

     if is_fuzzy and limit:
@@ -830,7 +830,7 @@ def unmap_volume(zkhandler, pool, name):
     return True, 'Unmapped RBD volume at "{}".'.format(mapped_volume)


-def get_list_volume(zkhandler, pool, limit, is_fuzzy=True):
+def get_list_volume(zkhandler, pool, limit=None, is_fuzzy=True):
     if pool and not verifyPool(zkhandler, pool):
         return False, 'ERROR: No pool with name "{}" is present in the cluster.'.format(
             pool
@@ -1034,7 +1034,7 @@ def remove_snapshot(zkhandler, pool, volume, name):
     )


-def get_list_snapshot(zkhandler, pool, volume, limit, is_fuzzy=True):
+def get_list_snapshot(zkhandler, pool, volume, limit=None, is_fuzzy=True):
    snapshot_list = []
    if pool and not verifyPool(zkhandler, pool):
        return False, 'ERROR: No pool with name "{}" is present in the cluster.'.format(
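Giving `limit` a default of `None` lets internal callers such as `cluster_metrics()` above fetch the full list without passing a positional placeholder. A before/after sketch:

```python
# Before: limit was positional and mandatory, so full listings needed a filler.
# osd_retflag, osd_data = pvc_ceph.get_list_osd(zkhandler, None)

# After: omit it entirely to list everything.
osd_retflag, osd_data = pvc_ceph.get_list_osd(zkhandler)
```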
@@ -274,51 +274,9 @@ def getClusterInformation(zkhandler):
     ceph_volume_count = len(ceph_volume_list)
     ceph_snapshot_count = len(ceph_snapshot_list)

-    # State lists
-    node_state_combinations = [
-        "run,ready",
-        "run,flush",
-        "run,flushed",
-        "run,unflush",
-        "init,ready",
-        "init,flush",
-        "init,flushed",
-        "init,unflush",
-        "stop,ready",
-        "stop,flush",
-        "stop,flushed",
-        "stop,unflush",
-        "dead,ready",
-        "dead,flush",
-        "dead,fence-flush",
-        "dead,flushed",
-        "dead,unflush",
-        "fenced,ready",
-        "fenced,flush",
-        "fenced,flushed",
-        "fenced,unflush",
-    ]
-    vm_state_combinations = [
-        "start",
-        "restart",
-        "shutdown",
-        "stop",
-        "disable",
-        "fail",
-        "migrate",
-        "unmigrate",
-        "provision",
-    ]
-    ceph_osd_state_combinations = [
-        "up,in",
-        "up,out",
-        "down,in",
-        "down,out",
-    ]
-
     # Format the Node states
     formatted_node_states = {"total": node_count}
-    for state in node_state_combinations:
+    for state in common.node_state_combinations:
         state_count = 0
         for node in node_list:
             node_state = f"{node['daemon_state']},{node['domain_state']}"
@@ -329,7 +287,7 @@ def getClusterInformation(zkhandler):

     # Format the VM states
     formatted_vm_states = {"total": vm_count}
-    for state in vm_state_combinations:
+    for state in common.vm_state_combinations:
         state_count = 0
         for vm in vm_list:
             if vm["state"] == state:
@@ -341,7 +299,7 @@ def getClusterInformation(zkhandler):
     up_texts = {1: "up", 0: "down"}
     in_texts = {1: "in", 0: "out"}
     formatted_osd_states = {"total": ceph_osd_count}
-    for state in ceph_osd_state_combinations:
+    for state in common.ceph_osd_state_combinations:
         state_count = 0
         for ceph_osd in ceph_osd_list:
             ceph_osd_state = f"{up_texts[ceph_osd['stats']['up']]},{in_texts[ceph_osd['stats']['in']]}"
|
|||||||
from functools import wraps
|
from functools import wraps
|
||||||
|
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
# Global Variables
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
|
||||||
|
# State lists
|
||||||
|
fault_state_combinations = [
|
||||||
|
"new",
|
||||||
|
"ack",
|
||||||
|
]
|
||||||
|
node_state_combinations = [
|
||||||
|
"run,ready",
|
||||||
|
"run,flush",
|
||||||
|
"run,flushed",
|
||||||
|
"run,unflush",
|
||||||
|
"init,ready",
|
||||||
|
"init,flush",
|
||||||
|
"init,flushed",
|
||||||
|
"init,unflush",
|
||||||
|
"stop,ready",
|
||||||
|
"stop,flush",
|
||||||
|
"stop,flushed",
|
||||||
|
"stop,unflush",
|
||||||
|
"dead,ready",
|
||||||
|
"dead,flush",
|
||||||
|
"dead,fence-flush",
|
||||||
|
"dead,flushed",
|
||||||
|
"dead,unflush",
|
||||||
|
"fenced,ready",
|
||||||
|
"fenced,flush",
|
||||||
|
"fenced,flushed",
|
||||||
|
"fenced,unflush",
|
||||||
|
]
|
||||||
|
vm_state_combinations = [
|
||||||
|
"start",
|
||||||
|
"restart",
|
||||||
|
"shutdown",
|
||||||
|
"stop",
|
||||||
|
"disable",
|
||||||
|
"fail",
|
||||||
|
"migrate",
|
||||||
|
"unmigrate",
|
||||||
|
"provision",
|
||||||
|
]
|
||||||
|
ceph_osd_state_combinations = [
|
||||||
|
"up,in",
|
||||||
|
"up,out",
|
||||||
|
"down,in",
|
||||||
|
"down,out",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
# Performance Profiler decorator
|
# Performance Profiler decorator
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
@ -21,13 +21,16 @@
|
|||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from hashlib import md5
|
from hashlib import md5
|
||||||
|
from re import sub
|
||||||
|
|
||||||
|
|
||||||
def generate_fault(
|
def generate_fault(
|
||||||
zkhandler, logger, fault_name, fault_time, fault_delta, fault_message
|
zkhandler, logger, fault_name, fault_time, fault_delta, fault_message
|
||||||
):
|
):
|
||||||
# Generate a fault ID from the fault_message and fault_delta
|
# Strip off any "extra" data from the message (things in brackets)
|
||||||
fault_str = f"{fault_name} {fault_delta} {fault_message}"
|
fault_core_message = sub(r"[\(\[].*?[\)\]]", "", fault_message).strip()
|
||||||
|
# Generate a fault ID from the fault_name, fault_delta, and fault_core_message
|
||||||
|
fault_str = f"{fault_name} {fault_delta} {fault_core_message}"
|
||||||
fault_id = str(md5(fault_str.encode("utf-8")).hexdigest())[:8]
|
fault_id = str(md5(fault_str.encode("utf-8")).hexdigest())[:8]
|
||||||
|
|
||||||
# Strip the microseconds off of the fault time; we don't care about that precision
|
# Strip the microseconds off of the fault time; we don't care about that precision
|
||||||
@ -63,6 +66,7 @@ def generate_fault(
|
|||||||
zkhandler.write(
|
zkhandler.write(
|
||||||
[
|
[
|
||||||
(("faults.last_time", fault_id), fault_time),
|
(("faults.last_time", fault_id), fault_time),
|
||||||
|
(("faults.message", fault_id), fault_message),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
# Otherwise, generate a new fault event
|
# Otherwise, generate a new fault event
|
||||||
|
api-daemon/pvcapid/libvirt_schema.py → daemon-common/libvirt_schema.py (0 lines changed; executable file → normal file)
@@ -261,7 +261,7 @@ def get_info(zkhandler, node):

 def get_list(
     zkhandler,
-    limit,
+    limit=None,
     daemon_state=None,
     coordinator_state=None,
     domain_state=None,
@@ -335,7 +335,7 @@ def worker_create_vm(
         monitor_list.append("{}.{}".format(monitor, config["storage_domain"]))
     vm_data["ceph_monitor_list"] = monitor_list
     vm_data["ceph_monitor_port"] = config["ceph_monitor_port"]
-    vm_data["ceph_monitor_secret"] = config["ceph_storage_secret_uuid"]
+    vm_data["ceph_monitor_secret"] = config["ceph_secret_uuid"]

     # Parse the script arguments
     script_arguments = dict()
debian/control (vendored, 8 lines changed)
@@ -8,7 +8,7 @@ X-Python3-Version: >= 3.7

 Package: pvc-daemon-node
 Architecture: all
-Depends: systemd, pvc-daemon-common, pvc-daemon-health, pvc-daemon-worker, python3-kazoo, python3-psutil, python3-apscheduler, python3-libvirt, python3-psycopg2, python3-dnspython, python3-yaml, python3-distutils, python3-rados, python3-gevent, ipmitool, libvirt-daemon-system, arping, vlan, bridge-utils, dnsmasq, nftables, pdns-server, pdns-backend-pgsql
+Depends: systemd, pvc-daemon-common, pvc-daemon-health, pvc-daemon-worker, python3-kazoo, python3-psutil, python3-apscheduler, python3-libvirt, python3-psycopg2, python3-dnspython, python3-yaml, python3-distutils, python3-rados, python3-gevent, python3-prometheus-client, ipmitool, libvirt-daemon-system, arping, vlan, bridge-utils, dnsmasq, nftables, pdns-server, pdns-backend-pgsql
 Description: Parallel Virtual Cluster node daemon
  A KVM/Zookeeper/Ceph-based VM and private cloud manager
  .
@@ -16,7 +16,7 @@ Description: Parallel Virtual Cluster node daemon

 Package: pvc-daemon-health
 Architecture: all
-Depends: systemd, pvc-daemon-common, python3-kazoo, python3-psutil, python3-apscheduler, python3-yaml
+Depends: systemd, pvc-daemon-common, python3-kazoo, python3-psutil, python3-apscheduler, python3-yaml, python3-prometheus-client
 Description: Parallel Virtual Cluster health daemon
  A KVM/Zookeeper/Ceph-based VM and private cloud manager
  .
@@ -24,7 +24,7 @@ Description: Parallel Virtual Cluster health daemon

 Package: pvc-daemon-worker
 Architecture: all
-Depends: systemd, pvc-daemon-common, python3-kazoo, python3-celery, python3-redis, python3-yaml, python-celery-common, fio
+Depends: systemd, pvc-daemon-common, python3-kazoo, python3-celery, python3-redis, python3-yaml, python3-prometheus-client, python-celery-common, fio
 Description: Parallel Virtual Cluster worker daemon
  A KVM/Zookeeper/Ceph-based VM and private cloud manager
  .
@@ -32,7 +32,7 @@ Description: Parallel Virtual Cluster worker daemon

 Package: pvc-daemon-api
 Architecture: all
-Depends: systemd, pvc-daemon-common, python3-yaml, python3-flask, python3-flask-restful, python3-celery, python3-distutils, python3-redis, python3-lxml, python3-flask-migrate
+Depends: systemd, pvc-daemon-common, python3-yaml, python3-flask, python3-flask-restful, python3-celery, python3-distutils, python3-redis, python3-lxml, python3-flask-migrate, python3-prometheus-client
 Description: Parallel Virtual Cluster API daemon
  A KVM/Zookeeper/Ceph-based VM and private cloud manager
  .