Compare commits

...

16 Commits

Author SHA1 Message Date
4ca2381077 Rework metrics output and add combined endpoint 2023-12-09 15:47:40 -05:00
4003204f14 Remove bracketed text from fault_str
This ensures that certain faults, e.g. Ceph status faults, will be
combined despite the added text in brackets, while still keeping them
mostly separate.

Also ensure the health text is updated each time to assist with this, as
this health text may now change independent of the fault ID.
2023-12-09 15:34:18 -05:00
a70c1d63b0 Separate state totals from states, separate states 2023-12-09 13:59:17 -05:00
2bea78d25e Make all remaining limits optional 2023-12-09 13:43:58 -05:00
fd717b702d Use external list of fault states 2023-12-09 12:51:41 -05:00
132cde5591 Add totals and nice-format states
Avoids tons of annoying rewriting in the UI later.
2023-12-09 12:50:19 -05:00
ba565ead4c Report all state combinations in Prom metrics
Ensures that every state combination is always shown in the metrics output, even if
it contains 0 entries.
2023-12-09 12:40:37 -05:00
317ca4b98c Move defined state combinations into common 2023-12-09 12:36:32 -05:00
2b8abea8df Remove debug printing 2023-12-09 12:22:36 -05:00
9b3c9f1be5 Add Ceph metrics proxy and health fault counts 2023-12-09 12:22:36 -05:00
7373bfed3f Add Prometheus metric exporter
Adds a "fake" Prometheus metrics endpoint which returns cluster status
information in Prometheus format.
2023-12-09 12:22:36 -05:00
d0e7c19602 Add prometheus client dependencies 2023-12-09 12:22:36 -05:00
f01c12c86b Import from pvcworkerd not pvcapid 2023-12-09 12:22:19 -05:00
0bda095571 Move libvirt_schema and fix other imports 2023-12-09 12:20:29 -05:00
7976e1d2d0 Correct import location in scripts 2023-12-09 12:18:33 -05:00
813aef1463 Fix incorrect UUID key name 2023-12-09 12:14:57 -05:00
15 changed files with 425 additions and 89 deletions

View File

@ -147,7 +147,7 @@
# This import is always required here, as VMBuilder is used by the VMBuilderScript class. # This import is always required here, as VMBuilder is used by the VMBuilderScript class.
from pvcapid.vmbuilder import VMBuilder from daemon_lib.vmbuilder import VMBuilder
# The VMBuilderScript class must be named as such, and extend VMBuilder. # The VMBuilderScript class must be named as such, and extend VMBuilder.
@ -174,7 +174,7 @@ class VMBuilderScript(VMBuilder):
""" """
# Run any imports first # Run any imports first
import pvcapid.libvirt_schema as libvirt_schema import daemon_lib.libvirt_schema as libvirt_schema
import datetime import datetime
import random import random

View File

@ -148,7 +148,7 @@
# This import is always required here, as VMBuilder is used by the VMBuilderScript class. # This import is always required here, as VMBuilder is used by the VMBuilderScript class.
from pvcapid.vmbuilder import VMBuilder from daemon_lib.vmbuilder import VMBuilder
# The VMBuilderScript class must be named as such, and extend VMBuilder. # The VMBuilderScript class must be named as such, and extend VMBuilder.
@ -177,7 +177,7 @@ class VMBuilderScript(VMBuilder):
""" """
# Run any imports first # Run any imports first
import pvcapid.libvirt_schema as libvirt_schema import daemon_lib.libvirt_schema as libvirt_schema
import datetime import datetime
import random import random
@ -289,8 +289,8 @@ class VMBuilderScript(VMBuilder):
""" """
# Run any imports first # Run any imports first
from pvcapid.vmbuilder import open_zk from daemon_lib.vmbuilder import open_zk
from pvcapid.Daemon import config from pvcworkerd.Daemon import config
import daemon_lib.common as pvc_common import daemon_lib.common as pvc_common
import daemon_lib.ceph as pvc_ceph import daemon_lib.ceph as pvc_ceph
import os import os
@ -383,8 +383,8 @@ class VMBuilderScript(VMBuilder):
""" """
# Run any imports first # Run any imports first
from pvcapid.vmbuilder import open_zk from daemon_lib.vmbuilder import open_zk
from pvcapid.Daemon import config from pvcworkerd.Daemon import config
import daemon_lib.ceph as pvc_ceph import daemon_lib.ceph as pvc_ceph
for volume in list(reversed(self.vm_data["volumes"])): for volume in list(reversed(self.vm_data["volumes"])):

View File

@ -147,7 +147,7 @@
# This import is always required here, as VMBuilder is used by the VMBuilderScript class. # This import is always required here, as VMBuilder is used by the VMBuilderScript class.
from pvcapid.vmbuilder import VMBuilder from daemon_lib.vmbuilder import VMBuilder
# The VMBuilderScript class must be named as such, and extend VMBuilder. # The VMBuilderScript class must be named as such, and extend VMBuilder.
@ -186,7 +186,7 @@ class VMBuilderScript(VMBuilder):
""" """
# Run any imports first # Run any imports first
import pvcapid.libvirt_schema as libvirt_schema import daemon_lib.libvirt_schema as libvirt_schema
import datetime import datetime
import random import random
@ -301,16 +301,16 @@ class VMBuilderScript(VMBuilder):
This function should use the various exposed PVC commands as indicated to create This function should use the various exposed PVC commands as indicated to create
RBD block devices and map them to the host as required. RBD block devices and map them to the host as required.
open_zk is exposed from pvcapid.vmbuilder to provide a context manager for opening open_zk is exposed from daemon_lib.vmbuilder to provide a context manager for opening
connections to the PVC Zookeeper cluster; ensure you also import (and pass it) connections to the PVC Zookeeper cluster; ensure you also import (and pass it)
the config object from pvcapid.Daemon as well. This context manager then allows the config object from pvcworkerd.Daemon as well. This context manager then allows
the use of various common daemon library functions, without going through the API. the use of various common daemon library functions, without going through the API.
""" """
# Run any imports first # Run any imports first
import os import os
from pvcapid.vmbuilder import open_zk from daemon_lib.vmbuilder import open_zk
from pvcapid.Daemon import config from pvcworkerd.Daemon import config
import daemon_lib.common as pvc_common import daemon_lib.common as pvc_common
import daemon_lib.ceph as pvc_ceph import daemon_lib.ceph as pvc_ceph
@ -446,7 +446,7 @@ class VMBuilderScript(VMBuilder):
# Run any imports first # Run any imports first
import os import os
from pvcapid.vmbuilder import chroot from daemon_lib.vmbuilder import chroot
# The directory we mounted things on earlier during prepare(); this could very well # The directory we mounted things on earlier during prepare(); this could very well
# be exposed as a module-level variable if you so choose # be exposed as a module-level variable if you so choose
@ -718,8 +718,8 @@ GRUB_DISABLE_LINUX_UUID=false
# Run any imports first # Run any imports first
import os import os
from pvcapid.vmbuilder import open_zk from daemon_lib.vmbuilder import open_zk
from pvcapid.Daemon import config from pvcworkerd.Daemon import config
import daemon_lib.common as pvc_common import daemon_lib.common as pvc_common
import daemon_lib.ceph as pvc_ceph import daemon_lib.ceph as pvc_ceph

View File

@ -147,7 +147,7 @@
# This import is always required here, as VMBuilder is used by the VMBuilderScript class. # This import is always required here, as VMBuilder is used by the VMBuilderScript class.
from pvcapid.vmbuilder import VMBuilder from daemon_lib.vmbuilder import VMBuilder
# The VMBuilderScript class must be named as such, and extend VMBuilder. # The VMBuilderScript class must be named as such, and extend VMBuilder.
@ -186,7 +186,7 @@ class VMBuilderScript(VMBuilder):
""" """
# Run any imports first # Run any imports first
import pvcapid.libvirt_schema as libvirt_schema import daemon_lib.libvirt_schema as libvirt_schema
import datetime import datetime
import random import random
@ -301,16 +301,16 @@ class VMBuilderScript(VMBuilder):
This function should use the various exposed PVC commands as indicated to create This function should use the various exposed PVC commands as indicated to create
RBD block devices and map them to the host as required. RBD block devices and map them to the host as required.
open_zk is exposed from pvcapid.vmbuilder to provide a context manager for opening open_zk is exposed from daemon_lib.vmbuilder to provide a context manager for opening
connections to the PVC Zookeeper cluster; ensure you also import (and pass it) connections to the PVC Zookeeper cluster; ensure you also import (and pass it)
the config object from pvcapid.Daemon as well. This context manager then allows the config object from pvcworkerd.Daemon as well. This context manager then allows
the use of various common daemon library functions, without going through the API. the use of various common daemon library functions, without going through the API.
""" """
# Run any imports first # Run any imports first
import os import os
from pvcapid.vmbuilder import open_zk from daemon_lib.vmbuilder import open_zk
from pvcapid.Daemon import config from pvcworkerd.Daemon import config
import daemon_lib.common as pvc_common import daemon_lib.common as pvc_common
import daemon_lib.ceph as pvc_ceph import daemon_lib.ceph as pvc_ceph
@ -446,7 +446,7 @@ class VMBuilderScript(VMBuilder):
# Run any imports first # Run any imports first
import os import os
from pvcapid.vmbuilder import chroot from daemon_lib.vmbuilder import chroot
import daemon_lib.common as pvc_common import daemon_lib.common as pvc_common
# The directory we mounted things on earlier during prepare(); this could very well # The directory we mounted things on earlier during prepare(); this could very well
@ -692,8 +692,8 @@ GRUB_SERIAL_COMMAND="serial --speed=115200 --unit=0 --word=8 --parity=no --stop=
# Run any imports first # Run any imports first
import os import os
from pvcapid.vmbuilder import open_zk from daemon_lib.vmbuilder import open_zk
from pvcapid.Daemon import config from pvcworkerd.Daemon import config
import daemon_lib.common as pvc_common import daemon_lib.common as pvc_common
import daemon_lib.ceph as pvc_ceph import daemon_lib.ceph as pvc_ceph

View File

@ -173,7 +173,7 @@
# This import is always required here, as VMBuilder is used by the VMBuilderScript class. # This import is always required here, as VMBuilder is used by the VMBuilderScript class.
from pvcapid.vmbuilder import VMBuilder from daemon_lib.vmbuilder import VMBuilder
# Set up some variables for later; if you frequently use these tools, you might benefit from # Set up some variables for later; if you frequently use these tools, you might benefit from
@ -243,7 +243,7 @@ class VMBuilderScript(VMBuilder):
""" """
# Run any imports first # Run any imports first
import pvcapid.libvirt_schema as libvirt_schema import daemon_lib.libvirt_schema as libvirt_schema
import datetime import datetime
import random import random
@ -358,8 +358,8 @@ class VMBuilderScript(VMBuilder):
# Run any imports first; as shown here, you can import anything from the PVC # Run any imports first; as shown here, you can import anything from the PVC
# namespace, as well as (of course) the main Python namespaces # namespace, as well as (of course) the main Python namespaces
from pvcapid.vmbuilder import open_zk from daemon_lib.vmbuilder import open_zk
from pvcapid.Daemon import config from pvcworkerd.Daemon import config
import daemon_lib.common as pvc_common import daemon_lib.common as pvc_common
import daemon_lib.ceph as pvc_ceph import daemon_lib.ceph as pvc_ceph
import json import json
@ -902,8 +902,8 @@ class VMBuilderScript(VMBuilder):
""" """
# Run any imports first # Run any imports first
from pvcapid.vmbuilder import open_zk from daemon_lib.vmbuilder import open_zk
from pvcapid.Daemon import config from pvcworkerd.Daemon import config
import daemon_lib.ceph as pvc_ceph import daemon_lib.ceph as pvc_ceph
# Use this construct for reversing the list, as the normal reverse() messes with the list # Use this construct for reversing the list, as the normal reverse() messes with the list

View File

@ -622,6 +622,110 @@ class API_Status(Resource):
api.add_resource(API_Status, "/status") api.add_resource(API_Status, "/status")
# /metrics
class API_Metrics(Resource):
    def get(self):
        """
        Return the current PVC cluster status in Prometheus-compatible metrics format and
        the Ceph cluster metrics as one document.

        Endpoint is unauthenticated to allow metrics exfiltration without having to deal
        with the Prometheus compatibility later.
        ---
        tags:
          - root
        responses:
          200:
            description: OK
          400:
            description: Bad request
        """
        cluster_output, cluster_retcode = api_helper.cluster_metrics()
        ceph_output, ceph_retcode = api_helper.ceph_metrics()

        # Only concatenate the two documents when both backends succeeded;
        # otherwise report a single combined failure
        if cluster_retcode == 200 and ceph_retcode == 200:
            retcode = 200
            output = cluster_output + ceph_output
        else:
            retcode = 400
            output = "Error: Failed to obtain data"

        # Prometheus scrapers expect a plain-text exposition body
        response = flask.make_response(output, retcode)
        response.mimetype = "text/plain"
        return response


api.add_resource(API_Metrics, "/metrics")
# /metrics/pvc
class API_Metrics_PVC(Resource):
    def get(self):
        """
        Return the current PVC cluster status in Prometheus-compatible metrics format

        Endpoint is unauthenticated to allow metrics exfiltration without having to deal
        with the Prometheus compatibility later.
        ---
        tags:
          - root
        responses:
          200:
            description: OK
          400:
            description: Bad request
        """
        cluster_output, cluster_retcode = api_helper.cluster_metrics()

        # Pass the helper's output straight through on success; otherwise
        # substitute a generic error body
        if cluster_retcode == 200:
            output, retcode = cluster_output, 200
        else:
            output, retcode = "Error: Failed to obtain data", 400

        # Prometheus scrapers expect a plain-text exposition body
        response = flask.make_response(output, retcode)
        response.mimetype = "text/plain"
        return response


api.add_resource(API_Metrics_PVC, "/metrics/pvc")
# /metrics/ceph
class API_Metrics_Ceph(Resource):
    def get(self):
        """
        Return the current PVC Ceph Prometheus metrics

        Proxies a metrics request to the current active MGR, since this is dynamic
        and can't be controlled by PVC easily.
        ---
        tags:
          - root
        responses:
          200:
            description: OK
          400:
            description: Bad request
        """
        ceph_output, ceph_retcode = api_helper.ceph_metrics()

        # Any helper failure is collapsed into a generic 400 error body
        if ceph_retcode != 200:
            output = "Error: Failed to obtain data"
            retcode = 400
        else:
            output = ceph_output
            retcode = 200

        # Prometheus scrapers expect a plain-text exposition body
        response = flask.make_response(output, retcode)
        response.mimetype = "text/plain"
        return response


api.add_resource(API_Metrics_Ceph, "/metrics/ceph")
# /faults # /faults
class API_Faults(Resource): class API_Faults(Resource):
@RequestParser( @RequestParser(

View File

@ -23,6 +23,8 @@ import flask
import json import json
import lxml.etree as etree import lxml.etree as etree
from re import match
from requests import get
from werkzeug.formparser import parse_form_data from werkzeug.formparser import parse_form_data
from pvcapid.Daemon import config, strtobool from pvcapid.Daemon import config, strtobool
@ -119,6 +121,222 @@ def cluster_maintenance(zkhandler, maint_state="false"):
return retdata, retcode return retdata, retcode
#
# Metrics functions
#
def _count_states(expected_states, entries, state_key):
    """
    Count entries by state, seeding every expected state with 0.

    Seeding ensures that state labels with no current members are still
    reported (as 0) in the metrics output rather than being omitted.

    NOTE(review): an entry whose state is not in expected_states raises
    KeyError, matching the original strict behavior — the state lists in
    daemon_lib.common are assumed exhaustive; confirm against the daemons.
    """
    state_map = {state: 0 for state in expected_states}
    for entry in entries:
        state_map[entry[state_key]] += 1
    return state_map


@pvc_common.Profiler(config)
@ZKConnection(config)
def cluster_metrics(zkhandler):
    """
    Format status data from cluster_status into Prometheus-compatible metrics

    Returns a (body, retcode) tuple: the metrics exposition text and 200 on
    success, or an error message and 400 if any backing query fails.
    """
    # Get general cluster information; bail out early on any query failure
    status_retflag, status_data = pvc_cluster.get_info(zkhandler)
    if not status_retflag:
        return "Error: Status data threw error", 400

    faults_retflag, faults_data = pvc_faults.get_list(zkhandler)
    if not faults_retflag:
        return "Error: Faults data threw error", 400

    node_retflag, node_data = pvc_node.get_list(zkhandler)
    if not node_retflag:
        return "Error: Node data threw error", 400

    vm_retflag, vm_data = pvc_vm.get_list(zkhandler)
    if not vm_retflag:
        return "Error: VM data threw error", 400

    osd_retflag, osd_data = pvc_ceph.get_list_osd(zkhandler)
    if not osd_retflag:
        return "Error: OSD data threw error", 400

    output_lines = list()

    output_lines.append("# HELP pvc_info PVC cluster information")
    output_lines.append("# TYPE pvc_info gauge")
    output_lines.append(
        f"pvc_info{{primary_node=\"{status_data['primary_node']}\", version=\"{status_data['pvc_version']}\", upstream_ip=\"{status_data['upstream_ip']}\"}} 1"
    )

    output_lines.append("# HELP pvc_cluster_maintenance PVC cluster maintenance state")
    output_lines.append("# TYPE pvc_cluster_maintenance gauge")
    output_lines.append(
        f"pvc_cluster_maintenance {1 if bool(strtobool(status_data['maintenance'])) else 0}"
    )

    output_lines.append("# HELP pvc_cluster_health PVC cluster health status")
    output_lines.append("# TYPE pvc_cluster_health gauge")
    output_lines.append(f"pvc_cluster_health {status_data['cluster_health']['health']}")

    output_lines.append("# HELP pvc_cluster_faults PVC cluster new faults")
    output_lines.append("# TYPE pvc_cluster_faults gauge")
    fault_map = _count_states(
        pvc_common.fault_state_combinations, faults_data, "status"
    )
    for fault_type, count in fault_map.items():
        output_lines.append(f'pvc_cluster_faults{{status="{fault_type}"}} {count}')

    output_lines.append("# HELP pvc_node_health PVC cluster node health status")
    output_lines.append("# TYPE pvc_node_health gauge")
    for node in status_data["node_health"]:
        # Only emit health values that are numeric; presumably non-integer
        # placeholders can appear here — TODO confirm against node daemon
        if isinstance(status_data["node_health"][node]["health"], int):
            output_lines.append(
                f"pvc_node_health{{node=\"{node}\"}} {status_data['node_health'][node]['health']}"
            )

    output_lines.append("# HELP pvc_node_daemon_states PVC Node daemon state counts")
    output_lines.append("# TYPE pvc_node_daemon_states gauge")
    # node_state_combinations are "daemon,domain" pairs; take the daemon half
    node_daemon_state_map = _count_states(
        set(s.split(",")[0] for s in pvc_common.node_state_combinations),
        node_data,
        "daemon_state",
    )
    for state, count in node_daemon_state_map.items():
        output_lines.append(f'pvc_node_daemon_states{{state="{state}"}} {count}')

    output_lines.append("# HELP pvc_node_domain_states PVC Node domain state counts")
    output_lines.append("# TYPE pvc_node_domain_states gauge")
    # Take the domain half of the "daemon,domain" pairs
    node_domain_state_map = _count_states(
        set(s.split(",")[1] for s in pvc_common.node_state_combinations),
        node_data,
        "domain_state",
    )
    for state, count in node_domain_state_map.items():
        output_lines.append(f'pvc_node_domain_states{{state="{state}"}} {count}')

    output_lines.append("# HELP pvc_vm_states PVC VM state counts")
    output_lines.append("# TYPE pvc_vm_states gauge")
    vm_state_map = _count_states(
        set(pvc_common.vm_state_combinations), vm_data, "state"
    )
    for state, count in vm_state_map.items():
        output_lines.append(f'pvc_vm_states{{state="{state}"}} {count}')

    output_lines.append("# HELP pvc_osd_up_states PVC OSD up state counts")
    output_lines.append("# TYPE pvc_osd_up_states gauge")
    # OSD up/down is derived from the numeric stats flag, not a state string
    osd_up_state_map = {
        state: 0
        for state in set(
            s.split(",")[0] for s in pvc_common.ceph_osd_state_combinations
        )
    }
    for osd in osd_data:
        if osd["stats"]["up"] > 0:
            osd_up_state_map["up"] += 1
        else:
            osd_up_state_map["down"] += 1
    for state, count in osd_up_state_map.items():
        output_lines.append(f'pvc_osd_up_states{{state="{state}"}} {count}')

    output_lines.append("# HELP pvc_osd_in_states PVC OSD in state counts")
    output_lines.append("# TYPE pvc_osd_in_states gauge")
    # OSD in/out is likewise derived from the numeric stats flag
    osd_in_state_map = {
        state: 0
        for state in set(
            s.split(",")[1] for s in pvc_common.ceph_osd_state_combinations
        )
    }
    for osd in osd_data:
        if osd["stats"]["in"] > 0:
            osd_in_state_map["in"] += 1
        else:
            osd_in_state_map["out"] += 1
    for state, count in osd_in_state_map.items():
        output_lines.append(f'pvc_osd_in_states{{state="{state}"}} {count}')

    output_lines.append("# HELP pvc_nodes PVC Node count")
    output_lines.append("# TYPE pvc_nodes gauge")
    output_lines.append(f"pvc_nodes {status_data['nodes']['total']}")

    output_lines.append("# HELP pvc_vms PVC VM count")
    output_lines.append("# TYPE pvc_vms gauge")
    output_lines.append(f"pvc_vms {status_data['vms']['total']}")

    output_lines.append("# HELP pvc_osds PVC OSD count")
    output_lines.append("# TYPE pvc_osds gauge")
    output_lines.append(f"pvc_osds {status_data['osds']['total']}")

    output_lines.append("# HELP pvc_networks PVC Network count")
    output_lines.append("# TYPE pvc_networks gauge")
    output_lines.append(f"pvc_networks {status_data['networks']}")

    output_lines.append("# HELP pvc_pools PVC Storage Pool count")
    output_lines.append("# TYPE pvc_pools gauge")
    output_lines.append(f"pvc_pools {status_data['pools']}")

    output_lines.append("# HELP pvc_volumes PVC Storage Volume count")
    output_lines.append("# TYPE pvc_volumes gauge")
    output_lines.append(f"pvc_volumes {status_data['volumes']}")

    output_lines.append("# HELP pvc_snapshots PVC Storage Snapshot count")
    output_lines.append("# TYPE pvc_snapshots gauge")
    output_lines.append(f"pvc_snapshots {status_data['snapshots']}")

    # Prometheus exposition format requires a trailing newline
    return "\n".join(output_lines) + "\n", 200
@pvc_common.Profiler(config)
@ZKConnection(config)
def ceph_metrics(zkhandler):
    """
    Obtain current Ceph Prometheus metrics from the active MGR

    Returns a (body, retcode) tuple: the raw Ceph exporter text and 200 on
    success, or an error message and 400 if the active MGR cannot be
    determined or reached.
    """
    # Local import to avoid touching the module import block; requests is
    # already a dependency of this module (see "from requests import get")
    from requests.exceptions import RequestException

    # We have to parse out the *name* of the currently active MGR
    # While the JSON version of the "ceph status" output provides a
    # URL, this URL is in the backend (i.e. storage) network, which
    # the API might not have access to. This way, we can connect to
    # the node name which can be handled however.
    retcode, retdata = pvc_ceph.get_status(zkhandler)

    ceph_mgr_node = None
    if retcode:
        ceph_data = retdata["ceph_data"]
        try:
            ceph_mgr_line = [
                n for n in ceph_data.split("\n") if match(r"^mgr:", n.strip())
            ][0]
            # Line looks like "mgr: <name>(active, since ...)"; keep the name
            ceph_mgr_node = ceph_mgr_line.split()[1].split("(")[0]
        except Exception:
            ceph_mgr_node = None

    if ceph_mgr_node is None:
        return "Error: Failed to find an active MGR node\n", 400

    # Get the data from the endpoint
    # We use the default port of 9283
    ceph_prometheus_uri = f"http://{ceph_mgr_node}:9283/metrics"
    try:
        # Bound the request so a wedged MGR cannot hang the API worker
        # indefinitely; connection failures fall through to the error path
        response = get(ceph_prometheus_uri, timeout=5)
    except RequestException:
        response = None

    if response is not None and response.status_code == 200:
        return response.text, 200

    return (
        f"Error: Failed to obtain metric data from {ceph_mgr_node} MGR daemon\n",
        400,
    )
# #
# Fault functions # Fault functions
# #

View File

@ -279,7 +279,7 @@ def unset_osd(zkhandler, option):
return True, 'Unset OSD property "{}".'.format(option) return True, 'Unset OSD property "{}".'.format(option)
def get_list_osd(zkhandler, limit, is_fuzzy=True): def get_list_osd(zkhandler, limit=None, is_fuzzy=True):
osd_list = [] osd_list = []
full_osd_list = zkhandler.children("base.osd") full_osd_list = zkhandler.children("base.osd")
@ -472,7 +472,7 @@ def set_pgs_pool(zkhandler, name, pgs):
return True, f'Set PGs count to {pgs} for RBD pool "{name}".' return True, f'Set PGs count to {pgs} for RBD pool "{name}".'
def get_list_pool(zkhandler, limit, is_fuzzy=True): def get_list_pool(zkhandler, limit=None, is_fuzzy=True):
full_pool_list = zkhandler.children("base.pool") full_pool_list = zkhandler.children("base.pool")
if is_fuzzy and limit: if is_fuzzy and limit:
@ -830,7 +830,7 @@ def unmap_volume(zkhandler, pool, name):
return True, 'Unmapped RBD volume at "{}".'.format(mapped_volume) return True, 'Unmapped RBD volume at "{}".'.format(mapped_volume)
def get_list_volume(zkhandler, pool, limit, is_fuzzy=True): def get_list_volume(zkhandler, pool, limit=None, is_fuzzy=True):
if pool and not verifyPool(zkhandler, pool): if pool and not verifyPool(zkhandler, pool):
return False, 'ERROR: No pool with name "{}" is present in the cluster.'.format( return False, 'ERROR: No pool with name "{}" is present in the cluster.'.format(
pool pool
@ -1034,7 +1034,7 @@ def remove_snapshot(zkhandler, pool, volume, name):
) )
def get_list_snapshot(zkhandler, pool, volume, limit, is_fuzzy=True): def get_list_snapshot(zkhandler, pool, volume, limit=None, is_fuzzy=True):
snapshot_list = [] snapshot_list = []
if pool and not verifyPool(zkhandler, pool): if pool and not verifyPool(zkhandler, pool):
return False, 'ERROR: No pool with name "{}" is present in the cluster.'.format( return False, 'ERROR: No pool with name "{}" is present in the cluster.'.format(

View File

@ -274,51 +274,9 @@ def getClusterInformation(zkhandler):
ceph_volume_count = len(ceph_volume_list) ceph_volume_count = len(ceph_volume_list)
ceph_snapshot_count = len(ceph_snapshot_list) ceph_snapshot_count = len(ceph_snapshot_list)
# State lists
node_state_combinations = [
"run,ready",
"run,flush",
"run,flushed",
"run,unflush",
"init,ready",
"init,flush",
"init,flushed",
"init,unflush",
"stop,ready",
"stop,flush",
"stop,flushed",
"stop,unflush",
"dead,ready",
"dead,flush",
"dead,fence-flush",
"dead,flushed",
"dead,unflush",
"fenced,ready",
"fenced,flush",
"fenced,flushed",
"fenced,unflush",
]
vm_state_combinations = [
"start",
"restart",
"shutdown",
"stop",
"disable",
"fail",
"migrate",
"unmigrate",
"provision",
]
ceph_osd_state_combinations = [
"up,in",
"up,out",
"down,in",
"down,out",
]
# Format the Node states # Format the Node states
formatted_node_states = {"total": node_count} formatted_node_states = {"total": node_count}
for state in node_state_combinations: for state in common.node_state_combinations:
state_count = 0 state_count = 0
for node in node_list: for node in node_list:
node_state = f"{node['daemon_state']},{node['domain_state']}" node_state = f"{node['daemon_state']},{node['domain_state']}"
@ -329,7 +287,7 @@ def getClusterInformation(zkhandler):
# Format the VM states # Format the VM states
formatted_vm_states = {"total": vm_count} formatted_vm_states = {"total": vm_count}
for state in vm_state_combinations: for state in common.vm_state_combinations:
state_count = 0 state_count = 0
for vm in vm_list: for vm in vm_list:
if vm["state"] == state: if vm["state"] == state:
@ -341,7 +299,7 @@ def getClusterInformation(zkhandler):
up_texts = {1: "up", 0: "down"} up_texts = {1: "up", 0: "down"}
in_texts = {1: "in", 0: "out"} in_texts = {1: "in", 0: "out"}
formatted_osd_states = {"total": ceph_osd_count} formatted_osd_states = {"total": ceph_osd_count}
for state in ceph_osd_state_combinations: for state in common.ceph_osd_state_combinations:
state_count = 0 state_count = 0
for ceph_osd in ceph_osd_list: for ceph_osd in ceph_osd_list:
ceph_osd_state = f"{up_texts[ceph_osd['stats']['up']]},{in_texts[ceph_osd['stats']['in']]}" ceph_osd_state = f"{up_texts[ceph_osd['stats']['up']]},{in_texts[ceph_osd['stats']['in']]}"

View File

@ -34,6 +34,58 @@ from shlex import split as shlex_split
from functools import wraps from functools import wraps
###############################################################################
# Global Variables
###############################################################################

# State lists
# These enumerate the known states (or state combinations) shared across
# consumers, so that e.g. metrics and cluster-info output can report a zero
# count for states that currently have no members instead of omitting them.

# Fault acknowledgement states
fault_state_combinations = [
    "new",
    "ack",
]
# Node states as "daemon_state,domain_state" pairs
node_state_combinations = [
    "run,ready",
    "run,flush",
    "run,flushed",
    "run,unflush",
    "init,ready",
    "init,flush",
    "init,flushed",
    "init,unflush",
    "stop,ready",
    "stop,flush",
    "stop,flushed",
    "stop,unflush",
    "dead,ready",
    "dead,flush",
    "dead,fence-flush",
    "dead,flushed",
    "dead,unflush",
    "fenced,ready",
    "fenced,flush",
    "fenced,flushed",
    "fenced,unflush",
]
# VM lifecycle states
vm_state_combinations = [
    "start",
    "restart",
    "shutdown",
    "stop",
    "disable",
    "fail",
    "migrate",
    "unmigrate",
    "provision",
]
# Ceph OSD states as "up_state,in_state" pairs
ceph_osd_state_combinations = [
    "up,in",
    "up,out",
    "down,in",
    "down,out",
]
############################################################################### ###############################################################################
# Performance Profiler decorator # Performance Profiler decorator
############################################################################### ###############################################################################

View File

@ -21,13 +21,16 @@
from datetime import datetime from datetime import datetime
from hashlib import md5 from hashlib import md5
from re import sub
def generate_fault( def generate_fault(
zkhandler, logger, fault_name, fault_time, fault_delta, fault_message zkhandler, logger, fault_name, fault_time, fault_delta, fault_message
): ):
# Generate a fault ID from the fault_message and fault_delta # Strip off any "extra" data from the message (things in brackets)
fault_str = f"{fault_name} {fault_delta} {fault_message}" fault_core_message = sub(r"[\(\[].*?[\)\]]", "", fault_message).strip()
# Generate a fault ID from the fault_name, fault_delta, and fault_core_message
fault_str = f"{fault_name} {fault_delta} {fault_core_message}"
fault_id = str(md5(fault_str.encode("utf-8")).hexdigest())[:8] fault_id = str(md5(fault_str.encode("utf-8")).hexdigest())[:8]
# Strip the microseconds off of the fault time; we don't care about that precision # Strip the microseconds off of the fault time; we don't care about that precision
@ -63,6 +66,7 @@ def generate_fault(
zkhandler.write( zkhandler.write(
[ [
(("faults.last_time", fault_id), fault_time), (("faults.last_time", fault_id), fault_time),
(("faults.message", fault_id), fault_message),
] ]
) )
# Otherwise, generate a new fault event # Otherwise, generate a new fault event

View File

@ -261,7 +261,7 @@ def get_info(zkhandler, node):
def get_list( def get_list(
zkhandler, zkhandler,
limit, limit=None,
daemon_state=None, daemon_state=None,
coordinator_state=None, coordinator_state=None,
domain_state=None, domain_state=None,

View File

@ -335,7 +335,7 @@ def worker_create_vm(
monitor_list.append("{}.{}".format(monitor, config["storage_domain"])) monitor_list.append("{}.{}".format(monitor, config["storage_domain"]))
vm_data["ceph_monitor_list"] = monitor_list vm_data["ceph_monitor_list"] = monitor_list
vm_data["ceph_monitor_port"] = config["ceph_monitor_port"] vm_data["ceph_monitor_port"] = config["ceph_monitor_port"]
vm_data["ceph_monitor_secret"] = config["ceph_storage_secret_uuid"] vm_data["ceph_monitor_secret"] = config["ceph_secret_uuid"]
# Parse the script arguments # Parse the script arguments
script_arguments = dict() script_arguments = dict()

8
debian/control vendored
View File

@ -8,7 +8,7 @@ X-Python3-Version: >= 3.7
Package: pvc-daemon-node Package: pvc-daemon-node
Architecture: all Architecture: all
Depends: systemd, pvc-daemon-common, pvc-daemon-health, pvc-daemon-worker, python3-kazoo, python3-psutil, python3-apscheduler, python3-libvirt, python3-psycopg2, python3-dnspython, python3-yaml, python3-distutils, python3-rados, python3-gevent, ipmitool, libvirt-daemon-system, arping, vlan, bridge-utils, dnsmasq, nftables, pdns-server, pdns-backend-pgsql Depends: systemd, pvc-daemon-common, pvc-daemon-health, pvc-daemon-worker, python3-kazoo, python3-psutil, python3-apscheduler, python3-libvirt, python3-psycopg2, python3-dnspython, python3-yaml, python3-distutils, python3-rados, python3-gevent, python3-prometheus-client, ipmitool, libvirt-daemon-system, arping, vlan, bridge-utils, dnsmasq, nftables, pdns-server, pdns-backend-pgsql
Description: Parallel Virtual Cluster node daemon Description: Parallel Virtual Cluster node daemon
A KVM/Zookeeper/Ceph-based VM and private cloud manager A KVM/Zookeeper/Ceph-based VM and private cloud manager
. .
@ -16,7 +16,7 @@ Description: Parallel Virtual Cluster node daemon
Package: pvc-daemon-health Package: pvc-daemon-health
Architecture: all Architecture: all
Depends: systemd, pvc-daemon-common, python3-kazoo, python3-psutil, python3-apscheduler, python3-yaml Depends: systemd, pvc-daemon-common, python3-kazoo, python3-psutil, python3-apscheduler, python3-yaml, python3-prometheus-client
Description: Parallel Virtual Cluster health daemon Description: Parallel Virtual Cluster health daemon
A KVM/Zookeeper/Ceph-based VM and private cloud manager A KVM/Zookeeper/Ceph-based VM and private cloud manager
. .
@ -24,7 +24,7 @@ Description: Parallel Virtual Cluster health daemon
Package: pvc-daemon-worker Package: pvc-daemon-worker
Architecture: all Architecture: all
Depends: systemd, pvc-daemon-common, python3-kazoo, python3-celery, python3-redis, python3-yaml, python-celery-common, fio Depends: systemd, pvc-daemon-common, python3-kazoo, python3-celery, python3-redis, python3-yaml, python3-prometheus-client, python-celery-common, fio
Description: Parallel Virtual Cluster worker daemon Description: Parallel Virtual Cluster worker daemon
A KVM/Zookeeper/Ceph-based VM and private cloud manager A KVM/Zookeeper/Ceph-based VM and private cloud manager
. .
@ -32,7 +32,7 @@ Description: Parallel Virtual Cluster worker daemon
Package: pvc-daemon-api Package: pvc-daemon-api
Architecture: all Architecture: all
Depends: systemd, pvc-daemon-common, python3-yaml, python3-flask, python3-flask-restful, python3-celery, python3-distutils, python3-redis, python3-lxml, python3-flask-migrate Depends: systemd, pvc-daemon-common, python3-yaml, python3-flask, python3-flask-restful, python3-celery, python3-distutils, python3-redis, python3-lxml, python3-flask-migrate, python3-prometheus-client
Description: Parallel Virtual Cluster API daemon Description: Parallel Virtual Cluster API daemon
A KVM/Zookeeper/Ceph-based VM and private cloud manager A KVM/Zookeeper/Ceph-based VM and private cloud manager
. .