Move fault generation to common library

This commit is contained in:
Joshua Boniface 2023-12-06 13:17:10 -05:00
parent 536fb2080f
commit 79eb54d5da
2 changed files with 65 additions and 55 deletions

View File

@ -20,6 +20,61 @@
###############################################################################
from datetime import datetime
from hashlib import md5
def generate_fault(
    zkhandler, logger, fault_name, fault_time, fault_delta, fault_message
):
    """
    Record a fault in Zookeeper, either creating it or refreshing its last time.

    The fault ID is the first 8 hex characters of the MD5 digest of the string
    "<fault_name> <fault_delta> <fault_message>", so reporting the same
    name/delta/message again resolves to the same fault entry.

    Behaviour, in order:
      * If the "base.faults" key does not exist, a state="w" message is
        logged and nothing else happens.
      * A state="i" message is logged saying whether the fault is being
        updated (its ID is already a child of "base.faults") or generated.
      * If "base.config.maintenance" reads "true", a state="w" message is
        logged and nothing is written.
      * Otherwise an existing fault only has its last_time rewritten, while a
        new fault gets its full set of keys with status "new".

    Returns None in every case.
    """
    digest = md5(f"{fault_name} {fault_delta} {fault_message}".encode("utf-8"))
    fault_id = digest.hexdigest()[:8]

    # Without the faults schema there is nowhere to record anything
    if not zkhandler.exists("base.faults"):
        logger.out(
            f"Skipping fault reporting for {fault_id} due to missing Zookeeper schemas",
            state="w",
        )
        return

    already_known = fault_id in zkhandler.children("base.faults")

    # Announce the fault before the maintenance check, so it is logged even
    # when nothing ends up being written
    announcement = (
        f"Updating fault {fault_id}: {fault_message} @ {fault_time}"
        if already_known
        else f"Generating fault {fault_id}: {fault_message} @ {fault_time}"
    )
    logger.out(announcement, state="i")

    if zkhandler.read("base.config.maintenance") == "true":
        logger.out(
            f"Skipping fault reporting for {fault_id} due to maintenance mode",
            state="w",
        )
        return

    timestamp = str(fault_time)
    if already_known:
        # Known fault: only the last-seen time changes
        payload = [
            (("faults.last_time", fault_id), timestamp),
        ]
    else:
        # New fault: first and last time start out identical
        payload = [
            (("faults.id", fault_id), ""),
            (("faults.first_time", fault_id), timestamp),
            (("faults.last_time", fault_id), timestamp),
            (("faults.ack_time", fault_id), ""),
            (("faults.status", fault_id), "new"),
            (("faults.delta", fault_id), fault_delta),
            (("faults.message", fault_id), fault_message),
        ]
    zkhandler.write(payload)
def getFault(zkhandler, fault_id):

View File

@ -25,10 +25,11 @@ import importlib.util
from os import walk
from datetime import datetime
from hashlib import md5
from json import dumps, loads
from apscheduler.schedulers.background import BackgroundScheduler
from daemon_lib.fault import generate_fault
class PluginError(Exception):
"""
@ -525,57 +526,6 @@ class MonitoringInstance(object):
except Exception:
self.logger.out("Failed to stop monitoring check timer", state="w")
def generate_fault(self, fault_name, fault_time, fault_delta, fault_message):
    """
    Create or refresh a fault entry using this instance's zkhandler and logger.

    The fault ID is the first 8 hex characters of the MD5 digest of
    "<fault_name> <fault_delta> <fault_message>". If the "base.faults" key is
    missing, or "base.config.maintenance" reads "true", a state="w" message
    is logged and nothing is written. Otherwise a fault whose ID is already
    a child of "base.faults" only has its last_time rewritten, and any other
    fault is written out in full with status "new". Always returns None.
    """
    zk = self.zkhandler
    log = self.logger

    fault_id = md5(
        f"{fault_name} {fault_delta} {fault_message}".encode("utf-8")
    ).hexdigest()[:8]

    # No faults schema means nothing can be recorded
    if not zk.exists("base.faults"):
        log.out(
            f"Skipping fault reporting for {fault_id} due to missing Zookeeper schemas",
            state="w",
        )
        return

    is_existing = fault_id in zk.children("base.faults")

    # The update/generate notice is logged even if maintenance mode then
    # prevents the write
    if is_existing:
        log.out(f"Updating fault {fault_id}: {fault_message} @ {fault_time}", state="i")
    else:
        log.out(
            f"Generating fault {fault_id}: {fault_message} @ {fault_time}",
            state="i",
        )

    if zk.read("base.config.maintenance") == "true":
        log.out(
            f"Skipping fault reporting for {fault_id} due to maintenance mode",
            state="w",
        )
        return

    seen_at = str(fault_time)

    # An existing fault only needs its last-seen time refreshed
    if is_existing:
        zk.write([(("faults.last_time", fault_id), seen_at)])
        return

    # A new fault gets every field; dict insertion order fixes the write order
    new_fault = {
        "faults.id": "",
        "faults.first_time": seen_at,
        "faults.last_time": seen_at,
        "faults.ack_time": "",
        "faults.status": "new",
        "faults.delta": fault_delta,
        "faults.message": fault_message,
    }
    zk.write([((key, fault_id), value) for key, value in new_fault.items()])
def run_faults(self):
coordinator_state = self.this_node.coordinator_state
@ -630,8 +580,13 @@ class MonitoringInstance(object):
entry=entry, details=details
)
fault_count += 1
self.generate_fault(
fault_type, fault_time, fault_delta, fault_message
generate_fault(
self.zkhandler,
self.logger,
fault_type,
fault_time,
fault_delta,
fault_message,
)
runtime_end = datetime.now()
@ -716,7 +671,7 @@ class MonitoringInstance(object):
# fault_message = (
# f"{self.this_node.name} {result.plugin_name} {result.message}"
# )
# self.generate_fault(fault_type, fault_time, fault_delta, fault_message)
# generate_fault(self.zkhandler, self.logger, fault_type, fault_time, fault_delta, fault_message)
total_health -= result.health_delta
if total_health < 0: