Move fault generation to common library

This commit is contained in:
Joshua Boniface 2023-12-06 13:17:10 -05:00
parent 536fb2080f
commit 79eb54d5da
2 changed files with 65 additions and 55 deletions

View File

@ -20,6 +20,61 @@
############################################################################### ###############################################################################
from datetime import datetime from datetime import datetime
from hashlib import md5
def generate_fault(
    zkhandler, logger, fault_name, fault_time, fault_delta, fault_message
):
    """Record a fault in Zookeeper, or refresh the time of an existing one.

    The fault ID is the first 8 hex characters of the MD5 digest of
    ``"<fault_name> <fault_delta> <fault_message>"``, so reporting the same
    fault again resolves to the same entry.

    Args:
        zkhandler: Object providing the ``exists``, ``children``, ``read``
            and ``write`` calls used against the ``base.faults`` tree.
        logger: Object whose ``out(message, state=...)`` method receives the
            informational and warning messages.
        fault_name: Name of the fault; contributes to the fault ID.
        fault_time: Time of this occurrence; stored via ``str()``.
        fault_delta: Delta value of the fault; contributes to the fault ID
            and is stored under ``faults.delta``.
        fault_message: Description of the fault; contributes to the fault ID
            and is stored under ``faults.message``.

    Returns:
        None. Nothing is written when the ``base.faults`` key is missing or
        when ``base.config.maintenance`` reads as ``"true"``.
    """
    # The ID is derived from the name, delta and message together
    fault_str = f"{fault_name} {fault_delta} {fault_message}"
    fault_id = md5(fault_str.encode("utf-8")).hexdigest()[:8]

    # Nothing can be recorded until the fault tree exists
    if not zkhandler.exists("base.faults"):
        logger.out(
            f"Skipping fault reporting for {fault_id} due to missing Zookeeper schemas",
            state="w",
        )
        return

    # A None child listing is treated as "no faults recorded yet" instead of
    # letting the membership test below raise a TypeError
    existing_faults = zkhandler.children("base.faults") or []
    fault_exists = fault_id in existing_faults

    if fault_exists:
        logger.out(
            f"Updating fault {fault_id}: {fault_message} @ {fault_time}", state="i"
        )
    else:
        logger.out(
            f"Generating fault {fault_id}: {fault_message} @ {fault_time}",
            state="i",
        )

    # The fault is still logged above, but never persisted, in maintenance mode
    if zkhandler.read("base.config.maintenance") == "true":
        logger.out(
            f"Skipping fault reporting for {fault_id} due to maintenance mode",
            state="w",
        )
        return

    # If a fault already exists with this ID, just update the time
    if fault_exists:
        zkhandler.write(
            [
                (("faults.last_time", fault_id), str(fault_time)),
            ]
        )
    # Otherwise, generate a new fault event
    else:
        # NOTE(review): fault_delta is written as given, unlike the times
        # which are str()-converted - confirm zkhandler.write accepts that
        zkhandler.write(
            [
                (("faults.id", fault_id), ""),
                (("faults.first_time", fault_id), str(fault_time)),
                (("faults.last_time", fault_id), str(fault_time)),
                (("faults.ack_time", fault_id), ""),
                (("faults.status", fault_id), "new"),
                (("faults.delta", fault_id), fault_delta),
                (("faults.message", fault_id), fault_message),
            ]
        )
def getFault(zkhandler, fault_id): def getFault(zkhandler, fault_id):

View File

@ -25,10 +25,11 @@ import importlib.util
from os import walk from os import walk
from datetime import datetime from datetime import datetime
from hashlib import md5
from json import dumps, loads from json import dumps, loads
from apscheduler.schedulers.background import BackgroundScheduler from apscheduler.schedulers.background import BackgroundScheduler
from daemon_lib.fault import generate_fault
class PluginError(Exception): class PluginError(Exception):
""" """
@ -525,57 +526,6 @@ class MonitoringInstance(object):
except Exception: except Exception:
self.logger.out("Failed to stop monitoring check timer", state="w") self.logger.out("Failed to stop monitoring check timer", state="w")
def generate_fault(self, fault_name, fault_time, fault_delta, fault_message):
    """Record a fault in Zookeeper, or refresh the time of an existing one.

    Uses ``self.zkhandler`` for the Zookeeper calls and ``self.logger`` for
    output. The fault ID is the first 8 hex characters of the MD5 digest of
    ``"<fault_name> <fault_delta> <fault_message>"``, so reporting the same
    fault again resolves to the same entry.

    Args:
        fault_name: Name of the fault; contributes to the fault ID.
        fault_time: Time of this occurrence; stored via ``str()``.
        fault_delta: Delta value of the fault; contributes to the fault ID
            and is stored under ``faults.delta``.
        fault_message: Description of the fault; contributes to the fault ID
            and is stored under ``faults.message``.

    Returns:
        None. Nothing is written when the ``base.faults`` key is missing or
        when ``base.config.maintenance`` reads as ``"true"``.
    """
    zkhandler = self.zkhandler
    logger = self.logger

    # The ID is derived from the name, delta and message together
    fault_str = f"{fault_name} {fault_delta} {fault_message}"
    fault_id = md5(fault_str.encode("utf-8")).hexdigest()[:8]

    # Nothing can be recorded until the fault tree exists
    if not zkhandler.exists("base.faults"):
        logger.out(
            f"Skipping fault reporting for {fault_id} due to missing Zookeeper schemas",
            state="w",
        )
        return

    # A None child listing is treated as "no faults recorded yet" instead of
    # letting the membership test below raise a TypeError
    existing_faults = zkhandler.children("base.faults") or []
    fault_exists = fault_id in existing_faults

    if fault_exists:
        logger.out(
            f"Updating fault {fault_id}: {fault_message} @ {fault_time}", state="i"
        )
    else:
        logger.out(
            f"Generating fault {fault_id}: {fault_message} @ {fault_time}",
            state="i",
        )

    # The fault is still logged above, but never persisted, in maintenance mode
    if zkhandler.read("base.config.maintenance") == "true":
        logger.out(
            f"Skipping fault reporting for {fault_id} due to maintenance mode",
            state="w",
        )
        return

    # If a fault already exists with this ID, just update the time
    if fault_exists:
        zkhandler.write(
            [
                (("faults.last_time", fault_id), str(fault_time)),
            ]
        )
    # Otherwise, generate a new fault event
    else:
        # NOTE(review): fault_delta is written as given, unlike the times
        # which are str()-converted - confirm zkhandler.write accepts that
        zkhandler.write(
            [
                (("faults.id", fault_id), ""),
                (("faults.first_time", fault_id), str(fault_time)),
                (("faults.last_time", fault_id), str(fault_time)),
                (("faults.ack_time", fault_id), ""),
                (("faults.status", fault_id), "new"),
                (("faults.delta", fault_id), fault_delta),
                (("faults.message", fault_id), fault_message),
            ]
        )
def run_faults(self): def run_faults(self):
coordinator_state = self.this_node.coordinator_state coordinator_state = self.this_node.coordinator_state
@ -630,8 +580,13 @@ class MonitoringInstance(object):
entry=entry, details=details entry=entry, details=details
) )
fault_count += 1 fault_count += 1
self.generate_fault( generate_fault(
fault_type, fault_time, fault_delta, fault_message self.zkhandler,
self.logger,
fault_type,
fault_time,
fault_delta,
fault_message,
) )
runtime_end = datetime.now() runtime_end = datetime.now()
@ -716,7 +671,7 @@ class MonitoringInstance(object):
# fault_message = ( # fault_message = (
# f"{self.this_node.name} {result.plugin_name} {result.message}" # f"{self.this_node.name} {result.plugin_name} {result.message}"
# ) # )
# self.generate_fault(fault_type, fault_time, fault_delta, fault_message) # generate_fault(self.zkhandler, self.logger, fault_type, fault_time, fault_delta, fault_message)
total_health -= result.health_delta total_health -= result.health_delta
if total_health < 0: if total_health < 0: