parent 988de1218f
commit 8594eb697f

@@ -0,0 +1 @@
+{"version": "11", "root": "", "base": {"root": "", "schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "logs": "/logs", "faults": "/faults", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.health": "/ceph/health", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "logs": {"node": "", "messages": "/messages"}, "faults": {"id": "", "last_time": "/last_time", "first_time": "/first_time", "ack_time": "/ack_time", "status": "/status", "delta": "/delta", "message": "/message"}, "node": {"name": "", "keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "data.pvc_version": "/pvcversion", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword", "sriov": "/sriov", "sriov.pf": "/sriov/pf", "sriov.vf": "/sriov/vf", "monitoring.plugins": "/monitoring_plugins", "monitoring.data": "/monitoring_data", "monitoring.health": "/monitoring_health"}, "monitoring_plugin": {"name": "", "last_run": "/last_run", "health_delta": "/health_delta", "message": "/message", "data": "/data", "runtime": "/runtime"}, "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"}, "sriov_vf": {"phy": "", "pf": "/pf", "mtu": "/mtu", "mac": "/mac", "phy_mac": "/phy_mac", "config": "/config", "config.vlan_id": "/config/vlan_id", "config.vlan_qos": "/config/vlan_qos", "config.tx_rate_min": "/config/tx_rate_min", "config.tx_rate_max": "/config/tx_rate_max", "config.spoof_check": "/config/spoof_check", "config.link_state": "/config/link_state", "config.trust": "/config/trust", "config.query_rss": "/config/query_rss", "pci": "/pci", "pci.domain": "/pci/domain", "pci.bus": "/pci/bus", "pci.slot": "/pci/slot", "pci.function": "/pci/function", "used": "/used", "used_by": "/used_by"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "meta.tags": "/tags", "migrate.sync_lock": "/migrate_sync_lock"}, "tag": {"name": "", "type": "/type", "protected": "/protected"}, "network": {"vni": "", "type": "/nettype", "mtu": "/mtu", "rule": "/firewall_rules", "rule.in": "/firewall_rules/in", "rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": "/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname", "expiry": "/expiry", "client_id": "/clientid"}, "rule": {"description": "", "rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "db_device": "/db_device", "fsid": "/fsid", "ofsid": "/fsid/osd", "cfsid": "/fsid/cluster", "lvm": "/lvm", "vg": "/lvm/vg", "lv": "/lvm/lv", "is_split": "/is_split", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "tier": "/tier", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}}
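For orientation, the single JSON line above is the flattened version-11 schema that the ZKSchema changes below also encode: the "base" map gives the root path of each object class, and each per-class map gives sub-paths relative to an individual object. A minimal sketch of how such a flattened map can be resolved into concrete ZooKeeper paths; resolve_path() here is a hypothetical helper for illustration, not the actual ZKSchema API:

import json

# Illustrative subset of the flattened version-11 schema shown above.
schema = json.loads(
    '{"version": "11",'
    ' "base": {"faults": "/faults"},'
    ' "faults": {"id": "", "last_time": "/last_time", "message": "/message"}}'
)


def resolve_path(key, argument=None):
    # Hypothetical helper: turn a dotted schema key (plus an optional object ID)
    # into a ZooKeeper path using the flattened schema map.
    section, _, subkey = key.partition(".")
    if section == "base":
        return schema["base"][subkey]
    # Object sub-keys hang off the class base path plus the object's own ID.
    return f'{schema["base"][section]}/{argument}{schema[section][subkey]}'


print(resolve_path("base.faults"))               # -> /faults
print(resolve_path("faults.message", 12345678))  # -> /faults/12345678/message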
@@ -328,7 +328,7 @@ class ZKHandler(object):
 
         return True
 
-    def children(self, key):
+    def children(self, key, retval=None):
         """
         Lists all children of a key
         """
@@ -336,11 +336,11 @@ class ZKHandler(object):
             path = self.get_schema_path(key)
             if path is None:
                 # This path is invalid; this is likely due to missing schema entries, so return None
-                return None
+                return retval
 
             return self.zk_conn.get_children(path)
         except NoNodeError:
-            return None
+            return retval
 
     def rename(self, kkpairs):
         """
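A brief usage sketch of the new retval parameter; the FakeZKHandler class below is a stand-in for illustration, not part of this diff. Passing retval=[] lets callers iterate over the children of a key that may not exist yet, instead of tripping over the old hard-coded None return.

# Minimal stand-in mirroring only the patched children() behaviour.
class FakeZKHandler:
    def __init__(self, tree):
        self.tree = tree

    def children(self, key, retval=None):
        # Missing paths now fall back to retval instead of None.
        return self.tree.get(key, retval)


zk = FakeZKHandler(tree={})  # e.g. a cluster whose /faults tree is not created yet

# Old behaviour: children() returned None, so this loop raised
# "TypeError: 'NoneType' object is not iterable".
for fault_id in zk.children("base.faults", retval=[]):
    print(fault_id)  # with retval=[], the loop is simply a no-op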
@@ -540,7 +540,7 @@ class ZKHandler(object):
 #
 class ZKSchema(object):
     # Current version
-    _version = 10
+    _version = 11
 
     # Root for doing nested keys
     _schema_root = ""
@@ -560,7 +560,8 @@ class ZKSchema(object):
             "config.primary_node.sync_lock": f"{_schema_root}/config/primary_node/sync_lock",
             "config.upstream_ip": f"{_schema_root}/config/upstream_ip",
             "config.migration_target_selector": f"{_schema_root}/config/migration_target_selector",
-            "logs": "/logs",
+            "logs": f"{_schema_root}/logs",
+            "faults": f"{_schema_root}/faults",
             "node": f"{_schema_root}/nodes",
             "domain": f"{_schema_root}/domains",
             "network": f"{_schema_root}/networks",
@@ -577,6 +578,16 @@ class ZKSchema(object):
             "node": "",  # The root key
             "messages": "/messages",
         },
+        # The schema of an individual logs entry (/logs/{id})
+        "faults": {
+            "id": "",  # The root key
+            "last_time": "/last_time",
+            "first_time": "/first_time",
+            "ack_time": "/ack_time",
+            "status": "/status",
+            "delta": "/delta",
+            "message": "/message",
+        },
         # The schema of an individual node entry (/nodes/{node_name})
         "node": {
             "name": "",  # The root key
@@ -619,7 +630,11 @@ class ZKSchema(object):
             "runtime": "/runtime",
         },
         # The schema of an individual SR-IOV PF entry (/nodes/{node_name}/sriov/pf/{pf})
-        "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"},  # The root key
+        "sriov_pf": {
+            "phy": "",
+            "mtu": "/mtu",
+            "vfcount": "/vfcount",
+        },  # The root key
         # The schema of an individual SR-IOV VF entry (/nodes/{node_name}/sriov/vf/{vf})
         "sriov_vf": {
             "phy": "",  # The root key
@@ -665,7 +680,11 @@ class ZKSchema(object):
             "migrate.sync_lock": "/migrate_sync_lock",
         },
         # The schema of an individual domain tag entry (/domains/{domain}/tags/{tag})
-        "tag": {"name": "", "type": "/type", "protected": "/protected"},  # The root key
+        "tag": {
+            "name": "",
+            "type": "/type",
+            "protected": "/protected",
+        },  # The root key
         # The schema of an individual network entry (/networks/{vni})
         "network": {
             "vni": "",  # The root key
@@ -702,7 +721,11 @@ class ZKSchema(object):
             "client_id": "/clientid",
         },
         # The schema for an individual network ACL entry (/networks/{vni}/firewall_rules/(in|out)/{acl}
-        "rule": {"description": "", "rule": "/rule", "order": "/order"},  # The root key
+        "rule": {
+            "description": "",
+            "rule": "/rule",
+            "order": "/order",
+        },  # The root key
         # The schema of an individual OSD entry (/ceph/osds/{osd_id})
         "osd": {
             "id": "",  # The root key
@@ -726,9 +749,15 @@ class ZKSchema(object):
             "stats": "/stats",
         },  # The root key
         # The schema of an individual volume entry (/ceph/volumes/{pool_name}/{volume_name})
-        "volume": {"name": "", "stats": "/stats"},  # The root key
+        "volume": {
+            "name": "",
+            "stats": "/stats",
+        },  # The root key
         # The schema of an individual snapshot entry (/ceph/volumes/{pool_name}/{volume_name}/{snapshot_name})
-        "snapshot": {"name": "", "stats": "/stats"},  # The root key
+        "snapshot": {
+            "name": "",
+            "stats": "/stats",
+        },  # The root key
     }
 
     # Properties
@@ -25,7 +25,8 @@ import importlib.util
 
 from os import walk
 from datetime import datetime
-from json import dumps
+from hashlib import sha1
+from json import dumps, loads
 from apscheduler.schedulers.background import BackgroundScheduler
 
 
@@ -197,6 +198,97 @@ class MonitoringInstance(object):
         self.logger = logger
         self.this_node = this_node
 
+        # Create functions for each fault type
+        def get_node_daemon_states():
+            return [
+                (node, self.zkhandler.read(("node.state.daemon", node)))
+                for node in self.zkhandler.children("base.node")
+            ]
+
+        def get_osd_out_states():
+            return [
+                (osd, loads(self.zkhandler.read(("osd.stats", osd))).get("out", 0))
+                for osd in self.zkhandler.children("base.osd")
+            ]
+
+        def get_ceph_health_entries():
+            return [
+                (value, key)
+                for key, value in loads(zkhandler.read("base.storage.health"))[
+                    "checks"
+                ].items()
+            ]
+
+        def get_vm_states():
+            return [
+                (
+                    self.zkhandler.read(("domain.name", domain)),
+                    self.zkhandler.read(("domain.state", domain)),
+                )
+                for domain in self.zkhandler.children("base.domain")
+            ]
+
+        def get_overprovisioned_memory():
+            all_nodes = self.zkhandler.children("base.node")
+            current_memory_provisioned = sum(
+                [
+                    int(self.zkhandler.read(("node.memory.allocated", node)))
+                    for node in all_nodes
+                ]
+            )
+            node_memory_totals = [
+                int(self.zkhandler.read(("node.memory.total", node)))
+                for node in all_nodes
+            ]
+            total_node_memory = sum(node_memory_totals)
+            most_node_memory = sorted(node_memory_totals)[-1]
+            available_node_memory = total_node_memory - most_node_memory
+
+            if current_memory_provisioned >= available_node_memory:
+                op_str = "overprovisioned"
+            else:
+                op_str = "ok"
+            return [
+                (
+                    f"{current_memory_provisioned}MB > {available_node_memory}MB (N-1)",
+                    op_str,
+                )
+            ]
+
+        # This is a list of all possible faults (cluster error messages) and their corresponding details
+        self.faults_map = {
+            "dead_or_fenced_node": {
+                "entries": get_node_daemon_states,
+                "conditions": ["dead", "fenced"],
+                "delta": 50,
+                "message": "Node {entry} was dead and/or fenced.",
+            },
+            "ceph_osd_out": {
+                "entries": get_osd_out_states,
+                "conditions": ["1"],
+                "delta": 25,
+                "message": "OSD {entry} was out.",
+            },
+            "ceph_err": {
+                "entries": get_ceph_health_entries,
+                "conditions": ["HEALTH_ERR"],
+                "delta": 50,
+                "message": "Ceph cluster reported ERR: {entry}",
+            },
+            "vm_failed": {
+                "entries": get_vm_states,
+                "conditions": ["fail"],
+                "delta": 10,
+                "message": "VM {entry} was failed.",
+            },
+            "memory_overprovisioned": {
+                "entries": get_overprovisioned_memory,
+                "conditions": ["overprovisioned"],
+                "delta": 25,
+                "message": "Cluster memory was overprovisioned {entry}",
+            },
+        }
+
         # Get a list of plugins from the plugin_directory
         plugin_files = next(walk(self.config["plugin_directory"]), (None, None, []))[
             2
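To make the shape of this table concrete: each entry supplies an entries() callable returning (identifier, detail) pairs, and a fault fires whenever any condition substring appears in the detail. A self-contained sketch of that matching idea, using hard-coded sample data instead of ZooKeeper reads; the real loop is the run_faults() method added further down in this diff:

def get_node_daemon_states():
    # Sample data standing in for the ZooKeeper reads used above.
    return [("hv1", "run"), ("hv2", "dead")]


faults_map = {
    "dead_or_fenced_node": {
        "entries": get_node_daemon_states,
        "conditions": ["dead", "fenced"],
        "delta": 50,
        "message": "Node {entry} was dead and/or fenced.",
    },
}

for fault_type, details in faults_map.items():
    for entry, detail in details["entries"]():
        if any(str(c) in str(detail) for c in details["conditions"]):
            # In the daemon this would call generate_fault(); here we just print.
            print(details["message"].format(entry=entry), f'(delta {details["delta"]})')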
@@ -344,38 +436,133 @@ class MonitoringInstance(object):
             )
         )
 
-        self.run_plugins()
-        self.start_check_timer()
+        self.start_timer()
 
     def __del__(self):
         self.shutdown()
 
     def shutdown(self):
-        self.stop_check_timer()
+        self.stop_timer()
         self.run_cleanups()
         return
 
-    def start_check_timer(self):
-        check_interval = self.config["monitoring_interval"]
+    def start_timer(self):
+        check_interval = int(self.config["monitoring_interval"])
+
+        self.timer = BackgroundScheduler()
+        self.timer.add_job(
+            self.run_checks,
+            trigger="interval",
+            seconds=check_interval,
+        )
+
         self.logger.out(
             f"Starting monitoring check timer ({check_interval} second interval)",
             state="s",
         )
-        self.check_timer = BackgroundScheduler()
-        self.check_timer.add_job(
-            self.run_plugins,
-            trigger="interval",
-            seconds=check_interval,
-        )
-        self.check_timer.start()
+        self.timer.start()
 
-    def stop_check_timer(self):
+        self.run_faults()
+        self.run_plugins()
+
+    def stop_timer(self):
         try:
-            self.check_timer.shutdown()
             self.logger.out("Stopping monitoring check timer", state="s")
+            self.timer.shutdown()
         except Exception:
             self.logger.out("Failed to stop monitoring check timer", state="w")
 
+    def generate_fault(self, fault_time, fault_delta, fault_message):
+        # Generate a fault ID from the fault_message and fault_delta
+        fault_str = f"{fault_delta} {fault_message}"
+        fault_id = int(sha1(fault_str.encode("utf-8")).hexdigest(), 16) % (10**8)
+
+        self.logger.out(
+            f"Generating fault {fault_id}: {fault_message} @ {fault_time}", state="i"
+        )
+
+        # If a fault already exists with this ID, just update the time
+        if not self.zkhandler.exists("base.faults"):
+            self.logger.out(
+                "Skipping fault reporting due to missing Zookeeper schemas", state="w"
+            )
+            return
+
+        if fault_id in self.zkhandler.children("base.faults", retval=[]):
+            self.zkhandler.write(
+                [
+                    (("faults.last_time", fault_id), str(fault_time)),
+                ]
+            )
+        # Otherwise, generate a new fault event
+        else:
+            self.zkhandler.write(
+                [
+                    (("faults.id", fault_id), ""),
+                    (("faults.first_time", fault_id), str(fault_time)),
+                    (("faults.last_time", fault_id), str(fault_time)),
+                    (("faults.ack_time", fault_id), ""),
+                    (("faults.status", fault_id), "new"),
+                    (("faults.delta", fault_id), fault_delta),
+                    (("faults.message", fault_id), fault_message),
+                ]
+            )
+
+    def run_faults(self):
+        if self.this_node.coordinator_state == "primary":
+            cst_colour = self.logger.fmt_green
+        elif self.this_node.coordinator_state == "secondary":
+            cst_colour = self.logger.fmt_blue
+        else:
+            cst_colour = self.logger.fmt_cyan
+
+        active_coordinator_state = self.this_node.coordinator_state
+
+        runtime_start = datetime.now()
+        self.logger.out(
+            "Starting monitoring fault check run",
+            state="t",
+        )
+
+        fault_count = 0
+        for fault_type in self.faults_map.keys():
+            fault_details = self.faults_map[fault_type]
+
+            entries = fault_details["entries"]()
+            for _entry in entries:
+                entry = _entry[0]
+                detail = _entry[1]
+                for condition in fault_details["conditions"]:
+                    if str(condition) in str(detail):
+                        fault_time = datetime.now()
+                        fault_delta = fault_details["delta"]
+                        fault_message = fault_details["message"].format(entry=entry)
+                        fault_count += 1
+                        self.generate_fault(fault_time, fault_delta, fault_message)
+
+        runtime_end = datetime.now()
+        runtime_delta = runtime_end - runtime_start
+        runtime = "{:0.02f}".format(runtime_delta.total_seconds())
+        if fault_count > 0:
+            fault_colour = self.logger.fmt_red
+        else:
+            fault_colour = self.logger.fmt_green
+
+        self.logger.out(
+            "{start_colour}{hostname} fault check @ {starttime}{nofmt} [{cst_colour}{costate}{nofmt}] result is {fault_colour}{fault_count} faults{nofmt} in {runtime} seconds".format(
+                start_colour=self.logger.fmt_purple,
+                cst_colour=self.logger.fmt_bold + cst_colour,
+                fault_colour=fault_colour,
+                nofmt=self.logger.fmt_end,
+                hostname=self.config["node_hostname"],
+                starttime=runtime_start,
+                costate=active_coordinator_state,
+                fault_count=fault_count,
+                runtime=runtime,
+            ),
+            state="t",
+        )
+
     def run_plugin(self, plugin):
         time_start = datetime.now()
         try:
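One detail worth calling out in generate_fault() above: the fault ID is derived deterministically from the delta and message, so repeated detections of the same condition hash to the same 8-digit ID and only refresh last_time rather than creating a duplicate fault. A quick standalone check of that property, using the same derivation shown in the diff:

from hashlib import sha1


def fault_id_for(fault_delta, fault_message):
    # Same derivation as generate_fault(): sha1 of "{delta} {message}",
    # reduced to an 8-digit integer.
    fault_str = f"{fault_delta} {fault_message}"
    return int(sha1(fault_str.encode("utf-8")).hexdigest(), 16) % (10**8)


a = fault_id_for(50, "Node hv2 was dead and/or fenced.")
b = fault_id_for(50, "Node hv2 was dead and/or fenced.")
c = fault_id_for(50, "Node hv3 was dead and/or fenced.")
print(a == b)  # True: a re-detection maps onto the existing fault entry
print(a == c)  # False: a different node produces a distinct fault ID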
@@ -406,7 +593,7 @@ class MonitoringInstance(object):
 
         runtime_start = datetime.now()
         self.logger.out(
-            "Starting monitoring healthcheck run",
+            "Starting monitoring plugin check run",
             state="t",
         )
 
@@ -459,7 +646,7 @@ class MonitoringInstance(object):
             health_text = "N/A"
 
         self.logger.out(
-            "{start_colour}{hostname} healthcheck @ {starttime}{nofmt} [{cst_colour}{costate}{nofmt}] result is {health_colour}{health}{nofmt} in {runtime} seconds".format(
+            "{start_colour}{hostname} plugin check @ {starttime}{nofmt} [{cst_colour}{costate}{nofmt}] result is {health_colour}{health}{nofmt} in {runtime} seconds".format(
                 start_colour=self.logger.fmt_purple,
                 cst_colour=self.logger.fmt_bold + cst_colour,
                 health_colour=health_colour,
@@ -494,3 +681,7 @@ class MonitoringInstance(object):
                 ),
             ]
         )
+
+    def run_checks(self):
+        self.run_faults()
+        self.run_plugins()