Move debug condition handling to Logger
Avoids many dozens of debug conditionals sprinkled throughout the code by centralizing the debug-state check in the main Logger instance.
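As a minimal, self-contained sketch of the pattern (illustrative only: the constructor and print-based output here are assumptions, not the project's real Logger; only the gate in out() mirrors the first hunk below):

    class Logger(object):
        def __init__(self, config):
            # config is a plain dict with a boolean "debug" key, as in the diff
            self.config = config

        # Output function
        def out(self, message, state=None, prefix=""):
            # Only handle d-state (debug) messages if we're in debug mode
            if state in ["d"] and not self.config["debug"]:
                return
            print("{}{}".format(prefix + " - " if prefix else "", message))

    logger = Logger({"debug": False})
    logger.out("verbose detail", state="d")  # dropped: debug is off
    logger.out("record added", state="i")    # emitted normally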
parent 52bf5ad0ef
commit e654fbba08
@@ -115,6 +115,10 @@ class Logger(object):
 
     # Output function
     def out(self, message, state=None, prefix=""):
+        # Only handle d-state (debug) messages if we're in debug mode
+        if state in ["d"] and not self.config["debug"]:
+            return
+
         # Get the date
         if self.config["log_dates"]:
             date = "{} ".format(datetime.now().strftime("%Y/%m/%d %H:%M:%S.%f"))
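Every hunk that follows applies the same mechanical change at a call site: delete the per-site guard and call the logger unconditionally, letting out() discard d-state messages itself. Schematically (placeholder message, not a line from this diff):

    # Before: each call site guards its own debug output
    if self.config["debug"]:
        self.logger.out("some debug detail", state="d")

    # After: unconditional call; Logger.out() drops it unless debug is enabled
    self.logger.out("some debug detail", state="d")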
@@ -157,9 +157,6 @@ class MonitoringPlugin(object):
         "w": warning
         "e": error
         """
-        if state == "d" and not self.config["debug"]:
-            return
-
         self.logger.out(message, state=state, prefix=self.plugin_name)
 
     #
@@ -523,7 +520,6 @@ class MonitoringInstance(object):
 
         entries = fault_data["entries"]()
 
-        if self.config["debug"]:
-            self.logger.out(
-                f"Entries for fault check {fault_type}: {dumps(entries)}",
-                state="d",
+        self.logger.out(
+            f"Entries for fault check {fault_type}: {dumps(entries)}",
+            state="d",
@@ -333,7 +333,6 @@ class AXFRDaemonInstance(object):
                     z = dns.zone.from_xfr(axfr)
                     records_raw = [z[n].to_text(n) for n in z.nodes.keys()]
                 except Exception as e:
-                    if self.config["debug"]:
-                        self.logger.out(
-                            "{} {} ({})".format(e, dnsmasq_ip, domain),
-                            state="d",
+                    self.logger.out(
+                        "{} {} ({})".format(e, dnsmasq_ip, domain),
+                        state="d",
@@ -370,7 +369,6 @@ class AXFRDaemonInstance(object):
                     "SELECT * FROM records WHERE domain_id=%s", (domain_id,)
                 )
                 results = list(sql_curs.fetchall())
-                if self.config["debug"]:
-                    self.logger.out(
-                        "SQL query results: {}".format(results),
-                        state="d",
+                self.logger.out(
+                    "SQL query results: {}".format(results),
+                    state="d",
@@ -388,7 +386,6 @@ class AXFRDaemonInstance(object):
                 records_old = list()
                 records_old_ids = list()
                 if not results:
-                    if self.config["debug"]:
-                        self.logger.out(
-                            "No results found, skipping.",
-                            state="d",
+                    self.logger.out(
+                        "No results found, skipping.",
+                        state="d",
@@ -404,7 +401,6 @@ class AXFRDaemonInstance(object):
                     r_data = record[4]
                     # Assemble a list element in the same format as the AXFR data
                     entry = "{} {} IN {} {}".format(r_name, r_ttl, r_type, r_data)
-                    if self.config["debug"]:
-                        self.logger.out(
-                            "Found record: {}".format(entry),
-                            state="d",
+                    self.logger.out(
+                        "Found record: {}".format(entry),
+                        state="d",
@@ -413,7 +409,6 @@ class AXFRDaemonInstance(object):
 
                     # Skip non-A or AAAA records
                     if r_type != "A" and r_type != "AAAA":
-                        if self.config["debug"]:
-                            self.logger.out(
-                                'Skipping record {}, not A or AAAA: "{}"'.format(
-                                    entry, r_type
+                        self.logger.out(
+                            'Skipping record {}, not A or AAAA: "{}"'.format(
+                                entry, r_type
@@ -429,7 +424,6 @@ class AXFRDaemonInstance(object):
                 records_new.sort()
                 records_old.sort()
 
-                if self.config["debug"]:
-                    self.logger.out(
-                        "New: {}".format(records_new),
-                        state="d",
+                self.logger.out(
+                    "New: {}".format(records_new),
+                    state="d",
@@ -450,7 +444,6 @@ class AXFRDaemonInstance(object):
                 in_new_not_in_old = in_new - in_old
                 in_old_not_in_new = in_old - in_new
 
-                if self.config["debug"]:
-                    self.logger.out(
-                        "New but not old: {}".format(in_new_not_in_old),
-                        state="d",
+                self.logger.out(
+                    "New but not old: {}".format(in_new_not_in_old),
+                    state="d",
@@ -487,7 +480,6 @@ class AXFRDaemonInstance(object):
                 if len(remove_records) > 0:
                     # Remove the invalid old records
                     for record_id in remove_records:
-                        if self.config["debug"]:
-                            self.logger.out(
-                                "Removing record: {}".format(record_id),
-                                state="d",
+                        self.logger.out(
+                            "Removing record: {}".format(record_id),
+                            state="d",
@@ -507,7 +499,6 @@ class AXFRDaemonInstance(object):
                         r_ttl = record[1]
                         r_type = record[3]
                         r_data = record[4]
-                        if self.config["debug"]:
-                            self.logger.out(
-                                "Add record: {}".format(name),
-                                state="d",
+                        self.logger.out(
+                            "Add record: {}".format(name),
+                            state="d",
@@ -520,7 +511,6 @@ class AXFRDaemonInstance(object):
                             )
                             changed = True
                         except psycopg2.IntegrityError as e:
-                            if self.config["debug"]:
-                                self.logger.out(
-                                    "Failed to add record due to {}: {}".format(
-                                        e, name
+                            self.logger.out(
+                                "Failed to add record due to {}: {}".format(
+                                    e, name
@@ -529,7 +519,6 @@ class AXFRDaemonInstance(object):
                                 prefix="dns-aggregator",
                             )
                         except psycopg2.errors.InFailedSqlTransaction as e:
-                            if self.config["debug"]:
-                                self.logger.out(
-                                    "Failed to add record due to {}: {}".format(
-                                        e, name
+                            self.logger.out(
+                                "Failed to add record due to {}: {}".format(
+                                    e, name
@@ -548,7 +537,6 @@ class AXFRDaemonInstance(object):
                     current_serial = int(soa_record[2])
                     new_serial = current_serial + 1
                     soa_record[2] = str(new_serial)
-                    if self.config["debug"]:
-                        self.logger.out(
-                            "Records changed; bumping SOA: {}".format(new_serial),
-                            state="d",
+                    self.logger.out(
+                        "Records changed; bumping SOA: {}".format(new_serial),
+                        state="d",
@@ -560,7 +548,6 @@ class AXFRDaemonInstance(object):
                     )
 
                 # Commit all the previous changes
-                if self.config["debug"]:
-                    self.logger.out(
-                        "Committing database changes and reloading PDNS",
-                        state="d",
+                self.logger.out(
+                    "Committing database changes and reloading PDNS",
+                    state="d",
@@ -80,8 +80,6 @@ def collect_ceph_stats(logger, config, zkhandler, this_node, queue):
     pool_list = zkhandler.children("base.pool")
     osd_list = zkhandler.children("base.osd")
 
-    debug = config["debug"]
-    if debug:
-        logger.out("Thread starting", state="d", prefix="ceph-thread")
+    logger.out("Thread starting", state="d", prefix="ceph-thread")
 
     # Connect to the Ceph cluster
@@ -90,7 +88,6 @@ def collect_ceph_stats(logger, config, zkhandler, this_node, queue):
             conffile=config["ceph_config_file"],
             conf=dict(keyring=config["ceph_admin_keyring"]),
         )
-        if debug:
-            logger.out("Connecting to cluster", state="d", prefix="ceph-thread")
+        logger.out("Connecting to cluster", state="d", prefix="ceph-thread")
         ceph_conn.connect(timeout=1)
     except Exception as e:
@@ -100,7 +97,6 @@ def collect_ceph_stats(logger, config, zkhandler, this_node, queue):
     # Primary-only functions
     if this_node.coordinator_state == "primary":
         # Get Ceph status information (pretty)
-        if debug:
-            logger.out(
-                "Set Ceph status information in zookeeper (primary only)",
-                state="d",
+        logger.out(
+            "Set Ceph status information in zookeeper (primary only)",
+            state="d",
@@ -117,7 +113,6 @@ def collect_ceph_stats(logger, config, zkhandler, this_node, queue):
             logger.out("Failed to set Ceph status data: {}".format(e), state="e")
 
         # Get Ceph health information (JSON)
-        if debug:
-            logger.out(
-                "Set Ceph health information in zookeeper (primary only)",
-                state="d",
+        logger.out(
+            "Set Ceph health information in zookeeper (primary only)",
+            state="d",
@@ -134,7 +129,6 @@ def collect_ceph_stats(logger, config, zkhandler, this_node, queue):
             logger.out("Failed to set Ceph health data: {}".format(e), state="e")
 
         # Get Ceph df information (pretty)
-        if debug:
-            logger.out(
-                "Set Ceph rados df information in zookeeper (primary only)",
-                state="d",
+        logger.out(
+            "Set Ceph rados df information in zookeeper (primary only)",
+            state="d",
@@ -151,7 +145,6 @@ def collect_ceph_stats(logger, config, zkhandler, this_node, queue):
         except Exception as e:
             logger.out("Failed to set Ceph utilization data: {}".format(e), state="e")
 
-        if debug:
-            logger.out(
-                "Set pool information in zookeeper (primary only)",
-                state="d",
+        logger.out(
+            "Set pool information in zookeeper (primary only)",
+            state="d",
@@ -179,7 +172,6 @@ def collect_ceph_stats(logger, config, zkhandler, this_node, queue):
             rados_pool_df_raw = []
 
         pool_count = len(ceph_pool_df_raw)
-        if debug:
-            logger.out(
-                "Getting info for {} pools".format(pool_count),
-                state="d",
+        logger.out(
+            "Getting info for {} pools".format(pool_count),
+            state="d",
@@ -195,17 +187,13 @@ def collect_ceph_stats(logger, config, zkhandler, this_node, queue):
 
             # Ignore any pools that aren't in our pool list
             if pool["name"] not in pool_list:
-                if debug:
-                    logger.out(
-                        "Pool {} not in pool list {}".format(
-                            pool["name"], pool_list
-                        ),
-                        state="d",
-                        prefix="ceph-thread",
-                    )
+                logger.out(
+                    "Pool {} not in pool list {}".format(pool["name"], pool_list),
+                    state="d",
+                    prefix="ceph-thread",
+                )
                 continue
             else:
-                if debug:
-                    logger.out(
-                        "Parsing data for pool {}".format(pool["name"]),
-                        state="d",
+                logger.out(
+                    "Parsing data for pool {}".format(pool["name"]),
+                    state="d",
@@ -248,7 +236,6 @@ def collect_ceph_stats(logger, config, zkhandler, this_node, queue):
     osds_this_node = 0
     if len(osd_list) > 0:
         # Get data from Ceph OSDs
-        if debug:
-            logger.out("Get data from Ceph OSDs", state="d", prefix="ceph-thread")
+        logger.out("Get data from Ceph OSDs", state="d", prefix="ceph-thread")
 
         # Parse the dump data
@@ -264,7 +251,6 @@ def collect_ceph_stats(logger, config, zkhandler, this_node, queue):
             logger.out("Failed to obtain OSD data: {}".format(e), state="w")
             osd_dump_raw = []
 
-        if debug:
-            logger.out("Loop through OSD dump", state="d", prefix="ceph-thread")
+        logger.out("Loop through OSD dump", state="d", prefix="ceph-thread")
         for osd in osd_dump_raw:
             osd_dump.update(
@@ -279,7 +265,6 @@ def collect_ceph_stats(logger, config, zkhandler, this_node, queue):
             )
 
         # Parse the df data
-        if debug:
-            logger.out("Parse the OSD df data", state="d", prefix="ceph-thread")
+        logger.out("Parse the OSD df data", state="d", prefix="ceph-thread")
 
         osd_df = dict()
@@ -293,7 +278,6 @@ def collect_ceph_stats(logger, config, zkhandler, this_node, queue):
             logger.out("Failed to obtain OSD data: {}".format(e), state="w")
             osd_df_raw = []
 
-        if debug:
-            logger.out("Loop through OSD df", state="d", prefix="ceph-thread")
+        logger.out("Loop through OSD df", state="d", prefix="ceph-thread")
         for osd in osd_df_raw:
             osd_df.update(
@@ -316,7 +300,6 @@ def collect_ceph_stats(logger, config, zkhandler, this_node, queue):
             )
 
         # Parse the status data
-        if debug:
-            logger.out("Parse the OSD status data", state="d", prefix="ceph-thread")
+        logger.out("Parse the OSD status data", state="d", prefix="ceph-thread")
 
         osd_status = dict()
@@ -330,7 +313,6 @@ def collect_ceph_stats(logger, config, zkhandler, this_node, queue):
             logger.out("Failed to obtain OSD status data: {}".format(e), state="w")
             osd_status_raw = []
 
-        if debug:
-            logger.out("Loop through OSD status data", state="d", prefix="ceph-thread")
+        logger.out("Loop through OSD status data", state="d", prefix="ceph-thread")
 
         for line in osd_status_raw.split("\n"):
@@ -400,7 +382,6 @@ def collect_ceph_stats(logger, config, zkhandler, this_node, queue):
             )
 
         # Merge them together into a single meaningful dict
-        if debug:
-            logger.out("Merge OSD data together", state="d", prefix="ceph-thread")
+        logger.out("Merge OSD data together", state="d", prefix="ceph-thread")
 
         osd_stats = dict()
@@ -421,10 +402,7 @@ def collect_ceph_stats(logger, config, zkhandler, this_node, queue):
 
     # Upload OSD data for the cluster (primary-only)
     if this_node.coordinator_state == "primary":
-        if debug:
-            logger.out(
-                "Trigger updates for each OSD", state="d", prefix="ceph-thread"
-            )
+        logger.out("Trigger updates for each OSD", state="d", prefix="ceph-thread")
 
         for osd in osd_list:
             try:
@@ -441,19 +419,15 @@ def collect_ceph_stats(logger, config, zkhandler, this_node, queue):
 
     queue.put(osds_this_node)
 
-    if debug:
-        logger.out("Thread finished", state="d", prefix="ceph-thread")
+    logger.out("Thread finished", state="d", prefix="ceph-thread")
 
 
 # VM stats update function
 def collect_vm_stats(logger, config, zkhandler, this_node, queue):
-    debug = config["debug"]
-    if debug:
-        logger.out("Thread starting", state="d", prefix="vm-thread")
+    logger.out("Thread starting", state="d", prefix="vm-thread")
 
     # Connect to libvirt
     libvirt_name = "qemu:///system"
-    if debug:
-        logger.out("Connecting to libvirt", state="d", prefix="vm-thread")
+    logger.out("Connecting to libvirt", state="d", prefix="vm-thread")
     try:
         lv_conn = libvirt.open(libvirt_name)
@@ -467,7 +441,6 @@ def collect_vm_stats(logger, config, zkhandler, this_node, queue):
     memprov = 0
     vcpualloc = 0
     # Toggle state management of dead VMs to restart them
-    if debug:
-        logger.out(
-            "Toggle state management of dead VMs to restart them",
-            state="d",
+    logger.out(
+        "Toggle state management of dead VMs to restart them",
+        state="d",
@@ -518,7 +491,6 @@ def collect_vm_stats(logger, config, zkhandler, this_node, queue):
         domain_name = domain.name()
 
         # Get all the raw information about the VM
-        if debug:
-            logger.out(
-                "Getting general statistics for VM {}".format(domain_name),
-                state="d",
+        logger.out(
+            "Getting general statistics for VM {}".format(domain_name),
+            state="d",
@@ -537,12 +509,9 @@ def collect_vm_stats(logger, config, zkhandler, this_node, queue):
             domain_memory_stats = domain.memoryStats()
             domain_cpu_stats = domain.getCPUStats(True)[0]
         except Exception as e:
-            if debug:
-                try:
-                    logger.out(
-                        "Failed getting VM information for {}: {}".format(
-                            domain.name(), e
-                        ),
-                        state="d",
-                        prefix="vm-thread",
-                    )
+            try:
+                logger.out(
+                    "Failed getting VM information for {}: {}".format(domain.name(), e),
+                    state="d",
+                    prefix="vm-thread",
+                )
@@ -554,7 +523,6 @@ def collect_vm_stats(logger, config, zkhandler, this_node, queue):
         if domain_uuid not in this_node.domain_list:
             this_node.domain_list.append(domain_uuid)
 
-        if debug:
-            logger.out(
-                "Getting disk statistics for VM {}".format(domain_name),
-                state="d",
+        logger.out(
+            "Getting disk statistics for VM {}".format(domain_name),
+            state="d",
@@ -578,7 +546,6 @@ def collect_vm_stats(logger, config, zkhandler, this_node, queue):
                 }
             )
         except Exception as e:
-            if debug:
-                try:
-                    logger.out(
-                        "Failed getting disk stats for {}: {}".format(domain.name(), e),
+            try:
+                logger.out(
+                    "Failed getting disk stats for {}: {}".format(domain.name(), e),
@@ -589,7 +556,6 @@ def collect_vm_stats(logger, config, zkhandler, this_node, queue):
                 pass
             continue
 
-        if debug:
-            logger.out(
-                "Getting network statistics for VM {}".format(domain_name),
-                state="d",
+        logger.out(
+            "Getting network statistics for VM {}".format(domain_name),
+            state="d",
@@ -619,12 +585,9 @@ def collect_vm_stats(logger, config, zkhandler, this_node, queue):
                 }
             )
         except Exception as e:
-            if debug:
-                try:
-                    logger.out(
-                        "Failed getting network stats for {}: {}".format(
-                            domain.name(), e
-                        ),
-                        state="d",
-                        prefix="vm-thread",
-                    )
+            try:
+                logger.out(
+                    "Failed getting network stats for {}: {}".format(domain.name(), e),
+                    state="d",
+                    prefix="vm-thread",
+                )
@@ -645,7 +608,6 @@ def collect_vm_stats(logger, config, zkhandler, this_node, queue):
             "net_stats": domain_network_stats,
         }
 
-        if debug:
-            logger.out(
-                "Writing statistics for VM {} to Zookeeper".format(domain_name),
-                state="d",
+        logger.out(
+            "Writing statistics for VM {} to Zookeeper".format(domain_name),
+            state="d",
@@ -657,7 +619,6 @@ def collect_vm_stats(logger, config, zkhandler, this_node, queue):
                 [(("domain.stats", domain_uuid), str(json.dumps(domain_stats)))]
            )
         except Exception as e:
-            if debug:
-                logger.out(
-                    "Failed to write domain statistics: {}".format(e),
-                    state="d",
+            logger.out(
+                "Failed to write domain statistics: {}".format(e),
+                state="d",
@@ -667,7 +628,6 @@ def collect_vm_stats(logger, config, zkhandler, this_node, queue):
     # Close the Libvirt connection
     lv_conn.close()
 
-    if debug:
-        logger.out(
-            f"VM stats: doms: {len(running_domains)}; memalloc: {memalloc}; memprov: {memprov}; vcpualloc: {vcpualloc}",
-            state="d",
+    logger.out(
+        f"VM stats: doms: {len(running_domains)}; memalloc: {memalloc}; memprov: {memprov}; vcpualloc: {vcpualloc}",
+        state="d",
@@ -679,14 +639,11 @@ def collect_vm_stats(logger, config, zkhandler, this_node, queue):
     queue.put(memprov)
     queue.put(vcpualloc)
 
-    if debug:
-        logger.out("Thread finished", state="d", prefix="vm-thread")
+    logger.out("Thread finished", state="d", prefix="vm-thread")
 
 
 # Keepalive update function
 def node_keepalive(logger, config, zkhandler, this_node, netstats):
-    debug = config["debug"]
-
     # Display node information to the terminal
     if config["log_keepalives"]:
         if this_node.coordinator_state == "primary":
@@ -746,10 +703,7 @@ def node_keepalive(logger, config, zkhandler, this_node, netstats):
     )
 
     # Get past state and update if needed
-    if debug:
-        logger.out(
-            "Get past state and update if needed", state="d", prefix="main-thread"
-        )
+    logger.out("Get past state and update if needed", state="d", prefix="main-thread")
 
     past_state = zkhandler.read(("node.state.daemon", this_node.name))
     if past_state != "run" and past_state != "shutdown":
@@ -759,7 +713,6 @@ def node_keepalive(logger, config, zkhandler, this_node, netstats):
         this_node.daemon_state = "run"
 
     # Ensure the primary key is properly set
-    if debug:
-        logger.out(
-            "Ensure the primary key is properly set", state="d", prefix="main-thread"
-        )
+    logger.out(
+        "Ensure the primary key is properly set", state="d", prefix="main-thread"
+    )
@@ -843,7 +796,6 @@ def node_keepalive(logger, config, zkhandler, this_node, netstats):
 
     # Set our information in zookeeper
     keepalive_time = int(time.time())
-    if debug:
-        logger.out("Set our information in zookeeper", state="d", prefix="main-thread")
+    logger.out("Set our information in zookeeper", state="d", prefix="main-thread")
     try:
         zkhandler.write(
@@ -932,7 +884,6 @@ def node_keepalive(logger, config, zkhandler, this_node, netstats):
 
     # Look for dead nodes and fence them
     if not this_node.maintenance:
-        if debug:
-            logger.out(
-                "Look for dead nodes and fence them", state="d", prefix="main-thread"
-            )
+        logger.out(
+            "Look for dead nodes and fence them", state="d", prefix="main-thread"
+        )