diff --git a/bootstrap-daemon/pvcbootstrapd.yaml.sample b/bootstrap-daemon/pvcbootstrapd.yaml.sample index 2b0dc8e..5517951 100644 --- a/bootstrap-daemon/pvcbootstrapd.yaml.sample +++ b/bootstrap-daemon/pvcbootstrapd.yaml.sample @@ -104,6 +104,7 @@ pvc: begin: "🤞" # A task is beginning success: "✅" # A task succeeded failure: "❌" # A task failed + completed: "👌" # A task is completed # The webhook body elements; this is specific to the webhook target, and is converted into raw # JSON before sending. # Two special variables are used: "{icon}" displays one of the above icons, and "{message}" displays diff --git a/bootstrap-daemon/pvcbootstrapd.yaml.template b/bootstrap-daemon/pvcbootstrapd.yaml.template index 5ee35c2..0a8b3c1 100644 --- a/bootstrap-daemon/pvcbootstrapd.yaml.template +++ b/bootstrap-daemon/pvcbootstrapd.yaml.template @@ -39,6 +39,7 @@ pvc: begin: "🤞" # A task is beginning success: "✅" # A task succeeded failure: "❌" # A task failed + completed: "👌" # A task is completed body: channel: "mychannel" username: "pvcbootstrapd" diff --git a/bootstrap-daemon/pvcbootstrapd/Daemon.py b/bootstrap-daemon/pvcbootstrapd/Daemon.py index 6a70769..24d4bac 100755 --- a/bootstrap-daemon/pvcbootstrapd/Daemon.py +++ b/bootstrap-daemon/pvcbootstrapd/Daemon.py @@ -250,6 +250,9 @@ def entrypoint(): notifications.send_webhook(config, "begin", "Starting up pvcbootstrapd") + cspec = git.load_cspec_yaml(config) + print(cspec) + # Initialize the database db.init_database(config) diff --git a/bootstrap-daemon/pvcbootstrapd/lib/ansible.py b/bootstrap-daemon/pvcbootstrapd/lib/ansible.py index 9c55142..d428425 100755 --- a/bootstrap-daemon/pvcbootstrapd/lib/ansible.py +++ b/bootstrap-daemon/pvcbootstrapd/lib/ansible.py @@ -19,6 +19,7 @@ # ############################################################################### +import pvcbootstrapd.lib.notifications as notifications import pvcbootstrapd.lib.git as git import ansible_runner @@ -53,6 +54,9 @@ def run_bootstrap(config, cspec, 
cluster, nodes): logger.info("Waiting 60s before starting Ansible bootstrap.") sleep(60) + logger.info(f"Starting Ansible bootstrap of cluster {cluster.name}") + notifications.send_webhook(config, "begin", f"Starting Ansible bootstrap of cluster {cluster.name}") + # Run the Ansible playbooks with tempfile.TemporaryDirectory(prefix="pvc-ansible-bootstrap_") as pdir: try: @@ -74,5 +78,9 @@ def run_bootstrap(config, cspec, cluster, nodes): if r.rc == 0: git.commit_repository(config) git.push_repository(config) + notifications.send_webhook(config, "success", f"Completed Ansible bootstrap of cluster {cluster.name}") + else: + notifications.send_webhook(config, "failure", f"Failed Ansible bootstrap of cluster {cluster.name}; check pvcbootstrapd logs") except Exception as e: logger.warning(f"Error: {e}") + notifications.send_webhook(config, "failure", f"Failed Ansible bootstrap of cluster {cluster.name} with error {e}; check pvcbootstrapd logs") diff --git a/bootstrap-daemon/pvcbootstrapd/lib/db.py b/bootstrap-daemon/pvcbootstrapd/lib/db.py index ca07f20..001006b 100755 --- a/bootstrap-daemon/pvcbootstrapd/lib/db.py +++ b/bootstrap-daemon/pvcbootstrapd/lib/db.py @@ -23,6 +23,8 @@ import os import sqlite3 import contextlib +import pvcbootstrapd.lib.notifications as notifications + from pvcbootstrapd.lib.dataclasses import Cluster, Node from celery.utils.log import get_task_logger @@ -48,6 +50,7 @@ def init_database(config): db_path = config["database_path"] if not os.path.isfile(db_path): print("First run: initializing database.") + notifications.send_webhook(config, "begin", "First run: initializing database") # Initializing the database with dbconn(db_path) as cur: # Table listing all clusters @@ -73,6 +76,8 @@ def init_database(config): CONSTRAINT cluster_col FOREIGN KEY (cluster) REFERENCES clusters(id) ON DELETE CASCADE )""" ) + notifications.send_webhook(config, "success", "First run: successfully initialized database") + # # Cluster functions diff --git
a/bootstrap-daemon/pvcbootstrapd/lib/git.py b/bootstrap-daemon/pvcbootstrapd/lib/git.py index bc39096..16c38d2 100755 --- a/bootstrap-daemon/pvcbootstrapd/lib/git.py +++ b/bootstrap-daemon/pvcbootstrapd/lib/git.py @@ -23,6 +23,8 @@ import os.path import git import yaml +import pvcbootstrapd.lib.notifications as notifications + from celery.utils.log import get_task_logger @@ -39,6 +41,7 @@ def init_repository(config): print( f"First run: cloning repository {config['ansible_remote']} branch {config['ansible_branch']} to {config['ansible_path']}" ) + notifications.send_webhook(config, "begin", f"First run: cloning repository {config['ansible_remote']} branch {config['ansible_branch']} to {config['ansible_path']}") git.Repo.clone_from( config["ansible_remote"], config["ansible_path"], @@ -49,6 +52,7 @@ def init_repository(config): g = git.cmd.Git(f"{config['ansible_path']}") g.checkout(config["ansible_branch"]) g.submodule("update", "--init", env=dict(GIT_SSH_COMMAND=git_ssh_cmd)) + notifications.send_webhook(config, "success", "First run: successfully initialized Git repository") except Exception as e: print(f"Error: {e}") diff --git a/bootstrap-daemon/pvcbootstrapd/lib/hooks.py b/bootstrap-daemon/pvcbootstrapd/lib/hooks.py index e011be6..f43bb44 100755 --- a/bootstrap-daemon/pvcbootstrapd/lib/hooks.py +++ b/bootstrap-daemon/pvcbootstrapd/lib/hooks.py @@ -19,6 +19,7 @@ # ############################################################################### +import pvcbootstrapd.lib.notifications as notifications import pvcbootstrapd.lib.db as db import json @@ -311,6 +312,8 @@ def run_hooks(config, cspec, cluster, nodes): logger.info("Waiting 300s before starting hook run.") sleep(300) + notifications.send_webhook(config, "begin", f"Running hook tasks for cluster {cluster.name}") + cluster_hooks = cspec["hooks"][cluster.name] cluster_nodes = db.get_nodes_in_cluster(config, cluster.name) @@ -334,9 +337,12 @@ def run_hooks(config, cspec, cluster, nodes): # Run the hook 
function try: + notifications.send_webhook(config, "begin", f"Cluster {cluster.name}: Running hook task '{hook_name}'") hook_functions[hook_type](config, target_nodes, hook_args) + notifications.send_webhook(config, "success", f"Cluster {cluster.name}: Completed hook task '{hook_name}'") except Exception as e: logger.warning(f"Error running hook: {e}") + notifications.send_webhook(config, "failure", f"Cluster {cluster.name}: Failed hook task '{hook_name}' with error {e}") # Wait 5s between hooks sleep(5) @@ -349,3 +355,5 @@ def run_hooks(config, cspec, cluster, nodes): "script": "#!/usr/bin/env bash\necho bootstrapped | sudo tee /etc/pvc-install.hooks\nsudo reboot" }, ) + + notifications.send_webhook(config, "success", f"Completed hook tasks for cluster {cluster.name}") diff --git a/bootstrap-daemon/pvcbootstrapd/lib/host.py b/bootstrap-daemon/pvcbootstrapd/lib/host.py index 4bdc963..363e265 100755 --- a/bootstrap-daemon/pvcbootstrapd/lib/host.py +++ b/bootstrap-daemon/pvcbootstrapd/lib/host.py @@ -19,10 +19,10 @@ # ############################################################################### -from celery.utils.log import get_task_logger - import pvcbootstrapd.lib.db as db +from celery.utils.log import get_task_logger + logger = get_task_logger(__name__) diff --git a/bootstrap-daemon/pvcbootstrapd/lib/lib.py b/bootstrap-daemon/pvcbootstrapd/lib/lib.py index a87e39b..841bcfb 100755 --- a/bootstrap-daemon/pvcbootstrapd/lib/lib.py +++ b/bootstrap-daemon/pvcbootstrapd/lib/lib.py @@ -19,6 +19,7 @@ # ############################################################################### +import pvcbootstrapd.lib.notifications as notifications import pvcbootstrapd.lib.db as db import pvcbootstrapd.lib.git as git import pvcbootstrapd.lib.redfish as redfish @@ -50,6 +51,7 @@ def dnsmasq_checkin(config, data): cspec = git.load_cspec_yaml(config) is_in_bootstrap_map = True if data["macaddr"] in cspec["bootstrap"] else False if is_in_bootstrap_map: + 
notifications.send_webhook(config, "begin", f"New host checkin from MAC '{data['macaddr']}' as host {cspec['bootstrap'][data['macaddr']]['node']['fqdn']}") if ( cspec["bootstrap"][data["macaddr"]]["bmc"].get("redfish", None) is not None @@ -90,16 +92,19 @@ def host_checkin(config, data): if data["action"] in ["install-start"]: # Node install has started logger.info(f"Registering install start for host {data['hostname']}") + notifications.send_webhook(config, "begin", f"Cluster {cspec_cluster}: Registering install start for host {data['hostname']}") host.installer_init(config, cspec, data) elif data["action"] in ["install-complete"]: # Node install has finished logger.info(f"Registering install complete for host {data['hostname']}") + notifications.send_webhook(config, "begin", f"Cluster {cspec_cluster}: Registering install complete for host {data['hostname']}") host.installer_complete(config, cspec, data) elif data["action"] in ["system-boot_initial"]: # Node has booted for the first time and can begin Ansible runs once all nodes up logger.info(f"Registering first boot for host {data['hostname']}") + notifications.send_webhook(config, "begin", f"Cluster {cspec_cluster}: Registering first boot for host {data['hostname']}") target_state = "booted-initial" host.set_boot_state(config, cspec, data, target_state) @@ -118,6 +123,7 @@ def host_checkin(config, data): elif data["action"] in ["system-boot_configured"]: # Node has been booted after Ansible run and can begin hook runs logger.info(f"Registering post-Ansible boot for host {data['hostname']}") + notifications.send_webhook(config, "begin", f"Cluster {cspec_cluster}: Registering post-Ansible boot for host {data['hostname']}") target_state = "booted-configured" host.set_boot_state(config, cspec, data, target_state) @@ -136,6 +142,7 @@ def host_checkin(config, data): elif data["action"] in ["system-boot_completed"]: # Node has been fully configured and can be shut down for the final time logger.info(f"Registering 
post-hooks boot for host {data['hostname']}") + notifications.send_webhook(config, "begin", f"Cluster {cspec_cluster}: Registering post-hooks boot for host {data['hostname']}") target_state = "booted-completed" host.set_boot_state(config, cspec, data, target_state) @@ -148,4 +155,5 @@ def host_checkin(config, data): if len(ready_nodes) >= len(all_nodes): cluster = db.update_cluster_state(config, cspec_cluster, "completed") + notifications.send_webhook(config, "completed", f"Cluster {cspec_cluster} deployment completed") # Hosts will now power down ready for real activation in production diff --git a/bootstrap-daemon/pvcbootstrapd/lib/redfish.py b/bootstrap-daemon/pvcbootstrapd/lib/redfish.py index 9093f4a..7dd1afa 100755 --- a/bootstrap-daemon/pvcbootstrapd/lib/redfish.py +++ b/bootstrap-daemon/pvcbootstrapd/lib/redfish.py @@ -31,6 +31,7 @@ import math from time import sleep from celery.utils.log import get_task_logger +import pvcbootstrapd.lib.notifications as notifications import pvcbootstrapd.lib.installer as installer import pvcbootstrapd.lib.db as db @@ -688,6 +689,8 @@ def redfish_init(config, cspec, data): cspec_cluster = cspec_node["node"]["cluster"] cspec_hostname = cspec_node["node"]["hostname"] + notifications.send_webhook(config, "begin", f"Cluster {cspec_cluster}: Beginning Redfish initialization of host {cspec_hostname}") + cluster = db.get_cluster(config, name=cspec_cluster) if cluster is None: cluster = db.add_cluster(config, cspec, cspec_cluster, "provisioning") @@ -852,6 +855,7 @@ def redfish_init(config, cspec, data): node = db.update_node_state(config, cspec_cluster, cspec_hostname, "pxe-booting") + notifications.send_webhook(config, "success", f"Cluster {cspec_cluster}: Completed Redfish initialization of host {cspec_hostname}") logger.info("Waiting for completion of node and cluster installation...") # Wait for the system to install and be configured while node.state != "booted-completed": @@ -862,6 +866,7 @@ def redfish_init(config, cspec, 
data): node = db.get_node(config, cspec_cluster, name=cspec_hostname) # Graceful shutdown of the machine + notifications.send_webhook(config, "begin", f"Cluster {cspec_cluster}: Powering off host {cspec_hostname}") set_power_state(session, system_root, redfish_vendor, "GracefulShutdown") system_power_state = "On" while system_power_state != "Off": @@ -872,6 +877,7 @@ def redfish_init(config, cspec, data): # Turn off the indicator to indicate bootstrap has completed set_indicator_state(session, system_root, redfish_vendor, "off") + notifications.send_webhook(config, "completed", f"Cluster {cspec_cluster}: Powered off host {cspec_hostname}") # We must delete the session del session diff --git a/bootstrap-daemon/pvcbootstrapd/lib/tftp.py b/bootstrap-daemon/pvcbootstrapd/lib/tftp.py index 680fdaf..0a4eb54 100755 --- a/bootstrap-daemon/pvcbootstrapd/lib/tftp.py +++ b/bootstrap-daemon/pvcbootstrapd/lib/tftp.py @@ -22,11 +22,14 @@ import os.path import shutil +import pvcbootstrapd.lib.notifications as notifications + def build_tftp_repository(config): # Generate an installer config build_cmd = f"{config['ansible_path']}/pvc-installer/buildpxe.sh -o {config['tftp_root_path']} -u {config['deploy_username']}" print(f"Building TFTP contents via pvc-installer command: {build_cmd}") + notifications.send_webhook(config, "begin", f"Building TFTP contents via pvc-installer command: {build_cmd}") os.system(build_cmd) @@ -36,6 +39,7 @@ def init_tftp(config): """ if not os.path.exists(config["tftp_root_path"]): print("First run: building TFTP root and contents - this will take some time!") + notifications.send_webhook(config, "begin", "First run: building TFTP root and contents") os.makedirs(config["tftp_root_path"]) os.makedirs(config["tftp_host_path"]) shutil.copyfile( @@ -43,3 +47,4 @@ def init_tftp(config): ) build_tftp_repository(config) + notifications.send_webhook(config, "success", "First run: successfully initialized TFTP root and contents")