Compare commits
17 Commits
0565a6b635
...
75de65fe71
Author | SHA1 | Date | |
---|---|---|---|
75de65fe71 | |||
0ca705ddd9 | |||
5c2653395d | |||
30f8368886 | |||
21bcf32ac7 | |||
f10e0930e4 | |||
a30c94bcb4 | |||
0b026faaca | |||
6ab39cd2e2 | |||
405b4a63f7 | |||
6acacc153c | |||
668b5c9939 | |||
fe3d79b5f1 | |||
59228ed2eb | |||
a30ac176f4 | |||
fe6f34a3eb | |||
9a9caae923 |
@ -101,6 +101,7 @@ pvc:
|
||||
action: post
|
||||
# Icons to use for various status types; embedded in the message with `{icon}`
|
||||
icons:
|
||||
info: "❕" # A note about an event
|
||||
begin: "🤞" # A task is beginning
|
||||
success: "✅" # A task succeeded
|
||||
failure: "❌" # A task failed
|
||||
|
@ -36,6 +36,7 @@ pvc:
|
||||
uri: https://mattermost.domain.tld/hooks/asecretstring
|
||||
action: post
|
||||
icons:
|
||||
info: "❕" # A note about an event
|
||||
begin: "🤞" # A task is beginning
|
||||
success: "✅" # A task succeeded
|
||||
failure: "❌" # A task failed
|
||||
|
@ -248,7 +248,7 @@ def entrypoint():
|
||||
print("|----------------------------------------------------------|")
|
||||
print("")
|
||||
|
||||
notifications.send_webhook(config, "begin", "Starting up pvcbootstrapd")
|
||||
notifications.send_webhook(config, "info", "Initializing pvcbootstrapd")
|
||||
|
||||
# Initialize the database
|
||||
db.init_database(config)
|
||||
@ -261,7 +261,7 @@ def entrypoint():
|
||||
|
||||
if "--init-only" in argv:
|
||||
print("Successfully initialized pvcbootstrapd; exiting.")
|
||||
notifications.send_webhook(config, "success", "Successfully initialized pvcbootstrapd")
|
||||
notifications.send_webhook(config, "completed", "Successfully initialized pvcbootstrapd")
|
||||
exit(0)
|
||||
|
||||
# Start DNSMasq
|
||||
@ -274,14 +274,14 @@ def entrypoint():
|
||||
|
||||
def term(signum="", frame=""):
|
||||
print("Received TERM, exiting.")
|
||||
notifications.send_webhook(config, "completed", "Received TERM, exiting pvcbootstrapd")
|
||||
notifications.send_webhook(config, "info", "Received TERM, exiting pvcbootstrapd")
|
||||
cleanup(0)
|
||||
|
||||
signal.signal(signal.SIGTERM, term)
|
||||
signal.signal(signal.SIGINT, term)
|
||||
signal.signal(signal.SIGQUIT, term)
|
||||
|
||||
notifications.send_webhook(config, "success", "Started up pvcbootstrapd")
|
||||
notifications.send_webhook(config, "info", "Starting up pvcbootstrapd")
|
||||
|
||||
# Start Flask
|
||||
pvcbootstrapd.app.run(
|
||||
|
@ -55,7 +55,7 @@ def run_bootstrap(config, cspec, cluster, nodes):
|
||||
sleep(60)
|
||||
|
||||
logger.info("Starting Ansible bootstrap of cluster {cluster.name}")
|
||||
notifications.send_webhook(config, "begin", f"Starting Ansible bootstrap of cluster {cluster.name}")
|
||||
notifications.send_webhook(config, "begin", f"Cluster {cluster.name}: Starting Ansible bootstrap")
|
||||
|
||||
# Run the Ansible playbooks
|
||||
with tempfile.TemporaryDirectory(prefix="pvc-ansible-bootstrap_") as pdir:
|
||||
@ -78,9 +78,9 @@ def run_bootstrap(config, cspec, cluster, nodes):
|
||||
if r.rc == 0:
|
||||
git.commit_repository(config)
|
||||
git.push_repository(config)
|
||||
notifications.send_webhook(config, "success", f"Completed Ansible bootstrap of cluster {cluster.name}")
|
||||
notifications.send_webhook(config, "success", f"Cluster {cluster.name}: Completed Ansible bootstrap")
|
||||
else:
|
||||
notifications.send_webhook(config, "failure", f"Failed Ansible bootstrap of cluster {cluster.name}; check pvcbootstrapd logs")
|
||||
notifications.send_webhook(config, "failure", f"Cluster {cluster.name}: Failed Ansible bootstrap; check pvcbootstrapd logs")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error: {e}")
|
||||
notifications.send_webhook(config, "failure", f"Failed Ansible bootstrap of cluster {cluster.name} with error {e}; check pvcbootstrapd logs")
|
||||
notifications.send_webhook(config, "failure", f"Cluster {cluster.name}: Failed Ansible bootstrap with error '{e}'; check pvcbootstrapd logs")
|
||||
|
@ -52,7 +52,6 @@ def init_repository(config):
|
||||
g = git.cmd.Git(f"{config['ansible_path']}")
|
||||
g.checkout(config["ansible_branch"])
|
||||
g.submodule("update", "--init", env=dict(GIT_SSH_COMMAND=git_ssh_cmd))
|
||||
notifications.send_webhook(config, "success", "Successfully initialized Git repository")
|
||||
except Exception as e:
|
||||
print(f"Error: {e}")
|
||||
|
||||
@ -69,6 +68,7 @@ def pull_repository(config):
|
||||
g.submodule("update", "--init", env=dict(GIT_SSH_COMMAND=git_ssh_cmd))
|
||||
except Exception as e:
|
||||
logger.warn(e)
|
||||
notifications.send_webhook(config, "failure", "Failed to update Git repository")
|
||||
|
||||
|
||||
def commit_repository(config):
|
||||
@ -92,8 +92,10 @@ def commit_repository(config):
|
||||
author="PVC Bootstrap <git@pvcbootstrapd>",
|
||||
env=commit_env,
|
||||
)
|
||||
notifications.send_webhook(config, "success", "Successfully committed to Git repository")
|
||||
except Exception as e:
|
||||
logger.warn(e)
|
||||
notifications.send_webhook(config, "failure", "Failed to commit to Git repository")
|
||||
|
||||
|
||||
def push_repository(config):
|
||||
@ -109,8 +111,10 @@ def push_repository(config):
|
||||
g = git.Repo(f"{config['ansible_path']}")
|
||||
origin = g.remote(name="origin")
|
||||
origin.push(env=dict(GIT_SSH_COMMAND=git_ssh_cmd))
|
||||
notifications.send_webhook(config, "success", "Successfully pushed Git repository")
|
||||
except Exception as e:
|
||||
logger.warn(e)
|
||||
notifications.send_webhook(config, "failure", "Failed to push Git repository")
|
||||
|
||||
|
||||
def load_cspec_yaml(config):
|
||||
|
@ -203,8 +203,6 @@ def run_hook_copy(config, targets, args):
|
||||
node_address = node.host_ipaddr
|
||||
|
||||
source = args.get("source", [])
|
||||
if not match(r"^/", source):
|
||||
source = f"{config['ansible_source']}/{source}"
|
||||
destination = args.get("destination", [])
|
||||
mode = args.get("mode", [])
|
||||
|
||||
@ -212,9 +210,11 @@ def run_hook_copy(config, targets, args):
|
||||
|
||||
with run_paramiko(config, node_address) as c:
|
||||
for sfile, dfile, dmode in zip(source, destination, mode):
|
||||
if not match(r"^/", sfile):
|
||||
sfile = f"{config['ansible_path']}/{sfile}"
|
||||
tc = c.open_sftp()
|
||||
tc.put(sfile, dfile)
|
||||
tc.chmod(dfile, dmode)
|
||||
tc.chmod(dfile, int(dmode))
|
||||
tc.close()
|
||||
|
||||
|
||||
@ -319,7 +319,7 @@ def run_hooks(config, cspec, cluster, nodes):
|
||||
logger.info("Waiting 300s before starting hook run.")
|
||||
sleep(300)
|
||||
|
||||
notifications.send_webhook(config, "begin", f"Running hook tasks for cluster {cluster.name}")
|
||||
notifications.send_webhook(config, "begin", f"Cluster {cluster.name}: Running post-setup hook tasks")
|
||||
|
||||
cluster_hooks = cspec["hooks"][cluster.name]
|
||||
|
||||
@ -349,7 +349,7 @@ def run_hooks(config, cspec, cluster, nodes):
|
||||
notifications.send_webhook(config, "success", f"Cluster {cluster.name}: Completed hook task '{hook_name}'")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error running hook: {e}")
|
||||
notifications.send_webhook(config, "failure", f"Cluster {cluster.name}: Failed hook task '{hook_name}' with error {e}")
|
||||
notifications.send_webhook(config, "failure", f"Cluster {cluster.name}: Failed hook task '{hook_name}' with error '{e}'")
|
||||
|
||||
# Wait 5s between hooks
|
||||
sleep(5)
|
||||
@ -363,4 +363,4 @@ def run_hooks(config, cspec, cluster, nodes):
|
||||
},
|
||||
)
|
||||
|
||||
notifications.send_webhook(config, "success", f"Completed hook tasks for cluster {cluster.name}")
|
||||
notifications.send_webhook(config, "success", f"Cluster {cluster.name}: Completed post-setup hook tasks")
|
||||
|
@ -51,7 +51,7 @@ def dnsmasq_checkin(config, data):
|
||||
cspec = git.load_cspec_yaml(config)
|
||||
is_in_bootstrap_map = True if data["macaddr"] in cspec["bootstrap"] else False
|
||||
if is_in_bootstrap_map:
|
||||
notifications.send_webhook(config, "begin", f"New host checkin from MAC '{data['macaddr']}' as host {cspec['bootstrap'][data['macaddr']]['node']['fqdn']}")
|
||||
notifications.send_webhook(config, "info", f"New host checkin from MAC {data['macaddr']} as host {cspec['bootstrap'][data['macaddr']]['node']['fqdn']} in cluster {cspec['bootstrap'][data['macaddr']]['node']['cluster']}")
|
||||
if (
|
||||
cspec["bootstrap"][data["macaddr"]]["bmc"].get("redfish", None)
|
||||
is not None
|
||||
@ -83,28 +83,29 @@ def host_checkin(config, data):
|
||||
"""
|
||||
Handle checkins from the PVC node
|
||||
"""
|
||||
logger.info(f"Registering checkin for host {data['hostname']}")
|
||||
logger.info(f"Registering checkin for {data['bmc_macaddr']}")
|
||||
logger.debug(f"data = {data}")
|
||||
cspec = git.load_cspec_yaml(config)
|
||||
bmc_macaddr = data["bmc_macaddr"]
|
||||
cspec_cluster = cspec["bootstrap"][bmc_macaddr]["node"]["cluster"]
|
||||
cspec_fqdn = cspec["bootstrap"][bmc_macaddr]["node"]["fqdn"]
|
||||
|
||||
if data["action"] in ["install-start"]:
|
||||
# Node install has started
|
||||
logger.info(f"Registering install start for host {data['hostname']}")
|
||||
notifications.send_webhook(config, "begin", f"Cluster {cspec_cluster}: Registering install start for host {data['hostname']}")
|
||||
logger.info(f"Registering install start for host {cspec_fqdn}")
|
||||
notifications.send_webhook(config, "begin", f"Cluster {cspec_cluster}: Base install starting for host {cspec_fqdn}")
|
||||
host.installer_init(config, cspec, data)
|
||||
|
||||
elif data["action"] in ["install-complete"]:
|
||||
# Node install has finished
|
||||
logger.info(f"Registering install complete for host {data['hostname']}")
|
||||
notifications.send_webhook(config, "success", f"Cluster {cspec_cluster}: Registering install complete for host {data['hostname']}")
|
||||
logger.info(f"Registering install complete for host {cspec_fqdn}")
|
||||
notifications.send_webhook(config, "success", f"Cluster {cspec_cluster}: Base install completed for host {cspec_fqdn}")
|
||||
host.installer_complete(config, cspec, data)
|
||||
|
||||
elif data["action"] in ["system-boot_initial"]:
|
||||
# Node has booted for the first time and can begin Ansible runs once all nodes up
|
||||
logger.info(f"Registering first boot for host {data['hostname']}")
|
||||
notifications.send_webhook(config, "begin", f"Cluster {cspec_cluster}: Registering first boot for host {data['hostname']}")
|
||||
logger.info(f"Registering first boot for host {cspec_fqdn}")
|
||||
notifications.send_webhook(config, "info", f"Cluster {cspec_cluster}: Registering first boot for host {cspec_fqdn}")
|
||||
target_state = "booted-initial"
|
||||
|
||||
host.set_boot_state(config, cspec, data, target_state)
|
||||
@ -122,8 +123,8 @@ def host_checkin(config, data):
|
||||
|
||||
elif data["action"] in ["system-boot_configured"]:
|
||||
# Node has been booted after Ansible run and can begin hook runs
|
||||
logger.info(f"Registering post-Ansible boot for host {data['hostname']}")
|
||||
notifications.send_webhook(config, "begin", f"Cluster {cspec_cluster}: Registering post-Ansible boot for host {data['hostname']}")
|
||||
logger.info(f"Registering post-Ansible boot for host {cspec_fqdn}")
|
||||
notifications.send_webhook(config, "info", f"Cluster {cspec_cluster}: Registering post-Ansible boot for host {cspec_fqdn}")
|
||||
target_state = "booted-configured"
|
||||
|
||||
host.set_boot_state(config, cspec, data, target_state)
|
||||
@ -141,8 +142,8 @@ def host_checkin(config, data):
|
||||
|
||||
elif data["action"] in ["system-boot_completed"]:
|
||||
# Node has been fully configured and can be shut down for the final time
|
||||
logger.info(f"Registering post-hooks boot for host {data['hostname']}")
|
||||
notifications.send_webhook(config, "begin", f"Cluster {cspec_cluster}: Registering post-hooks boot for host {data['hostname']}")
|
||||
logger.info(f"Registering post-hooks boot for host {cspec_fqdn}")
|
||||
notifications.send_webhook(config, "info", f"Cluster {cspec_cluster}: Registering post-hooks boot for host {cspec_fqdn}")
|
||||
target_state = "booted-completed"
|
||||
|
||||
host.set_boot_state(config, cspec, data, target_state)
|
||||
@ -153,7 +154,7 @@ def host_checkin(config, data):
|
||||
|
||||
logger.info(f"Ready: {len(ready_nodes)} All: {len(all_nodes)}")
|
||||
if len(ready_nodes) >= len(all_nodes):
|
||||
cluster = db.update_cluster_state(config, cspec_cluster, "completed")
|
||||
|
||||
notifications.send_webhook(config, "completed", f"Cluster {cspec_cluster} deployment completed")
|
||||
# Hosts will now power down ready for real activation in production
|
||||
sleep(30)
|
||||
cluster = db.update_cluster_state(config, cspec_cluster, "completed")
|
||||
notifications.send_webhook(config, "completed", f"Cluster {cspec_cluster}: Deployment completed")
|
||||
|
@ -103,12 +103,9 @@ class RedfishSession:
|
||||
sleep(2)
|
||||
tries += 1
|
||||
|
||||
if login_response is None:
|
||||
logger.error("Failed to log in to Redfish")
|
||||
return
|
||||
|
||||
if login_response.status_code not in [200, 201]:
|
||||
if login_response is None or login_response.status_code not in [200, 201]:
|
||||
raise AuthenticationException("Login failed", response=login_response)
|
||||
|
||||
logger.info(f"Logged in to Redfish at {host} successfully")
|
||||
|
||||
self.host = host
|
||||
@ -620,18 +617,41 @@ def set_boot_override(session, system_root, redfish_vendor, target):
|
||||
"""
|
||||
Set the system boot override to the desired target
|
||||
"""
|
||||
try:
|
||||
system_detail = session.get(system_root)
|
||||
boot_targets = system_detail["Boot"]["BootSourceOverrideSupported"]
|
||||
except KeyError:
|
||||
return False
|
||||
print(redfish_vendor)
|
||||
system_detail = session.get(system_root)
|
||||
|
||||
if target not in boot_targets:
|
||||
return False
|
||||
def set_boot_override_dell():
|
||||
try:
|
||||
boot_targets = system_detail["Boot"]["BootSourceOverrideTarget@Redfish.AllowableValues"]
|
||||
except KeyError:
|
||||
logger.warn(f"Failed to set boot override, no BootSourceOverrideSupported key at {system_detail}")
|
||||
return False
|
||||
|
||||
session.patch(system_root, {"Boot": {"BootSourceOverrideTarget": target}})
|
||||
if target not in boot_targets:
|
||||
logger.warn(f"Failed to set boot override, key {target} not in {boot_targets}")
|
||||
return False
|
||||
|
||||
return True
|
||||
session.patch(system_root, {"Boot": {"BootSourceOverrideMode": "UEFI", "BootSourceOverrideTarget": target}})
|
||||
return True
|
||||
|
||||
def set_boot_override_generic():
|
||||
try:
|
||||
boot_targets = system_detail["Boot"]["BootSourceOverrideSupported"]
|
||||
except KeyError:
|
||||
logger.warn(f"Failed to set boot override, no BootSourceOverrideSupported key at {system_detail}")
|
||||
return False
|
||||
|
||||
if target not in boot_targets:
|
||||
logger.warn(f"Failed to set boot override, key {target} not in {boot_targets}")
|
||||
return False
|
||||
|
||||
session.patch(system_root, {"Boot": {"BootSourceOverrideTarget": target}})
|
||||
return True
|
||||
|
||||
if redfish_vendor == "Dell":
|
||||
return set_boot_override_dell()
|
||||
else:
|
||||
return set_boot_override_generic()
|
||||
|
||||
|
||||
def check_redfish(config, data):
|
||||
@ -688,8 +708,12 @@ def redfish_init(config, cspec, data):
|
||||
|
||||
cspec_cluster = cspec_node["node"]["cluster"]
|
||||
cspec_hostname = cspec_node["node"]["hostname"]
|
||||
cspec_fqdn = cspec_node["node"]["fqdn"]
|
||||
|
||||
notifications.send_webhook(config, "begin", f"Cluster {cspec_cluster}: Beginning Redfish initialization of host {cspec_hostname}")
|
||||
logger.info("Waiting 60 seconds for system normalization")
|
||||
sleep(60)
|
||||
|
||||
notifications.send_webhook(config, "begin", f"Cluster {cspec_cluster}: Beginning Redfish initialization of host {cspec_fqdn}")
|
||||
|
||||
cluster = db.get_cluster(config, name=cspec_cluster)
|
||||
if cluster is None:
|
||||
@ -713,11 +737,17 @@ def redfish_init(config, cspec, data):
|
||||
# Create the session and log in
|
||||
session = RedfishSession(bmc_host, bmc_username, bmc_password)
|
||||
if session.host is None:
|
||||
notifications.send_webhook(config, "failure", f"Cluster {cspec_cluster}: Failed to log in to Redfish for host {cspec_fqdn} at {bmc_host}")
|
||||
logger.info("Aborting Redfish configuration; reboot BMC to try again.")
|
||||
del session
|
||||
return
|
||||
notifications.send_webhook(config, "success", f"Cluster {cspec_cluster}: Logged in to Redfish for host {cspec_fqdn} at {bmc_host}")
|
||||
|
||||
logger.info("Characterizing node...")
|
||||
|
||||
logger.info("Waiting 60 seconds for system normalization")
|
||||
sleep(60)
|
||||
|
||||
# Get Refish bases
|
||||
logger.debug("Getting redfish bases")
|
||||
redfish_base_root = "/redfish/v1"
|
||||
@ -740,9 +770,6 @@ def redfish_init(config, cspec, data):
|
||||
set_power_state(session, system_root, redfish_vendor, "off")
|
||||
set_indicator_state(session, system_root, redfish_vendor, "on")
|
||||
|
||||
logger.info("Waiting 60 seconds for system normalization")
|
||||
sleep(60)
|
||||
|
||||
# Get the system details
|
||||
logger.debug("Get the system details")
|
||||
system_detail = session.get(system_root)
|
||||
@ -805,6 +832,9 @@ def redfish_init(config, cspec, data):
|
||||
)
|
||||
logger.debug(node)
|
||||
|
||||
logger.info("Waiting 60 seconds for system normalization")
|
||||
sleep(60)
|
||||
|
||||
logger.info("Determining system disk...")
|
||||
storage_root = system_detail.get("Storage", {}).get("@odata.id")
|
||||
system_drive_target = get_system_drive_target(session, cspec_node, storage_root)
|
||||
@ -839,7 +869,7 @@ def redfish_init(config, cspec, data):
|
||||
mgrattribute_detail = session.get(mgrattribute_root)
|
||||
mgrattribute_attributes = list(mgrattribute_detail["Attributes"].keys())
|
||||
for setting, value in cspec_node["bmc"].get("manager_settings", {}).items():
|
||||
if setting not in bios_attributes:
|
||||
if setting not in mgrattribute_attributes:
|
||||
continue
|
||||
|
||||
payload = {"Attributes": {setting: value}}
|
||||
@ -849,12 +879,12 @@ def redfish_init(config, cspec, data):
|
||||
logger.info("Setting temporary PXE boot...")
|
||||
set_boot_override(session, system_root, redfish_vendor, "Pxe")
|
||||
|
||||
notifications.send_webhook(config, "success", f"Cluster {cspec_cluster}: Completed Redfish initialization of host {cspec_hostname}")
|
||||
notifications.send_webhook(config, "success", f"Cluster {cspec_cluster}: Completed Redfish initialization of host {cspec_fqdn}")
|
||||
|
||||
# Turn on the system
|
||||
logger.info("Powering on node...")
|
||||
set_power_state(session, system_root, redfish_vendor, "on")
|
||||
notifications.send_webhook(config, "begin", f"Cluster {cspec_cluster}: Powering on host {cspec_hostname}")
|
||||
notifications.send_webhook(config, "info", f"Cluster {cspec_cluster}: Powering on host {cspec_fqdn}")
|
||||
|
||||
node = db.update_node_state(config, cspec_cluster, cspec_hostname, "pxe-booting")
|
||||
|
||||
@ -868,7 +898,7 @@ def redfish_init(config, cspec, data):
|
||||
node = db.get_node(config, cspec_cluster, name=cspec_hostname)
|
||||
|
||||
# Graceful shutdown of the machine
|
||||
notifications.send_webhook(config, "begin", f"Cluster {cspec_cluster}: Powering off host {cspec_hostname}")
|
||||
notifications.send_webhook(config, "info", f"Cluster {cspec_cluster}: Powering off host {cspec_fqdn}")
|
||||
set_power_state(session, system_root, redfish_vendor, "GracefulShutdown")
|
||||
system_power_state = "On"
|
||||
while system_power_state != "Off":
|
||||
@ -879,7 +909,6 @@ def redfish_init(config, cspec, data):
|
||||
|
||||
# Turn off the indicator to indicate bootstrap has completed
|
||||
set_indicator_state(session, system_root, redfish_vendor, "off")
|
||||
notifications.send_webhook(config, "completed", f"Cluster {cspec_cluster}: Powered off host {cspec_hostname}")
|
||||
|
||||
# We must delete the session
|
||||
del session
|
||||
|
Loading…
x
Reference in New Issue
Block a user