Add additional error state webhooks

Ensures that webhooks are sent for failures during Redfish setup.
This commit is contained in:
Joshua Boniface 2022-08-02 18:23:54 +00:00
parent f927118f5b
commit 8d8898077f
1 changed file with 159 additions and 110 deletions

View File

@ -743,7 +743,7 @@ def redfish_init(config, cspec, data):
session = RedfishSession(bmc_host, bmc_username, bmc_password) session = RedfishSession(bmc_host, bmc_username, bmc_password)
if session.host is None: if session.host is None:
notifications.send_webhook(config, "failure", f"Cluster {cspec_cluster}: Failed to log in to Redfish for host {cspec_fqdn} at {bmc_host}") notifications.send_webhook(config, "failure", f"Cluster {cspec_cluster}: Failed to log in to Redfish for host {cspec_fqdn} at {bmc_host}")
logger.error("Aborting Redfish configuration; reboot BMC to try again.") logger.error("Aborting Redfish configuration; reset BMC to retry.")
del session del session
return return
notifications.send_webhook(config, "success", f"Cluster {cspec_cluster}: Logged in to Redfish for host {cspec_fqdn} at {bmc_host}") notifications.send_webhook(config, "success", f"Cluster {cspec_cluster}: Logged in to Redfish for host {cspec_fqdn} at {bmc_host}")
@ -752,6 +752,7 @@ def redfish_init(config, cspec, data):
sleep(60) sleep(60)
logger.info("Characterizing node...") logger.info("Characterizing node...")
try:
# Get Redfish bases # Get Redfish bases
logger.debug("Getting redfish bases") logger.debug("Getting redfish bases")
@ -836,27 +837,48 @@ def redfish_init(config, cspec, data):
host_ipaddr, host_ipaddr,
) )
logger.debug(node) logger.debug(node)
except Exception as e:
notifications.send_webhook(config, "failure", f"Cluster {cspec_cluster}: Failed to characterize Redfish for host {cspec_fqdn} at {bmc_host}. Check pvcbootstrapd logs and reset this host's BMC to retry.")
logger.error(f"Cluster {cspec_cluster}: Failed to characterize Redfish for host {cspec_fqdn} at {bmc_host}: {e}")
logger.error("Aborting Redfish configuration; reset BMC to retry.")
del session
return
logger.info("Waiting 60 seconds for system normalization") logger.info("Waiting 60 seconds for system normalization")
sleep(60) sleep(60)
logger.info("Determining system disk...") logger.info("Determining system disk...")
try:
storage_root = system_detail.get("Storage", {}).get("@odata.id") storage_root = system_detail.get("Storage", {}).get("@odata.id")
system_drive_target = get_system_drive_target(session, cspec_node, storage_root) system_drive_target = get_system_drive_target(session, cspec_node, storage_root)
if system_drive_target is None: if system_drive_target is None:
logger.error( logger.error(
"No valid drives found; configure a single system drive as a 'detect:' string or Linux '/dev' path instead and try again." "No valid drives found; configure a single system drive as a 'detect:' string or Linux '/dev' path instead and retry."
) )
return return
logger.info(f"Found system disk {system_drive_target}") logger.info(f"Found system disk {system_drive_target}")
except Exception as e:
notifications.send_webhook(config, "failure", f"Cluster {cspec_cluster}: Failed to configure system disk for host {cspec_fqdn} at {bmc_host}. Check pvcbootstrapd logs and reset this host's BMC to retry.")
logger.error(f"Cluster {cspec_cluster}: Failed to configure system disk for host {cspec_fqdn} at {bmc_host}: {e}")
logger.error("Aborting Redfish configuration; reset BMC to retry.")
del session
return
# Create our preseed configuration # Create our preseed configuration
logger.info("Creating node boot configurations...") logger.info("Creating node boot configurations...")
try:
installer.add_pxe(config, cspec_node, host_macaddr) installer.add_pxe(config, cspec_node, host_macaddr)
installer.add_preseed(config, cspec_node, host_macaddr, system_drive_target) installer.add_preseed(config, cspec_node, host_macaddr, system_drive_target)
except Exception as e:
notifications.send_webhook(config, "failure", f"Cluster {cspec_cluster}: Failed to generate PXE configurations for host {cspec_fqdn} at {bmc_host}. Check pvcbootstrapd logs and reset this host's BMC to retry.")
logger.error(f"Cluster {cspec_cluster}: Failed to generate PXE configurations for host {cspec_fqdn} at {bmc_host}: {e}")
logger.error("Aborting Redfish configuration; reset BMC to retry.")
del session
return
# Adjust any BIOS settings # Adjust any BIOS settings
logger.info("Adjusting BIOS settings...") logger.info("Adjusting BIOS settings...")
try:
bios_root = system_detail.get("Bios", {}).get("@odata.id") bios_root = system_detail.get("Bios", {}).get("@odata.id")
if bios_root is not None: if bios_root is not None:
bios_detail = session.get(bios_root) bios_detail = session.get(bios_root)
@ -867,9 +889,16 @@ def redfish_init(config, cspec, data):
payload = {"Attributes": {setting: value}} payload = {"Attributes": {setting: value}}
session.patch(f"{bios_root}/Settings", payload) session.patch(f"{bios_root}/Settings", payload)
except Exception as e:
notifications.send_webhook(config, "failure", f"Cluster {cspec_cluster}: Failed to set BIOS settings for host {cspec_fqdn} at {bmc_host}. Check pvcbootstrapd logs and reset this host's BMC to retry.")
logger.error(f"Cluster {cspec_cluster}: Failed to set BIOS settings for host {cspec_fqdn} at {bmc_host}: {e}")
logger.error("Aborting Redfish configuration; reset BMC to retry.")
del session
return
# Adjust any Manager settings # Adjust any Manager settings
logger.info("Adjusting Manager settings...") logger.info("Adjusting Manager settings...")
try:
mgrattribute_root = f"{manager_root}/Attributes" mgrattribute_root = f"{manager_root}/Attributes"
mgrattribute_detail = session.get(mgrattribute_root) mgrattribute_detail = session.get(mgrattribute_root)
mgrattribute_attributes = list(mgrattribute_detail["Attributes"].keys()) mgrattribute_attributes = list(mgrattribute_detail["Attributes"].keys())
@ -879,17 +908,37 @@ def redfish_init(config, cspec, data):
payload = {"Attributes": {setting: value}} payload = {"Attributes": {setting: value}}
session.patch(mgrattribute_root, payload) session.patch(mgrattribute_root, payload)
except Exception as e:
notifications.send_webhook(config, "failure", f"Cluster {cspec_cluster}: Failed to set BMC settings for host {cspec_fqdn} at {bmc_host}. Check pvcbootstrapd logs and reset this host's BMC to retry.")
logger.error(f"Cluster {cspec_cluster}: Failed to set BMC settings for host {cspec_fqdn} at {bmc_host}: {e}")
logger.error("Aborting Redfish configuration; reset BMC to retry.")
del session
return
# Set boot override to Pxe for the installer boot # Set boot override to Pxe for the installer boot
logger.info("Setting temporary PXE boot...") logger.info("Setting temporary PXE boot...")
try:
set_boot_override(session, system_root, redfish_vendor, "Pxe") set_boot_override(session, system_root, redfish_vendor, "Pxe")
except Exception as e:
notifications.send_webhook(config, "failure", f"Cluster {cspec_cluster}: Failed to set PXE boot override for host {cspec_fqdn} at {bmc_host}. Check pvcbootstrapd logs and reset this host's BMC to retry.")
logger.error(f"Cluster {cspec_cluster}: Failed to set PXE boot override for host {cspec_fqdn} at {bmc_host}: {e}")
logger.error("Aborting Redfish configuration; reset BMC to retry.")
del session
return
notifications.send_webhook(config, "success", f"Cluster {cspec_cluster}: Completed Redfish initialization of host {cspec_fqdn}") notifications.send_webhook(config, "success", f"Cluster {cspec_cluster}: Completed Redfish initialization of host {cspec_fqdn}")
# Turn on the system # Turn on the system
logger.info("Powering on node...") logger.info("Powering on node...")
try:
set_power_state(session, system_root, redfish_vendor, "on") set_power_state(session, system_root, redfish_vendor, "on")
notifications.send_webhook(config, "info", f"Cluster {cspec_cluster}: Powering on host {cspec_fqdn}") notifications.send_webhook(config, "info", f"Cluster {cspec_cluster}: Powering on host {cspec_fqdn}")
except Exception as e:
notifications.send_webhook(config, "failure", f"Cluster {cspec_cluster}: Failed to power on host {cspec_fqdn} at {bmc_host}. Check pvcbootstrapd logs and reset this host's BMC to retry.")
logger.error(f"Cluster {cspec_cluster}: Failed to power on host {cspec_fqdn} at {bmc_host}: {e}")
logger.error("Aborting Redfish configuration; reset BMC to retry.")
del session
return
node = db.update_node_state(config, cspec_cluster, cspec_hostname, "pxe-booting") node = db.update_node_state(config, cspec_cluster, cspec_hostname, "pxe-booting")