Add additional error state webhooks
Ensures that webhooks are sent for failures during Redfish setup.
This commit is contained in:
parent
f927118f5b
commit
8d8898077f
|
@ -743,7 +743,7 @@ def redfish_init(config, cspec, data):
|
||||||
session = RedfishSession(bmc_host, bmc_username, bmc_password)
|
session = RedfishSession(bmc_host, bmc_username, bmc_password)
|
||||||
if session.host is None:
|
if session.host is None:
|
||||||
notifications.send_webhook(config, "failure", f"Cluster {cspec_cluster}: Failed to log in to Redfish for host {cspec_fqdn} at {bmc_host}")
|
notifications.send_webhook(config, "failure", f"Cluster {cspec_cluster}: Failed to log in to Redfish for host {cspec_fqdn} at {bmc_host}")
|
||||||
logger.error("Aborting Redfish configuration; reboot BMC to try again.")
|
logger.error("Aborting Redfish configuration; reset BMC to retry.")
|
||||||
del session
|
del session
|
||||||
return
|
return
|
||||||
notifications.send_webhook(config, "success", f"Cluster {cspec_cluster}: Logged in to Redfish for host {cspec_fqdn} at {bmc_host}")
|
notifications.send_webhook(config, "success", f"Cluster {cspec_cluster}: Logged in to Redfish for host {cspec_fqdn} at {bmc_host}")
|
||||||
|
@ -752,144 +752,193 @@ def redfish_init(config, cspec, data):
|
||||||
sleep(60)
|
sleep(60)
|
||||||
|
|
||||||
logger.info("Characterizing node...")
|
logger.info("Characterizing node...")
|
||||||
|
|
||||||
# Get Refish bases
|
|
||||||
logger.debug("Getting redfish bases")
|
|
||||||
redfish_base_root = "/redfish/v1"
|
|
||||||
redfish_base_detail = session.get(redfish_base_root)
|
|
||||||
|
|
||||||
redfish_vendor = list(redfish_base_detail["Oem"].keys())[0]
|
|
||||||
redfish_name = redfish_base_detail["Name"]
|
|
||||||
redfish_version = redfish_base_detail["RedfishVersion"]
|
|
||||||
|
|
||||||
managers_base_root = redfish_base_detail["Managers"]["@odata.id"].rstrip("/")
|
|
||||||
managers_base_detail = session.get(managers_base_root)
|
|
||||||
manager_root = managers_base_detail["Members"][0]["@odata.id"].rstrip("/")
|
|
||||||
|
|
||||||
systems_base_root = redfish_base_detail["Systems"]["@odata.id"].rstrip("/")
|
|
||||||
systems_base_detail = session.get(systems_base_root)
|
|
||||||
system_root = systems_base_detail["Members"][0]["@odata.id"].rstrip("/")
|
|
||||||
|
|
||||||
# Force off the system and turn on the indicator
|
|
||||||
logger.debug("Force off the system and turn on the indicator")
|
|
||||||
set_power_state(session, system_root, redfish_vendor, "off")
|
|
||||||
set_indicator_state(session, system_root, redfish_vendor, "on")
|
|
||||||
|
|
||||||
# Get the system details
|
|
||||||
logger.debug("Get the system details")
|
|
||||||
system_detail = session.get(system_root)
|
|
||||||
|
|
||||||
system_sku = system_detail["SKU"].strip()
|
|
||||||
system_serial = system_detail["SerialNumber"].strip()
|
|
||||||
system_power_state = system_detail["PowerState"].strip()
|
|
||||||
system_indicator_state = system_detail["IndicatorLED"].strip()
|
|
||||||
system_health_state = system_detail["Status"]["Health"].strip()
|
|
||||||
|
|
||||||
# Walk down the EthernetInterfaces construct to get the bootstrap interface MAC address
|
|
||||||
logger.debug("Walk down the EthernetInterfaces construct to get the bootstrap interface MAC address")
|
|
||||||
try:
|
try:
|
||||||
ethernet_root = system_detail["EthernetInterfaces"]["@odata.id"].rstrip("/")
|
|
||||||
ethernet_detail = session.get(ethernet_root)
|
|
||||||
embedded_ethernet_detail_members = [e for e in ethernet_detail["Members"] if "Embedded" in e["@odata.id"]]
|
|
||||||
embedded_ethernet_detail_members.sort(key = lambda k: k["@odata.id"])
|
|
||||||
first_interface_root = embedded_ethernet_detail_members[0]["@odata.id"].rstrip("/")
|
|
||||||
first_interface_detail = session.get(first_interface_root)
|
|
||||||
# Something went wrong, so fall back
|
|
||||||
except KeyError:
|
|
||||||
first_interface_detail = dict()
|
|
||||||
|
|
||||||
# Try to get the MAC address directly from the interface detail (Redfish standard)
|
# Get Refish bases
|
||||||
logger.debug("Try to get the MAC address directly from the interface detail (Redfish standard)")
|
logger.debug("Getting redfish bases")
|
||||||
if first_interface_detail.get("MACAddress") is not None:
|
redfish_base_root = "/redfish/v1"
|
||||||
bootstrap_mac_address = first_interface_detail["MACAddress"].strip().lower()
|
redfish_base_detail = session.get(redfish_base_root)
|
||||||
# Try to get the MAC address from the HostCorrelation->HostMACAddress (HP DL360x G8)
|
|
||||||
elif len(system_detail.get("HostCorrelation", {}).get("HostMACAddress", [])) > 0:
|
redfish_vendor = list(redfish_base_detail["Oem"].keys())[0]
|
||||||
bootstrap_mac_address = (
|
redfish_name = redfish_base_detail["Name"]
|
||||||
system_detail["HostCorrelation"]["HostMACAddress"][0].strip().lower()
|
redfish_version = redfish_base_detail["RedfishVersion"]
|
||||||
|
|
||||||
|
managers_base_root = redfish_base_detail["Managers"]["@odata.id"].rstrip("/")
|
||||||
|
managers_base_detail = session.get(managers_base_root)
|
||||||
|
manager_root = managers_base_detail["Members"][0]["@odata.id"].rstrip("/")
|
||||||
|
|
||||||
|
systems_base_root = redfish_base_detail["Systems"]["@odata.id"].rstrip("/")
|
||||||
|
systems_base_detail = session.get(systems_base_root)
|
||||||
|
system_root = systems_base_detail["Members"][0]["@odata.id"].rstrip("/")
|
||||||
|
|
||||||
|
# Force off the system and turn on the indicator
|
||||||
|
logger.debug("Force off the system and turn on the indicator")
|
||||||
|
set_power_state(session, system_root, redfish_vendor, "off")
|
||||||
|
set_indicator_state(session, system_root, redfish_vendor, "on")
|
||||||
|
|
||||||
|
# Get the system details
|
||||||
|
logger.debug("Get the system details")
|
||||||
|
system_detail = session.get(system_root)
|
||||||
|
|
||||||
|
system_sku = system_detail["SKU"].strip()
|
||||||
|
system_serial = system_detail["SerialNumber"].strip()
|
||||||
|
system_power_state = system_detail["PowerState"].strip()
|
||||||
|
system_indicator_state = system_detail["IndicatorLED"].strip()
|
||||||
|
system_health_state = system_detail["Status"]["Health"].strip()
|
||||||
|
|
||||||
|
# Walk down the EthernetInterfaces construct to get the bootstrap interface MAC address
|
||||||
|
logger.debug("Walk down the EthernetInterfaces construct to get the bootstrap interface MAC address")
|
||||||
|
try:
|
||||||
|
ethernet_root = system_detail["EthernetInterfaces"]["@odata.id"].rstrip("/")
|
||||||
|
ethernet_detail = session.get(ethernet_root)
|
||||||
|
embedded_ethernet_detail_members = [e for e in ethernet_detail["Members"] if "Embedded" in e["@odata.id"]]
|
||||||
|
embedded_ethernet_detail_members.sort(key = lambda k: k["@odata.id"])
|
||||||
|
first_interface_root = embedded_ethernet_detail_members[0]["@odata.id"].rstrip("/")
|
||||||
|
first_interface_detail = session.get(first_interface_root)
|
||||||
|
# Something went wrong, so fall back
|
||||||
|
except KeyError:
|
||||||
|
first_interface_detail = dict()
|
||||||
|
|
||||||
|
# Try to get the MAC address directly from the interface detail (Redfish standard)
|
||||||
|
logger.debug("Try to get the MAC address directly from the interface detail (Redfish standard)")
|
||||||
|
if first_interface_detail.get("MACAddress") is not None:
|
||||||
|
bootstrap_mac_address = first_interface_detail["MACAddress"].strip().lower()
|
||||||
|
# Try to get the MAC address from the HostCorrelation->HostMACAddress (HP DL360x G8)
|
||||||
|
elif len(system_detail.get("HostCorrelation", {}).get("HostMACAddress", [])) > 0:
|
||||||
|
bootstrap_mac_address = (
|
||||||
|
system_detail["HostCorrelation"]["HostMACAddress"][0].strip().lower()
|
||||||
|
)
|
||||||
|
# We can't find it, so use a dummy value
|
||||||
|
else:
|
||||||
|
logger.error("Could not find a valid MAC address for the bootstrap interface.")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Display the system details
|
||||||
|
logger.info("Found details from node characterization:")
|
||||||
|
logger.info(f"> System Manufacturer: {redfish_vendor}")
|
||||||
|
logger.info(f"> System Redfish Version: {redfish_version}")
|
||||||
|
logger.info(f"> System Redfish Name: {redfish_name}")
|
||||||
|
logger.info(f"> System SKU: {system_sku}")
|
||||||
|
logger.info(f"> System Serial: {system_serial}")
|
||||||
|
logger.info(f"> Power State: {system_power_state}")
|
||||||
|
logger.info(f"> Indicator LED: {system_indicator_state}")
|
||||||
|
logger.info(f"> Health State: {system_health_state}")
|
||||||
|
logger.info(f"> Bootstrap NIC MAC: {bootstrap_mac_address}")
|
||||||
|
|
||||||
|
# Update node host MAC address
|
||||||
|
host_macaddr = bootstrap_mac_address
|
||||||
|
node = db.update_node_addresses(
|
||||||
|
config,
|
||||||
|
cspec_cluster,
|
||||||
|
cspec_hostname,
|
||||||
|
bmc_macaddr,
|
||||||
|
bmc_ipaddr,
|
||||||
|
host_macaddr,
|
||||||
|
host_ipaddr,
|
||||||
)
|
)
|
||||||
# We can't find it, so use a dummy value
|
logger.debug(node)
|
||||||
else:
|
except Exception as e:
|
||||||
logger.error("Could not find a valid MAC address for the bootstrap interface.")
|
notifications.send_webhook(config, "failure", f"Cluster {cspec_cluster}: Failed to characterize Redfish for host {cspec_fqdn} at {bmc_host}. Check pvcbootstrapd logs and reset this host's BMC to retry.")
|
||||||
|
logger.error(f"Cluster {cspec_cluster}: Failed to characterize Redfish for host {cspec_fqdn} at {bmc_host}: {e}")
|
||||||
|
logger.error("Aborting Redfish configuration; reset BMC to retry.")
|
||||||
|
del session
|
||||||
return
|
return
|
||||||
|
|
||||||
# Display the system details
|
|
||||||
logger.info("Found details from node characterization:")
|
|
||||||
logger.info(f"> System Manufacturer: {redfish_vendor}")
|
|
||||||
logger.info(f"> System Redfish Version: {redfish_version}")
|
|
||||||
logger.info(f"> System Redfish Name: {redfish_name}")
|
|
||||||
logger.info(f"> System SKU: {system_sku}")
|
|
||||||
logger.info(f"> System Serial: {system_serial}")
|
|
||||||
logger.info(f"> Power State: {system_power_state}")
|
|
||||||
logger.info(f"> Indicator LED: {system_indicator_state}")
|
|
||||||
logger.info(f"> Health State: {system_health_state}")
|
|
||||||
logger.info(f"> Bootstrap NIC MAC: {bootstrap_mac_address}")
|
|
||||||
|
|
||||||
# Update node host MAC address
|
|
||||||
host_macaddr = bootstrap_mac_address
|
|
||||||
node = db.update_node_addresses(
|
|
||||||
config,
|
|
||||||
cspec_cluster,
|
|
||||||
cspec_hostname,
|
|
||||||
bmc_macaddr,
|
|
||||||
bmc_ipaddr,
|
|
||||||
host_macaddr,
|
|
||||||
host_ipaddr,
|
|
||||||
)
|
|
||||||
logger.debug(node)
|
|
||||||
|
|
||||||
logger.info("Waiting 60 seconds for system normalization")
|
logger.info("Waiting 60 seconds for system normalization")
|
||||||
sleep(60)
|
sleep(60)
|
||||||
|
|
||||||
logger.info("Determining system disk...")
|
logger.info("Determining system disk...")
|
||||||
storage_root = system_detail.get("Storage", {}).get("@odata.id")
|
try:
|
||||||
system_drive_target = get_system_drive_target(session, cspec_node, storage_root)
|
storage_root = system_detail.get("Storage", {}).get("@odata.id")
|
||||||
if system_drive_target is None:
|
system_drive_target = get_system_drive_target(session, cspec_node, storage_root)
|
||||||
logger.error(
|
if system_drive_target is None:
|
||||||
"No valid drives found; configure a single system drive as a 'detect:' string or Linux '/dev' path instead and try again."
|
logger.error(
|
||||||
)
|
"No valid drives found; configure a single system drive as a 'detect:' string or Linux '/dev' path instead and retry."
|
||||||
|
)
|
||||||
|
return
|
||||||
|
logger.info(f"Found system disk {system_drive_target}")
|
||||||
|
except Exception as e:
|
||||||
|
notifications.send_webhook(config, "failure", f"Cluster {cspec_cluster}: Failed to configure system disk for host {cspec_fqdn} at {bmc_host}. Check pvcbootstrapd logs and reset this host's BMC to retry.")
|
||||||
|
logger.error(f"Cluster {cspec_cluster}: Failed to configure system disk for host {cspec_fqdn} at {bmc_host}: {e}")
|
||||||
|
logger.error("Aborting Redfish configuration; reset BMC to retry.")
|
||||||
|
del session
|
||||||
return
|
return
|
||||||
logger.info(f"Found system disk {system_drive_target}")
|
|
||||||
|
|
||||||
# Create our preseed configuration
|
# Create our preseed configuration
|
||||||
logger.info("Creating node boot configurations...")
|
logger.info("Creating node boot configurations...")
|
||||||
installer.add_pxe(config, cspec_node, host_macaddr)
|
try:
|
||||||
installer.add_preseed(config, cspec_node, host_macaddr, system_drive_target)
|
installer.add_pxe(config, cspec_node, host_macaddr)
|
||||||
|
installer.add_preseed(config, cspec_node, host_macaddr, system_drive_target)
|
||||||
|
except Exception as e:
|
||||||
|
notifications.send_webhook(config, "failure", f"Cluster {cspec_cluster}: Failed to generate PXE configurations for host {cspec_fqdn} at {bmc_host}. Check pvcbootstrapd logs and reset this host's BMC to retry.")
|
||||||
|
logger.error(f"Cluster {cspec_cluster}: Failed to generate PXE configurations for host {cspec_fqdn} at {bmc_host}: {e}")
|
||||||
|
logger.error("Aborting Redfish configuration; reset BMC to retry.")
|
||||||
|
del session
|
||||||
|
return
|
||||||
|
|
||||||
# Adjust any BIOS settings
|
# Adjust any BIOS settings
|
||||||
logger.info("Adjusting BIOS settings...")
|
logger.info("Adjusting BIOS settings...")
|
||||||
bios_root = system_detail.get("Bios", {}).get("@odata.id")
|
try:
|
||||||
if bios_root is not None:
|
bios_root = system_detail.get("Bios", {}).get("@odata.id")
|
||||||
bios_detail = session.get(bios_root)
|
if bios_root is not None:
|
||||||
bios_attributes = list(bios_detail["Attributes"].keys())
|
bios_detail = session.get(bios_root)
|
||||||
for setting, value in cspec_node["bmc"].get("bios_settings", {}).items():
|
bios_attributes = list(bios_detail["Attributes"].keys())
|
||||||
if setting not in bios_attributes:
|
for setting, value in cspec_node["bmc"].get("bios_settings", {}).items():
|
||||||
continue
|
if setting not in bios_attributes:
|
||||||
|
continue
|
||||||
|
|
||||||
payload = {"Attributes": {setting: value}}
|
payload = {"Attributes": {setting: value}}
|
||||||
session.patch(f"{bios_root}/Settings", payload)
|
session.patch(f"{bios_root}/Settings", payload)
|
||||||
|
except Exception as e:
|
||||||
|
notifications.send_webhook(config, "failure", f"Cluster {cspec_cluster}: Failed to set BIOS settings for host {cspec_fqdn} at {bmc_host}. Check pvcbootstrapd logs and reset this host's BMC to retry.")
|
||||||
|
logger.error(f"Cluster {cspec_cluster}: Failed to set BIOS settings for host {cspec_fqdn} at {bmc_host}: {e}")
|
||||||
|
logger.error("Aborting Redfish configuration; reset BMC to retry.")
|
||||||
|
del session
|
||||||
|
return
|
||||||
|
|
||||||
# Adjust any Manager settings
|
# Adjust any Manager settings
|
||||||
logger.info("Adjusting Manager settings...")
|
logger.info("Adjusting Manager settings...")
|
||||||
mgrattribute_root = f"{manager_root}/Attributes"
|
try:
|
||||||
mgrattribute_detail = session.get(mgrattribute_root)
|
mgrattribute_root = f"{manager_root}/Attributes"
|
||||||
mgrattribute_attributes = list(mgrattribute_detail["Attributes"].keys())
|
mgrattribute_detail = session.get(mgrattribute_root)
|
||||||
for setting, value in cspec_node["bmc"].get("manager_settings", {}).items():
|
mgrattribute_attributes = list(mgrattribute_detail["Attributes"].keys())
|
||||||
if setting not in mgrattribute_attributes:
|
for setting, value in cspec_node["bmc"].get("manager_settings", {}).items():
|
||||||
continue
|
if setting not in mgrattribute_attributes:
|
||||||
|
continue
|
||||||
|
|
||||||
payload = {"Attributes": {setting: value}}
|
payload = {"Attributes": {setting: value}}
|
||||||
session.patch(mgrattribute_root, payload)
|
session.patch(mgrattribute_root, payload)
|
||||||
|
except Exception as e:
|
||||||
|
notifications.send_webhook(config, "failure", f"Cluster {cspec_cluster}: Failed to set BMC settings for host {cspec_fqdn} at {bmc_host}. Check pvcbootstrapd logs and reset this host's BMC to retry.")
|
||||||
|
logger.error(f"Cluster {cspec_cluster}: Failed to set BMC settings for host {cspec_fqdn} at {bmc_host}: {e}")
|
||||||
|
logger.error("Aborting Redfish configuration; reset BMC to retry.")
|
||||||
|
del session
|
||||||
|
return
|
||||||
|
|
||||||
# Set boot override to Pxe for the installer boot
|
# Set boot override to Pxe for the installer boot
|
||||||
logger.info("Setting temporary PXE boot...")
|
logger.info("Setting temporary PXE boot...")
|
||||||
set_boot_override(session, system_root, redfish_vendor, "Pxe")
|
try:
|
||||||
|
set_boot_override(session, system_root, redfish_vendor, "Pxe")
|
||||||
|
except Exception as e:
|
||||||
|
notifications.send_webhook(config, "failure", f"Cluster {cspec_cluster}: Failed to set PXE boot override for host {cspec_fqdn} at {bmc_host}. Check pvcbootstrapd logs and reset this host's BMC to retry.")
|
||||||
|
logger.error(f"Cluster {cspec_cluster}: Failed to set PXE boot override for host {cspec_fqdn} at {bmc_host}: {e}")
|
||||||
|
logger.error("Aborting Redfish configuration; reset BMC to retry.")
|
||||||
|
del session
|
||||||
|
return
|
||||||
|
|
||||||
notifications.send_webhook(config, "success", f"Cluster {cspec_cluster}: Completed Redfish initialization of host {cspec_fqdn}")
|
notifications.send_webhook(config, "success", f"Cluster {cspec_cluster}: Completed Redfish initialization of host {cspec_fqdn}")
|
||||||
|
|
||||||
# Turn on the system
|
# Turn on the system
|
||||||
logger.info("Powering on node...")
|
logger.info("Powering on node...")
|
||||||
set_power_state(session, system_root, redfish_vendor, "on")
|
try:
|
||||||
notifications.send_webhook(config, "info", f"Cluster {cspec_cluster}: Powering on host {cspec_fqdn}")
|
set_power_state(session, system_root, redfish_vendor, "on")
|
||||||
|
notifications.send_webhook(config, "info", f"Cluster {cspec_cluster}: Powering on host {cspec_fqdn}")
|
||||||
|
except Exception as e:
|
||||||
|
notifications.send_webhook(config, "failure", f"Cluster {cspec_cluster}: Failed to power on host {cspec_fqdn} at {bmc_host}. Check pvcbootstrapd logs and reset this host's BMC to retry.")
|
||||||
|
logger.error(f"Cluster {cspec_cluster}: Failed to power on host {cspec_fqdn} at {bmc_host}: {e}")
|
||||||
|
logger.error("Aborting Redfish configuration; reset BMC to retry.")
|
||||||
|
del session
|
||||||
|
return
|
||||||
|
|
||||||
node = db.update_node_state(config, cspec_cluster, cspec_hostname, "pxe-booting")
|
node = db.update_node_state(config, cspec_cluster, cspec_hostname, "pxe-booting")
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue