pvc-bootstrap/bootstrap-daemon/pvcbootstrapd/lib/redfish.py

980 lines
39 KiB
Python
Executable File

#!/usr/bin/env python3
# redfish.py - PVC Cluster Auto-bootstrap Redfish libraries
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
# Refs:
# https://downloads.dell.com/manuals/all-products/esuprt_software/esuprt_it_ops_datcentr_mgmt/dell-management-solution-resources_white-papers11_en-us.pdf
# https://downloads.dell.com/solutions/dell-management-solution-resources/RESTfulSerConfig-using-iDRAC-REST%20API%28DTC%20copy%29.pdf
import requests
import urllib3
import json
import re
import math
from time import sleep
from celery.utils.log import get_task_logger
import pvcbootstrapd.lib.notifications as notifications
import pvcbootstrapd.lib.installer as installer
import pvcbootstrapd.lib.db as db
logger = get_task_logger(__name__)
#
# Helper Classes
#
class RedfishSession:
    """
    Token-authenticated session against a Redfish BMC.

    Constructing the object logs in to "{host}/redfish/v1/Sessions" (retrying
    while the BMC is unreachable). On success "self.host" holds the BMC base
    URL; on failure it stays None and callers must check it before using the
    session. The session is logged out when the object is garbage-collected.

    The get/delete/post/put/patch helpers return the decoded JSON body on
    success and None when the BMC answers with an error status. If an error
    response has no parseable Redfish error body, the parsing exception is
    re-raised after the response has been logged.
    """

    @staticmethod
    def _extended_info(response):
        """
        Return the first "@Message.ExtendedInfo" entry of a Redfish error
        response, or an empty dict if there is no response or the body cannot
        be parsed.
        """
        if response is None:
            return {}
        try:
            info = response.json()["error"]["@Message.ExtendedInfo"][0]
        except Exception:
            return {}
        return info if isinstance(info, dict) else {}

    @staticmethod
    def _failure_message(rinfo, status_code):
        """
        Build the one-line login/logout failure message from an extended-info
        dict (possibly empty) and the HTTP status code.
        """
        if rinfo.get("Message") is not None:
            full_message = rinfo["Message"]
            res_message = rinfo.get("Resolution", "")
            severity = rinfo.get("Severity", "Fatal")
            message_id = rinfo.get("MessageId", "No message ID")
        else:
            full_message = ""
            res_message = ""
            severity = "Fatal"
            message_id = rinfo.get("MessageId", "No message ID")
        return f"Redfish failure: {full_message} {res_message} (HTTP Code: {status_code}, Severity: {severity}, ID: {message_id})"

    @staticmethod
    def _report_failure(verb, url, response):
        """
        Log a failed request. Re-raises the parsing error if the response does
        not carry a Redfish error body.
        """
        logger.warning(f"! Error: {verb} request to {url} failed")
        try:
            rinfo = response.json()["error"]["@Message.ExtendedInfo"][0]
        except Exception:
            logger.debug(response)
            raise
        if rinfo.get("Message") is not None:
            message = f"{rinfo['Message']} {rinfo.get('Resolution', '')}"
            severity = rinfo.get("Severity", "Error")
            message_id = rinfo.get("MessageId", "N/A")
        else:
            message = rinfo
            severity = "Error"
            message_id = "N/A"
        logger.warning(
            f"! HTTP Code: {response.status_code} Severity: {severity} ID: {message_id}"
        )
        logger.warning(f"! Details: {message}")

    def __init__(self, host, username, password):
        # Disable urllib3 warnings
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

        # Start from a "not logged in" state so __del__ is always safe
        self.host = None
        self.token = None
        self.headers = None
        self.logout_uri = None

        # Perform login
        login_payload = {"UserName": username, "Password": password}
        login_uri = f"{host}/redfish/v1/Sessions"
        login_headers = {"content-type": "application/json"}

        login_response = None
        tries = 1
        max_tries = 61
        while tries < max_tries:
            logger.info(f"Trying to log in to Redfish ({tries}/{max_tries - 1})...")
            try:
                login_response = requests.post(
                    login_uri,
                    data=json.dumps(login_payload),
                    headers=login_headers,
                    verify=False,
                    timeout=5,
                )
                break
            except Exception:
                # The BMC may still be starting up; wait and retry
                sleep(2)
                tries += 1

        if login_response is None or login_response.status_code not in [200, 201]:
            # login_response is None when every attempt raised
            if login_response is None:
                status_code = "no response"
            else:
                status_code = login_response.status_code
            rinfo = self._extended_info(login_response)
            logger.error(f"Failed to log in to Redfish at {host}")
            logger.error(self._failure_message(rinfo, status_code))
            return

        logger.info(f"Logged in to Redfish at {host} successfully")

        self.token = login_response.headers.get("X-Auth-Token")
        self.headers = {"content-type": "application/json", "x-auth-token": self.token}

        # The session resource to DELETE on logout; may be relative or absolute
        logout_uri = login_response.headers.get("Location")
        if logout_uri is None:
            logger.warning(
                f"Redfish at {host} returned no session Location; logout will be skipped"
            )
        elif re.match(r"^/", logout_uri):
            self.logout_uri = f"{host}{logout_uri}"
        else:
            self.logout_uri = logout_uri

        self.host = host

    def __del__(self):
        # Nothing to do if login never succeeded or no logout URI is known
        if getattr(self, "host", None) is None:
            return
        if getattr(self, "logout_uri", None) is None:
            return

        logout_headers = {
            "Content-Type": "application/json",
            "X-Auth-Token": self.token,
        }
        try:
            logout_response = requests.delete(
                self.logout_uri, headers=logout_headers, verify=False, timeout=15
            )
        except Exception as e:
            # Never let a finalizer raise; just record the problem
            logger.error(f"Failed to log out of Redfish at {self.host}: {e}")
            return

        if logout_response.status_code not in [200, 201]:
            rinfo = self._extended_info(logout_response)
            logger.error(f"Failed to log out of Redfish at {self.host}")
            logger.error(self._failure_message(rinfo, logout_response.status_code))
            return

        logger.info(f"Logged out of Redfish at {self.host} successfully")

    def get(self, uri):
        """
        GET "uri" (relative to the BMC host); return the JSON body or None.
        """
        url = f"{self.host}{uri}"
        response = requests.get(url, headers=self.headers, verify=False)
        if response.status_code in [200, 201]:
            return response.json()
        self._report_failure("GET", url, response)
        return None

    def delete(self, uri):
        """
        DELETE "uri" (relative to the BMC host); return the JSON body or None.
        """
        url = f"{self.host}{uri}"
        response = requests.delete(url, headers=self.headers, verify=False)
        if response.status_code in [200, 201]:
            return response.json()
        self._report_failure("DELETE", url, response)
        return None

    def post(self, uri, data):
        """
        POST "data" as JSON to "uri"; return the JSON body or None.

        A successful response without a JSON body (e.g. HTTP 204) yields
        {"json_err": <exception>} instead of a decoded body.
        """
        url = f"{self.host}{uri}"
        payload = json.dumps(data)
        logger.debug(f"POST payload: {payload}")
        response = requests.post(url, data=payload, headers=self.headers, verify=False)
        logger.debug(f"Response: {response.status_code}")
        if response.status_code in [200, 201, 204]:
            try:
                return response.json()
            except Exception as e:
                return {"json_err": e}
        self._report_failure("POST", url, response)
        return None

    def put(self, uri, data):
        """
        PUT "data" as JSON to "uri"; return the JSON body or None.
        """
        url = f"{self.host}{uri}"
        payload = json.dumps(data)
        logger.debug(f"PUT payload: {payload}")
        response = requests.put(url, data=payload, headers=self.headers, verify=False)
        if response.status_code in [200, 201]:
            return response.json()
        self._report_failure("PUT", url, response)
        return None

    def patch(self, uri, data):
        """
        PATCH "data" as JSON to "uri"; return the JSON body or None.
        """
        url = f"{self.host}{uri}"
        payload = json.dumps(data)
        logger.debug(f"PATCH payload: {payload}")
        response = requests.patch(url, data=payload, headers=self.headers, verify=False)
        if response.status_code in [200, 201]:
            return response.json()
        self._report_failure("PATCH", url, response)
        return None
#
# Helper functions
#
def format_bytes_tohuman(databytes):
    """
    Format a number of bytes into a human-readable value (using base-1000)

    The smallest unit whose value is at most 999 is used. Units below TB are
    rounded up to a whole number. TB and larger are shown as a whole number
    when the value is within +/- 2% of the next integer (e.g. "2TB") and with
    two decimal places otherwise (e.g. "1.92TB").

    Returns an empty string if the value exceeds 999 of the largest unit.
    """
    # Matrix of human-to-byte values
    byte_unit_matrix = {
        "B": 1,
        "KB": 1000,
        "MB": 1000 * 1000,
        "GB": 1000 * 1000 * 1000,
        "TB": 1000 * 1000 * 1000 * 1000,
        "PB": 1000 * 1000 * 1000 * 1000 * 1000,
        "EB": 1000 * 1000 * 1000 * 1000 * 1000 * 1000,
    }
    # Units for which fractional values (e.g. 1.92TB) are meaningful
    fractional_units = ("TB", "PB", "EB")

    # Walk from the smallest unit up and stop at the first that fits
    for unit in sorted(byte_unit_matrix, key=byte_unit_matrix.get):
        scaled = databytes / byte_unit_matrix[unit]
        ceiled = int(math.ceil(scaled))

        if unit in fractional_units:
            # Handle the situation where we might want to round to integer
            # values for some entries (2TB) but not others (e.g. 1.92TB). We
            # round if the result is within +/- 2% of the integer value,
            # otherwise we use two decimal places.
            if scaled * 0.98 < ceiled < scaled * 1.02:
                value = ceiled
            else:
                value = round(scaled, 2)
        else:
            value = ceiled

        # More than three digits; try the next larger unit
        if value > 999:
            continue

        return "{}{}".format(value, unit)

    return ""
def _drive_in_array(drive):
    """
    Report whether a Redfish drive record belongs to a (RAID) volume.

    A drive whose first linked volume carries the drive's own Id is treated as
    standalone. A drive with no linked volumes at all is also treated as
    standalone (assumption: no link means no array membership).
    """
    volumes = (drive.get("Links") or {}).get("Volumes") or []
    if not volumes or not isinstance(volumes[0], dict):
        return False
    first_volume_path = volumes[0].get("@odata.id")
    if not first_volume_path:
        return False
    return first_volume_path.split("/")[-1] != drive.get("Id")


def get_system_drive_target(session, cspec_node, storage_root):
    """
    Determine the system drive target for the installer

    Returns the target string to hand to the installer, or None if no usable
    drive could be determined:
      * with no storage root, the first configured system disk as-is;
      * the first configured disk that is a Linux "/dev" path or a "detect:"
        string, as-is;
      * for one matching chassis-ID drive, "detect:<model>:<size>:<position>";
      * for two matching chassis-ID drives, a RAID-1 volume is created on
        their controller and "detect:<controller>:<size>:<position>" is
        returned.
    """
    # Handle an invalid >2 number of system disks, use only first 2
    cspec_drives = cspec_node["config"]["system_disks"][0:2]

    # If we have no storage root, we just return the first entry from
    # the cspec_drives as-is and hope the administrator has the right
    # format here.
    if storage_root is None:
        return cspec_drives[0] if cspec_drives else None

    # We only match the first drive that is already a usable path or detect
    # string; no need to query the BMC for drives in that case.
    for cspec_drive in cspec_drives:
        if re.match(r"^(/dev|detect:)", str(cspec_drive)):
            logger.info(
                "Found a drive with a 'detect:' string or Linux '/dev' path, using it for bootstrap."
            )
            return cspec_drive

    # We proceed with Redfish configuration to determine the disks
    storage_detail = session.get(storage_root)

    # Grab a full list of drives
    drive_list = list()
    for storage_member in storage_detail["Members"]:
        storage_member_detail = session.get(storage_member["@odata.id"])
        for drive in storage_member_detail["Drives"]:
            drive_detail = session.get(drive["@odata.id"])
            if drive_detail is not None:
                drive_list.append(drive_detail)

    # Match any chassis-ID spec drives
    # NOTE(review): drive Ids look like
    # "Disk.Bay.2:Enclosure.Internal.0-1:RAID.Integrated.1-1" per the original
    # comment, while the name crafted here is "Drive.Bay.N" - confirm which
    # prefix the target BMCs actually report.
    system_drives = list()
    for cspec_drive in cspec_drives:
        cspec_drive_name = f"Drive.Bay.{cspec_drive}"
        system_drives.extend(
            drive
            for drive in drive_list
            if drive["Id"].split(":")[0] == cspec_drive_name
        )

    # We found a single drive, so determine its actual detect string
    if len(system_drives) == 1:
        logger.info(
            "Found a single drive matching the requested chassis ID, using it as the system disk."
        )
        target_drive = system_drives[0]

        # Get the model's first word
        drive_model = target_drive.get("Model", "INVALID").split()[0]

        # Get and convert the size in bytes value to human
        drive_size_bytes = target_drive.get("CapacityBytes", 0)
        drive_size_human = format_bytes_tohuman(drive_size_bytes)

        # Get the position of our exact disk among all standalone disks of the
        # same model and size. For example, if we want disk 3 out of 4
        # identical non-array disks, this gives an index of 2, which the
        # installer matches against the 3rd such entry from "lsscsi".
        drive_id = 0
        for drive in drive_list:
            if drive.get("Id") == target_drive.get("Id"):
                break
            if (
                drive.get("Model", "INVALID").split()[0] == drive_model
                and drive.get("CapacityBytes", 0) == drive_size_bytes
                and not _drive_in_array(drive)
            ):
                drive_id += 1

        # Create the target string
        return f"detect:{drive_model}:{drive_size_human}:{drive_id}"

    # We found two drives, so create a RAID-1 array then determine the volume's detect string
    if len(system_drives) == 2:
        logger.info(
            "Found two drives matching the requested chassis IDs, creating a RAID-1 and using it as the system disk."
        )

        drive_one, drive_two = system_drives
        drive_one_path = drive_one.get("@odata.id", "INVALID")
        drive_two_path = drive_two.get("@odata.id", "INVALID")
        # The controller is the last ":"-separated element of the drive Id
        drive_one_controller = drive_one.get("Id", "INVALID").split(":")[-1]
        drive_two_controller = drive_two.get("Id", "INVALID").split(":")[-1]

        # Determine that the drives are on the same controller
        if drive_one_controller != drive_two_controller:
            logger.error(
                "Two drives are not on the same controller; this should not happen"
            )
            return None

        # Get the controller details
        controller_root = f"{storage_root}/{drive_one_controller}"
        controller_detail = session.get(controller_root)

        # Get the name of the controller (for crafting the detect string)
        controller_name = controller_detail.get("Name", "INVALID").split()[0]

        # Get the volume root for the controller
        controller_volume_root = controller_detail.get("Volumes", {}).get(
            "@odata.id"
        )

        # Get the pre-creation list of volumes on the controller
        controller_volumes_pre = [
            volume["@odata.id"]
            for volume in session.get(controller_volume_root).get("Members", [])
        ]

        # Create the RAID-1 volume
        payload = {
            "VolumeType": "Mirrored",
            "Drives": [
                {"@odata.id": drive_one_path},
                {"@odata.id": drive_two_path},
            ],
        }
        session.post(controller_volume_root, payload)

        # Wait for the volume to be created, but not forever: if the creation
        # request was rejected the new volume will never appear.
        max_polls = 180  # 180 x 5s = 15 minutes
        controller_volumes_post = []
        new_volume_list = []
        for _ in range(max_polls):
            sleep(5)
            controller_volumes_post = [
                volume["@odata.id"]
                for volume in session.get(controller_volume_root).get("Members", [])
            ]
            new_volume_list = list(
                set(controller_volumes_post).difference(controller_volumes_pre)
            )
            if new_volume_list:
                break
        else:
            logger.error("Timed out waiting for the RAID-1 volume to be created")
            return None

        new_volume_root = new_volume_list[0]

        # Get the position of the new volume among all volumes on the controller
        volume_id = controller_volumes_post.index(new_volume_root)

        # Get and convert the size in bytes value to human
        volume_detail = session.get(new_volume_root)
        volume_size_bytes = volume_detail.get("CapacityBytes", 0)
        volume_size_human = format_bytes_tohuman(volume_size_bytes)

        # Create the target string
        return f"detect:{controller_name}:{volume_size_human}:{volume_id}"

    # We found too few or too many drives, error
    return None
#
# Redfish Task functions
#
def set_indicator_state(session, system_root, redfish_vendor, state):
    """
    Set the system indicator LED to the desired state (on/off)

    "state" may be a nice name ("on"/"off"), which is translated per vendor,
    or a raw Redfish IndicatorLED value, which is used as-is.

    Returns True if a PATCH was sent, False if the LED already reports the
    desired state or the current state could not be read.
    """
    # Values to write for each nice name
    state_values_write = {
        "Dell": {
            "on": "Blinking",
            "off": "Lit",
        },
        "default": {
            "on": "Lit",
            "off": "Off",
        },
    }
    # Values the system reports back for each nice name
    state_values_read = {
        "Dell": {
            "on": "Blinking",
            "off": "Lit",
        },
        "default": {
            "on": "Lit",
            "off": "Off",
        },
    }

    # Allow vendor-specific overrides
    write_map = state_values_write.get(redfish_vendor, state_values_write["default"])
    read_map = state_values_read.get(redfish_vendor, state_values_read["default"])

    # Allow nice names ("on"/"off"); both lookups use the requested name so
    # the read table is honoured even if it differs from the write table
    write_value = write_map.get(state, state)
    read_value = read_map.get(state, state)

    # Get current state; a failed GET yields None
    system_detail = session.get(system_root)
    try:
        current_state = system_detail["IndicatorLED"]
    except (KeyError, TypeError):
        return False

    if read_value == current_state:
        return False

    session.patch(system_root, {"IndicatorLED": write_value})
    return True
def set_power_state(session, system_root, redfish_vendor, state):
    """
    Set the system power state to the desired state

    "state" may be a nice name ("on"/"off"), which is translated per vendor,
    or a raw Redfish ResetType value (e.g. "GracefulShutdown"), which is used
    as-is.

    Returns True once the reset request has been posted, False if the system
    details needed to post it could not be read. The request is always sent,
    even if the system already reports the target state or the BMC does not
    advertise the requested reset type.
    """
    logger.debug(f"Calling set_power_state with {session}, {system_root}, {redfish_vendor}, {state}")

    state_values = {
        "default": {
            "on": "On",
            "off": "ForceOff",
        },
    }

    # Allow vendor-specific overrides
    vendor_values = state_values.get(redfish_vendor, state_values["default"])
    # Allow nice names ("on"/"off")
    state = vendor_values.get(state, state)

    # Get current state, target URI, and allowable values; a failed GET
    # yields None
    system_detail = session.get(system_root)
    try:
        current_state = system_detail["PowerState"]
        reset_action = system_detail["Actions"]["#ComputerSystem.Reset"]
        power_root = reset_action["target"]
        power_choices = reset_action["ResetType@Redfish.AllowableValues"]
    except (KeyError, TypeError):
        logger.warning(f"Could not read power details from {system_root}; power state not changed")
        return False

    logger.debug(
        f"Current power state is {current_state}; allowable reset types are {power_choices}"
    )

    session.post(power_root, {"ResetType": state})
    return True
def set_boot_override(session, system_root, redfish_vendor, target):
    """
    Set the system boot override to the desired target

    For Dell systems the allowed targets are read from
    "BootSourceOverrideTarget@Redfish.AllowableValues" and the override mode
    is forced to UEFI; for all other vendors they are read from
    "BootSourceOverrideSupported" and only the target is set.

    Returns True if the override was patched, False if the allowed targets
    could not be read or "target" is not among them.
    """
    system_detail = session.get(system_root)

    if redfish_vendor == "Dell":
        targets_key = "BootSourceOverrideTarget@Redfish.AllowableValues"
        boot_settings = {
            "BootSourceOverrideMode": "UEFI",
            "BootSourceOverrideTarget": target,
        }
    else:
        targets_key = "BootSourceOverrideSupported"
        boot_settings = {"BootSourceOverrideTarget": target}

    # A failed GET yields None, which is handled like a missing key
    try:
        boot_targets = system_detail["Boot"][targets_key]
    except (KeyError, TypeError):
        logger.warning(f"Failed to set boot override, no {targets_key} key at {system_detail}")
        return False

    if target not in boot_targets:
        logger.warning(f"Failed to set boot override, key {target} not in {boot_targets}")
        return False

    session.patch(system_root, {"Boot": boot_settings})
    return True
def check_redfish(config, data):
    """
    Validate that a BMC is Redfish-capable

    Requests "https://<data['ipaddr']>/redfish/v1" up to 30 times with a 10
    second timeout each. Returns True if the first response received has
    HTTP status 200, False if it has any other status or if no attempt got a
    response. "config" is accepted for interface compatibility and not used.

    Raises KeyError if "data" has no "ipaddr" entry.
    """
    headers = {"Content-Type": "application/json"}
    url = f"https://{data['ipaddr']}/redfish/v1"
    max_attempts = 30

    logger.info("Checking for Redfish response...")

    for attempt in range(1, max_attempts + 1):
        try:
            resp = requests.get(
                url,
                headers=headers,
                verify=False,
                timeout=10,
            )
        except Exception:
            # No response (timeout, refused, TLS error, ...); try again
            logger.info(f"Attempt {attempt}...")
            continue

        # The first response of any kind decides the result
        return resp.status_code == 200

    logger.warning("Aborted after 300s; device too slow or not booting.")
    return False
#
# Entry function
#
def _abort_bootstrap(config, cspec_cluster, bmc_host, action, error=None):
    """
    Report a fatal bootstrap failure via webhook and the task log.

    "action" completes the phrase "Failed to ..." (e.g. "set BIOS settings
    for host <fqdn>"); "error" is the triggering exception, if any.
    """
    summary = f"Cluster {cspec_cluster}: Failed to {action} at {bmc_host}"
    notifications.send_webhook(config, "failure", f"{summary}. Check pvcbootstrapd logs and reset this host's BMC to retry.")
    if error is not None:
        logger.error(f"{summary}: {error}")
    logger.error("Aborting Redfish configuration; reset BMC to retry.")


def redfish_init(config, cspec, data):
    """
    Initialize a new node with Redfish

    "data" carries the BMC "ipaddr" and "macaddr"; the node's settings are
    read from cspec["bootstrap"][<BMC MAC address>]. The steps are:
      1. record the node in the database and log in to the BMC;
      2. characterize the system (vendor, serial, bootstrap NIC MAC address);
      3. determine the system disk target;
      4. write PXE and preseed configurations;
      5. apply any configured BIOS and BMC manager settings;
      6. set a PXE boot override and power the system on;
      7. wait until the node's database state is "completed", then shut the
         system down gracefully and turn off the indicator LED.

    Any failure before power-on sends a "failure" webhook, logs the cause and
    returns early. Always returns None.
    """
    bmc_ipaddr = data["ipaddr"]
    bmc_macaddr = data["macaddr"]
    bmc_host = f"https://{bmc_ipaddr}"

    cspec_node = cspec["bootstrap"][bmc_macaddr]
    logger.debug(f"cspec_node = {cspec_node}")

    bmc_username = cspec_node["bmc"]["username"]
    bmc_password = cspec_node["bmc"]["password"]

    # The host (non-BMC) addresses are not known yet
    host_macaddr = ""
    host_ipaddr = ""

    cspec_cluster = cspec_node["node"]["cluster"]
    cspec_hostname = cspec_node["node"]["hostname"]
    cspec_fqdn = cspec_node["node"]["fqdn"]

    logger.info("Waiting 30 seconds for system normalization")
    sleep(30)

    notifications.send_webhook(config, "begin", f"Cluster {cspec_cluster}: Beginning Redfish initialization of host {cspec_fqdn}")

    cluster = db.get_cluster(config, name=cspec_cluster)
    if cluster is None:
        cluster = db.add_cluster(config, cspec, cspec_cluster, "provisioning")
    logger.debug(cluster)

    db.update_node_state(config, cspec_cluster, cspec_hostname, "characterizing")
    db.update_node_addresses(
        config,
        cspec_cluster,
        cspec_hostname,
        bmc_macaddr,
        bmc_ipaddr,
        host_macaddr,
        host_ipaddr,
    )
    node = db.get_node(config, cspec_cluster, name=cspec_hostname)
    logger.debug(node)

    # Create the session and log in
    session = RedfishSession(bmc_host, bmc_username, bmc_password)
    if session.host is None:
        _abort_bootstrap(config, cspec_cluster, bmc_host, f"log in to Redfish for host {cspec_fqdn}")
        del session
        return

    notifications.send_webhook(config, "success", f"Cluster {cspec_cluster}: Logged in to Redfish for host {cspec_fqdn} at {bmc_host}")

    logger.info("Waiting 30 seconds for system normalization")
    sleep(30)

    logger.info("Characterizing node...")
    notifications.send_webhook(config, "begin", f"Cluster {cspec_cluster}: Beginning Redfish characterization of host {cspec_fqdn} at {bmc_host}")

    try:
        # Get Redfish bases
        logger.debug("Getting redfish bases")
        redfish_base_root = "/redfish/v1"
        redfish_base_detail = session.get(redfish_base_root)

        # The first Oem key is used as the vendor name
        redfish_vendor = list(redfish_base_detail["Oem"].keys())[0]
        redfish_name = redfish_base_detail["Name"]
        redfish_version = redfish_base_detail["RedfishVersion"]

        managers_base_root = redfish_base_detail["Managers"]["@odata.id"].rstrip("/")
        managers_base_detail = session.get(managers_base_root)
        manager_root = managers_base_detail["Members"][0]["@odata.id"].rstrip("/")

        systems_base_root = redfish_base_detail["Systems"]["@odata.id"].rstrip("/")
        systems_base_detail = session.get(systems_base_root)
        system_root = systems_base_detail["Members"][0]["@odata.id"].rstrip("/")

        # Force off the system and turn on the indicator
        logger.debug("Force off the system and turn on the indicator")
        set_power_state(session, system_root, redfish_vendor, "off")
        set_indicator_state(session, system_root, redfish_vendor, "on")

        # Get the system details
        logger.debug("Get the system details")
        system_detail = session.get(system_root)

        system_sku = system_detail["SKU"].strip()
        system_serial = system_detail["SerialNumber"].strip()
        system_power_state = system_detail["PowerState"].strip()
        system_indicator_state = system_detail["IndicatorLED"].strip()
        system_health_state = system_detail["Status"]["Health"].strip()

        # Walk down the EthernetInterfaces construct to get the bootstrap interface MAC address
        logger.debug("Walk down the EthernetInterfaces construct to get the bootstrap interface MAC address")
        try:
            ethernet_root = system_detail["EthernetInterfaces"]["@odata.id"].rstrip("/")
            ethernet_detail = session.get(ethernet_root)
            logger.debug(f"Found Ethernet detail: {ethernet_detail}")
            # Only interfaces whose path contains "Embedded" are considered;
            # the first one in path order is the bootstrap interface
            embedded_ethernet_detail_members = sorted(
                (e for e in ethernet_detail["Members"] if "Embedded" in e["@odata.id"]),
                key=lambda k: k["@odata.id"],
            )
            logger.debug(f"Found Ethernet members: {embedded_ethernet_detail_members}")
            first_interface_root = embedded_ethernet_detail_members[0]["@odata.id"].rstrip("/")
            first_interface_detail = session.get(first_interface_root) or dict()
        # Something went wrong, so fall back
        except Exception:
            first_interface_detail = dict()

        logger.debug(f"First interface detail: {first_interface_detail}")
        logger.debug(f"HostCorrelation detail: {system_detail.get('HostCorrelation', {})}")

        # Try to get the MAC address directly from the interface detail (Redfish standard)
        if first_interface_detail.get("MACAddress") is not None:
            logger.debug("Try to get the MAC address directly from the interface detail (Redfish standard)")
            bootstrap_mac_address = first_interface_detail["MACAddress"].strip().lower()
        # Try to get the MAC address from the HostCorrelation->HostMACAddress (HP DL360x G8)
        elif len(system_detail.get("HostCorrelation", {}).get("HostMACAddress", [])) > 0:
            logger.debug("Try to get the MAC address from the HostCorrelation (HP iLO)")
            bootstrap_mac_address = (
                system_detail["HostCorrelation"]["HostMACAddress"][0].strip().lower()
            )
        # We can't find it, so abort through the common failure handler below
        else:
            raise RuntimeError("Could not find a valid MAC address for the bootstrap interface.")

        # Display the system details
        logger.info("Found details from node characterization:")
        logger.info(f"> System Manufacturer: {redfish_vendor}")
        logger.info(f"> System Redfish Version: {redfish_version}")
        logger.info(f"> System Redfish Name: {redfish_name}")
        logger.info(f"> System SKU: {system_sku}")
        logger.info(f"> System Serial: {system_serial}")
        logger.info(f"> Power State: {system_power_state}")
        logger.info(f"> Indicator LED: {system_indicator_state}")
        logger.info(f"> Health State: {system_health_state}")
        logger.info(f"> Bootstrap NIC MAC: {bootstrap_mac_address}")

        # Update node host MAC address
        host_macaddr = bootstrap_mac_address
        node = db.update_node_addresses(
            config,
            cspec_cluster,
            cspec_hostname,
            bmc_macaddr,
            bmc_ipaddr,
            host_macaddr,
            host_ipaddr,
        )
        logger.debug(node)
    except Exception as e:
        _abort_bootstrap(config, cspec_cluster, bmc_host, f"characterize Redfish for host {cspec_fqdn}", e)
        del session
        return

    logger.info("Waiting 60 seconds for system normalization")
    sleep(60)

    logger.info("Determining system disk...")
    try:
        storage_root = system_detail.get("Storage", {}).get("@odata.id")
        system_drive_target = get_system_drive_target(session, cspec_node, storage_root)
        if system_drive_target is None:
            # Abort through the common failure handler below
            raise RuntimeError(
                "No valid drives found; configure a single system drive as a 'detect:' string or Linux '/dev' path instead and retry."
            )
        logger.info(f"Found system disk {system_drive_target}")
    except Exception as e:
        _abort_bootstrap(config, cspec_cluster, bmc_host, f"configure system disk for host {cspec_fqdn}", e)
        del session
        return

    # Create our preseed configuration
    logger.info("Creating node boot configurations...")
    try:
        installer.add_pxe(config, cspec_node, host_macaddr)
        installer.add_preseed(config, cspec_node, host_macaddr, system_drive_target)
    except Exception as e:
        _abort_bootstrap(config, cspec_cluster, bmc_host, f"generate PXE configurations for host {cspec_fqdn}", e)
        del session
        return

    # Adjust any BIOS settings
    bios_settings = cspec_node["bmc"].get("bios_settings", {})
    if len(bios_settings) > 0:
        logger.info("Adjusting BIOS settings...")
        try:
            bios_root = system_detail.get("Bios", {}).get("@odata.id")
            if bios_root is not None:
                bios_detail = session.get(bios_root)
                bios_attributes = list(bios_detail["Attributes"].keys())
                for setting, value in bios_settings.items():
                    # Settings the BIOS does not expose are skipped
                    if setting not in bios_attributes:
                        continue
                    payload = {"Attributes": {setting: value}}
                    session.patch(f"{bios_root}/Settings", payload)
        except Exception as e:
            _abort_bootstrap(config, cspec_cluster, bmc_host, f"set BIOS settings for host {cspec_fqdn}", e)
            del session
            return

    # Adjust any Manager settings
    manager_settings = cspec_node["bmc"].get("manager_settings", {})
    if len(manager_settings) > 0:
        logger.info("Adjusting Manager settings...")
        try:
            mgrattribute_root = f"{manager_root}/Attributes"
            mgrattribute_detail = session.get(mgrattribute_root)
            mgrattribute_attributes = list(mgrattribute_detail["Attributes"].keys())
            for setting, value in manager_settings.items():
                # Settings the manager does not expose are skipped
                if setting not in mgrattribute_attributes:
                    continue
                payload = {"Attributes": {setting: value}}
                session.patch(mgrattribute_root, payload)
        except Exception as e:
            _abort_bootstrap(config, cspec_cluster, bmc_host, f"set BMC settings for host {cspec_fqdn}", e)
            del session
            return

    # Set boot override to Pxe for the installer boot
    logger.info("Setting temporary PXE boot...")
    try:
        set_boot_override(session, system_root, redfish_vendor, "Pxe")
    except Exception as e:
        _abort_bootstrap(config, cspec_cluster, bmc_host, f"set PXE boot override for host {cspec_fqdn}", e)
        del session
        return

    notifications.send_webhook(config, "success", f"Cluster {cspec_cluster}: Completed Redfish initialization of host {cspec_fqdn}")

    # Turn on the system
    logger.info("Powering on node...")
    try:
        set_power_state(session, system_root, redfish_vendor, "on")
        notifications.send_webhook(config, "info", f"Cluster {cspec_cluster}: Powering on host {cspec_fqdn}")
    except Exception as e:
        _abort_bootstrap(config, cspec_cluster, bmc_host, f"power on host {cspec_fqdn}", e)
        del session
        return

    node = db.update_node_state(config, cspec_cluster, cspec_hostname, "pxe-booting")

    logger.info("Waiting for completion of node and cluster installation...")
    # Wait for the system to install and be configured
    while node.state != "completed":
        sleep(60)
        # Keep the Redfish session alive; this is best-effort, so a failed
        # request must not end the wait
        try:
            session.get(redfish_base_root)
        except Exception as e:
            logger.warning(f"Redfish keepalive request to {bmc_host} failed: {e}")
        # Refresh our node state
        node = db.get_node(config, cspec_cluster, name=cspec_hostname)

    # Graceful shutdown of the machine
    notifications.send_webhook(config, "info", f"Cluster {cspec_cluster}: Shutting down host {cspec_fqdn}")
    set_power_state(session, system_root, redfish_vendor, "GracefulShutdown")
    system_power_state = "On"
    while system_power_state != "Off":
        sleep(5)
        # Refresh our power state from the system details; keep the previous
        # value if this poll failed or returned no PowerState
        system_detail = session.get(system_root) or {}
        system_power_state = system_detail.get("PowerState", system_power_state).strip()

    # Turn off the indicator to indicate bootstrap has completed
    set_indicator_state(session, system_root, redfish_vendor, "off")

    notifications.send_webhook(config, "success", f"Cluster {cspec_cluster}: Powered off host {cspec_fqdn}")

    # We must delete the session
    del session
    return