Compare commits

...

8 Commits

Author SHA1 Message Date
871955e5b6 Add additional detail in commit 2022-10-25 21:03:30 +00:00
511df41fa4 Reduce second wait and add hook after 2022-10-25 20:01:51 +00:00
5c2ec9ce78 Standardize names and lock config 2022-10-25 19:25:38 +00:00
f2a6a4ac1f Add locking to git commands
Avoids conflicting attempts when multiple hosts check in at once.
2022-10-25 19:25:38 +00:00
Joshua Boniface
31c7c2522f Add additional detail for status diagram 2022-10-24 10:11:09 -04:00
Joshua Boniface
35a5052e2b Add additional details on detect strings 2022-10-24 10:09:43 -04:00
Joshua Boniface
390b0c6257 Update second reference too 2022-10-24 09:55:45 -04:00
Joshua Boniface
3b5b1f258d Mention webhook system in readme 2022-10-24 09:54:04 -04:00
8 changed files with 78 additions and 58 deletions

View File

@@ -60,7 +60,7 @@ The PVC Bootstrap system is designed to heavily leverage Redfish in its automati
1. Connect power to the servers, but do not manually power on the servers - Redfish will handle this aspect after characterizing each host, as well as manage boot, RAID array creation (as documented in `bootstrap.yml`), BIOS configuration, etc. 1. Connect power to the servers, but do not manually power on the servers - Redfish will handle this aspect after characterizing each host, as well as manage boot, RAID array creation (as documented in `bootstrap.yml`), BIOS configuration, etc.
1. Wait for the cluster bootstrapping to complete; you can watch the output of the `pvcbootstrapd` and `pvcbootstrapd-worker` services on the Bootstrap host to see progress. If supported, the indicator LEDs of the nodes will be lit during setup and will be disabled upon completion to provide a physical indication of the process. 1. Wait for the cluster bootstrapping to complete; you can watch the output of the `pvcbootstrapd` and `pvcbootstrapd-worker` services on the Bootstrap host to see progress, or configure the system to send webhooks to a remote target (e.g. Slack/Mattermost messages). If supported, the indicator LEDs of the nodes will be lit during setup and will be disabled upon completion to provide a physical indication of the process.
1. Verify and power off the servers and put them into production; you may need to complete several post-install tasks (for instance setting the production BMC networking via `sudo ifup ipmi` on each node) before the cluster is completely finished. 1. Verify and power off the servers and put them into production; you may need to complete several post-install tasks (for instance setting the production BMC networking via `sudo ifup ipmi` on each node) before the cluster is completely finished.
@@ -84,7 +84,7 @@ The PVC Bootstrap system can still handle nodes without Redfish support, for ins
1. Power on the servers and set them to boot temporarily (one time) from PXE. 1. Power on the servers and set them to boot temporarily (one time) from PXE.
1. Wait for the cluster bootstrapping to complete; you can watch the output of the `pvcbootstrapd` and `pvcbootstrapd-worker` services on the Bootstrap host to see progress. If supported, the indicator LEDs of the nodes will be lit during setup and will be disabled upon completion to provide a physical indication of the process. 1. Wait for the cluster bootstrapping to complete; you can watch the output of the `pvcbootstrapd` and `pvcbootstrapd-worker` services on the Bootstrap host to see progress, or configure the system to send webhooks to a remote target (e.g. Slack/Mattermost messages). If supported, the indicator LEDs of the nodes will be lit during setup and will be disabled upon completion to provide a physical indication of the process.
1. Verify and power off the servers and put them into production; you may need to complete several post-install tasks (for instance setting the production BMC networking via `sudo ifup ipmi` on each node) before the cluster is completely finished. 1. Verify and power off the servers and put them into production; you may need to complete several post-install tasks (for instance setting the production BMC networking via `sudo ifup ipmi` on each node) before the cluster is completely finished.
@@ -150,13 +150,22 @@ filesystem="ext4"
# The hostname of the system (set per-run) # The hostname of the system (set per-run)
target_hostname="hv1.example.tld" target_hostname="hv1.example.tld"
# The target system disk path # The target system disk path; must be a single disk (mdadm/software RAID is not supported)
# This will usually use a `detect` string. A "detect" string is a string in the form "detect:<NAME>:<HUMAN-SIZE>:<ID>".
# Detect strings allow for automatic determination of Linux block device paths from known basic information
# about disks by leveraging "lsscsi" on the target host. The "NAME" should be some descriptive identifier,
# for instance the manufacturer (e.g. "INTEL"), the "HUMAN-SIZE" should be the labeled human-readable size
# of the device (e.g. "480GB", "1.92TB"), and "ID" specifies the Nth 0-indexed device which matches the
# "NAME" and "HUMAN-SIZE" values (e.g. "2" would match the third device with the corresponding "NAME" and
# "HUMAN-SIZE"). When matching against sizes, there is +/- 3% flexibility to account for base-1000 vs.
# base-1024 differences and rounding errors. The "NAME" is case-insensitive and may contain whitespace;
# if it does, the entire detect string should be quoted.
target_disk="detect:LOGICAL:146GB:0" target_disk="detect:LOGICAL:146GB:0"
# SSH key method (usually tftp) # SSH key fetch method (usually tftp)
target_keys_method="tftp" target_keys_method="tftp"
# SSH key path (usually keys.txt) # SSH key fetch path (usually keys.txt)
target_keys_path="keys.txt" target_keys_path="keys.txt"
# Deploy username (usually deploy) # Deploy username (usually deploy)
@@ -179,6 +188,6 @@ pvcbootstrapd_checkin_uri="http://10.255.255.1:9999/checkin/host"
## Bootstrap Process ## Bootstrap Process
This diagram outlines the various states the nodes and clusters will be in throughout the setup process along with the individual steps for reference. This diagram outlines the various states the nodes and clusters will be in throughout the setup process along with the individual steps for reference. Which node starts characterizing first can be random, but is shown as `node1` for clarity. For non-Redfish installs, the first several steps must be completed manually as referenced above.
![PVC Bootstrap Process](/docs/images/pvcbootstrapd-process.png) ![PVC Bootstrap Process](/docs/images/pvcbootstrapd-process.png)

View File

@@ -66,7 +66,7 @@ pvc:
path: "/var/home/joshua/pvc" path: "/var/home/joshua/pvc"
# Path to the deploy key (if applicable) used to clone and pull the repository # Path to the deploy key (if applicable) used to clone and pull the repository
keyfile: "/var/home/joshua/id_ed25519.joshua.key" key_file: "/var/home/joshua/id_ed25519.joshua.key"
# Git remote URI for the repository # Git remote URI for the repository
remote: "ssh://git@git.bonifacelabs.ca:2222/bonifacelabs/pvc.git" remote: "ssh://git@git.bonifacelabs.ca:2222/bonifacelabs/pvc.git"
@@ -77,6 +77,9 @@ pvc:
# Clusters configuration file # Clusters configuration file
clusters_file: "clusters.yml" clusters_file: "clusters.yml"
# Lock file to use for Git interaction
lock_file: "/run/pvcbootstrapd.lock"
# Filenames of the various group_vars components of a cluster # Filenames of the various group_vars components of a cluster
# Generally with pvc-ansible this will contain 2 files: "base.yml", and "pvc.yml"; refer to the # Generally with pvc-ansible this will contain 2 files: "base.yml", and "pvc.yml"; refer to the
# pvc-ansible documentation and examples for details on these files. # pvc-ansible documentation and examples for details on these files.

View File

@@ -179,7 +179,7 @@ def read_config():
) )
# Get the Ansible configuration # Get the Ansible configuration
for key in ["path", "keyfile", "remote", "branch", "clusters_file"]: for key in ["path", "key_file", "remote", "branch", "clusters_file", "lock_file"]:
try: try:
config[f"ansible_{key}"] = o_ansible[key] config[f"ansible_{key}"] = o_ansible[key]
except Exception: except Exception:

View File

@@ -66,7 +66,7 @@ def run_bootstrap(config, cspec, cluster, nodes):
limit=f"{cluster.name}", limit=f"{cluster.name}",
playbook=f"{config['ansible_path']}/pvc.yml", playbook=f"{config['ansible_path']}/pvc.yml",
extravars={ extravars={
"ansible_ssh_private_key_file": config["ansible_keyfile"], "ansible_ssh_private_key_file": config["ansible_key_file"],
"bootstrap": "yes", "bootstrap": "yes",
}, },
forks=len(nodes), forks=len(nodes),
@@ -76,7 +76,7 @@ def run_bootstrap(config, cspec, cluster, nodes):
logger.info("{}: {}".format(r.status, r.rc)) logger.info("{}: {}".format(r.status, r.rc))
logger.info(r.stats) logger.info(r.stats)
if r.rc == 0: if r.rc == 0:
git.commit_repository(config) git.commit_repository(config, f"Generated files for cluster '{cluster.name}'")
git.push_repository(config) git.push_repository(config)
notifications.send_webhook(config, "success", f"Cluster {cluster.name}: Completed Ansible bootstrap") notifications.send_webhook(config, "success", f"Cluster {cluster.name}: Completed Ansible bootstrap")
else: else:

View File

@@ -22,6 +22,7 @@
import os.path import os.path
import git import git
import yaml import yaml
from filelock import FileLock
import pvcbootstrapd.lib.notifications as notifications import pvcbootstrapd.lib.notifications as notifications
@@ -36,7 +37,7 @@ def init_repository(config):
Clone the Ansible git repository Clone the Ansible git repository
""" """
try: try:
git_ssh_cmd = f"ssh -i {config['ansible_keyfile']} -o StrictHostKeyChecking=no" git_ssh_cmd = f"ssh -i {config['ansible_key_file']} -o StrictHostKeyChecking=no"
if not os.path.exists(config["ansible_path"]): if not os.path.exists(config["ansible_path"]):
print( print(
f"First run: cloning repository {config['ansible_remote']} branch {config['ansible_branch']} to {config['ansible_path']}" f"First run: cloning repository {config['ansible_remote']} branch {config['ansible_branch']} to {config['ansible_path']}"
@@ -60,61 +61,67 @@ def pull_repository(config):
""" """
Pull (with rebase) the Ansible git repository Pull (with rebase) the Ansible git repository
""" """
logger.info(f"Updating local configuration repository {config['ansible_path']}") with FileLock(config['ansible_lock_file']):
try: logger.info(f"Updating local configuration repository {config['ansible_path']}")
git_ssh_cmd = f"ssh -i {config['ansible_keyfile']} -o StrictHostKeyChecking=no" try:
g = git.cmd.Git(f"{config['ansible_path']}") git_ssh_cmd = f"ssh -i {config['ansible_key_file']} -o StrictHostKeyChecking=no"
g.pull(rebase=True, env=dict(GIT_SSH_COMMAND=git_ssh_cmd)) g = git.cmd.Git(f"{config['ansible_path']}")
g.submodule("update", "--init", env=dict(GIT_SSH_COMMAND=git_ssh_cmd)) logger.debug("Performing git pull")
except Exception as e: g.pull(rebase=True, env=dict(GIT_SSH_COMMAND=git_ssh_cmd))
logger.warn(e) logger.debug("Performing git submodule update")
notifications.send_webhook(config, "failure", "Failed to update Git repository") g.submodule("update", "--init", env=dict(GIT_SSH_COMMAND=git_ssh_cmd))
except Exception as e:
logger.warn(e)
notifications.send_webhook(config, "failure", "Failed to update Git repository")
logger.info("Completed repository synchonization")
def commit_repository(config): def commit_repository(config, message="Generic commit"):
""" """
Commit uncommitted changes to the Ansible git repository Commit uncommitted changes to the Ansible git repository
""" """
logger.info( with FileLock(config['ansible_lock_file']):
f"Committing changes to local configuration repository {config['ansible_path']}" logger.info(
) f"Committing changes to local configuration repository {config['ansible_path']}"
try:
g = git.cmd.Git(f"{config['ansible_path']}")
g.add("--all")
commit_env = {
"GIT_COMMITTER_NAME": "PVC Bootstrap",
"GIT_COMMITTER_EMAIL": "git@pvcbootstrapd",
}
g.commit(
"-m",
"Automated commit from PVC Bootstrap Ansible subsystem",
author="PVC Bootstrap <git@pvcbootstrapd>",
env=commit_env,
) )
notifications.send_webhook(config, "success", "Successfully committed to Git repository") try:
except Exception as e: g = git.cmd.Git(f"{config['ansible_path']}")
logger.warn(e) g.add("--all")
notifications.send_webhook(config, "failure", "Failed to commit to Git repository") commit_env = {
"GIT_COMMITTER_NAME": "PVC Bootstrap",
"GIT_COMMITTER_EMAIL": "git@pvcbootstrapd",
}
g.commit(
"-m",
"Automated commit from PVC Bootstrap Ansible subsystem",
"-m",
message,
author="PVC Bootstrap <git@pvcbootstrapd>",
env=commit_env,
)
notifications.send_webhook(config, "success", "Successfully committed to Git repository")
except Exception as e:
logger.warn(e)
notifications.send_webhook(config, "failure", "Failed to commit to Git repository")
def push_repository(config): def push_repository(config):
""" """
Push changes to the default remote Push changes to the default remote
""" """
logger.info( with FileLock(config['ansible_lock_file']):
f"Pushing changes from local configuration repository {config['ansible_path']}" logger.info(
) f"Pushing changes from local configuration repository {config['ansible_path']}"
)
try: try:
git_ssh_cmd = f"ssh -i {config['ansible_keyfile']} -o StrictHostKeyChecking=no" git_ssh_cmd = f"ssh -i {config['ansible_key_file']} -o StrictHostKeyChecking=no"
g = git.Repo(f"{config['ansible_path']}") g = git.Repo(f"{config['ansible_path']}")
origin = g.remote(name="origin") origin = g.remote(name="origin")
origin.push(env=dict(GIT_SSH_COMMAND=git_ssh_cmd)) origin.push(env=dict(GIT_SSH_COMMAND=git_ssh_cmd))
notifications.send_webhook(config, "success", "Successfully pushed Git repository") notifications.send_webhook(config, "success", "Successfully pushed Git repository")
except Exception as e: except Exception as e:
logger.warn(e) logger.warn(e)
notifications.send_webhook(config, "failure", "Failed to push Git repository") notifications.send_webhook(config, "failure", "Failed to push Git repository")
def load_cspec_yaml(config): def load_cspec_yaml(config):

View File

@@ -43,7 +43,7 @@ def run_paramiko(config, node_address):
ssh_client.connect( ssh_client.connect(
hostname=node_address, hostname=node_address,
username=config["deploy_username"], username=config["deploy_username"],
key_filename=config["ansible_keyfile"], key_filename=config["ansible_key_file"],
) )
yield ssh_client yield ssh_client
ssh_client.close() ssh_client.close()

View File

@@ -748,10 +748,11 @@ def redfish_init(config, cspec, data):
return return
notifications.send_webhook(config, "success", f"Cluster {cspec_cluster}: Logged in to Redfish for host {cspec_fqdn} at {bmc_host}") notifications.send_webhook(config, "success", f"Cluster {cspec_cluster}: Logged in to Redfish for host {cspec_fqdn} at {bmc_host}")
logger.info("Waiting 60 seconds for system normalization") logger.info("Waiting 5 seconds for system normalization")
sleep(60) sleep(5)
logger.info("Characterizing node...") logger.info("Characterizing node...")
notifications.send_webhook(config, "begin", f"Cluster {cspec_cluster}: Beginning Redfish characterization of host {cspec_fqdn} at {bmc_host}")
try: try:
# Get Redfish bases # Get Redfish bases

View File

@@ -43,7 +43,7 @@ def init_tftp(config):
os.makedirs(config["tftp_root_path"]) os.makedirs(config["tftp_root_path"])
os.makedirs(config["tftp_host_path"]) os.makedirs(config["tftp_host_path"])
shutil.copyfile( shutil.copyfile(
f"{config['ansible_keyfile']}.pub", f"{config['tftp_root_path']}/keys.txt" f"{config['ansible_key_file']}.pub", f"{config['tftp_root_path']}/keys.txt"
) )
build_tftp_repository(config) build_tftp_repository(config)