pvc/daemon-common/vmbuilder.py

865 lines
26 KiB
Python

#!/usr/bin/env python3
# vmbuilder.py - pvc api vm builder (provisioner) functions
# part of the parallel virtual cluster (pvc) system
#
# copyright (c) 2018-2022 joshua m. boniface <joshua@boniface.me>
#
# this program is free software: you can redistribute it and/or modify
# it under the terms of the gnu general public license as published by
# the free software foundation, version 3.
#
# this program is distributed in the hope that it will be useful,
# but without any warranty; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
import json
import psycopg2
import psycopg2.extras
import re
import os
# import sys
import importlib.util
import uuid
from contextlib import contextmanager
from daemon_lib.zkhandler import ZKHandler
from daemon_lib.celery import start, fail, log_info, log_warn, log_err, update, finish
import daemon_lib.common as pvc_common
import daemon_lib.node as pvc_node
import daemon_lib.vm as pvc_vm
import daemon_lib.network as pvc_network
import daemon_lib.ceph as pvc_ceph
#
# Exceptions (used by Celery tasks)
#
class ValidationError(Exception):
"""
An exception that results from some value being un- or mis-defined.
"""
pass
class ClusterError(Exception):
"""
An exception that results from the PVC cluster being out of alignment with the action.
"""
pass
class ProvisioningError(Exception):
"""
An exception that results from a failure of a provisioning command.
"""
pass
#
# VMBuilder class - subclassed by install scripts
#
class VMBuilder(object):
def __init__(
self,
vm_name,
vm_id,
vm_profile,
vm_data,
):
self.vm_name = vm_name
self.vm_id = vm_id
self.vm_uuid = uuid.uuid4()
self.vm_profile = vm_profile
self.vm_data = vm_data
#
# Helper class functions; used by the individual scripts
#
def log_info(self, msg):
log_info(None, msg)
def log_warn(self, msg):
log_warn(None, msg)
def log_err(self, msg):
log_err(None, msg)
def fail(self, msg, exception=ProvisioningError):
raise exception(msg)
#
# Primary class functions; implemented by the individual scripts
#
def setup(self):
"""
setup(): Perform special setup steps before proceeding
OPTIONAL
"""
pass
def create(self):
"""
create(): Create the VM libvirt schema definition which is defined afterwards
"""
pass
def prepare(self):
"""
prepare(): Prepare any disks/volumes for the install step
"""
pass
def install(self):
"""
install(): Perform the installation
"""
pass
def cleanup(self):
"""
cleanup(): Perform any cleanup required after the prepare() step or on failure of the install() step
"""
pass
#
# Helper functions (as context managers)
#
@contextmanager
def chroot(destination):
"""
Change root directory to a given destination
"""
try:
real_root = os.open("/", os.O_RDONLY)
os.chroot(destination)
fake_root = os.open("/", os.O_RDONLY)
os.fchdir(fake_root)
yield
except Exception:
raise
finally:
os.fchdir(real_root)
os.chroot(".")
os.fchdir(real_root)
os.close(fake_root)
os.close(real_root)
del fake_root
del real_root
@contextmanager
def open_db(config):
try:
conn = psycopg2.connect(
host=config["api_postgresql_host"],
port=config["api_postgresql_port"],
dbname=config["api_postgresql_dbname"],
user=config["api_postgresql_user"],
password=config["api_postgresql_password"],
)
cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
except Exception:
fail(
None,
"Failed to connect to Postgres",
exception=ClusterError,
)
try:
yield cur
except Exception:
raise
finally:
conn.commit()
cur.close()
conn.close()
del conn
@contextmanager
def open_zk(config):
try:
zkhandler = ZKHandler(config)
zkhandler.connect()
except Exception:
fail(
None,
"Failed to connect to Zookeeper",
exception=ClusterError,
)
try:
yield zkhandler
except Exception:
raise
finally:
zkhandler.disconnect()
del zkhandler
#
# Main VM provisioning function - executed by the Celery worker
#
def worker_create_vm(
celery,
config,
vm_name,
vm_profile,
define_vm=True,
start_vm=True,
script_run_args=[],
):
current_stage = 0
total_stages = 11
start(
celery,
f"Provisioning new VM '{vm_name}' with profile '{vm_profile}'",
current=current_stage,
total=total_stages,
)
# Phase 1 - setup
# * Get the profile elements
# * Get the details from these elements
# * Assemble a VM configuration dictionary
current_stage += 1
update(
celery,
"Collecting configuration details",
current=current_stage,
total=total_stages,
)
vm_id = re.findall(r"/(\d+)$/", vm_name)
if not vm_id:
vm_id = 0
else:
vm_id = vm_id[0]
vm_data = dict()
with open_db(config) as db_cur:
# Get the profile information
query = "SELECT * FROM profile WHERE name = %s"
args = (vm_profile,)
db_cur.execute(query, args)
profile_data = db_cur.fetchone()
if profile_data is None:
fail(
celery,
f'Provisioner profile "{vm_profile}" is not present on the cluster',
exception=ClusterError,
)
if profile_data.get("arguments"):
vm_data["script_arguments"] = profile_data.get("arguments").split("|")
else:
vm_data["script_arguments"] = []
# Get the system details
query = "SELECT * FROM system_template WHERE id = %s"
args = (profile_data["system_template"],)
db_cur.execute(query, args)
vm_data["system_details"] = db_cur.fetchone()
# Get the MAC template
query = "SELECT mac_template FROM network_template WHERE id = %s"
args = (profile_data["network_template"],)
db_cur.execute(query, args)
db_row = db_cur.fetchone()
if db_row:
vm_data["mac_template"] = db_row.get("mac_template")
else:
vm_data["mac_template"] = None
# Get the networks
query = "SELECT * FROM network WHERE network_template = %s"
args = (profile_data["network_template"],)
db_cur.execute(query, args)
_vm_networks = db_cur.fetchall()
vm_networks = list()
# Set the eth_bridge for each network
for network in _vm_networks:
vni = network["vni"]
if vni in ["upstream", "cluster", "storage"]:
eth_bridge = "br{}".format(vni)
else:
eth_bridge = "vmbr{}".format(vni)
network["eth_bridge"] = eth_bridge
vm_networks.append(network)
vm_data["networks"] = vm_networks
# Get the storage volumes
# ORDER BY ensures disks are always in the sdX/vdX order, regardless of add order
query = "SELECT * FROM storage WHERE storage_template = %s ORDER BY disk_id"
args = (profile_data["storage_template"],)
db_cur.execute(query, args)
vm_data["volumes"] = db_cur.fetchall()
# Get the script
query = "SELECT script FROM script WHERE id = %s"
args = (profile_data["script"],)
db_cur.execute(query, args)
db_row = db_cur.fetchone()
if db_row:
vm_data["script"] = db_row.get("script")
else:
vm_data["script"] = None
if profile_data.get("profile_type") == "ova":
query = "SELECT * FROM ova WHERE id = %s"
args = (profile_data["ova"],)
db_cur.execute(query, args)
vm_data["ova_details"] = db_cur.fetchone()
query = "SELECT * FROM ova_volume WHERE ova = %s"
args = (profile_data["ova"],)
db_cur.execute(query, args)
# Replace the existing volumes list with our OVA volume list
vm_data["volumes"] = db_cur.fetchall()
retcode, stdout, stderr = pvc_common.run_os_command("uname -m")
vm_data["system_architecture"] = stdout.strip()
monitor_list = list()
monitor_names = config["storage_hosts"]
for monitor in monitor_names:
monitor_list.append("{}.{}".format(monitor, config["storage_domain"]))
vm_data["ceph_monitor_list"] = monitor_list
vm_data["ceph_monitor_port"] = config["ceph_monitor_port"]
vm_data["ceph_monitor_secret"] = config["ceph_secret_uuid"]
# Parse the script arguments
script_arguments = dict()
for argument in vm_data["script_arguments"]:
argument_name, argument_data = argument.split("=")
script_arguments[argument_name] = argument_data
# Parse the runtime arguments
if script_run_args is not None:
for argument in script_run_args:
argument_name, argument_data = argument.split("=")
script_arguments[argument_name] = argument_data
log_info(celery, f"Script arguments: {script_arguments}")
vm_data["script_arguments"] = script_arguments
log_info(
celery,
"VM configuration data:\n{}".format(
json.dumps(vm_data, sort_keys=True, indent=2)
),
)
# Phase 2 - verification
# * Ensure that at least one node has enough free RAM to hold the VM (becomes main host)
# * Ensure that all networks are valid
# * Ensure that there is enough disk space in the Ceph cluster for the disks
# This is the "safe fail" step when an invalid configuration will be caught
current_stage += 1
update(
celery,
"Verifying configuration against cluster",
current=current_stage,
total=total_stages,
)
with open_zk(config) as zkhandler:
# Verify that a VM with this name does not already exist
if pvc_vm.searchClusterByName(zkhandler, vm_name):
fail(
celery,
f"A VM with the name '{vm_name}' already exists in the cluster",
exception=ClusterError,
)
# Verify that at least one host has enough free RAM to run the VM
_discard, nodes = pvc_node.get_list(zkhandler, None)
target_node = None
last_free = 0
for node in nodes:
# Skip the node if it is not ready to run VMs
if node["daemon_state"] != "run" or node["domain_state"] != "ready":
continue
# Skip the node if its free memory is less than the new VM's size, plus a 512MB buffer
if node["memory"]["free"] < (vm_data["system_details"]["vram_mb"] + 512):
continue
# If this node has the most free, use it
if node["memory"]["free"] > last_free:
last_free = node["memory"]["free"]
target_node = node["name"]
# Raise if no node was found
if not target_node:
fail(
celery,
f"No ready cluster node contains at least {vm_data['system_details']['vram_mb']}+512 MB of free RAM",
exception=ClusterError,
)
log_info(
celery,
f'Selecting target node "{target_node}" with "{last_free}" MB free RAM',
)
# Verify that all configured networks are present on the cluster
cluster_networks, _discard = pvc_network.getClusterNetworkList(zkhandler)
for network in vm_data["networks"]:
vni = str(network["vni"])
if vni not in cluster_networks and vni not in [
"upstream",
"cluster",
"storage",
]:
fail(
celery,
f'The network VNI "{vni}" is not present on the cluster',
exception=ClusterError,
)
log_info(celery, "All configured networks for VM are valid")
# Verify that there is enough disk space free to provision all VM disks
pools = dict()
for volume in vm_data["volumes"]:
if volume.get("source_volume") is not None:
volume_data = pvc_ceph.getVolumeInformation(
zkhandler, volume["pool"], volume["source_volume"]
)
if not volume_data:
fail(
celery,
f"The source volume {volume['pool']}/{volume['source_volume']} could not be found",
exception=ClusterError,
)
if not volume["pool"] in pools:
pools[volume["pool"]] = int(
pvc_ceph.format_bytes_fromhuman(volume_data["stats"]["size"])
/ 1024
/ 1024
/ 1024
)
else:
pools[volume["pool"]] += int(
pvc_ceph.format_bytes_fromhuman(volume_data["stats"]["size"])
/ 1024
/ 1024
/ 1024
)
else:
if not volume["pool"] in pools:
pools[volume["pool"]] = volume["disk_size_gb"]
else:
pools[volume["pool"]] += volume["disk_size_gb"]
for pool in pools:
try:
pool_information = pvc_ceph.getPoolInformation(zkhandler, pool)
if not pool_information:
raise
except Exception:
fail(
celery,
f'Pool "{pool}" is not present on the cluster',
exception=ClusterError,
)
pool_free_space_gb = int(
pool_information["stats"]["free_bytes"] / 1024 / 1024 / 1024
)
pool_vm_usage_gb = int(pools[pool])
if pool_vm_usage_gb >= pool_free_space_gb:
fail(
celery,
f'Pool "{pool}" has only {pool_free_space_gb} GB free but VM requires {pool_vm_usage_gb} GB',
exception=ClusterError,
)
log_info(celery, "There is enough space on cluster to store VM volumes")
# Verify that every specified filesystem is valid
used_filesystems = list()
for volume in vm_data["volumes"]:
if volume.get("source_volume") is not None:
continue
if (
volume.get("filesystem") is not None
and volume["filesystem"] not in used_filesystems
):
used_filesystems.append(volume["filesystem"])
for filesystem in used_filesystems:
if filesystem is None or filesystem == "None":
continue
elif filesystem == "swap":
retcode, stdout, stderr = pvc_common.run_os_command("which mkswap")
if retcode:
fail(
celery,
f"Failed to find binary for mkswap: {stderr}",
exception=ProvisioningError,
)
else:
retcode, stdout, stderr = pvc_common.run_os_command(
"which mkfs.{}".format(filesystem)
)
if retcode:
fail(
celery,
f"Failed to find binary for mkfs.{filesystem}: {stderr}",
exception=ProvisioningError,
)
log_info(celery, "All selected filesystems are valid")
# Phase 3 - provisioning script preparation
# * Import the provisioning script as a library with importlib
# * Ensure the required function(s) are present
current_stage += 1
update(
celery,
"Preparing provisioning script",
current=current_stage,
total=total_stages,
)
# Write the script out to a temporary file
retcode, stdout, stderr = pvc_common.run_os_command("mktemp")
if retcode:
fail(
celery,
f"Failed to create a temporary file: {stderr}",
exception=ProvisioningError,
)
script_file = stdout.strip()
with open(script_file, "w") as fh:
fh.write(vm_data["script"])
fh.write("\n")
# Import the script file
loader = importlib.machinery.SourceFileLoader("installer_script", script_file)
spec = importlib.util.spec_from_loader(loader.name, loader)
installer_script = importlib.util.module_from_spec(spec)
spec.loader.exec_module(installer_script)
# Set up the VMBuilderScript object
vm_builder = installer_script.VMBuilderScript(
vm_name=vm_name,
vm_id=vm_id,
vm_profile=vm_profile,
vm_data=vm_data,
)
log_info(celery, "Provisioning script imported successfully")
# Create temporary directory for external chroot
retcode, stdout, stderr = pvc_common.run_os_command("mktemp -d")
if retcode:
fail(
celery,
f"Failed to create a temporary directory: {stderr}",
exception=ProvisioningError,
)
temp_dir = stdout.strip()
# Bind mount / to the chroot location /
retcode, stdout, stderr = pvc_common.run_os_command(
f"mount --bind --options ro / {temp_dir}"
)
if retcode:
fail(
celery,
f"Failed to mount rootfs into {temp_dir} for chroot: {stderr}",
exception=ProvisioningError,
)
# Mount tmpfs to the chroot location /tmp
retcode, stdout, stderr = pvc_common.run_os_command(
f"mount --type tmpfs tmpfs {temp_dir}/tmp"
)
if retcode:
fail(
celery,
f"Failed to mount tmpfs onto {temp_dir}/tmp for chroot: {stderr}",
exception=ProvisioningError,
)
# Bind mount /dev to the chroot location /dev
retcode, stdout, stderr = pvc_common.run_os_command(
f"mount --bind --options ro /dev {temp_dir}/dev"
)
if retcode:
fail(
celery,
f"Failed to mount devfs onto {temp_dir}/dev for chroot: {stderr}",
exception=ProvisioningError,
)
# Bind mount /run to the chroot location /run
retcode, stdout, stderr = pvc_common.run_os_command(
f"mount --bind --options rw /run {temp_dir}/run"
)
if retcode:
fail(
celery,
f"Failed to mount runfs onto {temp_dir}/run for chroot: {stderr}",
exception=ProvisioningError,
)
# Bind mount /sys to the chroot location /sys
retcode, stdout, stderr = pvc_common.run_os_command(
f"mount --bind --options rw /sys {temp_dir}/sys"
)
if retcode:
fail(
celery,
f"Failed to mount sysfs onto {temp_dir}/sys for chroot: {stderr}",
exception=ProvisioningError,
)
# Bind mount /proc to the chroot location /proc
retcode, stdout, stderr = pvc_common.run_os_command(
f"mount --bind --options rw /proc {temp_dir}/proc"
)
if retcode:
fail(
celery,
f"Failed to mount procfs onto {temp_dir}/proc for chroot: {stderr}",
exception=ProvisioningError,
)
log_info(celery, "Chroot environment prepared successfully")
def general_cleanup():
log_info(celery, "Running upper cleanup steps")
try:
# Unmount bind-mounted devfs on the chroot
retcode, stdout, stderr = pvc_common.run_os_command(
f"umount {temp_dir}/dev"
)
# Unmount bind-mounted runfs on the chroot
retcode, stdout, stderr = pvc_common.run_os_command(
f"umount {temp_dir}/run"
)
# Unmount bind-mounted sysfs on the chroot
retcode, stdout, stderr = pvc_common.run_os_command(
f"umount {temp_dir}/sys"
)
# Unmount bind-mounted procfs on the chroot
retcode, stdout, stderr = pvc_common.run_os_command(
f"umount {temp_dir}/proc"
)
# Unmount bind-mounted tmpfs on the chroot
retcode, stdout, stderr = pvc_common.run_os_command(
f"umount {temp_dir}/tmp"
)
# Unmount bind-mounted rootfs on the chroot
retcode, stdout, stderr = pvc_common.run_os_command(f"umount {temp_dir}")
except Exception as e:
# We don't care about fails during cleanup, log and continue
log_warn(celery, f"Suberror during general cleanup unmounts: {e}")
try:
# Remove the temp_dir
os.rmdir(temp_dir)
except Exception as e:
# We don't care about fails during cleanup, log and continue
log_warn(celery, f"Suberror during general cleanup directory removal: {e}")
try:
# Remote temporary script (don't fail if not removed)
os.remove(script_file)
except Exception as e:
# We don't care about fails during cleanup, log and continue
log_warn(celery, f"Suberror during general cleanup script removal: {e}")
def fail_clean(celery, msg, exception=ProvisioningError):
try:
with chroot(temp_dir):
vm_builder.cleanup()
except Exception:
# We're already failing, do the best we can
pass
try:
general_cleanup()
except Exception:
# We're already failing, do the best we can
pass
fail(celery, msg, exception=exception)
# Phase 4 - script: setup()
# * Run pre-setup steps
current_stage += 1
update(
celery, "Running script setup() step", current=current_stage, total=total_stages
)
try:
with chroot(temp_dir):
vm_builder.setup()
except Exception as e:
fail_clean(
celery,
f"Error in script setup() step: {e}",
exception=ProvisioningError,
)
# Phase 5 - script: create()
# * Prepare the libvirt XML defintion for the VM
current_stage += 1
update(
celery,
"Running script create() step",
current=current_stage,
total=total_stages,
)
if define_vm:
try:
with chroot(temp_dir):
vm_schema = vm_builder.create()
except Exception as e:
fail_clean(
celery,
f"Error in script create() step: {e}",
exception=ProvisioningError,
)
log_info(celery, "Generated VM schema:\n{}\n".format(vm_schema))
log_info(celery, "Defining VM on cluster")
node_limit = vm_data["system_details"]["node_limit"]
if node_limit:
node_limit = node_limit.split(",")
node_selector = vm_data["system_details"]["node_selector"]
node_autostart = vm_data["system_details"]["node_autostart"]
migration_method = vm_data["system_details"]["migration_method"]
migration_max_downtime = vm_data["system_details"]["migration_max_downtime"]
with open_zk(config) as zkhandler:
retcode, retmsg = pvc_vm.define_vm(
zkhandler,
vm_schema.strip(),
target_node,
node_limit,
node_selector,
node_autostart,
migration_method,
migration_max_downtime,
vm_profile,
initial_state="provision",
)
log_info(celery, retmsg)
else:
log_info("Skipping VM definition due to define_vm=False")
# Phase 6 - script: prepare()
# * Run preparation steps (e.g. disk creation and mapping, filesystem creation, etc.)
current_stage += 1
update(
celery,
"Running script prepare() step",
current=current_stage,
total=total_stages,
)
try:
with chroot(temp_dir):
vm_builder.prepare()
except Exception as e:
fail_clean(
celery,
f"Error in script prepare() step: {e}",
exception=ProvisioningError,
)
# Phase 7 - script: install()
# * Run installation with arguments
current_stage += 1
update(
celery,
"Running script install() step",
current=current_stage,
total=total_stages,
)
try:
with chroot(temp_dir):
vm_builder.install()
except Exception as e:
fail_clean(
celery,
f"Error in script install() step: {e}",
exception=ProvisioningError,
)
# Phase 8 - script: cleanup()
# * Run cleanup steps
current_stage += 1
update(
celery,
"Running script cleanup() step",
current=current_stage,
total=total_stages,
)
try:
with chroot(temp_dir):
vm_builder.cleanup()
except Exception as e:
fail(
celery,
f"Error in script cleanup() step: {e}",
exception=ProvisioningError,
)
# Phase 9 - general cleanup
# * Clean up the chroot from earlier
current_stage += 1
update(
celery,
"Running general cleanup steps",
current=current_stage,
total=total_stages,
)
general_cleanup()
# Phase 10 - startup
# * Start the VM in the PVC cluster
current_stage += 1
update(celery, "Starting VM", current=current_stage, total=total_stages)
if start_vm:
with open_zk(config) as zkhandler:
success, message = pvc_vm.start_vm(zkhandler, vm_name)
log_info(celery, message)
end_message = f'VM "{vm_name}" with profile "{vm_profile}" has been provisioned and started successfully'
else:
end_message = f'VM "{vm_name}" with profile "{vm_profile}" has been provisioned successfully'
current_stage += 1
return finish(
celery,
end_message,
current=current_stage,
total=total_stages,
)