Compare commits

..

45 Commits

SHA1 Message Date
0769f1ea52 Increase service start time to 10s 2023-10-23 22:24:03 -04:00
c858ae8fed Improve waiting in build-and-deploy 2023-10-23 22:23:48 -04:00
8d256a1737 Complete VM restore functionality 2023-10-23 22:23:17 -04:00
d3b3fdfc80 Revert "Export backup images to a tar archive"
This reverts commit 38abd078af.
2023-10-23 11:01:16 -04:00
f1b29ea94e Initial VM restore work 2023-10-23 11:00:54 -04:00
38abd078af Export backup images to a tar archive
This helps ensure an easier restore as the tar archive(s) can be sent
directly to the API via the normal process of image uploading, instead
of individual disks.
2023-10-23 09:56:50 -04:00
fabb97cf48 Only split a command_string if it's not a list 2023-10-23 09:50:58 -04:00
50aabde320 Ensure bond count is compared with actual qty 2023-10-22 02:28:04 -04:00
68124db323 Remove extra spaces 2023-10-17 13:01:38 -04:00
8921efd269 Fix incorrect tuple construct 2023-10-17 12:55:44 -04:00
3e259bd926 Add state confirmation to newline 2023-10-17 12:53:20 -04:00
3d12915989 Further improve return messages 2023-10-17 12:53:08 -04:00
67b0b19bca Use better time functionality 2023-10-17 12:39:37 -04:00
5d0c674d1d Add runtime and adjust ordering 2023-10-17 12:32:40 -04:00
f3bc4dee04 Fix ordering of empty line 2023-10-17 12:27:06 -04:00
f441b0d823 Improve missing parent message 2023-10-17 12:17:29 -04:00
fd2331faa6 Add waiting message during backup 2023-10-17 12:16:31 -04:00
a5d0f219e4 Improve return messages 2023-10-17 12:10:55 -04:00
0169510df0 Fix up datestring generation 2023-10-17 12:05:45 -04:00
a58c1d5a8c Fix bad snapshot removals 2023-10-17 12:02:24 -04:00
a8e4b01b67 Handle return data even better 2023-10-17 11:51:03 -04:00
45c4c86911 Handle extra return variable 2023-10-17 11:47:01 -04:00
6448b31d2c Improve VM list arguments
Use kwargs here instead of fixed args to allow default None values.
2023-10-17 11:01:38 -04:00
4fc9b15652 Fix bad function name 2023-10-17 10:56:32 -04:00
75b839692b Fix missing comma 2023-10-17 10:51:30 -04:00
751cfe0b29 Use consistent shebangs in scripts 2023-10-17 10:35:38 -04:00
b997c6f31e Add support for full VM backups
Adds support for exporting full VM backups, including configuration,
metainfo, and RBD disk images, with incremental support.
2023-10-17 10:15:06 -04:00
6e83300d78 Increase ipmi plugin timeout 2023-10-04 19:21:59 -04:00
522da3fd95 Adjust wording for volume create too 2023-10-03 09:42:23 -04:00
3a1bf0724e Mention file_size as bytes 2023-10-03 09:39:19 -04:00
ee494fb1c0 Adjust the help text of storage pools
Makes some places clearer, cleans up cruft, and adds references to the
main documentation as required.
2023-10-02 11:46:12 -04:00
c6c44bf775 Bump version to 0.9.78 2023-09-30 12:57:55 -04:00
bbb940da65 Remove spurious comments 2023-09-30 12:37:58 -04:00
a0b45a2bcd Always create RBDs with bytes value
Converting into human results in imprecise values when specifying bytes
directly, which in turn breaks VMDK image uploads. Instead, just use the
raw bytes value when creating the volume instead of converting it back.
2023-09-30 12:37:43 -04:00
35e27f79ef Fix uploading of non-raw image files
Adds a new API query parameter to define the file size, which is then
used for the temporary image. This is required for at least VMDK files
to work properly in qemu-img convert.
2023-09-29 16:19:22 -04:00
ad2e7750ff Fix output path and print message 2023-09-21 02:32:53 -04:00
7c0f12750e Bump version to 0.9.77 2023-09-19 11:05:55 -04:00
1c68e83d98 Fix bad refs to etree library 2023-09-19 11:05:19 -04:00
51e78480fa Bump version to 0.9.76 2023-09-18 10:15:52 -04:00
c4397219da Ensure fencing states are properly reflected 2023-09-18 09:59:18 -04:00
f46bfc962f Bump version to 0.9.75 2023-09-16 23:06:38 -04:00
714d4b6005 Revert float conversion of cpu_cores
Results in much uglier output; there are no decimal core counts.
2023-09-16 23:06:07 -04:00
fa8329ac3d Explicitly round load avg in load plugin 2023-09-16 22:58:49 -04:00
457b7bed3d Handle exceptions in fence migrations 2023-09-16 22:56:09 -04:00
86115b2928 Add startup message for IPMI reachability
It's good to know that this succeeded in addition to knowing if it
failed.
2023-09-16 22:41:58 -04:00
27 changed files with 862 additions and 65 deletions

View File

@ -1 +1 @@
0.9.74
0.9.78

View File

@ -1,5 +1,24 @@
## PVC Changelog
###### [v0.9.78](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.78)
* [API, Client CLI] Fixes several bugs around image uploads; adds a new query parameter for non-raw images
* [API] Ensures RBD images are created with a raw bytes value to avoid rounding errors
###### [v0.9.77](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.77)
* [Client CLI] Fixes a bug from a bad library import
###### [v0.9.76](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.76)
* [API, Client CLI] Corrects some missing node states for fencing in status output
###### [v0.9.75](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.75)
* [Node Daemon] Adds a startup message about IPMI when succeeding
* [Node Daemon] Fixes a bug in fencing allowing non-failing VMs to migrate
* [Node Daemon] Adds rounding to load average in load plugin for consistency
###### [v0.9.74](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.74)
* [Docs] Removes docs from the main repo

View File

@ -27,7 +27,7 @@ from ssl import SSLContext, TLSVersion
from distutils.util import strtobool as dustrtobool
# Daemon version
version = "0.9.74"
version = "0.9.78"
# API version
API_VERSION = 1.0

View File

@ -2140,7 +2140,7 @@ class API_VM_Locks(Resource):
api.add_resource(API_VM_Locks, "/vm/<vm>/locks")
# /vm/<vm</console
# /vm/<vm>/console
class API_VM_Console(Resource):
@RequestParser([{"name": "lines"}])
@Authenticator
@ -2293,6 +2293,138 @@ class API_VM_Device(Resource):
api.add_resource(API_VM_Device, "/vm/<vm>/device")
# /vm/<vm>/backup
class API_VM_Backup(Resource):
@RequestParser(
[
{
"name": "target_path",
"required": True,
"helptext": "A local filesystem path on the primary coordinator must be specified",
},
{
"name": "incremental_parent",
"required": False,
},
{
"name": "retain_snapshots",
"required": False,
},
]
)
@Authenticator
def post(self, vm, reqargs):
"""
Create a backup of {vm} and its volumes to a local primary coordinator filesystem path
---
tags:
- vm
parameters:
- in: query
name: target_path
type: string
required: true
description: A local filesystem path on the primary coordinator to store the backup
- in: query
name: incremental_parent
type: string
required: false
description: A previous backup datestamp to use as an incremental parent; if unspecified a full backup is taken
- in: query
name: retain_snapshots
type: boolean
required: false
default: false
description: Whether or not to retain this backup's volume snapshots to use as a future incremental parent
responses:
200:
description: OK
schema:
type: object
id: Message
400:
description: Execution error
schema:
type: object
id: Message
404:
description: Not found
schema:
type: object
id: Message
"""
target_path = reqargs.get("target_path", None)
incremental_parent = reqargs.get("incremental_parent", None)
retain_snapshots = bool(strtobool(reqargs.get("retain_snapshots", "false")))
return api_helper.vm_backup(
vm, target_path, incremental_parent, retain_snapshots
)
api.add_resource(API_VM_Backup, "/vm/<vm>/backup")
# /vm/<vm>/restore
class API_VM_Restore(Resource):
@RequestParser(
[
{
"name": "target_path",
"required": True,
"helptext": "A local filesystem path on the primary coordinator must be specified",
},
{
"name": "backup_datestring",
"required": True,
"helptext": "A backup datestring must be specified",
}
]
)
@Authenticator
def post(self, vm, reqargs):
"""
Restore a backup of {vm} and its volumes from a local primary coordinator filesystem path
---
tags:
- vm
parameters:
- in: query
name: target_path
type: string
required: true
description: A local filesystem path on the primary coordinator where the backup is stored
- in: query
name: backup_datestring
type: string
required: true
description: The backup datestring identifier (e.g. 20230102030405)
responses:
200:
description: OK
schema:
type: object
id: Message
400:
description: Execution error
schema:
type: object
id: Message
404:
description: Not found
schema:
type: object
id: Message
"""
target_path = reqargs.get("target_path", None)
backup_datestring = reqargs.get("backup_datestring", None)
return api_helper.vm_restore(
vm, target_path, backup_datestring
)
api.add_resource(API_VM_Restore, "/vm/<vm>/restore")
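As a usage illustration, both new endpoints can be driven directly over HTTP. The sketch below assumes the standard "/api/v1" prefix and the "X-Api-Key" authentication header, against the swagger host used elsewhere in this changeset; all names and values are illustrative.
import requests

API_BASE = "http://pvc.local:7370/api/v1"  # hypothetical cluster address
HEADERS = {"X-Api-Key": "secret"}  # assumed auth header; omit if authentication is disabled

# Take a full backup of VM "web01", retaining snapshots for later incrementals
r = requests.post(
    f"{API_BASE}/vm/web01/backup",
    headers=HEADERS,
    params={"target_path": "/srv/backups", "retain_snapshots": "true"},
)
print(r.status_code, r.json().get("message"))

# Restore that backup by its datestring identifier
r = requests.post(
    f"{API_BASE}/vm/web01/restore",
    headers=HEADERS,
    params={"target_path": "/srv/backups", "backup_datestring": "20231022000000"},
)
print(r.status_code, r.json().get("message"))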
##########################################################
# Client API - Network
##########################################################
@ -4843,7 +4975,7 @@ class API_Storage_Ceph_Volume_Root(Resource):
{
"name": "size",
"required": True,
"helptext": "A volume size in bytes (or with k/M/G/T suffix) must be specified.",
"helptext": "A volume size in bytes (B implied or with SI suffix k/M/G/T) must be specified.",
},
]
)
@ -4869,7 +5001,7 @@ class API_Storage_Ceph_Volume_Root(Resource):
name: size
type: string
required: true
description: The volume size in bytes (or with a metric suffix, i.e. k/M/G/T)
description: The volume size, in bytes (B implied) or with a single-character SI suffix (k/M/G/T)
responses:
200:
description: OK
@ -5088,7 +5220,12 @@ class API_Storage_Ceph_Volume_Element_Upload(Resource):
"required": True,
"location": ["args"],
"helptext": "A source image format must be specified.",
}
},
{
"name": "file_size",
"required": False,
"location": ["args"],
},
]
)
@Authenticator
@ -5113,6 +5250,11 @@ class API_Storage_Ceph_Volume_Element_Upload(Resource):
- qed
- vdi
- vpc
- in: query
name: file_size
type: integer
required: false
description: The size of the image file, in bytes, if {image_format} is not "raw"
responses:
200:
description: OK
@ -5131,7 +5273,10 @@ class API_Storage_Ceph_Volume_Element_Upload(Resource):
id: Message
"""
return api_helper.ceph_volume_upload(
pool, volume, reqargs.get("image_format", None)
pool,
volume,
reqargs.get("image_format", None),
reqargs.get("file_size", None),
)

View File

@ -470,6 +470,60 @@ def vm_define(
return output, retcode
@ZKConnection(config)
def vm_backup(
zkhandler,
domain,
target_path,
incremental_parent=None,
retain_snapshots=False,
):
"""
Back up a VM to a local (primary coordinator) filesystem path.
"""
retflag, retdata = pvc_vm.backup_vm(
zkhandler,
domain,
target_path,
incremental_parent,
retain_snapshots,
)
if retflag:
retcode = 200
else:
retcode = 400
output = {"message": retdata.replace('"', "'")}
return output, retcode
@ZKConnection(config)
def vm_restore(
zkhandler,
domain,
target_path,
datestring,
):
"""
Restore a VM from a local (primary coordinator) filesystem path.
"""
retflag, retdata = pvc_vm.restore_vm(
zkhandler,
domain,
target_path,
datestring,
)
if retflag:
retcode = 200
else:
retcode = 400
output = {"message": retdata.replace('"', "'")}
return output, retcode
@ZKConnection(config)
def vm_attach_device(zkhandler, vm, device_spec_xml):
"""
@ -1584,7 +1638,7 @@ def ceph_volume_remove(zkhandler, pool, name):
@ZKConnection(config)
def ceph_volume_upload(zkhandler, pool, volume, img_type):
def ceph_volume_upload(zkhandler, pool, volume, img_type, file_size=None):
"""
Upload a raw file via HTTP post to a PVC Ceph volume
"""
@ -1605,7 +1659,17 @@ def ceph_volume_upload(zkhandler, pool, volume, img_type):
}
retcode = 400
return output, retcode
dev_size = retdata[0]["stats"]["size"]
try:
dev_size = retdata[0]["stats"]["size"]
except Exception:
output = {
"message": "Target volume '{}' does not exist in pool '{}'.".format(
volume, pool
)
}
retcode = 400
return output, retcode
def cleanup_maps_and_volumes():
# Unmap the target blockdev
@ -1619,8 +1683,14 @@ def ceph_volume_upload(zkhandler, pool, volume, img_type):
zkhandler, pool, "{}_tmp".format(volume)
)
# Create a temporary block device to store non-raw images
if img_type == "raw":
if file_size != dev_size:
output = {
"message": f"Image file size {file_size} does not match volume size {dev_size}"
}
retcode = 400
return output, retcode
# Map the target blockdev
retflag, retdata = pvc_ceph.map_volume(zkhandler, pool, volume)
if not retflag:
@ -1659,11 +1729,15 @@ def ceph_volume_upload(zkhandler, pool, volume, img_type):
cleanup_maps_and_volumes()
return output, retcode
# Write the image directly to the blockdev
else:
if file_size is None:
output = {"message": "A file size must be specified"}
retcode = 400
return output, retcode
# Create a temporary blockdev
retflag, retdata = pvc_ceph.add_volume(
zkhandler, pool, "{}_tmp".format(volume), dev_size
zkhandler, pool, "{}_tmp".format(volume), file_size
)
if not retflag:
output = {"message": retdata.replace('"', "'")}

View File

@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
# A useful script for testing out changes to PVC by building the debs and deploying them out to a
# set of hosts automatically, including restarting the daemon (with a pause between) on the remote
@ -36,34 +36,37 @@ echo "Preparing code (format and lint)..."
./lint || exit 1
# Build the packages
echo -n "Building packages... "
echo -n "Building packages..."
version="$( ./build-unstable-deb.sh 2>/dev/null )"
echo "done. Package version ${version}."
echo " done. Package version ${version}."
# Install the client(s) locally
echo -n "Installing client packages locally... "
echo -n "Installing client packages locally..."
$SUDO dpkg -i ../pvc-client*_${version}*.deb &>/dev/null
echo "done".
echo " done".
for HOST in ${HOSTS[@]}; do
echo "> Deploying packages to host ${HOST}"
echo -n "Copying packages... "
echo -n "Copying packages..."
ssh $HOST $SUDO rm -rf /tmp/pvc &>/dev/null
ssh $HOST mkdir /tmp/pvc &>/dev/null
scp ../pvc-*_${version}*.deb $HOST:/tmp/pvc/ &>/dev/null
echo "done."
echo -n "Installing packages... "
echo " done."
echo -n "Installing packages..."
ssh $HOST $SUDO dpkg -i /tmp/pvc/{pvc-client-cli,pvc-daemon-common,pvc-daemon-api,pvc-daemon-node}*.deb &>/dev/null
ssh $HOST rm -rf /tmp/pvc &>/dev/null
echo "done."
echo -n "Restarting PVC daemons... "
echo " done."
echo -n "Restarting PVC daemons..."
ssh $HOST $SUDO systemctl restart pvcapid &>/dev/null
ssh $HOST $SUDO systemctl restart pvcapid-worker &>/dev/null
ssh $HOST $SUDO systemctl restart pvcnoded &>/dev/null
echo "done."
echo -n "Waiting 30s for host to stabilize... "
sleep 30
echo "done."
echo " done."
echo -n "Waiting for node daemon to be running..."
while [[ $( ssh $HOST "pvc -q node list -f json ${HOST%%.*} | jq -r '.[].daemon_state'" ) != "run" ]]; do
sleep 5
echo -n "."
done
echo " done."
done
if [[ -z ${KEEP_ARTIFACTS} ]]; then
rm ../pvc*_${version}*

View File

@ -1,4 +1,4 @@
#!/bin/sh
#!/usr/bin/env bash
pushd $( git rev-parse --show-toplevel ) &>/dev/null
ver="$( head -1 debian/changelog | awk -F'[()-]' '{ print $2 }' )"
git pull

View File

@ -1,4 +1,4 @@
#!/bin/sh
#!/usr/bin/env bash
set -o xtrace
exec 3>&1
exec 1>&2

View File

@ -1188,10 +1188,8 @@ def cli_vm_modify(
# Grab the current config
current_vm_cfg_raw = vm_information.get("xml")
xml_data = etree.fromstring(current_vm_cfg_raw)
current_vm_cfgfile = (
etree.tostring(xml_data, pretty_print=True).decode("utf8").strip()
)
xml_data = fromstring(current_vm_cfg_raw)
current_vm_cfgfile = tostring(xml_data, pretty_print=True).decode("utf8").strip()
if editor is True:
new_vm_cfgfile = click.edit(
@ -1592,6 +1590,92 @@ def cli_vm_flush_locks(domain):
finish(retcode, retmsg)
###############################################################################
# > pvc vm backup
###############################################################################
@click.command(name="backup", short_help="Create a backup of a virtual machine.")
@connection_req
@click.argument("domain")
@click.argument("target_path")
@click.option(
"-i",
"--incremental",
"incremental_parent",
default=None,
help="Perform an incremental volume backup from this parent backup datestring.",
)
@click.option(
"-r",
"--retain-snapshots",
"retain_snapshots",
is_flag=True,
default=False,
help="Retain volume snapshots for future incremental use.",
)
def cli_vm_backup(domain, target_path, incremental_parent, retain_snapshots):
"""
Create a backup of virtual machine DOMAIN to TARGET_PATH on the cluster primary coordinator. DOMAIN may be a UUID or name.
TARGET_PATH must be a valid absolute directory path on the cluster "primary" coordinator (see "pvc node list") allowing writes from the API daemon (normally running as "root"). The TARGET_PATH should be a large storage volume, ideally a remotely mounted filesystem (e.g. NFS, SSHFS, etc.) or non-Ceph-backed disk; PVC does not manage this path, so configuring and managing it is up to the administrator.
The backup will export the VM configuration, metainfo, and a point-in-time snapshot of all attached RBD volumes, using a datestring formatted backup name (i.e. YYYYMMDDHHMMSS).
The virtual machine DOMAIN may be running; because snapshots are used, the backup should be crash-consistent, but it will be in an unclean state, and this must be considered when restoring from backups.
Incremental backups are possible by specifying the "-i"/"--incremental" option along with a source backup datestring. The snapshots from that source backup must have been retained using the "-r"/"--retain-snapshots" option. Arbitrary snapshots, assuming they are valid for all attached RBD volumes, may also be used, as long as they are prefixed with "backup_". Retaining snapshots of incremental backups is supported, though it is not recommended to "chain" incremental backups in this way as it can make managing restores more difficult.
Full backup volume images are sparse-allocated; however, for safety, consider their maximum allocated size when allocating space for the TARGET_PATH. Incremental volume images are generally small but depend entirely on the rate of data change in each volume.
"""
echo(
CLI_CONFIG,
f"Backing up VM '{domain}'... ",
newline=False,
)
retcode, retmsg = pvc.lib.vm.vm_backup(
CLI_CONFIG, domain, target_path, incremental_parent, retain_snapshots
)
if retcode:
echo(CLI_CONFIG, "done.")
else:
echo(CLI_CONFIG, "failed.")
finish(retcode, retmsg)
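As an illustration, the same flow through the client library; this is a sketch where "config" stands in for an established API connection dictionary as used by the CLI, and all names are illustrative.
import pvc.lib.vm

# Sunday: full backup, retaining snapshots so incrementals can follow
retcode, retmsg = pvc.lib.vm.vm_backup(
    config, "web01", "/srv/backups", retain_snapshots=True
)

# Weekdays: incremental backups against the retained parent's datestring
retcode, retmsg = pvc.lib.vm.vm_backup(
    config, "web01", "/srv/backups", incremental_parent="20231022000000"
)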
###############################################################################
# > pvc vm restore
###############################################################################
@click.command(name="restore", short_help="Restore a backup of a virtual machine.")
@connection_req
@click.argument("domain")
@click.argument("backup_datestring")
@click.argument("target_path")
def cli_vm_restore(domain, backup_datestring, target_path):
"""
Restore the backup BACKUP_DATESTRING of virtual machine DOMAIN stored in TARGET_PATH on the cluster primary coordinator. DOMAIN may be a UUID or name.
TARGET_PATH must be a valid absolute directory path on the cluster "primary" coordinator (see "pvc node list") allowing reads from the API daemon (normally running as "root"). The TARGET_PATH should be a large storage volume, ideally a remotely mounted filesystem (e.g. NFS, SSHFS, etc.) or non-Ceph-backed disk; PVC does not manage this path, so configuring and managing it is up to the administrator.
The restore will import the VM configuration, metainfo, and the point-in-time snapshot of all attached RBD volumes. Incremental backups will be automatically handled.
A VM named DOMAIN must not exist; if the VM already exists, it must be removed before restoring. Renaming is not sufficient as the UUID will remain the same.
"""
echo(
CLI_CONFIG,
f"Restoring backup {backup_datestring} of VM '{domain}'... ",
newline=False,
)
retcode, retmsg = pvc.lib.vm.vm_restore(
CLI_CONFIG, domain, target_path, backup_datestring
)
if retcode:
echo(CLI_CONFIG, "done.")
else:
echo(CLI_CONFIG, "failed.")
finish(retcode, retmsg)
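And the matching restore call, a sketch under the same assumptions; per the help text above, the VM must not already exist in the cluster.
retcode, retmsg = pvc.lib.vm.vm_restore(
    config, "web01", "/srv/backups", "20231022000000"
)
if not retcode:
    print(f"Restore failed: {retmsg}")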
###############################################################################
# > pvc vm tag
###############################################################################
@ -3459,14 +3543,14 @@ def cli_storage_pool():
show_default=True,
required=False,
help="""
The replication configuration, specifying both a "copies" and "mincopies" value, separated by a comma, e.g. "copies=3,mincopies=2". The "copies" value specifies the total number of replicas and should not exceed the total number of nodes; the "mincopies" value specifies the minimum number of available copies to allow writes. For additional details please see the Cluster Architecture documentation.
The replication configuration, specifying both a "copies" and "mincopies" value, separated by a comma, e.g. "copies=3,mincopies=2". The "copies" value specifies the total number of replicas and the "mincopies" value specifies the minimum number of active replicas to allow I/O. For additional details please see the documentation.
""",
)
def cli_storage_pool_add(name, pgs, tier, replcfg):
"""
Add a new Ceph RBD pool with name NAME and PGS placement groups.
The placement group count must be a non-zero power of 2.
The placement group count must be a non-zero power of 2. Generally you should choose a PGS number such that there will be 50-150 PGs on each OSD in a single node (before replicas); 64, 128, or 256 are good values for small clusters (1-5 OSDs per node); higher values are recommended for higher node or OSD counts. For additional details please see the documentation.
"""
retcode, retmsg = pvc.lib.storage.ceph_pool_add(
@ -3505,9 +3589,9 @@ def cli_storage_pool_set_pgs(name, pgs):
"""
Set the placement groups (PGs) count for the pool NAME to PGS.
The placement group count must be a non-zero power of 2.
The placement group count must be a non-zero power of 2. Generally you should choose a PGS number such that there will be 50-150 PGs on each OSD in a single node (before replicas); 64, 128, or 256 are good values for small clusters (1-5 OSDs per node); higher values are recommended for higher node or OSD counts. For additional details please see the documentation.
Placement group counts may be increased or decreased as required though frequent alteration is not recommended.
Placement group counts may be increased or decreased as required, though frequent alteration is not recommended. Placement group alterations are intensive operations on the storage cluster.
"""
retcode, retmsg = pvc.lib.storage.ceph_pool_set_pgs(CLI_CONFIG, name, pgs)
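A quick worked example of the PG sizing guidance above, with illustrative numbers only:
from math import log2

osds_per_node = 4
target_pgs_per_osd = 100  # middle of the recommended 50-150 range
ideal = osds_per_node * target_pgs_per_osd  # 400
pgs = 2 ** round(log2(ideal))  # nearest power of 2 -> 512 (128 PGs per OSD)
# 256 would also land in range at 64 PGs per OSD; both are valid choices here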
@ -3600,7 +3684,7 @@ def cli_storage_volume_upload(pool, name, image_format, image_file):
If the image format is "raw", the image is uploaded directly to the target volume without modification. Otherwise, it will be converted into raw format by "qemu-img convert" on the remote side before writing using a temporary volume. The image format must be a valid format recognized by "qemu-img", such as "vmdk" or "qcow2".
"""
if not os.path.exists(image_file):
if not path.exists(image_file):
echo(CLI_CONFIG, "ERROR: File '{}' does not exist!".format(image_file))
exit(1)
@ -4912,13 +4996,13 @@ def cli_provisioner_ova_upload(name, filename, pool):
Storage templates, provisioning scripts, and arguments for OVA-type profiles will be ignored and should not be set.
"""
if not os.path.exists(filename):
if not path.exists(filename):
echo(CLI_CONFIG, "ERROR: File '{}' does not exist!".format(filename))
exit(1)
params = dict()
params["pool"] = pool
params["ova_size"] = os.path.getsize(filename)
params["ova_size"] = path.getsize(filename)
retcode, retdata = pvc.lib.provisioner.ova_upload(
CLI_CONFIG, name, filename, params
@ -5661,6 +5745,8 @@ cli_vm.add_command(cli_vm_move)
cli_vm.add_command(cli_vm_migrate)
cli_vm.add_command(cli_vm_unmigrate)
cli_vm.add_command(cli_vm_flush_locks)
cli_vm.add_command(cli_vm_backup)
cli_vm.add_command(cli_vm_restore)
cli_vm_tag.add_command(cli_vm_tag_get)
cli_vm_tag.add_command(cli_vm_tag_add)
cli_vm_tag.add_command(cli_vm_tag_remove)

View File

@ -135,7 +135,7 @@ def cli_cluster_status_format_pretty(CLI_CONFIG, data):
state_colour = ansii["green"]
elif state in ["run,flush", "run,unflush", "run,flushed"]:
state_colour = ansii["blue"]
elif "dead" in state or "stop" in state:
elif "dead" in state or "fenced" in state or "stop" in state:
state_colour = ansii["red"]
else:
state_colour = ansii["yellow"]

View File

@ -21,6 +21,7 @@
import math
from os import path
from json import loads
from requests_toolbelt.multipart.encoder import (
MultipartEncoder,
@ -1209,6 +1210,11 @@ def ceph_volume_upload(config, pool, volume, image_format, image_file):
"""
import click
if image_format != "raw":
file_size = path.getsize(image_file)
else:
file_size = None
bar = UploadProgressBar(
image_file, end_message="Parsing file on remote side...", end_nl=False
)
@ -1220,7 +1226,7 @@ def ceph_volume_upload(config, pool, volume, image_format, image_file):
upload_monitor = MultipartEncoderMonitor(upload_data, bar.update)
headers = {"Content-Type": upload_monitor.content_type}
params = {"image_format": image_format}
params = {"image_format": image_format, "file_size": file_size}
response = call_api(
config,

View File

@ -433,6 +433,47 @@ def vm_locks(config, vm):
return retstatus, response.json().get("message", "")
def vm_backup(config, vm, target_path, incremental_parent=None, retain_snapshots=False):
"""
Create a backup of {vm} and its volumes to a local primary coordinator filesystem path
API endpoint: POST /vm/{vm}/backup
API arguments: target_path={target_path}, incremental_parent={incremental_parent}, retain_snapshots={retain_snapshots}
API schema: {"message":"{data}"}
"""
params = {
"target_path": target_path,
"incremental_parent": incremental_parent,
"retain_snapshots": retain_snapshots,
}
response = call_api(config, "post", "/vm/{vm}/backup".format(vm=vm), params=params)
if response.status_code != 200:
return False, response.json().get("message", "")
else:
return True, response.json().get("message", "")
def vm_restore(config, vm, target_path, backup_datestring):
"""
Restore a backup of {vm} and its volumes from a local primary coordinator filesystem path
API endpoint: POST /vm/{vm}/restore
API arguments: target_path={target_path}, backup_datestring={backup_datestring}
API schema: {"message":"{data}"}
"""
params = {
"target_path": target_path,
"backup_datestring": backup_datestring,
}
response = call_api(config, "post", "/vm/{vm}/restore".format(vm=vm), params=params)
if response.status_code != 200:
return False, response.json().get("message", "")
else:
return True, response.json().get("message", "")
def vm_vcpus_set(config, vm, vcpus, topology, restart):
"""
Set the vCPU count of the VM with topology

View File

@ -2,7 +2,7 @@ from setuptools import setup
setup(
name="pvc",
version="0.9.74",
version="0.9.78",
packages=["pvc.cli", "pvc.lib"],
install_requires=[
"Click",

View File

@ -763,9 +763,7 @@ def add_volume(zkhandler, pool, name, size):
# 2. Create the volume
retcode, stdout, stderr = common.run_os_command(
"rbd create --size {} {}/{}".format(
format_bytes_tohuman(size_bytes), pool, name
)
"rbd create --size {}B {}/{}".format(size_bytes, pool, name)
)
if retcode:
return False, 'ERROR: Failed to create RBD volume "{}": {}'.format(name, stderr)
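The hazard this change avoids, sketched numerically; the conversion shown is an assumption about the old human-readable path, not the actual format_bytes_tohuman implementation:
size_bytes = 21474836992  # an exact image size: 20 GiB plus 512 bytes
human = f"{size_bytes / 1024**3:.1f}G"  # old path: rounds to "20.0G"
round_tripped = int(float(human[:-1]) * 1024**3)
assert round_tripped == 21474836480  # 512 bytes short; the volume is too small for the image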

View File

@ -256,8 +256,13 @@ def getClusterInformation(zkhandler):
"stop,unflush",
"dead,ready",
"dead,flush",
"dead,fence-flush",
"dead,flushed",
"dead,unflush",
"fenced,ready",
"fenced,flush",
"fenced,flushed",
"fenced,unflush",
]
vm_state_combinations = [
"start",

View File

@ -146,7 +146,11 @@ def run_os_daemon(command_string, environment=None, logfile=None):
# Run a local OS command via shell
#
def run_os_command(command_string, background=False, environment=None, timeout=None):
command = shlex_split(command_string)
if not isinstance(command_string, list):
command = shlex_split(command_string)
else:
command = command_string
if background:
def runcmd():

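A note on why the list passthrough matters: shlex splitting is only correct for a single command string, while a pre-tokenized argument list (for example, one containing a path with spaces) must be passed through untouched. A minimal sketch:
from shlex import split as shlex_split

# String form: split into argv as before
print(shlex_split("rbd create --size 21474836992B vms/web01_disk0"))

# List form: re-splitting would mangle the spaced path, so the new code
# uses the list as-is
argv = ["qemu-img", "convert", "-O", "raw", "/tmp/My Image.vmdk", "/tmp/out.raw"]
# run_os_command(argv)  # passed through without shlex splitting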
View File

@ -21,12 +21,17 @@
import time
import re
import os.path
import lxml.objectify
import lxml.etree
from distutils.util import strtobool
from uuid import UUID
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from socket import gethostname
from json import dump as jdump
from json import load as jload
import daemon_lib.common as common
@ -1175,13 +1180,15 @@ def get_info(zkhandler, domain):
return True, domain_information
def get_list(zkhandler, node, state, tag, limit, is_fuzzy=True, negate=False):
if node:
def get_list(
zkhandler, node=None, state=None, tag=None, limit=None, is_fuzzy=True, negate=False
):
if node is not None:
# Verify node is valid
if not common.verifyNode(zkhandler, node):
return False, 'Specified node "{}" is invalid.'.format(node)
if state:
if state is not None:
valid_states = [
"start",
"restart",
@ -1200,7 +1207,7 @@ def get_list(zkhandler, node, state, tag, limit, is_fuzzy=True, negate=False):
full_vm_list.sort()
# Set our limit to a sensible regex
if limit:
if limit is not None:
# Check if the limit is a UUID
is_limit_uuid = False
try:
@ -1229,7 +1236,7 @@ def get_list(zkhandler, node, state, tag, limit, is_fuzzy=True, negate=False):
is_state_match = False
# Check on limit
if limit:
if limit is not None:
# Try to match the limit against the UUID (if applicable) and name
try:
if is_limit_uuid and re.fullmatch(limit, vm):
@ -1241,7 +1248,7 @@ def get_list(zkhandler, node, state, tag, limit, is_fuzzy=True, negate=False):
else:
is_limit_match = True
if tag:
if tag is not None:
vm_tags = zkhandler.children(("domain.meta.tags", vm))
if negate and tag not in vm_tags:
is_tag_match = True
@ -1251,7 +1258,7 @@ def get_list(zkhandler, node, state, tag, limit, is_fuzzy=True, negate=False):
is_tag_match = True
# Check on node
if node:
if node is not None:
vm_node = zkhandler.read(("domain.node", vm))
if negate and vm_node != node:
is_node_match = True
@ -1261,7 +1268,7 @@ def get_list(zkhandler, node, state, tag, limit, is_fuzzy=True, negate=False):
is_node_match = True
# Check on state
if state:
if state is not None:
vm_state = zkhandler.read(("domain.state", vm))
if negate and vm_state != state:
is_state_match = True
@ -1297,3 +1304,372 @@ def get_list(zkhandler, node, state, tag, limit, is_fuzzy=True, negate=False):
pass
return True, sorted(vm_data_list, key=lambda d: d["name"])
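The kwargs change lets internal callers fetch a single VM without padding positional None arguments; backup_vm() below does exactly this. A sketch, assuming a zkhandler and dom_uuid in scope:
retcode, vm_list = get_list(zkhandler, limit=dom_uuid, is_fuzzy=False)
vm_detail = vm_list[0]  # the full detail dict for the matched VM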
def backup_vm(
zkhandler, domain, target_path, incremental_parent=None, retain_snapshots=False
):
tstart = time.time()
# 0. Validations
# Validate that VM exists in cluster
dom_uuid = getDomainUUID(zkhandler, domain)
if not dom_uuid:
return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)
# Validate that the target path is an absolute path
if not re.match(r"^/", target_path):
return (
False,
f"ERROR: Target path {target_path} is not a valid absolute path on the primary coordinator!",
)
# Ensure that target_path (on this node) exists
if not os.path.isdir(target_path):
return False, f"ERROR: Target path {target_path} does not exist!"
# 1. Get information about VM
vm_detail = get_list(zkhandler, limit=dom_uuid, is_fuzzy=False)[1][0]
if not isinstance(vm_detail, dict):
return False, f"ERROR: VM listing returned invalid data: {vm_detail}"
vm_volumes = list()
for disk in vm_detail["disks"]:
if disk["type"] != "rbd":
continue
pool, volume = disk["name"].split('/')
retcode, retdata = ceph.get_list_volume(zkhandler, pool, volume, is_fuzzy=False)
if not retcode or len(retdata) != 1:
if len(retdata) < 1:
retdata = "No volumes returned."
elif len(retdata) > 1:
retdata = "Multiple volumes returned."
return False, f"ERROR: Failed to get volume details for {pool}/{volume}: {retdata}"
try:
size = retdata[0]["stats"]["size"]
except Exception as e:
return False, f"ERROR: Failed to get volume size for {pool}/{volume}: {e}"
vm_volumes.append((pool, volume, size))
# 2a. Validate that all volumes exist (they should, but just in case)
for pool, volume, _ in vm_volumes:
if not ceph.verifyVolume(zkhandler, pool, volume):
return (
False,
f"ERROR: VM defines a volume {pool}/{volume} which does not exist!",
)
# 2b. Validate that, if an incremental_parent is given, it is valid
# The incremental parent is just a datestring
if incremental_parent is not None:
for pool, volume, _ in vm_volumes:
if not ceph.verifySnapshot(
zkhandler, pool, volume, f"backup_{incremental_parent}"
):
return (
False,
f"ERROR: Incremental parent {incremental_parent} given, but no snapshots were found; cannot export an incremental backup.",
)
export_fileext = "rbddiff"
else:
export_fileext = "rbdimg"
# 2c. Validate that there's enough space on the target
# TODO
# 3. Set datestring in YYYYMMDDHHMMSS format
now = datetime.now()
datestring = now.strftime("%Y%m%d%H%M%S")
snapshot_name = f"backup_{datestring}"
# 4. Create destination directory
vm_target_root = f"{target_path}/{domain}"
vm_target_backup = f"{target_path}/{domain}/{domain}.{datestring}.pvcdisks"
if not os.path.isdir(vm_target_backup):
try:
os.makedirs(vm_target_backup)
except Exception as e:
return False, f"ERROR: Failed to create backup directory: {e}"
# 5. Take a snapshot of each disk with the name @backup_{datestring}
is_snapshot_create_failed = False
which_snapshot_create_failed = list()
msg_snapshot_create_failed = list()
for pool, volume, _ in vm_volumes:
retcode, retmsg = ceph.add_snapshot(zkhandler, pool, volume, snapshot_name)
if not retcode:
is_snapshot_create_failed = True
which_snapshot_create_failed.append(f"{pool}/{volume}")
msg_snapshot_create_failed.append(retmsg)
if is_snapshot_create_failed:
for pool, volume, _ in vm_volumes:
if ceph.verifySnapshot(zkhandler, pool, volume, snapshot_name):
ceph.remove_snapshot(zkhandler, pool, volume, snapshot_name)
return (
False,
f'ERROR: Failed to create snapshot for volume(s) {", ".join(which_snapshot_create_failed)}: {", ".join(msg_snapshot_create_failed)}',
)
# 6. Dump snapshot to folder with `rbd export` (full) or `rbd export-diff` (incremental)
is_snapshot_export_failed = False
which_snapshot_export_failed = list()
msg_snapshot_export_failed = list()
for pool, volume, _ in vm_volumes:
if incremental_parent is not None:
incremental_parent_snapshot_name = f"backup_{incremental_parent}"
retcode, stdout, stderr = common.run_os_command(
f"rbd export-diff --from-snap {incremental_parent_snapshot_name} {pool}/{volume}@{snapshot_name} {vm_target_backup}/{pool}.{volume}.{export_fileext}"
)
if retcode:
is_snapshot_export_failed = True
which_snapshot_export_failed.append(f"{pool}/{volume}")
msg_snapshot_export_failed.append(stderr)
else:
retcode, stdout, stderr = common.run_os_command(
f"rbd export --export-format 2 {pool}/{volume}@{snapshot_name} {vm_target_backup}/{pool}.{volume}.{export_fileext}"
)
if retcode:
is_snapshot_export_failed = True
which_snapshot_export_failed.append(f"{pool}/{volume}")
msg_snapshot_export_failed.append(stderr)
if is_snapshot_export_failed:
for pool, volume, _ in vm_volumes:
if ceph.verifySnapshot(zkhandler, pool, volume, snapshot_name):
ceph.remove_snapshot(zkhandler, pool, volume, snapshot_name)
return (
False,
f'ERROR: Failed to export snapshot for volume(s) {", ".join(which_snapshot_export_failed)}: {", ".join(msg_snapshot_export_failed)}',
)
# 7. Create and dump VM backup information
backup_type = "incremental" if incremental_parent is not None else "full"
vm_backup = {
"type": backup_type,
"datestring": datestring,
"incremental_parent": incremental_parent,
"vm_detail": vm_detail,
"backup_files": [(f"{domain}.{datestring}.pvcdisks/{p}.{v}.{export_fileext}", s) for p, v, s in vm_volumes],
}
with open(f"{vm_target_root}/{domain}.{datestring}.pvcbackup", "w") as fh:
jdump(vm_backup, fh)
# 8. Remove snapshots if retain_snapshots is False
is_snapshot_remove_failed = False
which_snapshot_remove_failed = list()
msg_snapshot_remove_failed = list()
if not retain_snapshots:
for pool, volume, _ in vm_volumes:
if ceph.verifySnapshot(zkhandler, pool, volume, snapshot_name):
retcode, retmsg = ceph.remove_snapshot(
zkhandler, pool, volume, snapshot_name
)
if not retcode:
is_snapshot_remove_failed = True
which_snapshot_remove_failed.append(f"{pool}/{volume}")
msg_snapshot_remove_failed.append(retmsg)
tend = time.time()
ttot = round(tend - tstart, 2)
retlines = list()
if is_snapshot_remove_failed:
retlines.append(f"WARNING: Failed to remove snapshot as requested for volume(s) {', '.join(which_snapshot_remove_failed)}: {', '.join(msg_snapshot_remove_failed)}")
myhostname = gethostname().split(".")[0]
if retain_snapshots:
retlines.append(f"Successfully backed up VM '{domain}' ({backup_type}@{datestring}, snapshots retained) to '{myhostname}:{target_path}' in {ttot}s.")
else:
retlines.append(f"Successfully backed up VM '{domain}' ({backup_type}@{datestring}) to '{myhostname}:{target_path}' in {ttot}s.")
return True, '\n'.join(retlines)
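For reference, the metadata file written in step 7 has roughly this shape, with illustrative values; backup_files pairs each exported image path with its volume size in bytes:
vm_backup = {
    "type": "incremental",
    "datestring": "20231023000000",
    "incremental_parent": "20231022000000",
    "vm_detail": {"name": "web01"},  # truncated here; the full dict from get_list()
    "backup_files": [
        ("web01.20231023000000.pvcdisks/vms.web01_disk0.rbddiff", 21474836992),
    ],
}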
def restore_vm(zkhandler, domain, source_path, datestring):
tstart = time.time()
# 0. Validations
# Validate that VM does not exist in cluster
dom_uuid = getDomainUUID(zkhandler, domain)
if dom_uuid:
return (
False,
f'ERROR: VM "{domain}" already exists in the cluster! Remove or rename it before restoring a backup.',
)
# Validate that the source path is an absolute path
if not re.match(r"^/", source_path):
return (
False,
f"ERROR: Source path {source_path} is not a valid absolute path on the primary coordinator!",
)
# Ensure that source_path (on this node) exists
if not os.path.isdir(source_path):
return False, f"ERROR: Source path {source_path} does not exist!"
# Ensure that domain path (on this node) exists
backup_source_path = f"{source_path}/{domain}"
if not os.path.isdir(backup_source_path):
return False, f"ERROR: Source VM path {backup_source_path} does not exist!"
# Ensure that the archives are present
backup_source_pvcbackup_file = f"{backup_source_path}/{domain}.{datestring}.pvcbackup"
if not os.path.isfile(backup_source_pvcbackup_file):
return False, "ERROR: The specified source backup files do not exist!"
# 1. Read the backup file and get VM details
try:
with open(backup_source_pvcbackup_file) as fh:
backup_source_details = jload(fh)
except Exception as e:
return False, f"ERROR: Failed to read source backup details: {e}"
# Handle incrementals
incremental_parent = backup_source_details.get("incremental_parent", None)
if incremental_parent is not None:
backup_source_parent_pvcbackup_file = (
f"{backup_source_path}/{domain}.{incremental_parent}.pvcbackup"
)
if not os.path.isfile(backup_source_parent_pvcbackup_file):
return (
False,
"ERROR: The specified backup is incremental but the required incremental parent source backup files do not exist!",
)
try:
with open(backup_source_parent_pvcbackup_file) as fh:
backup_source_parent_details = jload(fh)
except Exception as e:
return False, f"ERROR: Failed to read source incremental parent backup details: {e}"
# 2. Import VM config and metadata in provision state
try:
retcode, retmsg = define_vm(
zkhandler,
backup_source_details["vm_detail"]["xml"],
backup_source_details["vm_detail"]["node"],
backup_source_details["vm_detail"]["node_limit"],
backup_source_details["vm_detail"]["node_selector"],
backup_source_details["vm_detail"]["node_autostart"],
backup_source_details["vm_detail"]["migration_method"],
backup_source_details["vm_detail"]["profile"],
backup_source_details["vm_detail"]["tags"],
"restore",
)
if not retcode:
return False, f"ERROR: Failed to define restored VM: {retmsg}"
except Exception as e:
return False, f"ERROR: Failed to parse VM backup details: {e}"
# 3. Import volumes
is_snapshot_remove_failed = False
which_snapshot_remove_failed = list()
msg_snapshot_remove_failed = list()
if incremental_parent is not None:
for volume_file, volume_size in backup_source_details.get('backup_files'):
pool, volume, _ = volume_file.split('/')[-1].split('.')
try:
parent_volume_file = [f[0] for f in backup_source_parent_details.get('backup_files') if f[0].split('/')[-1].replace('.rbdimg', '') == volume_file.split('/')[-1].replace('.rbddiff', '')][0]
except Exception as e:
return False, f"ERROR: Failed to find parent volume for volume {pool}/{volume}; backup may be corrupt or invalid: {e}"
# First we create the expected volumes then clean them up
# This process is a bit of a hack because rbd import does not expect an existing volume,
# but we need the information in PVC.
# Thus create the RBD volume using ceph.add_volume based on the backup size, and then
# manually remove the RBD volume (leaving the PVC metainfo)
retcode, retmsg = ceph.add_volume(zkhandler, pool, volume, volume_size)
if not retcode:
return False, f"ERROR: Failed to create restored volume: {retmsg}"
retcode, stdout, stderr = common.run_os_command(
f"rbd remove {pool}/{volume}"
)
if retcode:
return False, f"ERROR: Failed to remove temporary RBD volume '{pool}/{volume}': {stderr}"
# Next we import the parent images
retcode, stdout, stderr = common.run_os_command(
f"rbd import --export-format 2 --dest-pool {pool} {source_path}/{domain}/{parent_volume_file} {volume}"
)
if retcode:
return False, f"ERROR: Failed to import parent backup image {parent_volume_file}: {stderr}"
# Then we import the incremental diffs
retcode, stdout, stderr = common.run_os_command(
f"rbd import-diff {source_path}/{domain}/{volume_file} {pool}/{volume}"
)
if retcode:
return False, f"ERROR: Failed to import incremental backup image {volume_file}: {stderr}"
# Finally we remove the parent and child snapshots (no longer required)
retcode, stdout, stderr = common.run_os_command(
f"rbd snap rm {pool}/{volume}@backup_{incremental_parent}"
)
if retcode:
return False, f"ERROR: Failed to remove imported image snapshot for {parent_volume_file}: {stderr}"
retcode, stdout, stderr = common.run_os_command(
f"rbd snap rm {pool}/{volume}@backup_{datestring}"
)
if retcode:
return False, f"ERROR: Failed to remove imported image snapshot for {volume_file}: {stderr}"
else:
for volume_file, volume_size in backup_source_details.get('backup_files'):
pool, volume, _ = volume_file.split('/')[-1].split('.')
# First we create the expected volumes then clean them up
# This process is a bit of a hack because rbd import does not expect an existing volume,
# but we need the information in PVC.
# Thus create the RBD volume using ceph.add_volume based on the backup size, and then
# manually remove the RBD volume (leaving the PVC metainfo)
retcode, retmsg = ceph.add_volume(zkhandler, pool, volume, volume_size)
if not retcode:
return False, f"ERROR: Failed to create restored volume: {retmsg}"
retcode, stdout, stderr = common.run_os_command(
f"rbd remove {pool}/{volume}"
)
if retcode:
return False, f"ERROR: Failed to remove temporary RBD volume '{pool}/{volume}': {stderr}"
# Then we perform the actual import
retcode, stdout, stderr = common.run_os_command(
f"rbd import --export-format 2 --dest-pool {pool} {source_path}/{domain}/{volume_file} {volume}"
)
if retcode:
return False, f"ERROR: Failed to import backup image {volume_file}: {stderr}"
# Finally we remove the source snapshot (not required)
retcode, stdout, stderr = common.run_os_command(
f"rbd snap rm {pool}/{volume}@backup_{datestring}"
)
if retcode:
return False, f"ERROR: Failed to remove imported image snapshot for {volume_file}: {stderr}"
# 4. Start VM
retcode, retmsg = start_vm(zkhandler, domain)
if not retcode:
return False, f"ERROR: Failed to start restored VM {domain}: {retmsg}"
tend = time.time()
ttot = round(tend - tstart, 2)
retlines = list()
if is_snapshot_remove_failed:
retlines.append(f"WARNING: Failed to remove parent snapshot as requested for volume(s) {', '.join(which_snapshot_remove_failed)}: {', '.join(msg_snapshot_remove_failed)}")
myhostname = gethostname().split(".")[0]
retlines.append(f"Successfully restored VM backup {datestring} for '{domain}' from '{myhostname}:{source_path}' in {ttot}s.")
return True, '\n'.join(retlines)

debian/changelog
View File

@ -1,3 +1,30 @@
pvc (0.9.78-0) unstable; urgency=high
* [API, Client CLI] Fixes several bugs around image uploads; adds a new query parameter for non-raw images
* [API] Ensures RBD images are created with a raw bytes value to avoid rounding errors
-- Joshua M. Boniface <joshua@boniface.me> Sat, 30 Sep 2023 12:57:55 -0400
pvc (0.9.77-0) unstable; urgency=high
* [Client CLI] Fixes a bug from a bad library import
-- Joshua M. Boniface <joshua@boniface.me> Tue, 19 Sep 2023 11:05:55 -0400
pvc (0.9.76-0) unstable; urgency=high
* [API, Client CLI] Corrects some missing node states for fencing in status output
-- Joshua M. Boniface <joshua@boniface.me> Mon, 18 Sep 2023 10:15:52 -0400
pvc (0.9.75-0) unstable; urgency=high
* [Node Daemon] Adds a startup message about IPMI when succeeding
* [Node Daemon] Fixes a bug in fencing allowing non-failing VMs to migrate
* [Node Daemon] Adds rounding to load average in load plugin for consistency
-- Joshua M. Boniface <joshua@boniface.me> Sat, 16 Sep 2023 23:06:38 -0400
pvc (0.9.74-0) unstable; urgency=high
* [Docs] Removes docs from the main repo

View File

@ -14,7 +14,7 @@ sys.path.append('api-daemon')
import pvcapid.flaskapi as pvc_api
swagger_file = "docs/manuals/swagger.json"
swagger_file = "swagger.json"
swagger_data = swagger(pvc_api.app)
swagger_data['info']['version'] = "1.0"
swagger_data['info']['title'] = "PVC Client and Provisioner API"
@ -22,3 +22,5 @@ swagger_data['host'] = "pvc.local:7370"
with open(swagger_file, 'w') as fd:
fd.write(json.dumps(swagger_data, sort_keys=True, indent=4))
print(f"Swagger file output to {swagger_file}; add it to the PVC documentation repo.")

View File

@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
# Generate the database migration files

View File

@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
# Generate the Zookeeper migration files

View File

@ -76,7 +76,7 @@ class MonitoringPluginScript(MonitoringPlugin):
ipmi_password = self.config["ipmi_password"]
retcode, _, _ = run_os_command(
f"/usr/bin/ipmitool -I lanplus -H {ipmi_hostname} -U {ipmi_username} -P {ipmi_password} chassis power status",
timeout=2
timeout=5
)
if retcode > 0:

View File

@ -72,7 +72,7 @@ class MonitoringPluginScript(MonitoringPlugin):
from psutil import cpu_count
# Get the current 1-minute system load average
load_average = getloadavg()[0]
load_average = float(round(getloadavg()[0], 2))
# Get the number of CPU cores
cpu_cores = cpu_count()

View File

@ -132,7 +132,7 @@ class MonitoringPluginScript(MonitoringPlugin):
for slave_interface in slave_interfaces:
if slave_interface[1] == 'up':
slave_interface_up_count += 1
if slave_interface_up_count < 2:
if slave_interface_up_count < len(slave_interfaces):
messages.append(f"{dev} DEGRADED with {slave_interface_up_count} active slaves")
health_delta += 10
else:

View File

@ -49,7 +49,7 @@ import re
import json
# Daemon version
version = "0.9.74"
version = "0.9.78"
##########################################################
@ -324,9 +324,14 @@ def entrypoint():
config["ipmi_hostname"], config["ipmi_username"], config["ipmi_password"]
):
logger.out(
"Our IPMI is not reachable; fencing of this node will likely fail",
"Our IPMI interface is not reachable; fencing of this node will fail until corrected",
state="w",
)
else:
logger.out(
"Our IPMI interface is reachable; fencing of this node is possible",
state="o",
)
# Validate libvirt
if not pvcnoded.util.libvirt.validate_libvirtd(logger, config):

View File

@ -153,7 +153,13 @@ def migrateFromFencedNode(zkhandler, node_name, config, logger):
# Loop through the VMs
for dom_uuid in dead_node_running_domains:
fence_migrate_vm(dom_uuid)
try:
fence_migrate_vm(dom_uuid)
except Exception as e:
logger.out(
f"Failed to migrate VM {dom_uuid}, continuing: {e}",
state="w",
)
# Set node in flushed state for easy remigrating when it comes back
zkhandler.write([(("node.state.domain", node_name), "flushed")])

View File

@ -77,5 +77,5 @@ def start_system_services(logger, config):
start_ceph_mon(logger, config)
start_ceph_mgr(logger, config)
logger.out("Waiting 3 seconds for daemons to start", state="s")
sleep(3)
logger.out("Waiting 10 seconds for daemons to start", state="s")
sleep(10)