Compare commits
42 Commits
35e27f79ef ... v0.9.79

221af3f241
35f80e544c
83b937654c
714bde89e6
c87736eb0a
63d0a85e29
43e8cd3b07
55ca131c2c
0769f1ea52
c858ae8fed
8d256a1737
d3b3fdfc80
f1b29ea94e
38abd078af
fabb97cf48
50aabde320
68124db323
8921efd269
3e259bd926
3d12915989
67b0b19bca
5d0c674d1d
f3bc4dee04
f441b0d823
fd2331faa6
a5d0f219e4
0169510df0
a58c1d5a8c
a8e4b01b67
45c4c86911
6448b31d2c
4fc9b15652
75b839692b
751cfe0b29
b997c6f31e
6e83300d78
522da3fd95
3a1bf0724e
ee494fb1c0
c6c44bf775
bbb940da65
a0b45a2bcd
CHANGELOG.md (15 changes)
@ -1,5 +1,20 @@

## PVC Changelog

###### [v0.9.79](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.79)

**API Changes**: New endpoints /vm/{vm}/backup, /vm/{vm}/restore

* [CLI Client] Fixes some storage pool help text messages
* [Node Daemon] Increases the IPMI monitoring plugin timeout
* [All] Adds support for VM backups, including creation, removal, and restore
* [Repository] Fixes shebangs in scripts to be consistent
* [Daemon Library] Improves the handling of VM list arguments (default None)

###### [v0.9.78](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.78)

* [API, Client CLI] Fixes several bugs around image uploads; adds a new query parameter for non-raw images
* [API] Ensures RBD images are created with a raw bytes value to avoid rounding errors

###### [v0.9.77](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.77)

* [Client CLI] Fixes a bug from a bad library import

@ -27,7 +27,7 @@ from ssl import SSLContext, TLSVersion
|
||||
from distutils.util import strtobool as dustrtobool
|
||||
|
||||
# Daemon version
|
||||
version = "0.9.77"
|
||||
version = "0.9.79"
|
||||
|
||||
# API version
|
||||
API_VERSION = 1.0
|
||||
|
@ -2140,7 +2140,7 @@ class API_VM_Locks(Resource):
|
||||
api.add_resource(API_VM_Locks, "/vm/<vm>/locks")
|
||||
|
||||
|
||||
# /vm/<vm</console
|
||||
# /vm/<vm>/console
|
||||
class API_VM_Console(Resource):
|
||||
@RequestParser([{"name": "lines"}])
|
||||
@Authenticator
|
||||
@ -2293,6 +2293,202 @@ class API_VM_Device(Resource):
|
||||
api.add_resource(API_VM_Device, "/vm/<vm>/device")
|
||||
|
||||
|
||||
# /vm/<vm>/backup
|
||||
class API_VM_Backup(Resource):
|
||||
@RequestParser(
|
||||
[
|
||||
{
|
||||
"name": "backup_path",
|
||||
"required": True,
|
||||
"helptext": "A local filesystem path on the primary coordinator must be specified",
|
||||
},
|
||||
{
|
||||
"name": "incremental_parent",
|
||||
"required": False,
|
||||
},
|
||||
{
|
||||
"name": "retain_snapshot",
|
||||
"required": False,
|
||||
},
|
||||
]
|
||||
)
|
||||
@Authenticator
|
||||
def post(self, vm, reqargs):
|
||||
"""
|
||||
Create a backup of {vm} and its volumes to a local primary coordinator filesystem path
|
||||
---
|
||||
tags:
|
||||
- vm
|
||||
parameters:
|
||||
- in: query
|
||||
name: backup_path
|
||||
type: string
|
||||
required: true
|
||||
description: A local filesystem path on the primary coordinator to store the backup
|
||||
- in: query
|
||||
name: incremental_parent
|
||||
type: string
|
||||
required: false
|
||||
description: A previous backup datestamp to use as an incremental parent; if unspecified a full backup is taken
|
||||
- in: query
|
||||
name: retain_snapshot
|
||||
type: boolean
|
||||
required: false
|
||||
default: false
|
||||
description: Whether or not to retain this backup's volume snapshots to use as a future incremental parent; full backups only
|
||||
responses:
|
||||
200:
|
||||
description: OK
|
||||
schema:
|
||||
type: object
|
||||
id: Message
|
||||
400:
|
||||
description: Execution error
|
||||
schema:
|
||||
type: object
|
||||
id: Message
|
||||
404:
|
||||
description: Not found
|
||||
schema:
|
||||
type: object
|
||||
id: Message
|
||||
"""
|
||||
backup_path = reqargs.get("backup_path", None)
|
||||
incremental_parent = reqargs.get("incremental_parent", None)
|
||||
retain_snapshot = bool(strtobool(reqargs.get("retain_snapshot", "false")))
|
||||
return api_helper.vm_backup(
|
||||
vm, backup_path, incremental_parent, retain_snapshot
|
||||
)
|
||||
|
||||
@RequestParser(
|
||||
[
|
||||
{
|
||||
"name": "backup_path",
|
||||
"required": True,
|
||||
"helptext": "A local filesystem path on the primary coordinator must be specified",
|
||||
},
|
||||
{
|
||||
"name": "backup_datestring",
|
||||
"required": True,
|
||||
"helptext": "A backup datestring must be specified",
|
||||
},
|
||||
]
|
||||
)
|
||||
@Authenticator
|
||||
def delete(self, vm, reqargs):
|
||||
"""
|
||||
Remove a backup of {vm}, including snapshots, from a local primary coordinator filesystem path
|
||||
---
|
||||
tags:
|
||||
- vm
|
||||
parameters:
|
||||
- in: query
|
||||
name: backup_path
|
||||
type: string
|
||||
required: true
|
||||
description: A local filesystem path on the primary coordinator where the backup is stored
|
||||
- in: query
|
||||
name: backup_datestring
|
||||
type: string
|
||||
required: true
|
||||
description: The backup datestring identifier (e.g. 20230102030405)
|
||||
responses:
|
||||
200:
|
||||
description: OK
|
||||
schema:
|
||||
type: object
|
||||
id: Message
|
||||
400:
|
||||
description: Execution error
|
||||
schema:
|
||||
type: object
|
||||
id: Message
|
||||
404:
|
||||
description: Not found
|
||||
schema:
|
||||
type: object
|
||||
id: Message
|
||||
"""
|
||||
backup_path = reqargs.get("backup_path", None)
|
||||
backup_datestring = reqargs.get("backup_datestring", None)
|
||||
return api_helper.vm_remove_backup(vm, backup_path, backup_datestring)
|
||||
|
||||
|
||||
api.add_resource(API_VM_Backup, "/vm/<vm>/backup")
|
||||
|
||||
|
||||
# /vm/<vm>/restore
|
||||
class API_VM_Restore(Resource):
|
||||
@RequestParser(
|
||||
[
|
||||
{
|
||||
"name": "backup_path",
|
||||
"required": True,
|
||||
"helptext": "A local filesystem path on the primary coordinator must be specified",
|
||||
},
|
||||
{
|
||||
"name": "backup_datestring",
|
||||
"required": True,
|
||||
"helptext": "A backup datestring must be specified",
|
||||
},
|
||||
{
|
||||
"name": "retain_snapshot",
|
||||
"required": False,
|
||||
},
|
||||
]
|
||||
)
|
||||
@Authenticator
|
||||
def post(self, vm, reqargs):
|
||||
"""
|
||||
Restore a backup of {vm} and its volumes from a local primary coordinator filesystem path
|
||||
---
|
||||
tags:
|
||||
- vm
|
||||
parameters:
|
||||
- in: query
|
||||
name: backup_path
|
||||
type: string
|
||||
required: true
|
||||
description: A local filesystem path on the primary coordinator where the backup is stored
|
||||
- in: query
|
||||
name: backup_datestring
|
||||
type: string
|
||||
required: true
|
||||
description: The backup datestring identifier (e.g. 20230102030405)
|
||||
- in: query
|
||||
name: retain_snapshot
|
||||
type: boolean
|
||||
required: false
|
||||
default: true
|
||||
description: Whether or not to retain the (parent, if incremental) volume snapshot after restore
|
||||
responses:
|
||||
200:
|
||||
description: OK
|
||||
schema:
|
||||
type: object
|
||||
id: Message
|
||||
400:
|
||||
description: Execution error
|
||||
schema:
|
||||
type: object
|
||||
id: Message
|
||||
404:
|
||||
description: Not found
|
||||
schema:
|
||||
type: object
|
||||
id: Message
|
||||
"""
|
||||
backup_path = reqargs.get("backup_path", None)
|
||||
backup_datestring = reqargs.get("backup_datestring", None)
|
||||
retain_snapshot = bool(strtobool(reqargs.get("retain_snapshot", "true")))
|
||||
return api_helper.vm_restore(
|
||||
vm, backup_path, backup_datestring, retain_snapshot
|
||||
)
|
||||
|
||||
|
||||
api.add_resource(API_VM_Restore, "/vm/<vm>/restore")
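
The two resources above take only query parameters, so they can be exercised with any HTTP client once the API is reachable. A minimal sketch using Python requests follows; the base URL, port, authentication header, VM name, and backup path are assumptions for illustration, while the endpoint paths and parameters mirror the definitions above.

```python
# Minimal sketch of driving the new endpoints with the "requests" library.
# The base URL, port, and authentication header are assumptions for
# illustration; the paths and query parameters mirror the definitions above.
import requests

API = "http://pvc-primary.example.com:7370/api/v1"   # hypothetical address
HEADERS = {"X-Api-Key": "secret"}                     # if authentication is enabled

# Full backup of VM "web01", retaining snapshots for later incrementals
r = requests.post(
    f"{API}/vm/web01/backup",
    headers=HEADERS,
    params={"backup_path": "/srv/backups", "retain_snapshot": "true"},
)
print(r.status_code, r.json().get("message"))

# Remove a backup by its datestring
r = requests.delete(
    f"{API}/vm/web01/backup",
    headers=HEADERS,
    params={"backup_path": "/srv/backups", "backup_datestring": "20230102030405"},
)

# Restore that backup (the VM must not already exist in the cluster)
r = requests.post(
    f"{API}/vm/web01/restore",
    headers=HEADERS,
    params={"backup_path": "/srv/backups", "backup_datestring": "20230102030405"},
)
```
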
|
||||
|
||||
|
||||
##########################################################
|
||||
# Client API - Network
|
||||
##########################################################
|
||||
@ -4843,7 +5039,7 @@ class API_Storage_Ceph_Volume_Root(Resource):
|
||||
{
|
||||
"name": "size",
|
||||
"required": True,
|
||||
"helptext": "A volume size in bytes (or with k/M/G/T suffix) must be specified.",
|
||||
"helptext": "A volume size in bytes (B implied or with SI suffix k/M/G/T) must be specified.",
|
||||
},
|
||||
]
|
||||
)
|
||||
@ -4869,7 +5065,7 @@ class API_Storage_Ceph_Volume_Root(Resource):
|
||||
name: size
|
||||
type: string
|
||||
required: true
|
||||
description: The volume size in bytes (or with a metric suffix, i.e. k/M/G/T)
|
||||
description: The volume size, in bytes (B implied) or with a single-character SI suffix (k/M/G/T)
|
||||
responses:
|
||||
200:
|
||||
description: OK
|
||||
@ -5122,7 +5318,7 @@ class API_Storage_Ceph_Volume_Element_Upload(Resource):
|
||||
name: file_size
|
||||
type: integer
|
||||
required: false
|
||||
description: The size of the image file, if {image_format} is not "raw"
|
||||
description: The size of the image file, in bytes, if {image_format} is not "raw"
|
||||
responses:
|
||||
200:
|
||||
description: OK
|
||||
|
@ -470,6 +470,88 @@ def vm_define(
|
||||
return output, retcode
|
||||
|
||||
|
||||
@ZKConnection(config)
|
||||
def vm_backup(
|
||||
zkhandler,
|
||||
domain,
|
||||
backup_path,
|
||||
incremental_parent=None,
|
||||
retain_snapshot=False,
|
||||
):
|
||||
"""
|
||||
Back up a VM to a local (primary coordinator) filesystem path.
|
||||
"""
|
||||
retflag, retdata = pvc_vm.backup_vm(
|
||||
zkhandler,
|
||||
domain,
|
||||
backup_path,
|
||||
incremental_parent,
|
||||
retain_snapshot,
|
||||
)
|
||||
|
||||
if retflag:
|
||||
retcode = 200
|
||||
else:
|
||||
retcode = 400
|
||||
|
||||
output = {"message": retdata.replace('"', "'")}
|
||||
return output, retcode
|
||||
|
||||
|
||||
@ZKConnection(config)
|
||||
def vm_remove_backup(
|
||||
zkhandler,
|
||||
domain,
|
||||
source_path,
|
||||
datestring,
|
||||
):
|
||||
"""
|
||||
Remove a VM backup from snapshots and a local (primary coordinator) filesystem path.
|
||||
"""
|
||||
retflag, retdata = pvc_vm.remove_backup(
|
||||
zkhandler,
|
||||
domain,
|
||||
source_path,
|
||||
datestring,
|
||||
)
|
||||
|
||||
if retflag:
|
||||
retcode = 200
|
||||
else:
|
||||
retcode = 400
|
||||
|
||||
output = {"message": retdata.replace('"', "'")}
|
||||
return output, retcode
|
||||
|
||||
|
||||
@ZKConnection(config)
|
||||
def vm_restore(
|
||||
zkhandler,
|
||||
domain,
|
||||
backup_path,
|
||||
datestring,
|
||||
retain_snapshot=False,
|
||||
):
|
||||
"""
|
||||
Restore a VM from a local (primary coordinator) filesystem path.
|
||||
"""
|
||||
retflag, retdata = pvc_vm.restore_vm(
|
||||
zkhandler,
|
||||
domain,
|
||||
backup_path,
|
||||
datestring,
|
||||
retain_snapshot,
|
||||
)
|
||||
|
||||
if retflag:
|
||||
retcode = 200
|
||||
else:
|
||||
retcode = 400
|
||||
|
||||
output = {"message": retdata.replace('"', "'")}
|
||||
return output, retcode
|
||||
|
||||
|
||||
@ZKConnection(config)
|
||||
def vm_attach_device(zkhandler, vm, device_spec_xml):
|
||||
"""
|
||||
@ -1629,7 +1711,6 @@ def ceph_volume_upload(zkhandler, pool, volume, img_type, file_size=None):
|
||||
zkhandler, pool, "{}_tmp".format(volume)
|
||||
)
|
||||
|
||||
# Create a temporary block device to store non-raw images
|
||||
if img_type == "raw":
|
||||
if file_size != dev_size:
|
||||
output = {
|
||||
@ -1676,7 +1757,6 @@ def ceph_volume_upload(zkhandler, pool, volume, img_type, file_size=None):
|
||||
cleanup_maps_and_volumes()
|
||||
return output, retcode
|
||||
|
||||
# Write the image directly to the blockdev
|
||||
else:
|
||||
if file_size is None:
|
||||
output = {"message": "A file size must be specified"}
|
||||
|
@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# A useful script for testing out changes to PVC by building the debs and deploying them out to a
|
||||
# set of hosts automatically, including restarting the daemon (with a pause between) on the remote
|
||||
@ -36,34 +36,37 @@ echo "Preparing code (format and lint)..."
|
||||
./lint || exit 1
|
||||
|
||||
# Build the packages
|
||||
echo -n "Building packages... "
|
||||
echo -n "Building packages..."
|
||||
version="$( ./build-unstable-deb.sh 2>/dev/null )"
|
||||
echo "done. Package version ${version}."
|
||||
echo " done. Package version ${version}."
|
||||
|
||||
# Install the client(s) locally
|
||||
echo -n "Installing client packages locally... "
|
||||
echo -n "Installing client packages locally..."
|
||||
$SUDO dpkg -i ../pvc-client*_${version}*.deb &>/dev/null
|
||||
echo "done".
|
||||
echo " done".
|
||||
|
||||
for HOST in ${HOSTS[@]}; do
|
||||
echo "> Deploying packages to host ${HOST}"
|
||||
echo -n "Copying packages... "
|
||||
echo -n "Copying packages..."
|
||||
ssh $HOST $SUDO rm -rf /tmp/pvc &>/dev/null
|
||||
ssh $HOST mkdir /tmp/pvc &>/dev/null
|
||||
scp ../pvc-*_${version}*.deb $HOST:/tmp/pvc/ &>/dev/null
|
||||
echo "done."
|
||||
echo -n "Installing packages... "
|
||||
echo " done."
|
||||
echo -n "Installing packages..."
|
||||
ssh $HOST $SUDO dpkg -i /tmp/pvc/{pvc-client-cli,pvc-daemon-common,pvc-daemon-api,pvc-daemon-node}*.deb &>/dev/null
|
||||
ssh $HOST rm -rf /tmp/pvc &>/dev/null
|
||||
echo "done."
|
||||
echo -n "Restarting PVC daemons... "
|
||||
echo " done."
|
||||
echo -n "Restarting PVC daemons..."
|
||||
ssh $HOST $SUDO systemctl restart pvcapid &>/dev/null
|
||||
ssh $HOST $SUDO systemctl restart pvcapid-worker &>/dev/null
|
||||
ssh $HOST $SUDO systemctl restart pvcnoded &>/dev/null
|
||||
echo "done."
|
||||
echo -n "Waiting 30s for host to stabilize... "
|
||||
sleep 30
|
||||
echo "done."
|
||||
echo " done."
|
||||
echo -n "Waiting for node daemon to be running..."
|
||||
while [[ $( ssh $HOST "pvc -q node list -f json ${HOST%%.*} | jq -r '.[].daemon_state'" ) != "run" ]]; do
|
||||
sleep 5
|
||||
echo -n "."
|
||||
done
|
||||
echo " done."
|
||||
done
|
||||
if [[ -z ${KEEP_ARTIFACTS} ]]; then
|
||||
rm ../pvc*_${version}*
|
||||
|
@ -1,4 +1,4 @@
|
||||
#!/bin/sh
|
||||
#!/usr/bin/env bash
|
||||
pushd $( git rev-parse --show-toplevel ) &>/dev/null
|
||||
ver="$( head -1 debian/changelog | awk -F'[()-]' '{ print $2 }' )"
|
||||
git pull
|
||||
|
@ -1,4 +1,4 @@
|
||||
#!/bin/sh
|
||||
#!/usr/bin/env bash
|
||||
set -o xtrace
|
||||
exec 3>&1
|
||||
exec 1>&2
|
||||
|
@ -1590,6 +1590,149 @@ def cli_vm_flush_locks(domain):
|
||||
finish(retcode, retmsg)
|
||||
|
||||
|
||||
###############################################################################
|
||||
# > pvc vm backup
|
||||
###############################################################################
|
||||
@click.group(
|
||||
name="backup",
|
||||
short_help="Manage backups for PVC VMs.",
|
||||
context_settings=CONTEXT_SETTINGS,
|
||||
)
|
||||
def cli_vm_backup():
|
||||
"""
|
||||
Manage backups of VMs in a PVC cluster.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
###############################################################################
|
||||
# > pvc vm backup create
|
||||
###############################################################################
|
||||
@click.command(name="create", short_help="Create a backup of a virtual machine.")
|
||||
@connection_req
|
||||
@click.argument("domain")
|
||||
@click.argument("backup_path")
|
||||
@click.option(
|
||||
"-i",
|
||||
"--incremental",
|
||||
"incremental_parent",
|
||||
default=None,
|
||||
help="Perform an incremental volume backup from this parent backup datestring.",
|
||||
)
|
||||
@click.option(
|
||||
"-r",
|
||||
"--retain-snapshot",
|
||||
"retain_snapshot",
|
||||
is_flag=True,
|
||||
default=False,
|
||||
help="Retain volume snapshot for future incremental use (full only).",
|
||||
)
|
||||
def cli_vm_backup_create(domain, backup_path, incremental_parent, retain_snapshot):
|
||||
"""
|
||||
Create a backup of virtual machine DOMAIN to BACKUP_PATH on the cluster primary coordinator. DOMAIN may be a UUID or name.
|
||||
|
||||
BACKUP_PATH must be a valid absolute directory path on the cluster "primary" coordinator (see "pvc node list") allowing writes from the API daemon (normally running as "root"). The BACKUP_PATH should be a large storage volume, ideally a remotely mounted filesystem (e.g. NFS, SSHFS, etc.) or non-Ceph-backed disk; PVC does not handle this path, that is up to the administrator to configure and manage.
|
||||
|
||||
The backup will export the VM configuration, metainfo, and a point-in-time snapshot of all attached RBD volumes, using a datestring formatted backup name (i.e. YYYYMMDDHHMMSS).
|
||||
|
||||
The virtual machine DOMAIN may be running, and due to snapshots the backup should be crash-consistent, but will be in an unclean state and this must be considered when restoring from backups.
|
||||
|
||||
Incremental snapshots are possible by specifying the "-i"/"--incremental" option along with a source backup datestring. The snapshots from that source backup must have been retained using the "-r"/"--retain-snapshot" option. Retaining snapshots of incremental backups is not supported as incremental backups cannot be chained.
|
||||
|
||||
Full backup volume images are sparse-allocated; however, it is recommended for safety to consider their maximum allocated size when allocating space for the BACKUP_PATH. Incremental volume images are generally small but are dependent entirely on the rate of data change in each volume.
|
||||
"""
|
||||
|
||||
echo(
|
||||
CLI_CONFIG,
|
||||
f"Backing up VM '{domain}'... ",
|
||||
newline=False,
|
||||
)
|
||||
retcode, retmsg = pvc.lib.vm.vm_backup(
|
||||
CLI_CONFIG, domain, backup_path, incremental_parent, retain_snapshot
|
||||
)
|
||||
if retcode:
|
||||
echo(CLI_CONFIG, "done.")
|
||||
else:
|
||||
echo(CLI_CONFIG, "failed.")
|
||||
finish(retcode, retmsg)
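
As an illustration of the workflow this command enables (the VM name, path, and datestring here are hypothetical, not taken from this changeset): a first full backup that keeps its snapshots could be run as `pvc vm backup create -r web01 /srv/backups`, and a later incremental against it as `pvc vm backup create -i 20231024021024 web01 /srv/backups`, where the datestring is the YYYYMMDDHHMMSS identifier reported by the earlier full backup.
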
|
||||
|
||||
|
||||
###############################################################################
|
||||
# > pvc vm backup restore
|
||||
###############################################################################
|
||||
@click.command(name="restore", short_help="Restore a backup of a virtual machine.")
|
||||
@connection_req
|
||||
@click.argument("domain")
|
||||
@click.argument("backup_datestring")
|
||||
@click.argument("backup_path")
|
||||
@click.option(
|
||||
"-r/-R",
|
||||
"--retain-snapshot/--remove-snapshot",
|
||||
"retain_snapshot",
|
||||
is_flag=True,
|
||||
default=True,
|
||||
help="Retain or remove restored (parent, if incremental) snapshot.",
|
||||
)
|
||||
def cli_vm_backup_restore(domain, backup_datestring, backup_path, retain_snapshot):
|
||||
"""
|
||||
Restore the backup BACKUP_DATESTRING of virtual machine DOMAIN stored in BACKUP_PATH on the cluster primary coordinator. DOMAIN may be a UUID or name.
|
||||
|
||||
BACKUP_PATH must be a valid absolute directory path on the cluster "primary" coordinator (see "pvc node list") allowing reads from the API daemon (normally running as "root"). The BACKUP_PATH should be a large storage volume, ideally a remotely mounted filesystem (e.g. NFS, SSHFS, etc.) or non-Ceph-backed disk; PVC does not handle this path, that is up to the administrator to configure and manage.
|
||||
|
||||
The restore will import the VM configuration, metainfo, and the point-in-time snapshot of all attached RBD volumes. Incremental backups will be automatically handled.
|
||||
|
||||
A VM named DOMAIN or with the same UUID must not exist; if a VM with the same name or UUID already exists, it must be removed, or renamed and then undefined (to preserve volumes), before restoring.
|
||||
|
||||
If the "-r"/"--retain-snapshot" option is specified (the default), for incremental restores, only the parent snapshot is kept; for full restores, the restored snapshot is kept. If the "-R"/"--remove-snapshot" option is specified, the imported snapshot is removed.
|
||||
|
||||
WARNING: The "-R"/"--remove-snapshot" option will invalidate any existing incremental backups based on the same incremental parent for the restored VM.
|
||||
"""
|
||||
|
||||
echo(
|
||||
CLI_CONFIG,
|
||||
f"Restoring backup {backup_datestring} of VM '{domain}'... ",
|
||||
newline=False,
|
||||
)
|
||||
retcode, retmsg = pvc.lib.vm.vm_restore(
|
||||
CLI_CONFIG, domain, backup_path, backup_datestring, retain_snapshot
|
||||
)
|
||||
if retcode:
|
||||
echo(CLI_CONFIG, "done.")
|
||||
else:
|
||||
echo(CLI_CONFIG, "failed.")
|
||||
finish(retcode, retmsg)
|
||||
|
||||
|
||||
###############################################################################
|
||||
# > pvc vm backup remove
|
||||
###############################################################################
|
||||
@click.command(name="remove", short_help="Remove a backup of a virtual machine.")
|
||||
@connection_req
|
||||
@click.argument("domain")
|
||||
@click.argument("backup_datestring")
|
||||
@click.argument("backup_path")
|
||||
def cli_vm_backup_remove(domain, backup_datestring, backup_path):
|
||||
"""
|
||||
Remove the backup BACKUP_DATESTRING, including snapshots, of virtual machine DOMAIN stored in BACKUP_PATH on the cluster primary coordinator. DOMAIN may be a UUID or name.
|
||||
|
||||
WARNING: Removing an incremental parent will invalidate any existing incremental backups based on that backup.
|
||||
"""
|
||||
|
||||
echo(
|
||||
CLI_CONFIG,
|
||||
f"Removing backup {backup_datestring} of VM '{domain}'... ",
|
||||
newline=False,
|
||||
)
|
||||
retcode, retmsg = pvc.lib.vm.vm_remove_backup(
|
||||
CLI_CONFIG, domain, backup_path, backup_datestring
|
||||
)
|
||||
if retcode:
|
||||
echo(CLI_CONFIG, "done.")
|
||||
else:
|
||||
echo(CLI_CONFIG, "failed.")
|
||||
finish(retcode, retmsg)
|
||||
|
||||
|
||||
###############################################################################
|
||||
# > pvc vm tag
|
||||
###############################################################################
|
||||
@ -3457,14 +3600,14 @@ def cli_storage_pool():
|
||||
show_default=True,
|
||||
required=False,
|
||||
help="""
|
||||
The replication configuration, specifying both a "copies" and "mincopies" value, separated by a comma, e.g. "copies=3,mincopies=2". The "copies" value specifies the total number of replicas and should not exceed the total number of nodes; the "mincopies" value specifies the minimum number of available copies to allow writes. For additional details please see the Cluster Architecture documentation.
|
||||
The replication configuration, specifying both a "copies" and "mincopies" value, separated by a comma, e.g. "copies=3,mincopies=2". The "copies" value specifies the total number of replicas and the "mincopies" value specifies the minimum number of active replicas to allow I/O. For additional details please see the documentation.
|
||||
""",
|
||||
)
|
||||
def cli_storage_pool_add(name, pgs, tier, replcfg):
|
||||
"""
|
||||
Add a new Ceph RBD pool with name NAME and PGS placement groups.
|
||||
|
||||
The placement group count must be a non-zero power of 2.
|
||||
The placement group count must be a non-zero power of 2. Generally you should choose a PGS number such that there will be 50-150 PGs on each OSD in a single node (before replicas); 64, 128, or 256 are good values for small clusters (1-5 OSDs per node); higher values are recommended for higher node or OSD counts. For additional details please see the documentation.
|
||||
"""
|
||||
|
||||
retcode, retmsg = pvc.lib.storage.ceph_pool_add(
|
||||
@ -3503,9 +3646,9 @@ def cli_storage_pool_set_pgs(name, pgs):
|
||||
"""
|
||||
Set the placement groups (PGs) count for the pool NAME to PGS.
|
||||
|
||||
The placement group count must be a non-zero power of 2.
|
||||
The placement group count must be a non-zero power of 2. Generally you should choose a PGS number such that there will be 50-150 PGs on each OSD in a single node (before replicas); 64, 128, or 256 are good values for small clusters (1-5 OSDs per node); higher values are recommended for higher node or OSD counts. For additional details please see the documentation.
|
||||
|
||||
Placement group counts may be increased or decreased as required though frequent alteration is not recommended.
|
||||
Placement group counts may be increased or decreased as required though frequent alteration is not recommended. Placement group alterations are intensive operations on the storage cluster.
|
||||
"""
|
||||
|
||||
retcode, retmsg = pvc.lib.storage.ceph_pool_set_pgs(CLI_CONFIG, name, pgs)
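
To make the sizing guidance in these help texts concrete (illustrative numbers only, not from this changeset): with 4 OSDs per node, a 256-PG pool yields roughly 256 / 4 = 64 PGs per OSD before replicas, comfortably inside the suggested 50-150 range, whereas with 8 OSDs per node the same pool yields only 32, so 512 PGs would be a better starting point.
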
|
||||
@ -5659,6 +5802,10 @@ cli_vm.add_command(cli_vm_move)
|
||||
cli_vm.add_command(cli_vm_migrate)
|
||||
cli_vm.add_command(cli_vm_unmigrate)
|
||||
cli_vm.add_command(cli_vm_flush_locks)
|
||||
cli_vm_backup.add_command(cli_vm_backup_create)
|
||||
cli_vm_backup.add_command(cli_vm_backup_restore)
|
||||
cli_vm_backup.add_command(cli_vm_backup_remove)
|
||||
cli_vm.add_command(cli_vm_backup)
|
||||
cli_vm_tag.add_command(cli_vm_tag_get)
|
||||
cli_vm_tag.add_command(cli_vm_tag_add)
|
||||
cli_vm_tag.add_command(cli_vm_tag_remove)
|
||||
|
@ -433,6 +433,70 @@ def vm_locks(config, vm):
|
||||
return retstatus, response.json().get("message", "")
|
||||
|
||||
|
||||
def vm_backup(config, vm, backup_path, incremental_parent=None, retain_snapshot=False):
|
||||
"""
|
||||
Create a backup of {vm} and its volumes to a local primary coordinator filesystem path
|
||||
|
||||
API endpoint: POST /vm/{vm}/backup
|
||||
API arguments: backup_path={backup_path}, incremental_parent={incremental_parent}, retain_snapshot={retain_snapshot}
|
||||
API schema: {"message":"{data}"}
|
||||
"""
|
||||
params = {
|
||||
"backup_path": backup_path,
|
||||
"incremental_parent": incremental_parent,
|
||||
"retain_snapshot": retain_snapshot,
|
||||
}
|
||||
response = call_api(config, "post", "/vm/{vm}/backup".format(vm=vm), params=params)
|
||||
|
||||
if response.status_code != 200:
|
||||
return False, response.json().get("message", "")
|
||||
else:
|
||||
return True, response.json().get("message", "")
|
||||
|
||||
|
||||
def vm_remove_backup(config, vm, backup_path, backup_datestring):
|
||||
"""
|
||||
Remove a backup of {vm}, including snapshots, from a local primary coordinator filesystem path
|
||||
|
||||
API endpoint: DELETE /vm/{vm}/backup
|
||||
API arguments: backup_path={backup_path}, backup_datestring={backup_datestring}
|
||||
API schema: {"message":"{data}"}
|
||||
"""
|
||||
params = {
|
||||
"backup_path": backup_path,
|
||||
"backup_datestring": backup_datestring,
|
||||
}
|
||||
response = call_api(
|
||||
config, "delete", "/vm/{vm}/backup".format(vm=vm), params=params
|
||||
)
|
||||
|
||||
if response.status_code != 200:
|
||||
return False, response.json().get("message", "")
|
||||
else:
|
||||
return True, response.json().get("message", "")
|
||||
|
||||
|
||||
def vm_restore(config, vm, backup_path, backup_datestring, retain_snapshot=False):
|
||||
"""
|
||||
Restore a backup of {vm} and its volumes from a local primary coordinator filesystem path
|
||||
|
||||
API endpoint: POST /vm/{vm}/restore
|
||||
API arguments: backup_path={backup_path}, backup_datestring={backup_datestring}, retain_snapshot={retain_snapshot}
|
||||
API schema: {"message":"{data}"}
|
||||
"""
|
||||
params = {
|
||||
"backup_path": backup_path,
|
||||
"backup_datestring": backup_datestring,
|
||||
"retain_snapshot": retain_snapshot,
|
||||
}
|
||||
response = call_api(config, "post", "/vm/{vm}/restore".format(vm=vm), params=params)
|
||||
|
||||
if response.status_code != 200:
|
||||
return False, response.json().get("message", "")
|
||||
else:
|
||||
return True, response.json().get("message", "")
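
Each of these client helpers returns a (success, message) pair in the same shape the CLI commands consume. A minimal sketch of scripting them directly follows; "config" stands in for the CLI's loaded connection dictionary, whose exact keys are not shown in this diff.

```python
# Minimal sketch of calling the client library helpers directly.
# "config" is a placeholder for the CLI's loaded connection dictionary;
# its exact contents are assumed here, not taken from this diff.
import pvc.lib.vm

def backup_then_verify(config, vm, path):
    # Full backup, retaining snapshots for a later incremental
    ok, msg = pvc.lib.vm.vm_backup(config, vm, path, retain_snapshot=True)
    print(msg)
    if not ok:
        raise RuntimeError(f"Backup of {vm} failed")

# Later, remove an old backup by its datestring (hypothetical value):
# ok, msg = pvc.lib.vm.vm_remove_backup(config, vm, path, "20230102030405")
```
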
|
||||
|
||||
|
||||
def vm_vcpus_set(config, vm, vcpus, topology, restart):
|
||||
"""
|
||||
Set the vCPU count of the VM with topology
|
||||
|
@ -2,7 +2,7 @@ from setuptools import setup
|
||||
|
||||
setup(
|
||||
name="pvc",
|
||||
version="0.9.77",
|
||||
version="0.9.79",
|
||||
packages=["pvc.cli", "pvc.lib"],
|
||||
install_requires=[
|
||||
"Click",
|
||||
|
@ -763,9 +763,7 @@ def add_volume(zkhandler, pool, name, size):
|
||||
|
||||
# 2. Create the volume
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
"rbd create --size {} {}/{}".format(
|
||||
format_bytes_tohuman(size_bytes), pool, name
|
||||
)
|
||||
"rbd create --size {}B {}/{}".format(size_bytes, pool, name)
|
||||
)
|
||||
if retcode:
|
||||
return False, 'ERROR: Failed to create RBD volume "{}": {}'.format(name, stderr)
|
||||
@ -1115,23 +1113,24 @@ def getCephSnapshots(zkhandler, pool, volume):
|
||||
return snapshot_list
|
||||
|
||||
|
||||
def add_snapshot(zkhandler, pool, volume, name):
|
||||
def add_snapshot(zkhandler, pool, volume, name, zk_only=False):
|
||||
if not verifyVolume(zkhandler, pool, volume):
|
||||
return False, 'ERROR: No volume with name "{}" is present in pool "{}".'.format(
|
||||
volume, pool
|
||||
)
|
||||
|
||||
# 1. Create the snapshot
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
"rbd snap create {}/{}@{}".format(pool, volume, name)
|
||||
)
|
||||
if retcode:
|
||||
return (
|
||||
False,
|
||||
'ERROR: Failed to create RBD snapshot "{}" of volume "{}" in pool "{}": {}'.format(
|
||||
name, volume, pool, stderr
|
||||
),
|
||||
if not zk_only:
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
"rbd snap create {}/{}@{}".format(pool, volume, name)
|
||||
)
|
||||
if retcode:
|
||||
return (
|
||||
False,
|
||||
'ERROR: Failed to create RBD snapshot "{}" of volume "{}" in pool "{}": {}'.format(
|
||||
name, volume, pool, stderr
|
||||
),
|
||||
)
|
||||
|
||||
# 2. Add the snapshot to Zookeeper
|
||||
zkhandler.write(
|
||||
|
@ -146,7 +146,11 @@ def run_os_daemon(command_string, environment=None, logfile=None):
|
||||
# Run a local OS command via shell
|
||||
#
|
||||
def run_os_command(command_string, background=False, environment=None, timeout=None):
|
||||
command = shlex_split(command_string)
|
||||
if not isinstance(command_string, list):
|
||||
command = shlex_split(command_string)
|
||||
else:
|
||||
command = command_string
|
||||
|
||||
if background:
|
||||
|
||||
def runcmd():
|
||||
|
@ -21,12 +21,18 @@
|
||||
|
||||
import time
|
||||
import re
|
||||
import os.path
|
||||
import lxml.objectify
|
||||
import lxml.etree
|
||||
|
||||
from distutils.util import strtobool
|
||||
from uuid import UUID
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from datetime import datetime
|
||||
from distutils.util import strtobool
|
||||
from json import dump as jdump
|
||||
from json import load as jload
|
||||
from shutil import rmtree
|
||||
from socket import gethostname
|
||||
from uuid import UUID
|
||||
|
||||
import daemon_lib.common as common
|
||||
|
||||
@ -1175,13 +1181,15 @@ def get_info(zkhandler, domain):
|
||||
return True, domain_information
|
||||
|
||||
|
||||
def get_list(zkhandler, node, state, tag, limit, is_fuzzy=True, negate=False):
|
||||
if node:
|
||||
def get_list(
|
||||
zkhandler, node=None, state=None, tag=None, limit=None, is_fuzzy=True, negate=False
|
||||
):
|
||||
if node is not None:
|
||||
# Verify node is valid
|
||||
if not common.verifyNode(zkhandler, node):
|
||||
return False, 'Specified node "{}" is invalid.'.format(node)
|
||||
|
||||
if state:
|
||||
if state is not None:
|
||||
valid_states = [
|
||||
"start",
|
||||
"restart",
|
||||
@ -1200,7 +1208,7 @@ def get_list(zkhandler, node, state, tag, limit, is_fuzzy=True, negate=False):
|
||||
full_vm_list.sort()
|
||||
|
||||
# Set our limit to a sensible regex
|
||||
if limit:
|
||||
if limit is not None:
|
||||
# Check if the limit is a UUID
|
||||
is_limit_uuid = False
|
||||
try:
|
||||
@ -1229,7 +1237,7 @@ def get_list(zkhandler, node, state, tag, limit, is_fuzzy=True, negate=False):
|
||||
is_state_match = False
|
||||
|
||||
# Check on limit
|
||||
if limit:
|
||||
if limit is not None:
|
||||
# Try to match the limit against the UUID (if applicable) and name
|
||||
try:
|
||||
if is_limit_uuid and re.fullmatch(limit, vm):
|
||||
@ -1241,7 +1249,7 @@ def get_list(zkhandler, node, state, tag, limit, is_fuzzy=True, negate=False):
|
||||
else:
|
||||
is_limit_match = True
|
||||
|
||||
if tag:
|
||||
if tag is not None:
|
||||
vm_tags = zkhandler.children(("domain.meta.tags", vm))
|
||||
if negate and tag not in vm_tags:
|
||||
is_tag_match = True
|
||||
@ -1251,7 +1259,7 @@ def get_list(zkhandler, node, state, tag, limit, is_fuzzy=True, negate=False):
|
||||
is_tag_match = True
|
||||
|
||||
# Check on node
|
||||
if node:
|
||||
if node is not None:
|
||||
vm_node = zkhandler.read(("domain.node", vm))
|
||||
if negate and vm_node != node:
|
||||
is_node_match = True
|
||||
@ -1261,7 +1269,7 @@ def get_list(zkhandler, node, state, tag, limit, is_fuzzy=True, negate=False):
|
||||
is_node_match = True
|
||||
|
||||
# Check on state
|
||||
if state:
|
||||
if state is not None:
|
||||
vm_state = zkhandler.read(("domain.state", vm))
|
||||
if negate and vm_state != state:
|
||||
is_state_match = True
|
||||
@ -1297,3 +1305,541 @@ def get_list(zkhandler, node, state, tag, limit, is_fuzzy=True, negate=False):
|
||||
pass
|
||||
|
||||
return True, sorted(vm_data_list, key=lambda d: d["name"])
|
||||
|
||||
|
||||
def backup_vm(
|
||||
zkhandler, domain, backup_path, incremental_parent=None, retain_snapshot=False
|
||||
):
|
||||
|
||||
tstart = time.time()
|
||||
|
||||
# 0. Validations
|
||||
# Disallow retaining snapshots with an incremental parent
|
||||
if incremental_parent is not None and retain_snapshot:
|
||||
return (
|
||||
False,
|
||||
"ERROR: Retaining snapshots of incremental backups is not supported!",
|
||||
)
|
||||
|
||||
# Validate that VM exists in cluster
|
||||
dom_uuid = getDomainUUID(zkhandler, domain)
|
||||
if not dom_uuid:
|
||||
return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)
|
||||
|
||||
# Validate that the target path is valid
|
||||
if not re.match(r"^/", backup_path):
|
||||
return (
|
||||
False,
|
||||
f"ERROR: Target path {backup_path} is not a valid absolute path on the primary coordinator!",
|
||||
)
|
||||
|
||||
# Ensure that backup_path (on this node) exists
|
||||
if not os.path.isdir(backup_path):
|
||||
return False, f"ERROR: Target path {backup_path} does not exist!"
|
||||
|
||||
# 1. Get information about VM
|
||||
vm_detail = get_list(zkhandler, limit=dom_uuid, is_fuzzy=False)[1][0]
|
||||
if not isinstance(vm_detail, dict):
|
||||
return False, f"ERROR: VM listing returned invalid data: {vm_detail}"
|
||||
|
||||
vm_volumes = list()
|
||||
for disk in vm_detail["disks"]:
|
||||
if disk["type"] != "rbd":
|
||||
continue
|
||||
|
||||
pool, volume = disk["name"].split("/")
|
||||
|
||||
retcode, retdata = ceph.get_list_volume(zkhandler, pool, volume, is_fuzzy=False)
|
||||
if not retcode or len(retdata) != 1:
|
||||
if len(retdata) < 1:
|
||||
retdata = "No volumes returned."
|
||||
elif len(retdata) > 1:
|
||||
retdata = "Multiple volumes returned."
|
||||
return (
|
||||
False,
|
||||
f"ERROR: Failed to get volume details for {pool}/{volume}: {retdata}",
|
||||
)
|
||||
|
||||
try:
|
||||
size = retdata[0]["stats"]["size"]
|
||||
except Exception as e:
|
||||
return False, f"ERROR: Failed to get volume size for {pool}/{volume}: {e}"
|
||||
|
||||
vm_volumes.append((pool, volume, size))
|
||||
|
||||
# 2a. Validate that all volumes exist (they should, but just in case)
|
||||
for pool, volume, _ in vm_volumes:
|
||||
if not ceph.verifyVolume(zkhandler, pool, volume):
|
||||
return (
|
||||
False,
|
||||
f"ERROR: VM defines a volume {pool}/{volume} which does not exist!",
|
||||
)
|
||||
|
||||
# 2b. Validate that, if an incremental_parent is given, it is valid
|
||||
# The incremental parent is just a datestring
|
||||
if incremental_parent is not None:
|
||||
for pool, volume, _ in vm_volumes:
|
||||
if not ceph.verifySnapshot(
|
||||
zkhandler, pool, volume, f"backup_{incremental_parent}"
|
||||
):
|
||||
return (
|
||||
False,
|
||||
f"ERROR: Incremental parent {incremental_parent} given, but no snapshots were found; cannot export an incremental backup.",
|
||||
)
|
||||
|
||||
export_fileext = "rbddiff"
|
||||
else:
|
||||
export_fileext = "rbdimg"
|
||||
|
||||
# 2c. Validate that there's enough space on the target
|
||||
# TODO
|
||||
|
||||
# 3. Set datestring in YYYYMMDDHHMMSS format
|
||||
now = datetime.now()
|
||||
datestring = now.strftime("%Y%m%d%H%M%S")
|
||||
|
||||
snapshot_name = f"backup_{datestring}"
|
||||
|
||||
# 4. Create destination directory
|
||||
vm_target_root = f"{backup_path}/{domain}"
|
||||
vm_target_backup = f"{backup_path}/{domain}/{datestring}/pvcdisks"
|
||||
if not os.path.isdir(vm_target_backup):
|
||||
try:
|
||||
os.makedirs(vm_target_backup)
|
||||
except Exception as e:
|
||||
return False, f"ERROR: Failed to create backup directory: {e}"
|
||||
|
||||
# 5. Take a snapshot of each disk with the name @backup_{datestring}
|
||||
is_snapshot_create_failed = False
|
||||
which_snapshot_create_failed = list()
|
||||
msg_snapshot_create_failed = list()
|
||||
for pool, volume, _ in vm_volumes:
|
||||
retcode, retmsg = ceph.add_snapshot(zkhandler, pool, volume, snapshot_name)
|
||||
if not retcode:
|
||||
is_snapshot_create_failed = True
|
||||
which_snapshot_create_failed.append(f"{pool}/{volume}")
|
||||
msg_snapshot_create_failed.append(retmsg)
|
||||
|
||||
if is_snapshot_create_failed:
|
||||
for pool, volume, _ in vm_volumes:
|
||||
if ceph.verifySnapshot(zkhandler, pool, volume, snapshot_name):
|
||||
ceph.remove_snapshot(zkhandler, pool, volume, snapshot_name)
|
||||
return (
|
||||
False,
|
||||
f'ERROR: Failed to create snapshot for volume(s) {", ".join(which_snapshot_create_failed)}: {", ".join(msg_snapshot_create_failed)}',
|
||||
)
|
||||
|
||||
# 6. Dump snapshot to folder with `rbd export` (full) or `rbd export-diff` (incremental)
|
||||
is_snapshot_export_failed = False
|
||||
which_snapshot_export_failed = list()
|
||||
msg_snapshot_export_failed = list()
|
||||
for pool, volume, _ in vm_volumes:
|
||||
if incremental_parent is not None:
|
||||
incremental_parent_snapshot_name = f"backup_{incremental_parent}"
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
f"rbd export-diff --from-snap {incremental_parent_snapshot_name} {pool}/{volume}@{snapshot_name} {vm_target_backup}/{pool}.{volume}.{export_fileext}"
|
||||
)
|
||||
if retcode:
|
||||
is_snapshot_export_failed = True
|
||||
which_snapshot_export_failed.append(f"{pool}/{volume}")
|
||||
msg_snapshot_export_failed.append(stderr)
|
||||
else:
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
f"rbd export --export-format 2 {pool}/{volume}@{snapshot_name} {vm_target_backup}/{pool}.{volume}.{export_fileext}"
|
||||
)
|
||||
if retcode:
|
||||
is_snapshot_export_failed = True
|
||||
which_snapshot_export_failed.append(f"{pool}/{volume}")
|
||||
msg_snapshot_export_failed.append(stderr)
|
||||
|
||||
if is_snapshot_export_failed:
|
||||
for pool, volume, _ in vm_volumes:
|
||||
if ceph.verifySnapshot(zkhandler, pool, volume, snapshot_name):
|
||||
ceph.remove_snapshot(zkhandler, pool, volume, snapshot_name)
|
||||
return (
|
||||
False,
|
||||
f'ERROR: Failed to export snapshot for volume(s) {", ".join(which_snapshot_export_failed)}: {", ".join(msg_snapshot_export_failed)}',
|
||||
)
|
||||
|
||||
# 7. Create and dump VM backup information
|
||||
backup_type = "incremental" if incremental_parent is not None else "full"
|
||||
vm_backup = {
|
||||
"type": backup_type,
|
||||
"datestring": datestring,
|
||||
"incremental_parent": incremental_parent,
|
||||
"retained_snapshot": retain_snapshot,
|
||||
"vm_detail": vm_detail,
|
||||
"backup_files": [
|
||||
(f"pvcdisks/{p}.{v}.{export_fileext}", s) for p, v, s in vm_volumes
|
||||
],
|
||||
}
|
||||
with open(f"{vm_target_root}/{datestring}/pvcbackup.json", "w") as fh:
|
||||
jdump(vm_backup, fh)
|
||||
|
||||
# 8. Remove snapshots if retain_snapshot is False
|
||||
is_snapshot_remove_failed = False
|
||||
which_snapshot_remove_failed = list()
|
||||
msg_snapshot_remove_failed = list()
|
||||
if not retain_snapshot:
|
||||
for pool, volume, _ in vm_volumes:
|
||||
if ceph.verifySnapshot(zkhandler, pool, volume, snapshot_name):
|
||||
retcode, retmsg = ceph.remove_snapshot(
|
||||
zkhandler, pool, volume, snapshot_name
|
||||
)
|
||||
if not retcode:
|
||||
is_snapshot_remove_failed = True
|
||||
which_snapshot_remove_failed.append(f"{pool}/{volume}")
|
||||
msg_snapshot_remove_failed.append(retmsg)
|
||||
|
||||
tend = time.time()
|
||||
ttot = round(tend - tstart, 2)
|
||||
retlines = list()
|
||||
|
||||
if is_snapshot_remove_failed:
|
||||
retlines.append(
|
||||
f"WARNING: Failed to remove snapshot(s) as requested for volume(s) {', '.join(which_snapshot_remove_failed)}: {', '.join(msg_snapshot_remove_failed)}"
|
||||
)
|
||||
|
||||
myhostname = gethostname().split(".")[0]
|
||||
if retain_snapshot:
|
||||
retlines.append(
|
||||
f"Successfully backed up VM '{domain}' ({backup_type}@{datestring}, snapshots retained) to '{myhostname}:{backup_path}' in {ttot}s."
|
||||
)
|
||||
else:
|
||||
retlines.append(
|
||||
f"Successfully backed up VM '{domain}' ({backup_type}@{datestring}) to '{myhostname}:{backup_path}' in {ttot}s."
|
||||
)
|
||||
|
||||
return True, "\n".join(retlines)
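
For orientation, the pvcbackup.json written in step 7 above has roughly the shape sketched below; the keys and layout follow the vm_backup dictionary assembled in the function, while the concrete values (names, sizes, datestrings) are invented for illustration.

```python
# Illustrative shape of <backup_path>/<domain>/<datestring>/pvcbackup.json;
# keys mirror the vm_backup dict above, values here are made up.
example_pvcbackup = {
    "type": "incremental",                       # or "full"
    "datestring": "20231024021024",
    "incremental_parent": "20231020010101",      # None for a full backup
    "retained_snapshot": False,
    "vm_detail": {"name": "web01", "uuid": "..."},   # full get_list() entry
    "backup_files": [
        # (relative file path, volume size in bytes)
        ["pvcdisks/vms.web01_disk0.rbddiff", 21474836480],
    ],
}
```
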
|
||||
|
||||
|
||||
def remove_backup(zkhandler, domain, backup_path, datestring):
|
||||
tstart = time.time()
|
||||
|
||||
# 0. Validation
|
||||
# Validate that VM exists in cluster
|
||||
dom_uuid = getDomainUUID(zkhandler, domain)
|
||||
if not dom_uuid:
|
||||
return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)
|
||||
|
||||
# Validate that the source path is valid
|
||||
if not re.match(r"^/", backup_path):
|
||||
return (
|
||||
False,
|
||||
f"ERROR: Source path {backup_path} is not a valid absolute path on the primary coordinator!",
|
||||
)
|
||||
|
||||
# Ensure that backup_path (on this node) exists
|
||||
if not os.path.isdir(backup_path):
|
||||
return False, f"ERROR: Source path {backup_path} does not exist!"
|
||||
|
||||
# Ensure that domain path (on this node) exists
|
||||
vm_backup_path = f"{backup_path}/{domain}"
|
||||
if not os.path.isdir(vm_backup_path):
|
||||
return False, f"ERROR: Source VM path {vm_backup_path} does not exist!"
|
||||
|
||||
# Ensure that the archives are present
|
||||
backup_source_pvcbackup_file = f"{vm_backup_path}/{datestring}/pvcbackup.json"
|
||||
if not os.path.isfile(backup_source_pvcbackup_file):
|
||||
return False, "ERROR: The specified source backup files do not exist!"
|
||||
|
||||
backup_source_pvcdisks_path = f"{vm_backup_path}/{datestring}/pvcdisks"
|
||||
if not os.path.isdir(backup_source_pvcdisks_path):
|
||||
return False, "ERROR: The specified source backup files do not exist!"
|
||||
|
||||
# 1. Read the backup file and get VM details
|
||||
try:
|
||||
with open(backup_source_pvcbackup_file) as fh:
|
||||
backup_source_details = jload(fh)
|
||||
except Exception as e:
|
||||
return False, f"ERROR: Failed to read source backup details: {e}"
|
||||
|
||||
# 2. Remove snapshots
|
||||
is_snapshot_remove_failed = False
|
||||
which_snapshot_remove_failed = list()
|
||||
msg_snapshot_remove_failed = list()
|
||||
if backup_source_details["retained_snapshot"]:
|
||||
for volume_file, _ in backup_source_details.get("backup_files"):
|
||||
pool, volume, _ = volume_file.split("/")[-1].split(".")
|
||||
snapshot = f"backup_{datestring}"
|
||||
retcode, retmsg = ceph.remove_snapshot(zkhandler, pool, volume, snapshot)
|
||||
if not retcode:
|
||||
is_snapshot_remove_failed = True
|
||||
which_snapshot_remove_failed.append(f"{pool}/{volume}")
|
||||
msg_snapshot_remove_failed.append(retmsg)
|
||||
|
||||
# 3. Remove files
|
||||
is_files_remove_failed = False
|
||||
msg_files_remove_failed = None
|
||||
try:
|
||||
rmtree(f"{vm_backup_path}/{datestring}")
|
||||
except Exception as e:
|
||||
is_files_remove_failed = True
|
||||
msg_files_remove_failed = e
|
||||
|
||||
tend = time.time()
|
||||
ttot = round(tend - tstart, 2)
|
||||
retlines = list()
|
||||
|
||||
if is_snapshot_remove_failed:
|
||||
retlines.append(
|
||||
f"WARNING: Failed to remove snapshot(s) as requested for volume(s) {', '.join(which_snapshot_remove_failed)}: {', '.join(msg_snapshot_remove_failed)}"
|
||||
)
|
||||
|
||||
if is_files_remove_failed:
|
||||
retlines.append(
|
||||
f"WARNING: Failed to remove backup file(s) from {backup_path}: {msg_files_remove_failed}"
|
||||
)
|
||||
|
||||
myhostname = gethostname().split(".")[0]
|
||||
retlines.append(
|
||||
f"Removed VM backup {datestring} for '{domain}' from '{myhostname}:{backup_path}' in {ttot}s."
|
||||
)
|
||||
|
||||
return True, "\n".join(retlines)
|
||||
|
||||
|
||||
def restore_vm(zkhandler, domain, backup_path, datestring, retain_snapshot=False):
|
||||
tstart = time.time()
|
||||
|
||||
# 0. Validations
|
||||
# Validate that VM does not exist in cluster
|
||||
dom_uuid = getDomainUUID(zkhandler, domain)
|
||||
if dom_uuid:
|
||||
return (
|
||||
False,
|
||||
f'ERROR: VM "{domain}" already exists in the cluster! Remove or rename it before restoring a backup.',
|
||||
)
|
||||
|
||||
# Validate that the source path is valid
|
||||
if not re.match(r"^/", backup_path):
|
||||
return (
|
||||
False,
|
||||
f"ERROR: Source path {backup_path} is not a valid absolute path on the primary coordinator!",
|
||||
)
|
||||
|
||||
# Ensure that backup_path (on this node) exists
|
||||
if not os.path.isdir(backup_path):
|
||||
return False, f"ERROR: Source path {backup_path} does not exist!"
|
||||
|
||||
# Ensure that domain path (on this node) exists
|
||||
vm_backup_path = f"{backup_path}/{domain}"
|
||||
if not os.path.isdir(vm_backup_path):
|
||||
return False, f"ERROR: Source VM path {vm_backup_path} does not exist!"
|
||||
|
||||
# Ensure that the archives are present
|
||||
backup_source_pvcbackup_file = f"{vm_backup_path}/{datestring}/pvcbackup.json"
|
||||
if not os.path.isfile(backup_source_pvcbackup_file):
|
||||
return False, "ERROR: The specified source backup files do not exist!"
|
||||
|
||||
# 1. Read the backup file and get VM details
|
||||
try:
|
||||
with open(backup_source_pvcbackup_file) as fh:
|
||||
backup_source_details = jload(fh)
|
||||
except Exception as e:
|
||||
return False, f"ERROR: Failed to read source backup details: {e}"
|
||||
|
||||
# Handle incrementals
|
||||
incremental_parent = backup_source_details.get("incremental_parent", None)
|
||||
if incremental_parent is not None:
|
||||
backup_source_parent_pvcbackup_file = (
|
||||
f"{vm_backup_path}/{incremental_parent}/pvcbackup.json"
|
||||
)
|
||||
if not os.path.isfile(backup_source_parent_pvcbackup_file):
|
||||
return (
|
||||
False,
|
||||
"ERROR: The specified backup is incremental but the required incremental parent source backup files do not exist!",
|
||||
)
|
||||
|
||||
try:
|
||||
with open(backup_source_parent_pvcbackup_file) as fh:
|
||||
backup_source_parent_details = jload(fh)
|
||||
except Exception as e:
|
||||
return (
|
||||
False,
|
||||
f"ERROR: Failed to read source incremental parent backup details: {e}",
|
||||
)
|
||||
|
||||
# 2. Import VM config and metadata in provision state
|
||||
try:
|
||||
retcode, retmsg = define_vm(
|
||||
zkhandler,
|
||||
backup_source_details["vm_detail"]["xml"],
|
||||
backup_source_details["vm_detail"]["node"],
|
||||
backup_source_details["vm_detail"]["node_limit"],
|
||||
backup_source_details["vm_detail"]["node_selector"],
|
||||
backup_source_details["vm_detail"]["node_autostart"],
|
||||
backup_source_details["vm_detail"]["migration_method"],
|
||||
backup_source_details["vm_detail"]["profile"],
|
||||
backup_source_details["vm_detail"]["tags"],
|
||||
"restore",
|
||||
)
|
||||
if not retcode:
|
||||
return False, f"ERROR: Failed to define restored VM: {retmsg}"
|
||||
except Exception as e:
|
||||
return False, f"ERROR: Failed to parse VM backup details: {e}"
|
||||
|
||||
# 4. Import volumes
|
||||
is_snapshot_remove_failed = False
|
||||
which_snapshot_remove_failed = list()
|
||||
msg_snapshot_remove_failed = list()
|
||||
if incremental_parent is not None:
|
||||
for volume_file, volume_size in backup_source_details.get("backup_files"):
|
||||
pool, volume, _ = volume_file.split("/")[-1].split(".")
|
||||
try:
|
||||
parent_volume_file = [
|
||||
f[0]
|
||||
for f in backup_source_parent_details.get("backup_files")
|
||||
if f[0].split("/")[-1].replace(".rbdimg", "")
|
||||
== volume_file.split("/")[-1].replace(".rbddiff", "")
|
||||
][0]
|
||||
except Exception as e:
|
||||
return (
|
||||
False,
|
||||
f"ERROR: Failed to find parent volume for volume {pool}/{volume}; backup may be corrupt or invalid: {e}",
|
||||
)
|
||||
|
||||
# First we create the expected volumes then clean them up
|
||||
# This process is a bit of a hack because rbd import does not expect an existing volume,
|
||||
# but we need the information in PVC.
|
||||
# Thus create the RBD volume using ceph.add_volume based on the backup size, and then
|
||||
# manually remove the RBD volume (leaving the PVC metainfo)
|
||||
retcode, retmsg = ceph.add_volume(zkhandler, pool, volume, volume_size)
|
||||
if not retcode:
|
||||
return False, f"ERROR: Failed to create restored volume: {retmsg}"
|
||||
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
f"rbd remove {pool}/{volume}"
|
||||
)
|
||||
if retcode:
|
||||
return (
|
||||
False,
|
||||
f"ERROR: Failed to remove temporary RBD volume '{pool}/{volume}': {stderr}",
|
||||
)
|
||||
|
||||
# Next we import the parent images
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
f"rbd import --export-format 2 --dest-pool {pool} {backup_path}/{domain}/{incremental_parent}/{parent_volume_file} {volume}"
|
||||
)
|
||||
if retcode:
|
||||
return (
|
||||
False,
|
||||
f"ERROR: Failed to import parent backup image {parent_volume_file}: {stderr}",
|
||||
)
|
||||
|
||||
# Then we import the incremental diffs
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
f"rbd import-diff {backup_path}/{domain}/{datestring}/{volume_file} {pool}/{volume}"
|
||||
)
|
||||
if retcode:
|
||||
return (
|
||||
False,
|
||||
f"ERROR: Failed to import incremental backup image {volume_file}: {stderr}",
|
||||
)
|
||||
|
||||
# Finally we remove the parent and child snapshots (no longer required)
|
||||
if retain_snapshot:
|
||||
retcode, retmsg = ceph.add_snapshot(
|
||||
zkhandler,
|
||||
pool,
|
||||
volume,
|
||||
f"backup_{incremental_parent}",
|
||||
zk_only=True,
|
||||
)
|
||||
if not retcode:
|
||||
return (
|
||||
False,
|
||||
f"ERROR: Failed to add imported image snapshot for {parent_volume_file}: {retmsg}",
|
||||
)
|
||||
else:
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
f"rbd snap rm {pool}/{volume}@backup_{incremental_parent}"
|
||||
)
|
||||
if retcode:
|
||||
is_snapshot_remove_failed = True
|
||||
which_snapshot_remove_failed.append(f"{pool}/{volume}")
|
||||
msg_snapshot_remove_failed.append(retmsg)
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
f"rbd snap rm {pool}/{volume}@backup_{datestring}"
|
||||
)
|
||||
if retcode:
|
||||
is_snapshot_remove_failed = True
|
||||
which_snapshot_remove_failed.append(f"{pool}/{volume}")
|
||||
msg_snapshot_remove_failed.append(retmsg)
|
||||
|
||||
else:
|
||||
for volume_file, volume_size in backup_source_details.get("backup_files"):
|
||||
pool, volume, _ = volume_file.split("/")[-1].split(".")
|
||||
|
||||
# First we create the expected volumes then clean them up
|
||||
# This process is a bit of a hack because rbd import does not expect an existing volume,
|
||||
# but we need the information in PVC.
|
||||
# Thus create the RBD volume using ceph.add_volume based on the backup size, and then
|
||||
# manually remove the RBD volume (leaving the PVC metainfo)
|
||||
retcode, retmsg = ceph.add_volume(zkhandler, pool, volume, volume_size)
|
||||
if not retcode:
|
||||
return False, f"ERROR: Failed to create restored volume: {retmsg}"
|
||||
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
f"rbd remove {pool}/{volume}"
|
||||
)
|
||||
if retcode:
|
||||
return (
|
||||
False,
|
||||
f"ERROR: Failed to remove temporary RBD volume '{pool}/{volume}': {stderr}",
|
||||
)
|
||||
|
||||
# Then we perform the actual import
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
f"rbd import --export-format 2 --dest-pool {pool} {backup_path}/{domain}/{datestring}/{volume_file} {volume}"
|
||||
)
|
||||
if retcode:
|
||||
return (
|
||||
False,
|
||||
f"ERROR: Failed to import backup image {volume_file}: {stderr}",
|
||||
)
|
||||
|
||||
# Finally we remove the source snapshot (not required)
|
||||
if retain_snapshot:
|
||||
retcode, retmsg = ceph.add_snapshot(
|
||||
zkhandler,
|
||||
pool,
|
||||
volume,
|
||||
f"backup_{datestring}",
|
||||
zk_only=True,
|
||||
)
|
||||
if not retcode:
|
||||
return (
|
||||
False,
|
||||
f"ERROR: Failed to add imported image snapshot for {volume_file}: {retmsg}",
|
||||
)
|
||||
else:
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
f"rbd snap rm {pool}/{volume}@backup_{datestring}"
|
||||
)
|
||||
if retcode:
|
||||
return (
|
||||
False,
|
||||
f"ERROR: Failed to remove imported image snapshot for {volume_file}: {stderr}",
|
||||
)
|
||||
|
||||
# 5. Start VM
|
||||
retcode, retmsg = start_vm(zkhandler, domain)
|
||||
if not retcode:
|
||||
return False, f"ERROR: Failed to start restored VM {domain}: {retmsg}"
|
||||
|
||||
tend = time.time()
|
||||
ttot = round(tend - tstart, 2)
|
||||
retlines = list()
|
||||
|
||||
if is_snapshot_remove_failed:
|
||||
retlines.append(
|
||||
f"WARNING: Failed to remove hanging snapshot(s) as requested for volume(s) {', '.join(which_snapshot_remove_failed)}: {', '.join(msg_snapshot_remove_failed)}"
|
||||
)
|
||||
|
||||
myhostname = gethostname().split(".")[0]
|
||||
retlines.append(
|
||||
f"Successfully restored VM backup {datestring} for '{domain}' from '{myhostname}:{backup_path}' in {ttot}s."
|
||||
)
|
||||
|
||||
return True, "\n".join(retlines)
|
||||
|
debian/changelog (vendored, 19 changes)
@ -1,3 +1,22 @@
pvc (0.9.79-0) unstable; urgency=high

  **API Changes**: New endpoints /vm/{vm}/backup, /vm/{vm}/restore

  * [CLI Client] Fixes some storage pool help text messages
  * [Node Daemon] Increases the IPMI monitoring plugin timeout
  * [All] Adds support for VM backups, including creation, removal, and restore
  * [Repository] Fixes shebangs in scripts to be consistent
  * [Daemon Library] Improves the handling of VM list arguments (default None)

 -- Joshua M. Boniface <joshua@boniface.me>  Tue, 24 Oct 2023 02:10:24 -0400

pvc (0.9.78-0) unstable; urgency=high

  * [API, Client CLI] Fixes several bugs around image uploads; adds a new query parameter for non-raw images
  * [API] Ensures RBD images are created with a raw bytes value to avoid rounding errors

 -- Joshua M. Boniface <joshua@boniface.me>  Sat, 30 Sep 2023 12:57:55 -0400

pvc (0.9.77-0) unstable; urgency=high

  * [Client CLI] Fixes a bug from a bad library import

@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Generate the database migration files
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Generate the Zookeeper migration files
|
||||
|
||||
|
@ -76,7 +76,7 @@ class MonitoringPluginScript(MonitoringPlugin):
|
||||
ipmi_password = self.config["ipmi_password"]
|
||||
retcode, _, _ = run_os_command(
|
||||
f"/usr/bin/ipmitool -I lanplus -H {ipmi_hostname} -U {ipmi_username} -P {ipmi_password} chassis power status",
|
||||
timeout=2
|
||||
timeout=5
|
||||
)
|
||||
|
||||
if retcode > 0:
|
||||
|
@ -132,7 +132,7 @@ class MonitoringPluginScript(MonitoringPlugin):
|
||||
for slave_interface in slave_interfaces:
|
||||
if slave_interface[1] == 'up':
|
||||
slave_interface_up_count += 1
|
||||
if slave_interface_up_count < 2:
|
||||
if slave_interface_up_count < len(slave_interfaces):
|
||||
messages.append(f"{dev} DEGRADED with {slave_interface_up_count} active slaves")
|
||||
health_delta += 10
|
||||
else:
|
||||
|
@ -49,7 +49,7 @@ import re
|
||||
import json
|
||||
|
||||
# Daemon version
|
||||
version = "0.9.77"
|
||||
version = "0.9.79"
|
||||
|
||||
|
||||
##########################################################
|
||||
|
@ -77,5 +77,5 @@ def start_system_services(logger, config):
|
||||
start_ceph_mon(logger, config)
|
||||
start_ceph_mgr(logger, config)
|
||||
|
||||
logger.out("Waiting 3 seconds for daemons to start", state="s")
|
||||
sleep(3)
|
||||
logger.out("Waiting 10 seconds for daemons to start", state="s")
|
||||
sleep(10)