Compare commits
27 Commits
6e83300d78
...
0769f1ea52
Author | SHA1 | Date | |
---|---|---|---|
0769f1ea52 | |||
c858ae8fed | |||
8d256a1737 | |||
d3b3fdfc80 | |||
f1b29ea94e | |||
38abd078af | |||
fabb97cf48 | |||
50aabde320 | |||
68124db323 | |||
8921efd269 | |||
3e259bd926 | |||
3d12915989 | |||
67b0b19bca | |||
5d0c674d1d | |||
f3bc4dee04 | |||
f441b0d823 | |||
fd2331faa6 | |||
a5d0f219e4 | |||
0169510df0 | |||
a58c1d5a8c | |||
a8e4b01b67 | |||
45c4c86911 | |||
6448b31d2c | |||
4fc9b15652 | |||
75b839692b | |||
751cfe0b29 | |||
b997c6f31e |
@ -2140,7 +2140,7 @@ class API_VM_Locks(Resource):
|
||||
api.add_resource(API_VM_Locks, "/vm/<vm>/locks")
|
||||
|
||||
|
||||
# /vm/<vm</console
|
||||
# /vm/<vm>/console
|
||||
class API_VM_Console(Resource):
|
||||
@RequestParser([{"name": "lines"}])
|
||||
@Authenticator
|
||||
@ -2293,6 +2293,138 @@ class API_VM_Device(Resource):
|
||||
api.add_resource(API_VM_Device, "/vm/<vm>/device")
|
||||
|
||||
|
||||
# /vm/<vm>/backup
|
||||
class API_VM_Backup(Resource):
    # REST resource behind /vm/<vm>/backup; only POST is implemented.
    @RequestParser(
        [
            {
                "name": "target_path",
                "required": True,
                "helptext": "A local filesystem path on the primary coordinator must be specified",
            },
            {
                "name": "incremental_parent",
                "required": False,
            },
            {
                "name": "retain_snapshots",
                "required": False,
            },
        ]
    )
    @Authenticator
    def post(self, vm, reqargs):
        """
        Create a backup of {vm} and its volumes to a local primary coordinator filesystem path
        ---
        tags:
          - vm
        parameters:
          - in: query
            name: target_path
            type: string
            required: true
            description: A local filesystem path on the primary coordinator to store the backup
          - in: query
            name: incremental_parent
            type: string
            required: false
            description: A previous backup datestamp to use as an incremental parent; if unspecified a full backup is taken
          - in: query
            name: retain_snapshots
            type: boolean
            required: false
            default: false
            description: Whether or not to retain this backup's volume snapshots to use as a future incremental parent
        responses:
          200:
            description: OK
            schema:
              type: object
              id: Message
          400:
            description: Execution error
            schema:
              type: object
              id: Message
          404:
            description: Not found
            schema:
              type: object
              id: Message
        """
        target_path = reqargs.get("target_path", None)
        incremental_parent = reqargs.get("incremental_parent", None)

        # A .get() default is only used when the key is absent. If the option is
        # present but None or empty, strtobool() would raise instead of falling
        # back to "false", so normalise those values here.
        # NOTE(review): presumably RequestParser stores omitted arguments as None
        # (flask-restful reqparse does by default) - confirm against its definition.
        raw_retain_snapshots = reqargs.get("retain_snapshots") or "false"
        try:
            retain_snapshots = bool(strtobool(str(raw_retain_snapshots)))
        except ValueError:
            # Report an unparsable flag as a client error, using the same
            # (message dict, status code) shape the helper returns.
            return {
                "message": "Invalid boolean value specified for retain_snapshots"
            }, 400

        return api_helper.vm_backup(
            vm, target_path, incremental_parent, retain_snapshots
        )
|
||||
|
||||
|
||||
api.add_resource(API_VM_Backup, "/vm/<vm>/backup")
|
||||
|
||||
|
||||
# /vm/<vm>/restore
|
||||
class API_VM_Restore(Resource):
    # REST resource behind /vm/<vm>/restore; only POST is implemented.
    @RequestParser(
        [
            {
                "name": "target_path",
                "required": True,
                "helptext": "A local filesystem path on the primary coordinator must be specified",
            },
            {
                "name": "backup_datestring",
                "required": True,
                "helptext": "A backup datestring must be specified",
            },
        ]
    )
    @Authenticator
    def post(self, vm, reqargs):
        """
        Restore a backup of {vm} and its volumes from a local primary coordinator filesystem path
        ---
        tags:
          - vm
        parameters:
          - in: query
            name: target_path
            type: string
            required: true
            description: A local filesystem path on the primary coordinator where the backup is stored
          - in: query
            name: backup_datestring
            type: string
            required: true
            description: The backup datestring identifier (e.g. 20230102030405)
        responses:
          200:
            description: OK
            schema:
              type: object
              id: Message
          400:
            description: Execution error
            schema:
              type: object
              id: Message
          404:
            description: Not found
            schema:
              type: object
              id: Message
        """
        # Both arguments are declared as required above; hand them straight to
        # the helper, which returns the response body and status code.
        return api_helper.vm_restore(
            vm,
            reqargs.get("target_path"),
            reqargs.get("backup_datestring"),
        )
|
||||
|
||||
|
||||
api.add_resource(API_VM_Restore, "/vm/<vm>/restore")
|
||||
|
||||
|
||||
##########################################################
|
||||
# Client API - Network
|
||||
##########################################################
|
||||
|
@ -470,6 +470,60 @@ def vm_define(
|
||||
return output, retcode
|
||||
|
||||
|
||||
@ZKConnection(config)
def vm_backup(
    zkhandler,
    domain,
    target_path,
    incremental_parent=None,
    retain_snapshots=False,
):
    """
    Run a VM backup to a local (primary coordinator) filesystem path.

    Calls pvc_vm.backup_vm and converts its (flag, message) result into an
    API response: a {"message": ...} body with status 200 when the flag is
    truthy and 400 otherwise.
    """
    succeeded, message = pvc_vm.backup_vm(
        zkhandler, domain, target_path, incremental_parent, retain_snapshots
    )

    # Double quotes in the message are swapped for single quotes before it is
    # placed in the response body.
    body = {"message": message.replace('"', "'")}
    status = 200 if succeeded else 400
    return body, status
|
||||
|
||||
|
||||
@ZKConnection(config)
def vm_restore(
    zkhandler,
    domain,
    target_path,
    datestring,
):
    """
    Run a VM restore from a local (primary coordinator) filesystem path.

    Calls pvc_vm.restore_vm and converts its (flag, message) result into an
    API response: a {"message": ...} body with status 200 when the flag is
    truthy and 400 otherwise.
    """
    succeeded, message = pvc_vm.restore_vm(
        zkhandler, domain, target_path, datestring
    )

    # Double quotes in the message are swapped for single quotes before it is
    # placed in the response body.
    body = {"message": message.replace('"', "'")}
    status = 200 if succeeded else 400
    return body, status
|
||||
|
||||
|
||||
@ZKConnection(config)
|
||||
def vm_attach_device(zkhandler, vm, device_spec_xml):
|
||||
"""
|
||||
|
@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# A useful script for testing out changes to PVC by building the debs and deploying them out to a
|
||||
# set of hosts automatically, including restarting the daemon (with a pause between) on the remote
|
||||
@ -36,34 +36,37 @@ echo "Preparing code (format and lint)..."
|
||||
./lint || exit 1
|
||||
|
||||
# Build the packages
|
||||
echo -n "Building packages... "
|
||||
echo -n "Building packages..."
|
||||
version="$( ./build-unstable-deb.sh 2>/dev/null )"
|
||||
echo "done. Package version ${version}."
|
||||
echo " done. Package version ${version}."
|
||||
|
||||
# Install the client(s) locally
|
||||
echo -n "Installing client packages locally... "
|
||||
echo -n "Installing client packages locally..."
|
||||
$SUDO dpkg -i ../pvc-client*_${version}*.deb &>/dev/null
|
||||
echo "done".
|
||||
echo " done".
|
||||
|
||||
for HOST in ${HOSTS[@]}; do
|
||||
echo "> Deploying packages to host ${HOST}"
|
||||
echo -n "Copying packages... "
|
||||
echo -n "Copying packages..."
|
||||
ssh $HOST $SUDO rm -rf /tmp/pvc &>/dev/null
|
||||
ssh $HOST mkdir /tmp/pvc &>/dev/null
|
||||
scp ../pvc-*_${version}*.deb $HOST:/tmp/pvc/ &>/dev/null
|
||||
echo "done."
|
||||
echo -n "Installing packages... "
|
||||
echo " done."
|
||||
echo -n "Installing packages..."
|
||||
ssh $HOST $SUDO dpkg -i /tmp/pvc/{pvc-client-cli,pvc-daemon-common,pvc-daemon-api,pvc-daemon-node}*.deb &>/dev/null
|
||||
ssh $HOST rm -rf /tmp/pvc &>/dev/null
|
||||
echo "done."
|
||||
echo -n "Restarting PVC daemons... "
|
||||
echo " done."
|
||||
echo -n "Restarting PVC daemons..."
|
||||
ssh $HOST $SUDO systemctl restart pvcapid &>/dev/null
|
||||
ssh $HOST $SUDO systemctl restart pvcapid-worker &>/dev/null
|
||||
ssh $HOST $SUDO systemctl restart pvcnoded &>/dev/null
|
||||
echo "done."
|
||||
echo -n "Waiting 30s for host to stabilize... "
|
||||
sleep 30
|
||||
echo "done."
|
||||
echo " done."
|
||||
echo -n "Waiting for node daemon to be running..."
|
||||
while [[ $( ssh $HOST "pvc -q node list -f json ${HOST%%.*} | jq -r '.[].daemon_state'" ) != "run" ]]; do
|
||||
sleep 5
|
||||
echo -n "."
|
||||
done
|
||||
echo " done."
|
||||
done
|
||||
if [[ -z ${KEEP_ARTIFACTS} ]]; then
|
||||
rm ../pvc*_${version}*
|
||||
|
@ -1,4 +1,4 @@
|
||||
#!/bin/sh
|
||||
#!/usr/bin/env bash
|
||||
pushd $( git rev-parse --show-toplevel ) &>/dev/null
|
||||
ver="$( head -1 debian/changelog | awk -F'[()-]' '{ print $2 }' )"
|
||||
git pull
|
||||
|
@ -1,4 +1,4 @@
|
||||
#!/bin/sh
|
||||
#!/usr/bin/env bash
|
||||
set -o xtrace
|
||||
exec 3>&1
|
||||
exec 1>&2
|
||||
|
@ -1590,6 +1590,92 @@ def cli_vm_flush_locks(domain):
|
||||
finish(retcode, retmsg)
|
||||
|
||||
|
||||
###############################################################################
|
||||
# > pvc vm backup
|
||||
###############################################################################
|
||||
@click.command(name="backup", short_help="Create a backup of a virtual machine.")
@connection_req
@click.argument("domain")
@click.argument("target_path")
@click.option(
    "-i",
    "--incremental",
    "incremental_parent",
    default=None,
    help="Perform an incremental volume backup from this parent backup datestring.",
)
@click.option(
    "-r",
    "--retain-snapshots",
    "retain_snapshots",
    is_flag=True,
    default=False,
    help="Retain volume snapshots for future incremental use.",
)
def cli_vm_backup(domain, target_path, incremental_parent, retain_snapshots):
    """
    Create a backup of virtual machine DOMAIN to TARGET_PATH on the cluster primary coordinator. DOMAIN may be a UUID or name.

    TARGET_PATH must be a valid absolute directory path on the cluster "primary" coordinator (see "pvc node list") allowing writes from the API daemon (normally running as "root"). The TARGET_PATH should be a large storage volume, ideally a remotely mounted filesystem (e.g. NFS, SSHFS, etc.) or non-Ceph-backed disk; PVC does not handle this path, that is up to the administrator to configure and manage.

    The backup will export the VM configuration, metainfo, and a point-in-time snapshot of all attached RBD volumes, using a datestring formatted backup name (i.e. YYYYMMDDHHMMSS).

    The virtual machine DOMAIN may be running, and due to snapshots the backup should be crash-consistent, but will be in an unclean state and this must be considered when restoring from backups.

    Incremental snapshots are possible by specifying the "-i"/"--incremental" option along with a source backup datestring. The snapshots from that source backup must have been retained using the "-r"/"--retain-snapshots" option. Arbitrary snapshots, assuming they are valid for all attached RBD volumes, may also be used, as long as they are prefixed with "backup_". Retaining snapshots of incremental backups is supported, though it is not recommended to "chain" incremental backups in this way as it can make managing restores more difficult.

    Full backup volume images are sparse-allocated, however it is recommended for safety to consider their maximum allocated size when allocating space for the TARGET_PATH. Incremental volume images are generally small but are dependent entirely on the rate of data change in each volume.
    """

    # Print the progress prefix without a newline so the outcome lands on the
    # same line once the API call returns.
    echo(
        CLI_CONFIG,
        f"Backing up VM '{domain}'... ",
        newline=False,
    )
    retcode, retmsg = pvc.lib.vm.vm_backup(
        CLI_CONFIG, domain, target_path, incremental_parent, retain_snapshots
    )
    echo(CLI_CONFIG, "done." if retcode else "failed.")
    # Hand the result flag and message to the shared CLI exit handler.
    finish(retcode, retmsg)
|
||||
|
||||
|
||||
###############################################################################
|
||||
# > pvc vm restore
|
||||
###############################################################################
|
||||
@click.command(name="restore", short_help="Restore a backup of a virtual machine.")
@connection_req
@click.argument("domain")
@click.argument("backup_datestring")
@click.argument("target_path")
def cli_vm_restore(domain, backup_datestring, target_path):
    """
    Restore the backup BACKUP_DATESTRING of virtual machine DOMAIN stored in TARGET_PATH on the cluster primary coordinator. DOMAIN may be a UUID or name.

    TARGET_PATH must be a valid absolute directory path on the cluster "primary" coordinator (see "pvc node list") allowing reads from the API daemon (normally running as "root"). The TARGET_PATH should be a large storage volume, ideally a remotely mounted filesystem (e.g. NFS, SSHFS, etc.) or non-Ceph-backed disk; PVC does not handle this path, that is up to the administrator to configure and manage.

    The restore will import the VM configuration, metainfo, and the point-in-time snapshot of all attached RBD volumes. Incremental backups will be automatically handled.

    A VM named DOMAIN must not exist; if the VM already exists, it must be removed before restoring. Renaming is not sufficient as the UUID will remain the same.
    """

    # Print the progress prefix without a newline so the outcome lands on the
    # same line once the API call returns.
    progress_message = f"Restoring backup {backup_datestring} of VM '{domain}'... "
    echo(CLI_CONFIG, progress_message, newline=False)

    # Note the library call takes the path before the datestring, the reverse
    # of the CLI argument order.
    retcode, retmsg = pvc.lib.vm.vm_restore(
        CLI_CONFIG, domain, target_path, backup_datestring
    )

    outcome = "done." if retcode else "failed."
    echo(CLI_CONFIG, outcome)
    finish(retcode, retmsg)
|
||||
|
||||
|
||||
###############################################################################
|
||||
# > pvc vm tag
|
||||
###############################################################################
|
||||
@ -5659,6 +5745,8 @@ cli_vm.add_command(cli_vm_move)
|
||||
cli_vm.add_command(cli_vm_migrate)
|
||||
cli_vm.add_command(cli_vm_unmigrate)
|
||||
cli_vm.add_command(cli_vm_flush_locks)
|
||||
cli_vm.add_command(cli_vm_backup)
|
||||
cli_vm.add_command(cli_vm_restore)
|
||||
cli_vm_tag.add_command(cli_vm_tag_get)
|
||||
cli_vm_tag.add_command(cli_vm_tag_add)
|
||||
cli_vm_tag.add_command(cli_vm_tag_remove)
|
||||
|
@ -433,6 +433,47 @@ def vm_locks(config, vm):
|
||||
return retstatus, response.json().get("message", "")
|
||||
|
||||
|
||||
def vm_backup(config, vm, target_path, incremental_parent=None, retain_snapshots=False):
    """
    Ask the API to back up {vm} and its volumes to a local primary coordinator filesystem path

    API endpoint: POST /vm/{vm}/backup
    API arguments: target_path={target_path}, incremental_parent={incremental_parent}, retain_snapshots={retain_snapshots}
    API schema: {"message":"{data}"}

    Returns a (success, message) tuple; success is True only for an HTTP 200 response.
    """
    response = call_api(
        config,
        "post",
        f"/vm/{vm}/backup",
        params={
            "target_path": target_path,
            "incremental_parent": incremental_parent,
            "retain_snapshots": retain_snapshots,
        },
    )

    # The message is taken from the JSON body regardless of the status code.
    succeeded = response.status_code == 200
    return succeeded, response.json().get("message", "")
|
||||
|
||||
|
||||
def vm_restore(config, vm, target_path, backup_datestring):
    """
    Ask the API to restore a backup of {vm} and its volumes from a local primary coordinator filesystem path

    API endpoint: POST /vm/{vm}/restore
    API arguments: target_path={target_path}, backup_datestring={backup_datestring}
    API schema: {"message":"{data}"}

    Returns a (success, message) tuple; success is True only for an HTTP 200 response.
    """
    response = call_api(
        config,
        "post",
        f"/vm/{vm}/restore",
        params={
            "target_path": target_path,
            "backup_datestring": backup_datestring,
        },
    )

    # The message is taken from the JSON body regardless of the status code.
    succeeded = response.status_code == 200
    return succeeded, response.json().get("message", "")
|
||||
|
||||
|
||||
def vm_vcpus_set(config, vm, vcpus, topology, restart):
|
||||
"""
|
||||
Set the vCPU count of the VM with topology
|
||||
|
@ -146,7 +146,11 @@ def run_os_daemon(command_string, environment=None, logfile=None):
|
||||
# Run a local OS command via shell
|
||||
#
|
||||
def run_os_command(command_string, background=False, environment=None, timeout=None):
|
||||
command = shlex_split(command_string)
|
||||
if not isinstance(command_string, list):
|
||||
command = shlex_split(command_string)
|
||||
else:
|
||||
command = command_string
|
||||
|
||||
if background:
|
||||
|
||||
def runcmd():
|
||||
|
@ -21,12 +21,17 @@
|
||||
|
||||
import time
|
||||
import re
|
||||
import os.path
|
||||
import lxml.objectify
|
||||
import lxml.etree
|
||||
|
||||
from distutils.util import strtobool
|
||||
from uuid import UUID
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from datetime import datetime
|
||||
from socket import gethostname
|
||||
from json import dump as jdump
|
||||
from json import load as jload
|
||||
|
||||
import daemon_lib.common as common
|
||||
|
||||
@ -1175,13 +1180,15 @@ def get_info(zkhandler, domain):
|
||||
return True, domain_information
|
||||
|
||||
|
||||
def get_list(zkhandler, node, state, tag, limit, is_fuzzy=True, negate=False):
|
||||
if node:
|
||||
def get_list(
|
||||
zkhandler, node=None, state=None, tag=None, limit=None, is_fuzzy=True, negate=False
|
||||
):
|
||||
if node is not None:
|
||||
# Verify node is valid
|
||||
if not common.verifyNode(zkhandler, node):
|
||||
return False, 'Specified node "{}" is invalid.'.format(node)
|
||||
|
||||
if state:
|
||||
if state is not None:
|
||||
valid_states = [
|
||||
"start",
|
||||
"restart",
|
||||
@ -1200,7 +1207,7 @@ def get_list(zkhandler, node, state, tag, limit, is_fuzzy=True, negate=False):
|
||||
full_vm_list.sort()
|
||||
|
||||
# Set our limit to a sensible regex
|
||||
if limit:
|
||||
if limit is not None:
|
||||
# Check if the limit is a UUID
|
||||
is_limit_uuid = False
|
||||
try:
|
||||
@ -1229,7 +1236,7 @@ def get_list(zkhandler, node, state, tag, limit, is_fuzzy=True, negate=False):
|
||||
is_state_match = False
|
||||
|
||||
# Check on limit
|
||||
if limit:
|
||||
if limit is not None:
|
||||
# Try to match the limit against the UUID (if applicable) and name
|
||||
try:
|
||||
if is_limit_uuid and re.fullmatch(limit, vm):
|
||||
@ -1241,7 +1248,7 @@ def get_list(zkhandler, node, state, tag, limit, is_fuzzy=True, negate=False):
|
||||
else:
|
||||
is_limit_match = True
|
||||
|
||||
if tag:
|
||||
if tag is not None:
|
||||
vm_tags = zkhandler.children(("domain.meta.tags", vm))
|
||||
if negate and tag not in vm_tags:
|
||||
is_tag_match = True
|
||||
@ -1251,7 +1258,7 @@ def get_list(zkhandler, node, state, tag, limit, is_fuzzy=True, negate=False):
|
||||
is_tag_match = True
|
||||
|
||||
# Check on node
|
||||
if node:
|
||||
if node is not None:
|
||||
vm_node = zkhandler.read(("domain.node", vm))
|
||||
if negate and vm_node != node:
|
||||
is_node_match = True
|
||||
@ -1261,7 +1268,7 @@ def get_list(zkhandler, node, state, tag, limit, is_fuzzy=True, negate=False):
|
||||
is_node_match = True
|
||||
|
||||
# Check on state
|
||||
if state:
|
||||
if state is not None:
|
||||
vm_state = zkhandler.read(("domain.state", vm))
|
||||
if negate and vm_state != state:
|
||||
is_state_match = True
|
||||
@ -1297,3 +1304,372 @@ def get_list(zkhandler, node, state, tag, limit, is_fuzzy=True, negate=False):
|
||||
pass
|
||||
|
||||
return True, sorted(vm_data_list, key=lambda d: d["name"])
|
||||
|
||||
|
||||
def _remove_backup_snapshots(zkhandler, vm_volumes, snapshot_name):
    """
    Remove snapshot_name from every volume in vm_volumes on which it exists.

    vm_volumes is a list of (pool, volume, size) tuples. Returns a list of
    ("pool/volume", message) tuples, one for each removal that reported failure.
    """
    failures = list()
    for pool, volume, _ in vm_volumes:
        if not ceph.verifySnapshot(zkhandler, pool, volume, snapshot_name):
            continue
        retcode, retmsg = ceph.remove_snapshot(zkhandler, pool, volume, snapshot_name)
        if not retcode:
            failures.append((f"{pool}/{volume}", retmsg))
    return failures


def backup_vm(
    zkhandler, domain, target_path, incremental_parent=None, retain_snapshots=False
):
    """
    Back up a VM's details and RBD volumes to a directory on this node.

    Snapshots every RBD volume of the VM as "backup_<datestring>", exports each
    snapshot under <target_path>/<domain>/<domain>.<datestring>.pvcdisks/ and
    writes a JSON description to <target_path>/<domain>/<domain>.<datestring>.pvcbackup.

    zkhandler: handler passed through to the VM and Ceph helper functions
    domain: the VM identifier, resolved via getDomainUUID
    target_path: absolute path of an existing directory to store the backup under
    incremental_parent: datestring of an earlier backup whose "backup_<datestring>"
        snapshots still exist; when given, diffs are exported instead of full images
    retain_snapshots: keep the new snapshots after the export instead of removing them

    Returns a (success, message) tuple.
    """

    tstart = time.time()

    # 0. Validations
    # Validate that VM exists in cluster
    dom_uuid = getDomainUUID(zkhandler, domain)
    if not dom_uuid:
        return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)

    # Validate that the target path is absolute
    if not re.match(r"^/", target_path):
        return (
            False,
            f"ERROR: Target path {target_path} is not a valid absolute path on the primary coordinator!",
        )

    # Ensure that target_path (on this node) exists
    if not os.path.isdir(target_path):
        return False, f"ERROR: Target path {target_path} does not exist!"

    # 1. Get information about VM
    # Check the listing result before indexing it, so that an empty or failed
    # listing produces an error message instead of an IndexError.
    retcode, vm_list = get_list(zkhandler, limit=dom_uuid, is_fuzzy=False)
    if not retcode or not isinstance(vm_list, list) or len(vm_list) < 1:
        return False, f"ERROR: VM listing returned invalid data: {vm_list}"
    vm_detail = vm_list[0]
    if not isinstance(vm_detail, dict):
        return False, f"ERROR: VM listing returned invalid data: {vm_detail}"

    vm_volumes = list()
    for disk in vm_detail["disks"]:
        if disk["type"] != "rbd":
            continue

        pool, volume = disk["name"].split("/")

        retcode, retdata = ceph.get_list_volume(zkhandler, pool, volume, is_fuzzy=False)
        if not retcode:
            # Report the failure data as returned rather than a count-based guess
            return (
                False,
                f"ERROR: Failed to get volume details for {pool}/{volume}: {retdata}",
            )
        if len(retdata) != 1:
            if len(retdata) < 1:
                retdata = "No volumes returned."
            else:
                retdata = "Multiple volumes returned."
            return (
                False,
                f"ERROR: Failed to get volume details for {pool}/{volume}: {retdata}",
            )

        try:
            size = retdata[0]["stats"]["size"]
        except Exception as e:
            return False, f"ERROR: Failed to get volume size for {pool}/{volume}: {e}"

        vm_volumes.append((pool, volume, size))

    # 2a. Validate that all volumes exist (they should, but just in case)
    for pool, volume, _ in vm_volumes:
        if not ceph.verifyVolume(zkhandler, pool, volume):
            return (
                False,
                f"ERROR: VM defines a volume {pool}/{volume} which does not exist!",
            )

    # 2b. Validate that, if an incremental_parent is given, it is valid
    # The incremental parent is just a datestring
    if incremental_parent is not None:
        for pool, volume, _ in vm_volumes:
            if not ceph.verifySnapshot(
                zkhandler, pool, volume, f"backup_{incremental_parent}"
            ):
                return (
                    False,
                    f"ERROR: Incremental parent {incremental_parent} given, but no snapshots were found; cannot export an incremental backup.",
                )

        export_fileext = "rbddiff"
    else:
        export_fileext = "rbdimg"

    # 2c. Validate that there's enough space on the target
    # TODO

    # 3. Set datestring in YYYYMMDDHHMMSS format
    now = datetime.now()
    datestring = now.strftime("%Y%m%d%H%M%S")

    snapshot_name = f"backup_{datestring}"

    # 4. Create destination directory
    vm_target_root = f"{target_path}/{domain}"
    vm_target_backup = f"{target_path}/{domain}/{domain}.{datestring}.pvcdisks"
    if not os.path.isdir(vm_target_backup):
        try:
            os.makedirs(vm_target_backup)
        except Exception as e:
            return False, f"ERROR: Failed to create backup directory: {e}"

    # 5. Take snapshot of each disk with the name @backup_{datestring}
    which_snapshot_create_failed = list()
    msg_snapshot_create_failed = list()
    for pool, volume, _ in vm_volumes:
        retcode, retmsg = ceph.add_snapshot(zkhandler, pool, volume, snapshot_name)
        if not retcode:
            which_snapshot_create_failed.append(f"{pool}/{volume}")
            msg_snapshot_create_failed.append(retmsg)

    if which_snapshot_create_failed:
        # Roll back whichever snapshots were created before reporting
        _remove_backup_snapshots(zkhandler, vm_volumes, snapshot_name)
        return (
            False,
            f'ERROR: Failed to create snapshot for volume(s) {", ".join(which_snapshot_create_failed)}: {", ".join(msg_snapshot_create_failed)}',
        )

    # 6. Dump snapshot to folder with `rbd export` (full) or `rbd export-diff` (incremental)
    which_snapshot_export_failed = list()
    msg_snapshot_export_failed = list()
    for pool, volume, _ in vm_volumes:
        export_source = f"{pool}/{volume}@{snapshot_name}"
        export_target = f"{vm_target_backup}/{pool}.{volume}.{export_fileext}"
        # The command is passed as an argument list (run_os_command uses a list
        # as-is) so that a target path containing whitespace stays one argument.
        if incremental_parent is not None:
            export_command = [
                "rbd",
                "export-diff",
                "--from-snap",
                f"backup_{incremental_parent}",
                export_source,
                export_target,
            ]
        else:
            export_command = [
                "rbd",
                "export",
                "--export-format",
                "2",
                export_source,
                export_target,
            ]

        retcode, stdout, stderr = common.run_os_command(export_command)
        if retcode:
            which_snapshot_export_failed.append(f"{pool}/{volume}")
            msg_snapshot_export_failed.append(stderr)

    if which_snapshot_export_failed:
        _remove_backup_snapshots(zkhandler, vm_volumes, snapshot_name)
        return (
            False,
            f'ERROR: Failed to export snapshot for volume(s) {", ".join(which_snapshot_export_failed)}: {", ".join(msg_snapshot_export_failed)}',
        )

    # 7. Create and dump VM backup information
    backup_type = "incremental" if incremental_parent is not None else "full"
    backup_details = {
        "type": backup_type,
        "datestring": datestring,
        "incremental_parent": incremental_parent,
        "vm_detail": vm_detail,
        "backup_files": [
            (f"{domain}.{datestring}.pvcdisks/{p}.{v}.{export_fileext}", s)
            for p, v, s in vm_volumes
        ],
    }
    try:
        with open(f"{vm_target_root}/{domain}.{datestring}.pvcbackup", "w") as fh:
            jdump(backup_details, fh)
    except (OSError, TypeError, ValueError) as e:
        # Without its details file the backup cannot be restored, so treat this
        # as a failure and do not leave the new snapshots behind.
        _remove_backup_snapshots(zkhandler, vm_volumes, snapshot_name)
        return False, f"ERROR: Failed to write backup details file: {e}"

    # 8. Remove snapshots if retain_snapshots is False
    snapshot_remove_failures = list()
    if not retain_snapshots:
        snapshot_remove_failures = _remove_backup_snapshots(
            zkhandler, vm_volumes, snapshot_name
        )

    tend = time.time()
    ttot = round(tend - tstart, 2)
    retlines = list()

    if snapshot_remove_failures:
        # A failed cleanup does not invalidate the backup; report it as a warning
        failed_volumes = ", ".join(name for name, _ in snapshot_remove_failures)
        failed_messages = ", ".join(msg for _, msg in snapshot_remove_failures)
        retlines.append(
            f"WARNING: Failed to remove snapshot as requested for volume(s) {failed_volumes}: {failed_messages}"
        )

    myhostname = gethostname().split(".")[0]
    if retain_snapshots:
        retlines.append(
            f"Successfully backed up VM '{domain}' ({backup_type}@{datestring}, snapshots retained) to '{myhostname}:{target_path}' in {ttot}s."
        )
    else:
        retlines.append(
            f"Successfully backed up VM '{domain}' ({backup_type}@{datestring}) to '{myhostname}:{target_path}' in {ttot}s."
        )

    return True, "\n".join(retlines)
|
||||
|
||||
|
||||
def _split_backup_volume_file(volume_file):
    """
    Split a backup file entry like "dir/pool.volume.ext" into (pool, volume).

    The text before the first dot is taken as the pool and the text after the
    last dot as the extension, so a volume name may itself contain dots.
    NOTE(review): this assumes pool names contain no dots - confirm.
    Raises ValueError if the file name has fewer than three dot-separated parts.
    """
    basename = volume_file.split("/")[-1]
    pool, remainder = basename.split(".", 1)
    volume, _ = remainder.rsplit(".", 1)
    return pool, volume


def restore_vm(zkhandler, domain, source_path, datestring):
    """
    Restore a VM and its RBD volumes from a backup directory on this node.

    Reads <source_path>/<domain>/<domain>.<datestring>.pvcbackup, defines the VM
    from the stored details, imports each listed volume image (the parent image
    followed by the diff for an incremental backup) and starts the VM.

    zkhandler: handler passed through to the VM and Ceph helper functions
    domain: name of the VM to restore; it must not currently exist
    source_path: absolute path of the directory the backup was written under
    datestring: datestring of the backup to restore

    Returns a (success, message) tuple. A snapshot that cannot be removed after
    its image has been imported is reported as a WARNING line in the message of
    an otherwise successful restore.
    """

    tstart = time.time()

    # 0. Validations
    # Validate that VM does not exist in cluster
    dom_uuid = getDomainUUID(zkhandler, domain)
    if dom_uuid:
        return (
            False,
            f'ERROR: VM "{domain}" already exists in the cluster! Remove or rename it before restoring a backup.',
        )

    # Validate that the source path is absolute
    if not re.match(r"^/", source_path):
        return (
            False,
            f"ERROR: Source path {source_path} is not a valid absolute path on the primary coordinator!",
        )

    # Ensure that source_path (on this node) exists
    if not os.path.isdir(source_path):
        return False, f"ERROR: Source path {source_path} does not exist!"

    # Ensure that domain path (on this node) exists
    backup_source_path = f"{source_path}/{domain}"
    if not os.path.isdir(backup_source_path):
        return False, f"ERROR: Source VM path {backup_source_path} does not exist!"

    # Ensure that the archives are present
    backup_source_pvcbackup_file = (
        f"{backup_source_path}/{domain}.{datestring}.pvcbackup"
    )
    if not os.path.isfile(backup_source_pvcbackup_file):
        return False, "ERROR: The specified source backup files do not exist!"

    # 1. Read the backup file and get VM details
    try:
        with open(backup_source_pvcbackup_file) as fh:
            backup_source_details = jload(fh)
    except Exception as e:
        return False, f"ERROR: Failed to read source backup details: {e}"

    # Handle incrementals
    incremental_parent = backup_source_details.get("incremental_parent", None)
    backup_source_parent_details = None
    if incremental_parent is not None:
        backup_source_parent_pvcbackup_file = (
            f"{backup_source_path}/{domain}.{incremental_parent}.pvcbackup"
        )
        if not os.path.isfile(backup_source_parent_pvcbackup_file):
            return (
                False,
                "ERROR: The specified backup is incremental but the required incremental parent source backup files do not exist!",
            )

        try:
            with open(backup_source_parent_pvcbackup_file) as fh:
                backup_source_parent_details = jload(fh)
        except Exception as e:
            return (
                False,
                f"ERROR: Failed to read source incremental parent backup details: {e}",
            )

    # 2. Work out what has to be imported before changing anything in the
    # cluster, so that a malformed backup is rejected without leaving a
    # defined-but-unusable VM behind.
    backup_files = backup_source_details.get("backup_files")
    if not isinstance(backup_files, list):
        return (
            False,
            "ERROR: Failed to parse VM backup details: no backup files are listed",
        )

    restore_plan = list()
    for backup_file in backup_files:
        try:
            volume_file, volume_size = backup_file
            pool, volume = _split_backup_volume_file(volume_file)
        except (AttributeError, TypeError, ValueError) as e:
            return False, f"ERROR: Failed to parse VM backup details: {e}"

        parent_volume_file = None
        if incremental_parent is not None:
            # The parent must provide a full image of the same pool/volume
            expected_parent_name = f"{pool}.{volume}.rbdimg"
            try:
                parent_volume_file = [
                    f[0]
                    for f in backup_source_parent_details.get("backup_files")
                    if f[0].split("/")[-1] == expected_parent_name
                ][0]
            except Exception as e:
                return (
                    False,
                    f"ERROR: Failed to find parent volume for volume {pool}/{volume}; backup may be corrupt or invalid: {e}",
                )

        restore_plan.append((pool, volume, volume_size, volume_file, parent_volume_file))

    # 3. Import VM config and metadata in restore state
    try:
        retcode, retmsg = define_vm(
            zkhandler,
            backup_source_details["vm_detail"]["xml"],
            backup_source_details["vm_detail"]["node"],
            backup_source_details["vm_detail"]["node_limit"],
            backup_source_details["vm_detail"]["node_selector"],
            backup_source_details["vm_detail"]["node_autostart"],
            backup_source_details["vm_detail"]["migration_method"],
            backup_source_details["vm_detail"]["profile"],
            backup_source_details["vm_detail"]["tags"],
            "restore",
        )
        if not retcode:
            return False, f"ERROR: Failed to define restored VM: {retmsg}"
    except Exception as e:
        return False, f"ERROR: Failed to parse VM backup details: {e}"

    # 4. Import volumes
    # Commands are passed as argument lists (run_os_command uses a list as-is)
    # so that a source path containing whitespace stays one argument.
    which_snapshot_remove_failed = list()
    msg_snapshot_remove_failed = list()
    for pool, volume, volume_size, volume_file, parent_volume_file in restore_plan:
        # First we create the expected volumes then clean them up
        # This process is a bit of a hack because rbd import does not expect an existing volume,
        # but we need the information in PVC.
        # Thus create the RBD volume using ceph.add_volume based on the backup size, and then
        # manually remove the RBD volume (leaving the PVC metainfo)
        retcode, retmsg = ceph.add_volume(zkhandler, pool, volume, volume_size)
        if not retcode:
            return False, f"ERROR: Failed to create restored volume: {retmsg}"

        retcode, stdout, stderr = common.run_os_command(
            ["rbd", "remove", f"{pool}/{volume}"]
        )
        if retcode:
            return (
                False,
                f"ERROR: Failed to remove temporary RBD volume '{pool}/{volume}': {stderr}",
            )

        if parent_volume_file is not None:
            # Next we import the parent images
            retcode, stdout, stderr = common.run_os_command(
                [
                    "rbd",
                    "import",
                    "--export-format",
                    "2",
                    "--dest-pool",
                    pool,
                    f"{backup_source_path}/{parent_volume_file}",
                    volume,
                ]
            )
            if retcode:
                return (
                    False,
                    f"ERROR: Failed to import parent backup image {parent_volume_file}: {stderr}",
                )

            # Then we import the incremental diffs
            retcode, stdout, stderr = common.run_os_command(
                [
                    "rbd",
                    "import-diff",
                    f"{backup_source_path}/{volume_file}",
                    f"{pool}/{volume}",
                ]
            )
            if retcode:
                return (
                    False,
                    f"ERROR: Failed to import incremental backup image {volume_file}: {stderr}",
                )

            # Finally we remove the parent and child snapshots (no longer required)
            imported_snapshots = [
                f"backup_{incremental_parent}",
                f"backup_{datestring}",
            ]
        else:
            # Then we perform the actual import
            retcode, stdout, stderr = common.run_os_command(
                [
                    "rbd",
                    "import",
                    "--export-format",
                    "2",
                    "--dest-pool",
                    pool,
                    f"{backup_source_path}/{volume_file}",
                    volume,
                ]
            )
            if retcode:
                return (
                    False,
                    f"ERROR: Failed to import backup image {volume_file}: {stderr}",
                )

            # Finally we remove the source snapshot (not required)
            imported_snapshots = [f"backup_{datestring}"]

        for snapshot in imported_snapshots:
            retcode, stdout, stderr = common.run_os_command(
                ["rbd", "snap", "rm", f"{pool}/{volume}@{snapshot}"]
            )
            if retcode:
                # The volume data is already in place, so record a warning and
                # carry on instead of abandoning the remaining volumes and
                # leaving the VM defined but never started.
                which_snapshot_remove_failed.append(f"{pool}/{volume}@{snapshot}")
                msg_snapshot_remove_failed.append(stderr)

    # 5. Start VM
    retcode, retmsg = start_vm(zkhandler, domain)
    if not retcode:
        return False, f"ERROR: Failed to start restored VM {domain}: {retmsg}"

    tend = time.time()
    ttot = round(tend - tstart, 2)
    retlines = list()

    if which_snapshot_remove_failed:
        retlines.append(
            f"WARNING: Failed to remove parent snapshot as requested for volume(s) {', '.join(which_snapshot_remove_failed)}: {', '.join(msg_snapshot_remove_failed)}"
        )

    myhostname = gethostname().split(".")[0]
    retlines.append(
        f"Successfully restored VM backup {datestring} for '{domain}' from '{myhostname}:{source_path}' in {ttot}s."
    )

    return True, "\n".join(retlines)
|
||||
|
@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Generate the database migration files
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Generate the Zookeeper migration files
|
||||
|
||||
|
@ -132,7 +132,7 @@ class MonitoringPluginScript(MonitoringPlugin):
|
||||
for slave_interface in slave_interfaces:
|
||||
if slave_interface[1] == 'up':
|
||||
slave_interface_up_count += 1
|
||||
if slave_interface_up_count < 2:
|
||||
if slave_interface_up_count < len(slave_interfaces):
|
||||
messages.append(f"{dev} DEGRADED with {slave_interface_up_count} active slaves")
|
||||
health_delta += 10
|
||||
else:
|
||||
|
@ -77,5 +77,5 @@ def start_system_services(logger, config):
|
||||
start_ceph_mon(logger, config)
|
||||
start_ceph_mgr(logger, config)
|
||||
|
||||
logger.out("Waiting 3 seconds for daemons to start", state="s")
|
||||
sleep(3)
|
||||
logger.out("Waiting 10 seconds for daemons to start", state="s")
|
||||
sleep(10)
|
||||
|
Reference in New Issue
Block a user