Compare commits: 4a0680b27f ... v0.9.100 (39 commits)

Commit SHA1s: 8cb44c0c5d, c55021f30c, 783c9e46c2, b7f33c1fcb, 0f578d7c7d, f87b96887c, 02a775c99b, 8177d5f8b7, 26d0d08873, f57b8d4a15, 10de85cce3, e938140414, fd87a28eb3, 4ef5fbdbe8, 8fa6bed736, f7926726f2, de58efdaa9, 8ca6976892, a957218976, 61365e6e01, 35fe16ce75, c45e488958, c1f320ede2, 03db9604e1, f1668bffcc, c0686fc5c7, 7ecc05b413, 4b37c4fea3, 0d918d66fe, fd199f405b, f6c009beac, fc89f4f2f5, 565011b277, 0bf9cc6b06, f2dfada73e, f63c392ba6, 7663ad72c5, 9b3075be18, 9a661d0173
@@ -4,4 +4,4 @@ bbuilder:
published:
- git submodule update --init
- /bin/bash build-stable-deb.sh
- sudo /usr/local/bin/deploy-package -C pvc
- sudo /usr/local/bin/deploy-package -C pvc -D bookworm
CHANGELOG.md (28 changes)
@@ -1,5 +1,33 @@

## PVC Changelog

###### [v0.9.100](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.100)

* [API Daemon] Improves the handling of "detect:" disk strings on newer systems by leveraging the "nvme" command
* [Client CLI] Updates help text about "detect:" disk strings
* [Meta] Updates deprecation warnings and updates builder to only add this version for Debian 12 (Bookworm)

###### [v0.9.99](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.99)

**Deprecation Warning**: `pvc vm backup` commands are now deprecated and will be removed in a future version. Use `pvc vm snapshot` commands instead.

**Breaking Change**: The on-disk format of VM snapshot exports differs from backup exports, and the PVC autobackup system now leverages these. It is recommended to start fresh with a new tree of backups for `pvc autobackup` for maximum compatibility.

**Breaking Change**: VM autobackups now run in `pvcworkerd` instead of the CLI client directly, allowing them to be triggered from any node (or externally). It is important to apply the timer unit changes from the `pvc-ansible` role after upgrading to 0.9.99 to avoid duplicate runs.

**Usage Note**: VM snapshots are displayed in the `pvc vm list` and `pvc vm info` outputs, not in a unique "list" endpoint.

* [API Daemon] Adds a proper error when an invalid provisioner profile is specified
* [Node Daemon] Sorts Ceph pools properly in node keepalive to avoid incorrect ordering
* [Health Daemon] Improves handling of IPMI checks by adding multiple tries but a shorter timeout
* [API Daemon] Improves handling of XML parsing errors in VM configurations
* [ALL] Adds support for whole VM snapshots, including configuration XML details, and direct rollback to snapshots
* [ALL] Adds support for exporting and importing whole VM snapshots
* [Client CLI] Removes vCPU topology from short VM info output
* [Client CLI] Improves output format of VM info output
* [API Daemon] Adds an endpoint to get the current primary node
* [Client CLI] Fixes a bug where API requests were made 3 times
* [Other] Improves the build-and-deploy.sh script
* [API Daemon] Improves the "vm rename" command to avoid redefining VM, preserving history etc.
* [API Daemon] Adds an indication when a task is run on the primary node
* [API Daemon] Fixes a bug where the ZK schema relative path didn't work sometimes

###### [v0.9.98](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.98)

* [CLI Client] Fixed output when API call times out
@@ -27,7 +27,7 @@ from distutils.util import strtobool as dustrtobool

import daemon_lib.config as cfg

# Daemon version
version = "0.9.98"
version = "0.9.100"

# API version
API_VERSION = 1.0
@@ -3194,7 +3194,23 @@ class API_VM_Snapshot(Resource):
id: Message
"""
snapshot_name = reqargs.get("snapshot_name", None)
return api_helper.create_vm_snapshot(vm, snapshot_name=snapshot_name)

task = run_celery_task(
"vm.create_snapshot",
domain=vm,
snapshot_name=snapshot_name,
run_on="primary",
)

return (
{
"task_id": task.id,
"task_name": "vm.create_snapshot",
"run_on": f"{get_primary_node()} (primary)",
},
202,
{"Location": Api.url_for(api, API_Tasks_Element, task_id=task.id)},
)

@RequestParser(
[
@@ -3236,7 +3252,23 @@ class API_VM_Snapshot(Resource):
id: Message
"""
snapshot_name = reqargs.get("snapshot_name", None)
return api_helper.remove_vm_snapshot(vm, snapshot_name)

task = run_celery_task(
"vm.remove_snapshot",
domain=vm,
snapshot_name=snapshot_name,
run_on="primary",
)

return (
{
"task_id": task.id,
"task_name": "vm.remove_snapshot",
"run_on": f"{get_primary_node()} (primary)",
},
202,
{"Location": Api.url_for(api, API_Tasks_Element, task_id=task.id)},
)


api.add_resource(API_VM_Snapshot, "/vm/<vm>/snapshot")
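For reference, both snapshot methods above now return a 202 Accepted with Celery task details instead of a synchronous result. A minimal sketch of calling the endpoint, assuming an illustrative base URL in $PVC_API, a hypothetical VM named testvm, and with authentication omitted:

```bash
# Request a snapshot of VM "testvm"; the API replies 202 with the task details shown above
curl -X POST "${PVC_API}/vm/testvm/snapshot?snapshot_name=snap1"
# Illustrative response body:
# {"task_id": "<uuid>", "task_name": "vm.create_snapshot", "run_on": "<node> (primary)"}
# The Location header points at the corresponding task-status resource (API_Tasks_Element) for polling.
```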
@@ -3284,7 +3316,23 @@ class API_VM_Snapshot_Rollback(Resource):
id: Message
"""
snapshot_name = reqargs.get("snapshot_name", None)
return api_helper.rollback_vm_snapshot(vm, snapshot_name)

task = run_celery_task(
"vm.rollback_snapshot",
domain=vm,
snapshot_name=snapshot_name,
run_on="primary",
)

return (
{
"task_id": task.id,
"task_name": "vm.rollback_snapshot",
"run_on": f"{get_primary_node()} (primary)",
},
202,
{"Location": Api.url_for(api, API_Tasks_Element, task_id=task.id)},
)


api.add_resource(API_VM_Snapshot_Rollback, "/vm/<vm>/snapshot/rollback")
@@ -3354,8 +3402,24 @@ class API_VM_Snapshot_Export(Resource):
snapshot_name = reqargs.get("snapshot_name", None)
export_path = reqargs.get("export_path", None)
incremental_parent = reqargs.get("incremental_parent", None)
return api_helper.export_vm_snapshot(
vm, snapshot_name, export_path, incremental_parent

task = run_celery_task(
"vm.export_snapshot",
domain=vm,
snapshot_name=snapshot_name,
export_path=export_path,
incremental_parent=incremental_parent,
run_on="primary",
)

return (
{
"task_id": task.id,
"task_name": "vm.export_snapshot",
"run_on": f"{get_primary_node()} (primary)",
},
202,
{"Location": Api.url_for(api, API_Tasks_Element, task_id=task.id)},
)
@@ -3427,14 +3491,98 @@ class API_VM_Snapshot_Import(Resource):
snapshot_name = reqargs.get("snapshot_name", None)
import_path = reqargs.get("import_path", None)
retain_snapshot = bool(strtobool(reqargs.get("retain_snapshot", "True")))
return api_helper.import_vm_snapshot(
vm, snapshot_name, import_path, retain_snapshot

task = run_celery_task(
"vm.import_snapshot",
domain=vm,
snapshot_name=snapshot_name,
import_path=import_path,
retain_snapshot=retain_snapshot,
run_on="primary",
)

return (
{
"task_id": task.id,
"task_name": "vm.import_snapshot",
"run_on": f"{get_primary_node()} (primary)",
},
202,
{"Location": Api.url_for(api, API_Tasks_Element, task_id=task.id)},
)


api.add_resource(API_VM_Snapshot_Import, "/vm/<vm>/snapshot/import")
# /vm/autobackup
class API_VM_Autobackup_Root(Resource):
@RequestParser(
[
{"name": "force_full"},
{"name": "email_recipients"},
]
)
@Authenticator
def post(self, reqargs):
"""
Trigger a cluster autobackup job
---
tags:
- provisioner
parameters:
- in: query
name: force_full
type: boolean
required: false
description: If set and true, triggers a full autobackup regardless of schedule
- in: query
name: email_recipients
type: array
description: A list of email addresses to send failure and report emails to
items:
type: string
example: "user@domain.tld"
responses:
200:
description: OK
schema:
type: object
properties:
task_id:
type: string
description: Task ID for the provisioner Celery worker
400:
description: Bad request
schema:
type: object
id: Message
"""

email_recipients = reqargs.get("email_recipients", None)
if email_recipients is not None and not isinstance(email_recipients, list):
email_recipients = [email_recipients]

task = run_celery_task(
"cluster.autobackup",
force_full=bool(strtobool(reqargs.get("force_full", "false"))),
email_recipients=email_recipients,
run_on="primary",
)
return (
{
"task_id": task.id,
"task_name": "cluster.autobackup",
"run_on": f"{get_primary_node()} (primary)",
},
202,
{"Location": Api.url_for(api, API_Tasks_Element, task_id=task.id)},
)


api.add_resource(API_VM_Autobackup_Root, "/vm/autobackup")
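A minimal sketch of triggering the new autobackup endpoint, again assuming an illustrative $PVC_API base URL and with authentication omitted:

```bash
# Submit a forced-full cluster autobackup and request report emails; returns 202 plus task details
curl -X POST "${PVC_API}/vm/autobackup?force_full=true&email_recipients=user@domain.tld"
```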
##########################################################
# Client API - Network
##########################################################
@@ -4990,7 +5138,7 @@ class API_Storage_Ceph_Benchmark(Resource):
{
"task_id": task.id,
"task_name": "storage.benchmark",
"run_on": get_primary_node(),
"run_on": f"{get_primary_node()} (primary)",
},
202,
{"Location": Api.url_for(api, API_Tasks_Element, task_id=task.id)},
@@ -9246,7 +9394,7 @@ class API_Provisioner_Create_Root(Resource):
{
"task_id": task.id,
"task_name": "provisioner.create",
"run_on": get_primary_node(),
"run_on": f"{get_primary_node()} (primary)",
},
202,
{"Location": Api.url_for(api, API_Tasks_Element, task_id=task.id)},
@@ -13,6 +13,7 @@ else
fi

KEEP_ARTIFACTS=""
API_ONLY=""
PRIMARY_NODE=""
if [[ -n ${1} ]]; then
for arg in ${@}; do

@@ -21,6 +22,10 @@ if [[ -n ${1} ]]; then
KEEP_ARTIFACTS="y"
shift
;;
-a|--api-only)
API_ONLY="y"
shift
;;
-p=*|--become-primary=*)
PRIMARY_NODE=$( awk -F'=' '{ print $NF }' <<<"${arg}" )
shift

@@ -75,6 +80,7 @@ for HOST in ${HOSTS[@]}; do
ssh $HOST $SUDO systemctl restart pvcapid &>/dev/null
sleep 2
ssh $HOST $SUDO systemctl restart pvcworkerd &>/dev/null
if [[ -z ${API_ONLY} ]]; then
sleep 2
ssh $HOST $SUDO systemctl restart pvchealthd &>/dev/null
sleep 2

@@ -85,10 +91,13 @@ for HOST in ${HOSTS[@]}; do
sleep 5
echo -n "."
done
fi
echo " done."
if [[ -n ${PRIMARY_NODE} && ${PRIMARY_NODE} == ${HOST} ]]; then
echo -n ">>> "
ssh $HOST pvc -q node primary
echo -n ">>> Setting node $HOST to primary coordinator state... "
ssh $HOST pvc -q node primary --wait &>/dev/null
ssh $HOST $SUDO systemctl restart pvcworkerd &>/dev/null
echo "done."
fi
done
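An illustrative invocation of the updated build-and-deploy.sh script, assuming the positional arguments name the target hosts (as the HOSTS loop suggests) and using hypothetical hostnames:

```bash
# Redeploy only the API/worker daemons, then make hv1 the primary coordinator afterwards
./build-and-deploy.sh -a -p=hv1 hv1 hv2 hv3
```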
@@ -724,6 +724,33 @@ def cli_node():
pass


###############################################################################
# > pvc node is-primary
###############################################################################
@click.command(
name="is-primary",
short_help="Check if this node is primary coordinator.",
)
@connection_req
@click.argument("node", default=DEFAULT_NODE_HOSTNAME)
def cli_node_is_primary(
node,
):
"""
Check if NODE (or this node if unset) is the current primary coordinator.

Designed for scripting; returns no visible data, but the return code is 0 if the node
is primary, and 1 if it is not.
"""

_, primary_node = pvc.lib.cluster.get_primary_node(CLI_CONFIG)

if primary_node == node:
exit(0)
else:
exit(1)
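Since the new command is aimed at scripting, a short usage sketch of the exit-code behaviour described in the docstring above:

```bash
# Run a maintenance step only on the current primary coordinator
if pvc node is-primary; then
    echo "this node is the primary coordinator"
else
    echo "this node is not primary"
fi
```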
###############################################################################
# > pvc node primary
###############################################################################
@@ -1749,7 +1776,7 @@ def cli_vm_unmigrate(domain, wait, force_live):
is_flag=True,
default=True,
show_default=True,
help="Wait or don't wait for task to complete, showing progress",
help="Wait or don't wait for task to complete, showing progress if waiting",
)
def cli_vm_flush_locks(domain, wait_flag):
"""

@@ -1758,7 +1785,7 @@ def cli_vm_flush_locks(domain, wait_flag):
NOTE: This is a task-based command. The "--wait" flag (default) will block and show progress. Specifying the "--no-wait" flag will return immediately with a job ID instead, which can be queried externally later.
"""

retcode, retmsg = pvc.lib.vm.vm_locks(CLI_CONFIG, domain, wait_flag)
retcode, retmsg = pvc.lib.vm.vm_locks(CLI_CONFIG, domain, wait_flag=wait_flag)

if retcode and wait_flag:
retmsg = wait_for_celery_task(CLI_CONFIG, retmsg)
@@ -1787,7 +1814,15 @@ def cli_vm_snapshot():
@connection_req
@click.argument("domain")
@click.argument("snapshot_name", required=False, default=None)
def cli_vm_snapshot_create(domain, snapshot_name):
@click.option(
"--wait/--no-wait",
"wait_flag",
is_flag=True,
default=True,
show_default=True,
help="Wait or don't wait for task to complete, showing progress if waiting",
)
def cli_vm_snapshot_create(domain, snapshot_name, wait_flag):
"""
Create a snapshot of the disks and XML configuration of virtual machine DOMAIN, with the
optional name SNAPSHOT_NAME. DOMAIN may be a UUID or name.

@@ -1797,18 +1832,12 @@ def cli_vm_snapshot_create(domain, snapshot_name):
VM at the moment of the snapshot.
"""

echo(
CLI_CONFIG,
f"Taking snapshot of VM '{domain}'... ",
newline=False,
)
retcode, retmsg = pvc.lib.vm.vm_create_snapshot(
CLI_CONFIG, domain, snapshot_name=snapshot_name
CLI_CONFIG, domain, snapshot_name=snapshot_name, wait_flag=wait_flag
)
if retcode:
echo(CLI_CONFIG, "done.")
else:
echo(CLI_CONFIG, "failed.")

if retcode and wait_flag:
retmsg = wait_for_celery_task(CLI_CONFIG, retmsg)
finish(retcode, retmsg)
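A quick usage sketch of the reworked snapshot commands (VM and snapshot names are illustrative):

```bash
# Take a snapshot and wait for the worker task to complete (the default)
pvc vm snapshot create testvm snap1
# Submit the task and return immediately with a task ID instead
pvc vm snapshot create --no-wait testvm snap1
```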
@@ -1819,23 +1848,27 @@ def cli_vm_snapshot_create(domain, snapshot_name):
@connection_req
@click.argument("domain")
@click.argument("snapshot_name")
@click.option(
"--wait/--no-wait",
"wait_flag",
is_flag=True,
default=True,
show_default=True,
help="Wait or don't wait for task to complete, showing progress if waiting",
)
@confirm_opt("Remove shapshot {snapshot_name} of VM {domain}")
def cli_vm_snapshot_remove(domain, snapshot_name):
def cli_vm_snapshot_remove(domain, snapshot_name, wait_flag):
"""
Remove the snapshot SNAPSHOT_NAME of the disks and XML configuration of virtual machine DOMAIN,
DOMAIN may be a UUID or name.
"""

echo(
CLI_CONFIG,
f"Removing snapshot '{snapshot_name}' of VM '{domain}'... ",
newline=False,
retcode, retmsg = pvc.lib.vm.vm_remove_snapshot(
CLI_CONFIG, domain, snapshot_name, wait_flag=wait_flag
)
retcode, retmsg = pvc.lib.vm.vm_remove_snapshot(CLI_CONFIG, domain, snapshot_name)
if retcode:
echo(CLI_CONFIG, "done.")
else:
echo(CLI_CONFIG, "failed.")

if retcode and wait_flag:
retmsg = wait_for_celery_task(CLI_CONFIG, retmsg)
finish(retcode, retmsg)
@@ -1848,25 +1881,29 @@ def cli_vm_snapshot_remove(domain, snapshot_name):
@connection_req
@click.argument("domain")
@click.argument("snapshot_name")
@click.option(
"--wait/--no-wait",
"wait_flag",
is_flag=True,
default=True,
show_default=True,
help="Wait or don't wait for task to complete, showing progress if waiting",
)
@confirm_opt(
"Roll back to snapshot {snapshot_name} of {domain} and lose all data and changes since this snapshot"
)
def cli_vm_snapshot_rollback(domain, snapshot_name):
def cli_vm_snapshot_rollback(domain, snapshot_name, wait_flag):
"""
Roll back to the snapshot SNAPSHOT_NAME of the disks and XML configuration of virtual machine DOMAIN,
DOMAIN may be a UUID or name.
"""

echo(
CLI_CONFIG,
f"Rolling back to snapshot '{snapshot_name}' of VM '{domain}'... ",
newline=False,
retcode, retmsg = pvc.lib.vm.vm_rollback_snapshot(
CLI_CONFIG, domain, snapshot_name, wait_flag=wait_flag
)
retcode, retmsg = pvc.lib.vm.vm_rollback_snapshot(CLI_CONFIG, domain, snapshot_name)
if retcode:
echo(CLI_CONFIG, "done.")
else:
echo(CLI_CONFIG, "failed.")

if retcode and wait_flag:
retmsg = wait_for_celery_task(CLI_CONFIG, retmsg)
finish(retcode, retmsg)
@@ -1887,7 +1924,17 @@ def cli_vm_snapshot_rollback(domain, snapshot_name):
default=None,
help="Perform an incremental volume export from this parent snapshot.",
)
def cli_vm_snapshot_export(domain, snapshot_name, export_path, incremental_parent):
@click.option(
"--wait/--no-wait",
"wait_flag",
is_flag=True,
default=True,
show_default=True,
help="Wait or don't wait for task to complete, showing progress if waiting",
)
def cli_vm_snapshot_export(
domain, snapshot_name, export_path, incremental_parent, wait_flag
):
"""
Export the (existing) snapshot SNAPSHOT_NAME of virtual machine DOMAIN to the absolute path EXPORT_PATH on the current PVC primary coordinator.
DOMAIN may be a UUID or name.
@@ -1901,19 +1948,17 @@ def cli_vm_snapshot_export(domain, snapshot_name, export_path, incremental_paren
Full export volume images are sparse-allocated, however it is recommended for safety to consider their maximum allocated size when allocated space for the EXPORT_PATH. Incremental volume images are generally small but are dependent entirely on the rate of data change in each volume.
"""

_, primary_node = pvc.lib.cluster.get_primary_node(CLI_CONFIG)
echo(
CLI_CONFIG,
f'Exporting snapshot "{snapshot_name}" of VM "{domain}" to "{primary_node}:{export_path}"... ',
newline=False,
)
retcode, retmsg = pvc.lib.vm.vm_export_snapshot(
CLI_CONFIG, domain, snapshot_name, export_path, incremental_parent
CLI_CONFIG,
domain,
snapshot_name,
export_path,
incremental_parent=incremental_parent,
wait_flag=wait_flag,
)
if retcode:
echo(CLI_CONFIG, "done.")
else:
echo(CLI_CONFIG, "failed.")

if retcode and wait_flag:
retmsg = wait_for_celery_task(CLI_CONFIG, retmsg)
finish(retcode, retmsg)
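An illustrative export/import round trip built on these commands (VM name, snapshot name, and path are hypothetical):

```bash
# Export snapshot "snap1" of testvm to a directory on the primary coordinator
pvc vm snapshot export testvm snap1 /srv/vm-exports
# Import it on a cluster where testvm does not yet exist
pvc vm snapshot import testvm snap1 /srv/vm-exports
```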
@@ -1933,7 +1978,17 @@ def cli_vm_snapshot_export(domain, snapshot_name, export_path, incremental_paren
default=True,
help="Retain or remove restored (parent, if incremental) snapshot in Ceph.",
)
def cli_vm_snapshot_import(domain, snapshot_name, import_path, retain_snapshot):
@click.option(
"--wait/--no-wait",
"wait_flag",
is_flag=True,
default=True,
show_default=True,
help="Wait or don't wait for task to complete, showing progress if waiting",
)
def cli_vm_snapshot_import(
domain, snapshot_name, import_path, retain_snapshot, wait_flag
):
"""
Import the snapshot SNAPSHOT_NAME of virtual machine DOMAIN from the absolute path IMPORT_PATH on the current PVC primary coordinator.
DOMAIN may be a UUID or name.
@@ -1942,25 +1997,24 @@ def cli_vm_snapshot_import(domain, snapshot_name, import_path, retain_snapshot):

The import will include the VM configuration, metainfo, and the point-in-time snapshot of all attached RBD volumes. Incremental imports will be automatically handled.

A VM named DOMAIN or with the same UUID must not exist; if a VM with the same name or UUID already exists, it must be removed, or renamed and then undefined (to preserve volumes), before importing.
A VM named DOMAIN or with the same UUID must not exist; if a VM with the same name or UUID already exists, it must be removed (or renamed and then undefined, to preserve volumes while freeing the UUID) before importing.

If the "-r"/"--retain-snapshot" option is specified (the default), for incremental imports, only the parent snapshot is kept; for full imports, the imported snapshot is kept. If the "-R"/"--remove-snapshot" option is specified, the imported snapshot is removed.

WARNING: The "-R"/"--remove-snapshot" option will invalidate any existing incremental snapshots based on the same incremental parent for the imported VM.
"""

echo(
CLI_CONFIG,
f"Importing snapshot '{snapshot_name}' of VM '{domain}'... ",
newline=False,
)
retcode, retmsg = pvc.lib.vm.vm_import_snapshot(
CLI_CONFIG, domain, snapshot_name, import_path, retain_snapshot
CLI_CONFIG,
domain,
snapshot_name,
import_path,
retain_snapshot=retain_snapshot,
wait_flag=wait_flag,
)
if retcode:
echo(CLI_CONFIG, "done.")
else:
echo(CLI_CONFIG, "failed.")

if retcode and wait_flag:
retmsg = wait_for_celery_task(CLI_CONFIG, retmsg)
finish(retcode, retmsg)
@@ -1974,6 +2028,8 @@ def cli_vm_snapshot_import(domain, snapshot_name, import_path, retain_snapshot):
)
def cli_vm_backup():
"""
DEPRECATED: Use 'pvc vm snapshot' commands instead. 'pvc vm backup' commands will be removed in a future version.

Manage backups of VMs in a PVC cluster.
"""
pass
@@ -2003,6 +2059,8 @@ def cli_vm_backup():
)
def cli_vm_backup_create(domain, backup_path, incremental_parent, retain_snapshot):
"""
DEPRECATED: Use 'pvc vm snapshot' commands instead. 'pvc vm backup' commands will be removed in a future version.

Create a backup of virtual machine DOMAIN to BACKUP_PATH on the cluster primary coordinator. DOMAIN may be a UUID or name.

BACKUP_PATH must be a valid absolute directory path on the cluster "primary" coordinator (see "pvc node list") allowing writes from the API daemon (normally running as "root"). The BACKUP_PATH should be a large storage volume, ideally a remotely mounted filesystem (e.g. NFS, SSHFS, etc.) or non-Ceph-backed disk; PVC does not handle this path, that is up to the administrator to configure and manage.
@@ -2049,13 +2107,15 @@ def cli_vm_backup_create(domain, backup_path, incremental_parent, retain_snapsho
)
def cli_vm_backup_restore(domain, backup_datestring, backup_path, retain_snapshot):
"""
DEPRECATED: Use 'pvc vm snapshot' commands instead. 'pvc vm backup' commands will be removed in a future version.

Restore the backup BACKUP_DATESTRING of virtual machine DOMAIN stored in BACKUP_PATH on the cluster primary coordinator. DOMAIN may be a UUID or name.

BACKUP_PATH must be a valid absolute directory path on the cluster "primary" coordinator (see "pvc node list") allowing reads from the API daemon (normally running as "root"). The BACKUP_PATH should be a large storage volume, ideally a remotely mounted filesystem (e.g. NFS, SSHFS, etc.) or non-Ceph-backed disk; PVC does not handle this path, that is up to the administrator to configure and manage.

The restore will import the VM configuration, metainfo, and the point-in-time snapshot of all attached RBD volumes. Incremental backups will be automatically handled.

A VM named DOMAIN or with the same UUID must not exist; if a VM with the same name or UUID already exists, it must be removed, or renamed and then undefined (to preserve volumes), before restoring.
A VM named DOMAIN or with the same UUID must not exist; if a VM with the same name or UUID already exists, it must be removed (or renamed and then undefined, to preserve volumes while freeing the UUID) before importing.

If the "-r"/"--retain-snapshot" option is specified (the default), for incremental restores, only the parent snapshot is kept; for full restores, the restored snapshot is kept. If the "-R"/"--remove-snapshot" option is specified, the imported snapshot is removed.
@@ -2087,6 +2147,8 @@ def cli_vm_backup_restore(domain, backup_datestring, backup_path, retain_snapsho
@click.argument("backup_path")
def cli_vm_backup_remove(domain, backup_datestring, backup_path):
"""
DEPRECATED: Use 'pvc vm snapshot' commands instead. 'pvc vm backup' commands will be removed in a future version.

Remove the backup BACKUP_DATESTRING, including snapshots, of virtual machine DOMAIN stored in BACKUP_PATH on the cluster primary coordinator. DOMAIN may be a UUID or name.

WARNING: Removing an incremental parent will invalidate any existing incremental backups based on that backup.
@@ -2114,15 +2176,6 @@ def cli_vm_backup_remove(domain, backup_datestring, backup_path):
name="autobackup", short_help="Perform automatic virtual machine backups."
)
@connection_req
@click.option(
"-f",
"--configuration",
"autobackup_cfgfile",
envvar="PVC_AUTOBACKUP_CFGFILE",
default=DEFAULT_AUTOBACKUP_FILENAME,
show_default=True,
help="Override default config file location.",
)
@click.option(
"--email-report",
"email_report",
@@ -2136,39 +2189,42 @@ def cli_vm_backup_remove(domain, backup_datestring, backup_path):
is_flag=True,
help="Force all backups to be full backups this run.",
)
@click.option(
"--wait/--no-wait",
"wait_flag",
is_flag=True,
default=True,
show_default=True,
help="Wait or don't wait for task to complete, showing progress if waiting.",
)
@click.option(
"--cron",
"cron_flag",
default=False,
is_flag=True,
help="Cron mode; don't error exit if this isn't the primary coordinator.",
default=False,
show_default=True,
help="Run in cron mode (returns immediately with no output once job is submitted).",
)
def cli_vm_autobackup(autobackup_cfgfile, email_report, force_full_flag, cron_flag):
def cli_vm_autobackup(email_report, force_full_flag, wait_flag, cron_flag):
"""
Perform automated backups of VMs, with integrated cleanup and full/incremental scheduling.

This command enables automatic backup of PVC VMs at the block level, leveraging the various "pvc vm backup"
This command enables automatic backup of PVC VMs at the block level, leveraging the various "pvc vm snapshot"
functions with an internal rentention and cleanup system as well as determination of full vs. incremental
backups at different intervals. VMs are selected based on configured VM tags. The destination storage
may either be local, or provided by a remote filesystem which is automatically mounted and unmounted during
the backup run via a set of configured commands before and after the backup run.

NOTE: This command performs its tasks in a local context. It MUST be run from the cluster's active primary
coordinator using the "local" connection only; if either is not correct, the command will error.

NOTE: This command should be run as the same user as the API daemon, usually "root" with "sudo -E" or in
a cronjob as "root", to ensure permissions are correct on the backup files. Failure to do so will still take
the backup, but the state update write will likely fail and the backup will become untracked. The command
will prompt for confirmation if it is found not to be running as "root" and this cannot be bypassed.

This command should be run from cron or a timer at a regular interval (e.g. daily, hourly, etc.) which defines
how often backups are taken. Backup format (full/incremental) and retention is based only on the number of
recorded backups, not on the time interval between them. Backups taken manually outside of the "autobackup"
recorded backups, not on the time interval between them. Exports taken manually outside of the "autobackup"
command are not counted towards the format or retention of autobackups.

The PVC_AUTOBACKUP_CFGFILE envvar or "-f"/"--configuration" option can be used to override the default
configuration file path if required by a particular run. For full details of the possible options, please
see the example configuration file at "/usr/share/pvc/autobackup.sample.yaml".
WARNING: Running this command manually will interfere with the schedule! Do not run manually except for testing.

The actual details of the autobackup, including retention policies, full-vs-incremental, pre- and post- run
mounting/unmounting commands, etc. are defined in the main PVC configuration file `/etc/pvc/pvc.conf`. See
the sample configuration for more details.

An optional report on all current backups can be emailed to one or more email addresses using the
"--email-report" flag. This report will include information on all current known backups.
@@ -2177,11 +2233,29 @@ def cli_vm_autobackup(autobackup_cfgfile, email_report, force_full_flag, cron_fl
which can help synchronize the backups of existing VMs with new ones.
"""

# All work here is done in the helper function for portability; we don't even use "finish"
vm_autobackup(
CLI_CONFIG, autobackup_cfgfile, email_report, force_full_flag, cron_flag
if cron_flag:
wait_flag = False

if email_report is not None:
email_recipients = email_report.split(",")
else:
email_recipients = None

retcode, retmsg = pvc.lib.vm.vm_autobackup(
CLI_CONFIG,
email_recipients=email_recipients,
force_full_flag=force_full_flag,
wait_flag=wait_flag,
)

if retcode and wait_flag:
retmsg = wait_for_celery_task(CLI_CONFIG, retmsg)

if cron_flag:
finish(retcode, None)
else:
finish(retcode, retmsg)
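As a sketch, a scheduled run of the reworked command might look like the following (recipient address illustrative; the real schedule comes from the pvc-ansible timer unit mentioned in the changelog):

```bash
# Run from cron or a systemd timer on any node; with --cron the command exits quietly once the job is submitted
pvc vm autobackup --cron --email-report backups@domain.tld
```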
###############################################################################
# > pvc vm tag
@@ -3687,7 +3761,7 @@ def cli_storage_benchmark():
is_flag=True,
default=True,
show_default=True,
help="Wait or don't wait for task to complete, showing progress",
help="Wait or don't wait for task to complete, showing progress if waiting",
)
@confirm_opt(
"Storage benchmarks take approximately 10 minutes to run and generate significant load on the cluster; they should be run sparingly. Continue"
@@ -3776,7 +3850,7 @@ def cli_storage_osd():
is_flag=True,
default=True,
show_default=True,
help="Wait or don't wait for task to complete, showing progress",
help="Wait or don't wait for task to complete, showing progress if waiting",
)
@confirm_opt(
"Destroy all data on and create a new OSD database volume group on node {node} device {device}"
@@ -3792,8 +3866,6 @@ def cli_storage_osd_create_db_vg(node, device, wait_flag):
Only one OSD database volume group on a single physical device, named "osd-db", is supported per node, so it must be fast and large enough to act as an effective OSD database device for all OSDs on the node. Attempting to add additional database volume groups after the first will result in an error.

WARNING: If the OSD database device fails, all OSDs on the node using it will be lost and must be recreated.

A "detect" string is a string in the form "detect:<NAME>:<HUMAN-SIZE>:<ID>". Detect strings allow for automatic determination of Linux block device paths from known basic information about disks by leveraging "lsscsi" on the target host. The "NAME" should be some descriptive identifier, for instance the manufacturer (e.g. "INTEL"), the "HUMAN-SIZE" should be the labeled human-readable size of the device (e.g. "480GB", "1.92TB"), and "ID" specifies the Nth 0-indexed device which matches the "NAME" and "HUMAN-SIZE" values (e.g. "2" would match the third device with the corresponding "NAME" and "HUMAN-SIZE"). When matching against sizes, there is +/- 3% flexibility to account for base-1000 vs. base-1024 differences and rounding errors. The "NAME" may contain whitespace but if so the entire detect string should be quoted, and is case-insensitive. More information about detect strings can be found in the manual.
"""

retcode, retmsg = pvc.lib.storage.ceph_osd_db_vg_add(
@@ -3851,7 +3923,7 @@ def cli_storage_osd_create_db_vg(node, device, wait_flag):
is_flag=True,
default=True,
show_default=True,
help="Wait or don't wait for task to complete, showing progress",
help="Wait or don't wait for task to complete, showing progress if waiting",
)
@confirm_opt("Destroy all data on and create new OSD(s) on node {node} device {device}")
def cli_storage_osd_add(
@@ -3862,7 +3934,7 @@ def cli_storage_osd_add(

DEVICE must be a valid block device path (e.g. '/dev/nvme0n1', '/dev/disk/by-path/...') or a "detect" string. Partitions are NOT supported. A "detect" string is a string in the form "detect:<NAME>:<HUMAN-SIZE>:<ID>". The path or detect string must be valid on the current node housing the OSD.

A "detect" string is a string in the form "detect:<NAME>:<HUMAN-SIZE>:<ID>". Detect strings allow for automatic determination of Linux block device paths from known basic information about disks by leveraging "lsscsi" on the target host. The "NAME" should be some descriptive identifier, for instance the manufacturer (e.g. "INTEL"), the "HUMAN-SIZE" should be the labeled human-readable size of the device (e.g. "480GB", "1.92TB"), and "ID" specifies the Nth 0-indexed device which matches the "NAME" and "HUMAN-SIZE" values (e.g. "2" would match the third device with the corresponding "NAME" and "HUMAN-SIZE"). When matching against sizes, there is +/- 3% flexibility to account for base-1000 vs. base-1024 differences and rounding errors. The "NAME" may contain whitespace but if so the entire detect string should be quoted, and is case-insensitive. More information about detect strings can be found in the pvcbootstrapd manual.
A "detect" string is a string in the form "detect:<NAME>:<HUMAN-SIZE>:<ID>". Detect strings allow for automatic determination of Linux block device paths from known basic information about disks by leveraging "lsscsi"/"nvme" on the target host. The "NAME" should be some descriptive identifier that would be part of the device's Model information, for instance the manufacturer (e.g. "INTEL") or a similar unique string (e.g. "BOSS" for Dell BOSS cards); the "HUMAN-SIZE" should be the labeled human-readable size of the device (e.g. "480GB", "1.92TB"); and "ID" specifies the Nth 0-indexed device which matches the "NAME" and "HUMAN-SIZE" values (e.g. "2" would match the third device with the corresponding "NAME" and "HUMAN-SIZE"). When matching against sizes, there is +/- 3% flexibility to account for base-1000 vs. base-1024 differences and rounding errors. The "NAME" may contain whitespace but if so the entire detect string should be quoted, and is case-insensitive. More information about detect strings can be found in the pvcbootstrapd manual.

The weight of an OSD should reflect the ratio of the size of the OSD to the other OSDs in the storage cluster. For example, with a 200GB disk and a 400GB disk in each node, the 400GB disk should have twice the weight as the 200GB disk. For more information about CRUSH weights, please see the Ceph documentation.
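To make the detect-string format above concrete, two illustrative examples (values are examples only):

```bash
# Third (0-indexed ID 2) device whose model matches "INTEL" and whose labeled size is 1.92TB
detect:INTEL:1.92TB:2
# A NAME containing whitespace must be quoted; matching is case-insensitive
"detect:Dell BOSS:480GB:0"
```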
@@ -3934,7 +4006,7 @@ def cli_storage_osd_add(
is_flag=True,
default=True,
show_default=True,
help="Wait or don't wait for task to complete, showing progress",
help="Wait or don't wait for task to complete, showing progress if waiting",
)
@confirm_opt(
"Destroy all data on and replace OSD {osdid} (and peer split OSDs) with new device {new_device}"
@@ -3989,7 +4061,7 @@ def cli_storage_osd_replace(
is_flag=True,
default=True,
show_default=True,
help="Wait or don't wait for task to complete, showing progress",
help="Wait or don't wait for task to complete, showing progress if waiting",
)
@confirm_opt("Refresh OSD {osdid} (and peer split OSDs) on device {device}")
def cli_storage_osd_refresh(osdid, device, wait_flag):
@@ -4034,7 +4106,7 @@ def cli_storage_osd_refresh(osdid, device, wait_flag):
is_flag=True,
default=True,
show_default=True,
help="Wait or don't wait for task to complete, showing progress",
help="Wait or don't wait for task to complete, showing progress if waiting",
)
@confirm_opt("Remove and destroy data on OSD {osdid}")
def cli_storage_osd_remove(osdid, force_flag, wait_flag):
@@ -6086,7 +6158,7 @@ def cli_provisioner_profile_list(limit, format_function):
is_flag=True,
default=True,
show_default=True,
help="Wait or don't wait for task to complete, showing progress",
help="Wait or don't wait for task to complete, showing progress if waiting",
)
def cli_provisioner_create(
name, profile, define_flag, start_flag, script_args, wait_flag
@@ -6478,6 +6550,7 @@ def cli(
# Click command tree
###############################################################################

cli_node.add_command(cli_node_is_primary)
cli_node.add_command(cli_node_primary)
cli_node.add_command(cli_node_secondary)
cli_node.add_command(cli_node_flush)
@@ -20,26 +20,16 @@
###############################################################################

from click import echo as click_echo
from click import confirm
from datetime import datetime
from distutils.util import strtobool
from getpass import getuser
from json import load as jload
from json import dump as jdump
from os import chmod, environ, getpid, path, popen, makedirs, get_terminal_size
from re import findall
from os import chmod, environ, getpid, path, get_terminal_size
from socket import gethostname
from subprocess import run, PIPE
from sys import argv
from syslog import syslog, openlog, closelog, LOG_AUTH
from yaml import load as yload
from yaml import SafeLoader

import pvc.lib.provisioner
import pvc.lib.vm
import pvc.lib.node
import pvc.lib.storage


DEFAULT_STORE_DATA = {"cfgfile": "/etc/pvc/pvc.conf"}
DEFAULT_STORE_FILENAME = "pvc.json"
@@ -196,452 +186,3 @@ def update_store(store_path, store_data):
|
||||
|
||||
with open(store_file, "w") as fh:
|
||||
jdump(store_data, fh, sort_keys=True, indent=4)
|
||||
|
||||
|
||||
def get_autobackup_config(CLI_CONFIG, cfgfile):
|
||||
try:
|
||||
config = dict()
|
||||
with open(cfgfile) as fh:
|
||||
full_config = yload(fh, Loader=SafeLoader)
|
||||
backup_config = full_config["autobackup"]
|
||||
config["backup_root_path"] = backup_config["backup_root_path"]
|
||||
config["backup_root_suffix"] = backup_config["backup_root_suffix"]
|
||||
config["backup_tags"] = backup_config["backup_tags"]
|
||||
config["backup_schedule"] = backup_config["backup_schedule"]
|
||||
config["auto_mount_enabled"] = backup_config["auto_mount"]["enabled"]
|
||||
if config["auto_mount_enabled"]:
|
||||
config["mount_cmds"] = list()
|
||||
_mount_cmds = backup_config["auto_mount"]["mount_cmds"]
|
||||
for _mount_cmd in _mount_cmds:
|
||||
if "{backup_root_path}" in _mount_cmd:
|
||||
_mount_cmd = _mount_cmd.format(
|
||||
backup_root_path=backup_config["backup_root_path"]
|
||||
)
|
||||
config["mount_cmds"].append(_mount_cmd)
|
||||
|
||||
config["unmount_cmds"] = list()
|
||||
_unmount_cmds = backup_config["auto_mount"]["unmount_cmds"]
|
||||
for _unmount_cmd in _unmount_cmds:
|
||||
if "{backup_root_path}" in _unmount_cmd:
|
||||
_unmount_cmd = _unmount_cmd.format(
|
||||
backup_root_path=backup_config["backup_root_path"]
|
||||
)
|
||||
config["unmount_cmds"].append(_unmount_cmd)
|
||||
except FileNotFoundError:
|
||||
return "Backup configuration does not exist!"
|
||||
except KeyError as e:
|
||||
return f"Backup configuration is invalid: {e}"
|
||||
|
||||
return config
|
||||
|
||||
|
||||
def vm_autobackup(
|
||||
CLI_CONFIG,
|
||||
autobackup_cfgfile=DEFAULT_AUTOBACKUP_FILENAME,
|
||||
email_report=None,
|
||||
force_full_flag=False,
|
||||
cron_flag=False,
|
||||
):
|
||||
"""
|
||||
Perform automatic backups of VMs based on an external config file.
|
||||
"""
|
||||
|
||||
backup_summary = dict()
|
||||
|
||||
if email_report is not None:
|
||||
from email.utils import formatdate
|
||||
from socket import gethostname
|
||||
|
||||
try:
|
||||
with open(autobackup_cfgfile) as fh:
|
||||
tmp_config = yload(fh, Loader=SafeLoader)
|
||||
cluster = tmp_config["cluster"]["name"]
|
||||
except Exception:
|
||||
cluster = "unknown"
|
||||
|
||||
def send_execution_failure_report(error=None):
|
||||
echo(CLI_CONFIG, f"Sending email failure report to {email_report}")
|
||||
|
||||
current_datetime = datetime.now()
|
||||
email_datetime = formatdate(float(current_datetime.strftime("%s")))
|
||||
|
||||
email = list()
|
||||
email.append(f"Date: {email_datetime}")
|
||||
email.append(f"Subject: PVC Autobackup execution failure for cluster {cluster}")
|
||||
|
||||
recipients = list()
|
||||
for recipient in email_report.split(","):
|
||||
recipients.append(f"<{recipient}>")
|
||||
email.append(f"To: {', '.join(recipients)}")
|
||||
email.append(f"From: PVC Autobackup System <pvc@{gethostname()}>")
|
||||
email.append("")
|
||||
|
||||
email.append(
|
||||
f"A PVC autobackup has FAILED at {current_datetime} due to an execution error."
|
||||
)
|
||||
email.append("")
|
||||
email.append("The reported error message is:")
|
||||
email.append(f" {error}")
|
||||
|
||||
try:
|
||||
p = popen("/usr/sbin/sendmail -t", "w")
|
||||
p.write("\n".join(email))
|
||||
p.close()
|
||||
except Exception as e:
|
||||
echo(CLI_CONFIG, f"Failed to send report email: {e}")
|
||||
|
||||
# Validate that we are running on the current primary coordinator of the 'local' cluster connection
|
||||
real_connection = CLI_CONFIG["connection"]
|
||||
CLI_CONFIG["connection"] = "local"
|
||||
retcode, retdata = pvc.lib.node.node_info(CLI_CONFIG, DEFAULT_NODE_HOSTNAME)
|
||||
if not retcode or retdata.get("coordinator_state") != "primary":
|
||||
if cron_flag:
|
||||
echo(
|
||||
CLI_CONFIG,
|
||||
"Current host is not the primary coordinator of the local cluster and running in cron mode. Exiting cleanly.",
|
||||
)
|
||||
exit(0)
|
||||
else:
|
||||
echo(
|
||||
CLI_CONFIG,
|
||||
f"ERROR: Current host is not the primary coordinator of the local cluster; got connection '{real_connection}', host '{DEFAULT_NODE_HOSTNAME}'.",
|
||||
)
|
||||
echo(
|
||||
CLI_CONFIG,
|
||||
"Autobackup MUST be run from the cluster active primary coordinator using the 'local' connection. See '-h'/'--help' for details.",
|
||||
)
|
||||
if email_report is not None:
|
||||
send_execution_failure_report(
|
||||
error=f"Autobackup run attempted from non-local connection or non-primary coordinator; got connection '{real_connection}', host '{DEFAULT_NODE_HOSTNAME}'."
|
||||
)
|
||||
exit(1)
|
||||
|
||||
# Ensure we're running as root, or show a warning & confirmation
|
||||
if getuser() != "root":
|
||||
confirm(
|
||||
"WARNING: You are not running this command as 'root'. This command should be run under the same user as the API daemon, which is usually 'root'. Are you sure you want to continue?",
|
||||
prompt_suffix=" ",
|
||||
abort=True,
|
||||
)
|
||||
|
||||
# Load our YAML config
|
||||
autobackup_config = get_autobackup_config(CLI_CONFIG, autobackup_cfgfile)
|
||||
if not isinstance(autobackup_config, dict):
|
||||
echo(CLI_CONFIG, f"ERROR: {autobackup_config}")
|
||||
if email_report is not None:
|
||||
send_execution_failure_report(error=f"{autobackup_config}")
|
||||
exit(1)
|
||||
|
||||
# Get the start time of this run
|
||||
autobackup_start_time = datetime.now()
|
||||
|
||||
# Get a list of all VMs on the cluster
|
||||
# We don't do tag filtering here, because we could match an arbitrary number of tags; instead, we
|
||||
# parse the list after
|
||||
retcode, retdata = pvc.lib.vm.vm_list(CLI_CONFIG, None, None, None, None, None)
|
||||
if not retcode:
|
||||
echo(CLI_CONFIG, f"ERROR: Failed to fetch VM list: {retdata}")
|
||||
if email_report is not None:
|
||||
send_execution_failure_report(error=f"Failed to fetch VM list: {retdata}")
|
||||
exit(1)
|
||||
cluster_vms = retdata
|
||||
|
||||
# Parse the list to match tags; too complex for list comprehension alas
|
||||
backup_vms = list()
|
||||
for vm in cluster_vms:
|
||||
vm_tag_names = [t["name"] for t in vm["tags"]]
|
||||
matching_tags = (
|
||||
True
|
||||
if len(
|
||||
set(vm_tag_names).intersection(set(autobackup_config["backup_tags"]))
|
||||
)
|
||||
> 0
|
||||
else False
|
||||
)
|
||||
if matching_tags:
|
||||
backup_vms.append(vm["name"])
|
||||
|
||||
if len(backup_vms) < 1:
|
||||
echo(CLI_CONFIG, "Found no suitable VMs for autobackup.")
|
||||
exit(0)
|
||||
|
||||
# Pretty print the names of the VMs we'll back up (to stderr)
|
||||
maxnamelen = max([len(n) for n in backup_vms]) + 2
|
||||
cols = 1
|
||||
while (cols * maxnamelen + maxnamelen + 2) <= MAX_CONTENT_WIDTH:
|
||||
cols += 1
|
||||
rows = len(backup_vms) // cols
|
||||
vm_list_rows = list()
|
||||
for row in range(0, rows + 1):
|
||||
row_start = row * cols
|
||||
row_end = (row * cols) + cols
|
||||
row_str = ""
|
||||
for x in range(row_start, row_end):
|
||||
if x < len(backup_vms):
|
||||
row_str += "{:<{}}".format(backup_vms[x], maxnamelen)
|
||||
vm_list_rows.append(row_str)
|
||||
|
||||
echo(CLI_CONFIG, f"Found {len(backup_vms)} suitable VM(s) for autobackup.")
|
||||
echo(CLI_CONFIG, "Full VM list:", stderr=True)
|
||||
echo(CLI_CONFIG, " {}".format("\n ".join(vm_list_rows)), stderr=True)
|
||||
echo(CLI_CONFIG, "", stderr=True)
|
||||
|
||||
if autobackup_config["auto_mount_enabled"]:
|
||||
# Execute each mount_cmds command in sequence
|
||||
for cmd in autobackup_config["mount_cmds"]:
|
||||
echo(
|
||||
CLI_CONFIG,
|
||||
f"Executing mount command '{cmd.split()[0]}'... ",
|
||||
newline=False,
|
||||
)
|
||||
tstart = datetime.now()
|
||||
ret = run(
|
||||
cmd.split(),
|
||||
stdout=PIPE,
|
||||
stderr=PIPE,
|
||||
)
|
||||
tend = datetime.now()
|
||||
ttot = tend - tstart
|
||||
if ret.returncode != 0:
|
||||
echo(
|
||||
CLI_CONFIG,
|
||||
f"failed. [{ttot.seconds}s]",
|
||||
)
|
||||
echo(
|
||||
CLI_CONFIG,
|
||||
f"Exiting; command reports: {ret.stderr.decode().strip()}",
|
||||
)
|
||||
if email_report is not None:
|
||||
send_execution_failure_report(error=ret.stderr.decode().strip())
|
||||
exit(1)
|
||||
else:
|
||||
echo(CLI_CONFIG, f"done. [{ttot.seconds}s]")
|
||||
|
||||
# For each VM, perform the backup
|
||||
for vm in backup_vms:
|
||||
backup_suffixed_path = f"{autobackup_config['backup_root_path']}{autobackup_config['backup_root_suffix']}"
|
||||
if not path.exists(backup_suffixed_path):
|
||||
makedirs(backup_suffixed_path)
|
||||
|
||||
backup_path = f"{backup_suffixed_path}/{vm}"
|
||||
autobackup_state_file = f"{backup_path}/.autobackup.json"
|
||||
if not path.exists(backup_path) or not path.exists(autobackup_state_file):
|
||||
# There are no new backups so the list is empty
|
||||
state_data = dict()
|
||||
tracked_backups = list()
|
||||
else:
|
||||
with open(autobackup_state_file) as fh:
|
||||
state_data = jload(fh)
|
||||
tracked_backups = state_data["tracked_backups"]
|
||||
|
||||
full_interval = autobackup_config["backup_schedule"]["full_interval"]
|
||||
full_retention = autobackup_config["backup_schedule"]["full_retention"]
|
||||
|
||||
full_backups = [b for b in tracked_backups if b["type"] == "full"]
|
||||
if len(full_backups) > 0:
|
||||
last_full_backup = full_backups[0]
|
||||
last_full_backup_idx = tracked_backups.index(last_full_backup)
|
||||
if force_full_flag:
|
||||
this_backup_type = "forced-full"
|
||||
this_backup_incremental_parent = None
|
||||
this_backup_retain_snapshot = True
|
||||
elif last_full_backup_idx >= full_interval - 1:
|
||||
this_backup_type = "full"
|
||||
this_backup_incremental_parent = None
|
||||
this_backup_retain_snapshot = True
|
||||
else:
|
||||
this_backup_type = "incremental"
|
||||
this_backup_incremental_parent = last_full_backup["datestring"]
|
||||
this_backup_retain_snapshot = False
|
||||
else:
|
||||
# The very first backup must be full to start the tree
|
||||
this_backup_type = "full"
|
||||
this_backup_incremental_parent = None
|
||||
this_backup_retain_snapshot = True
|
||||
|
||||
# Perform the backup
|
||||
echo(
|
||||
CLI_CONFIG,
|
||||
f"Backing up VM '{vm}' ({this_backup_type})... ",
|
||||
newline=False,
|
||||
)
|
||||
tstart = datetime.now()
|
||||
retcode, retdata = pvc.lib.vm.vm_backup(
|
||||
CLI_CONFIG,
|
||||
vm,
|
||||
backup_suffixed_path,
|
||||
incremental_parent=this_backup_incremental_parent,
|
||||
retain_snapshot=this_backup_retain_snapshot,
|
||||
)
|
||||
tend = datetime.now()
|
||||
ttot = tend - tstart
|
||||
if not retcode:
|
||||
backup_datestring = findall(r"[0-9]{14}", retdata)[0]
|
||||
echo(CLI_CONFIG, f"failed. [{ttot.seconds}s]")
|
||||
echo(
|
||||
CLI_CONFIG,
|
||||
retdata.strip().replace(f"ERROR in backup {backup_datestring}: ", ""),
|
||||
)
|
||||
skip_cleanup = True
|
||||
else:
|
||||
backup_datestring = findall(r"[0-9]{14}", retdata)[0]
|
||||
echo(
|
||||
CLI_CONFIG,
|
||||
f"done. Backup '{backup_datestring}' created. [{ttot.seconds}s]",
|
||||
)
|
||||
skip_cleanup = False
|
||||
|
||||
# Read backup file to get details
|
||||
backup_json_file = f"{backup_path}/{backup_datestring}/pvcbackup.json"
|
||||
with open(backup_json_file) as fh:
|
||||
backup_json = jload(fh)
|
||||
tracked_backups.insert(0, backup_json)
|
||||
|
||||
# Delete any full backups that are expired
|
||||
marked_for_deletion = list()
|
||||
found_full_count = 0
|
||||
for backup in tracked_backups:
|
||||
if backup["type"] == "full":
|
||||
found_full_count += 1
|
||||
if found_full_count > full_retention:
|
||||
marked_for_deletion.append(backup)
|
||||
|
||||
# Delete any incremental backups that depend on marked parents
|
||||
for backup in tracked_backups:
|
||||
if backup["type"] == "incremental" and backup["incremental_parent"] in [
|
||||
b["datestring"] for b in marked_for_deletion
|
||||
]:
|
||||
marked_for_deletion.append(backup)
|
||||
|
||||
if len(marked_for_deletion) > 0:
|
||||
if skip_cleanup:
|
||||
echo(
|
||||
CLI_CONFIG,
|
||||
f"Skipping cleanups for {len(marked_for_deletion)} aged-out backups due to backup failure.",
|
||||
)
|
||||
else:
|
||||
echo(
|
||||
CLI_CONFIG,
|
||||
f"Running cleanups for {len(marked_for_deletion)} aged-out backups...",
|
||||
)
|
||||
# Execute deletes
|
||||
for backup_to_delete in marked_for_deletion:
|
||||
echo(
|
||||
CLI_CONFIG,
|
||||
f"Removing old VM '{vm}' backup '{backup_to_delete['datestring']}' ({backup_to_delete['type']})... ",
|
||||
newline=False,
|
||||
)
|
||||
tstart = datetime.now()
|
||||
retcode, retdata = pvc.lib.vm.vm_remove_backup(
|
||||
CLI_CONFIG,
|
||||
vm,
|
||||
backup_suffixed_path,
|
||||
backup_to_delete["datestring"],
|
||||
)
|
||||
tend = datetime.now()
|
||||
ttot = tend - tstart
|
||||
if not retcode:
|
||||
echo(CLI_CONFIG, f"failed. [{ttot.seconds}s]")
|
||||
echo(
|
||||
CLI_CONFIG,
|
||||
f"Skipping removal from tracked backups; command reports: {retdata}",
|
||||
)
|
||||
else:
|
||||
tracked_backups.remove(backup_to_delete)
|
||||
echo(CLI_CONFIG, f"done. [{ttot.seconds}s]")
|
||||
|
||||
# Update tracked state information
|
||||
state_data["tracked_backups"] = tracked_backups
|
||||
with open(autobackup_state_file, "w") as fh:
|
||||
jdump(state_data, fh)
|
||||
|
||||
backup_summary[vm] = tracked_backups
|
||||
|
||||
if autobackup_config["auto_mount_enabled"]:
|
||||
# Execute each unmount_cmds command in sequence
|
||||
for cmd in autobackup_config["unmount_cmds"]:
|
||||
echo(
|
||||
CLI_CONFIG,
|
||||
f"Executing unmount command '{cmd.split()[0]}'... ",
|
||||
newline=False,
|
||||
)
|
||||
tstart = datetime.now()
|
||||
ret = run(
|
||||
cmd.split(),
|
||||
stdout=PIPE,
|
||||
stderr=PIPE,
|
||||
)
|
||||
tend = datetime.now()
|
||||
ttot = tend - tstart
|
||||
if ret.returncode != 0:
|
||||
echo(
|
||||
CLI_CONFIG,
|
||||
f"failed. [{ttot.seconds}s]",
|
||||
)
|
||||
echo(
|
||||
CLI_CONFIG,
|
||||
f"Continuing; command reports: {ret.stderr.decode().strip()}",
|
||||
)
|
||||
else:
|
||||
echo(CLI_CONFIG, f"done. [{ttot.seconds}s]")
|
||||
|
||||
autobackup_end_time = datetime.now()
|
||||
autobackup_total_time = autobackup_end_time - autobackup_start_time
|
||||
|
||||
# Handle report emailing
|
||||
if email_report is not None:
|
||||
echo(CLI_CONFIG, "")
|
||||
echo(CLI_CONFIG, f"Sending email summary report to {email_report}")
|
||||
|
||||
current_datetime = datetime.now()
|
||||
email_datetime = formatdate(float(current_datetime.strftime("%s")))
|
||||
|
||||
email = list()
|
||||
email.append(f"Date: {email_datetime}")
|
||||
email.append(f"Subject: PVC Autobackup report for cluster {cluster}")
|
||||
|
||||
recipients = list()
|
||||
for recipient in email_report.split(","):
|
||||
recipients.append(f"<{recipient}>")
|
||||
email.append(f"To: {', '.join(recipients)}")
|
||||
email.append(f"From: PVC Autobackup System <pvc@{gethostname()}>")
|
||||
email.append("")
|
||||
|
||||
email.append(
|
||||
f"A PVC autobackup has been completed at {current_datetime} in {autobackup_total_time}."
|
||||
)
|
||||
email.append("")
|
||||
email.append(
|
||||
"The following is a summary of all current VM backups after cleanups, most recent first:"
|
||||
)
|
||||
email.append("")
|
||||
|
||||
for vm in backup_vms:
|
||||
email.append(f"VM {vm}:")
|
||||
for backup in backup_summary[vm]:
|
||||
datestring = backup.get("datestring")
|
||||
backup_date = datetime.strptime(datestring, "%Y%m%d%H%M%S")
|
||||
if backup.get("result", False):
|
||||
email.append(
|
||||
f" {backup_date}: Success in {backup.get('runtime_secs', 0)} seconds, ID {datestring}, type {backup.get('type', 'unknown')}"
|
||||
)
|
||||
email.append(
|
||||
f" Backup contains {len(backup.get('backup_files'))} files totaling {pvc.lib.storage.format_bytes_tohuman(backup.get('backup_size_bytes', 0))} ({backup.get('backup_size_bytes', 0)} bytes)"
|
||||
)
|
||||
else:
|
||||
email.append(
|
||||
f" {backup_date}: Failure in {backup.get('runtime_secs', 0)} seconds, ID {datestring}, type {backup.get('type', 'unknown')}"
|
||||
)
|
||||
email.append(
|
||||
f" {backup.get('result_message')}"
|
||||
)
|
||||
|
||||
try:
|
||||
p = popen("/usr/sbin/sendmail -t", "w")
|
||||
p.write("\n".join(email))
|
||||
p.close()
|
||||
except Exception as e:
|
||||
echo(CLI_CONFIG, f"Failed to send report email: {e}")
|
||||
|
||||
echo(CLI_CONFIG, "")
|
||||
echo(CLI_CONFIG, f"Autobackup completed in {autobackup_total_time}.")
|
||||
|
@ -121,6 +121,8 @@ def wait_for_celery_task(CLI_CONFIG, task_detail, start_late=False):
|
||||
break
|
||||
if task_status.get("current") > last_task:
|
||||
current_task = int(task_status.get("current"))
|
||||
total_task = int(task_status.get("total"))
|
||||
bar.length = total_task
|
||||
bar.update(current_task - last_task)
|
||||
last_task = current_task
|
||||
# The extensive spaces at the end cause this to overwrite longer previous messages
|
||||
|
@ -421,7 +421,7 @@ def vm_node(config, vm, target_node, action, force=False, wait=False, force_live
|
||||
return retstatus, response.json().get("message", "")
|
||||
|
||||
|
||||
def vm_locks(config, vm, wait_flag):
|
||||
def vm_locks(config, vm, wait_flag=True):
|
||||
"""
|
||||
Flush RBD locks of (stopped) VM
|
||||
|
||||
@ -498,7 +498,7 @@ def vm_restore(config, vm, backup_path, backup_datestring, retain_snapshot=False
|
||||
return True, response.json().get("message", "")
|
||||
|
||||
|
||||
def vm_create_snapshot(config, vm, snapshot_name=None):
|
||||
def vm_create_snapshot(config, vm, snapshot_name=None, wait_flag=True):
|
||||
"""
|
||||
Take a snapshot of a VM's disks and configuration
|
||||
|
||||
@ -513,13 +513,10 @@ def vm_create_snapshot(config, vm, snapshot_name=None):
|
||||
config, "post", "/vm/{vm}/snapshot".format(vm=vm), params=params
|
||||
)
|
||||
|
||||
if response.status_code != 200:
|
||||
return False, response.json().get("message", "")
|
||||
else:
|
||||
return True, response.json().get("message", "")
|
||||
return get_wait_retdata(response, wait_flag)
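The snapshot wrappers in this hunk now hand the raw API response to get_wait_retdata() instead of unpacking it inline, so callers decide whether to block on the resulting worker task. The sketch below is illustrative only: the exact return shape of get_wait_retdata() is not shown in this diff, and the VM and snapshot names are invented.
# Hypothetical caller-side sketch (not from the PVC source): create a snapshot
# and, if the wrapper returned Celery task detail, wait for it with the shared
# progress helper shown later in this changeset.
retcode, retdata = vm_create_snapshot(
    CLI_CONFIG, "myvm", snapshot_name="before-upgrade", wait_flag=True
)
if retcode and isinstance(retdata, dict):
    retdata = wait_for_celery_task(CLI_CONFIG, retdata)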
|
||||
|
||||
|
||||
def vm_remove_snapshot(config, vm, snapshot_name):
|
||||
def vm_remove_snapshot(config, vm, snapshot_name, wait_flag=True):
|
||||
"""
|
||||
Remove a snapshot of a VM's disks and configuration
|
||||
|
||||
@ -532,13 +529,10 @@ def vm_remove_snapshot(config, vm, snapshot_name):
|
||||
config, "delete", "/vm/{vm}/snapshot".format(vm=vm), params=params
|
||||
)
|
||||
|
||||
if response.status_code != 200:
|
||||
return False, response.json().get("message", "")
|
||||
else:
|
||||
return True, response.json().get("message", "")
|
||||
return get_wait_retdata(response, wait_flag)
|
||||
|
||||
|
||||
def vm_rollback_snapshot(config, vm, snapshot_name):
|
||||
def vm_rollback_snapshot(config, vm, snapshot_name, wait_flag=True):
|
||||
"""
|
||||
Roll back to a snapshot of a VM's disks and configuration
|
||||
|
||||
@ -551,13 +545,12 @@ def vm_rollback_snapshot(config, vm, snapshot_name):
|
||||
config, "post", "/vm/{vm}/snapshot/rollback".format(vm=vm), params=params
|
||||
)
|
||||
|
||||
if response.status_code != 200:
|
||||
return False, response.json().get("message", "")
|
||||
else:
|
||||
return True, response.json().get("message", "")
|
||||
return get_wait_retdata(response, wait_flag)
|
||||
|
||||
|
||||
def vm_export_snapshot(config, vm, snapshot_name, export_path, incremental_parent):
|
||||
def vm_export_snapshot(
|
||||
config, vm, snapshot_name, export_path, incremental_parent=None, wait_flag=True
|
||||
):
|
||||
"""
|
||||
Export an (existing) snapshot of a VM's disks and configuration to export_path, optionally
|
||||
incremental with incremental_parent
|
||||
@ -577,13 +570,12 @@ def vm_export_snapshot(config, vm, snapshot_name, export_path, incremental_paren
|
||||
config, "post", "/vm/{vm}/snapshot/export".format(vm=vm), params=params
|
||||
)
|
||||
|
||||
if response.status_code != 200:
|
||||
return False, response.json().get("message", "")
|
||||
else:
|
||||
return True, response.json().get("message", "")
|
||||
return get_wait_retdata(response, wait_flag)
|
||||
|
||||
|
||||
def vm_import_snapshot(config, vm, snapshot_name, import_path, retain_snapshot=False):
|
||||
def vm_import_snapshot(
|
||||
config, vm, snapshot_name, import_path, retain_snapshot=False, wait_flag=True
|
||||
):
|
||||
"""
|
||||
Import a snapshot of {vm} and its volumes from a local primary coordinator filesystem path
|
||||
|
||||
@ -600,10 +592,25 @@ def vm_import_snapshot(config, vm, snapshot_name, import_path, retain_snapshot=F
|
||||
config, "post", "/vm/{vm}/snapshot/import".format(vm=vm), params=params
|
||||
)
|
||||
|
||||
if response.status_code != 200:
|
||||
return False, response.json().get("message", "")
|
||||
else:
|
||||
return True, response.json().get("message", "")
|
||||
return get_wait_retdata(response, wait_flag)
|
||||
|
||||
|
||||
def vm_autobackup(config, email_recipients=None, force_full_flag=False, wait_flag=True):
|
||||
"""
|
||||
Perform a cluster VM autobackup
|
||||
|
||||
API endpoint: POST /vm//autobackup
|
||||
API arguments: email_recipients=email_recipients, force_full_flag=force_full_flag
|
||||
API schema: {"message":"{data}"}
|
||||
"""
|
||||
params = {
|
||||
"email_recipients": email_recipients,
|
||||
"force_full": force_full_flag,
|
||||
}
|
||||
|
||||
response = call_api(config, "post", "/vm/autobackup", params=params)
|
||||
|
||||
return get_wait_retdata(response, wait_flag)
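Since vm_autobackup() follows the same pattern, triggering a cluster autobackup from client code reduces to a single call. The recipient address and flag values below are illustrative, and the (retcode, retdata) unpacking again assumes the shape used by the other wrappers in this module.
# Sketch: request a forced-full autobackup with an emailed report and wait
# for the worker task to complete (values are examples, not defaults).
retcode, retdata = vm_autobackup(
    CLI_CONFIG,
    email_recipients="ops@example.com",
    force_full_flag=True,
    wait_flag=True,
)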
|
||||
|
||||
|
||||
def vm_vcpus_set(config, vm, vcpus, topology, restart):
|
||||
|
@ -2,7 +2,7 @@ from setuptools import setup
|
||||
|
||||
setup(
|
||||
name="pvc",
|
||||
version="0.9.98",
|
||||
version="0.9.100",
|
||||
packages=["pvc.cli", "pvc.lib"],
|
||||
install_requires=[
|
||||
"Click",
|
||||
|
695
daemon-common/autobackup.py
Normal file
@ -0,0 +1,695 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# autobackup.py - PVC API Autobackup functions
|
||||
# Part of the Parallel Virtual Cluster (PVC) system
|
||||
#
|
||||
# Copyright (C) 2018-2024 Joshua M. Boniface <joshua@boniface.me>
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, version 3.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
#
|
||||
###############################################################################
|
||||
|
||||
from datetime import datetime
|
||||
from json import load as jload
|
||||
from json import dump as jdump
|
||||
from os import popen, makedirs, path, scandir
|
||||
from shutil import rmtree
|
||||
from subprocess import run, PIPE
|
||||
|
||||
from daemon_lib.common import run_os_command
|
||||
from daemon_lib.config import get_autobackup_configuration
|
||||
from daemon_lib.celery import start, fail, log_info, log_err, update, finish
|
||||
|
||||
import daemon_lib.ceph as ceph
|
||||
import daemon_lib.vm as vm
|
||||
|
||||
|
||||
def send_execution_failure_report(
|
||||
celery_conf, config, recipients=None, total_time=0, error=None
|
||||
):
|
||||
if recipients is None:
|
||||
return
|
||||
|
||||
from email.utils import formatdate
|
||||
from socket import gethostname
|
||||
|
||||
log_message = f"Sending email failure report to {', '.join(recipients)}"
|
||||
log_info(celery_conf[0], log_message)
|
||||
update(
|
||||
celery_conf[0],
|
||||
log_message,
|
||||
current=celery_conf[1] + 1,
|
||||
total=celery_conf[2],
|
||||
)
|
||||
|
||||
current_datetime = datetime.now()
|
||||
email_datetime = formatdate(float(current_datetime.strftime("%s")))
|
||||
|
||||
email = list()
|
||||
email.append(f"Date: {email_datetime}")
|
||||
email.append(
|
||||
f"Subject: PVC Autobackup execution failure for cluster '{config['cluster']}'"
|
||||
)
|
||||
|
||||
email_to = list()
|
||||
for recipient in recipients:
|
||||
email_to.append(f"<{recipient}>")
|
||||
|
||||
email.append(f"To: {', '.join(email_to)}")
|
||||
email.append(f"From: PVC Autobackup System <pvc@{gethostname()}>")
|
||||
email.append("")
|
||||
|
||||
email.append(
|
||||
f"A PVC autobackup has FAILED at {current_datetime} in {total_time}s due to an execution error."
|
||||
)
|
||||
email.append("")
|
||||
email.append("The reported error message is:")
|
||||
email.append(f" {error}")
|
||||
|
||||
try:
|
||||
with popen("/usr/sbin/sendmail -t", "w") as p:
|
||||
p.write("\n".join(email))
|
||||
except Exception as e:
|
||||
log_err(f"Failed to send report email: {e}")
|
||||
|
||||
|
||||
def send_execution_summary_report(
|
||||
celery_conf, config, recipients=None, total_time=0, summary=dict()
|
||||
):
|
||||
if recipients is None:
|
||||
return
|
||||
|
||||
from email.utils import formatdate
|
||||
from socket import gethostname
|
||||
|
||||
log_message = f"Sending email summary report to {', '.join(recipients)}"
|
||||
log_info(celery_conf[0], log_message)
|
||||
update(
|
||||
celery_conf[0],
|
||||
log_message,
|
||||
current=celery_conf[1] + 1,
|
||||
total=celery_conf[2],
|
||||
)
|
||||
|
||||
current_datetime = datetime.now()
|
||||
email_datetime = formatdate(float(current_datetime.strftime("%s")))
|
||||
|
||||
email = list()
|
||||
email.append(f"Date: {email_datetime}")
|
||||
email.append(f"Subject: PVC Autobackup report for cluster '{config['cluster']}'")
|
||||
|
||||
email_to = list()
|
||||
for recipient in recipients:
|
||||
email_to.append(f"<{recipient}>")
|
||||
|
||||
email.append(f"To: {', '.join(email_to)}")
|
||||
email.append(f"From: PVC Autobackup System <pvc@{gethostname()}>")
|
||||
email.append("")
|
||||
|
||||
email.append(
|
||||
f"A PVC autobackup has been completed at {current_datetime} in {total_time}."
|
||||
)
|
||||
email.append("")
|
||||
email.append(
|
||||
"The following is a summary of all current VM backups after cleanups, most recent first:"
|
||||
)
|
||||
email.append("")
|
||||
|
||||
for vm_name in summary.keys():
|
||||
email.append(f"VM: {vm_name}:")
|
||||
for backup in summary[vm_name]:
|
||||
datestring = backup.get("datestring")
|
||||
backup_date = datetime.strptime(datestring, "%Y%m%d%H%M%S")
|
||||
if backup.get("result", False):
|
||||
email.append(
|
||||
f" {backup_date}: Success in {backup.get('runtime_secs', 0)} seconds, ID {backup.get('snapshot_name')}, type {backup.get('type', 'unknown')}"
|
||||
)
|
||||
email.append(
|
||||
f" Backup contains {len(backup.get('export_files'))} files totaling {ceph.format_bytes_tohuman(backup.get('export_size_bytes', 0))} ({backup.get('export_size_bytes', 0)} bytes)"
|
||||
)
|
||||
else:
|
||||
email.append(
|
||||
f" {backup_date}: Failure in {backup.get('runtime_secs', 0)} seconds, ID {backup.get('snapshot_name')}, type {backup.get('type', 'unknown')}"
|
||||
)
|
||||
email.append(f" {backup.get('result_message')}")
|
||||
|
||||
try:
|
||||
with popen("/usr/sbin/sendmail -t", "w") as p:
|
||||
p.write("\n".join(email))
|
||||
except Exception as e:
|
||||
log_err(f"Failed to send report email: {e}")
|
||||
|
||||
|
||||
def run_vm_backup(zkhandler, celery, config, vm_detail, force_full=False):
|
||||
vm_name = vm_detail["name"]
|
||||
dom_uuid = vm_detail["uuid"]
|
||||
backup_suffixed_path = f"{config['backup_root_path']}{config['backup_root_suffix']}"
|
||||
vm_backup_path = f"{backup_suffixed_path}/{vm_name}"
|
||||
autobackup_state_file = f"{vm_backup_path}/.autobackup.json"
|
||||
full_interval = config["backup_schedule"]["full_interval"]
|
||||
full_retention = config["backup_schedule"]["full_retention"]
|
||||
|
||||
if not path.exists(vm_backup_path) or not path.exists(autobackup_state_file):
|
||||
# There are no existing backups so the list is empty
|
||||
state_data = dict()
|
||||
tracked_backups = list()
|
||||
else:
|
||||
with open(autobackup_state_file) as fh:
|
||||
state_data = jload(fh)
|
||||
tracked_backups = state_data["tracked_backups"]
|
||||
|
||||
full_backups = [b for b in tracked_backups if b["type"] == "full"]
|
||||
if len(full_backups) > 0:
|
||||
last_full_backup = full_backups[0]
|
||||
last_full_backup_idx = tracked_backups.index(last_full_backup)
|
||||
if force_full:
|
||||
this_backup_incremental_parent = None
|
||||
this_backup_retain_snapshot = True
|
||||
elif last_full_backup_idx >= full_interval - 1:
|
||||
this_backup_incremental_parent = None
|
||||
this_backup_retain_snapshot = True
|
||||
else:
|
||||
this_backup_incremental_parent = last_full_backup["snapshot_name"]
|
||||
this_backup_retain_snapshot = False
|
||||
else:
|
||||
# The very first backup must be full to start the tree
|
||||
this_backup_incremental_parent = None
|
||||
this_backup_retain_snapshot = True
|
||||
|
||||
export_type = (
|
||||
"incremental" if this_backup_incremental_parent is not None else "full"
|
||||
)
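# Worked example of the schedule above (numbers illustrative, not shipped
# defaults): with full_interval = 7 and the newest tracked full backup sitting
# at index 3, 3 < 6 so this run is an "incremental" against that full's
# snapshot_name; once the last full drifts to index 6 or beyond, or force_full
# is set, the run is promoted back to "full" and its snapshot is retained as
# the next incremental parent.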
|
||||
|
||||
now = datetime.now()
|
||||
datestring = now.strftime("%Y%m%d%H%M%S")
|
||||
snapshot_name = f"ab{datestring}"
|
||||
|
||||
# Take the VM snapshot (vm.vm_worker_create_snapshot)
|
||||
snap_list = list()
|
||||
|
||||
failure = False
|
||||
export_files = None
|
||||
export_files_size = 0
|
||||
|
||||
def update_tracked_backups():
|
||||
# Read export file to get details
|
||||
backup_json_file = (
|
||||
f"{backup_suffixed_path}/{vm_name}/{snapshot_name}/snapshot.json"
|
||||
)
|
||||
try:
|
||||
with open(backup_json_file) as fh:
|
||||
backup_json = jload(fh)
|
||||
tracked_backups.insert(0, backup_json)
|
||||
except Exception as e:
|
||||
log_err(celery, f"Could not open export JSON: {e}")
|
||||
return list()
|
||||
|
||||
state_data["tracked_backups"] = tracked_backups
|
||||
with open(autobackup_state_file, "w") as fh:
|
||||
jdump(state_data, fh)
|
||||
|
||||
return tracked_backups
|
||||
|
||||
def write_backup_summary(success=False, message=""):
|
||||
ttotal = (datetime.now() - now).total_seconds()
|
||||
export_details = {
|
||||
"type": export_type,
|
||||
"result": success,
|
||||
"message": message,
|
||||
"datestring": datestring,
|
||||
"runtime_secs": ttotal,
|
||||
"snapshot_name": snapshot_name,
|
||||
"incremental_parent": this_backup_incremental_parent,
|
||||
"vm_detail": vm_detail,
|
||||
"export_files": export_files,
|
||||
"export_size_bytes": export_files_size,
|
||||
}
|
||||
try:
|
||||
with open(
|
||||
f"{backup_suffixed_path}/{vm_name}/{snapshot_name}/snapshot.json",
|
||||
"w",
|
||||
) as fh:
|
||||
jdump(export_details, fh)
|
||||
except Exception as e:
|
||||
log_err(celery, f"Error exporting snapshot details: {e}")
|
||||
return False, e
|
||||
|
||||
return True, ""
|
||||
|
||||
def cleanup_failure():
|
||||
for snapshot in snap_list:
|
||||
rbd, snapshot_name = snapshot.split("@")
|
||||
pool, volume = rbd.split("/")
|
||||
# We capture no output here, because if this fails too we're in a deep
|
||||
# error chain and will just ignore it
|
||||
ceph.remove_snapshot(zkhandler, pool, volume, snapshot_name)
|
||||
|
||||
rbd_list = zkhandler.read(("domain.storage.volumes", dom_uuid)).split(",")
|
||||
|
||||
for rbd in rbd_list:
|
||||
pool, volume = rbd.split("/")
|
||||
ret, msg = ceph.add_snapshot(
|
||||
zkhandler, pool, volume, snapshot_name, zk_only=False
|
||||
)
|
||||
if not ret:
|
||||
cleanup_failure()
|
||||
error_message = msg.replace("ERROR: ", "")
|
||||
log_err(celery, error_message)
|
||||
failure = True
|
||||
break
|
||||
else:
|
||||
snap_list.append(f"{pool}/{volume}@{snapshot_name}")
|
||||
|
||||
if failure:
|
||||
error_message = (f"[{vm_name}] Error in snapshot export, skipping",)
|
||||
write_backup_summary(message=error_message)
|
||||
tracked_backups = update_tracked_backups()
|
||||
return tracked_backups
|
||||
|
||||
# Get the current domain XML
|
||||
vm_config = zkhandler.read(("domain.xml", dom_uuid))
|
||||
|
||||
# Add the snapshot entry to Zookeeper
|
||||
ret = zkhandler.write(
|
||||
[
|
||||
(
|
||||
(
|
||||
"domain.snapshots",
|
||||
dom_uuid,
|
||||
"domain_snapshot.name",
|
||||
snapshot_name,
|
||||
),
|
||||
snapshot_name,
|
||||
),
|
||||
(
|
||||
(
|
||||
"domain.snapshots",
|
||||
dom_uuid,
|
||||
"domain_snapshot.timestamp",
|
||||
snapshot_name,
|
||||
),
|
||||
now.strftime("%s"),
|
||||
),
|
||||
(
|
||||
(
|
||||
"domain.snapshots",
|
||||
dom_uuid,
|
||||
"domain_snapshot.xml",
|
||||
snapshot_name,
|
||||
),
|
||||
vm_config,
|
||||
),
|
||||
(
|
||||
(
|
||||
"domain.snapshots",
|
||||
dom_uuid,
|
||||
"domain_snapshot.rbd_snapshots",
|
||||
snapshot_name,
|
||||
),
|
||||
",".join(snap_list),
|
||||
),
|
||||
]
|
||||
)
|
||||
if not ret:
|
||||
error_message = (f"[{vm_name}] Error in snapshot export, skipping",)
|
||||
log_err(celery, error_message)
|
||||
write_backup_summary(message=error_message)
|
||||
tracked_backups = update_tracked_backups()
|
||||
return tracked_backups
|
||||
|
||||
# Export the snapshot (vm.vm_worker_export_snapshot)
|
||||
export_target_path = f"{backup_suffixed_path}/{vm_name}/{snapshot_name}/images"
|
||||
|
||||
try:
|
||||
makedirs(export_target_path)
|
||||
except Exception as e:
|
||||
error_message = f"[{vm_name}] Failed to create target directory '{export_target_path}': {e}"
|
||||
log_err(celery, error_message)
|
||||
return tracked_backups
|
||||
|
||||
def export_cleanup():
|
||||
from shutil import rmtree
|
||||
|
||||
rmtree(f"{backup_suffixed_path}/{vm_name}/{snapshot_name}")
|
||||
|
||||
# Set the export filetype
|
||||
if this_backup_incremental_parent is not None:
|
||||
export_fileext = "rbddiff"
|
||||
else:
|
||||
export_fileext = "rbdimg"
|
||||
|
||||
snapshot_volumes = list()
|
||||
for rbdsnap in snap_list:
|
||||
pool, _volume = rbdsnap.split("/")
|
||||
volume, name = _volume.split("@")
|
||||
ret, snapshots = ceph.get_list_snapshot(
|
||||
zkhandler, pool, volume, limit=name, is_fuzzy=False
|
||||
)
|
||||
if ret:
|
||||
snapshot_volumes += snapshots
|
||||
|
||||
export_files = list()
|
||||
for snapshot_volume in snapshot_volumes:
|
||||
snap_pool = snapshot_volume["pool"]
|
||||
snap_volume = snapshot_volume["volume"]
|
||||
snap_snapshot_name = snapshot_volume["snapshot"]
|
||||
snap_size = snapshot_volume["stats"]["size"]
|
||||
|
||||
if this_backup_incremental_parent is not None:
|
||||
retcode, stdout, stderr = run_os_command(
|
||||
f"rbd export-diff --from-snap {this_backup_incremental_parent} {snap_pool}/{snap_volume}@{snap_snapshot_name} {export_target_path}/{snap_pool}.{snap_volume}.{export_fileext}"
|
||||
)
|
||||
if retcode:
|
||||
error_message = f"[{vm_name}] Failed to export snapshot for volume(s) '{snap_pool}/{snap_volume}'"
|
||||
failure = True
|
||||
break
|
||||
else:
|
||||
export_files.append(
|
||||
(
|
||||
f"images/{snap_pool}.{snap_volume}.{export_fileext}",
|
||||
snap_size,
|
||||
)
|
||||
)
|
||||
else:
|
||||
retcode, stdout, stderr = run_os_command(
|
||||
f"rbd export --export-format 2 {snap_pool}/{snap_volume}@{snap_snapshot_name} {export_target_path}/{snap_pool}.{snap_volume}.{export_fileext}"
|
||||
)
|
||||
if retcode:
|
||||
error_message = f"[{vm_name}] Failed to export snapshot for volume(s) '{snap_pool}/{snap_volume}'"
|
||||
failure = True
|
||||
break
|
||||
else:
|
||||
export_files.append(
|
||||
(
|
||||
f"images/{snap_pool}.{snap_volume}.{export_fileext}",
|
||||
snap_size,
|
||||
)
|
||||
)
|
||||
|
||||
if failure:
|
||||
log_err(celery, error_message)
|
||||
write_backup_summary(message=error_message)
|
||||
tracked_backups = update_tracked_backups()
|
||||
return tracked_backups
|
||||
|
||||
def get_dir_size(pathname):
|
||||
total = 0
|
||||
with scandir(pathname) as it:
|
||||
for entry in it:
|
||||
if entry.is_file():
|
||||
total += entry.stat().st_size
|
||||
elif entry.is_dir():
|
||||
total += get_dir_size(entry.path)
|
||||
return total
|
||||
|
||||
export_files_size = get_dir_size(export_target_path)
|
||||
|
||||
ret, e = write_backup_summary(success=True)
|
||||
if not ret:
|
||||
error_message = (f"[{vm_name}] Failed to export configuration snapshot: {e}",)
|
||||
log_err(celery, error_message)
|
||||
write_backup_summary(message=error_message)
|
||||
tracked_backups = update_tracked_backups()
|
||||
return tracked_backups
|
||||
|
||||
# Clean up the snapshot (vm.vm_worker_remove_snapshot)
|
||||
if not this_backup_retain_snapshot:
|
||||
for snap in snap_list:
|
||||
rbd, name = snap.split("@")
|
||||
pool, volume = rbd.split("/")
|
||||
ret, msg = ceph.remove_snapshot(zkhandler, pool, volume, name)
|
||||
if not ret:
|
||||
error_message = msg.replace("ERROR: ", f"[{vm_name}] ")
|
||||
failure = True
|
||||
break
|
||||
|
||||
if failure:
|
||||
log_err(celery, error_message)
|
||||
write_backup_summary(message=error_message)
|
||||
tracked_backups = update_tracked_backups()
|
||||
return tracked_backups
|
||||
|
||||
ret = zkhandler.delete(
|
||||
("domain.snapshots", dom_uuid, "domain_snapshot.name", snapshot_name)
|
||||
)
|
||||
if not ret:
|
||||
error_message = (f"[{vm_name}] Failed to remove VM snapshot; continuing",)
|
||||
log_err(celery, error_message)
|
||||
|
||||
marked_for_deletion = list()
|
||||
# Find any full backups that are expired
|
||||
found_full_count = 0
|
||||
for backup in tracked_backups:
|
||||
if backup["type"] == "full":
|
||||
found_full_count += 1
|
||||
if found_full_count > full_retention:
|
||||
marked_for_deletion.append(backup)
|
||||
# Find any incremental backups that depend on marked parents
|
||||
for backup in tracked_backups:
|
||||
if backup["type"] == "incremental" and backup["incremental_parent"] in [
|
||||
b["snapshot_name"] for b in marked_for_deletion
|
||||
]:
|
||||
marked_for_deletion.append(backup)
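# Retention example (numbers illustrative): tracked_backups is newest-first,
# so with full_retention = 2 and a history of
#   [full C, incr b2, incr b1, full B, incr a1, full A]
# the third full found (A) exceeds the retention count and is marked above,
# and the second pass also marks incr a1 because its incremental_parent is
# A's snapshot_name.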
|
||||
|
||||
if len(marked_for_deletion) > 0:
|
||||
for backup_to_delete in marked_for_deletion:
|
||||
ret = vm.vm_worker_remove_snapshot(
|
||||
zkhandler, None, vm_name, backup_to_delete["snapshot_name"]
|
||||
)
|
||||
if ret is False:
|
||||
error_message = f"Failed to remove obsolete backup snapshot '{backup_to_delete['snapshot_name']}', leaving in tracked backups"
|
||||
log_err(celery, error_message)
|
||||
else:
|
||||
rmtree(f"{vm_backup_path}/{backup_to_delete['snapshot_name']}")
|
||||
tracked_backups.remove(backup_to_delete)
|
||||
|
||||
tracked_backups = update_tracked_backups()
|
||||
return tracked_backups
|
||||
|
||||
|
||||
def worker_cluster_autobackup(
|
||||
zkhandler, celery, force_full=False, email_recipients=None
|
||||
):
|
||||
config = get_autobackup_configuration()
|
||||
|
||||
backup_summary = dict()
|
||||
|
||||
current_stage = 0
|
||||
total_stages = 1
|
||||
if email_recipients is not None:
|
||||
total_stages += 1
|
||||
|
||||
start(
|
||||
celery,
|
||||
f"Starting cluster '{config['cluster']}' VM autobackup",
|
||||
current=current_stage,
|
||||
total=total_stages,
|
||||
)
|
||||
|
||||
if not config["autobackup_enabled"]:
|
||||
message = "Autobackups are not configured on this cluster."
|
||||
log_info(celery, message)
|
||||
return finish(
|
||||
celery,
|
||||
message,
|
||||
current=total_stages,
|
||||
total=total_stages,
|
||||
)
|
||||
|
||||
autobackup_start_time = datetime.now()
|
||||
|
||||
retcode, vm_list = vm.get_list(zkhandler)
|
||||
if not retcode:
|
||||
error_message = f"Failed to fetch VM list: {vm_list}"
|
||||
log_err(celery, error_message)
|
||||
send_execution_failure_report(
|
||||
(celery, current_stage, total_stages),
|
||||
config,
|
||||
recipients=email_recipients,
|
||||
error=error_message,
|
||||
)
|
||||
fail(celery, error_message)
|
||||
return False
|
||||
|
||||
backup_suffixed_path = f"{config['backup_root_path']}{config['backup_root_suffix']}"
|
||||
if not path.exists(backup_suffixed_path):
|
||||
makedirs(backup_suffixed_path)
|
||||
|
||||
full_interval = config["backup_schedule"]["full_interval"]
|
||||
|
||||
backup_vms = list()
|
||||
for vm_detail in vm_list:
|
||||
vm_tag_names = [t["name"] for t in vm_detail["tags"]]
|
||||
matching_tags = (
|
||||
True
|
||||
if len(set(vm_tag_names).intersection(set(config["backup_tags"]))) > 0
|
||||
else False
|
||||
)
|
||||
if matching_tags:
|
||||
backup_vms.append(vm_detail)
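# Example of the tag filter above (tag names illustrative): with
# backup_tags: ["autobackup"] configured, a VM tagged
# ["production", "autobackup"] has a non-empty intersection and is selected,
# while a VM tagged only ["production"] is skipped.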
|
||||
|
||||
if len(backup_vms) < 1:
|
||||
message = "Found no VMs tagged for autobackup."
|
||||
log_info(celery, message)
|
||||
return finish(
|
||||
celery,
|
||||
message,
|
||||
current=total_stages,
|
||||
total=total_stages,
|
||||
)
|
||||
|
||||
if config["auto_mount_enabled"]:
|
||||
total_stages += len(config["mount_cmds"])
|
||||
total_stages += len(config["unmount_cmds"])
|
||||
|
||||
total_stages += len(backup_vms)
|
||||
|
||||
log_info(
|
||||
celery,
|
||||
f"Found {len(backup_vms)} suitable VM(s) for autobackup: {', '.join([b['name'] for b in backup_vms])}",
|
||||
)
|
||||
|
||||
# Handle automount mount commands
|
||||
if config["auto_mount_enabled"]:
|
||||
for cmd in config["mount_cmds"]:
|
||||
current_stage += 1
|
||||
update(
|
||||
celery,
|
||||
f"Executing mount command '{cmd.split()[0]}'",
|
||||
current=current_stage,
|
||||
total=total_stages,
|
||||
)
|
||||
|
||||
ret = run(
|
||||
cmd.split(),
|
||||
stdout=PIPE,
|
||||
stderr=PIPE,
|
||||
)
|
||||
|
||||
if ret.returncode != 0:
|
||||
error_message = f"Failed to execute mount command '{cmd.split()[0]}': {ret.stderr.decode().strip()}"
|
||||
log_err(celery, error_message)
|
||||
send_execution_failure_report(
|
||||
(celery, current_stage, total_stages),
|
||||
config,
|
||||
recipients=email_recipients,
|
||||
total_time=datetime.now() - autobackup_start_time,
|
||||
error=error_message,
|
||||
)
|
||||
fail(celery, error_message)
|
||||
return False
|
||||
|
||||
# Execute the backup: take a snapshot, then export the snapshot
|
||||
for vm_detail in backup_vms:
|
||||
vm_backup_path = f"{backup_suffixed_path}/{vm_detail['name']}"
|
||||
autobackup_state_file = f"{vm_backup_path}/.autobackup.json"
|
||||
if not path.exists(vm_backup_path) or not path.exists(autobackup_state_file):
|
||||
# There are no existing backups so the list is empty
|
||||
state_data = dict()
|
||||
tracked_backups = list()
|
||||
else:
|
||||
with open(autobackup_state_file) as fh:
|
||||
state_data = jload(fh)
|
||||
tracked_backups = state_data["tracked_backups"]
|
||||
|
||||
full_backups = [b for b in tracked_backups if b["type"] == "full"]
|
||||
if len(full_backups) > 0:
|
||||
last_full_backup = full_backups[0]
|
||||
last_full_backup_idx = tracked_backups.index(last_full_backup)
|
||||
if force_full:
|
||||
this_backup_incremental_parent = None
|
||||
elif last_full_backup_idx >= full_interval - 1:
|
||||
this_backup_incremental_parent = None
|
||||
else:
|
||||
this_backup_incremental_parent = last_full_backup["snapshot_name"]
|
||||
else:
|
||||
# The very first backup must be full to start the tree
|
||||
this_backup_incremental_parent = None
|
||||
|
||||
export_type = (
|
||||
"incremental" if this_backup_incremental_parent is not None else "full"
|
||||
)
|
||||
|
||||
current_stage += 1
|
||||
update(
|
||||
celery,
|
||||
f"Performing autobackup of VM {vm_detail['name']} ({export_type})",
|
||||
current=current_stage,
|
||||
total=total_stages,
|
||||
)
|
||||
|
||||
summary = run_vm_backup(
|
||||
zkhandler,
|
||||
celery,
|
||||
config,
|
||||
vm_detail,
|
||||
force_full=force_full,
|
||||
)
|
||||
backup_summary[vm_detail["name"]] = summary
|
||||
|
||||
# Handle automount unmount commands
|
||||
if config["auto_mount_enabled"]:
|
||||
for cmd in config["unmount_cmds"]:
|
||||
current_stage += 1
|
||||
update(
|
||||
celery,
|
||||
f"Executing unmount command '{cmd.split()[0]}'",
|
||||
current=current_stage,
|
||||
total=total_stages,
|
||||
)
|
||||
|
||||
ret = run(
|
||||
cmd.split(),
|
||||
stdout=PIPE,
|
||||
stderr=PIPE,
|
||||
)
|
||||
|
||||
if ret.returncode != 0:
|
||||
error_message = f"Failed to execute unmount command '{cmd.split()[0]}': {ret.stderr.decode().strip()}"
|
||||
log_err(celery, error_message)
|
||||
send_execution_failure_report(
|
||||
(celery, current_stage, total_stages),
|
||||
config,
|
||||
recipients=email_recipients,
|
||||
total_time=datetime.now() - autobackup_start_time,
|
||||
error=error_message,
|
||||
)
|
||||
fail(celery, error_message)
|
||||
return False
|
||||
|
||||
autobackup_end_time = datetime.now()
|
||||
autobackup_total_time = autobackup_end_time - autobackup_start_time
|
||||
|
||||
if email_recipients is not None:
|
||||
send_execution_summary_report(
|
||||
(celery, current_stage, total_stages),
|
||||
config,
|
||||
recipients=email_recipients,
|
||||
total_time=autobackup_total_time,
|
||||
summary=backup_summary,
|
||||
)
|
||||
current_stage += 1
|
||||
|
||||
current_stage += 1
|
||||
return finish(
|
||||
celery,
|
||||
f"Successfully completed cluster '{config['cluster']}' VM autobackup",
|
||||
current=current_stage,
|
||||
total=total_stages,
|
||||
)
|
@ -26,6 +26,7 @@ import subprocess
|
||||
import signal
|
||||
from json import loads
|
||||
from re import match as re_match
|
||||
from re import search as re_search
|
||||
from re import split as re_split
|
||||
from re import sub as re_sub
|
||||
from difflib import unified_diff
|
||||
@ -1073,7 +1074,7 @@ def sortInterfaceNames(interface_names):
|
||||
#
|
||||
# Parse a "detect" device into a real block device name
|
||||
#
|
||||
def get_detect_device(detect_string):
|
||||
def get_detect_device_lsscsi(detect_string):
|
||||
"""
|
||||
Parses a "detect:" string into a normalized block device path using lsscsi.
|
||||
|
||||
@ -1140,3 +1141,96 @@ def get_detect_device(detect_string):
|
||||
break
|
||||
|
||||
return blockdev
|
||||
|
||||
|
||||
def get_detect_device_nvme(detect_string):
|
||||
"""
|
||||
Parses a "detect:" string into a normalized block device path using nvme.
|
||||
|
||||
A detect string is formatted "detect:<NAME>:<SIZE>:<ID>", where
|
||||
NAME is some unique identifier in the "nvme list" output, SIZE is a human-readable
|
||||
size value to within +/- 3% of the real size of the device, and
|
||||
ID is the Nth (0-indexed) matching entry of that NAME and SIZE.
|
||||
"""
|
||||
|
||||
unit_map = {
|
||||
"kB": 1000,
|
||||
"MB": 1000 * 1000,
|
||||
"GB": 1000 * 1000 * 1000,
|
||||
"TB": 1000 * 1000 * 1000 * 1000,
|
||||
"PB": 1000 * 1000 * 1000 * 1000 * 1000,
|
||||
"EB": 1000 * 1000 * 1000 * 1000 * 1000 * 1000,
|
||||
}
|
||||
|
||||
_, name, _size, idd = detect_string.split(":")
|
||||
if _ != "detect":
|
||||
return None
|
||||
|
||||
size_re = re_search(r"([\d.]+)([kKMGTP]B)", _size)
|
||||
size_val = float(size_re.group(1))
|
||||
size_unit = size_re.group(2)
|
||||
size_bytes = int(size_val * unit_map[size_unit])
|
||||
|
||||
retcode, stdout, stderr = run_os_command("nvme list --output-format json")
|
||||
if retcode:
|
||||
print(f"Failed to run nvme: {stderr}")
|
||||
return None
|
||||
|
||||
# Parse the output with json
|
||||
nvme_data = loads(stdout).get("Devices", list())
|
||||
|
||||
# Handle size determination (+/- 3%)
|
||||
size = None
|
||||
nvme_sizes = set()
|
||||
for entry in nvme_data:
|
||||
nvme_sizes.add(entry["PhysicalSize"])
|
||||
for l_size in nvme_sizes:
|
||||
plusthreepct = size_bytes * 1.03
|
||||
minusthreepct = size_bytes * 0.97
|
||||
|
||||
if l_size > minusthreepct and l_size < plusthreepct:
|
||||
size = l_size
|
||||
break
|
||||
if size is None:
|
||||
return None
|
||||
|
||||
blockdev = None
|
||||
matches = list()
|
||||
for entry in nvme_data:
|
||||
# Skip if name is not contained in the line (case-insensitive)
|
||||
if name.lower() not in entry["ModelNumber"].lower():
|
||||
continue
|
||||
# Skip if the size does not match
|
||||
if size != entry["PhysicalSize"]:
|
||||
continue
|
||||
# Get our blockdev and append to the list
|
||||
matches.append(entry["DevicePath"])
|
||||
|
||||
blockdev = None
|
||||
# Find the blockdev at index {idd}
|
||||
for idx, _blockdev in enumerate(matches):
|
||||
if int(idx) == int(idd):
|
||||
blockdev = _blockdev
|
||||
break
|
||||
|
||||
return blockdev
|
||||
|
||||
|
||||
def get_detect_device(detect_string):
|
||||
"""
|
||||
Parses a "detect:" string into a normalized block device path.
|
||||
|
||||
First tries to parse using "lsscsi" (get_detect_device_lsscsi). If this returns an invalid
|
||||
block device name, then try to parse using "nvme" (get_detect_device_nvme). This works around
|
||||
issues with more recent devices (e.g. the Dell R6615 series) not properly reporting block
|
||||
device paths for NVMe devices with "lsscsi".
|
||||
"""
|
||||
|
||||
device = get_detect_device_lsscsi(detect_string)
|
||||
if device is None or not re_match(r"^/dev", device):
|
||||
device = get_detect_device_nvme(detect_string)
|
||||
|
||||
if device is not None and re_match(r"^/dev", device):
|
||||
return device
|
||||
else:
|
||||
return None
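To make the fallback concrete, the sketch below walks an invented "detect:" string through these helpers; the model string, size, and resulting device path are assumptions, while the matching rules (size unit conversion, the +/- 3% window, the case-insensitive ModelNumber search, and the index selection) come from the code above.
# Hypothetical example, not from the PVC source:
detect_string = "detect:SAMSUNG:960GB:0"

# If get_detect_device_lsscsi() yields None or a non-/dev value, the nvme
# parser takes over and:
#   1. converts "960GB" to 960 * 10**9 = 960000000000 bytes,
#   2. accepts any PhysicalSize within +/- 3% (931.2e9 .. 988.8e9 bytes),
#   3. keeps "nvme list" entries whose ModelNumber contains "samsung",
#   4. returns the DevicePath of match index 0, e.g. "/dev/nvme0n1".
blockdev = get_detect_device(detect_string)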
|
||||
|
@ -406,6 +406,78 @@ def get_configuration():
|
||||
return config
|
||||
|
||||
|
||||
def get_parsed_autobackup_configuration(config_file):
|
||||
"""
|
||||
Load the configuration; this is the same main pvc.conf that the daemons read
|
||||
"""
|
||||
print('Loading configuration from file "{}"'.format(config_file))
|
||||
|
||||
with open(config_file, "r") as cfgfh:
|
||||
try:
|
||||
o_config = yaml.load(cfgfh, Loader=yaml.SafeLoader)
|
||||
except Exception as e:
|
||||
print(f"ERROR: Failed to parse configuration file: {e}")
|
||||
os._exit(1)
|
||||
|
||||
config = dict()
|
||||
|
||||
try:
|
||||
o_cluster = o_config["cluster"]
|
||||
config_cluster = {
|
||||
"cluster": o_cluster["name"],
|
||||
"autobackup_enabled": True,
|
||||
}
|
||||
config = {**config, **config_cluster}
|
||||
|
||||
o_autobackup = o_config["autobackup"]
|
||||
if o_autobackup is None:
|
||||
config["autobackup_enabled"] = False
|
||||
return config
|
||||
|
||||
config_autobackup = {
|
||||
"backup_root_path": o_autobackup["backup_root_path"],
|
||||
"backup_root_suffix": o_autobackup["backup_root_suffix"],
|
||||
"backup_tags": o_autobackup["backup_tags"],
|
||||
"backup_schedule": o_autobackup["backup_schedule"],
|
||||
}
|
||||
config = {**config, **config_autobackup}
|
||||
|
||||
o_automount = o_autobackup["auto_mount"]
|
||||
config_automount = {
|
||||
"auto_mount_enabled": o_automount["enabled"],
|
||||
}
|
||||
config = {**config, **config_automount}
|
||||
if config["auto_mount_enabled"]:
|
||||
config["mount_cmds"] = list()
|
||||
for _mount_cmd in o_automount["mount_cmds"]:
|
||||
if "{backup_root_path}" in _mount_cmd:
|
||||
_mount_cmd = _mount_cmd.format(
|
||||
backup_root_path=config["backup_root_path"]
|
||||
)
|
||||
config["mount_cmds"].append(_mount_cmd)
|
||||
config["unmount_cmds"] = list()
|
||||
for _unmount_cmd in o_automount["unmount_cmds"]:
|
||||
if "{backup_root_path}" in _unmount_cmd:
|
||||
_unmount_cmd = _unmount_cmd.format(
|
||||
backup_root_path=config["backup_root_path"]
|
||||
)
|
||||
config["unmount_cmds"].append(_unmount_cmd)
|
||||
|
||||
except Exception as e:
|
||||
raise MalformedConfigurationError(e)
|
||||
|
||||
return config
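For reference, an autobackup section that satisfies this parser might look like the sketch below; only the key names are taken from the code above, while the paths, tag, schedule values, and mount commands are invented placeholders (note the optional {backup_root_path} substitution in the mount/unmount commands).
# Illustrative pvc.conf fragment (not a shipped default):
#
#   autobackup:
#     backup_root_path: /srv/backups
#     backup_root_suffix: /pvc-autobackup
#     backup_tags:
#       - autobackup
#     backup_schedule:
#       full_interval: 7
#       full_retention: 2
#     auto_mount:
#       enabled: yes
#       mount_cmds:
#         - "/bin/mount backupserver:/backups {backup_root_path}"
#       unmount_cmds:
#         - "/bin/umount {backup_root_path}"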
|
||||
|
||||
|
||||
def get_autobackup_configuration():
|
||||
"""
|
||||
Get the configuration.
|
||||
"""
|
||||
pvc_config_file = get_configuration_path()
|
||||
config = get_parsed_autobackup_configuration(pvc_config_file)
|
||||
return config
|
||||
|
||||
|
||||
def validate_directories(config):
|
||||
if not os.path.exists(config["dynamic_directory"]):
|
||||
os.makedirs(config["dynamic_directory"])
|
||||
|
1826
daemon-common/vm.py
File diff suppressed because it is too large
@ -30,6 +30,9 @@ from kazoo.client import KazooClient, KazooState
|
||||
from kazoo.exceptions import NoNodeError
|
||||
|
||||
|
||||
SCHEMA_ROOT_PATH = "/usr/share/pvc/daemon_lib/migrations/versions"
|
||||
|
||||
|
||||
#
|
||||
# Function decorators
|
||||
#
|
||||
@ -869,7 +872,7 @@ class ZKSchema(object):
|
||||
if not quiet:
|
||||
print(f"Loading schema version {version}")
|
||||
|
||||
with open(f"daemon_lib/migrations/versions/{version}.json", "r") as sfh:
|
||||
with open(f"{SCHEMA_ROOT_PATH}/{version}.json", "r") as sfh:
|
||||
self.schema = json.load(sfh)
|
||||
self.version = self.schema.get("version")
|
||||
|
||||
@ -1218,7 +1221,7 @@ class ZKSchema(object):
|
||||
# Write the latest schema to a file
|
||||
@classmethod
|
||||
def write(cls):
|
||||
schema_file = "daemon_lib/migrations/versions/{}.json".format(cls._version)
|
||||
schema_file = f"{SCHEMA_ROOT_PATH}/{cls._version}.json"
|
||||
with open(schema_file, "w") as sfh:
|
||||
json.dump(cls._schema, sfh)
|
||||
|
||||
@ -1226,7 +1229,7 @@ class ZKSchema(object):
|
||||
@staticmethod
|
||||
def find_all(start=0, end=None):
|
||||
versions = list()
|
||||
for version in os.listdir("daemon_lib/migrations/versions"):
|
||||
for version in os.listdir(SCHEMA_ROOT_PATH):
|
||||
sequence_id = int(version.split(".")[0])
|
||||
if end is None:
|
||||
if sequence_id > start:
|
||||
@ -1242,7 +1245,7 @@ class ZKSchema(object):
|
||||
@staticmethod
|
||||
def find_latest():
|
||||
latest_version = 0
|
||||
for version in os.listdir("daemon_lib/migrations/versions"):
|
||||
for version in os.listdir(SCHEMA_ROOT_PATH):
|
||||
sequence_id = int(version.split(".")[0])
|
||||
if sequence_id > latest_version:
|
||||
latest_version = sequence_id
|
||||
|
32
debian/changelog
vendored
@ -1,3 +1,35 @@
|
||||
pvc (0.9.100-0) unstable; urgency=high
|
||||
|
||||
* [API Daemon] Improves the handling of "detect:" disk strings on newer systems by leveraging the "nvme" command
|
||||
* [Client CLI] Update help text about "detect:" disk strings
|
||||
* [Meta] Updates deprecation warnings and updates builder to only add this version for Debian 12 (Bookworm)
|
||||
|
||||
-- Joshua M. Boniface <joshua@boniface.me> Fri, 30 Aug 2024 11:03:33 -0400
|
||||
|
||||
pvc (0.9.99-0) unstable; urgency=high
|
||||
|
||||
**Deprecation Warning**: `pvc vm backup` commands are now deprecated and will be removed in **0.9.100**. Use `pvc vm snapshot` commands instead.
|
||||
**Breaking Change**: The on-disk format of VM snapshot exports differs from backup exports, and the PVC autobackup system now leverages these. It is recommended to start fresh with a new tree of backups for `pvc autobackup` for maximum compatibility.
|
||||
**Breaking Change**: VM autobackups now run in `pvcworkerd` instead of the CLI client directly, allowing them to be triggered from any node (or externally). It is important to apply the timer unit changes from the `pvc-ansible` role after upgrading to 0.9.99 to avoid duplicate runs.
|
||||
**Usage Note**: VM snapshots are displayed in the `pvc vm list` and `pvc vm info` outputs, not in a unique "list" endpoint.
|
||||
|
||||
* [API Daemon] Adds a proper error when an invalid provisioner profile is specified
|
||||
* [Node Daemon] Sorts Ceph pools properly in node keepalive to avoid incorrect ordering
|
||||
* [Health Daemon] Improves handling of IPMI checks by adding multiple tries but a shorter timeout
|
||||
* [API Daemon] Improves handling of XML parsing errors in VM configurations
|
||||
* [ALL] Adds support for whole VM snapshots, including configuration XML details, and direct rollback to snapshots
|
||||
* [ALL] Adds support for exporting and importing whole VM snapshots
|
||||
* [Client CLI] Removes vCPU topology from short VM info output
|
||||
* [Client CLI] Improves output format of VM info output
|
||||
* [API Daemon] Adds an endpoint to get the current primary node
|
||||
* [Client CLI] Fixes a bug where API requests were made 3 times
|
||||
* [Other] Improves the build-and-deploy.sh script
|
||||
* [API Daemon] Improves the "vm rename" command to avoid redefining VM, preserving history etc.
|
||||
* [API Daemon] Adds an indication when a task is run on the primary node
|
||||
* [API Daemon] Fixes a bug where the ZK schema relative path didn't work sometimes
|
||||
|
||||
-- Joshua M. Boniface <joshua@boniface.me> Wed, 28 Aug 2024 11:15:55 -0400
|
||||
|
||||
pvc (0.9.98-0) unstable; urgency=high
|
||||
|
||||
* [CLI Client] Fixed output when API call times out
|
||||
|
@ -33,7 +33,7 @@ import os
|
||||
import signal
|
||||
|
||||
# Daemon version
|
||||
version = "0.9.98"
|
||||
version = "0.9.100"
|
||||
|
||||
|
||||
##########################################################
|
||||
|
@ -49,7 +49,7 @@ import re
|
||||
import json
|
||||
|
||||
# Daemon version
|
||||
version = "0.9.98"
|
||||
version = "0.9.100"
|
||||
|
||||
|
||||
##########################################################
|
||||
|
@ -28,6 +28,11 @@ from daemon_lib.vm import (
|
||||
vm_worker_flush_locks,
|
||||
vm_worker_attach_device,
|
||||
vm_worker_detach_device,
|
||||
vm_worker_create_snapshot,
|
||||
vm_worker_remove_snapshot,
|
||||
vm_worker_rollback_snapshot,
|
||||
vm_worker_export_snapshot,
|
||||
vm_worker_import_snapshot,
|
||||
)
|
||||
from daemon_lib.ceph import (
|
||||
osd_worker_add_osd,
|
||||
@ -42,9 +47,12 @@ from daemon_lib.benchmark import (
|
||||
from daemon_lib.vmbuilder import (
|
||||
worker_create_vm,
|
||||
)
|
||||
from daemon_lib.autobackup import (
|
||||
worker_cluster_autobackup,
|
||||
)
|
||||
|
||||
# Daemon version
|
||||
version = "0.9.98"
|
||||
version = "0.9.100"
|
||||
|
||||
|
||||
config = cfg.get_configuration()
|
||||
@ -96,6 +104,21 @@ def storage_benchmark(self, pool=None, run_on="primary"):
|
||||
return run_storage_benchmark(self, pool)
|
||||
|
||||
|
||||
@celery.task(name="cluster.autobackup", bind=True, routing_key="run_on")
|
||||
def cluster_autobackup(self, force_full=False, email_recipients=None, run_on="primary"):
|
||||
@ZKConnection(config)
|
||||
def run_cluster_autobackup(
|
||||
zkhandler, self, force_full=False, email_recipients=None
|
||||
):
|
||||
return worker_cluster_autobackup(
|
||||
zkhandler, self, force_full=force_full, email_recipients=email_recipients
|
||||
)
|
||||
|
||||
return run_cluster_autobackup(
|
||||
self, force_full=force_full, email_recipients=email_recipients
|
||||
)
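For orientation, the new worker task is addressed by its registered name; one way an external client could queue it is with Celery's standard send_task API, as sketched below. The app handle, queue name, and recipient list are assumptions and not taken from this diff.
# Hypothetical dispatch of the cluster.autobackup task (sketch only).
result = celery.send_task(
    "cluster.autobackup",
    kwargs={
        "force_full": False,
        "email_recipients": ["ops@example.com"],
        "run_on": "primary",
    },
    queue="primary",  # assumed queue; PVC routes "run_on" tasks to the primary
)
print(result.id)  # task ID a client would then poll for progress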
|
||||
|
||||
|
||||
@celery.task(name="vm.flush_locks", bind=True, routing_key="run_on")
|
||||
def vm_flush_locks(self, domain=None, force_unlock=False, run_on="primary"):
|
||||
@ZKConnection(config)
|
||||
@ -123,6 +146,87 @@ def vm_device_detach(self, domain=None, xml=None, run_on=None):
|
||||
return run_vm_device_detach(self, domain, xml)
|
||||
|
||||
|
||||
@celery.task(name="vm.create_snapshot", bind=True, routing_key="run_on")
|
||||
def vm_create_snapshot(self, domain=None, snapshot_name=None, run_on="primary"):
|
||||
@ZKConnection(config)
|
||||
def run_vm_create_snapshot(zkhandler, self, domain, snapshot_name):
|
||||
return vm_worker_create_snapshot(zkhandler, self, domain, snapshot_name)
|
||||
|
||||
return run_vm_create_snapshot(self, domain, snapshot_name)
|
||||
|
||||
|
||||
@celery.task(name="vm.remove_snapshot", bind=True, routing_key="run_on")
|
||||
def vm_remove_snapshot(self, domain=None, snapshot_name=None, run_on="primary"):
|
||||
@ZKConnection(config)
|
||||
def run_vm_remove_snapshot(zkhandler, self, domain, snapshot_name):
|
||||
return vm_worker_remove_snapshot(zkhandler, self, domain, snapshot_name)
|
||||
|
||||
return run_vm_remove_snapshot(self, domain, snapshot_name)
|
||||
|
||||
|
||||
@celery.task(name="vm.rollback_snapshot", bind=True, routing_key="run_on")
|
||||
def vm_rollback_snapshot(self, domain=None, snapshot_name=None, run_on="primary"):
|
||||
@ZKConnection(config)
|
||||
def run_vm_rollback_snapshot(zkhandler, self, domain, snapshot_name):
|
||||
return vm_worker_rollback_snapshot(zkhandler, self, domain, snapshot_name)
|
||||
|
||||
return run_vm_rollback_snapshot(self, domain, snapshot_name)
|
||||
|
||||
|
||||
@celery.task(name="vm.export_snapshot", bind=True, routing_key="run_on")
|
||||
def vm_export_snapshot(
|
||||
self,
|
||||
domain=None,
|
||||
snapshot_name=None,
|
||||
export_path=None,
|
||||
incremental_parent=None,
|
||||
run_on="primary",
|
||||
):
|
||||
@ZKConnection(config)
|
||||
def run_vm_export_snapshot(
|
||||
zkhandler, self, domain, snapshot_name, export_path, incremental_parent=None
|
||||
):
|
||||
return vm_worker_export_snapshot(
|
||||
zkhandler,
|
||||
self,
|
||||
domain,
|
||||
snapshot_name,
|
||||
export_path,
|
||||
incremental_parent=incremental_parent,
|
||||
)
|
||||
|
||||
return run_vm_export_snapshot(
|
||||
self, domain, snapshot_name, export_path, incremental_parent=incremental_parent
|
||||
)
|
||||
|
||||
|
||||
@celery.task(name="vm.import_snapshot", bind=True, routing_key="run_on")
|
||||
def vm_import_snapshot(
|
||||
self,
|
||||
domain=None,
|
||||
snapshot_name=None,
|
||||
import_path=None,
|
||||
retain_snapshot=True,
|
||||
run_on="primary",
|
||||
):
|
||||
@ZKConnection(config)
|
||||
def run_vm_import_snapshot(
|
||||
zkhandler, self, domain, snapshot_name, import_path, retain_snapshot=True
|
||||
):
|
||||
return vm_worker_import_snapshot(
|
||||
zkhandler,
|
||||
self,
|
||||
domain,
|
||||
snapshot_name,
|
||||
import_path,
|
||||
retain_snapshot=retain_snapshot,
|
||||
)
|
||||
|
||||
return run_vm_import_snapshot(
|
||||
self, domain, snapshot_name, import_path, retain_snapshot=retain_snapshot
|
||||
)
|
||||
|
||||
|
||||
@celery.task(name="osd.add", bind=True, routing_key="run_on")
|
||||
def osd_add(
|
||||
self,
|
||||