diff --git a/api-daemon/pvcapid/flaskapi.py b/api-daemon/pvcapid/flaskapi.py
index 9411f38f..6051a7ef 100755
--- a/api-daemon/pvcapid/flaskapi.py
+++ b/api-daemon/pvcapid/flaskapi.py
@@ -2140,7 +2140,7 @@ class API_VM_Locks(Resource):
 api.add_resource(API_VM_Locks, "/vm/<vm>/locks")
 
 
-# /vm/<vm</console
+# /vm/<vm>/console
 class API_VM_Console(Resource):
     @RequestParser([{"name": "lines"}])
     @Authenticator
@@ -2293,6 +2293,77 @@ class API_VM_Device(Resource):
 api.add_resource(API_VM_Device, "/vm/<vm>/device")
 
 
+# /vm/<vm>/backup
+class API_VM_Backup(Resource):
+    @RequestParser(
+        [
+            {
+                "name": "target_path",
+                "required": True,
+                "helptext": "A local filesystem path on the primary coordinator must be specified",
+            },
+            {
+                "name": "incremental_parent",
+                "required": False,
+            },
+            {
+                "name": "retain_snapshots",
+                "required": False,
+            },
+        ]
+    )
+    @Authenticator
+    def get(self, vm, reqargs):
+        """
+        Create a backup of {vm} and its volumes to a local primary coordinator filesystem path
+        ---
+        tags:
+          - vm
+        parameters:
+          - in: query
+            name: target_path
+            type: string
+            required: true
+            description: A local filesystem path on the primary coordinator to store the backup
+          - in: query
+            name: incremental_parent
+            type: string
+            required: false
+            description: A previous backup datestamp to use as an incremental parent; if unspecified a full backup is taken
+          - in: query
+            name: retain_snapshots
+            type: boolean
+            required: false
+            default: false
+            description: Whether or not to retain this backup's volume snapshots to use as a future incremental parent
+        responses:
+          200:
+            description: OK
+            schema:
+              type: object
+              id: Message
+          400:
+            description: Execution error
+            schema:
+              type: object
+              id: Message
+          404:
+            description: Not found
+            schema:
+              type: object
+              id: Message
+        """
+        target_path = reqargs.get("target_path", None)
+        incremental_parent = reqargs.get("incremental_parent", None)
+        retain_snapshots = bool(strtobool(reqargs.get("retain_snapshots", "false")))
+        return api_helper.backup_vm(
+            vm, target_path, incremental_parent, retain_snapshots
+        )
+
+
+api.add_resource(API_VM_Backup, "/vm/<vm>/backup")
+
+
 ##########################################################
 # Client API - Network
 ##########################################################
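For reference, the new resource takes all of its arguments as query parameters on a GET request. A minimal sketch of exercising it directly with `requests`, outside the bundled CLI client; the API address, port, prefix, key, VM name, and target path shown are illustrative placeholders, not values defined by this diff:

```python
import requests

# Illustrative values; substitute your cluster's API endpoint and key
API_URL = "http://pvc.local:7370/api/v1"
HEADERS = {"X-Api-Key": "MY_API_KEY"}

# Request a full backup of VM "web01", retaining its snapshots so a
# later run can pass this backup's datestring as incremental_parent
response = requests.get(
    f"{API_URL}/vm/web01/backup",
    headers=HEADERS,
    params={"target_path": "/srv/backups", "retain_snapshots": "true"},
)
print(response.status_code, response.json().get("message"))
```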
+ """ + retflag, retdata = pvc_vm.backup_vm( + zkhandler, + domain, + target_path, + incremental_parent, + retain_snapshots, + ) + + if retflag: + retcode = 200 + else: + retcode = 400 + + output = {"message": retdata.replace('"', "'")} + return output, retcode + + @ZKConnection(config) def vm_attach_device(zkhandler, vm, device_spec_xml): """ diff --git a/client-cli/pvc/cli/cli.py b/client-cli/pvc/cli/cli.py index df79df91..9d8a63d6 100644 --- a/client-cli/pvc/cli/cli.py +++ b/client-cli/pvc/cli/cli.py @@ -1590,6 +1590,48 @@ def cli_vm_flush_locks(domain): finish(retcode, retmsg) +############################################################################### +# > pvc vm backup +############################################################################### +@click.command(name="backup", short_help="Create a backup of a virtual machine.") +@connection_req +@click.argument("domain") +@click.argument("target_path") +@click.option( + "-i", + "--incremental" "incremental_parent", + default=None, + help="Perform an incremental volume backup from this parent backup datestring.", +) +@click.option( + "-r", + "--retain-snapshots", + "retain_snapshots", + is_flag=True, + default=False, + help="Retain volume snapshots for future incremental use.", +) +def cli_vm_backup(domain, target_path, incremental_parent, retain_snapshots): + """ + Create a backup of virtual machine DOMAIN to TARGET_PATH on the cluster primary coordinator. DOMAIN may be a UUID or name. + + TARGET_PATH must be a valid absolute directory path on the cluster "primary" coordinator (see "pvc node list") allowing writes from the API daemon (normally running as "root"). The TARGET_PATH should be a large storage volume, ideally a remotely mounted filesystem (e.g. NFS, SSHFS, etc.) or non-Ceph-backed disk; PVC does not handle this path, that is up to the administrator to configure and manage. + + The backup will export the VM configuration, metainfo, and a point-in-time snapshot of all attached RBD volumes, using a datestring formatted backup name (i.e. YYYYMMDDHHMMSS). + + The virtual machine DOMAIN may be running, and due to snapshots the backup should be crash-consistent, but will be in an unclean state and this must be considered when restoring from backups. + + Incremental snapshots are possible by specifying the "-i"/"--incremental" option along with a source backup datestring. The snapshots from that source backup must have been retained using the "-r"/"--retain-snapshots" option. Arbitrary snapshots, assuming they are valid for all attached RBD volumes, may also be used, as long as they are prefixed with "backup_". Retaining snapshots of incremental backups is supported, though it is not recommended to "chain" incremental backups in this way as it can make managing restores more difficult. + + Full backup volume images are sparse-allocated, however it is recommended for safety to consider their maximum allocated size when allocated space for the TARGET_PATH. Incremental volume images are generally small but are dependent entirely on the rate of data change in each volume. 
+ """ + + retcode, retmsg = pvc.lib.vm.vm_backup( + CLI_CONFIG, domain, target_path, incremental_parent, retain_snapshots + ) + finish(retcode, retmsg) + + ############################################################################### # > pvc vm tag ############################################################################### @@ -5659,6 +5701,7 @@ cli_vm.add_command(cli_vm_move) cli_vm.add_command(cli_vm_migrate) cli_vm.add_command(cli_vm_unmigrate) cli_vm.add_command(cli_vm_flush_locks) +cli_vm.add_command(cli_vm_backup) cli_vm_tag.add_command(cli_vm_tag_get) cli_vm_tag.add_command(cli_vm_tag_add) cli_vm_tag.add_command(cli_vm_tag_remove) diff --git a/client-cli/pvc/lib/vm.py b/client-cli/pvc/lib/vm.py index af764d53..30416e83 100644 --- a/client-cli/pvc/lib/vm.py +++ b/client-cli/pvc/lib/vm.py @@ -433,6 +433,27 @@ def vm_locks(config, vm): return retstatus, response.json().get("message", "") +def vm_backup(config, vm, target_path, incremental_parent=None, retain_snapshots=False): + """ + Create a backup of {vm} and its volumes to a local primary coordinator filesystem path + + API endpoint: GET /vm/{vm}/backup + API arguments: target_path={target_path}, incremental_parent={incremental_parent}, retain_snapshots={retain_snapshots} + API schema: {"message":"{data}"} + """ + params = { + "target_path": target_path, + "incremental_parent": incremental_parent, + "retain_snapshots": retain_snapshots, + } + response = call_api(config, "get", "/vm/{vm}/backup".format(vm=vm), params=params) + + if response.status_code != 200: + return False, response.json().get("message", "") + else: + return True, response.json().get("message", "") + + def vm_vcpus_set(config, vm, vcpus, topology, restart): """ Set the vCPU count of the VM with topology diff --git a/daemon-common/vm.py b/daemon-common/vm.py index 4aec2950..e4df8439 100644 --- a/daemon-common/vm.py +++ b/daemon-common/vm.py @@ -21,12 +21,15 @@ import time import re +import os.path import lxml.objectify import lxml.etree from distutils.util import strtobool from uuid import UUID from concurrent.futures import ThreadPoolExecutor +from datetime import datetime +from json import dump as jdump import daemon_lib.common as common @@ -1297,3 +1300,159 @@ def get_list(zkhandler, node, state, tag, limit, is_fuzzy=True, negate=False): pass return True, sorted(vm_data_list, key=lambda d: d["name"]) + + +def backup_vm( + zkhandler, domain, target_path, incremental_parent=None, retain_snapshots=False +): + + # 0. Validations + # Validate that VM exists in cluster + dom_uuid = getDomainUUID(zkhandler, domain) + if not dom_uuid: + return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain) + + # Validate that the target path exists + if not re.match(r"^/", target_path): + return ( + False, + f"ERROR: Target path {target_path} is not a valid absolute path on the primary coordinator!", + ) + + # Ensure that target_path (on this node) exists + if not os.path.isdir(target_path): + return False, f"ERROR: Target path {target_path} does not exist!" + + # 1. Get information about VM + vm_detail = get_list(zkhandler, limit=dom_uuid, is_fuzzy=False)[0] + vm_volumes = [ + tuple(d["name"].split("/")) for d in vm_detail["disks"] if d["type"] == "rbd" + ] + + # 2a. Validate that all volumes exist (they should, but just in case) + for pool, volume in vm_volumes: + if not ceph.verifyVolume(zkhandler, pool, volume): + return ( + False, + f"ERROR: VM defines a volume {pool}/{volume} which does not exist!", + ) + + # 2b. 
diff --git a/client-cli/pvc/lib/vm.py b/client-cli/pvc/lib/vm.py
index af764d53..30416e83 100644
--- a/client-cli/pvc/lib/vm.py
+++ b/client-cli/pvc/lib/vm.py
@@ -433,6 +433,27 @@ def vm_locks(config, vm):
     return retstatus, response.json().get("message", "")
 
 
+def vm_backup(config, vm, target_path, incremental_parent=None, retain_snapshots=False):
+    """
+    Create a backup of {vm} and its volumes to a local primary coordinator filesystem path
+
+    API endpoint: GET /vm/{vm}/backup
+    API arguments: target_path={target_path}, incremental_parent={incremental_parent}, retain_snapshots={retain_snapshots}
+    API schema: {"message":"{data}"}
+    """
+    params = {
+        "target_path": target_path,
+        "incremental_parent": incremental_parent,
+        "retain_snapshots": retain_snapshots,
+    }
+    response = call_api(config, "get", "/vm/{vm}/backup".format(vm=vm), params=params)
+
+    if response.status_code != 200:
+        return False, response.json().get("message", "")
+    else:
+        return True, response.json().get("message", "")
+
+
 def vm_vcpus_set(config, vm, vcpus, topology, restart):
     """
     Set the vCPU count of the VM with topology
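A subtlety in this client function: `retain_snapshots` is passed as a Python boolean in the query parameters, which the underlying `requests` call serializes as the strings "True"/"False" (and `incremental_parent` is dropped from the query string entirely when it is None). The server side tolerates the capitalized spellings because `strtobool()` lower-cases its input before matching. A quick check of that round-trip, assuming the stock `distutils` behavior:

```python
from distutils.util import strtobool

# requests serializes boolean query params as "True"/"False";
# strtobool() lower-cases first, so both spellings parse cleanly
for wire_value in ("True", "False", "true", "false"):
    print(wire_value, "->", bool(strtobool(wire_value)))
```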
diff --git a/daemon-common/vm.py b/daemon-common/vm.py
index 4aec2950..e4df8439 100644
--- a/daemon-common/vm.py
+++ b/daemon-common/vm.py
@@ -21,12 +21,15 @@
 import time
 import re
+import os.path
 import lxml.objectify
 import lxml.etree
 
 from distutils.util import strtobool
 from uuid import UUID
 from concurrent.futures import ThreadPoolExecutor
+from datetime import datetime
+from json import dump as jdump
 
 import daemon_lib.common as common
@@ -1297,3 +1300,159 @@ def get_list(zkhandler, node, state, tag, limit, is_fuzzy=True, negate=False):
         pass
 
     return True, sorted(vm_data_list, key=lambda d: d["name"])
+
+
+def backup_vm(
+    zkhandler, domain, target_path, incremental_parent=None, retain_snapshots=False
+):
+
+    # 0. Validations
+    # Validate that VM exists in cluster
+    dom_uuid = getDomainUUID(zkhandler, domain)
+    if not dom_uuid:
+        return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)
+
+    # Validate that the target path is an absolute path
+    if not re.match(r"^/", target_path):
+        return (
+            False,
+            f"ERROR: Target path {target_path} is not a valid absolute path on the primary coordinator!",
+        )
+
+    # Ensure that target_path (on this node) exists
+    if not os.path.isdir(target_path):
+        return False, f"ERROR: Target path {target_path} does not exist!"
+
+    # 1. Get information about VM
+    retflag, vm_list = get_list(zkhandler, None, None, None, dom_uuid, is_fuzzy=False)
+    if not retflag or not vm_list:
+        return False, f'ERROR: Could not get details of VM "{domain}"!'
+    vm_detail = vm_list[0]
+    vm_volumes = [
+        tuple(d["name"].split("/")) for d in vm_detail["disks"] if d["type"] == "rbd"
+    ]
+
+    # 2a. Validate that all volumes exist (they should, but just in case)
+    for pool, volume in vm_volumes:
+        if not ceph.verifyVolume(zkhandler, pool, volume):
+            return (
+                False,
+                f"ERROR: VM defines a volume {pool}/{volume} which does not exist!",
+            )
+
+    # 2b. Validate that, if an incremental_parent is given, it is valid
+    # The incremental parent is just a datestring
+    if incremental_parent is not None:
+        for pool, volume in vm_volumes:
+            if not ceph.verifySnapshot(
+                zkhandler, pool, volume, f"backup_{incremental_parent}"
+            ):
+                return (
+                    False,
+                    f"ERROR: Incremental parent {incremental_parent} given, but no snapshot {pool}/{volume}@backup_{incremental_parent} was found; cannot export an incremental backup.",
+                )
+
+        export_fileext = "rbddiff"
+    else:
+        export_fileext = "rbdimg"
+
+    # 3. Set datestring in YYYYMMDDHHMMSS format
+    now = datetime.now()
+    datestring = now.strftime("%Y%m%d%H%M%S")
+
+    snapshot_name = f"backup_{datestring}"
+
+    # 4. Create destination directory
+    vm_target_root = f"{target_path}/{domain}"
+    vm_target_backup = f"{target_path}/{domain}/.{datestring}"
+    if not os.path.isdir(vm_target_backup):
+        try:
+            os.makedirs(vm_target_backup)
+        except Exception as e:
+            return False, f"ERROR: Failed to create backup directory: {e}"
+
+    # 5. Take snapshot of each disk with the name @backup_{datestring}
+    is_snapshot_create_failed = False
+    which_snapshot_create_failed = list()
+    msg_snapshot_create_failed = list()
+    for pool, volume in vm_volumes:
+        retcode, retmsg = ceph.add_snapshot(zkhandler, pool, volume, snapshot_name)
+        if not retcode:
+            is_snapshot_create_failed = True
+            which_snapshot_create_failed.append(f"{pool}/{volume}")
+            msg_snapshot_create_failed.append(retmsg)
+
+    if is_snapshot_create_failed:
+        for pool, volume in vm_volumes:
+            if ceph.verifySnapshot(zkhandler, pool, volume, snapshot_name):
+                ceph.remove_snapshot(zkhandler, pool, volume, snapshot_name)
+        return (
+            False,
+            f'ERROR: Failed to create snapshot for volume(s) {", ".join(which_snapshot_create_failed)}: {", ".join(msg_snapshot_create_failed)}',
+        )
+
+    # 6. Dump snapshot to folder with `rbd export` (full) or `rbd export-diff` (incremental)
+    is_snapshot_export_failed = False
+    which_snapshot_export_failed = list()
+    msg_snapshot_export_failed = list()
+    for pool, volume in vm_volumes:
+        if incremental_parent is not None:
+            incremental_parent_snapshot_name = f"backup_{incremental_parent}"
+            retcode, stdout, stderr = common.run_os_command(
+                f"rbd export-diff --from-snap {incremental_parent_snapshot_name} {pool}/{volume}@{snapshot_name} {vm_target_backup}/{volume}.{export_fileext}"
+            )
+            if retcode:
+                is_snapshot_export_failed = True
+                which_snapshot_export_failed.append(f"{pool}/{volume}")
+                msg_snapshot_export_failed.append(stderr)
+        else:
+            retcode, stdout, stderr = common.run_os_command(
+                f"rbd export --export-format 2 {pool}/{volume}@{snapshot_name} {vm_target_backup}/{volume}.{export_fileext}"
+            )
+            if retcode:
+                is_snapshot_export_failed = True
+                which_snapshot_export_failed.append(f"{pool}/{volume}")
+                msg_snapshot_export_failed.append(stderr)
+
+    if is_snapshot_export_failed:
+        for pool, volume in vm_volumes:
+            if ceph.verifySnapshot(zkhandler, pool, volume, snapshot_name):
+                ceph.remove_snapshot(zkhandler, pool, volume, snapshot_name)
+        return (
+            False,
+            f'ERROR: Failed to export snapshot for volume(s) {", ".join(which_snapshot_export_failed)}: {", ".join(msg_snapshot_export_failed)}',
+        )
+
+    # 7. Create and dump VM backup information
+    vm_backup = {
+        "type": "incremental" if incremental_parent is not None else "full",
+        "datestring": datestring,
+        "incremental_parent": incremental_parent,
+        "vm_detail": vm_detail,
+        "backup_files": [f".{datestring}/{v}.{export_fileext}" for p, v in vm_volumes],
+    }
+    with open(f"{vm_target_root}/{domain}.{datestring}.pvcbackup", "w") as fh:
+        jdump(vm_backup, fh)
+
+    # 8. Remove snapshots if retain_snapshots is False
+    if not retain_snapshots:
+        is_snapshot_remove_failed = False
+        which_snapshot_remove_failed = list()
+        msg_snapshot_remove_failed = list()
+        for pool, volume in vm_volumes:
+            if ceph.verifySnapshot(zkhandler, pool, volume, snapshot_name):
+                retcode, retmsg = ceph.remove_snapshot(
+                    zkhandler, pool, volume, snapshot_name
+                )
+                if not retcode:
+                    is_snapshot_remove_failed = True
+                    which_snapshot_remove_failed.append(f"{pool}/{volume}")
+                    msg_snapshot_remove_failed.append(retmsg)
+
+        if is_snapshot_remove_failed:
+            for pool, volume in vm_volumes:
+                if ceph.verifySnapshot(zkhandler, pool, volume, snapshot_name):
+                    ceph.remove_snapshot(zkhandler, pool, volume, snapshot_name)
+            return (
+                True,
+                f'WARNING: Successfully backed up VM {domain} @ {datestring} to {target_path}, but failed to remove snapshot as requested for volume(s) {", ".join(which_snapshot_remove_failed)}: {", ".join(msg_snapshot_remove_failed)}',
+            )
+
+    return True, f"Successfully backed up VM {domain} @ {datestring} to {target_path}"
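Step 3's datestring relies on strftime() for zero-padding: naively concatenating the raw datetime fields would produce variable-width strings (e.g. 2023-01-05 09:05:07 becomes "202315957"), breaking both the advertised YYYYMMDDHHMMSS format and the lexicographic ordering of backup names. A quick demonstration:

```python
from datetime import datetime

# A fixed timestamp with single-digit month, day, hour, and minute
now = datetime(2023, 1, 5, 9, 5, 7)

naive = f"{now.year}{now.month}{now.day}{now.hour}{now.minute}{now.second}"
padded = now.strftime("%Y%m%d%H%M%S")

print(naive)   # 202315957      -- variable width, ambiguous
print(padded)  # 20230105090507 -- fixed 14 digits, sorts chronologically
```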