From 553c1e670e1bec49bed4b72c4815cec396cea702 Mon Sep 17 00:00:00 2001
From: "Joshua M. Boniface"
Date: Thu, 16 May 2024 09:23:31 -0400
Subject: [PATCH] Add VM snapshots functionality

Adds the ability to create snapshots of an entire VM, including all its
RBD disks and the VM XML config, though not any PVC metadata.
---
 api-daemon/pvcapid/flaskapi.py            |  48 ++++++++++
 api-daemon/pvcapid/helper.py              |  24 +++++
 client-cli/pvc/cli/cli.py                 |  49 +++++++++++
 client-cli/pvc/lib/vm.py                  |  21 +++++
 daemon-common/migrations/versions/14.json |   1 +
 daemon-common/vm.py                       | 102 ++++++++++++++++++++++
 daemon-common/zkhandler.py                |  14 ++-
 7 files changed, 256 insertions(+), 3 deletions(-)
 create mode 100644 daemon-common/migrations/versions/14.json

diff --git a/api-daemon/pvcapid/flaskapi.py b/api-daemon/pvcapid/flaskapi.py
index 0b1e1665..29162f92 100755
--- a/api-daemon/pvcapid/flaskapi.py
+++ b/api-daemon/pvcapid/flaskapi.py
@@ -3086,6 +3086,54 @@ class API_VM_Restore(Resource):
 api.add_resource(API_VM_Restore, "/vm/<vm>/restore")
 
 
+# /vm/<vm>/snapshot
+class API_VM_Snapshot(Resource):
+    @RequestParser(
+        [
+            {
+                "name": "snapshot_name",
+                "required": False,
+                "helptext": "",
+            },
+        ]
+    )
+    @Authenticator
+    def post(self, vm, reqargs):
+        """
+        Take a snapshot of a VM's disks and configuration
+        ---
+        tags:
+          - vm
+        parameters:
+          - in: query
+            name: snapshot_name
+            type: string
+            required: false
+            description: A custom name for the snapshot instead of autogeneration by date
+        responses:
+          200:
+            description: OK
+            schema:
+              type: object
+              id: Message
+          400:
+            description: Execution error
+            schema:
+              type: object
+              id: Message
+          404:
+            description: Not found
+            schema:
+              type: object
+              id: Message
+        """
+        snapshot_name = reqargs.get("snapshot_name", None)
+        return api_helper.create_vm_snapshot(vm, snapshot_name=snapshot_name)
+
+
+api.add_resource(API_VM_Snapshot, "/vm/<vm>/snapshot")
+
+
 ##########################################################
 # Client API - Network
 ##########################################################
diff --git a/api-daemon/pvcapid/helper.py b/api-daemon/pvcapid/helper.py
index aac500e6..7d28f0f8 100755
--- a/api-daemon/pvcapid/helper.py
+++ b/api-daemon/pvcapid/helper.py
@@ -765,6 +765,30 @@ def vm_restore(
     return output, retcode
 
 
+@ZKConnection(config)
+def create_vm_snapshot(
+    zkhandler,
+    domain,
+    snapshot_name=None,
+):
+    """
+    Take a snapshot of a VM.
+    """
+    retflag, retdata = pvc_vm.create_vm_snapshot(
+        zkhandler,
+        domain,
+        snapshot_name,
+    )
+
+    if retflag:
+        retcode = 200
+    else:
+        retcode = 400
+
+    output = {"message": retdata.replace('"', "'")}
+    return output, retcode
+
+
 @ZKConnection(config)
 def vm_attach_device(zkhandler, vm, device_spec_xml):
     """
diff --git a/client-cli/pvc/cli/cli.py b/client-cli/pvc/cli/cli.py
index 81ca9222..67554333 100644
--- a/client-cli/pvc/cli/cli.py
+++ b/client-cli/pvc/cli/cli.py
@@ -1765,6 +1765,53 @@ def cli_vm_flush_locks(domain, wait_flag):
     finish(retcode, retmsg)
 
 
+###############################################################################
+# > pvc vm snapshot
+###############################################################################
+@click.group(
+    name="snapshot",
+    short_help="Manage snapshots for PVC VMs.",
+    context_settings=CONTEXT_SETTINGS,
+)
+def cli_vm_snapshot():
+    """
+    Manage snapshots of VMs in a PVC cluster.
+ """ + pass + + +############################################################################### +# > pvc vm snapshot create +############################################################################### +@click.command(name="create", short_help="Create a snapshot of a virtual machine.") +@connection_req +@click.argument("domain") +@click.argument("snapshot_name", required=False, default=None) +def cli_vm_snapshot_create(domain, snapshot_name): + """ + Create a snapshot of the disks and XML configuration of virtual machine DOMAIN, with the + optional name SNAPSHOT_NAME. DOMAIN mayb e a UUID or name. + + WARNING: RBD snapshots are crash-consistent but not filesystem-aware. If a snapshot was taken + of a running VM, restoring that snapshot will be equivalent to having forcibly restarted the + VM at the moment of the snapshot. + """ + + echo( + CLI_CONFIG, + f"Taking snapshot of VM '{domain}'... ", + newline=False, + ) + retcode, retmsg = pvc.lib.vm.vm_create_snapshot( + CLI_CONFIG, domain, snapshot_name=snapshot_name + ) + if retcode: + echo(CLI_CONFIG, "done.") + else: + echo(CLI_CONFIG, "failed.") + finish(retcode, retmsg) + + ############################################################################### # > pvc vm backup ############################################################################### @@ -6302,6 +6349,8 @@ cli_vm.add_command(cli_vm_move) cli_vm.add_command(cli_vm_migrate) cli_vm.add_command(cli_vm_unmigrate) cli_vm.add_command(cli_vm_flush_locks) +cli_vm_snapshot.add_command(cli_vm_snapshot_create) +cli_vm.add_command(cli_vm_snapshot) cli_vm_backup.add_command(cli_vm_backup_create) cli_vm_backup.add_command(cli_vm_backup_restore) cli_vm_backup.add_command(cli_vm_backup_remove) diff --git a/client-cli/pvc/lib/vm.py b/client-cli/pvc/lib/vm.py index 2b58ad12..198e873c 100644 --- a/client-cli/pvc/lib/vm.py +++ b/client-cli/pvc/lib/vm.py @@ -498,6 +498,27 @@ def vm_restore(config, vm, backup_path, backup_datestring, retain_snapshot=False return True, response.json().get("message", "") +def vm_create_snapshot(config, vm, snapshot_name=None): + """ + Take a snapshot of a VM's disks and configuration + + API endpoint: POST /vm/{vm}/snapshot + API arguments: snapshot_name=snapshot_name + API schema: {"message":"{data}"} + """ + params = dict() + if snapshot_name is not None: + params["snapshot_name"] = snapshot_name + response = call_api( + config, "post", "/vm/{vm}/snapshot".format(vm=vm), params=params + ) + + if response.status_code != 200: + return False, response.json().get("message", "") + else: + return True, response.json().get("message", "") + + def vm_vcpus_set(config, vm, vcpus, topology, restart): """ Set the vCPU count of the VM with topology diff --git a/daemon-common/migrations/versions/14.json b/daemon-common/migrations/versions/14.json new file mode 100644 index 00000000..0d37e7ea --- /dev/null +++ b/daemon-common/migrations/versions/14.json @@ -0,0 +1 @@ +{"version": "14", "root": "", "base": {"root": "", "schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "logs": "/logs", "faults": "/faults", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.health": "/ceph/health", "storage.util": "/ceph/util", "osd": "/ceph/osds", 
"pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "logs": {"node": "", "messages": "/messages"}, "faults": {"id": "", "last_time": "/last_time", "first_time": "/first_time", "ack_time": "/ack_time", "status": "/status", "delta": "/delta", "message": "/message"}, "node": {"name": "", "keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "data.pvc_version": "/pvcversion", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword", "sriov": "/sriov", "sriov.pf": "/sriov/pf", "sriov.vf": "/sriov/vf", "monitoring.plugins": "/monitoring_plugins", "monitoring.data": "/monitoring_data", "monitoring.health": "/monitoring_health", "network.stats": "/network_stats"}, "monitoring_plugin": {"name": "", "last_run": "/last_run", "health_delta": "/health_delta", "message": "/message", "data": "/data", "runtime": "/runtime"}, "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"}, "sriov_vf": {"phy": "", "pf": "/pf", "mtu": "/mtu", "mac": "/mac", "phy_mac": "/phy_mac", "config": "/config", "config.vlan_id": "/config/vlan_id", "config.vlan_qos": "/config/vlan_qos", "config.tx_rate_min": "/config/tx_rate_min", "config.tx_rate_max": "/config/tx_rate_max", "config.spoof_check": "/config/spoof_check", "config.link_state": "/config/link_state", "config.trust": "/config/trust", "config.query_rss": "/config/query_rss", "pci": "/pci", "pci.domain": "/pci/domain", "pci.bus": "/pci/bus", "pci.slot": "/pci/slot", "pci.function": "/pci/function", "used": "/used", "used_by": "/used_by"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.migrate_max_downtime": "/migration_max_downtime", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "meta.tags": "/tags", "migrate.sync_lock": "/migrate_sync_lock", "snapshots": "/snapshots"}, "tag": {"name": "", "type": "/type", "protected": "/protected"}, "domain_snapshot": {"name": "", "is_backup": "/is_backup", "xml": "/xml", "rbd_snapshots": "/rbdsnaplist"}, "network": {"vni": "", "type": "/nettype", "mtu": "/mtu", "rule": "/firewall_rules", "rule.in": "/firewall_rules/in", "rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": "/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname", "expiry": "/expiry", "client_id": "/clientid"}, "rule": {"description": "", 
"rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "db_device": "/db_device", "fsid": "/fsid", "ofsid": "/fsid/osd", "cfsid": "/fsid/cluster", "lvm": "/lvm", "vg": "/lvm/vg", "lv": "/lvm/lv", "is_split": "/is_split", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "tier": "/tier", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}} \ No newline at end of file diff --git a/daemon-common/vm.py b/daemon-common/vm.py index d1f90a92..98691403 100644 --- a/daemon-common/vm.py +++ b/daemon-common/vm.py @@ -306,6 +306,7 @@ def define_vm( (("domain.meta.node_selector", dom_uuid), str(node_selector).lower()), (("domain.meta.tags", dom_uuid), ""), (("domain.migrate.sync_lock", dom_uuid), ""), + (("domain.snapshots", dom_uuid), ""), ] ) @@ -1245,6 +1246,107 @@ def get_list( return True, sorted(vm_data_list, key=lambda d: d["name"]) +# +# VM Snapshot Tasks +# +def create_vm_snapshot(zkhandler, domain, snapshot_name=None): + # Validate that VM exists in cluster + dom_uuid = getDomainUUID(zkhandler, domain) + if not dom_uuid: + return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain) + + if snapshot_name is None: + now = datetime.now() + datestring = now.strftime("%Y%m%d%H%M%S") + snapshot_name = f"snapshot_{datestring}" + else: + reg = re.compile("^[a-z0-9.-_]+$") + if not reg.match(snapshot_name): + return ( + False, + f'ERROR: Snapshot name "{snapshot_name}" contains invalid characters; only alphanumeric, ".", "-", and "_" characters are allowed!', + ) + + tstart = time.time() + + # Get the list of all RBD volumes + rbd_list = zkhandler.read(("domain.storage.volumes", dom_uuid)).split(",") + + snap_list = list() + + # If a snapshot fails, clean up any snapshots that were successfuly created + def cleanup_failure(): + for snapshot in snap_list: + rbd, snapshot_name = snapshot.split("@") + pool, volume = rbd.split("/") + # We capture no output here, because if this fails too we're in a deep + # error chain and will just ignore it + ceph.remove_snapshot(zkhandler, pool, volume, snapshot_name) + + # Iterrate through and create a snapshot for each RBD volume + for rbd in rbd_list: + pool, volume = rbd.split("/") + ret, msg = ceph.add_snapshot(zkhandler, pool, volume, snapshot_name) + if not ret: + cleanup_failure() + return False, msg + else: + snap_list.append(f"{pool}/{volume}@{snapshot_name}") + + # Get the current domain XML + vm_config = zkhandler.read(("domain.xml", dom_uuid)) + + # Add the snapshot entry to Zookeeper + zkhandler.write( + [ + ( + ("domain.snapshots", dom_uuid, "domain_snapshot.name", snapshot_name), + snapshot_name, + ), + ( + ( + "domain.snapshots", + dom_uuid, + "domain_snapshot.is_backup", + snapshot_name, + ), + False, + ), + ( + ("domain.snapshots", dom_uuid, "domain_snapshot.xml", snapshot_name), + vm_config, + ), + ( + ( + "domain.snapshots", + dom_uuid, + "domain_snapshot.rbd_snapshots", + snapshot_name, + ), + ",".join(snap_list), + ), + ] + ) + + tend = time.time() + ttot = round(tend - tstart, 2) + return ( + True, + f'Successfully created snapshot "{snapshot_name}" of VM "{domain}" in {ttot}s.', + ) + + +def rollback_vm_snapshot(zkhandler, domain, snapshot_name): + pass + + +def remove_vm_snapshot(zkhandler, domain, snapshot_name): + pass + + +# +# VM Backup Tasks +# def backup_vm( zkhandler, domain, backup_path, incremental_parent=None, retain_snapshot=False ): diff --git a/daemon-common/zkhandler.py b/daemon-common/zkhandler.py index 
index 6f83799f..72d2542a 100644
--- a/daemon-common/zkhandler.py
+++ b/daemon-common/zkhandler.py
@@ -573,7 +573,7 @@ class ZKHandler(object):
 #
 class ZKSchema(object):
     # Current version
-    _version = 13
+    _version = 14
 
     # Root for doing nested keys
     _schema_root = ""
@@ -713,13 +713,21 @@ class ZKSchema(object):
             "meta.node_limit": "/node_limit",
             "meta.tags": "/tags",
             "migrate.sync_lock": "/migrate_sync_lock",
+            "snapshots": "/snapshots",
         },
         # The schema of an individual domain tag entry (/domains/{domain}/tags/{tag})
         "tag": {
-            "name": "",
+            "name": "",  # The root key
             "type": "/type",
             "protected": "/protected",
-        },  # The root key
+        },
+        # The schema of an individual domain snapshot entry (/domains/{domain}/snapshots/{snapshot})
+        "domain_snapshot": {
+            "name": "",  # The root key
+            "is_backup": "/is_backup",
+            "xml": "/xml",
+            "rbd_snapshots": "/rbdsnaplist",
+        },
         # The schema of an individual network entry (/networks/{vni})
         "network": {
             "vni": "",  # The root key
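
Usage sketch of the new command added by this patch. The VM name, snapshot name, and timing below are illustrative, not captured output; the output lines follow the echo and message strings introduced above:

    $ pvc vm snapshot create myvm mysnap1
    Taking snapshot of VM 'myvm'... done.
    Successfully created snapshot "mysnap1" of VM "myvm" in 2.35s.

The equivalent API call is POST /vm/myvm/snapshot?snapshot_name=mysnap1, returning {"message": "..."} with status 200 on success or 400 on error. Omitting SNAPSHOT_NAME autogenerates a name of the form snapshot_YYYYMMDDHHMMSS; custom names must match ^[a-z0-9._-]+$.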