Refactor autobackups to make more sense
This commit is contained in:
		| @@ -25,7 +25,6 @@ from json import dump as jdump | ||||
| from os import popen, makedirs, path, scandir | ||||
| from shutil import rmtree | ||||
| from subprocess import run, PIPE | ||||
| from time import time | ||||
|  | ||||
| from daemon_lib.common import run_os_command | ||||
| from daemon_lib.config import get_autobackup_configuration | ||||
| @@ -136,7 +135,7 @@ def send_execution_summary_report( | ||||
|                     f"    {backup_date}: Success in {backup.get('runtime_secs', 0)} seconds, ID {datestring}, type {backup.get('type', 'unknown')}" | ||||
|                 ) | ||||
|                 email.append( | ||||
|                     f"                         Backup contains {len(backup.get('backup_files'))} files totaling {ceph.format_bytes_tohuman(backup.get('backup_size_bytes', 0))} ({backup.get('backup_size_bytes', 0)} bytes)" | ||||
|                     f"                         Backup contains {len(backup.get('export_files'))} files totaling {ceph.format_bytes_tohuman(backup.get('export_size_bytes', 0))} ({backup.get('export_size_bytes', 0)} bytes)" | ||||
|                 ) | ||||
|             else: | ||||
|                 email.append( | ||||
| @@ -151,6 +150,461 @@ def send_execution_summary_report( | ||||
|         log_err(f"Failed to send report email: {e}") | ||||
|  | ||||
|  | ||||
| def run_vm_backup( | ||||
|     zkhandler, celery, current_stage, total_stages, config, vm_detail, force_full=False | ||||
| ): | ||||
|     vm_name = vm_detail["name"] | ||||
|     dom_uuid = vm_detail["uuid"] | ||||
|     backup_suffixed_path = ( | ||||
|         f"{config['backup_root_path']}/{config['backup_root_suffix']}" | ||||
|     ) | ||||
|     vm_backup_path = f"{backup_suffixed_path}/{vm_name}" | ||||
|     autobackup_state_file = f"{vm_backup_path}/.autobackup.json" | ||||
|     full_interval = config["backup_schedule"]["full_interval"] | ||||
|     full_retention = config["backup_schedule"]["full_retention"] | ||||
|  | ||||
|     if not path.exists(vm_backup_path) or not path.exists(autobackup_state_file): | ||||
|         # There are no existing backups so the list is empty | ||||
|         state_data = dict() | ||||
|         tracked_backups = list() | ||||
|     else: | ||||
|         with open(autobackup_state_file) as fh: | ||||
|             state_data = jload(fh) | ||||
|         tracked_backups = state_data["tracked_backups"] | ||||
|  | ||||
|     full_backups = [b for b in tracked_backups if b["type"] == "full"] | ||||
|     if len(full_backups) > 0: | ||||
|         last_full_backup = full_backups[0] | ||||
|         last_full_backup_idx = tracked_backups.index(last_full_backup) | ||||
|         if force_full: | ||||
|             this_backup_incremental_parent = None | ||||
|             this_backup_retain_snapshot = True | ||||
|         elif last_full_backup_idx >= full_interval - 1: | ||||
|             this_backup_incremental_parent = None | ||||
|             this_backup_retain_snapshot = True | ||||
|         else: | ||||
|             this_backup_incremental_parent = last_full_backup["datestring"] | ||||
|             this_backup_retain_snapshot = False | ||||
|     else: | ||||
|         # The very first ackup must be full to start the tree | ||||
|         this_backup_incremental_parent = None | ||||
|         this_backup_retain_snapshot = True | ||||
|  | ||||
|     export_type = ( | ||||
|         "incremental" if this_backup_incremental_parent is not None else "full" | ||||
|     ) | ||||
|  | ||||
|     now = datetime.now() | ||||
|     datestring = now.strftime("%Y%m%d%H%M%S") | ||||
|     snapshot_name = f"ab{datestring}" | ||||
|  | ||||
|     # Take the VM snapshot (vm.vm_worker_create_snapshot) | ||||
|     snap_list = list() | ||||
|  | ||||
|     failure = False | ||||
|     export_files = None | ||||
|     export_files_size = 0 | ||||
|  | ||||
|     def update_tracked_backups(): | ||||
|         # Read export file to get details | ||||
|         backup_json_file = ( | ||||
|             f"{backup_suffixed_path}/{vm_name}/{snapshot_name}/snapshot.json" | ||||
|         ) | ||||
|         with open(backup_json_file) as fh: | ||||
|             backup_json = jload(fh) | ||||
|         tracked_backups.insert(0, backup_json) | ||||
|  | ||||
|         state_data["tracked_backups"] = tracked_backups | ||||
|         with open(autobackup_state_file, "w") as fh: | ||||
|             jdump(state_data, fh) | ||||
|  | ||||
|         return tracked_backups | ||||
|  | ||||
|     def write_backup_summary(success=False, message=""): | ||||
|         ttotal = (datetime.now() - now).total_seconds() | ||||
|         export_details = { | ||||
|             "type": export_type, | ||||
|             "result": success, | ||||
|             "message": message, | ||||
|             "datestring": datestring, | ||||
|             "runtime_secs": ttotal, | ||||
|             "snapshot_name": snapshot_name, | ||||
|             "incremental_parent": this_backup_incremental_parent, | ||||
|             "vm_detail": vm_detail, | ||||
|             "export_files": export_files, | ||||
|             "export_size_bytes": export_files_size, | ||||
|         } | ||||
|         try: | ||||
|             with open( | ||||
|                 f"{backup_suffixed_path}/{vm_name}/{snapshot_name}/snapshot.json", | ||||
|                 "w", | ||||
|             ) as fh: | ||||
|                 jdump(export_details, fh) | ||||
|         except Exception as e: | ||||
|             log_err(celery, f"Error exporting snapshot details: {e}") | ||||
|             return False, e | ||||
|  | ||||
|         return True, "" | ||||
|  | ||||
|     def cleanup_failure(): | ||||
|         for snapshot in snap_list: | ||||
|             rbd, snapshot_name = snapshot.split("@") | ||||
|             pool, volume = rbd.split("/") | ||||
|             # We capture no output here, because if this fails too we're in a deep | ||||
|             # error chain and will just ignore it | ||||
|             ceph.remove_snapshot(zkhandler, pool, volume, snapshot_name) | ||||
|  | ||||
|     rbd_list = zkhandler.read(("domain.storage.volumes", dom_uuid)).split(",") | ||||
|     final_stage = ( | ||||
|         current_stage | ||||
|         + 5 | ||||
|         + len(rbd_list) | ||||
|         + (len(rbd_list) if this_backup_retain_snapshot else 0) | ||||
|     ) | ||||
|  | ||||
|     for rbd in rbd_list: | ||||
|         current_stage += 1 | ||||
|         update( | ||||
|             celery, | ||||
|             f"[{vm_name}] Creating RBD snapshot of {rbd}", | ||||
|             current=current_stage, | ||||
|             total=total_stages, | ||||
|         ) | ||||
|  | ||||
|         pool, volume = rbd.split("/") | ||||
|         ret, msg = ceph.add_snapshot( | ||||
|             zkhandler, pool, volume, snapshot_name, zk_only=False | ||||
|         ) | ||||
|         if not ret: | ||||
|             cleanup_failure() | ||||
|             error_message = msg.replace("ERROR: ", "") | ||||
|             log_err(celery, error_message) | ||||
|             failure = True | ||||
|             break | ||||
|         else: | ||||
|             snap_list.append(f"{pool}/{volume}@{snapshot_name}") | ||||
|  | ||||
|     if failure: | ||||
|         error_message = (f"[{vm_name}] Error in snapshot export, skipping",) | ||||
|         current_stage = final_stage | ||||
|         write_backup_summary(message=error_message) | ||||
|         tracked_backups = update_tracked_backups() | ||||
|         update( | ||||
|             celery, | ||||
|             error_message, | ||||
|             current=current_stage, | ||||
|             total=total_stages, | ||||
|         ) | ||||
|         return tracked_backups, current_stage | ||||
|  | ||||
|     current_stage += 1 | ||||
|     update( | ||||
|         celery, | ||||
|         f"[{vm_name}] Creating VM configuration snapshot", | ||||
|         current=current_stage, | ||||
|         total=total_stages, | ||||
|     ) | ||||
|  | ||||
|     # Get the current domain XML | ||||
|     vm_config = zkhandler.read(("domain.xml", dom_uuid)) | ||||
|  | ||||
|     # Add the snapshot entry to Zookeeper | ||||
|     ret = zkhandler.write( | ||||
|         [ | ||||
|             ( | ||||
|                 ( | ||||
|                     "domain.snapshots", | ||||
|                     dom_uuid, | ||||
|                     "domain_snapshot.name", | ||||
|                     snapshot_name, | ||||
|                 ), | ||||
|                 snapshot_name, | ||||
|             ), | ||||
|             ( | ||||
|                 ( | ||||
|                     "domain.snapshots", | ||||
|                     dom_uuid, | ||||
|                     "domain_snapshot.timestamp", | ||||
|                     snapshot_name, | ||||
|                 ), | ||||
|                 now.strftime("%s"), | ||||
|             ), | ||||
|             ( | ||||
|                 ( | ||||
|                     "domain.snapshots", | ||||
|                     dom_uuid, | ||||
|                     "domain_snapshot.xml", | ||||
|                     snapshot_name, | ||||
|                 ), | ||||
|                 vm_config, | ||||
|             ), | ||||
|             ( | ||||
|                 ( | ||||
|                     "domain.snapshots", | ||||
|                     dom_uuid, | ||||
|                     "domain_snapshot.rbd_snapshots", | ||||
|                     snapshot_name, | ||||
|                 ), | ||||
|                 ",".join(snap_list), | ||||
|             ), | ||||
|         ] | ||||
|     ) | ||||
|     if not ret: | ||||
|         error_message = (f"[{vm_name}] Error in snapshot export, skipping",) | ||||
|         current_stage = final_stage | ||||
|         log_err(celery, error_message) | ||||
|         write_backup_summary(message=error_message) | ||||
|         tracked_backups = update_tracked_backups() | ||||
|         update( | ||||
|             celery, | ||||
|             error_message, | ||||
|             current=current_stage, | ||||
|             total=total_stages, | ||||
|         ) | ||||
|         return tracked_backups, current_stage | ||||
|  | ||||
|     # Export the snapshot (vm.vm_worker_export_snapshot) | ||||
|     export_target_path = f"{backup_suffixed_path}/{vm_name}/{snapshot_name}/images" | ||||
|  | ||||
|     try: | ||||
|         makedirs(export_target_path) | ||||
|     except Exception as e: | ||||
|         error_message = ( | ||||
|             f"[{vm_name}] Failed to create target directory '{export_target_path}': {e}", | ||||
|         ) | ||||
|         current_stage = final_stage | ||||
|         log_err(celery, error_message) | ||||
|         write_backup_summary(message=error_message) | ||||
|         tracked_backups = update_tracked_backups() | ||||
|         update( | ||||
|             celery, | ||||
|             f"[{vm_name}] Error in snapshot export, skipping", | ||||
|             current=current_stage, | ||||
|             total=total_stages, | ||||
|         ) | ||||
|         return tracked_backups, current_stage | ||||
|  | ||||
|     def export_cleanup(): | ||||
|         from shutil import rmtree | ||||
|  | ||||
|         rmtree(f"{backup_suffixed_path}/{vm_name}/{snapshot_name}") | ||||
|  | ||||
|     # Set the export filetype | ||||
|     if this_backup_incremental_parent is not None: | ||||
|         export_fileext = "rbddiff" | ||||
|     else: | ||||
|         export_fileext = "rbdimg" | ||||
|  | ||||
|     snapshot_volumes = list() | ||||
|     for rbdsnap in snap_list: | ||||
|         pool, _volume = rbdsnap.split("/") | ||||
|         volume, name = _volume.split("@") | ||||
|         ret, snapshots = ceph.get_list_snapshot( | ||||
|             zkhandler, pool, volume, limit=name, is_fuzzy=False | ||||
|         ) | ||||
|         if ret: | ||||
|             snapshot_volumes += snapshots | ||||
|  | ||||
|     export_files = list() | ||||
|     for snapshot_volume in snapshot_volumes: | ||||
|         snap_pool = snapshot_volume["pool"] | ||||
|         snap_volume = snapshot_volume["volume"] | ||||
|         snap_snapshot_name = snapshot_volume["snapshot"] | ||||
|         snap_size = snapshot_volume["stats"]["size"] | ||||
|         snap_str = f"{snap_pool}/{snap_volume}@{snap_snapshot_name}" | ||||
|  | ||||
|         current_stage += 1 | ||||
|         update( | ||||
|             celery, | ||||
|             f"[{vm_name}] Exporting RBD snapshot {snap_str}", | ||||
|             current=current_stage, | ||||
|             total=total_stages, | ||||
|         ) | ||||
|  | ||||
|         if this_backup_incremental_parent is not None: | ||||
|             retcode, stdout, stderr = run_os_command( | ||||
|                 f"rbd export-diff --from-snap {this_backup_incremental_parent} {snap_pool}/{snap_volume}@{snap_snapshot_name} {export_target_path}/{snap_pool}.{snap_volume}.{export_fileext}" | ||||
|             ) | ||||
|             if retcode: | ||||
|                 error_message = ( | ||||
|                     f"[{vm_name}] Failed to export snapshot for volume(s) '{snap_pool}/{snap_volume}'", | ||||
|                 ) | ||||
|                 failure = True | ||||
|                 break | ||||
|             else: | ||||
|                 export_files.append( | ||||
|                     ( | ||||
|                         f"images/{snap_pool}.{snap_volume}.{export_fileext}", | ||||
|                         snap_size, | ||||
|                     ) | ||||
|                 ) | ||||
|         else: | ||||
|             retcode, stdout, stderr = run_os_command( | ||||
|                 f"rbd export --export-format 2 {snap_pool}/{snap_volume}@{snap_snapshot_name} {export_target_path}/{snap_pool}.{snap_volume}.{export_fileext}" | ||||
|             ) | ||||
|             if retcode: | ||||
|                 error_message = ( | ||||
|                     f"[{vm_name}] Failed to export snapshot for volume(s) '{snap_pool}/{snap_volume}'", | ||||
|                 ) | ||||
|                 failure = True | ||||
|                 break | ||||
|             else: | ||||
|                 export_files.append( | ||||
|                     ( | ||||
|                         f"images/{snap_pool}.{snap_volume}.{export_fileext}", | ||||
|                         snap_size, | ||||
|                     ) | ||||
|                 ) | ||||
|  | ||||
|     if failure: | ||||
|         current_stage = final_stage | ||||
|         log_err(celery, error_message) | ||||
|         write_backup_summary(message=error_message) | ||||
|         tracked_backups = update_tracked_backups() | ||||
|         update( | ||||
|             celery, | ||||
|             error_message, | ||||
|             current=current_stage, | ||||
|             total=total_stages, | ||||
|         ) | ||||
|         return tracked_backups, current_stage | ||||
|  | ||||
|     current_stage += 1 | ||||
|     update( | ||||
|         celery, | ||||
|         f"[{vm_name}] Writing snapshot details", | ||||
|         current=current_stage, | ||||
|         total=total_stages, | ||||
|     ) | ||||
|  | ||||
|     def get_dir_size(pathname): | ||||
|         total = 0 | ||||
|         with scandir(pathname) as it: | ||||
|             for entry in it: | ||||
|                 if entry.is_file(): | ||||
|                     total += entry.stat().st_size | ||||
|                 elif entry.is_dir(): | ||||
|                     total += get_dir_size(entry.path) | ||||
|         return total | ||||
|  | ||||
|     export_files_size = get_dir_size(export_target_path) | ||||
|  | ||||
|     ret, e = write_backup_summary(success=True) | ||||
|     if not ret: | ||||
|         error_message = (f"[{vm_name}] Failed to export configuration snapshot: {e}",) | ||||
|         current_stage = final_stage | ||||
|         log_err(celery, error_message) | ||||
|         write_backup_summary(message=error_message) | ||||
|         tracked_backups = update_tracked_backups() | ||||
|         update( | ||||
|             celery, | ||||
|             error_message, | ||||
|             current=current_stage, | ||||
|             total=total_stages, | ||||
|         ) | ||||
|         return tracked_backups, current_stage | ||||
|  | ||||
|     # Clean up the snapshot (vm.vm_worker_remove_snapshot) | ||||
|     if not this_backup_retain_snapshot: | ||||
|         for snap in snap_list: | ||||
|             current_stage += 1 | ||||
|             update( | ||||
|                 celery, | ||||
|                 f"[{vm_name}] Removing RBD snapshot {snap}", | ||||
|                 current=current_stage, | ||||
|                 total=total_stages, | ||||
|             ) | ||||
|  | ||||
|             rbd, name = snap.split("@") | ||||
|             pool, volume = rbd.split("/") | ||||
|             ret, msg = ceph.remove_snapshot(zkhandler, pool, volume, name) | ||||
|             if not ret: | ||||
|                 error_message = msg.replace("ERROR: ", f"[{vm_name}] ") | ||||
|                 failure = True | ||||
|                 break | ||||
|  | ||||
|         if failure: | ||||
|             current_stage = final_stage | ||||
|             log_err(celery, error_message) | ||||
|             write_backup_summary(message=error_message) | ||||
|             tracked_backups = update_tracked_backups() | ||||
|             update( | ||||
|                 celery, | ||||
|                 error_message, | ||||
|                 current=current_stage, | ||||
|                 total=total_stages, | ||||
|             ) | ||||
|             return tracked_backups, current_stage | ||||
|  | ||||
|         current_stage += 1 | ||||
|         update( | ||||
|             celery, | ||||
|             f"[{vm_name}] Removing VM configuration snapshot", | ||||
|             current=current_stage, | ||||
|             total=total_stages, | ||||
|         ) | ||||
|  | ||||
|         ret = zkhandler.delete( | ||||
|             ("domain.snapshots", dom_uuid, "domain_snapshot.name", snapshot_name) | ||||
|         ) | ||||
|         if not ret: | ||||
|             error_message = (f"[{vm_name}] Failed to remove VM snapshot; continuing",) | ||||
|             log_err(celery, error_message) | ||||
|  | ||||
|     current_stage += 1 | ||||
|     update( | ||||
|         celery, | ||||
|         f"Finding obsolete incremental backups for '{vm_name}'", | ||||
|         current=current_stage, | ||||
|         total=total_stages, | ||||
|     ) | ||||
|  | ||||
|     marked_for_deletion = list() | ||||
|     # Find any full backups that are expired | ||||
|     found_full_count = 0 | ||||
|     for backup in tracked_backups: | ||||
|         if backup["type"] == "full": | ||||
|             found_full_count += 1 | ||||
|             if found_full_count > full_retention: | ||||
|                 marked_for_deletion.append(backup) | ||||
|     # Find any incremental backups that depend on marked parents | ||||
|     for backup in tracked_backups: | ||||
|         if backup["type"] == "incremental" and backup["incremental_parent"] in [ | ||||
|             b["datestring"] for b in marked_for_deletion | ||||
|         ]: | ||||
|             marked_for_deletion.append(backup) | ||||
|  | ||||
|     current_stage += 1 | ||||
|     if len(marked_for_deletion) > 0: | ||||
|         update( | ||||
|             celery, | ||||
|             f"Cleaning up aged out backups for '{vm_name}'", | ||||
|             current=current_stage, | ||||
|             total=total_stages, | ||||
|         ) | ||||
|  | ||||
|         for backup_to_delete in marked_for_deletion: | ||||
|             ret = vm.vm_worker_remove_snapshot( | ||||
|                 zkhandler, None, vm_name, backup_to_delete["snapshot_name"] | ||||
|             ) | ||||
|             if ret is False: | ||||
|                 error_message = f"Failed to remove obsolete backup snapshot '{backup_to_delete['snapshot_name']}', leaving in tracked backups" | ||||
|                 log_err(celery, error_message) | ||||
|             else: | ||||
|                 rmtree(f"{vm_backup_path}/{backup_to_delete['snapshot_name']}") | ||||
|                 tracked_backups.remove(backup_to_delete) | ||||
|  | ||||
|     current_stage += 1 | ||||
|     update( | ||||
|         celery, | ||||
|         "Updating tracked backups", | ||||
|         current=current_stage, | ||||
|         total=total_stages, | ||||
|     ) | ||||
|     tracked_backups = update_tracked_backups() | ||||
|     return tracked_backups, current_stage | ||||
|  | ||||
|  | ||||
| def worker_cluster_autobackup( | ||||
|     zkhandler, celery, force_full=False, email_recipients=None | ||||
| ): | ||||
| @@ -202,7 +656,6 @@ def worker_cluster_autobackup( | ||||
|         makedirs(backup_suffixed_path) | ||||
|  | ||||
|     full_interval = config["backup_schedule"]["full_interval"] | ||||
|     full_retention = config["backup_schedule"]["full_retention"] | ||||
|  | ||||
|     backup_vms = list() | ||||
|     for vm_detail in vm_list: | ||||
| @@ -296,419 +749,16 @@ def worker_cluster_autobackup( | ||||
|  | ||||
|     # Execute the backup: take a snapshot, then export the snapshot | ||||
|     for vm_detail in backup_vms: | ||||
|         vm_name = vm_detail["name"] | ||||
|         dom_uuid = vm_detail["uuid"] | ||||
|         vm_backup_path = f"{backup_suffixed_path}/{vm_name}" | ||||
|         autobackup_state_file = f"{vm_backup_path}/.autobackup.json" | ||||
|         if not path.exists(vm_backup_path) or not path.exists(autobackup_state_file): | ||||
|             # There are no existing backups so the list is empty | ||||
|             state_data = dict() | ||||
|             tracked_backups = list() | ||||
|         else: | ||||
|             with open(autobackup_state_file) as fh: | ||||
|                 state_data = jload(fh) | ||||
|             tracked_backups = state_data["tracked_backups"] | ||||
|  | ||||
|         full_backups = [b for b in tracked_backups if b["type"] == "full"] | ||||
|         if len(full_backups) > 0: | ||||
|             last_full_backup = full_backups[0] | ||||
|             last_full_backup_idx = tracked_backups.index(last_full_backup) | ||||
|             if force_full: | ||||
|                 this_backup_incremental_parent = None | ||||
|                 this_backup_retain_snapshot = True | ||||
|             elif last_full_backup_idx >= full_interval - 1: | ||||
|                 this_backup_incremental_parent = None | ||||
|                 this_backup_retain_snapshot = True | ||||
|             else: | ||||
|                 this_backup_incremental_parent = last_full_backup["datestring"] | ||||
|                 this_backup_retain_snapshot = False | ||||
|         else: | ||||
|             # The very first ackup must be full to start the tree | ||||
|             this_backup_incremental_parent = None | ||||
|             this_backup_retain_snapshot = True | ||||
|  | ||||
|         now = datetime.now() | ||||
|         datestring = now.strftime("%Y%m%d%H%M%S") | ||||
|         snapshot_name = f"ab{datestring}" | ||||
|  | ||||
|         # Take the VM snapshot (vm.vm_worker_create_snapshot) | ||||
|         snap_list = list() | ||||
|  | ||||
|         def cleanup_failure(): | ||||
|             for snapshot in snap_list: | ||||
|                 rbd, snapshot_name = snapshot.split("@") | ||||
|                 pool, volume = rbd.split("/") | ||||
|                 # We capture no output here, because if this fails too we're in a deep | ||||
|                 # error chain and will just ignore it | ||||
|                 ceph.remove_snapshot(zkhandler, pool, volume, snapshot_name) | ||||
|  | ||||
|         rbd_list = zkhandler.read(("domain.storage.volumes", dom_uuid)).split(",") | ||||
|  | ||||
|         for rbd in rbd_list: | ||||
|             current_stage += 1 | ||||
|             update( | ||||
|                 celery, | ||||
|                 f"[{vm_name}] Creating RBD snapshot of {rbd}", | ||||
|                 current=current_stage, | ||||
|                 total=total_stages, | ||||
|             ) | ||||
|  | ||||
|             pool, volume = rbd.split("/") | ||||
|             ret, msg = ceph.add_snapshot( | ||||
|                 zkhandler, pool, volume, snapshot_name, zk_only=False | ||||
|             ) | ||||
|             if not ret: | ||||
|                 cleanup_failure() | ||||
|                 error_message = msg.replace("ERROR: ", "") | ||||
|                 log_err(celery, error_message) | ||||
|                 send_execution_failure_report( | ||||
|                     (celery, current_stage, total_stages), | ||||
|                     config, | ||||
|                     recipients=email_recipients, | ||||
|                     error=error_message, | ||||
|                 ) | ||||
|                 fail(celery, error_message) | ||||
|                 return False | ||||
|             else: | ||||
|                 snap_list.append(f"{pool}/{volume}@{snapshot_name}") | ||||
|  | ||||
|         current_stage += 1 | ||||
|         update( | ||||
|         summary, current_stage = run_vm_backup( | ||||
|             zkhandler, | ||||
|             celery, | ||||
|             f"[{vm_name}] Creating VM configuration snapshot", | ||||
|             current=current_stage, | ||||
|             total=total_stages, | ||||
|             current_stage, | ||||
|             total_stages, | ||||
|             config, | ||||
|             vm_detail, | ||||
|             force_full=force_full, | ||||
|         ) | ||||
|  | ||||
|         # Get the current timestamp | ||||
|         tstart = time() | ||||
|  | ||||
|         # Get the current domain XML | ||||
|         vm_config = zkhandler.read(("domain.xml", dom_uuid)) | ||||
|  | ||||
|         # Add the snapshot entry to Zookeeper | ||||
|         zkhandler.write( | ||||
|             [ | ||||
|                 ( | ||||
|                     ( | ||||
|                         "domain.snapshots", | ||||
|                         dom_uuid, | ||||
|                         "domain_snapshot.name", | ||||
|                         snapshot_name, | ||||
|                     ), | ||||
|                     snapshot_name, | ||||
|                 ), | ||||
|                 ( | ||||
|                     ( | ||||
|                         "domain.snapshots", | ||||
|                         dom_uuid, | ||||
|                         "domain_snapshot.timestamp", | ||||
|                         snapshot_name, | ||||
|                     ), | ||||
|                     tstart, | ||||
|                 ), | ||||
|                 ( | ||||
|                     ( | ||||
|                         "domain.snapshots", | ||||
|                         dom_uuid, | ||||
|                         "domain_snapshot.xml", | ||||
|                         snapshot_name, | ||||
|                     ), | ||||
|                     vm_config, | ||||
|                 ), | ||||
|                 ( | ||||
|                     ( | ||||
|                         "domain.snapshots", | ||||
|                         dom_uuid, | ||||
|                         "domain_snapshot.rbd_snapshots", | ||||
|                         snapshot_name, | ||||
|                     ), | ||||
|                     ",".join(snap_list), | ||||
|                 ), | ||||
|             ] | ||||
|         ) | ||||
|  | ||||
|         # Export the snapshot (vm.vm_worker_export_snapshot) | ||||
|         export_target_path = f"{backup_suffixed_path}/{vm_name}/{snapshot_name}/images" | ||||
|  | ||||
|         try: | ||||
|             makedirs(export_target_path) | ||||
|         except Exception as e: | ||||
|             error_message = ( | ||||
|                 f"[{vm_name}] Failed to create target directory '{export_target_path}': {e}", | ||||
|             ) | ||||
|             log_err(celery, error_message) | ||||
|             send_execution_failure_report( | ||||
|                 (celery, current_stage, total_stages), | ||||
|                 config, | ||||
|                 recipients=email_recipients, | ||||
|                 error=error_message, | ||||
|             ) | ||||
|             fail(celery, error_message) | ||||
|             return False | ||||
|  | ||||
|         def export_cleanup(): | ||||
|             from shutil import rmtree | ||||
|  | ||||
|             rmtree(f"{backup_suffixed_path}/{vm_name}/{snapshot_name}") | ||||
|  | ||||
|         export_type = ( | ||||
|             "incremental" if this_backup_incremental_parent is not None else "full" | ||||
|         ) | ||||
|  | ||||
|         # Set the export filetype | ||||
|         if this_backup_incremental_parent is not None: | ||||
|             export_fileext = "rbddiff" | ||||
|         else: | ||||
|             export_fileext = "rbdimg" | ||||
|  | ||||
|         failure = False | ||||
|         export_files = None | ||||
|         export_files_size = 0 | ||||
|  | ||||
|         def write_backup_summary(success=False, message=""): | ||||
|             export_details = { | ||||
|                 "type": export_type, | ||||
|                 "result": success, | ||||
|                 "message": message, | ||||
|                 "datestring": datestring, | ||||
|                 "snapshot_name": snapshot_name, | ||||
|                 "incremental_parent": this_backup_incremental_parent, | ||||
|                 "vm_detail": vm_detail, | ||||
|                 "export_files": export_files, | ||||
|                 "export_size_bytes": export_files_size, | ||||
|             } | ||||
|             try: | ||||
|                 with open( | ||||
|                     f"{backup_suffixed_path}/{vm_name}/{snapshot_name}/snapshot.json", | ||||
|                     "w", | ||||
|                 ) as fh: | ||||
|                     jdump(export_details, fh) | ||||
|             except Exception as e: | ||||
|                 log_err(celery, f"Error exporting snapshot details: {e}") | ||||
|                 return False, e | ||||
|             return True, "" | ||||
|  | ||||
|         snapshot_volumes = list() | ||||
|         for rbdsnap in snap_list: | ||||
|             pool, _volume = rbdsnap.split("/") | ||||
|             volume, name = _volume.split("@") | ||||
|             ret, snapshots = ceph.get_list_snapshot( | ||||
|                 zkhandler, pool, volume, limit=name, is_fuzzy=False | ||||
|             ) | ||||
|             if ret: | ||||
|                 snapshot_volumes += snapshots | ||||
|  | ||||
|         export_files = list() | ||||
|         for snapshot_volume in snapshot_volumes: | ||||
|             snap_pool = snapshot_volume["pool"] | ||||
|             snap_volume = snapshot_volume["volume"] | ||||
|             snap_snapshot_name = snapshot_volume["snapshot"] | ||||
|             snap_size = snapshot_volume["stats"]["size"] | ||||
|             snap_str = f"{snap_pool}/{snap_volume}@{snap_snapshot_name}" | ||||
|  | ||||
|             current_stage += 1 | ||||
|             update( | ||||
|                 celery, | ||||
|                 f"[{vm_name}] Exporting RBD snapshot {snap_str}", | ||||
|                 current=current_stage, | ||||
|                 total=total_stages, | ||||
|             ) | ||||
|  | ||||
|             if this_backup_incremental_parent is not None: | ||||
|                 retcode, stdout, stderr = run_os_command( | ||||
|                     f"rbd export-diff --from-snap {this_backup_incremental_parent} {snap_pool}/{snap_volume}@{snap_snapshot_name} {export_target_path}/{snap_pool}.{snap_volume}.{export_fileext}" | ||||
|                 ) | ||||
|                 if retcode: | ||||
|                     error_message = ( | ||||
|                         f"[{vm_name}] Failed to export snapshot for volume(s) '{snap_pool}/{snap_volume}'", | ||||
|                     ) | ||||
|                     write_backup_summary(message=error_message) | ||||
|                     failure = True | ||||
|                     break | ||||
|                 else: | ||||
|                     export_files.append( | ||||
|                         ( | ||||
|                             f"images/{snap_pool}.{snap_volume}.{export_fileext}", | ||||
|                             snap_size, | ||||
|                         ) | ||||
|                     ) | ||||
|             else: | ||||
|                 retcode, stdout, stderr = run_os_command( | ||||
|                     f"rbd export --export-format 2 {snap_pool}/{snap_volume}@{snap_snapshot_name} {export_target_path}/{snap_pool}.{snap_volume}.{export_fileext}" | ||||
|                 ) | ||||
|                 if retcode: | ||||
|                     error_message = ( | ||||
|                         f"[{vm_name}] Failed to export snapshot for volume(s) '{snap_pool}/{snap_volume}'", | ||||
|                     ) | ||||
|                     write_backup_summary(message=error_message) | ||||
|                     failure = True | ||||
|                     break | ||||
|                 else: | ||||
|                     export_files.append( | ||||
|                         ( | ||||
|                             f"images/{snap_pool}.{snap_volume}.{export_fileext}", | ||||
|                             snap_size, | ||||
|                         ) | ||||
|                     ) | ||||
|  | ||||
|         if failure: | ||||
|             current_stage += 6 | ||||
|             if not this_backup_retain_snapshot: | ||||
|                 current_stage += len(snap_list) | ||||
|             update( | ||||
|                 celery, | ||||
|                 f"[{vm_name}] Error in snapshot export, skipping", | ||||
|                 current=current_stage, | ||||
|                 total=total_stages, | ||||
|             ) | ||||
|             continue | ||||
|  | ||||
|         current_stage += 1 | ||||
|         update( | ||||
|             celery, | ||||
|             f"[{vm_name}] Writing snapshot details", | ||||
|             current=current_stage, | ||||
|             total=total_stages, | ||||
|         ) | ||||
|  | ||||
|         def get_dir_size(pathname): | ||||
|             total = 0 | ||||
|             with scandir(pathname) as it: | ||||
|                 for entry in it: | ||||
|                     if entry.is_file(): | ||||
|                         total += entry.stat().st_size | ||||
|                     elif entry.is_dir(): | ||||
|                         total += get_dir_size(entry.path) | ||||
|             return total | ||||
|  | ||||
|         export_files_size = get_dir_size(export_target_path) | ||||
|  | ||||
|         ret, e = write_backup_summary(success=True) | ||||
|         if not ret: | ||||
|             error_message = ( | ||||
|                 f"[{vm_name}] Failed to export configuration snapshot: {e}", | ||||
|             ) | ||||
|             write_backup_summary(message=error_message) | ||||
|             current_stage += 5 | ||||
|             if not this_backup_retain_snapshot: | ||||
|                 current_stage += len(snap_list) | ||||
|             update( | ||||
|                 celery, | ||||
|                 error_message, | ||||
|                 current=current_stage, | ||||
|                 total=total_stages, | ||||
|             ) | ||||
|             continue | ||||
|  | ||||
|         # Clean up the snapshot (vm.vm_worker_remove_snapshot) | ||||
|         if not this_backup_retain_snapshot: | ||||
|             for snap in snap_list: | ||||
|                 current_stage += 1 | ||||
|                 update( | ||||
|                     celery, | ||||
|                     f"[{vm_name}] Removing RBD snapshot {snap}", | ||||
|                     current=current_stage, | ||||
|                     total=total_stages, | ||||
|                 ) | ||||
|  | ||||
|                 rbd, name = snap.split("@") | ||||
|                 pool, volume = rbd.split("/") | ||||
|                 ret, msg = ceph.remove_snapshot(zkhandler, pool, volume, name) | ||||
|                 if not ret: | ||||
|                     error_message = msg.replace("ERROR: ", "") | ||||
|                     write_backup_summary(message=error_message) | ||||
|                     failure = True | ||||
|                     break | ||||
|  | ||||
|             if failure: | ||||
|                 current_stage += 4 | ||||
|                 update( | ||||
|                     celery, | ||||
|                     f"[{vm_name}] Error in snapshot export, skipping", | ||||
|                     current=current_stage, | ||||
|                     total=total_stages, | ||||
|                 ) | ||||
|                 continue | ||||
|  | ||||
|             current_stage += 1 | ||||
|             update( | ||||
|                 celery, | ||||
|                 f"[{vm_name}] Removing VM configuration snapshot", | ||||
|                 current=current_stage, | ||||
|                 total=total_stages, | ||||
|             ) | ||||
|  | ||||
|             ret = zkhandler.delete( | ||||
|                 ("domain.snapshots", dom_uuid, "domain_snapshot.name", snapshot_name) | ||||
|             ) | ||||
|             if not ret: | ||||
|                 error_message = ( | ||||
|                     f"[{vm_name}] Failed to remove VM snapshot; continuing", | ||||
|                 ) | ||||
|                 log_err(celery, error_message) | ||||
|  | ||||
|         current_stage += 1 | ||||
|         update( | ||||
|             celery, | ||||
|             f"Finding obsolete incremental backups for '{vm_name}'", | ||||
|             current=current_stage, | ||||
|             total=total_stages, | ||||
|         ) | ||||
|  | ||||
|         # Read export file to get details | ||||
|         backup_json_file = f"{vm_backup_path}/{snapshot_name}/snapshot.json" | ||||
|         with open(backup_json_file) as fh: | ||||
|             backup_json = jload(fh) | ||||
|         tracked_backups.insert(0, backup_json) | ||||
|  | ||||
|         marked_for_deletion = list() | ||||
|         # Find any full backups that are expired | ||||
|         found_full_count = 0 | ||||
|         for backup in tracked_backups: | ||||
|             if backup["type"] == "full": | ||||
|                 found_full_count += 1 | ||||
|                 if found_full_count > full_retention: | ||||
|                     marked_for_deletion.append(backup) | ||||
|         # Find any incremental backups that depend on marked parents | ||||
|         for backup in tracked_backups: | ||||
|             if backup["type"] == "incremental" and backup["incremental_parent"] in [ | ||||
|                 b["datestring"] for b in marked_for_deletion | ||||
|             ]: | ||||
|                 marked_for_deletion.append(backup) | ||||
|  | ||||
|         current_stage += 1 | ||||
|         if len(marked_for_deletion) > 0: | ||||
|             update( | ||||
|                 celery, | ||||
|                 f"Cleaning up aged out backups for '{vm_name}'", | ||||
|                 current=current_stage, | ||||
|                 total=total_stages, | ||||
|             ) | ||||
|  | ||||
|             for backup_to_delete in marked_for_deletion: | ||||
|                 ret = vm.vm_worker_remove_snapshot( | ||||
|                     zkhandler, None, vm_name, backup_to_delete["snapshot_name"] | ||||
|                 ) | ||||
|                 if ret is False: | ||||
|                     error_message = f"Failed to remove obsolete backup snapshot '{backup_to_delete['snapshot_name']}', leaving in tracked backups" | ||||
|                     log_err(celery, error_message) | ||||
|                 else: | ||||
|                     rmtree(f"{vm_backup_path}/{backup_to_delete['snapshot_name']}") | ||||
|                     tracked_backups.remove(backup_to_delete) | ||||
|  | ||||
|         current_stage += 1 | ||||
|         update( | ||||
|             celery, | ||||
|             "Updating tracked backups", | ||||
|             current=current_stage, | ||||
|             total=total_stages, | ||||
|         ) | ||||
|         state_data["tracked_backups"] = tracked_backups | ||||
|         with open(autobackup_state_file, "w") as fh: | ||||
|             jdump(state_data, fh) | ||||
|  | ||||
|         backup_summary[vm_detail["name"]] = tracked_backups | ||||
|         backup_summary[vm_detail["name"]] = summary | ||||
|  | ||||
|     # Handle automount unmount commands | ||||
|     if config["auto_mount_enabled"]: | ||||
|   | ||||
		Reference in New Issue
	
	Block a user