Refactor autobackups to make more sense
This commit is contained in:
parent
fd87a28eb3
commit
e938140414
|
@ -25,7 +25,6 @@ from json import dump as jdump
|
||||||
from os import popen, makedirs, path, scandir
|
from os import popen, makedirs, path, scandir
|
||||||
from shutil import rmtree
|
from shutil import rmtree
|
||||||
from subprocess import run, PIPE
|
from subprocess import run, PIPE
|
||||||
from time import time
|
|
||||||
|
|
||||||
from daemon_lib.common import run_os_command
|
from daemon_lib.common import run_os_command
|
||||||
from daemon_lib.config import get_autobackup_configuration
|
from daemon_lib.config import get_autobackup_configuration
|
||||||
|
@ -136,7 +135,7 @@ def send_execution_summary_report(
|
||||||
f" {backup_date}: Success in {backup.get('runtime_secs', 0)} seconds, ID {datestring}, type {backup.get('type', 'unknown')}"
|
f" {backup_date}: Success in {backup.get('runtime_secs', 0)} seconds, ID {datestring}, type {backup.get('type', 'unknown')}"
|
||||||
)
|
)
|
||||||
email.append(
|
email.append(
|
||||||
f" Backup contains {len(backup.get('backup_files'))} files totaling {ceph.format_bytes_tohuman(backup.get('backup_size_bytes', 0))} ({backup.get('backup_size_bytes', 0)} bytes)"
|
f" Backup contains {len(backup.get('export_files'))} files totaling {ceph.format_bytes_tohuman(backup.get('export_size_bytes', 0))} ({backup.get('export_size_bytes', 0)} bytes)"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
email.append(
|
email.append(
|
||||||
|
@ -151,6 +150,461 @@ def send_execution_summary_report(
|
||||||
log_err(f"Failed to send report email: {e}")
|
log_err(f"Failed to send report email: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
def run_vm_backup(
|
||||||
|
zkhandler, celery, current_stage, total_stages, config, vm_detail, force_full=False
|
||||||
|
):
|
||||||
|
vm_name = vm_detail["name"]
|
||||||
|
dom_uuid = vm_detail["uuid"]
|
||||||
|
backup_suffixed_path = (
|
||||||
|
f"{config['backup_root_path']}/{config['backup_root_suffix']}"
|
||||||
|
)
|
||||||
|
vm_backup_path = f"{backup_suffixed_path}/{vm_name}"
|
||||||
|
autobackup_state_file = f"{vm_backup_path}/.autobackup.json"
|
||||||
|
full_interval = config["backup_schedule"]["full_interval"]
|
||||||
|
full_retention = config["backup_schedule"]["full_retention"]
|
||||||
|
|
||||||
|
if not path.exists(vm_backup_path) or not path.exists(autobackup_state_file):
|
||||||
|
# There are no existing backups so the list is empty
|
||||||
|
state_data = dict()
|
||||||
|
tracked_backups = list()
|
||||||
|
else:
|
||||||
|
with open(autobackup_state_file) as fh:
|
||||||
|
state_data = jload(fh)
|
||||||
|
tracked_backups = state_data["tracked_backups"]
|
||||||
|
|
||||||
|
full_backups = [b for b in tracked_backups if b["type"] == "full"]
|
||||||
|
if len(full_backups) > 0:
|
||||||
|
last_full_backup = full_backups[0]
|
||||||
|
last_full_backup_idx = tracked_backups.index(last_full_backup)
|
||||||
|
if force_full:
|
||||||
|
this_backup_incremental_parent = None
|
||||||
|
this_backup_retain_snapshot = True
|
||||||
|
elif last_full_backup_idx >= full_interval - 1:
|
||||||
|
this_backup_incremental_parent = None
|
||||||
|
this_backup_retain_snapshot = True
|
||||||
|
else:
|
||||||
|
this_backup_incremental_parent = last_full_backup["datestring"]
|
||||||
|
this_backup_retain_snapshot = False
|
||||||
|
else:
|
||||||
|
# The very first ackup must be full to start the tree
|
||||||
|
this_backup_incremental_parent = None
|
||||||
|
this_backup_retain_snapshot = True
|
||||||
|
|
||||||
|
export_type = (
|
||||||
|
"incremental" if this_backup_incremental_parent is not None else "full"
|
||||||
|
)
|
||||||
|
|
||||||
|
now = datetime.now()
|
||||||
|
datestring = now.strftime("%Y%m%d%H%M%S")
|
||||||
|
snapshot_name = f"ab{datestring}"
|
||||||
|
|
||||||
|
# Take the VM snapshot (vm.vm_worker_create_snapshot)
|
||||||
|
snap_list = list()
|
||||||
|
|
||||||
|
failure = False
|
||||||
|
export_files = None
|
||||||
|
export_files_size = 0
|
||||||
|
|
||||||
|
def update_tracked_backups():
|
||||||
|
# Read export file to get details
|
||||||
|
backup_json_file = (
|
||||||
|
f"{backup_suffixed_path}/{vm_name}/{snapshot_name}/snapshot.json"
|
||||||
|
)
|
||||||
|
with open(backup_json_file) as fh:
|
||||||
|
backup_json = jload(fh)
|
||||||
|
tracked_backups.insert(0, backup_json)
|
||||||
|
|
||||||
|
state_data["tracked_backups"] = tracked_backups
|
||||||
|
with open(autobackup_state_file, "w") as fh:
|
||||||
|
jdump(state_data, fh)
|
||||||
|
|
||||||
|
return tracked_backups
|
||||||
|
|
||||||
|
def write_backup_summary(success=False, message=""):
|
||||||
|
ttotal = (datetime.now() - now).total_seconds()
|
||||||
|
export_details = {
|
||||||
|
"type": export_type,
|
||||||
|
"result": success,
|
||||||
|
"message": message,
|
||||||
|
"datestring": datestring,
|
||||||
|
"runtime_secs": ttotal,
|
||||||
|
"snapshot_name": snapshot_name,
|
||||||
|
"incremental_parent": this_backup_incremental_parent,
|
||||||
|
"vm_detail": vm_detail,
|
||||||
|
"export_files": export_files,
|
||||||
|
"export_size_bytes": export_files_size,
|
||||||
|
}
|
||||||
|
try:
|
||||||
|
with open(
|
||||||
|
f"{backup_suffixed_path}/{vm_name}/{snapshot_name}/snapshot.json",
|
||||||
|
"w",
|
||||||
|
) as fh:
|
||||||
|
jdump(export_details, fh)
|
||||||
|
except Exception as e:
|
||||||
|
log_err(celery, f"Error exporting snapshot details: {e}")
|
||||||
|
return False, e
|
||||||
|
|
||||||
|
return True, ""
|
||||||
|
|
||||||
|
def cleanup_failure():
|
||||||
|
for snapshot in snap_list:
|
||||||
|
rbd, snapshot_name = snapshot.split("@")
|
||||||
|
pool, volume = rbd.split("/")
|
||||||
|
# We capture no output here, because if this fails too we're in a deep
|
||||||
|
# error chain and will just ignore it
|
||||||
|
ceph.remove_snapshot(zkhandler, pool, volume, snapshot_name)
|
||||||
|
|
||||||
|
rbd_list = zkhandler.read(("domain.storage.volumes", dom_uuid)).split(",")
|
||||||
|
final_stage = (
|
||||||
|
current_stage
|
||||||
|
+ 5
|
||||||
|
+ len(rbd_list)
|
||||||
|
+ (len(rbd_list) if this_backup_retain_snapshot else 0)
|
||||||
|
)
|
||||||
|
|
||||||
|
for rbd in rbd_list:
|
||||||
|
current_stage += 1
|
||||||
|
update(
|
||||||
|
celery,
|
||||||
|
f"[{vm_name}] Creating RBD snapshot of {rbd}",
|
||||||
|
current=current_stage,
|
||||||
|
total=total_stages,
|
||||||
|
)
|
||||||
|
|
||||||
|
pool, volume = rbd.split("/")
|
||||||
|
ret, msg = ceph.add_snapshot(
|
||||||
|
zkhandler, pool, volume, snapshot_name, zk_only=False
|
||||||
|
)
|
||||||
|
if not ret:
|
||||||
|
cleanup_failure()
|
||||||
|
error_message = msg.replace("ERROR: ", "")
|
||||||
|
log_err(celery, error_message)
|
||||||
|
failure = True
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
snap_list.append(f"{pool}/{volume}@{snapshot_name}")
|
||||||
|
|
||||||
|
if failure:
|
||||||
|
error_message = (f"[{vm_name}] Error in snapshot export, skipping",)
|
||||||
|
current_stage = final_stage
|
||||||
|
write_backup_summary(message=error_message)
|
||||||
|
tracked_backups = update_tracked_backups()
|
||||||
|
update(
|
||||||
|
celery,
|
||||||
|
error_message,
|
||||||
|
current=current_stage,
|
||||||
|
total=total_stages,
|
||||||
|
)
|
||||||
|
return tracked_backups, current_stage
|
||||||
|
|
||||||
|
current_stage += 1
|
||||||
|
update(
|
||||||
|
celery,
|
||||||
|
f"[{vm_name}] Creating VM configuration snapshot",
|
||||||
|
current=current_stage,
|
||||||
|
total=total_stages,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Get the current domain XML
|
||||||
|
vm_config = zkhandler.read(("domain.xml", dom_uuid))
|
||||||
|
|
||||||
|
# Add the snapshot entry to Zookeeper
|
||||||
|
ret = zkhandler.write(
|
||||||
|
[
|
||||||
|
(
|
||||||
|
(
|
||||||
|
"domain.snapshots",
|
||||||
|
dom_uuid,
|
||||||
|
"domain_snapshot.name",
|
||||||
|
snapshot_name,
|
||||||
|
),
|
||||||
|
snapshot_name,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
(
|
||||||
|
"domain.snapshots",
|
||||||
|
dom_uuid,
|
||||||
|
"domain_snapshot.timestamp",
|
||||||
|
snapshot_name,
|
||||||
|
),
|
||||||
|
now.strftime("%s"),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
(
|
||||||
|
"domain.snapshots",
|
||||||
|
dom_uuid,
|
||||||
|
"domain_snapshot.xml",
|
||||||
|
snapshot_name,
|
||||||
|
),
|
||||||
|
vm_config,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
(
|
||||||
|
"domain.snapshots",
|
||||||
|
dom_uuid,
|
||||||
|
"domain_snapshot.rbd_snapshots",
|
||||||
|
snapshot_name,
|
||||||
|
),
|
||||||
|
",".join(snap_list),
|
||||||
|
),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
if not ret:
|
||||||
|
error_message = (f"[{vm_name}] Error in snapshot export, skipping",)
|
||||||
|
current_stage = final_stage
|
||||||
|
log_err(celery, error_message)
|
||||||
|
write_backup_summary(message=error_message)
|
||||||
|
tracked_backups = update_tracked_backups()
|
||||||
|
update(
|
||||||
|
celery,
|
||||||
|
error_message,
|
||||||
|
current=current_stage,
|
||||||
|
total=total_stages,
|
||||||
|
)
|
||||||
|
return tracked_backups, current_stage
|
||||||
|
|
||||||
|
# Export the snapshot (vm.vm_worker_export_snapshot)
|
||||||
|
export_target_path = f"{backup_suffixed_path}/{vm_name}/{snapshot_name}/images"
|
||||||
|
|
||||||
|
try:
|
||||||
|
makedirs(export_target_path)
|
||||||
|
except Exception as e:
|
||||||
|
error_message = (
|
||||||
|
f"[{vm_name}] Failed to create target directory '{export_target_path}': {e}",
|
||||||
|
)
|
||||||
|
current_stage = final_stage
|
||||||
|
log_err(celery, error_message)
|
||||||
|
write_backup_summary(message=error_message)
|
||||||
|
tracked_backups = update_tracked_backups()
|
||||||
|
update(
|
||||||
|
celery,
|
||||||
|
f"[{vm_name}] Error in snapshot export, skipping",
|
||||||
|
current=current_stage,
|
||||||
|
total=total_stages,
|
||||||
|
)
|
||||||
|
return tracked_backups, current_stage
|
||||||
|
|
||||||
|
def export_cleanup():
|
||||||
|
from shutil import rmtree
|
||||||
|
|
||||||
|
rmtree(f"{backup_suffixed_path}/{vm_name}/{snapshot_name}")
|
||||||
|
|
||||||
|
# Set the export filetype
|
||||||
|
if this_backup_incremental_parent is not None:
|
||||||
|
export_fileext = "rbddiff"
|
||||||
|
else:
|
||||||
|
export_fileext = "rbdimg"
|
||||||
|
|
||||||
|
snapshot_volumes = list()
|
||||||
|
for rbdsnap in snap_list:
|
||||||
|
pool, _volume = rbdsnap.split("/")
|
||||||
|
volume, name = _volume.split("@")
|
||||||
|
ret, snapshots = ceph.get_list_snapshot(
|
||||||
|
zkhandler, pool, volume, limit=name, is_fuzzy=False
|
||||||
|
)
|
||||||
|
if ret:
|
||||||
|
snapshot_volumes += snapshots
|
||||||
|
|
||||||
|
export_files = list()
|
||||||
|
for snapshot_volume in snapshot_volumes:
|
||||||
|
snap_pool = snapshot_volume["pool"]
|
||||||
|
snap_volume = snapshot_volume["volume"]
|
||||||
|
snap_snapshot_name = snapshot_volume["snapshot"]
|
||||||
|
snap_size = snapshot_volume["stats"]["size"]
|
||||||
|
snap_str = f"{snap_pool}/{snap_volume}@{snap_snapshot_name}"
|
||||||
|
|
||||||
|
current_stage += 1
|
||||||
|
update(
|
||||||
|
celery,
|
||||||
|
f"[{vm_name}] Exporting RBD snapshot {snap_str}",
|
||||||
|
current=current_stage,
|
||||||
|
total=total_stages,
|
||||||
|
)
|
||||||
|
|
||||||
|
if this_backup_incremental_parent is not None:
|
||||||
|
retcode, stdout, stderr = run_os_command(
|
||||||
|
f"rbd export-diff --from-snap {this_backup_incremental_parent} {snap_pool}/{snap_volume}@{snap_snapshot_name} {export_target_path}/{snap_pool}.{snap_volume}.{export_fileext}"
|
||||||
|
)
|
||||||
|
if retcode:
|
||||||
|
error_message = (
|
||||||
|
f"[{vm_name}] Failed to export snapshot for volume(s) '{snap_pool}/{snap_volume}'",
|
||||||
|
)
|
||||||
|
failure = True
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
export_files.append(
|
||||||
|
(
|
||||||
|
f"images/{snap_pool}.{snap_volume}.{export_fileext}",
|
||||||
|
snap_size,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
retcode, stdout, stderr = run_os_command(
|
||||||
|
f"rbd export --export-format 2 {snap_pool}/{snap_volume}@{snap_snapshot_name} {export_target_path}/{snap_pool}.{snap_volume}.{export_fileext}"
|
||||||
|
)
|
||||||
|
if retcode:
|
||||||
|
error_message = (
|
||||||
|
f"[{vm_name}] Failed to export snapshot for volume(s) '{snap_pool}/{snap_volume}'",
|
||||||
|
)
|
||||||
|
failure = True
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
export_files.append(
|
||||||
|
(
|
||||||
|
f"images/{snap_pool}.{snap_volume}.{export_fileext}",
|
||||||
|
snap_size,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
if failure:
|
||||||
|
current_stage = final_stage
|
||||||
|
log_err(celery, error_message)
|
||||||
|
write_backup_summary(message=error_message)
|
||||||
|
tracked_backups = update_tracked_backups()
|
||||||
|
update(
|
||||||
|
celery,
|
||||||
|
error_message,
|
||||||
|
current=current_stage,
|
||||||
|
total=total_stages,
|
||||||
|
)
|
||||||
|
return tracked_backups, current_stage
|
||||||
|
|
||||||
|
current_stage += 1
|
||||||
|
update(
|
||||||
|
celery,
|
||||||
|
f"[{vm_name}] Writing snapshot details",
|
||||||
|
current=current_stage,
|
||||||
|
total=total_stages,
|
||||||
|
)
|
||||||
|
|
||||||
|
def get_dir_size(pathname):
|
||||||
|
total = 0
|
||||||
|
with scandir(pathname) as it:
|
||||||
|
for entry in it:
|
||||||
|
if entry.is_file():
|
||||||
|
total += entry.stat().st_size
|
||||||
|
elif entry.is_dir():
|
||||||
|
total += get_dir_size(entry.path)
|
||||||
|
return total
|
||||||
|
|
||||||
|
export_files_size = get_dir_size(export_target_path)
|
||||||
|
|
||||||
|
ret, e = write_backup_summary(success=True)
|
||||||
|
if not ret:
|
||||||
|
error_message = (f"[{vm_name}] Failed to export configuration snapshot: {e}",)
|
||||||
|
current_stage = final_stage
|
||||||
|
log_err(celery, error_message)
|
||||||
|
write_backup_summary(message=error_message)
|
||||||
|
tracked_backups = update_tracked_backups()
|
||||||
|
update(
|
||||||
|
celery,
|
||||||
|
error_message,
|
||||||
|
current=current_stage,
|
||||||
|
total=total_stages,
|
||||||
|
)
|
||||||
|
return tracked_backups, current_stage
|
||||||
|
|
||||||
|
# Clean up the snapshot (vm.vm_worker_remove_snapshot)
|
||||||
|
if not this_backup_retain_snapshot:
|
||||||
|
for snap in snap_list:
|
||||||
|
current_stage += 1
|
||||||
|
update(
|
||||||
|
celery,
|
||||||
|
f"[{vm_name}] Removing RBD snapshot {snap}",
|
||||||
|
current=current_stage,
|
||||||
|
total=total_stages,
|
||||||
|
)
|
||||||
|
|
||||||
|
rbd, name = snap.split("@")
|
||||||
|
pool, volume = rbd.split("/")
|
||||||
|
ret, msg = ceph.remove_snapshot(zkhandler, pool, volume, name)
|
||||||
|
if not ret:
|
||||||
|
error_message = msg.replace("ERROR: ", f"[{vm_name}] ")
|
||||||
|
failure = True
|
||||||
|
break
|
||||||
|
|
||||||
|
if failure:
|
||||||
|
current_stage = final_stage
|
||||||
|
log_err(celery, error_message)
|
||||||
|
write_backup_summary(message=error_message)
|
||||||
|
tracked_backups = update_tracked_backups()
|
||||||
|
update(
|
||||||
|
celery,
|
||||||
|
error_message,
|
||||||
|
current=current_stage,
|
||||||
|
total=total_stages,
|
||||||
|
)
|
||||||
|
return tracked_backups, current_stage
|
||||||
|
|
||||||
|
current_stage += 1
|
||||||
|
update(
|
||||||
|
celery,
|
||||||
|
f"[{vm_name}] Removing VM configuration snapshot",
|
||||||
|
current=current_stage,
|
||||||
|
total=total_stages,
|
||||||
|
)
|
||||||
|
|
||||||
|
ret = zkhandler.delete(
|
||||||
|
("domain.snapshots", dom_uuid, "domain_snapshot.name", snapshot_name)
|
||||||
|
)
|
||||||
|
if not ret:
|
||||||
|
error_message = (f"[{vm_name}] Failed to remove VM snapshot; continuing",)
|
||||||
|
log_err(celery, error_message)
|
||||||
|
|
||||||
|
current_stage += 1
|
||||||
|
update(
|
||||||
|
celery,
|
||||||
|
f"Finding obsolete incremental backups for '{vm_name}'",
|
||||||
|
current=current_stage,
|
||||||
|
total=total_stages,
|
||||||
|
)
|
||||||
|
|
||||||
|
marked_for_deletion = list()
|
||||||
|
# Find any full backups that are expired
|
||||||
|
found_full_count = 0
|
||||||
|
for backup in tracked_backups:
|
||||||
|
if backup["type"] == "full":
|
||||||
|
found_full_count += 1
|
||||||
|
if found_full_count > full_retention:
|
||||||
|
marked_for_deletion.append(backup)
|
||||||
|
# Find any incremental backups that depend on marked parents
|
||||||
|
for backup in tracked_backups:
|
||||||
|
if backup["type"] == "incremental" and backup["incremental_parent"] in [
|
||||||
|
b["datestring"] for b in marked_for_deletion
|
||||||
|
]:
|
||||||
|
marked_for_deletion.append(backup)
|
||||||
|
|
||||||
|
current_stage += 1
|
||||||
|
if len(marked_for_deletion) > 0:
|
||||||
|
update(
|
||||||
|
celery,
|
||||||
|
f"Cleaning up aged out backups for '{vm_name}'",
|
||||||
|
current=current_stage,
|
||||||
|
total=total_stages,
|
||||||
|
)
|
||||||
|
|
||||||
|
for backup_to_delete in marked_for_deletion:
|
||||||
|
ret = vm.vm_worker_remove_snapshot(
|
||||||
|
zkhandler, None, vm_name, backup_to_delete["snapshot_name"]
|
||||||
|
)
|
||||||
|
if ret is False:
|
||||||
|
error_message = f"Failed to remove obsolete backup snapshot '{backup_to_delete['snapshot_name']}', leaving in tracked backups"
|
||||||
|
log_err(celery, error_message)
|
||||||
|
else:
|
||||||
|
rmtree(f"{vm_backup_path}/{backup_to_delete['snapshot_name']}")
|
||||||
|
tracked_backups.remove(backup_to_delete)
|
||||||
|
|
||||||
|
current_stage += 1
|
||||||
|
update(
|
||||||
|
celery,
|
||||||
|
"Updating tracked backups",
|
||||||
|
current=current_stage,
|
||||||
|
total=total_stages,
|
||||||
|
)
|
||||||
|
tracked_backups = update_tracked_backups()
|
||||||
|
return tracked_backups, current_stage
|
||||||
|
|
||||||
|
|
||||||
def worker_cluster_autobackup(
|
def worker_cluster_autobackup(
|
||||||
zkhandler, celery, force_full=False, email_recipients=None
|
zkhandler, celery, force_full=False, email_recipients=None
|
||||||
):
|
):
|
||||||
|
@ -202,7 +656,6 @@ def worker_cluster_autobackup(
|
||||||
makedirs(backup_suffixed_path)
|
makedirs(backup_suffixed_path)
|
||||||
|
|
||||||
full_interval = config["backup_schedule"]["full_interval"]
|
full_interval = config["backup_schedule"]["full_interval"]
|
||||||
full_retention = config["backup_schedule"]["full_retention"]
|
|
||||||
|
|
||||||
backup_vms = list()
|
backup_vms = list()
|
||||||
for vm_detail in vm_list:
|
for vm_detail in vm_list:
|
||||||
|
@ -296,419 +749,16 @@ def worker_cluster_autobackup(
|
||||||
|
|
||||||
# Execute the backup: take a snapshot, then export the snapshot
|
# Execute the backup: take a snapshot, then export the snapshot
|
||||||
for vm_detail in backup_vms:
|
for vm_detail in backup_vms:
|
||||||
vm_name = vm_detail["name"]
|
summary, current_stage = run_vm_backup(
|
||||||
dom_uuid = vm_detail["uuid"]
|
zkhandler,
|
||||||
vm_backup_path = f"{backup_suffixed_path}/{vm_name}"
|
|
||||||
autobackup_state_file = f"{vm_backup_path}/.autobackup.json"
|
|
||||||
if not path.exists(vm_backup_path) or not path.exists(autobackup_state_file):
|
|
||||||
# There are no existing backups so the list is empty
|
|
||||||
state_data = dict()
|
|
||||||
tracked_backups = list()
|
|
||||||
else:
|
|
||||||
with open(autobackup_state_file) as fh:
|
|
||||||
state_data = jload(fh)
|
|
||||||
tracked_backups = state_data["tracked_backups"]
|
|
||||||
|
|
||||||
full_backups = [b for b in tracked_backups if b["type"] == "full"]
|
|
||||||
if len(full_backups) > 0:
|
|
||||||
last_full_backup = full_backups[0]
|
|
||||||
last_full_backup_idx = tracked_backups.index(last_full_backup)
|
|
||||||
if force_full:
|
|
||||||
this_backup_incremental_parent = None
|
|
||||||
this_backup_retain_snapshot = True
|
|
||||||
elif last_full_backup_idx >= full_interval - 1:
|
|
||||||
this_backup_incremental_parent = None
|
|
||||||
this_backup_retain_snapshot = True
|
|
||||||
else:
|
|
||||||
this_backup_incremental_parent = last_full_backup["datestring"]
|
|
||||||
this_backup_retain_snapshot = False
|
|
||||||
else:
|
|
||||||
# The very first ackup must be full to start the tree
|
|
||||||
this_backup_incremental_parent = None
|
|
||||||
this_backup_retain_snapshot = True
|
|
||||||
|
|
||||||
now = datetime.now()
|
|
||||||
datestring = now.strftime("%Y%m%d%H%M%S")
|
|
||||||
snapshot_name = f"ab{datestring}"
|
|
||||||
|
|
||||||
# Take the VM snapshot (vm.vm_worker_create_snapshot)
|
|
||||||
snap_list = list()
|
|
||||||
|
|
||||||
def cleanup_failure():
|
|
||||||
for snapshot in snap_list:
|
|
||||||
rbd, snapshot_name = snapshot.split("@")
|
|
||||||
pool, volume = rbd.split("/")
|
|
||||||
# We capture no output here, because if this fails too we're in a deep
|
|
||||||
# error chain and will just ignore it
|
|
||||||
ceph.remove_snapshot(zkhandler, pool, volume, snapshot_name)
|
|
||||||
|
|
||||||
rbd_list = zkhandler.read(("domain.storage.volumes", dom_uuid)).split(",")
|
|
||||||
|
|
||||||
for rbd in rbd_list:
|
|
||||||
current_stage += 1
|
|
||||||
update(
|
|
||||||
celery,
|
celery,
|
||||||
f"[{vm_name}] Creating RBD snapshot of {rbd}",
|
current_stage,
|
||||||
current=current_stage,
|
total_stages,
|
||||||
total=total_stages,
|
|
||||||
)
|
|
||||||
|
|
||||||
pool, volume = rbd.split("/")
|
|
||||||
ret, msg = ceph.add_snapshot(
|
|
||||||
zkhandler, pool, volume, snapshot_name, zk_only=False
|
|
||||||
)
|
|
||||||
if not ret:
|
|
||||||
cleanup_failure()
|
|
||||||
error_message = msg.replace("ERROR: ", "")
|
|
||||||
log_err(celery, error_message)
|
|
||||||
send_execution_failure_report(
|
|
||||||
(celery, current_stage, total_stages),
|
|
||||||
config,
|
config,
|
||||||
recipients=email_recipients,
|
vm_detail,
|
||||||
error=error_message,
|
force_full=force_full,
|
||||||
)
|
)
|
||||||
fail(celery, error_message)
|
backup_summary[vm_detail["name"]] = summary
|
||||||
return False
|
|
||||||
else:
|
|
||||||
snap_list.append(f"{pool}/{volume}@{snapshot_name}")
|
|
||||||
|
|
||||||
current_stage += 1
|
|
||||||
update(
|
|
||||||
celery,
|
|
||||||
f"[{vm_name}] Creating VM configuration snapshot",
|
|
||||||
current=current_stage,
|
|
||||||
total=total_stages,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Get the current timestamp
|
|
||||||
tstart = time()
|
|
||||||
|
|
||||||
# Get the current domain XML
|
|
||||||
vm_config = zkhandler.read(("domain.xml", dom_uuid))
|
|
||||||
|
|
||||||
# Add the snapshot entry to Zookeeper
|
|
||||||
zkhandler.write(
|
|
||||||
[
|
|
||||||
(
|
|
||||||
(
|
|
||||||
"domain.snapshots",
|
|
||||||
dom_uuid,
|
|
||||||
"domain_snapshot.name",
|
|
||||||
snapshot_name,
|
|
||||||
),
|
|
||||||
snapshot_name,
|
|
||||||
),
|
|
||||||
(
|
|
||||||
(
|
|
||||||
"domain.snapshots",
|
|
||||||
dom_uuid,
|
|
||||||
"domain_snapshot.timestamp",
|
|
||||||
snapshot_name,
|
|
||||||
),
|
|
||||||
tstart,
|
|
||||||
),
|
|
||||||
(
|
|
||||||
(
|
|
||||||
"domain.snapshots",
|
|
||||||
dom_uuid,
|
|
||||||
"domain_snapshot.xml",
|
|
||||||
snapshot_name,
|
|
||||||
),
|
|
||||||
vm_config,
|
|
||||||
),
|
|
||||||
(
|
|
||||||
(
|
|
||||||
"domain.snapshots",
|
|
||||||
dom_uuid,
|
|
||||||
"domain_snapshot.rbd_snapshots",
|
|
||||||
snapshot_name,
|
|
||||||
),
|
|
||||||
",".join(snap_list),
|
|
||||||
),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
# Export the snapshot (vm.vm_worker_export_snapshot)
|
|
||||||
export_target_path = f"{backup_suffixed_path}/{vm_name}/{snapshot_name}/images"
|
|
||||||
|
|
||||||
try:
|
|
||||||
makedirs(export_target_path)
|
|
||||||
except Exception as e:
|
|
||||||
error_message = (
|
|
||||||
f"[{vm_name}] Failed to create target directory '{export_target_path}': {e}",
|
|
||||||
)
|
|
||||||
log_err(celery, error_message)
|
|
||||||
send_execution_failure_report(
|
|
||||||
(celery, current_stage, total_stages),
|
|
||||||
config,
|
|
||||||
recipients=email_recipients,
|
|
||||||
error=error_message,
|
|
||||||
)
|
|
||||||
fail(celery, error_message)
|
|
||||||
return False
|
|
||||||
|
|
||||||
def export_cleanup():
|
|
||||||
from shutil import rmtree
|
|
||||||
|
|
||||||
rmtree(f"{backup_suffixed_path}/{vm_name}/{snapshot_name}")
|
|
||||||
|
|
||||||
export_type = (
|
|
||||||
"incremental" if this_backup_incremental_parent is not None else "full"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Set the export filetype
|
|
||||||
if this_backup_incremental_parent is not None:
|
|
||||||
export_fileext = "rbddiff"
|
|
||||||
else:
|
|
||||||
export_fileext = "rbdimg"
|
|
||||||
|
|
||||||
failure = False
|
|
||||||
export_files = None
|
|
||||||
export_files_size = 0
|
|
||||||
|
|
||||||
def write_backup_summary(success=False, message=""):
|
|
||||||
export_details = {
|
|
||||||
"type": export_type,
|
|
||||||
"result": success,
|
|
||||||
"message": message,
|
|
||||||
"datestring": datestring,
|
|
||||||
"snapshot_name": snapshot_name,
|
|
||||||
"incremental_parent": this_backup_incremental_parent,
|
|
||||||
"vm_detail": vm_detail,
|
|
||||||
"export_files": export_files,
|
|
||||||
"export_size_bytes": export_files_size,
|
|
||||||
}
|
|
||||||
try:
|
|
||||||
with open(
|
|
||||||
f"{backup_suffixed_path}/{vm_name}/{snapshot_name}/snapshot.json",
|
|
||||||
"w",
|
|
||||||
) as fh:
|
|
||||||
jdump(export_details, fh)
|
|
||||||
except Exception as e:
|
|
||||||
log_err(celery, f"Error exporting snapshot details: {e}")
|
|
||||||
return False, e
|
|
||||||
return True, ""
|
|
||||||
|
|
||||||
snapshot_volumes = list()
|
|
||||||
for rbdsnap in snap_list:
|
|
||||||
pool, _volume = rbdsnap.split("/")
|
|
||||||
volume, name = _volume.split("@")
|
|
||||||
ret, snapshots = ceph.get_list_snapshot(
|
|
||||||
zkhandler, pool, volume, limit=name, is_fuzzy=False
|
|
||||||
)
|
|
||||||
if ret:
|
|
||||||
snapshot_volumes += snapshots
|
|
||||||
|
|
||||||
export_files = list()
|
|
||||||
for snapshot_volume in snapshot_volumes:
|
|
||||||
snap_pool = snapshot_volume["pool"]
|
|
||||||
snap_volume = snapshot_volume["volume"]
|
|
||||||
snap_snapshot_name = snapshot_volume["snapshot"]
|
|
||||||
snap_size = snapshot_volume["stats"]["size"]
|
|
||||||
snap_str = f"{snap_pool}/{snap_volume}@{snap_snapshot_name}"
|
|
||||||
|
|
||||||
current_stage += 1
|
|
||||||
update(
|
|
||||||
celery,
|
|
||||||
f"[{vm_name}] Exporting RBD snapshot {snap_str}",
|
|
||||||
current=current_stage,
|
|
||||||
total=total_stages,
|
|
||||||
)
|
|
||||||
|
|
||||||
if this_backup_incremental_parent is not None:
|
|
||||||
retcode, stdout, stderr = run_os_command(
|
|
||||||
f"rbd export-diff --from-snap {this_backup_incremental_parent} {snap_pool}/{snap_volume}@{snap_snapshot_name} {export_target_path}/{snap_pool}.{snap_volume}.{export_fileext}"
|
|
||||||
)
|
|
||||||
if retcode:
|
|
||||||
error_message = (
|
|
||||||
f"[{vm_name}] Failed to export snapshot for volume(s) '{snap_pool}/{snap_volume}'",
|
|
||||||
)
|
|
||||||
write_backup_summary(message=error_message)
|
|
||||||
failure = True
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
export_files.append(
|
|
||||||
(
|
|
||||||
f"images/{snap_pool}.{snap_volume}.{export_fileext}",
|
|
||||||
snap_size,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
retcode, stdout, stderr = run_os_command(
|
|
||||||
f"rbd export --export-format 2 {snap_pool}/{snap_volume}@{snap_snapshot_name} {export_target_path}/{snap_pool}.{snap_volume}.{export_fileext}"
|
|
||||||
)
|
|
||||||
if retcode:
|
|
||||||
error_message = (
|
|
||||||
f"[{vm_name}] Failed to export snapshot for volume(s) '{snap_pool}/{snap_volume}'",
|
|
||||||
)
|
|
||||||
write_backup_summary(message=error_message)
|
|
||||||
failure = True
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
export_files.append(
|
|
||||||
(
|
|
||||||
f"images/{snap_pool}.{snap_volume}.{export_fileext}",
|
|
||||||
snap_size,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
if failure:
|
|
||||||
current_stage += 6
|
|
||||||
if not this_backup_retain_snapshot:
|
|
||||||
current_stage += len(snap_list)
|
|
||||||
update(
|
|
||||||
celery,
|
|
||||||
f"[{vm_name}] Error in snapshot export, skipping",
|
|
||||||
current=current_stage,
|
|
||||||
total=total_stages,
|
|
||||||
)
|
|
||||||
continue
|
|
||||||
|
|
||||||
current_stage += 1
|
|
||||||
update(
|
|
||||||
celery,
|
|
||||||
f"[{vm_name}] Writing snapshot details",
|
|
||||||
current=current_stage,
|
|
||||||
total=total_stages,
|
|
||||||
)
|
|
||||||
|
|
||||||
def get_dir_size(pathname):
|
|
||||||
total = 0
|
|
||||||
with scandir(pathname) as it:
|
|
||||||
for entry in it:
|
|
||||||
if entry.is_file():
|
|
||||||
total += entry.stat().st_size
|
|
||||||
elif entry.is_dir():
|
|
||||||
total += get_dir_size(entry.path)
|
|
||||||
return total
|
|
||||||
|
|
||||||
export_files_size = get_dir_size(export_target_path)
|
|
||||||
|
|
||||||
ret, e = write_backup_summary(success=True)
|
|
||||||
if not ret:
|
|
||||||
error_message = (
|
|
||||||
f"[{vm_name}] Failed to export configuration snapshot: {e}",
|
|
||||||
)
|
|
||||||
write_backup_summary(message=error_message)
|
|
||||||
current_stage += 5
|
|
||||||
if not this_backup_retain_snapshot:
|
|
||||||
current_stage += len(snap_list)
|
|
||||||
update(
|
|
||||||
celery,
|
|
||||||
error_message,
|
|
||||||
current=current_stage,
|
|
||||||
total=total_stages,
|
|
||||||
)
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Clean up the snapshot (vm.vm_worker_remove_snapshot)
|
|
||||||
if not this_backup_retain_snapshot:
|
|
||||||
for snap in snap_list:
|
|
||||||
current_stage += 1
|
|
||||||
update(
|
|
||||||
celery,
|
|
||||||
f"[{vm_name}] Removing RBD snapshot {snap}",
|
|
||||||
current=current_stage,
|
|
||||||
total=total_stages,
|
|
||||||
)
|
|
||||||
|
|
||||||
rbd, name = snap.split("@")
|
|
||||||
pool, volume = rbd.split("/")
|
|
||||||
ret, msg = ceph.remove_snapshot(zkhandler, pool, volume, name)
|
|
||||||
if not ret:
|
|
||||||
error_message = msg.replace("ERROR: ", "")
|
|
||||||
write_backup_summary(message=error_message)
|
|
||||||
failure = True
|
|
||||||
break
|
|
||||||
|
|
||||||
if failure:
|
|
||||||
current_stage += 4
|
|
||||||
update(
|
|
||||||
celery,
|
|
||||||
f"[{vm_name}] Error in snapshot export, skipping",
|
|
||||||
current=current_stage,
|
|
||||||
total=total_stages,
|
|
||||||
)
|
|
||||||
continue
|
|
||||||
|
|
||||||
current_stage += 1
|
|
||||||
update(
|
|
||||||
celery,
|
|
||||||
f"[{vm_name}] Removing VM configuration snapshot",
|
|
||||||
current=current_stage,
|
|
||||||
total=total_stages,
|
|
||||||
)
|
|
||||||
|
|
||||||
ret = zkhandler.delete(
|
|
||||||
("domain.snapshots", dom_uuid, "domain_snapshot.name", snapshot_name)
|
|
||||||
)
|
|
||||||
if not ret:
|
|
||||||
error_message = (
|
|
||||||
f"[{vm_name}] Failed to remove VM snapshot; continuing",
|
|
||||||
)
|
|
||||||
log_err(celery, error_message)
|
|
||||||
|
|
||||||
current_stage += 1
|
|
||||||
update(
|
|
||||||
celery,
|
|
||||||
f"Finding obsolete incremental backups for '{vm_name}'",
|
|
||||||
current=current_stage,
|
|
||||||
total=total_stages,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Read export file to get details
|
|
||||||
backup_json_file = f"{vm_backup_path}/{snapshot_name}/snapshot.json"
|
|
||||||
with open(backup_json_file) as fh:
|
|
||||||
backup_json = jload(fh)
|
|
||||||
tracked_backups.insert(0, backup_json)
|
|
||||||
|
|
||||||
marked_for_deletion = list()
|
|
||||||
# Find any full backups that are expired
|
|
||||||
found_full_count = 0
|
|
||||||
for backup in tracked_backups:
|
|
||||||
if backup["type"] == "full":
|
|
||||||
found_full_count += 1
|
|
||||||
if found_full_count > full_retention:
|
|
||||||
marked_for_deletion.append(backup)
|
|
||||||
# Find any incremental backups that depend on marked parents
|
|
||||||
for backup in tracked_backups:
|
|
||||||
if backup["type"] == "incremental" and backup["incremental_parent"] in [
|
|
||||||
b["datestring"] for b in marked_for_deletion
|
|
||||||
]:
|
|
||||||
marked_for_deletion.append(backup)
|
|
||||||
|
|
||||||
current_stage += 1
|
|
||||||
if len(marked_for_deletion) > 0:
|
|
||||||
update(
|
|
||||||
celery,
|
|
||||||
f"Cleaning up aged out backups for '{vm_name}'",
|
|
||||||
current=current_stage,
|
|
||||||
total=total_stages,
|
|
||||||
)
|
|
||||||
|
|
||||||
for backup_to_delete in marked_for_deletion:
|
|
||||||
ret = vm.vm_worker_remove_snapshot(
|
|
||||||
zkhandler, None, vm_name, backup_to_delete["snapshot_name"]
|
|
||||||
)
|
|
||||||
if ret is False:
|
|
||||||
error_message = f"Failed to remove obsolete backup snapshot '{backup_to_delete['snapshot_name']}', leaving in tracked backups"
|
|
||||||
log_err(celery, error_message)
|
|
||||||
else:
|
|
||||||
rmtree(f"{vm_backup_path}/{backup_to_delete['snapshot_name']}")
|
|
||||||
tracked_backups.remove(backup_to_delete)
|
|
||||||
|
|
||||||
current_stage += 1
|
|
||||||
update(
|
|
||||||
celery,
|
|
||||||
"Updating tracked backups",
|
|
||||||
current=current_stage,
|
|
||||||
total=total_stages,
|
|
||||||
)
|
|
||||||
state_data["tracked_backups"] = tracked_backups
|
|
||||||
with open(autobackup_state_file, "w") as fh:
|
|
||||||
jdump(state_data, fh)
|
|
||||||
|
|
||||||
backup_summary[vm_detail["name"]] = tracked_backups
|
|
||||||
|
|
||||||
# Handle automount unmount commands
|
# Handle automount unmount commands
|
||||||
if config["auto_mount_enabled"]:
|
if config["auto_mount_enabled"]:
|
||||||
|
|
Loading…
Reference in New Issue