Restore previous autobackup continue behaviour
Under the original system, a failure while backing up one VM did not abort the entire autobackup run, so the backups of the remaining VMs could still complete. Restore that behaviour.
This commit is contained in:
parent
8fa6bed736
commit
4ef5fbdbe8
|
@ -118,7 +118,7 @@ def send_execution_summary_report(
|
||||||
email.append("")
|
email.append("")
|
||||||
|
|
||||||
email.append(
|
email.append(
|
||||||
f"A PVC autobackup has been completed at {current_datetime} in {total_time}s."
|
f"A PVC autobackup has been completed at {current_datetime} in {total_time}."
|
||||||
)
|
)
|
||||||
email.append("")
|
email.append("")
|
||||||
email.append(
|
email.append(
|
||||||
|
@ -462,6 +462,33 @@ def worker_cluster_autobackup(
|
||||||
else:
|
else:
|
||||||
export_fileext = "rbdimg"
|
export_fileext = "rbdimg"
|
||||||
|
|
||||||
|
failure = False
|
||||||
|
export_files = None
|
||||||
|
export_files_size = 0
|
||||||
|
|
||||||
|
def write_backup_summary(success=False, message=""):
|
||||||
|
export_details = {
|
||||||
|
"type": export_type,
|
||||||
|
"result": success,
|
||||||
|
"message": message,
|
||||||
|
"datestring": datestring,
|
||||||
|
"snapshot_name": snapshot_name,
|
||||||
|
"incremental_parent": this_backup_incremental_parent,
|
||||||
|
"vm_detail": vm_detail,
|
||||||
|
"export_files": export_files,
|
||||||
|
"export_size_bytes": export_files_size,
|
||||||
|
}
|
||||||
|
try:
|
||||||
|
with open(
|
||||||
|
f"{backup_suffixed_path}/{vm_name}/{snapshot_name}/snapshot.json",
|
||||||
|
"w",
|
||||||
|
) as fh:
|
||||||
|
jdump(export_details, fh)
|
||||||
|
except Exception as e:
|
||||||
|
log_err(celery, f"Error exporting snapshot details: {e}")
|
||||||
|
return False, e
|
||||||
|
return True, ""
|
||||||
|
|
||||||
snapshot_volumes = list()
|
snapshot_volumes = list()
|
||||||
for rbdsnap in snap_list:
|
for rbdsnap in snap_list:
|
||||||
pool, _volume = rbdsnap.split("/")
|
pool, _volume = rbdsnap.split("/")
|
||||||
|
@ -496,15 +523,9 @@ def worker_cluster_autobackup(
|
||||||
error_message = (
|
error_message = (
|
||||||
f"[{vm_name}] Failed to export snapshot for volume(s) '{snap_pool}/{snap_volume}'",
|
f"[{vm_name}] Failed to export snapshot for volume(s) '{snap_pool}/{snap_volume}'",
|
||||||
)
|
)
|
||||||
log_err(celery, error_message)
|
write_backup_summary(message=error_message)
|
||||||
send_execution_failure_report(
|
failure = True
|
||||||
(celery, current_stage, total_stages),
|
break
|
||||||
config,
|
|
||||||
recipients=email_recipients,
|
|
||||||
error=error_message,
|
|
||||||
)
|
|
||||||
fail(celery, error_message)
|
|
||||||
return False
|
|
||||||
else:
|
else:
|
||||||
export_files.append(
|
export_files.append(
|
||||||
(
|
(
|
||||||
|
@ -520,15 +541,9 @@ def worker_cluster_autobackup(
|
||||||
error_message = (
|
error_message = (
|
||||||
f"[{vm_name}] Failed to export snapshot for volume(s) '{snap_pool}/{snap_volume}'",
|
f"[{vm_name}] Failed to export snapshot for volume(s) '{snap_pool}/{snap_volume}'",
|
||||||
)
|
)
|
||||||
log_err(celery, error_message)
|
write_backup_summary(message=error_message)
|
||||||
send_execution_failure_report(
|
failure = True
|
||||||
(celery, current_stage, total_stages),
|
break
|
||||||
config,
|
|
||||||
recipients=email_recipients,
|
|
||||||
error=error_message,
|
|
||||||
)
|
|
||||||
fail(celery, error_message)
|
|
||||||
return False
|
|
||||||
else:
|
else:
|
||||||
export_files.append(
|
export_files.append(
|
||||||
(
|
(
|
||||||
|
@ -537,6 +552,18 @@ def worker_cluster_autobackup(
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if failure:
|
||||||
|
current_stage += 6
|
||||||
|
if not this_backup_retain_snapshot:
|
||||||
|
current_stage += len(snap_list)
|
||||||
|
update(
|
||||||
|
celery,
|
||||||
|
f"[{vm_name}] Error in snapshot export, skipping",
|
||||||
|
current=current_stage,
|
||||||
|
total=total_stages,
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
current_stage += 1
|
current_stage += 1
|
||||||
update(
|
update(
|
||||||
celery,
|
celery,
|
||||||
|
@ -557,33 +584,22 @@ def worker_cluster_autobackup(
|
||||||
|
|
||||||
export_files_size = get_dir_size(export_target_path)
|
export_files_size = get_dir_size(export_target_path)
|
||||||
|
|
||||||
export_details = {
|
ret, e = write_backup_summary(success=True)
|
||||||
"type": export_type,
|
if not ret:
|
||||||
"datestring": datestring,
|
|
||||||
"snapshot_name": snapshot_name,
|
|
||||||
"incremental_parent": this_backup_incremental_parent,
|
|
||||||
"vm_detail": vm_detail,
|
|
||||||
"export_files": export_files,
|
|
||||||
"export_size_bytes": export_files_size,
|
|
||||||
}
|
|
||||||
try:
|
|
||||||
with open(
|
|
||||||
f"{backup_suffixed_path}/{vm_name}/{snapshot_name}/snapshot.json", "w"
|
|
||||||
) as fh:
|
|
||||||
jdump(export_details, fh)
|
|
||||||
except Exception as e:
|
|
||||||
error_message = (
|
error_message = (
|
||||||
f"[{vm_name}] Failed to export configuration snapshot: {e}",
|
f"[{vm_name}] Failed to export configuration snapshot: {e}",
|
||||||
)
|
)
|
||||||
log_err(celery, error_message)
|
write_backup_summary(message=error_message)
|
||||||
send_execution_failure_report(
|
current_stage += 5
|
||||||
(celery, current_stage, total_stages),
|
if not this_backup_retain_snapshot:
|
||||||
config,
|
current_stage += len(snap_list)
|
||||||
recipients=email_recipients,
|
update(
|
||||||
error=error_message,
|
celery,
|
||||||
|
error_message,
|
||||||
|
current=current_stage,
|
||||||
|
total=total_stages,
|
||||||
)
|
)
|
||||||
fail(celery, error_message)
|
continue
|
||||||
return False
|
|
||||||
|
|
||||||
# Clean up the snapshot (vm.vm_worker_remove_snapshot)
|
# Clean up the snapshot (vm.vm_worker_remove_snapshot)
|
||||||
if not this_backup_retain_snapshot:
|
if not this_backup_retain_snapshot:
|
||||||
|
@ -601,15 +617,19 @@ def worker_cluster_autobackup(
|
||||||
ret, msg = ceph.remove_snapshot(zkhandler, pool, volume, name)
|
ret, msg = ceph.remove_snapshot(zkhandler, pool, volume, name)
|
||||||
if not ret:
|
if not ret:
|
||||||
error_message = msg.replace("ERROR: ", "")
|
error_message = msg.replace("ERROR: ", "")
|
||||||
log_err(celery, error_message)
|
write_backup_summary(message=error_message)
|
||||||
send_execution_failure_report(
|
failure = True
|
||||||
(celery, current_stage, total_stages),
|
break
|
||||||
config,
|
|
||||||
recipients=email_recipients,
|
if failure:
|
||||||
error=error_message,
|
current_stage += 4
|
||||||
|
update(
|
||||||
|
celery,
|
||||||
|
f"[{vm_name}] Error in snapshot export, skipping",
|
||||||
|
current=current_stage,
|
||||||
|
total=total_stages,
|
||||||
)
|
)
|
||||||
fail(celery, error_message)
|
continue
|
||||||
return False
|
|
||||||
|
|
||||||
current_stage += 1
|
current_stage += 1
|
||||||
update(
|
update(
|
||||||
|
@ -624,17 +644,9 @@ def worker_cluster_autobackup(
|
||||||
)
|
)
|
||||||
if not ret:
|
if not ret:
|
||||||
error_message = (
|
error_message = (
|
||||||
f"[{vm_name}] Failed to remove snapshot from Zookeeper",
|
f"[{vm_name}] Failed to remove VM snapshot; continuing",
|
||||||
)
|
)
|
||||||
log_err(celery, error_message)
|
log_err(celery, error_message)
|
||||||
send_execution_failure_report(
|
|
||||||
(celery, current_stage, total_stages),
|
|
||||||
config,
|
|
||||||
recipients=email_recipients,
|
|
||||||
error=error_message,
|
|
||||||
)
|
|
||||||
fail(celery, error_message)
|
|
||||||
return False
|
|
||||||
|
|
||||||
current_stage += 1
|
current_stage += 1
|
||||||
update(
|
update(
|
||||||
|
|
Loading…
Reference in New Issue