Add backup reporting and improve metrics
Major improvements to autobackup and backups: additional fields in the backup JSON itself, improved error handling throughout, and the ability to email reports of autobackup runs using the local sendmail utility.
parent 8d74ee7273
commit 362edeed8c
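The email path deliberately avoids any SMTP library: the CLI composes RFC 822-style headers plus a plain-text body and pipes the result to the local sendmail binary, exactly as the hunks below do. A minimal sketch of that approach, assuming a working /usr/sbin/sendmail on the host (the addresses are placeholders, not from this commit):

    # Sketch: hand a composed message to the local sendmail utility via os.popen.
    # Assumes /usr/sbin/sendmail exists; the recipient addresses are examples only.
    from os import popen

    recipients = "admin@example.com,ops@example.com"
    message = [
        f"To: {', '.join(f'<{r}>' for r in recipients.split(','))}",
        "Subject: PVC Autobackup report",
        "",  # a blank line separates the headers from the body
        "Report body goes here.",
    ]
    p = popen("/usr/sbin/sendmail -t", "w")
    p.write("\n".join(message))
    p.close()

With -t, sendmail extracts the recipients from the To: header itself, so the code only ever builds headers and never passes addresses on the command line.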
@@ -1895,6 +1895,12 @@ def cli_vm_backup_remove(domain, backup_datestring, backup_path):
     show_default=True,
     help="Override default config file location.",
 )
+@click.option(
+    "--email-report",
+    "email_report",
+    default=None,
+    help="Email a backup summary report to the specified address(es), comma-separated.",
+)
 @click.option(
     "--force-full",
     "force_full_flag",
@@ -1909,7 +1915,7 @@ def cli_vm_backup_remove(domain, backup_datestring, backup_path):
     is_flag=True,
     help="Cron mode; don't error exit if this isn't the primary coordinator.",
 )
-def cli_vm_autobackup(autobackup_cfgfile, force_full_flag, cron_flag):
+def cli_vm_autobackup(autobackup_cfgfile, email_report, force_full_flag, cron_flag):
     """
     Perform automated backups of VMs, with integrated cleanup and full/incremental scheduling.

@@ -1936,12 +1942,17 @@ def cli_vm_autobackup(autobackup_cfgfile, force_full_flag, cron_flag):
     configuration file path if required by a particular run. For full details of the possible options, please
     see the example configuration file at "/usr/share/pvc/autobackup.sample.yaml".

+    An optional report on all current backups can be emailed to one or more email addresses using the
+    "--email-report" flag. This report will include information on all current known backups.
+
     The "--force-full" option can be used to force all configured VMs to perform a "full" level backup this run,
     which can help synchronize the backups of existing VMs with new ones.
     """

     # All work here is done in the helper function for portability; we don't even use "finish"
-    vm_autobackup(CLI_CONFIG, autobackup_cfgfile, force_full_flag, cron_flag)
+    vm_autobackup(
+        CLI_CONFIG, autobackup_cfgfile, email_report, force_full_flag, cron_flag
+    )


 ###############################################################################
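As a usage illustration only: assuming the Click group maps cli_vm_autobackup to a `pvc vm autobackup` subcommand, a cron entry might run it with `--email-report backups@example.com` (the address is a placeholder), adding `--force-full` when resynchronizing backup levels as described in the help text above.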
@@ -26,7 +26,7 @@ from distutils.util import strtobool
 from getpass import getuser
 from json import load as jload
 from json import dump as jdump
-from os import chmod, environ, getpid, path, makedirs, get_terminal_size
+from os import chmod, environ, getpid, path, popen, makedirs, get_terminal_size
 from re import findall
 from socket import gethostname
 from subprocess import run, PIPE
@@ -38,6 +38,7 @@ from yaml import SafeLoader
 import pvc.lib.provisioner
 import pvc.lib.vm
 import pvc.lib.node
+import pvc.lib.storage


 DEFAULT_STORE_DATA = {"cfgfile": "/etc/pvc/pvc.conf"}
@@ -201,8 +202,8 @@ def get_autobackup_config(CLI_CONFIG, cfgfile):
     try:
         config = dict()
         with open(cfgfile) as fh:
-            backup_config = yload(fh, Loader=SafeLoader)["autobackup"]
-
+            full_config = yload(fh, Loader=SafeLoader)
+        backup_config = full_config["autobackup"]
         config["backup_root_path"] = backup_config["backup_root_path"]
         config["backup_root_suffix"] = backup_config["backup_root_suffix"]
         config["backup_tags"] = backup_config["backup_tags"]
@@ -226,13 +227,10 @@ def get_autobackup_config(CLI_CONFIG, cfgfile):
                 backup_root_path=backup_config["backup_root_path"]
             )
             config["unmount_cmds"].append(_unmount_cmd)

     except FileNotFoundError:
-        echo(CLI_CONFIG, "ERROR: Specified backup configuration does not exist!")
-        exit(1)
+        return "Backup configuration does not exist!"
     except KeyError as e:
-        echo(CLI_CONFIG, f"ERROR: Backup configuration is invalid: {e}")
-        exit(1)
+        return f"Backup configuration is invalid: {e}"

     return config
@@ -240,6 +238,7 @@ def get_autobackup_config(CLI_CONFIG, cfgfile):
 def vm_autobackup(
     CLI_CONFIG,
     autobackup_cfgfile=DEFAULT_AUTOBACKUP_FILENAME,
+    email_report=None,
     force_full_flag=False,
     cron_flag=False,
 ):
@@ -247,6 +246,48 @@ def vm_autobackup(
     Perform automatic backups of VMs based on an external config file.
     """

+    if email_report is not None:
+        from email.utils import formatdate
+        from socket import gethostname
+
+        try:
+            with open(autobackup_cfgfile) as fh:
+                tmp_config = yload(fh, Loader=SafeLoader)
+            cluster = tmp_config["cluster"]["name"]
+        except Exception:
+            cluster = "unknown"
+
+    def send_execution_failure_report(error=None):
+        echo(CLI_CONFIG, f"Sending email failure report to {email_report}")
+
+        current_datetime = datetime.now()
+        email_datetime = formatdate(float(current_datetime.strftime("%s")))
+
+        email = list()
+        email.append(f"Date: {email_datetime}")
+        email.append(f"Subject: PVC Autobackup execution failure for cluster {cluster}")
+
+        recipients = list()
+        for recipient in email_report.split(","):
+            recipients.append(f"<{recipient}>")
+        email.append(f"To: {', '.join(recipients)}")
+        email.append(f"From: PVC Autobackup System <pvc@{gethostname()}>")
+        email.append("")
+
+        email.append(
+            f"A PVC autobackup has FAILED at {current_datetime} due to an execution error."
+        )
+        email.append("")
+        email.append("The reported error message is:")
+        email.append(f"    {error}")
+
+        try:
+            p = popen("/usr/sbin/sendmail -t", "w")
+            p.write("\n".join(email))
+            p.close()
+        except Exception as e:
+            echo(CLI_CONFIG, f"Failed to send report email: {e}")
+
     # Validate that we are running on the current primary coordinator of the 'local' cluster connection
     real_connection = CLI_CONFIG["connection"]
     CLI_CONFIG["connection"] = "local"
@@ -267,6 +308,10 @@ def vm_autobackup(
             CLI_CONFIG,
             "Autobackup MUST be run from the cluster active primary coordinator using the 'local' connection. See '-h'/'--help' for details.",
         )
+        if email_report is not None:
+            send_execution_failure_report(
+                error=f"Autobackup run attempted from non-local connection or non-primary coordinator; got connection '{real_connection}', host '{DEFAULT_NODE_HOSTNAME}'."
+            )
         exit(1)

     # Ensure we're running as root, or show a warning & confirmation
@@ -279,6 +324,14 @@ def vm_autobackup(

     # Load our YAML config
     autobackup_config = get_autobackup_config(CLI_CONFIG, autobackup_cfgfile)
+    if not isinstance(autobackup_config, dict):
+        echo(CLI_CONFIG, f"ERROR: {autobackup_config}")
+        if email_report is not None:
+            send_execution_failure_report(error=f"{autobackup_config}")
+        exit(1)
+
+    # Get the start time of this run
+    autobackup_start_time = datetime.now()

     # Get a list of all VMs on the cluster
     # We don't do tag filtering here, because we could match an arbitrary number of tags; instead, we
@@ -286,6 +339,8 @@ def vm_autobackup(
     retcode, retdata = pvc.lib.vm.vm_list(CLI_CONFIG, None, None, None, None, None)
     if not retcode:
         echo(CLI_CONFIG, f"ERROR: Failed to fetch VM list: {retdata}")
+        if email_report is not None:
+            send_execution_failure_report(error=f"Failed to fetch VM list: {retdata}")
         exit(1)
     cluster_vms = retdata

@@ -354,6 +409,8 @@ def vm_autobackup(
                 CLI_CONFIG,
                 f"Exiting; command reports: {ret.stderr.decode().strip()}",
             )
+            if email_report is not None:
+                send_execution_failure_report(error=ret.stderr.decode().strip())
             exit(1)
         else:
             echo(CLI_CONFIG, f"done. [{ttot.seconds}s]")
@@ -417,27 +474,26 @@ def vm_autobackup(
         tend = datetime.now()
         ttot = tend - tstart
         if not retcode:
+            backup_datestring = findall(r"[0-9]{14}", retdata)[0]
             echo(CLI_CONFIG, f"failed. [{ttot.seconds}s]")
-            echo(CLI_CONFIG, f"Skipping cleanups; command reports: {retdata}")
-            continue
+            echo(
+                CLI_CONFIG,
+                retdata.strip().replace(f"ERROR in backup {backup_datestring}: ", ""),
+            )
+            skip_cleanup = True
         else:
             backup_datestring = findall(r"[0-9]{14}", retdata)[0]
             echo(
                 CLI_CONFIG,
                 f"done. Backup '{backup_datestring}' created. [{ttot.seconds}s]",
             )
+            skip_cleanup = False

         # Read backup file to get details
         backup_json_file = f"{backup_path}/{backup_datestring}/pvcbackup.json"
         with open(backup_json_file) as fh:
             backup_json = jload(fh)
-        backup = {
-            "datestring": backup_json["datestring"],
-            "type": backup_json["type"],
-            "parent": backup_json["incremental_parent"],
-            "retained_snapshot": backup_json["retained_snapshot"],
-        }
-        tracked_backups.insert(0, backup)
+        tracked_backups.insert(0, backup_json)

         # Delete any full backups that are expired
         marked_for_deletion = list()
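The failure branch above relies on the new message convention introduced in backup_vm further down: every error now embeds the backup ID as "ERROR in backup {datestring}: ...", so the CLI can recover the ID with the same fourteen-digit findall it already used on success, then strip the prefix for display. A small round-trip sketch (the retdata value is a made-up example):

    # Sketch: recover the backup ID and the bare message from the new error format.
    from re import findall

    retdata = "ERROR in backup 20240101020000: Target path /backup does not exist!"
    backup_datestring = findall(r"[0-9]{14}", retdata)[0]
    message = retdata.strip().replace(f"ERROR in backup {backup_datestring}: ", "")
    print(backup_datestring)  # 20240101020000
    print(message)            # Target path /backup does not exist!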
@@ -450,11 +506,22 @@ def vm_autobackup(
         # Delete any incremental backups that depend on marked parents
         for backup in tracked_backups:
-            if backup["type"] == "incremental" and backup["parent"] in [
+            if backup["type"] == "incremental" and backup["incremental_parent"] in [
                 b["datestring"] for b in marked_for_deletion
             ]:
                 marked_for_deletion.append(backup)

         if len(marked_for_deletion) > 0:
+            if skip_cleanup:
+                echo(
+                    CLI_CONFIG,
+                    f"Skipping cleanups for {len(marked_for_deletion)} aged-out backups due to backup failure.",
+                )
+            else:
+                echo(
+                    CLI_CONFIG,
+                    f"Running cleanups for {len(marked_for_deletion)} aged-out backups...",
+                )
+
             # Execute deletes
             for backup_to_delete in marked_for_deletion:
                 echo(
@@ -477,7 +544,6 @@ def vm_autobackup(
                     CLI_CONFIG,
                     f"Skipping removal from tracked backups; command reports: {retdata}",
                 )
-                continue
             else:
                 tracked_backups.remove(backup_to_delete)
                 echo(CLI_CONFIG, f"done. [{ttot.seconds}s]")
@@ -514,3 +580,78 @@ def vm_autobackup(
             )
         else:
             echo(CLI_CONFIG, f"done. [{ttot.seconds}s]")
+
+    autobackup_end_time = datetime.now()
+    autobackup_total_time = autobackup_end_time - autobackup_start_time
+
+    # Handle report emailing
+    if email_report is not None:
+        echo(CLI_CONFIG, "")
+        echo(CLI_CONFIG, f"Sending email summary report to {email_report}")
+
+        backup_summary = dict()
+        for vm in backup_vms:
+            backup_path = f"{backup_suffixed_path}/{vm}"
+            autobackup_state_file = f"{backup_path}/.autobackup.json"
+            if not path.exists(backup_path) or not path.exists(autobackup_state_file):
+                # There are no new backups so the list is empty
+                state_data = dict()
+                tracked_backups = list()
+            else:
+                with open(autobackup_state_file) as fh:
+                    state_data = jload(fh)
+                tracked_backups = state_data["tracked_backups"]
+
+            backup_summary[vm] = tracked_backups
+
+        current_datetime = datetime.now()
+        email_datetime = formatdate(float(current_datetime.strftime("%s")))
+
+        email = list()
+        email.append(f"Date: {email_datetime}")
+        email.append(f"Subject: PVC Autobackup report for cluster {cluster}")
+
+        recipients = list()
+        for recipient in email_report.split(","):
+            recipients.append(f"<{recipient}>")
+        email.append(f"To: {', '.join(recipients)}")
+        email.append(f"From: PVC Autobackup System <pvc@{gethostname()}>")
+        email.append("")
+
+        email.append(
+            f"A PVC autobackup has been completed at {current_datetime} in {autobackup_total_time}."
+        )
+        email.append("")
+        email.append(
+            "The following is a summary of all current VM backups after cleanups, most recent first:"
+        )
+        email.append("")
+
+        for vm in backup_vms:
+            email.append(f"VM {vm}:")
+            for backup in backup_summary[vm]:
+                datestring = backup.get("datestring")
+                backup_date = datetime.strptime(datestring, "%Y%m%d%H%M%S")
+                if backup.get("result", False):
+                    email.append(
+                        f"    {backup_date}: Success in {backup.get('runtime_secs', 0)} seconds, ID {datestring}, type {backup.get('type', 'unknown')}"
+                    )
+                    email.append(
+                        f"        Backup contains {len(backup.get('backup_files'))} files totaling {pvc.lib.storage.format_bytes_tohuman(backup.get('backup_size_bytes', 0))} ({backup.get('backup_size_bytes', 0)} bytes)"
+                    )
+                else:
+                    email.append(
+                        f"    {backup_date}: Failure in {backup.get('runtime_secs', 0)} seconds, ID {datestring}, type {backup.get('type', 'unknown')}"
+                    )
+                    email.append(
+                        f"        {backup.get('result_message')}"
+                    )
+
+        try:
+            p = popen("/usr/sbin/sendmail -t", "w")
+            p.write("\n".join(email))
+            p.close()
+        except Exception as e:
+            echo(CLI_CONFIG, f"Failed to send report email: {e}")
+
+    echo(CLI_CONFIG, "")
+    echo(CLI_CONFIG, f"Autobackup completed in {autobackup_total_time}.")
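Because tracked_backups now stores whole pvcbackup.json documents instead of the old four-field summaries, the report loop can read the new metrics directly (result, runtime_secs, backup_files, backup_size_bytes, result_message). A sketch of one success line being rendered from such an entry, with made-up values:

    # Sketch: render one report line from a hypothetical pvcbackup.json entry,
    # using the same fields the report loop above reads.
    from datetime import datetime

    backup = {
        "datestring": "20240101020000",
        "type": "full",
        "result": True,
        "runtime_secs": 142.37,
        "backup_files": [("pvcdisks/vms.test_disk0.rbdimg", 21474836480)],
        "backup_size_bytes": 21474836480,
    }
    backup_date = datetime.strptime(backup["datestring"], "%Y%m%d%H%M%S")
    print(
        f"    {backup_date}: Success in {backup.get('runtime_secs', 0)} seconds, "
        f"ID {backup['datestring']}, type {backup.get('type', 'unknown')}"
    )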
@@ -32,6 +32,7 @@ from json import dump as jdump
 from json import load as jload
 from json import loads as jloads
 from libvirt import open as lvopen
+from os import scandir
 from shutil import rmtree
 from socket import gethostname
 from uuid import UUID
@@ -1183,12 +1184,15 @@ def backup_vm(
     if not re.match(r"^/", backup_path):
         return (
             False,
-            f"ERROR: Target path {backup_path} is not a valid absolute path on the primary coordinator!",
+            f"ERROR in backup {datestring}: Target path {backup_path} is not a valid absolute path on the primary coordinator!",
         )

     # Ensure that backup_path (on this node) exists
     if not os.path.isdir(backup_path):
-        return False, f"ERROR: Target path {backup_path} does not exist!"
+        return (
+            False,
+            f"ERROR in backup {datestring}: Target path {backup_path} does not exist!",
+        )

     # 1a. Create destination directory
     vm_target_root = f"{backup_path}/{domain}"
@@ -1197,7 +1201,10 @@ def backup_vm(
     try:
         os.makedirs(vm_target_backup)
     except Exception as e:
-        return False, f"ERROR: Failed to create backup directory: {e}"
+        return (
+            False,
+            f"ERROR in backup {datestring}: Failed to create backup directory: {e}",
+        )

     tstart = time.time()
     backup_type = "incremental" if incremental_parent is not None else "full"
@@ -1222,7 +1229,7 @@ def backup_vm(
             "retained_snapshot": retain_snapshot,
             "result": result,
             "result_message": result_message,
-            "runtime_secs": ttot.seconds,
+            "runtime_secs": ttot,
             "vm_detail": vm_detail,
             "backup_files": backup_files,
             "backup_size_bytes": backup_files_size,
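A note on the runtime_secs change above: in this function ttot is computed as round(tend - tstart, 2) from time.time() values (visible as unchanged context in a later hunk), i.e. a plain float, so the old ttot.seconds would have raised AttributeError at write time; storing ttot directly fixes that and also preserves fractional seconds in the JSON.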
@@ -1233,28 +1240,26 @@ def backup_vm(
     # 2. Validations part 2
     # Disallow retaining snapshots with an incremental parent
     if incremental_parent is not None and retain_snapshot:
-        result_message = (
-            "ERROR: Retaining snapshots of incremental backups is not supported!"
-        )
-        write_pvcbackup_json(result=False, result_message=result_message)
+        error_message = "Retaining snapshots of incremental backups is not supported!"
+        write_pvcbackup_json(result=False, result_message=f"ERROR: {error_message}")
         return (
             False,
-            result_message,
+            f"ERROR in backup {datestring}: {error_message}",
         )

     # Validate that VM exists in cluster
     dom_uuid = getDomainUUID(zkhandler, domain)
     if not dom_uuid:
-        result_message = f'ERROR: Could not find VM "{domain}" in the cluster!'
-        write_pvcbackup_json(result=False, result_message=result_message)
-        return False, result_message
+        error_message = f'Could not find VM "{domain}" in the cluster!'
+        write_pvcbackup_json(result=False, result_message=f"ERROR: {error_message}")
+        return False, f"ERROR in backup {datestring}: {error_message}"

     # 3. Get information about VM
     vm_detail = get_list(zkhandler, limit=dom_uuid, is_fuzzy=False)[1][0]
     if not isinstance(vm_detail, dict):
-        result_message = f"ERROR: VM listing returned invalid data: {vm_detail}"
-        write_pvcbackup_json(result=False, result_message=result_message)
-        return False, result_message
+        error_message = f"VM listing returned invalid data: {vm_detail}"
+        write_pvcbackup_json(result=False, result_message=f"ERROR: {error_message}")
+        return False, f"ERROR in backup {datestring}: {error_message}"

     vm_volumes = list()
     for disk in vm_detail["disks"]:
@@ -1270,39 +1275,47 @@ def backup_vm(
             elif len(retdata) > 1:
                 retdata = "Multiple volumes returned."

-            result_message = (
-                f"ERROR: Failed to get volume details for {pool}/{volume}: {retdata}"
+            error_message = (
+                f"Failed to get volume details for {pool}/{volume}: {retdata}"
             )
             write_pvcbackup_json(
-                result=False, result_message=result_message, vm_detail=vm_detail
+                result=False,
+                result_message=f"ERROR: {error_message}",
+                vm_detail=vm_detail,
             )
             return (
                 False,
-                result_message,
+                f"ERROR in backup {datestring}: {error_message}",
             )

         try:
             size = retdata[0]["stats"]["size"]
         except Exception as e:
-            return False, f"ERROR: Failed to get volume size for {pool}/{volume}: {e}"
+            error_message = f"Failed to get volume size for {pool}/{volume}: {e}"
+            write_pvcbackup_json(
+                result=False,
+                result_message=f"ERROR: {error_message}",
+                vm_detail=vm_detail,
+            )
+            return (
+                False,
+                f"ERROR in backup {datestring}: {error_message}",
+            )

         vm_volumes.append((pool, volume, size))

     # 4a. Validate that all volumes exist (they should, but just in case)
     for pool, volume, _ in vm_volumes:
         if not ceph.verifyVolume(zkhandler, pool, volume):
-            result_message = (
-                f"ERROR: VM defines a volume {pool}/{volume} which does not exist!"
-            )
+            error_message = f"VM defines a volume {pool}/{volume} which does not exist!"
             write_pvcbackup_json(
                 result=False,
-                result_message=result_message,
+                result_message=f"ERROR: {error_message}",
                 vm_detail=vm_detail,
                 vm_volumes=vm_volumes,
             )
             return (
                 False,
-                result_message,
+                f"ERROR in backup {datestring}: {error_message}",
             )

     # 4b. Validate that, if an incremental_parent is given, it is valid
@@ -1312,16 +1325,15 @@ def backup_vm(
             if not ceph.verifySnapshot(
                 zkhandler, pool, volume, f"backup_{incremental_parent}"
             ):
-                result_message = f"ERROR: Incremental parent {incremental_parent} given, but no snapshots were found; cannot export an incremental backup."
+                error_message = f"Incremental parent {incremental_parent} given, but no snapshots were found; cannot export an incremental backup."
                 write_pvcbackup_json(
                     result=False,
-                    result_message=result_message,
+                    result_message=f"ERROR: {error_message}",
                     vm_detail=vm_detail,
                     vm_volumes=vm_volumes,
                 )
                 return (
                     False,
-                    result_message,
+                    f"ERROR in backup {datestring}: {error_message}",
                 )

     export_fileext = "rbddiff"
@@ -1334,35 +1346,31 @@ def backup_vm(
     # 5. Take snapshot of each disk with the name @backup_{datestring}
     is_snapshot_create_failed = False
     which_snapshot_create_failed = list()
-    msg_snapshot_create_failed = list()
     for pool, volume, _ in vm_volumes:
         retcode, retmsg = ceph.add_snapshot(zkhandler, pool, volume, snapshot_name)
         if not retcode:
             is_snapshot_create_failed = True
             which_snapshot_create_failed.append(f"{pool}/{volume}")
-            msg_snapshot_create_failed.append(retmsg)

     if is_snapshot_create_failed:
         for pool, volume, _ in vm_volumes:
             if ceph.verifySnapshot(zkhandler, pool, volume, snapshot_name):
                 ceph.remove_snapshot(zkhandler, pool, volume, snapshot_name)

-        result_message = f'ERROR: Failed to create snapshot for volume(s) {", ".join(which_snapshot_create_failed)}: {", ".join(msg_snapshot_create_failed)}'
+        error_message = f'Failed to create snapshot for volume(s) {", ".join(which_snapshot_create_failed)}'
         write_pvcbackup_json(
             result=False,
-            result_message=result_message,
+            result_message=f"ERROR: {error_message}",
             vm_detail=vm_detail,
             vm_volumes=vm_volumes,
         )
         return (
             False,
-            result_message,
+            f"ERROR in backup {datestring}: {error_message}",
         )

     # 6. Dump snapshot to folder with `rbd export` (full) or `rbd export-diff` (incremental)
     is_snapshot_export_failed = False
     which_snapshot_export_failed = list()
-    msg_snapshot_export_failed = list()
     backup_files = list()
     for pool, volume, size in vm_volumes:
         if incremental_parent is not None:
@@ -1373,7 +1381,6 @@ def backup_vm(
             if retcode:
                 is_snapshot_export_failed = True
                 which_snapshot_export_failed.append(f"{pool}/{volume}")
-                msg_snapshot_export_failed.append(stderr)
             else:
                 backup_files.append(
                     (f"pvcdisks/{pool}.{volume}.{export_fileext}", size)
@@ -1385,32 +1392,44 @@ def backup_vm(
             if retcode:
                 is_snapshot_export_failed = True
                 which_snapshot_export_failed.append(f"{pool}/{volume}")
-                msg_snapshot_export_failed.append(stderr)
             else:
                 backup_files.append(
                     (f"pvcdisks/{pool}.{volume}.{export_fileext}", size)
                 )

-    backup_files_size = os.path.getsize(vm_target_backup)
+    def get_dir_size(path):
+        total = 0
+        with scandir(path) as it:
+            for entry in it:
+                if entry.is_file():
+                    total += entry.stat().st_size
+                elif entry.is_dir():
+                    total += get_dir_size(entry.path)
+        return total
+
+    backup_files_size = get_dir_size(vm_target_backup)

     if is_snapshot_export_failed:
         for pool, volume, _ in vm_volumes:
             if ceph.verifySnapshot(zkhandler, pool, volume, snapshot_name):
                 ceph.remove_snapshot(zkhandler, pool, volume, snapshot_name)

-        result_message = f'ERROR: Failed to export snapshot for volume(s) {", ".join(which_snapshot_export_failed)}: {", ".join(msg_snapshot_export_failed)}'
+        error_message = f'Failed to export snapshot for volume(s) {", ".join(which_snapshot_export_failed)}'
         write_pvcbackup_json(
             result=False,
-            result_message=result_message,
+            result_message=f"ERROR: {error_message}",
             vm_detail=vm_detail,
             backup_files=backup_files,
             backup_files_size=backup_files_size,
         )
         return (
             False,
-            result_message,
+            f"ERROR in backup {datestring}: {error_message}",
         )

     # 8. Remove snapshots if retain_snapshot is False
     is_snapshot_remove_failed = False
     which_snapshot_remove_failed = list()
-    msg_snapshot_remove_failed = list()
     if not retain_snapshot:
         for pool, volume, _ in vm_volumes:
             if ceph.verifySnapshot(zkhandler, pool, volume, snapshot_name):
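The get_dir_size helper above replaces os.path.getsize, which for a directory returns the size of the directory entry itself rather than the total size of its contents. For comparison, an equivalent sketch using pathlib (not from this commit):

    # Sketch: sum all regular-file sizes beneath a path; equivalent in effect
    # to the recursive scandir helper added above.
    from pathlib import Path

    def get_dir_size(path):
        return sum(f.stat().st_size for f in Path(path).rglob("*") if f.is_file())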
@@ -1420,7 +1439,6 @@ def backup_vm(
             if not retcode:
                 is_snapshot_remove_failed = True
                 which_snapshot_remove_failed.append(f"{pool}/{volume}")
-                msg_snapshot_remove_failed.append(retmsg)

     tend = time.time()
     ttot = round(tend - tstart, 2)
@@ -1429,7 +1447,7 @@ def backup_vm(

     if is_snapshot_remove_failed:
         retlines.append(
-            f"WARNING: Failed to remove snapshot(s) as requested for volume(s) {', '.join(which_snapshot_remove_failed)}: {', '.join(msg_snapshot_remove_failed)}"
+            f"WARNING: Failed to remove snapshot(s) as requested for volume(s) {', '.join(which_snapshot_remove_failed)}"
         )

     myhostname = gethostname().split(".")[0]
@@ -1437,7 +1455,7 @@ def backup_vm(
         result_message = f"Successfully backed up VM '{domain}' ({backup_type}@{datestring}, snapshots retained) to '{myhostname}:{backup_path}' in {ttot}s."
     else:
         result_message = f"Successfully backed up VM '{domain}' ({backup_type}@{datestring}) to '{myhostname}:{backup_path}' in {ttot}s."
-    retlines.appendr(result_message)
+    retlines.append(result_message)

     write_pvcbackup_json(
         result=True,
@@ -1495,7 +1513,6 @@ def remove_backup(zkhandler, domain, backup_path, datestring):
     # 2. Remove snapshots
     is_snapshot_remove_failed = False
     which_snapshot_remove_failed = list()
-    msg_snapshot_remove_failed = list()
     if backup_source_details["retained_snapshot"]:
         for volume_file, _ in backup_source_details.get("backup_files"):
             pool, volume, _ = volume_file.split("/")[-1].split(".")
@@ -1504,7 +1521,6 @@ def remove_backup(zkhandler, domain, backup_path, datestring):
             if not retcode:
                 is_snapshot_remove_failed = True
                 which_snapshot_remove_failed.append(f"{pool}/{volume}")
-                msg_snapshot_remove_failed.append(retmsg)

     # 3. Remove files
     is_files_remove_failed = False
@@ -1521,7 +1537,7 @@ def remove_backup(zkhandler, domain, backup_path, datestring):

     if is_snapshot_remove_failed:
         retlines.append(
-            f"WARNING: Failed to remove snapshot(s) as requested for volume(s) {', '.join(which_snapshot_remove_failed)}: {', '.join(msg_snapshot_remove_failed)}"
+            f"WARNING: Failed to remove snapshot(s) as requested for volume(s) {', '.join(which_snapshot_remove_failed)}"
         )

     if is_files_remove_failed:
@@ -1620,7 +1636,6 @@ def restore_vm(zkhandler, domain, backup_path, datestring, retain_snapshot=False
     # 4. Import volumes
     is_snapshot_remove_failed = False
     which_snapshot_remove_failed = list()
-    msg_snapshot_remove_failed = list()
     if incremental_parent is not None:
         for volume_file, volume_size in backup_source_details.get("backup_files"):
             pool, volume, _ = volume_file.split("/")[-1].split(".")
@@ -1696,14 +1711,12 @@ def restore_vm(zkhandler, domain, backup_path, datestring, retain_snapshot=False
             if retcode:
                 is_snapshot_remove_failed = True
                 which_snapshot_remove_failed.append(f"{pool}/{volume}")
-                msg_snapshot_remove_failed.append(retmsg)
             retcode, stdout, stderr = common.run_os_command(
                 f"rbd snap rm {pool}/{volume}@backup_{datestring}"
             )
             if retcode:
                 is_snapshot_remove_failed = True
                 which_snapshot_remove_failed.append(f"{pool}/{volume}")
-                msg_snapshot_remove_failed.append(retmsg)

         else:
             for volume_file, volume_size in backup_source_details.get("backup_files"):
@@ -1772,7 +1785,7 @@ def restore_vm(zkhandler, domain, backup_path, datestring, retain_snapshot=False

     if is_snapshot_remove_failed:
         retlines.append(
-            f"WARNING: Failed to remove hanging snapshot(s) as requested for volume(s) {', '.join(which_snapshot_remove_failed)}: {', '.join(msg_snapshot_remove_failed)}"
+            f"WARNING: Failed to remove hanging snapshot(s) as requested for volume(s) {', '.join(which_snapshot_remove_failed)}"
         )

     myhostname = gethostname().split(".")[0]