Add backup reporting and improve metrics
Major improvements to autobackup and backups, including additional information/fields in the backup JSON itself, improved error handling, and the ability to email reports of autobackups using a local sendmail utility.
This commit is contained in:
parent
8d74ee7273
commit
362edeed8c
|
@ -1895,6 +1895,12 @@ def cli_vm_backup_remove(domain, backup_datestring, backup_path):
|
||||||
show_default=True,
|
show_default=True,
|
||||||
help="Override default config file location.",
|
help="Override default config file location.",
|
||||||
)
|
)
|
||||||
|
@click.option(
|
||||||
|
"--email-report",
|
||||||
|
"email_report",
|
||||||
|
default=None,
|
||||||
|
help="Email a backup summary report to the specified address(es), comma-separated.",
|
||||||
|
)
|
||||||
@click.option(
|
@click.option(
|
||||||
"--force-full",
|
"--force-full",
|
||||||
"force_full_flag",
|
"force_full_flag",
|
||||||
|
@ -1909,7 +1915,7 @@ def cli_vm_backup_remove(domain, backup_datestring, backup_path):
|
||||||
is_flag=True,
|
is_flag=True,
|
||||||
help="Cron mode; don't error exit if this isn't the primary coordinator.",
|
help="Cron mode; don't error exit if this isn't the primary coordinator.",
|
||||||
)
|
)
|
||||||
def cli_vm_autobackup(autobackup_cfgfile, force_full_flag, cron_flag):
|
def cli_vm_autobackup(autobackup_cfgfile, email_report, force_full_flag, cron_flag):
|
||||||
"""
|
"""
|
||||||
Perform automated backups of VMs, with integrated cleanup and full/incremental scheduling.
|
Perform automated backups of VMs, with integrated cleanup and full/incremental scheduling.
|
||||||
|
|
||||||
|
@ -1936,12 +1942,17 @@ def cli_vm_autobackup(autobackup_cfgfile, force_full_flag, cron_flag):
|
||||||
configuration file path if required by a particular run. For full details of the possible options, please
|
configuration file path if required by a particular run. For full details of the possible options, please
|
||||||
see the example configuration file at "/usr/share/pvc/autobackup.sample.yaml".
|
see the example configuration file at "/usr/share/pvc/autobackup.sample.yaml".
|
||||||
|
|
||||||
|
An optional report on all current backups can be emailed to one or more email addresses using the
|
||||||
|
"--email-report" flag. This report will include information on all current known backups.
|
||||||
|
|
||||||
The "--force-full" option can be used to force all configured VMs to perform a "full" level backup this run,
|
The "--force-full" option can be used to force all configured VMs to perform a "full" level backup this run,
|
||||||
which can help synchronize the backups of existing VMs with new ones.
|
which can help synchronize the backups of existing VMs with new ones.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# All work here is done in the helper function for portability; we don't even use "finish"
|
# All work here is done in the helper function for portability; we don't even use "finish"
|
||||||
vm_autobackup(CLI_CONFIG, autobackup_cfgfile, force_full_flag, cron_flag)
|
vm_autobackup(
|
||||||
|
CLI_CONFIG, autobackup_cfgfile, email_report, force_full_flag, cron_flag
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
|
@ -26,7 +26,7 @@ from distutils.util import strtobool
|
||||||
from getpass import getuser
|
from getpass import getuser
|
||||||
from json import load as jload
|
from json import load as jload
|
||||||
from json import dump as jdump
|
from json import dump as jdump
|
||||||
from os import chmod, environ, getpid, path, makedirs, get_terminal_size
|
from os import chmod, environ, getpid, path, popen, makedirs, get_terminal_size
|
||||||
from re import findall
|
from re import findall
|
||||||
from socket import gethostname
|
from socket import gethostname
|
||||||
from subprocess import run, PIPE
|
from subprocess import run, PIPE
|
||||||
|
@ -38,6 +38,7 @@ from yaml import SafeLoader
|
||||||
import pvc.lib.provisioner
|
import pvc.lib.provisioner
|
||||||
import pvc.lib.vm
|
import pvc.lib.vm
|
||||||
import pvc.lib.node
|
import pvc.lib.node
|
||||||
|
import pvc.lib.storage
|
||||||
|
|
||||||
|
|
||||||
DEFAULT_STORE_DATA = {"cfgfile": "/etc/pvc/pvc.conf"}
|
DEFAULT_STORE_DATA = {"cfgfile": "/etc/pvc/pvc.conf"}
|
||||||
|
@ -201,8 +202,8 @@ def get_autobackup_config(CLI_CONFIG, cfgfile):
|
||||||
try:
|
try:
|
||||||
config = dict()
|
config = dict()
|
||||||
with open(cfgfile) as fh:
|
with open(cfgfile) as fh:
|
||||||
backup_config = yload(fh, Loader=SafeLoader)["autobackup"]
|
full_config = yload(fh, Loader=SafeLoader)
|
||||||
|
backup_config = full_config["autobackup"]
|
||||||
config["backup_root_path"] = backup_config["backup_root_path"]
|
config["backup_root_path"] = backup_config["backup_root_path"]
|
||||||
config["backup_root_suffix"] = backup_config["backup_root_suffix"]
|
config["backup_root_suffix"] = backup_config["backup_root_suffix"]
|
||||||
config["backup_tags"] = backup_config["backup_tags"]
|
config["backup_tags"] = backup_config["backup_tags"]
|
||||||
|
@ -226,13 +227,10 @@ def get_autobackup_config(CLI_CONFIG, cfgfile):
|
||||||
backup_root_path=backup_config["backup_root_path"]
|
backup_root_path=backup_config["backup_root_path"]
|
||||||
)
|
)
|
||||||
config["unmount_cmds"].append(_unmount_cmd)
|
config["unmount_cmds"].append(_unmount_cmd)
|
||||||
|
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
echo(CLI_CONFIG, "ERROR: Specified backup configuration does not exist!")
|
return "Backup configuration does not exist!"
|
||||||
exit(1)
|
|
||||||
except KeyError as e:
|
except KeyError as e:
|
||||||
echo(CLI_CONFIG, f"ERROR: Backup configuration is invalid: {e}")
|
return f"Backup configuration is invalid: {e}"
|
||||||
exit(1)
|
|
||||||
|
|
||||||
return config
|
return config
|
||||||
|
|
||||||
|
@ -240,6 +238,7 @@ def get_autobackup_config(CLI_CONFIG, cfgfile):
|
||||||
def vm_autobackup(
|
def vm_autobackup(
|
||||||
CLI_CONFIG,
|
CLI_CONFIG,
|
||||||
autobackup_cfgfile=DEFAULT_AUTOBACKUP_FILENAME,
|
autobackup_cfgfile=DEFAULT_AUTOBACKUP_FILENAME,
|
||||||
|
email_report=None,
|
||||||
force_full_flag=False,
|
force_full_flag=False,
|
||||||
cron_flag=False,
|
cron_flag=False,
|
||||||
):
|
):
|
||||||
|
@ -247,6 +246,48 @@ def vm_autobackup(
|
||||||
Perform automatic backups of VMs based on an external config file.
|
Perform automatic backups of VMs based on an external config file.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
if email_report is not None:
|
||||||
|
from email.utils import formatdate
|
||||||
|
from socket import gethostname
|
||||||
|
|
||||||
|
try:
|
||||||
|
with open(autobackup_cfgfile) as fh:
|
||||||
|
tmp_config = yload(fh, Loader=SafeLoader)
|
||||||
|
cluster = tmp_config["cluster"]["name"]
|
||||||
|
except Exception:
|
||||||
|
cluster = "unknown"
|
||||||
|
|
||||||
|
def send_execution_failure_report(error=None):
|
||||||
|
echo(CLI_CONFIG, f"Sending email failure report to {email_report}")
|
||||||
|
|
||||||
|
current_datetime = datetime.now()
|
||||||
|
email_datetime = formatdate(float(current_datetime.strftime("%s")))
|
||||||
|
|
||||||
|
email = list()
|
||||||
|
email.append(f"Date: {email_datetime}")
|
||||||
|
email.append(f"Subject: PVC Autobackup execution failure for cluster {cluster}")
|
||||||
|
|
||||||
|
recipients = list()
|
||||||
|
for recipient in email_report.split(","):
|
||||||
|
recipients.append(f"<{recipient}>")
|
||||||
|
email.append(f"To: {', '.join(recipients)}")
|
||||||
|
email.append(f"From: PVC Autobackup System <pvc@{gethostname()}>")
|
||||||
|
email.append("")
|
||||||
|
|
||||||
|
email.append(
|
||||||
|
f"A PVC autobackup has FAILED at {current_datetime} due to an execution error."
|
||||||
|
)
|
||||||
|
email.append("")
|
||||||
|
email.append("The reported error message is:")
|
||||||
|
email.append(f" {error}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
p = popen("/usr/sbin/sendmail -t", "w")
|
||||||
|
p.write("\n".join(email))
|
||||||
|
p.close()
|
||||||
|
except Exception as e:
|
||||||
|
echo(CLI_CONFIG, f"Failed to send report email: {e}")
|
||||||
|
|
||||||
# Validate that we are running on the current primary coordinator of the 'local' cluster connection
|
# Validate that we are running on the current primary coordinator of the 'local' cluster connection
|
||||||
real_connection = CLI_CONFIG["connection"]
|
real_connection = CLI_CONFIG["connection"]
|
||||||
CLI_CONFIG["connection"] = "local"
|
CLI_CONFIG["connection"] = "local"
|
||||||
|
@ -267,6 +308,10 @@ def vm_autobackup(
|
||||||
CLI_CONFIG,
|
CLI_CONFIG,
|
||||||
"Autobackup MUST be run from the cluster active primary coordinator using the 'local' connection. See '-h'/'--help' for details.",
|
"Autobackup MUST be run from the cluster active primary coordinator using the 'local' connection. See '-h'/'--help' for details.",
|
||||||
)
|
)
|
||||||
|
if email_report is not None:
|
||||||
|
send_execution_failure_report(
|
||||||
|
error=f"Autobackup run attempted from non-local connection or non-primary coordinator; got connection '{real_connection}', host '{DEFAULT_NODE_HOSTNAME}'."
|
||||||
|
)
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
# Ensure we're running as root, or show a warning & confirmation
|
# Ensure we're running as root, or show a warning & confirmation
|
||||||
|
@ -279,6 +324,14 @@ def vm_autobackup(
|
||||||
|
|
||||||
# Load our YAML config
|
# Load our YAML config
|
||||||
autobackup_config = get_autobackup_config(CLI_CONFIG, autobackup_cfgfile)
|
autobackup_config = get_autobackup_config(CLI_CONFIG, autobackup_cfgfile)
|
||||||
|
if not isinstance(autobackup_config, dict):
|
||||||
|
echo(CLI_CONFIG, f"ERROR: {autobackup_config}")
|
||||||
|
if email_report is not None:
|
||||||
|
send_execution_failure_report(error=f"{autobackup_config}")
|
||||||
|
exit(1)
|
||||||
|
|
||||||
|
# Get the start time of this run
|
||||||
|
autobackup_start_time = datetime.now()
|
||||||
|
|
||||||
# Get a list of all VMs on the cluster
|
# Get a list of all VMs on the cluster
|
||||||
# We don't do tag filtering here, because we could match an arbitrary number of tags; instead, we
|
# We don't do tag filtering here, because we could match an arbitrary number of tags; instead, we
|
||||||
|
@ -286,6 +339,8 @@ def vm_autobackup(
|
||||||
retcode, retdata = pvc.lib.vm.vm_list(CLI_CONFIG, None, None, None, None, None)
|
retcode, retdata = pvc.lib.vm.vm_list(CLI_CONFIG, None, None, None, None, None)
|
||||||
if not retcode:
|
if not retcode:
|
||||||
echo(CLI_CONFIG, f"ERROR: Failed to fetch VM list: {retdata}")
|
echo(CLI_CONFIG, f"ERROR: Failed to fetch VM list: {retdata}")
|
||||||
|
if email_report is not None:
|
||||||
|
send_execution_failure_report(error=f"Failed to fetch VM list: {retdata}")
|
||||||
exit(1)
|
exit(1)
|
||||||
cluster_vms = retdata
|
cluster_vms = retdata
|
||||||
|
|
||||||
|
@ -354,6 +409,8 @@ def vm_autobackup(
|
||||||
CLI_CONFIG,
|
CLI_CONFIG,
|
||||||
f"Exiting; command reports: {ret.stderr.decode().strip()}",
|
f"Exiting; command reports: {ret.stderr.decode().strip()}",
|
||||||
)
|
)
|
||||||
|
if email_report is not None:
|
||||||
|
send_execution_failure_report(error=ret.stderr.decode().strip())
|
||||||
exit(1)
|
exit(1)
|
||||||
else:
|
else:
|
||||||
echo(CLI_CONFIG, f"done. [{ttot.seconds}s]")
|
echo(CLI_CONFIG, f"done. [{ttot.seconds}s]")
|
||||||
|
@ -417,27 +474,26 @@ def vm_autobackup(
|
||||||
tend = datetime.now()
|
tend = datetime.now()
|
||||||
ttot = tend - tstart
|
ttot = tend - tstart
|
||||||
if not retcode:
|
if not retcode:
|
||||||
|
backup_datestring = findall(r"[0-9]{14}", retdata)[0]
|
||||||
echo(CLI_CONFIG, f"failed. [{ttot.seconds}s]")
|
echo(CLI_CONFIG, f"failed. [{ttot.seconds}s]")
|
||||||
echo(CLI_CONFIG, f"Skipping cleanups; command reports: {retdata}")
|
echo(
|
||||||
continue
|
CLI_CONFIG,
|
||||||
|
retdata.strip().replace(f"ERROR in backup {backup_datestring}: ", ""),
|
||||||
|
)
|
||||||
|
skip_cleanup = True
|
||||||
else:
|
else:
|
||||||
backup_datestring = findall(r"[0-9]{14}", retdata)[0]
|
backup_datestring = findall(r"[0-9]{14}", retdata)[0]
|
||||||
echo(
|
echo(
|
||||||
CLI_CONFIG,
|
CLI_CONFIG,
|
||||||
f"done. Backup '{backup_datestring}' created. [{ttot.seconds}s]",
|
f"done. Backup '{backup_datestring}' created. [{ttot.seconds}s]",
|
||||||
)
|
)
|
||||||
|
skip_cleanup = False
|
||||||
|
|
||||||
# Read backup file to get details
|
# Read backup file to get details
|
||||||
backup_json_file = f"{backup_path}/{backup_datestring}/pvcbackup.json"
|
backup_json_file = f"{backup_path}/{backup_datestring}/pvcbackup.json"
|
||||||
with open(backup_json_file) as fh:
|
with open(backup_json_file) as fh:
|
||||||
backup_json = jload(fh)
|
backup_json = jload(fh)
|
||||||
backup = {
|
tracked_backups.insert(0, backup_json)
|
||||||
"datestring": backup_json["datestring"],
|
|
||||||
"type": backup_json["type"],
|
|
||||||
"parent": backup_json["incremental_parent"],
|
|
||||||
"retained_snapshot": backup_json["retained_snapshot"],
|
|
||||||
}
|
|
||||||
tracked_backups.insert(0, backup)
|
|
||||||
|
|
||||||
# Delete any full backups that are expired
|
# Delete any full backups that are expired
|
||||||
marked_for_deletion = list()
|
marked_for_deletion = list()
|
||||||
|
@ -450,11 +506,22 @@ def vm_autobackup(
|
||||||
|
|
||||||
# Delete any incremental backups that depend on marked parents
|
# Delete any incremental backups that depend on marked parents
|
||||||
for backup in tracked_backups:
|
for backup in tracked_backups:
|
||||||
if backup["type"] == "incremental" and backup["parent"] in [
|
if backup["type"] == "incremental" and backup["incremental_parent"] in [
|
||||||
b["datestring"] for b in marked_for_deletion
|
b["datestring"] for b in marked_for_deletion
|
||||||
]:
|
]:
|
||||||
marked_for_deletion.append(backup)
|
marked_for_deletion.append(backup)
|
||||||
|
|
||||||
|
if len(marked_for_deletion) > 0:
|
||||||
|
if skip_cleanup:
|
||||||
|
echo(
|
||||||
|
CLI_CONFIG,
|
||||||
|
f"Skipping cleanups for {len(marked_for_deletion)} aged-out backups due to backup failure.",
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
echo(
|
||||||
|
CLI_CONFIG,
|
||||||
|
f"Running cleanups for {len(marked_for_deletion)} aged-out backups...",
|
||||||
|
)
|
||||||
# Execute deletes
|
# Execute deletes
|
||||||
for backup_to_delete in marked_for_deletion:
|
for backup_to_delete in marked_for_deletion:
|
||||||
echo(
|
echo(
|
||||||
|
@ -477,7 +544,6 @@ def vm_autobackup(
|
||||||
CLI_CONFIG,
|
CLI_CONFIG,
|
||||||
f"Skipping removal from tracked backups; command reports: {retdata}",
|
f"Skipping removal from tracked backups; command reports: {retdata}",
|
||||||
)
|
)
|
||||||
continue
|
|
||||||
else:
|
else:
|
||||||
tracked_backups.remove(backup_to_delete)
|
tracked_backups.remove(backup_to_delete)
|
||||||
echo(CLI_CONFIG, f"done. [{ttot.seconds}s]")
|
echo(CLI_CONFIG, f"done. [{ttot.seconds}s]")
|
||||||
|
@ -514,3 +580,78 @@ def vm_autobackup(
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
echo(CLI_CONFIG, f"done. [{ttot.seconds}s]")
|
echo(CLI_CONFIG, f"done. [{ttot.seconds}s]")
|
||||||
|
|
||||||
|
autobackup_end_time = datetime.now()
|
||||||
|
autobackup_total_time = autobackup_end_time - autobackup_start_time
|
||||||
|
|
||||||
|
# Handle report emailing
|
||||||
|
if email_report is not None:
|
||||||
|
echo(CLI_CONFIG, "")
|
||||||
|
echo(CLI_CONFIG, f"Sending email summary report to {email_report}")
|
||||||
|
backup_summary = dict()
|
||||||
|
for vm in backup_vms:
|
||||||
|
backup_path = f"{backup_suffixed_path}/{vm}"
|
||||||
|
autobackup_state_file = f"{backup_path}/.autobackup.json"
|
||||||
|
if not path.exists(backup_path) or not path.exists(autobackup_state_file):
|
||||||
|
# There are no new backups so the list is empty
|
||||||
|
state_data = dict()
|
||||||
|
tracked_backups = list()
|
||||||
|
else:
|
||||||
|
with open(autobackup_state_file) as fh:
|
||||||
|
state_data = jload(fh)
|
||||||
|
tracked_backups = state_data["tracked_backups"]
|
||||||
|
|
||||||
|
backup_summary[vm] = tracked_backups
|
||||||
|
|
||||||
|
current_datetime = datetime.now()
|
||||||
|
email_datetime = formatdate(float(current_datetime.strftime("%s")))
|
||||||
|
|
||||||
|
email = list()
|
||||||
|
email.append(f"Date: {email_datetime}")
|
||||||
|
email.append(f"Subject: PVC Autobackup report for cluster {cluster}")
|
||||||
|
|
||||||
|
recipients = list()
|
||||||
|
for recipient in email_report.split(","):
|
||||||
|
recipients.append(f"<{recipient}>")
|
||||||
|
email.append(f"To: {', '.join(recipients)}")
|
||||||
|
email.append(f"From: PVC Autobackup System <pvc@{gethostname()}>")
|
||||||
|
email.append("")
|
||||||
|
|
||||||
|
email.append(
|
||||||
|
f"A PVC autobackup has been completed at {current_datetime} in {autobackup_total_time}."
|
||||||
|
)
|
||||||
|
email.append("")
|
||||||
|
email.append(
|
||||||
|
"The following is a summary of all current VM backups after cleanups, most recent first:"
|
||||||
|
)
|
||||||
|
email.append("")
|
||||||
|
|
||||||
|
for vm in backup_vms:
|
||||||
|
email.append(f"VM {vm}:")
|
||||||
|
for backup in backup_summary[vm]:
|
||||||
|
datestring = backup.get("datestring")
|
||||||
|
backup_date = datetime.strptime(datestring, "%Y%m%d%H%M%S")
|
||||||
|
if backup.get("result", False):
|
||||||
|
email.append(
|
||||||
|
f" {backup_date}: Success in {backup.get('runtime_secs', 0)} seconds, ID {datestring}, type {backup.get('type', 'unknown')}"
|
||||||
|
)
|
||||||
|
email.append(
|
||||||
|
f" Backup contains {len(backup.get('backup_files'))} files totaling {pvc.lib.storage.format_bytes_tohuman(backup.get('backup_size_bytes', 0))} ({backup.get('backup_size_bytes', 0)} bytes)"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
email.append(
|
||||||
|
f" {backup_date}: Failure in {backup.get('runtime_secs', 0)} seconds, ID {datestring}, type {backup.get('type', 'unknown')}"
|
||||||
|
)
|
||||||
|
email.append(
|
||||||
|
f" {backup.get('result_message')}"
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
p = popen("/usr/sbin/sendmail -t", "w")
|
||||||
|
p.write("\n".join(email))
|
||||||
|
p.close()
|
||||||
|
except Exception as e:
|
||||||
|
echo(CLI_CONFIG, f"Failed to send report email: {e}")
|
||||||
|
|
||||||
|
echo(CLI_CONFIG, "")
|
||||||
|
echo(CLI_CONFIG, f"Autobackup completed in {autobackup_total_time}.")
|
||||||
|
|
|
@ -32,6 +32,7 @@ from json import dump as jdump
|
||||||
from json import load as jload
|
from json import load as jload
|
||||||
from json import loads as jloads
|
from json import loads as jloads
|
||||||
from libvirt import open as lvopen
|
from libvirt import open as lvopen
|
||||||
|
from os import scandir
|
||||||
from shutil import rmtree
|
from shutil import rmtree
|
||||||
from socket import gethostname
|
from socket import gethostname
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
|
@ -1183,12 +1184,15 @@ def backup_vm(
|
||||||
if not re.match(r"^/", backup_path):
|
if not re.match(r"^/", backup_path):
|
||||||
return (
|
return (
|
||||||
False,
|
False,
|
||||||
f"ERROR: Target path {backup_path} is not a valid absolute path on the primary coordinator!",
|
f"ERROR in backup {datestring}: Target path {backup_path} is not a valid absolute path on the primary coordinator!",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Ensure that backup_path (on this node) exists
|
# Ensure that backup_path (on this node) exists
|
||||||
if not os.path.isdir(backup_path):
|
if not os.path.isdir(backup_path):
|
||||||
return False, f"ERROR: Target path {backup_path} does not exist!"
|
return (
|
||||||
|
False,
|
||||||
|
f"ERROR in backup {datestring}: Target path {backup_path} does not exist!",
|
||||||
|
)
|
||||||
|
|
||||||
# 1a. Create destination directory
|
# 1a. Create destination directory
|
||||||
vm_target_root = f"{backup_path}/{domain}"
|
vm_target_root = f"{backup_path}/{domain}"
|
||||||
|
@ -1197,7 +1201,10 @@ def backup_vm(
|
||||||
try:
|
try:
|
||||||
os.makedirs(vm_target_backup)
|
os.makedirs(vm_target_backup)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return False, f"ERROR: Failed to create backup directory: {e}"
|
return (
|
||||||
|
False,
|
||||||
|
f"ERROR in backup {datestring}: Failed to create backup directory: {e}",
|
||||||
|
)
|
||||||
|
|
||||||
tstart = time.time()
|
tstart = time.time()
|
||||||
backup_type = "incremental" if incremental_parent is not None else "full"
|
backup_type = "incremental" if incremental_parent is not None else "full"
|
||||||
|
@ -1222,7 +1229,7 @@ def backup_vm(
|
||||||
"retained_snapshot": retain_snapshot,
|
"retained_snapshot": retain_snapshot,
|
||||||
"result": result,
|
"result": result,
|
||||||
"result_message": result_message,
|
"result_message": result_message,
|
||||||
"runtime_secs": ttot.seconds,
|
"runtime_secs": ttot,
|
||||||
"vm_detail": vm_detail,
|
"vm_detail": vm_detail,
|
||||||
"backup_files": backup_files,
|
"backup_files": backup_files,
|
||||||
"backup_size_bytes": backup_files_size,
|
"backup_size_bytes": backup_files_size,
|
||||||
|
@ -1233,28 +1240,26 @@ def backup_vm(
|
||||||
# 2. Validations part 2
|
# 2. Validations part 2
|
||||||
# Disallow retaining snapshots with an incremental parent
|
# Disallow retaining snapshots with an incremental parent
|
||||||
if incremental_parent is not None and retain_snapshot:
|
if incremental_parent is not None and retain_snapshot:
|
||||||
result_message = (
|
error_message = "Retaining snapshots of incremental backups is not supported!"
|
||||||
"ERROR: Retaining snapshots of incremental backups is not supported!"
|
write_pvcbackup_json(result=False, result_message=f"ERROR: {error_message}")
|
||||||
)
|
|
||||||
write_pvcbackup_json(result=False, result_message=result_message)
|
|
||||||
return (
|
return (
|
||||||
False,
|
False,
|
||||||
result_message,
|
f"ERROR in backup {datestring}: {error_message}",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Validate that VM exists in cluster
|
# Validate that VM exists in cluster
|
||||||
dom_uuid = getDomainUUID(zkhandler, domain)
|
dom_uuid = getDomainUUID(zkhandler, domain)
|
||||||
if not dom_uuid:
|
if not dom_uuid:
|
||||||
result_message = f'ERROR: Could not find VM "{domain}" in the cluster!'
|
error_message = f'Could not find VM "{domain}" in the cluster!'
|
||||||
write_pvcbackup_json(result=False, result_message=result_message)
|
write_pvcbackup_json(result=False, result_message=f"ERROR: {error_message}")
|
||||||
return False, result_message
|
return False, f"ERROR in backup {datestring}: {error_message}"
|
||||||
|
|
||||||
# 3. Get information about VM
|
# 3. Get information about VM
|
||||||
vm_detail = get_list(zkhandler, limit=dom_uuid, is_fuzzy=False)[1][0]
|
vm_detail = get_list(zkhandler, limit=dom_uuid, is_fuzzy=False)[1][0]
|
||||||
if not isinstance(vm_detail, dict):
|
if not isinstance(vm_detail, dict):
|
||||||
result_message = f"ERROR: VM listing returned invalid data: {vm_detail}"
|
error_message = f"VM listing returned invalid data: {vm_detail}"
|
||||||
write_pvcbackup_json(result=False, result_message=result_message)
|
write_pvcbackup_json(result=False, result_message=f"ERROR: {error_message}")
|
||||||
return False, result_message
|
return False, f"ERROR in backup {datestring}: {error_message}"
|
||||||
|
|
||||||
vm_volumes = list()
|
vm_volumes = list()
|
||||||
for disk in vm_detail["disks"]:
|
for disk in vm_detail["disks"]:
|
||||||
|
@ -1270,39 +1275,47 @@ def backup_vm(
|
||||||
elif len(retdata) > 1:
|
elif len(retdata) > 1:
|
||||||
retdata = "Multiple volumes returned."
|
retdata = "Multiple volumes returned."
|
||||||
|
|
||||||
result_message = (
|
error_message = (
|
||||||
f"ERROR: Failed to get volume details for {pool}/{volume}: {retdata}"
|
f"Failed to get volume details for {pool}/{volume}: {retdata}"
|
||||||
)
|
)
|
||||||
write_pvcbackup_json(
|
write_pvcbackup_json(
|
||||||
result=False, result_message=result_message, vm_detail=vm_detail
|
result=False,
|
||||||
|
result_message=f"ERROR: {error_message}",
|
||||||
|
vm_detail=vm_detail,
|
||||||
)
|
)
|
||||||
return (
|
return (
|
||||||
False,
|
False,
|
||||||
result_message,
|
f"ERROR in backup {datestring}: {error_message}",
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
size = retdata[0]["stats"]["size"]
|
size = retdata[0]["stats"]["size"]
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return False, f"ERROR: Failed to get volume size for {pool}/{volume}: {e}"
|
error_message = f"Failed to get volume size for {pool}/{volume}: {e}"
|
||||||
|
write_pvcbackup_json(
|
||||||
|
result=False,
|
||||||
|
result_message=f"ERROR: {error_message}",
|
||||||
|
vm_detail=vm_detail,
|
||||||
|
)
|
||||||
|
return (
|
||||||
|
False,
|
||||||
|
f"ERROR in backup {datestring}: {error_message}",
|
||||||
|
)
|
||||||
|
|
||||||
vm_volumes.append((pool, volume, size))
|
vm_volumes.append((pool, volume, size))
|
||||||
|
|
||||||
# 4a. Validate that all volumes exist (they should, but just in case)
|
# 4a. Validate that all volumes exist (they should, but just in case)
|
||||||
for pool, volume, _ in vm_volumes:
|
for pool, volume, _ in vm_volumes:
|
||||||
if not ceph.verifyVolume(zkhandler, pool, volume):
|
if not ceph.verifyVolume(zkhandler, pool, volume):
|
||||||
result_message = (
|
error_message = f"VM defines a volume {pool}/{volume} which does not exist!"
|
||||||
f"ERROR: VM defines a volume {pool}/{volume} which does not exist!"
|
|
||||||
)
|
|
||||||
write_pvcbackup_json(
|
write_pvcbackup_json(
|
||||||
result=False,
|
result=False,
|
||||||
result_message=result_message,
|
result_message=f"ERROR: {error_message}",
|
||||||
vm_detail=vm_detail,
|
vm_detail=vm_detail,
|
||||||
vm_volumes=vm_volumes,
|
|
||||||
)
|
)
|
||||||
return (
|
return (
|
||||||
False,
|
False,
|
||||||
result_message,
|
f"ERROR in backup {datestring}: {error_message}",
|
||||||
)
|
)
|
||||||
|
|
||||||
# 4b. Validate that, if an incremental_parent is given, it is valid
|
# 4b. Validate that, if an incremental_parent is given, it is valid
|
||||||
|
@ -1312,16 +1325,15 @@ def backup_vm(
|
||||||
if not ceph.verifySnapshot(
|
if not ceph.verifySnapshot(
|
||||||
zkhandler, pool, volume, f"backup_{incremental_parent}"
|
zkhandler, pool, volume, f"backup_{incremental_parent}"
|
||||||
):
|
):
|
||||||
result_message = f"ERROR: Incremental parent {incremental_parent} given, but no snapshots were found; cannot export an incremental backup."
|
error_message = f"Incremental parent {incremental_parent} given, but no snapshots were found; cannot export an incremental backup."
|
||||||
write_pvcbackup_json(
|
write_pvcbackup_json(
|
||||||
result=False,
|
result=False,
|
||||||
result_message=result_message,
|
result_message=f"ERROR: {error_message}",
|
||||||
vm_detail=vm_detail,
|
vm_detail=vm_detail,
|
||||||
vm_volumes=vm_volumes,
|
|
||||||
)
|
)
|
||||||
return (
|
return (
|
||||||
False,
|
False,
|
||||||
result_message,
|
f"ERROR in backup {datestring}: {error_message}",
|
||||||
)
|
)
|
||||||
|
|
||||||
export_fileext = "rbddiff"
|
export_fileext = "rbddiff"
|
||||||
|
@ -1334,35 +1346,31 @@ def backup_vm(
|
||||||
# 5. Take a snapshot of each disk with the name @backup_{datestring}
|
# 5. Take a snapshot of each disk with the name @backup_{datestring}
|
||||||
is_snapshot_create_failed = False
|
is_snapshot_create_failed = False
|
||||||
which_snapshot_create_failed = list()
|
which_snapshot_create_failed = list()
|
||||||
msg_snapshot_create_failed = list()
|
|
||||||
for pool, volume, _ in vm_volumes:
|
for pool, volume, _ in vm_volumes:
|
||||||
retcode, retmsg = ceph.add_snapshot(zkhandler, pool, volume, snapshot_name)
|
retcode, retmsg = ceph.add_snapshot(zkhandler, pool, volume, snapshot_name)
|
||||||
if not retcode:
|
if not retcode:
|
||||||
is_snapshot_create_failed = True
|
is_snapshot_create_failed = True
|
||||||
which_snapshot_create_failed.append(f"{pool}/{volume}")
|
which_snapshot_create_failed.append(f"{pool}/{volume}")
|
||||||
msg_snapshot_create_failed.append(retmsg)
|
|
||||||
|
|
||||||
if is_snapshot_create_failed:
|
if is_snapshot_create_failed:
|
||||||
for pool, volume, _ in vm_volumes:
|
for pool, volume, _ in vm_volumes:
|
||||||
if ceph.verifySnapshot(zkhandler, pool, volume, snapshot_name):
|
if ceph.verifySnapshot(zkhandler, pool, volume, snapshot_name):
|
||||||
ceph.remove_snapshot(zkhandler, pool, volume, snapshot_name)
|
ceph.remove_snapshot(zkhandler, pool, volume, snapshot_name)
|
||||||
|
|
||||||
result_message = f'ERROR: Failed to create snapshot for volume(s) {", ".join(which_snapshot_create_failed)}: {", ".join(msg_snapshot_create_failed)}'
|
error_message = f'Failed to create snapshot for volume(s) {", ".join(which_snapshot_create_failed)}'
|
||||||
write_pvcbackup_json(
|
write_pvcbackup_json(
|
||||||
result=False,
|
result=False,
|
||||||
result_message=result_message,
|
result_message=f"ERROR: {error_message}",
|
||||||
vm_detail=vm_detail,
|
vm_detail=vm_detail,
|
||||||
vm_volumes=vm_volumes,
|
|
||||||
)
|
)
|
||||||
return (
|
return (
|
||||||
False,
|
False,
|
||||||
result_message,
|
f"ERROR in backup {datestring}: {error_message}",
|
||||||
)
|
)
|
||||||
|
|
||||||
# 6. Dump snapshot to folder with `rbd export` (full) or `rbd export-diff` (incremental)
|
# 6. Dump snapshot to folder with `rbd export` (full) or `rbd export-diff` (incremental)
|
||||||
is_snapshot_export_failed = False
|
is_snapshot_export_failed = False
|
||||||
which_snapshot_export_failed = list()
|
which_snapshot_export_failed = list()
|
||||||
msg_snapshot_export_failed = list()
|
|
||||||
backup_files = list()
|
backup_files = list()
|
||||||
for pool, volume, size in vm_volumes:
|
for pool, volume, size in vm_volumes:
|
||||||
if incremental_parent is not None:
|
if incremental_parent is not None:
|
||||||
|
@ -1373,7 +1381,6 @@ def backup_vm(
|
||||||
if retcode:
|
if retcode:
|
||||||
is_snapshot_export_failed = True
|
is_snapshot_export_failed = True
|
||||||
which_snapshot_export_failed.append(f"{pool}/{volume}")
|
which_snapshot_export_failed.append(f"{pool}/{volume}")
|
||||||
msg_snapshot_export_failed.append(stderr)
|
|
||||||
else:
|
else:
|
||||||
backup_files.append(
|
backup_files.append(
|
||||||
(f"pvcdisks/{pool}.{volume}.{export_fileext}", size)
|
(f"pvcdisks/{pool}.{volume}.{export_fileext}", size)
|
||||||
|
@ -1385,32 +1392,44 @@ def backup_vm(
|
||||||
if retcode:
|
if retcode:
|
||||||
is_snapshot_export_failed = True
|
is_snapshot_export_failed = True
|
||||||
which_snapshot_export_failed.append(f"{pool}/{volume}")
|
which_snapshot_export_failed.append(f"{pool}/{volume}")
|
||||||
msg_snapshot_export_failed.append(stderr)
|
else:
|
||||||
|
backup_files.append(
|
||||||
|
(f"pvcdisks/{pool}.{volume}.{export_fileext}", size)
|
||||||
|
)
|
||||||
|
|
||||||
backup_files_size = os.path.getsize(vm_target_backup)
|
def get_dir_size(path):
|
||||||
|
total = 0
|
||||||
|
with scandir(path) as it:
|
||||||
|
for entry in it:
|
||||||
|
if entry.is_file():
|
||||||
|
total += entry.stat().st_size
|
||||||
|
elif entry.is_dir():
|
||||||
|
total += get_dir_size(entry.path)
|
||||||
|
return total
|
||||||
|
|
||||||
|
backup_files_size = get_dir_size(vm_target_backup)
|
||||||
|
|
||||||
if is_snapshot_export_failed:
|
if is_snapshot_export_failed:
|
||||||
for pool, volume, _ in vm_volumes:
|
for pool, volume, _ in vm_volumes:
|
||||||
if ceph.verifySnapshot(zkhandler, pool, volume, snapshot_name):
|
if ceph.verifySnapshot(zkhandler, pool, volume, snapshot_name):
|
||||||
ceph.remove_snapshot(zkhandler, pool, volume, snapshot_name)
|
ceph.remove_snapshot(zkhandler, pool, volume, snapshot_name)
|
||||||
|
|
||||||
result_message = f'ERROR: Failed to export snapshot for volume(s) {", ".join(which_snapshot_export_failed)}: {", ".join(msg_snapshot_export_failed)}'
|
error_message = f'Failed to export snapshot for volume(s) {", ".join(which_snapshot_export_failed)}'
|
||||||
write_pvcbackup_json(
|
write_pvcbackup_json(
|
||||||
result=False,
|
result=False,
|
||||||
result_message=result_message,
|
result_message=f"ERROR: {error_message}",
|
||||||
vm_detail=vm_detail,
|
vm_detail=vm_detail,
|
||||||
backup_files=backup_files,
|
backup_files=backup_files,
|
||||||
backup_files_size=backup_files_size,
|
backup_files_size=backup_files_size,
|
||||||
)
|
)
|
||||||
return (
|
return (
|
||||||
False,
|
False,
|
||||||
result_message,
|
f"ERROR in backup {datestring}: {error_message}",
|
||||||
)
|
)
|
||||||
|
|
||||||
# 8. Remove snapshots if retain_snapshot is False
|
# 8. Remove snapshots if retain_snapshot is False
|
||||||
is_snapshot_remove_failed = False
|
is_snapshot_remove_failed = False
|
||||||
which_snapshot_remove_failed = list()
|
which_snapshot_remove_failed = list()
|
||||||
msg_snapshot_remove_failed = list()
|
|
||||||
if not retain_snapshot:
|
if not retain_snapshot:
|
||||||
for pool, volume, _ in vm_volumes:
|
for pool, volume, _ in vm_volumes:
|
||||||
if ceph.verifySnapshot(zkhandler, pool, volume, snapshot_name):
|
if ceph.verifySnapshot(zkhandler, pool, volume, snapshot_name):
|
||||||
|
@ -1420,7 +1439,6 @@ def backup_vm(
|
||||||
if not retcode:
|
if not retcode:
|
||||||
is_snapshot_remove_failed = True
|
is_snapshot_remove_failed = True
|
||||||
which_snapshot_remove_failed.append(f"{pool}/{volume}")
|
which_snapshot_remove_failed.append(f"{pool}/{volume}")
|
||||||
msg_snapshot_remove_failed.append(retmsg)
|
|
||||||
|
|
||||||
tend = time.time()
|
tend = time.time()
|
||||||
ttot = round(tend - tstart, 2)
|
ttot = round(tend - tstart, 2)
|
||||||
|
@ -1429,7 +1447,7 @@ def backup_vm(
|
||||||
|
|
||||||
if is_snapshot_remove_failed:
|
if is_snapshot_remove_failed:
|
||||||
retlines.append(
|
retlines.append(
|
||||||
f"WARNING: Failed to remove snapshot(s) as requested for volume(s) {', '.join(which_snapshot_remove_failed)}: {', '.join(msg_snapshot_remove_failed)}"
|
f"WARNING: Failed to remove snapshot(s) as requested for volume(s) {', '.join(which_snapshot_remove_failed)}"
|
||||||
)
|
)
|
||||||
|
|
||||||
myhostname = gethostname().split(".")[0]
|
myhostname = gethostname().split(".")[0]
|
||||||
|
@ -1437,7 +1455,7 @@ def backup_vm(
|
||||||
result_message = f"Successfully backed up VM '{domain}' ({backup_type}@{datestring}, snapshots retained) to '{myhostname}:{backup_path}' in {ttot}s."
|
result_message = f"Successfully backed up VM '{domain}' ({backup_type}@{datestring}, snapshots retained) to '{myhostname}:{backup_path}' in {ttot}s."
|
||||||
else:
|
else:
|
||||||
result_message = f"Successfully backed up VM '{domain}' ({backup_type}@{datestring}) to '{myhostname}:{backup_path}' in {ttot}s."
|
result_message = f"Successfully backed up VM '{domain}' ({backup_type}@{datestring}) to '{myhostname}:{backup_path}' in {ttot}s."
|
||||||
retlines.appendr(result_message)
|
retlines.append(result_message)
|
||||||
|
|
||||||
write_pvcbackup_json(
|
write_pvcbackup_json(
|
||||||
result=True,
|
result=True,
|
||||||
|
@ -1495,7 +1513,6 @@ def remove_backup(zkhandler, domain, backup_path, datestring):
|
||||||
# 2. Remove snapshots
|
# 2. Remove snapshots
|
||||||
is_snapshot_remove_failed = False
|
is_snapshot_remove_failed = False
|
||||||
which_snapshot_remove_failed = list()
|
which_snapshot_remove_failed = list()
|
||||||
msg_snapshot_remove_failed = list()
|
|
||||||
if backup_source_details["retained_snapshot"]:
|
if backup_source_details["retained_snapshot"]:
|
||||||
for volume_file, _ in backup_source_details.get("backup_files"):
|
for volume_file, _ in backup_source_details.get("backup_files"):
|
||||||
pool, volume, _ = volume_file.split("/")[-1].split(".")
|
pool, volume, _ = volume_file.split("/")[-1].split(".")
|
||||||
|
@ -1504,7 +1521,6 @@ def remove_backup(zkhandler, domain, backup_path, datestring):
|
||||||
if not retcode:
|
if not retcode:
|
||||||
is_snapshot_remove_failed = True
|
is_snapshot_remove_failed = True
|
||||||
which_snapshot_remove_failed.append(f"{pool}/{volume}")
|
which_snapshot_remove_failed.append(f"{pool}/{volume}")
|
||||||
msg_snapshot_remove_failed.append(retmsg)
|
|
||||||
|
|
||||||
# 3. Remove files
|
# 3. Remove files
|
||||||
is_files_remove_failed = False
|
is_files_remove_failed = False
|
||||||
|
@ -1521,7 +1537,7 @@ def remove_backup(zkhandler, domain, backup_path, datestring):
|
||||||
|
|
||||||
if is_snapshot_remove_failed:
|
if is_snapshot_remove_failed:
|
||||||
retlines.append(
|
retlines.append(
|
||||||
f"WARNING: Failed to remove snapshot(s) as requested for volume(s) {', '.join(which_snapshot_remove_failed)}: {', '.join(msg_snapshot_remove_failed)}"
|
f"WARNING: Failed to remove snapshot(s) as requested for volume(s) {', '.join(which_snapshot_remove_failed)}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if is_files_remove_failed:
|
if is_files_remove_failed:
|
||||||
|
@ -1620,7 +1636,6 @@ def restore_vm(zkhandler, domain, backup_path, datestring, retain_snapshot=False
|
||||||
# 4. Import volumes
|
# 4. Import volumes
|
||||||
is_snapshot_remove_failed = False
|
is_snapshot_remove_failed = False
|
||||||
which_snapshot_remove_failed = list()
|
which_snapshot_remove_failed = list()
|
||||||
msg_snapshot_remove_failed = list()
|
|
||||||
if incremental_parent is not None:
|
if incremental_parent is not None:
|
||||||
for volume_file, volume_size in backup_source_details.get("backup_files"):
|
for volume_file, volume_size in backup_source_details.get("backup_files"):
|
||||||
pool, volume, _ = volume_file.split("/")[-1].split(".")
|
pool, volume, _ = volume_file.split("/")[-1].split(".")
|
||||||
|
@ -1696,14 +1711,12 @@ def restore_vm(zkhandler, domain, backup_path, datestring, retain_snapshot=False
|
||||||
if retcode:
|
if retcode:
|
||||||
is_snapshot_remove_failed = True
|
is_snapshot_remove_failed = True
|
||||||
which_snapshot_remove_failed.append(f"{pool}/{volume}")
|
which_snapshot_remove_failed.append(f"{pool}/{volume}")
|
||||||
msg_snapshot_remove_failed.append(retmsg)
|
|
||||||
retcode, stdout, stderr = common.run_os_command(
|
retcode, stdout, stderr = common.run_os_command(
|
||||||
f"rbd snap rm {pool}/{volume}@backup_{datestring}"
|
f"rbd snap rm {pool}/{volume}@backup_{datestring}"
|
||||||
)
|
)
|
||||||
if retcode:
|
if retcode:
|
||||||
is_snapshot_remove_failed = True
|
is_snapshot_remove_failed = True
|
||||||
which_snapshot_remove_failed.append(f"{pool}/{volume}")
|
which_snapshot_remove_failed.append(f"{pool}/{volume}")
|
||||||
msg_snapshot_remove_failed.append(retmsg)
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
for volume_file, volume_size in backup_source_details.get("backup_files"):
|
for volume_file, volume_size in backup_source_details.get("backup_files"):
|
||||||
|
@ -1772,7 +1785,7 @@ def restore_vm(zkhandler, domain, backup_path, datestring, retain_snapshot=False
|
||||||
|
|
||||||
if is_snapshot_remove_failed:
|
if is_snapshot_remove_failed:
|
||||||
retlines.append(
|
retlines.append(
|
||||||
f"WARNING: Failed to remove hanging snapshot(s) as requested for volume(s) {', '.join(which_snapshot_remove_failed)}: {', '.join(msg_snapshot_remove_failed)}"
|
f"WARNING: Failed to remove hanging snapshot(s) as requested for volume(s) {', '.join(which_snapshot_remove_failed)}"
|
||||||
)
|
)
|
||||||
|
|
||||||
myhostname = gethostname().split(".")[0]
|
myhostname = gethostname().split(".")[0]
|
||||||
|
|
Loading…
Reference in New Issue