Compare commits

...

20 Commits

Author SHA1 Message Date
33f905459a Implement VM rollback
Closes #184
2024-08-16 10:47:18 -04:00
174e6e08e3 Correct issues with VM output formats 2024-08-16 10:46:25 -04:00
9f85c92dff Handle missing or empty snapshot lists 2024-08-16 10:46:25 -04:00
4b30d2f58a Always show snapshots 2024-08-16 10:46:25 -04:00
2fcee28fed Hide topology in long output 2024-08-16 10:46:25 -04:00
1f18e88c06 Add snapshots to VM info details 2024-08-16 10:46:25 -04:00
359191c83f Ensure snapshot name does not already exist 2024-08-16 10:46:25 -04:00
3d0d5e63f6 Make default snap name just the datestring 2024-08-16 10:46:25 -04:00
e6bfbb6d45 Actually fix incorrect naming bug 2024-08-16 10:46:25 -04:00
b80f9e28dc Add human-readable age to snapshots
This is parsed server-side for consistent timing and to simplify the API
consumers.
2024-08-16 10:46:25 -04:00
fbd5b3cca3 Remove is_backup flag for snapshots
This won't be needed for anything.
2024-08-16 10:46:25 -04:00
2b1082590e Fix bug in snapshot removal 2024-08-16 10:46:25 -04:00
a4ca112128 Add snapshot count to VM list 2024-08-16 10:46:25 -04:00
6fc7f45027 Add snapshot lists and timestamp
Adds snapshots to the list of data in VM objects
2024-08-16 10:46:25 -04:00
0c240a5129 Add VM snapshot removal 2024-08-16 10:46:25 -04:00
553c1e670e Add VM snapshots functionality
Adds the ability to create snapshots of an entire VM, including all its
RBD disks and the VM XML config, though not any PVC metadata.
2024-08-16 10:46:25 -04:00
942de9f15b Add better exception handling for XML configs 2024-08-16 10:46:04 -04:00
9aca8e215b Run IPMI check 3 times with 2s timeout
Avoids potential timeouts or deadlocks, and retries if a single try
fails.
2024-07-28 12:36:01 -04:00
97329bb90d Sort Ceph pool data by name
There is no guarantee that both commands output the pools in the same
order, so sort them by name first so the iteration over the pools by ID
is successful.
2024-07-22 13:26:27 -04:00
c186015d6f Add check for invalid profile 2024-07-13 17:13:40 -04:00
11 changed files with 958 additions and 44 deletions

View File

@ -1610,6 +1610,32 @@ class API_VM_Root(Resource):
protected:
type: boolean
description: Whether the tag is protected or not
snapshots:
type: array
description: The snapshot(s) of the VM
items:
type: object
id: VMSnapshot
properties:
name:
type: string
description: The name of the snapshot
timestamp:
type: string
description: Unix timestamp of the snapshot
age:
type: string
description: Human-readable age of the snapshot in the largest viable unit: seconds, minutes, hours, days
rbd_snapshots:
type: array
items:
type: string
description: A list of RBD volume snapshots belonging to this VM snapshot, in '<pool>/<volume>@<snapshot>' format
xml_diff_lines:
type: array
items:
type: string
description: A list of strings representing the lines of an (n=1) unified diff between the current VM XML specification and the snapshot VM XML specification
description:
type: string
description: The description of the VM
@ -3086,6 +3112,144 @@ class API_VM_Restore(Resource):
api.add_resource(API_VM_Restore, "/vm/<vm>/restore")
# /vm/<vm>/snapshot
class API_VM_Snapshot(Resource):
    # Handlers for creating (POST) and removing (DELETE) a snapshot of a single VM.
    # Each handler reads "snapshot_name" from the parsed request arguments and
    # returns whatever the matching api_helper function returns.

    # snapshot_name is optional for creation (required=False, empty helptext)
    @RequestParser(
        [
            {
                "name": "snapshot_name",
                "required": False,
                "helptext": "",
            },
        ]
    )
    @Authenticator
    def post(self, vm, reqargs):
        """
        Take a snapshot of a VM's disks and configuration
        ---
        tags:
          - vm
        parameters:
          - in: query
            name: snapshot_name
            type: string
            required: false
            description: A custom name for the snapshot instead of autogeneration by date
        responses:
          200:
            description: OK
            schema:
              type: object
              id: Message
          400:
            description: Execution error
            schema:
              type: object
              id: Message
          404:
            description: Not found
            schema:
              type: object
              id: Message
        """
        # None is passed through when the caller did not supply a name
        snapshot_name = reqargs.get("snapshot_name", None)
        return api_helper.create_vm_snapshot(vm, snapshot_name=snapshot_name)

    # snapshot_name is mandatory for removal (required=True)
    @RequestParser(
        [
            {
                "name": "snapshot_name",
                "required": True,
                "helptext": "A snapshot name must be specified",
            },
        ]
    )
    @Authenticator
    def delete(self, vm, reqargs):
        """
        Remove a snapshot of a VM's disks and configuration
        ---
        tags:
          - vm
        parameters:
          - in: query
            name: snapshot_name
            type: string
            required: true
            description: The name of the snapshot to remove
        responses:
          200:
            description: OK
            schema:
              type: object
              id: Message
          400:
            description: Execution error
            schema:
              type: object
              id: Message
          404:
            description: Not found
            schema:
              type: object
              id: Message
        """
        # NOTE(review): the None default is presumably never hit because the
        # parser marks snapshot_name as required - confirm against RequestParser
        snapshot_name = reqargs.get("snapshot_name", None)
        return api_helper.remove_vm_snapshot(vm, snapshot_name)
# Expose the snapshot create/remove handlers under the per-VM snapshot route
api.add_resource(API_VM_Snapshot, "/vm/<vm>/snapshot")
# /vm/<vm>/snapshot/rollback
class API_VM_Snapshot_Rollback(Resource):
    # Handler for rolling a single VM back to one of its snapshots (POST only).
    # It returns whatever api_helper.rollback_vm_snapshot returns.

    # snapshot_name is mandatory for a rollback (required=True)
    @RequestParser(
        [
            {
                "name": "snapshot_name",
                "required": True,
                "helptext": "A snapshot name must be specified",
            },
        ]
    )
    @Authenticator
    def post(self, vm, reqargs):
        """
        Roll back to a snapshot of a VM's disks and configuration
        ---
        tags:
          - vm
        parameters:
          - in: query
            name: snapshot_name
            type: string
            required: true
            description: The name of the snapshot to roll back to
        responses:
          200:
            description: OK
            schema:
              type: object
              id: Message
          400:
            description: Execution error
            schema:
              type: object
              id: Message
          404:
            description: Not found
            schema:
              type: object
              id: Message
        """
        # NOTE(review): the None default is presumably never hit because the
        # parser marks snapshot_name as required - confirm against RequestParser
        snapshot_name = reqargs.get("snapshot_name", None)
        return api_helper.rollback_vm_snapshot(vm, snapshot_name)
# Expose the snapshot rollback handler under the per-VM snapshot rollback route
api.add_resource(API_VM_Snapshot_Rollback, "/vm/<vm>/snapshot/rollback")
##########################################################
# Client API - Network
##########################################################

View File

@ -765,6 +765,78 @@ def vm_restore(
return output, retcode
@ZKConnection(config)
def create_vm_snapshot(
    zkhandler,
    domain,
    snapshot_name=None,
):
    """
    Create a snapshot of a VM and wrap the outcome for the API layer.

    Returns a (body, status) tuple: body is {"message": <text>} and status is
    200 when the underlying call reports success, 400 otherwise.
    """
    success, message = pvc_vm.create_vm_snapshot(zkhandler, domain, snapshot_name)

    status = 200 if success else 400

    # Double quotes in the message are swapped for single quotes before returning
    return {"message": message.replace('"', "'")}, status
@ZKConnection(config)
def remove_vm_snapshot(
    zkhandler,
    domain,
    snapshot_name,
):
    """
    Remove a snapshot of a VM.

    Returns a (body, status) tuple: body is {"message": <text>} and status is
    200 when the underlying call reports success, 400 otherwise.
    """
    retflag, retdata = pvc_vm.remove_vm_snapshot(
        zkhandler,
        domain,
        snapshot_name,
    )

    if retflag:
        retcode = 200
    else:
        retcode = 400

    # Double quotes in the message are swapped for single quotes before returning
    output = {"message": retdata.replace('"', "'")}

    return output, retcode
@ZKConnection(config)
def rollback_vm_snapshot(
    zkhandler,
    domain,
    snapshot_name,
):
    """
    Roll a VM back to one of its snapshots and wrap the outcome for the API layer.

    Returns a (body, status) tuple: body is {"message": <text>} and status is
    200 when the underlying call reports success, 400 otherwise.
    """
    success, message = pvc_vm.rollback_vm_snapshot(zkhandler, domain, snapshot_name)

    status = 200 if success else 400

    # Double quotes in the message are swapped for single quotes before returning
    return {"message": message.replace('"', "'")}, status
@ZKConnection(config)
def vm_attach_device(zkhandler, vm, device_spec_xml):
"""

View File

@ -1765,6 +1765,110 @@ def cli_vm_flush_locks(domain, wait_flag):
finish(retcode, retmsg)
###############################################################################
# > pvc vm snapshot
###############################################################################
@click.group(
    name="snapshot",
    short_help="Manage snapshots for PVC VMs.",
    context_settings=CONTEXT_SETTINGS,
)
def cli_vm_snapshot():
    """
    Manage snapshots of VMs in a PVC cluster.
    """
    # Container group only: the subcommands are attached to it elsewhere
###############################################################################
# > pvc vm snapshot create
###############################################################################
@click.command(name="create", short_help="Create a snapshot of a virtual machine.")
@connection_req
@click.argument("domain")
@click.argument("snapshot_name", required=False, default=None)
def cli_vm_snapshot_create(domain, snapshot_name):
    """
    Create a snapshot of the disks and XML configuration of virtual machine DOMAIN, with the
    optional name SNAPSHOT_NAME. DOMAIN may be a UUID or name.
    WARNING: RBD snapshots are crash-consistent but not filesystem-aware. If a snapshot was taken
    of a running VM, restoring that snapshot will be equivalent to having forcibly restarted the
    VM at the moment of the snapshot.
    """
    # Print the progress prefix without a newline so the outcome lands on the same line
    echo(CLI_CONFIG, f"Taking snapshot of VM '{domain}'... ", newline=False)

    success, message = pvc.lib.vm.vm_create_snapshot(
        CLI_CONFIG, domain, snapshot_name=snapshot_name
    )

    echo(CLI_CONFIG, "done." if success else "failed.")
    finish(success, message)
###############################################################################
# > pvc vm snapshot remove
###############################################################################
@click.command(name="remove", short_help="Remove a snapshot of a virtual machine.")
@connection_req
@click.argument("domain")
@click.argument("snapshot_name")
def cli_vm_snapshot_remove(domain, snapshot_name):
    """
    Remove the snapshot SNAPSHOT_NAME of the disks and XML configuration of virtual machine DOMAIN,
    DOMAIN may be a UUID or name.
    """
    # Print the progress prefix without a newline so the outcome lands on the same line
    progress = f"Removing snapshot '{snapshot_name}' of VM '{domain}'... "
    echo(CLI_CONFIG, progress, newline=False)

    success, message = pvc.lib.vm.vm_remove_snapshot(CLI_CONFIG, domain, snapshot_name)

    echo(CLI_CONFIG, "done." if success else "failed.")
    finish(success, message)
###############################################################################
# > pvc vm snapshot rollback
###############################################################################
@click.command(
    name="rollback", short_help="Roll back to a snapshot of a virtual machine."
)
@connection_req
@click.argument("domain")
@click.argument("snapshot_name")
@confirm_opt(
    "Roll back to snapshot {snapshot_name} of {domain} and lose all data and changes since this snapshot"
)
def cli_vm_snapshot_rollback(domain, snapshot_name):
    """
    Roll back to the snapshot SNAPSHOT_NAME of the disks and XML configuration of virtual machine DOMAIN,
    DOMAIN may be a UUID or name.
    """
    # Print the progress prefix without a newline so the outcome lands on the same line
    progress = f"Rolling back to snapshot '{snapshot_name}' of VM '{domain}'... "
    echo(CLI_CONFIG, progress, newline=False)

    success, message = pvc.lib.vm.vm_rollback_snapshot(
        CLI_CONFIG, domain, snapshot_name
    )

    echo(CLI_CONFIG, "done." if success else "failed.")
    finish(success, message)
###############################################################################
# > pvc vm backup
###############################################################################
@ -6302,6 +6406,10 @@ cli_vm.add_command(cli_vm_move)
cli_vm.add_command(cli_vm_migrate)
cli_vm.add_command(cli_vm_unmigrate)
cli_vm.add_command(cli_vm_flush_locks)
# Attach the create/remove/rollback subcommands to the "snapshot" group,
# then attach that group to the parent "vm" group
cli_vm_snapshot.add_command(cli_vm_snapshot_create)
cli_vm_snapshot.add_command(cli_vm_snapshot_remove)
cli_vm_snapshot.add_command(cli_vm_snapshot_rollback)
cli_vm.add_command(cli_vm_snapshot)
cli_vm_backup.add_command(cli_vm_backup_create)
cli_vm_backup.add_command(cli_vm_backup_restore)
cli_vm_backup.add_command(cli_vm_backup_remove)

View File

@ -498,6 +498,65 @@ def vm_restore(config, vm, backup_path, backup_datestring, retain_snapshot=False
return True, response.json().get("message", "")
def vm_create_snapshot(config, vm, snapshot_name=None):
    """
    Ask the API to snapshot a VM's disks and configuration

    The snapshot name is only sent when one was given.

    API endpoint: POST /vm/{vm}/snapshot
    API arguments: snapshot_name=snapshot_name
    API schema: {"message":"{data}"}
    """
    params = {} if snapshot_name is None else {"snapshot_name": snapshot_name}

    response = call_api(config, "post", f"/vm/{vm}/snapshot", params=params)

    # Success is exactly HTTP 200; the message is returned either way
    message = response.json().get("message", "")
    return response.status_code == 200, message
def vm_remove_snapshot(config, vm, snapshot_name):
    """
    Ask the API to remove a snapshot of a VM's disks and configuration

    API endpoint: DELETE /vm/{vm}/snapshot
    API arguments: snapshot_name=snapshot_name
    API schema: {"message":"{data}"}
    """
    response = call_api(
        config,
        "delete",
        f"/vm/{vm}/snapshot",
        params={"snapshot_name": snapshot_name},
    )

    # Success is exactly HTTP 200; the message is returned either way
    message = response.json().get("message", "")
    return response.status_code == 200, message
def vm_rollback_snapshot(config, vm, snapshot_name):
    """
    Ask the API to roll a VM back to a snapshot of its disks and configuration

    API endpoint: POST /vm/{vm}/snapshot/rollback
    API arguments: snapshot_name=snapshot_name
    API schema: {"message":"{data}"}
    """
    response = call_api(
        config,
        "post",
        f"/vm/{vm}/snapshot/rollback",
        params={"snapshot_name": snapshot_name},
    )

    # Success is exactly HTTP 200; the message is returned either way
    message = response.json().get("message", "")
    return response.status_code == 200, message
def vm_vcpus_set(config, vm, vcpus, topology, restart):
"""
Set the vCPU count of the VM with topology
@ -1522,29 +1581,40 @@ def format_info(config, domain_information, long_output):
ansiprint.purple(), ansiprint.end(), domain_information["vcpu"]
)
)
ainformation.append(
"{}Topology (S/C/T):{} {}".format(
ansiprint.purple(), ansiprint.end(), domain_information["vcpu_topology"]
if long_output:
ainformation.append(
"{}Topology (S/C/T):{} {}".format(
ansiprint.purple(), ansiprint.end(), domain_information["vcpu_topology"]
)
)
)
if (
domain_information["vnc"].get("listen", "None") != "None"
and domain_information["vnc"].get("port", "None") != "None"
):
domain_information["vnc"].get("listen")
and domain_information["vnc"].get("port")
) or long_output:
listen = (
domain_information["vnc"]["listen"]
if domain_information["vnc"].get("listen")
else "N/A"
)
port = (
domain_information["vnc"]["port"]
if domain_information["vnc"].get("port")
else "N/A"
)
ainformation.append("")
ainformation.append(
"{}VNC listen:{} {}".format(
ansiprint.purple(), ansiprint.end(), domain_information["vnc"]["listen"]
ansiprint.purple(), ansiprint.end(), listen
)
)
ainformation.append(
"{}VNC port:{} {}".format(
ansiprint.purple(), ansiprint.end(), domain_information["vnc"]["port"]
ansiprint.purple(), ansiprint.end(), port
)
)
if long_output is True:
if long_output:
# Virtualization information
ainformation.append("")
ainformation.append(
@ -1665,12 +1735,18 @@ def format_info(config, domain_information, long_output):
)
)
if not domain_information.get("node_selector"):
if (
not domain_information.get("node_selector")
or domain_information.get("node_selector") == "None"
):
formatted_node_selector = "Default"
else:
formatted_node_selector = str(domain_information["node_selector"]).title()
if not domain_information.get("node_limit"):
if (
not domain_information.get("node_limit")
or domain_information.get("node_limit") == "None"
):
formatted_node_limit = "Any"
else:
formatted_node_limit = ", ".join(domain_information["node_limit"])
@ -1682,7 +1758,10 @@ def format_info(config, domain_information, long_output):
autostart_colour = ansiprint.green()
formatted_node_autostart = "True"
if not domain_information.get("migration_method"):
if (
not domain_information.get("migration_method")
or domain_information.get("migration_method") == "None"
):
formatted_migration_method = "Live, Shutdown"
else:
formatted_migration_method = (
@ -1780,6 +1859,78 @@ def format_info(config, domain_information, long_output):
)
)
# Snapshot list
snapshots_name_length = 5
snapshots_age_length = 4
snapshots_xml_changes_length = 12
for snapshot in domain_information.get("snapshots", list()):
xml_diff_plus = 0
xml_diff_minus = 0
for line in snapshot["xml_diff_lines"]:
if re.match(r"^\+ ", line):
xml_diff_plus += 1
elif re.match(r"^- ", line):
xml_diff_minus += 1
xml_diff_counts = f"+{xml_diff_plus}/-{xml_diff_minus}"
_snapshots_name_length = len(snapshot["name"]) + 1
if _snapshots_name_length > snapshots_name_length:
snapshots_name_length = _snapshots_name_length
_snapshots_age_length = len(snapshot["age"]) + 1
if _snapshots_age_length > snapshots_age_length:
snapshots_age_length = _snapshots_age_length
_snapshots_xml_changes_length = len(xml_diff_counts) + 1
if _snapshots_xml_changes_length > snapshots_xml_changes_length:
snapshots_xml_changes_length = _snapshots_xml_changes_length
if len(domain_information.get("snapshots", list())) > 0:
ainformation.append("")
ainformation.append(
"{purple}Snapshots:{end} {bold}{snapshots_name: <{snapshots_name_length}} {snapshots_age: <{snapshots_age_length}} {snapshots_xml_changes: <{snapshots_xml_changes_length}}{end}".format(
purple=ansiprint.purple(),
bold=ansiprint.bold(),
end=ansiprint.end(),
snapshots_name_length=snapshots_name_length,
snapshots_age_length=snapshots_age_length,
snapshots_xml_changes_length=snapshots_xml_changes_length,
snapshots_name="Name",
snapshots_age="Age",
snapshots_xml_changes="XML Changes",
)
)
for snapshot in domain_information.get("snapshots", list()):
xml_diff_plus = 0
xml_diff_minus = 0
for line in snapshot["xml_diff_lines"]:
if re.match(r"^\+ ", line):
xml_diff_plus += 1
elif re.match(r"^- ", line):
xml_diff_minus += 1
xml_diff_counts = f"{ansiprint.green()}+{xml_diff_plus}{ansiprint.end()}/{ansiprint.red()}-{xml_diff_minus}{ansiprint.end()}"
ainformation.append(
" {snapshots_name: <{snapshots_name_length}} {snapshots_age: <{snapshots_age_length}} {snapshots_xml_changes: <{snapshots_xml_changes_length}}{end}".format(
snapshots_name_length=snapshots_name_length,
snapshots_age_length=snapshots_age_length,
snapshots_xml_changes_length=snapshots_xml_changes_length,
snapshots_name=snapshot["name"],
snapshots_age=snapshot["age"],
snapshots_xml_changes=xml_diff_counts,
end=ansiprint.end(),
)
)
else:
ainformation.append("")
ainformation.append(
"{purple}Snapshots:{end} N/A".format(
purple=ansiprint.purple(),
end=ansiprint.end(),
)
)
# Network list
net_list = []
cluster_net_list = call_api(config, "get", "/network").json()
@ -1806,7 +1957,7 @@ def format_info(config, domain_information, long_output):
)
)
if long_output is True:
if long_output:
# Disk list
ainformation.append("")
name_length = 0
@ -1942,6 +2093,7 @@ def format_list(config, vm_list):
vm_name_length = 5
vm_state_length = 6
vm_tags_length = 5
vm_snapshots_length = 10
vm_nets_length = 9
vm_ram_length = 8
vm_vcpu_length = 6
@ -1962,6 +2114,12 @@ def format_list(config, vm_list):
_vm_tags_length = len(",".join(tag_list)) + 1
if _vm_tags_length > vm_tags_length:
vm_tags_length = _vm_tags_length
# vm_snapshots column
_vm_snapshots_length = (
len(str(len(domain_information.get("snapshots", list())))) + 1
)
if _vm_snapshots_length > vm_snapshots_length:
vm_snapshots_length = _vm_snapshots_length
# vm_nets column
_vm_nets_length = len(",".join(net_list)) + 1
if _vm_nets_length > vm_nets_length:
@ -1978,7 +2136,11 @@ def format_list(config, vm_list):
# Format the string (header)
vm_list_output.append(
"{bold}{vm_header: <{vm_header_length}} {resource_header: <{resource_header_length}} {node_header: <{node_header_length}}{end_bold}".format(
vm_header_length=vm_name_length + vm_state_length + vm_tags_length + 2,
vm_header_length=vm_name_length
+ vm_state_length
+ vm_tags_length
+ vm_snapshots_length
+ 3,
resource_header_length=vm_nets_length + vm_ram_length + vm_vcpu_length + 2,
node_header_length=vm_node_length + vm_migrated_length + 1,
bold=ansiprint.bold(),
@ -1988,7 +2150,12 @@ def format_list(config, vm_list):
[
"-"
for _ in range(
4, vm_name_length + vm_state_length + vm_tags_length + 1
4,
vm_name_length
+ vm_state_length
+ vm_tags_length
+ +vm_snapshots_length
+ 2,
)
]
),
@ -2010,6 +2177,7 @@ def format_list(config, vm_list):
"{bold}{vm_name: <{vm_name_length}} \
{vm_state_colour}{vm_state: <{vm_state_length}}{end_colour} \
{vm_tags: <{vm_tags_length}} \
{vm_snapshots: <{vm_snapshots_length}} \
{vm_networks: <{vm_nets_length}} \
{vm_memory: <{vm_ram_length}} {vm_vcpu: <{vm_vcpu_length}} \
{vm_node: <{vm_node_length}} \
@ -2017,6 +2185,7 @@ def format_list(config, vm_list):
vm_name_length=vm_name_length,
vm_state_length=vm_state_length,
vm_tags_length=vm_tags_length,
vm_snapshots_length=vm_snapshots_length,
vm_nets_length=vm_nets_length,
vm_ram_length=vm_ram_length,
vm_vcpu_length=vm_vcpu_length,
@ -2029,6 +2198,7 @@ def format_list(config, vm_list):
vm_name="Name",
vm_state="State",
vm_tags="Tags",
vm_snapshots="Snapshots",
vm_networks="Networks",
vm_memory="RAM (M)",
vm_vcpu="vCPUs",
@ -2095,6 +2265,7 @@ def format_list(config, vm_list):
"{bold}{vm_name: <{vm_name_length}} \
{vm_state_colour}{vm_state: <{vm_state_length}}{end_colour} \
{vm_tags: <{vm_tags_length}} \
{vm_snapshots: <{vm_snapshots_length}} \
{vm_networks: <{vm_nets_length}} \
{vm_memory: <{vm_ram_length}} {vm_vcpu: <{vm_vcpu_length}} \
{vm_node: <{vm_node_length}} \
@ -2102,6 +2273,7 @@ def format_list(config, vm_list):
vm_name_length=vm_name_length,
vm_state_length=vm_state_length,
vm_tags_length=vm_tags_length,
vm_snapshots_length=vm_snapshots_length,
vm_nets_length=vm_nets_length,
vm_ram_length=vm_ram_length,
vm_vcpu_length=vm_vcpu_length,
@ -2114,6 +2286,7 @@ def format_list(config, vm_list):
vm_name=domain_information["name"],
vm_state=domain_information["state"],
vm_tags=",".join(tag_list),
vm_snapshots=len(domain_information.get("snapshots", list())),
vm_networks=",".join(net_string_list),
vm_memory=domain_information["memory"],
vm_vcpu=domain_information["vcpu"],

View File

@ -28,6 +28,7 @@ from json import loads
from re import match as re_match
from re import split as re_split
from re import sub as re_sub
from difflib import unified_diff
from distutils.util import strtobool
from threading import Thread
from shlex import split as shlex_split
@ -427,6 +428,96 @@ def getDomainTags(zkhandler, dom_uuid):
return tags
#
# Get a list of domain snapshots
#
def getDomainSnapshots(zkhandler, dom_uuid):
    """
    Get a list of snapshots for domain dom_uuid

    The UUID must be validated before calling this function!

    Returns a list of dicts, newest snapshot first, each containing:
      name: the stored snapshot name
      timestamp: the stored timestamp string, unmodified
      age: human-readable age in the largest viable unit (seconds, minutes, hours, days)
      xml_diff_lines: lines of an (n=1) unified diff from the current VM XML to the snapshot XML
      rbd_snapshots: the comma-separated stored RBD snapshot entries as a list (empty if none)
    """
    # A missing or empty child list simply means the domain has no snapshots
    all_snapshots = zkhandler.children(("domain.snapshots", dom_uuid)) or list()

    current_timestamp = time.time()
    current_dom_xml = zkhandler.read(("domain.xml", dom_uuid))

    snapshots = list()
    for snapshot in all_snapshots:
        (
            snap_name,
            snap_timestamp,
            _snap_rbd_snapshots,
            snap_dom_xml,
        ) = zkhandler.read_many(
            [
                ("domain.snapshots", dom_uuid, "domain_snapshot.name", snapshot),
                ("domain.snapshots", dom_uuid, "domain_snapshot.timestamp", snapshot),
                (
                    "domain.snapshots",
                    dom_uuid,
                    "domain_snapshot.rbd_snapshots",
                    snapshot,
                ),
                ("domain.snapshots", dom_uuid, "domain_snapshot.xml", snapshot),
            ]
        )

        # "".split(",") would give [""], so map an empty stored list to []
        if _snap_rbd_snapshots:
            snap_rbd_snapshots = _snap_rbd_snapshots.split(",")
        else:
            snap_rbd_snapshots = list()

        snap_dom_xml_diff = list(
            unified_diff(
                current_dom_xml.split("\n"),
                snap_dom_xml.split("\n"),
                fromfile="current",
                tofile="snapshot",
                fromfiledate="",
                tofiledate="",
                n=1,
                lineterm="",
            )
        )

        # Ages are computed on whole seconds
        snap_age_secs = int(current_timestamp) - int(float(snap_timestamp))

        snapshots.append(
            {
                "name": snap_name,
                "timestamp": snap_timestamp,
                "age": _format_snapshot_age(snap_age_secs),
                "xml_diff_lines": snap_dom_xml_diff,
                "rbd_snapshots": snap_rbd_snapshots,
            }
        )

    # Sort numerically: the timestamps are stored as strings, and a plain string
    # sort misorders values with a different number of digits
    return sorted(snapshots, key=lambda s: float(s["timestamp"]), reverse=True)


def _format_snapshot_age(age_secs):
    """
    Render an age in whole seconds using the largest unit with a non-zero count

    Units are tried from days down to minutes; anything smaller (including
    zero or negative ages) is reported in seconds.
    """
    for unit_name, unit_secs in (("day", 86400), ("hour", 3600), ("minute", 60)):
        unit_count = int(age_secs / unit_secs)
        if unit_count > 0:
            plural = "s" if unit_count > 1 else ""
            return f"{unit_count} {unit_name}{plural}"

    plural = "" if age_secs == 1 else "s"
    return f"{age_secs} second{plural}"
#
# Get a set of domain metadata
#
@ -515,6 +606,7 @@ def getInformationFromXML(zkhandler, uuid):
) = getDomainMetadata(zkhandler, uuid)
domain_tags = getDomainTags(zkhandler, uuid)
domain_snapshots = getDomainSnapshots(zkhandler, uuid)
if domain_vnc:
domain_vnc_listen, domain_vnc_port = domain_vnc.split(":")
@ -574,6 +666,7 @@ def getInformationFromXML(zkhandler, uuid):
"migration_method": domain_migration_method,
"migration_max_downtime": int(domain_migration_max_downtime),
"tags": domain_tags,
"snapshots": domain_snapshots,
"description": domain_description,
"profile": domain_profile,
"memory": int(domain_memory),

View File

@ -0,0 +1 @@
{"version": "14", "root": "", "base": {"root": "", "schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "logs": "/logs", "faults": "/faults", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.health": "/ceph/health", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "logs": {"node": "", "messages": "/messages"}, "faults": {"id": "", "last_time": "/last_time", "first_time": "/first_time", "ack_time": "/ack_time", "status": "/status", "delta": "/delta", "message": "/message"}, "node": {"name": "", "keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "data.pvc_version": "/pvcversion", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword", "sriov": "/sriov", "sriov.pf": "/sriov/pf", "sriov.vf": "/sriov/vf", "monitoring.plugins": "/monitoring_plugins", "monitoring.data": "/monitoring_data", "monitoring.health": "/monitoring_health", "network.stats": "/network_stats"}, "monitoring_plugin": {"name": "", "last_run": "/last_run", "health_delta": "/health_delta", "message": "/message", 
"data": "/data", "runtime": "/runtime"}, "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"}, "sriov_vf": {"phy": "", "pf": "/pf", "mtu": "/mtu", "mac": "/mac", "phy_mac": "/phy_mac", "config": "/config", "config.vlan_id": "/config/vlan_id", "config.vlan_qos": "/config/vlan_qos", "config.tx_rate_min": "/config/tx_rate_min", "config.tx_rate_max": "/config/tx_rate_max", "config.spoof_check": "/config/spoof_check", "config.link_state": "/config/link_state", "config.trust": "/config/trust", "config.query_rss": "/config/query_rss", "pci": "/pci", "pci.domain": "/pci/domain", "pci.bus": "/pci/bus", "pci.slot": "/pci/slot", "pci.function": "/pci/function", "used": "/used", "used_by": "/used_by"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.migrate_max_downtime": "/migration_max_downtime", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "meta.tags": "/tags", "migrate.sync_lock": "/migrate_sync_lock", "snapshots": "/snapshots"}, "tag": {"name": "", "type": "/type", "protected": "/protected"}, "domain_snapshot": {"name": "", "timestamp": "/timestamp", "xml": "/xml", "rbd_snapshots": "/rbdsnaplist"}, "network": {"vni": "", "type": "/nettype", "mtu": "/mtu", "rule": "/firewall_rules", "rule.in": "/firewall_rules/in", "rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": "/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": 
"/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname", "expiry": "/expiry", "client_id": "/clientid"}, "rule": {"description": "", "rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "db_device": "/db_device", "fsid": "/fsid", "ofsid": "/fsid/osd", "cfsid": "/fsid/cluster", "lvm": "/lvm", "vg": "/lvm/vg", "lv": "/lvm/lv", "is_split": "/is_split", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "tier": "/tier", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}}

View File

@ -155,10 +155,37 @@ def define_vm(
# Parse the XML data
try:
parsed_xml = lxml.objectify.fromstring(config_data)
except Exception:
return False, "ERROR: Failed to parse XML data."
dom_uuid = parsed_xml.uuid.text
dom_name = parsed_xml.name.text
except Exception as e:
return False, f"ERROR: Failed to parse XML data: {e}"
# Extract the required items from the XML document and error if not valid
next_field = 0
next_map = {
0: "uuid",
1: "name",
2: "memory",
3: "vcpu",
4: "networks",
5: "disks",
}
try:
dom_uuid = parsed_xml.uuid.text
next_field += 1
dom_name = parsed_xml.name.text
next_field += 1
parsed_memory = int(parsed_xml.memory.text)
next_field += 1
parsed_vcpu = int(parsed_xml.vcpu.text)
next_field += 1
dnetworks = common.getDomainNetworks(parsed_xml, {})
next_field += 1
ddisks = common.getDomainDisks(parsed_xml, {})
next_field += 1
except Exception as e:
return (
False,
f'ERROR: Failed to parse XML data: field data for "{next_map[next_field]}" is not valid: {e}',
)
# Ensure that the UUID and name are unique
if searchClusterByUUID(zkhandler, dom_uuid) or searchClusterByName(
@ -181,26 +208,25 @@ def define_vm(
# Validate the new RAM against the current active node
node_total_memory = int(zkhandler.read(("node.memory.total", target_node)))
if int(parsed_xml.memory.text) >= node_total_memory:
if parsed_memory >= node_total_memory:
return (
False,
'ERROR: VM configuration specifies more memory ({} MiB) than node "{}" has available ({} MiB).'.format(
parsed_xml.memory.text, target_node, node_total_memory
parsed_memory, target_node, node_total_memory
),
)
# Validate the number of vCPUs against the current active node
node_total_cpus = int(zkhandler.read(("node.data.static", target_node)).split()[0])
if (node_total_cpus - 2) <= int(parsed_xml.vcpu.text):
if parsed_vcpu >= (node_total_cpus - 2):
return (
False,
'ERROR: VM configuration specifies more vCPUs ({}) than node "{}" has available ({} minus 2).'.format(
parsed_xml.vcpu.text, target_node, node_total_cpus
parsed_vcpu, target_node, node_total_cpus
),
)
# If a SR-IOV network device is being added, set its used state
dnetworks = common.getDomainNetworks(parsed_xml, {})
for network in dnetworks:
if network["type"] in ["direct", "hostdev"]:
dom_node = zkhandler.read(("domain.node", dom_uuid))
@ -239,7 +265,6 @@ def define_vm(
)
# Obtain the RBD disk list using the common functions
ddisks = common.getDomainDisks(parsed_xml, {})
rbd_list = []
for disk in ddisks:
if disk["type"] == "rbd":
@ -281,6 +306,7 @@ def define_vm(
(("domain.meta.node_selector", dom_uuid), str(node_selector).lower()),
(("domain.meta.tags", dom_uuid), ""),
(("domain.migrate.sync_lock", dom_uuid), ""),
(("domain.snapshots", dom_uuid), ""),
]
)
@ -404,6 +430,35 @@ def modify_vm(zkhandler, domain, restart, new_vm_config):
except Exception:
return False, "ERROR: Failed to parse new XML data."
# Extract the required items from the XML document and error if not valid
next_field = 0
next_map = {
0: "uuid",
1: "name",
2: "memory",
3: "vcpu",
4: "networks",
5: "disks",
}
try:
dom_uuid = parsed_xml.uuid.text
next_field += 1
dom_name = parsed_xml.name.text
next_field += 1
parsed_memory = int(parsed_xml.memory.text)
next_field += 1
parsed_vcpu = int(parsed_xml.vcpu.text)
next_field += 1
dnetworks = common.getDomainNetworks(parsed_xml, {})
next_field += 1
ddisks = common.getDomainDisks(parsed_xml, {})
next_field += 1
except Exception as e:
return (
False,
f'ERROR: Failed to parse XML data: field data for "{next_map[next_field]}" is not valid: {e}',
)
# Get our old network list for comparison purposes
old_vm_config = zkhandler.read(("domain.xml", dom_uuid))
old_parsed_xml = lxml.objectify.fromstring(old_vm_config)
@ -412,26 +467,25 @@ def modify_vm(zkhandler, domain, restart, new_vm_config):
# Validate the new RAM against the current active node
node_name = zkhandler.read(("domain.node", dom_uuid))
node_total_memory = int(zkhandler.read(("node.memory.total", node_name)))
if int(parsed_xml.memory.text) >= node_total_memory:
if parsed_memory >= node_total_memory:
return (
False,
'ERROR: Updated VM configuration specifies more memory ({} MiB) than node "{}" has available ({} MiB).'.format(
parsed_xml.memory.text, node_name, node_total_memory
parsed_memory, node_name, node_total_memory
),
)
# Validate the number of vCPUs against the current active node
node_total_cpus = int(zkhandler.read(("node.data.static", node_name)).split()[0])
if (node_total_cpus - 2) <= int(parsed_xml.vcpu.text):
if parsed_vcpu >= (node_total_cpus - 2):
return (
False,
'ERROR: Updated VM configuration specifies more vCPUs ({}) than node "{}" has available ({} minus 2).'.format(
parsed_xml.vcpu.text, node_name, node_total_cpus
parsed_vcpu, node_name, node_total_cpus
),
)
# If a SR-IOV network device is being added, set its used state
dnetworks = common.getDomainNetworks(parsed_xml, {})
for network in dnetworks:
# Ignore networks that are already there
if network["source"] in [net["source"] for net in old_dnetworks]:
@ -482,7 +536,6 @@ def modify_vm(zkhandler, domain, restart, new_vm_config):
unset_sriov_vf_vm(zkhandler, dom_node, network["source"])
# Obtain the RBD disk list using the common functions
ddisks = common.getDomainDisks(parsed_xml, {})
rbd_list = []
for disk in ddisks:
if disk["type"] == "rbd":
@ -754,7 +807,15 @@ def update_vm_sriov_nics(zkhandler, dom_uuid, source_node, target_node):
# Update all the SR-IOV device states on both nodes, used during migrations but called by the node-side
vm_config = zkhandler.read(("domain.xml", dom_uuid))
parsed_xml = lxml.objectify.fromstring(vm_config)
dnetworks = common.getDomainNetworks(parsed_xml, {})
# Extract the required items from the XML document and error if not valid
try:
dnetworks = common.getDomainNetworks(parsed_xml, {})
except Exception as e:
return (
False,
f'ERROR: Failed to parse XML data: field data for "networks" is not valid: {e}',
)
retcode = True
retmsg = ""
for network in dnetworks:
@ -1185,6 +1246,222 @@ def get_list(
return True, sorted(vm_data_list, key=lambda d: d["name"])
#
# VM Snapshot Tasks
#
def create_vm_snapshot(zkhandler, domain, snapshot_name=None):
    """Create a snapshot of a VM's RBD volumes and its XML configuration.

    One RBD snapshot named ``snapshot_name`` is created on every volume listed
    under the VM's ``domain.storage.volumes`` key, and then the snapshot name,
    start timestamp, current domain XML and the list of created RBD snapshots
    are written under the VM's ``domain.snapshots`` key.

    Args:
        zkhandler: Zookeeper handler used for all reads and writes.
        domain: VM identifier, resolved to a UUID via ``getDomainUUID``.
        snapshot_name: Optional snapshot name; when ``None`` the current local
            time formatted as ``%Y%m%d%H%M%S`` is used. Explicit names may only
            contain alphanumeric characters, ".", "-" and "_".

    Returns:
        A ``(success, message)`` tuple. On failure no new Zookeeper entry is
        written, and any RBD snapshots already created by this call are
        removed on a best-effort basis.
    """
    # Validate that VM exists in cluster
    dom_uuid = getDomainUUID(zkhandler, domain)
    if not dom_uuid:
        return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)

    if snapshot_name is None:
        snapshot_name = datetime.now().strftime("%Y%m%d%H%M%S")
    else:
        # The "-" must be last in the character class so it is a literal dash
        # and not a range operator: ".-_" would span "." through "_" and admit
        # "/" and "@", which later break parsing of "pool/volume@snapshot"
        # entries. fullmatch also rejects a trailing newline that "$" allows.
        if not re.fullmatch(r"[A-Za-z0-9._-]+", snapshot_name):
            return (
                False,
                f'ERROR: Snapshot name "{snapshot_name}" contains invalid characters; only alphanumeric, ".", "-", and "_" characters are allowed!',
            )

    current_snapshots = zkhandler.children(("domain.snapshots", dom_uuid))
    if current_snapshots and snapshot_name in current_snapshots:
        return (
            False,
            f'ERROR: Snapshot name "{snapshot_name}" already exists for VM "{domain}"!',
        )

    tstart = time.time()

    # Get the list of all RBD volumes; drop empty entries so that a VM with no
    # volumes yields an empty list instead of [""], which cannot be unpacked.
    volumes_raw = zkhandler.read(("domain.storage.volumes", dom_uuid)) or ""
    rbd_list = [rbd for rbd in volumes_raw.split(",") if rbd]

    snap_list = list()

    # If a snapshot fails, clean up any snapshots that were successfully created
    def cleanup_failure():
        for created in snap_list:
            created_rbd, created_name = created.split("@", 1)
            created_pool, created_volume = created_rbd.split("/", 1)
            # We capture no output here, because if this fails too we're in a deep
            # error chain and will just ignore it
            ceph.remove_snapshot(zkhandler, created_pool, created_volume, created_name)

    # Iterate through and create a snapshot for each RBD volume
    for rbd in rbd_list:
        pool, volume = rbd.split("/")
        ret, msg = ceph.add_snapshot(zkhandler, pool, volume, snapshot_name)
        if not ret:
            cleanup_failure()
            return False, msg
        snap_list.append(f"{pool}/{volume}@{snapshot_name}")

    # Get the current domain XML
    vm_config = zkhandler.read(("domain.xml", dom_uuid))

    # Add the snapshot entry to Zookeeper
    zkhandler.write(
        [
            (
                (
                    "domain.snapshots",
                    dom_uuid,
                    "domain_snapshot.name",
                    snapshot_name,
                ),
                snapshot_name,
            ),
            (
                (
                    "domain.snapshots",
                    dom_uuid,
                    "domain_snapshot.timestamp",
                    snapshot_name,
                ),
                tstart,
            ),
            (
                (
                    "domain.snapshots",
                    dom_uuid,
                    "domain_snapshot.xml",
                    snapshot_name,
                ),
                vm_config,
            ),
            (
                (
                    "domain.snapshots",
                    dom_uuid,
                    "domain_snapshot.rbd_snapshots",
                    snapshot_name,
                ),
                ",".join(snap_list),
            ),
        ]
    )

    tend = time.time()
    ttot = round(tend - tstart, 2)

    return (
        True,
        f'Successfully created snapshot "{snapshot_name}" of VM "{domain}" in {ttot}s.',
    )
def remove_vm_snapshot(zkhandler, domain, snapshot_name):
    """Remove a VM snapshot: its RBD snapshots and its Zookeeper entry.

    Args:
        zkhandler: Zookeeper handler used for all reads and deletes.
        domain: VM identifier, resolved to a UUID via ``getDomainUUID``.
        snapshot_name: Name of the snapshot to remove.

    Returns:
        A ``(success, message)`` tuple. Removal stops at the first RBD
        snapshot that fails to be removed and that failure message is
        returned; the Zookeeper entry is left in place in that case.
    """
    # Validate that VM exists in cluster
    dom_uuid = getDomainUUID(zkhandler, domain)
    if not dom_uuid:
        return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)

    if not zkhandler.exists(
        ("domain.snapshots", dom_uuid, "domain_snapshot.name", snapshot_name)
    ):
        return (
            False,
            f'ERROR: Could not find snapshot "{snapshot_name}" of VM "{domain}"!',
        )

    tstart = time.time()

    _snapshots = zkhandler.read(
        ("domain.snapshots", dom_uuid, "domain_snapshot.rbd_snapshots", snapshot_name)
    )
    # Drop empty entries: an empty stored list would otherwise split to [""]
    # and fail to unpack below.
    rbd_snapshots = [snap for snap in (_snapshots or "").split(",") if snap]

    for snap in rbd_snapshots:
        # Split only on the first separator so that a snapshot name which
        # itself contains "@" or "/" can still be parsed and removed.
        rbd, name = snap.split("@", 1)
        pool, volume = rbd.split("/", 1)
        ret, msg = ceph.remove_snapshot(zkhandler, pool, volume, name)
        if not ret:
            return False, msg

    # Delete the snapshot's entry (its root key) from Zookeeper
    ret = zkhandler.delete(
        ("domain.snapshots", dom_uuid, "domain_snapshot.name", snapshot_name)
    )
    if not ret:
        return (
            False,
            f'ERROR: Failed to delete snapshot "{snapshot_name}" of VM "{domain}" in Zookeeper.',
        )

    tend = time.time()
    ttot = round(tend - tstart, 2)

    return (
        True,
        f'Successfully removed snapshot "{snapshot_name}" of VM "{domain}" in {ttot}s.',
    )
def rollback_vm_snapshot(zkhandler, domain, snapshot_name):
    """Roll a stopped VM back to a previously created snapshot.

    Each RBD snapshot recorded for ``snapshot_name`` is rolled back, then the
    XML stored with the snapshot is written over the VM's ``domain.xml`` key.

    Args:
        zkhandler: Zookeeper handler used for all reads and writes.
        domain: VM identifier, resolved to a UUID via ``getDomainUUID``.
        snapshot_name: Name of the snapshot to roll back to.

    Returns:
        A ``(success, message)`` tuple. The rollback stops at the first RBD
        volume that fails and returns that failure message; the VM XML is not
        rewritten in that case.
    """
    # Validate that VM exists in cluster
    dom_uuid = getDomainUUID(zkhandler, domain)
    if not dom_uuid:
        return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)

    # Verify that the VM is in a stopped state; rollback is not supported otherwise
    state = zkhandler.read(("domain.state", dom_uuid))
    if state not in ["stop", "disable"]:
        return (
            False,
            'ERROR: VM "{}" is not in stopped state; VMs cannot be rolled back while running.'.format(
                domain
            ),
        )

    # Verify that the snapshot exists
    if not zkhandler.exists(
        ("domain.snapshots", dom_uuid, "domain_snapshot.name", snapshot_name)
    ):
        return (
            False,
            f'ERROR: Could not find snapshot "{snapshot_name}" of VM "{domain}"!',
        )

    tstart = time.time()

    _snapshots = zkhandler.read(
        ("domain.snapshots", dom_uuid, "domain_snapshot.rbd_snapshots", snapshot_name)
    )
    # Drop empty entries: an empty stored list would otherwise split to [""]
    # and fail to unpack below.
    rbd_snapshots = [snap for snap in (_snapshots or "").split(",") if snap]

    for snap in rbd_snapshots:
        # Split only on the first separator so that a snapshot name which
        # itself contains "@" or "/" can still be parsed.
        rbd, name = snap.split("@", 1)
        pool, volume = rbd.split("/", 1)
        ret, msg = ceph.rollback_snapshot(zkhandler, pool, volume, name)
        if not ret:
            return False, msg

    # Get the snapshot domain XML
    vm_config = zkhandler.read(
        ("domain.snapshots", dom_uuid, "domain_snapshot.xml", snapshot_name)
    )

    # Write the restored config to the main XML config
    zkhandler.write(
        [
            (
                (
                    "domain.xml",
                    dom_uuid,
                ),
                vm_config,
            ),
        ]
    )

    tend = time.time()
    ttot = round(tend - tstart, 2)

    return (
        True,
        f'Successfully rolled back to snapshot "{snapshot_name}" of VM "{domain}" in {ttot}s.',
    )
#
# VM Backup Tasks
#
def backup_vm(
zkhandler, domain, backup_path, incremental_parent=None, retain_snapshot=False
):

View File

@ -258,6 +258,13 @@ def worker_create_vm(
args = (vm_profile,)
db_cur.execute(query, args)
profile_data = db_cur.fetchone()
if profile_data is None:
fail(
celery,
f'Provisioner profile "{vm_profile}" is not present on the cluster',
exception=ClusterError,
)
if profile_data.get("arguments"):
vm_data["script_arguments"] = profile_data.get("arguments").split("|")
else:

View File

@ -573,7 +573,7 @@ class ZKHandler(object):
#
class ZKSchema(object):
# Current version
_version = 13
_version = 14
# Root for doing nested keys
_schema_root = ""
@ -713,13 +713,21 @@ class ZKSchema(object):
"meta.node_limit": "/node_limit",
"meta.tags": "/tags",
"migrate.sync_lock": "/migrate_sync_lock",
"snapshots": "/snapshots",
},
# The schema of an individual domain tag entry (/domains/{domain}/tags/{tag})
"tag": {
"name": "",
"name": "", # The root key
"type": "/type",
"protected": "/protected",
}, # The root key
},
# The schema of an individual domain snapshot entry (/domains/{domain}/snapshots/{snapshot})
"domain_snapshot": {
"name": "", # The root key
"timestamp": "/timestamp",
"xml": "/xml",
"rbd_snapshots": "/rbdsnaplist",
},
# The schema of an individual network entry (/networks/{vni})
"network": {
"vni": "", # The root key

View File

@ -69,26 +69,33 @@ class MonitoringPluginScript(MonitoringPlugin):
# Run any imports first
from daemon_lib.common import run_os_command
from time import sleep
# Check the node's IPMI interface
ipmi_hostname = self.config["ipmi_hostname"]
ipmi_username = self.config["ipmi_username"]
ipmi_password = self.config["ipmi_password"]
retcode, _, _ = run_os_command(
f"/usr/bin/ipmitool -I lanplus -H {ipmi_hostname} -U {ipmi_username} -P {ipmi_password} chassis power status",
timeout=5
)
retcode = 1
trycount = 0
while retcode > 0 and trycount < 3:
retcode, _, _ = run_os_command(
f"/usr/bin/ipmitool -I lanplus -H {ipmi_hostname} -U {ipmi_username} -P {ipmi_password} chassis power status",
timeout=2
)
trycount += 1
if retcode > 0 and trycount < 3:
sleep(trycount)
if retcode > 0:
# Set the health delta to 10 (subtract 10 from the total of 100)
health_delta = 10
# Craft a message that can be used by the clients
message = f"IPMI via {ipmi_username}@{ipmi_hostname} is NOT responding"
message = f"IPMI via {ipmi_username}@{ipmi_hostname} is NOT responding after 3 attempts"
else:
# Set the health delta to 0 (no change)
health_delta = 0
# Craft a message that can be used by the clients
message = f"IPMI via {ipmi_username}@{ipmi_hostname} is responding"
message = f"IPMI via {ipmi_username}@{ipmi_hostname} is responding after {trycount} attempts"
# Set the health delta in our local PluginResult object
self.plugin_result.set_health_delta(health_delta)

View File

@ -157,7 +157,9 @@ def collect_ceph_stats(logger, config, zkhandler, this_node, queue):
1
].decode("ascii")
try:
ceph_pool_df_raw = json.loads(ceph_df_output)["pools"]
ceph_pool_df_raw = sorted(
json.loads(ceph_df_output)["pools"], key=lambda x: x["name"]
)
except Exception as e:
logger.out("Failed to obtain Pool data (ceph df): {}".format(e), state="w")
ceph_pool_df_raw = []
@ -166,7 +168,9 @@ def collect_ceph_stats(logger, config, zkhandler, this_node, queue):
"rados df --format json", timeout=1
)
try:
rados_pool_df_raw = json.loads(stdout)["pools"]
rados_pool_df_raw = sorted(
json.loads(stdout)["pools"], key=lambda x: x["name"]
)
except Exception as e:
logger.out("Failed to obtain Pool data (rados df): {}".format(e), state="w")
rados_pool_df_raw = []