Compare commits: v0.9.58...8df189aa22 (297 commits)
CHANGELOG.md (14 additions)

@@ -1,5 +1,19 @@
 ## PVC Changelog

+###### [v0.9.61](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.61)
+
+  * [provisioner] Fixes a bug in network comparison
+  * [api] Fixes a bug being unable to rename disabled VMs
+
+###### [v0.9.60](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.60)
+
+  * [Provisioner] Cleans up several remaining bugs in the example scripts; they should all be valid now
+  * [Provisioner] Adjust default libvirt schema to disable RBD caching for a 2x+ performance boost
+
+###### [v0.9.59](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.59)
+
+  * [API] Flips the mem(prov) and mem(free) selectors making mem(free) the default for "mem" and "memprov" explicit
+
 ###### [v0.9.58](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.58)

 * [API] Fixes a bug where migration selector could have case-sensitive operational faults
@@ -398,7 +398,7 @@ class VMBuilderScript(VMBuilder):
             if volume.get("source_volume") is not None:
                 continue

-            if volume.get("filesystem") is None:
+            if volume.get("filesystem") is None or volume.get("filesystem") == "swap":
                 continue

             mapped_dst_volume = f"/dev/rbd/{dst_volume}"
@@ -473,7 +473,7 @@ class VMBuilderScript(VMBuilder):
         ]

         # We need to know our root disk for later GRUB-ing
-        root_disk = None
+        root_volume = None
         for volume in volumes:
             if volume["mountpoint"] == "/":
                 root_volume = volume
@@ -725,6 +725,7 @@ GRUB_DISABLE_LINUX_UUID=false
             if (
                 volume.get("source_volume") is None
                 and volume.get("filesystem") is not None
+                and volume.get("filesystem") != "swap"
             ):
                 # Unmount filesystem
                 retcode, stdout, stderr = pvc_common.run_os_command(
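Both example scripts gain the same swap-aware filter; a minimal standalone sketch of the resulting logic, using hypothetical volume data rather than repository fixtures:

```python
# Sketch of the volume-filtering rule the hunks above converge on.
# The volume dicts here are hypothetical examples, not repository data.
volumes = [
    {"disk_id": "sda", "source_volume": None, "filesystem": "ext4", "mountpoint": "/"},
    {"disk_id": "sdb", "source_volume": None, "filesystem": "swap", "mountpoint": None},
    {"disk_id": "sdc", "source_volume": "template-root", "filesystem": None, "mountpoint": None},
]

for volume in volumes:
    # Volumes cloned from a source already carry data; skip them
    if volume.get("source_volume") is not None:
        continue
    # Raw volumes have nothing to format; swap volumes are now also
    # skipped on the filesystem/mount path and handled separately
    if volume.get("filesystem") is None or volume.get("filesystem") == "swap":
        continue
    print(f"format and mount {volume['disk_id']} as {volume['filesystem']}")
```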
@@ -20,7 +20,7 @@
 ###############################################################################

 # This script provides an example of a PVC provisioner script. It will create a
-# standard VM config and install a RHEL-like OS using rinse.
+# standard VM config and install a RHEL 8+ or similar OS using rinse.

 # This script can thus be used as an example or reference implementation of a
 # PVC provisioner script and expanded upon as required.
@@ -398,7 +398,7 @@ class VMBuilderScript(VMBuilder):
             if volume.get("source_volume") is not None:
                 continue

-            if volume.get("filesystem") is None:
+            if volume.get("filesystem") is None or volume.get("filesystem") == "swap":
                 continue

             mapped_dst_volume = f"/dev/rbd/{dst_volume}"
@@ -487,7 +487,7 @@ class VMBuilderScript(VMBuilder):
         post_packages = ["cloud-init"]

         # We need to know our root disk for later GRUB-ing
-        root_disk = None
+        root_volume = None
         for volume in volumes:
             if volume["mountpoint"] == "/":
                 root_volume = volume
@@ -571,21 +571,6 @@ class VMBuilderScript(VMBuilder):
         with open(hostname_file, "w") as fh:
             fh.write("{}".format(vm_name))

-        # Fix the cloud-init.target since it's broken by default
-        cloudinit_target_file = "{}/etc/systemd/system/cloud-init.target".format(
-            temporary_directory
-        )
-        with open(cloudinit_target_file, "w") as fh:
-            # We lose our indent on these raw blocks to preserve the apperance of the files
-            # inside the VM itself
-            data = """[Install]
-WantedBy=multi-user.target
-[Unit]
-Description=Cloud-init target
-After=multi-user.target
-"""
-            fh.write(data)
-
         # Due to device ordering within the Libvirt XML configuration, the first Ethernet interface
         # will always be on PCI bus ID 2, hence the name "ens2".
         # Write a DHCP stanza for ens2
@@ -682,11 +667,6 @@ GRUB_SERIAL_COMMAND="serial --speed=115200 --unit=0 --word=8 --parity=no --stop=
         # Set the timezone to UTC
         os.system("ln -sf ../usr/share/zoneinfo/UTC /etc/localtime")

-        # Unmount the bound devfs and sysfs
-        os.system("umount {}/dev".format(temporary_directory))
-        os.system("umount {}/sys".format(temporary_directory))
-        os.system("umount {}/proc".format(temporary_directory))
-
     def cleanup(self):
         """
         cleanup(): Perform any cleanup required due to prepare()/install()
@@ -700,6 +680,7 @@ GRUB_SERIAL_COMMAND="serial --speed=115200 --unit=0 --word=8 --parity=no --stop=
         """

         # Run any imports first
+        import os
         from pvcapid.vmbuilder import open_zk
         from pvcapid.Daemon import config
         import daemon_lib.common as pvc_common
@@ -708,6 +689,11 @@ GRUB_SERIAL_COMMAND="serial --speed=115200 --unit=0 --word=8 --parity=no --stop=
         # Set the tempdir we used in the prepare() and install() steps
         temp_dir = "/tmp/target"

+        # Unmount the bound devfs and sysfs
+        os.system(f"umount {temp_dir}/dev")
+        os.system(f"umount {temp_dir}/sys")
+        os.system(f"umount {temp_dir}/proc")
+
         # Use this construct for reversing the list, as the normal reverse() messes with the list
         for volume in list(reversed(self.vm_data["volumes"])):
             dst_volume_name = f"{self.vm_name}_{volume['disk_id']}"
@@ -718,6 +704,7 @@ GRUB_SERIAL_COMMAND="serial --speed=115200 --unit=0 --word=8 --parity=no --stop=
             if (
                 volume.get("source_volume") is None
                 and volume.get("filesystem") is not None
+                and volume.get("filesystem") != "swap"
             ):
                 # Unmount filesystem
                 retcode, stdout, stderr = pvc_common.run_os_command(
@@ -728,14 +715,14 @@ GRUB_SERIAL_COMMAND="serial --speed=115200 --unit=0 --word=8 --parity=no --stop=
                     f"Failed to unmount '{mapped_dst_volume}' on '{mount_path}': {stderr}"
                 )

-                # Unmap volume
-                with open_zk(config) as zkhandler:
-                    success, message = pvc_ceph.unmap_volume(
-                        zkhandler,
-                        volume["pool"],
-                        dst_volume_name,
-                    )
-                if not success:
-                    raise ProvisioningError(
-                        f"Failed to unmap '{mapped_dst_volume}': {stderr}"
-                    )
+            # Unmap volume
+            with open_zk(config) as zkhandler:
+                success, message = pvc_ceph.unmap_volume(
+                    zkhandler,
+                    volume["pool"],
+                    dst_volume_name,
+                )
+            if not success:
+                raise ProvisioningError(
+                    f"Failed to unmap '{mapped_dst_volume}': {stderr}"
+                )
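One detail worth noting in the cleanup() hunks above: volumes are walked in reverse so the deepest mountpoints are unmounted first, and the script deliberately iterates a reversed copy. A small sketch of why:

```python
# Why cleanup() iterates list(reversed(...)) rather than calling .reverse():
# .reverse() mutates the underlying list in place, which would permanently
# reorder self.vm_data["volumes"] for any later consumer of that structure.
volumes = [{"mountpoint": "/"}, {"mountpoint": "/var"}, {"mountpoint": "/var/log"}]

for volume in list(reversed(volumes)):
    # Deepest mountpoints come first: /var/log unmounts before /var before /
    print("umount", volume["mountpoint"])

print([v["mountpoint"] for v in volumes])  # original order left intact
```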
@@ -27,7 +27,7 @@ from ssl import SSLContext, TLSVersion
 from distutils.util import strtobool as dustrtobool

 # Daemon version
-version = "0.9.58"
+version = "0.9.61"

 # API version
 API_VERSION = 1.0
@@ -1253,7 +1253,7 @@ class API_VM_Root(Resource):
         {"name": "node"},
         {
             "name": "selector",
-            "choices": ("mem", "memfree", "vcpus", "load", "vms", "none"),
+            "choices": ("mem", "memprov", "vcpus", "load", "vms", "none"),
             "helptext": "A valid selector must be specified",
         },
         {"name": "autostart"},
@@ -1302,7 +1302,7 @@ class API_VM_Root(Resource):
             default: none
             enum:
               - mem
-              - memfree
+              - memprov
               - vcpus
               - load
               - vms
@@ -1400,7 +1400,7 @@ class API_VM_Element(Resource):
         {"name": "node"},
         {
             "name": "selector",
-            "choices": ("mem", "memfree", "vcpus", "load", "vms", "none"),
+            "choices": ("mem", "memprov", "vcpus", "load", "vms", "none"),
             "helptext": "A valid selector must be specified",
         },
         {"name": "autostart"},
@@ -1451,7 +1451,7 @@ class API_VM_Element(Resource):
             default: none
             enum:
               - mem
-              - memfree
+              - memprov
               - vcpus
               - load
               - vms
@@ -1650,7 +1650,7 @@ class API_VM_Metadata(Resource):
         {"name": "limit"},
         {
             "name": "selector",
-            "choices": ("mem", "memfree", "vcpus", "load", "vms", "none"),
+            "choices": ("mem", "memprov", "vcpus", "load", "vms", "none"),
             "helptext": "A valid selector must be specified",
         },
         {"name": "autostart"},
@@ -1682,7 +1682,7 @@ class API_VM_Metadata(Resource):
             description: The selector used to determine candidate nodes during migration; see 'target_selector' in the node daemon configuration reference
             enum:
               - mem
-              - memfree
+              - memprov
               - vcpus
               - load
               - vms
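The five hunks above are the API side of the v0.9.59 selector flip. A hedged summary of the new meanings, as an illustrative helper rather than repository code:

```python
# Hedged summary of the selector rename in these hunks (not repository code):
# the old "memfree" behaviour is now the default under "mem", and the old
# "mem" (provisioned-memory) behaviour is explicit as "memprov".
SELECTOR_MEANING = {
    "mem": "most real free memory (formerly 'memfree')",
    "memprov": "least provisioned VM memory (formerly 'mem')",
    "vcpus": "least allocated VM vCPUs",
    "load": "lowest current load average",
    "vms": "least number of provisioned VMs",
    "none": "use the cluster default",
}

def validate_selector(selector):
    # Mirrors the API's "A valid selector must be specified" check
    if selector not in SELECTOR_MEANING:
        raise ValueError("A valid selector must be specified")
    return selector
```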
@@ -100,7 +100,7 @@ devices_scsi_controller = """    <controller type='scsi' index='0' model='virtio
 # * vm_name
 # * disk_id
 devices_disk_header = """    <disk type='network' device='disk'>
-        <driver name='qemu' discard='unmap'/>
+        <driver name='qemu' discard='unmap' cache='none'/>
         <target dev='{disk_id}' bus='scsi'/>
         <auth username='libvirt'>
             <secret type='ceph' uuid='{ceph_storage_secret}'/>
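The cache='none' attribute above is what the v0.9.60 changelog entry about disabling RBD caching refers to: it turns off QEMU's writeback caching for the disk. A small rendering sketch, with placeholder values rather than repository data:

```python
# Sketch: rendering the disk XML header with the new cache='none' attribute.
# The disk_id and secret UUID below are placeholders, not repository data.
devices_disk_header = """    <disk type='network' device='disk'>
      <driver name='qemu' discard='unmap' cache='none'/>
      <target dev='{disk_id}' bus='scsi'/>
      <auth username='libvirt'>
        <secret type='ceph' uuid='{ceph_storage_secret}'/>
"""

print(devices_disk_header.format(
    disk_id="sda",
    ceph_storage_secret="00000000-0000-0000-0000-000000000000",
))
```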
@@ -580,7 +580,7 @@ def delete_template_network_element(name, vni):
     networks, code = list_template_network_vnis(name)
     found_vni = False
     for network in networks:
-        if network["vni"] == int(vni):
+        if network["vni"] == vni:
             found_vni = True
     if not found_vni:
         retmsg = {
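This is the v0.9.61 "network comparison" fix. A sketch of the failure mode, assuming (as the fix implies) that the stored VNI values are strings:

```python
# Sketch of the comparison bug: casting the needle to int while the stored
# "vni" values are strings means the equality never matches.
networks = [{"vni": "100"}, {"vni": "200"}]
vni = "100"

print(any(n["vni"] == int(vni) for n in networks))  # False: "100" != 100
print(any(n["vni"] == vni for n in networks))       # True with the fix
```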
@@ -215,6 +215,19 @@ def node_list(
 # Output display functions
 #
 def getOutputColours(node_information):
+    node_health = node_information.get("health", "N/A")
+    if isinstance(node_health, int):
+        if node_health <= 50:
+            health_colour = ansiprint.red()
+        elif node_health <= 90:
+            health_colour = ansiprint.yellow()
+        elif node_health <= 100:
+            health_colour = ansiprint.green()
+        else:
+            health_colour = ansiprint.blue()
+    else:
+        health_colour = ansiprint.blue()
+
     if node_information["daemon_state"] == "run":
         daemon_state_colour = ansiprint.green()
     elif node_information["daemon_state"] == "stop":
@@ -251,6 +264,7 @@ def getOutputColours(node_information):
     mem_provisioned_colour = ""

     return (
+        health_colour,
         daemon_state_colour,
         coordinator_state_colour,
         domain_state_colour,
@@ -261,6 +275,7 @@ def getOutputColours(node_information):

 def format_info(node_information, long_output):
     (
+        health_colour,
         daemon_state_colour,
         coordinator_state_colour,
         domain_state_colour,
@@ -273,14 +288,56 @@ def format_info(node_information, long_output):
     # Basic information
     ainformation.append(
         "{}Name:{} {}".format(
-            ansiprint.purple(), ansiprint.end(), node_information["name"]
+            ansiprint.purple(),
+            ansiprint.end(),
+            node_information["name"],
         )
     )
     ainformation.append(
         "{}PVC Version:{} {}".format(
-            ansiprint.purple(), ansiprint.end(), node_information["pvc_version"]
+            ansiprint.purple(),
+            ansiprint.end(),
+            node_information["pvc_version"],
         )
     )
+
+    node_health = node_information.get("health", "N/A")
+    if isinstance(node_health, int):
+        node_health_text = f"{node_health}%"
+    else:
+        node_health_text = node_health
+    ainformation.append(
+        "{}Health:{} {}{}{}".format(
+            ansiprint.purple(),
+            ansiprint.end(),
+            health_colour,
+            node_health_text,
+            ansiprint.end(),
+        )
+    )
+
+    node_health_details = node_information.get("health_details", [])
+    if long_output:
+        node_health_messages = "\n ".join(
+            [f"{plugin['name']}: {plugin['message']}" for plugin in node_health_details]
+        )
+    else:
+        node_health_messages = "\n ".join(
+            [
+                f"{plugin['name']}: {plugin['message']}"
+                for plugin in node_health_details
+                if int(plugin.get("health_delta", 0)) > 0
+            ]
+        )
+
+    if len(node_health_messages) > 0:
+        ainformation.append(
+            "{}Health Plugin Details:{} {}".format(
+                ansiprint.purple(), ansiprint.end(), node_health_messages
+            )
+        )
+    ainformation.append("")
+
     ainformation.append(
         "{}Daemon State:{} {}{}{}".format(
             ansiprint.purple(),
@@ -308,11 +365,6 @@ def format_info(node_information, long_output):
             ansiprint.end(),
         )
     )
-    ainformation.append(
-        "{}Active VM Count:{} {}".format(
-            ansiprint.purple(), ansiprint.end(), node_information["domains_count"]
-        )
-    )
     if long_output:
         ainformation.append("")
         ainformation.append(
@@ -331,6 +383,11 @@ def format_info(node_information, long_output):
         )
     )
     ainformation.append("")
+    ainformation.append(
+        "{}Active VM Count:{} {}".format(
+            ansiprint.purple(), ansiprint.end(), node_information["domains_count"]
+        )
+    )
     ainformation.append(
         "{}Host CPUs:{} {}".format(
             ansiprint.purple(), ansiprint.end(), node_information["vcpu"]["total"]
@@ -397,6 +454,7 @@ def format_list(node_list, raw):
     # Determine optimal column widths
     node_name_length = 5
     pvc_version_length = 8
+    health_length = 7
     daemon_state_length = 7
     coordinator_state_length = 12
     domain_state_length = 7
@@ -417,6 +475,15 @@ def format_list(node_list, raw):
         _pvc_version_length = len(node_information.get("pvc_version", "N/A")) + 1
         if _pvc_version_length > pvc_version_length:
             pvc_version_length = _pvc_version_length
+        # node_health column
+        node_health = node_information.get("health", "N/A")
+        if isinstance(node_health, int):
+            node_health_text = f"{node_health}%"
+        else:
+            node_health_text = node_health
+        _health_length = len(node_health_text) + 1
+        if _health_length > health_length:
+            health_length = _health_length
         # daemon_state column
         _daemon_state_length = len(node_information["daemon_state"]) + 1
         if _daemon_state_length > daemon_state_length:
@@ -466,7 +533,10 @@ def format_list(node_list, raw):
     # Format the string (header)
     node_list_output.append(
         "{bold}{node_header: <{node_header_length}} {state_header: <{state_header_length}} {resource_header: <{resource_header_length}} {memory_header: <{memory_header_length}}{end_bold}".format(
-            node_header_length=node_name_length + pvc_version_length + 1,
+            node_header_length=node_name_length
+            + pvc_version_length
+            + health_length
+            + 2,
             state_header_length=daemon_state_length
             + coordinator_state_length
             + domain_state_length
@@ -484,7 +554,14 @@ def format_list(node_list, raw):
             bold=ansiprint.bold(),
             end_bold=ansiprint.end(),
             node_header="Nodes "
-            + "".join(["-" for _ in range(6, node_name_length + pvc_version_length)]),
+            + "".join(
+                [
+                    "-"
+                    for _ in range(
+                        6, node_name_length + pvc_version_length + health_length + 1
+                    )
+                ]
+            ),
             state_header="States "
             + "".join(
                 [
@@ -526,12 +603,13 @@ def format_list(node_list, raw):
     )

     node_list_output.append(
-        "{bold}{node_name: <{node_name_length}} {node_pvc_version: <{pvc_version_length}} \
+        "{bold}{node_name: <{node_name_length}} {node_pvc_version: <{pvc_version_length}} {node_health: <{health_length}} \
 {daemon_state_colour}{node_daemon_state: <{daemon_state_length}}{end_colour} {coordinator_state_colour}{node_coordinator_state: <{coordinator_state_length}}{end_colour} {domain_state_colour}{node_domain_state: <{domain_state_length}}{end_colour} \
 {node_domains_count: <{domains_count_length}} {node_cpu_count: <{cpu_count_length}} {node_load: <{load_length}} \
 {node_mem_total: <{mem_total_length}} {node_mem_used: <{mem_used_length}} {node_mem_free: <{mem_free_length}} {node_mem_allocated: <{mem_alloc_length}} {node_mem_provisioned: <{mem_prov_length}}{end_bold}".format(
             node_name_length=node_name_length,
             pvc_version_length=pvc_version_length,
+            health_length=health_length,
             daemon_state_length=daemon_state_length,
             coordinator_state_length=coordinator_state_length,
             domain_state_length=domain_state_length,
@@ -551,6 +629,7 @@ def format_list(node_list, raw):
             end_colour="",
             node_name="Name",
             node_pvc_version="Version",
+            node_health="Health",
             node_daemon_state="Daemon",
             node_coordinator_state="Coordinator",
             node_domain_state="Domain",
@@ -568,19 +647,28 @@ def format_list(node_list, raw):
     # Format the string (elements)
     for node_information in sorted(node_list, key=lambda n: n["name"]):
         (
+            health_colour,
             daemon_state_colour,
             coordinator_state_colour,
             domain_state_colour,
             mem_allocated_colour,
             mem_provisioned_colour,
         ) = getOutputColours(node_information)
+
+        node_health = node_information.get("health", "N/A")
+        if isinstance(node_health, int):
+            node_health_text = f"{node_health}%"
+        else:
+            node_health_text = node_health
+
         node_list_output.append(
-            "{bold}{node_name: <{node_name_length}} {node_pvc_version: <{pvc_version_length}} \
+            "{bold}{node_name: <{node_name_length}} {node_pvc_version: <{pvc_version_length}} {health_colour}{node_health: <{health_length}}{end_colour} \
 {daemon_state_colour}{node_daemon_state: <{daemon_state_length}}{end_colour} {coordinator_state_colour}{node_coordinator_state: <{coordinator_state_length}}{end_colour} {domain_state_colour}{node_domain_state: <{domain_state_length}}{end_colour} \
 {node_domains_count: <{domains_count_length}} {node_cpu_count: <{cpu_count_length}} {node_load: <{load_length}} \
-{node_mem_total: <{mem_total_length}} {node_mem_used: <{mem_used_length}} {node_mem_free: <{mem_free_length}} {node_mem_allocated: <{mem_alloc_length}} {node_mem_provisioned: <{mem_prov_length}}{end_bold}".format(
+{node_mem_total: <{mem_total_length}} {node_mem_used: <{mem_used_length}} {node_mem_free: <{mem_free_length}} {mem_allocated_colour}{node_mem_allocated: <{mem_alloc_length}}{end_colour} {mem_provisioned_colour}{node_mem_provisioned: <{mem_prov_length}}{end_colour}{end_bold}".format(
             node_name_length=node_name_length,
             pvc_version_length=pvc_version_length,
+            health_length=health_length,
             daemon_state_length=daemon_state_length,
             coordinator_state_length=coordinator_state_length,
             domain_state_length=domain_state_length,
@@ -594,6 +682,7 @@ def format_list(node_list, raw):
             mem_prov_length=mem_prov_length,
             bold="",
             end_bold="",
+            health_colour=health_colour,
             daemon_state_colour=daemon_state_colour,
             coordinator_state_colour=coordinator_state_colour,
             domain_state_colour=domain_state_colour,
@@ -602,6 +691,7 @@ def format_list(node_list, raw):
             end_colour=ansiprint.end(),
             node_name=node_information["name"],
             node_pvc_version=node_information.get("pvc_version", "N/A"),
+            node_health=node_health_text,
             node_daemon_state=node_information["daemon_state"],
             node_coordinator_state=node_information["coordinator_state"],
             node_domain_state=node_information["domain_state"],
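The node health column renders integers as percentages and anything else (such as "N/A") verbatim, coloured by the thresholds in getOutputColours(). A standalone sketch of those rules:

```python
# Standalone sketch of the health rendering rules added above
# (thresholds taken from the getOutputColours hunk).
def render_health(node_health):
    if isinstance(node_health, int):
        text = f"{node_health}%"
        if node_health <= 50:
            colour = "red"      # badly degraded
        elif node_health <= 90:
            colour = "yellow"   # partially degraded
        elif node_health <= 100:
            colour = "green"    # healthy
        else:
            colour = "blue"     # out-of-range value
    else:
        text = node_health      # e.g. "N/A" before the first keepalive
        colour = "blue"
    return colour, text

print(render_health(100))    # ('green', '100%')
print(render_health(75))     # ('yellow', '75%')
print(render_health(40))     # ('red', '40%')
print(render_health("N/A"))  # ('blue', 'N/A')
```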
@@ -807,7 +807,7 @@ def cli_vm():
     "node_selector",
     default="none",
     show_default=True,
-    type=click.Choice(["mem", "memfree", "load", "vcpus", "vms", "none"]),
+    type=click.Choice(["mem", "memprov", "load", "vcpus", "vms", "none"]),
     help='Method to determine optimal target node during autoselect; "none" will use the default for the cluster.',
 )
 @click.option(
@@ -859,15 +859,15 @@ def vm_define(
     Define a new virtual machine from Libvirt XML configuration file VMCONFIG.

     The target node selector ("--node-selector"/"-s") can be "none" to use the cluster default, or one of the following values:
-      * "mem": choose the node with the least provisioned VM memory
-      * "memfree": choose the node with the most (real) free memory
+      * "mem": choose the node with the most (real) free memory
+      * "memprov": choose the node with the least provisioned VM memory
       * "vcpus": choose the node with the least allocated VM vCPUs
       * "load": choose the node with the lowest current load average
       * "vms": choose the node with the least number of provisioned VMs

     For most clusters, "mem" should be sufficient, but others may be used based on the cluster workload and available resources. The following caveats should be considered:
-      * "mem" looks at the provisioned memory, not the allocated memory; thus, stopped or disabled VMs are counted towards a node's memory for this selector, even though their memory is not actively in use.
-      * "memfree" looks at the free memory of the node in general, ignoring the amount provisioned to VMs; if any VM's internal memory usage changes, this value would be affected. This might be preferable to "mem" on clusters with very high memory utilization versus total capacity or if many VMs are stopped/disabled.
+      * "mem" looks at the free memory of the node in general, ignoring the amount provisioned to VMs; if any VM's internal memory usage changes, this value would be affected.
+      * "memprov" looks at the provisioned memory, not the allocated memory; thus, stopped or disabled VMs are counted towards a node's memory for this selector, even though their memory is not actively in use.
       * "load" looks at the system load of the node in general, ignoring load in any particular VMs; if any VM's CPU usage changes, this value would be affected. This might be preferable on clusters with some very CPU intensive VMs.
     """
@@ -914,7 +914,7 @@ def vm_define(
     "node_selector",
     default=None,
     show_default=False,
-    type=click.Choice(["mem", "memfree", "load", "vcpus", "vms", "none"]),
+    type=click.Choice(["mem", "memprov", "load", "vcpus", "vms", "none"]),
     help='Method to determine optimal target node during autoselect; "none" will use the default for the cluster.',
 )
 @click.option(
@@ -4134,7 +4134,7 @@ def provisioner_template_system_list(limit):
     "--node-selector",
     "node_selector",
     type=click.Choice(
-        ["mem", "memfree", "vcpus", "vms", "load", "none"], case_sensitive=False
+        ["mem", "memprov", "vcpus", "vms", "load", "none"], case_sensitive=False
     ),
     default="none",
     help='Method to determine optimal target node during autoselect; "none" will use the default for the cluster.',
@@ -4230,7 +4230,7 @@ def provisioner_template_system_add(
     "--node-selector",
     "node_selector",
     type=click.Choice(
-        ["mem", "memfree", "vcpus", "vms", "load", "none"], case_sensitive=False
+        ["mem", "memprov", "vcpus", "vms", "load", "none"], case_sensitive=False
     ),
     help='Method to determine optimal target node during autoselect; "none" will use the default for the cluster.',
 )
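A minimal, hypothetical click sketch of the option pattern these hunks converge on (not the actual pvc CLI module); note case_sensitive=False, which relates to the v0.9.58 case-sensitivity fix:

```python
# Hypothetical sketch of the CLI option pattern above, not the pvc module itself.
import click


@click.command()
@click.option(
    "-s",
    "--node-selector",
    "node_selector",
    default="none",
    show_default=True,
    type=click.Choice(
        ["mem", "memprov", "load", "vcpus", "vms", "none"], case_sensitive=False
    ),
    help='Method to determine optimal target node during autoselect; "none" uses the cluster default.',
)
def define(node_selector):
    """Sketch: a case-insensitive selector choice, as in the real commands."""
    click.echo(f"selector: {node_selector}")


if __name__ == "__main__":
    define()
```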
@@ -2,7 +2,7 @@ from setuptools import setup

 setup(
     name="pvc",
-    version="0.9.58",
+    version="0.9.61",
     packages=["pvc", "pvc.cli_lib"],
     install_requires=[
         "Click",
@@ -638,9 +638,9 @@ def findTargetNode(zkhandler, dom_uuid):

     # Execute the search
     if search_field == "mem":
-        return findTargetNodeMem(zkhandler, node_limit, dom_uuid)
-    if search_field == "memfree":
-        return findTargetNodeMemFree(zkhandler, node_limit, dom_uuid)
+        return findTargetNodeMemFree(zkhandler, node_limit, dom_uuid)
+    if search_field == "memprov":
+        return findTargetNodeMemProv(zkhandler, node_limit, dom_uuid)
     if search_field == "load":
         return findTargetNodeLoad(zkhandler, node_limit, dom_uuid)
     if search_field == "vcpus":
@@ -678,10 +678,28 @@ def getNodes(zkhandler, node_limit, dom_uuid):
     return valid_node_list


+#
+# via free memory
+#
+def findTargetNodeMemFree(zkhandler, node_limit, dom_uuid):
+    most_memfree = 0
+    target_node = None
+
+    node_list = getNodes(zkhandler, node_limit, dom_uuid)
+    for node in node_list:
+        memfree = int(zkhandler.read(("node.memory.free", node)))
+
+        if memfree > most_memfree:
+            most_memfree = memfree
+            target_node = node
+
+    return target_node
+
+
 #
 # via provisioned memory
 #
-def findTargetNodeMem(zkhandler, node_limit, dom_uuid):
+def findTargetNodeMemProv(zkhandler, node_limit, dom_uuid):
     most_provfree = 0
     target_node = None

@@ -700,24 +718,6 @@ def findTargetNodeMemProv(zkhandler, node_limit, dom_uuid):
     return target_node


-#
-# via free memory
-#
-def findTargetNodeMemFree(zkhandler, node_limit, dom_uuid):
-    most_memfree = 0
-    target_node = None
-
-    node_list = getNodes(zkhandler, node_limit, dom_uuid)
-    for node in node_list:
-        memfree = int(zkhandler.read(("node.memory.free", node)))
-
-        if memfree > most_memfree:
-            most_memfree = memfree
-            target_node = node
-
-    return target_node
-
-
 #
 # via load average
 #
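After this change the dispatch is a straight name-to-function mapping. A hedged, simplified sketch (the real functions read their memory figures from Zookeeper):

```python
# Hedged sketch of the selector dispatch after this change; simplified,
# with in-memory node dicts standing in for Zookeeper reads.
def find_target_node_memfree(nodes):
    # "mem": the node with the most real free memory wins
    return max(nodes, key=lambda n: n["memfree"], default=None)

def find_target_node_memprov(nodes):
    # "memprov": the node with the most un-provisioned memory wins
    return max(nodes, key=lambda n: n["memtotal"] - n["memprov"], default=None)

SELECTORS = {
    "mem": find_target_node_memfree,      # default behaviour since v0.9.59
    "memprov": find_target_node_memprov,  # the old "mem" behaviour
}

nodes = [
    {"name": "hv1", "memfree": 64000, "memtotal": 128000, "memprov": 96000},
    {"name": "hv2", "memfree": 48000, "memtotal": 128000, "memprov": 32000},
]
print(SELECTORS["mem"](nodes)["name"])      # hv1 (most free memory)
print(SELECTORS["memprov"](nodes)["name"])  # hv2 (least provisioned)
```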
daemon-common/migrations/versions/9.json (new file, 1 line)

@@ -0,0 +1 @@
+{"version": "9", "root": "", "base": {"root": "", "schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "cmd": "/cmd", "cmd.node": "/cmd/nodes", "cmd.domain": "/cmd/domains", "cmd.ceph": "/cmd/ceph", "logs": "/logs", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "logs": {"node": "", "messages": "/messages"}, "node": {"name": "", "keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "data.pvc_version": "/pvcversion", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword", "sriov": "/sriov", "sriov.pf": "/sriov/pf", "sriov.vf": "/sriov/vf", "monitoring.plugins": "/monitoring_plugins", "monitoring.data": "/monitoring_data", "monitoring.health": "/monitoring_health"}, "monitoring_plugin": {"name": "", "last_run": "/last_run", "health_delta": "/health_delta", "message": "/message", "data": "/data", "runtime": "/runtime"}, "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"}, "sriov_vf": {"phy": "", "pf": "/pf", "mtu": "/mtu", "mac": "/mac", "phy_mac": "/phy_mac", "config": "/config", "config.vlan_id": "/config/vlan_id", "config.vlan_qos": "/config/vlan_qos", "config.tx_rate_min": "/config/tx_rate_min", "config.tx_rate_max": "/config/tx_rate_max", "config.spoof_check": "/config/spoof_check", "config.link_state": "/config/link_state", "config.trust": "/config/trust", "config.query_rss": "/config/query_rss", "pci": "/pci", "pci.domain": "/pci/domain", "pci.bus": "/pci/bus", "pci.slot": "/pci/slot", "pci.function": "/pci/function", "used": "/used", "used_by": "/used_by"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "meta.tags": "/tags", "migrate.sync_lock": "/migrate_sync_lock"}, "tag": {"name": "", "type": "/type", "protected": "/protected"}, "network": {"vni": "", "type": "/nettype", "mtu": "/mtu", "rule": "/firewall_rules", "rule.in": "/firewall_rules/in", "rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": "/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname", "expiry": "/expiry", "client_id": "/clientid"}, "rule": {"description": "", "rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "db_device": "/db_device", "fsid": "/fsid", "ofsid": "/fsid/osd", "cfsid": "/fsid/cluster", "lvm": "/lvm", "vg": "/lvm/vg", "lv": "/lvm/lv", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "tier": "/tier", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}}
@@ -21,6 +21,7 @@

 import time
 import re
+import json

 import daemon_lib.common as common

@@ -49,6 +50,38 @@ def getNodeInformation(zkhandler, node_name):
         zkhandler.read(("node.count.provisioned_domains", node_name))
     )
     node_running_domains = zkhandler.read(("node.running_domains", node_name)).split()
+    try:
+        node_health = int(zkhandler.read(("node.monitoring.health", node_name)))
+    except ValueError:
+        node_health = "N/A"
+    node_health_plugins = zkhandler.read(("node.monitoring.plugins", node_name)).split()
+    node_health_details = list()
+    for plugin in node_health_plugins:
+        plugin_last_run = zkhandler.read(
+            ("node.monitoring.data", node_name, "monitoring_plugin.last_run", plugin)
+        )
+        plugin_health_delta = zkhandler.read(
+            (
+                "node.monitoring.data",
+                node_name,
+                "monitoring_plugin.health_delta",
+                plugin,
+            )
+        )
+        plugin_message = zkhandler.read(
+            ("node.monitoring.data", node_name, "monitoring_plugin.message", plugin)
+        )
+        plugin_data = zkhandler.read(
+            ("node.monitoring.data", node_name, "monitoring_plugin.data", plugin)
+        )
+        plugin_output = {
+            "name": plugin,
+            "last_run": int(plugin_last_run),
+            "health_delta": int(plugin_health_delta),
+            "message": plugin_message,
+            "data": json.loads(plugin_data),
+        }
+        node_health_details.append(plugin_output)

     # Construct a data structure to represent the data
     node_information = {
@@ -61,10 +94,16 @@ def getNodeInformation(zkhandler, node_name):
         "kernel": node_kernel,
         "os": node_os,
         "arch": node_arch,
+        "health": node_health,
+        "health_plugins": node_health_plugins,
+        "health_details": node_health_details,
         "load": node_load,
         "domains_count": node_domains_count,
         "running_domains": node_running_domains,
-        "vcpu": {"total": node_cpu_count, "allocated": node_vcpu_allocated},
+        "vcpu": {
+            "total": node_cpu_count,
+            "allocated": node_vcpu_allocated,
+        },
         "memory": {
             "total": node_mem_total,
             "allocated": node_mem_allocated,
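Each plugin reports an integer health_delta. How those deltas roll up into the node.monitoring.health value read above is not shown in this diff; a plausible sketch, assuming the 100-minus-deltas rule suggested by the load plugin's "subtract 10 from the total of 100" comment:

```python
# Hedged sketch: rolling plugin health_delta values into a node health
# percentage. The 100-minus-sum rule is an assumption drawn from the load
# plugin's comment, not code taken from this diff.
plugins = [
    {"name": "ceph", "health_delta": 10, "message": "Ceph cluster in WARNING state"},
    {"name": "dpkg", "health_delta": 1, "message": "Obsolete conffiles: 2"},
    {"name": "load", "health_delta": 0, "message": "Current load is 1.2"},
]

node_health = max(0, 100 - sum(p["health_delta"] for p in plugins))
print(f"{node_health}%")  # 89%
```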
@@ -644,7 +644,7 @@ def rename_vm(zkhandler, domain, new_domain):

     # Verify that the VM is in a stopped state; renaming is not supported otherwise
     state = zkhandler.read(("domain.state", dom_uuid))
-    if state != "stop":
+    if state not in ["stop", "disable"]:
         return (
             False,
             'ERROR: VM "{}" is not in stopped state; VMs cannot be renamed while running.'.format(
@@ -540,7 +540,7 @@ class ZKHandler(object):
 #
 class ZKSchema(object):
     # Current version
-    _version = 8
+    _version = 9

     # Root for doing nested keys
     _schema_root = ""
@@ -608,6 +608,18 @@ class ZKSchema(object):
         "sriov": "/sriov",
         "sriov.pf": "/sriov/pf",
         "sriov.vf": "/sriov/vf",
+        "monitoring.plugins": "/monitoring_plugins",
+        "monitoring.data": "/monitoring_data",
+        "monitoring.health": "/monitoring_health",
     },
+    # The schema of an individual monitoring plugin data entry (/nodes/{node_name}/monitoring_data/{plugin})
+    "monitoring_plugin": {
+        "name": "",  # The root key
+        "last_run": "/last_run",
+        "health_delta": "/health_delta",
+        "message": "/message",
+        "data": "/data",
+        "runtime": "/runtime",
+    },
     # The schema of an individual SR-IOV PF entry (/nodes/{node_name}/sriov/pf/{pf})
     "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"},  # The root key
@@ -874,9 +886,10 @@ class ZKSchema(object):
             if not zkhandler.zk_conn.exists(nkipath):
                 result = False

-        # One might expect child keys under node (specifically, sriov.pf and sriov.vf) to be
-        # managed here as well, but those are created automatically every time pvcnoded starts
-        # and thus never need to be validated or applied.
+        # One might expect child keys under node (specifically, sriov.pf, sriov.vf,
+        # monitoring.data) to be managed here as well, but those are created
+        # automatically every time pvcnoded started and thus never need to be validated
+        # or applied.

         # These two have several children layers that must be parsed through
         for elem in ["volume"]:
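The schema dicts map short dotted keys to Zookeeper sub-paths under each object's root. A hedged sketch of how such a key could resolve to a full path (the joining logic here is illustrative, not ZKSchema's actual code):

```python
# Hedged sketch: resolving a dotted schema key to a full ZK path. The
# f-string join is an assumption for illustration, not ZKSchema's code.
node_schema = {
    "name": "",  # the root key
    "monitoring.health": "/monitoring_health",
}

def node_path(node_name, key):
    return f"/nodes/{node_name}{node_schema[key]}"

print(node_path("hv1", "monitoring.health"))  # /nodes/hv1/monitoring_health
```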
debian/changelog (vendored, 20 additions)

@@ -1,3 +1,23 @@
+pvc (0.9.61-0) unstable; urgency=high
+
+  * [provisioner] Fixes a bug in network comparison
+  * [api] Fixes a bug being unable to rename disabled VMs
+
+ -- Joshua M. Boniface <joshua@boniface.me>  Wed, 08 Feb 2023 10:08:05 -0500
+
+pvc (0.9.60-0) unstable; urgency=high
+
+  * [Provisioner] Cleans up several remaining bugs in the example scripts; they should all be valid now
+  * [Provisioner] Adjust default libvirt schema to disable RBD caching for a 2x+ performance boost
+
+ -- Joshua M. Boniface <joshua@boniface.me>  Tue, 06 Dec 2022 15:42:55 -0500
+
+pvc (0.9.59-0) unstable; urgency=high
+
+  * [API] Flips the mem(prov) and mem(free) selectors making mem(free) the default for "mem" and "memprov" explicit
+
+ -- Joshua M. Boniface <joshua@boniface.me>  Tue, 15 Nov 2022 15:50:15 -0500
+
 pvc (0.9.58-0) unstable; urgency=high

   * [API] Fixes a bug where migration selector could have case-sensitive operational faults
debian/pvc-daemon-node.install (vendored, 1 addition)

@@ -5,3 +5,4 @@ node-daemon/pvcnoded.service lib/systemd/system
 node-daemon/pvc.target lib/systemd/system
 node-daemon/pvcautoready.service lib/systemd/system
 node-daemon/monitoring usr/share/pvc
+node-daemon/plugins usr/share/pvc
@@ -356,15 +356,15 @@
 The default selector algorithm to use when migrating VMs away from a node; individual VMs can override this default.

 Valid `target_selector` values are:
-  * `mem`: choose the node with the least provisioned VM memory
-  * `memfree`: choose the node with the most (real) free memory
+  * `mem`: choose the node with the most (real) free memory
+  * `memprov`: choose the node with the least provisioned VM memory
   * `vcpus`: choose the node with the least allocated VM vCPUs
   * `load`: choose the node with the lowest current load average
   * `vms`: choose the node with the least number of provisioned VMs

 For most clusters, `mem` should be sufficient, but others may be used based on the cluster workload and available resources. The following caveats should be considered:
-  * `mem` looks at the provisioned memory, not the allocated memory; thus, stopped or disabled VMs are counted towards a node's memory for this selector, even though their memory is not actively in use.
-  * `memfree` looks at the free memory of the node in general, ignoring the amount provisioned to VMs; if any VM's internal memory usage changes, this value would be affected. This might be preferable to `mem` on clusters with very high memory utilization versus total capacity or if many VMs are stopped/disabled.
+  * `mem` looks at the free memory of the node in general, ignoring the amount provisioned to VMs; if any VM's internal memory usage changes, this value would be affected.
+  * `memprov` looks at the provisioned memory, not the allocated memory; thus, stopped or disabled VMs are counted towards a node's memory for this selector, even though their memory is not actively in use.
   * `load` looks at the system load of the node in general, ignoring load in any particular VMs; if any VM's CPU usage changes, this value would be affected. This might be preferable on clusters with some very CPU intensive VMs.

 #### `system` → `configuration` → `directories` → `dynamic_directory`
node-daemon/plugins/ceph (new file, 126 lines)

@@ -0,0 +1,126 @@
#!/usr/bin/env python3

# ceph.py - PVC Monitoring example plugin for Ceph status
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2022 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################

# This script provides an example of a PVC monitoring plugin script. It will create
# a simple plugin to check the Ceph cluster health for anomalies, and return a health
# delta reflective of the overall Ceph status (HEALTH_WARN = 10, HEALTH_ERR = 50).

# This script can thus be used as an example or reference implementation of a
# PVC monitoring pluginscript and expanded upon as required.

# A monitoring plugin script must implement the class "MonitoringPluginScript" which
# extends "MonitoringPlugin", providing the 3 functions indicated. Detailed explanation
# of the role of each function is provided in context of the example; see the other
# examples for more potential uses.

# WARNING:
#
# This script will run in the context of the node daemon keepalives as root.
# DO NOT install untrusted, unvetted plugins under any circumstances.


# This import is always required here, as MonitoringPlugin is used by the
# MonitoringPluginScript class
from pvcnoded.objects.MonitoringInstance import MonitoringPlugin


# A monitoring plugin script must always expose its nice name, which must be identical to
# the file name
PLUGIN_NAME = "ceph"


# The MonitoringPluginScript class must be named as such, and extend MonitoringPlugin.
class MonitoringPluginScript(MonitoringPlugin):
    def setup(self):
        """
        setup(): Perform special setup steps during node daemon startup

        This step is optional and should be used sparingly.
        """

        pass

    def run(self):
        """
        run(): Perform the check actions and return a PluginResult object
        """

        # Run any imports first
        from rados import Rados
        from json import loads, dumps

        # Connect to the Ceph cluster
        try:
            ceph_conn = Rados(
                conffile=self.config["ceph_config_file"],
                conf=dict(keyring=self.config["ceph_admin_keyring"]),
            )
            ceph_conn.connect(timeout=1)
        except Exception as e:
            self.log(f"Failed to connect to Ceph cluster: {e}", state="e")
            return self.plugin_result

        # Get the Ceph cluster health
        try:
            health_status = loads(
                ceph_conn.mon_command(dumps({"prefix": "health", "format": "json"}), b"", timeout=1)[1]
            )
            ceph_health = health_status["status"]
        except Exception as e:
            self.log(f"Failed to get health data from Ceph cluster: {e}", state="e")
            return self.plugin_result
        finally:
            ceph_conn.shutdown()

        # Get a list of error entries in the health status output
        error_entries = health_status["checks"].keys()

        # Set the health delta based on the errors presented
        if ceph_health == "HEALTH_ERR":
            health_delta = 50
            message = f"Ceph cluster in ERROR state: {', '.join(error_entries)}"
        elif ceph_health == "HEALTH_WARN":
            health_delta = 10
            message = f"Ceph cluster in WARNING state: {', '.join(error_entries)}"
        else:
            health_delta = 0
            message = "Ceph cluster in OK state"

        # Set the health delta in our local PluginResult object
        self.plugin_result.set_health_delta(health_delta)

        # Set the message in our local PluginResult object
        self.plugin_result.set_message(message)

        # Set the detailed data in our local PluginResult object
        self.plugin_result.set_data(health_status)

        # Return our local PluginResult object
        return self.plugin_result

    def cleanup(self):
        """
        cleanup(): Perform special cleanup steps during node daemon termination

        This step is optional and should be used sparingly.
        """

        pass
node-daemon/plugins/dpkg (new file, 157 lines)

@@ -0,0 +1,157 @@
#!/usr/bin/env python3

# dpkg.py - PVC Monitoring example plugin for dpkg status
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2022 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################

# This script provides an example of a PVC monitoring plugin script. It will create
# a simple plugin to check the system dpkg status is as expected, with no invalid
# packages or obsolete configuration files, and will return a 1 health delta for each
# flaw in invalid packages, upgradable packages, and obsolete config files.

# This script can thus be used as an example or reference implementation of a
# PVC monitoring pluginscript and expanded upon as required.

# A monitoring plugin script must implement the class "MonitoringPluginScript" which
# extends "MonitoringPlugin", providing the 3 functions indicated. Detailed explanation
# of the role of each function is provided in context of the example; see the other
# examples for more potential uses.

# WARNING:
#
# This script will run in the context of the node daemon keepalives as root.
# DO NOT install untrusted, unvetted plugins under any circumstances.


# This import is always required here, as MonitoringPlugin is used by the
# MonitoringPluginScript class
from pvcnoded.objects.MonitoringInstance import MonitoringPlugin


# A monitoring plugin script must always expose its nice name, which must be identical to
# the file name
PLUGIN_NAME = "dpkg"


# The MonitoringPluginScript class must be named as such, and extend MonitoringPlugin.
class MonitoringPluginScript(MonitoringPlugin):
    def setup(self):
        """
        setup(): Perform special setup steps during node daemon startup

        This step is optional and should be used sparingly.
        """

        pass

    def run(self):
        """
        run(): Perform the check actions and return a PluginResult object
        """

        # Run any imports first
        from re import match
        import daemon_lib.common as pvc_common

        # Get Debian version
        with open('/etc/debian_version', 'r') as fh:
            debian_version = fh.read().strip()

        # Get a list of dpkg packages for analysis
        retcode, stdout, stderr = pvc_common.run_os_command("/usr/bin/dpkg --list")

        # Get a list of installed packages and states
        packages = list()
        for dpkg_line in stdout.split('\n'):
            if match('^[a-z][a-z] ', dpkg_line):
                line_split = dpkg_line.split()
                package_state = line_split[0]
                package_name = line_split[1]
                packages.append((package_name, package_state))

        count_ok = 0
        count_inconsistent = 0
        list_inconsistent = list()

        for package in packages:
            if package[1] == "ii":
                count_ok += 1
            else:
                count_inconsistent += 1
                list_inconsistent.append(package[0])

        # Get upgradable packages
        retcode, stdout, stderr = pvc_common.run_os_command("/usr/bin/apt list --upgradable")

        list_upgradable = list()
        for apt_line in stdout.split('\n'):
            if match('^[a-z][a-z] ', apt_line):
                line_split = apt_line.split('/')
                package_name = line_split[0]
                list_upgradable.append(package_name)

        count_upgradable = len(list_upgradable)

        # Get obsolete config files (dpkg-* or ucf-* under /etc)
        retcode, stdout, stderr = pvc_common.run_os_command("/usr/bin/find /etc -type f -a \( -name '*.dpkg-*' -o -name '*.ucf-*' \)")

        obsolete_conffiles = list()
        for conffile_line in stdout.split('\n'):
            if conffile_line:
                obsolete_conffiles.append(conffile_line)

        count_obsolete_conffiles = len(obsolete_conffiles)

        # Set health_delta based on the results
        health_delta = 0
        if count_inconsistent > 0:
            health_delta += 1
        if count_upgradable > 0:
            health_delta += 1
        if count_obsolete_conffiles > 0:
            health_delta += 1

        # Set the health delta in our local PluginResult object
        self.plugin_result.set_health_delta(health_delta)

        # Craft the message
        message = f"Debian {debian_version}; Obsolete conffiles: {count_obsolete_conffiles}; Packages valid: {count_ok}, inconsistent: {count_inconsistent}, upgradable: {count_upgradable}"

        # Set the message in our local PluginResult object
        self.plugin_result.set_message(message)

        # Set the detailed data in our local PluginResult object
        detailed_data = {
            "debian_version": debian_version,
            "obsolete_conffiles": obsolete_conffiles,
            "inconsistent_packages": list_inconsistent,
            "upgradable_packages": list_upgradable,
        }
        self.plugin_result.set_data(detailed_data)

        # Return our local PluginResult object
        return self.plugin_result

    def cleanup(self):
        """
        cleanup(): Perform special cleanup steps during node daemon termination

        This step is optional and should be used sparingly.
        """

        pass
105
node-daemon/plugins/load
Normal file
105
node-daemon/plugins/load
Normal file
@@ -0,0 +1,105 @@
#!/usr/bin/env python3

# load.py - PVC Monitoring example plugin for load
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2022 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################

# This script provides an example of a PVC monitoring plugin script. It will create
# a simple plugin to check the system load against the total number of CPU cores,
# and return a 10 health delta (100 -> 90) if the load average exceeds that number.

# This script can thus be used as an example or reference implementation of a
# PVC monitoring plugin script and expanded upon as required.

# A monitoring plugin script must implement the class "MonitoringPluginScript" which
# extends "MonitoringPlugin", providing the 3 functions indicated. Detailed explanation
# of the role of each function is provided in context of the example; see the other
# examples for more potential uses.

# WARNING:
#
# This script will run in the context of the node daemon keepalives as root.
# DO NOT install untrusted, unvetted plugins under any circumstances.


# This import is always required here, as MonitoringPlugin is used by the
# MonitoringPluginScript class
from pvcnoded.objects.MonitoringInstance import MonitoringPlugin


# A monitoring plugin script must always expose its nice name, which must be identical to
# the file name
PLUGIN_NAME = "load"


# The MonitoringPluginScript class must be named as such, and extend MonitoringPlugin.
class MonitoringPluginScript(MonitoringPlugin):
    def setup(self):
        """
        setup(): Perform special setup steps during node daemon startup

        This step is optional and should be used sparingly.
        """

        pass

    def run(self):
        """
        run(): Perform the check actions and return a PluginResult object
        """

        # Run any imports first
        from os import getloadavg
        from psutil import cpu_count

        # Get the current 1-minute system load average
        load_average = getloadavg()[0]

        # Get the number of CPU cores
        cpu_cores = cpu_count()

        # Check whether the load average is greater than the CPU core count
        if load_average > float(cpu_cores):
            # Set the health delta to 10 (subtract 10 from the total of 100)
            health_delta = 10
            # Craft a message that can be used by the clients
            message = f"Current load is {load_average} out of {cpu_cores} CPU cores"

        else:
            # Set the health delta to 0 (no change)
            health_delta = 0
            # Craft a message that can be used by the clients
            message = f"Current load is {load_average} out of {cpu_cores} CPU cores"

        # Set the health delta in our local PluginResult object
        self.plugin_result.set_health_delta(health_delta)

        # Set the message in our local PluginResult object
        self.plugin_result.set_message(message)

        # Return our local PluginResult object
        return self.plugin_result

    def cleanup(self):
        """
        cleanup(): Perform special cleanup steps during node daemon termination

        This step is optional and should be used sparingly.
        """

        pass
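Following the pattern above, a third-party check drops into the same three-method skeleton. Below is a minimal sketch of a hypothetical "rootfs" plugin that deducts health when the root filesystem passes 90% full; the plugin name, the threshold, and the use of shutil.disk_usage are illustrative assumptions, not part of this changeset.

#!/usr/bin/env python3
# rootfs - hypothetical PVC monitoring plugin (illustrative sketch only)

from pvcnoded.objects.MonitoringInstance import MonitoringPlugin

# Must match the file name, per the plugin contract above
PLUGIN_NAME = "rootfs"


class MonitoringPluginScript(MonitoringPlugin):
    def setup(self):
        pass  # no special startup steps needed

    def run(self):
        # Imports happen inside run(), mirroring the load example
        from shutil import disk_usage

        total, used, free = disk_usage("/")
        percent_used = used / total * 100

        # Assumed policy: deduct 10 health points above 90% usage
        health_delta = 10 if percent_used > 90 else 0

        self.plugin_result.set_health_delta(health_delta)
        self.plugin_result.set_message(
            f"Root filesystem is {percent_used:.1f}% full"
        )
        self.plugin_result.set_data({"percent_used": percent_used})
        return self.plugin_result

    def cleanup(self):
        pass  # nothing to clean up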
@@ -122,12 +122,14 @@ pvc:
       pass: Passw0rd
   # migration: Migration option configuration
   migration:
-    # target_selector: Criteria to select the ideal migration target, options: mem, memfree, load, vcpus, vms
+    # target_selector: Criteria to select the ideal migration target, options: mem, memprov, load, vcpus, vms
     target_selector: mem
   # configuration: Local system configurations
   configuration:
     # directories: PVC system directories
     directories:
+      # plugin_directory: Directory containing node monitoring plugins
+      plugin_directory: "/usr/share/pvc/plugins"
       # dynamic_directory: Temporary in-memory directory for active configurations
       dynamic_directory: "/run/pvc"
       # log_directory: Logging directory
@@ -27,6 +27,7 @@ import pvcnoded.util.services
 import pvcnoded.util.libvirt
 import pvcnoded.util.zookeeper

+import pvcnoded.objects.MonitoringInstance as MonitoringInstance
 import pvcnoded.objects.DNSAggregatorInstance as DNSAggregatorInstance
 import pvcnoded.objects.MetadataAPIInstance as MetadataAPIInstance
 import pvcnoded.objects.VMInstance as VMInstance
@@ -48,7 +49,7 @@ import re
 import json

 # Daemon version
-version = "0.9.58"
+version = "0.9.61"


 ##########################################################
@@ -58,6 +59,7 @@ version = "0.9.58"

 def entrypoint():
     keepalive_timer = None
+    monitoring_instance = None

     # Get our configuration
     config = pvcnoded.util.config.get_configuration()
@@ -204,7 +206,7 @@ def entrypoint():

     # Define a cleanup function
     def cleanup(failure=False):
-        nonlocal logger, zkhandler, keepalive_timer, d_domain
+        nonlocal logger, zkhandler, keepalive_timer, d_domain, monitoring_instance

         logger.out("Terminating pvcnoded and cleaning up", state="s")

@@ -253,6 +255,13 @@ def entrypoint():
         except Exception:
             pass

+        # Clean up any monitoring plugins that have cleanup
+        try:
+            logger.out("Performing monitoring plugin cleanup", state="s")
+            monitoring_instance.run_cleanups()
+        except Exception:
+            pass
+
         # Set stop state in Zookeeper
         zkhandler.write([(("node.state.daemon", config["node_hostname"]), "stop")])

@@ -1015,9 +1024,14 @@ def entrypoint():
         state="i",
     )

+    # Set up the node monitoring instance
+    monitoring_instance = MonitoringInstance.MonitoringInstance(
+        zkhandler, config, logger, this_node
+    )
+
     # Start keepalived thread
     keepalive_timer = pvcnoded.util.keepalive.start_keepalive_timer(
-        logger, config, zkhandler, this_node
+        logger, config, zkhandler, this_node, monitoring_instance
     )

     # Tick loop; does nothing since everything is async
node-daemon/pvcnoded/objects/MonitoringInstance.py (Normal file, 383 lines)
@@ -0,0 +1,383 @@
#!/usr/bin/env python3

# MonitoringInstance.py - Class implementing a PVC monitoring instance
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2022 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################

import concurrent.futures
import time
import importlib.util

from os import walk
from datetime import datetime
from json import dumps


class PluginResult(object):
    def __init__(self, zkhandler, config, logger, this_node, plugin_name):
        self.zkhandler = zkhandler
        self.config = config
        self.logger = logger
        self.this_node = this_node
        self.plugin_name = plugin_name
        self.current_time = int(time.time())
        self.health_delta = 0
        self.message = None
        self.data = {}
        self.runtime = "0.00"

    def set_health_delta(self, new_delta):
        self.health_delta = new_delta

    def set_message(self, new_message):
        self.message = new_message

    def set_data(self, new_data):
        self.data = new_data

    def set_runtime(self, new_runtime):
        self.runtime = new_runtime

    def to_zookeeper(self):
        self.zkhandler.write(
            [
                (
                    (
                        "node.monitoring.data",
                        self.this_node.name,
                        "monitoring_plugin.name",
                        self.plugin_name,
                    ),
                    self.plugin_name,
                ),
                (
                    (
                        "node.monitoring.data",
                        self.this_node.name,
                        "monitoring_plugin.last_run",
                        self.plugin_name,
                    ),
                    self.current_time,
                ),
                (
                    (
                        "node.monitoring.data",
                        self.this_node.name,
                        "monitoring_plugin.health_delta",
                        self.plugin_name,
                    ),
                    self.health_delta,
                ),
                (
                    (
                        "node.monitoring.data",
                        self.this_node.name,
                        "monitoring_plugin.message",
                        self.plugin_name,
                    ),
                    self.message,
                ),
                (
                    (
                        "node.monitoring.data",
                        self.this_node.name,
                        "monitoring_plugin.data",
                        self.plugin_name,
                    ),
                    dumps(self.data),
                ),
                (
                    (
                        "node.monitoring.data",
                        self.this_node.name,
                        "monitoring_plugin.runtime",
                        self.plugin_name,
                    ),
                    self.runtime,
                ),
            ]
        )

class MonitoringPlugin(object):
    def __init__(self, zkhandler, config, logger, this_node, plugin_name):
        self.zkhandler = zkhandler
        self.config = config
        self.logger = logger
        self.this_node = this_node
        self.plugin_name = plugin_name

        self.plugin_result = PluginResult(
            self.zkhandler,
            self.config,
            self.logger,
            self.this_node,
            self.plugin_name,
        )

    def __str__(self):
        return self.plugin_name

    #
    # Helper functions; exposed to child MonitoringPluginScript instances
    #
    def log(self, message, state="d"):
        """
        Log a message to the PVC logger instance using the plugin name as a prefix
        Takes "state" values as defined by the PVC logger instance, defaulting to debug:
            "d": debug
            "i": informational
            "t": tick/keepalive
            "w": warning
            "e": error
        """
        if state == "d" and not self.config["debug"]:
            return

        self.logger.out(message, state=state, prefix=self.plugin_name)

    #
    # Primary class functions; implemented by the individual plugins
    #
    def setup(self):
        """
        setup(): Perform setup of the plugin; run once during daemon startup
        OPTIONAL
        """
        pass

    def run(self):
        """
        run(): Run the plugin, returning a PluginResult object
        """
        return self.plugin_result

    def cleanup(self):
        """
        cleanup(): Clean up after the plugin; run once during daemon shutdown
        OPTIONAL
        """
        pass

class MonitoringInstance(object):
    def __init__(self, zkhandler, config, logger, this_node):
        self.zkhandler = zkhandler
        self.config = config
        self.logger = logger
        self.this_node = this_node

        # Get a list of plugins from the plugin_directory
        plugin_files = next(walk(self.config["plugin_directory"]), (None, None, []))[
            2
        ]  # [] if no file

        self.all_plugins = list()
        self.all_plugin_names = list()

        # Load each plugin file into the all_plugins list
        for plugin_file in sorted(plugin_files):
            try:
                self.logger.out(
                    f"Loading monitoring plugin from {self.config['plugin_directory']}/{plugin_file}",
                    state="i",
                )
                loader = importlib.machinery.SourceFileLoader(
                    "plugin_script", f"{self.config['plugin_directory']}/{plugin_file}"
                )
                spec = importlib.util.spec_from_loader(loader.name, loader)
                plugin_script = importlib.util.module_from_spec(spec)
                spec.loader.exec_module(plugin_script)

                plugin = plugin_script.MonitoringPluginScript(
                    self.zkhandler,
                    self.config,
                    self.logger,
                    self.this_node,
                    plugin_script.PLUGIN_NAME,
                )
                self.all_plugins.append(plugin)
                self.all_plugin_names.append(plugin.plugin_name)

                # Create plugin key
                self.zkhandler.write(
                    [
                        (
                            (
                                "node.monitoring.data",
                                self.this_node.name,
                                "monitoring_plugin.name",
                                plugin.plugin_name,
                            ),
                            plugin.plugin_name,
                        ),
                        (
                            (
                                "node.monitoring.data",
                                self.this_node.name,
                                "monitoring_plugin.last_run",
                                plugin.plugin_name,
                            ),
                            "0",
                        ),
                        (
                            (
                                "node.monitoring.data",
                                self.this_node.name,
                                "monitoring_plugin.health_delta",
                                plugin.plugin_name,
                            ),
                            "0",
                        ),
                        (
                            (
                                "node.monitoring.data",
                                self.this_node.name,
                                "monitoring_plugin.message",
                                plugin.plugin_name,
                            ),
                            "Initializing",
                        ),
                        (
                            (
                                "node.monitoring.data",
                                self.this_node.name,
                                "monitoring_plugin.data",
                                plugin.plugin_name,
                            ),
                            dumps({}),
                        ),
                        (
                            (
                                "node.monitoring.data",
                                self.this_node.name,
                                "monitoring_plugin.runtime",
                                plugin.plugin_name,
                            ),
                            "0.00",
                        ),
                    ]
                )
                self.logger.out(
                    f"Successfully loaded monitoring plugin '{plugin.plugin_name}'",
                    state="o",
                )
            except Exception as e:
                self.logger.out(
                    f"Failed to load monitoring plugin: {e}",
                    state="w",
                )

        self.zkhandler.write(
            [
                (
                    ("node.monitoring.plugins", self.this_node.name),
                    " ".join(self.all_plugin_names),
                ),
            ]
        )

        # Clean up any old plugin data for which a plugin file no longer exists
        for plugin_key in self.zkhandler.children(
            ("node.monitoring.data", self.this_node.name)
        ):
            if plugin_key not in self.all_plugin_names:
                self.zkhandler.delete(
                    (
                        "node.monitoring.data",
                        self.this_node.name,
                        "monitoring_plugin",
                        plugin_key,
                    )
                )

    def run_plugin(self, plugin):
        # Time the plugin run, record the runtime alongside the result, and
        # push the full result to Zookeeper
        time_start = datetime.now()
        result = plugin.run()
        time_end = datetime.now()
        time_delta = time_end - time_start
        runtime = "{:0.02f}".format(time_delta.total_seconds())
        result.set_runtime(runtime)
        result.to_zookeeper()
        return result

    def run_plugins(self):
        total_health = 100
        self.logger.out(
            f"Running monitoring plugins: {', '.join([x.plugin_name for x in self.all_plugins])}",
            state="t",
        )
        plugin_results = list()
        with concurrent.futures.ThreadPoolExecutor(max_workers=99) as executor:
            to_future_plugin_results = {
                executor.submit(self.run_plugin, plugin): plugin
                for plugin in self.all_plugins
            }
            for future in concurrent.futures.as_completed(to_future_plugin_results):
                plugin_results.append(future.result())

        # Filter out any None results before sorting, then log each result and
        # subtract its health delta from the starting total of 100
        for result in sorted(
            [r for r in plugin_results if r is not None],
            key=lambda x: x.plugin_name,
        ):
            self.logger.out(
                result.message,
                state="t",
                prefix=f"{result.plugin_name} ({result.runtime}s)",
            )
            total_health -= result.health_delta

        if total_health > 90:
            health_colour = self.logger.fmt_green
        elif total_health > 50:
            health_colour = self.logger.fmt_yellow
        else:
            health_colour = self.logger.fmt_red

        self.zkhandler.write(
            [
                (
                    ("node.monitoring.health", self.this_node.name),
                    total_health,
                ),
            ]
        )
        self.logger.out(
            f"System health: {health_colour}{total_health}/100{self.logger.fmt_end}",
            state="t",
        )

    def run_cleanup(self, plugin):
        return plugin.cleanup()

    def run_cleanups(self):
        with concurrent.futures.ThreadPoolExecutor(max_workers=99) as executor:
            to_future_plugin_results = {
                executor.submit(self.run_cleanup, plugin): plugin
                for plugin in self.all_plugins
            }
            for future in concurrent.futures.as_completed(to_future_plugin_results):
                # This doesn't do anything; it just lets us wait for them all to complete
                pass
        # Set the node health to None, as no previous checks are now valid
        self.zkhandler.write(
            [
                (
                    ("node.monitoring.health", self.this_node.name),
                    None,
                ),
            ]
        )
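Two implementation details above are worth noting. First, plugin files are imported at runtime via importlib's SourceFileLoader machinery rather than a package import, so plugins need no __init__.py and can live anywhere plugin_directory points. Second, health aggregation is plain subtraction from 100: for instance, one plugin reporting a delta of 10 and another reporting 25 would publish 65/100, which lands in the yellow band (above 50 but not above 90). A stripped-down sketch of the dynamic-import pattern follows; the path shown is a hypothetical example, not a value from this changeset.

# Minimal sketch of the dynamic-import pattern used by MonitoringInstance.
import importlib.machinery
import importlib.util


def load_plugin_module(path):
    # Load the Python source at "path" as a standalone module object; note
    # that, as in the code above, every plugin shares the same throwaway
    # module name "plugin_script"
    loader = importlib.machinery.SourceFileLoader("plugin_script", path)
    spec = importlib.util.spec_from_loader(loader.name, loader)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)  # runs the plugin file's top level
    return module


# module = load_plugin_module("/usr/share/pvc/plugins/load")
# plugin_class = module.MonitoringPluginScript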
@@ -180,6 +180,9 @@ def get_configuration():
         raise MalformedConfigurationError(e)

     config_directories = {
+        "plugin_directory": o_directories.get(
+            "plugin_directory", "/usr/share/pvc/plugins"
+        ),
         "dynamic_directory": o_directories.get("dynamic_directory", None),
         "log_directory": o_directories.get("log_directory", None),
         "console_log_directory": o_directories.get("console_log_directory", None),
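The new key uses the same dict.get default pattern as the existing directory options, so a pvcd.yaml that predates this change picks up the packaged default transparently. A trivial illustration, with made-up values:

# Illustration only: an older configuration with no plugin_directory key
o_directories = {"dynamic_directory": "/run/pvc", "log_directory": "/var/log/pvc"}

# .get() falls back to the packaged default path when the key is absent
plugin_directory = o_directories.get("plugin_directory", "/usr/share/pvc/plugins")
print(plugin_directory)  # -> /usr/share/pvc/plugins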
@@ -51,7 +51,7 @@ libvirt_vm_states = {
 }


-def start_keepalive_timer(logger, config, zkhandler, this_node):
+def start_keepalive_timer(logger, config, zkhandler, this_node, monitoring_instance):
     keepalive_interval = config["keepalive_interval"]
     logger.out(
         f"Starting keepalive timer ({keepalive_interval} second interval)", state="s"
@@ -59,7 +59,7 @@ def start_keepalive_timer(logger, config, zkhandler, this_node):
     keepalive_timer = BackgroundScheduler()
     keepalive_timer.add_job(
         node_keepalive,
-        args=(logger, config, zkhandler, this_node),
+        args=(logger, config, zkhandler, this_node, monitoring_instance),
         trigger="interval",
         seconds=keepalive_interval,
     )
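node_keepalive is driven by APScheduler's BackgroundScheduler, which fires the job on a fixed interval in a worker thread while the daemon's main loop idles. A self-contained sketch of that scheduling pattern; the tick() job and the 5-second interval are placeholders, not values from this changeset:

# Minimal BackgroundScheduler interval-job sketch
import time

from apscheduler.schedulers.background import BackgroundScheduler


def tick():
    print("keepalive tick")


scheduler = BackgroundScheduler()
scheduler.add_job(tick, trigger="interval", seconds=5)
scheduler.start()

try:
    while True:
        time.sleep(1)  # main thread idles; the job fires in a worker thread
except KeyboardInterrupt:
    scheduler.shutdown()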
@@ -97,29 +97,6 @@ def collect_ceph_stats(logger, config, zkhandler, this_node, queue):
         logger.out("Failed to open connection to Ceph cluster: {}".format(e), state="e")
         return

-    if debug:
-        logger.out("Getting health stats from monitor", state="d", prefix="ceph-thread")
-
-    # Get Ceph cluster health for local status output
-    command = {"prefix": "health", "format": "json"}
-    try:
-        health_status = json.loads(
-            ceph_conn.mon_command(json.dumps(command), b"", timeout=1)[1]
-        )
-        ceph_health = health_status["status"]
-    except Exception as e:
-        logger.out("Failed to obtain Ceph health data: {}".format(e), state="e")
-        ceph_health = "HEALTH_UNKN"
-
-    if ceph_health in ["HEALTH_OK"]:
-        ceph_health_colour = logger.fmt_green
-    elif ceph_health in ["HEALTH_UNKN"]:
-        ceph_health_colour = logger.fmt_cyan
-    elif ceph_health in ["HEALTH_WARN"]:
-        ceph_health_colour = logger.fmt_yellow
-    else:
-        ceph_health_colour = logger.fmt_red
-
     # Primary-only functions
     if this_node.router_state == "primary":
         if debug:
@@ -408,8 +385,6 @@ def collect_ceph_stats(logger, config, zkhandler, this_node, queue):

     ceph_conn.shutdown()

-    queue.put(ceph_health_colour)
-    queue.put(ceph_health)
     queue.put(osds_this_node)

     if debug:
@@ -648,7 +623,7 @@ def collect_vm_stats(logger, config, zkhandler, this_node, queue):


 # Keepalive update function
-def node_keepalive(logger, config, zkhandler, this_node):
+def node_keepalive(logger, config, zkhandler, this_node, monitoring_instance):
     debug = config["debug"]
     if debug:
         logger.out("Keepalive starting", state="d", prefix="main-thread")
@@ -777,16 +752,14 @@ def node_keepalive(logger, config, zkhandler, this_node):

     if config["enable_storage"]:
         try:
-            ceph_health_colour = ceph_thread_queue.get(
-                timeout=config["keepalive_interval"]
+            osds_this_node = ceph_thread_queue.get(
+                timeout=(config["keepalive_interval"] - 1)
             )
-            ceph_health = ceph_thread_queue.get(timeout=config["keepalive_interval"])
-            osds_this_node = ceph_thread_queue.get(timeout=config["keepalive_interval"])
         except Exception:
             logger.out("Ceph stats queue get exceeded timeout, continuing", state="w")
-            ceph_health_colour = logger.fmt_cyan
-            ceph_health = "UNKNOWN"
             osds_this_node = "?"
     else:
         osds_this_node = "0"

     # Set our information in zookeeper
     keepalive_time = int(time.time())
@@ -839,8 +812,8 @@ def node_keepalive(logger, config, zkhandler, this_node):
     if config["log_keepalive_cluster_details"]:
         logger.out(
             "{bold}Maintenance:{nofmt} {maint} "
-            "{bold}Active VMs:{nofmt} {domcount} "
-            "{bold}Networks:{nofmt} {netcount} "
+            "{bold}Node VMs:{nofmt} {domcount} "
+            "{bold}Node OSDs:{nofmt} {osdcount} "
             "{bold}Load:{nofmt} {load} "
             "{bold}Memory [MiB]: VMs:{nofmt} {allocmem} "
             "{bold}Used:{nofmt} {usedmem} "
@@ -849,7 +822,7 @@ def node_keepalive(logger, config, zkhandler, this_node):
             nofmt=logger.fmt_end,
             maint=this_node.maintenance,
             domcount=this_node.domains_count,
-            netcount=len(zkhandler.children("base.network")),
+            osdcount=osds_this_node,
             load=this_node.cpuload,
             freemem=this_node.memfree,
             usedmem=this_node.memused,
@@ -857,22 +830,6 @@ def node_keepalive(logger, config, zkhandler, this_node):
         ),
         state="t",
     )
-    if config["enable_storage"] and config["log_keepalive_storage_details"]:
-        logger.out(
-            "{bold}Ceph cluster status:{nofmt} {health_colour}{health}{nofmt} "
-            "{bold}Total OSDs:{nofmt} {total_osds} "
-            "{bold}Node OSDs:{nofmt} {node_osds} "
-            "{bold}Pools:{nofmt} {total_pools} ".format(
-                bold=logger.fmt_bold,
-                health_colour=ceph_health_colour,
-                nofmt=logger.fmt_end,
-                health=ceph_health,
-                total_osds=len(zkhandler.children("base.osd")),
-                node_osds=osds_this_node,
-                total_pools=len(zkhandler.children("base.pool")),
-            ),
-            state="t",
-        )

     # Look for dead nodes and fence them
     if not this_node.maintenance:
@@ -918,5 +875,7 @@ def node_keepalive(logger, config, zkhandler, this_node):
                 [(("node.state.daemon", node_name), "dead")]
             )

+    monitoring_instance.run_plugins()
+
     if debug:
         logger.out("Keepalive finished", state="d", prefix="main-thread")