Compare commits: v0.9.59...25f3faa08f (293 commits)
| SHA1 | | | |
|---|---|---|---|
| 25f3faa08f | |||
| 3ad6ff2d9c | |||
| c7c47d9f86 | |||
| 3c5a5f08bc | |||
| 59b2dbeb5e | |||
| 0b8d26081b | |||
| f076554b15 | |||
| 35f5219916 | |||
| f7eaa11a5f | |||
| 924a0b22ec | |||
| 6a5f54d169 | |||
| 7741400370 | |||
| 5eafa475b9 | |||
| f3ba4b6294 | |||
| faf9cc537f | |||
| a28df75a5d | |||
| 13dab7a285 | |||
| f89dbe802e | |||
| d63e80675a | |||
| 263f3570ab | |||
| 90f9336041 | |||
| 5415985ed2 | |||
| 3384f24ef5 | |||
| ef3c22d793 | |||
| 078f85b431 | |||
| bfb363c459 | |||
| 13e6a0f0bd | |||
| c1302cf8b6 | |||
| 9358949991 | |||
| cd0b8c23e6 | |||
| fb30263a41 | |||
| 172e3627d4 | |||
| 53ffe6cd55 | |||
| df6e11ae7a | |||
| de2135db42 | |||
| 72e093c2c4 | |||
| 60e32f7795 | |||
| 23e7d84f53 | |||
| dd81594f26 | |||
| 0d09f5d089 | |||
| 365c70e873 | |||
| 4f7e2fe146 | |||
| 77f49654b9 | |||
| c158e4e0f5 | |||
| 31a5c8801f | |||
| 0a4e4c7048 | |||
| de97f2f476 | |||
| 165ce15dfe | |||
| a81d419a2e | |||
| 85a7088e5a | |||
| b58fa06f67 | |||
| 3b3d2e7f7e | |||
| 72a5de800c | |||
| f450d1d313 | |||
| 2db58488a2 | |||
| 1bbf8f6bf6 | |||
| 191f8780c9 | |||
| 80c1f78864 | |||
| c8c0987fe7 | |||
| 67560c6457 | |||
| 79c9eba28c | |||
| 36e924d339 | |||
| aeb1443410 | |||
| eccd2a98b2 | |||
| 6e2c1fb45e | |||
| b14ba9172c | |||
| e9235a627c | |||
| c84ee0f4f1 | |||
| 76c51460b0 | |||
| 6ed37f5b4a | |||
| 4b41ee2817 | |||
| dc36c40690 | |||
| 459b16386b | |||
| 6146b062d6 | |||
| 74193c7e2a | |||
| 73c1ac732e | |||
| 58dd5830eb | |||
| 90e515c46f | |||
| a6a5f71226 | |||
| 60a3ef1604 | |||
| 95807b23eb | |||
| 5ae430e1c5 | |||
| 4731faa2f0 | |||
| 42f4907dec | |||
| 02168a5ecf | |||
| 8cfcd02ac2 | |||
| e464dcb483 | |||
| 27214c8190 | |||
| f78669a175 | |||
| 00a4a01517 | |||
| a40a69816d | |||
| baf5a132ff | |||
| 584cb95b8d | |||
| 21bbb0393f | |||
| d18e009b00 | |||
| 1f8f3252a6 | |||
| b47c9832b7 | |||
| d2757004db | |||
| 7323269775 | |||
| 85463f9aec | |||
| 19c37c3ed5 | |||
| 7d2ea494e7 | |||
| cb50eee2a9 | |||
| f3f4eaadf1 | |||
| 313a5d1c7d | |||
| b6d689b769 | |||
| a0fccf83f7 | |||
| 46896c593e | |||
| 02138974fa | |||
| c3d255be65 | |||
| 45fc8a47a3 | |||
| 07f2006f68 | |||
| f4c7fdffb8 | |||
| be1b67b8f0 | |||
| d68f6a945e | |||
| c776aba8b3 | |||
| 2461941421 | |||
| 68954a79ec | |||
| a2fa6ed450 | |||
| 02a2f6a27a | |||
| a75b951605 | |||
| 658e80350f | |||
| 3aa20fbaa3 | |||
| 6d101df1ff | |||
| be6a3992c1 | |||
| d76da0f25a | |||
| bc722ce9b8 | |||
| 7890c32c59 | |||
| 6febcfdd97 | |||
| 11d8ce70cd | |||
| a17d9439c0 | |||
| 9cd02eb148 | |||
| 459485c202 | |||
| 9f92d5d822 | |||
| 947ac561c8 | |||
| ca143c1968 | |||
| 6e110b178c | |||
| d07d37d08e | |||
| 0639b16c86 | |||
| 1cf8706a52 | |||
| dd8f07526f | |||
| 5a5e5da663 | |||
| 739b60b91e | |||
| 16544227eb | |||
| 73e3746885 | |||
| 66230ce971 | |||
| fbfbd70461 | |||
| 2506098223 | |||
| 83e887c4ee | |||
| 4eb0f3bb8a | |||
| adc767e32f | |||
| 2083fd824a | |||
| 3aa74a3940 | |||
| 71d94bbeab | |||
| 718f689df9 | |||
| 268b5c0b86 | |||
| b016b9bf3d | |||
| 7604b9611f | |||
| b21278fd80 | |||
| 3b02034b70 | |||
| c7a5b41b1e | |||
| 48b0091d3e | |||
| 2e94516ee2 | |||
| d7f26b27ea | |||
| 872f35a7ee | |||
| 52c3e8ced3 | |||
| 1d7acf62bf | |||
| c790c331a7 | |||
| 23165482df | |||
| 057071a7b7 | |||
| 554fa9f412 | |||
| 5a5f924268 | |||
| cc309fc021 | |||
| 5f783f1663 | |||
| bc89bb5b68 | |||
| eb233ef588 | |||
| d3efb54cb4 | |||
| da15357c8a | |||
| b6939a28c0 | |||
| a1da479a4c | |||
| ace4082820 | |||
| 4036af6045 | |||
| f96de97861 | |||
| 04cad46305 | |||
| e9dea4d2d1 | |||
| 39fd85fcc3 | |||
| cbbab46b55 | |||
| d1f2ce0b0a | |||
| 2f01edca14 | |||
| 12a3a3a6a6 | |||
| c44732be83 | |||
| a8b68e0968 | |||
| e59152afee | |||
| 56021c443a | |||
| ebdea165f1 | |||
| fb0651fb05 | |||
| 35e7e11403 | |||
| b7555468eb | |||
| f1b4ee02ba | |||
| 4698edc98e | |||
| 40e7e04aad | |||
| 7f074847c4 | |||
| b0b0b75605 | |||
| 89f62318bd | |||
| 925141ed65 | |||
| f7a826bf52 | |||
| e176f3b2f6 | |||
| b339d5e641 | |||
| d476b13cc0 | |||
| ce8b2c22cc | |||
| feab5d3479 | |||
| ee348593c9 | |||
| e403146bcf | |||
| bde684dd3a | |||
| 992e003500 | |||
| eaeb860a83 | |||
| 1198ca9f5c | |||
| e79d200244 | |||
| 5b3bb9f306 | |||
| 5501586a47 | |||
| c160648c5c | |||
| fa37227127 | |||
| 2cac98963c | |||
| 8e50428707 | |||
| a4953bc6ef | |||
| 3c10d57148 | |||
| 26d8551388 | |||
| 57342541dd | |||
| 50f8afd749 | |||
| 3449069e3d | |||
| cb66b16045 | |||
| 8edce74b85 | |||
| e9b69c4124 | |||
| 3948206225 | |||
| a09578fcf5 | |||
| 73be807b84 | |||
| 4a9805578e | |||
| f70f052df1 | |||
| 1e8841ce69 | |||
| 9c7d39d523 | |||
| 011490bcca | |||
| 8de63b2785 | |||
| 8f8f00b2e9 | |||
| 1daab49b50 | |||
| 9f6041b9cf | |||
| 5b27e438a9 | |||
| 3e8a85b029 | |||
| 19ac1e17c3 | |||
| 252175fb6f | |||
| f39b041471 | |||
| 3b41759262 | |||
| e514eed414 | |||
| b81e70ec18 | |||
| c2a473ed8b | |||
| 5355f6ff48 | |||
| bf7823deb5 | |||
| 8ba371723e | |||
| e10ac52116 | |||
| 341073521b | |||
| 16c38da5ef | |||
| c8134d3a1c | |||
| 9f41373324 | |||
| 8e62d5b30b | |||
| 7a8eee244a | |||
| 7df5b8e52e | |||
| 6f96219023 | |||
| 51967e164b | |||
| 7a3a44d47c | |||
| 44491dd988 | |||
| eba142f470 | |||
| 6cef68d157 | |||
| e8caf3369e | |||
| 3e3776a25b | |||
| 6e0d0e264e | |||
| 1855d03a36 | |||
| 1a286dc8dd | |||
| 1b6d10e03a | |||
| 73c96d1e93 | |||
| 5841c98a59 | |||
| bc6395c959 | |||
| d582f87472 | |||
| e9735113af | |||
| 722fd0a65d | |||
| 3b41beb0f3 | |||
| d3392c0282 | |||
| 560c013e95 | |||
| 384c6320ef | |||
| 445dec1c38 | |||
| 534c7cd7f0 | |||
| 4014ef7714 | |||
| 180f0445ac | |||
| 074664d4c1 | |||
| 418ac23d40 | |||

CHANGELOG.md (10 changes)
@@ -1,5 +1,15 @@
 ## PVC Changelog
 
+###### [v0.9.61](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.61)
+
+* [provisioner] Fixes a bug in network comparison
+* [api] Fixes a bug being unable to rename disabled VMs
+
+###### [v0.9.60](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.60)
+
+* [Provisioner] Cleans up several remaining bugs in the example scripts; they should all be valid now
+* [Provisioner] Adjust default libvirt schema to disable RBD caching for a 2x+ performance boost
+
 ###### [v0.9.59](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.59)
 
 * [API] Flips the mem(prov) and mem(free) selectors making mem(free) the default for "mem" and "memprov" explicit
@@ -398,7 +398,7 @@ class VMBuilderScript(VMBuilder):
             if volume.get("source_volume") is not None:
                 continue
 
-            if volume.get("filesystem") is None:
+            if volume.get("filesystem") is None or volume.get("filesystem") == "swap":
                 continue
 
             mapped_dst_volume = f"/dev/rbd/{dst_volume}"
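This same swap-handling filter recurs in both example scripts below and again in their cleanup() hunks. A minimal standalone sketch of the filter it implements (the volume dicts are hypothetical examples, not real provisioner data; swap volumes are presumably formatted elsewhere and have no filesystem to mount here):

```python
# Minimal sketch of the mount filter the fix above implements.
# The volume dicts below are hypothetical, not actual PVC provisioner data.
volumes = [
    {"disk_id": "sda", "filesystem": "ext4", "source_volume": None},
    {"disk_id": "sdb", "filesystem": "swap", "source_volume": None},  # skipped: swap is not mountable
    {"disk_id": "sdc", "filesystem": None, "source_volume": None},    # skipped: raw volume, nothing to mount
    {"disk_id": "sdd", "filesystem": "ext4", "source_volume": "tpl"}, # skipped: cloned from a source volume
]

mountable = [
    volume
    for volume in volumes
    if volume.get("source_volume") is None
    and volume.get("filesystem") not in (None, "swap")
]

print([volume["disk_id"] for volume in mountable])  # ['sda']
```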
@@ -473,7 +473,7 @@ class VMBuilderScript(VMBuilder):
         ]
 
         # We need to know our root disk for later GRUB-ing
-        root_disk = None
+        root_volume = None
         for volume in volumes:
             if volume["mountpoint"] == "/":
                 root_volume = volume
@@ -725,6 +725,7 @@ GRUB_DISABLE_LINUX_UUID=false
             if (
                 volume.get("source_volume") is None
                 and volume.get("filesystem") is not None
+                and volume.get("filesystem") != "swap"
             ):
                 # Unmount filesystem
                 retcode, stdout, stderr = pvc_common.run_os_command(
@@ -20,7 +20,7 @@
 ###############################################################################
 
 # This script provides an example of a PVC provisioner script. It will create a
-# standard VM config and install a RHEL-like OS using rinse.
+# standard VM config and install a RHEL 8+ or similar OS using rinse.
 
 # This script can thus be used as an example or reference implementation of a
 # PVC provisioner script and expanded upon as required.
@@ -398,7 +398,7 @@ class VMBuilderScript(VMBuilder):
             if volume.get("source_volume") is not None:
                 continue
 
-            if volume.get("filesystem") is None:
+            if volume.get("filesystem") is None or volume.get("filesystem") == "swap":
                 continue
 
             mapped_dst_volume = f"/dev/rbd/{dst_volume}"
@@ -487,7 +487,7 @@ class VMBuilderScript(VMBuilder):
         post_packages = ["cloud-init"]
 
         # We need to know our root disk for later GRUB-ing
-        root_disk = None
+        root_volume = None
         for volume in volumes:
             if volume["mountpoint"] == "/":
                 root_volume = volume
@@ -571,21 +571,6 @@ class VMBuilderScript(VMBuilder):
         with open(hostname_file, "w") as fh:
             fh.write("{}".format(vm_name))
 
-        # Fix the cloud-init.target since it's broken by default
-        cloudinit_target_file = "{}/etc/systemd/system/cloud-init.target".format(
-            temporary_directory
-        )
-        with open(cloudinit_target_file, "w") as fh:
-            # We lose our indent on these raw blocks to preserve the apperance of the files
-            # inside the VM itself
-            data = """[Install]
-WantedBy=multi-user.target
-[Unit]
-Description=Cloud-init target
-After=multi-user.target
-"""
-            fh.write(data)
-
         # Due to device ordering within the Libvirt XML configuration, the first Ethernet interface
         # will always be on PCI bus ID 2, hence the name "ens2".
         # Write a DHCP stanza for ens2
@@ -682,11 +667,6 @@ GRUB_SERIAL_COMMAND="serial --speed=115200 --unit=0 --word=8 --parity=no --stop=
         # Set the timezone to UTC
         os.system("ln -sf ../usr/share/zoneinfo/UTC /etc/localtime")
 
-        # Unmount the bound devfs and sysfs
-        os.system("umount {}/dev".format(temporary_directory))
-        os.system("umount {}/sys".format(temporary_directory))
-        os.system("umount {}/proc".format(temporary_directory))
-
     def cleanup(self):
         """
         cleanup(): Perform any cleanup required due to prepare()/install()
@@ -700,6 +680,7 @@ GRUB_SERIAL_COMMAND="serial --speed=115200 --unit=0 --word=8 --parity=no --stop=
         """
 
         # Run any imports first
+        import os
         from pvcapid.vmbuilder import open_zk
         from pvcapid.Daemon import config
         import daemon_lib.common as pvc_common
@@ -708,6 +689,11 @@ GRUB_SERIAL_COMMAND="serial --speed=115200 --unit=0 --word=8 --parity=no --stop=
         # Set the tempdir we used in the prepare() and install() steps
         temp_dir = "/tmp/target"
 
+        # Unmount the bound devfs and sysfs
+        os.system(f"umount {temp_dir}/dev")
+        os.system(f"umount {temp_dir}/sys")
+        os.system(f"umount {temp_dir}/proc")
+
         # Use this construct for reversing the list, as the normal reverse() messes with the list
         for volume in list(reversed(self.vm_data["volumes"])):
             dst_volume_name = f"{self.vm_name}_{volume['disk_id']}"
@@ -718,6 +704,7 @@ GRUB_SERIAL_COMMAND="serial --speed=115200 --unit=0 --word=8 --parity=no --stop=
             if (
                 volume.get("source_volume") is None
                 and volume.get("filesystem") is not None
+                and volume.get("filesystem") != "swap"
             ):
                 # Unmount filesystem
                 retcode, stdout, stderr = pvc_common.run_os_command(
@@ -728,14 +715,14 @@ GRUB_SERIAL_COMMAND="serial --speed=115200 --unit=0 --word=8 --parity=no --stop=
                         f"Failed to unmount '{mapped_dst_volume}' on '{mount_path}': {stderr}"
                     )
 
             # Unmap volume
             with open_zk(config) as zkhandler:
                 success, message = pvc_ceph.unmap_volume(
                     zkhandler,
                     volume["pool"],
                     dst_volume_name,
                 )
             if not success:
                 raise ProvisioningError(
                     f"Failed to unmap '{mapped_dst_volume}': {stderr}"
                 )
@@ -27,7 +27,7 @@ from ssl import SSLContext, TLSVersion
 from distutils.util import strtobool as dustrtobool
 
 # Daemon version
-version = "0.9.59"
+version = "0.9.61"
 
 # API version
 API_VERSION = 1.0
@@ -100,7 +100,7 @@ devices_scsi_controller = """    <controller type='scsi' index='0' model='virtio
 # * vm_name
 # * disk_id
 devices_disk_header = """    <disk type='network' device='disk'>
-        <driver name='qemu' discard='unmap'/>
+        <driver name='qemu' discard='unmap' cache='none'/>
         <target dev='{disk_id}' bus='scsi'/>
         <auth username='libvirt'>
             <secret type='ceph' uuid='{ceph_storage_secret}'/>
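Adding `cache='none'` bypasses QEMU's write cache so guest writes go straight to RBD; this is the change the v0.9.60 changelog credits with the 2x+ performance boost. A hedged sketch of how this template fragment might be filled in — the `str.format()` call is an assumption suggested by the `{disk_id}` and `{ceph_storage_secret}` placeholders, and the values are placeholders, not code from this diff:

```python
# Illustrative only: rendering the updated disk XML fragment with str.format().
# The disk ID and secret UUID below are placeholders, not real cluster data.
devices_disk_header = """    <disk type='network' device='disk'>
        <driver name='qemu' discard='unmap' cache='none'/>
        <target dev='{disk_id}' bus='scsi'/>
        <auth username='libvirt'>
            <secret type='ceph' uuid='{ceph_storage_secret}'/>
"""

print(
    devices_disk_header.format(
        disk_id="sda",
        ceph_storage_secret="00000000-0000-0000-0000-000000000000",
    )
)
```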
@@ -580,7 +580,7 @@ def delete_template_network_element(name, vni):
     networks, code = list_template_network_vnis(name)
     found_vni = False
     for network in networks:
-        if network["vni"] == int(vni):
+        if network["vni"] == vni:
             found_vni = True
     if not found_vni:
         retmsg = {
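This is presumably the "network comparison" bug from the v0.9.61 changelog: forcing `int()` on one side of the comparison fails whenever the stored VNI is a string, and raises outright for any non-numeric identifier. A minimal illustration with hypothetical values:

```python
# Minimal illustration of the comparison fixed above; the values are hypothetical.
network = {"vni": "300"}  # VNI held as a string in the template data
vni = "300"               # value arriving from the API request

print(network["vni"] == int(vni))  # False: "300" != 300 (type mismatch)
print(network["vni"] == vni)       # True: compare like with like
# Note also that int(vni) would raise ValueError for a non-numeric identifier.
```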
@@ -2,7 +2,7 @@ from setuptools import setup
 
 setup(
     name="pvc",
-    version="0.9.59",
+    version="0.9.61",
     packages=["pvc", "pvc.cli_lib"],
     install_requires=[
         "Click",
daemon-common/migrations/versions/9.json (1 addition, new file)

@@ -0,0 +1 @@
+{"version": "9", "root": "", "base": {"root": "", "schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "cmd": "/cmd", "cmd.node": "/cmd/nodes", "cmd.domain": "/cmd/domains", "cmd.ceph": "/cmd/ceph", "logs": "/logs", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "logs": {"node": "", "messages": "/messages"}, "node": {"name": "", "keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "data.pvc_version": "/pvcversion", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword", "sriov": "/sriov", "sriov.pf": "/sriov/pf", "sriov.vf": "/sriov/vf", "monitoring.plugins": "/monitoring_plugins", "monitoring.data": "/monitoring_data"}, "monitoring_plugin": {"name": "", "last_run": "/last_run", "health_delta": "/health_delta", "message": "/message", "data": "/data", "runtime": "/runtime"}, "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"}, "sriov_vf": {"phy": "", "pf": "/pf", "mtu": "/mtu", "mac": "/mac", "phy_mac": "/phy_mac", "config": "/config", "config.vlan_id": "/config/vlan_id", "config.vlan_qos": "/config/vlan_qos", "config.tx_rate_min": "/config/tx_rate_min", "config.tx_rate_max": "/config/tx_rate_max", "config.spoof_check": "/config/spoof_check", "config.link_state": "/config/link_state", "config.trust": "/config/trust", "config.query_rss": "/config/query_rss", "pci": "/pci", "pci.domain": "/pci/domain", "pci.bus": "/pci/bus", "pci.slot": "/pci/slot", "pci.function": "/pci/function", "used": "/used", "used_by": "/used_by"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "meta.tags": "/tags", "migrate.sync_lock": "/migrate_sync_lock"}, "tag": {"name": "", "type": "/type", "protected": "/protected"}, "network": {"vni": "", "type": "/nettype", "mtu": "/mtu", "rule": "/firewall_rules", "rule.in": "/firewall_rules/in", "rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": "/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname", "expiry": "/expiry", "client_id": "/clientid"}, "rule": {"description": "", "rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "db_device": "/db_device", "fsid": "/fsid", "ofsid": "/fsid/osd", "cfsid": "/fsid/cluster", "lvm": "/lvm", "vg": "/lvm/vg", "lv": "/lvm/lv", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "tier": "/tier", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}}
@@ -644,7 +644,7 @@ def rename_vm(zkhandler, domain, new_domain):
 
     # Verify that the VM is in a stopped state; renaming is not supported otherwise
     state = zkhandler.read(("domain.state", dom_uuid))
-    if state != "stop":
+    if state not in ["stop", "disable"]:
         return (
             False,
             'ERROR: VM "{}" is not in stopped state; VMs cannot be renamed while running.'.format(
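This is the v0.9.61 "rename disabled VMs" fix: a disabled VM is no more running than a stopped one, so the guard now admits both states. A trivial sketch of the relaxed check:

```python
# Sketch of the relaxed state guard; "stop" and "disable" are the PVC domain
# states shown in the diff, and "start" is used here only as a counterexample.
def can_rename(state: str) -> bool:
    return state in ["stop", "disable"]

print(can_rename("disable"))  # True (rejected before this change)
print(can_rename("start"))    # False: running VMs cannot be renamed
```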
@@ -540,7 +540,7 @@ class ZKHandler(object):
 #
 class ZKSchema(object):
     # Current version
-    _version = 8
+    _version = 9
 
     # Root for doing nested keys
     _schema_root = ""
@@ -608,6 +608,17 @@ class ZKSchema(object):
             "sriov": "/sriov",
             "sriov.pf": "/sriov/pf",
             "sriov.vf": "/sriov/vf",
+            "monitoring.plugins": "/monitoring_plugins",
+            "monitoring.data": "/monitoring_data",
+        },
+        # The schema of an individual monitoring plugin data entry (/nodes/{node_name}/monitoring_data/{plugin})
+        "monitoring_plugin": {
+            "name": "",  # The root key
+            "last_run": "/last_run",
+            "health_delta": "/health_delta",
+            "message": "/message",
+            "data": "/data",
+            "runtime": "/runtime",
         },
         # The schema of an individual SR-IOV PF entry (/nodes/{node_name}/sriov/pf/{pf})
         "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"},  # The root key
@@ -874,9 +885,10 @@ class ZKSchema(object):
             if not zkhandler.zk_conn.exists(nkipath):
                 result = False
 
-        # One might expect child keys under node (specifically, sriov.pf and sriov.vf) to be
-        # managed here as well, but those are created automatically every time pvcnoded starts
-        # and thus never need to be validated or applied.
+        # One might expect child keys under node (specifically, sriov.pf, sriov.vf,
+        # monitoring.data) to be managed here as well, but those are created
+        # automatically every time pvcnoded started and thus never need to be validated
+        # or applied.
 
         # These two have several children layers that must be parsed through
         for elem in ["volume"]:
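The new schema version 9 keys can be read back through the same tuple-path convention the handler uses elsewhere (compare `zkhandler.read(("domain.state", dom_uuid))` in the rename_vm hunk above). A hypothetical sketch; `zkhandler` is assumed to be an open ZKHandler, and the node and plugin names are placeholders:

```python
# Hypothetical sketch of reading the new monitoring keys back from Zookeeper.
# "node1" and "load" below are placeholder node/plugin names, not real data.
def show_plugin_status(zkhandler, node_name, plugin_name):
    message = zkhandler.read(
        ("node.monitoring.data", node_name, "monitoring_plugin.message", plugin_name)
    )
    health_delta = zkhandler.read(
        ("node.monitoring.data", node_name, "monitoring_plugin.health_delta", plugin_name)
    )
    print(f"{plugin_name}@{node_name}: {message} (health delta {health_delta})")

# show_plugin_status(zkhandler, "node1", "load")
```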
debian/changelog (14 additions, vendored)

@@ -1,3 +1,17 @@
+pvc (0.9.61-0) unstable; urgency=high
+
+  * [provisioner] Fixes a bug in network comparison
+  * [api] Fixes a bug being unable to rename disabled VMs
+
+ -- Joshua M. Boniface <joshua@boniface.me>  Wed, 08 Feb 2023 10:08:05 -0500
+
+pvc (0.9.60-0) unstable; urgency=high
+
+  * [Provisioner] Cleans up several remaining bugs in the example scripts; they should all be valid now
+  * [Provisioner] Adjust default libvirt schema to disable RBD caching for a 2x+ performance boost
+
+ -- Joshua M. Boniface <joshua@boniface.me>  Tue, 06 Dec 2022 15:42:55 -0500
+
 pvc (0.9.59-0) unstable; urgency=high
 
   * [API] Flips the mem(prov) and mem(free) selectors making mem(free) the default for "mem" and "memprov" explicit
debian/pvc-daemon-node.install (1 addition, vendored)

@@ -5,3 +5,4 @@ node-daemon/pvcnoded.service lib/systemd/system
 node-daemon/pvc.target lib/systemd/system
 node-daemon/pvcautoready.service lib/systemd/system
 node-daemon/monitoring usr/share/pvc
+node-daemon/plugins usr/share/pvc
node-daemon/plugins/ceph (126 additions, new file)

@@ -0,0 +1,126 @@
+#!/usr/bin/env python3
+
+# ceph.py - PVC Monitoring example plugin for ceph status
+# Part of the Parallel Virtual Cluster (PVC) system
+#
+# Copyright (C) 2018-2022 Joshua M. Boniface <joshua@boniface.me>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+###############################################################################
+
+# This script provides an example of a PVC monitoring plugin script. It will create
+# a simple plugin to check the Ceph cluster health for anomalies, and return a health
+# delta reflective of the overall Ceph status (HEALTH_WARN = 10, HEALTH_ERR = 50).
+
+# This script can thus be used as an example or reference implementation of a
+# PVC monitoring pluginscript and expanded upon as required.
+
+# A monitoring plugin script must implement the class "MonitoringPluginScript" which
+# extends "MonitoringPlugin", providing the 3 functions indicated. Detailed explanation
+# of the role of each function is provided in context of the example; see the other
+# examples for more potential uses.
+
+# WARNING:
+#
+# This script will run in the context of the node daemon keepalives as root.
+# DO NOT install untrusted, unvetted plugins under any circumstances.
+
+
+# This import is always required here, as MonitoringPlugin is used by the
+# MonitoringPluginScript class
+from pvcnoded.objects.MonitoringInstance import MonitoringPlugin
+
+
+# A monitoring plugin script must always expose its nice name, which must be identical to
+# the file name
+PLUGIN_NAME = "ceph"
+
+
+# The MonitoringPluginScript class must be named as such, and extend MonitoringPlugin.
+class MonitoringPluginScript(MonitoringPlugin):
+    def setup(self):
+        """
+        setup(): Perform special setup steps during node daemon startup
+
+        This step is optional and should be used sparingly.
+        """
+
+        pass
+
+    def run(self):
+        """
+        run(): Perform the check actions and return a PluginResult object
+        """
+
+        # Run any imports first
+        from rados import Rados
+        from json import loads, dumps
+
+        # Connect to the Ceph cluster
+        try:
+            ceph_conn = Rados(
+                conffile=self.config["ceph_config_file"],
+                conf=dict(keyring=self.config["ceph_admin_keyring"]),
+            )
+            ceph_conn.connect(timeout=1)
+        except Exception as e:
+            self.log(f"Failed to connect to Ceph cluster: {e}", state="e")
+            return self.plugin_result
+
+        # Get the Ceph cluster health
+        try:
+            health_status = loads(
+                ceph_conn.mon_command(dumps({"prefix": "health", "format": "json"}), b"", timeout=1)[1]
+            )
+            ceph_health = health_status["status"]
+        except Exception as e:
+            self.log(f"Failed to get health data from Ceph cluster: {e}", state="e")
+            return self.plugin_result
+        finally:
+            ceph_conn.shutdown()
+
+        # Get a list of error entries in the health status output
+        error_entries = health_status["checks"].keys()
+
+        # Set the health delta based on the errors presented
+        if ceph_health == "HEALTH_ERR":
+            health_delta = 50
+            message = f"Ceph cluster in ERROR state: {', '.join(error_entries)}"
+        elif ceph_health == "HEALTH_WARN":
+            health_delta = 10
+            message = f"Ceph cluster in WARNING state: {', '.join(error_entries)}"
+        else:
+            health_delta = 0
+            message = "Ceph cluster in OK state"
+
+        # Set the health delta in our local PluginResult object
+        self.plugin_result.set_health_delta(health_delta)
+
+        # Set the message in our local PluginResult object
+        self.plugin_result.set_message(message)
+
+        # Set the detailed data in our local PluginResult object
+        self.plugin_result.set_data(dumps(health_status))
+
+        # Return our local PluginResult object
+        return self.plugin_result
+
+    def cleanup(self):
+        """
+        cleanup(): Perform special cleanup steps during node daemon termination
+
+        This step is optional and should be used sparingly.
+        """
+
+        pass
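The 10/50 deltas this plugin reports feed the aggregate system-health figure computed in `MonitoringInstance.run_plugins()` further down, which simply subtracts each plugin's delta from 100. A worked example with hypothetical per-plugin results:

```python
# Worked example of the aggregation in MonitoringInstance.run_plugins() below;
# the per-plugin deltas here are hypothetical (e.g. a Ceph HEALTH_WARN plus
# two dpkg flaws).
plugin_deltas = {"ceph": 10, "dpkg": 2, "load": 0}

total_health = 100
for plugin, delta in plugin_deltas.items():
    total_health -= delta

# 88 falls in the "yellow" band: green is >90, yellow >50, red otherwise.
print(f"System health: {total_health}/100")
```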
node-daemon/plugins/dpkg (158 additions, new file)

@@ -0,0 +1,158 @@
+#!/usr/bin/env python3
+
+# dpkg.py - PVC Monitoring example plugin for dpkg status
+# Part of the Parallel Virtual Cluster (PVC) system
+#
+# Copyright (C) 2018-2022 Joshua M. Boniface <joshua@boniface.me>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+###############################################################################
+
+# This script provides an example of a PVC monitoring plugin script. It will create
+# a simple plugin to check the system dpkg status is as expected, with no invalid
+# packages or obsolete configuration files, and will return a 1 health delta for each
+# flaw in invalid packages, upgradable packages, and obsolete config files.
+
+# This script can thus be used as an example or reference implementation of a
+# PVC monitoring pluginscript and expanded upon as required.
+
+# A monitoring plugin script must implement the class "MonitoringPluginScript" which
+# extends "MonitoringPlugin", providing the 3 functions indicated. Detailed explanation
+# of the role of each function is provided in context of the example; see the other
+# examples for more potential uses.
+
+# WARNING:
+#
+# This script will run in the context of the node daemon keepalives as root.
+# DO NOT install untrusted, unvetted plugins under any circumstances.
+
+
+# This import is always required here, as MonitoringPlugin is used by the
+# MonitoringPluginScript class
+from pvcnoded.objects.MonitoringInstance import MonitoringPlugin
+
+
+# A monitoring plugin script must always expose its nice name, which must be identical to
+# the file name
+PLUGIN_NAME = "dpkg"
+
+
+# The MonitoringPluginScript class must be named as such, and extend MonitoringPlugin.
+class MonitoringPluginScript(MonitoringPlugin):
+    def setup(self):
+        """
+        setup(): Perform special setup steps during node daemon startup
+
+        This step is optional and should be used sparingly.
+        """
+
+        pass
+
+    def run(self):
+        """
+        run(): Perform the check actions and return a PluginResult object
+        """
+
+        # Run any imports first
+        from re import match
+        from json import dumps
+        import daemon_lib.common as pvc_common
+
+        # Get Debian version
+        with open('/etc/debian_version', 'r') as fh:
+            debian_version = fh.read().strip()
+
+        # Get a list of dpkg packages for analysis
+        retcode, stdout, stderr = pvc_common.run_os_command("/usr/bin/dpkg --list")
+
+        # Get a list of installed packages and states
+        packages = list()
+        for dpkg_line in stdout.split('\n'):
+            if match('^[a-z][a-z] ', dpkg_line):
+                line_split = dpkg_line.split()
+                package_state = line_split[0]
+                package_name = line_split[1]
+                packages.append((package_name, package_state))
+
+        count_ok = 0
+        count_inconsistent = 0
+        list_inconsistent = list()
+
+        for package in packages:
+            if package[1] == "ii":
+                count_ok += 1
+            else:
+                count_inconsistent += 1
+                list_inconsistent.append(package[0])
+
+        # Get upgradable packages
+        retcode, stdout, stderr = pvc_common.run_os_command("/usr/bin/apt list --upgradable")
+
+        list_upgradable = list()
+        for apt_line in stdout.split('\n'):
+            if match('^[a-z][a-z] ', apt_line):
+                line_split = apt_line.split('/')
+                package_name = line_split[0]
+                list_upgradable.append(package_name)
+
+        count_upgradable = len(list_upgradable)
+
+        # Get obsolete config files (dpkg-* or ucf-* under /etc)
+        retcode, stdout, stderr = pvc_common.run_os_command("/usr/bin/find /etc -type f -a \( -name '*.dpkg-*' -o -name '*.ucf-*' \)")
+
+        obsolete_conffiles = list()
+        for conffile_line in stdout.split('\n'):
+            if conffile_line:
+                obsolete_conffiles.append(conffile_line)
+
+        count_obsolete_conffiles = len(obsolete_conffiles)
+
+        # Set health_delta based on the results
+        health_delta = 0
+        if count_inconsistent > 0:
+            health_delta += 1
+        if count_upgradable > 0:
+            health_delta += 1
+        if count_obsolete_conffiles > 0:
+            health_delta += 1
+
+        # Set the health delta in our local PluginResult object
+        self.plugin_result.set_health_delta(health_delta)
+
+        # Craft the message
+        message = f"Debian {debian_version}; Obsolete conffiles: {count_obsolete_conffiles}; Packages valid: {count_ok}, inconsistent: {count_inconsistent}, upgradable: {count_upgradable}"
+
+        # Set the message in our local PluginResult object
+        self.plugin_result.set_message(message)
+
+        # Set the detailed data in our local PluginResult object
+        detailed_data = {
+            "debian_version": debian_version,
+            "obsolete_conffiles": obsolete_conffiles,
+            "inconsistent_packages": list_inconsistent,
+            "upgradable_packages": list_upgradable,
+        }
+        self.plugin_result.set_data(dumps(detailed_data))
+
+        # Return our local PluginResult object
+        return self.plugin_result
+
+    def cleanup(self):
+        """
+        cleanup(): Perform special cleanup steps during node daemon termination
+
+        This step is optional and should be used sparingly.
+        """
+
+        pass
node-daemon/plugins/load (105 additions, new file)

@@ -0,0 +1,105 @@
+#!/usr/bin/env python3
+
+# load.py - PVC Monitoring example plugin for load
+# Part of the Parallel Virtual Cluster (PVC) system
+#
+# Copyright (C) 2018-2022 Joshua M. Boniface <joshua@boniface.me>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+###############################################################################
+
+# This script provides an example of a PVC monitoring plugin script. It will create
+# a simple plugin to check the system load against the total number of CPU cores,
+# and return a 10 health delta (100 -> 90) if the load average is > 1/2 that number.
+
+# This script can thus be used as an example or reference implementation of a
+# PVC monitoring pluginscript and expanded upon as required.
+
+# A monitoring plugin script must implement the class "MonitoringPluginScript" which
+# extends "MonitoringPlugin", providing the 3 functions indicated. Detailed explanation
+# of the role of each function is provided in context of the example; see the other
+# examples for more potential uses.
+
+# WARNING:
+#
+# This script will run in the context of the node daemon keepalives as root.
+# DO NOT install untrusted, unvetted plugins under any circumstances.
+
+
+# This import is always required here, as MonitoringPlugin is used by the
+# MonitoringPluginScript class
+from pvcnoded.objects.MonitoringInstance import MonitoringPlugin
+
+
+# A monitoring plugin script must always expose its nice name, which must be identical to
+# the file name
+PLUGIN_NAME = "load"
+
+
+# The MonitoringPluginScript class must be named as such, and extend MonitoringPlugin.
+class MonitoringPluginScript(MonitoringPlugin):
+    def setup(self):
+        """
+        setup(): Perform special setup steps during node daemon startup
+
+        This step is optional and should be used sparingly.
+        """
+
+        pass
+
+    def run(self):
+        """
+        run(): Perform the check actions and return a PluginResult object
+        """
+
+        # Run any imports first
+        from os import getloadavg
+        from psutil import cpu_count
+
+        # Get the current 1-minute system load average
+        load_average = getloadavg()[0]
+
+        # Get the number of CPU cores
+        cpu_cores = cpu_count()
+
+        # Check that the load average is greater or equal to the cpu count
+        if load_average > float(cpu_cores):
+            # Set the health delta to 10 (subtract 10 from the total of 100)
+            health_delta = 10
+            # Craft a message that can be used by the clients
+            message = f"Current load is {load_average} out of {cpu_cores} CPU cores"
+
+        else:
+            # Set the health delta to 0 (no change)
+            health_delta = 0
+            # Craft a message that can be used by the clients
+            message = f"Current load is {load_average} out of {cpu_cores} CPU cores"
+
+        # Set the health delta in our local PluginResult object
+        self.plugin_result.set_health_delta(health_delta)
+
+        # Set the message in our local PluginResult object
+        self.plugin_result.set_message(message)
+
+        # Return our local PluginResult object
+        return self.plugin_result
+
+    def cleanup(self):
+        """
+        cleanup(): Perform special cleanup steps during node daemon termination
+
+        This step is optional and should be used sparingly.
+        """
+
+        pass
@@ -128,6 +128,8 @@ pvc:
     configuration:
       # directories: PVC system directories
       directories:
+        # plugin_directory: Directory containing node monitoring plugins
+        plugin_directory: "/usr/share/pvc/plugins"
         # dynamic_directory: Temporary in-memory directory for active configurations
         dynamic_directory: "/run/pvc"
         # log_directory: Logging directory
@@ -27,6 +27,7 @@ import pvcnoded.util.services
 import pvcnoded.util.libvirt
 import pvcnoded.util.zookeeper
 
+import pvcnoded.objects.MonitoringInstance as MonitoringInstance
 import pvcnoded.objects.DNSAggregatorInstance as DNSAggregatorInstance
 import pvcnoded.objects.MetadataAPIInstance as MetadataAPIInstance
 import pvcnoded.objects.VMInstance as VMInstance
@@ -48,7 +49,7 @@ import re
 import json
 
 # Daemon version
-version = "0.9.59"
+version = "0.9.61"
 
 
 ##########################################################
@@ -58,6 +59,7 @@ version = "0.9.59"
 
 def entrypoint():
     keepalive_timer = None
+    monitoring_instance = None
 
     # Get our configuration
     config = pvcnoded.util.config.get_configuration()
@@ -204,7 +206,7 @@ def entrypoint():
 
     # Define a cleanup function
     def cleanup(failure=False):
-        nonlocal logger, zkhandler, keepalive_timer, d_domain
+        nonlocal logger, zkhandler, keepalive_timer, d_domain, monitoring_instance
 
         logger.out("Terminating pvcnoded and cleaning up", state="s")
 
@@ -253,6 +255,13 @@ def entrypoint():
         except Exception:
             pass
 
+        # Clean up any monitoring plugins that have cleanup
+        try:
+            logger.out("Performing monitoring plugin cleanup", state="s")
+            monitoring_instance.run_cleanups()
+        except Exception:
+            pass
+
         # Set stop state in Zookeeper
         zkhandler.write([(("node.state.daemon", config["node_hostname"]), "stop")])
 
@@ -1015,9 +1024,14 @@ def entrypoint():
         state="i",
     )
 
+    # Set up the node monitoring instance
+    monitoring_instance = MonitoringInstance.MonitoringInstance(
+        zkhandler, config, logger, this_node
+    )
+
     # Start keepalived thread
     keepalive_timer = pvcnoded.util.keepalive.start_keepalive_timer(
-        logger, config, zkhandler, this_node
+        logger, config, zkhandler, this_node, monitoring_instance
     )
 
     # Tick loop; does nothing since everything is async
357
node-daemon/pvcnoded/objects/MonitoringInstance.py
Normal file
357
node-daemon/pvcnoded/objects/MonitoringInstance.py
Normal file
@@ -0,0 +1,357 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
# PluginInstance.py - Class implementing a PVC monitoring instance
|
||||||
|
# Part of the Parallel Virtual Cluster (PVC) system
|
||||||
|
#
|
||||||
|
# Copyright (C) 2018-2022 Joshua M. Boniface <joshua@boniface.me>
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, version 3.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
#
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
import concurrent.futures
|
||||||
|
import time
|
||||||
|
import importlib.util
|
||||||
|
|
||||||
|
from os import walk
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
|
class PluginResult(object):
|
||||||
|
def __init__(self, zkhandler, config, logger, this_node, plugin_name):
|
||||||
|
self.zkhandler = zkhandler
|
||||||
|
self.config = config
|
||||||
|
self.logger = logger
|
||||||
|
self.this_node = this_node
|
||||||
|
self.plugin_name = plugin_name
|
||||||
|
self.current_time = int(time.time())
|
||||||
|
self.health_delta = 0
|
||||||
|
self.message = None
|
||||||
|
self.data = None
|
||||||
|
self.runtime = "0.00"
|
||||||
|
|
||||||
|
def set_health_delta(self, new_delta):
|
||||||
|
self.health_delta = new_delta
|
||||||
|
|
||||||
|
def set_message(self, new_message):
|
||||||
|
self.message = new_message
|
||||||
|
|
||||||
|
def set_data(self, new_data):
|
||||||
|
self.data = new_data
|
||||||
|
|
||||||
|
def set_runtime(self, new_runtime):
|
||||||
|
self.runtime = new_runtime
|
||||||
|
|
||||||
|
def to_zookeeper(self):
|
||||||
|
self.zkhandler.write(
|
||||||
|
[
|
||||||
|
(
|
||||||
|
(
|
||||||
|
"node.monitoring.data",
|
||||||
|
self.this_node.name,
|
||||||
|
"monitoring_plugin.name",
|
||||||
|
self.plugin_name,
|
||||||
|
),
|
||||||
|
self.plugin_name,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
(
|
||||||
|
"node.monitoring.data",
|
||||||
|
self.this_node.name,
|
||||||
|
"monitoring_plugin.last_run",
|
||||||
|
self.plugin_name,
|
||||||
|
),
|
||||||
|
self.current_time,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
(
|
||||||
|
"node.monitoring.data",
|
||||||
|
self.this_node.name,
|
||||||
|
"monitoring_plugin.health_delta",
|
||||||
|
self.plugin_name,
|
||||||
|
),
|
||||||
|
self.health_delta,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
(
|
||||||
|
"node.monitoring.data",
|
||||||
|
self.this_node.name,
|
||||||
|
"monitoring_plugin.message",
|
||||||
|
self.plugin_name,
|
||||||
|
),
|
||||||
|
self.message,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
(
|
||||||
|
"node.monitoring.data",
|
||||||
|
self.this_node.name,
|
||||||
|
"monitoring_plugin.data",
|
||||||
|
self.plugin_name,
|
||||||
|
),
|
||||||
|
self.data,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
(
|
||||||
|
"node.monitoring.data",
|
||||||
|
self.this_node.name,
|
||||||
|
"monitoring_plugin.runtime",
|
||||||
|
self.plugin_name,
|
||||||
|
),
|
||||||
|
self.runtime,
|
||||||
|
),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MonitoringPlugin(object):
|
||||||
|
def __init__(self, zkhandler, config, logger, this_node, plugin_name):
|
||||||
|
self.zkhandler = zkhandler
|
||||||
|
self.config = config
|
||||||
|
self.logger = logger
|
||||||
|
self.this_node = this_node
|
||||||
|
self.plugin_name = plugin_name
|
||||||
|
|
||||||
|
self.plugin_result = PluginResult(
|
||||||
|
self.zkhandler,
|
||||||
|
self.config,
|
||||||
|
self.logger,
|
||||||
|
self.this_node,
|
||||||
|
self.plugin_name,
|
||||||
|
)
|
||||||
|
|
||||||
|
#
|
||||||
|
# Helper functions; exposed to child MonitoringPluginScript instances
|
||||||
|
#
|
||||||
|
def log(self, message, state="d"):
|
||||||
|
"""
|
||||||
|
Log a message to the PVC logger instance using the plugin name as a prefix
|
||||||
|
Takes "state" values as defined by the PVC logger instance, defaulting to debug:
|
||||||
|
"d": debug
|
||||||
|
"i": informational
|
||||||
|
"t": tick/keepalive
|
||||||
|
"w": warning
|
||||||
|
"e": error
|
||||||
|
"""
|
||||||
|
if state == "d" and not self.config["debug"]:
|
||||||
|
return
|
||||||
|
|
||||||
|
self.logger.out(message, state=state, prefix=self.plugin_name)
|
||||||
|
|
||||||
|
#
|
||||||
|
# Primary class functions; implemented by the individual plugins
|
||||||
|
#
|
||||||
|
def setup(self):
|
||||||
|
"""
|
||||||
|
setup(): Perform setup of the plugin; run once during daemon startup
|
||||||
|
OPTIONAL
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
def run(self):
|
||||||
|
"""
|
||||||
|
run(): Run the plugin, returning a PluginResult object
|
||||||
|
"""
|
||||||
|
return self.plugin_result
|
||||||
|
|
||||||
|
def cleanup(self):
|
||||||
|
"""
|
||||||
|
cleanup(): Clean up after the plugin; run once during daemon shutdown
|
||||||
|
OPTIONAL
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class MonitoringInstance(object):
|
||||||
|
def __init__(self, zkhandler, config, logger, this_node):
|
||||||
|
self.zkhandler = zkhandler
|
||||||
|
self.config = config
|
||||||
|
self.logger = logger
|
||||||
|
self.this_node = this_node
|
||||||
|
|
||||||
|
# Get a list of plugins from the plugin_directory
|
||||||
|
plugin_files = next(walk(self.config["plugin_directory"]), (None, None, []))[
|
||||||
|
2
|
||||||
|
] # [] if no file
|
||||||
|
|
||||||
|
self.all_plugins = list()
|
||||||
|
self.all_plugin_names = list()
|
||||||
|
|
||||||
|
# Load each plugin file into the all_plugins list
|
||||||
|
for plugin_file in sorted(plugin_files):
|
||||||
|
try:
|
||||||
|
self.logger.out(
|
||||||
|
f"Loading monitoring plugin from {self.config['plugin_directory']}/{plugin_file}",
|
||||||
|
state="i",
|
||||||
|
)
|
||||||
|
loader = importlib.machinery.SourceFileLoader(
|
||||||
|
"plugin_script", f"{self.config['plugin_directory']}/{plugin_file}"
|
||||||
|
)
|
||||||
|
spec = importlib.util.spec_from_loader(loader.name, loader)
|
||||||
|
plugin_script = importlib.util.module_from_spec(spec)
|
||||||
|
spec.loader.exec_module(plugin_script)
|
||||||
|
|
||||||
|
plugin = plugin_script.MonitoringPluginScript(
|
||||||
|
self.zkhandler,
|
||||||
|
self.config,
|
||||||
|
self.logger,
|
||||||
|
self.this_node,
|
||||||
|
plugin_script.PLUGIN_NAME,
|
||||||
|
)
|
||||||
|
self.all_plugins.append(plugin)
|
||||||
|
self.all_plugin_names.append(plugin.plugin_name)
|
||||||
|
|
||||||
|
# Create plugin key
|
||||||
|
self.zkhandler.write(
|
||||||
|
[
|
||||||
|
(
|
||||||
|
(
|
||||||
|
"node.monitoring.data",
|
||||||
|
self.this_node.name,
|
||||||
|
"monitoring_plugin.name",
|
||||||
|
plugin.plugin_name,
|
||||||
|
),
|
||||||
|
plugin.plugin_name,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
(
|
||||||
|
"node.monitoring.data",
|
||||||
|
self.this_node.name,
|
||||||
|
"monitoring_plugin.last_run",
|
||||||
|
plugin.plugin_name,
|
||||||
|
),
|
||||||
|
"0",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
(
|
||||||
|
"node.monitoring.data",
|
||||||
|
self.this_node.name,
|
||||||
|
"monitoring_plugin.health_delta",
|
||||||
|
plugin.plugin_name,
|
||||||
|
),
|
||||||
|
"0",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
(
|
||||||
|
"node.monitoring.data",
|
||||||
|
self.this_node.name,
|
||||||
|
"monitoring_plugin.message",
|
||||||
|
plugin.plugin_name,
|
||||||
|
),
|
||||||
|
"Initializing",
|
||||||
|
),
|
||||||
|
(
|
||||||
|
(
|
||||||
|
"node.monitoring.data",
|
||||||
|
self.this_node.name,
|
||||||
|
"monitoring_plugin.data",
|
||||||
|
plugin.plugin_name,
|
||||||
|
),
|
||||||
|
None,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
(
|
||||||
|
"node.monitoring.data",
|
||||||
|
self.this_node.name,
|
||||||
|
"monitoring_plugin.runtime",
|
||||||
|
plugin.plugin_name,
|
||||||
|
),
|
||||||
|
"0.00",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
self.logger.out(
|
||||||
|
f"Successfully loaded monitoring plugin '{plugin.plugin_name}'",
|
||||||
|
state="o",
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.out(
|
||||||
|
f"Failed to load monitoring plugin: {e}",
|
||||||
|
state="w",
|
||||||
|
)
|
||||||
|
|
||||||
|
self.zkhandler.write(
|
||||||
|
[
|
||||||
|
(
|
||||||
|
("node.monitoring.plugins", self.this_node.name),
|
||||||
|
self.all_plugin_names,
|
||||||
|
),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
# Clean up any old plugin data for which a plugin file no longer exists
|
||||||
|
for plugin_key in self.zkhandler.children(
|
||||||
|
("node.monitoring.data", self.this_node.name)
|
||||||
|
):
|
||||||
|
if plugin_key not in self.all_plugin_names:
|
||||||
|
self.zkhandler.delete(
|
||||||
|
(
|
||||||
|
"node.monitoring.data",
|
||||||
|
self.this_node.name,
|
||||||
|
"monitoring_plugin",
|
||||||
|
plugin_key,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
def run_plugin(self, plugin):
|
||||||
|
time_start = datetime.now()
|
||||||
|
result = plugin.run()
|
||||||
|
time_end = datetime.now()
|
||||||
|
time_delta = time_end - time_start
|
||||||
|
runtime = "{:0.02f}".format(time_delta.total_seconds())
|
||||||
|
result.set_runtime(runtime)
|
||||||
|
self.logger.out(
|
||||||
|
result.message, state="t", prefix=f"{plugin.plugin_name} ({runtime}s)"
|
||||||
|
)
|
||||||
|
result.to_zookeeper()
|
||||||
|
return result
|
||||||

    def run_plugins(self):
        total_health = 100
        self.logger.out("Running monitoring plugins:", state="t")
        plugin_results = list()
        with concurrent.futures.ThreadPoolExecutor(max_workers=99) as executor:
            to_future_plugin_results = {
                executor.submit(self.run_plugin, plugin): plugin
                for plugin in self.all_plugins
            }
            for future in concurrent.futures.as_completed(to_future_plugin_results):
                plugin_results.append(future.result())

        for result in plugin_results:
            if result is not None:
                total_health -= result.health_delta

        if total_health > 90:
            health_colour = self.logger.fmt_green
        elif total_health > 50:
            health_colour = self.logger.fmt_yellow
        else:
            health_colour = self.logger.fmt_red

        self.logger.out(
            f"System health: {health_colour}{total_health}/100{self.logger.fmt_end}",
            state="t",
        )
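Both run_plugins here and run_cleanups below use the same submit/as_completed fan-out: plugins run concurrently and results arrive in completion order rather than submission order, so one slow plugin cannot stall the rest. A self-contained version of the pattern:

import concurrent.futures
import time


def check(name, delay):
    time.sleep(delay)  # stand-in for a plugin's run()
    return f"{name}: ok after {delay}s"


plugins = [("disk", 0.2), ("load", 0.1), ("net", 0.3)]
results = []
with concurrent.futures.ThreadPoolExecutor(max_workers=99) as executor:
    futures = {executor.submit(check, n, d): n for n, d in plugins}
    for future in concurrent.futures.as_completed(futures):
        results.append(future.result())  # "load" finishes first
print(results)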

    def run_cleanup(self, plugin):
        return plugin.cleanup()

    def run_cleanups(self):
        with concurrent.futures.ThreadPoolExecutor(max_workers=99) as executor:
            to_future_plugin_results = {
                executor.submit(self.run_cleanup, plugin): plugin
                for plugin in self.all_plugins
            }
            for future in concurrent.futures.as_completed(to_future_plugin_results):
                # This doesn't do anything, just lets us wait for them all to complete
                pass
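The SourceFileLoader dance at the top of this file is the standard importlib recipe for importing an arbitrary file on disk as a module object. A standalone equivalent, assuming a throwaway plugin file at the path shown:

import importlib.machinery
import importlib.util

# Hypothetical plugin file path for illustration
path = "/usr/share/pvc/plugins/example"

loader = importlib.machinery.SourceFileLoader("plugin_script", path)
spec = importlib.util.spec_from_loader(loader.name, loader)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)  # runs the file; module-level names become attributes

print(module.PLUGIN_NAME)  # assuming the file defines PLUGIN_NAME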
@@ -180,6 +180,9 @@ def get_configuration():
         raise MalformedConfigurationError(e)
 
     config_directories = {
+        "plugin_directory": o_directories.get(
+            "plugin_directory", "/usr/share/pvc/plugins"
+        ),
         "dynamic_directory": o_directories.get("dynamic_directory", None),
         "log_directory": o_directories.get("log_directory", None),
         "console_log_directory": o_directories.get("console_log_directory", None),
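Using dict.get with a default makes plugin_directory optional in the daemon configuration: an absent key falls back to /usr/share/pvc/plugins, whereas the other directories fall back to None. For example (o_directories standing in for the parsed config section):

o_directories = {"dynamic_directory": "/run/pvc", "log_directory": "/var/log/pvc"}

plugin_directory = o_directories.get("plugin_directory", "/usr/share/pvc/plugins")
print(plugin_directory)  # "/usr/share/pvc/plugins", since the key is absent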
@@ -51,7 +51,7 @@ libvirt_vm_states = {
 }
 
 
-def start_keepalive_timer(logger, config, zkhandler, this_node):
+def start_keepalive_timer(logger, config, zkhandler, this_node, monitoring_instance):
     keepalive_interval = config["keepalive_interval"]
     logger.out(
         f"Starting keepalive timer ({keepalive_interval} second interval)", state="s"
@@ -59,7 +59,7 @@ def start_keepalive_timer(logger, config, zkhandler, this_node):
     keepalive_timer = BackgroundScheduler()
     keepalive_timer.add_job(
         node_keepalive,
-        args=(logger, config, zkhandler, this_node),
+        args=(logger, config, zkhandler, this_node, monitoring_instance),
         trigger="interval",
         seconds=keepalive_interval,
     )
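BackgroundScheduler comes from APScheduler; an interval trigger re-invokes the job with fixed args every N seconds on a background thread, which is why the new monitoring_instance argument must be threaded through at timer setup. A minimal standalone version of the wiring:

import time
from apscheduler.schedulers.background import BackgroundScheduler


def tick(name):
    print(f"keepalive for {name}")


scheduler = BackgroundScheduler()
scheduler.add_job(tick, args=("node1",), trigger="interval", seconds=1)
scheduler.start()
time.sleep(3.5)  # main thread keeps running; tick() fires about 3 times
scheduler.shutdown()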
@@ -97,29 +97,6 @@ def collect_ceph_stats(logger, config, zkhandler, this_node, queue):
         logger.out("Failed to open connection to Ceph cluster: {}".format(e), state="e")
         return
 
-    if debug:
-        logger.out("Getting health stats from monitor", state="d", prefix="ceph-thread")
-
-    # Get Ceph cluster health for local status output
-    command = {"prefix": "health", "format": "json"}
-    try:
-        health_status = json.loads(
-            ceph_conn.mon_command(json.dumps(command), b"", timeout=1)[1]
-        )
-        ceph_health = health_status["status"]
-    except Exception as e:
-        logger.out("Failed to obtain Ceph health data: {}".format(e), state="e")
-        ceph_health = "HEALTH_UNKN"
-
-    if ceph_health in ["HEALTH_OK"]:
-        ceph_health_colour = logger.fmt_green
-    elif ceph_health in ["HEALTH_UNKN"]:
-        ceph_health_colour = logger.fmt_cyan
-    elif ceph_health in ["HEALTH_WARN"]:
-        ceph_health_colour = logger.fmt_yellow
-    else:
-        ceph_health_colour = logger.fmt_red
-
     # Primary-only functions
     if this_node.router_state == "primary":
         if debug:
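The removed health query presumably moves out of the keepalive hot path and into a monitoring plugin. For reference, the underlying librados call it used works like this standalone (the conffile location is an assumption):

import json
import rados

# Assumed conffile location; adjust for the actual cluster
ceph_conn = rados.Rados(conffile="/etc/ceph/ceph.conf")
ceph_conn.connect()

command = {"prefix": "health", "format": "json"}
# mon_command returns (retcode, output_buffer, status_string)
retcode, outbuf, outs = ceph_conn.mon_command(json.dumps(command), b"", timeout=1)
print(json.loads(outbuf)["status"])  # e.g. "HEALTH_OK"

ceph_conn.shutdown()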
@@ -408,8 +385,6 @@ def collect_ceph_stats(logger, config, zkhandler, this_node, queue):
 
     ceph_conn.shutdown()
 
-    queue.put(ceph_health_colour)
-    queue.put(ceph_health)
     queue.put(osds_this_node)
 
     if debug:
@@ -648,7 +623,7 @@ def collect_vm_stats(logger, config, zkhandler, this_node, queue):
 
 
 # Keepalive update function
-def node_keepalive(logger, config, zkhandler, this_node):
+def node_keepalive(logger, config, zkhandler, this_node, monitoring_instance):
     debug = config["debug"]
     if debug:
         logger.out("Keepalive starting", state="d", prefix="main-thread")
@@ -777,16 +752,14 @@ def node_keepalive(logger, config, zkhandler, this_node):
 
     if config["enable_storage"]:
         try:
-            ceph_health_colour = ceph_thread_queue.get(
-                timeout=config["keepalive_interval"]
+            osds_this_node = ceph_thread_queue.get(
+                timeout=(config["keepalive_interval"] - 1)
             )
-            ceph_health = ceph_thread_queue.get(timeout=config["keepalive_interval"])
-            osds_this_node = ceph_thread_queue.get(timeout=config["keepalive_interval"])
         except Exception:
             logger.out("Ceph stats queue get exceeded timeout, continuing", state="w")
-            ceph_health_colour = logger.fmt_cyan
-            ceph_health = "UNKNOWN"
             osds_this_node = "?"
+    else:
+        osds_this_node = "0"
 
     # Set our information in zookeeper
     keepalive_time = int(time.time())
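With the health values gone, only the OSD count still crosses the thread queue, and its timeout is trimmed to one second under the keepalive interval so a stuck Ceph collector cannot make the scheduled job overrun its own interval. The fallback behaviour in isolation:

import queue

keepalive_interval = 5
ceph_thread_queue = queue.Queue()  # nothing was put: simulates a hung collector

try:
    osds_this_node = ceph_thread_queue.get(timeout=(keepalive_interval - 1))
except queue.Empty:
    osds_this_node = "?"  # keepalive continues with a placeholder
print(osds_this_node)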
@@ -839,8 +812,8 @@ def node_keepalive(logger, config, zkhandler, this_node):
     if config["log_keepalive_cluster_details"]:
         logger.out(
             "{bold}Maintenance:{nofmt} {maint} "
-            "{bold}Active VMs:{nofmt} {domcount} "
-            "{bold}Networks:{nofmt} {netcount} "
+            "{bold}Node VMs:{nofmt} {domcount} "
+            "{bold}Node OSDs:{nofmt} {osdcount} "
             "{bold}Load:{nofmt} {load} "
             "{bold}Memory [MiB]: VMs:{nofmt} {allocmem} "
             "{bold}Used:{nofmt} {usedmem} "
@@ -849,7 +822,7 @@ def node_keepalive(logger, config, zkhandler, this_node):
                 nofmt=logger.fmt_end,
                 maint=this_node.maintenance,
                 domcount=this_node.domains_count,
-                netcount=len(zkhandler.children("base.network")),
+                osdcount=osds_this_node,
                 load=this_node.cpuload,
                 freemem=this_node.memfree,
                 usedmem=this_node.memused,
@@ -857,22 +830,6 @@ def node_keepalive(logger, config, zkhandler, this_node):
             ),
             state="t",
         )
-    if config["enable_storage"] and config["log_keepalive_storage_details"]:
-        logger.out(
-            "{bold}Ceph cluster status:{nofmt} {health_colour}{health}{nofmt} "
-            "{bold}Total OSDs:{nofmt} {total_osds} "
-            "{bold}Node OSDs:{nofmt} {node_osds} "
-            "{bold}Pools:{nofmt} {total_pools} ".format(
-                bold=logger.fmt_bold,
-                health_colour=ceph_health_colour,
-                nofmt=logger.fmt_end,
-                health=ceph_health,
-                total_osds=len(zkhandler.children("base.osd")),
-                node_osds=osds_this_node,
-                total_pools=len(zkhandler.children("base.pool")),
-            ),
-            state="t",
-        )
 
     # Look for dead nodes and fence them
     if not this_node.maintenance:
@@ -918,5 +875,7 @@ def node_keepalive(logger, config, zkhandler, this_node):
                 [(("node.state.daemon", node_name), "dead")]
             )
 
+    monitoring_instance.run_plugins()
+
     if debug:
         logger.out("Keepalive finished", state="d", prefix="main-thread")