Compare commits
280 Commits
8df189aa22 ... v0.9.59
SHA1
---
92feeefd26
38d63d9837
095bcb2373
91e450f399
79eb994a5e
d65f512897
8af7189dd0
ea7a4b2b85
59f97ebbfb
072337f1f0
c3bc55eff8
6c58d52fa1
666e02fbfd
46dde055c4
ef437c3dbf
bd2208e8f6
62d5ff11df
0019881cfa
d46133802b
fcadde057e
2608f38d64
89f05ced3f
729481126c
41eccb9c7d
e550e39a5a
dff156b2b0
1c4fb80d1f
ec7beb08cc
3a180193ee
e26ff8a975
6276414702
a34d64a71b
71297e0179
45c9909428
7268592c87
726d0a562b
39e1fc50ed
7a3870fc44
bffab7a5a1
6cbaeb5dc8
58ce133c8d
43feb33caa
3a5d8c61da
1e0b502250
fe17d28385
8aaac33056
cc7952c232
16915ed507
2c624ceb2c
da85480488
47b0704555
7c49967586
e3f96ac87e
4df70cf086
f1df1cfe93
5942aa50fc
096bcdfd75
239c392892
172d0a86e4
d8e57a26c5
9b499b9f48
881550b610
2a21d48128
8d0f26ff7a
bcabd7d079
05a316cdd6
4b36753f27
171f6ac9ed
645b525ad7
ec559aec0d
71ffd5a191
2739c27299
56129a3636
932b3c55a3
92e2ff7449
d8d3feee22
b1357cafdb
f8cdcb30ba
51ad2058ed
c401a1f655
7a40c7a55b
8027a6efdc
3801fcc07b
c741900baf
464f0e0356
cea8832f90
5807351405
d6ca74376a
413100a147
4d698be34b
53aed0a735
ea709f573f
1142454934
bbfad340a1
c73939e1c5
25fe45dd28
58d57d7037
00d2c67c41
67131de4f6
abc23ebb18
9f122e916f
3ce4d90693
6ccd19e636
d8689e6eaa
bc49b5eca2
8470dfaa29
f164d898c1
195f31501c
a8899a1d66
817dffcf30
eda2a57a73
135d28e60b
e7d7378bae
799c3e8d5d
d0ec24f690
6e9fcd38a3
f51f9fc4c8
a6dcffc737
364c190106
ea19af6494
7069d3237c
619c3f7ff5
8a75bb3011
a817c3e678
0cc3f2deab
21b4bbe51a
87ec31c023
0d857d5ab8
006f40f195
5f193a6134
78faa90139
23b1501f40
66bfad3109
eee5c25d6f
ff4fc18a60
ac885b855a
b9c30baf80
9b12cc0236
c41664d2da
3779bc960e
5c620262e9
6b88fbd1e3
a50c8e6a4d
7d6e4353f1
bf30b31db6
70bd601dc1
2e7b9b28b3
12eef58d42
f2e6892fd2
91fb9e1241
d87bea4159
3a6f442856
dfca998adf
55f397a347
dfebb2d3e5
e88147db4a
b8204d89ac
fe73dfbdc9
8f906c1f81
2d9fb9688d
fb84685c2a
032ba44d9c
b7761877e7
1fe07640b3
b8d843ebe4
95d983ddff
4c5da1b6a8
be6b1e02e3
ec2a72ed4b
b06e327add
d1f32d2b9c
3f78ca1cc9
e866335918
221494ed1b
f13cc04b89
4ed537ee3b
95e01f38d5
3122d73bf5
7ed8ef179c
caead02b2a
87bc5f93e6
203893559e
2c51bb0705
46d3daf686
e9d05aa24e
d2c18d7b46
6ce28c43af
87cda72ca9
8f71a6d2f6
c45f8f5bd5
24de0f4189
3690a2c1e0
50d8aa0586
db6e65712d
cf8e16543c
1a4fcdcc2d
9a71db0800
6ee4c55071
c27359c4bf
46078932c3
c89699bc6f
1b9507e4f5
3db7ac48f4
1830ec6465
bdb9db8375
c61d7bc313
c0f7ba0125
761032b321
3566e13e79
6b324029cf
13eeabf44b
d86768d3d0
a167757600
a95d9680ac
63962f10ba
a7a681d92a
da9248cfa2
aa035a61a7
7c8ba56561
bba73980de
32b3af697c
7c122ac921
0dbf139706
c909beaf6d
2da49297d2
0ff9a6b8c4
28377178d2
e06b114c48
0058f19d88
056cf3740d
58f174b87b
37b98fd54f
f83a345bfe
ce06e4d81b
23977b04fc
bb1cca522f
9a4dce4e4c
f6f6f07488
142c999ce8
1de069298c
55221b3d97
0d72798814
3638efc77e
c2c888d684
febef2e406
2a4f38e933
3b805cdc34
06f0f7ed91
fd040ab45a
e23e2dd9bf
ee4266f8ca
0f02c5eaef
075abec5fe
3a1cbf8d01
a438a4155a
65df807b09
d0f3e9e285
adc8a5a3bc
df277edf1c
772807deb3
58db537093
e71a6c90bf
a8e9a56924
f3fb492633
e962743e51
46f1d761f6
be954c1625
fb46f5f9e9
694b8e85a0
eb321497ee
5b81e59481
a4c0e0befd
a18cef5f25
f6c5aa9992
ffa3dd5edb
afb0359c20
afdf254297
42e776fac1
dae67a1b7b
b86f8c1e09
10
CHANGELOG.md
@@ -1,15 +1,5 @@
 ## PVC Changelog
 
-###### [v0.9.61](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.61)
-
-* [provisioner] Fixes a bug in network comparison
-* [api] Fixes a bug being unable to rename disabled VMs
-
-###### [v0.9.60](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.60)
-
-* [Provisioner] Cleans up several remaining bugs in the example scripts; they should all be valid now
-* [Provisioner] Adjust default libvirt schema to disable RBD caching for a 2x+ performance boost
-
 ###### [v0.9.59](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.59)
 
 * [API] Flips the mem(prov) and mem(free) selectors making mem(free) the default for "mem" and "memprov" explicit
@@ -398,7 +398,7 @@ class VMBuilderScript(VMBuilder):
             if volume.get("source_volume") is not None:
                 continue
 
-            if volume.get("filesystem") is None or volume.get("filesystem") == "swap":
+            if volume.get("filesystem") is None:
                 continue
 
             mapped_dst_volume = f"/dev/rbd/{dst_volume}"
@@ -473,7 +473,7 @@ class VMBuilderScript(VMBuilder):
         ]
 
         # We need to know our root disk for later GRUB-ing
-        root_volume = None
+        root_disk = None
         for volume in volumes:
             if volume["mountpoint"] == "/":
                 root_volume = volume
@@ -725,7 +725,6 @@ GRUB_DISABLE_LINUX_UUID=false
             if (
                 volume.get("source_volume") is None
                 and volume.get("filesystem") is not None
-                and volume.get("filesystem") != "swap"
             ):
                 # Unmount filesystem
                 retcode, stdout, stderr = pvc_common.run_os_command(
@@ -20,7 +20,7 @@
 ###############################################################################
 
 # This script provides an example of a PVC provisioner script. It will create a
-# standard VM config and install a RHEL 8+ or similar OS using rinse.
+# standard VM config and install a RHEL-like OS using rinse.
 
 # This script can thus be used as an example or reference implementation of a
 # PVC provisioner script and expanded upon as required.
@@ -398,7 +398,7 @@ class VMBuilderScript(VMBuilder):
             if volume.get("source_volume") is not None:
                 continue
 
-            if volume.get("filesystem") is None or volume.get("filesystem") == "swap":
+            if volume.get("filesystem") is None:
                 continue
 
             mapped_dst_volume = f"/dev/rbd/{dst_volume}"
@@ -487,7 +487,7 @@ class VMBuilderScript(VMBuilder):
         post_packages = ["cloud-init"]
 
         # We need to know our root disk for later GRUB-ing
-        root_volume = None
+        root_disk = None
         for volume in volumes:
             if volume["mountpoint"] == "/":
                 root_volume = volume
@@ -571,6 +571,21 @@ class VMBuilderScript(VMBuilder):
         with open(hostname_file, "w") as fh:
             fh.write("{}".format(vm_name))
 
+        # Fix the cloud-init.target since it's broken by default
+        cloudinit_target_file = "{}/etc/systemd/system/cloud-init.target".format(
+            temporary_directory
+        )
+        with open(cloudinit_target_file, "w") as fh:
+            # We lose our indent on these raw blocks to preserve the apperance of the files
+            # inside the VM itself
+            data = """[Install]
+WantedBy=multi-user.target
+[Unit]
+Description=Cloud-init target
+After=multi-user.target
+"""
+            fh.write(data)
+
         # Due to device ordering within the Libvirt XML configuration, the first Ethernet interface
         # will always be on PCI bus ID 2, hence the name "ens2".
         # Write a DHCP stanza for ens2
@@ -667,6 +682,11 @@ GRUB_SERIAL_COMMAND="serial --speed=115200 --unit=0 --word=8 --parity=no --stop=
         # Set the timezone to UTC
         os.system("ln -sf ../usr/share/zoneinfo/UTC /etc/localtime")
 
+        # Unmount the bound devfs and sysfs
+        os.system("umount {}/dev".format(temporary_directory))
+        os.system("umount {}/sys".format(temporary_directory))
+        os.system("umount {}/proc".format(temporary_directory))
+
     def cleanup(self):
         """
         cleanup(): Perform any cleanup required due to prepare()/install()
@@ -680,7 +700,6 @@ GRUB_SERIAL_COMMAND="serial --speed=115200 --unit=0 --word=8 --parity=no --stop=
         """
 
         # Run any imports first
-        import os
         from pvcapid.vmbuilder import open_zk
         from pvcapid.Daemon import config
         import daemon_lib.common as pvc_common
@@ -689,11 +708,6 @@ GRUB_SERIAL_COMMAND="serial --speed=115200 --unit=0 --word=8 --parity=no --stop=
         # Set the tempdir we used in the prepare() and install() steps
         temp_dir = "/tmp/target"
 
-        # Unmount the bound devfs and sysfs
-        os.system(f"umount {temp_dir}/dev")
-        os.system(f"umount {temp_dir}/sys")
-        os.system(f"umount {temp_dir}/proc")
-
         # Use this construct for reversing the list, as the normal reverse() messes with the list
         for volume in list(reversed(self.vm_data["volumes"])):
             dst_volume_name = f"{self.vm_name}_{volume['disk_id']}"
@@ -704,7 +718,6 @@ GRUB_SERIAL_COMMAND="serial --speed=115200 --unit=0 --word=8 --parity=no --stop=
             if (
                 volume.get("source_volume") is None
                 and volume.get("filesystem") is not None
-                and volume.get("filesystem") != "swap"
             ):
                 # Unmount filesystem
                 retcode, stdout, stderr = pvc_common.run_os_command(
@@ -715,14 +728,14 @@ GRUB_SERIAL_COMMAND="serial --speed=115200 --unit=0 --word=8 --parity=no --stop=
                     f"Failed to unmount '{mapped_dst_volume}' on '{mount_path}': {stderr}"
                 )
 
-            # Unmap volume
-            with open_zk(config) as zkhandler:
-                success, message = pvc_ceph.unmap_volume(
-                    zkhandler,
-                    volume["pool"],
-                    dst_volume_name,
-                )
-            if not success:
-                raise ProvisioningError(
-                    f"Failed to unmap '{mapped_dst_volume}': {stderr}"
-                )
+                # Unmap volume
+                with open_zk(config) as zkhandler:
+                    success, message = pvc_ceph.unmap_volume(
+                        zkhandler,
+                        volume["pool"],
+                        dst_volume_name,
+                    )
+                if not success:
+                    raise ProvisioningError(
+                        f"Failed to unmap '{mapped_dst_volume}': {stderr}"
+                    )
@@ -27,7 +27,7 @@ from ssl import SSLContext, TLSVersion
 from distutils.util import strtobool as dustrtobool
 
 # Daemon version
-version = "0.9.61"
+version = "0.9.59"
 
 # API version
 API_VERSION = 1.0
@@ -100,7 +100,7 @@ devices_scsi_controller = """ <controller type='scsi' index='0' model='virtio
 # * vm_name
 # * disk_id
 devices_disk_header = """    <disk type='network' device='disk'>
-      <driver name='qemu' discard='unmap' cache='none'/>
+      <driver name='qemu' discard='unmap'/>
       <target dev='{disk_id}' bus='scsi'/>
       <auth username='libvirt'>
         <secret type='ceph' uuid='{ceph_storage_secret}'/>
@@ -580,7 +580,7 @@ def delete_template_network_element(name, vni):
     networks, code = list_template_network_vnis(name)
     found_vni = False
     for network in networks:
-        if network["vni"] == vni:
+        if network["vni"] == int(vni):
             found_vni = True
     if not found_vni:
         retmsg = {
@@ -215,19 +215,6 @@ def node_list(
 # Output display functions
 #
 def getOutputColours(node_information):
-    node_health = node_information.get("health", "N/A")
-    if isinstance(node_health, int):
-        if node_health <= 50:
-            health_colour = ansiprint.red()
-        elif node_health <= 90:
-            health_colour = ansiprint.yellow()
-        elif node_health <= 100:
-            health_colour = ansiprint.green()
-        else:
-            health_colour = ansiprint.blue()
-    else:
-        health_colour = ansiprint.blue()
-
     if node_information["daemon_state"] == "run":
         daemon_state_colour = ansiprint.green()
     elif node_information["daemon_state"] == "stop":
@@ -264,7 +251,6 @@ def getOutputColours(node_information):
         mem_provisioned_colour = ""
 
     return (
-        health_colour,
         daemon_state_colour,
         coordinator_state_colour,
         domain_state_colour,
@@ -275,7 +261,6 @@
 
 def format_info(node_information, long_output):
     (
-        health_colour,
         daemon_state_colour,
         coordinator_state_colour,
         domain_state_colour,
@@ -288,56 +273,14 @@ def format_info(node_information, long_output):
     # Basic information
     ainformation.append(
         "{}Name:{} {}".format(
-            ansiprint.purple(),
-            ansiprint.end(),
-            node_information["name"],
+            ansiprint.purple(), ansiprint.end(), node_information["name"]
         )
     )
     ainformation.append(
         "{}PVC Version:{} {}".format(
-            ansiprint.purple(),
-            ansiprint.end(),
-            node_information["pvc_version"],
+            ansiprint.purple(), ansiprint.end(), node_information["pvc_version"]
         )
     )
-
-    node_health = node_information.get("health", "N/A")
-    if isinstance(node_health, int):
-        node_health_text = f"{node_health}%"
-    else:
-        node_health_text = node_health
-    ainformation.append(
-        "{}Health:{} {}{}{}".format(
-            ansiprint.purple(),
-            ansiprint.end(),
-            health_colour,
-            node_health_text,
-            ansiprint.end(),
-        )
-    )
-
-    node_health_details = node_information.get("health_details", [])
-    if long_output:
-        node_health_messages = "\n ".join(
-            [f"{plugin['name']}: {plugin['message']}" for plugin in node_health_details]
-        )
-    else:
-        node_health_messages = "\n ".join(
-            [
-                f"{plugin['name']}: {plugin['message']}"
-                for plugin in node_health_details
-                if int(plugin.get("health_delta", 0)) > 0
-            ]
-        )
-
-    if len(node_health_messages) > 0:
-        ainformation.append(
-            "{}Health Plugin Details:{} {}".format(
-                ansiprint.purple(), ansiprint.end(), node_health_messages
-            )
-        )
-    ainformation.append("")
-
     ainformation.append(
         "{}Daemon State:{} {}{}{}".format(
             ansiprint.purple(),
@@ -365,6 +308,11 @@ def format_info(node_information, long_output):
             ansiprint.end(),
         )
     )
+    ainformation.append(
+        "{}Active VM Count:{} {}".format(
+            ansiprint.purple(), ansiprint.end(), node_information["domains_count"]
+        )
+    )
     if long_output:
         ainformation.append("")
         ainformation.append(
@@ -383,11 +331,6 @@ def format_info(node_information, long_output):
         )
     )
     ainformation.append("")
-    ainformation.append(
-        "{}Active VM Count:{} {}".format(
-            ansiprint.purple(), ansiprint.end(), node_information["domains_count"]
-        )
-    )
     ainformation.append(
         "{}Host CPUs:{} {}".format(
             ansiprint.purple(), ansiprint.end(), node_information["vcpu"]["total"]
@@ -454,7 +397,6 @@ def format_list(node_list, raw):
     # Determine optimal column widths
     node_name_length = 5
     pvc_version_length = 8
-    health_length = 7
     daemon_state_length = 7
     coordinator_state_length = 12
     domain_state_length = 7
@@ -475,15 +417,6 @@ def format_list(node_list, raw):
         _pvc_version_length = len(node_information.get("pvc_version", "N/A")) + 1
         if _pvc_version_length > pvc_version_length:
             pvc_version_length = _pvc_version_length
-        # node_health column
-        node_health = node_information.get("health", "N/A")
-        if isinstance(node_health, int):
-            node_health_text = f"{node_health}%"
-        else:
-            node_health_text = node_health
-        _health_length = len(node_health_text) + 1
-        if _health_length > health_length:
-            health_length = _health_length
         # daemon_state column
         _daemon_state_length = len(node_information["daemon_state"]) + 1
         if _daemon_state_length > daemon_state_length:
@@ -533,10 +466,7 @@ def format_list(node_list, raw):
     # Format the string (header)
     node_list_output.append(
         "{bold}{node_header: <{node_header_length}} {state_header: <{state_header_length}} {resource_header: <{resource_header_length}} {memory_header: <{memory_header_length}}{end_bold}".format(
-            node_header_length=node_name_length
-            + pvc_version_length
-            + health_length
-            + 2,
+            node_header_length=node_name_length + pvc_version_length + 1,
             state_header_length=daemon_state_length
             + coordinator_state_length
             + domain_state_length
@@ -554,14 +484,7 @@ def format_list(node_list, raw):
             bold=ansiprint.bold(),
             end_bold=ansiprint.end(),
             node_header="Nodes "
-            + "".join(
-                [
-                    "-"
-                    for _ in range(
-                        6, node_name_length + pvc_version_length + health_length + 1
-                    )
-                ]
-            ),
+            + "".join(["-" for _ in range(6, node_name_length + pvc_version_length)]),
             state_header="States "
             + "".join(
                 [
@@ -603,13 +526,12 @@ def format_list(node_list, raw):
     )
 
     node_list_output.append(
-        "{bold}{node_name: <{node_name_length}} {node_pvc_version: <{pvc_version_length}} {node_health: <{health_length}} \
+        "{bold}{node_name: <{node_name_length}} {node_pvc_version: <{pvc_version_length}} \
 {daemon_state_colour}{node_daemon_state: <{daemon_state_length}}{end_colour} {coordinator_state_colour}{node_coordinator_state: <{coordinator_state_length}}{end_colour} {domain_state_colour}{node_domain_state: <{domain_state_length}}{end_colour} \
 {node_domains_count: <{domains_count_length}} {node_cpu_count: <{cpu_count_length}} {node_load: <{load_length}} \
 {node_mem_total: <{mem_total_length}} {node_mem_used: <{mem_used_length}} {node_mem_free: <{mem_free_length}} {node_mem_allocated: <{mem_alloc_length}} {node_mem_provisioned: <{mem_prov_length}}{end_bold}".format(
             node_name_length=node_name_length,
             pvc_version_length=pvc_version_length,
-            health_length=health_length,
             daemon_state_length=daemon_state_length,
             coordinator_state_length=coordinator_state_length,
             domain_state_length=domain_state_length,
@@ -629,7 +551,6 @@ def format_list(node_list, raw):
             end_colour="",
             node_name="Name",
             node_pvc_version="Version",
-            node_health="Health",
             node_daemon_state="Daemon",
             node_coordinator_state="Coordinator",
             node_domain_state="Domain",
@@ -647,28 +568,19 @@ def format_list(node_list, raw):
     # Format the string (elements)
     for node_information in sorted(node_list, key=lambda n: n["name"]):
         (
-            health_colour,
             daemon_state_colour,
             coordinator_state_colour,
             domain_state_colour,
             mem_allocated_colour,
             mem_provisioned_colour,
         ) = getOutputColours(node_information)
-
-        node_health = node_information.get("health", "N/A")
-        if isinstance(node_health, int):
-            node_health_text = f"{node_health}%"
-        else:
-            node_health_text = node_health
-
         node_list_output.append(
-            "{bold}{node_name: <{node_name_length}} {node_pvc_version: <{pvc_version_length}} {health_colour}{node_health: <{health_length}}{end_colour} \
+            "{bold}{node_name: <{node_name_length}} {node_pvc_version: <{pvc_version_length}} \
 {daemon_state_colour}{node_daemon_state: <{daemon_state_length}}{end_colour} {coordinator_state_colour}{node_coordinator_state: <{coordinator_state_length}}{end_colour} {domain_state_colour}{node_domain_state: <{domain_state_length}}{end_colour} \
 {node_domains_count: <{domains_count_length}} {node_cpu_count: <{cpu_count_length}} {node_load: <{load_length}} \
 {node_mem_total: <{mem_total_length}} {node_mem_used: <{mem_used_length}} {node_mem_free: <{mem_free_length}} {mem_allocated_colour}{node_mem_allocated: <{mem_alloc_length}}{end_colour} {mem_provisioned_colour}{node_mem_provisioned: <{mem_prov_length}}{end_colour}{end_bold}".format(
             node_name_length=node_name_length,
             pvc_version_length=pvc_version_length,
-            health_length=health_length,
             daemon_state_length=daemon_state_length,
             coordinator_state_length=coordinator_state_length,
             domain_state_length=domain_state_length,
@@ -682,7 +594,6 @@ def format_list(node_list, raw):
             mem_prov_length=mem_prov_length,
             bold="",
             end_bold="",
-            health_colour=health_colour,
             daemon_state_colour=daemon_state_colour,
             coordinator_state_colour=coordinator_state_colour,
             domain_state_colour=domain_state_colour,
@@ -691,7 +602,6 @@ def format_list(node_list, raw):
             end_colour=ansiprint.end(),
             node_name=node_information["name"],
             node_pvc_version=node_information.get("pvc_version", "N/A"),
-            node_health=node_health_text,
             node_daemon_state=node_information["daemon_state"],
             node_coordinator_state=node_information["coordinator_state"],
             node_domain_state=node_information["domain_state"],
@@ -2,7 +2,7 @@ from setuptools import setup
 
 setup(
     name="pvc",
-    version="0.9.61",
+    version="0.9.59",
     packages=["pvc", "pvc.cli_lib"],
     install_requires=[
         "Click",
@@ -1 +0,0 @@
-{"version": "9", "root": "", "base": {"root": "", "schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "cmd": "/cmd", "cmd.node": "/cmd/nodes", "cmd.domain": "/cmd/domains", "cmd.ceph": "/cmd/ceph", "logs": "/logs", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "logs": {"node": "", "messages": "/messages"}, "node": {"name": "", "keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "data.pvc_version": "/pvcversion", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword", "sriov": "/sriov", "sriov.pf": "/sriov/pf", "sriov.vf": "/sriov/vf", "monitoring.plugins": "/monitoring_plugins", "monitoring.data": "/monitoring_data", "monitoring.health": "/monitoring_health"}, "monitoring_plugin": {"name": "", "last_run": "/last_run", "health_delta": "/health_delta", "message": "/message", "data": "/data", "runtime": "/runtime"}, "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"}, "sriov_vf": {"phy": "", "pf": "/pf", "mtu": "/mtu", "mac": "/mac", "phy_mac": "/phy_mac", "config": "/config", "config.vlan_id": "/config/vlan_id", "config.vlan_qos": "/config/vlan_qos", "config.tx_rate_min": "/config/tx_rate_min", "config.tx_rate_max": "/config/tx_rate_max", "config.spoof_check": "/config/spoof_check", "config.link_state": "/config/link_state", "config.trust": "/config/trust", "config.query_rss": "/config/query_rss", "pci": "/pci", "pci.domain": "/pci/domain", "pci.bus": "/pci/bus", "pci.slot": "/pci/slot", "pci.function": "/pci/function", "used": "/used", "used_by": "/used_by"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "meta.tags": "/tags", "migrate.sync_lock": "/migrate_sync_lock"}, "tag": {"name": "", "type": "/type", "protected": "/protected"}, "network": {"vni": "", "type": "/nettype", "mtu": "/mtu", "rule": "/firewall_rules", "rule.in": "/firewall_rules/in", "rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": "/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname", "expiry": "/expiry", "client_id": "/clientid"}, "rule": {"description": "", "rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "db_device": "/db_device", "fsid": "/fsid", "ofsid": "/fsid/osd", "cfsid": "/fsid/cluster", "lvm": "/lvm", "vg": "/lvm/vg", "lv": "/lvm/lv", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "tier": "/tier", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}}
@@ -21,7 +21,6 @@
 
 import time
 import re
-import json
 
 import daemon_lib.common as common
 
@@ -50,38 +49,6 @@ def getNodeInformation(zkhandler, node_name):
         zkhandler.read(("node.count.provisioned_domains", node_name))
     )
     node_running_domains = zkhandler.read(("node.running_domains", node_name)).split()
-    try:
-        node_health = int(zkhandler.read(("node.monitoring.health", node_name)))
-    except ValueError:
-        node_health = "N/A"
-    node_health_plugins = zkhandler.read(("node.monitoring.plugins", node_name)).split()
-    node_health_details = list()
-    for plugin in node_health_plugins:
-        plugin_last_run = zkhandler.read(
-            ("node.monitoring.data", node_name, "monitoring_plugin.last_run", plugin)
-        )
-        plugin_health_delta = zkhandler.read(
-            (
-                "node.monitoring.data",
-                node_name,
-                "monitoring_plugin.health_delta",
-                plugin,
-            )
-        )
-        plugin_message = zkhandler.read(
-            ("node.monitoring.data", node_name, "monitoring_plugin.message", plugin)
-        )
-        plugin_data = zkhandler.read(
-            ("node.monitoring.data", node_name, "monitoring_plugin.data", plugin)
-        )
-        plugin_output = {
-            "name": plugin,
-            "last_run": int(plugin_last_run),
-            "health_delta": int(plugin_health_delta),
-            "message": plugin_message,
-            "data": json.loads(plugin_data),
-        }
-        node_health_details.append(plugin_output)
 
     # Construct a data structure to represent the data
     node_information = {
@@ -94,16 +61,10 @@ def getNodeInformation(zkhandler, node_name):
         "kernel": node_kernel,
         "os": node_os,
         "arch": node_arch,
-        "health": node_health,
-        "health_plugins": node_health_plugins,
-        "health_details": node_health_details,
         "load": node_load,
         "domains_count": node_domains_count,
         "running_domains": node_running_domains,
-        "vcpu": {
-            "total": node_cpu_count,
-            "allocated": node_vcpu_allocated,
-        },
+        "vcpu": {"total": node_cpu_count, "allocated": node_vcpu_allocated},
         "memory": {
             "total": node_mem_total,
             "allocated": node_mem_allocated,
@@ -644,7 +644,7 @@ def rename_vm(zkhandler, domain, new_domain):
 
     # Verify that the VM is in a stopped state; renaming is not supported otherwise
     state = zkhandler.read(("domain.state", dom_uuid))
-    if state not in ["stop", "disable"]:
+    if state != "stop":
         return (
             False,
             'ERROR: VM "{}" is not in stopped state; VMs cannot be renamed while running.'.format(
@@ -540,7 +540,7 @@ class ZKHandler(object):
 #
 class ZKSchema(object):
     # Current version
-    _version = 9
+    _version = 8
 
     # Root for doing nested keys
     _schema_root = ""
@@ -608,18 +608,6 @@ class ZKSchema(object):
         "sriov": "/sriov",
         "sriov.pf": "/sriov/pf",
         "sriov.vf": "/sriov/vf",
-        "monitoring.plugins": "/monitoring_plugins",
-        "monitoring.data": "/monitoring_data",
-        "monitoring.health": "/monitoring_health",
     },
-    # The schema of an individual monitoring plugin data entry (/nodes/{node_name}/monitoring_data/{plugin})
-    "monitoring_plugin": {
-        "name": "",  # The root key
-        "last_run": "/last_run",
-        "health_delta": "/health_delta",
-        "message": "/message",
-        "data": "/data",
-        "runtime": "/runtime",
-    },
    # The schema of an individual SR-IOV PF entry (/nodes/{node_name}/sriov/pf/{pf})
    "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"},  # The root key
@@ -886,10 +874,9 @@ class ZKSchema(object):
             if not zkhandler.zk_conn.exists(nkipath):
                 result = False
 
-        # One might expect child keys under node (specifically, sriov.pf, sriov.vf,
-        # monitoring.data) to be managed here as well, but those are created
-        # automatically every time pvcnoded started and thus never need to be validated
-        # or applied.
+        # One might expect child keys under node (specifically, sriov.pf and sriov.vf) to be
+        # managed here as well, but those are created automatically every time pvcnoded starts
+        # and thus never need to be validated or applied.
 
         # These two have several children layers that must be parsed through
         for elem in ["volume"]:
14
debian/changelog
@@ -1,17 +1,3 @@
-pvc (0.9.61-0) unstable; urgency=high
-
-  * [provisioner] Fixes a bug in network comparison
-  * [api] Fixes a bug being unable to rename disabled VMs
-
- -- Joshua M. Boniface <joshua@boniface.me>  Wed, 08 Feb 2023 10:08:05 -0500
-
-pvc (0.9.60-0) unstable; urgency=high
-
-  * [Provisioner] Cleans up several remaining bugs in the example scripts; they should all be valid now
-  * [Provisioner] Adjust default libvirt schema to disable RBD caching for a 2x+ performance boost
-
- -- Joshua M. Boniface <joshua@boniface.me>  Tue, 06 Dec 2022 15:42:55 -0500
-
 pvc (0.9.59-0) unstable; urgency=high
 
   * [API] Flips the mem(prov) and mem(free) selectors making mem(free) the default for "mem" and "memprov" explicit
1
debian/pvc-daemon-node.install
@@ -5,4 +5,3 @@ node-daemon/pvcnoded.service lib/systemd/system
 node-daemon/pvc.target lib/systemd/system
 node-daemon/pvcautoready.service lib/systemd/system
 node-daemon/monitoring usr/share/pvc
-node-daemon/plugins usr/share/pvc
@@ -1,126 +0,0 @@
-#!/usr/bin/env python3
-
-# ceph.py - PVC Monitoring example plugin for Ceph status
-# Part of the Parallel Virtual Cluster (PVC) system
-#
-# Copyright (C) 2018-2022 Joshua M. Boniface <joshua@boniface.me>
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, version 3.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
-#
-###############################################################################
-
-# This script provides an example of a PVC monitoring plugin script. It will create
-# a simple plugin to check the Ceph cluster health for anomalies, and return a health
-# delta reflective of the overall Ceph status (HEALTH_WARN = 10, HEALTH_ERR = 50).
-
-# This script can thus be used as an example or reference implementation of a
-# PVC monitoring pluginscript and expanded upon as required.
-
-# A monitoring plugin script must implement the class "MonitoringPluginScript" which
-# extends "MonitoringPlugin", providing the 3 functions indicated. Detailed explanation
-# of the role of each function is provided in context of the example; see the other
-# examples for more potential uses.
-
-# WARNING:
-#
-# This script will run in the context of the node daemon keepalives as root.
-# DO NOT install untrusted, unvetted plugins under any circumstances.
-
-
-# This import is always required here, as MonitoringPlugin is used by the
-# MonitoringPluginScript class
-from pvcnoded.objects.MonitoringInstance import MonitoringPlugin
-
-
-# A monitoring plugin script must always expose its nice name, which must be identical to
-# the file name
-PLUGIN_NAME = "ceph"
-
-
-# The MonitoringPluginScript class must be named as such, and extend MonitoringPlugin.
-class MonitoringPluginScript(MonitoringPlugin):
-    def setup(self):
-        """
-        setup(): Perform special setup steps during node daemon startup
-
-        This step is optional and should be used sparingly.
-        """
-
-        pass
-
-    def run(self):
-        """
-        run(): Perform the check actions and return a PluginResult object
-        """
-
-        # Run any imports first
-        from rados import Rados
-        from json import loads, dumps
-
-        # Connect to the Ceph cluster
-        try:
-            ceph_conn = Rados(
-                conffile=self.config["ceph_config_file"],
-                conf=dict(keyring=self.config["ceph_admin_keyring"]),
-            )
-            ceph_conn.connect(timeout=1)
-        except Exception as e:
-            self.log(f"Failed to connect to Ceph cluster: {e}", state="e")
-            return self.plugin_result
-
-        # Get the Ceph cluster health
-        try:
-            health_status = loads(
-                ceph_conn.mon_command(dumps({"prefix": "health", "format": "json"}), b"", timeout=1)[1]
-            )
-            ceph_health = health_status["status"]
-        except Exception as e:
-            self.log(f"Failed to get health data from Ceph cluster: {e}", state="e")
-            return self.plugin_result
-        finally:
-            ceph_conn.shutdown()
-
-        # Get a list of error entries in the health status output
-        error_entries = health_status["checks"].keys()
-
-        # Set the health delta based on the errors presented
-        if ceph_health == "HEALTH_ERR":
-            health_delta = 50
-            message = f"Ceph cluster in ERROR state: {', '.join(error_entries)}"
-        elif ceph_health == "HEALTH_WARN":
-            health_delta = 10
-            message = f"Ceph cluster in WARNING state: {', '.join(error_entries)}"
-        else:
-            health_delta = 0
-            message = "Ceph cluster in OK state"
-
-        # Set the health delta in our local PluginResult object
-        self.plugin_result.set_health_delta(health_delta)
-
-        # Set the message in our local PluginResult object
-        self.plugin_result.set_message(message)
-
-        # Set the detailed data in our local PluginResult object
-        self.plugin_result.set_data(health_status)
-
-        # Return our local PluginResult object
-        return self.plugin_result
-
-    def cleanup(self):
-        """
-        cleanup(): Perform special cleanup steps during node daemon termination
-
-        This step is optional and should be used sparingly.
-        """
-
-        pass
@@ -1,157 +0,0 @@
-#!/usr/bin/env python3
-
-# dpkg.py - PVC Monitoring example plugin for dpkg status
-# Part of the Parallel Virtual Cluster (PVC) system
-#
-# Copyright (C) 2018-2022 Joshua M. Boniface <joshua@boniface.me>
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, version 3.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
-#
-###############################################################################
-
-# This script provides an example of a PVC monitoring plugin script. It will create
-# a simple plugin to check the system dpkg status is as expected, with no invalid
-# packages or obsolete configuration files, and will return a 1 health delta for each
-# flaw in invalid packages, upgradable packages, and obsolete config files.
-
-# This script can thus be used as an example or reference implementation of a
-# PVC monitoring pluginscript and expanded upon as required.
-
-# A monitoring plugin script must implement the class "MonitoringPluginScript" which
-# extends "MonitoringPlugin", providing the 3 functions indicated. Detailed explanation
-# of the role of each function is provided in context of the example; see the other
-# examples for more potential uses.
-
-# WARNING:
-#
-# This script will run in the context of the node daemon keepalives as root.
-# DO NOT install untrusted, unvetted plugins under any circumstances.
-
-
-# This import is always required here, as MonitoringPlugin is used by the
-# MonitoringPluginScript class
-from pvcnoded.objects.MonitoringInstance import MonitoringPlugin
-
-
-# A monitoring plugin script must always expose its nice name, which must be identical to
-# the file name
-PLUGIN_NAME = "dpkg"
-
-
-# The MonitoringPluginScript class must be named as such, and extend MonitoringPlugin.
-class MonitoringPluginScript(MonitoringPlugin):
-    def setup(self):
-        """
-        setup(): Perform special setup steps during node daemon startup
-
-        This step is optional and should be used sparingly.
-        """
-
-        pass
-
-    def run(self):
-        """
-        run(): Perform the check actions and return a PluginResult object
-        """
-
-        # Run any imports first
-        from re import match
-        import daemon_lib.common as pvc_common
-
-        # Get Debian version
-        with open('/etc/debian_version', 'r') as fh:
-            debian_version = fh.read().strip()
-
-        # Get a list of dpkg packages for analysis
-        retcode, stdout, stderr = pvc_common.run_os_command("/usr/bin/dpkg --list")
-
-        # Get a list of installed packages and states
-        packages = list()
-        for dpkg_line in stdout.split('\n'):
-            if match('^[a-z][a-z] ', dpkg_line):
-                line_split = dpkg_line.split()
-                package_state = line_split[0]
-                package_name = line_split[1]
-                packages.append((package_name, package_state))
-
-        count_ok = 0
-        count_inconsistent = 0
-        list_inconsistent = list()
-
-        for package in packages:
-            if package[1] == "ii":
-                count_ok += 1
-            else:
-                count_inconsistent += 1
-                list_inconsistent.append(package[0])
-
-        # Get upgradable packages
-        retcode, stdout, stderr = pvc_common.run_os_command("/usr/bin/apt list --upgradable")
-
-        list_upgradable = list()
-        for apt_line in stdout.split('\n'):
-            if match('^[a-z][a-z] ', apt_line):
-                line_split = apt_line.split('/')
-                package_name = line_split[0]
-                list_upgradable.append(package_name)
-
-        count_upgradable = len(list_upgradable)
-
-        # Get obsolete config files (dpkg-* or ucf-* under /etc)
-        retcode, stdout, stderr = pvc_common.run_os_command("/usr/bin/find /etc -type f -a \( -name '*.dpkg-*' -o -name '*.ucf-*' \)")
-
-        obsolete_conffiles = list()
-        for conffile_line in stdout.split('\n'):
-            if conffile_line:
-                obsolete_conffiles.append(conffile_line)
-
-        count_obsolete_conffiles = len(obsolete_conffiles)
-
-        # Set health_delta based on the results
-        health_delta = 0
-        if count_inconsistent > 0:
-            health_delta += 1
-        if count_upgradable > 0:
-            health_delta += 1
-        if count_obsolete_conffiles > 0:
-            health_delta += 1
-
-        # Set the health delta in our local PluginResult object
-        self.plugin_result.set_health_delta(health_delta)
-
-        # Craft the message
-        message = f"Debian {debian_version}; Obsolete conffiles: {count_obsolete_conffiles}; Packages valid: {count_ok}, inconsistent: {count_inconsistent}, upgradable: {count_upgradable}"
-
-        # Set the message in our local PluginResult object
-        self.plugin_result.set_message(message)
-
-        # Set the detailed data in our local PluginResult object
-        detailed_data = {
-            "debian_version": debian_version,
-            "obsolete_conffiles": obsolete_conffiles,
-            "inconsistent_packages": list_inconsistent,
-            "upgradable_packages": list_upgradable,
-        }
-        self.plugin_result.set_data(detailed_data)
-
-        # Return our local PluginResult object
-        return self.plugin_result
-
-    def cleanup(self):
-        """
-        cleanup(): Perform special cleanup steps during node daemon termination
-
-        This step is optional and should be used sparingly.
-        """
-
-        pass
@@ -1,105 +0,0 @@
-#!/usr/bin/env python3
-
-# load.py - PVC Monitoring example plugin for load
-# Part of the Parallel Virtual Cluster (PVC) system
-#
-# Copyright (C) 2018-2022 Joshua M. Boniface <joshua@boniface.me>
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, version 3.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
-#
-###############################################################################
-
-# This script provides an example of a PVC monitoring plugin script. It will create
-# a simple plugin to check the system load against the total number of CPU cores,
-# and return a 10 health delta (100 -> 90) if the load average is > 1/2 that number.
-
-# This script can thus be used as an example or reference implementation of a
-# PVC monitoring pluginscript and expanded upon as required.
-
-# A monitoring plugin script must implement the class "MonitoringPluginScript" which
-# extends "MonitoringPlugin", providing the 3 functions indicated. Detailed explanation
-# of the role of each function is provided in context of the example; see the other
-# examples for more potential uses.
-
-# WARNING:
-#
-# This script will run in the context of the node daemon keepalives as root.
-# DO NOT install untrusted, unvetted plugins under any circumstances.
-
-
-# This import is always required here, as MonitoringPlugin is used by the
-# MonitoringPluginScript class
-from pvcnoded.objects.MonitoringInstance import MonitoringPlugin
-
-
-# A monitoring plugin script must always expose its nice name, which must be identical to
-# the file name
-PLUGIN_NAME = "load"
-
-
-# The MonitoringPluginScript class must be named as such, and extend MonitoringPlugin.
-class MonitoringPluginScript(MonitoringPlugin):
-    def setup(self):
-        """
-        setup(): Perform special setup steps during node daemon startup
-
-        This step is optional and should be used sparingly.
-        """
-
-        pass
-
-    def run(self):
-        """
-        run(): Perform the check actions and return a PluginResult object
-        """
-
-        # Run any imports first
-        from os import getloadavg
-        from psutil import cpu_count
-
-        # Get the current 1-minute system load average
-        load_average = getloadavg()[0]
-
-        # Get the number of CPU cores
-        cpu_cores = cpu_count()
-
-        # Check that the load average is greater or equal to the cpu count
-        if load_average > float(cpu_cores):
-            # Set the health delta to 10 (subtract 10 from the total of 100)
-            health_delta = 10
-            # Craft a message that can be used by the clients
-            message = f"Current load is {load_average} out of {cpu_cores} CPU cores"
-
-        else:
-            # Set the health delta to 0 (no change)
-            health_delta = 0
-            # Craft a message that can be used by the clients
-            message = f"Current load is {load_average} out of {cpu_cores} CPU cores"
-
-        # Set the health delta in our local PluginResult object
-        self.plugin_result.set_health_delta(health_delta)
-
-        # Set the message in our local PluginResult object
-        self.plugin_result.set_message(message)
-
-        # Return our local PluginResult object
-        return self.plugin_result
-
-    def cleanup(self):
-        """
-        cleanup(): Perform special cleanup steps during node daemon termination
-
-        This step is optional and should be used sparingly.
-        """
-
-        pass
@@ -128,8 +128,6 @@ pvc:
   configuration:
     # directories: PVC system directories
    directories:
-      # plugin_directory: Directory containing node monitoring plugins
-      plugin_directory: "/usr/share/pvc/plugins"
      # dynamic_directory: Temporary in-memory directory for active configurations
      dynamic_directory: "/run/pvc"
      # log_directory: Logging directory
@@ -27,7 +27,6 @@ import pvcnoded.util.services
 import pvcnoded.util.libvirt
 import pvcnoded.util.zookeeper
 
-import pvcnoded.objects.MonitoringInstance as MonitoringInstance
 import pvcnoded.objects.DNSAggregatorInstance as DNSAggregatorInstance
 import pvcnoded.objects.MetadataAPIInstance as MetadataAPIInstance
 import pvcnoded.objects.VMInstance as VMInstance
@@ -49,7 +48,7 @@ import re
 import json
 
 # Daemon version
-version = "0.9.61"
+version = "0.9.59"
 
 
 ##########################################################
@@ -59,7 +58,6 @@ version = "0.9.61"
 
 def entrypoint():
     keepalive_timer = None
-    monitoring_instance = None
 
     # Get our configuration
     config = pvcnoded.util.config.get_configuration()
@@ -206,7 +204,7 @@ def entrypoint():
 
     # Define a cleanup function
     def cleanup(failure=False):
-        nonlocal logger, zkhandler, keepalive_timer, d_domain, monitoring_instance
+        nonlocal logger, zkhandler, keepalive_timer, d_domain
 
         logger.out("Terminating pvcnoded and cleaning up", state="s")
 
@@ -255,13 +253,6 @@ def entrypoint():
         except Exception:
             pass
 
-        # Clean up any monitoring plugins that have cleanup
-        try:
-            logger.out("Performing monitoring plugin cleanup", state="s")
-            monitoring_instance.run_cleanups()
-        except Exception:
-            pass
-
         # Set stop state in Zookeeper
         zkhandler.write([(("node.state.daemon", config["node_hostname"]), "stop")])
 
@@ -1024,14 +1015,9 @@ def entrypoint():
         state="i",
     )
 
-    # Set up the node monitoring instance
-    monitoring_instance = MonitoringInstance.MonitoringInstance(
-        zkhandler, config, logger, this_node
-    )
-
     # Start keepalived thread
     keepalive_timer = pvcnoded.util.keepalive.start_keepalive_timer(
-        logger, config, zkhandler, this_node, monitoring_instance
+        logger, config, zkhandler, this_node
     )
 
     # Tick loop; does nothing since everything is async
@ -1,383 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# PluginInstance.py - Class implementing a PVC monitoring instance
|
||||
# Part of the Parallel Virtual Cluster (PVC) system
|
||||
#
|
||||
# Copyright (C) 2018-2022 Joshua M. Boniface <joshua@boniface.me>
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, version 3.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
#
|
||||
###############################################################################
|
||||
|
||||
import concurrent.futures
|
||||
import time
|
||||
import importlib.util
|
||||
|
||||
from os import walk
|
||||
from datetime import datetime
|
||||
from json import dumps
|
||||
|
||||
|
||||
class PluginResult(object):
|
||||
def __init__(self, zkhandler, config, logger, this_node, plugin_name):
|
||||
self.zkhandler = zkhandler
|
||||
self.config = config
|
||||
self.logger = logger
|
||||
self.this_node = this_node
|
||||
self.plugin_name = plugin_name
|
||||
self.current_time = int(time.time())
|
||||
self.health_delta = 0
|
||||
self.message = None
|
||||
self.data = {}
|
||||
self.runtime = "0.00"
|
||||
|
||||
def set_health_delta(self, new_delta):
|
||||
self.health_delta = new_delta
|
||||
|
||||
def set_message(self, new_message):
|
||||
self.message = new_message
|
||||
|
||||
def set_data(self, new_data):
|
||||
self.data = new_data
|
||||
|
||||
def set_runtime(self, new_runtime):
|
||||
self.runtime = new_runtime
|
||||
|
||||
def to_zookeeper(self):
|
||||
self.zkhandler.write(
|
||||
[
|
||||
(
|
||||
(
|
||||
"node.monitoring.data",
|
||||
self.this_node.name,
|
||||
"monitoring_plugin.name",
|
||||
self.plugin_name,
|
||||
),
|
||||
self.plugin_name,
|
||||
),
|
||||
(
|
||||
(
|
||||
"node.monitoring.data",
|
||||
self.this_node.name,
|
||||
"monitoring_plugin.last_run",
|
||||
self.plugin_name,
|
||||
),
|
||||
self.current_time,
|
||||
),
|
||||
(
|
||||
(
|
||||
"node.monitoring.data",
|
||||
self.this_node.name,
|
||||
"monitoring_plugin.health_delta",
|
||||
self.plugin_name,
|
||||
),
|
||||
self.health_delta,
|
||||
),
|
||||
(
|
||||
(
|
||||
"node.monitoring.data",
|
||||
self.this_node.name,
|
||||
"monitoring_plugin.message",
|
||||
self.plugin_name,
|
||||
),
|
||||
self.message,
|
||||
),
|
||||
(
|
||||
(
|
||||
"node.monitoring.data",
|
||||
self.this_node.name,
|
||||
"monitoring_plugin.data",
|
||||
self.plugin_name,
|
||||
),
|
||||
dumps(self.data),
|
||||
),
|
||||
(
|
||||
(
|
||||
"node.monitoring.data",
|
||||
self.this_node.name,
|
||||
"monitoring_plugin.runtime",
|
||||
self.plugin_name,
|
||||
),
|
||||
self.runtime,
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
class MonitoringPlugin(object):
|
||||
def __init__(self, zkhandler, config, logger, this_node, plugin_name):
|
||||
self.zkhandler = zkhandler
|
||||
self.config = config
|
||||
self.logger = logger
|
||||
self.this_node = this_node
|
||||
self.plugin_name = plugin_name
|
||||
|
||||
self.plugin_result = PluginResult(
|
||||
self.zkhandler,
|
||||
self.config,
|
||||
self.logger,
|
||||
self.this_node,
|
||||
self.plugin_name,
|
||||
)
|
||||
|
||||
def __str__(self):
|
||||
return self.plugin_name
|
||||
|
||||
#
|
||||
# Helper functions; exposed to child MonitoringPluginScript instances
|
||||
#
|
||||
def log(self, message, state="d"):
|
||||
"""
|
||||
Log a message to the PVC logger instance using the plugin name as a prefix
|
||||
Takes "state" values as defined by the PVC logger instance, defaulting to debug:
|
||||
"d": debug
|
||||
"i": informational
|
||||
"t": tick/keepalive
|
||||
"w": warning
|
||||
"e": error
|
||||
"""
|
||||
if state == "d" and not self.config["debug"]:
|
||||
return
|
||||
|
||||
self.logger.out(message, state=state, prefix=self.plugin_name)
|
||||
|
||||
#
|
||||
# Primary class functions; implemented by the individual plugins
|
||||
#
|
||||
def setup(self):
|
||||
"""
|
||||
setup(): Perform setup of the plugin; run once during daemon startup
|
||||
OPTIONAL
|
||||
"""
|
||||
pass
|
||||
|
||||
def run(self):
|
||||
"""
|
||||
run(): Run the plugin, returning a PluginResult object
|
||||
"""
|
||||
return self.plugin_result
|
||||
|
||||
def cleanup(self):
|
||||
"""
|
||||
cleanup(): Clean up after the plugin; run once during daemon shutdown
|
||||
OPTIONAL
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class MonitoringInstance(object):
    def __init__(self, zkhandler, config, logger, this_node):
        self.zkhandler = zkhandler
        self.config = config
        self.logger = logger
        self.this_node = this_node

        # Get a list of plugins from the plugin_directory
        plugin_files = next(walk(self.config["plugin_directory"]), (None, None, []))[
            2
        ]  # [] if no file

        self.all_plugins = list()
        self.all_plugin_names = list()

        # Load each plugin file into the all_plugins list
        for plugin_file in sorted(plugin_files):
            try:
                self.logger.out(
                    f"Loading monitoring plugin from {self.config['plugin_directory']}/{plugin_file}",
                    state="i",
                )
                loader = importlib.machinery.SourceFileLoader(
                    "plugin_script", f"{self.config['plugin_directory']}/{plugin_file}"
                )
                spec = importlib.util.spec_from_loader(loader.name, loader)
                plugin_script = importlib.util.module_from_spec(spec)
                spec.loader.exec_module(plugin_script)

                plugin = plugin_script.MonitoringPluginScript(
                    self.zkhandler,
                    self.config,
                    self.logger,
                    self.this_node,
                    plugin_script.PLUGIN_NAME,
                )
                self.all_plugins.append(plugin)
                self.all_plugin_names.append(plugin.plugin_name)

                # Create plugin key
                self.zkhandler.write(
                    [
                        (
                            (
                                "node.monitoring.data",
                                self.this_node.name,
                                "monitoring_plugin.name",
                                plugin.plugin_name,
                            ),
                            plugin.plugin_name,
                        ),
                        (
                            (
                                "node.monitoring.data",
                                self.this_node.name,
                                "monitoring_plugin.last_run",
                                plugin.plugin_name,
                            ),
                            "0",
                        ),
                        (
                            (
                                "node.monitoring.data",
                                self.this_node.name,
                                "monitoring_plugin.health_delta",
                                plugin.plugin_name,
                            ),
                            "0",
                        ),
                        (
                            (
                                "node.monitoring.data",
                                self.this_node.name,
                                "monitoring_plugin.message",
                                plugin.plugin_name,
                            ),
                            "Initializing",
                        ),
                        (
                            (
                                "node.monitoring.data",
                                self.this_node.name,
                                "monitoring_plugin.data",
                                plugin.plugin_name,
                            ),
                            dumps({}),
                        ),
                        (
                            (
                                "node.monitoring.data",
                                self.this_node.name,
                                "monitoring_plugin.runtime",
                                plugin.plugin_name,
                            ),
                            "0.00",
                        ),
                    ]
                )
                self.logger.out(
                    f"Successfully loaded monitoring plugin '{plugin.plugin_name}'",
                    state="o",
                )
            except Exception as e:
                self.logger.out(
                    f"Failed to load monitoring plugin: {e}",
                    state="w",
                )

        self.zkhandler.write(
            [
                (
                    ("node.monitoring.plugins", self.this_node.name),
                    " ".join(self.all_plugin_names),
                ),
            ]
        )

        # Clean up any old plugin data for which a plugin file no longer exists
        for plugin_key in self.zkhandler.children(
            ("node.monitoring.data", self.this_node.name)
        ):
            if plugin_key not in self.all_plugin_names:
                self.zkhandler.delete(
                    (
                        "node.monitoring.data",
                        self.this_node.name,
                        "monitoring_plugin",
                        plugin_key,
                    )
                )

    def run_plugin(self, plugin):
        time_start = datetime.now()
        result = plugin.run()
        time_end = datetime.now()
        time_delta = time_end - time_start
        runtime = "{:0.02f}".format(time_delta.total_seconds())
        result.set_runtime(runtime)
        result.to_zookeeper()
        return result

    def run_plugins(self):
        total_health = 100
        self.logger.out(
            f"Running monitoring plugins: {', '.join([x.plugin_name for x in self.all_plugins])}",
            state="t",
        )
        plugin_results = list()
        with concurrent.futures.ThreadPoolExecutor(max_workers=99) as executor:
            to_future_plugin_results = {
                executor.submit(self.run_plugin, plugin): plugin
                for plugin in self.all_plugins
            }
            for future in concurrent.futures.as_completed(to_future_plugin_results):
                plugin_results.append(future.result())

        # Skip any failed (None) results up front; both the sort key and the
        # log line below dereference the result object
        for result in sorted(
            [r for r in plugin_results if r is not None],
            key=lambda x: x.plugin_name,
        ):
            self.logger.out(
                result.message,
                state="t",
                prefix=f"{result.plugin_name} ({result.runtime}s)",
            )
            total_health -= result.health_delta

        if total_health > 90:
            health_colour = self.logger.fmt_green
        elif total_health > 50:
            health_colour = self.logger.fmt_yellow
        else:
            health_colour = self.logger.fmt_red

        self.zkhandler.write(
            [
                (
                    ("node.monitoring.health", self.this_node.name),
                    total_health,
                ),
            ]
        )
        self.logger.out(
            f"System health: {health_colour}{total_health}/100{self.logger.fmt_end}",
            state="t",
        )

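    # Worked example of the health calculation above (illustrative values):
    # three plugins returning health_delta values of 0, 10, and 50 give
    # total_health = 100 - 0 - 10 - 50 = 40, which is not above 50 and so is
    # rendered in fmt_red; a single delta of 10 leaves 90, which is not above
    # 90 and so is rendered in fmt_yellow.
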
    def run_cleanup(self, plugin):
        return plugin.cleanup()

    def run_cleanups(self):
        with concurrent.futures.ThreadPoolExecutor(max_workers=99) as executor:
            to_future_plugin_results = {
                executor.submit(self.run_cleanup, plugin): plugin
                for plugin in self.all_plugins
            }
            for future in concurrent.futures.as_completed(to_future_plugin_results):
                # This doesn't do anything, just lets us wait for them all to complete
                pass
        # Set the node health to None as no previous checks are now valid
        self.zkhandler.write(
            [
                (
                    ("node.monitoring.health", self.this_node.name),
                    None,
                ),
            ]
        )

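# A rough sketch of how a daemon might wire the class above into its lifecycle;
# the zkhandler, config, logger, and this_node names are assumed to come from
# the daemon's own startup code:
#
#     monitoring_instance = MonitoringInstance(zkhandler, config, logger, this_node)
#     monitoring_instance.run_plugins()   # on each keepalive tick
#     monitoring_instance.run_cleanups()  # once, at daemon shutdown
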
@@ -180,9 +180,6 @@ def get_configuration():
         raise MalformedConfigurationError(e)
 
     config_directories = {
-        "plugin_directory": o_directories.get(
-            "plugin_directory", "/usr/share/pvc/plugins"
-        ),
         "dynamic_directory": o_directories.get("dynamic_directory", None),
         "log_directory": o_directories.get("log_directory", None),
         "console_log_directory": o_directories.get("console_log_directory", None),
@@ -51,7 +51,7 @@ libvirt_vm_states = {
 }
 
 
-def start_keepalive_timer(logger, config, zkhandler, this_node, monitoring_instance):
+def start_keepalive_timer(logger, config, zkhandler, this_node):
     keepalive_interval = config["keepalive_interval"]
     logger.out(
         f"Starting keepalive timer ({keepalive_interval} second interval)", state="s"
@@ -59,7 +59,7 @@ def start_keepalive_timer(logger, config, zkhandler, this_node, monitoring_instance):
     keepalive_timer = BackgroundScheduler()
     keepalive_timer.add_job(
         node_keepalive,
-        args=(logger, config, zkhandler, this_node, monitoring_instance),
+        args=(logger, config, zkhandler, this_node),
         trigger="interval",
         seconds=keepalive_interval,
     )
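# For reference, a self-contained sketch of the APScheduler pattern used above
# (a BackgroundScheduler with an interval trigger); the 5-second interval and
# the tick() function are made up for illustration:
#
#     from time import sleep
#     from apscheduler.schedulers.background import BackgroundScheduler
#
#     def tick():
#         print("keepalive tick")
#
#     timer = BackgroundScheduler()
#     timer.add_job(tick, trigger="interval", seconds=5)
#     timer.start()
#     sleep(15)  # the scheduler fires tick() in the background
#     timer.shutdown()
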
@@ -97,6 +97,29 @@ def collect_ceph_stats(logger, config, zkhandler, this_node, queue):
         logger.out("Failed to open connection to Ceph cluster: {}".format(e), state="e")
         return
 
+    if debug:
+        logger.out("Getting health stats from monitor", state="d", prefix="ceph-thread")
+
+    # Get Ceph cluster health for local status output
+    command = {"prefix": "health", "format": "json"}
+    try:
+        health_status = json.loads(
+            ceph_conn.mon_command(json.dumps(command), b"", timeout=1)[1]
+        )
+        ceph_health = health_status["status"]
+    except Exception as e:
+        logger.out("Failed to obtain Ceph health data: {}".format(e), state="e")
+        ceph_health = "HEALTH_UNKN"
+
+    if ceph_health in ["HEALTH_OK"]:
+        ceph_health_colour = logger.fmt_green
+    elif ceph_health in ["HEALTH_UNKN"]:
+        ceph_health_colour = logger.fmt_cyan
+    elif ceph_health in ["HEALTH_WARN"]:
+        ceph_health_colour = logger.fmt_yellow
+    else:
+        ceph_health_colour = logger.fmt_red
+
     # Primary-only functions
     if this_node.router_state == "primary":
         if debug:
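# Note on the mon_command call above: Ceph's "health" monitor command returns
# a JSON document whose "status" field is one of HEALTH_OK, HEALTH_WARN, or
# HEALTH_ERR, for instance (abridged, illustrative):
#
#     {"status": "HEALTH_WARN", "checks": {...}}
#
# HEALTH_UNKN is not a Ceph state but a local fallback set when the query
# fails, which the colour mapping renders in cyan.
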
@@ -385,6 +408,8 @@ def collect_ceph_stats(logger, config, zkhandler, this_node, queue):
 
     ceph_conn.shutdown()
 
+    queue.put(ceph_health_colour)
+    queue.put(ceph_health)
     queue.put(osds_this_node)
 
     if debug:
@@ -623,7 +648,7 @@ def collect_vm_stats(logger, config, zkhandler, this_node, queue):
 
 
 # Keepalive update function
-def node_keepalive(logger, config, zkhandler, this_node, monitoring_instance):
+def node_keepalive(logger, config, zkhandler, this_node):
     debug = config["debug"]
     if debug:
         logger.out("Keepalive starting", state="d", prefix="main-thread")
@@ -752,14 +777,16 @@ def node_keepalive(logger, config, zkhandler, this_node, monitoring_instance):
 
     if config["enable_storage"]:
         try:
-            osds_this_node = ceph_thread_queue.get(
-                timeout=(config["keepalive_interval"] - 1)
+            ceph_health_colour = ceph_thread_queue.get(
+                timeout=config["keepalive_interval"]
             )
+            ceph_health = ceph_thread_queue.get(timeout=config["keepalive_interval"])
+            osds_this_node = ceph_thread_queue.get(timeout=config["keepalive_interval"])
         except Exception:
             logger.out("Ceph stats queue get exceeded timeout, continuing", state="w")
+            ceph_health_colour = logger.fmt_cyan
+            ceph_health = "UNKNOWN"
             osds_this_node = "?"
     else:
         osds_this_node = "0"
 
     # Set our information in zookeeper
     keepalive_time = int(time.time())
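# For reference, a self-contained sketch of the stdlib queue timeout pattern
# used above, with a producer thread feeding three values in the same order
# the keepalive consumes them (the names and values are made up for
# illustration):
#
#     import queue
#     import threading
#
#     q = queue.Queue()
#
#     def producer():
#         for item in ("green", "HEALTH_OK", "2"):
#             q.put(item)
#
#     threading.Thread(target=producer).start()
#     try:
#         colour = q.get(timeout=5)
#         health = q.get(timeout=5)
#         osds = q.get(timeout=5)
#     except queue.Empty:
#         colour, health, osds = None, "UNKNOWN", "?"
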
@@ -812,8 +839,8 @@ def node_keepalive(logger, config, zkhandler, this_node, monitoring_instance):
     if config["log_keepalive_cluster_details"]:
         logger.out(
             "{bold}Maintenance:{nofmt} {maint} "
-            "{bold}Node VMs:{nofmt} {domcount} "
-            "{bold}Node OSDs:{nofmt} {osdcount} "
+            "{bold}Active VMs:{nofmt} {domcount} "
             "{bold}Networks:{nofmt} {netcount} "
             "{bold}Load:{nofmt} {load} "
             "{bold}Memory [MiB]: VMs:{nofmt} {allocmem} "
             "{bold}Used:{nofmt} {usedmem} "
@@ -822,7 +849,7 @@ def node_keepalive(logger, config, zkhandler, this_node, monitoring_instance):
             nofmt=logger.fmt_end,
             maint=this_node.maintenance,
             domcount=this_node.domains_count,
-            osdcount=osds_this_node,
             netcount=len(zkhandler.children("base.network")),
             load=this_node.cpuload,
             freemem=this_node.memfree,
             usedmem=this_node.memused,
@@ -830,6 +857,22 @@ def node_keepalive(logger, config, zkhandler, this_node, monitoring_instance):
             ),
             state="t",
         )
+        if config["enable_storage"] and config["log_keepalive_storage_details"]:
+            logger.out(
+                "{bold}Ceph cluster status:{nofmt} {health_colour}{health}{nofmt} "
+                "{bold}Total OSDs:{nofmt} {total_osds} "
+                "{bold}Node OSDs:{nofmt} {node_osds} "
+                "{bold}Pools:{nofmt} {total_pools} ".format(
+                    bold=logger.fmt_bold,
+                    health_colour=ceph_health_colour,
+                    nofmt=logger.fmt_end,
+                    health=ceph_health,
+                    total_osds=len(zkhandler.children("base.osd")),
+                    node_osds=osds_this_node,
+                    total_pools=len(zkhandler.children("base.pool")),
+                ),
+                state="t",
+            )
 
     # Look for dead nodes and fence them
     if not this_node.maintenance:
|
||||
[(("node.state.daemon", node_name), "dead")]
|
||||
)
|
||||
|
||||
monitoring_instance.run_plugins()
|
||||
|
||||
if debug:
|
||||
logger.out("Keepalive finished", state="d", prefix="main-thread")
|
||||
|