Compare commits

...

6 Commits

12 changed files with 88 additions and 32 deletions

View File

@ -1 +1 @@
0.9.62
0.9.63

View File

@ -1,5 +1,13 @@
## PVC Changelog
###### [v0.9.63](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.63)
* Mentions Ganeti in the docs
* Increases API timeout back to 2s
* Adds .update-* configs to dpkg plugin
* Adds full/nearfull OSD warnings
* Improves size value handling for volumes
###### [v0.9.62](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.62)
* [all] Adds an enhanced health checking, monitoring, and reporting system for nodes and clusters

View File

@ -9,7 +9,7 @@
## What is PVC?
PVC is a Linux KVM-based hyperconverged infrastructure (HCI) virtualization cluster solution that is fully Free Software, scalable, redundant, self-healing, self-managing, and designed for administrator simplicity. It is an alternative to other HCI solutions such as Harvester, Nutanix, and VMWare, as well as to other common virtualization stacks such as ProxMox and OpenStack.
PVC is a Linux KVM-based hyperconverged infrastructure (HCI) virtualization cluster solution that is fully Free Software, scalable, redundant, self-healing, self-managing, and designed for administrator simplicity. It is an alternative to other HCI solutions such as Ganeti, Harvester, Nutanix, and VMWare, as well as to other common virtualization stacks such as ProxMox and OpenStack.
PVC is a complete HCI solution, built from well-known and well-trusted Free Software tools, to assist an administrator in creating and managing a cluster of servers to run virtual machines, as well as self-managing several important aspects including storage failover, node failure and recovery, virtual machine failure and recovery, and network plumbing. It is designed to act consistently, reliably, and unobtrusively, letting the administrator concentrate on more important things.

View File

@ -27,7 +27,7 @@ from ssl import SSLContext, TLSVersion
from distutils.util import strtobool as dustrtobool
# Daemon version
version = "0.9.62"
version = "0.9.63"
# API version
API_VERSION = 1.0

View File

@ -124,8 +124,8 @@ def call_api(
data=None,
files=None,
):
# Set the connect timeout to 1 seconds but extremely long (48 hour) data timeout
timeout = (1.05, 172800)
# Set the connect timeout to 2 seconds but extremely long (48 hour) data timeout
timeout = (2.05, 172800)
# Craft the URI
uri = "{}://{}{}{}".format(

View File

@ -2,7 +2,7 @@ from setuptools import setup
setup(
name="pvc",
version="0.9.62",
version="0.9.63",
packages=["pvc", "pvc.cli_lib"],
install_requires=[
"Click",

View File

@ -73,6 +73,11 @@ byte_unit_matrix = {
"G": 1024 * 1024 * 1024,
"T": 1024 * 1024 * 1024 * 1024,
"P": 1024 * 1024 * 1024 * 1024 * 1024,
"E": 1024 * 1024 * 1024 * 1024 * 1024 * 1024,
"Z": 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024,
"Y": 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024,
"R": 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024,
"Q": 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024,
}
# Matrix of human-to-metric values
@ -83,6 +88,11 @@ ops_unit_matrix = {
"G": 1000 * 1000 * 1000,
"T": 1000 * 1000 * 1000 * 1000,
"P": 1000 * 1000 * 1000 * 1000 * 1000,
"E": 1000 * 1000 * 1000 * 1000 * 1000 * 1000,
"Z": 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000,
"Y": 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000,
"R": 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000,
"Q": 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000 * 1000,
}
@ -103,14 +113,18 @@ def format_bytes_tohuman(databytes):
def format_bytes_fromhuman(datahuman):
# Trim off human-readable character
dataunit = str(datahuman)[-1]
datasize = int(str(datahuman)[:-1])
if not re.match(r"[A-Z]", dataunit):
if not re.search(r"[A-Za-z]+", datahuman):
dataunit = "B"
datasize = int(datahuman)
databytes = datasize * byte_unit_matrix[dataunit]
return databytes
else:
dataunit = str(re.match(r"[0-9]+([A-Za-z])[iBb]*", datahuman).group(1))
datasize = int(re.match(r"([0-9]+)[A-Za-z]+", datahuman).group(1))
if byte_unit_matrix.get(dataunit):
databytes = datasize * byte_unit_matrix[dataunit]
return databytes
else:
return None
# Format ops sizes to/from human-readable units
@ -731,22 +745,26 @@ def getVolumeInformation(zkhandler, pool, volume):
def add_volume(zkhandler, pool, name, size):
# Add 'B' if the volume is in bytes
if re.match(r"^[0-9]+$", size):
size = "{}B".format(size)
# 1. Verify the size of the volume
pool_information = getPoolInformation(zkhandler, pool)
size_bytes = format_bytes_fromhuman(size)
if size_bytes is None:
return (
False,
f"ERROR: Requested volume size '{size}' does not have a valid SI unit",
)
if size_bytes >= int(pool_information["stats"]["free_bytes"]):
return (
False,
"ERROR: Requested volume size is greater than the available free space in the pool",
f"ERROR: Requested volume size '{format_bytes_tohuman(size_bytes)}' is greater than the available free space in the pool ('{format_bytes_tohuman(pool_information['stats']['free_bytes'])}')",
)
# 2. Create the volume
retcode, stdout, stderr = common.run_os_command(
"rbd create --size {} {}/{}".format(size, pool, name)
"rbd create --size {} {}/{}".format(
format_bytes_tohuman(size_bytes), pool, name
)
)
if retcode:
return False, 'ERROR: Failed to create RBD volume "{}": {}'.format(name, stderr)
@ -766,7 +784,9 @@ def add_volume(zkhandler, pool, name, size):
]
)
return True, 'Created RBD volume "{}/{}" ({}).'.format(pool, name, size)
return True, 'Created RBD volume "{}" of size "{}" in pool "{}".'.format(
name, format_bytes_tohuman(size_bytes), pool
)
def clone_volume(zkhandler, pool, name_src, name_new):
@ -813,28 +833,32 @@ def resize_volume(zkhandler, pool, name, size):
name, pool
)
# Add 'B' if the volume is in bytes
if re.match(r"^[0-9]+$", size):
size = "{}B".format(size)
# 1. Verify the size of the volume
pool_information = getPoolInformation(zkhandler, pool)
size_bytes = format_bytes_fromhuman(size)
if size_bytes is None:
return (
False,
f"ERROR: Requested volume size '{size}' does not have a valid SI unit",
)
if size_bytes >= int(pool_information["stats"]["free_bytes"]):
return (
False,
"ERROR: Requested volume size is greater than the available free space in the pool",
f"ERROR: Requested volume size '{format_bytes_tohuman(size_bytes)}' is greater than the available free space in the pool ('{format_bytes_tohuman(pool_information['stats']['free_bytes'])}')",
)
# 2. Resize the volume
retcode, stdout, stderr = common.run_os_command(
"rbd resize --size {} {}/{}".format(size, pool, name)
"rbd resize --size {} {}/{}".format(
format_bytes_tohuman(size_bytes), pool, name
)
)
if retcode:
return (
False,
'ERROR: Failed to resize RBD volume "{}" to size "{}" in pool "{}": {}'.format(
name, size, pool, stderr
name, format_bytes_tohuman(size_bytes), pool, stderr
),
)
@ -860,7 +884,7 @@ def resize_volume(zkhandler, pool, name, size):
if target_vm_conn:
target_vm_conn.blockResize(
volume_id,
format_bytes_fromhuman(size),
size_bytes,
libvirt.VIR_DOMAIN_BLOCK_RESIZE_BYTES,
)
target_lv_conn.close()
@ -883,7 +907,7 @@ def resize_volume(zkhandler, pool, name, size):
)
return True, 'Resized RBD volume "{}" to size "{}" in pool "{}".'.format(
name, size, pool
name, format_bytes_tohuman(size_bytes), pool
)

View File

@ -51,6 +51,8 @@ def getClusterHealth(zkhandler, node_list, vm_list, ceph_osd_list):
"vm_stopped": 10,
"osd_out": 50,
"osd_down": 10,
"osd_full": 50,
"osd_nearfull": 10,
"memory_overprovisioned": 50,
"ceph_err": 50,
"ceph_warn": 10,
@ -110,6 +112,18 @@ def getClusterHealth(zkhandler, node_list, vm_list, ceph_osd_list):
f"cluster: Ceph OSD {ceph_osd['id']} in {up_texts[ceph_osd['stats']['up']].upper()} state"
)
# Handle full or nearfull OSDs (>85%)
if ceph_osd["stats"]["utilization"] >= 90:
cluster_health_value -= health_delta_map["osd_full"]
cluster_health_messages.append(
f"cluster: Ceph OSD {ceph_osd['id']} is FULL ({ceph_osd['stats']['utilization']:.1f}% > 90%)"
)
elif ceph_osd["stats"]["utilization"] >= 85:
cluster_health_value -= health_delta_map["osd_nearfull"]
cluster_health_messages.append(
f"cluster: Ceph OSD {ceph_osd['id']} is NEARFULL ({ceph_osd['stats']['utilization']:.1f}% > 85%)"
)
# Check for (n-1) overprovisioning
# Assume X nodes. If the total VM memory allocation (counting only running VMss) is greater than
# the total memory of the (n-1) smallest nodes, trigger this warning.

10
debian/changelog vendored
View File

@ -1,3 +1,13 @@
pvc (0.9.63-0) unstable; urgency=high
* Mentions Ganeti in the docs
* Increases API timeout back to 2s
* Adds .update-* configs to dpkg plugin
* Adds full/nearfull OSD warnings
* Improves size value handling for volumes
-- Joshua M. Boniface <joshua@boniface.me> Fri, 28 Apr 2023 14:47:04 -0400
pvc (0.9.62-0) unstable; urgency=high
* [all] Adds an enhanced health checking, monitoring, and reporting system for nodes and clusters

View File

@ -8,7 +8,7 @@
## What is PVC?
PVC is a Linux KVM-based hyperconverged infrastructure (HCI) virtualization cluster solution that is fully Free Software, scalable, redundant, self-healing, self-managing, and designed for administrator simplicity. It is an alternative to other HCI solutions such as Harvester, Nutanix, and VMWare, as well as to other common virtualization stacks such as ProxMox and OpenStack.
PVC is a Linux KVM-based hyperconverged infrastructure (HCI) virtualization cluster solution that is fully Free Software, scalable, redundant, self-healing, self-managing, and designed for administrator simplicity. It is an alternative to other HCI solutions such as Ganeti, Harvester, Nutanix, and VMWare, as well as to other common virtualization stacks such as ProxMox and OpenStack.
PVC is a complete HCI solution, built from well-known and well-trusted Free Software tools, to assist an administrator in creating and managing a cluster of servers to run virtual machines, as well as self-managing several important aspects including storage failover, node failure and recovery, virtual machine failure and recovery, and network plumbing. It is designed to act consistently, reliably, and unobtrusively, letting the administrator concentrate on more important things.

View File

@ -110,8 +110,8 @@ class MonitoringPluginScript(MonitoringPlugin):
count_upgradable = len(list_upgradable)
# Get obsolete config files (dpkg-* or ucf-* under /etc)
retcode, stdout, stderr = pvc_common.run_os_command("/usr/bin/find /etc -type f -a \( -name '*.dpkg-*' -o -name '*.ucf-*' \)")
# Get obsolete config files (dpkg-*, ucf-*, or update-* under /etc)
retcode, stdout, stderr = pvc_common.run_os_command("/usr/bin/find /etc -type f -a \( -name '*.dpkg-*' -o -name '*.ucf-*' -o -name '*.update-*' \)")
obsolete_conffiles = list()
for conffile_line in stdout.split('\n'):

View File

@ -49,7 +49,7 @@ import re
import json
# Daemon version
version = "0.9.62"
version = "0.9.63"
##########################################################