Compare commits

...

11 Commits

Author SHA1 Message Date
18f09196be Bump version to 0.9.93 2024-01-30 09:51:21 -05:00
8419659e1b Ensure zkhandler is always cleaned up
Even if the subfunction of an API @ZKConnection call fails, the
zkhandler needs to terminate and clean up, or it leaves stuck threads
around.
2024-01-30 09:48:17 -05:00
df40b779af Bump version to 0.9.92 2024-01-29 09:39:10 -05:00
db4f0881a2 Improve error handling and retries
1. Use the actual response code from the server on error, or 504 on
timeouts instead of 500.
2. Retry GET requests 3 times and only error if the last fails
2024-01-29 09:35:14 -05:00
9b51fe9f10 Use get() for newer keys in client 2024-01-29 09:21:02 -05:00
a66449541d Improve script error handling and variables 2024-01-26 15:41:34 -05:00
d28fb71f57 Fix incorrect variable set 2024-01-24 14:40:40 -05:00
e5e9c7086a Add missing restore state to colours 2024-01-24 09:34:59 -05:00
f29b4c2755 Bump version to 0.9.91 2024-01-23 10:40:59 -05:00
0adec2be0d Use consistent and less error-prone find rm's 2024-01-23 10:40:48 -05:00
b994e1a26c Add cleanup of pycaches to CLI install 2024-01-23 10:22:50 -05:00
22 changed files with 197 additions and 51 deletions

View File

@ -1 +1 @@
0.9.90
0.9.93

View File

@ -1,5 +1,25 @@
## PVC Changelog
###### [v0.9.93](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.93)
* [API Daemon] Fixes a bug where stuck zkhandler threads were not cleaned up on error
###### [v0.9.92](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.92)
* [CLI Client] Adds the new restore state to the colours list for VM status
* [API Daemon] Fixes an incorrect variable assignment
* [Provisioner] Improves the error handling of various steps in the debootstrap and rinse example scripts
* [CLI Client] Fixes two bugs around missing keys that were added recently (uses get() instead of direct dictionary refs)
* [CLI Client] Improves API error handling via GET retries (x3) and better server status code handling
###### [v0.9.91](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.91)
* [Client CLI] Fixes a bug and improves output during cluster task events.
* [Client CLI] Improves the output of the task list display.
* [Provisioner] Fixes some missing cloud-init modules in the default debootstrap script.
* [Client CLI] Fixes a bug with a missing argument to the vm_define helper function.
* [All] Fixes inconsistent package find + rm commands to avoid errors in dpkg.
###### [v0.9.90](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.90)
* [Client CLI/API Daemon] Adds additional backup metainfo and an emailed report option to autobackups.

View File

@ -150,6 +150,10 @@
from daemon_lib.vmbuilder import VMBuilder
# These are some global variables used below
default_root_password = "test123"
# The VMBuilderScript class must be named as such, and extend VMBuilder.
class VMBuilderScript(VMBuilder):
def setup(self):
@ -498,11 +502,15 @@ class VMBuilderScript(VMBuilder):
ret = os.system(
f"debootstrap --include={','.join(deb_packages)} {deb_release} {temp_dir} {deb_mirror}"
)
ret = int(ret >> 8)
if ret > 0:
self.fail("Failed to run debootstrap")
self.fail(f"Debootstrap failed with exit code {ret}")
# Bind mount the devfs so we can grub-install later
os.system("mount --bind /dev {}/dev".format(temp_dir))
ret = os.system("mount --bind /dev {}/dev".format(temp_dir))
ret = int(ret >> 8)
if ret > 0:
self.fail(f"/dev bind mount failed with exit code {ret}")
# Create an fstab entry for each volume
fstab_file = "{}/etc/fstab".format(temp_dir)
@ -688,23 +696,36 @@ GRUB_DISABLE_LINUX_UUID=false
# Do some tasks inside the chroot using the provided context manager
with chroot(temp_dir):
# Install and update GRUB
os.system(
ret = os.system(
"grub-install --force /dev/rbd/{}/{}_{}".format(
root_volume["pool"], vm_name, root_volume["disk_id"]
)
)
os.system("update-grub")
ret = int(ret >> 8)
if ret > 0:
self.fail(f"GRUB install failed with exit code {ret}")
ret = os.system("update-grub")
ret = int(ret >> 8)
if ret > 0:
self.fail(f"GRUB update failed with exit code {ret}")
# Set a really dumb root password so the VM can be debugged
# EITHER CHANGE THIS YOURSELF, here or in Userdata, or run something after install
# to change the root password: don't leave it like this on an Internet-facing machine!
os.system("echo root:test123 | chpasswd")
ret = os.system(f"echo root:{default_root_password} | chpasswd")
ret = int(ret >> 8)
if ret > 0:
self.fail(f"Root password change failed with exit code {ret}")
# Enable cloud-init target on (first) boot
# Your user-data should handle this and disable it once done, or things get messy.
# That cloud-init won't run without this hack seems like a bug... but even the official
# Debian cloud images are affected, so who knows.
os.system("systemctl enable cloud-init.target")
ret = os.system("systemctl enable cloud-init.target")
ret = int(ret >> 8)
if ret > 0:
self.fail(f"Enable of cloud-init failed with exit code {ret}")
def cleanup(self):
"""
@ -729,7 +750,7 @@ GRUB_DISABLE_LINUX_UUID=false
temp_dir = "/tmp/target"
# Unmount the bound devfs
os.system("umount {}/dev".format(temp_dir))
os.system("umount -f {}/dev".format(temp_dir))
# Use this construct for reversing the list, as the normal reverse() messes with the list
for volume in list(reversed(self.vm_data["volumes"])):
@ -746,7 +767,7 @@ GRUB_DISABLE_LINUX_UUID=false
):
# Unmount filesystem
retcode, stdout, stderr = pvc_common.run_os_command(
f"umount {mount_path}"
f"umount -f {mount_path}"
)
if retcode:
self.log_err(

View File

@ -150,6 +150,11 @@
from daemon_lib.vmbuilder import VMBuilder
# These are some global variables used below
default_root_password = "test123"
default_local_time = "UTC"
# The VMBuilderScript class must be named as such, and extend VMBuilder.
class VMBuilderScript(VMBuilder):
def setup(self):
@ -524,13 +529,23 @@ class VMBuilderScript(VMBuilder):
ret = os.system(
f"rinse --arch {rinse_architecture} --directory {temporary_directory} --distribution {rinse_release} --cache-dir {rinse_cache} --add-pkg-list /tmp/addpkg --verbose {mirror_arg}"
)
ret = int(ret >> 8)
if ret > 0:
self.fail("Failed to run rinse")
self.fail(f"Rinse failed with exit code {ret}")
# Bind mount the devfs, sysfs, and procfs so we can grub-install later
os.system("mount --bind /dev {}/dev".format(temporary_directory))
os.system("mount --bind /sys {}/sys".format(temporary_directory))
os.system("mount --bind /proc {}/proc".format(temporary_directory))
ret = os.system("mount --bind /dev {}/dev".format(temporary_directory))
ret = int(ret >> 8)
if ret > 0:
self.fail(f"/dev bind mount failed with exit code {ret}")
ret = os.system("mount --bind /sys {}/sys".format(temporary_directory))
ret = int(ret >> 8)
if ret > 0:
self.fail(f"/sys bind mount failed with exit code {ret}")
ret = os.system("mount --bind /proc {}/proc".format(temporary_directory))
ret = int(ret >> 8)
if ret > 0:
self.fail(f"/proc bind mount failed with exit code {ret}")
# Create an fstab entry for each volume
fstab_file = "{}/etc/fstab".format(temporary_directory)
@ -642,41 +657,76 @@ GRUB_SERIAL_COMMAND="serial --speed=115200 --unit=0 --word=8 --parity=no --stop=
# Do some tasks inside the chroot using the provided context manager
with chroot(temporary_directory):
# Fix the broken kernel from rinse by setting a systemd machine ID and running the post scripts
os.system("systemd-machine-id-setup")
os.system(
ret = os.system("systemd-machine-id-setup")
ret = int(ret >> 8)
if ret > 0:
self.fail(f"Machine ID setup failed with exit code {ret}")
ret = os.system(
"rpm -q --scripts kernel-core | grep -A20 'posttrans scriptlet' | tail -n+2 | bash -x"
)
ret = int(ret >> 8)
if ret > 0:
self.fail(f"RPM kernel reinstall failed with exit code {ret}")
# Install any post packages
os.system(f"dnf install -y {' '.join(post_packages)}")
if len(post_packages) > 0:
ret = os.system(f"dnf install -y {' '.join(post_packages)}")
ret = int(ret >> 8)
if ret > 0:
self.fail(f"DNF install failed with exit code {ret}")
# Install and update GRUB config
os.system(
ret = os.system(
"grub2-install --force /dev/rbd/{}/{}_{}".format(
root_volume["pool"], vm_name, root_volume["disk_id"]
)
)
ret = int(ret >> 8)
if ret > 0:
self.fail(f"GRUB install failed with exit code {ret}")
os.system("grub2-mkconfig -o /boot/grub2/grub.cfg")
ret = int(ret >> 8)
if ret > 0:
self.fail(f"GRUB update failed with exit code {ret}")
# Set a really dumb root password so the VM can be debugged
# EITHER CHANGE THIS YOURSELF, here or in Userdata, or run something after install
# to change the root password: don't leave it like this on an Internet-facing machine!
os.system("echo root:test123 | chpasswd")
ret = os.system(f"echo root:{default_root_password} | chpasswd")
ret = int(ret >> 8)
if ret > 0:
self.fail(f"Root password change failed with exit code {ret}")
# Enable dbus-broker
os.system("systemctl enable dbus-broker.service")
ret = os.system("systemctl enable dbus-broker.service")
ret = int(ret >> 8)
if ret > 0:
self.fail(f"Enable of dbus-broker failed with exit code {ret}")
# Enable NetworkManager
os.system("systemctl enable NetworkManager.service")
ret = int(ret >> 8)
if ret > 0:
self.fail(f"Enable of NetworkManager failed with exit code {ret}")
# Enable cloud-init target on (first) boot
# Your user-data should handle this and disable it once done, or things get messy.
# That cloud-init won't run without this hack seems like a bug... but even the official
# Debian cloud images are affected, so who knows.
os.system("systemctl enable cloud-init.target")
ret = int(ret >> 8)
if ret > 0:
self.fail(f"Enable of cloud-init failed with exit code {ret}")
# Set the timezone to UTC
os.system("ln -sf ../usr/share/zoneinfo/UTC /etc/localtime")
ret = os.system(
f"ln -sf ../usr/share/zoneinfo/{default_local_time} /etc/localtime"
)
ret = int(ret >> 8)
if ret > 0:
self.fail(f"Localtime update failed with exit code {ret}")
def cleanup(self):
"""

View File

@ -27,7 +27,7 @@ from distutils.util import strtobool as dustrtobool
import daemon_lib.config as cfg
# Daemon version
version = "0.9.90"
version = "0.9.93"
# API version
API_VERSION = 1.0

View File

@ -140,15 +140,31 @@ def call_api(
# Determine the request type and hit the API
disable_warnings()
try:
response = None
if operation == "get":
response = requests.get(
uri,
timeout=timeout,
headers=headers,
params=params,
data=data,
verify=config["verify_ssl"],
)
retry_on_code = [429, 500, 502, 503, 504]
for i in range(3):
failed = False
try:
response = requests.get(
uri,
timeout=timeout,
headers=headers,
params=params,
data=data,
verify=config["verify_ssl"],
)
if response.status_code in retry_on_code:
failed = True
continue
except requests.exceptions.ConnectionError:
failed = True
pass
if failed:
error = f"Code {response.status_code}" if response else "Timeout"
raise requests.exceptions.ConnectionError(
f"Failed to connect after 3 tries ({error})"
)
if operation == "post":
response = requests.post(
uri,
@ -189,7 +205,8 @@ def call_api(
)
except Exception as e:
message = "Failed to connect to the API: {}".format(e)
response = ErrorResponse({"message": message}, 500)
code = response.status_code if response else 504
response = ErrorResponse({"message": message}, code)
# Display debug output
if config["debug"]:

View File

@ -430,7 +430,7 @@ def format_list_osd(config, osd_list):
)
continue
if osd_information["is_split"]:
if osd_information.get("is_split") is not None:
osd_information["device"] = f"{osd_information['device']} [s]"
# Deal with the size to human readable

View File

@ -1632,6 +1632,7 @@ def format_info(config, domain_information, long_output):
"migrate": ansiprint.blue(),
"unmigrate": ansiprint.blue(),
"provision": ansiprint.blue(),
"restore": ansiprint.blue(),
}
ainformation.append(
"{}State:{} {}{}{}".format(
@ -1716,7 +1717,7 @@ def format_info(config, domain_information, long_output):
"{}Max live downtime:{} {}".format(
ansiprint.purple(),
ansiprint.end(),
f"{domain_information['migration_max_downtime']} ms",
f"{domain_information.get('migration_max_downtime')} ms",
)
)

View File

@ -2,7 +2,7 @@ from setuptools import setup
setup(
name="pvc",
version="0.9.90",
version="0.9.93",
packages=["pvc.cli", "pvc.lib"],
install_requires=[
"Click",

View File

@ -1201,7 +1201,7 @@ def get_resource_metrics(zkhandler):
try:
user_time = vm["vcpu_stats"]["user_time"] / 1000000
except Exception:
cpu_time = 0
user_time = 0
output_lines.append(
f"pvc_vm_vcpus_user_time{{vm=\"{vm['name']}\"}} {user_time}"
)

View File

@ -57,10 +57,11 @@ class ZKConnection(object):
schema_version = 0
zkhandler.schema.load(schema_version, quiet=True)
ret = function(zkhandler, *args, **kwargs)
zkhandler.disconnect()
del zkhandler
try:
ret = function(zkhandler, *args, **kwargs)
finally:
zkhandler.disconnect()
del zkhandler
return ret

26
debian/changelog vendored
View File

@ -1,3 +1,29 @@
pvc (0.9.93-0) unstable; urgency=high
* [API Daemon] Fixes a bug where stuck zkhandler threads were not cleaned up on error
-- Joshua M. Boniface <joshua@boniface.me> Tue, 30 Jan 2024 09:51:21 -0500
pvc (0.9.92-0) unstable; urgency=high
* [CLI Client] Adds the new restore state to the colours list for VM status
* [API Daemon] Fixes an incorrect variable assignment
* [Provisioner] Improves the error handling of various steps in the debootstrap and rinse example scripts
* [CLI Client] Fixes two bugs around missing keys that were added recently (uses get() instead of direct dictionary refs)
* [CLI Client] Improves API error handling via GET retries (x3) and better server status code handling
-- Joshua M. Boniface <joshua@boniface.me> Mon, 29 Jan 2024 09:39:10 -0500
pvc (0.9.91-0) unstable; urgency=high
* [Client CLI] Fixes a bug and improves output during cluster task events.
* [Client CLI] Improves the output of the task list display.
* [Provisioner] Fixes some missing cloud-init modules in the default debootstrap script.
* [Client CLI] Fixes a bug with a missing argument to the vm_define helper function.
* [All] Fixes inconsistent package find + rm commands to avoid errors in dpkg.
-- Joshua M. Boniface <joshua@boniface.me> Tue, 23 Jan 2024 10:02:19 -0500
pvc (0.9.90-0) unstable; urgency=high
* [Client CLI/API Daemon] Adds additional backup metainfo and an emailed report option to autobackups.

View File

@ -2,7 +2,12 @@
# Generate the bash completion configuration
if [ -d /etc/bash_completion.d ]; then
echo "Installing BASH completion configuration"
_PVC_COMPLETE=source_bash pvc > /etc/bash_completion.d/pvc
fi
# Remove any cached CPython directories or files
echo "Cleaning up CPython caches"
find /usr/lib/python3/dist-packages/pvc -type d -name "__pycache__" -exec rm -fr {} + &>/dev/null || true
exit 0

View File

@ -1,5 +1,5 @@
#!/bin/sh
# Remove any cached CPython directories or files
echo "Cleaning up existing CPython files"
find /usr/share/pvc/pvcapid -type d -name "__pycache__" -exec rm -rf {} \; &>/dev/null || true
echo "Cleaning up CPython caches"
find /usr/share/pvc/pvcapid -type d -name "__pycache__" -exec rm -fr {} + &>/dev/null || true

5
debian/pvc-daemon-common.preinst vendored Normal file
View File

@ -0,0 +1,5 @@
#!/bin/sh
# Remove any cached CPython directories or files
echo "Cleaning up CPython caches"
# NOTE: "&>" is a bashism. Under a POSIX /bin/sh (e.g. dash) it is parsed as
# "&" (run in background) followed by a separate ">/dev/null" redirection, so
# the find would run asynchronously, unsilenced, and outside the "|| true"
# guard. Use the portable redirection form so the cleanup runs synchronously
# and quietly, and never fails the script.
find /usr/share/pvc/daemon_lib -type d -name "__pycache__" -exec rm -fr {} + >/dev/null 2>&1 || true

View File

@ -1,6 +1,6 @@
#!/bin/sh
# Remove any cached CPython directories or files
echo "Cleaning up existing CPython files"
find /usr/share/pvc/pvchealthd -type d -name "__pycache__" -exec rm -rf {} \; &>/dev/null || true
find /usr/share/pvc/plugins -type d -name "__pycache__" -exec rm -rf {} \; &>/dev/null || true
echo "Cleaning up CPython caches"
find /usr/share/pvc/pvchealthd -type d -name "__pycache__" -exec rm -fr {} + &>/dev/null || true
find /usr/share/pvc/plugins -type d -name "__pycache__" -exec rm -fr {} + &>/dev/null || true

View File

@ -1,5 +1,5 @@
#!/bin/sh
# Remove any cached CPython directories or files
echo "Cleaning up existing CPython files"
find /usr/share/pvc/pvcnoded -type d -name "__pycache__" -exec rm -rf {} \; &>/dev/null || true
echo "Cleaning up CPython caches"
find /usr/share/pvc/pvcnoded -type d -name "__pycache__" -exec rm -fr {} + &>/dev/null || true

View File

@ -1,5 +1,5 @@
#!/bin/sh
# Remove any cached CPython directories or files
echo "Cleaning up existing CPython files"
find /usr/share/pvc/pvcworkerd -type d -name "__pycache__" -exec rm -rf {} \; &>/dev/null || true
echo "Cleaning up CPython caches"
find /usr/share/pvc/pvcworkerd -type d -name "__pycache__" -exec rm -fr {} + &>/dev/null || true

2
debian/rules vendored
View File

@ -13,7 +13,7 @@ override_dh_python3:
rm -r $(CURDIR)/client-cli/.pybuild $(CURDIR)/client-cli/pvc.egg-info
override_dh_auto_clean:
find . -name "__pycache__" -o -name ".pybuild" -exec rm -r {} \; || true
find . -name "__pycache__" -o -name ".pybuild" -exec rm -fr {} + || true
# If you need to rebuild the Sphinx documentation
# Add sphinxdoc to the dh --with line

View File

@ -33,7 +33,7 @@ import os
import signal
# Daemon version
version = "0.9.90"
version = "0.9.93"
##########################################################

View File

@ -49,7 +49,7 @@ import re
import json
# Daemon version
version = "0.9.90"
version = "0.9.93"
##########################################################

View File

@ -44,7 +44,7 @@ from daemon_lib.vmbuilder import (
)
# Daemon version
version = "0.9.90"
version = "0.9.93"
config = cfg.get_configuration()