Compare commits
136 Commits
Author | SHA1 | Date |
---|---|---|
Joshua Boniface | 41cd34ba4d | |
Joshua Boniface | 736762901c | |
Joshua Boniface | ecb812ccac | |
Joshua Boniface | a2e5df9f6d | |
Joshua Boniface | 73c0834f85 | |
Joshua Boniface | 2de999c700 | |
Joshua Boniface | 7543eb839d | |
Joshua Boniface | 8cb44c0c5d | |
Joshua Boniface | c55021f30c | |
Joshua Boniface | 783c9e46c2 | |
Joshua Boniface | b7f33c1fcb | |
Joshua Boniface | 0f578d7c7d | |
Joshua Boniface | f87b96887c | |
Joshua Boniface | 02a775c99b | |
Joshua Boniface | 8177d5f8b7 | |
Joshua Boniface | 26d0d08873 | |
Joshua Boniface | f57b8d4a15 | |
Joshua Boniface | 10de85cce3 | |
Joshua Boniface | e938140414 | |
Joshua Boniface | fd87a28eb3 | |
Joshua Boniface | 4ef5fbdbe8 | |
Joshua Boniface | 8fa6bed736 | |
Joshua Boniface | f7926726f2 | |
Joshua Boniface | de58efdaa9 | |
Joshua Boniface | 8ca6976892 | |
Joshua Boniface | a957218976 | |
Joshua Boniface | 61365e6e01 | |
Joshua Boniface | 35fe16ce75 | |
Joshua Boniface | c45e488958 | |
Joshua Boniface | c1f320ede2 | |
Joshua Boniface | 03db9604e1 | |
Joshua Boniface | f1668bffcc | |
Joshua Boniface | c0686fc5c7 | |
Joshua Boniface | 7ecc05b413 | |
Joshua Boniface | 4b37c4fea3 | |
Joshua Boniface | 0d918d66fe | |
Joshua Boniface | fd199f405b | |
Joshua Boniface | f6c009beac | |
Joshua Boniface | fc89f4f2f5 | |
Joshua Boniface | 565011b277 | |
Joshua Boniface | 0bf9cc6b06 | |
Joshua Boniface | f2dfada73e | |
Joshua Boniface | f63c392ba6 | |
Joshua Boniface | 7663ad72c5 | |
Joshua Boniface | 9b3075be18 | |
Joshua Boniface | 9a661d0173 | |
Joshua Boniface | 4a0680b27f | |
Joshua Boniface | 6597f7aef6 | |
Joshua Boniface | f42a1bad0e | |
Joshua Boniface | 3fb52a13c2 | |
Joshua Boniface | 8937ddf331 | |
Joshua Boniface | 7cc354466f | |
Joshua Boniface | 44232fe3c6 | |
Joshua Boniface | 0a8bad3418 | |
Joshua Boniface | f10d32987b | |
Joshua Boniface | faf920ac1d | |
Joshua Boniface | a6e824a049 | |
Joshua Boniface | 624eb4e752 | |
Joshua Boniface | d060787503 | |
Joshua Boniface | 9a435fe2ae | |
Joshua Boniface | 9f47da6777 | |
Joshua Boniface | 0cf229273a | |
Joshua Boniface | 212ecaab68 | |
Joshua Boniface | f1b4593367 | |
Joshua Boniface | fc55046812 | |
Joshua Boniface | 33f905459a | |
Joshua Boniface | 174e6e08e3 | |
Joshua Boniface | 9f85c92dff | |
Joshua Boniface | 4b30d2f58a | |
Joshua Boniface | 2fcee28fed | |
Joshua Boniface | 1f18e88c06 | |
Joshua Boniface | 359191c83f | |
Joshua Boniface | 3d0d5e63f6 | |
Joshua Boniface | e6bfbb6d45 | |
Joshua Boniface | b80f9e28dc | |
Joshua Boniface | fbd5b3cca3 | |
Joshua Boniface | 2b1082590e | |
Joshua Boniface | a4ca112128 | |
Joshua Boniface | 6fc7f45027 | |
Joshua Boniface | 0c240a5129 | |
Joshua Boniface | 553c1e670e | |
Joshua Boniface | 942de9f15b | |
Joshua Boniface | 9aca8e215b | |
Joshua Boniface | 97329bb90d | |
Joshua Boniface | c186015d6f | |
Joshua Boniface | 1aa5999109 | |
Joshua Boniface | 570460e5ee | |
Joshua Boniface | 7a99e0e524 | |
Joshua Boniface | 234d6ae83b | |
Joshua Boniface | 5d0e7931d1 | |
Joshua Boniface | dcb9c0d12c | |
Joshua Boniface | f6e856bf98 | |
Joshua Boniface | f1fe0c63f5 | |
Joshua Boniface | ab944f9b95 | |
Joshua Boniface | 9714ac20b2 | |
Joshua Boniface | 79ad09ae59 | |
Joshua Boniface | 4c6aabec6a | |
Joshua Boniface | 559400ed90 | |
Joshua Boniface | 78c774b607 | |
Joshua Boniface | a461791ce8 | |
Joshua Boniface | 9fdb6d8708 | |
Joshua Boniface | 2fb7c40497 | |
Joshua Boniface | dee8d186cf | |
Joshua Boniface | 1e9871241e | |
Joshua Boniface | 9cd88ebccb | |
Joshua Boniface | 3bc500bc55 | |
Joshua Boniface | d63cc2e661 | |
Joshua Boniface | 67ec41aaf9 | |
Joshua Boniface | a95e72008e | |
Joshua Boniface | efc7434143 | |
Joshua Boniface | c473dcca81 | |
Joshua Boniface | 18f09196be | |
Joshua Boniface | 8419659e1b | |
Joshua Boniface | df40b779af | |
Joshua Boniface | db4f0881a2 | |
Joshua Boniface | 9b51fe9f10 | |
Joshua Boniface | a66449541d | |
Joshua Boniface | d28fb71f57 | |
Joshua Boniface | e5e9c7086a | |
Joshua Boniface | f29b4c2755 | |
Joshua Boniface | 0adec2be0d | |
Joshua Boniface | b994e1a26c | |
Joshua Boniface | 6d6420a695 | |
Joshua Boniface | 94e0287fc4 | |
Joshua Boniface | 2886176762 | |
Joshua Boniface | 4dc4c975f1 | |
Joshua Boniface | 8f3120baf3 | |
Joshua Boniface | 86ca363697 | |
Joshua Boniface | a5763c9d25 | |
Joshua Boniface | 39ec427c42 | |
Joshua Boniface | 1ba21312ea | |
Joshua Boniface | 09269f182c | |
Joshua Boniface | 38eeb78423 | |
Joshua Boniface | 362edeed8c | |
Joshua Boniface | 8d74ee7273 | |
Joshua Boniface | 39c8367723 |
|
@ -4,4 +4,4 @@ bbuilder:
|
|||
published:
|
||||
- git submodule update --init
|
||||
- /bin/bash build-stable-deb.sh
|
||||
- sudo /usr/local/bin/deploy-package -C pvc
|
||||
- sudo /usr/local/bin/deploy-package -C pvc -D bookworm
|
||||
|
|
89
CHANGELOG.md
89
CHANGELOG.md
|
@ -1,5 +1,94 @@
|
|||
## PVC Changelog
|
||||
|
||||
###### [v0.9.100](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.100)
|
||||
|
||||
* [API Daemon] Improves the handling of "detect:" disk strings on newer systems by leveraging the "nvme" command
|
||||
* [Client CLI] Update help text about "detect:" disk strings
|
||||
* [Meta] Updates deprecation warnings and updates builder to only add this version for Debian 12 (Bookworm)
|
||||
|
||||
###### [v0.9.99](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.99)
|
||||
|
||||
**Deprecation Warning**: `pvc vm backup` commands are now deprecated and will be removed in a future version. Use `pvc vm snapshot` commands instead.
|
||||
**Breaking Change**: The on-disk format of VM snapshot exports differs from backup exports, and the PVC autobackup system now leverages these. It is recommended to start fresh with a new tree of backups for `pvc autobackup` for maximum compatibility.
|
||||
**Breaking Change**: VM autobackups now run in `pvcworkerd` instead of the CLI client directly, allowing them to be triggered from any node (or externally). It is important to apply the timer unit changes from the `pvc-ansible` role after upgrading to 0.9.99 to avoid duplicate runs.
|
||||
**Usage Note**: VM snapshots are displayed in the `pvc vm list` and `pvc vm info` outputs, not in a unique "list" endpoint.
|
||||
|
||||
* [API Daemon] Adds a proper error when an invalid provisioner profile is specified
|
||||
* [Node Daemon] Sorts Ceph pools properly in node keepalive to avoid incorrect ordering
|
||||
* [Health Daemon] Improves handling of IPMI checks by adding multiple tries but a shorter timeout
|
||||
* [API Daemon] Improves handling of XML parsing errors in VM configurations
|
||||
* [ALL] Adds support for whole VM snapshots, including configuration XML details, and direct rollback to snapshots
|
||||
* [ALL] Adds support for exporting and importing whole VM snapshots
|
||||
* [Client CLI] Removes vCPU topology from short VM info output
|
||||
* [Client CLI] Improves output format of VM info output
|
||||
* [API Daemon] Adds an endpoint to get the current primary node
|
||||
* [Client CLI] Fixes a bug where API requests were made 3 times
|
||||
* [Other] Improves the build-and-deploy.sh script
|
||||
* [API Daemon] Improves the "vm rename" command to avoid redefining VM, preserving history etc.
|
||||
* [API Daemon] Adds an indication when a task is run on the primary node
|
||||
* [API Daemon] Fixes a bug where the ZK schema relative path didn't work sometimes
|
||||
|
||||
###### [v0.9.98](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.98)
|
||||
|
||||
* [CLI Client] Fixed output when API call times out
|
||||
* [Node Daemon] Improves the handling of fence states
|
||||
* [API Daemon/CLI Client] Adds support for storage snapshot rollback
|
||||
* [CLI Client] Adds additional warning messages about snapshot consistency to help output
|
||||
* [API Daemon] Fixes a bug listing snapshots by pool/volume
|
||||
* [Node Daemon] Adds a --version flag for information gathering by update-motd.sh
|
||||
|
||||
###### [v0.9.97](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.97)
|
||||
|
||||
* [Client CLI] Ensures --lines is always an integer value
|
||||
* [Node Daemon] Fixes a bug if d_network changes during iteration
|
||||
* [Node Daemon] Moves to using allocated instead of free memory for node reporting
|
||||
* [API Daemon] Fixes a bug if lingering RBD snapshots exist when removing a volume (#180)
|
||||
|
||||
###### [v0.9.96](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.96)
|
||||
|
||||
* [API Daemon] Fixes a bug when reporting node stats
|
||||
* [API Daemon] Fixes a bug deleting successful benchmark results
|
||||
|
||||
###### [v0.9.95](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.95)
|
||||
|
||||
* [API Daemon/CLI Client] Adds a flag to allow duplicate VNIs in network templates
|
||||
* [API Daemon] Ensures that storage template disks are returned in disk ID order
|
||||
* [Client CLI] Fixes a display bug showing all OSDs as split
|
||||
|
||||
###### [v0.9.94](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.94)
|
||||
|
||||
* [CLI Client] Fixes an incorrect ordering issue with autobackup summary emails
|
||||
* [API Daemon/CLI Client] Adds an additional safety check for 80% cluster fullness when doing volume adds or resizes
|
||||
* [API Daemon/CLI Client] Adds safety checks to volume clones as well
|
||||
* [API Daemon] Fixes a few remaining memory bugs for stopped/disabled VMs
|
||||
|
||||
###### [v0.9.93](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.93)
|
||||
|
||||
* [API Daemon] Fixes a bug where stuck zkhandler threads were not cleaned up on error
|
||||
|
||||
###### [v0.9.92](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.92)
|
||||
|
||||
* [CLI Client] Adds the new restore state to the colours list for VM status
|
||||
* [API Daemon] Fixes an incorrect variable assignment
|
||||
* [Provisioner] Improves the error handling of various steps in the debootstrap and rinse example scripts
|
||||
* [CLI Client] Fixes two bugs around missing keys that were added recently (uses get() instead of direct dictionary refs)
|
||||
* [CLI Client] Improves API error handling via GET retries (x3) and better server status code handling
|
||||
|
||||
###### [v0.9.91](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.91)
|
||||
|
||||
* [Client CLI] Fixes a bug and improves output during cluster task events.
|
||||
* [Client CLI] Improves the output of the task list display.
|
||||
* [Provisioner] Fixes some missing cloud-init modules in the default debootstrap script.
|
||||
* [Client CLI] Fixes a bug with a missing argument to the vm_define helper function.
|
||||
* [All] Fixes inconsistent package find + rm commands to avoid errors in dpkg.
|
||||
|
||||
###### [v0.9.90](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.90)
|
||||
|
||||
* [Client CLI/API Daemon] Adds additional backup metainfo and an emailed report option to autobackups.
|
||||
* [All] Adds a live migration maximum downtime selector to help with busy VM migrations.
|
||||
* [API Daemon] Fixes a database migration bug on Debian 10/11.
|
||||
* [Node Daemon] Fixes a race condition when applying Zookeeper schema changes.
|
||||
|
||||
###### [v0.9.89](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.89)
|
||||
|
||||
* [API/Worker Daemons] Fixes a bug with the Celery result backends not being properly initialized on Debian 10/11.
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
"""PVC version 0.9.89
|
||||
|
||||
Revision ID: 977e7b4d3497
|
||||
Revises: 88fa0d88a9f8
|
||||
Create Date: 2024-01-10 16:09:44.659027
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# Revision identifiers, used by Alembic.
revision = "977e7b4d3497"
down_revision = "88fa0d88a9f8"
# Neither branch labels nor cross-revision dependencies are set for this revision.
branch_labels = depends_on = None
|
||||
|
||||
|
||||
def upgrade():
    """Add the nullable integer column ``migration_max_downtime`` to ``system_template``."""
    # ### commands auto generated by Alembic - please adjust! ###
    max_downtime_column = sa.Column(
        "migration_max_downtime",
        sa.Integer(),
        default="300",
        server_default="300",
        nullable=True,
    )
    op.add_column("system_template", max_downtime_column)
    # ### end Alembic commands ###
|
||||
|
||||
|
||||
def downgrade():
    """Drop the ``migration_max_downtime`` column from ``system_template``."""
    # ### commands auto generated by Alembic - please adjust! ###
    table_name, column_name = "system_template", "migration_max_downtime"
    op.drop_column(table_name, column_name)
    # ### end Alembic commands ###
|
|
@ -150,6 +150,10 @@
|
|||
from daemon_lib.vmbuilder import VMBuilder
|
||||
|
||||
|
||||
# These are some global variables used below
|
||||
default_root_password = "test123"
|
||||
|
||||
|
||||
# The VMBuilderScript class must be named as such, and extend VMBuilder.
|
||||
class VMBuilderScript(VMBuilder):
|
||||
def setup(self):
|
||||
|
@ -498,11 +502,15 @@ class VMBuilderScript(VMBuilder):
|
|||
ret = os.system(
|
||||
f"debootstrap --include={','.join(deb_packages)} {deb_release} {temp_dir} {deb_mirror}"
|
||||
)
|
||||
ret = int(ret >> 8)
|
||||
if ret > 0:
|
||||
self.fail("Failed to run debootstrap")
|
||||
self.fail(f"Debootstrap failed with exit code {ret}")
|
||||
|
||||
# Bind mount the devfs so we can grub-install later
|
||||
os.system("mount --bind /dev {}/dev".format(temp_dir))
|
||||
ret = os.system("mount --bind /dev {}/dev".format(temp_dir))
|
||||
ret = int(ret >> 8)
|
||||
if ret > 0:
|
||||
self.fail(f"/dev bind mount failed with exit code {ret}")
|
||||
|
||||
# Create an fstab entry for each volume
|
||||
fstab_file = "{}/etc/fstab".format(temp_dir)
|
||||
|
@ -589,11 +597,13 @@ After=multi-user.target
|
|||
- migrator
|
||||
- bootcmd
|
||||
- write-files
|
||||
- growpart
|
||||
- resizefs
|
||||
- set_hostname
|
||||
- update_hostname
|
||||
- update_etc_hosts
|
||||
- ca-certs
|
||||
- users-groups
|
||||
- ssh
|
||||
|
||||
cloud_config_modules:
|
||||
|
@ -686,23 +696,36 @@ GRUB_DISABLE_LINUX_UUID=false
|
|||
# Do some tasks inside the chroot using the provided context manager
|
||||
with chroot(temp_dir):
|
||||
# Install and update GRUB
|
||||
os.system(
|
||||
ret = os.system(
|
||||
"grub-install --force /dev/rbd/{}/{}_{}".format(
|
||||
root_volume["pool"], vm_name, root_volume["disk_id"]
|
||||
)
|
||||
)
|
||||
os.system("update-grub")
|
||||
ret = int(ret >> 8)
|
||||
if ret > 0:
|
||||
self.fail(f"GRUB install failed with exit code {ret}")
|
||||
|
||||
ret = os.system("update-grub")
|
||||
ret = int(ret >> 8)
|
||||
if ret > 0:
|
||||
self.fail(f"GRUB update failed with exit code {ret}")
|
||||
|
||||
# Set a really dumb root password so the VM can be debugged
|
||||
# EITHER CHANGE THIS YOURSELF, here or in Userdata, or run something after install
|
||||
# to change the root password: don't leave it like this on an Internet-facing machine!
|
||||
os.system("echo root:test123 | chpasswd")
|
||||
ret = os.system(f"echo root:{default_root_password} | chpasswd")
|
||||
ret = int(ret >> 8)
|
||||
if ret > 0:
|
||||
self.fail(f"Root password change failed with exit code {ret}")
|
||||
|
||||
# Enable cloud-init target on (first) boot
|
||||
# Your user-data should handle this and disable it once done, or things get messy.
|
||||
# That cloud-init won't run without this hack seems like a bug... but even the official
|
||||
# Debian cloud images are affected, so who knows.
|
||||
os.system("systemctl enable cloud-init.target")
|
||||
ret = os.system("systemctl enable cloud-init.target")
|
||||
ret = int(ret >> 8)
|
||||
if ret > 0:
|
||||
self.fail(f"Enable of cloud-init failed with exit code {ret}")
|
||||
|
||||
def cleanup(self):
|
||||
"""
|
||||
|
@ -727,7 +750,7 @@ GRUB_DISABLE_LINUX_UUID=false
|
|||
temp_dir = "/tmp/target"
|
||||
|
||||
# Unmount the bound devfs
|
||||
os.system("umount {}/dev".format(temp_dir))
|
||||
os.system("umount -f {}/dev".format(temp_dir))
|
||||
|
||||
# Use this construct for reversing the list, as the normal reverse() messes with the list
|
||||
for volume in list(reversed(self.vm_data["volumes"])):
|
||||
|
@ -744,7 +767,7 @@ GRUB_DISABLE_LINUX_UUID=false
|
|||
):
|
||||
# Unmount filesystem
|
||||
retcode, stdout, stderr = pvc_common.run_os_command(
|
||||
f"umount {mount_path}"
|
||||
f"umount -f {mount_path}"
|
||||
)
|
||||
if retcode:
|
||||
self.log_err(
|
||||
|
|
|
@ -150,6 +150,11 @@
|
|||
from daemon_lib.vmbuilder import VMBuilder
|
||||
|
||||
|
||||
# These are some global variables used below
|
||||
default_root_password = "test123"
|
||||
default_local_time = "UTC"
|
||||
|
||||
|
||||
# The VMBuilderScript class must be named as such, and extend VMBuilder.
|
||||
class VMBuilderScript(VMBuilder):
|
||||
def setup(self):
|
||||
|
@ -524,13 +529,23 @@ class VMBuilderScript(VMBuilder):
|
|||
ret = os.system(
|
||||
f"rinse --arch {rinse_architecture} --directory {temporary_directory} --distribution {rinse_release} --cache-dir {rinse_cache} --add-pkg-list /tmp/addpkg --verbose {mirror_arg}"
|
||||
)
|
||||
ret = int(ret >> 8)
|
||||
if ret > 0:
|
||||
self.fail("Failed to run rinse")
|
||||
self.fail(f"Rinse failed with exit code {ret}")
|
||||
|
||||
# Bind mount the devfs, sysfs, and procfs so we can grub-install later
|
||||
os.system("mount --bind /dev {}/dev".format(temporary_directory))
|
||||
os.system("mount --bind /sys {}/sys".format(temporary_directory))
|
||||
os.system("mount --bind /proc {}/proc".format(temporary_directory))
|
||||
ret = os.system("mount --bind /dev {}/dev".format(temporary_directory))
|
||||
ret = int(ret >> 8)
|
||||
if ret > 0:
|
||||
self.fail(f"/dev bind mount failed with exit code {ret}")
|
||||
ret = os.system("mount --bind /sys {}/sys".format(temporary_directory))
|
||||
ret = int(ret >> 8)
|
||||
if ret > 0:
|
||||
self.fail(f"/sys bind mount failed with exit code {ret}")
|
||||
ret = os.system("mount --bind /proc {}/proc".format(temporary_directory))
|
||||
ret = int(ret >> 8)
|
||||
if ret > 0:
|
||||
self.fail(f"/proc bind mount failed with exit code {ret}")
|
||||
|
||||
# Create an fstab entry for each volume
|
||||
fstab_file = "{}/etc/fstab".format(temporary_directory)
|
||||
|
@ -642,41 +657,76 @@ GRUB_SERIAL_COMMAND="serial --speed=115200 --unit=0 --word=8 --parity=no --stop=
|
|||
# Do some tasks inside the chroot using the provided context manager
|
||||
with chroot(temporary_directory):
|
||||
# Fix the broken kernel from rinse by setting a systemd machine ID and running the post scripts
|
||||
os.system("systemd-machine-id-setup")
|
||||
os.system(
|
||||
ret = os.system("systemd-machine-id-setup")
|
||||
ret = int(ret >> 8)
|
||||
if ret > 0:
|
||||
self.fail(f"Machine ID setup failed with exit code {ret}")
|
||||
|
||||
ret = os.system(
|
||||
"rpm -q --scripts kernel-core | grep -A20 'posttrans scriptlet' | tail -n+2 | bash -x"
|
||||
)
|
||||
ret = int(ret >> 8)
|
||||
if ret > 0:
|
||||
self.fail(f"RPM kernel reinstall failed with exit code {ret}")
|
||||
|
||||
# Install any post packages
|
||||
os.system(f"dnf install -y {' '.join(post_packages)}")
|
||||
if len(post_packages) > 0:
|
||||
ret = os.system(f"dnf install -y {' '.join(post_packages)}")
|
||||
ret = int(ret >> 8)
|
||||
if ret > 0:
|
||||
self.fail(f"DNF install failed with exit code {ret}")
|
||||
|
||||
# Install and update GRUB config
|
||||
os.system(
|
||||
ret = os.system(
|
||||
"grub2-install --force /dev/rbd/{}/{}_{}".format(
|
||||
root_volume["pool"], vm_name, root_volume["disk_id"]
|
||||
)
|
||||
)
|
||||
ret = int(ret >> 8)
|
||||
if ret > 0:
|
||||
self.fail(f"GRUB install failed with exit code {ret}")
|
||||
|
||||
os.system("grub2-mkconfig -o /boot/grub2/grub.cfg")
|
||||
ret = int(ret >> 8)
|
||||
if ret > 0:
|
||||
self.fail(f"GRUB update failed with exit code {ret}")
|
||||
|
||||
# Set a really dumb root password so the VM can be debugged
|
||||
# EITHER CHANGE THIS YOURSELF, here or in Userdata, or run something after install
|
||||
# to change the root password: don't leave it like this on an Internet-facing machine!
|
||||
os.system("echo root:test123 | chpasswd")
|
||||
ret = os.system(f"echo root:{default_root_password} | chpasswd")
|
||||
ret = int(ret >> 8)
|
||||
if ret > 0:
|
||||
self.fail(f"Root password change failed with exit code {ret}")
|
||||
|
||||
# Enable dbus-broker
|
||||
os.system("systemctl enable dbus-broker.service")
|
||||
ret = os.system("systemctl enable dbus-broker.service")
|
||||
ret = int(ret >> 8)
|
||||
if ret > 0:
|
||||
self.fail(f"Enable of dbus-broker failed with exit code {ret}")
|
||||
|
||||
# Enable NetworkManager
|
||||
os.system("systemctl enable NetworkManager.service")
|
||||
ret = int(ret >> 8)
|
||||
if ret > 0:
|
||||
self.fail(f"Enable of NetworkManager failed with exit code {ret}")
|
||||
|
||||
# Enable cloud-init target on (first) boot
|
||||
# Your user-data should handle this and disable it once done, or things get messy.
|
||||
# That cloud-init won't run without this hack seems like a bug... but even the official
|
||||
# Debian cloud images are affected, so who knows.
|
||||
os.system("systemctl enable cloud-init.target")
|
||||
ret = int(ret >> 8)
|
||||
if ret > 0:
|
||||
self.fail(f"Enable of cloud-init failed with exit code {ret}")
|
||||
|
||||
# Set the timezone to UTC
|
||||
os.system("ln -sf ../usr/share/zoneinfo/UTC /etc/localtime")
|
||||
ret = os.system(
|
||||
f"ln -sf ../usr/share/zoneinfo/{default_local_time} /etc/localtime"
|
||||
)
|
||||
ret = int(ret >> 8)
|
||||
if ret > 0:
|
||||
self.fail(f"Localtime update failed with exit code {ret}")
|
||||
|
||||
def cleanup(self):
|
||||
"""
|
||||
|
|
|
@ -12,15 +12,7 @@ fi
|
|||
|
||||
pushd /usr/share/pvc
|
||||
|
||||
case "$( cat /etc/debian_version )" in
|
||||
10.*|11.*)
|
||||
# Debian 10 & 11
|
||||
./pvcapid-manage_legacy.py db upgrade
|
||||
;;
|
||||
*)
|
||||
# Debian 12+
|
||||
flask --app ./pvcapid-manage_flask.py db upgrade
|
||||
;;
|
||||
esac
|
||||
export FLASK_APP=./pvcapid-manage-flask.py
|
||||
flask db upgrade
|
||||
|
||||
popd
|
||||
|
|
|
@ -1,33 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# pvcapid-manage_legacy.py - PVC Database management tasks (Legacy)
|
||||
# Part of the Parallel Virtual Cluster (PVC) system
|
||||
#
|
||||
# Copyright (C) 2018-2024 Joshua M. Boniface <joshua@boniface.me>
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, version 3.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
#
|
||||
###############################################################################
|
||||
|
||||
from flask_migrate import Migrate, MigrateCommand, Manager
|
||||
|
||||
from pvcapid.flaskapi import app, db
|
||||
from pvcapid.models import * # noqa F401,F403
|
||||
|
||||
migrate = Migrate(app, db)
|
||||
manager = Manager(app)
|
||||
|
||||
manager.add_command("db", MigrateCommand)
|
||||
|
||||
if __name__ == "__main__":
|
||||
manager.run()
|
|
@ -19,6 +19,13 @@
|
|||
#
|
||||
###############################################################################
|
||||
|
||||
import pvcapid.Daemon # noqa: F401
|
||||
import sys
|
||||
from os import path
|
||||
|
||||
# Ensure current directory (/usr/share/pvc) is in the system path for Gunicorn
|
||||
current_dir = path.dirname(path.abspath(__file__))
|
||||
sys.path.append(current_dir)
|
||||
|
||||
import pvcapid.Daemon # noqa: F401, E402
|
||||
|
||||
pvcapid.Daemon.entrypoint()
|
||||
|
|
|
@ -19,15 +19,13 @@
|
|||
#
|
||||
###############################################################################
|
||||
|
||||
|
||||
import subprocess
|
||||
from ssl import SSLContext, TLSVersion
|
||||
|
||||
from distutils.util import strtobool as dustrtobool
|
||||
|
||||
import daemon_lib.config as cfg
|
||||
|
||||
# Daemon version
|
||||
version = "0.9.89"
|
||||
version = "0.9.100~git-73c0834f"
|
||||
|
||||
# API version
|
||||
API_VERSION = 1.0
|
||||
|
@ -53,7 +51,6 @@ def strtobool(stringv):
|
|||
# Configuration Parsing
|
||||
##########################################################
|
||||
|
||||
|
||||
# Get our configuration
|
||||
config = cfg.get_configuration()
|
||||
config["daemon_name"] = "pvcapid"
|
||||
|
@ -61,22 +58,16 @@ config["daemon_version"] = version
|
|||
|
||||
|
||||
##########################################################
|
||||
# Entrypoint
|
||||
# Flask App Creation for Gunicorn
|
||||
##########################################################
|
||||
|
||||
|
||||
def entrypoint():
|
||||
import pvcapid.flaskapi as pvc_api # noqa: E402
|
||||
|
||||
if config["api_ssl_enabled"]:
|
||||
context = SSLContext()
|
||||
context.minimum_version = TLSVersion.TLSv1
|
||||
context.get_ca_certs()
|
||||
context.load_cert_chain(
|
||||
config["api_ssl_cert_file"], keyfile=config["api_ssl_key_file"]
|
||||
)
|
||||
else:
|
||||
context = None
|
||||
def create_app():
|
||||
"""
|
||||
Create and return the Flask app and SSL context if necessary.
|
||||
"""
|
||||
# Import the Flask app from pvcapid.flaskapi after adjusting the path
|
||||
import pvcapid.flaskapi as pvc_api
|
||||
|
||||
# Print our startup messages
|
||||
print("")
|
||||
|
@ -102,9 +93,69 @@ def entrypoint():
|
|||
print("")
|
||||
|
||||
pvc_api.celery_startup()
|
||||
pvc_api.app.run(
|
||||
config["api_listen_address"],
|
||||
config["api_listen_port"],
|
||||
threaded=True,
|
||||
ssl_context=context,
|
||||
)
|
||||
|
||||
return pvc_api.app
|
||||
|
||||
|
||||
##########################################################
|
||||
# Entrypoint
|
||||
##########################################################
|
||||
|
||||
|
||||
def entrypoint():
    """
    Start the API daemon.

    When config["debug"] is set, the Flask app returned by create_app() is
    served directly with app.run(), using an SSLContext if
    config["api_ssl_enabled"] is set. Otherwise Gunicorn is launched as a
    subprocess against "pvcapid.Daemon:create_app()", with the certificate
    and key files passed on its command line when SSL is enabled.
    """
    if not config["debug"]:
        # Build the command to run Gunicorn
        listen_on = "{}:{}".format(
            config["api_listen_address"], config["api_listen_port"]
        )
        gunicorn_cmd = ["gunicorn"]
        gunicorn_cmd += ["--workers", "1"]
        gunicorn_cmd += ["--threads", "8"]
        gunicorn_cmd += ["--timeout", "86400"]
        gunicorn_cmd += ["--bind", listen_on]
        gunicorn_cmd += ["pvcapid.Daemon:create_app()"]
        gunicorn_cmd += ["--log-level", "info"]
        gunicorn_cmd += ["--access-logfile", "-"]
        gunicorn_cmd += ["--error-logfile", "-"]

        if config["api_ssl_enabled"]:
            gunicorn_cmd.extend(
                [
                    "--certfile",
                    config["api_ssl_cert_file"],
                    "--keyfile",
                    config["api_ssl_key_file"],
                ]
            )

        # Run Gunicorn; its exit status is not inspected here
        try:
            subprocess.run(gunicorn_cmd)
        except KeyboardInterrupt:
            exit(0)
        except Exception as e:
            print(e)
            exit(1)
        return

    # Debug mode: serve the Flask app directly
    app = create_app()

    ssl_context = None
    if config["api_ssl_enabled"]:
        ssl_context = SSLContext()
        ssl_context.minimum_version = TLSVersion.TLSv1
        ssl_context.get_ca_certs()
        ssl_context.load_cert_chain(
            config["api_ssl_cert_file"], keyfile=config["api_ssl_key_file"]
        )

    app.run(
        config["api_listen_address"],
        config["api_listen_port"],
        threaded=True,
        ssl_context=ssl_context,
    )
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -641,6 +641,7 @@ def vm_define(
|
|||
selector,
|
||||
autostart,
|
||||
migration_method,
|
||||
migration_max_downtime=300,
|
||||
user_tags=[],
|
||||
protected_tags=[],
|
||||
):
|
||||
|
@ -668,6 +669,7 @@ def vm_define(
|
|||
selector,
|
||||
autostart,
|
||||
migration_method,
|
||||
migration_max_downtime,
|
||||
profile=None,
|
||||
tags=tags,
|
||||
)
|
||||
|
@ -763,6 +765,134 @@ def vm_restore(
|
|||
return output, retcode
|
||||
|
||||
|
||||
@ZKConnection(config)
def create_vm_snapshot(
    zkhandler,
    domain,
    snapshot_name=None,
):
    """
    Take a snapshot of a VM.

    Calls pvc_vm.create_vm_snapshot and returns a tuple of
    ({"message": <text>}, <return code>), where the return code is 200 when
    the call reports success and 400 otherwise.
    """
    success, message = pvc_vm.create_vm_snapshot(zkhandler, domain, snapshot_name)
    status = 200 if success else 400

    # Double quotes in the message are replaced with single quotes
    return {"message": message.replace('"', "'")}, status
|
||||
|
||||
|
||||
@ZKConnection(config)
def remove_vm_snapshot(
    zkhandler,
    domain,
    snapshot_name,
):
    """
    Remove a snapshot of a VM.

    Calls pvc_vm.remove_vm_snapshot and returns a tuple of
    ({"message": <text>}, <return code>), where the return code is 200 when
    the call reports success and 400 otherwise.
    """
    retflag, retdata = pvc_vm.remove_vm_snapshot(
        zkhandler,
        domain,
        snapshot_name,
    )

    if retflag:
        retcode = 200
    else:
        retcode = 400

    # Double quotes in the message are replaced with single quotes
    output = {"message": retdata.replace('"', "'")}
    return output, retcode
|
||||
|
||||
|
||||
@ZKConnection(config)
def rollback_vm_snapshot(
    zkhandler,
    domain,
    snapshot_name,
):
    """
    Roll back to a snapshot of a VM.

    Calls pvc_vm.rollback_vm_snapshot and returns a tuple of
    ({"message": <text>}, <return code>), where the return code is 200 when
    the call reports success and 400 otherwise.
    """
    success, message = pvc_vm.rollback_vm_snapshot(zkhandler, domain, snapshot_name)
    status = 200 if success else 400

    # Double quotes in the message are replaced with single quotes
    return {"message": message.replace('"', "'")}, status
|
||||
|
||||
|
||||
@ZKConnection(config)
def export_vm_snapshot(
    zkhandler,
    domain,
    snapshot_name,
    export_path,
    incremental_parent=None,
):
    """
    Export a snapshot of a VM to files.

    Calls pvc_vm.export_vm_snapshot and returns a tuple of
    ({"message": <text>}, <return code>), where the return code is 200 when
    the call reports success and 400 otherwise.
    """
    success, message = pvc_vm.export_vm_snapshot(
        zkhandler, domain, snapshot_name, export_path, incremental_parent
    )
    status = 200 if success else 400

    # Double quotes in the message are replaced with single quotes
    return {"message": message.replace('"', "'")}, status
|
||||
|
||||
|
||||
@ZKConnection(config)
def import_vm_snapshot(
    zkhandler,
    domain,
    snapshot_name,
    export_path,
    retain_snapshot=False,
):
    """
    Import a snapshot of a VM from files.

    Calls pvc_vm.import_vm_snapshot and returns a tuple of
    ({"message": <text>}, <return code>), where the return code is 200 when
    the call reports success and 400 otherwise.
    """
    success, message = pvc_vm.import_vm_snapshot(
        zkhandler, domain, snapshot_name, export_path, retain_snapshot
    )
    status = 200 if success else 400

    # Double quotes in the message are replaced with single quotes
    return {"message": message.replace('"', "'")}, status
|
||||
|
||||
|
||||
@ZKConnection(config)
|
||||
def vm_attach_device(zkhandler, vm, device_spec_xml):
|
||||
"""
|
||||
|
@ -826,6 +956,7 @@ def get_vm_meta(zkhandler, vm):
|
|||
domain_node_selector,
|
||||
domain_node_autostart,
|
||||
domain_migrate_method,
|
||||
domain_migrate_max_downtime,
|
||||
) = pvc_common.getDomainMetadata(zkhandler, dom_uuid)
|
||||
|
||||
retcode = 200
|
||||
|
@ -835,6 +966,7 @@ def get_vm_meta(zkhandler, vm):
|
|||
"node_selector": domain_node_selector.lower(),
|
||||
"node_autostart": domain_node_autostart,
|
||||
"migration_method": domain_migrate_method.lower(),
|
||||
"migration_max_downtime": int(domain_migrate_max_downtime),
|
||||
}
|
||||
|
||||
return retdata, retcode
|
||||
|
@ -842,7 +974,14 @@ def get_vm_meta(zkhandler, vm):
|
|||
|
||||
@ZKConnection(config)
|
||||
def update_vm_meta(
|
||||
zkhandler, vm, limit, selector, autostart, provisioner_profile, migration_method
|
||||
zkhandler,
|
||||
vm,
|
||||
limit,
|
||||
selector,
|
||||
autostart,
|
||||
provisioner_profile,
|
||||
migration_method,
|
||||
migration_max_downtime,
|
||||
):
|
||||
"""
|
||||
Update metadata of a VM.
|
||||
|
@ -858,7 +997,14 @@ def update_vm_meta(
|
|||
autostart = False
|
||||
|
||||
retflag, retdata = pvc_vm.modify_vm_metadata(
|
||||
zkhandler, vm, limit, selector, autostart, provisioner_profile, migration_method
|
||||
zkhandler,
|
||||
vm,
|
||||
limit,
|
||||
selector,
|
||||
autostart,
|
||||
provisioner_profile,
|
||||
migration_method,
|
||||
migration_max_downtime,
|
||||
)
|
||||
|
||||
if retflag:
|
||||
|
@ -1851,11 +1997,29 @@ def ceph_volume_list(zkhandler, pool=None, limit=None, is_fuzzy=True):
|
|||
|
||||
|
||||
@ZKConnection(config)
|
||||
def ceph_volume_add(zkhandler, pool, name, size):
|
||||
def ceph_volume_scan(zkhandler, pool, name):
    """
    (Re)scan a Ceph RBD volume for stats in the PVC Ceph storage cluster.

    Passes the request through to pvc_ceph.scan_volume() and converts its
    (flag, message) result into an API-style response.

    Returns a tuple of ({"message": <str>}, <int>), where the integer is 200
    when the flag is truthy and 400 otherwise.
    """
    succeeded, message = pvc_ceph.scan_volume(zkhandler, pool, name)

    status = 200 if succeeded else 400

    # Double quotes in the message are swapped for single quotes before returning
    return {"message": message.replace('"', "'")}, status
|
||||
|
||||
|
||||
@ZKConnection(config)
|
||||
def ceph_volume_add(zkhandler, pool, name, size, force_flag=False):
|
||||
"""
|
||||
Add a Ceph RBD volume to the PVC Ceph storage cluster.
|
||||
"""
|
||||
retflag, retdata = pvc_ceph.add_volume(zkhandler, pool, name, size)
|
||||
retflag, retdata = pvc_ceph.add_volume(
|
||||
zkhandler, pool, name, size, force_flag=force_flag
|
||||
)
|
||||
|
||||
if retflag:
|
||||
retcode = 200
|
||||
|
@ -1867,11 +2031,13 @@ def ceph_volume_add(zkhandler, pool, name, size):
|
|||
|
||||
|
||||
@ZKConnection(config)
|
||||
def ceph_volume_clone(zkhandler, pool, name, source_volume):
|
||||
def ceph_volume_clone(zkhandler, pool, name, source_volume, force_flag):
|
||||
"""
|
||||
Clone a Ceph RBD volume to a new volume on the PVC Ceph storage cluster.
|
||||
"""
|
||||
retflag, retdata = pvc_ceph.clone_volume(zkhandler, pool, source_volume, name)
|
||||
retflag, retdata = pvc_ceph.clone_volume(
|
||||
zkhandler, pool, source_volume, name, force_flag=force_flag
|
||||
)
|
||||
|
||||
if retflag:
|
||||
retcode = 200
|
||||
|
@ -1883,11 +2049,13 @@ def ceph_volume_clone(zkhandler, pool, name, source_volume):
|
|||
|
||||
|
||||
@ZKConnection(config)
|
||||
def ceph_volume_resize(zkhandler, pool, name, size):
|
||||
def ceph_volume_resize(zkhandler, pool, name, size, force_flag):
|
||||
"""
|
||||
Resize an existing Ceph RBD volume in the PVC Ceph storage cluster.
|
||||
"""
|
||||
retflag, retdata = pvc_ceph.resize_volume(zkhandler, pool, name, size)
|
||||
retflag, retdata = pvc_ceph.resize_volume(
|
||||
zkhandler, pool, name, size, force_flag=force_flag
|
||||
)
|
||||
|
||||
if retflag:
|
||||
retcode = 200
|
||||
|
@ -2159,6 +2327,22 @@ def ceph_volume_snapshot_rename(zkhandler, pool, volume, name, new_name):
|
|||
return output, retcode
|
||||
|
||||
|
||||
@ZKConnection(config)
def ceph_volume_snapshot_rollback(zkhandler, pool, volume, name):
    """
    Roll back a Ceph RBD volume to a given snapshot in the PVC Ceph storage cluster.

    Passes the request through to pvc_ceph.rollback_snapshot() and converts
    its (flag, message) result into an API-style response.

    Returns a tuple of ({"message": <str>}, <int>), where the integer is 200
    when the flag is truthy and 400 otherwise.
    """
    succeeded, message = pvc_ceph.rollback_snapshot(zkhandler, pool, volume, name)

    status = 200 if succeeded else 400

    # Double quotes in the message are swapped for single quotes before returning
    return {"message": message.replace('"', "'")}, status
|
||||
|
||||
|
||||
@ZKConnection(config)
|
||||
def ceph_volume_snapshot_remove(zkhandler, pool, volume, name):
|
||||
"""
|
||||
|
|
|
@ -36,6 +36,7 @@ class DBSystemTemplate(db.Model):
|
|||
node_selector = db.Column(db.Text)
|
||||
node_autostart = db.Column(db.Boolean, nullable=False)
|
||||
migration_method = db.Column(db.Text)
|
||||
migration_max_downtime = db.Column(db.Integer, default=300, server_default="300")
|
||||
ova = db.Column(db.Integer, db.ForeignKey("ova.id"), nullable=True)
|
||||
|
||||
def __init__(
|
||||
|
@ -50,6 +51,7 @@ class DBSystemTemplate(db.Model):
|
|||
node_selector,
|
||||
node_autostart,
|
||||
migration_method,
|
||||
migration_max_downtime,
|
||||
ova=None,
|
||||
):
|
||||
self.name = name
|
||||
|
@ -62,6 +64,7 @@ class DBSystemTemplate(db.Model):
|
|||
self.node_selector = node_selector
|
||||
self.node_autostart = node_autostart
|
||||
self.migration_method = migration_method
|
||||
self.migration_max_downtime = migration_max_downtime
|
||||
self.ova = ova
|
||||
|
||||
def __repr__(self):
|
||||
|
|
|
@ -125,7 +125,7 @@ def list_template(limit, table, is_fuzzy=True):
|
|||
args = (template_data["id"],)
|
||||
cur.execute(query, args)
|
||||
disks = cur.fetchall()
|
||||
data[template_id]["disks"] = disks
|
||||
data[template_id]["disks"] = sorted(disks, key=lambda x: x["disk_id"])
|
||||
|
||||
close_database(conn, cur)
|
||||
|
||||
|
@ -221,6 +221,7 @@ def create_template_system(
|
|||
node_selector=None,
|
||||
node_autostart=False,
|
||||
migration_method=None,
|
||||
migration_max_downtime=None,
|
||||
ova=None,
|
||||
):
|
||||
if list_template_system(name, is_fuzzy=False)[-1] != 404:
|
||||
|
@ -231,7 +232,7 @@ def create_template_system(
|
|||
if node_selector == "none":
|
||||
node_selector = None
|
||||
|
||||
query = "INSERT INTO system_template (name, vcpu_count, vram_mb, serial, vnc, vnc_bind, node_limit, node_selector, node_autostart, migration_method, ova) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);"
|
||||
query = "INSERT INTO system_template (name, vcpu_count, vram_mb, serial, vnc, vnc_bind, node_limit, node_selector, node_autostart, migration_method, migration_max_downtime, ova) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);"
|
||||
args = (
|
||||
name,
|
||||
vcpu_count,
|
||||
|
@ -243,6 +244,7 @@ def create_template_system(
|
|||
node_selector,
|
||||
node_autostart,
|
||||
migration_method,
|
||||
migration_max_downtime,
|
||||
ova,
|
||||
)
|
||||
|
||||
|
@ -282,27 +284,28 @@ def create_template_network(name, mac_template=None):
|
|||
return retmsg, retcode
|
||||
|
||||
|
||||
def create_template_network_element(name, vni):
|
||||
def create_template_network_element(name, vni, permit_duplicate=False):
|
||||
if list_template_network(name, is_fuzzy=False)[-1] != 200:
|
||||
retmsg = {"message": 'The network template "{}" does not exist.'.format(name)}
|
||||
retcode = 400
|
||||
return retmsg, retcode
|
||||
|
||||
networks, code = list_template_network_vnis(name)
|
||||
if code != 200:
|
||||
networks = []
|
||||
found_vni = False
|
||||
for network in networks:
|
||||
if network["vni"] == vni:
|
||||
found_vni = True
|
||||
if found_vni:
|
||||
retmsg = {
|
||||
"message": 'The VNI "{}" in network template "{}" already exists.'.format(
|
||||
vni, name
|
||||
)
|
||||
}
|
||||
retcode = 400
|
||||
return retmsg, retcode
|
||||
if not permit_duplicate:
|
||||
networks, code = list_template_network_vnis(name)
|
||||
if code != 200:
|
||||
networks = []
|
||||
found_vni = False
|
||||
for network in networks:
|
||||
if network["vni"] == vni:
|
||||
found_vni = True
|
||||
if found_vni:
|
||||
retmsg = {
|
||||
"message": 'The VNI "{}" in network template "{}" already exists.'.format(
|
||||
vni, name
|
||||
)
|
||||
}
|
||||
retcode = 400
|
||||
return retmsg, retcode
|
||||
|
||||
conn, cur = open_database(config)
|
||||
try:
|
||||
|
@ -438,6 +441,7 @@ def modify_template_system(
|
|||
node_selector=None,
|
||||
node_autostart=None,
|
||||
migration_method=None,
|
||||
migration_max_downtime=None,
|
||||
):
|
||||
if list_template_system(name, is_fuzzy=False)[-1] != 200:
|
||||
retmsg = {"message": 'The system template "{}" does not exist.'.format(name)}
|
||||
|
@ -505,6 +509,11 @@ def modify_template_system(
|
|||
if migration_method is not None:
|
||||
fields.append({"field": "migration_method", "data": migration_method})
|
||||
|
||||
if migration_max_downtime is not None:
|
||||
fields.append(
|
||||
{"field": "migration_max_downtime", "data": int(migration_max_downtime)}
|
||||
)
|
||||
|
||||
conn, cur = open_database(config)
|
||||
try:
|
||||
for field in fields:
|
||||
|
|
|
@ -1,13 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>PVC Client API Documentation</title>
|
||||
<meta charset="utf-8"/>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<style> body { margin: 0; padding: 0; } </style>
|
||||
</head>
|
||||
<body>
|
||||
<redoc spec-url='./swagger.json' hide-loading></redoc>
|
||||
<script src="https://rebilly.github.io/ReDoc/releases/latest/redoc.min.js"> </script>
|
||||
</body>
|
||||
</html>
|
File diff suppressed because it is too large
Load Diff
|
@ -13,6 +13,8 @@ else
|
|||
fi
|
||||
|
||||
KEEP_ARTIFACTS=""
|
||||
API_ONLY=""
|
||||
PRIMARY_NODE=""
|
||||
if [[ -n ${1} ]]; then
|
||||
for arg in ${@}; do
|
||||
case ${arg} in
|
||||
|
@ -20,33 +22,45 @@ if [[ -n ${1} ]]; then
|
|||
KEEP_ARTIFACTS="y"
|
||||
shift
|
||||
;;
|
||||
-a|--api-only)
|
||||
API_ONLY="y"
|
||||
shift
|
||||
;;
|
||||
-p=*|--become-primary=*)
|
||||
PRIMARY_NODE=$( awk -F'=' '{ print $NF }' <<<"${arg}" )
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
done
|
||||
fi
|
||||
|
||||
HOSTS=( ${@} )
|
||||
echo "> Deploying to host(s): ${HOSTS[@]}"
|
||||
echo "Deploying to host(s): ${HOSTS[@]}"
|
||||
if [[ -n ${PRIMARY_NODE} ]]; then
|
||||
echo "Will become primary on ${PRIMARY_NODE} after updating it"
|
||||
fi
|
||||
|
||||
# Move to repo root if we're not
|
||||
pushd $( git rev-parse --show-toplevel ) &>/dev/null
|
||||
|
||||
# Prepare code
|
||||
echo "Preparing code (format and lint)..."
|
||||
echo "> Preparing code (format and lint)..."
|
||||
./format || exit 1
|
||||
./lint || exit 1
|
||||
|
||||
# Build the packages
|
||||
echo -n "Building packages..."
|
||||
echo -n "> Building packages..."
|
||||
version="$( ./build-unstable-deb.sh 2>/dev/null )"
|
||||
echo " done. Package version ${version}."
|
||||
|
||||
# Install the client(s) locally
|
||||
echo -n "Installing client packages locally..."
|
||||
echo -n "> Installing client packages locally..."
|
||||
$SUDO dpkg -i --force-all ../pvc-client*_${version}*.deb &>/dev/null
|
||||
echo " done".
|
||||
|
||||
echo "> Copying packages..."
|
||||
for HOST in ${HOSTS[@]}; do
|
||||
echo -n "Copying packages to host ${HOST}..."
|
||||
echo -n ">>> Copying packages to host ${HOST}..."
|
||||
ssh $HOST $SUDO rm -rf /tmp/pvc &>/dev/null
|
||||
ssh $HOST mkdir /tmp/pvc &>/dev/null
|
||||
scp ../pvc-*_${version}*.deb $HOST:/tmp/pvc/ &>/dev/null
|
||||
|
@ -57,26 +71,34 @@ if [[ -z ${KEEP_ARTIFACTS} ]]; then
|
|||
fi
|
||||
|
||||
for HOST in ${HOSTS[@]}; do
|
||||
echo "> Deploying packages to host ${HOST}"
|
||||
echo -n "Installing packages..."
|
||||
echo "> Deploying packages on host ${HOST}"
|
||||
echo -n ">>> Installing packages..."
|
||||
ssh $HOST $SUDO dpkg -i --force-all /tmp/pvc/*.deb &>/dev/null
|
||||
ssh $HOST rm -rf /tmp/pvc &>/dev/null
|
||||
echo " done."
|
||||
echo -n "Restarting PVC daemons..."
|
||||
echo -n ">>> Restarting PVC daemons..."
|
||||
ssh $HOST $SUDO systemctl restart pvcapid &>/dev/null
|
||||
sleep 2
|
||||
ssh $HOST $SUDO systemctl restart pvcworkerd &>/dev/null
|
||||
if [[ -z ${API_ONLY} ]]; then
|
||||
sleep 2
|
||||
ssh $HOST $SUDO systemctl restart pvchealthd &>/dev/null
|
||||
# sleep 2
|
||||
# ssh $HOST $SUDO systemctl restart pvcnoded &>/dev/null
|
||||
sleep 2
|
||||
ssh $HOST $SUDO systemctl restart pvcnoded &>/dev/null
|
||||
echo " done."
|
||||
echo -n "Waiting for node daemon to be running..."
|
||||
echo -n ">>> Waiting for node daemon to be running..."
|
||||
while [[ $( ssh $HOST "pvc -q node list -f json ${HOST%%.*} | jq -r '.[].daemon_state'" 2>/dev/null ) != "run" ]]; do
|
||||
sleep 5
|
||||
echo -n "."
|
||||
done
|
||||
fi
|
||||
echo " done."
|
||||
if [[ -n ${PRIMARY_NODE} && ${PRIMARY_NODE} == ${HOST} ]]; then
|
||||
echo -n ">>> Setting node $HOST to primary coordinator state... "
|
||||
ssh $HOST pvc -q node primary --wait &>/dev/null
|
||||
ssh $HOST $SUDO systemctl restart pvcworkerd &>/dev/null
|
||||
echo "done."
|
||||
fi
|
||||
done
|
||||
|
||||
popd &>/dev/null
|
||||
|
|
|
@ -671,9 +671,9 @@ def cli_cluster_maintenance_off():
|
|||
@format_opt(
|
||||
{
|
||||
"pretty": cli_cluster_task_format_pretty,
|
||||
"raw": lambda d: "\n".join([t["id"] for t in d])
|
||||
if isinstance(d, list)
|
||||
else d["state"],
|
||||
"raw": lambda d: (
|
||||
"\n".join([t["id"] for t in d]) if isinstance(d, list) else d["state"]
|
||||
),
|
||||
"json": lambda d: jdumps(d),
|
||||
"json-pretty": lambda d: jdumps(d, indent=2),
|
||||
}
|
||||
|
@ -687,7 +687,10 @@ def cli_cluster_task(task_id, wait_flag, format_function):
|
|||
|
||||
if wait_flag:
|
||||
# First validate that this is actually a valid task that is running
|
||||
echo(CLI_CONFIG, "Querying cluster for tasks...", newline=False)
|
||||
retcode, retdata = pvc.lib.common.task_status(CLI_CONFIG, None)
|
||||
echo(CLI_CONFIG, " done.")
|
||||
echo(CLI_CONFIG, "")
|
||||
if task_id in [i["id"] for i in retdata]:
|
||||
task = [i for i in retdata if i["id"] == task_id][0]
|
||||
retmsg = wait_for_celery_task(
|
||||
|
@ -699,7 +702,10 @@ def cli_cluster_task(task_id, wait_flag, format_function):
|
|||
retmsg = f"No task with ID {task_id} found."
|
||||
finish(retcode, retmsg)
|
||||
else:
|
||||
echo(CLI_CONFIG, "Querying cluster for tasks...", newline=False)
|
||||
retcode, retdata = pvc.lib.common.task_status(CLI_CONFIG, task_id)
|
||||
echo(CLI_CONFIG, " done.")
|
||||
echo(CLI_CONFIG, "")
|
||||
finish(retcode, retdata, format_function)
|
||||
|
||||
|
||||
|
@ -718,6 +724,33 @@ def cli_node():
|
|||
pass
|
||||
|
||||
|
||||
###############################################################################
|
||||
# > pvc node is-primary
|
||||
###############################################################################
|
||||
@click.command(
    name="is-primary",
    short_help="Check if this node is primary coordinator.",
)
@connection_req
@click.argument("node", default=DEFAULT_NODE_HOSTNAME)
def cli_node_is_primary(
    node,
):
    """
    Check if NODE (or this node if unset) is the current primary coordinator.

    Designed for scripting; returns no visible data, but the return code is 0 if the node
    is primary, and 1 if it is not.
    """

    # Only the second element of the returned pair is used; the first is discarded
    _, current_primary = pvc.lib.cluster.get_primary_node(CLI_CONFIG)

    # Exit status is the entire output of this command: 0 on a match, 1 otherwise
    exit(0 if current_primary == node else 1)
|
||||
|
||||
|
||||
###############################################################################
|
||||
# > pvc node primary
|
||||
###############################################################################
|
||||
|
@ -886,6 +919,7 @@ def cli_node_ready(
|
|||
"--lines",
|
||||
"lines",
|
||||
default=None,
|
||||
type=int,
|
||||
show_default=False,
|
||||
help="Display this many log lines from the end of the log buffer. [default: 1000; with follow: 10]",
|
||||
)
|
||||
|
@ -1098,6 +1132,14 @@ def cli_vm():
|
|||
type=click.Choice(["none", "live", "shutdown"]),
|
||||
help="The preferred migration method of the VM between nodes; saved with VM.",
|
||||
)
|
||||
@click.option(
|
||||
"-d",
|
||||
"--max-downtime",
|
||||
"migration_max_downtime",
|
||||
default=300,
|
||||
show_default=True,
|
||||
help="The maximum time in milliseconds that a VM can be down for during a live migration; busy VMs may require a larger downtime.",
|
||||
)
|
||||
@click.option(
|
||||
"-g",
|
||||
"--tag",
|
||||
|
@ -1122,6 +1164,7 @@ def cli_vm_define(
|
|||
node_selector,
|
||||
node_autostart,
|
||||
migration_method,
|
||||
migration_max_downtime,
|
||||
user_tags,
|
||||
protected_tags,
|
||||
):
|
||||
|
@ -1135,10 +1178,12 @@ def cli_vm_define(
|
|||
* "load": choose the node with the lowest current load average
|
||||
* "vms": choose the node with the least number of provisioned VMs
|
||||
|
||||
For most clusters, "mem" should be sufficient, but others may be used based on the cluster workload and available resources. The following caveats should be considered:
|
||||
For most clusters, the migration method selector ("--method"/"-m") "mem" should be sufficient, but others may be used based on the cluster workload and available resources. The following caveats should be considered:
|
||||
* "mem" looks at the free memory of the node in general, ignoring the amount provisioned to VMs; if any VM's internal memory usage changes, this value would be affected.
|
||||
* "memprov" looks at the provisioned memory, not the allocated memory; thus, stopped or disabled VMs are counted towards a node's memory for this selector, even though their memory is not actively in use.
|
||||
* "load" looks at the system load of the node in general, ignoring load in any particular VMs; if any VM's CPU usage changes, this value would be affected. This might be preferable on clusters with some very CPU intensive VMs.
|
||||
|
||||
For most VMs, the 300ms default maximum downtime ("--max-downtime"/"-d") should be sufficient. However very busy VMs with a lot of memory pressure or CPU load may require a larger downtime to properly migrate. Generally, keep this at the default unless you know the VM will be extremely busy, or you find you have problems migrating it later. Reasonable values range from 100ms to 2000ms (2 seconds).
|
||||
"""
|
||||
|
||||
# Open the XML file
|
||||
|
@ -1160,6 +1205,7 @@ def cli_vm_define(
|
|||
node_selector,
|
||||
node_autostart,
|
||||
migration_method,
|
||||
migration_max_downtime,
|
||||
user_tags,
|
||||
protected_tags,
|
||||
)
|
||||
|
@ -1205,6 +1251,13 @@ def cli_vm_define(
|
|||
type=click.Choice(["none", "live", "shutdown"]),
|
||||
help="The preferred migration method of the VM between nodes.",
|
||||
)
|
||||
@click.option(
|
||||
"-d",
|
||||
"--max-downtime",
|
||||
"migration_max_downtime",
|
||||
default=None,
|
||||
help="The maximum time in milliseconds that a VM can be down for during a live migration; busy VMs may require a larger downtime.",
|
||||
)
|
||||
@click.option(
|
||||
"-p",
|
||||
"--profile",
|
||||
|
@ -1220,12 +1273,13 @@ def cli_vm_meta(
|
|||
node_selector,
|
||||
node_autostart,
|
||||
migration_method,
|
||||
migration_max_downtime,
|
||||
provisioner_profile,
|
||||
):
|
||||
"""
|
||||
Modify the PVC metadata of existing virtual machine DOMAIN. At least one option to update must be specified. DOMAIN may be a UUID or name.
|
||||
|
||||
For details on the "--node-selector"/"-s" values, please see help for the command "pvc vm define".
|
||||
For details on the available option values, please see help for the command "pvc vm define".
|
||||
"""
|
||||
|
||||
if (
|
||||
|
@ -1233,6 +1287,7 @@ def cli_vm_meta(
|
|||
and node_selector is None
|
||||
and node_autostart is None
|
||||
and migration_method is None
|
||||
and migration_max_downtime is None
|
||||
and provisioner_profile is None
|
||||
):
|
||||
finish(False, "At least one metadata option must be specified to update.")
|
||||
|
@ -1244,6 +1299,7 @@ def cli_vm_meta(
|
|||
node_selector,
|
||||
node_autostart,
|
||||
migration_method,
|
||||
migration_max_downtime,
|
||||
provisioner_profile,
|
||||
)
|
||||
finish(retcode, retmsg)
|
||||
|
@ -1720,7 +1776,7 @@ def cli_vm_unmigrate(domain, wait, force_live):
|
|||
is_flag=True,
|
||||
default=True,
|
||||
show_default=True,
|
||||
help="Wait or don't wait for task to complete, showing progress",
|
||||
help="Wait or don't wait for task to complete, showing progress if waiting",
|
||||
)
|
||||
def cli_vm_flush_locks(domain, wait_flag):
|
||||
"""
|
||||
|
@ -1729,7 +1785,233 @@ def cli_vm_flush_locks(domain, wait_flag):
|
|||
NOTE: This is a task-based command. The "--wait" flag (default) will block and show progress. Specifying the "--no-wait" flag will return immediately with a job ID instead, which can be queried externally later.
|
||||
"""
|
||||
|
||||
retcode, retmsg = pvc.lib.vm.vm_locks(CLI_CONFIG, domain, wait_flag)
|
||||
retcode, retmsg = pvc.lib.vm.vm_locks(CLI_CONFIG, domain, wait_flag=wait_flag)
|
||||
|
||||
if retcode and wait_flag:
|
||||
retmsg = wait_for_celery_task(CLI_CONFIG, retmsg)
|
||||
finish(retcode, retmsg)
|
||||
|
||||
|
||||
###############################################################################
|
||||
# > pvc vm snapshot
|
||||
###############################################################################
|
||||
@click.group(
    context_settings=CONTEXT_SETTINGS,
    short_help="Manage snapshots for PVC VMs.",
    name="snapshot",
)
def cli_vm_snapshot():
    """
    Manage snapshots of VMs in a PVC cluster.
    """
    # Command group container only; this body carries no logic of its own.
    # NOTE(review): the subcommands are presumably attached elsewhere in the file.
|
||||
|
||||
|
||||
###############################################################################
|
||||
# > pvc vm snapshot create
|
||||
###############################################################################
|
||||
@click.command(name="create", short_help="Create a snapshot of a virtual machine.")
@connection_req
@click.argument("domain")
@click.argument("snapshot_name", required=False, default=None)
@click.option(
    "--wait/--no-wait",
    "wait_flag",
    is_flag=True,
    default=True,
    show_default=True,
    help="Wait or don't wait for task to complete, showing progress if waiting",
)
def cli_vm_snapshot_create(domain, snapshot_name, wait_flag):
    """
    Create a snapshot of the disks and XML configuration of virtual machine DOMAIN, with the
    optional name SNAPSHOT_NAME. DOMAIN may be a UUID or name.

    WARNING: RBD snapshots are crash-consistent but not filesystem-aware. If a snapshot was taken
    of a running VM, restoring that snapshot will be equivalent to having forcibly restarted the
    VM at the moment of the snapshot.
    """

    # SNAPSHOT_NAME is optional and is passed through as None when not given
    submitted, response = pvc.lib.vm.vm_create_snapshot(
        CLI_CONFIG,
        domain,
        snapshot_name=snapshot_name,
        wait_flag=wait_flag,
    )

    # Only a successful submission in wait mode is handed to the task waiter;
    # in every other case the response is reported unchanged
    should_wait = submitted and wait_flag
    if should_wait:
        response = wait_for_celery_task(CLI_CONFIG, response)

    finish(submitted, response)
|
||||
|
||||
|
||||
###############################################################################
|
||||
# > pvc vm snapshot remove
|
||||
###############################################################################
|
||||
@click.command(name="remove", short_help="Remove a snapshot of a virtual machine.")
@connection_req
@click.argument("domain")
@click.argument("snapshot_name")
@click.option(
    "--wait/--no-wait",
    "wait_flag",
    is_flag=True,
    default=True,
    show_default=True,
    help="Wait or don't wait for task to complete, showing progress if waiting",
)
# Prompt text fixed: it previously read "shapshot"
@confirm_opt("Remove snapshot {snapshot_name} of VM {domain}")
def cli_vm_snapshot_remove(domain, snapshot_name, wait_flag):
    """
    Remove the snapshot SNAPSHOT_NAME of the disks and XML configuration of virtual machine DOMAIN,
    DOMAIN may be a UUID or name.
    """

    retcode, retmsg = pvc.lib.vm.vm_remove_snapshot(
        CLI_CONFIG, domain, snapshot_name, wait_flag=wait_flag
    )

    # Only a successful submission in wait mode is handed to the task waiter;
    # otherwise the response is reported unchanged
    if retcode and wait_flag:
        retmsg = wait_for_celery_task(CLI_CONFIG, retmsg)
    finish(retcode, retmsg)
|
||||
|
||||
|
||||
###############################################################################
|
||||
# > pvc vm snapshot rollback
|
||||
###############################################################################
|
||||
@click.command(
    name="rollback", short_help="Roll back to a snapshot of a virtual machine."
)
@connection_req
@click.argument("domain")
@click.argument("snapshot_name")
@click.option(
    "--wait/--no-wait",
    "wait_flag",
    is_flag=True,
    default=True,
    show_default=True,
    help="Wait or don't wait for task to complete, showing progress if waiting",
)
@confirm_opt(
    "Roll back to snapshot {snapshot_name} of {domain} and lose all data and changes since this snapshot"
)
def cli_vm_snapshot_rollback(domain, snapshot_name, wait_flag):
    """
    Roll back to the snapshot SNAPSHOT_NAME of the disks and XML configuration of virtual machine DOMAIN,
    DOMAIN may be a UUID or name.
    """

    submitted, response = pvc.lib.vm.vm_rollback_snapshot(
        CLI_CONFIG,
        domain,
        snapshot_name,
        wait_flag=wait_flag,
    )

    # Only a successful submission in wait mode is handed to the task waiter;
    # in every other case the response is reported unchanged
    should_wait = submitted and wait_flag
    if should_wait:
        response = wait_for_celery_task(CLI_CONFIG, response)

    finish(submitted, response)
|
||||
|
||||
|
||||
###############################################################################
|
||||
# > pvc vm snapshot export
|
||||
###############################################################################
|
||||
@click.command(
    name="export", short_help="Export a snapshot of a virtual machine to files."
)
@connection_req
@click.argument("domain")
@click.argument("snapshot_name")
@click.argument("export_path")
@click.option(
    "-i",
    "--incremental",
    "incremental_parent",
    default=None,
    help="Perform an incremental volume export from this parent snapshot.",
)
@click.option(
    "--wait/--no-wait",
    "wait_flag",
    is_flag=True,
    default=True,
    show_default=True,
    help="Wait or don't wait for task to complete, showing progress if waiting",
)
def cli_vm_snapshot_export(
    domain, snapshot_name, export_path, incremental_parent, wait_flag
):
    """
    Export the (existing) snapshot SNAPSHOT_NAME of virtual machine DOMAIN to the absolute path EXPORT_PATH on the current PVC primary coordinator.
    DOMAIN may be a UUID or name.

    EXPORT_PATH must be a valid absolute directory path on the cluster "primary" coordinator (see "pvc node list") allowing writes from the API daemon (normally running as "root"). The EXPORT_PATH should be a large storage volume, ideally a remotely mounted filesystem (e.g. NFS, SSHFS, etc.) or non-Ceph-backed disk; PVC does not handle this path, that is up to the administrator to configure and manage.

    The export will include the VM configuration, metainfo, and a point-in-time snapshot of all attached RBD volumes.

    Incremental exports are possible by specifying the "-i"/"--incremental" option along with a parent snapshot name. To correctly import, that export must exist on EXPORT_PATH.

    Full export volume images are sparse-allocated, however it is recommended for safety to consider their maximum allocated size when allocating space for the EXPORT_PATH. Incremental volume images are generally small but are dependent entirely on the rate of data change in each volume.
    """

    # incremental_parent stays None unless "-i"/"--incremental" was given
    retcode, retmsg = pvc.lib.vm.vm_export_snapshot(
        CLI_CONFIG,
        domain,
        snapshot_name,
        export_path,
        incremental_parent=incremental_parent,
        wait_flag=wait_flag,
    )

    # Only a successful submission in wait mode is handed to the task waiter;
    # otherwise the response is reported unchanged
    if retcode and wait_flag:
        retmsg = wait_for_celery_task(CLI_CONFIG, retmsg)
    finish(retcode, retmsg)
|
||||
|
||||
|
||||
###############################################################################
|
||||
# > pvc vm snapshot import
|
||||
###############################################################################
|
||||
@click.command(name="import", short_help="Import a snapshot of a virtual machine.")
|
||||
@connection_req
|
||||
@click.argument("domain")
|
||||
@click.argument("snapshot_name")
|
||||
@click.argument("import_path")
|
||||
@click.option(
|
||||
"-r/-R",
|
||||
"--retain-snapshot/--remove-snapshot",
|
||||
"retain_snapshot",
|
||||
is_flag=True,
|
||||
default=True,
|
||||
help="Retain or remove restored (parent, if incremental) snapshot in Ceph.",
|
||||
)
|
||||
@click.option(
|
||||
"--wait/--no-wait",
|
||||
"wait_flag",
|
||||
is_flag=True,
|
||||
default=True,
|
||||
show_default=True,
|
||||
help="Wait or don't wait for task to complete, showing progress if waiting",
|
||||
)
|
||||
def cli_vm_snapshot_import(
|
||||
domain, snapshot_name, import_path, retain_snapshot, wait_flag
|
||||
):
|
||||
"""
|
||||
Import the snapshot SNAPSHOT_NAME of virtual machine DOMAIN from the absolute path IMPORT_PATH on the current PVC primary coordinator.
|
||||
DOMAIN may be a UUID or name.
|
||||
|
||||
IMPORT_PATH must be a valid absolute directory path on the cluster "primary" coordinator (see "pvc node list") allowing reads from the API daemon (normally running as "root"). The IMPORT_PATH should be a large storage volume, ideally a remotely mounted filesystem (e.g. NFS, SSHFS, etc.) or non-Ceph-backed disk; PVC does not handle this path, that is up to the administrator to configure and manage.
|
||||
|
||||
The import will include the VM configuration, metainfo, and the point-in-time snapshot of all attached RBD volumes. Incremental imports will be automatically handled.
|
||||
|
||||
A VM named DOMAIN or with the same UUID must not exist; if a VM with the same name or UUID already exists, it must be removed (or renamed and then undefined, to preserve volumes while freeing the UUID) before importing.
|
||||
|
||||
If the "-r"/"--retain-snapshot" option is specified (the default), for incremental imports, only the parent snapshot is kept; for full imports, the imported snapshot is kept. If the "-R"/"--remove-snapshot" option is specified, the imported snapshot is removed.
|
||||
|
||||
WARNING: The "-R"/"--remove-snapshot" option will invalidate any existing incremental snapshots based on the same incremental parent for the imported VM.
|
||||
"""
|
||||
|
||||
retcode, retmsg = pvc.lib.vm.vm_import_snapshot(
|
||||
CLI_CONFIG,
|
||||
domain,
|
||||
snapshot_name,
|
||||
import_path,
|
||||
retain_snapshot=retain_snapshot,
|
||||
wait_flag=wait_flag,
|
||||
)
|
||||
|
||||
if retcode and wait_flag:
|
||||
retmsg = wait_for_celery_task(CLI_CONFIG, retmsg)
|
||||
|
@ -1746,6 +2028,8 @@ def cli_vm_flush_locks(domain, wait_flag):
|
|||
)
|
||||
def cli_vm_backup():
|
||||
"""
|
||||
DEPRECATED: Use 'pvc vm snapshot' commands instead. 'pvc vm backup' commands will be removed in a future version.
|
||||
|
||||
Manage backups of VMs in a PVC cluster.
|
||||
"""
|
||||
pass
|
||||
|
@ -1775,6 +2059,8 @@ def cli_vm_backup():
|
|||
)
|
||||
def cli_vm_backup_create(domain, backup_path, incremental_parent, retain_snapshot):
|
||||
"""
|
||||
DEPRECATED: Use 'pvc vm snapshot' commands instead. 'pvc vm backup' commands will be removed in a future version.
|
||||
|
||||
Create a backup of virtual machine DOMAIN to BACKUP_PATH on the cluster primary coordinator. DOMAIN may be a UUID or name.
|
||||
|
||||
BACKUP_PATH must be a valid absolute directory path on the cluster "primary" coordinator (see "pvc node list") allowing writes from the API daemon (normally running as "root"). The BACKUP_PATH should be a large storage volume, ideally a remotely mounted filesystem (e.g. NFS, SSHFS, etc.) or non-Ceph-backed disk; PVC does not handle this path, that is up to the administrator to configure and manage.
|
||||
|
@ -1821,13 +2107,15 @@ def cli_vm_backup_create(domain, backup_path, incremental_parent, retain_snapsho
|
|||
)
|
||||
def cli_vm_backup_restore(domain, backup_datestring, backup_path, retain_snapshot):
|
||||
"""
|
||||
DEPRECATED: Use 'pvc vm snapshot' commands instead. 'pvc vm backup' commands will be removed in a future version.
|
||||
|
||||
Restore the backup BACKUP_DATESTRING of virtual machine DOMAIN stored in BACKUP_PATH on the cluster primary coordinator. DOMAIN may be a UUID or name.
|
||||
|
||||
BACKUP_PATH must be a valid absolute directory path on the cluster "primary" coordinator (see "pvc node list") allowing reads from the API daemon (normally running as "root"). The BACKUP_PATH should be a large storage volume, ideally a remotely mounted filesystem (e.g. NFS, SSHFS, etc.) or non-Ceph-backed disk; PVC does not handle this path, that is up to the administrator to configure and manage.
|
||||
|
||||
The restore will import the VM configuration, metainfo, and the point-in-time snapshot of all attached RBD volumes. Incremental backups will be automatically handled.
|
||||
|
||||
A VM named DOMAIN or with the same UUID must not exist; if a VM with the same name or UUID already exists, it must be removed, or renamed and then undefined (to preserve volumes), before restoring.
|
||||
A VM named DOMAIN or with the same UUID must not exist; if a VM with the same name or UUID already exists, it must be removed (or renamed and then undefined, to preserve volumes while freeing the UUID) before importing.
|
||||
|
||||
If the "-r"/"--retain-snapshot" option is specified (the default), for incremental restores, only the parent snapshot is kept; for full restores, the restored snapshot is kept. If the "-R"/"--remove-snapshot" option is specified, the imported snapshot is removed.
|
||||
|
||||
|
@ -1859,6 +2147,8 @@ def cli_vm_backup_restore(domain, backup_datestring, backup_path, retain_snapsho
|
|||
@click.argument("backup_path")
|
||||
def cli_vm_backup_remove(domain, backup_datestring, backup_path):
|
||||
"""
|
||||
DEPRECATED: Use 'pvc vm snapshot' commands instead. 'pvc vm backup' commands will be removed in a future version.
|
||||
|
||||
Remove the backup BACKUP_DATESTRING, including snapshots, of virtual machine DOMAIN stored in BACKUP_PATH on the cluster primary coordinator. DOMAIN may be a UUID or name.
|
||||
|
||||
WARNING: Removing an incremental parent will invalidate any existing incremental backups based on that backup.
|
||||
|
@ -1887,13 +2177,10 @@ def cli_vm_backup_remove(domain, backup_datestring, backup_path):
|
|||
)
|
||||
@connection_req
|
||||
@click.option(
|
||||
"-f",
|
||||
"--configuration",
|
||||
"autobackup_cfgfile",
|
||||
envvar="PVC_AUTOBACKUP_CFGFILE",
|
||||
default=DEFAULT_AUTOBACKUP_FILENAME,
|
||||
show_default=True,
|
||||
help="Override default config file location.",
|
||||
"--email-report",
|
||||
"email_report",
|
||||
default=None,
|
||||
help="Email a backup summary report to the specified address(es), comma-separated.",
|
||||
)
|
||||
@click.option(
|
||||
"--force-full",
|
||||
|
@ -1902,46 +2189,72 @@ def cli_vm_backup_remove(domain, backup_datestring, backup_path):
|
|||
is_flag=True,
|
||||
help="Force all backups to be full backups this run.",
|
||||
)
|
||||
@click.option(
|
||||
"--wait/--no-wait",
|
||||
"wait_flag",
|
||||
is_flag=True,
|
||||
default=True,
|
||||
show_default=True,
|
||||
help="Wait or don't wait for task to complete, showing progress if waiting.",
|
||||
)
|
||||
@click.option(
|
||||
"--cron",
|
||||
"cron_flag",
|
||||
default=False,
|
||||
is_flag=True,
|
||||
help="Cron mode; don't error exit if this isn't the primary coordinator.",
|
||||
default=False,
|
||||
show_default=True,
|
||||
help="Run in cron mode (returns immediately with no output once job is submitted).",
|
||||
)
|
||||
def cli_vm_autobackup(autobackup_cfgfile, force_full_flag, cron_flag):
|
||||
def cli_vm_autobackup(email_report, force_full_flag, wait_flag, cron_flag):
|
||||
"""
|
||||
Perform automated backups of VMs, with integrated cleanup and full/incremental scheduling.
|
||||
|
||||
This command enables automatic backup of PVC VMs at the block level, leveraging the various "pvc vm backup"
|
||||
This command enables automatic backup of PVC VMs at the block level, leveraging the various "pvc vm snapshot"
|
||||
functions with an internal retention and cleanup system as well as determination of full vs. incremental
|
||||
backups at different intervals. VMs are selected based on configured VM tags. The destination storage
|
||||
may either be local, or provided by a remote filesystem which is automatically mounted and unmounted during
|
||||
the backup run via a set of configured commands before and after the backup run.
|
||||
|
||||
NOTE: This command performs its tasks in a local context. It MUST be run from the cluster's active primary
|
||||
coordinator using the "local" connection only; if either is not correct, the command will error.
|
||||
|
||||
NOTE: This command should be run as the same user as the API daemon, usually "root" with "sudo -E" or in
|
||||
a cronjob as "root", to ensure permissions are correct on the backup files. Failure to do so will still take
|
||||
the backup, but the state update write will likely fail and the backup will become untracked. The command
|
||||
will prompt for confirmation if it is found not to be running as "root" and this cannot be bypassed.
|
||||
|
||||
This command should be run from cron or a timer at a regular interval (e.g. daily, hourly, etc.) which defines
|
||||
how often backups are taken. Backup format (full/incremental) and retention is based only on the number of
|
||||
recorded backups, not on the time interval between them. Backups taken manually outside of the "autobackup"
|
||||
recorded backups, not on the time interval between them. Exports taken manually outside of the "autobackup"
|
||||
command are not counted towards the format or retention of autobackups.
|
||||
|
||||
The PVC_AUTOBACKUP_CFGFILE envvar or "-f"/"--configuration" option can be used to override the default
|
||||
configuration file path if required by a particular run. For full details of the possible options, please
|
||||
see the example configuration file at "/usr/share/pvc/autobackup.sample.yaml".
|
||||
WARNING: Running this command manually will interfere with the schedule! Do not run manually except for testing.
|
||||
|
||||
The actual details of the autobackup, including retention policies, full-vs-incremental, pre- and post- run
|
||||
mounting/unmounting commands, etc. are defined in the main PVC configuration file `/etc/pvc/pvc.conf`. See
|
||||
the sample configuration for more details.
|
||||
|
||||
An optional report on all current backups can be emailed to one or more email addresses using the
|
||||
"--email-report" option. This report will include information on all current known backups.
|
||||
|
||||
The "--force-full" option can be used to force all configured VMs to perform a "full" level backup this run,
|
||||
which can help synchronize the backups of existing VMs with new ones.
|
||||
"""
|
||||
|
||||
# All work here is done in the helper function for portability; we don't even use "finish"
|
||||
vm_autobackup(CLI_CONFIG, autobackup_cfgfile, force_full_flag, cron_flag)
|
||||
if cron_flag:
|
||||
wait_flag = False
|
||||
|
||||
if email_report is not None:
|
||||
email_recipients = email_report.split(",")
|
||||
else:
|
||||
email_recipients = None
|
||||
|
||||
retcode, retmsg = pvc.lib.vm.vm_autobackup(
|
||||
CLI_CONFIG,
|
||||
email_recipients=email_recipients,
|
||||
force_full_flag=force_full_flag,
|
||||
wait_flag=wait_flag,
|
||||
)
|
||||
|
||||
if retcode and wait_flag:
|
||||
retmsg = wait_for_celery_task(CLI_CONFIG, retmsg)
|
||||
|
||||
if cron_flag:
|
||||
finish(retcode, None)
|
||||
else:
|
||||
finish(retcode, retmsg)
|
||||
|
||||
|
||||
###############################################################################
|
||||
|
@ -2477,6 +2790,7 @@ def cli_vm_volume_remove(domain, volume, live_flag, restart_flag):
|
|||
"--lines",
|
||||
"lines",
|
||||
default=None,
|
||||
type=int,
|
||||
show_default=False,
|
||||
help="Display this many log lines from the end of the log buffer. [default: 1000; with follow: 10]",
|
||||
)
|
||||
|
@ -3441,23 +3755,32 @@ def cli_storage_benchmark():
|
|||
@click.command(name="run", short_help="Run a storage benchmark.")
|
||||
@connection_req
|
||||
@click.argument("pool")
|
||||
@click.option(
|
||||
"--name",
|
||||
"name",
|
||||
default=None,
|
||||
show_default=False,
|
||||
help="Use a custom name for the job",
|
||||
)
|
||||
@click.option(
|
||||
"--wait/--no-wait",
|
||||
"wait_flag",
|
||||
is_flag=True,
|
||||
default=True,
|
||||
show_default=True,
|
||||
help="Wait or don't wait for task to complete, showing progress",
|
||||
help="Wait or don't wait for task to complete, showing progress if waiting",
|
||||
)
|
||||
@confirm_opt(
|
||||
"Storage benchmarks take approximately 10 minutes to run and generate significant load on the cluster; they should be run sparingly. Continue"
|
||||
)
|
||||
def cli_storage_benchmark_run(pool, wait_flag):
|
||||
def cli_storage_benchmark_run(pool, name, wait_flag):
|
||||
"""
|
||||
Run a storage benchmark on POOL in the background.
|
||||
"""
|
||||
|
||||
retcode, retmsg = pvc.lib.storage.ceph_benchmark_run(CLI_CONFIG, pool, wait_flag)
|
||||
retcode, retmsg = pvc.lib.storage.ceph_benchmark_run(
|
||||
CLI_CONFIG, pool, name, wait_flag
|
||||
)
|
||||
|
||||
if retcode and wait_flag:
|
||||
retmsg = wait_for_celery_task(CLI_CONFIG, retmsg)
|
||||
|
@ -3536,7 +3859,7 @@ def cli_storage_osd():
|
|||
is_flag=True,
|
||||
default=True,
|
||||
show_default=True,
|
||||
help="Wait or don't wait for task to complete, showing progress",
|
||||
help="Wait or don't wait for task to complete, showing progress if waiting",
|
||||
)
|
||||
@confirm_opt(
|
||||
"Destroy all data on and create a new OSD database volume group on node {node} device {device}"
|
||||
|
@ -3552,8 +3875,6 @@ def cli_storage_osd_create_db_vg(node, device, wait_flag):
|
|||
Only one OSD database volume group on a single physical device, named "osd-db", is supported per node, so it must be fast and large enough to act as an effective OSD database device for all OSDs on the node. Attempting to add additional database volume groups after the first will result in an error.
|
||||
|
||||
WARNING: If the OSD database device fails, all OSDs on the node using it will be lost and must be recreated.
|
||||
|
||||
A "detect" string is a string in the form "detect:<NAME>:<HUMAN-SIZE>:<ID>". Detect strings allow for automatic determination of Linux block device paths from known basic information about disks by leveraging "lsscsi" on the target host. The "NAME" should be some descriptive identifier, for instance the manufacturer (e.g. "INTEL"), the "HUMAN-SIZE" should be the labeled human-readable size of the device (e.g. "480GB", "1.92TB"), and "ID" specifies the Nth 0-indexed device which matches the "NAME" and "HUMAN-SIZE" values (e.g. "2" would match the third device with the corresponding "NAME" and "HUMAN-SIZE"). When matching against sizes, there is +/- 3% flexibility to account for base-1000 vs. base-1024 differences and rounding errors. The "NAME" may contain whitespace but if so the entire detect string should be quoted, and is case-insensitive. More information about detect strings can be found in the manual.
|
||||
"""
|
||||
|
||||
retcode, retmsg = pvc.lib.storage.ceph_osd_db_vg_add(
|
||||
|
@ -3611,7 +3932,7 @@ def cli_storage_osd_create_db_vg(node, device, wait_flag):
|
|||
is_flag=True,
|
||||
default=True,
|
||||
show_default=True,
|
||||
help="Wait or don't wait for task to complete, showing progress",
|
||||
help="Wait or don't wait for task to complete, showing progress if waiting",
|
||||
)
|
||||
@confirm_opt("Destroy all data on and create new OSD(s) on node {node} device {device}")
|
||||
def cli_storage_osd_add(
|
||||
|
@ -3622,7 +3943,7 @@ def cli_storage_osd_add(
|
|||
|
||||
DEVICE must be a valid block device path (e.g. '/dev/nvme0n1', '/dev/disk/by-path/...') or a "detect" string. Partitions are NOT supported. A "detect" string is a string in the form "detect:<NAME>:<HUMAN-SIZE>:<ID>". The path or detect string must be valid on the current node housing the OSD.
|
||||
|
||||
A "detect" string is a string in the form "detect:<NAME>:<HUMAN-SIZE>:<ID>". Detect strings allow for automatic determination of Linux block device paths from known basic information about disks by leveraging "lsscsi" on the target host. The "NAME" should be some descriptive identifier, for instance the manufacturer (e.g. "INTEL"), the "HUMAN-SIZE" should be the labeled human-readable size of the device (e.g. "480GB", "1.92TB"), and "ID" specifies the Nth 0-indexed device which matches the "NAME" and "HUMAN-SIZE" values (e.g. "2" would match the third device with the corresponding "NAME" and "HUMAN-SIZE"). When matching against sizes, there is +/- 3% flexibility to account for base-1000 vs. base-1024 differences and rounding errors. The "NAME" may contain whitespace but if so the entire detect string should be quoted, and is case-insensitive. More information about detect strings can be found in the pvcbootstrapd manual.
|
||||
A "detect" string is a string in the form "detect:<NAME>:<HUMAN-SIZE>:<ID>". Detect strings allow for automatic determination of Linux block device paths from known basic information about disks by leveraging "lsscsi"/"nvme" on the target host. The "NAME" should be some descriptive identifier that would be part of the device's Model information, for instance the manufacturer (e.g. "INTEL") or a similar unique string (e.g. "BOSS" for Dell BOSS cards); the "HUMAN-SIZE" should be the labeled human-readable size of the device (e.g. "480GB", "1.92TB"); and "ID" specifies the Nth 0-indexed device which matches the "NAME" and "HUMAN-SIZE" values (e.g. "2" would match the third device with the corresponding "NAME" and "HUMAN-SIZE"). When matching against sizes, there is +/- 3% flexibility to account for base-1000 vs. base-1024 differences and rounding errors. The "NAME" may contain whitespace but if so the entire detect string should be quoted, and is case-insensitive. More information about detect strings can be found in the pvcbootstrapd manual.
|
||||
|
||||
The weight of an OSD should reflect the ratio of the size of the OSD to the other OSDs in the storage cluster. For example, with a 200GB disk and a 400GB disk in each node, the 400GB disk should have twice the weight as the 200GB disk. For more information about CRUSH weights, please see the Ceph documentation.
|
||||
|
||||
|
@ -3694,7 +4015,7 @@ def cli_storage_osd_add(
|
|||
is_flag=True,
|
||||
default=True,
|
||||
show_default=True,
|
||||
help="Wait or don't wait for task to complete, showing progress",
|
||||
help="Wait or don't wait for task to complete, showing progress if waiting",
|
||||
)
|
||||
@confirm_opt(
|
||||
"Destroy all data on and replace OSD {osdid} (and peer split OSDs) with new device {new_device}"
|
||||
|
@ -3749,7 +4070,7 @@ def cli_storage_osd_replace(
|
|||
is_flag=True,
|
||||
default=True,
|
||||
show_default=True,
|
||||
help="Wait or don't wait for task to complete, showing progress",
|
||||
help="Wait or don't wait for task to complete, showing progress if waiting",
|
||||
)
|
||||
@confirm_opt("Refresh OSD {osdid} (and peer split OSDs) on device {device}")
|
||||
def cli_storage_osd_refresh(osdid, device, wait_flag):
|
||||
|
@ -3794,7 +4115,7 @@ def cli_storage_osd_refresh(osdid, device, wait_flag):
|
|||
is_flag=True,
|
||||
default=True,
|
||||
show_default=True,
|
||||
help="Wait or don't wait for task to complete, showing progress",
|
||||
help="Wait or don't wait for task to complete, showing progress if waiting",
|
||||
)
|
||||
@confirm_opt("Remove and destroy data on OSD {osdid}")
|
||||
def cli_storage_osd_remove(osdid, force_flag, wait_flag):
|
||||
|
@ -4061,12 +4382,26 @@ def cli_storage_volume():
|
|||
@click.argument("pool")
|
||||
@click.argument("name")
|
||||
@click.argument("size")
|
||||
def cli_storage_volume_add(pool, name, size):
|
||||
@click.option(
|
||||
"-f",
|
||||
"--force",
|
||||
"force_flag",
|
||||
is_flag=True,
|
||||
default=False,
|
||||
help="Force creation even if volume would violate 80% full safe free space.",
|
||||
)
|
||||
def cli_storage_volume_add(pool, name, size, force_flag):
|
||||
"""
|
||||
Add a new Ceph RBD volume in pool POOL with name NAME and size SIZE (in human units, e.g. 1024M, 20G, etc.).
|
||||
|
||||
PVC will prevent the creation of a volume who's size is greater than the available free space on the pool. This cannot be overridden.
|
||||
|
||||
PVC will prevent the creation of a volume whose size is greater than the 80% full safe free space on the pool. This can be overridden with the "-f"/"--force" option but this may be dangerous!
|
||||
"""
|
||||
|
||||
retcode, retmsg = pvc.lib.storage.ceph_volume_add(CLI_CONFIG, pool, name, size)
|
||||
retcode, retmsg = pvc.lib.storage.ceph_volume_add(
|
||||
CLI_CONFIG, pool, name, size, force_flag=force_flag
|
||||
)
|
||||
finish(retcode, retmsg)
|
||||
|
||||
|
||||
|
@ -4132,14 +4467,26 @@ def cli_storage_volume_remove(pool, name):
|
|||
@click.argument("pool")
|
||||
@click.argument("name")
|
||||
@click.argument("size")
|
||||
@click.option(
|
||||
"-f",
|
||||
"--force",
|
||||
"force_flag",
|
||||
is_flag=True,
|
||||
default=False,
|
||||
help="Force resize even if volume would violate 80% full safe free space.",
|
||||
)
|
||||
@confirm_opt("Resize volume {name} in pool {pool} to size {size}")
|
||||
def cli_storage_volume_resize(pool, name, size):
|
||||
def cli_storage_volume_resize(pool, name, size, force_flag):
|
||||
"""
|
||||
Resize an existing Ceph RBD volume with name NAME in pool POOL to size SIZE (in human units, e.g. 1024M, 20G, etc.).
|
||||
|
||||
PVC will prevent the resize of a volume who's new size is greater than the available free space on the pool. This cannot be overridden.
|
||||
|
||||
PVC will prevent the resize of a volume whose new size is greater than the 80% full safe free space on the pool. This can be overridden with the "-f"/"--force" option but this may be dangerous!
|
||||
"""
|
||||
|
||||
retcode, retmsg = pvc.lib.storage.ceph_volume_modify(
|
||||
CLI_CONFIG, pool, name, new_size=size
|
||||
CLI_CONFIG, pool, name, new_size=size, force_flag=force_flag
|
||||
)
|
||||
finish(retcode, retmsg)
|
||||
|
||||
|
@ -4172,13 +4519,25 @@ def cli_storage_volume_rename(pool, name, new_name):
|
|||
@click.argument("pool")
|
||||
@click.argument("name")
|
||||
@click.argument("new_name")
|
||||
def cli_storage_volume_clone(pool, name, new_name):
|
||||
@click.option(
|
||||
"-f",
|
||||
"--force",
|
||||
"force_flag",
|
||||
is_flag=True,
|
||||
default=False,
|
||||
help="Force clone even if volume would violate 80% full safe free space.",
|
||||
)
|
||||
def cli_storage_volume_clone(pool, name, new_name, force_flag):
|
||||
"""
|
||||
Clone a Ceph RBD volume with name NAME in pool POOL to name NEW_NAME in pool POOL.
|
||||
|
||||
PVC will prevent the clone of a volume who's new size is greater than the available free space on the pool. This cannot be overridden.
|
||||
|
||||
PVC will prevent the clone of a volume whose new size is greater than the 80% full safe free space on the pool. This can be overridden with the "-f"/"--force" option but this may be dangerous!
|
||||
"""
|
||||
|
||||
retcode, retmsg = pvc.lib.storage.ceph_volume_clone(
|
||||
CLI_CONFIG, pool, name, new_name
|
||||
CLI_CONFIG, pool, name, new_name, force_flag=force_flag
|
||||
)
|
||||
finish(retcode, retmsg)
|
||||
|
||||
|
@ -4246,6 +4605,10 @@ def cli_storage_volume_snapshot():
|
|||
def cli_storage_volume_snapshot_add(pool, volume, name):
|
||||
"""
|
||||
Add a snapshot with name NAME of Ceph RBD volume VOLUME in pool POOL.
|
||||
|
||||
WARNING: RBD snapshots are crash-consistent but not filesystem-aware. If a snapshot was taken
|
||||
of a running VM, restoring that snapshot will be equivalent to having forcibly restarted the
|
||||
VM at the moment of the snapshot.
|
||||
"""
|
||||
|
||||
retcode, retmsg = pvc.lib.storage.ceph_snapshot_add(CLI_CONFIG, pool, volume, name)
|
||||
|
@ -4293,6 +4656,36 @@ def cli_storage_volume_snapshot_remove(pool, volume, name):
|
|||
finish(retcode, retmsg)
|
||||
|
||||
|
||||
###############################################################################
|
||||
# > pvc storage volume snapshot rollback
|
||||
###############################################################################
|
||||
@click.command(name="rollback", short_help="Roll back RBD volume to snapshot.")
@connection_req
@click.argument("pool")
@click.argument("volume")
@click.argument("name")
@confirm_opt("Roll back to snapshot {name} for volume {pool}/{volume}")
def cli_storage_volume_snapshot_rollback(pool, volume, name):
    """
    Roll back the Ceph RBD volume VOLUME in pool POOL to the snapshot NAME.

    DANGER: All data written to the volume since the given snapshot will be permanently lost.

    WARNING: A rollback cannot be performed on an RBD volume with active I/O. Doing so will cause
    undefined behaviour and possible corruption. Ensure that any VM(s) using this RBD volume are
    stopped or disabled before attempting a snapshot rollback.

    WARNING: RBD snapshots are crash-consistent but not filesystem-aware. If a snapshot was taken
    of a running VM, restoring that snapshot will be equivalent to having forcibly restarted the
    VM at the moment of the snapshot.
    """
    # NOTE: the docstring above doubles as the Click help text, so it is kept verbatim.

    # Ask the storage library to perform the rollback; it hands back a
    # (return code, message) pair which is passed straight through to finish().
    rollback_result = pvc.lib.storage.ceph_snapshot_rollback(
        CLI_CONFIG, pool, volume, name
    )
    status, message = rollback_result
    finish(status, message)
|
||||
|
||||
|
||||
###############################################################################
|
||||
# > pvc storage volume snapshot list
|
||||
###############################################################################
|
||||
|
@ -4445,6 +4838,13 @@ def cli_provisioner_template_system():
|
|||
default=None, # Use cluster default
|
||||
help="The preferred migration method of the VM between nodes",
|
||||
)
|
||||
@click.option(
|
||||
"--max-downtime",
|
||||
"migration_max_downtime",
|
||||
default=300,
|
||||
show_default=True,
|
||||
help="The maximum time in milliseconds that a VM can be down for during a live migration; busy VMs may require a larger downtime.",
|
||||
)
|
||||
def cli_provisioner_template_system_add(
|
||||
name,
|
||||
vcpus,
|
||||
|
@ -4456,11 +4856,12 @@ def cli_provisioner_template_system_add(
|
|||
node_selector,
|
||||
node_autostart,
|
||||
migration_method,
|
||||
migration_max_downtime,
|
||||
):
|
||||
"""
|
||||
Add a new system template NAME to the PVC cluster provisioner.
|
||||
|
||||
For details on the possible "--node-selector" values, please see help for the command "pvc vm define".
|
||||
For details on the possible option values, please see help for the command "pvc vm define".
|
||||
"""
|
||||
params = dict()
|
||||
params["name"] = name
|
||||
|
@ -4478,6 +4879,8 @@ def cli_provisioner_template_system_add(
|
|||
params["node_autostart"] = node_autostart
|
||||
if migration_method:
|
||||
params["migration_method"] = migration_method
|
||||
if migration_max_downtime:
|
||||
params["migration_max_downtime"] = migration_max_downtime
|
||||
|
||||
retcode, retdata = pvc.lib.provisioner.template_add(
|
||||
CLI_CONFIG, params, template_type="system"
|
||||
|
@ -4540,6 +4943,12 @@ def cli_provisioner_template_system_add(
|
|||
default=None, # Use cluster default
|
||||
help="The preferred migration method of the VM between nodes",
|
||||
)
|
||||
@click.option(
|
||||
"--max-downtime",
|
||||
"migration_max_downtime",
|
||||
default=None,
|
||||
help="The maximum time in milliseconds that a VM can be down for during a live migration; busy VMs may require a larger downtime.",
|
||||
)
|
||||
def cli_provisioner_template_system_modify(
|
||||
name,
|
||||
vcpus,
|
||||
|
@ -4551,11 +4960,12 @@ def cli_provisioner_template_system_modify(
|
|||
node_selector,
|
||||
node_autostart,
|
||||
migration_method,
|
||||
migration_max_downtime,
|
||||
):
|
||||
"""
|
||||
Modify a system template NAME in the PVC cluster provisioner.
|
||||
|
||||
For details on the possible "--node-selector" values, please see help for the command "pvc vm define".
|
||||
For details on the possible option values, please see help for the command "pvc vm define".
|
||||
"""
|
||||
params = dict()
|
||||
params["vcpus"] = vcpus
|
||||
|
@ -4567,6 +4977,7 @@ def cli_provisioner_template_system_modify(
|
|||
params["node_selector"] = node_selector
|
||||
params["node_autostart"] = node_autostart
|
||||
params["migration_method"] = migration_method
|
||||
params["migration_max_downtime"] = migration_max_downtime
|
||||
|
||||
retcode, retdata = pvc.lib.provisioner.template_modify(
|
||||
CLI_CONFIG, params, name, template_type="system"
|
||||
|
@ -4754,13 +5165,27 @@ def cli_provisioner_template_network_vni():
|
|||
@connection_req
|
||||
@click.argument("name")
|
||||
@click.argument("vni")
|
||||
def cli_provisioner_template_network_vni_add(name, vni):
|
||||
@click.option(
|
||||
"-d",
|
||||
"--permit-duplicate",
|
||||
"permit_duplicate_flag",
|
||||
is_flag=True,
|
||||
default=False,
|
||||
help="Permit a duplicate VNI if one already exists",
|
||||
)
|
||||
def cli_provisioner_template_network_vni_add(name, vni, permit_duplicate_flag):
|
||||
"""
|
||||
Add a new network VNI to network template NAME.
|
||||
|
||||
Networks will be added to VMs in the order they are added and displayed within the template.
|
||||
|
||||
NOTE: Normally, the API prevents duplicate VNIs from being added to the same network template
|
||||
by returning an error, as this requirement is very niche. If you do not desire this behaviour,
|
||||
use the "-d"/"--permit-duplicate" option to bypass the check.
|
||||
"""
|
||||
params = dict()
|
||||
if permit_duplicate_flag:
|
||||
params["permit_duplicate"] = True
|
||||
|
||||
retcode, retdata = pvc.lib.provisioner.template_element_add(
|
||||
CLI_CONFIG, name, vni, params, element_type="net", template_type="network"
|
||||
|
@ -5742,7 +6167,7 @@ def cli_provisioner_profile_list(limit, format_function):
|
|||
is_flag=True,
|
||||
default=True,
|
||||
show_default=True,
|
||||
help="Wait or don't wait for task to complete, showing progress",
|
||||
help="Wait or don't wait for task to complete, showing progress if waiting",
|
||||
)
|
||||
def cli_provisioner_create(
|
||||
name, profile, define_flag, start_flag, script_args, wait_flag
|
||||
|
@ -6134,6 +6559,7 @@ def cli(
|
|||
# Click command tree
|
||||
###############################################################################
|
||||
|
||||
cli_node.add_command(cli_node_is_primary)
|
||||
cli_node.add_command(cli_node_primary)
|
||||
cli_node.add_command(cli_node_secondary)
|
||||
cli_node.add_command(cli_node_flush)
|
||||
|
@ -6157,6 +6583,12 @@ cli_vm.add_command(cli_vm_move)
|
|||
cli_vm.add_command(cli_vm_migrate)
|
||||
cli_vm.add_command(cli_vm_unmigrate)
|
||||
cli_vm.add_command(cli_vm_flush_locks)
|
||||
cli_vm_snapshot.add_command(cli_vm_snapshot_create)
|
||||
cli_vm_snapshot.add_command(cli_vm_snapshot_remove)
|
||||
cli_vm_snapshot.add_command(cli_vm_snapshot_rollback)
|
||||
cli_vm_snapshot.add_command(cli_vm_snapshot_export)
|
||||
cli_vm_snapshot.add_command(cli_vm_snapshot_import)
|
||||
cli_vm.add_command(cli_vm_snapshot)
|
||||
cli_vm_backup.add_command(cli_vm_backup_create)
|
||||
cli_vm_backup.add_command(cli_vm_backup_restore)
|
||||
cli_vm_backup.add_command(cli_vm_backup_remove)
|
||||
|
@ -6238,6 +6670,7 @@ cli_storage_volume.add_command(cli_storage_volume_list)
|
|||
cli_storage_volume_snapshot.add_command(cli_storage_volume_snapshot_add)
|
||||
cli_storage_volume_snapshot.add_command(cli_storage_volume_snapshot_rename)
|
||||
cli_storage_volume_snapshot.add_command(cli_storage_volume_snapshot_remove)
|
||||
cli_storage_volume_snapshot.add_command(cli_storage_volume_snapshot_rollback)
|
||||
cli_storage_volume_snapshot.add_command(cli_storage_volume_snapshot_list)
|
||||
cli_storage_volume.add_command(cli_storage_volume_snapshot)
|
||||
cli_storage.add_command(cli_storage_volume)
|
||||
|
|
|
@ -83,6 +83,37 @@ def cli_cluster_status_format_pretty(CLI_CONFIG, data):
|
|||
total_volumes = data.get("volumes", 0)
|
||||
total_snapshots = data.get("snapshots", 0)
|
||||
|
||||
total_cpu_total = data.get("resources", {}).get("cpu", {}).get("total", 0)
|
||||
total_cpu_load = data.get("resources", {}).get("cpu", {}).get("load", 0)
|
||||
total_cpu_utilization = (
|
||||
data.get("resources", {}).get("cpu", {}).get("utilization", 0)
|
||||
)
|
||||
total_cpu_string = (
|
||||
f"{total_cpu_utilization:.1f}% ({total_cpu_load:.1f} / {total_cpu_total})"
|
||||
)
|
||||
|
||||
total_memory_total = (
|
||||
data.get("resources", {}).get("memory", {}).get("total", 0) / 1024
|
||||
)
|
||||
total_memory_used = (
|
||||
data.get("resources", {}).get("memory", {}).get("used", 0) / 1024
|
||||
)
|
||||
total_memory_utilization = (
|
||||
data.get("resources", {}).get("memory", {}).get("utilization", 0)
|
||||
)
|
||||
total_memory_string = f"{total_memory_utilization:.1f}% ({total_memory_used:.1f} GB / {total_memory_total:.1f} GB)"
|
||||
|
||||
total_disk_total = (
|
||||
data.get("resources", {}).get("disk", {}).get("total", 0) / 1024 / 1024
|
||||
)
|
||||
total_disk_used = (
|
||||
data.get("resources", {}).get("disk", {}).get("used", 0) / 1024 / 1024
|
||||
)
|
||||
total_disk_utilization = round(
|
||||
data.get("resources", {}).get("disk", {}).get("utilization", 0)
|
||||
)
|
||||
total_disk_string = f"{total_disk_utilization:.1f}% ({total_disk_used:.1f} GB / {total_disk_total:.1f} GB)"
|
||||
|
||||
if maintenance == "true" or health == -1:
|
||||
health_colour = ansii["blue"]
|
||||
elif health > 90:
|
||||
|
@ -94,12 +125,9 @@ def cli_cluster_status_format_pretty(CLI_CONFIG, data):
|
|||
|
||||
output = list()
|
||||
|
||||
output.append(f"{ansii['bold']}PVC cluster status:{ansii['end']}")
|
||||
output.append("")
|
||||
|
||||
output.append(f"{ansii['purple']}Primary node:{ansii['end']} {primary_node}")
|
||||
output.append(f"{ansii['purple']}PVC version:{ansii['end']} {pvc_version}")
|
||||
output.append(f"{ansii['purple']}Upstream IP:{ansii['end']} {upstream_ip}")
|
||||
output.append(f"{ansii['purple']}Primary node:{ansii['end']} {primary_node}")
|
||||
output.append(f"{ansii['purple']}PVC version:{ansii['end']} {pvc_version}")
|
||||
output.append(f"{ansii['purple']}Upstream IP:{ansii['end']} {upstream_ip}")
|
||||
output.append("")
|
||||
|
||||
if health != "-1":
|
||||
|
@ -111,7 +139,7 @@ def cli_cluster_status_format_pretty(CLI_CONFIG, data):
|
|||
health = f"{health} (maintenance on)"
|
||||
|
||||
output.append(
|
||||
f"{ansii['purple']}Health:{ansii['end']} {health_colour}{health}{ansii['end']}"
|
||||
f"{ansii['purple']}Health:{ansii['end']} {health_colour}{health}{ansii['end']}"
|
||||
)
|
||||
|
||||
if messages is not None and len(messages) > 0:
|
||||
|
@ -136,7 +164,17 @@ def cli_cluster_status_format_pretty(CLI_CONFIG, data):
|
|||
)
|
||||
|
||||
messages = "\n ".join(message_list)
|
||||
output.append(f"{ansii['purple']}Active Faults:{ansii['end']} {messages}")
|
||||
else:
|
||||
messages = "None"
|
||||
output.append(f"{ansii['purple']}Active faults:{ansii['end']} {messages}")
|
||||
|
||||
output.append(f"{ansii['purple']}Total CPU:{ansii['end']} {total_cpu_string}")
|
||||
|
||||
output.append(
|
||||
f"{ansii['purple']}Total memory:{ansii['end']} {total_memory_string}"
|
||||
)
|
||||
|
||||
output.append(f"{ansii['purple']}Total disk:{ansii['end']} {total_disk_string}")
|
||||
|
||||
output.append("")
|
||||
|
||||
|
@ -166,7 +204,7 @@ def cli_cluster_status_format_pretty(CLI_CONFIG, data):
|
|||
|
||||
nodes_string = ", ".join(nodes_strings)
|
||||
|
||||
output.append(f"{ansii['purple']}Nodes:{ansii['end']} {nodes_string}")
|
||||
output.append(f"{ansii['purple']}Nodes:{ansii['end']} {nodes_string}")
|
||||
|
||||
vm_states = ["start", "disable"]
|
||||
vm_states.extend(
|
||||
|
@ -196,7 +234,7 @@ def cli_cluster_status_format_pretty(CLI_CONFIG, data):
|
|||
|
||||
vms_string = ", ".join(vms_strings)
|
||||
|
||||
output.append(f"{ansii['purple']}VMs:{ansii['end']} {vms_string}")
|
||||
output.append(f"{ansii['purple']}VMs:{ansii['end']} {vms_string}")
|
||||
|
||||
osd_states = ["up,in"]
|
||||
osd_states.extend(
|
||||
|
@ -222,15 +260,15 @@ def cli_cluster_status_format_pretty(CLI_CONFIG, data):
|
|||
|
||||
osds_string = " ".join(osds_strings)
|
||||
|
||||
output.append(f"{ansii['purple']}OSDs:{ansii['end']} {osds_string}")
|
||||
output.append(f"{ansii['purple']}OSDs:{ansii['end']} {osds_string}")
|
||||
|
||||
output.append(f"{ansii['purple']}Pools:{ansii['end']} {total_pools}")
|
||||
output.append(f"{ansii['purple']}Pools:{ansii['end']} {total_pools}")
|
||||
|
||||
output.append(f"{ansii['purple']}Volumes:{ansii['end']} {total_volumes}")
|
||||
output.append(f"{ansii['purple']}Volumes:{ansii['end']} {total_volumes}")
|
||||
|
||||
output.append(f"{ansii['purple']}Snapshots:{ansii['end']} {total_snapshots}")
|
||||
output.append(f"{ansii['purple']}Snapshots:{ansii['end']} {total_snapshots}")
|
||||
|
||||
output.append(f"{ansii['purple']}Networks:{ansii['end']} {total_networks}")
|
||||
output.append(f"{ansii['purple']}Networks:{ansii['end']} {total_networks}")
|
||||
|
||||
output.append("")
|
||||
|
||||
|
@ -258,9 +296,6 @@ def cli_cluster_status_format_short(CLI_CONFIG, data):
|
|||
|
||||
output = list()
|
||||
|
||||
output.append(f"{ansii['bold']}PVC cluster status:{ansii['end']}")
|
||||
output.append("")
|
||||
|
||||
if health != "-1":
|
||||
health = f"{health}%"
|
||||
else:
|
||||
|
@ -270,7 +305,7 @@ def cli_cluster_status_format_short(CLI_CONFIG, data):
|
|||
health = f"{health} (maintenance on)"
|
||||
|
||||
output.append(
|
||||
f"{ansii['purple']}Health:{ansii['end']} {health_colour}{health}{ansii['end']}"
|
||||
f"{ansii['purple']}Health:{ansii['end']} {health_colour}{health}{ansii['end']}"
|
||||
)
|
||||
|
||||
if messages is not None and len(messages) > 0:
|
||||
|
@ -295,7 +330,48 @@ def cli_cluster_status_format_short(CLI_CONFIG, data):
|
|||
)
|
||||
|
||||
messages = "\n ".join(message_list)
|
||||
output.append(f"{ansii['purple']}Active Faults:{ansii['end']} {messages}")
|
||||
else:
|
||||
messages = "None"
|
||||
output.append(f"{ansii['purple']}Active faults:{ansii['end']} {messages}")
|
||||
|
||||
total_cpu_total = data.get("resources", {}).get("cpu", {}).get("total", 0)
|
||||
total_cpu_load = data.get("resources", {}).get("cpu", {}).get("load", 0)
|
||||
total_cpu_utilization = (
|
||||
data.get("resources", {}).get("cpu", {}).get("utilization", 0)
|
||||
)
|
||||
total_cpu_string = (
|
||||
f"{total_cpu_utilization:.1f}% ({total_cpu_load:.1f} / {total_cpu_total})"
|
||||
)
|
||||
|
||||
total_memory_total = (
|
||||
data.get("resources", {}).get("memory", {}).get("total", 0) / 1024
|
||||
)
|
||||
total_memory_used = (
|
||||
data.get("resources", {}).get("memory", {}).get("used", 0) / 1024
|
||||
)
|
||||
total_memory_utilization = (
|
||||
data.get("resources", {}).get("memory", {}).get("utilization", 0)
|
||||
)
|
||||
total_memory_string = f"{total_memory_utilization:.1f}% ({total_memory_used:.1f} GB / {total_memory_total:.1f} GB)"
|
||||
|
||||
total_disk_total = (
|
||||
data.get("resources", {}).get("disk", {}).get("total", 0) / 1024 / 1024
|
||||
)
|
||||
total_disk_used = (
|
||||
data.get("resources", {}).get("disk", {}).get("used", 0) / 1024 / 1024
|
||||
)
|
||||
total_disk_utilization = round(
|
||||
data.get("resources", {}).get("disk", {}).get("utilization", 0)
|
||||
)
|
||||
total_disk_string = f"{total_disk_utilization:.1f}% ({total_disk_used:.1f} GB / {total_disk_total:.1f} GB)"
|
||||
|
||||
output.append(f"{ansii['purple']}CPU usage:{ansii['end']} {total_cpu_string}")
|
||||
|
||||
output.append(
|
||||
f"{ansii['purple']}Memory usage:{ansii['end']} {total_memory_string}"
|
||||
)
|
||||
|
||||
output.append(f"{ansii['purple']}Disk usage:{ansii['end']} {total_disk_string}")
|
||||
|
||||
output.append("")
|
||||
|
||||
|
@ -580,9 +656,11 @@ def cli_cluster_fault_list_format_long(CLI_CONFIG, fault_data):
|
|||
fault_id=fault["id"],
|
||||
fault_status=fault["status"].title(),
|
||||
fault_health_delta=f"-{fault['health_delta']}%",
|
||||
fault_acknowledged_at=fault["acknowledged_at"]
|
||||
if fault["acknowledged_at"] != ""
|
||||
else "N/A",
|
||||
fault_acknowledged_at=(
|
||||
fault["acknowledged_at"]
|
||||
if fault["acknowledged_at"] != ""
|
||||
else "N/A"
|
||||
),
|
||||
fault_last_reported=fault["last_reported"],
|
||||
fault_first_reported=fault["first_reported"],
|
||||
)
|
||||
|
@ -645,6 +723,24 @@ def cli_cluster_task_format_pretty(CLI_CONFIG, task_data):
|
|||
if _task_type_length > task_type_length:
|
||||
task_type_length = _task_type_length
|
||||
|
||||
for arg_name, arg_data in task["kwargs"].items():
|
||||
# Skip the "run_on" argument
|
||||
if arg_name == "run_on":
|
||||
continue
|
||||
|
||||
# task_arg_name column
|
||||
_task_arg_name_length = len(str(arg_name)) + 1
|
||||
if _task_arg_name_length > task_arg_name_length:
|
||||
task_arg_name_length = _task_arg_name_length
|
||||
|
||||
task_header_length = (
|
||||
task_id_length + task_name_length + task_type_length + task_worker_length + 3
|
||||
)
|
||||
max_task_data_length = (
|
||||
MAX_CONTENT_WIDTH - task_header_length - task_arg_name_length - 2
|
||||
)
|
||||
|
||||
for task in task_data:
|
||||
updated_kwargs = list()
|
||||
for arg_name, arg_data in task["kwargs"].items():
|
||||
# Skip the "run_on" argument
|
||||
|
@ -656,15 +752,30 @@ def cli_cluster_task_format_pretty(CLI_CONFIG, task_data):
|
|||
if _task_arg_name_length > task_arg_name_length:
|
||||
task_arg_name_length = _task_arg_name_length
|
||||
|
||||
if len(str(arg_data)) > 17:
|
||||
arg_data = arg_data[:17] + "..."
|
||||
if isinstance(arg_data, list):
|
||||
for subarg_data in arg_data:
|
||||
if len(subarg_data) > max_task_data_length:
|
||||
subarg_data = (
|
||||
str(subarg_data[: max_task_data_length - 4]) + " ..."
|
||||
)
|
||||
|
||||
# task_arg_data column
|
||||
_task_arg_data_length = len(str(arg_data)) + 1
|
||||
if _task_arg_data_length > task_arg_data_length:
|
||||
task_arg_data_length = _task_arg_data_length
|
||||
# task_arg_data column
|
||||
_task_arg_data_length = len(str(subarg_data)) + 1
|
||||
if _task_arg_data_length > task_arg_data_length:
|
||||
task_arg_data_length = _task_arg_data_length
|
||||
|
||||
updated_kwargs.append({"name": arg_name, "data": subarg_data})
|
||||
else:
|
||||
if len(str(arg_data)) > 24:
|
||||
arg_data = str(arg_data[:24]) + " ..."
|
||||
|
||||
# task_arg_data column
|
||||
_task_arg_data_length = len(str(arg_data)) + 1
|
||||
if _task_arg_data_length > task_arg_data_length:
|
||||
task_arg_data_length = _task_arg_data_length
|
||||
|
||||
updated_kwargs.append({"name": arg_name, "data": arg_data})
|
||||
|
||||
updated_kwargs.append({"name": arg_name, "data": arg_data})
|
||||
task["kwargs"] = updated_kwargs
|
||||
tasks.append(task)
|
||||
|
||||
|
|
|
@ -20,25 +20,16 @@
|
|||
###############################################################################
|
||||
|
||||
from click import echo as click_echo
|
||||
from click import confirm
|
||||
from datetime import datetime
|
||||
from distutils.util import strtobool
|
||||
from getpass import getuser
|
||||
from json import load as jload
|
||||
from json import dump as jdump
|
||||
from os import chmod, environ, getpid, path, makedirs, get_terminal_size
|
||||
from re import findall
|
||||
from os import chmod, environ, getpid, path, get_terminal_size
|
||||
from socket import gethostname
|
||||
from subprocess import run, PIPE
|
||||
from sys import argv
|
||||
from syslog import syslog, openlog, closelog, LOG_AUTH
|
||||
from yaml import load as yload
|
||||
from yaml import SafeLoader
|
||||
|
||||
import pvc.lib.provisioner
|
||||
import pvc.lib.vm
|
||||
import pvc.lib.node
|
||||
|
||||
|
||||
DEFAULT_STORE_DATA = {"cfgfile": "/etc/pvc/pvc.conf"}
|
||||
DEFAULT_STORE_FILENAME = "pvc.json"
|
||||
|
@ -195,322 +186,3 @@ def update_store(store_path, store_data):
|
|||
|
||||
with open(store_file, "w") as fh:
|
||||
jdump(store_data, fh, sort_keys=True, indent=4)
|
||||
|
||||
|
||||
def get_autobackup_config(CLI_CONFIG, cfgfile):
    """
    Load the "autobackup" section of the YAML file at cfgfile into a flat dict.

    The returned dict always carries "backup_root_path", "backup_root_suffix",
    "backup_tags", "backup_schedule" and "auto_mount_enabled". When auto-mount is
    enabled it also carries "mount_cmds" and "unmount_cmds", with any
    "{backup_root_path}" placeholder in a command expanded to the root path.

    If the file does not exist or a required key is missing, an error is echoed
    and the process exits with code 1.
    """
    try:
        with open(cfgfile) as fh:
            backup_config = yload(fh, Loader=SafeLoader)["autobackup"]

        root_path = backup_config["backup_root_path"]
        config = {
            "backup_root_path": root_path,
            "backup_root_suffix": backup_config["backup_root_suffix"],
            "backup_tags": backup_config["backup_tags"],
            "backup_schedule": backup_config["backup_schedule"],
            "auto_mount_enabled": backup_config["auto_mount"]["enabled"],
        }

        if config["auto_mount_enabled"]:
            auto_mount = backup_config["auto_mount"]
            # Mount commands are processed before unmount commands
            for cmds_key in ("mount_cmds", "unmount_cmds"):
                config[cmds_key] = [
                    (
                        cmd.format(backup_root_path=root_path)
                        if "{backup_root_path}" in cmd
                        else cmd
                    )
                    for cmd in auto_mount[cmds_key]
                ]
    except FileNotFoundError:
        echo(CLI_CONFIG, "ERROR: Specified backup configuration does not exist!")
        exit(1)
    except KeyError as e:
        echo(CLI_CONFIG, f"ERROR: Backup configuration is invalid: {e}")
        exit(1)

    return config
|
||||
|
||||
|
||||
def _autobackup_run_cmds(CLI_CONFIG, cmds, action, fatal):
    """
    Run each autobackup mount/unmount command in sequence, echoing status and timing.

    'action' is the word used in the progress message ("mount" or "unmount"). When
    'fatal' is true, the first failing command terminates the process with exit
    code 1; otherwise the failure is reported and the remaining commands still run.
    """
    for cmd in cmds:
        echo(
            CLI_CONFIG,
            f"Executing {action} command '{cmd.split()[0]}'... ",
            newline=False,
        )
        tstart = datetime.now()
        ret = run(
            cmd.split(),
            stdout=PIPE,
            stderr=PIPE,
        )
        tend = datetime.now()
        ttot = tend - tstart
        if ret.returncode != 0:
            echo(
                CLI_CONFIG,
                f"failed. [{ttot.seconds}s]",
            )
            if fatal:
                echo(
                    CLI_CONFIG,
                    f"Exiting; command reports: {ret.stderr.decode().strip()}",
                )
                exit(1)
            echo(
                CLI_CONFIG,
                f"Continuing; command reports: {ret.stderr.decode().strip()}",
            )
        else:
            echo(CLI_CONFIG, f"done. [{ttot.seconds}s]")


def _autobackup_vm(CLI_CONFIG, autobackup_config, vm, force_full_flag):
    """
    Back up one VM, remove its expired backups, and write its autobackup state file.

    Returns without touching the state file if the backup itself fails or if no
    backup datestring can be found in the backup result message.
    """
    backup_suffixed_path = f"{autobackup_config['backup_root_path']}{autobackup_config['backup_root_suffix']}"
    if not path.exists(backup_suffixed_path):
        makedirs(backup_suffixed_path)

    backup_path = f"{backup_suffixed_path}/{vm}"
    autobackup_state_file = f"{backup_path}/.autobackup.json"
    if not path.exists(backup_path) or not path.exists(autobackup_state_file):
        # There are no new backups so the list is empty
        state_data = dict()
        tracked_backups = list()
    else:
        with open(autobackup_state_file) as fh:
            state_data = jload(fh)
        tracked_backups = state_data["tracked_backups"]

    full_interval = autobackup_config["backup_schedule"]["full_interval"]
    full_retention = autobackup_config["backup_schedule"]["full_retention"]

    # New backups are inserted at index 0 below, so the first "full" entry is the
    # most recent one and its index is the number of backups taken since then
    full_backups = [b for b in tracked_backups if b["type"] == "full"]
    if len(full_backups) > 0:
        last_full_backup = full_backups[0]
        last_full_backup_idx = tracked_backups.index(last_full_backup)
        if force_full_flag:
            this_backup_type = "forced-full"
            this_backup_incremental_parent = None
            this_backup_retain_snapshot = True
        elif last_full_backup_idx >= full_interval - 1:
            this_backup_type = "full"
            this_backup_incremental_parent = None
            this_backup_retain_snapshot = True
        else:
            this_backup_type = "incremental"
            this_backup_incremental_parent = last_full_backup["datestring"]
            this_backup_retain_snapshot = False
    else:
        # The very first backup must be full to start the tree
        this_backup_type = "full"
        this_backup_incremental_parent = None
        this_backup_retain_snapshot = True

    # Perform the backup
    echo(
        CLI_CONFIG,
        f"Backing up VM '{vm}' ({this_backup_type})... ",
        newline=False,
    )
    tstart = datetime.now()
    retcode, retdata = pvc.lib.vm.vm_backup(
        CLI_CONFIG,
        vm,
        backup_suffixed_path,
        incremental_parent=this_backup_incremental_parent,
        retain_snapshot=this_backup_retain_snapshot,
    )
    tend = datetime.now()
    ttot = tend - tstart
    if not retcode:
        echo(CLI_CONFIG, f"failed. [{ttot.seconds}s]")
        echo(CLI_CONFIG, f"Skipping cleanups; command reports: {retdata}")
        return

    # The datestring is parsed out of the result message; without one the new
    # backup cannot be located or tracked, so skip this VM instead of crashing
    datestrings = findall(r"[0-9]{14}", retdata)
    if not datestrings:
        echo(CLI_CONFIG, f"failed. [{ttot.seconds}s]")
        echo(
            CLI_CONFIG,
            f"Skipping cleanups; no backup datestring found in: {retdata}",
        )
        return
    backup_datestring = datestrings[0]
    echo(
        CLI_CONFIG,
        f"done. Backup '{backup_datestring}' created. [{ttot.seconds}s]",
    )

    # Read backup file to get details
    backup_json_file = f"{backup_path}/{backup_datestring}/pvcbackup.json"
    with open(backup_json_file) as fh:
        backup_json = jload(fh)
    backup = {
        "datestring": backup_json["datestring"],
        "type": backup_json["type"],
        "parent": backup_json["incremental_parent"],
        "retained_snapshot": backup_json["retained_snapshot"],
    }
    tracked_backups.insert(0, backup)

    # Delete any full backups that are expired
    marked_for_deletion = list()
    found_full_count = 0
    for backup in tracked_backups:
        if backup["type"] == "full":
            found_full_count += 1
            if found_full_count > full_retention:
                marked_for_deletion.append(backup)

    # Delete any incremental backups that depend on marked parents
    for backup in tracked_backups:
        if backup["type"] == "incremental" and backup["parent"] in [
            b["datestring"] for b in marked_for_deletion
        ]:
            marked_for_deletion.append(backup)

    # Execute deletes
    for backup_to_delete in marked_for_deletion:
        echo(
            CLI_CONFIG,
            f"Removing old VM '{vm}' backup '{backup_to_delete['datestring']}' ({backup_to_delete['type']})... ",
            newline=False,
        )
        tstart = datetime.now()
        retcode, retdata = pvc.lib.vm.vm_remove_backup(
            CLI_CONFIG,
            vm,
            backup_suffixed_path,
            backup_to_delete["datestring"],
        )
        tend = datetime.now()
        ttot = tend - tstart
        if not retcode:
            echo(CLI_CONFIG, f"failed. [{ttot.seconds}s]")
            echo(
                CLI_CONFIG,
                f"Skipping removal from tracked backups; command reports: {retdata}",
            )
            continue
        else:
            tracked_backups.remove(backup_to_delete)
            echo(CLI_CONFIG, f"done. [{ttot.seconds}s]")

    # Update tracked state information
    state_data["tracked_backups"] = tracked_backups
    with open(autobackup_state_file, "w") as fh:
        jdump(state_data, fh)


def vm_autobackup(
    CLI_CONFIG,
    autobackup_cfgfile=DEFAULT_AUTOBACKUP_FILENAME,
    force_full_flag=False,
    cron_flag=False,
):
    """
    Perform automatic backups of VMs based on an external config file.

    CLI_CONFIG["connection"] is switched to "local" for the run. If the node named
    by DEFAULT_NODE_HOSTNAME is not the primary coordinator, the process exits:
    with code 0 when cron_flag is set, otherwise with code 1 after an error.

    Every VM carrying at least one of the configured backup tags is backed up.
    force_full_flag requests a forced full backup for any VM that already has a
    tracked full backup. When auto-mount is enabled, the mount commands run before
    the backups and the unmount commands always run afterwards, even if a backup
    raised an exception.
    """

    # Validate that we are running on the current primary coordinator of the 'local' cluster connection
    real_connection = CLI_CONFIG["connection"]
    CLI_CONFIG["connection"] = "local"
    retcode, retdata = pvc.lib.node.node_info(CLI_CONFIG, DEFAULT_NODE_HOSTNAME)
    if not retcode or retdata.get("coordinator_state") != "primary":
        if cron_flag:
            echo(
                CLI_CONFIG,
                "Current host is not the primary coordinator of the local cluster and running in cron mode. Exiting cleanly.",
            )
            exit(0)
        else:
            echo(
                CLI_CONFIG,
                f"ERROR: Current host is not the primary coordinator of the local cluster; got connection '{real_connection}', host '{DEFAULT_NODE_HOSTNAME}'.",
            )
            echo(
                CLI_CONFIG,
                "Autobackup MUST be run from the cluster active primary coordinator using the 'local' connection. See '-h'/'--help' for details.",
            )
            exit(1)

    # Ensure we're running as root, or show a warning & confirmation
    if getuser() != "root":
        confirm(
            "WARNING: You are not running this command as 'root'. This command should be run under the same user as the API daemon, which is usually 'root'. Are you sure you want to continue?",
            prompt_suffix=" ",
            abort=True,
        )

    # Load our YAML config
    autobackup_config = get_autobackup_config(CLI_CONFIG, autobackup_cfgfile)

    # Get a list of all VMs on the cluster
    # We don't do tag filtering here, because we could match an arbitrary number of tags; instead, we
    # parse the list after
    retcode, retdata = pvc.lib.vm.vm_list(CLI_CONFIG, None, None, None, None, None)
    if not retcode:
        echo(CLI_CONFIG, f"ERROR: Failed to fetch VM list: {retdata}")
        exit(1)
    cluster_vms = retdata

    # Keep the VMs that carry at least one of the configured backup tags
    backup_tags = set(autobackup_config["backup_tags"])
    backup_vms = [
        vm["name"]
        for vm in cluster_vms
        if backup_tags.intersection(t["name"] for t in vm["tags"])
    ]

    if len(backup_vms) < 1:
        echo(CLI_CONFIG, "Found no suitable VMs for autobackup.")
        exit(0)

    # Pretty print the names of the VMs we'll back up (to stderr)
    maxnamelen = max([len(n) for n in backup_vms]) + 2
    cols = 1
    while (cols * maxnamelen + maxnamelen + 2) <= MAX_CONTENT_WIDTH:
        cols += 1
    rows = len(backup_vms) // cols
    vm_list_rows = list()
    for row in range(0, rows + 1):
        row_start = row * cols
        row_end = (row * cols) + cols
        row_str = ""
        for x in range(row_start, row_end):
            if x < len(backup_vms):
                row_str += "{:<{}}".format(backup_vms[x], maxnamelen)
        # The final row is empty when the VM count is an exact multiple of cols
        if row_str:
            vm_list_rows.append(row_str)

    echo(CLI_CONFIG, f"Found {len(backup_vms)} suitable VM(s) for autobackup.")
    echo(CLI_CONFIG, "Full VM list:", stderr=True)
    echo(CLI_CONFIG, " {}".format("\n ".join(vm_list_rows)), stderr=True)
    echo(CLI_CONFIG, "", stderr=True)

    if autobackup_config["auto_mount_enabled"]:
        # Execute each mount_cmds command in sequence; a failure here is fatal
        _autobackup_run_cmds(
            CLI_CONFIG, autobackup_config["mount_cmds"], "mount", fatal=True
        )

    try:
        # For each VM, perform the backup
        for vm in backup_vms:
            _autobackup_vm(CLI_CONFIG, autobackup_config, vm, force_full_flag)
    finally:
        if autobackup_config["auto_mount_enabled"]:
            # Execute each unmount_cmds command in sequence, even after an error,
            # so the backup target is not left mounted
            _autobackup_run_cmds(
                CLI_CONFIG, autobackup_config["unmount_cmds"], "unmount", fatal=False
            )
|
||||
|
|
|
@ -115,10 +115,14 @@ def wait_for_celery_task(CLI_CONFIG, task_detail, start_late=False):
|
|||
)
|
||||
while True:
|
||||
sleep(0.5)
|
||||
if isinstance(task_status, tuple):
|
||||
continue
|
||||
if task_status.get("state") != "RUNNING":
|
||||
break
|
||||
if task_status.get("current") > last_task:
|
||||
current_task = int(task_status.get("current"))
|
||||
total_task = int(task_status.get("total"))
|
||||
bar.length = total_task
|
||||
bar.update(current_task - last_task)
|
||||
last_task = current_task
|
||||
# The extensive spaces at the end cause this to overwrite longer previous messages
|
||||
|
|
|
@ -21,6 +21,8 @@
|
|||
|
||||
import json
|
||||
|
||||
from time import sleep
|
||||
|
||||
from pvc.lib.common import call_api
|
||||
|
||||
|
||||
|
@ -114,3 +116,22 @@ def get_info(config):
|
|||
return True, response.json()
|
||||
else:
|
||||
return False, response.json().get("message", "")
|
||||
|
||||
|
||||
def get_primary_node(config):
    """
    Get the current primary node of the PVC cluster

    Polls the endpoint once per second until it answers with HTTP 200; there is
    no retry limit, so this blocks for as long as the API keeps returning any
    other status code.

    API endpoint: GET /api/v1/status/primary_node
    API arguments:
    API schema: {json_data_object}
    """
    while True:
        response = call_api(config, "get", "/status/primary_node")
        if response.status_code != 200:
            # Not ready yet; wait before asking again
            sleep(1)
            continue

        return True, response.json()["primary_node"]
|
||||
|
|
|
@ -108,9 +108,10 @@ class UploadProgressBar(object):
|
|||
|
||||
|
||||
class ErrorResponse(requests.Response):
|
||||
def __init__(self, json_data, status_code):
|
||||
def __init__(self, json_data, status_code, headers):
|
||||
self.json_data = json_data
|
||||
self.status_code = status_code
|
||||
self.headers = headers
|
||||
|
||||
def json(self):
|
||||
return self.json_data
|
||||
|
@ -140,15 +141,32 @@ def call_api(
|
|||
# Determine the request type and hit the API
|
||||
disable_warnings()
|
||||
try:
|
||||
response = None
|
||||
if operation == "get":
|
||||
response = requests.get(
|
||||
uri,
|
||||
timeout=timeout,
|
||||
headers=headers,
|
||||
params=params,
|
||||
data=data,
|
||||
verify=config["verify_ssl"],
|
||||
)
|
||||
retry_on_code = [429, 500, 502, 503, 504]
|
||||
for i in range(3):
|
||||
failed = False
|
||||
try:
|
||||
response = requests.get(
|
||||
uri,
|
||||
timeout=timeout,
|
||||
headers=headers,
|
||||
params=params,
|
||||
data=data,
|
||||
verify=config["verify_ssl"],
|
||||
)
|
||||
if response.status_code in retry_on_code:
|
||||
failed = True
|
||||
continue
|
||||
break
|
||||
except requests.exceptions.ConnectionError:
|
||||
failed = True
|
||||
continue
|
||||
if failed:
|
||||
error = f"Code {response.status_code}" if response else "Timeout"
|
||||
raise requests.exceptions.ConnectionError(
|
||||
f"Failed to connect after 3 tries ({error})"
|
||||
)
|
||||
if operation == "post":
|
||||
response = requests.post(
|
||||
uri,
|
||||
|
@ -189,7 +207,8 @@ def call_api(
|
|||
)
|
||||
except Exception as e:
|
||||
message = "Failed to connect to the API: {}".format(e)
|
||||
response = ErrorResponse({"message": message}, 500)
|
||||
code = response.status_code if response else 504
|
||||
response = ErrorResponse({"message": message}, code, None)
|
||||
|
||||
# Display debug output
|
||||
if config["debug"]:
|
||||
|
|
|
@ -779,7 +779,8 @@ def format_list_template_system(template_data):
|
|||
template_node_limit_length = 6
|
||||
template_node_selector_length = 9
|
||||
template_node_autostart_length = 10
|
||||
template_migration_method_length = 10
|
||||
template_migration_method_length = 12
|
||||
template_migration_max_downtime_length = 13
|
||||
|
||||
for template in template_data:
|
||||
# template_name column
|
||||
|
@ -826,6 +827,17 @@ def format_list_template_system(template_data):
|
|||
_template_migration_method_length = len(str(template["migration_method"])) + 1
|
||||
if _template_migration_method_length > template_migration_method_length:
|
||||
template_migration_method_length = _template_migration_method_length
|
||||
# template_migration_max_downtime column
|
||||
_template_migration_max_downtime_length = (
|
||||
len(str(template["migration_max_downtime"])) + 1
|
||||
)
|
||||
if (
|
||||
_template_migration_max_downtime_length
|
||||
> template_migration_max_downtime_length
|
||||
):
|
||||
template_migration_max_downtime_length = (
|
||||
_template_migration_max_downtime_length
|
||||
)
|
||||
|
||||
# Format the string (header)
|
||||
template_list_output.append(
|
||||
|
@ -842,7 +854,8 @@ def format_list_template_system(template_data):
|
|||
+ template_node_selector_length
|
||||
+ template_node_autostart_length
|
||||
+ template_migration_method_length
|
||||
+ 3,
|
||||
+ template_migration_max_downtime_length
|
||||
+ 4,
|
||||
template_header="System Templates "
|
||||
+ "".join(
|
||||
["-" for _ in range(17, template_name_length + template_id_length)]
|
||||
|
@ -874,7 +887,8 @@ def format_list_template_system(template_data):
|
|||
+ template_node_selector_length
|
||||
+ template_node_autostart_length
|
||||
+ template_migration_method_length
|
||||
+ 2,
|
||||
+ template_migration_max_downtime_length
|
||||
+ 3,
|
||||
)
|
||||
]
|
||||
),
|
||||
|
@ -891,7 +905,8 @@ def format_list_template_system(template_data):
|
|||
{template_node_limit: <{template_node_limit_length}} \
|
||||
{template_node_selector: <{template_node_selector_length}} \
|
||||
{template_node_autostart: <{template_node_autostart_length}} \
|
||||
{template_migration_method: <{template_migration_method_length}}{end_bold}".format(
|
||||
{template_migration_method: <{template_migration_method_length}} \
|
||||
{template_migration_max_downtime: <{template_migration_max_downtime_length}}{end_bold}".format(
|
||||
bold=ansiprint.bold(),
|
||||
end_bold=ansiprint.end(),
|
||||
template_name_length=template_name_length,
|
||||
|
@ -905,6 +920,7 @@ def format_list_template_system(template_data):
|
|||
template_node_selector_length=template_node_selector_length,
|
||||
template_node_autostart_length=template_node_autostart_length,
|
||||
template_migration_method_length=template_migration_method_length,
|
||||
template_migration_max_downtime_length=template_migration_max_downtime_length,
|
||||
template_name="Name",
|
||||
template_id="ID",
|
||||
template_vcpu="vCPUs",
|
||||
|
@ -915,7 +931,8 @@ def format_list_template_system(template_data):
|
|||
template_node_limit="Limit",
|
||||
template_node_selector="Selector",
|
||||
template_node_autostart="Autostart",
|
||||
template_migration_method="Migration",
|
||||
template_migration_method="Mig. Method",
|
||||
template_migration_max_downtime="Max Downtime",
|
||||
)
|
||||
)
|
||||
|
||||
|
@ -931,7 +948,8 @@ def format_list_template_system(template_data):
|
|||
{template_node_limit: <{template_node_limit_length}} \
|
||||
{template_node_selector: <{template_node_selector_length}} \
|
||||
{template_node_autostart: <{template_node_autostart_length}} \
|
||||
{template_migration_method: <{template_migration_method_length}}{end_bold}".format(
|
||||
{template_migration_method: <{template_migration_method_length}} \
|
||||
{template_migration_max_downtime: <{template_migration_max_downtime_length}}{end_bold}".format(
|
||||
template_name_length=template_name_length,
|
||||
template_id_length=template_id_length,
|
||||
template_vcpu_length=template_vcpu_length,
|
||||
|
@ -943,6 +961,7 @@ def format_list_template_system(template_data):
|
|||
template_node_selector_length=template_node_selector_length,
|
||||
template_node_autostart_length=template_node_autostart_length,
|
||||
template_migration_method_length=template_migration_method_length,
|
||||
template_migration_max_downtime_length=template_migration_max_downtime_length,
|
||||
bold="",
|
||||
end_bold="",
|
||||
template_name=str(template["name"]),
|
||||
|
@ -956,6 +975,7 @@ def format_list_template_system(template_data):
|
|||
template_node_selector=str(template["node_selector"]),
|
||||
template_node_autostart=str(template["node_autostart"]),
|
||||
template_migration_method=str(template["migration_method"]),
|
||||
template_migration_max_downtime=f"{str(template['migration_max_downtime'])} ms",
|
||||
)
|
||||
)
|
||||
|
||||
|
|
|
@ -30,6 +30,7 @@ from requests_toolbelt.multipart.encoder import (
|
|||
|
||||
import pvc.lib.ansiprint as ansiprint
|
||||
from pvc.lib.common import UploadProgressBar, call_api, get_wait_retdata
|
||||
from pvc.cli.helpers import MAX_CONTENT_WIDTH
|
||||
|
||||
#
|
||||
# Supplemental functions
|
||||
|
@ -430,7 +431,9 @@ def format_list_osd(config, osd_list):
|
|||
)
|
||||
continue
|
||||
|
||||
if osd_information["is_split"]:
|
||||
if osd_information.get("is_split") is not None and osd_information.get(
|
||||
"is_split"
|
||||
):
|
||||
osd_information["device"] = f"{osd_information['device']} [s]"
|
||||
|
||||
# Deal with the size to human readable
|
||||
|
@ -1172,15 +1175,15 @@ def ceph_volume_list(config, limit, pool):
|
|||
return False, response.json().get("message", "")
|
||||
|
||||
|
||||
def ceph_volume_add(config, pool, volume, size):
|
||||
def ceph_volume_add(config, pool, volume, size, force_flag=False):
|
||||
"""
|
||||
Add new Ceph volume
|
||||
|
||||
API endpoint: POST /api/v1/storage/ceph/volume
|
||||
API arguments: volume={volume}, pool={pool}, size={size}
|
||||
API arguments: volume={volume}, pool={pool}, size={size}, force={force_flag}
|
||||
API schema: {"message":"{data}"}
|
||||
"""
|
||||
params = {"volume": volume, "pool": pool, "size": size}
|
||||
params = {"volume": volume, "pool": pool, "size": size, "force": force_flag}
|
||||
response = call_api(config, "post", "/storage/ceph/volume", params=params)
|
||||
|
||||
if response.status_code == 200:
|
||||
|
@ -1261,12 +1264,14 @@ def ceph_volume_remove(config, pool, volume):
|
|||
return retstatus, response.json().get("message", "")
|
||||
|
||||
|
||||
def ceph_volume_modify(config, pool, volume, new_name=None, new_size=None):
|
||||
def ceph_volume_modify(
|
||||
config, pool, volume, new_name=None, new_size=None, force_flag=False
|
||||
):
|
||||
"""
|
||||
Modify Ceph volume
|
||||
|
||||
API endpoint: PUT /api/v1/storage/ceph/volume/{pool}/{volume}
|
||||
API arguments:
|
||||
API arguments: [new_name={new_name}], [new_size={new_size}], force_flag={force_flag}
|
||||
API schema: {"message":"{data}"}
|
||||
"""
|
||||
|
||||
|
@ -1275,6 +1280,7 @@ def ceph_volume_modify(config, pool, volume, new_name=None, new_size=None):
|
|||
params["new_name"] = new_name
|
||||
if new_size:
|
||||
params["new_size"] = new_size
|
||||
params["force"] = force_flag
|
||||
|
||||
response = call_api(
|
||||
config,
|
||||
|
@ -1291,15 +1297,15 @@ def ceph_volume_modify(config, pool, volume, new_name=None, new_size=None):
|
|||
return retstatus, response.json().get("message", "")
|
||||
|
||||
|
||||
def ceph_volume_clone(config, pool, volume, new_volume):
|
||||
def ceph_volume_clone(config, pool, volume, new_volume, force_flag=False):
|
||||
"""
|
||||
Clone Ceph volume
|
||||
|
||||
API endpoint: POST /api/v1/storage/ceph/volume/{pool}/{volume}
|
||||
API arguments: new_volume={new_volume
|
||||
API arguments: new_volume={new_volume}, force_flag={force_flag}
|
||||
API schema: {"message":"{data}"}
|
||||
"""
|
||||
params = {"new_volume": new_volume}
|
||||
params = {"new_volume": new_volume, "force_flag": force_flag}
|
||||
response = call_api(
|
||||
config,
|
||||
"post",
|
||||
|
@ -1539,6 +1545,30 @@ def ceph_snapshot_add(config, pool, volume, snapshot):
|
|||
return retstatus, response.json().get("message", "")
|
||||
|
||||
|
||||
def ceph_snapshot_rollback(config, pool, volume, snapshot):
    """
    Roll back Ceph volume to snapshot

    Returns a (success, message) tuple: success is True only for an HTTP 200
    response, and message is the "message" field of the JSON body (empty string
    if absent).

    API endpoint: POST /api/v1/storage/ceph/snapshot/{pool}/{volume}/{snapshot}/rollback
    API arguments:
    API schema: {"message":"{data}"}
    """
    endpoint = f"/storage/ceph/snapshot/{pool}/{volume}/{snapshot}/rollback"
    response = call_api(config, "post", endpoint)

    retstatus = response.status_code == 200

    return retstatus, response.json().get("message", "")
|
||||
|
||||
|
||||
def ceph_snapshot_remove(config, pool, volume, snapshot):
|
||||
"""
|
||||
Remove Ceph snapshot
|
||||
|
@ -1695,15 +1725,17 @@ def format_list_snapshot(config, snapshot_list):
|
|||
#
|
||||
# Benchmark functions
|
||||
#
|
||||
def ceph_benchmark_run(config, pool, wait_flag):
|
||||
def ceph_benchmark_run(config, pool, name, wait_flag):
|
||||
"""
|
||||
Run a storage benchmark against {pool}
|
||||
|
||||
API endpoint: POST /api/v1/storage/ceph/benchmark
|
||||
API arguments: pool={pool}
|
||||
API arguments: pool={pool}, name={name}
|
||||
API schema: {message}
|
||||
"""
|
||||
params = {"pool": pool}
|
||||
if name:
|
||||
params["name"] = name
|
||||
response = call_api(config, "post", "/storage/ceph/benchmark", params=params)
|
||||
|
||||
return get_wait_retdata(response, wait_flag)
|
||||
|
@ -1775,7 +1807,7 @@ def get_benchmark_list_results(benchmark_format, benchmark_data):
|
|||
benchmark_bandwidth, benchmark_iops = get_benchmark_list_results_legacy(
|
||||
benchmark_data
|
||||
)
|
||||
elif benchmark_format == 1:
|
||||
elif benchmark_format == 1 or benchmark_format == 2:
|
||||
benchmark_bandwidth, benchmark_iops = get_benchmark_list_results_json(
|
||||
benchmark_data
|
||||
)
|
||||
|
@ -1977,6 +2009,7 @@ def format_info_benchmark(config, benchmark_information):
|
|||
benchmark_matrix = {
|
||||
0: format_info_benchmark_legacy,
|
||||
1: format_info_benchmark_json,
|
||||
2: format_info_benchmark_json,
|
||||
}
|
||||
|
||||
benchmark_version = benchmark_information[0]["test_format"]
|
||||
|
@ -2311,12 +2344,15 @@ def format_info_benchmark_json(config, benchmark_information):
|
|||
if benchmark_information["benchmark_result"] == "Running":
|
||||
return "Benchmark test is still running."
|
||||
|
||||
benchmark_format = benchmark_information["test_format"]
|
||||
benchmark_details = benchmark_information["benchmark_result"]
|
||||
|
||||
# Format a nice output; do this line-by-line then concat the elements at the end
|
||||
ainformation = []
|
||||
ainformation.append(
|
||||
"{}Storage Benchmark details:{}".format(ansiprint.bold(), ansiprint.end())
|
||||
"{}Storage Benchmark details (format {}):{}".format(
|
||||
ansiprint.bold(), benchmark_format, ansiprint.end()
|
||||
)
|
||||
)
|
||||
|
||||
nice_test_name_map = {
|
||||
|
@ -2364,7 +2400,7 @@ def format_info_benchmark_json(config, benchmark_information):
|
|||
if element[1] != 0:
|
||||
useful_latency_tree.append(element)
|
||||
|
||||
max_rows = 9
|
||||
max_rows = 5
|
||||
if len(useful_latency_tree) > 9:
|
||||
max_rows = len(useful_latency_tree)
|
||||
elif len(useful_latency_tree) < 9:
|
||||
|
@ -2373,15 +2409,10 @@ def format_info_benchmark_json(config, benchmark_information):
|
|||
|
||||
# Format the static data
|
||||
overall_label = [
|
||||
"Overall BW/s:",
|
||||
"Overall IOPS:",
|
||||
"Total I/O:",
|
||||
"Runtime (s):",
|
||||
"User CPU %:",
|
||||
"System CPU %:",
|
||||
"Ctx Switches:",
|
||||
"Major Faults:",
|
||||
"Minor Faults:",
|
||||
"BW/s:",
|
||||
"IOPS:",
|
||||
"I/O:",
|
||||
"Time:",
|
||||
]
|
||||
while len(overall_label) < max_rows:
|
||||
overall_label.append("")
|
||||
|
@ -2390,68 +2421,149 @@ def format_info_benchmark_json(config, benchmark_information):
|
|||
format_bytes_tohuman(int(job_details[io_class]["bw_bytes"])),
|
||||
format_ops_tohuman(int(job_details[io_class]["iops"])),
|
||||
format_bytes_tohuman(int(job_details[io_class]["io_bytes"])),
|
||||
job_details["job_runtime"] / 1000,
|
||||
job_details["usr_cpu"],
|
||||
job_details["sys_cpu"],
|
||||
job_details["ctx"],
|
||||
job_details["majf"],
|
||||
job_details["minf"],
|
||||
str(job_details["job_runtime"] / 1000) + "s",
|
||||
]
|
||||
while len(overall_data) < max_rows:
|
||||
overall_data.append("")
|
||||
|
||||
cpu_label = [
|
||||
"Total:",
|
||||
"User:",
|
||||
"Sys:",
|
||||
"OSD:",
|
||||
"MON:",
|
||||
]
|
||||
while len(cpu_label) < max_rows:
|
||||
cpu_label.append("")
|
||||
|
||||
cpu_data = [
|
||||
(
|
||||
benchmark_details[test]["avg_cpu_util_percent"]["total"]
|
||||
if benchmark_format > 1
|
||||
else "N/A"
|
||||
),
|
||||
round(job_details["usr_cpu"], 2),
|
||||
round(job_details["sys_cpu"], 2),
|
||||
(
|
||||
benchmark_details[test]["avg_cpu_util_percent"]["ceph-osd"]
|
||||
if benchmark_format > 1
|
||||
else "N/A"
|
||||
),
|
||||
(
|
||||
benchmark_details[test]["avg_cpu_util_percent"]["ceph-mon"]
|
||||
if benchmark_format > 1
|
||||
else "N/A"
|
||||
),
|
||||
]
|
||||
while len(cpu_data) < max_rows:
|
||||
cpu_data.append("")
|
||||
|
||||
memory_label = [
|
||||
"Total:",
|
||||
"OSD:",
|
||||
"MON:",
|
||||
]
|
||||
while len(memory_label) < max_rows:
|
||||
memory_label.append("")
|
||||
|
||||
memory_data = [
|
||||
(
|
||||
benchmark_details[test]["avg_memory_util_percent"]["total"]
|
||||
if benchmark_format > 1
|
||||
else "N/A"
|
||||
),
|
||||
(
|
||||
benchmark_details[test]["avg_memory_util_percent"]["ceph-osd"]
|
||||
if benchmark_format > 1
|
||||
else "N/A"
|
||||
),
|
||||
(
|
||||
benchmark_details[test]["avg_memory_util_percent"]["ceph-mon"]
|
||||
if benchmark_format > 1
|
||||
else "N/A"
|
||||
),
|
||||
]
|
||||
while len(memory_data) < max_rows:
|
||||
memory_data.append("")
|
||||
|
||||
network_label = [
|
||||
"Total:",
|
||||
"Sent:",
|
||||
"Recv:",
|
||||
]
|
||||
while len(network_label) < max_rows:
|
||||
network_label.append("")
|
||||
|
||||
network_data = [
|
||||
(
|
||||
format_bytes_tohuman(
|
||||
int(benchmark_details[test]["avg_network_util_bps"]["total"])
|
||||
)
|
||||
if benchmark_format > 1
|
||||
else "N/A"
|
||||
),
|
||||
(
|
||||
format_bytes_tohuman(
|
||||
int(benchmark_details[test]["avg_network_util_bps"]["sent"])
|
||||
)
|
||||
if benchmark_format > 1
|
||||
else "N/A"
|
||||
),
|
||||
(
|
||||
format_bytes_tohuman(
|
||||
int(benchmark_details[test]["avg_network_util_bps"]["recv"])
|
||||
)
|
||||
if benchmark_format > 1
|
||||
else "N/A"
|
||||
),
|
||||
]
|
||||
while len(network_data) < max_rows:
|
||||
network_data.append("")
|
||||
|
||||
bandwidth_label = [
|
||||
"Min:",
|
||||
"Max:",
|
||||
"Mean:",
|
||||
"StdDev:",
|
||||
"Samples:",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
]
|
||||
while len(bandwidth_label) < max_rows:
|
||||
bandwidth_label.append("")
|
||||
|
||||
bandwidth_data = [
|
||||
format_bytes_tohuman(int(job_details[io_class]["bw_min"]) * 1024),
|
||||
format_bytes_tohuman(int(job_details[io_class]["bw_max"]) * 1024),
|
||||
format_bytes_tohuman(int(job_details[io_class]["bw_mean"]) * 1024),
|
||||
format_bytes_tohuman(int(job_details[io_class]["bw_dev"]) * 1024),
|
||||
job_details[io_class]["bw_samples"],
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
format_bytes_tohuman(int(job_details[io_class]["bw_min"]) * 1024)
|
||||
+ " / "
|
||||
+ format_ops_tohuman(int(job_details[io_class]["iops_min"])),
|
||||
format_bytes_tohuman(int(job_details[io_class]["bw_max"]) * 1024)
|
||||
+ " / "
|
||||
+ format_ops_tohuman(int(job_details[io_class]["iops_max"])),
|
||||
format_bytes_tohuman(int(job_details[io_class]["bw_mean"]) * 1024)
|
||||
+ " / "
|
||||
+ format_ops_tohuman(int(job_details[io_class]["iops_mean"])),
|
||||
format_bytes_tohuman(int(job_details[io_class]["bw_dev"]) * 1024)
|
||||
+ " / "
|
||||
+ format_ops_tohuman(int(job_details[io_class]["iops_stddev"])),
|
||||
str(job_details[io_class]["bw_samples"])
|
||||
+ " / "
|
||||
+ str(job_details[io_class]["iops_samples"]),
|
||||
]
|
||||
while len(bandwidth_data) < max_rows:
|
||||
bandwidth_data.append("")
|
||||
|
||||
iops_data = [
|
||||
format_ops_tohuman(int(job_details[io_class]["iops_min"])),
|
||||
format_ops_tohuman(int(job_details[io_class]["iops_max"])),
|
||||
format_ops_tohuman(int(job_details[io_class]["iops_mean"])),
|
||||
format_ops_tohuman(int(job_details[io_class]["iops_stddev"])),
|
||||
job_details[io_class]["iops_samples"],
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
lat_label = [
|
||||
"Min:",
|
||||
"Max:",
|
||||
"Mean:",
|
||||
"StdDev:",
|
||||
]
|
||||
while len(iops_data) < max_rows:
|
||||
iops_data.append("")
|
||||
while len(lat_label) < max_rows:
|
||||
lat_label.append("")
|
||||
|
||||
lat_data = [
|
||||
int(job_details[io_class]["lat_ns"]["min"]) / 1000,
|
||||
int(job_details[io_class]["lat_ns"]["max"]) / 1000,
|
||||
int(job_details[io_class]["lat_ns"]["mean"]) / 1000,
|
||||
int(job_details[io_class]["lat_ns"]["stddev"]) / 1000,
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
]
|
||||
while len(lat_data) < max_rows:
|
||||
lat_data.append("")
|
||||
|
@ -2460,98 +2572,119 @@ def format_info_benchmark_json(config, benchmark_information):
|
|||
lat_bucket_label = list()
|
||||
lat_bucket_data = list()
|
||||
for element in useful_latency_tree:
|
||||
lat_bucket_label.append(element[0])
|
||||
lat_bucket_data.append(element[1])
|
||||
lat_bucket_label.append(element[0] + ":" if element[0] else "")
|
||||
lat_bucket_data.append(round(float(element[1]), 2) if element[1] else "")
|
||||
while len(lat_bucket_label) < max_rows:
|
||||
lat_bucket_label.append("")
|
||||
while len(lat_bucket_data) < max_rows:
|
||||
lat_bucket_label.append("")
|
||||
|
||||
# Column default widths
|
||||
overall_label_length = 0
|
||||
overall_label_length = 5
|
||||
overall_column_length = 0
|
||||
bandwidth_label_length = 0
|
||||
bandwidth_column_length = 11
|
||||
iops_column_length = 4
|
||||
latency_column_length = 12
|
||||
cpu_label_length = 6
|
||||
cpu_column_length = 0
|
||||
memory_label_length = 6
|
||||
memory_column_length = 0
|
||||
network_label_length = 6
|
||||
network_column_length = 6
|
||||
bandwidth_label_length = 8
|
||||
bandwidth_column_length = 0
|
||||
latency_label_length = 7
|
||||
latency_column_length = 0
|
||||
latency_bucket_label_length = 0
|
||||
latency_bucket_column_length = 0
|
||||
|
||||
# Column layout:
|
||||
# General Bandwidth IOPS Latency Percentiles
|
||||
# --------- ---------- -------- -------- ---------------
|
||||
# Size Min Min Min A
|
||||
# BW Max Max Max B
|
||||
# IOPS Mean Mean Mean ...
|
||||
# Runtime StdDev StdDev StdDev Z
|
||||
# UsrCPU Samples Samples
|
||||
# SysCPU
|
||||
# CtxSw
|
||||
# MajFault
|
||||
# MinFault
|
||||
# Overall CPU Memory Network Bandwidth/IOPS Latency Percentiles
|
||||
# --------- ----- ------- -------- -------------- -------- ---------------
|
||||
# BW Total Total Total Min Min A
|
||||
# IOPS Usr OSD Send Max Max B
|
||||
# Time Sys MON Recv Mean Mean ...
|
||||
# Size OSD StdDev StdDev Z
|
||||
# MON Samples
|
||||
|
||||
# Set column widths
|
||||
for item in overall_label:
|
||||
_item_length = len(str(item))
|
||||
if _item_length > overall_label_length:
|
||||
overall_label_length = _item_length
|
||||
|
||||
for item in overall_data:
|
||||
_item_length = len(str(item))
|
||||
if _item_length > overall_column_length:
|
||||
overall_column_length = _item_length
|
||||
|
||||
test_name_length = len(nice_test_name_map[test])
|
||||
if test_name_length > overall_label_length + overall_column_length:
|
||||
_diff = test_name_length - (overall_label_length + overall_column_length)
|
||||
overall_column_length += _diff
|
||||
|
||||
for item in bandwidth_label:
|
||||
for item in cpu_data:
|
||||
_item_length = len(str(item))
|
||||
if _item_length > bandwidth_label_length:
|
||||
bandwidth_label_length = _item_length
|
||||
if _item_length > cpu_column_length:
|
||||
cpu_column_length = _item_length
|
||||
|
||||
for item in memory_data:
|
||||
_item_length = len(str(item))
|
||||
if _item_length > memory_column_length:
|
||||
memory_column_length = _item_length
|
||||
|
||||
for item in network_data:
|
||||
_item_length = len(str(item))
|
||||
if _item_length > network_column_length:
|
||||
network_column_length = _item_length
|
||||
|
||||
for item in bandwidth_data:
|
||||
_item_length = len(str(item))
|
||||
if _item_length > bandwidth_column_length:
|
||||
bandwidth_column_length = _item_length
|
||||
|
||||
for item in iops_data:
|
||||
_item_length = len(str(item))
|
||||
if _item_length > iops_column_length:
|
||||
iops_column_length = _item_length
|
||||
|
||||
for item in lat_data:
|
||||
_item_length = len(str(item))
|
||||
if _item_length > latency_column_length:
|
||||
latency_column_length = _item_length
|
||||
|
||||
for item in lat_bucket_label:
|
||||
for item in lat_bucket_data:
|
||||
_item_length = len(str(item))
|
||||
if _item_length > latency_bucket_label_length:
|
||||
latency_bucket_label_length = _item_length
|
||||
if _item_length > latency_bucket_column_length:
|
||||
latency_bucket_column_length = _item_length
|
||||
|
||||
# Top row (Headers)
|
||||
ainformation.append(
|
||||
"{bold}\
|
||||
{overall_label: <{overall_label_length}} \
|
||||
{bandwidth_label: <{bandwidth_label_length}} \
|
||||
{bandwidth: <{bandwidth_length}} \
|
||||
{iops: <{iops_length}} \
|
||||
{latency: <{latency_length}} \
|
||||
{latency_bucket_label: <{latency_bucket_label_length}} \
|
||||
{latency_bucket} \
|
||||
{end_bold}".format(
|
||||
"{bold}{overall_label: <{overall_label_length}} {header_fill}{end_bold}".format(
|
||||
bold=ansiprint.bold(),
|
||||
end_bold=ansiprint.end(),
|
||||
overall_label=nice_test_name_map[test],
|
||||
overall_label_length=overall_label_length,
|
||||
bandwidth_label="",
|
||||
bandwidth_label_length=bandwidth_label_length,
|
||||
bandwidth="Bandwidth/s",
|
||||
bandwidth_length=bandwidth_column_length,
|
||||
iops="IOPS",
|
||||
iops_length=iops_column_length,
|
||||
latency="Latency (μs)",
|
||||
latency_length=latency_column_length,
|
||||
latency_bucket_label="Latency Buckets (μs/%)",
|
||||
latency_bucket_label_length=latency_bucket_label_length,
|
||||
latency_bucket="",
|
||||
header_fill="-"
|
||||
* (
|
||||
(MAX_CONTENT_WIDTH if MAX_CONTENT_WIDTH <= 120 else 120)
|
||||
- len(nice_test_name_map[test])
|
||||
- 4
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
ainformation.append(
|
||||
"{bold}\
|
||||
{overall_label: <{overall_label_length}} \
|
||||
{cpu_label: <{cpu_label_length}} \
|
||||
{memory_label: <{memory_label_length}} \
|
||||
{network_label: <{network_label_length}} \
|
||||
{bandwidth_label: <{bandwidth_label_length}} \
|
||||
{latency_label: <{latency_label_length}} \
|
||||
{latency_bucket_label: <{latency_bucket_label_length}}\
|
||||
{end_bold}".format(
|
||||
bold=ansiprint.bold(),
|
||||
end_bold=ansiprint.end(),
|
||||
overall_label="Overall",
|
||||
overall_label_length=overall_label_length + overall_column_length + 1,
|
||||
cpu_label="CPU (%)",
|
||||
cpu_label_length=cpu_label_length + cpu_column_length + 1,
|
||||
memory_label="Memory (%)",
|
||||
memory_label_length=memory_label_length + memory_column_length + 1,
|
||||
network_label="Network (bps)",
|
||||
network_label_length=network_label_length + network_column_length + 1,
|
||||
bandwidth_label="Bandwidth / IOPS",
|
||||
bandwidth_label_length=bandwidth_label_length
|
||||
+ bandwidth_column_length
|
||||
+ 1,
|
||||
latency_label="Latency (μs)",
|
||||
latency_label_length=latency_label_length + latency_column_length + 1,
|
||||
latency_bucket_label="Buckets (μs/%)",
|
||||
latency_bucket_label_length=latency_bucket_label_length
|
||||
+ latency_bucket_column_length,
|
||||
)
|
||||
)
|
||||
|
||||
|
@ -2559,14 +2692,20 @@ def format_info_benchmark_json(config, benchmark_information):
|
|||
# Top row (Headers)
|
||||
ainformation.append(
|
||||
"{bold}\
|
||||
{overall_label: >{overall_label_length}} \
|
||||
{overall: <{overall_length}} \
|
||||
{bandwidth_label: >{bandwidth_label_length}} \
|
||||
{bandwidth: <{bandwidth_length}} \
|
||||
{iops: <{iops_length}} \
|
||||
{latency: <{latency_length}} \
|
||||
{latency_bucket_label: >{latency_bucket_label_length}} \
|
||||
{latency_bucket} \
|
||||
{overall_label: <{overall_label_length}} \
|
||||
{overall: <{overall_length}} \
|
||||
{cpu_label: <{cpu_label_length}} \
|
||||
{cpu: <{cpu_length}} \
|
||||
{memory_label: <{memory_label_length}} \
|
||||
{memory: <{memory_length}} \
|
||||
{network_label: <{network_label_length}} \
|
||||
{network: <{network_length}} \
|
||||
{bandwidth_label: <{bandwidth_label_length}} \
|
||||
{bandwidth: <{bandwidth_length}} \
|
||||
{latency_label: <{latency_label_length}} \
|
||||
{latency: <{latency_length}} \
|
||||
{latency_bucket_label: <{latency_bucket_label_length}} \
|
||||
{latency_bucket}\
|
||||
{end_bold}".format(
|
||||
bold="",
|
||||
end_bold="",
|
||||
|
@ -2574,12 +2713,24 @@ def format_info_benchmark_json(config, benchmark_information):
|
|||
overall_label_length=overall_label_length,
|
||||
overall=overall_data[idx],
|
||||
overall_length=overall_column_length,
|
||||
cpu_label=cpu_label[idx],
|
||||
cpu_label_length=cpu_label_length,
|
||||
cpu=cpu_data[idx],
|
||||
cpu_length=cpu_column_length,
|
||||
memory_label=memory_label[idx],
|
||||
memory_label_length=memory_label_length,
|
||||
memory=memory_data[idx],
|
||||
memory_length=memory_column_length,
|
||||
network_label=network_label[idx],
|
||||
network_label_length=network_label_length,
|
||||
network=network_data[idx],
|
||||
network_length=network_column_length,
|
||||
bandwidth_label=bandwidth_label[idx],
|
||||
bandwidth_label_length=bandwidth_label_length,
|
||||
bandwidth=bandwidth_data[idx],
|
||||
bandwidth_length=bandwidth_column_length,
|
||||
iops=iops_data[idx],
|
||||
iops_length=iops_column_length,
|
||||
latency_label=lat_label[idx],
|
||||
latency_label_length=latency_label_length,
|
||||
latency=lat_data[idx],
|
||||
latency_length=latency_column_length,
|
||||
latency_bucket_label=lat_bucket_label[idx],
|
||||
|
@ -2588,4 +2739,4 @@ def format_info_benchmark_json(config, benchmark_information):
|
|||
)
|
||||
)
|
||||
|
||||
return "\n".join(ainformation)
|
||||
return "\n".join(ainformation) + "\n"
|
||||
|
|
|
@ -89,6 +89,7 @@ def vm_define(
|
|||
node_selector,
|
||||
node_autostart,
|
||||
migration_method,
|
||||
migration_max_downtime,
|
||||
user_tags,
|
||||
protected_tags,
|
||||
):
|
||||
|
@ -96,7 +97,7 @@ def vm_define(
|
|||
Define a new VM on the cluster
|
||||
|
||||
API endpoint: POST /vm
|
||||
API arguments: xml={xml}, node={node}, limit={node_limit}, selector={node_selector}, autostart={node_autostart}, migration_method={migration_method}, user_tags={user_tags}, protected_tags={protected_tags}
|
||||
API arguments: xml={xml}, node={node}, limit={node_limit}, selector={node_selector}, autostart={node_autostart}, migration_method={migration_method}, migration_max_downtime={migration_max_downtime}, user_tags={user_tags}, protected_tags={protected_tags}
|
||||
API schema: {"message":"{data}"}
|
||||
"""
|
||||
params = {
|
||||
|
@ -105,6 +106,7 @@ def vm_define(
|
|||
"selector": node_selector,
|
||||
"autostart": node_autostart,
|
||||
"migration_method": migration_method,
|
||||
"migration_max_downtime": migration_max_downtime,
|
||||
"user_tags": user_tags,
|
||||
"protected_tags": protected_tags,
|
||||
}
|
||||
|
@ -205,6 +207,7 @@ def vm_metadata(
|
|||
node_selector,
|
||||
node_autostart,
|
||||
migration_method,
|
||||
migration_max_downtime,
|
||||
provisioner_profile,
|
||||
):
|
||||
"""
|
||||
|
@ -229,6 +232,9 @@ def vm_metadata(
|
|||
if migration_method is not None:
|
||||
params["migration_method"] = migration_method
|
||||
|
||||
if migration_max_downtime is not None:
|
||||
params["migration_max_downtime"] = migration_max_downtime
|
||||
|
||||
if provisioner_profile is not None:
|
||||
params["profile"] = provisioner_profile
|
||||
|
||||
|
@ -415,7 +421,7 @@ def vm_node(config, vm, target_node, action, force=False, wait=False, force_live
|
|||
return retstatus, response.json().get("message", "")
|
||||
|
||||
|
||||
def vm_locks(config, vm, wait_flag):
|
||||
def vm_locks(config, vm, wait_flag=True):
|
||||
"""
|
||||
Flush RBD locks of (stopped) VM
|
||||
|
||||
|
@ -492,6 +498,121 @@ def vm_restore(config, vm, backup_path, backup_datestring, retain_snapshot=False
|
|||
return True, response.json().get("message", "")
|
||||
|
||||
|
||||
def vm_create_snapshot(config, vm, snapshot_name=None, wait_flag=True):
    """
    Take a snapshot of the disks and configuration of {vm}

    API endpoint: POST /vm/{vm}/snapshot
    API arguments: snapshot_name={snapshot_name}
    API schema: {"message":"{data}"}
    """
    # snapshot_name is only sent when the caller supplied one
    query = {} if snapshot_name is None else {"snapshot_name": snapshot_name}
    endpoint = f"/vm/{vm}/snapshot"
    result = call_api(config, "post", endpoint, params=query)

    return get_wait_retdata(result, wait_flag)
|
||||
|
||||
|
||||
def vm_remove_snapshot(config, vm, snapshot_name, wait_flag=True):
    """
    Remove the snapshot {snapshot_name} of the disks and configuration of {vm}

    API endpoint: DELETE /vm/{vm}/snapshot
    API arguments: snapshot_name={snapshot_name}
    API schema: {"message":"{data}"}
    """
    endpoint = f"/vm/{vm}/snapshot"
    result = call_api(
        config, "delete", endpoint, params={"snapshot_name": snapshot_name}
    )

    return get_wait_retdata(result, wait_flag)
|
||||
|
||||
|
||||
def vm_rollback_snapshot(config, vm, snapshot_name, wait_flag=True):
    """
    Roll the disks and configuration of {vm} back to snapshot {snapshot_name}

    API endpoint: POST /vm/{vm}/snapshot/rollback
    API arguments: snapshot_name={snapshot_name}
    API schema: {"message":"{data}"}
    """
    endpoint = f"/vm/{vm}/snapshot/rollback"
    result = call_api(
        config, "post", endpoint, params={"snapshot_name": snapshot_name}
    )

    return get_wait_retdata(result, wait_flag)
|
||||
|
||||
|
||||
def vm_export_snapshot(
    config, vm, snapshot_name, export_path, incremental_parent=None, wait_flag=True
):
    """
    Export an (existing) snapshot of the disks and configuration of {vm} to
    {export_path}, optionally incremental against {incremental_parent}

    API endpoint: POST /vm/{vm}/snapshot/export
    API arguments: snapshot_name={snapshot_name}, export_path={export_path}, incremental_parent={incremental_parent}
    API schema: {"message":"{data}"}
    """
    # incremental_parent is only sent when the caller supplied one
    optional = (
        {} if incremental_parent is None else {"incremental_parent": incremental_parent}
    )
    query = {
        "snapshot_name": snapshot_name,
        "export_path": export_path,
        **optional,
    }

    endpoint = f"/vm/{vm}/snapshot/export"
    result = call_api(config, "post", endpoint, params=query)

    return get_wait_retdata(result, wait_flag)
|
||||
|
||||
|
||||
def vm_import_snapshot(
    config, vm, snapshot_name, import_path, retain_snapshot=False, wait_flag=True
):
    """
    Import a snapshot of {vm} and its volumes from a local primary coordinator
    filesystem path

    API endpoint: POST /vm/{vm}/snapshot/import
    API arguments: snapshot_name={snapshot_name}, import_path={import_path}, retain_snapshot={retain_snapshot}
    API schema: {"message":"{data}"}
    """
    endpoint = f"/vm/{vm}/snapshot/import"
    # All three arguments are always sent, including a False retain_snapshot
    query = dict(
        snapshot_name=snapshot_name,
        import_path=import_path,
        retain_snapshot=retain_snapshot,
    )
    result = call_api(config, "post", endpoint, params=query)

    return get_wait_retdata(result, wait_flag)
|
||||
|
||||
|
||||
def vm_autobackup(config, email_recipients=None, force_full_flag=False, wait_flag=True):
    """
    Perform a cluster VM autobackup

    API endpoint: POST /vm/autobackup
    API arguments: email_recipients={email_recipients}, force_full={force_full_flag}
    API schema: {"message":"{data}"}
    """
    # Both keys are always placed in the params mapping, even when
    # email_recipients is None; force_full_flag is sent under the "force_full" key
    query = dict(email_recipients=email_recipients, force_full=force_full_flag)
    result = call_api(config, "post", "/vm/autobackup", params=query)

    return get_wait_retdata(result, wait_flag)
|
||||
|
||||
|
||||
def vm_vcpus_set(config, vm, vcpus, topology, restart):
|
||||
"""
|
||||
Set the vCPU count of the VM with topology
|
||||
|
@ -1516,29 +1637,40 @@ def format_info(config, domain_information, long_output):
|
|||
ansiprint.purple(), ansiprint.end(), domain_information["vcpu"]
|
||||
)
|
||||
)
|
||||
ainformation.append(
|
||||
"{}Topology (S/C/T):{} {}".format(
|
||||
ansiprint.purple(), ansiprint.end(), domain_information["vcpu_topology"]
|
||||
if long_output:
|
||||
ainformation.append(
|
||||
"{}Topology (S/C/T):{} {}".format(
|
||||
ansiprint.purple(), ansiprint.end(), domain_information["vcpu_topology"]
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
if (
|
||||
domain_information["vnc"].get("listen", "None") != "None"
|
||||
and domain_information["vnc"].get("port", "None") != "None"
|
||||
):
|
||||
domain_information["vnc"].get("listen")
|
||||
and domain_information["vnc"].get("port")
|
||||
) or long_output:
|
||||
listen = (
|
||||
domain_information["vnc"]["listen"]
|
||||
if domain_information["vnc"].get("listen")
|
||||
else "N/A"
|
||||
)
|
||||
port = (
|
||||
domain_information["vnc"]["port"]
|
||||
if domain_information["vnc"].get("port")
|
||||
else "N/A"
|
||||
)
|
||||
ainformation.append("")
|
||||
ainformation.append(
|
||||
"{}VNC listen:{} {}".format(
|
||||
ansiprint.purple(), ansiprint.end(), domain_information["vnc"]["listen"]
|
||||
ansiprint.purple(), ansiprint.end(), listen
|
||||
)
|
||||
)
|
||||
ainformation.append(
|
||||
"{}VNC port:{} {}".format(
|
||||
ansiprint.purple(), ansiprint.end(), domain_information["vnc"]["port"]
|
||||
ansiprint.purple(), ansiprint.end(), port
|
||||
)
|
||||
)
|
||||
|
||||
if long_output is True:
|
||||
if long_output:
|
||||
# Virtualization information
|
||||
ainformation.append("")
|
||||
ainformation.append(
|
||||
|
@ -1626,6 +1758,8 @@ def format_info(config, domain_information, long_output):
|
|||
"migrate": ansiprint.blue(),
|
||||
"unmigrate": ansiprint.blue(),
|
||||
"provision": ansiprint.blue(),
|
||||
"restore": ansiprint.blue(),
|
||||
"import": ansiprint.blue(),
|
||||
}
|
||||
ainformation.append(
|
||||
"{}State:{} {}{}{}".format(
|
||||
|
@ -1637,14 +1771,14 @@ def format_info(config, domain_information, long_output):
|
|||
)
|
||||
)
|
||||
ainformation.append(
|
||||
"{}Current Node:{} {}".format(
|
||||
"{}Current node:{} {}".format(
|
||||
ansiprint.purple(), ansiprint.end(), domain_information["node"]
|
||||
)
|
||||
)
|
||||
if not domain_information["last_node"]:
|
||||
domain_information["last_node"] = "N/A"
|
||||
ainformation.append(
|
||||
"{}Previous Node:{} {}".format(
|
||||
"{}Previous node:{} {}".format(
|
||||
ansiprint.purple(), ansiprint.end(), domain_information["last_node"]
|
||||
)
|
||||
)
|
||||
|
@ -1658,12 +1792,18 @@ def format_info(config, domain_information, long_output):
|
|||
)
|
||||
)
|
||||
|
||||
if not domain_information.get("node_selector"):
|
||||
if (
|
||||
not domain_information.get("node_selector")
|
||||
or domain_information.get("node_selector") == "None"
|
||||
):
|
||||
formatted_node_selector = "Default"
|
||||
else:
|
||||
formatted_node_selector = str(domain_information["node_selector"]).title()
|
||||
|
||||
if not domain_information.get("node_limit"):
|
||||
if (
|
||||
not domain_information.get("node_limit")
|
||||
or domain_information.get("node_limit") == "None"
|
||||
):
|
||||
formatted_node_limit = "Any"
|
||||
else:
|
||||
formatted_node_limit = ", ".join(domain_information["node_limit"])
|
||||
|
@ -1675,16 +1815,16 @@ def format_info(config, domain_information, long_output):
|
|||
autostart_colour = ansiprint.green()
|
||||
formatted_node_autostart = "True"
|
||||
|
||||
if not domain_information.get("migration_method"):
|
||||
formatted_migration_method = "Any"
|
||||
if (
|
||||
not domain_information.get("migration_method")
|
||||
or domain_information.get("migration_method") == "None"
|
||||
):
|
||||
formatted_migration_method = "Live, Shutdown"
|
||||
else:
|
||||
formatted_migration_method = str(domain_information["migration_method"]).title()
|
||||
|
||||
ainformation.append(
|
||||
"{}Migration selector:{} {}".format(
|
||||
ansiprint.purple(), ansiprint.end(), formatted_node_selector
|
||||
formatted_migration_method = (
|
||||
f"{str(domain_information['migration_method']).title()} only"
|
||||
)
|
||||
)
|
||||
|
||||
ainformation.append(
|
||||
"{}Node limit:{} {}".format(
|
||||
ansiprint.purple(), ansiprint.end(), formatted_node_limit
|
||||
|
@ -1700,10 +1840,22 @@ def format_info(config, domain_information, long_output):
|
|||
)
|
||||
)
|
||||
ainformation.append(
|
||||
"{}Migration Method:{} {}".format(
|
||||
"{}Migration method:{} {}".format(
|
||||
ansiprint.purple(), ansiprint.end(), formatted_migration_method
|
||||
)
|
||||
)
|
||||
ainformation.append(
|
||||
"{}Migration selector:{} {}".format(
|
||||
ansiprint.purple(), ansiprint.end(), formatted_node_selector
|
||||
)
|
||||
)
|
||||
ainformation.append(
|
||||
"{}Max live downtime:{} {}".format(
|
||||
ansiprint.purple(),
|
||||
ansiprint.end(),
|
||||
f"{domain_information.get('migration_max_downtime')} ms",
|
||||
)
|
||||
)
|
||||
|
||||
# Tag list
|
||||
tags_name_length = 5
|
||||
|
@ -1749,9 +1901,9 @@ def format_info(config, domain_information, long_output):
|
|||
tags_name=tag["name"],
|
||||
tags_type=tag["type"],
|
||||
tags_protected=str(tag["protected"]),
|
||||
tags_protected_colour=ansiprint.green()
|
||||
if tag["protected"]
|
||||
else ansiprint.blue(),
|
||||
tags_protected_colour=(
|
||||
ansiprint.green() if tag["protected"] else ansiprint.blue()
|
||||
),
|
||||
end=ansiprint.end(),
|
||||
)
|
||||
)
|
||||
|
@ -1764,6 +1916,78 @@ def format_info(config, domain_information, long_output):
|
|||
)
|
||||
)
|
||||
|
||||
# Snapshot list
|
||||
snapshots_name_length = 5
|
||||
snapshots_age_length = 4
|
||||
snapshots_xml_changes_length = 12
|
||||
for snapshot in domain_information.get("snapshots", list()):
|
||||
xml_diff_plus = 0
|
||||
xml_diff_minus = 0
|
||||
for line in snapshot["xml_diff_lines"]:
|
||||
if re.match(r"^\+ ", line):
|
||||
xml_diff_plus += 1
|
||||
elif re.match(r"^- ", line):
|
||||
xml_diff_minus += 1
|
||||
xml_diff_counts = f"+{xml_diff_plus}/-{xml_diff_minus}"
|
||||
|
||||
_snapshots_name_length = len(snapshot["name"]) + 1
|
||||
if _snapshots_name_length > snapshots_name_length:
|
||||
snapshots_name_length = _snapshots_name_length
|
||||
|
||||
_snapshots_age_length = len(snapshot["age"]) + 1
|
||||
if _snapshots_age_length > snapshots_age_length:
|
||||
snapshots_age_length = _snapshots_age_length
|
||||
|
||||
_snapshots_xml_changes_length = len(xml_diff_counts) + 1
|
||||
if _snapshots_xml_changes_length > snapshots_xml_changes_length:
|
||||
snapshots_xml_changes_length = _snapshots_xml_changes_length
|
||||
|
||||
if len(domain_information.get("snapshots", list())) > 0:
|
||||
ainformation.append("")
|
||||
ainformation.append(
|
||||
"{purple}Snapshots:{end} {bold}{snapshots_name: <{snapshots_name_length}} {snapshots_age: <{snapshots_age_length}} {snapshots_xml_changes: <{snapshots_xml_changes_length}}{end}".format(
|
||||
purple=ansiprint.purple(),
|
||||
bold=ansiprint.bold(),
|
||||
end=ansiprint.end(),
|
||||
snapshots_name_length=snapshots_name_length,
|
||||
snapshots_age_length=snapshots_age_length,
|
||||
snapshots_xml_changes_length=snapshots_xml_changes_length,
|
||||
snapshots_name="Name",
|
||||
snapshots_age="Age",
|
||||
snapshots_xml_changes="XML Changes",
|
||||
)
|
||||
)
|
||||
|
||||
for snapshot in domain_information.get("snapshots", list()):
|
||||
xml_diff_plus = 0
|
||||
xml_diff_minus = 0
|
||||
for line in snapshot["xml_diff_lines"]:
|
||||
if re.match(r"^\+ ", line):
|
||||
xml_diff_plus += 1
|
||||
elif re.match(r"^- ", line):
|
||||
xml_diff_minus += 1
|
||||
xml_diff_counts = f"{ansiprint.green()}+{xml_diff_plus}{ansiprint.end()}/{ansiprint.red()}-{xml_diff_minus}{ansiprint.end()}"
|
||||
|
||||
ainformation.append(
|
||||
" {snapshots_name: <{snapshots_name_length}} {snapshots_age: <{snapshots_age_length}} {snapshots_xml_changes: <{snapshots_xml_changes_length}}{end}".format(
|
||||
snapshots_name_length=snapshots_name_length,
|
||||
snapshots_age_length=snapshots_age_length,
|
||||
snapshots_xml_changes_length=snapshots_xml_changes_length,
|
||||
snapshots_name=snapshot["name"],
|
||||
snapshots_age=snapshot["age"],
|
||||
snapshots_xml_changes=xml_diff_counts,
|
||||
end=ansiprint.end(),
|
||||
)
|
||||
)
|
||||
else:
|
||||
ainformation.append("")
|
||||
ainformation.append(
|
||||
"{purple}Snapshots:{end} N/A".format(
|
||||
purple=ansiprint.purple(),
|
||||
end=ansiprint.end(),
|
||||
)
|
||||
)
|
||||
|
||||
# Network list
|
||||
net_list = []
|
||||
cluster_net_list = call_api(config, "get", "/network").json()
|
||||
|
@ -1790,7 +2014,7 @@ def format_info(config, domain_information, long_output):
|
|||
)
|
||||
)
|
||||
|
||||
if long_output is True:
|
||||
if long_output:
|
||||
# Disk list
|
||||
ainformation.append("")
|
||||
name_length = 0
|
||||
|
@ -1926,6 +2150,7 @@ def format_list(config, vm_list):
|
|||
vm_name_length = 5
|
||||
vm_state_length = 6
|
||||
vm_tags_length = 5
|
||||
vm_snapshots_length = 10
|
||||
vm_nets_length = 9
|
||||
vm_ram_length = 8
|
||||
vm_vcpu_length = 6
|
||||
|
@ -1946,6 +2171,12 @@ def format_list(config, vm_list):
|
|||
_vm_tags_length = len(",".join(tag_list)) + 1
|
||||
if _vm_tags_length > vm_tags_length:
|
||||
vm_tags_length = _vm_tags_length
|
||||
# vm_snapshots column
|
||||
_vm_snapshots_length = (
|
||||
len(str(len(domain_information.get("snapshots", list())))) + 1
|
||||
)
|
||||
if _vm_snapshots_length > vm_snapshots_length:
|
||||
vm_snapshots_length = _vm_snapshots_length
|
||||
# vm_nets column
|
||||
_vm_nets_length = len(",".join(net_list)) + 1
|
||||
if _vm_nets_length > vm_nets_length:
|
||||
|
@ -1962,7 +2193,11 @@ def format_list(config, vm_list):
|
|||
# Format the string (header)
|
||||
vm_list_output.append(
|
||||
"{bold}{vm_header: <{vm_header_length}} {resource_header: <{resource_header_length}} {node_header: <{node_header_length}}{end_bold}".format(
|
||||
vm_header_length=vm_name_length + vm_state_length + vm_tags_length + 2,
|
||||
vm_header_length=vm_name_length
|
||||
+ vm_state_length
|
||||
+ vm_tags_length
|
||||
+ vm_snapshots_length
|
||||
+ 3,
|
||||
resource_header_length=vm_nets_length + vm_ram_length + vm_vcpu_length + 2,
|
||||
node_header_length=vm_node_length + vm_migrated_length + 1,
|
||||
bold=ansiprint.bold(),
|
||||
|
@ -1972,7 +2207,12 @@ def format_list(config, vm_list):
|
|||
[
|
||||
"-"
|
||||
for _ in range(
|
||||
4, vm_name_length + vm_state_length + vm_tags_length + 1
|
||||
4,
|
||||
vm_name_length
|
||||
+ vm_state_length
|
||||
+ vm_tags_length
|
||||
+ +vm_snapshots_length
|
||||
+ 2,
|
||||
)
|
||||
]
|
||||
),
|
||||
|
@ -1994,6 +2234,7 @@ def format_list(config, vm_list):
|
|||
"{bold}{vm_name: <{vm_name_length}} \
|
||||
{vm_state_colour}{vm_state: <{vm_state_length}}{end_colour} \
|
||||
{vm_tags: <{vm_tags_length}} \
|
||||
{vm_snapshots: <{vm_snapshots_length}} \
|
||||
{vm_networks: <{vm_nets_length}} \
|
||||
{vm_memory: <{vm_ram_length}} {vm_vcpu: <{vm_vcpu_length}} \
|
||||
{vm_node: <{vm_node_length}} \
|
||||
|
@ -2001,6 +2242,7 @@ def format_list(config, vm_list):
|
|||
vm_name_length=vm_name_length,
|
||||
vm_state_length=vm_state_length,
|
||||
vm_tags_length=vm_tags_length,
|
||||
vm_snapshots_length=vm_snapshots_length,
|
||||
vm_nets_length=vm_nets_length,
|
||||
vm_ram_length=vm_ram_length,
|
||||
vm_vcpu_length=vm_vcpu_length,
|
||||
|
@ -2013,6 +2255,7 @@ def format_list(config, vm_list):
|
|||
vm_name="Name",
|
||||
vm_state="State",
|
||||
vm_tags="Tags",
|
||||
vm_snapshots="Snapshots",
|
||||
vm_networks="Networks",
|
||||
vm_memory="RAM (M)",
|
||||
vm_vcpu="vCPUs",
|
||||
|
@ -2079,6 +2322,7 @@ def format_list(config, vm_list):
|
|||
"{bold}{vm_name: <{vm_name_length}} \
|
||||
{vm_state_colour}{vm_state: <{vm_state_length}}{end_colour} \
|
||||
{vm_tags: <{vm_tags_length}} \
|
||||
{vm_snapshots: <{vm_snapshots_length}} \
|
||||
{vm_networks: <{vm_nets_length}} \
|
||||
{vm_memory: <{vm_ram_length}} {vm_vcpu: <{vm_vcpu_length}} \
|
||||
{vm_node: <{vm_node_length}} \
|
||||
|
@ -2086,6 +2330,7 @@ def format_list(config, vm_list):
|
|||
vm_name_length=vm_name_length,
|
||||
vm_state_length=vm_state_length,
|
||||
vm_tags_length=vm_tags_length,
|
||||
vm_snapshots_length=vm_snapshots_length,
|
||||
vm_nets_length=vm_nets_length,
|
||||
vm_ram_length=vm_ram_length,
|
||||
vm_vcpu_length=vm_vcpu_length,
|
||||
|
@ -2098,6 +2343,7 @@ def format_list(config, vm_list):
|
|||
vm_name=domain_information["name"],
|
||||
vm_state=domain_information["state"],
|
||||
vm_tags=",".join(tag_list),
|
||||
vm_snapshots=len(domain_information.get("snapshots", list())),
|
||||
vm_networks=",".join(net_string_list),
|
||||
vm_memory=domain_information["memory"],
|
||||
vm_vcpu=domain_information["vcpu"],
|
||||
|
|
|
@ -2,7 +2,7 @@ from setuptools import setup
|
|||
|
||||
setup(
|
||||
name="pvc",
|
||||
version="0.9.89",
|
||||
version="0.9.100",
|
||||
packages=["pvc.cli", "pvc.lib"],
|
||||
install_requires=[
|
||||
"Click",
|
||||
|
|
|
@ -0,0 +1,695 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# autobackup.py - PVC API Autobackup functions
|
||||
# Part of the Parallel Virtual Cluster (PVC) system
|
||||
#
|
||||
# Copyright (C) 2018-2024 Joshua M. Boniface <joshua@boniface.me>
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, version 3.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
#
|
||||
###############################################################################
|
||||
|
||||
from datetime import datetime
|
||||
from json import load as jload
|
||||
from json import dump as jdump
|
||||
from os import popen, makedirs, path, scandir
|
||||
from shutil import rmtree
|
||||
from subprocess import run, PIPE
|
||||
|
||||
from daemon_lib.common import run_os_command
|
||||
from daemon_lib.config import get_autobackup_configuration
|
||||
from daemon_lib.celery import start, fail, log_info, log_err, update, finish
|
||||
|
||||
import daemon_lib.ceph as ceph
|
||||
import daemon_lib.vm as vm
|
||||
|
||||
|
||||
def send_execution_failure_report(
    celery_conf, config, recipients=None, total_time=0, error=None
):
    """Email an autobackup execution-failure report through the local sendmail.

    Args:
        celery_conf: A ``(celery, current_stage, total_stages)`` sequence. The
            first element is handed to the celery logging helpers; the other
            two drive the progress update.
        config: Autobackup configuration mapping; only ``config["cluster"]``
            is read here.
        recipients: Iterable of recipient addresses. ``None`` or an empty
            iterable means "do not send anything".
        total_time: Elapsed run time, either a number of seconds or a
            ``datetime.timedelta``.
        error: Error message to embed in the report body.

    Returns:
        None. A failure to hand the message to sendmail is logged, not raised.
    """
    # Nothing to do without recipients; "sendmail -t" needs at least one To:
    if not recipients:
        return

    from datetime import timedelta
    from email.utils import formatdate
    from socket import gethostname

    celery = celery_conf[0]

    log_message = f"Sending email failure report to {', '.join(recipients)}"
    log_info(celery, log_message)
    update(
        celery,
        log_message,
        current=celery_conf[1] + 1,
        total=celery_conf[2],
    )

    # Callers pass either plain seconds or a timedelta; normalise to seconds so
    # that the "s" suffix in the body below is always meaningful.
    if isinstance(total_time, timedelta):
        total_time = total_time.total_seconds()

    current_datetime = datetime.now()
    # timestamp() is the portable spelling of the platform-specific
    # strftime("%s") directive.
    email_datetime = formatdate(current_datetime.timestamp())

    email_to = ", ".join(f"<{recipient}>" for recipient in recipients)

    email = [
        f"Date: {email_datetime}",
        f"Subject: PVC Autobackup execution failure for cluster '{config['cluster']}'",
        f"To: {email_to}",
        f"From: PVC Autobackup System <pvc@{gethostname()}>",
        "",
        f"A PVC autobackup has FAILED at {current_datetime} in {total_time}s due to an execution error.",
        "",
        "The reported error message is:",
        f" {error}",
    ]

    try:
        with popen("/usr/sbin/sendmail -t", "w") as p:
            p.write("\n".join(email))
    except Exception as e:
        # Best-effort delivery: report the problem but never abort the caller
        log_err(celery, f"Failed to send report email: {e}")
|
||||
|
||||
|
||||
def send_execution_summary_report(
    celery_conf, config, recipients=None, total_time=0, summary=None
):
    """Email a per-VM summary of all tracked backups through the local sendmail.

    Args:
        celery_conf: A ``(celery, current_stage, total_stages)`` sequence. The
            first element is handed to the celery logging helpers; the other
            two drive the progress update.
        config: Autobackup configuration mapping; only ``config["cluster"]``
            is read here.
        recipients: Iterable of recipient addresses. ``None`` or an empty
            iterable means "do not send anything".
        total_time: Elapsed run time; interpolated into the body as-is.
        summary: Mapping of VM name to a list of backup-detail dicts. Each dict
            is read for ``datestring``, ``result``, ``runtime_secs``,
            ``snapshot_name``, ``type``, ``export_files``,
            ``export_size_bytes`` and the failure message. Defaults to an
            empty mapping.

    Returns:
        None. A failure to hand the message to sendmail is logged, not raised.
    """
    # Nothing to do without recipients; "sendmail -t" needs at least one To:
    if not recipients:
        return

    # Avoid a shared mutable default argument
    if summary is None:
        summary = {}

    from email.utils import formatdate
    from socket import gethostname

    celery = celery_conf[0]

    log_message = f"Sending email summary report to {', '.join(recipients)}"
    log_info(celery, log_message)
    update(
        celery,
        log_message,
        current=celery_conf[1] + 1,
        total=celery_conf[2],
    )

    current_datetime = datetime.now()
    # timestamp() is the portable spelling of the platform-specific
    # strftime("%s") directive.
    email_datetime = formatdate(current_datetime.timestamp())

    email_to = ", ".join(f"<{recipient}>" for recipient in recipients)

    email = [
        f"Date: {email_datetime}",
        f"Subject: PVC Autobackup report for cluster '{config['cluster']}'",
        f"To: {email_to}",
        f"From: PVC Autobackup System <pvc@{gethostname()}>",
        "",
        f"A PVC autobackup has been completed at {current_datetime} in {total_time}.",
        "",
        "The following is a summary of all current VM backups after cleanups, most recent first:",
        "",
    ]

    for vm_name, backups in summary.items():
        email.append(f"VM: {vm_name}:")
        for backup in backups:
            datestring = backup.get("datestring")
            backup_date = datetime.strptime(datestring, "%Y%m%d%H%M%S")
            runtime_secs = backup.get("runtime_secs", 0)
            snapshot_name = backup.get("snapshot_name")
            backup_type = backup.get("type", "unknown")

            if backup.get("result", False):
                # export_files may be stored as null; treat that as "no files"
                export_files = backup.get("export_files") or []
                export_size_bytes = backup.get("export_size_bytes", 0)
                email.append(
                    f" {backup_date}: Success in {runtime_secs} seconds, ID {snapshot_name}, type {backup_type}"
                )
                email.append(
                    f" Backup contains {len(export_files)} files totaling {ceph.format_bytes_tohuman(export_size_bytes)} ({export_size_bytes} bytes)"
                )
            else:
                # The per-backup summary is written with the key "message";
                # "result_message" is kept only as a fallback.
                message = backup.get("message", backup.get("result_message"))
                # A message stored as a one-element list is flattened to text
                if isinstance(message, (list, tuple)):
                    message = " ".join(str(part) for part in message)
                email.append(
                    f" {backup_date}: Failure in {runtime_secs} seconds, ID {snapshot_name}, type {backup_type}"
                )
                email.append(f" {message}")

    try:
        with popen("/usr/sbin/sendmail -t", "w") as p:
            p.write("\n".join(email))
    except Exception as e:
        # Best-effort delivery: report the problem but never abort the caller
        log_err(celery, f"Failed to send report email: {e}")
|
||||
|
||||
|
||||
def run_vm_backup(zkhandler, celery, config, vm_detail, force_full=False):
    """Snapshot one VM, export the snapshot to disk and prune expired backups.

    The backup is "full" when ``force_full`` is set, when no full backup is
    tracked yet, or when the newest tracked full backup sits at index
    ``full_interval - 1`` or later in the tracked list; otherwise it is an
    incremental against the newest tracked full backup.

    Args:
        zkhandler: Zookeeper handler used for reads, writes and deletes.
        celery: Celery task handle passed to the logging helpers.
        config: Autobackup configuration; reads ``backup_root_path``,
            ``backup_root_suffix`` and ``backup_schedule``.
        vm_detail: VM information dict; ``name`` and ``uuid`` are read and the
            whole dict is stored in the backup summary.
        force_full: Force a full backup regardless of the schedule.

    Returns:
        The list of tracked backups after this run, or an empty list when the
        per-backup ``snapshot.json`` could not be read back.
    """
    vm_name = vm_detail["name"]
    dom_uuid = vm_detail["uuid"]
    backup_suffixed_path = f"{config['backup_root_path']}{config['backup_root_suffix']}"
    vm_backup_path = f"{backup_suffixed_path}/{vm_name}"
    autobackup_state_file = f"{vm_backup_path}/.autobackup.json"
    full_interval = config["backup_schedule"]["full_interval"]
    full_retention = config["backup_schedule"]["full_retention"]

    if not path.exists(vm_backup_path) or not path.exists(autobackup_state_file):
        # There are no existing backups so the list is empty
        state_data = dict()
        tracked_backups = list()
    else:
        with open(autobackup_state_file) as fh:
            state_data = jload(fh)
        tracked_backups = state_data["tracked_backups"]

    full_backups = [b for b in tracked_backups if b["type"] == "full"]
    if full_backups:
        last_full_backup = full_backups[0]
        last_full_backup_idx = tracked_backups.index(last_full_backup)
        if force_full or last_full_backup_idx >= full_interval - 1:
            this_backup_incremental_parent = None
            this_backup_retain_snapshot = True
        else:
            this_backup_incremental_parent = last_full_backup["snapshot_name"]
            this_backup_retain_snapshot = False
    else:
        # The very first backup must be full to start the tree
        this_backup_incremental_parent = None
        this_backup_retain_snapshot = True

    export_type = (
        "incremental" if this_backup_incremental_parent is not None else "full"
    )

    now = datetime.now()
    datestring = now.strftime("%Y%m%d%H%M%S")
    snapshot_name = f"ab{datestring}"
    snapshot_path = f"{backup_suffixed_path}/{vm_name}/{snapshot_name}"
    backup_json_file = f"{snapshot_path}/snapshot.json"

    # RBD snapshots created so far, as "pool/volume@snapshot" strings
    snap_list = list()

    # Both are read by write_backup_summary() at call time, so later
    # reassignments in this function are reflected in the written summary.
    export_files = None
    export_files_size = 0

    def update_tracked_backups():
        """Prepend this backup's snapshot.json to the tracked list and save it."""
        try:
            with open(backup_json_file) as fh:
                backup_json = jload(fh)
            tracked_backups.insert(0, backup_json)
        except Exception as e:
            log_err(celery, f"Could not open export JSON: {e}")
            return list()

        state_data["tracked_backups"] = tracked_backups
        with open(autobackup_state_file, "w") as fh:
            jdump(state_data, fh)

        return tracked_backups

    def write_backup_summary(success=False, message=""):
        """Write snapshot.json for this backup; return (ok, error)."""
        ttotal = (datetime.now() - now).total_seconds()
        export_details = {
            "type": export_type,
            "result": success,
            "message": message,
            "datestring": datestring,
            "runtime_secs": ttotal,
            "snapshot_name": snapshot_name,
            "incremental_parent": this_backup_incremental_parent,
            "vm_detail": vm_detail,
            "export_files": export_files,
            "export_size_bytes": export_files_size,
        }
        try:
            with open(backup_json_file, "w") as fh:
                jdump(export_details, fh)
        except Exception as e:
            log_err(celery, f"Error exporting snapshot details: {e}")
            return False, e

        return True, ""

    def cleanup_failure():
        """Remove every RBD snapshot created so far, ignoring the results."""
        for snapshot in snap_list:
            rbd_name, snap_name = snapshot.split("@")
            snap_pool_name, snap_volume_name = rbd_name.split("/")
            # We capture no output here, because if this fails too we're in a
            # deep error chain and will just ignore it
            ceph.remove_snapshot(zkhandler, snap_pool_name, snap_volume_name, snap_name)

    def get_dir_size(pathname):
        """Return the total size in bytes of all files below pathname."""
        total = 0
        with scandir(pathname) as it:
            for entry in it:
                if entry.is_file():
                    total += entry.stat().st_size
                elif entry.is_dir():
                    total += get_dir_size(entry.path)
        return total

    # Take the VM snapshot (vm.vm_worker_create_snapshot)
    # NOTE(review): the snapshot directory is only created by the makedirs()
    # call further down, so the summaries written on the two failure paths
    # before it can only succeed if that directory already exists - confirm
    # whether these early failures are meant to be recorded.
    rbd_list = zkhandler.read(("domain.storage.volumes", dom_uuid)).split(",")

    for rbd in rbd_list:
        pool, volume = rbd.split("/")
        ret, msg = ceph.add_snapshot(
            zkhandler, pool, volume, snapshot_name, zk_only=False
        )
        if not ret:
            cleanup_failure()
            log_err(celery, msg.replace("ERROR: ", ""))
            error_message = f"[{vm_name}] Error in snapshot export, skipping"
            write_backup_summary(message=error_message)
            return update_tracked_backups()
        snap_list.append(f"{pool}/{volume}@{snapshot_name}")

    # Get the current domain XML
    vm_config = zkhandler.read(("domain.xml", dom_uuid))

    # Add the snapshot entry to Zookeeper
    snapshot_fields = (
        ("domain_snapshot.name", snapshot_name),
        ("domain_snapshot.timestamp", now.strftime("%s")),
        ("domain_snapshot.xml", vm_config),
        ("domain_snapshot.rbd_snapshots", ",".join(snap_list)),
    )
    ret = zkhandler.write(
        [
            (("domain.snapshots", dom_uuid, field, snapshot_name), value)
            for field, value in snapshot_fields
        ]
    )
    if not ret:
        error_message = f"[{vm_name}] Error in snapshot export, skipping"
        log_err(celery, error_message)
        write_backup_summary(message=error_message)
        return update_tracked_backups()

    # Export the snapshot (vm.vm_worker_export_snapshot)
    export_target_path = f"{snapshot_path}/images"

    try:
        makedirs(export_target_path)
    except Exception as e:
        error_message = f"[{vm_name}] Failed to create target directory '{export_target_path}': {e}"
        log_err(celery, error_message)
        return tracked_backups

    # Set the export filetype
    if this_backup_incremental_parent is not None:
        export_fileext = "rbddiff"
    else:
        export_fileext = "rbdimg"

    snapshot_volumes = list()
    for rbdsnap in snap_list:
        pool, _volume = rbdsnap.split("/")
        volume, name = _volume.split("@")
        ret, snapshots = ceph.get_list_snapshot(
            zkhandler, pool, volume, limit=name, is_fuzzy=False
        )
        if ret:
            snapshot_volumes += snapshots

    export_files = list()
    for snapshot_volume in snapshot_volumes:
        snap_pool = snapshot_volume["pool"]
        snap_volume = snapshot_volume["volume"]
        snap_snapshot_name = snapshot_volume["snapshot"]
        snap_size = snapshot_volume["stats"]["size"]

        export_source = f"{snap_pool}/{snap_volume}@{snap_snapshot_name}"
        export_file = f"{snap_pool}.{snap_volume}.{export_fileext}"
        if this_backup_incremental_parent is not None:
            export_cmd = f"rbd export-diff --from-snap {this_backup_incremental_parent} {export_source} {export_target_path}/{export_file}"
        else:
            export_cmd = f"rbd export --export-format 2 {export_source} {export_target_path}/{export_file}"

        retcode, stdout, stderr = run_os_command(export_cmd)
        if retcode:
            error_message = f"[{vm_name}] Failed to export snapshot for volume(s) '{snap_pool}/{snap_volume}'"
            log_err(celery, error_message)
            write_backup_summary(message=error_message)
            return update_tracked_backups()

        export_files.append((f"images/{export_file}", snap_size))

    export_files_size = get_dir_size(export_target_path)

    ret, e = write_backup_summary(success=True)
    if not ret:
        error_message = f"[{vm_name}] Failed to export configuration snapshot: {e}"
        log_err(celery, error_message)
        write_backup_summary(message=error_message)
        return update_tracked_backups()

    # Clean up the snapshot (vm.vm_worker_remove_snapshot)
    if not this_backup_retain_snapshot:
        for snap in snap_list:
            rbd, name = snap.split("@")
            pool, volume = rbd.split("/")
            ret, msg = ceph.remove_snapshot(zkhandler, pool, volume, name)
            if not ret:
                error_message = msg.replace("ERROR: ", f"[{vm_name}] ")
                log_err(celery, error_message)
                write_backup_summary(message=error_message)
                return update_tracked_backups()

        ret = zkhandler.delete(
            ("domain.snapshots", dom_uuid, "domain_snapshot.name", snapshot_name)
        )
        if not ret:
            error_message = f"[{vm_name}] Failed to remove VM snapshot; continuing"
            log_err(celery, error_message)

    marked_for_deletion = list()
    # Find any full backups that are expired
    found_full_count = 0
    for backup in tracked_backups:
        if backup["type"] == "full":
            found_full_count += 1
            if found_full_count > full_retention:
                marked_for_deletion.append(backup)

    # Find any incremental backups that depend on marked parents; the name set
    # is kept in step with the list instead of being rebuilt per iteration
    marked_names = {b["snapshot_name"] for b in marked_for_deletion}
    for backup in tracked_backups:
        if (
            backup["type"] == "incremental"
            and backup["incremental_parent"] in marked_names
        ):
            marked_for_deletion.append(backup)
            marked_names.add(backup["snapshot_name"])

    for backup_to_delete in marked_for_deletion:
        ret = vm.vm_worker_remove_snapshot(
            zkhandler, None, vm_name, backup_to_delete["snapshot_name"]
        )
        if ret is False:
            error_message = f"Failed to remove obsolete backup snapshot '{backup_to_delete['snapshot_name']}', leaving in tracked backups"
            log_err(celery, error_message)
        else:
            rmtree(f"{vm_backup_path}/{backup_to_delete['snapshot_name']}")
            tracked_backups.remove(backup_to_delete)

    return update_tracked_backups()
|
||||
|
||||
|
||||
def worker_cluster_autobackup(
    zkhandler, celery, force_full=False, email_recipients=None
):
    """Run an autobackup of every VM carrying one of the configured backup tags.

    Progress is reported through the celery helpers as a series of stages: one
    per mount command, one per VM, one per unmount command, one for the
    optional email report and one final completion stage.

    Args:
        zkhandler: Zookeeper handler passed through to the VM helpers.
        celery: Celery task handle used for logging and progress updates.
        force_full: Force a full backup of every VM.
        email_recipients: Addresses for the summary/failure report, or
            ``None`` to send no email.

    Returns:
        The result of ``finish()`` on completion (including the "not
        configured" and "no VMs" cases), or ``False`` when fetching the VM list
        or a mount/unmount command fails.
    """
    config = get_autobackup_configuration()

    per_vm_summary = dict()

    stage = 0
    stage_count = 1
    if email_recipients is not None:
        stage_count += 1

    start(
        celery,
        f"Starting cluster '{config['cluster']}' VM autobackup",
        current=stage,
        total=stage_count,
    )

    if not config["autobackup_enabled"]:
        message = "Autobackups are not configured on this cluster."
        log_info(celery, message)
        return finish(celery, message, current=stage_count, total=stage_count)

    started_at = datetime.now()

    retcode, vm_list = vm.get_list(zkhandler)
    if not retcode:
        error_message = f"Failed to fetch VM list: {vm_list}"
        log_err(celery, error_message)
        send_execution_failure_report(
            (celery, stage, stage_count),
            config,
            recipients=email_recipients,
            error=error_message,
        )
        fail(celery, error_message)
        return False

    backup_suffixed_path = f"{config['backup_root_path']}{config['backup_root_suffix']}"
    if not path.exists(backup_suffixed_path):
        makedirs(backup_suffixed_path)

    full_interval = config["backup_schedule"]["full_interval"]

    # Keep only the VMs that share at least one tag with the configured set
    backup_vms = list()
    for candidate in vm_list:
        candidate_tags = set([t["name"] for t in candidate["tags"]])
        if candidate_tags.intersection(set(config["backup_tags"])):
            backup_vms.append(candidate)

    if len(backup_vms) < 1:
        message = "Found no VMs tagged for autobackup."
        log_info(celery, message)
        return finish(celery, message, current=stage_count, total=stage_count)

    if config["auto_mount_enabled"]:
        stage_count += len(config["mount_cmds"])
        stage_count += len(config["unmount_cmds"])

    stage_count += len(backup_vms)

    log_info(
        celery,
        f"Found {len(backup_vms)} suitable VM(s) for autobackup: {', '.join([b['name'] for b in backup_vms])}",
    )

    def run_stage_commands(label, commands):
        """Run each command as its own stage; return False on the first failure."""
        nonlocal stage
        for cmd in commands:
            stage += 1
            update(
                celery,
                f"Executing {label} command '{cmd.split()[0]}'",
                current=stage,
                total=stage_count,
            )

            result = run(cmd.split(), stdout=PIPE, stderr=PIPE)
            if result.returncode == 0:
                continue

            error_message = f"Failed to execute {label} command '{cmd.split()[0]}': {result.stderr.decode().strip()}"
            log_err(celery, error_message)
            send_execution_failure_report(
                (celery, stage, stage_count),
                config,
                recipients=email_recipients,
                total_time=datetime.now() - started_at,
                error=error_message,
            )
            fail(celery, error_message)
            return False
        return True

    # Handle automount mount commands
    if config["auto_mount_enabled"]:
        if not run_stage_commands("mount", config["mount_cmds"]):
            return False

    # Execute the backup: take a snapshot, then export the snapshot
    for vm_detail in backup_vms:
        vm_backup_path = f"{backup_suffixed_path}/{vm_detail['name']}"
        autobackup_state_file = f"{vm_backup_path}/.autobackup.json"

        # Work out which backup type this run will produce for this VM; it is
        # used only for the progress message below.
        if path.exists(vm_backup_path) and path.exists(autobackup_state_file):
            with open(autobackup_state_file) as fh:
                tracked = jload(fh)["tracked_backups"]
        else:
            # There are no existing backups so the list is empty
            tracked = list()

        incremental_parent = None
        fulls = [b for b in tracked if b["type"] == "full"]
        if len(fulls) > 0:
            newest_full = fulls[0]
            newest_full_idx = tracked.index(newest_full)
            if not force_full and newest_full_idx < full_interval - 1:
                incremental_parent = newest_full["snapshot_name"]
        # Otherwise: the very first backup must be full to start the tree

        if incremental_parent is not None:
            export_type = "incremental"
        else:
            export_type = "full"

        stage += 1
        update(
            celery,
            f"Performing autobackup of VM {vm_detail['name']} ({export_type})",
            current=stage,
            total=stage_count,
        )

        per_vm_summary[vm_detail["name"]] = run_vm_backup(
            zkhandler,
            celery,
            config,
            vm_detail,
            force_full=force_full,
        )

    # Handle automount unmount commands
    if config["auto_mount_enabled"]:
        if not run_stage_commands("unmount", config["unmount_cmds"]):
            return False

    elapsed = datetime.now() - started_at

    if email_recipients is not None:
        send_execution_summary_report(
            (celery, stage, stage_count),
            config,
            recipients=email_recipients,
            total_time=elapsed,
            summary=per_vm_summary,
        )
        stage += 1

    stage += 1
    return finish(
        celery,
        f"Successfully completed cluster '{config['cluster']}' VM autobackup",
        current=stage,
        total=stage_count,
    )
|
|
@ -19,31 +19,34 @@
|
|||
#
|
||||
###############################################################################
|
||||
|
||||
import os
|
||||
import psutil
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
import subprocess
|
||||
|
||||
from datetime import datetime
|
||||
from json import loads, dumps
|
||||
from time import sleep
|
||||
|
||||
from daemon_lib.celery import start, fail, log_info, update, finish
|
||||
|
||||
import daemon_lib.common as pvc_common
|
||||
import daemon_lib.ceph as pvc_ceph
|
||||
|
||||
|
||||
# Define the current test format
|
||||
TEST_FORMAT = 1
|
||||
TEST_FORMAT = 2
|
||||
|
||||
|
||||
# We run a total of 8 tests, to give a generalized idea of performance on the cluster:
|
||||
# 1. A sequential read test of 8GB with a 4M block size
|
||||
# 2. A sequential write test of 8GB with a 4M block size
|
||||
# 3. A random read test of 8GB with a 4M block size
|
||||
# 4. A random write test of 8GB with a 4M block size
|
||||
# 5. A random read test of 8GB with a 256k block size
|
||||
# 6. A random write test of 8GB with a 256k block size
|
||||
# 7. A random read test of 8GB with a 4k block size
|
||||
# 8. A random write test of 8GB with a 4k block size
|
||||
# 1. A sequential read test of 64GB with a 4M block size
|
||||
# 2. A sequential write test of 64GB with a 4M block size
|
||||
# 3. A random read test of 64GB with a 4M block size
|
||||
# 4. A random write test of 64GB with a 4M block size
|
||||
# 5. A random read test of 64GB with a 256k block size
|
||||
# 6. A random write test of 64GB with a 256k block size
|
||||
# 7. A random read test of 64GB with a 4k block size
|
||||
# 8. A random write test of 64GB with a 4k block size
|
||||
# Taken together, these 8 results should give a very good indication of the overall storage performance
|
||||
# for a variety of workloads.
|
||||
test_matrix = {
|
||||
|
@ -100,7 +103,7 @@ test_matrix = {
|
|||
|
||||
# Specify the benchmark volume name and size
|
||||
benchmark_volume_name = "pvcbenchmark"
|
||||
benchmark_volume_size = "8G"
|
||||
benchmark_volume_size = "64G"
|
||||
|
||||
|
||||
#
|
||||
|
@ -115,12 +118,13 @@ class BenchmarkError(Exception):
|
|||
#
|
||||
|
||||
|
||||
def cleanup(job_name, db_conn=None, db_cur=None, zkhandler=None):
|
||||
def cleanup(job_name, db_conn=None, db_cur=None, zkhandler=None, final=False):
|
||||
if db_conn is not None and db_cur is not None:
|
||||
# Clean up our dangling result
|
||||
query = "DELETE FROM storage_benchmarks WHERE job = %s;"
|
||||
args = (job_name,)
|
||||
db_cur.execute(query, args)
|
||||
if not final:
|
||||
# Clean up our dangling result (non-final runs only)
|
||||
query = "DELETE FROM storage_benchmarks WHERE job = %s;"
|
||||
args = (job_name,)
|
||||
db_cur.execute(query, args)
|
||||
db_conn.commit()
|
||||
# Close the database connections cleanly
|
||||
close_database(db_conn, db_cur)
|
||||
|
@ -225,7 +229,7 @@ def cleanup_benchmark_volume(
|
|||
|
||||
|
||||
def run_benchmark_job(
|
||||
test, pool, job_name=None, db_conn=None, db_cur=None, zkhandler=None
|
||||
config, test, pool, job_name=None, db_conn=None, db_cur=None, zkhandler=None
|
||||
):
|
||||
test_spec = test_matrix[test]
|
||||
log_info(None, f"Running test '{test}'")
|
||||
|
@ -255,31 +259,165 @@ def run_benchmark_job(
|
|||
)
|
||||
|
||||
log_info(None, "Running fio job: {}".format(" ".join(fio_cmd.split())))
|
||||
retcode, stdout, stderr = pvc_common.run_os_command(fio_cmd)
|
||||
|
||||
# Run the fio command manually instead of using our run_os_command wrapper
|
||||
# This will help us gather statistics about this node while it's running
|
||||
process = subprocess.Popen(
|
||||
fio_cmd.split(),
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True,
|
||||
)
|
||||
|
||||
# Wait 15 seconds for the test to start
|
||||
log_info(None, "Waiting 15 seconds for test resource stabilization")
|
||||
sleep(15)
|
||||
|
||||
# Set up function to get process CPU utilization by name
|
||||
def get_cpu_utilization_by_name(process_name):
|
||||
cpu_usage = 0
|
||||
for proc in psutil.process_iter(["name", "cpu_percent"]):
|
||||
if proc.info["name"] == process_name:
|
||||
cpu_usage += proc.info["cpu_percent"]
|
||||
return cpu_usage
|
||||
|
||||
# Set up function to get process memory utilization by name
|
||||
def get_memory_utilization_by_name(process_name):
|
||||
memory_usage = 0
|
||||
for proc in psutil.process_iter(["name", "memory_percent"]):
|
||||
if proc.info["name"] == process_name:
|
||||
memory_usage += proc.info["memory_percent"]
|
||||
return memory_usage
|
||||
|
||||
# Set up function to get network traffic utilization in bps
|
||||
def get_network_traffic_bps(interface, duration=1):
    """
    Measure the average traffic on one network interface over a short window.

    Reads the per-NIC byte counters from psutil.net_io_counters(), sleeps for
    duration seconds, reads them again, and converts the byte deltas into bits
    per second.

    Returns a 3-tuple (sent_bps, recv_bps, total_bps). If the interface is
    missing from either sample, (None, None, None) is returned so the result
    can always be unpacked into three names; callers must check for None
    before doing arithmetic on the values.
    """
    # Get initial network counters
    net_io_start = psutil.net_io_counters(pernic=True)
    if interface not in net_io_start:
        # Same arity as the success path: three values, not two
        return None, None, None

    stats_start = net_io_start[interface]
    bytes_sent_start = stats_start.bytes_sent
    bytes_recv_start = stats_start.bytes_recv

    # Wait for the specified duration
    sleep(duration)

    # Get final network counters; the interface may have gone away meanwhile
    stats_end = psutil.net_io_counters(pernic=True).get(interface)
    if stats_end is None:
        return None, None, None

    # Calculate bytes per second
    bytes_sent_per_sec = (stats_end.bytes_sent - bytes_sent_start) / duration
    bytes_recv_per_sec = (stats_end.bytes_recv - bytes_recv_start) / duration

    # Convert to bits per second (bps)
    bits_sent_per_sec = bytes_sent_per_sec * 8
    bits_recv_per_sec = bytes_recv_per_sec * 8
    bits_total_per_sec = bits_sent_per_sec + bits_recv_per_sec

    return bits_sent_per_sec, bits_recv_per_sec, bits_total_per_sec
|
||||
|
||||
log_info(None, f"Starting system resource polling for test '{test}'")
|
||||
storage_interface = config["storage_dev"]
|
||||
total_cpus = psutil.cpu_count(logical=True)
|
||||
ticks = 1
|
||||
osd_cpu_utilization = 0
|
||||
osd_memory_utilization = 0
|
||||
mon_cpu_utilization = 0
|
||||
mon_memory_utilization = 0
|
||||
total_cpu_utilization = 0
|
||||
total_memory_utilization = 0
|
||||
storage_sent_bps = 0
|
||||
storage_recv_bps = 0
|
||||
storage_total_bps = 0
|
||||
|
||||
while process.poll() is None:
|
||||
# Do collection of statistics like network bandwidth and cpu utilization
|
||||
current_osd_cpu_utilization = get_cpu_utilization_by_name("ceph-osd")
|
||||
current_osd_memory_utilization = get_memory_utilization_by_name("ceph-osd")
|
||||
current_mon_cpu_utilization = get_cpu_utilization_by_name("ceph-mon")
|
||||
current_mon_memory_utilization = get_memory_utilization_by_name("ceph-mon")
|
||||
current_total_cpu_utilization = psutil.cpu_percent(interval=1)
|
||||
current_total_memory_utilization = psutil.virtual_memory().percent
|
||||
(
|
||||
current_storage_sent_bps,
|
||||
current_storage_recv_bps,
|
||||
current_storage_total_bps,
|
||||
) = get_network_traffic_bps(storage_interface)
|
||||
# Recheck if the process is done yet; if it's not, we add the values and increase the ticks
|
||||
# This helps ensure that if the process finishes earlier than the longer polls above,
|
||||
# this particular tick isn't counted which can skew the average
|
||||
if process.poll() is None:
|
||||
osd_cpu_utilization += current_osd_cpu_utilization
|
||||
osd_memory_utilization += current_osd_memory_utilization
|
||||
mon_cpu_utilization += current_mon_cpu_utilization
|
||||
mon_memory_utilization += current_mon_memory_utilization
|
||||
total_cpu_utilization += current_total_cpu_utilization
|
||||
total_memory_utilization += current_total_memory_utilization
|
||||
storage_sent_bps += current_storage_sent_bps
|
||||
storage_recv_bps += current_storage_recv_bps
|
||||
storage_total_bps += current_storage_total_bps
|
||||
ticks += 1
|
||||
|
||||
# Get the 1-minute load average and CPU utilization, which covers the test duration
|
||||
load1, _, _ = os.getloadavg()
|
||||
load1 = round(load1, 2)
|
||||
|
||||
# Calculate the average CPU utilization values over the runtime
|
||||
# Divide the OSD and MON CPU utilization by the total number of CPU cores, because
|
||||
# the total is divided this way
|
||||
avg_osd_cpu_utilization = round(osd_cpu_utilization / ticks / total_cpus, 2)
|
||||
avg_osd_memory_utilization = round(osd_memory_utilization / ticks, 2)
|
||||
avg_mon_cpu_utilization = round(mon_cpu_utilization / ticks / total_cpus, 2)
|
||||
avg_mon_memory_utilization = round(mon_memory_utilization / ticks, 2)
|
||||
avg_total_cpu_utilization = round(total_cpu_utilization / ticks, 2)
|
||||
avg_total_memory_utilization = round(total_memory_utilization / ticks, 2)
|
||||
avg_storage_sent_bps = round(storage_sent_bps / ticks, 2)
|
||||
avg_storage_recv_bps = round(storage_recv_bps / ticks, 2)
|
||||
avg_storage_total_bps = round(storage_total_bps / ticks, 2)
|
||||
|
||||
stdout, stderr = process.communicate()
|
||||
retcode = process.returncode
|
||||
|
||||
resource_data = {
|
||||
"avg_cpu_util_percent": {
|
||||
"total": avg_total_cpu_utilization,
|
||||
"ceph-mon": avg_mon_cpu_utilization,
|
||||
"ceph-osd": avg_osd_cpu_utilization,
|
||||
},
|
||||
"avg_memory_util_percent": {
|
||||
"total": avg_total_memory_utilization,
|
||||
"ceph-mon": avg_mon_memory_utilization,
|
||||
"ceph-osd": avg_osd_memory_utilization,
|
||||
},
|
||||
"avg_network_util_bps": {
|
||||
"sent": avg_storage_sent_bps,
|
||||
"recv": avg_storage_recv_bps,
|
||||
"total": avg_storage_total_bps,
|
||||
},
|
||||
}
|
||||
|
||||
try:
|
||||
jstdout = loads(stdout)
|
||||
if retcode:
|
||||
raise
|
||||
except Exception:
|
||||
cleanup(
|
||||
job_name,
|
||||
db_conn=db_conn,
|
||||
db_cur=db_cur,
|
||||
zkhandler=zkhandler,
|
||||
)
|
||||
fail(
|
||||
None,
|
||||
f"Failed to run fio test '{test}': {stderr}",
|
||||
)
|
||||
return None, None
|
||||
|
||||
return jstdout
|
||||
return resource_data, jstdout
|
||||
|
||||
|
||||
def worker_run_benchmark(zkhandler, celery, config, pool):
|
||||
def worker_run_benchmark(zkhandler, celery, config, pool, name):
|
||||
# Phase 0 - connect to databases
|
||||
cur_time = datetime.now().isoformat(timespec="seconds")
|
||||
cur_primary = zkhandler.read("base.config.primary_node")
|
||||
job_name = f"{cur_time}_{cur_primary}"
|
||||
if not name:
|
||||
cur_time = datetime.now().isoformat(timespec="seconds")
|
||||
cur_primary = zkhandler.read("base.config.primary_node")
|
||||
job_name = f"{cur_time}_{cur_primary}"
|
||||
else:
|
||||
job_name = name
|
||||
|
||||
current_stage = 0
|
||||
total_stages = 13
|
||||
|
@ -357,7 +495,8 @@ def worker_run_benchmark(zkhandler, celery, config, pool):
|
|||
total=total_stages,
|
||||
)
|
||||
|
||||
results[test] = run_benchmark_job(
|
||||
resource_data, fio_data = run_benchmark_job(
|
||||
config,
|
||||
test,
|
||||
pool,
|
||||
job_name=job_name,
|
||||
|
@ -365,6 +504,25 @@ def worker_run_benchmark(zkhandler, celery, config, pool):
|
|||
db_cur=db_cur,
|
||||
zkhandler=zkhandler,
|
||||
)
|
||||
if resource_data is None or fio_data is None:
|
||||
cleanup_benchmark_volume(
|
||||
pool,
|
||||
job_name=job_name,
|
||||
db_conn=db_conn,
|
||||
db_cur=db_cur,
|
||||
zkhandler=zkhandler,
|
||||
)
|
||||
cleanup(
|
||||
job_name,
|
||||
db_conn=db_conn,
|
||||
db_cur=db_cur,
|
||||
zkhandler=zkhandler,
|
||||
)
|
||||
fail(
|
||||
None,
|
||||
f"Failed to run fio test '{test}'",
|
||||
)
|
||||
results[test] = {**resource_data, **fio_data}
|
||||
|
||||
# Phase 3 - cleanup
|
||||
current_stage += 1
|
||||
|
@ -410,6 +568,7 @@ def worker_run_benchmark(zkhandler, celery, config, pool):
|
|||
db_conn=db_conn,
|
||||
db_cur=db_cur,
|
||||
zkhandler=zkhandler,
|
||||
final=True,
|
||||
)
|
||||
|
||||
current_stage += 1
|
||||
|
|
|
@ -320,7 +320,11 @@ def get_list_osd(zkhandler, limit=None, is_fuzzy=True):
|
|||
#
|
||||
def getPoolInformation(zkhandler, pool):
|
||||
# Parse the stats data
|
||||
(pool_stats_raw, tier, pgs,) = zkhandler.read_many(
|
||||
(
|
||||
pool_stats_raw,
|
||||
tier,
|
||||
pgs,
|
||||
) = zkhandler.read_many(
|
||||
[
|
||||
("pool.stats", pool),
|
||||
("pool.tier", pool),
|
||||
|
@ -536,7 +540,10 @@ def getCephVolumes(zkhandler, pool):
|
|||
pool_list = [pool]
|
||||
|
||||
for pool_name in pool_list:
|
||||
for volume_name in zkhandler.children(("volume", pool_name)):
|
||||
children = zkhandler.children(("volume", pool_name))
|
||||
if children is None:
|
||||
continue
|
||||
for volume_name in children:
|
||||
volume_list.append("{}/{}".format(pool_name, volume_name))
|
||||
|
||||
return volume_list
|
||||
|
@ -553,7 +560,21 @@ def getVolumeInformation(zkhandler, pool, volume):
|
|||
return volume_information
|
||||
|
||||
|
||||
def add_volume(zkhandler, pool, name, size):
|
||||
def scan_volume(zkhandler, pool, name):
    """
    Refresh the stored stats of an RBD volume from "rbd info".

    Runs "rbd info --format json <pool>/<name>" and writes the command's
    output to the "volume.stats" key of "<pool>/<name>" through zkhandler.

    If the command exits non-zero, nothing is written and the previously
    stored stats are left as they were.

    Returns None in all cases.
    """
    retcode, stdout, stderr = common.run_os_command(
        "rbd info --format json {}/{}".format(pool, name)
    )
    if retcode:
        # A failed "rbd info" has no usable JSON on stdout; storing it would
        # replace the existing stats with a value json.loads() cannot parse
        return

    # Update the volume stats in Zookeeper
    zkhandler.write(
        [
            (("volume.stats", f"{pool}/{name}"), stdout),
        ]
    )
|
||||
|
||||
|
||||
def add_volume(zkhandler, pool, name, size, force_flag=False, zk_only=False):
|
||||
# 1. Verify the size of the volume
|
||||
pool_information = getPoolInformation(zkhandler, pool)
|
||||
size_bytes = format_bytes_fromhuman(size)
|
||||
|
@ -563,46 +584,88 @@ def add_volume(zkhandler, pool, name, size):
|
|||
f"ERROR: Requested volume size '{size}' does not have a valid SI unit",
|
||||
)
|
||||
|
||||
if size_bytes >= int(pool_information["stats"]["free_bytes"]):
|
||||
pool_total_free_bytes = int(pool_information["stats"]["free_bytes"])
|
||||
if size_bytes >= pool_total_free_bytes:
|
||||
return (
|
||||
False,
|
||||
f"ERROR: Requested volume size '{format_bytes_tohuman(size_bytes)}' is greater than the available free space in the pool ('{format_bytes_tohuman(pool_information['stats']['free_bytes'])}')",
|
||||
)
|
||||
|
||||
# 2. Create the volume
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
"rbd create --size {}B {}/{}".format(size_bytes, pool, name)
|
||||
# Check if we're greater than 80% utilization after the create; error if so unless we have the force flag
|
||||
pool_total_bytes = (
|
||||
int(pool_information["stats"]["used_bytes"]) + pool_total_free_bytes
|
||||
)
|
||||
if retcode:
|
||||
return False, 'ERROR: Failed to create RBD volume "{}": {}'.format(name, stderr)
|
||||
pool_safe_total_bytes = int(pool_total_bytes * 0.80)
|
||||
pool_safe_free_bytes = pool_safe_total_bytes - int(
|
||||
pool_information["stats"]["used_bytes"]
|
||||
)
|
||||
if size_bytes >= pool_safe_free_bytes and not force_flag:
|
||||
return (
|
||||
False,
|
||||
f"ERROR: Requested volume size '{format_bytes_tohuman(size_bytes)}' is greater than the safe free space in the pool ('{format_bytes_tohuman(pool_safe_free_bytes)}' for 80% full); retry with force to ignore this error",
|
||||
)
|
||||
|
||||
# 2. Get volume stats
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
"rbd info --format json {}/{}".format(pool, name)
|
||||
)
|
||||
volstats = stdout
|
||||
# 2. Create the volume
|
||||
# zk_only flag skips actually creating the volume - this would be done by some other mechanism
|
||||
if not zk_only:
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
"rbd create --size {}B {}/{}".format(size_bytes, pool, name)
|
||||
)
|
||||
if retcode:
|
||||
return False, 'ERROR: Failed to create RBD volume "{}": {}'.format(
|
||||
name, stderr
|
||||
)
|
||||
|
||||
# 3. Add the new volume to Zookeeper
|
||||
zkhandler.write(
|
||||
[
|
||||
(("volume", f"{pool}/{name}"), ""),
|
||||
(("volume.stats", f"{pool}/{name}"), volstats),
|
||||
(("volume.stats", f"{pool}/{name}"), ""),
|
||||
(("snapshot", f"{pool}/{name}"), ""),
|
||||
]
|
||||
)
|
||||
|
||||
# 4. Scan the volume stats
|
||||
scan_volume(zkhandler, pool, name)
|
||||
|
||||
return True, 'Created RBD volume "{}" of size "{}" in pool "{}".'.format(
|
||||
name, format_bytes_tohuman(size_bytes), pool
|
||||
)
|
||||
|
||||
|
||||
def clone_volume(zkhandler, pool, name_src, name_new):
|
||||
def clone_volume(zkhandler, pool, name_src, name_new, force_flag=False):
|
||||
# 1. Verify the volume
|
||||
if not verifyVolume(zkhandler, pool, name_src):
|
||||
return False, 'ERROR: No volume with name "{}" is present in pool "{}".'.format(
|
||||
name_src, pool
|
||||
)
|
||||
|
||||
# 1. Clone the volume
|
||||
volume_stats_raw = zkhandler.read(("volume.stats", f"{pool}/{name_src}"))
|
||||
volume_stats = dict(json.loads(volume_stats_raw))
|
||||
size_bytes = volume_stats["size"]
|
||||
pool_information = getPoolInformation(zkhandler, pool)
|
||||
pool_total_free_bytes = int(pool_information["stats"]["free_bytes"])
|
||||
if size_bytes >= pool_total_free_bytes:
|
||||
return (
|
||||
False,
|
||||
f"ERROR: Clone volume size '{format_bytes_tohuman(size_bytes)}' is greater than the available free space in the pool ('{format_bytes_tohuman(pool_information['stats']['free_bytes'])}')",
|
||||
)
|
||||
|
||||
# Check if we're greater than 80% utilization after the create; error if so unless we have the force flag
|
||||
pool_total_bytes = (
|
||||
int(pool_information["stats"]["used_bytes"]) + pool_total_free_bytes
|
||||
)
|
||||
pool_safe_total_bytes = int(pool_total_bytes * 0.80)
|
||||
pool_safe_free_bytes = pool_safe_total_bytes - int(
|
||||
pool_information["stats"]["used_bytes"]
|
||||
)
|
||||
if size_bytes >= pool_safe_free_bytes and not force_flag:
|
||||
return (
|
||||
False,
|
||||
f"ERROR: Clone volume size '{format_bytes_tohuman(size_bytes)}' is greater than the safe free space in the pool ('{format_bytes_tohuman(pool_safe_free_bytes)}' for 80% full); retry with force to ignore this error",
|
||||
)
|
||||
|
||||
# 2. Clone the volume
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
"rbd copy {}/{} {}/{}".format(pool, name_src, pool, name_new)
|
||||
)
|
||||
|
@ -614,27 +677,24 @@ def clone_volume(zkhandler, pool, name_src, name_new):
|
|||
),
|
||||
)
|
||||
|
||||
# 2. Get volume stats
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
"rbd info --format json {}/{}".format(pool, name_new)
|
||||
)
|
||||
volstats = stdout
|
||||
|
||||
# 3. Add the new volume to Zookeeper
|
||||
zkhandler.write(
|
||||
[
|
||||
(("volume", f"{pool}/{name_new}"), ""),
|
||||
(("volume.stats", f"{pool}/{name_new}"), volstats),
|
||||
(("volume.stats", f"{pool}/{name_new}"), ""),
|
||||
(("snapshot", f"{pool}/{name_new}"), ""),
|
||||
]
|
||||
)
|
||||
|
||||
# 4. Scan the volume stats
|
||||
scan_volume(zkhandler, pool, name_new)
|
||||
|
||||
return True, 'Cloned RBD volume "{}" to "{}" in pool "{}"'.format(
|
||||
name_src, name_new, pool
|
||||
)
|
||||
|
||||
|
||||
def resize_volume(zkhandler, pool, name, size):
|
||||
def resize_volume(zkhandler, pool, name, size, force_flag=False):
|
||||
if not verifyVolume(zkhandler, pool, name):
|
||||
return False, 'ERROR: No volume with name "{}" is present in pool "{}".'.format(
|
||||
name, pool
|
||||
|
@ -649,12 +709,27 @@ def resize_volume(zkhandler, pool, name, size):
|
|||
f"ERROR: Requested volume size '{size}' does not have a valid SI unit",
|
||||
)
|
||||
|
||||
if size_bytes >= int(pool_information["stats"]["free_bytes"]):
|
||||
pool_total_free_bytes = int(pool_information["stats"]["free_bytes"])
|
||||
if size_bytes >= pool_total_free_bytes:
|
||||
return (
|
||||
False,
|
||||
f"ERROR: Requested volume size '{format_bytes_tohuman(size_bytes)}' is greater than the available free space in the pool ('{format_bytes_tohuman(pool_information['stats']['free_bytes'])}')",
|
||||
)
|
||||
|
||||
# Check if we're greater than 80% utilization after the create; error if so unless we have the force flag
|
||||
pool_total_bytes = (
|
||||
int(pool_information["stats"]["used_bytes"]) + pool_total_free_bytes
|
||||
)
|
||||
pool_safe_total_bytes = int(pool_total_bytes * 0.80)
|
||||
pool_safe_free_bytes = pool_safe_total_bytes - int(
|
||||
pool_information["stats"]["used_bytes"]
|
||||
)
|
||||
if size_bytes >= pool_safe_free_bytes and not force_flag:
|
||||
return (
|
||||
False,
|
||||
f"ERROR: Requested volume size '{format_bytes_tohuman(size_bytes)}' is greater than the safe free space in the pool ('{format_bytes_tohuman(pool_safe_free_bytes)}' for 80% full); retry with force to ignore this error",
|
||||
)
|
||||
|
||||
# 2. Resize the volume
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
"rbd resize --size {} {}/{}".format(
|
||||
|
@ -698,20 +773,8 @@ def resize_volume(zkhandler, pool, name, size):
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
# 4. Get volume stats
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
"rbd info --format json {}/{}".format(pool, name)
|
||||
)
|
||||
volstats = stdout
|
||||
|
||||
# 5. Update the volume in Zookeeper
|
||||
zkhandler.write(
|
||||
[
|
||||
(("volume", f"{pool}/{name}"), ""),
|
||||
(("volume.stats", f"{pool}/{name}"), volstats),
|
||||
(("snapshot", f"{pool}/{name}"), ""),
|
||||
]
|
||||
)
|
||||
# 4. Scan the volume stats
|
||||
scan_volume(zkhandler, pool, name)
|
||||
|
||||
return True, 'Resized RBD volume "{}" to size "{}" in pool "{}".'.format(
|
||||
name, format_bytes_tohuman(size_bytes), pool
|
||||
|
@ -744,18 +807,8 @@ def rename_volume(zkhandler, pool, name, new_name):
|
|||
]
|
||||
)
|
||||
|
||||
# 3. Get volume stats
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
"rbd info --format json {}/{}".format(pool, new_name)
|
||||
)
|
||||
volstats = stdout
|
||||
|
||||
# 4. Update the volume stats in Zookeeper
|
||||
zkhandler.write(
|
||||
[
|
||||
(("volume.stats", f"{pool}/{new_name}"), volstats),
|
||||
]
|
||||
)
|
||||
# 3. Scan the volume stats
|
||||
scan_volume(zkhandler, pool, new_name)
|
||||
|
||||
return True, 'Renamed RBD volume "{}" to "{}" in pool "{}".'.format(
|
||||
name, new_name, pool
|
||||
|
@ -768,10 +821,22 @@ def remove_volume(zkhandler, pool, name):
|
|||
name, pool
|
||||
)
|
||||
|
||||
# 1. Remove volume snapshots
|
||||
# 1a. Remove PVC-managed volume snapshots
|
||||
for snapshot in zkhandler.children(("snapshot", f"{pool}/{name}")):
|
||||
remove_snapshot(zkhandler, pool, name, snapshot)
|
||||
|
||||
# 1b. Purge any remaining volume snapshots
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
"rbd snap purge {}/{}".format(pool, name)
|
||||
)
|
||||
if retcode:
|
||||
return (
|
||||
False,
|
||||
'ERROR: Failed to purge snapshots from RBD volume "{}" in pool "{}": {}'.format(
|
||||
name, pool, stderr
|
||||
),
|
||||
)
|
||||
|
||||
# 2. Remove the volume
|
||||
retcode, stdout, stderr = common.run_os_command("rbd rm {}/{}".format(pool, name))
|
||||
if retcode:
|
||||
|
@ -940,23 +1005,27 @@ def add_snapshot(zkhandler, pool, volume, name, zk_only=False):
|
|||
),
|
||||
)
|
||||
|
||||
# 2. Add the snapshot to Zookeeper
|
||||
# 2. Get snapshot stats
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
"rbd info --format json {}/{}@{}".format(pool, volume, name)
|
||||
)
|
||||
snapstats = stdout
|
||||
|
||||
# 3. Add the snapshot to Zookeeper
|
||||
zkhandler.write(
|
||||
[
|
||||
(("snapshot", f"{pool}/{volume}/{name}"), ""),
|
||||
(("snapshot.stats", f"{pool}/{volume}/{name}"), "{}"),
|
||||
(("snapshot.stats", f"{pool}/{volume}/{name}"), snapstats),
|
||||
]
|
||||
)
|
||||
|
||||
# 3. Update the count of snapshots on this volume
|
||||
# 4. Update the count of snapshots on this volume
|
||||
volume_stats_raw = zkhandler.read(("volume.stats", f"{pool}/{volume}"))
|
||||
volume_stats = dict(json.loads(volume_stats_raw))
|
||||
# Format the size to something nicer
|
||||
volume_stats["snapshot_count"] = volume_stats["snapshot_count"] + 1
|
||||
volume_stats_raw = json.dumps(volume_stats)
|
||||
zkhandler.write(
|
||||
[
|
||||
(("volume.stats", f"{pool}/{volume}"), volume_stats_raw),
|
||||
(("volume.stats", f"{pool}/{volume}"), json.dumps(volume_stats)),
|
||||
]
|
||||
)
|
||||
|
||||
|
@ -1010,6 +1079,36 @@ def rename_snapshot(zkhandler, pool, volume, name, new_name):
|
|||
)
|
||||
|
||||
|
||||
def rollback_snapshot(zkhandler, pool, volume, name):
    """
    Roll an RBD volume back to one of its snapshots.

    Checks the volume with verifyVolume() and then the snapshot with
    verifySnapshot(), and finally runs "rbd snap rollback" on
    <pool>/<volume>@<name>.

    Returns a (success, message) tuple: (False, error text) if either check
    fails or the rbd command exits non-zero, otherwise (True, confirmation).
    """
    # Guard: the volume must be known
    volume_ok = verifyVolume(zkhandler, pool, volume)
    if not volume_ok:
        missing_volume_msg = (
            'ERROR: No volume with name "{}" is present in pool "{}".'.format(
                volume, pool
            )
        )
        return False, missing_volume_msg

    # Guard: the snapshot must be known for that volume
    snapshot_ok = verifySnapshot(zkhandler, pool, volume, name)
    if not snapshot_ok:
        missing_snapshot_msg = 'ERROR: No snapshot with name "{}" is present for volume "{}" in pool "{}".'.format(
            name, volume, pool
        )
        return False, missing_snapshot_msg

    # 1. Roll back the snapshot
    rollback_cmd = "rbd snap rollback {}/{}@{}".format(pool, volume, name)
    retcode, stdout, stderr = common.run_os_command(rollback_cmd)
    if retcode:
        failure_msg = 'ERROR: Failed to roll back RBD volume "{}" in pool "{}" to snapshot "{}": {}'.format(
            volume, pool, name, stderr
        )
        return False, failure_msg

    success_msg = 'Rolled back RBD volume "{}" in pool "{}" to snapshot "{}".'.format(
        volume, pool, name
    )
    return True, success_msg
|
||||
|
||||
|
||||
def remove_snapshot(zkhandler, pool, volume, name):
|
||||
if not verifyVolume(zkhandler, pool, volume):
|
||||
return False, 'ERROR: No volume with name "{}" is present in pool "{}".'.format(
|
||||
|
@ -1051,20 +1150,9 @@ def remove_snapshot(zkhandler, pool, volume, name):
|
|||
)
|
||||
|
||||
|
||||
def get_list_snapshot(zkhandler, pool, volume, limit=None, is_fuzzy=True):
|
||||
def get_list_snapshot(zkhandler, target_pool, target_volume, limit=None, is_fuzzy=True):
|
||||
snapshot_list = []
|
||||
if pool and not verifyPool(zkhandler, pool):
|
||||
return False, 'ERROR: No pool with name "{}" is present in the cluster.'.format(
|
||||
pool
|
||||
)
|
||||
|
||||
if volume and not verifyPool(zkhandler, volume):
|
||||
return (
|
||||
False,
|
||||
'ERROR: No volume with name "{}" is present in the cluster.'.format(volume),
|
||||
)
|
||||
|
||||
full_snapshot_list = getCephSnapshots(zkhandler, pool, volume)
|
||||
full_snapshot_list = getCephSnapshots(zkhandler, target_pool, target_volume)
|
||||
|
||||
if is_fuzzy and limit:
|
||||
# Implicitly assume fuzzy limits
|
||||
|
@ -1076,6 +1164,15 @@ def get_list_snapshot(zkhandler, pool, volume, limit=None, is_fuzzy=True):
|
|||
for snapshot in full_snapshot_list:
|
||||
volume, snapshot_name = snapshot.split("@")
|
||||
pool_name, volume_name = volume.split("/")
|
||||
if target_pool and pool_name != target_pool:
|
||||
continue
|
||||
if target_volume and volume_name != target_volume:
|
||||
continue
|
||||
snapshot_stats = json.loads(
|
||||
zkhandler.read(
|
||||
("snapshot.stats", f"{pool_name}/{volume_name}/{snapshot_name}")
|
||||
)
|
||||
)
|
||||
if limit:
|
||||
try:
|
||||
if re.fullmatch(limit, snapshot_name):
|
||||
|
@ -1084,13 +1181,19 @@ def get_list_snapshot(zkhandler, pool, volume, limit=None, is_fuzzy=True):
|
|||
"pool": pool_name,
|
||||
"volume": volume_name,
|
||||
"snapshot": snapshot_name,
|
||||
"stats": snapshot_stats,
|
||||
}
|
||||
)
|
||||
except Exception as e:
|
||||
return False, "Regex Error: {}".format(e)
|
||||
else:
|
||||
snapshot_list.append(
|
||||
{"pool": pool_name, "volume": volume_name, "snapshot": snapshot_name}
|
||||
{
|
||||
"pool": pool_name,
|
||||
"volume": volume_name,
|
||||
"snapshot": snapshot_name,
|
||||
"stats": snapshot_stats,
|
||||
}
|
||||
)
|
||||
|
||||
return True, sorted(snapshot_list, key=lambda x: str(x["snapshot"]))
|
||||
|
|
|
@ -262,6 +262,22 @@ def getClusterInformation(zkhandler):
|
|||
# Get cluster maintenance state
|
||||
maintenance_state = zkhandler.read("base.config.maintenance")
|
||||
|
||||
# Prepare cluster total values
|
||||
cluster_total_node_memory = 0
|
||||
cluster_total_used_memory = 0
|
||||
cluster_total_free_memory = 0
|
||||
cluster_total_allocated_memory = 0
|
||||
cluster_total_provisioned_memory = 0
|
||||
cluster_total_average_memory_utilization = 0
|
||||
cluster_total_cpu_cores = 0
|
||||
cluster_total_cpu_load = 0
|
||||
cluster_total_average_cpu_utilization = 0
|
||||
cluster_total_allocated_cores = 0
|
||||
cluster_total_osd_space = 0
|
||||
cluster_total_used_space = 0
|
||||
cluster_total_free_space = 0
|
||||
cluster_total_average_osd_utilization = 0
|
||||
|
||||
# Get primary node
|
||||
maintenance_state, primary_node = zkhandler.read_many(
|
||||
[
|
||||
|
@ -276,19 +292,36 @@ def getClusterInformation(zkhandler):
|
|||
# Get the list of Nodes
|
||||
node_list = zkhandler.children("base.node")
|
||||
node_count = len(node_list)
|
||||
# Get the daemon and domain states of all Nodes
|
||||
# Get the information of all Nodes
|
||||
node_state_reads = list()
|
||||
node_memory_reads = list()
|
||||
node_cpu_reads = list()
|
||||
for node in node_list:
|
||||
node_state_reads += [
|
||||
("node.state.daemon", node),
|
||||
("node.state.domain", node),
|
||||
]
|
||||
node_memory_reads += [
|
||||
("node.memory.total", node),
|
||||
("node.memory.used", node),
|
||||
("node.memory.free", node),
|
||||
("node.memory.allocated", node),
|
||||
("node.memory.provisioned", node),
|
||||
]
|
||||
node_cpu_reads += [
|
||||
("node.data.static", node),
|
||||
("node.vcpu.allocated", node),
|
||||
("node.cpu.load", node),
|
||||
]
|
||||
all_node_states = zkhandler.read_many(node_state_reads)
|
||||
all_node_memory = zkhandler.read_many(node_memory_reads)
|
||||
all_node_cpu = zkhandler.read_many(node_cpu_reads)
|
||||
|
||||
# Parse out the Node states
|
||||
node_data = list()
|
||||
formatted_node_states = {"total": node_count}
|
||||
for nidx, node in enumerate(node_list):
|
||||
# Split the large list of return values by the IDX of this node
|
||||
# Split the large list of return values by the IDX of this node (states)
|
||||
# Each node result is 2 fields long
|
||||
pos_start = nidx * 2
|
||||
pos_end = nidx * 2 + 2
|
||||
|
@ -308,6 +341,46 @@ def getClusterInformation(zkhandler):
|
|||
else:
|
||||
formatted_node_states[node_state] = 1
|
||||
|
||||
# Split the large list of return values by the IDX of this node (memory)
|
||||
# Each node result is 5 fields long
|
||||
pos_start = nidx * 5
|
||||
pos_end = nidx * 5 + 5
|
||||
(
|
||||
node_memory_total,
|
||||
node_memory_used,
|
||||
node_memory_free,
|
||||
node_memory_allocated,
|
||||
node_memory_provisioned,
|
||||
) = tuple(all_node_memory[pos_start:pos_end])
|
||||
cluster_total_node_memory += int(node_memory_total)
|
||||
cluster_total_used_memory += int(node_memory_used)
|
||||
cluster_total_free_memory += int(node_memory_free)
|
||||
cluster_total_allocated_memory += int(node_memory_allocated)
|
||||
cluster_total_provisioned_memory += int(node_memory_provisioned)
|
||||
|
||||
# Split the large list of return values by the IDX of this node (cpu)
|
||||
# Each node result is 3 fields long
|
||||
pos_start = nidx * 3
|
||||
pos_end = nidx * 3 + 3
|
||||
node_static_data, node_vcpu_allocated, node_cpu_load = tuple(
|
||||
all_node_cpu[pos_start:pos_end]
|
||||
)
|
||||
cluster_total_cpu_cores += int(node_static_data.split()[0])
|
||||
cluster_total_cpu_load += round(float(node_cpu_load), 2)
|
||||
cluster_total_allocated_cores += int(node_vcpu_allocated)
|
||||
|
||||
cluster_total_average_memory_utilization = (
|
||||
(round((cluster_total_used_memory / cluster_total_node_memory) * 100, 2))
|
||||
if cluster_total_node_memory > 0
|
||||
else 0.00
|
||||
)
|
||||
|
||||
cluster_total_average_cpu_utilization = (
|
||||
(round((cluster_total_cpu_load / cluster_total_cpu_cores) * 100, 2))
|
||||
if cluster_total_cpu_cores > 0
|
||||
else 0.00
|
||||
)
|
||||
|
||||
# Get the list of VMs
|
||||
vm_list = zkhandler.children("base.domain")
|
||||
vm_count = len(vm_list)
|
||||
|
@ -380,6 +453,18 @@ def getClusterInformation(zkhandler):
|
|||
else:
|
||||
formatted_osd_states[osd_state] = 1
|
||||
|
||||
# Add the OSD utilization
|
||||
cluster_total_osd_space += int(osd_stats["kb"])
|
||||
cluster_total_used_space += int(osd_stats["kb_used"])
|
||||
cluster_total_free_space += int(osd_stats["kb_avail"])
|
||||
cluster_total_average_osd_utilization += float(osd_stats["utilization"])
|
||||
|
||||
cluster_total_average_osd_utilization = (
|
||||
(round(cluster_total_average_osd_utilization / len(ceph_osd_list), 2))
|
||||
if ceph_osd_list
|
||||
else 0.00
|
||||
)
|
||||
|
||||
# Get the list of Networks
|
||||
network_list = zkhandler.children("base.network")
|
||||
network_count = len(network_list)
|
||||
|
@ -424,6 +509,28 @@ def getClusterInformation(zkhandler):
|
|||
"pools": ceph_pool_count,
|
||||
"volumes": ceph_volume_count,
|
||||
"snapshots": ceph_snapshot_count,
|
||||
"resources": {
|
||||
"memory": {
|
||||
"total": cluster_total_node_memory,
|
||||
"free": cluster_total_free_memory,
|
||||
"used": cluster_total_used_memory,
|
||||
"allocated": cluster_total_allocated_memory,
|
||||
"provisioned": cluster_total_provisioned_memory,
|
||||
"utilization": cluster_total_average_memory_utilization,
|
||||
},
|
||||
"cpu": {
|
||||
"total": cluster_total_cpu_cores,
|
||||
"load": cluster_total_cpu_load,
|
||||
"allocated": cluster_total_allocated_cores,
|
||||
"utilization": cluster_total_average_cpu_utilization,
|
||||
},
|
||||
"disk": {
|
||||
"total": cluster_total_osd_space,
|
||||
"used": cluster_total_used_space,
|
||||
"free": cluster_total_free_space,
|
||||
"utilization": cluster_total_average_osd_utilization,
|
||||
},
|
||||
},
|
||||
"detail": {
|
||||
"node": node_data,
|
||||
"vm": vm_data,
|
||||
|
@ -1051,6 +1158,8 @@ def get_resource_metrics(zkhandler):
|
|||
"restart": 6,
|
||||
"stop": 7,
|
||||
"fail": 8,
|
||||
"import": 9,
|
||||
"restore": 10,
|
||||
}
|
||||
state = vm["state"]
|
||||
output_lines.append(
|
||||
|
@ -1201,7 +1310,7 @@ def get_resource_metrics(zkhandler):
|
|||
try:
|
||||
user_time = vm["vcpu_stats"]["user_time"] / 1000000
|
||||
except Exception:
|
||||
cpu_time = 0
|
||||
user_time = 0
|
||||
output_lines.append(
|
||||
f"pvc_vm_vcpus_user_time{{vm=\"{vm['name']}\"}} {user_time}"
|
||||
)
|
||||
|
@ -1230,7 +1339,7 @@ def get_resource_metrics(zkhandler):
|
|||
)
|
||||
output_lines.append("# TYPE pvc_vm_memory_stats_actual gauge")
|
||||
for vm in vm_data:
|
||||
actual_memory = vm["memory_stats"]["actual"]
|
||||
actual_memory = vm["memory_stats"].get("actual", 0)
|
||||
output_lines.append(
|
||||
f"pvc_vm_memory_stats_actual{{vm=\"{vm['name']}\"}} {actual_memory}"
|
||||
)
|
||||
|
@ -1238,7 +1347,7 @@ def get_resource_metrics(zkhandler):
|
|||
output_lines.append("# HELP pvc_vm_memory_stats_rss PVC VM RSS memory KB")
|
||||
output_lines.append("# TYPE pvc_vm_memory_stats_rss gauge")
|
||||
for vm in vm_data:
|
||||
rss_memory = vm["memory_stats"]["rss"]
|
||||
rss_memory = vm["memory_stats"].get("rss", 0)
|
||||
output_lines.append(
|
||||
f"pvc_vm_memory_stats_rss{{vm=\"{vm['name']}\"}} {rss_memory}"
|
||||
)
|
||||
|
|
|
@ -26,8 +26,10 @@ import subprocess
|
|||
import signal
|
||||
from json import loads
|
||||
from re import match as re_match
|
||||
from re import search as re_search
|
||||
from re import split as re_split
|
||||
from re import sub as re_sub
|
||||
from difflib import unified_diff
|
||||
from distutils.util import strtobool
|
||||
from threading import Thread
|
||||
from shlex import split as shlex_split
|
||||
|
@ -81,6 +83,8 @@ vm_state_combinations = [
|
|||
"migrate",
|
||||
"unmigrate",
|
||||
"provision",
|
||||
"import",
|
||||
"restore",
|
||||
]
|
||||
ceph_osd_state_combinations = [
|
||||
"up,in",
|
||||
|
@ -427,6 +431,96 @@ def getDomainTags(zkhandler, dom_uuid):
|
|||
return tags
|
||||
|
||||
|
||||
#
|
||||
# Get a list of domain snapshots
|
||||
#
|
||||
def _format_snapshot_age(age_secs):
    """
    Format an age in whole seconds as a coarse human-readable string.

    The largest unit (day, hour, minute) with a non-zero whole count is used,
    pluralized when the count is greater than 1; otherwise the raw value is
    reported as "<N> seconds".
    """
    for unit_name, unit_secs in (("day", 86400), ("hour", 3600), ("minute", 60)):
        # int() truncates towards zero, so zero or negative ages fall through
        # to the plain seconds form below
        unit_count = int(age_secs / unit_secs)
        if unit_count > 0:
            plural = "s" if unit_count > 1 else ""
            return f"{unit_count} {unit_name}{plural}"

    return f"{age_secs} seconds"


def getDomainSnapshots(zkhandler, dom_uuid):
    """
    Get a list of snapshots for domain dom_uuid

    The UUID must be validated before calling this function!

    Returns a list of dictionaries, newest snapshot first, each containing:
      * "name": the snapshot name
      * "timestamp": the snapshot timestamp exactly as stored in Zookeeper
      * "age": a human-readable age relative to the current time
      * "xml_diff_lines": unified diff lines from the current domain XML to the
        snapshot's domain XML
      * "rbd_snapshots": the stored comma-separated RBD snapshot list, split
        into a list
    """
    all_snapshots = zkhandler.children(("domain.snapshots", dom_uuid))

    current_timestamp = time.time()
    current_dom_xml = zkhandler.read(("domain.xml", dom_uuid))

    snapshots = list()
    for snapshot in all_snapshots:
        (
            snap_name,
            snap_timestamp,
            _snap_rbd_snapshots,
            snap_dom_xml,
        ) = zkhandler.read_many(
            [
                ("domain.snapshots", dom_uuid, "domain_snapshot.name", snapshot),
                ("domain.snapshots", dom_uuid, "domain_snapshot.timestamp", snapshot),
                (
                    "domain.snapshots",
                    dom_uuid,
                    "domain_snapshot.rbd_snapshots",
                    snapshot,
                ),
                ("domain.snapshots", dom_uuid, "domain_snapshot.xml", snapshot),
            ]
        )

        snap_rbd_snapshots = _snap_rbd_snapshots.split(",")

        # Diff from the live configuration to the snapshot configuration, with
        # a single line of context and no file dates in the headers
        snap_dom_xml_diff = list(
            unified_diff(
                current_dom_xml.split("\n"),
                snap_dom_xml.split("\n"),
                fromfile="current",
                tofile="snapshot",
                fromfiledate="",
                tofiledate="",
                n=1,
                lineterm="",
            )
        )

        snap_age_secs = int(current_timestamp) - int(float(snap_timestamp))

        snapshots.append(
            {
                "name": snap_name,
                "timestamp": snap_timestamp,
                "age": _format_snapshot_age(snap_age_secs),
                "xml_diff_lines": snap_dom_xml_diff,
                "rbd_snapshots": snap_rbd_snapshots,
            }
        )

    # Timestamps are kept as the stored strings in the output, so compare them
    # numerically here rather than lexicographically
    return sorted(snapshots, key=lambda s: float(s["timestamp"]), reverse=True)
|
||||
|
||||
|
||||
#
|
||||
# Get a set of domain metadata
|
||||
#
|
||||
|
@ -441,12 +535,14 @@ def getDomainMetadata(zkhandler, dom_uuid):
|
|||
domain_node_selector,
|
||||
domain_node_autostart,
|
||||
domain_migration_method,
|
||||
domain_migration_max_downtime,
|
||||
) = zkhandler.read_many(
|
||||
[
|
||||
("domain.meta.node_limit", dom_uuid),
|
||||
("domain.meta.node_selector", dom_uuid),
|
||||
("domain.meta.autostart", dom_uuid),
|
||||
("domain.meta.migrate_method", dom_uuid),
|
||||
("domain.meta.migrate_max_downtime", dom_uuid),
|
||||
]
|
||||
)
|
||||
|
||||
|
@ -464,11 +560,15 @@ def getDomainMetadata(zkhandler, dom_uuid):
|
|||
if not domain_migration_method or domain_migration_method == "none":
|
||||
domain_migration_method = None
|
||||
|
||||
if not domain_migration_max_downtime or domain_migration_max_downtime == "none":
|
||||
domain_migration_max_downtime = 300
|
||||
|
||||
return (
|
||||
domain_node_limit,
|
||||
domain_node_selector,
|
||||
domain_node_autostart,
|
||||
domain_migration_method,
|
||||
domain_migration_max_downtime,
|
||||
)
|
||||
|
||||
|
||||
|
@ -505,9 +605,11 @@ def getInformationFromXML(zkhandler, uuid):
|
|||
domain_node_selector,
|
||||
domain_node_autostart,
|
||||
domain_migration_method,
|
||||
domain_migration_max_downtime,
|
||||
) = getDomainMetadata(zkhandler, uuid)
|
||||
|
||||
domain_tags = getDomainTags(zkhandler, uuid)
|
||||
domain_snapshots = getDomainSnapshots(zkhandler, uuid)
|
||||
|
||||
if domain_vnc:
|
||||
domain_vnc_listen, domain_vnc_port = domain_vnc.split(":")
|
||||
|
@ -565,7 +667,9 @@ def getInformationFromXML(zkhandler, uuid):
|
|||
"node_selector": domain_node_selector,
|
||||
"node_autostart": bool(strtobool(domain_node_autostart)),
|
||||
"migration_method": domain_migration_method,
|
||||
"migration_max_downtime": int(domain_migration_max_downtime),
|
||||
"tags": domain_tags,
|
||||
"snapshots": domain_snapshots,
|
||||
"description": domain_description,
|
||||
"profile": domain_profile,
|
||||
"memory": int(domain_memory),
|
||||
|
@ -970,7 +1074,7 @@ def sortInterfaceNames(interface_names):
|
|||
#
|
||||
# Parse a "detect" device into a real block device name
|
||||
#
|
||||
def get_detect_device(detect_string):
|
||||
def get_detect_device_lsscsi(detect_string):
|
||||
"""
|
||||
Parses a "detect:" string into a normalized block device path using lsscsi.
|
||||
|
||||
|
@ -1037,3 +1141,96 @@ def get_detect_device(detect_string):
|
|||
break
|
||||
|
||||
return blockdev
|
||||
|
||||
|
||||
def get_detect_device_nvme(detect_string):
    """
    Parses a "detect:" string into a normalized block device path using nvme.

    A detect string is formatted "detect:<NAME>:<SIZE>:<ID>", where
    NAME is a case-insensitive substring of the device ModelNumber reported
    by "nvme list", SIZE is a human-readable size value to within +/- 3% of
    the real size of the device, and ID is the Nth (0-indexed) matching entry
    of that NAME and SIZE.

    Returns the DevicePath of the selected device, or None if the detect
    string is malformed, the nvme command fails, or no device matches.
    """

    unit_map = {
        "kB": 1000,
        "MB": 1000 * 1000,
        "GB": 1000 * 1000 * 1000,
        "TB": 1000 * 1000 * 1000 * 1000,
        "PB": 1000 * 1000 * 1000 * 1000 * 1000,
        "EB": 1000 * 1000 * 1000 * 1000 * 1000 * 1000,
    }

    # Validate the whole detect string up front so that a malformed string
    # yields None like every other failure path, instead of an exception
    parts = detect_string.split(":")
    if len(parts) != 4 or parts[0] != "detect":
        return None
    _prefix, name, _size, idd = parts

    size_re = re_search(r"([\d.]+)([kKMGTPE]B)", _size)
    if size_re is None:
        return None

    try:
        size_val = float(size_re.group(1))
        match_index = int(idd)
    except ValueError:
        return None
    if match_index < 0:
        return None

    # Accept "KB" as an alias of "kB"; the map only carries the SI spelling
    size_unit = size_re.group(2)
    if size_unit == "KB":
        size_unit = "kB"
    size_bytes = int(size_val * unit_map[size_unit])

    retcode, stdout, stderr = run_os_command("nvme list --output-format json")
    if retcode:
        print(f"Failed to run nvme: {stderr}")
        return None

    # Parse the output with json
    nvme_data = loads(stdout).get("Devices", list())

    # Handle size determination (+/- 3%): pick the first reported physical
    # size that falls strictly inside the tolerance window
    minusthreepct = size_bytes * 0.97
    plusthreepct = size_bytes * 1.03
    size = next(
        (
            entry["PhysicalSize"]
            for entry in nvme_data
            if minusthreepct < entry["PhysicalSize"] < plusthreepct
        ),
        None,
    )
    if size is None:
        return None

    # Collect the device paths whose model contains the name
    # (case-insensitive) and whose physical size equals the selected size
    wanted_name = name.lower()
    matches = [
        entry["DevicePath"]
        for entry in nvme_data
        if wanted_name in entry["ModelNumber"].lower()
        and entry["PhysicalSize"] == size
    ]

    # Return the blockdev at index {idd}, if there are that many matches
    if match_index < len(matches):
        return matches[match_index]
    return None
|
||||
|
||||
|
||||
def get_detect_device(detect_string):
    """
    Resolve a "detect:" string to a block device path, or None.

    The "lsscsi"-based parser (get_detect_device_lsscsi) is consulted first. When
    its result is missing or does not begin with "/dev", the "nvme"-based parser
    (get_detect_device_nvme) is consulted instead. This works around more recent
    devices (e.g. the Dell R6615 series) not properly reporting block device
    paths for NVMe devices with "lsscsi".

    Only a result beginning with "/dev" is ever returned; anything else
    becomes None.
    """

    def _is_dev_path(candidate):
        # A usable result is a non-None value that starts with "/dev"
        return candidate is not None and re_match(r"^/dev", candidate)

    device = get_detect_device_lsscsi(detect_string)
    if not _is_dev_path(device):
        device = get_detect_device_nvme(detect_string)

    return device if _is_dev_path(device) else None
|
||||
|
|
|
@ -244,9 +244,9 @@ def get_parsed_configuration(config_file):
|
|||
]
|
||||
][0]
|
||||
|
||||
config_cluster_networks_specific[
|
||||
f"{network_type}_dev_ip"
|
||||
] = f"{list(network.hosts())[address_id]}/{network.prefixlen}"
|
||||
config_cluster_networks_specific[f"{network_type}_dev_ip"] = (
|
||||
f"{list(network.hosts())[address_id]}/{network.prefixlen}"
|
||||
)
|
||||
|
||||
config = {**config, **config_cluster_networks_specific}
|
||||
|
||||
|
@ -406,6 +406,78 @@ def get_configuration():
|
|||
return config
|
||||
|
||||
|
||||
def get_parsed_autobackup_configuration(config_file):
    """
    Load the autobackup configuration; this is the same main pvc.conf that the daemons read

    Returns a dictionary which always contains "cluster" and
    "autobackup_enabled". If the "autobackup" section is present but empty,
    "autobackup_enabled" is False and nothing else is set. Otherwise the
    dictionary also carries "backup_root_path", "backup_root_suffix",
    "backup_tags", "backup_schedule" and "auto_mount_enabled", plus
    "mount_cmds" and "unmount_cmds" when auto-mounting is enabled.

    Exits the process with status 1 if the file cannot be parsed as YAML, and
    raises MalformedConfigurationError if the parsed configuration is missing
    a required key or is otherwise unusable.
    """
    print(f'Loading configuration from file "{config_file}"')

    with open(config_file, "r") as cfgfh:
        try:
            o_config = yaml.load(cfgfh, Loader=yaml.SafeLoader)
        except Exception as e:
            print(f"ERROR: Failed to parse configuration file: {e}")
            os._exit(1)

    def _expand_backup_root_path(commands, backup_root_path):
        # Substitute the {backup_root_path} placeholder in those commands that
        # use it; commands without the placeholder are passed through untouched
        return [
            (
                command.format(backup_root_path=backup_root_path)
                if "{backup_root_path}" in command
                else command
            )
            for command in commands
        ]

    try:
        config = {
            "cluster": o_config["cluster"]["name"],
            "autobackup_enabled": True,
        }

        o_autobackup = o_config["autobackup"]
        if o_autobackup is None:
            # An empty autobackup section means autobackups are disabled
            config["autobackup_enabled"] = False
            return config

        config.update(
            {
                "backup_root_path": o_autobackup["backup_root_path"],
                "backup_root_suffix": o_autobackup["backup_root_suffix"],
                "backup_tags": o_autobackup["backup_tags"],
                "backup_schedule": o_autobackup["backup_schedule"],
            }
        )

        o_automount = o_autobackup["auto_mount"]
        config["auto_mount_enabled"] = o_automount["enabled"]

        if config["auto_mount_enabled"]:
            config["mount_cmds"] = _expand_backup_root_path(
                o_automount["mount_cmds"], config["backup_root_path"]
            )
            config["unmount_cmds"] = _expand_backup_root_path(
                o_automount["unmount_cmds"], config["backup_root_path"]
            )

    except Exception as e:
        # Keep the original error attached as the cause for easier debugging
        raise MalformedConfigurationError(e) from e

    return config
|
||||
|
||||
|
||||
def get_autobackup_configuration():
    """
    Locate the PVC configuration file and return its parsed autobackup configuration.
    """
    return get_parsed_autobackup_configuration(get_configuration_path())
|
||||
|
||||
|
||||
def validate_directories(config):
|
||||
if not os.path.exists(config["dynamic_directory"]):
|
||||
os.makedirs(config["dynamic_directory"])
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
{"version": "13", "root": "", "base": {"root": "", "schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "logs": "/logs", "faults": "/faults", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.health": "/ceph/health", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "logs": {"node": "", "messages": "/messages"}, "faults": {"id": "", "last_time": "/last_time", "first_time": "/first_time", "ack_time": "/ack_time", "status": "/status", "delta": "/delta", "message": "/message"}, "node": {"name": "", "keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "data.pvc_version": "/pvcversion", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword", "sriov": "/sriov", "sriov.pf": "/sriov/pf", "sriov.vf": "/sriov/vf", "monitoring.plugins": "/monitoring_plugins", "monitoring.data": "/monitoring_data", "monitoring.health": "/monitoring_health", "network.stats": "/network_stats"}, "monitoring_plugin": {"name": "", "last_run": "/last_run", "health_delta": "/health_delta", "message": "/message", 
"data": "/data", "runtime": "/runtime"}, "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"}, "sriov_vf": {"phy": "", "pf": "/pf", "mtu": "/mtu", "mac": "/mac", "phy_mac": "/phy_mac", "config": "/config", "config.vlan_id": "/config/vlan_id", "config.vlan_qos": "/config/vlan_qos", "config.tx_rate_min": "/config/tx_rate_min", "config.tx_rate_max": "/config/tx_rate_max", "config.spoof_check": "/config/spoof_check", "config.link_state": "/config/link_state", "config.trust": "/config/trust", "config.query_rss": "/config/query_rss", "pci": "/pci", "pci.domain": "/pci/domain", "pci.bus": "/pci/bus", "pci.slot": "/pci/slot", "pci.function": "/pci/function", "used": "/used", "used_by": "/used_by"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.migrate_max_downtime": "/migration_max_downtime", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "meta.tags": "/tags", "migrate.sync_lock": "/migrate_sync_lock"}, "tag": {"name": "", "type": "/type", "protected": "/protected"}, "network": {"vni": "", "type": "/nettype", "mtu": "/mtu", "rule": "/firewall_rules", "rule.in": "/firewall_rules/in", "rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": "/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname", "expiry": "/expiry", "client_id": 
"/clientid"}, "rule": {"description": "", "rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "db_device": "/db_device", "fsid": "/fsid", "ofsid": "/fsid/osd", "cfsid": "/fsid/cluster", "lvm": "/lvm", "vg": "/lvm/vg", "lv": "/lvm/lv", "is_split": "/is_split", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "tier": "/tier", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}}
|
|
@ -0,0 +1 @@
|
|||
{"version": "14", "root": "", "base": {"root": "", "schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "logs": "/logs", "faults": "/faults", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.health": "/ceph/health", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "logs": {"node": "", "messages": "/messages"}, "faults": {"id": "", "last_time": "/last_time", "first_time": "/first_time", "ack_time": "/ack_time", "status": "/status", "delta": "/delta", "message": "/message"}, "node": {"name": "", "keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "data.pvc_version": "/pvcversion", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword", "sriov": "/sriov", "sriov.pf": "/sriov/pf", "sriov.vf": "/sriov/vf", "monitoring.plugins": "/monitoring_plugins", "monitoring.data": "/monitoring_data", "monitoring.health": "/monitoring_health", "network.stats": "/network_stats"}, "monitoring_plugin": {"name": "", "last_run": "/last_run", "health_delta": "/health_delta", "message": "/message", 
"data": "/data", "runtime": "/runtime"}, "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"}, "sriov_vf": {"phy": "", "pf": "/pf", "mtu": "/mtu", "mac": "/mac", "phy_mac": "/phy_mac", "config": "/config", "config.vlan_id": "/config/vlan_id", "config.vlan_qos": "/config/vlan_qos", "config.tx_rate_min": "/config/tx_rate_min", "config.tx_rate_max": "/config/tx_rate_max", "config.spoof_check": "/config/spoof_check", "config.link_state": "/config/link_state", "config.trust": "/config/trust", "config.query_rss": "/config/query_rss", "pci": "/pci", "pci.domain": "/pci/domain", "pci.bus": "/pci/bus", "pci.slot": "/pci/slot", "pci.function": "/pci/function", "used": "/used", "used_by": "/used_by"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.migrate_max_downtime": "/migration_max_downtime", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "meta.tags": "/tags", "migrate.sync_lock": "/migrate_sync_lock", "snapshots": "/snapshots"}, "tag": {"name": "", "type": "/type", "protected": "/protected"}, "domain_snapshot": {"name": "", "timestamp": "/timestamp", "xml": "/xml", "rbd_snapshots": "/rbdsnaplist"}, "network": {"vni": "", "type": "/nettype", "mtu": "/mtu", "rule": "/firewall_rules", "rule.in": "/firewall_rules/in", "rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": "/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": 
"/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname", "expiry": "/expiry", "client_id": "/clientid"}, "rule": {"description": "", "rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "db_device": "/db_device", "fsid": "/fsid", "ofsid": "/fsid/osd", "cfsid": "/fsid/cluster", "lvm": "/lvm", "vg": "/lvm/vg", "lv": "/lvm/lv", "is_split": "/is_split", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "tier": "/tier", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}}
|
|
@ -69,6 +69,8 @@ def getNodeHealthDetails(zkhandler, node_name, node_health_plugins):
|
|||
plugin_message,
|
||||
plugin_data,
|
||||
) = tuple(all_plugin_data[pos_start:pos_end])
|
||||
if plugin_data is None:
|
||||
continue
|
||||
plugin_output = {
|
||||
"name": plugin,
|
||||
"last_run": int(plugin_last_run) if plugin_last_run is not None else None,
|
||||
|
@ -156,9 +158,9 @@ def getNodeInformation(zkhandler, node_name):
|
|||
zkhandler, node_name, node_health_plugins
|
||||
)
|
||||
|
||||
if _node_network_stats is not None:
|
||||
try:
|
||||
node_network_stats = json.loads(_node_network_stats)
|
||||
else:
|
||||
except Exception:
|
||||
node_network_stats = dict()
|
||||
|
||||
# Construct a data structure to represent the data
|
||||
|
|
1461
daemon-common/vm.py
1461
daemon-common/vm.py
File diff suppressed because it is too large
Load Diff
|
@ -258,6 +258,13 @@ def worker_create_vm(
|
|||
args = (vm_profile,)
|
||||
db_cur.execute(query, args)
|
||||
profile_data = db_cur.fetchone()
|
||||
if profile_data is None:
|
||||
fail(
|
||||
celery,
|
||||
f'Provisioner profile "{vm_profile}" is not present on the cluster',
|
||||
exception=ClusterError,
|
||||
)
|
||||
|
||||
if profile_data.get("arguments"):
|
||||
vm_data["script_arguments"] = profile_data.get("arguments").split("|")
|
||||
else:
|
||||
|
@ -744,6 +751,7 @@ def worker_create_vm(
|
|||
node_selector = vm_data["system_details"]["node_selector"]
|
||||
node_autostart = vm_data["system_details"]["node_autostart"]
|
||||
migration_method = vm_data["system_details"]["migration_method"]
|
||||
migration_max_downtime = vm_data["system_details"]["migration_max_downtime"]
|
||||
with open_zk(config) as zkhandler:
|
||||
retcode, retmsg = pvc_vm.define_vm(
|
||||
zkhandler,
|
||||
|
@ -753,6 +761,7 @@ def worker_create_vm(
|
|||
node_selector,
|
||||
node_autostart,
|
||||
migration_method,
|
||||
migration_max_downtime,
|
||||
vm_profile,
|
||||
initial_state="provision",
|
||||
)
|
||||
|
|
|
@ -30,6 +30,9 @@ from kazoo.client import KazooClient, KazooState
|
|||
from kazoo.exceptions import NoNodeError
|
||||
|
||||
|
||||
SCHEMA_ROOT_PATH = "/usr/share/pvc/daemon_lib/migrations/versions"
|
||||
|
||||
|
||||
#
|
||||
# Function decorators
|
||||
#
|
||||
|
@ -57,10 +60,11 @@ class ZKConnection(object):
|
|||
schema_version = 0
|
||||
zkhandler.schema.load(schema_version, quiet=True)
|
||||
|
||||
ret = function(zkhandler, *args, **kwargs)
|
||||
|
||||
zkhandler.disconnect()
|
||||
del zkhandler
|
||||
try:
|
||||
ret = function(zkhandler, *args, **kwargs)
|
||||
finally:
|
||||
zkhandler.disconnect()
|
||||
del zkhandler
|
||||
|
||||
return ret
|
||||
|
||||
|
@ -572,7 +576,7 @@ class ZKHandler(object):
|
|||
#
|
||||
class ZKSchema(object):
|
||||
# Current version
|
||||
_version = 12
|
||||
_version = 14
|
||||
|
||||
# Root for doing nested keys
|
||||
_schema_root = ""
|
||||
|
@ -707,17 +711,26 @@ class ZKSchema(object):
|
|||
"console.vnc": "/vnc",
|
||||
"meta.autostart": "/node_autostart",
|
||||
"meta.migrate_method": "/migration_method",
|
||||
"meta.migrate_max_downtime": "/migration_max_downtime",
|
||||
"meta.node_selector": "/node_selector",
|
||||
"meta.node_limit": "/node_limit",
|
||||
"meta.tags": "/tags",
|
||||
"migrate.sync_lock": "/migrate_sync_lock",
|
||||
"snapshots": "/snapshots",
|
||||
},
|
||||
# The schema of an individual domain tag entry (/domains/{domain}/tags/{tag})
|
||||
"tag": {
|
||||
"name": "",
|
||||
"name": "", # The root key
|
||||
"type": "/type",
|
||||
"protected": "/protected",
|
||||
}, # The root key
|
||||
},
|
||||
# The schema of an individual domain snapshot entry (/domains/{domain}/snapshots/{snapshot})
|
||||
"domain_snapshot": {
|
||||
"name": "", # The root key
|
||||
"timestamp": "/timestamp",
|
||||
"xml": "/xml",
|
||||
"rbd_snapshots": "/rbdsnaplist",
|
||||
},
|
||||
# The schema of an individual network entry (/networks/{vni})
|
||||
"network": {
|
||||
"vni": "", # The root key
|
||||
|
@ -859,7 +872,7 @@ class ZKSchema(object):
|
|||
if not quiet:
|
||||
print(f"Loading schema version {version}")
|
||||
|
||||
with open(f"daemon_lib/migrations/versions/{version}.json", "r") as sfh:
|
||||
with open(f"{SCHEMA_ROOT_PATH}/{version}.json", "r") as sfh:
|
||||
self.schema = json.load(sfh)
|
||||
self.version = self.schema.get("version")
|
||||
|
||||
|
@ -1026,6 +1039,8 @@ class ZKSchema(object):
|
|||
default_data = "False"
|
||||
elif elem == "pool" and ikey == "tier":
|
||||
default_data = "default"
|
||||
elif elem == "domain" and ikey == "meta.migrate_max_downtime":
|
||||
default_data = "300"
|
||||
else:
|
||||
default_data = ""
|
||||
zkhandler.zk_conn.create(
|
||||
|
@ -1206,7 +1221,7 @@ class ZKSchema(object):
|
|||
# Write the latest schema to a file
|
||||
@classmethod
|
||||
def write(cls):
|
||||
schema_file = "daemon_lib/migrations/versions/{}.json".format(cls._version)
|
||||
schema_file = f"{SCHEMA_ROOT_PATH}/{cls._version}.json"
|
||||
with open(schema_file, "w") as sfh:
|
||||
json.dump(cls._schema, sfh)
|
||||
|
||||
|
@ -1214,7 +1229,7 @@ class ZKSchema(object):
|
|||
@staticmethod
|
||||
def find_all(start=0, end=None):
|
||||
versions = list()
|
||||
for version in os.listdir("daemon_lib/migrations/versions"):
|
||||
for version in os.listdir(SCHEMA_ROOT_PATH):
|
||||
sequence_id = int(version.split(".")[0])
|
||||
if end is None:
|
||||
if sequence_id > start:
|
||||
|
@ -1230,7 +1245,7 @@ class ZKSchema(object):
|
|||
@staticmethod
|
||||
def find_latest():
|
||||
latest_version = 0
|
||||
for version in os.listdir("daemon_lib/migrations/versions"):
|
||||
for version in os.listdir(SCHEMA_ROOT_PATH):
|
||||
sequence_id = int(version.split(".")[0])
|
||||
if sequence_id > latest_version:
|
||||
latest_version = sequence_id
|
||||
|
|
|
@ -1,3 +1,114 @@
|
|||
pvc (0.9.100-0) unstable; urgency=high
|
||||
|
||||
* [API Daemon] Improves the handling of "detect:" disk strings on newer systems by leveraging the "nvme" command
|
||||
* [Client CLI] Update help text about "detect:" disk strings
|
||||
* [Meta] Updates deprecation warnings and updates builder to only add this version for Debian 12 (Bookworm)
|
||||
|
||||
-- Joshua M. Boniface <joshua@boniface.me> Fri, 30 Aug 2024 11:03:33 -0400
|
||||
|
||||
pvc (0.9.99-0) unstable; urgency=high
|
||||
|
||||
**Deprecation Warning**: `pvc vm backup` commands are now deprecated and will be removed in **0.9.100**. Use `pvc vm snapshot` commands instead.
|
||||
**Breaking Change**: The on-disk format of VM snapshot exports differs from backup exports, and the PVC autobackup system now leverages these. It is recommended to start fresh with a new tree of backups for `pvc autobackup` for maximum compatibility.
|
||||
**Breaking Change**: VM autobackups now run in `pvcworkerd` instead of the CLI client directly, allowing them to be triggered from any node (or externally). It is important to apply the timer unit changes from the `pvc-ansible` role after upgrading to 0.9.99 to avoid duplicate runs.
|
||||
**Usage Note**: VM snapshots are displayed in the `pvc vm list` and `pvc vm info` outputs, not in a unique "list" endpoint.
|
||||
|
||||
* [API Daemon] Adds a proper error when an invalid provisioner profile is specified
|
||||
* [Node Daemon] Sorts Ceph pools properly in node keepalive to avoid incorrect ordering
|
||||
* [Health Daemon] Improves handling of IPMI checks by adding multiple tries but a shorter timeout
|
||||
* [API Daemon] Improves handling of XML parsing errors in VM configurations
|
||||
* [ALL] Adds support for whole VM snapshots, including configuration XML details, and direct rollback to snapshots
|
||||
* [ALL] Adds support for exporting and importing whole VM snapshots
|
||||
* [Client CLI] Removes vCPU topology from short VM info output
|
||||
* [Client CLI] Improves output format of VM info output
|
||||
* [API Daemon] Adds an endpoint to get the current primary node
|
||||
* [Client CLI] Fixes a bug where API requests were made 3 times
|
||||
* [Other] Improves the build-and-deploy.sh script
|
||||
* [API Daemon] Improves the "vm rename" command to avoid redefining VM, preserving history etc.
|
||||
* [API Daemon] Adds an indication when a task is run on the primary node
|
||||
* [API Daemon] Fixes a bug where the ZK schema relative path didn't work sometimes
|
||||
|
||||
-- Joshua M. Boniface <joshua@boniface.me> Wed, 28 Aug 2024 11:15:55 -0400
|
||||
|
||||
pvc (0.9.98-0) unstable; urgency=high
|
||||
|
||||
* [CLI Client] Fixed output when API call times out
|
||||
* [Node Daemon] Improves the handling of fence states
|
||||
* [API Daemon/CLI Client] Adds support for storage snapshot rollback
|
||||
* [CLI Client] Adds additional warning messages about snapshot consistency to help output
|
||||
* [API Daemon] Fixes a bug listing snapshots by pool/volume
|
||||
* [Node Daemon] Adds a --version flag for information gathering by update-motd.sh
|
||||
|
||||
-- Joshua M. Boniface <joshua@boniface.me> Wed, 05 Jun 2024 12:01:31 -0400
|
||||
|
||||
pvc (0.9.97-0) unstable; urgency=high
|
||||
|
||||
* [Client CLI] Ensures --lines is always an integer value
|
||||
* [Node Daemon] Fixes a bug if d_network changes during iteration
|
||||
* [Node Daemon] Moves to using allocated instead of free memory for node reporting
|
||||
* [API Daemon] Fixes a bug if lingering RBD snapshots exist when removing a volume (#180)
|
||||
|
||||
-- Joshua M. Boniface <joshua@boniface.me> Fri, 19 Apr 2024 10:32:16 -0400
|
||||
|
||||
pvc (0.9.96-0) unstable; urgency=high
|
||||
|
||||
* [API Daemon] Fixes a bug when reporting node stats
|
||||
* [API Daemon] Fixes a bug deleting successful benchmark results
|
||||
|
||||
-- Joshua M. Boniface <joshua@boniface.me> Fri, 08 Mar 2024 14:23:06 -0500
|
||||
|
||||
pvc (0.9.95-0) unstable; urgency=high
|
||||
|
||||
* [API Daemon/CLI Client] Adds a flag to allow duplicate VNIs in network templates
|
||||
* [API Daemon] Ensures that storage template disks are returned in disk ID order
|
||||
* [Client CLI] Fixes a display bug showing all OSDs as split
|
||||
|
||||
-- Joshua M. Boniface <joshua@boniface.me> Fri, 09 Feb 2024 12:42:00 -0500
|
||||
|
||||
pvc (0.9.94-0) unstable; urgency=high
|
||||
|
||||
* [CLI Client] Fixes an incorrect ordering issue with autobackup summary emails
|
||||
* [API Daemon/CLI Client] Adds an additional safety check for 80% cluster fullness when doing volume adds or resizes
|
||||
* [API Daemon/CLI Client] Adds safety checks to volume clones as well
|
||||
* [API Daemon] Fixes a few remaining memory bugs for stopped/disabled VMs
|
||||
|
||||
-- Joshua M. Boniface <joshua@boniface.me> Mon, 05 Feb 2024 09:58:07 -0500
|
||||
|
||||
pvc (0.9.93-0) unstable; urgency=high
|
||||
|
||||
* [API Daemon] Fixes a bug where stuck zkhandler threads were not cleaned up on error
|
||||
|
||||
-- Joshua M. Boniface <joshua@boniface.me> Tue, 30 Jan 2024 09:51:21 -0500
|
||||
|
||||
pvc (0.9.92-0) unstable; urgency=high
|
||||
|
||||
* [CLI Client] Adds the new restore state to the colours list for VM status
|
||||
* [API Daemon] Fixes an incorrect variable assignment
|
||||
* [Provisioner] Improves the error handling of various steps in the debootstrap and rinse example scripts
|
||||
* [CLI Client] Fixes two bugs around missing keys that were added recently (uses get() instead of direct dictionary refs)
|
||||
* [CLI Client] Improves API error handling via GET retries (x3) and better server status code handling
|
||||
|
||||
-- Joshua M. Boniface <joshua@boniface.me> Mon, 29 Jan 2024 09:39:10 -0500
|
||||
|
||||
pvc (0.9.91-0) unstable; urgency=high
|
||||
|
||||
* [Client CLI] Fixes a bug and improves output during cluster task events.
|
||||
* [Client CLI] Improves the output of the task list display.
|
||||
* [Provisioner] Fixes some missing cloud-init modules in the default debootstrap script.
|
||||
* [Client CLI] Fixes a bug with a missing argument to the vm_define helper function.
|
||||
* [All] Fixes inconsistent package find + rm commands to avoid errors in dpkg.
|
||||
|
||||
-- Joshua M. Boniface <joshua@boniface.me> Tue, 23 Jan 2024 10:02:19 -0500
|
||||
|
||||
pvc (0.9.90-0) unstable; urgency=high
|
||||
|
||||
* [Client CLI/API Daemon] Adds additional backup metainfo and an emailed report option to autobackups.
|
||||
* [All] Adds a live migration maximum downtime selector to help with busy VM migrations.
|
||||
* [API Daemon] Fixes a database migration bug on Debian 10/11.
|
||||
* [Node Daemon] Fixes a race condition when applying Zookeeper schema changes.
|
||||
|
||||
-- Joshua M. Boniface <joshua@boniface.me> Thu, 11 Jan 2024 00:14:49 -0500
|
||||
|
||||
pvc (0.9.89-0) unstable; urgency=high
|
||||
|
||||
* [API/Worker Daemons] Fixes a bug with the Celery result backends not being properly initialized on Debian 10/11.
|
||||
|
|
|
@ -32,7 +32,7 @@ Description: Parallel Virtual Cluster worker daemon
|
|||
|
||||
Package: pvc-daemon-api
|
||||
Architecture: all
|
||||
Depends: systemd, pvc-daemon-common, python3-yaml, python3-flask, python3-flask-restful, python3-celery, python3-distutils, python3-redis, python3-lxml, python3-flask-migrate
|
||||
Depends: systemd, pvc-daemon-common, gunicorn, python3-gunicorn, python3-yaml, python3-flask, python3-flask-restful, python3-celery, python3-distutils, python3-redis, python3-lxml, python3-flask-migrate
|
||||
Description: Parallel Virtual Cluster API daemon
|
||||
A KVM/Zookeeper/Ceph-based VM and private cloud manager
|
||||
.
|
||||
|
|
|
@ -2,7 +2,12 @@
|
|||
|
||||
# Generate the bash completion configuration
|
||||
if [ -d /etc/bash_completion.d ]; then
|
||||
echo "Installing BASH completion configuration"
|
||||
_PVC_COMPLETE=source_bash pvc > /etc/bash_completion.d/pvc
|
||||
fi
|
||||
|
||||
# Remove any cached CPython directories or files
|
||||
echo "Cleaning up CPython caches"
|
||||
find /usr/lib/python3/dist-packages/pvc -type d -name "__pycache__" -exec rm -fr {} + &>/dev/null || true
|
||||
|
||||
exit 0
|
||||
|
|
|
@ -9,11 +9,6 @@ if systemctl is-active --quiet pvcapid.service; then
|
|||
/usr/share/pvc/pvc-api-db-upgrade
|
||||
systemctl start pvcapid.service
|
||||
fi
|
||||
# Restart the worker daemon
|
||||
if systemctl is-active --quiet pvcworkerd.service; then
|
||||
systemctl stop pvcworkerd.service
|
||||
systemctl start pvcworkerd.service
|
||||
fi
|
||||
|
||||
if [ ! -f /etc/pvc/pvc.conf ]; then
|
||||
echo "NOTE: The PVC client API daemon (pvcapid.service) and the PVC Worker daemon (pvcworkerd.service) have not been started; create a config file at /etc/pvc/pvc.conf, then run the database configuration (/usr/share/pvc/pvc-api-db-upgrade) and start them manually."
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#!/bin/sh
|
||||
|
||||
# Remove any cached CPython directories or files
|
||||
echo "Cleaning up existing CPython files"
|
||||
find /usr/share/pvc/pvcapid -type d -name "__pycache__" -exec rm -rf {} \; &>/dev/null || true
|
||||
echo "Cleaning up CPython caches"
|
||||
find /usr/share/pvc/pvcapid -type d -name "__pycache__" -exec rm -fr {} + &>/dev/null || true
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
#!/bin/sh
|
||||
|
||||
# Remove any cached CPython directories or files
|
||||
echo "Cleaning up CPython caches"
|
||||
find /usr/share/pvc/daemon_lib -type d -name "__pycache__" -exec rm -fr {} + &>/dev/null || true
|
|
@ -1,6 +1,6 @@
|
|||
#!/bin/sh
|
||||
|
||||
# Remove any cached CPython directories or files
|
||||
echo "Cleaning up existing CPython files"
|
||||
find /usr/share/pvc/pvchealthd -type d -name "__pycache__" -exec rm -rf {} \; &>/dev/null || true
|
||||
find /usr/share/pvc/plugins -type d -name "__pycache__" -exec rm -rf {} \; &>/dev/null || true
|
||||
echo "Cleaning up CPython caches"
|
||||
find /usr/share/pvc/pvchealthd -type d -name "__pycache__" -exec rm -fr {} + &>/dev/null || true
|
||||
find /usr/share/pvc/plugins -type d -name "__pycache__" -exec rm -fr {} + &>/dev/null || true
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#!/bin/sh
|
||||
|
||||
# Remove any cached CPython directories or files
|
||||
echo "Cleaning up existing CPython files"
|
||||
find /usr/share/pvc/pvcnoded -type d -name "__pycache__" -exec rm -rf {} \; &>/dev/null || true
|
||||
echo "Cleaning up CPython caches"
|
||||
find /usr/share/pvc/pvcnoded -type d -name "__pycache__" -exec rm -fr {} + &>/dev/null || true
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#!/bin/sh
|
||||
|
||||
# Remove any cached CPython directories or files
|
||||
echo "Cleaning up existing CPython files"
|
||||
find /usr/share/pvc/pvcworkerd -type d -name "__pycache__" -exec rm -rf {} \; &>/dev/null || true
|
||||
echo "Cleaning up CPython caches"
|
||||
find /usr/share/pvc/pvcworkerd -type d -name "__pycache__" -exec rm -fr {} + &>/dev/null || true
|
||||
|
|
|
@ -13,7 +13,7 @@ override_dh_python3:
|
|||
rm -r $(CURDIR)/client-cli/.pybuild $(CURDIR)/client-cli/pvc.egg-info
|
||||
|
||||
override_dh_auto_clean:
|
||||
find . -name "__pycache__" -o -name ".pybuild" -exec rm -r {} \; || true
|
||||
find . -name "__pycache__" -o -name ".pybuild" -exec rm -fr {} + || true
|
||||
|
||||
# If you need to rebuild the Sphinx documentation
|
||||
# Add sphinxdoc to the dh --with line
|
||||
|
|
|
@ -2,12 +2,19 @@
|
|||
|
||||
# Generate the database migration files
|
||||
|
||||
set -o xtrace
|
||||
|
||||
VERSION="$( head -1 debian/changelog | awk -F'[()-]' '{ print $2 }' )"
|
||||
|
||||
sudo ip addr add 10.0.1.250/32 dev lo
|
||||
|
||||
pushd $( git rev-parse --show-toplevel ) &>/dev/null
|
||||
pushd api-daemon &>/dev/null
|
||||
export PVC_CONFIG_FILE="../pvc.sample.conf"
|
||||
./pvcapid-manage_flask.py db migrate -m "PVC version ${VERSION}"
|
||||
./pvcapid-manage_flask.py db upgrade
|
||||
export FLASK_APP=./pvcapid-manage_flask.py
|
||||
flask db migrate -m "PVC version ${VERSION}"
|
||||
flask db upgrade
|
||||
popd &>/dev/null
|
||||
popd &>/dev/null
|
||||
|
||||
sudo ip addr del 10.0.1.250/32 dev lo
|
||||
|
|
|
@ -69,26 +69,33 @@ class MonitoringPluginScript(MonitoringPlugin):
|
|||
|
||||
# Run any imports first
|
||||
from daemon_lib.common import run_os_command
|
||||
from time import sleep
|
||||
|
||||
# Check the node's IPMI interface
|
||||
ipmi_hostname = self.config["ipmi_hostname"]
|
||||
ipmi_username = self.config["ipmi_username"]
|
||||
ipmi_password = self.config["ipmi_password"]
|
||||
retcode, _, _ = run_os_command(
|
||||
f"/usr/bin/ipmitool -I lanplus -H {ipmi_hostname} -U {ipmi_username} -P {ipmi_password} chassis power status",
|
||||
timeout=5
|
||||
)
|
||||
retcode = 1
|
||||
trycount = 0
|
||||
while retcode > 0 and trycount < 3:
|
||||
retcode, _, _ = run_os_command(
|
||||
f"/usr/bin/ipmitool -I lanplus -H {ipmi_hostname} -U {ipmi_username} -P {ipmi_password} chassis power status",
|
||||
timeout=2
|
||||
)
|
||||
trycount += 1
|
||||
if retcode > 0 and trycount < 3:
|
||||
sleep(trycount)
|
||||
|
||||
if retcode > 0:
|
||||
# Set the health delta to 10 (subtract 10 from the total of 100)
|
||||
health_delta = 10
|
||||
# Craft a message that can be used by the clients
|
||||
message = f"IPMI via {ipmi_username}@{ipmi_hostname} is NOT responding"
|
||||
message = f"IPMI via {ipmi_username}@{ipmi_hostname} is NOT responding after 3 attempts"
|
||||
else:
|
||||
# Set the health delta to 0 (no change)
|
||||
health_delta = 0
|
||||
# Craft a message that can be used by the clients
|
||||
message = f"IPMI via {ipmi_username}@{ipmi_hostname} is responding"
|
||||
message = f"IPMI via {ipmi_username}@{ipmi_hostname} is responding after {trycount} attempts"
|
||||
|
||||
# Set the health delta in our local PluginResult object
|
||||
self.plugin_result.set_health_delta(health_delta)
|
||||
|
|
|
@ -33,7 +33,7 @@ import os
|
|||
import signal
|
||||
|
||||
# Daemon version
|
||||
version = "0.9.89"
|
||||
version = "0.9.100"
|
||||
|
||||
|
||||
##########################################################
|
||||
|
|
|
@ -19,6 +19,11 @@
|
|||
#
|
||||
###############################################################################
|
||||
|
||||
from sys import argv
|
||||
import pvcnoded.Daemon # noqa: F401
|
||||
|
||||
if "--version" in argv:
|
||||
print(pvcnoded.Daemon.version)
|
||||
exit(0)
|
||||
|
||||
pvcnoded.Daemon.entrypoint()
|
||||
|
|
|
@ -49,7 +49,7 @@ import re
|
|||
import json
|
||||
|
||||
# Daemon version
|
||||
version = "0.9.89"
|
||||
version = "0.9.100"
|
||||
|
||||
|
||||
##########################################################
|
||||
|
@ -197,7 +197,9 @@ def entrypoint():
|
|||
os.execv(sys.argv[0], sys.argv)
|
||||
|
||||
# Validate the schema
|
||||
pvcnoded.util.zookeeper.validate_schema(logger, zkhandler)
|
||||
with zkhandler.writelock("base.schema.version"):
|
||||
sleep(0.5)
|
||||
pvcnoded.util.zookeeper.validate_schema(logger, zkhandler)
|
||||
|
||||
# Define a cleanup function
|
||||
def cleanup(failure=False):
|
||||
|
|
|
@ -231,7 +231,7 @@ class NetstatsInstance(object):
|
|||
# Get a list of all active interfaces
|
||||
net_root_path = "/sys/class/net"
|
||||
all_ifaces = list()
|
||||
for (_, dirnames, _) in walk(net_root_path):
|
||||
for _, dirnames, _ in walk(net_root_path):
|
||||
all_ifaces.extend(dirnames)
|
||||
all_ifaces.sort()
|
||||
|
||||
|
|
|
@ -521,7 +521,7 @@ class NodeInstance(object):
|
|||
self.logger.out("Acquired write lock for synchronization phase F", state="o")
|
||||
time.sleep(0.2) # Time for reader to acquire the lock
|
||||
# 4. Add gateway IPs
|
||||
for network in self.d_network:
|
||||
for network in self.d_network.copy():
|
||||
self.d_network[network].createGateways()
|
||||
self.logger.out("Releasing write lock for synchronization phase F", state="i")
|
||||
self.zkhandler.write([("base.config.primary_node.sync_lock", "")])
|
||||
|
|
|
@ -687,6 +687,29 @@ class VMInstance(object):
|
|||
abort_migrate("Target node changed during preparation")
|
||||
return
|
||||
if not force_shutdown:
|
||||
# Set the maxdowntime value from Zookeeper
|
||||
try:
|
||||
max_downtime = self.zkhandler.read(
|
||||
("domain.meta.migrate_max_downtime", self.domuuid)
|
||||
)
|
||||
except Exception as e:
|
||||
self.logger.out(
|
||||
f"Error fetching migrate max downtime; using default of 300s: {e}",
|
||||
state="w",
|
||||
)
|
||||
self.max_downtime = 300
|
||||
self.logger.out(
|
||||
f"Running migrate-setmaxdowntime with downtime value {max_downtime}",
|
||||
state="i",
|
||||
prefix="Domain {}".format(self.domuuid),
|
||||
)
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
f"virsh migrate-setmaxdowntime --downtime {max_downtime} {self.domuuid}"
|
||||
)
|
||||
if retcode:
|
||||
abort_migrate("Failed to set maxdowntime value on running VM")
|
||||
return
|
||||
|
||||
# A live migrate is attempted 3 times in succession
|
||||
ticks = 0
|
||||
while True:
|
||||
|
|
|
@ -253,12 +253,16 @@ def reboot_via_ipmi(node_name, ipmi_hostname, ipmi_user, ipmi_password, logger):
|
|||
state="i",
|
||||
prefix=f"fencing {node_name}",
|
||||
)
|
||||
ipmi_status_retcode, ipmi_status_stdout, ipmi_status_stderr = common.run_os_command(
|
||||
(
|
||||
ipmi_intermediate_status_retcode,
|
||||
ipmi_intermediate_status_stdout,
|
||||
ipmi_intermediate_status_stderr,
|
||||
) = common.run_os_command(
|
||||
f"/usr/bin/ipmitool -I lanplus -H {ipmi_hostname} -U {ipmi_user} -P {ipmi_password} chassis power status"
|
||||
)
|
||||
if ipmi_status_retcode == 0:
|
||||
if ipmi_intermediate_status_retcode == 0:
|
||||
logger.out(
|
||||
f"Current chassis power state is: {ipmi_status_stdout.strip()}",
|
||||
f"Current chassis power state is: {ipmi_intermediate_status_stdout.strip()}",
|
||||
state="i",
|
||||
prefix=f"fencing {node_name}",
|
||||
)
|
||||
|
@ -299,12 +303,14 @@ def reboot_via_ipmi(node_name, ipmi_hostname, ipmi_user, ipmi_password, logger):
|
|||
state="i",
|
||||
prefix=f"fencing {node_name}",
|
||||
)
|
||||
ipmi_status_retcode, ipmi_status_stdout, ipmi_status_stderr = common.run_os_command(
|
||||
f"/usr/bin/ipmitool -I lanplus -H {ipmi_hostname} -U {ipmi_user} -P {ipmi_password} chassis power status"
|
||||
ipmi_final_status_retcode, ipmi_final_status_stdout, ipmi_final_status_stderr = (
|
||||
common.run_os_command(
|
||||
f"/usr/bin/ipmitool -I lanplus -H {ipmi_hostname} -U {ipmi_user} -P {ipmi_password} chassis power status"
|
||||
)
|
||||
)
|
||||
|
||||
if ipmi_stop_retcode == 0:
|
||||
if ipmi_status_stdout.strip() == "Chassis Power is on":
|
||||
if ipmi_intermediate_status_stdout.strip() == "Chassis power is off":
|
||||
if ipmi_final_status_stdout.strip() == "Chassis Power is on":
|
||||
# We successfully rebooted the node and it is powered on; this is a successful fence
|
||||
logger.out(
|
||||
"Successfully rebooted dead node; proceeding with fence recovery action",
|
||||
|
@ -312,7 +318,7 @@ def reboot_via_ipmi(node_name, ipmi_hostname, ipmi_user, ipmi_password, logger):
|
|||
prefix=f"fencing {node_name}",
|
||||
)
|
||||
return True
|
||||
elif ipmi_status_stdout.strip() == "Chassis Power is off":
|
||||
elif ipmi_final_status_stdout.strip() == "Chassis Power is off":
|
||||
# We successfully rebooted the node but it is powered off; this might be expected or not, but the node is confirmed off so we can call it a successful fence
|
||||
logger.out(
|
||||
"Chassis power is in confirmed off state after successfuly IPMI reboot; proceeding with fence recovery action",
|
||||
|
@ -323,13 +329,13 @@ def reboot_via_ipmi(node_name, ipmi_hostname, ipmi_user, ipmi_password, logger):
|
|||
else:
|
||||
# We successfully rebooted the node but it is in some unknown power state; since this might indicate a silent failure, we must call it a failed fence
|
||||
logger.out(
|
||||
f"Chassis power is in an unknown state ({ipmi_status_stdout.strip()}) after successful IPMI reboot; NOT proceeding fence recovery action",
|
||||
f"Chassis power is in an unknown state ({ipmi_final_status_stdout.strip()}) after successful IPMI reboot; NOT proceeding fence recovery action",
|
||||
state="e",
|
||||
prefix=f"fencing {node_name}",
|
||||
)
|
||||
return False
|
||||
else:
|
||||
if ipmi_status_stdout.strip() == "Chassis Power is off":
|
||||
if ipmi_final_status_stdout.strip() == "Chassis Power is off":
|
||||
# We failed to reboot the node but it is powered off; it has probably suffered a serious hardware failure, but the node is confirmed off so we can call it a successful fence
|
||||
logger.out(
|
||||
"Chassis power is in confirmed off state after failed IPMI reboot; proceeding with fence recovery action",
|
||||
|
|
|
@ -157,7 +157,9 @@ def collect_ceph_stats(logger, config, zkhandler, this_node, queue):
|
|||
1
|
||||
].decode("ascii")
|
||||
try:
|
||||
ceph_pool_df_raw = json.loads(ceph_df_output)["pools"]
|
||||
ceph_pool_df_raw = sorted(
|
||||
json.loads(ceph_df_output)["pools"], key=lambda x: x["name"]
|
||||
)
|
||||
except Exception as e:
|
||||
logger.out("Failed to obtain Pool data (ceph df): {}".format(e), state="w")
|
||||
ceph_pool_df_raw = []
|
||||
|
@ -166,7 +168,9 @@ def collect_ceph_stats(logger, config, zkhandler, this_node, queue):
|
|||
"rados df --format json", timeout=1
|
||||
)
|
||||
try:
|
||||
rados_pool_df_raw = json.loads(stdout)["pools"]
|
||||
rados_pool_df_raw = sorted(
|
||||
json.loads(stdout)["pools"], key=lambda x: x["name"]
|
||||
)
|
||||
except Exception as e:
|
||||
logger.out("Failed to obtain Pool data (rados df): {}".format(e), state="w")
|
||||
rados_pool_df_raw = []
|
||||
|
@ -743,7 +747,7 @@ def node_keepalive(logger, config, zkhandler, this_node, netstats):
|
|||
# Get node performance statistics
|
||||
this_node.memtotal = int(psutil.virtual_memory().total / 1024 / 1024)
|
||||
this_node.memused = int(psutil.virtual_memory().used / 1024 / 1024)
|
||||
this_node.memfree = int(psutil.virtual_memory().free / 1024 / 1024)
|
||||
this_node.memfree = int(psutil.virtual_memory().available / 1024 / 1024)
|
||||
this_node.cpuload = round(os.getloadavg()[0], 2)
|
||||
|
||||
# Get node network statistics via netstats instance
|
||||
|
|
|
@ -94,7 +94,10 @@ def validate_schema(logger, zkhandler):
|
|||
# Validate our schema against the active version
|
||||
if not zkhandler.schema.validate(zkhandler, logger):
|
||||
logger.out("Found schema violations, applying", state="i")
|
||||
zkhandler.schema.apply(zkhandler)
|
||||
try:
|
||||
zkhandler.schema.apply(zkhandler)
|
||||
except Exception as e:
|
||||
logger.out(f"Failed to apply schema updates: {e}", state="w")
|
||||
else:
|
||||
logger.out("Schema successfully validated", state="o")
|
||||
|
||||
|
|
|
@ -168,7 +168,7 @@ database:
|
|||
port: 6379
|
||||
|
||||
# Hostname; use `cluster` network floating IP address
|
||||
hostname: 10.0.1.250
|
||||
hostname: 127.0.0.1
|
||||
|
||||
# Path, usually "/0"
|
||||
path: "/0"
|
||||
|
@ -180,7 +180,7 @@ database:
|
|||
port: 5432
|
||||
|
||||
# Hostname; use `cluster` network floating IP address
|
||||
hostname: 10.0.1.250
|
||||
hostname: 127.0.0.1
|
||||
|
||||
# Credentials
|
||||
credentials:
|
||||
|
|
|
@ -28,6 +28,11 @@ from daemon_lib.vm import (
|
|||
vm_worker_flush_locks,
|
||||
vm_worker_attach_device,
|
||||
vm_worker_detach_device,
|
||||
vm_worker_create_snapshot,
|
||||
vm_worker_remove_snapshot,
|
||||
vm_worker_rollback_snapshot,
|
||||
vm_worker_export_snapshot,
|
||||
vm_worker_import_snapshot,
|
||||
)
|
||||
from daemon_lib.ceph import (
|
||||
osd_worker_add_osd,
|
||||
|
@ -42,9 +47,12 @@ from daemon_lib.benchmark import (
|
|||
from daemon_lib.vmbuilder import (
|
||||
worker_create_vm,
|
||||
)
|
||||
from daemon_lib.autobackup import (
|
||||
worker_cluster_autobackup,
|
||||
)
|
||||
|
||||
# Daemon version
|
||||
version = "0.9.89"
|
||||
version = "0.9.100"
|
||||
|
||||
|
||||
config = cfg.get_configuration()
|
||||
|
@ -88,12 +96,27 @@ def create_vm(
|
|||
|
||||
|
||||
@celery.task(name="storage.benchmark", bind=True, routing_key="run_on")
|
||||
def storage_benchmark(self, pool=None, run_on="primary"):
|
||||
def storage_benchmark(self, pool=None, name=None, run_on="primary"):
|
||||
@ZKConnection(config)
|
||||
def run_storage_benchmark(zkhandler, self, pool):
|
||||
return worker_run_benchmark(zkhandler, self, config, pool)
|
||||
def run_storage_benchmark(zkhandler, self, pool, name):
|
||||
return worker_run_benchmark(zkhandler, self, config, pool, name)
|
||||
|
||||
return run_storage_benchmark(self, pool)
|
||||
return run_storage_benchmark(self, pool, name)
|
||||
|
||||
|
||||
@celery.task(name="cluster.autobackup", bind=True, routing_key="run_on")
|
||||
def cluster_autobackup(self, force_full=False, email_recipients=None, run_on="primary"):
|
||||
@ZKConnection(config)
|
||||
def run_cluster_autobackup(
|
||||
zkhandler, self, force_full=False, email_recipients=None
|
||||
):
|
||||
return worker_cluster_autobackup(
|
||||
zkhandler, self, force_full=force_full, email_recipients=email_recipients
|
||||
)
|
||||
|
||||
return run_cluster_autobackup(
|
||||
self, force_full=force_full, email_recipients=email_recipients
|
||||
)
|
||||
|
||||
|
||||
@celery.task(name="vm.flush_locks", bind=True, routing_key="run_on")
|
||||
|
@ -123,6 +146,87 @@ def vm_device_detach(self, domain=None, xml=None, run_on=None):
|
|||
return run_vm_device_detach(self, domain, xml)
|
||||
|
||||
|
||||
@celery.task(name="vm.create_snapshot", bind=True, routing_key="run_on")
|
||||
def vm_create_snapshot(self, domain=None, snapshot_name=None, run_on="primary"):
|
||||
@ZKConnection(config)
|
||||
def run_vm_create_snapshot(zkhandler, self, domain, snapshot_name):
|
||||
return vm_worker_create_snapshot(zkhandler, self, domain, snapshot_name)
|
||||
|
||||
return run_vm_create_snapshot(self, domain, snapshot_name)
|
||||
|
||||
|
||||
@celery.task(name="vm.remove_snapshot", bind=True, routing_key="run_on")
|
||||
def vm_remove_snapshot(self, domain=None, snapshot_name=None, run_on="primary"):
|
||||
@ZKConnection(config)
|
||||
def run_vm_remove_snapshot(zkhandler, self, domain, snapshot_name):
|
||||
return vm_worker_remove_snapshot(zkhandler, self, domain, snapshot_name)
|
||||
|
||||
return run_vm_remove_snapshot(self, domain, snapshot_name)
|
||||
|
||||
|
||||
@celery.task(name="vm.rollback_snapshot", bind=True, routing_key="run_on")
|
||||
def vm_rollback_snapshot(self, domain=None, snapshot_name=None, run_on="primary"):
|
||||
@ZKConnection(config)
|
||||
def run_vm_rollback_snapshot(zkhandler, self, domain, snapshot_name):
|
||||
return vm_worker_rollback_snapshot(zkhandler, self, domain, snapshot_name)
|
||||
|
||||
return run_vm_rollback_snapshot(self, domain, snapshot_name)
|
||||
|
||||
|
||||
@celery.task(name="vm.export_snapshot", bind=True, routing_key="run_on")
|
||||
def vm_export_snapshot(
|
||||
self,
|
||||
domain=None,
|
||||
snapshot_name=None,
|
||||
export_path=None,
|
||||
incremental_parent=None,
|
||||
run_on="primary",
|
||||
):
|
||||
@ZKConnection(config)
|
||||
def run_vm_export_snapshot(
|
||||
zkhandler, self, domain, snapshot_name, export_path, incremental_parent=None
|
||||
):
|
||||
return vm_worker_export_snapshot(
|
||||
zkhandler,
|
||||
self,
|
||||
domain,
|
||||
snapshot_name,
|
||||
export_path,
|
||||
incremental_parent=incremental_parent,
|
||||
)
|
||||
|
||||
return run_vm_export_snapshot(
|
||||
self, domain, snapshot_name, export_path, incremental_parent=incremental_parent
|
||||
)
|
||||
|
||||
|
||||
@celery.task(name="vm.import_snapshot", bind=True, routing_key="run_on")
|
||||
def vm_import_snapshot(
|
||||
self,
|
||||
domain=None,
|
||||
snapshot_name=None,
|
||||
import_path=None,
|
||||
retain_snapshot=True,
|
||||
run_on="primary",
|
||||
):
|
||||
@ZKConnection(config)
|
||||
def run_vm_import_snapshot(
|
||||
zkhandler, self, domain, snapshot_name, import_path, retain_snapshot=True
|
||||
):
|
||||
return vm_worker_import_snapshot(
|
||||
zkhandler,
|
||||
self,
|
||||
domain,
|
||||
snapshot_name,
|
||||
import_path,
|
||||
retain_snapshot=retain_snapshot,
|
||||
)
|
||||
|
||||
return run_vm_import_snapshot(
|
||||
self, domain, snapshot_name, import_path, retain_snapshot=retain_snapshot
|
||||
)
|
||||
|
||||
|
||||
@celery.task(name="osd.add", bind=True, routing_key="run_on")
|
||||
def osd_add(
|
||||
self,
|
||||
|
|
Loading…
Reference in New Issue