Compare commits

...

6 Commits

Author SHA1 Message Date
d1fcac1f0a Bump version to 0.9.101 2024-10-15 11:39:11 -04:00
6ace2ebf6a Set expected PVC version for mirroring 2024-10-15 11:31:50 -04:00
962fba7621 Bump up startup waits slightly
Ensures there's more time for daemons (specifically Zookeeper) to start
up and synchronize between nodes.
2024-10-15 11:10:23 -04:00
49bf51da38 Fix indentation of previous fix 2024-10-15 10:57:33 -04:00
1293e8ae7e Fix bugs in lock freeing function
1. The destination state on an error was invalid; should be "stop".

2. If a lock was listed but removing it fails (because it was already
cleared somehow), this would error. In turn this would cause the VM to
not migrate and be left in an undefined state. Fix that when unlocking
is forced.
2024-10-15 10:43:52 -04:00
ae2cf8a070 Add some time for Zookeeper to synchronize 2024-10-15 10:43:44 -04:00
11 changed files with 72 additions and 16 deletions

View File

@ -1 +1 @@
0.9.100 0.9.101

View File

@ -1,5 +1,29 @@
## PVC Changelog ## PVC Changelog
###### [v0.9.101](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.101)
**New Feature**: Adds VM snapshot sending (`vm snapshot send`), VM mirroring (`vm mirror create`), and (offline) mirror promotion (`vm mirror promote`). Permits transferring VM snapshots to remote clusters, individually or repeatedly, and promoting them to active status, for disaster recovery and migration between clusters.
**Breaking Change**: Migrates the API daemon into Gunicorn when in production mode. Permits more scalable and performant operation of the API. **Requires additional dependency packages on all coordinator nodes** (`gunicorn`, `python3-gunicorn`, `python3-setuptools`); upgrade via `pvc-ansible` is strongly recommended.
**Enhancement**: Provides whole cluster utilization stats in the cluster status data. Permits better observability into the overall resource utilization of the cluster.
**Enhancement**: Adds a new storage benchmark format (v2) which includes additional resource utilization statistics. This allows for better evaluation of storage performance impact on the cluster as a whole. The updated format also permits arbitrary benchmark job names for easier parsing and tracking.
* [API Daemon] Allows scanning of new volumes added manually via other commands
* [API Daemon/CLI Client] Adds whole cluster utilization statistics to cluster status
* [API Daemon] Moves production API execution into Gunicorn
* [API Daemon] Adds a new storage benchmark format (v2) with additional resource tracking
* [API Daemon] Adds support for named storage benchmark jobs
* [API Daemon] Fixes a bug in OSD creation which would create `split` OSDs if `--osd-count` was set to 1
* [API Daemon] Adds support for the `mirror` VM state used by snapshot mirrors
* [CLI Client] Fixes several output display bugs in various commands and in Worker task outputs
* [CLI Client] Improves and shrinks the status progress bar output to support longer messages
* [API Daemon] Adds support for sending snapshots to remote clusters
* [API Daemon] Adds support for updating and promoting snapshot mirrors to remote clusters
* [Node Daemon] Improves timeouts during primary/secondary coordinator transitions to avoid deadlocks
* [Node Daemon] Improves timeouts during keepalive updates to avoid deadlocks
* [Node Daemon] Refactors fencing thread structure to ensure a single fencing task per cluster and sequential node fences to avoid potential anomalies (e.g. fencing 2 nodes simultaneously)
* [Node Daemon] Fixes a bug in fencing if VM locks were already freed, leaving VMs in an invalid state
* [Node Daemon] Increases the wait time during system startup to ensure Zookeeper has more time to synchronize
###### [v0.9.100](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.100) ###### [v0.9.100](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.100)
* [API Daemon] Improves the handling of "detect:" disk strings on newer systems by leveraging the "nvme" command * [API Daemon] Improves the handling of "detect:" disk strings on newer systems by leveraging the "nvme" command

View File

@ -2,7 +2,7 @@ from setuptools import setup
setup( setup(
name="pvc", name="pvc",
version="0.9.100", version="0.9.101",
packages=["pvc.cli", "pvc.lib"], packages=["pvc.cli", "pvc.lib"],
install_requires=[ install_requires=[
"Click", "Click",

View File

@ -1997,11 +1997,14 @@ def vm_worker_flush_locks(zkhandler, celery, domain, force_unlock=False):
) )
if lock_remove_retcode != 0: if lock_remove_retcode != 0:
fail( if force_unlock and "No such file or directory" in lock_remove_stderr:
celery, continue
f"Failed to free RBD lock {lock['id']} on volume {rbd}: {lock_remove_stderr}", else:
) fail(
return False celery,
f"Failed to free RBD lock {lock['id']} on volume {rbd}: {lock_remove_stderr}",
)
return False
current_stage += 1 current_stage += 1
return finish( return finish(
@ -3266,7 +3269,7 @@ def vm_worker_send_snapshot(
) )
return False return False
expected_destination_pvc_version = "0.9.100" # TODO: 0.9.101 when completed expected_destination_pvc_version = "0.9.101"
# Work around development versions # Work around development versions
current_destination_pvc_version = re.sub( current_destination_pvc_version = re.sub(
r"~git-.*", "", current_destination_pvc_version r"~git-.*", "", current_destination_pvc_version
@ -3810,7 +3813,7 @@ def vm_worker_create_mirror(
) )
return False return False
expected_destination_pvc_version = "0.9.100" # TODO: 0.9.101 when completed expected_destination_pvc_version = "0.9.101"
# Work around development versions # Work around development versions
current_destination_pvc_version = re.sub( current_destination_pvc_version = re.sub(
r"~git-.*", "", current_destination_pvc_version r"~git-.*", "", current_destination_pvc_version
@ -4406,7 +4409,7 @@ def vm_worker_promote_mirror(
) )
return False return False
expected_destination_pvc_version = "0.9.100" # TODO: 0.9.101 when completed expected_destination_pvc_version = "0.9.101"
# Work around development versions # Work around development versions
current_destination_pvc_version = re.sub( current_destination_pvc_version = re.sub(
r"~git-.*", "", current_destination_pvc_version r"~git-.*", "", current_destination_pvc_version

26
debian/changelog vendored
View File

@ -1,3 +1,29 @@
pvc (0.9.101-0) unstable; urgency=high
**New Feature**: Adds VM snapshot sending (`vm snapshot send`), VM mirroring (`vm mirror create`), and (offline) mirror promotion (`vm mirror promote`). Permits transferring VM snapshots to remote clusters, individually or repeatedly, and promoting them to active status, for disaster recovery and migration between clusters.
**Breaking Change**: Migrates the API daemon into Gunicorn when in production mode. Permits more scalable and performant operation of the API. **Requires additional dependency packages on all coordinator nodes** (`gunicorn`, `python3-gunicorn`, `python3-setuptools`); upgrade via `pvc-ansible` is strongly recommended.
**Enhancement**: Provides whole cluster utilization stats in the cluster status data. Permits better observability into the overall resource utilization of the cluster.
**Enhancement**: Adds a new storage benchmark format (v2) which includes additional resource utilization statistics. This allows for better evaluation of storage performance impact on the cluster as a whole. The updated format also permits arbitrary benchmark job names for easier parsing and tracking.
* [API Daemon] Allows scanning of new volumes added manually via other commands
* [API Daemon/CLI Client] Adds whole cluster utilization statistics to cluster status
* [API Daemon] Moves production API execution into Gunicorn
* [API Daemon] Adds a new storage benchmark format (v2) with additional resource tracking
* [API Daemon] Adds support for named storage benchmark jobs
* [API Daemon] Fixes a bug in OSD creation which would create `split` OSDs if `--osd-count` was set to 1
* [API Daemon] Adds support for the `mirror` VM state used by snapshot mirrors
* [CLI Client] Fixes several output display bugs in various commands and in Worker task outputs
* [CLI Client] Improves and shrinks the status progress bar output to support longer messages
* [API Daemon] Adds support for sending snapshots to remote clusters
* [API Daemon] Adds support for updating and promoting snapshot mirrors to remote clusters
* [Node Daemon] Improves timeouts during primary/secondary coordinator transitions to avoid deadlocks
* [Node Daemon] Improves timeouts during keepalive updates to avoid deadlocks
* [Node Daemon] Refactors fencing thread structure to ensure a single fencing task per cluster and sequential node fences to avoid potential anomalies (e.g. fencing 2 nodes simultaneously)
* [Node Daemon] Fixes a bug in fencing if VM locks were already freed, leaving VMs in an invalid state
* [Node Daemon] Increases the wait time during system startup to ensure Zookeeper has more time to synchronize
-- Joshua M. Boniface <joshua@boniface.me> Tue, 15 Oct 2024 11:39:11 -0400
pvc (0.9.100-0) unstable; urgency=high pvc (0.9.100-0) unstable; urgency=high
* [API Daemon] Improves the handling of "detect:" disk strings on newer systems by leveraging the "nvme" command * [API Daemon] Improves the handling of "detect:" disk strings on newer systems by leveraging the "nvme" command

View File

@ -33,7 +33,7 @@ import os
import signal import signal
# Daemon version # Daemon version
version = "0.9.100" version = "0.9.101"
########################################################## ##########################################################

View File

@ -49,7 +49,7 @@ import re
import json import json
# Daemon version # Daemon version
version = "0.9.100" version = "0.9.101"
########################################################## ##########################################################

View File

@ -247,7 +247,7 @@ def migrateFromFencedNode(zkhandler, node_name, config, logger):
) )
zkhandler.write( zkhandler.write(
{ {
(("domain.state", dom_uuid), "stopped"), (("domain.state", dom_uuid), "stop"),
(("domain.meta.autostart", dom_uuid), "True"), (("domain.meta.autostart", dom_uuid), "True"),
} }
) )

View File

@ -102,5 +102,5 @@ def start_system_services(logger, config):
start_workerd(logger, config) start_workerd(logger, config)
start_healthd(logger, config) start_healthd(logger, config)
logger.out("Waiting 5 seconds for daemons to start", state="s") logger.out("Waiting 10 seconds for daemons to start", state="s")
sleep(5) sleep(10)

View File

@ -188,3 +188,6 @@ def setup_node(logger, config, zkhandler):
(("node.count.networks", config["node_hostname"]), "0"), (("node.count.networks", config["node_hostname"]), "0"),
] ]
) )
logger.out("Waiting 5 seconds for Zookeeper to synchronize", state="s")
time.sleep(5)

View File

@ -55,7 +55,7 @@ from daemon_lib.autobackup import (
) )
# Daemon version # Daemon version
version = "0.9.100" version = "0.9.101"
config = cfg.get_configuration() config = cfg.get_configuration()