Compare commits

..

4 Commits

Author SHA1 Message Date
962fba7621 Bump up startup waits slightly
Ensures there's more time for daemons (specifically Zookeeper) to start
up and synchronize between nodes.
2024-10-15 11:10:23 -04:00
49bf51da38 Fix indentation of previous fix 2024-10-15 10:57:33 -04:00
1293e8ae7e Fix bugs in lock freeing function
1. The destination state on an error was invalid; should be "stop".

2. If a lock was listed but removing it fails (because it was already
cleared somehow), this would error. In turn this would cause the VM to
not migrate and be left in an undefined state. Fix that when unlocking
is forced.
2024-10-15 10:43:52 -04:00
ae2cf8a070 Add some time for Zookeeper to synchronize 2024-10-15 10:43:44 -04:00
4 changed files with 14 additions and 8 deletions

View File

@@ -1997,11 +1997,14 @@ def vm_worker_flush_locks(zkhandler, celery, domain, force_unlock=False):
 )
 if lock_remove_retcode != 0:
-fail(
-celery,
-f"Failed to free RBD lock {lock['id']} on volume {rbd}: {lock_remove_stderr}",
-)
-return False
+if force_unlock and "No such file or directory" in lock_remove_stderr:
+continue
+else:
+fail(
+celery,
+f"Failed to free RBD lock {lock['id']} on volume {rbd}: {lock_remove_stderr}",
+)
+return False
 current_stage += 1
 return finish(

View File

@@ -247,7 +247,7 @@ def migrateFromFencedNode(zkhandler, node_name, config, logger):
 )
 zkhandler.write(
 {
-(("domain.state", dom_uuid), "stopped"),
+(("domain.state", dom_uuid), "stop"),
 (("domain.meta.autostart", dom_uuid), "True"),
 }
 )

View File

@@ -102,5 +102,5 @@ def start_system_services(logger, config):
 start_workerd(logger, config)
 start_healthd(logger, config)
-logger.out("Waiting 5 seconds for daemons to start", state="s")
-sleep(5)
+logger.out("Waiting 10 seconds for daemons to start", state="s")
+sleep(10)

View File

@@ -188,3 +188,6 @@ def setup_node(logger, config, zkhandler):
 (("node.count.networks", config["node_hostname"]), "0"),
 ]
 )
+logger.out("Waiting 5 seconds for Zookeeper to synchronize", state="s")
+time.sleep(5)