Move VM list assembly to thread pool

This helps parallelize the numerous Zookeeper calls a little bit, at
least within the bounds of the GIL, to improve performance when getting
a large list of VMs. The max_workers value is capped at 32 to avoid
spawning too many threads during concurrent executions, while the pool
still provides a noticeable speedup (on the order of 0.2-0.4 seconds
with 75 VMs, scaling up further as counts grow).
This commit is contained in:
Joshua Boniface 2021-07-01 17:24:47 -04:00
parent baf4c3fbc7
commit 58789f1db4
1 changed file with 17 additions and 6 deletions

View File

@ -25,6 +25,7 @@ import lxml.objectify
import lxml.etree import lxml.etree
from uuid import UUID from uuid import UUID
from concurrent.futures import ThreadPoolExecutor
import daemon_lib.common as common import daemon_lib.common as common
@ -881,11 +882,21 @@ def get_list(zkhandler, node, state, limit, is_fuzzy=True):
else: else:
is_state_match = True is_state_match = True
if is_limit_match and is_node_match and is_state_match: get_vm_info[vm] = True if is_limit_match and is_node_match and is_state_match else False
get_vm_info[vm] = True
else:
get_vm_info[vm] = False
vm_list = [common.getInformationFromXML(zkhandler, vm) for vm in full_vm_list if get_vm_info[vm]] # Obtain our VM data in a thread pool
# This helps parallelize the numerous Zookeeper calls a bit, within the bounds of the GIL, and
# should help prevent this task from becoming absurdly slow with very large numbers of VMs.
# The max_workers is capped at 32 to avoid creating an absurd number of threads, especially if
# the list gets called multiple times simultaneously by the API, but still provides a noticeable
# speedup.
vm_execute_list = [vm for vm in full_vm_list if get_vm_info[vm]]
vm_data_list = list()
with ThreadPoolExecutor(max_workers=32, thread_name_prefix='vm_list') as executor:
futures = []
for vm_uuid in vm_execute_list:
futures.append(executor.submit(common.getInformationFromXML, zkhandler, vm_uuid))
for future in futures:
vm_data_list.append(future.result())
return True, vm_list return True, vm_data_list