Add additional logging to flush selector

Adds additional debug logging to the flush selector to determine how and
why any given node is selected. Useful for troubleshooting strange
choices.
Joshua Boniface 2020-10-20 11:08:30 -04:00
parent 7cc33451b9
commit 726501f4d4
3 changed files with 42 additions and 11 deletions
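
Every hunk below adds the same two-line idiom: a guard on the daemon's debug
flag, followed by a tagged call to the daemon logger. A minimal sketch of the
idiom, using the names exactly as they appear in the diff (the surrounding
loop variables are assumed context, not part of this commit):

# Emit selector diagnostics only when the daemon runs with debug enabled.
# state='d' marks the message as debug-level and prefix='node-flush' tags
# the subsystem, so these lines are easy to grep out of the daemon log.
if config['debug']:
    logger.out('Evaluating node {} with load {}'.format(node, load), state='d', prefix='node-flush')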

View File

@@ -641,7 +641,7 @@ class NodeInstance(object):
             else:
                 current_node = zkhandler.readdata(self.zk_conn, '/domains/{}/node'.format(dom_uuid))
 
-            target_node = common.findTargetNode(self.zk_conn, self.config, dom_uuid)
+            target_node = common.findTargetNode(self.zk_conn, self.config, self.logger, dom_uuid)
             if target_node == current_node:
                 target_node = None

View File

@@ -138,7 +138,7 @@ def removeIPAddress(ipaddr, cidrnetmask, dev):
 #
 # Find a migration target
 #
-def findTargetNode(zk_conn, config, dom_uuid):
+def findTargetNode(zk_conn, config, logger, dom_uuid):
     # Determine VM node limits; set config value if read fails
     try:
         node_limit = zkhandler.readdata(zk_conn, '/domains/{}/node_limit'.format(dom_uuid)).split(',')
@@ -159,15 +159,18 @@ def findTargetNode(zk_conn, config, dom_uuid):
         search_field = config['migration_target_selector']
         zkhandler.writedata(zk_conn, { '/domains/{}/node_selector'.format(dom_uuid): config['migration_target_selector'] })
 
+    if config['debug']:
+        logger.out('Migrating VM {} with selector {}'.format(dom_uuid, search_field), state='d', prefix='node-flush')
+
     # Execute the search
     if search_field == 'mem':
-        return findTargetNodeMem(zk_conn, node_limit, dom_uuid)
+        return findTargetNodeMem(zk_conn, config, logger, node_limit, dom_uuid)
     if search_field == 'load':
-        return findTargetNodeLoad(zk_conn, node_limit, dom_uuid)
+        return findTargetNodeLoad(zk_conn, config, logger, node_limit, dom_uuid)
     if search_field == 'vcpus':
-        return findTargetNodeVCPUs(zk_conn, node_limit, dom_uuid)
+        return findTargetNodeVCPUs(zk_conn, config, logger, node_limit, dom_uuid)
     if search_field == 'vms':
-        return findTargetNodeVMs(zk_conn, node_limit, dom_uuid)
+        return findTargetNodeVMs(zk_conn, config, logger, node_limit, dom_uuid)
 
     # Nothing was found
     return None
@@ -196,11 +199,14 @@ def getNodes(zk_conn, node_limit, dom_uuid):
     return valid_node_list
 
 # via free memory (relative to allocated memory)
-def findTargetNodeMem(zk_conn, node_limit, dom_uuid):
+def findTargetNodeMem(zk_conn, config, logger, node_limit, dom_uuid):
     most_provfree = 0
     target_node = None
     node_list = getNodes(zk_conn, node_limit, dom_uuid)
+
+    if config['debug']:
+        logger.out('Found nodes: {}'.format(node_list), state='d', prefix='node-flush')
 
     for node in node_list:
         memprov = int(zkhandler.readdata(zk_conn, '/nodes/{}/memprov'.format(node)))
         memused = int(zkhandler.readdata(zk_conn, '/nodes/{}/memused'.format(node)))
@@ -208,53 +214,78 @@ def findTargetNodeMem(zk_conn, node_limit, dom_uuid):
         memtotal = memused + memfree
         provfree = memtotal - memprov
+        if config['debug']:
+            logger.out('Evaluating node {} with {} provfree'.format(node, provfree), state='d', prefix='node-flush')
         if provfree > most_provfree:
             most_provfree = provfree
             target_node = node
 
+    if config['debug']:
+        logger.out('Selected node {}'.format(target_node), state='d', prefix='node-flush')
     return target_node
 
 # via load average
-def findTargetNodeLoad(zk_conn, node_limit, dom_uuid):
+def findTargetNodeLoad(zk_conn, config, logger, node_limit, dom_uuid):
     least_load = 9999.0
     target_node = None
     node_list = getNodes(zk_conn, node_limit, dom_uuid)
+
+    if config['debug']:
+        logger.out('Found nodes: {}'.format(node_list), state='d', prefix='node-flush')
 
     for node in node_list:
         load = float(zkhandler.readdata(zk_conn, '/nodes/{}/cpuload'.format(node)))
+        if config['debug']:
+            logger.out('Evaluating node {} with load {}'.format(node, load), state='d', prefix='node-flush')
         if load < least_load:
             least_load = load
             target_node = node
 
+    if config['debug']:
+        logger.out('Selected node {}'.format(target_node), state='d', prefix='node-flush')
     return target_node
 
 # via total vCPUs
-def findTargetNodeVCPUs(zk_conn, node_limit, dom_uuid):
+def findTargetNodeVCPUs(zk_conn, config, logger, node_limit, dom_uuid):
     least_vcpus = 9999
     target_node = None
     node_list = getNodes(zk_conn, node_limit, dom_uuid)
+
+    if config['debug']:
+        logger.out('Found nodes: {}'.format(node_list), state='d', prefix='node-flush')
 
     for node in node_list:
         vcpus = int(zkhandler.readdata(zk_conn, '/nodes/{}/vcpualloc'.format(node)))
+        if config['debug']:
+            logger.out('Evaluating node {} with vcpualloc {}'.format(node, vcpus), state='d', prefix='node-flush')
         if vcpus < least_vcpus:
             least_vcpus = vcpus
             target_node = node
 
+    if config['debug']:
+        logger.out('Selected node {}'.format(target_node), state='d', prefix='node-flush')
     return target_node
 
 # via total VMs
-def findTargetNodeVMs(zk_conn, node_limit, dom_uuid):
+def findTargetNodeVMs(zk_conn, config, logger, node_limit, dom_uuid):
     least_vms = 9999
     target_node = None
    node_list = getNodes(zk_conn, node_limit, dom_uuid)
+
+    if config['debug']:
+        logger.out('Found nodes: {}'.format(node_list), state='d', prefix='node-flush')
 
     for node in node_list:
         vms = int(zkhandler.readdata(zk_conn, '/nodes/{}/domainscount'.format(node)))
+        if config['debug']:
+            logger.out('Evaluating node {} with VM count {}'.format(node, vms), state='d', prefix='node-flush')
         if vms < least_vms:
             least_vms = vms
             target_node = node
 
+    if config['debug']:
+        logger.out('Selected node {}'.format(target_node), state='d', prefix='node-flush')
     return target_node
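
Note the asymmetry among the selectors in this file: mem maximizes
provisioned-free memory, while load, vcpus, and vms all minimize their
metric. A worked example of the provfree calculation from the hunk above
(the memory figures are invented for illustration):

# provfree is physical memory minus memory promised to all VMs on the node;
# an over-provisioned node goes negative and loses to any node with headroom.
memused, memfree, memprov = 24576, 8192, 40960  # MiB, as read from Zookeeper
memtotal = memused + memfree                    # 32768 MiB physical
provfree = memtotal - memprov                   # -8192 MiB: over-provisioned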

View File

@@ -88,7 +88,7 @@ def migrateFromFencedNode(zk_conn, node_name, config, logger):
     def fence_migrate_vm(dom_uuid):
         VMInstance.flush_locks(zk_conn, logger, dom_uuid)
 
-        target_node = common.findTargetNode(zk_conn, config, dom_uuid)
+        target_node = common.findTargetNode(zk_conn, config, logger, dom_uuid)
         if target_node is not None:
             logger.out('Migrating VM "{}" to node "{}"'.format(dom_uuid, target_node), state='i')
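
All four selectors share one shape: list the candidate nodes, log each
node's metric, track the best so far, and log the final pick. A
self-contained sketch of that shape, runnable outside the daemon (the metric
dict and the print-based logger stand in for the zkhandler reads and the
real Logger class; both are assumptions, not the daemon's code):

# Simplified stand-in for the findTargetNode* functions: in the daemon the
# metric comes from zkhandler.readdata() and output goes through Logger.out().
def find_target_node_load(node_loads, config, log):
    least_load = 9999.0
    target_node = None
    node_list = list(node_loads)
    if config['debug']:
        log('Found nodes: {}'.format(node_list))
    for node in node_list:
        load = node_loads[node]
        if config['debug']:
            log('Evaluating node {} with load {}'.format(node, load))
        if load < least_load:
            least_load = load
            target_node = node
    if config['debug']:
        log('Selected node {}'.format(target_node))
    return target_node

# hv2 has the lowest load average and is selected.
print(find_target_node_load({'hv1': 2.4, 'hv2': 0.9, 'hv3': 1.7}, {'debug': True}, print))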