pvc/daemon-common/node.py
Joshua M. Boniface a4b80be5ed Add provisioned memory to node info
Adds a separate field to the node memory, "provisioned", which totals
the amount of memory provisioned to all VMs on the node, regardless of
state, and in contrast to "allocated" which only counts running VMs.

Allows for the detection of potential overprovisioned states when
factoring in non-running VMs.

Includes the supporting code to get this data, since the original
implementation of VM memory selection was dependent on the VM being
running and getting this from libvirt. Now, if the VM is not active, it
gets this from the domain XML instead.
2020-10-18 14:17:15 -04:00

442 lines
20 KiB
Python

#!/usr/bin/env python3
# node.py - PVC client function library, node management
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2020 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
import os
import socket
import time
import uuid
import re
import tempfile
import subprocess
import difflib
import colorama
import click
import lxml.objectify
import configparser
import kazoo.client
import daemon_lib.ansiprint as ansiprint
import daemon_lib.zkhandler as zkhandler
import daemon_lib.common as common
import daemon_lib.vm as pvc_vm
def getNodeInformation(zk_conn, node_name):
"""
Gather information about a node from the Zookeeper database and return a dict() containing it.
"""
node_daemon_state = zkhandler.readdata(zk_conn, '/nodes/{}/daemonstate'.format(node_name))
node_coordinator_state = zkhandler.readdata(zk_conn, '/nodes/{}/routerstate'.format(node_name))
node_domain_state = zkhandler.readdata(zk_conn, '/nodes/{}/domainstate'.format(node_name))
node_static_data = zkhandler.readdata(zk_conn, '/nodes/{}/staticdata'.format(node_name)).split()
node_cpu_count = int(node_static_data[0])
node_kernel = node_static_data[1]
node_os = node_static_data[2]
node_arch = node_static_data[3]
node_vcpu_allocated = int(zkhandler.readdata(zk_conn, 'nodes/{}/vcpualloc'.format(node_name)))
node_mem_total = int(zkhandler.readdata(zk_conn, '/nodes/{}/memtotal'.format(node_name)))
node_mem_allocated = int(zkhandler.readdata(zk_conn, '/nodes/{}/memalloc'.format(node_name)))
node_mem_provisioned = int(zkhandler.readdata(zk_conn, '/nodes/{}/memprov'.format(node_name)))
node_mem_used = int(zkhandler.readdata(zk_conn, '/nodes/{}/memused'.format(node_name)))
node_mem_free = int(zkhandler.readdata(zk_conn, '/nodes/{}/memfree'.format(node_name)))
node_load = float(zkhandler.readdata(zk_conn, '/nodes/{}/cpuload'.format(node_name)))
node_domains_count = int(zkhandler.readdata(zk_conn, '/nodes/{}/domainscount'.format(node_name)))
node_running_domains = zkhandler.readdata(zk_conn, '/nodes/{}/runningdomains'.format(node_name)).split()
# Construct a data structure to represent the data
node_information = {
'name': node_name,
'daemon_state': node_daemon_state,
'coordinator_state': node_coordinator_state,
'domain_state': node_domain_state,
'cpu_count': node_cpu_count,
'kernel': node_kernel,
'os': node_os,
'arch': node_arch,
'load': node_load,
'domains_count': node_domains_count,
'running_domains': node_running_domains,
'vcpu': {
'total': node_cpu_count,
'allocated': node_vcpu_allocated
},
'memory': {
'total': node_mem_total,
'allocated': node_mem_allocated,
'provisioned': node_mem_provisioned,
'used': node_mem_used,
'free': node_mem_free
}
}
return node_information
#
# Direct Functions
#
def secondary_node(zk_conn, node):
# Verify node is valid
if not common.verifyNode(zk_conn, node):
return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node)
# Ensure node is a coordinator
daemon_mode = zkhandler.readdata(zk_conn, '/nodes/{}/daemonmode'.format(node))
if daemon_mode == 'hypervisor':
return False, 'ERROR: Cannot change router mode on non-coordinator node "{}"'.format(node)
# Ensure node is in run daemonstate
daemon_state = zkhandler.readdata(zk_conn, '/nodes/{}/daemonstate'.format(node))
if daemon_state != 'run':
return False, 'ERROR: Node "{}" is not active'.format(node)
# Get current state
current_state = zkhandler.readdata(zk_conn, '/nodes/{}/routerstate'.format(node))
if current_state == 'primary':
retmsg = 'Setting node {} in secondary router mode.'.format(node)
zkhandler.writedata(zk_conn, {
'/primary_node': 'none'
})
else:
return False, 'Node "{}" is already in secondary router mode.'.format(node)
return True, retmsg
def primary_node(zk_conn, node):
# Verify node is valid
if not common.verifyNode(zk_conn, node):
return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node)
# Ensure node is a coordinator
daemon_mode = zkhandler.readdata(zk_conn, '/nodes/{}/daemonmode'.format(node))
if daemon_mode == 'hypervisor':
return False, 'ERROR: Cannot change router mode on non-coordinator node "{}"'.format(node)
# Ensure node is in run daemonstate
daemon_state = zkhandler.readdata(zk_conn, '/nodes/{}/daemonstate'.format(node))
if daemon_state != 'run':
return False, 'ERROR: Node "{}" is not active'.format(node)
# Get current state
current_state = zkhandler.readdata(zk_conn, '/nodes/{}/routerstate'.format(node))
if current_state == 'secondary':
retmsg = 'Setting node {} in primary router mode.'.format(node)
zkhandler.writedata(zk_conn, {
'/primary_node': node
})
else:
return False, 'Node "{}" is already in primary router mode.'.format(node)
return True, retmsg
def flush_node(zk_conn, node, wait=False):
# Verify node is valid
if not common.verifyNode(zk_conn, node):
return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node)
retmsg = 'Flushing hypervisor {} of running VMs.'.format(node)
# Add the new domain to Zookeeper
zkhandler.writedata(zk_conn, {
'/nodes/{}/domainstate'.format(node): 'flush'
})
if wait:
while zkhandler.readdata(zk_conn, '/nodes/{}/domainstate'.format(node)) == 'flush':
time.sleep(1)
retmsg = 'Flushed hypervisor {} of running VMs.'.format(node)
return True, retmsg
def ready_node(zk_conn, node, wait=False):
# Verify node is valid
if not common.verifyNode(zk_conn, node):
return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node)
retmsg = 'Restoring hypervisor {} to active service.'.format(node)
# Add the new domain to Zookeeper
zkhandler.writedata(zk_conn, {
'/nodes/{}/domainstate'.format(node): 'unflush'
})
if wait:
while zkhandler.readdata(zk_conn, '/nodes/{}/domainstate'.format(node)) == 'unflush':
time.sleep(1)
retmsg = 'Restored hypervisor {} to active service.'.format(node)
return True, retmsg
def get_info(zk_conn, node):
# Verify node is valid
if not common.verifyNode(zk_conn, node):
return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node)
# Get information about node in a pretty format
node_information = getNodeInformation(zk_conn, node)
if not node_information:
return False, 'ERROR: Could not get information about node "{}".'.format(node)
return True, node_information
def get_list(zk_conn, limit, daemon_state=None, coordinator_state=None, domain_state=None, is_fuzzy=True):
node_list = []
full_node_list = zkhandler.listchildren(zk_conn, '/nodes')
for node in full_node_list:
if limit:
try:
if not is_fuzzy:
limit = '^' + limit + '$'
if re.match(limit, node):
node_list.append(getNodeInformation(zk_conn, node))
except Exception as e:
return False, 'Regex Error: {}'.format(e)
else:
node_list.append(getNodeInformation(zk_conn, node))
if daemon_state or coordinator_state or domain_state:
limited_node_list = []
for node in node_list:
add_node = False
if daemon_state and node['daemon_state'] == daemon_state:
add_node = True
if coordinator_state and node['coordinator_state'] == coordinator_state:
add_node = True
if domain_state and node['domain_state'] == domain_state:
add_node = True
if add_node:
limited_node_list.append(node)
node_list = limited_node_list
return True, node_list
#
# CLI-specific functions
#
def getOutputColours(node_information):
if node_information['daemon_state'] == 'run':
daemon_state_colour = ansiprint.green()
elif node_information['daemon_state'] == 'stop':
daemon_state_colour = ansiprint.red()
elif node_information['daemon_state'] == 'shutdown':
daemon_state_colour = ansiprint.yellow()
elif node_information['daemon_state'] == 'init':
daemon_state_colour = ansiprint.yellow()
elif node_information['daemon_state'] == 'dead':
daemon_state_colour = ansiprint.red() + ansiprint.bold()
else:
daemon_state_colour = ansiprint.blue()
if node_information['coordinator_state'] == 'primary':
coordinator_state_colour = ansiprint.green()
elif node_information['coordinator_state'] == 'secondary':
coordinator_state_colour = ansiprint.blue()
else:
coordinator_state_colour = ansiprint.cyan()
if node_information['domain_state'] == 'ready':
domain_state_colour = ansiprint.green()
else:
domain_state_colour = ansiprint.blue()
return daemon_state_colour, coordinator_state_colour, domain_state_colour
def format_info(node_information, long_output):
daemon_state_colour, coordinator_state_colour, domain_state_colour = getOutputColours(node_information)
# Format a nice output; do this line-by-line then concat the elements at the end
ainformation = []
# Basic information
ainformation.append('{}Name:{} {}'.format(ansiprint.purple(), ansiprint.end(), node_information['name']))
ainformation.append('{}Daemon State:{} {}{}{}'.format(ansiprint.purple(), ansiprint.end(), daemon_state_colour, node_information['daemon_state'], ansiprint.end()))
ainformation.append('{}Coordinator State:{} {}{}{}'.format(ansiprint.purple(), ansiprint.end(), coordinator_state_colour, node_information['coordinator_state'], ansiprint.end()))
ainformation.append('{}Domain State:{} {}{}{}'.format(ansiprint.purple(), ansiprint.end(), domain_state_colour, node_information['domain_state'], ansiprint.end()))
ainformation.append('{}Active VM Count:{} {}'.format(ansiprint.purple(), ansiprint.end(), node_information['domains_count']))
if long_output:
ainformation.append('')
ainformation.append('{}Architecture:{} {}'.format(ansiprint.purple(), ansiprint.end(), node_information['arch']))
ainformation.append('{}Operating System:{} {}'.format(ansiprint.purple(), ansiprint.end(), node_information['os']))
ainformation.append('{}Kernel Version:{} {}'.format(ansiprint.purple(), ansiprint.end(), node_information['kernel']))
ainformation.append('')
ainformation.append('{}Host CPUs:{} {}'.format(ansiprint.purple(), ansiprint.end(), node_information['vcpu']['total']))
ainformation.append('{}vCPUs:{} {}'.format(ansiprint.purple(), ansiprint.end(), node_information['vcpu']['allocated']))
ainformation.append('{}Load:{} {}'.format(ansiprint.purple(), ansiprint.end(), node_information['load']))
ainformation.append('{}Total RAM (MiB):{} {}'.format(ansiprint.purple(), ansiprint.end(), node_information['memory']['total']))
ainformation.append('{}Used RAM (MiB):{} {}'.format(ansiprint.purple(), ansiprint.end(), node_information['memory']['used']))
ainformation.append('{}Free RAM (MiB):{} {}'.format(ansiprint.purple(), ansiprint.end(), node_information['memory']['free']))
ainformation.append('{}Allocated RAM (MiB):{} {}'.format(ansiprint.purple(), ansiprint.end(), node_information['memory']['allocated']))
ainformation.append('{}Provisioned RAM (MiB):{} {}'.format(ansiprint.purple(), ansiprint.end(), node_information['memory']['provisioned']))
# Join it all together
information = '\n'.join(ainformation)
click.echo(information)
click.echo('')
def format_list(node_list):
node_list_output = []
# Determine optimal column widths
node_name_length = 5
daemon_state_length = 7
coordinator_state_length = 12
domain_state_length = 8
domains_count_length = 4
cpu_count_length = 6
load_length = 5
mem_total_length = 6
mem_used_length = 5
mem_free_length = 5
mem_alloc_length = 4
mem_prov_length = 4
for node_information in node_list:
# node_name column
_node_name_length = len(node_information['name']) + 1
if _node_name_length > node_name_length:
node_name_length = _node_name_length
# daemon_state column
_daemon_state_length = len(node_information['daemon_state']) + 1
if _daemon_state_length > daemon_state_length:
daemon_state_length = _daemon_state_length
# coordinator_state column
_coordinator_state_length = len(node_information['coordinator_state']) + 1
if _coordinator_state_length > coordinator_state_length:
coordinator_state_length = _coordinator_state_length
# domain_state column
_domain_state_length = len(node_information['domain_state']) + 1
if _domain_state_length > domain_state_length:
domain_state_length = _domain_state_length
# domains_count column
_domains_count_length = len(str(node_information['domains_count'])) + 1
if _domains_count_length > domains_count_length:
domains_count_length = _domains_count_length
# cpu_count column
_cpu_count_length = len(str(node_information['cpu_count'])) + 1
if _cpu_count_length > cpu_count_length:
cpu_count_length = _cpu_count_length
# load column
_load_length = len(str(node_information['load'])) + 1
if _load_length > load_length:
load_length = _load_length
# mem_total column
_mem_total_length = len(str(node_information['memory']['total'])) + 1
if _mem_total_length > mem_total_length:
mem_total_length = _mem_total_length
# mem_used column
_mem_used_length = len(str(node_information['memory']['used'])) + 1
if _mem_used_length > mem_used_length:
mem_used_length = _mem_used_length
# mem_free column
_mem_free_length = len(str(node_information['memory']['free'])) + 1
if _mem_free_length > mem_free_length:
mem_free_length = _mem_free_length
# mem_alloc column
_mem_alloc_length = len(str(node_information['memory']['allocated'])) + 1
if _mem_alloc_length > mem_alloc_length:
mem_alloc_length = _mem_alloc_length
# mem_prov column
_mem_prov_length = len(str(node_information['memory']['provisioned'])) + 1
if _mem_prov_length > mem_prov_length:
mem_prov_length = _mem_prov_length
# Format the string (header)
node_list_output.append(
'{bold}{node_name: <{node_name_length}} \
St: {daemon_state_colour}{node_daemon_state: <{daemon_state_length}}{end_colour} {coordinator_state_colour}{node_coordinator_state: <{coordinator_state_length}}{end_colour} {domain_state_colour}{node_domain_state: <{domain_state_length}}{end_colour} \
Res: {node_domains_count: <{domains_count_length}} {node_cpu_count: <{cpu_count_length}} {node_load: <{load_length}} \
Mem (M): {node_mem_total: <{mem_total_length}} {node_mem_used: <{mem_used_length}} {node_mem_free: <{mem_free_length}} {node_mem_allocated: <{mem_alloc_length}} {node_mem_provisioned: <{mem_prov_length}}{end_bold}'.format(
node_name_length=node_name_length,
daemon_state_length=daemon_state_length,
coordinator_state_length=coordinator_state_length,
domain_state_length=domain_state_length,
domains_count_length=domains_count_length,
cpu_count_length=cpu_count_length,
load_length=load_length,
mem_total_length=mem_total_length,
mem_used_length=mem_used_length,
mem_free_length=mem_free_length,
mem_alloc_length=mem_alloc_length,
mem_prov_length=mem_prov_length,
bold=ansiprint.bold(),
end_bold=ansiprint.end(),
daemon_state_colour='',
coordinator_state_colour='',
domain_state_colour='',
end_colour='',
node_name='Name',
node_daemon_state='Daemon',
node_coordinator_state='Coordinator',
node_domain_state='Domain',
node_domains_count='VMs',
node_cpu_count='vCPUs',
node_load='Load',
node_mem_total='Total',
node_mem_used='Used',
node_mem_free='Free',
node_mem_allocated='VMs Run',
node_mem_provisioned='VMs Total'
)
)
# Format the string (elements)
for node_information in node_list:
daemon_state_colour, coordinator_state_colour, domain_state_colour = getOutputColours(node_information)
node_list_output.append(
'{bold}{node_name: <{node_name_length}} \
{daemon_state_colour}{node_daemon_state: <{daemon_state_length}}{end_colour} {coordinator_state_colour}{node_coordinator_state: <{coordinator_state_length}}{end_colour} {domain_state_colour}{node_domain_state: <{domain_state_length}}{end_colour} \
{node_domains_count: <{domains_count_length}} {node_cpu_count: <{cpu_count_length}} {node_load: <{load_length}} \
{node_mem_total: <{mem_total_length}} {node_mem_used: <{mem_used_length}} {node_mem_free: <{mem_free_length}} {node_mem_allocated: <{mem_alloc_length}} {node_mem_provisioned: <{mem_prov_length}}{end_bold}'.format(
node_name_length=node_name_length,
daemon_state_length=daemon_state_length,
coordinator_state_length=coordinator_state_length,
domain_state_length=domain_state_length,
domains_count_length=domains_count_length,
cpu_count_length=cpu_count_length,
load_length=load_length,
mem_total_length=mem_total_length,
mem_used_length=mem_used_length,
mem_free_length=mem_free_length,
mem_alloc_length=mem_alloc_length,
mem_prov_length=mem_prov_length,
bold='',
end_bold='',
daemon_state_colour=daemon_state_colour,
coordinator_state_colour=coordinator_state_colour,
domain_state_colour=domain_state_colour,
end_colour=ansiprint.end(),
node_name=node_information['name'],
node_daemon_state=node_information['daemon_state'],
node_coordinator_state=node_information['coordinator_state'],
node_domain_state=node_information['domain_state'],
node_domains_count=node_information['domains_count'],
node_cpu_count=node_information['vcpu']['allocated'],
node_load=node_information['load'],
node_mem_total=node_information['memory']['total'],
node_mem_used=node_information['memory']['used'],
node_mem_free=node_information['memory']['free'],
node_mem_allocated=node_information['memory']['allocated'],
node_mem_provisioned=node_information['memory']['provisioned']
)
)
click.echo('\n'.join(sorted(node_list_output)))