2018-10-14 02:01:35 -04:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
# common.py - PVC daemon function library, common functions
|
|
|
|
# Part of the Parallel Virtual Cluster (PVC) system
|
|
|
|
#
|
2020-01-08 19:38:02 -05:00
|
|
|
# Copyright (C) 2018-2020 Joshua M. Boniface <joshua@boniface.me>
|
2018-10-14 02:01:35 -04:00
|
|
|
#
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
#
|
|
|
|
###############################################################################
|
|
|
|
|
|
|
|
import subprocess
|
|
|
|
import threading
|
|
|
|
import signal
|
|
|
|
import os
|
|
|
|
import time
|
2019-08-07 14:02:57 -04:00
|
|
|
import shlex
|
2018-10-14 02:01:35 -04:00
|
|
|
|
|
|
|
import pvcd.log as log
|
2019-07-09 14:16:19 -04:00
|
|
|
import pvcd.zkhandler as zkhandler
|
2018-10-14 02:01:35 -04:00
|
|
|
|
|
|
|
class OSDaemon(object):
    """Handle to an OS process launched from a command string.

    The command string is tokenized with ``shlex.split`` and started with
    ``subprocess.Popen`` (no shell). The resulting ``Popen`` object is kept
    on ``self.proc`` so callers can wait on or signal the process.
    """

    def __init__(self, command_string, environment, logfile):
        """Start the process.

        Args:
            command_string: Full command line; split with ``shlex.split``.
            environment: Mapping passed to ``Popen`` as ``env`` (``None``
                inherits the current environment).
            logfile: Path appended to with both stdout and stderr of the
                process. When falsy, both streams go to pipes instead.
        """
        command = shlex.split(command_string)

        # Set stdout to be a logfile if set, otherwise capture via a pipe
        log_handle = open(logfile, 'a') if logfile else None
        stdout = log_handle if log_handle is not None else subprocess.PIPE

        # Invoke the process
        try:
            self.proc = subprocess.Popen(
                command,
                env=environment,
                stdout=stdout,
                stderr=stdout,
            )
        finally:
            # The child receives its own copy of the descriptor, so the
            # parent's handle is no longer needed once Popen has returned
            # (or failed); close it rather than leaking it.
            if log_handle is not None:
                log_handle.close()

    # Signal the process
    def signal(self, sent_signal):
        """Send a signal to the process by short name.

        Args:
            sent_signal: One of ``'hup'``, ``'int'``, ``'term'`` or
                ``'kill'``.

        Raises:
            KeyError: If ``sent_signal`` is not one of the names above.
        """
        # Inside the method body the name `signal` resolves to the imported
        # module, not to this method.
        signal_map = {
            'hup': signal.SIGHUP,
            'int': signal.SIGINT,
            'term': signal.SIGTERM,
            'kill': signal.SIGKILL
        }
        self.proc.send_signal(signal_map[sent_signal])
|
|
|
|
|
2018-10-15 20:54:49 -04:00
|
|
|
def run_os_daemon(command_string, environment=None, logfile=None):
    """Launch command_string as an OSDaemon and hand back the new instance.

    environment and logfile are forwarded unchanged to the OSDaemon
    constructor.
    """
    return OSDaemon(command_string, environment, logfile)
|
|
|
|
|
|
|
|
# Run a oneshot command, optionally without blocking
|
2019-07-09 15:03:14 -04:00
|
|
|
def run_os_command(command_string, background=False, environment=None, timeout=None):
    """Run a one-shot command, optionally without blocking.

    Args:
        command_string: Full command line; split with ``shlex.split`` and
            executed without a shell.
        background: When true, run the command in a new thread and return
            immediately; its output and exit status are discarded.
        environment: Mapping passed as ``env`` to ``subprocess.run``.
        timeout: Seconds to wait before giving up, or ``None`` for no limit.

    Returns:
        A ``(retcode, stdout, stderr)`` tuple.
        * Background mode: always ``(0, None, None)``.
        * Foreground, command finished: its return code and decoded output.
        * Foreground, timeout expired: ``(128, '', '')``.
    """
    command = shlex.split(command_string)

    if background:
        def runcmd():
            try:
                subprocess.run(
                    command,
                    env=environment,
                    timeout=timeout,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                )
            except subprocess.TimeoutExpired:
                # Best-effort: nobody is waiting on a background command,
                # so a timeout is deliberately ignored.
                pass

        thread = threading.Thread(target=runcmd, args=())
        thread.start()
        return 0, None, None

    try:
        command_output = subprocess.run(
            command,
            env=environment,
            timeout=timeout,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except subprocess.TimeoutExpired:
        # No completed process to read from; report the timeout explicitly
        # instead of relying on a later exception to blank the output.
        return 128, '', ''

    # Decode leniently so a single non-ASCII byte does not throw away the
    # whole output; pure-ASCII output decodes exactly as before.
    stdout = command_output.stdout.decode('utf-8', errors='replace')
    stderr = command_output.stderr.decode('utf-8', errors='replace')
    return command_output.returncode, stdout, stderr
|
2018-10-14 02:01:35 -04:00
|
|
|
|
|
|
|
# Reload the firewall rules of the system
|
2018-10-17 20:05:22 -04:00
|
|
|
def reload_firewall_rules(logger, rules_file):
    """Apply rules_file with nft, logging the attempt and any failure.

    logger must provide an out(message, state=...) method; a non-zero exit
    from nft is logged together with the command's stderr.
    """
    logger.out('Reloading firewall configuration', state='o')

    nft_command = '/usr/sbin/nft -f {}'.format(rules_file)
    exit_status, _, error_text = run_os_command(nft_command)
    if exit_status == 0:
        return

    logger.out('Failed to reload configuration: {}'.format(error_text), state='e')
|
2018-10-27 16:31:31 -04:00
|
|
|
|
|
|
|
# Create IP address
|
|
|
|
def createIPAddress(ipaddr, cidrnetmask, dev):
    """Add ipaddr/cidrnetmask to dev, then announce the address with arping."""
    add_command = 'ip address add {}/{} dev {}'.format(ipaddr, cidrnetmask, dev)
    run_os_command(add_command)

    # Per the change history, this particular arping interval/count, run in
    # the foreground, was found through testing to minimize ping loss when
    # the primary coordinator transitions; change the values with care.
    announce_command = 'arping -P -U -W 0.02 -c 2 -i {dev} -S {ip} {ip}'.format(
        ip=ipaddr,
        dev=dev,
    )
    run_os_command(announce_command)
|
|
|
|
|
|
|
|
# Remove IP address
|
|
|
|
def removeIPAddress(ipaddr, cidrnetmask, dev):
    """Delete ipaddr/cidrnetmask from dev using the ip command."""
    delete_command = 'ip address delete {}/{} dev {}'.format(ipaddr, cidrnetmask, dev)
    run_os_command(delete_command)
|
2019-07-09 14:16:19 -04:00
|
|
|
|
|
|
|
#
|
|
|
|
# Find a migration target
|
|
|
|
#
|
2019-10-12 01:59:08 -04:00
|
|
|
def findTargetNode(zk_conn, config, dom_uuid):
    """Pick a migration target node for the domain dom_uuid.

    Reads the domain's node limit and node selector from Zookeeper, falls
    back to (and stores) defaults when they are missing or unreadable, and
    dispatches to the matching findTargetNode* search function.

    Args:
        zk_conn: Zookeeper connection passed through to zkhandler.
        config: Mapping providing 'migration_target_selector', used when the
            domain has no valid selector of its own.
        dom_uuid: UUID of the domain to find a target for.

    Returns:
        Whatever the selected search function returns (a node name or None),
        or None when the selector is not one of 'mem', 'load', 'vcpus' or
        'vms'.
    """
    # Determine VM node limits; set config value if read fails
    try:
        node_limit = zkhandler.readdata(zk_conn, '/domains/{}/node_limit'.format(dom_uuid)).split(',')
        # A list holding only empty strings means "no limit"
        if not any(node_limit):
            node_limit = ''
    except Exception:
        # Catch ordinary errors only, so interpreter-exit and interrupt
        # requests are not swallowed here.
        node_limit = ''
        zkhandler.writedata(zk_conn, {'/domains/{}/node_limit'.format(dom_uuid): ''})

    # Determine VM search field
    try:
        search_field = zkhandler.readdata(zk_conn, '/domains/{}/node_selector'.format(dom_uuid))
    except Exception:
        search_field = None

    # If our search field is invalid, use and set the default (for next time)
    if search_field is None or search_field == 'None':
        search_field = config['migration_target_selector']
        zkhandler.writedata(zk_conn, {'/domains/{}/node_selector'.format(dom_uuid): config['migration_target_selector']})

    # Execute the search
    if search_field == 'mem':
        return findTargetNodeMem(zk_conn, node_limit, dom_uuid)
    if search_field == 'load':
        return findTargetNodeLoad(zk_conn, node_limit, dom_uuid)
    if search_field == 'vcpus':
        return findTargetNodeVCPUs(zk_conn, node_limit, dom_uuid)
    if search_field == 'vms':
        return findTargetNodeVMs(zk_conn, node_limit, dom_uuid)

    # Nothing was found
    return None
|
|
|
|
|
|
|
|
# Get the list of valid target nodes
|
2019-10-12 01:59:08 -04:00
|
|
|
def getNodes(zk_conn, node_limit, dom_uuid):
    """Return the nodes that are valid migration targets for dom_uuid.

    A node qualifies when it passes node_limit (if one is set), is not the
    node recorded at '/domains/<dom_uuid>/node', and has daemonstate 'run'
    and domainstate 'ready'.
    """
    all_nodes = zkhandler.listchildren(zk_conn, '/nodes')
    running_on = zkhandler.readdata(zk_conn, '/domains/{}/node'.format(dom_uuid))

    candidates = []
    for name in all_nodes:
        # An empty node_limit means every node is allowed
        if node_limit and name not in node_limit:
            continue

        daemon_state = zkhandler.readdata(zk_conn, '/nodes/{}/daemonstate'.format(name))
        domain_state = zkhandler.readdata(zk_conn, '/nodes/{}/domainstate'.format(name))

        is_elsewhere = name != running_on
        is_usable = daemon_state == 'run' and domain_state == 'ready'
        if is_elsewhere and is_usable:
            candidates.append(name)

    return candidates
|
|
|
|
|
|
|
|
# via free memory (relative to allocated memory)
|
2019-10-12 01:59:08 -04:00
|
|
|
def findTargetNodeMem(zk_conn, node_limit, dom_uuid):
    """Choose the valid node with the most memory not yet allocated.

    For each node from getNodes, the score is (memused + memfree) - memalloc.
    The first node with the highest score wins, provided that score is
    greater than zero; otherwise None is returned.
    """
    def unallocated(name):
        alloc = int(zkhandler.readdata(zk_conn, '/nodes/{}/memalloc'.format(name)))
        used = int(zkhandler.readdata(zk_conn, '/nodes/{}/memused'.format(name)))
        free = int(zkhandler.readdata(zk_conn, '/nodes/{}/memfree'.format(name)))
        return (used + free) - alloc

    scored = [(name, unallocated(name)) for name in getNodes(zk_conn, node_limit, dom_uuid)]

    # max() keeps the first of equal scores; the default covers an empty list
    best_name, best_score = max(scored, key=lambda pair: pair[1], default=(None, 0))
    return best_name if best_score > 0 else None
|
|
|
|
|
|
|
|
# via load average
|
2019-10-12 01:59:08 -04:00
|
|
|
def findTargetNodeLoad(zk_conn, node_limit, dom_uuid):
    """Choose the valid node with the lowest 'cpuload' value.

    Only loads strictly below 9999.0 are considered; the first node with the
    lowest load wins. Returns None when no node qualifies.
    """
    chosen = None
    lowest = 9999.0

    for candidate in getNodes(zk_conn, node_limit, dom_uuid):
        current = float(zkhandler.readdata(zk_conn, '/nodes/{}/cpuload'.format(candidate)))
        if lowest > current:
            chosen, lowest = candidate, current

    return chosen
|
|
|
|
|
|
|
|
# via total vCPUs
|
2019-10-12 01:59:08 -04:00
|
|
|
def findTargetNodeVCPUs(zk_conn, node_limit, dom_uuid):
    """Choose the valid node with the lowest 'vcpualloc' value.

    Only counts strictly below 9999 are considered; the first node with the
    lowest count wins. Returns None when no node qualifies.
    """
    allocations = [
        (name, int(zkhandler.readdata(zk_conn, '/nodes/{}/vcpualloc'.format(name))))
        for name in getNodes(zk_conn, node_limit, dom_uuid)
    ]

    # min() keeps the first of equal counts; the default covers an empty list
    best_name, best_count = min(allocations, key=lambda pair: pair[1], default=(None, 9999))
    return best_name if best_count < 9999 else None
|
|
|
|
|
|
|
|
# via total VMs
|
2019-10-12 01:59:08 -04:00
|
|
|
def findTargetNodeVMs(zk_conn, node_limit, dom_uuid):
    """Choose the valid node with the lowest 'domainscount' value.

    Only counts strictly below 9999 are considered; the first node with the
    lowest count wins. Returns None when no node qualifies.
    """
    domain_counts = [
        (name, int(zkhandler.readdata(zk_conn, '/nodes/{}/domainscount'.format(name))))
        for name in getNodes(zk_conn, node_limit, dom_uuid)
    ]

    # min() keeps the first of equal counts; the default covers an empty list
    best_name, best_count = min(domain_counts, key=lambda pair: pair[1], default=(None, 9999))
    return best_name if best_count < 9999 else None
|