pvc/daemon-common/node.py

228 lines
8.1 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
# node.py - PVC client function library, node management
# Part of the Parallel Virtual Cluster (PVC) system
#
2021-03-25 17:01:55 -04:00
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
import time
import re
import daemon_lib.common as common
2021-05-29 20:56:21 -04:00
def getNodeInformation(zkhandler, node_name):
    """
    Read the Zookeeper keys describing a single node and return them as a dict().

    Every value is fetched with zkhandler.read() using a (key, node_name) tuple. Numeric
    values are converted with int()/float(), so a malformed value raises ValueError; the
    static data string must contain at least four whitespace-separated fields.
    """
    def fetch(key):
        # All keys read by this function are scoped to the requested node
        return zkhandler.read((key, node_name))

    daemon_state = fetch('node.state.daemon')
    coordinator_state = fetch('node.state.router')
    domain_state = fetch('node.state.domain')

    # Static data is a single whitespace-separated string; the first four fields are
    # used as CPU count, kernel, OS and architecture, in that order
    static_fields = fetch('node.data.static').split()
    cpu_count = int(static_fields[0])
    kernel = static_fields[1]
    os_name = static_fields[2]
    arch = static_fields[3]

    vcpu = {
        # The vCPU "total" is the node's CPU count from the static data
        'total': cpu_count,
        'allocated': int(fetch('node.vcpu.allocated')),
    }
    memory = {
        'total': int(fetch('node.memory.total')),
        'allocated': int(fetch('node.memory.allocated')),
        'provisioned': int(fetch('node.memory.provisioned')),
        'used': int(fetch('node.memory.used')),
        'free': int(fetch('node.memory.free')),
    }
    load = float(fetch('node.cpu.load'))
    domains_count = int(fetch('node.count.provisioned_domains'))
    # Running domains are stored as one whitespace-separated string
    running_domains = fetch('node.running_domains').split()

    # Assemble the structure handed back to callers
    return {
        'name': node_name,
        'daemon_state': daemon_state,
        'coordinator_state': coordinator_state,
        'domain_state': domain_state,
        'cpu_count': cpu_count,
        'kernel': kernel,
        'os': os_name,
        'arch': arch,
        'load': load,
        'domains_count': domains_count,
        'running_domains': running_domains,
        'vcpu': vcpu,
        'memory': memory,
    }
#
# Direct Functions
#
2021-05-29 20:56:21 -04:00
def secondary_node(zkhandler, node):
    """
    Request that a coordinator node be placed in secondary router mode.

    Returns a (success, message) tuple. The request is refused when the node is unknown,
    when its mode is 'hypervisor' (i.e. it is not a coordinator), or when its daemon state
    is not 'run'. If the node's router state is already 'secondary', nothing is written.
    """
    # The node must exist in the cluster
    if not common.verifyNode(zkhandler, node):
        return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node)

    # Only coordinators have a router mode to change
    if zkhandler.read(('node.mode', node)) == 'hypervisor':
        return False, 'ERROR: Cannot change router mode on non-coordinator node "{}"'.format(node)

    # The node daemon must be running
    if zkhandler.read(('node.state.daemon', node)) != 'run':
        return False, 'ERROR: Node "{}" is not active'.format(node)

    # Nothing to do if the node is already secondary
    if zkhandler.read(('node.state.router', node)) == 'secondary':
        return True, 'Node "{}" is already in secondary router mode.'.format(node)

    result = (True, 'Setting node {} in secondary router mode.'.format(node))
    # Clear the configured primary node; presumably the cluster then selects a new
    # primary on its own - verify against the node daemon
    zkhandler.write([('base.config.primary_node', 'none')])
    return result
2021-05-29 20:56:21 -04:00
def primary_node(zkhandler, node):
    """
    Request that a coordinator node be placed in primary router mode.

    Returns a (success, message) tuple. The request is refused when the node is unknown,
    when its mode is 'hypervisor' (i.e. it is not a coordinator), or when its daemon state
    is not 'run'. If the node's router state is already 'primary', nothing is written.
    """
    # The node must exist in the cluster
    node_exists = common.verifyNode(zkhandler, node)
    if not node_exists:
        return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node)

    # Only coordinators have a router mode to change
    mode = zkhandler.read(('node.mode', node))
    if mode == 'hypervisor':
        return False, 'ERROR: Cannot change router mode on non-coordinator node "{}"'.format(node)

    # The node daemon must be running
    running = zkhandler.read(('node.state.daemon', node)) == 'run'
    if not running:
        return False, 'ERROR: Node "{}" is not active'.format(node)

    # Nothing to do if the node is already primary
    router_state = zkhandler.read(('node.state.router', node))
    if router_state == 'primary':
        return True, 'Node "{}" is already in primary router mode.'.format(node)

    message = 'Setting node {} in primary router mode.'.format(node)
    # Record this node as the configured primary
    zkhandler.write([('base.config.primary_node', node)])
    return True, message
2021-05-29 20:56:21 -04:00
def flush_node(zkhandler, node, wait=False):
    """
    Request that a node be flushed of its running VMs.

    Writes 'flush' to the node's domain state key. With wait=True, the call then polls
    that key once per second until it no longer reads 'flush' before returning.

    Returns a (success, message) tuple; an already-flushed node is reported as success
    without writing anything.
    """
    # The node must exist in the cluster
    if not common.verifyNode(zkhandler, node):
        return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node)

    domain_state_key = ('node.state.domain', node)

    if zkhandler.read(domain_state_key) == 'flushed':
        return True, 'Hypervisor {} is already flushed.'.format(node)

    # Set the flush request on the node's domain state
    zkhandler.write([(domain_state_key, 'flush')])

    if not wait:
        return True, 'Flushing hypervisor {} of running VMs.'.format(node)

    # NOTE: there is no timeout; this blocks for as long as the state stays 'flush'
    while zkhandler.read(domain_state_key) == 'flush':
        time.sleep(1)
    return True, 'Flushed hypervisor {} of running VMs.'.format(node)
2021-05-29 20:56:21 -04:00
def ready_node(zkhandler, node, wait=False):
    """
    Request that a node be restored to active service.

    Writes 'unflush' to the node's domain state key. With wait=True, the call then polls
    that key once per second until it no longer reads 'unflush' before returning.

    Returns a (success, message) tuple; a node whose domain state is already 'ready' is
    reported as success without writing anything.
    """
    # The node must exist in the cluster
    if not common.verifyNode(zkhandler, node):
        return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node)

    state_key = ('node.state.domain', node)

    already_ready = zkhandler.read(state_key) == 'ready'
    if already_ready:
        return True, 'Hypervisor {} is already ready.'.format(node)

    # Set the unflush request on the node's domain state
    zkhandler.write([(state_key, 'unflush')])

    if wait:
        # NOTE: there is no timeout; this blocks for as long as the state stays 'unflush'
        while zkhandler.read(state_key) == 'unflush':
            time.sleep(1)
        outcome = 'Restored hypervisor {} to active service.'.format(node)
    else:
        outcome = 'Restoring hypervisor {} to active service.'.format(node)

    return True, outcome
2021-05-29 20:56:21 -04:00
def get_info(zkhandler, node):
    """
    Return the information dict for a single node.

    Returns (True, node information dict) on success, or (False, error message) when the
    node is not present in the cluster or no information could be gathered for it.
    """
    # The node must exist in the cluster
    if not common.verifyNode(zkhandler, node):
        return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node)

    # Gather the node's details from Zookeeper
    details = getNodeInformation(zkhandler, node)
    if details:
        return True, details

    return False, 'ERROR: Could not get information about node "{}".'.format(node)
2021-05-29 20:56:21 -04:00
def get_list(zkhandler, limit, daemon_state=None, coordinator_state=None, domain_state=None, is_fuzzy=True):
    """
    Return information about the nodes in the cluster, optionally filtered.

    limit: regular expression applied to each node name with match(), so it is always
        anchored at the start of the name; a falsy value selects every node.
    is_fuzzy: when False, the limit is wrapped as '^' + limit + '$' before matching.
    daemon_state, coordinator_state, domain_state: optional state filters. When at least
        one is given, a node is kept if it matches ANY of the given states.

    Returns (True, list of node information dicts) on success, or
    (False, 'Regex Error: ...') when the limit cannot be compiled as a regular expression.
    Errors raised while reading a node's information propagate to the caller, whether or
    not a limit was given.
    """
    full_node_list = zkhandler.children('base.node')

    # Anchor and compile the limit exactly once, before looking at any node. Doing this
    # inside the per-node loop would re-wrap the pattern in '^...$' on every iteration,
    # and would skip validation entirely when the cluster has no nodes.
    pattern = None
    if limit:
        try:
            pattern = re.compile(limit if is_fuzzy else '^' + limit + '$')
        except Exception as e:
            # Only the compilation is guarded, so this message always describes the regex
            return False, 'Regex Error: {}'.format(e)

    node_list = [
        getNodeInformation(zkhandler, node)
        for node in full_node_list
        if pattern is None or pattern.match(node)
    ]

    if daemon_state or coordinator_state or domain_state:
        # Pair each requested state with the node field it filters on; unset filters
        # are dropped so they can never match
        wanted_states = [
            (field, value)
            for field, value in (
                ('daemon_state', daemon_state),
                ('coordinator_state', coordinator_state),
                ('domain_state', domain_state),
            )
            if value
        ]
        node_list = [
            node for node in node_list
            if any(node[field] == value for field, value in wanted_states)
        ]

    return True, node_list