2018-05-31 20:26:44 -04:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
2018-06-06 01:47:53 -04:00
|
|
|
# pvcd.py - PVC hypervisor node daemon
|
|
|
|
# Part of the Parallel Virtual Cluster (PVC) system
|
|
|
|
#
|
|
|
|
# Copyright (C) 2018 Joshua M. Boniface <joshua@boniface.me>
|
|
|
|
#
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
#
|
|
|
|
###############################################################################
|
|
|
|
|
2018-06-06 14:16:52 -04:00
|
|
|
import kazoo.client
|
2018-05-31 20:26:44 -04:00
|
|
|
import libvirt
|
|
|
|
import sys
|
2018-06-08 12:19:48 -04:00
|
|
|
import os
|
2018-05-31 21:49:23 -04:00
|
|
|
import socket
|
2018-06-12 12:07:57 -04:00
|
|
|
import psutil
|
|
|
|
import subprocess
|
2018-05-31 20:26:44 -04:00
|
|
|
import uuid
|
|
|
|
import VMInstance
|
2018-05-31 21:49:23 -04:00
|
|
|
import NodeInstance
|
2018-05-31 20:26:44 -04:00
|
|
|
import time
|
2018-05-31 21:49:23 -04:00
|
|
|
import atexit
|
2018-06-08 12:19:48 -04:00
|
|
|
import configparser
|
2018-06-06 14:16:52 -04:00
|
|
|
import apscheduler.schedulers.background
|
2018-06-06 22:59:31 -04:00
|
|
|
import ansiiprint
|
2018-05-31 20:26:44 -04:00
|
|
|
|
2018-06-08 12:19:48 -04:00
|
|
|
print(ansiiprint.bold() + "pvcd - Parallel Virtual Cluster management daemon" + ansiiprint.end())

# Get the config file variable from the environment. The path is read only
# from PVCD_CONFIG_FILE; without it the daemon refuses to start.
try:
    pvcd_config_file = os.environ['PVCD_CONFIG_FILE']
except KeyError:
    # Only a missing variable is expected here; any other error should surface
    # with its traceback instead of being reported as "variable not set".
    print('ERROR: The "PVCD_CONFIG_FILE" environment variable must be set before starting pvcd.')
    # sys.exit() rather than the site-provided exit() helper, which is meant
    # for interactive sessions and is not guaranteed to be defined.
    sys.exit(1)

print('Loading configuration from file {}'.format(pvcd_config_file))
|
|
|
|
|
2018-06-10 20:41:11 -04:00
|
|
|
# Identify this node: the hostname as reported by the OS, its first label
# (short name), and everything after the first dot (domain, '' if there is none).
myhostname = socket.gethostname()
_hostname_parts = myhostname.partition('.')
myshorthostname = _hostname_parts[0]
mydomainname = _hostname_parts[2]
|
|
|
|
|
|
|
|
# Names of the settings every node needs; each one must be present either in
# the node's own section or in the shared [default] section of the config file.
config_values = [
    'zookeeper',
    'keepalive_interval',
    'ipmi_hostname',
    'ipmi_username',
    'ipmi_password',
]


def readConfig(pvcd_config_file, myhostname):
    """Load the settings for one node from the pvcd configuration file.

    The section named after ``myhostname`` is preferred; if it does not exist
    the ``[default]`` section is used. Each name in ``config_values`` is then
    looked up in the chosen section and, failing that, in ``[default]``.

    An empty ``ipmi_hostname`` is replaced by a name derived from
    ``myhostname``: ``<short>-lom.<domain>``, or just ``<short>-lom`` when the
    hostname has no domain part (previously this produced a trailing dot).

    Args:
        pvcd_config_file: Path of the INI-style configuration file.
        myhostname: Hostname of the node whose settings are wanted.

    Returns:
        A dict mapping every name in ``config_values`` to its string value.

    Raises:
        SystemExit: With status 1, after printing an error, when neither
            section exists or a required value is missing.
    """
    o_config = configparser.ConfigParser()
    o_config.read(pvcd_config_file)

    # Pick the section to read from: host-specific first, then [default].
    # Indexing a ConfigParser with an unknown section raises KeyError.
    try:
        entries = o_config[myhostname]
    except KeyError:
        try:
            entries = o_config['default']
        except KeyError:
            print('ERROR: Config file is not valid!')
            sys.exit(1)

    config = {}
    for entry in config_values:
        try:
            config[entry] = entries[entry]
        except KeyError:
            # Not set for this host; fall back to the shared default value
            try:
                config[entry] = o_config['default'][entry]
            except KeyError:
                print('ERROR: Config file missing required value "{}" for this host!'.format(entry))
                sys.exit(1)

    # Handle an empty ipmi_hostname by deriving it from the hostname argument
    # (not from module globals, so the function works for any hostname given).
    if config['ipmi_hostname'] == '':
        shorthostname, _, domainname = myhostname.partition('.')
        ipmi_hostname = shorthostname + '-lom'
        if domainname:
            ipmi_hostname += '.' + domainname
        config['ipmi_hostname'] = ipmi_hostname

    return config
|
2018-05-31 20:26:44 -04:00
|
|
|
|
2018-06-10 20:45:32 -04:00
|
|
|
config = readConfig(pvcd_config_file, myhostname)

# Connect to the Zookeeper instance named by the 'zookeeper' config value
zk = kazoo.client.KazooClient(hosts=config['zookeeper'])
print('Connecting to Zookeeper instance at {}'.format(config['zookeeper']))
try:
    # Only the call that can actually fail is guarded
    zk.start()
except Exception:
    # Exception (not a bare except) so that Ctrl-C / SystemExit during the
    # connection attempt are not misreported as a connection failure.
    print('ERROR: Failed to connect to Zookeeper')
    sys.exit(1)
|
|
|
|
|
|
|
|
def zk_listener(state):
    """React to a change in the Zookeeper connection state.

    When the connection is reported as LOST or SUSPENDED, cleanup() is run
    and exit(1) is called; every other state is ignored.

    Args:
        state: The new connection state passed in by the Zookeeper client.
    """
    # NOTE(review): kazoo invokes listeners from its own connection thread;
    # confirm that exit() from here really terminates the whole daemon.
    fatal_states = (
        kazoo.client.KazooState.LOST,
        kazoo.client.KazooState.SUSPENDED,
    )
    if state in fatal_states:
        cleanup()
        exit(1)


zk.add_listener(zk_listener)
|
|
|
|
|
2018-05-31 21:49:23 -04:00
|
|
|
def cleanup():
    """Shut the daemon down on a best-effort basis.

    In order: stops the keepalive scheduler, sets this node's daemonstate in
    Zookeeper to 'stop', then stops and closes the Zookeeper client.

    Each step is attempted independently. Previously a single try block
    wrapped all of them, so a failure in an early step (for example the
    scheduler not existing yet because the daemon exited during startup, or
    this function running a second time) skipped the state update and left
    the Zookeeper client open. Failures are still ignored, because this runs
    during interpreter exit where nothing useful can be done about them.
    """
    steps = (
        lambda: update_timer.shutdown(),
        lambda: zk.set('/nodes/{}/daemonstate'.format(myhostname), 'stop'.encode('ascii')),
        lambda: zk.stop(),
        lambda: zk.close(),
    )
    for step in steps:
        try:
            step()
        except Exception:
            # Deliberately ignored: carry on with the remaining steps
            pass


atexit.register(cleanup)
|
2018-05-31 20:26:44 -04:00
|
|
|
|
2018-06-12 21:21:22 -04:00
|
|
|
# Gather useful data about our host for staticdata
# Static data format (order of the list): 'cpu_count', 'kernel', 'os', 'arch'
#   kernel = `uname -r`, os = `uname -o`, arch = `uname -m`
# The stored order is unchanged; only the comment and the labels printed
# below were corrected (Arch and Kernel used to show each other's value).
staticdata = [str(psutil.cpu_count())]
for uname_flag in ('-r', '-o', '-m'):
    staticdata.append(subprocess.run(['uname', uname_flag], stdout=subprocess.PIPE).stdout.decode('ascii').strip())

# Print static data on start
print('{0}Node hostname:{1} {2}'.format(ansiiprint.bold(), ansiiprint.end(), myhostname))
print('{0}IPMI hostname:{1} {2}'.format(ansiiprint.bold(), ansiiprint.end(), config['ipmi_hostname']))
print('{0}Machine details:{1}'.format(ansiiprint.bold(), ansiiprint.end()))
print(' {0}CPUs:{1} {2}'.format(ansiiprint.bold(), ansiiprint.end(), staticdata[0]))
# Index 3 is `uname -m` (machine architecture)
print(' {0}Arch:{1} {2}'.format(ansiiprint.bold(), ansiiprint.end(), staticdata[3]))
print(' {0}OS:{1} {2}'.format(ansiiprint.bold(), ansiiprint.end(), staticdata[2]))
# Index 1 is `uname -r` (kernel release)
print(' {0}Kernel:{1} {2}'.format(ansiiprint.bold(), ansiiprint.end(), staticdata[1]))
|
2018-06-11 01:58:40 -04:00
|
|
|
|
2018-05-31 22:55:44 -04:00
|
|
|
# Make sure this node is registered under /nodes in Zookeeper, creating the
# node and all of its child keys when it is not there yet
node_path = '/nodes/{}'.format(myhostname)
if zk.exists(node_path):
    print("Node is " + ansiiprint.green() + "present" + ansiiprint.end() + " in Zookeeper")
    # Refresh the static data in case the host has changed since last start
    zk.set(node_path + '/staticdata', ' '.join(staticdata).encode('ascii'))
else:
    print("Node is " + ansiiprint.red() + "absent" + ansiiprint.end() + " in Zookeeper; adding new node")
    keepalive_time = int(time.time())
    zk.create(node_path, 'hypervisor'.encode('ascii'))
    # Child keys and their initial values, created in this order
    initial_keys = (
        # Basic state information
        ('daemonstate', 'stop'),
        ('domainstate', 'ready'),
        ('staticdata', ' '.join(staticdata)),
        ('memfree', '0'),
        ('memused', '0'),
        ('cpuload', '0.0'),
        ('runningdomains', ''),
        ('domainscount', '0'),
        # Keepalives and fencing information
        ('keepalive', str(keepalive_time)),
        ('ipmihostname', config['ipmi_hostname']),
        ('ipmiusername', config['ipmi_username']),
        ('ipmipassword', config['ipmi_password']),
    )
    for key_name, initial_value in initial_keys:
        zk.create('{}/{}'.format(node_path, key_name), initial_value.encode('ascii'))

# Whether the node was found or just created, mark the daemon as initializing
zk.set(node_path + '/daemonstate', 'init'.encode('ascii'))
|
2018-06-12 12:07:57 -04:00
|
|
|
|
2018-05-31 23:28:26 -04:00
|
|
|
# Shared daemon state, filled in by the Zookeeper watch callbacks below:
#   t_node      - node name -> NodeInstance object
#   s_domain    - domain name -> VMInstance object
#   node_list   - most recent list of children of /nodes
#   domain_list - most recent list of children of /domains
t_node = {}
s_domain = {}
node_list = []
domain_list = []
|
2018-05-31 22:55:44 -04:00
|
|
|
|
2018-05-31 23:28:26 -04:00
|
|
|
@zk.ChildrenWatch('/nodes')
def updatenodes(new_node_list):
    """Children watch on /nodes: track the current set of cluster nodes.

    Stores the new list in the global ``node_list``, prints it, then for each
    listed node either creates a NodeInstance in ``t_node`` (first time the
    node is seen) or calls updatenodelist() on the existing instance.

    Args:
        new_node_list: Names of the children currently under /nodes.
    """
    global node_list
    node_list = new_node_list
    print(ansiiprint.blue() + 'Node list: ' + ansiiprint.end() + '{}'.format(' '.join(node_list)))
    for node_name in node_list:
        if node_name not in t_node:
            # First sighting of this node: build its instance object
            t_node[node_name] = NodeInstance.NodeInstance(myhostname, node_name, t_node, s_domain, zk, config)
        else:
            # Known node: hand it the current node table
            t_node[node_name].updatenodelist(t_node)
|
2018-05-31 20:26:44 -04:00
|
|
|
|
2018-06-01 01:32:19 -04:00
|
|
|
@zk.ChildrenWatch('/domains')
def updatedomains(new_domain_list):
    """Children watch on /domains: track the current set of domains.

    Stores the new list in the global ``domain_list``, prints it, and creates
    a VMInstance in ``s_domain`` for every domain not seen before. After each
    new instance is created, every known node in ``node_list`` is handed the
    updated domain table via updatedomainlist().

    Args:
        new_domain_list: Names of the children currently under /domains.
    """
    global domain_list
    domain_list = new_domain_list
    print(ansiiprint.blue() + 'Domain list: ' + ansiiprint.end() + '{}'.format(' '.join(domain_list)))
    for domain_name in domain_list:
        if domain_name in s_domain:
            # Already tracked; nothing to do
            continue
        s_domain[domain_name] = VMInstance.VMInstance(domain_name, zk, config, t_node[myhostname])
        # NOTE(review): the source this was restored from had lost its
        # indentation; the notification loop is assumed to belong to the
        # "new domain" branch - confirm against the upstream file.
        for node_name in node_list:
            if node_name in t_node:
                t_node[node_name].updatedomainlist(s_domain)
|
2018-05-31 20:26:44 -04:00
|
|
|
|
2018-06-06 14:16:52 -04:00
|
|
|
# The periodic job is the update_zookeeper method of this host's own node object
this_node = t_node[myhostname]
update_zookeeper = this_node.update_zookeeper

# Run that job in a background scheduler, once every keepalive_interval
# seconds (the config value is a string, hence the int() conversion)
update_timer = apscheduler.schedulers.background.BackgroundScheduler()
update_timer.add_job(
    update_zookeeper,
    'interval',
    seconds=int(config['keepalive_interval']),
)
update_timer.start()
|
2018-06-04 02:22:59 -04:00
|
|
|
|
2018-06-06 14:16:52 -04:00
|
|
|
# Tick loop: keep the main thread alive in short sleeps. Anything raised while
# sleeping - including KeyboardInterrupt - ends the loop, after which the
# script falls off the end and the atexit handlers run.
while True:
    try:
        time.sleep(0.1)
    except BaseException:
        break
|