2018-05-31 21:49:23 -04:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
2018-06-04 01:22:18 -04:00
|
|
|
import os, sys, socket, time, threading, libvirt, kazoo.client, pvcf
|
2018-05-31 21:49:23 -04:00
|
|
|
|
|
|
|
class NodeInstance(threading.Thread):
    """Thread that tracks one hypervisor node through Zookeeper.

    One instance is created per node.  When the thread is started, the
    instance whose name equals the local hostname connects to libvirt and
    periodically publishes its own data under ``/nodes/<name>``; every other
    instance simply idles until stopped, while its cached values are kept
    current by the Zookeeper data watches registered in ``__init__``.
    """

    def __init__(self, name, t_node, s_domain, zk):
        """Store the cluster references and register the Zookeeper watches.

        name     -- node name; also used to build the ``/nodes/<name>`` key
        t_node   -- mapping of node name -> NodeInstance for the cluster
        s_domain -- mapping of domain UUID -> domain instance object
        zk       -- connected Zookeeper client (``DataWatch``, ``set``,
                    ``get`` and ``transaction`` are used on it)
        """
        super(NodeInstance, self).__init__()
        # Passed-in variables on creation
        self.zkey = '/nodes/%s' % name
        self.zk = zk
        self.name = name
        self.state = 'stop'
        self.stop_thread = threading.Event()
        self.t_node = t_node
        self.s_domain = s_domain
        self.domain_list = []
        # Defaults so the getters never raise AttributeError before the
        # first watch callback (or the first local measurement) arrives.
        # These must be set before the watches are registered, because a
        # watch may fire during registration and would be overwritten.
        self.memfree = 0
        self.cpuload = 0.0

        # Zookeeper handlers for changed states.  A callback receives
        # data=None when the watched key has no data to decode (e.g. the
        # znode is missing); in that case the cached value is left alone.
        @zk.DataWatch(self.zkey + '/state')
        def watch_hypervisor_state(data, stat, event=""):
            if data is not None:
                self.state = data.decode('ascii')

        @zk.DataWatch(self.zkey + '/memfree')
        def watch_hypervisor_memfree(data, stat, event=""):
            if data is not None:
                self.memfree = data.decode('ascii')

        @zk.DataWatch(self.zkey + '/runningdomains')
        def watch_hypervisor_runningdomains(data, stat, event=""):
            if data is not None:
                self.domain_list = data.decode('ascii').split()

    # Get value functions
    def getfreemem(self):
        """Return the last known free memory value of this node."""
        return self.memfree

    def getcpuload(self):
        """Return the last measured CPU load (0.0 until first measured)."""
        return self.cpuload

    def getname(self):
        """Return the node name."""
        return self.name

    def getstate(self):
        """Return the last known node state string."""
        return self.state

    def getdomainlist(self):
        """Return the list of domain UUIDs recorded as running here."""
        return self.domain_list

    # Update value functions
    def updatenodelist(self, t_node):
        """Replace the node name -> NodeInstance mapping."""
        self.t_node = t_node

    def updatedomainlist(self, s_domain):
        """Replace the domain UUID -> domain instance mapping."""
        self.s_domain = s_domain

    # Shutdown the thread
    def stop(self):
        """Ask the running loop to exit at its next stop check."""
        self.stop_thread.set()

    def _is_self(self, node_name):
        """Return True when node_name refers to the node this instance tracks."""
        # self.name is replaced by the libvirt hostname in setup_local_node,
        # so also compare against the Zookeeper key built from the name the
        # instance was created with.
        return node_name == self.name or '/nodes/%s' % node_name == self.zkey

    def _find_migration_target(self):
        """Return the name of the best node to receive a domain, or None.

        The best target is the other node in the 'start' state that reports
        the most free memory.  Nodes whose free memory value cannot be
        parsed as an integer are skipped.
        """
        most_mem = -1
        target = None
        for node_name, node in self.t_node.items():
            if self._is_self(node_name):
                continue
            # Only active nodes can accept domains
            if node.getstate() != 'start':
                continue
            try:
                node_freemem = int(node.getfreemem())
            except (TypeError, ValueError):
                continue
            if node_freemem > most_mem:
                most_mem = node_freemem
                target = node_name
        return target

    # Flush all VMs on the host
    def flush(self, migration_delay=1):
        """Move every domain in the running list to another node.

        For each domain a Zookeeper transaction either marks it 'migrate'
        with the chosen target as its hypervisor or, when no valid target
        exists, marks it 'shutdown'.

        migration_delay -- seconds to wait after each domain (default 1)
        """
        for domain in self.domain_list:
            # Determine the best target hypervisor
            target = self._find_migration_target()

            transaction = self.zk.transaction()
            if target is None:
                print(">>> Failed to find valid migration target for %s" % domain)
                transaction.set_data('/domains/' + domain + '/state', 'shutdown'.encode('ascii'))
            else:
                transaction.set_data('/domains/' + domain + '/state', 'migrate'.encode('ascii'))
                transaction.set_data('/domains/' + domain + '/hypervisor', target.encode('ascii'))
            transaction.commit()

            # Wait between migrations
            time.sleep(migration_delay)

    def run(self):
        """Thread entry point: manage the local node or idle for a remote one."""
        if self.name == socket.gethostname():
            self.setup_local_node()
        else:
            self.setup_remote_node()

    def _prune_domain_list(self):
        """Remove any domains that are not running from the running list."""
        # Iterate over a snapshot: removing from the list being iterated
        # would skip the element following each removal.
        for domain in list(self.domain_list):
            dom = pvcf.lookupByUUID(domain)
            if dom is not None and dom.state()[0] == libvirt.VIR_DOMAIN_RUNNING:
                continue
            try:
                self.domain_list.remove(domain)
            except ValueError:
                # Already gone from the list
                pass

    def _publish_node_data(self):
        """Write free memory, CPU load and the running domains to Zookeeper."""
        self.zk.set(self.zkey + '/memfree', str(self.memfree).encode('ascii'))
        self.zk.set(self.zkey + '/cpuload', str(self.cpuload).encode('ascii'))
        self.zk.set(self.zkey + '/runningdomains', ' '.join(self.domain_list).encode('ascii'))

    def _report_cluster_state(self):
        """Print the nodes grouped by state; flush this node if requested."""
        active_node_list = []
        flushed_node_list = []
        inactive_node_list = []

        for node_name in self.t_node:
            state, stat = self.zk.get('/nodes/%s/state' % node_name)
            node_state = state.decode('ascii')
            if node_state == 'start':
                active_node_list.append(node_name)
            elif node_state == 'flush':
                flushed_node_list.append(node_name)
                # Only evacuate our own domains when *this* node is the one
                # being flushed, not when some other node is.
                if self._is_self(node_name):
                    self.flush()
            else:
                inactive_node_list.append(node_name)

        print('Active nodes: %s' % active_node_list)
        print('Flushed nodes: %s' % flushed_node_list)
        print('Inactive nodes: %s' % inactive_node_list)

    def setup_local_node(self):
        """Publish this host's data to Zookeeper until the thread is stopped.

        Opens a libvirt connection to qemu:///system, records the hostname
        and CPU count, marks the node 'start', then repeats roughly every
        10 seconds: drive VM state management, prune non-running domains,
        publish memory/load/domain data, and report the cluster state.
        Raises SystemExit when the libvirt connection cannot be opened.
        """
        # Connect to libvirt
        libvirt_name = "qemu:///system"
        conn = libvirt.open(libvirt_name)
        if conn is None:
            print('>>> Failed to open connection to %s' % libvirt_name)
            sys.exit(1)

        try:
            # Gather data about hypervisor
            self.name = conn.getHostname()
            self.cpucount = conn.getCPUMap()[0]
            self.state = 'start'
            self.zk.set(self.zkey + '/state', 'start'.encode('ascii'))
            self.zk.set(self.zkey + '/cpucount', str(self.cpucount).encode('ascii'))
            print("Node hostname: %s" % self.name)
            print("CPUs: %s" % self.cpucount)

            while True:
                # Toggle state management of all VMs
                for domain, instance in self.s_domain.items():
                    if not instance.inshutdown and domain in self.domain_list:
                        instance.manage_vm_state()

                # Remove any non-running VMs from our list
                self._prune_domain_list()

                # Set our information in zookeeper
                self.memfree = conn.getFreeMemory()
                self.cpuload = os.getloadavg()[0]
                try:
                    self._publish_node_data()
                except Exception as e:
                    # A failed write while stopping means we are done;
                    # otherwise report it and retry on the next pass.
                    if self.stop_thread.is_set():
                        return
                    print('>>> Failed to update node data in Zookeeper: %s' % e)

                print(">>> %s - Free memory: %s | Load: %s" % ( time.strftime("%d/%m/%Y %H:%M:%S"), self.memfree, self.cpuload ))
                print("Active domains: %s" % self.domain_list)

                self._report_cluster_state()

                # Sleep for 10s but with quick interruptability
                if self.stop_thread.wait(10):
                    return
        finally:
            # Release the libvirt connection however the loop ends
            conn.close()

    def setup_remote_node(self):
        """Idle until stopped; remote node data arrives via the watches."""
        while True:
            # Wake at least every 10s, or immediately once stop() is called
            if self.stop_thread.wait(10):
                return
|
|
|
|
|
|
|
|
|