Add separate states for the daemon and the domains
This commit is contained in:
parent
7779f7f895
commit
705f2086be
|
@ -30,7 +30,8 @@ class NodeInstance():
|
||||||
self.config = config
|
self.config = config
|
||||||
self.this_node = this_node
|
self.this_node = this_node
|
||||||
self.name = name
|
self.name = name
|
||||||
self.state = 'stop'
|
self.daemon_state = 'stop'
|
||||||
|
self.domain_state = 'ready'
|
||||||
self.t_node = t_node
|
self.t_node = t_node
|
||||||
self.active_node_list = []
|
self.active_node_list = []
|
||||||
self.flushed_node_list = []
|
self.flushed_node_list = []
|
||||||
|
@ -41,20 +42,26 @@ class NodeInstance():
|
||||||
self.domains_count = 0
|
self.domains_count = 0
|
||||||
self.memused = 0
|
self.memused = 0
|
||||||
self.memfree = 0
|
self.memfree = 0
|
||||||
self.inflush = False
|
|
||||||
|
|
||||||
# Zookeeper handlers for changed states
|
# Zookeeper handlers for changed states
|
||||||
@zk.DataWatch('/nodes/{}/state'.format(self.name))
|
@zk.DataWatch('/nodes/{}/daemonstate'.format(self.name))
|
||||||
def watch_hypervisor_state(data, stat, event=""):
|
def watch_hypervisor_daemonstate(data, stat, event=""):
|
||||||
try:
|
try:
|
||||||
self.state = data.decode('ascii')
|
self.daemon_state = data.decode('ascii')
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
self.state = 'stop'
|
self.daemon_state = 'stop'
|
||||||
|
|
||||||
|
@zk.DataWatch('/nodes/{}/domainstate'.format(self.name))
|
||||||
|
def watch_hypervisor_domainstate(data, stat, event=""):
|
||||||
|
try:
|
||||||
|
self.domain_state = data.decode('ascii')
|
||||||
|
except AttributeError:
|
||||||
|
self.domain_state = 'stop'
|
||||||
|
|
||||||
if self.name == self.this_node:
|
if self.name == self.this_node:
|
||||||
if self.state == 'flush':
|
if self.domain_state == 'flush':
|
||||||
self.flush()
|
self.flush()
|
||||||
if self.state == 'unflush':
|
if self.domain_state == 'unflush':
|
||||||
self.unflush()
|
self.unflush()
|
||||||
|
|
||||||
@zk.DataWatch('/nodes/{}/memfree'.format(self.name))
|
@zk.DataWatch('/nodes/{}/memfree'.format(self.name))
|
||||||
|
@ -64,6 +71,13 @@ class NodeInstance():
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
self.memfree = 0
|
self.memfree = 0
|
||||||
|
|
||||||
|
@zk.DataWatch('/nodes/{}/memused'.format(self.name))
|
||||||
|
def watch_hypervisor_memused(data, stat, event=""):
|
||||||
|
try:
|
||||||
|
self.memused = data.decode('ascii')
|
||||||
|
except AttributeError:
|
||||||
|
self.memused = 0
|
||||||
|
|
||||||
@zk.DataWatch('/nodes/{}/runningdomains'.format(self.name))
|
@zk.DataWatch('/nodes/{}/runningdomains'.format(self.name))
|
||||||
def watch_hypervisor_runningdomains(data, stat, event=""):
|
def watch_hypervisor_runningdomains(data, stat, event=""):
|
||||||
try:
|
try:
|
||||||
|
@ -71,6 +85,13 @@ class NodeInstance():
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
self.domain_list = []
|
self.domain_list = []
|
||||||
|
|
||||||
|
@zk.DataWatch('/nodes/{}/domainscount'.format(self.name))
|
||||||
|
def watch_hypervisor_domainscount(data, stat, event=""):
|
||||||
|
try:
|
||||||
|
self.domains_count = data.decode('ascii')
|
||||||
|
except AttributeError:
|
||||||
|
self.domains_count = 0
|
||||||
|
|
||||||
# Get value functions
|
# Get value functions
|
||||||
def getfreemem(self):
|
def getfreemem(self):
|
||||||
return self.memfree
|
return self.memfree
|
||||||
|
@ -81,8 +102,11 @@ class NodeInstance():
|
||||||
def getname(self):
|
def getname(self):
|
||||||
return self.name
|
return self.name
|
||||||
|
|
||||||
def getstate(self):
|
def getdaemonstate(self):
|
||||||
return self.state
|
return self.daemon_state
|
||||||
|
|
||||||
|
def getdomainstate(self):
|
||||||
|
return self.domain_state
|
||||||
|
|
||||||
def getdomainlist(self):
|
def getdomainlist(self):
|
||||||
return self.domain_list
|
return self.domain_list
|
||||||
|
@ -96,16 +120,20 @@ class NodeInstance():
|
||||||
|
|
||||||
# Flush all VMs on the host
|
# Flush all VMs on the host
|
||||||
def flush(self):
|
def flush(self):
|
||||||
self.inflush = True
|
|
||||||
ansiiprint.echo('Flushing node "{}" of running VMs'.format(self.name), '', 'i')
|
ansiiprint.echo('Flushing node "{}" of running VMs'.format(self.name), '', 'i')
|
||||||
|
self.zk.set('/nodes/{}/domainstate'.format(self.name), 'flushed'.encode('ascii'))
|
||||||
for dom_uuid in self.domain_list:
|
for dom_uuid in self.domain_list:
|
||||||
most_memfree = 0
|
most_memfree = 0
|
||||||
target_hypervisor = None
|
target_hypervisor = None
|
||||||
hypervisor_list = self.zk.get_children('/nodes')
|
hypervisor_list = self.zk.get_children('/nodes')
|
||||||
current_hypervisor = self.zk.get('/domains/{}/hypervisor'.format(dom_uuid))[0].decode('ascii')
|
current_hypervisor = self.zk.get('/domains/{}/hypervisor'.format(dom_uuid))[0].decode('ascii')
|
||||||
for hypervisor in hypervisor_list:
|
for hypervisor in hypervisor_list:
|
||||||
state = self.zk.get('/nodes/{}/state'.format(hypervisor))[0].decode('ascii')
|
daemon_state = self.zk.get('/nodes/{}/daemonstate'.format(hypervisor))[0].decode('ascii')
|
||||||
if state != 'start' or hypervisor == current_hypervisor:
|
domain_state = self.zk.get('/nodes/{}/domainstate'.format(hypervisor))[0].decode('ascii')
|
||||||
|
if hypervisor == current_hypervisor:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if daemon_state != 'start' or domain_state != 'ready':
|
||||||
continue
|
continue
|
||||||
|
|
||||||
memfree = int(self.zk.get('/nodes/{}/memfree'.format(hypervisor))[0].decode('ascii'))
|
memfree = int(self.zk.get('/nodes/{}/memfree'.format(hypervisor))[0].decode('ascii'))
|
||||||
|
@ -129,12 +157,9 @@ class NodeInstance():
|
||||||
# Wait 1s between migrations
|
# Wait 1s between migrations
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|
||||||
self.zk.set('/nodes/{}/state'.format(self.name), 'flushed'.encode('ascii'))
|
|
||||||
self.inflush = False
|
|
||||||
|
|
||||||
def unflush(self):
|
def unflush(self):
|
||||||
ansiiprint.echo('Restoring node {} to active service.'.format(self.name), '', 'i')
|
ansiiprint.echo('Restoring node {} to active service.'.format(self.name), '', 'i')
|
||||||
self.zk.set('/nodes/{}/state'.format(self.name), 'start'.encode('ascii'))
|
self.zk.set('/nodes/{}/domainstate'.format(self.name), 'ready'.encode('ascii'))
|
||||||
for dom_uuid in self.s_domain:
|
for dom_uuid in self.s_domain:
|
||||||
last_hypervisor = self.zk.get('/domains/{}/lasthypervisor'.format(dom_uuid))[0].decode('ascii')
|
last_hypervisor = self.zk.get('/domains/{}/lasthypervisor'.format(dom_uuid))[0].decode('ascii')
|
||||||
if last_hypervisor != self.name:
|
if last_hypervisor != self.name:
|
||||||
|
@ -159,12 +184,12 @@ class NodeInstance():
|
||||||
return
|
return
|
||||||
|
|
||||||
# Get past state and update if needed
|
# Get past state and update if needed
|
||||||
past_state = self.zk.get('/nodes/{}/state'.format(self.name))[0].decode('ascii')
|
past_state = self.zk.get('/nodes/{}/daemonstate'.format(self.name))[0].decode('ascii')
|
||||||
if past_state != 'flush':
|
if past_state != 'start':
|
||||||
self.state = 'start'
|
self.daemon_state = 'start'
|
||||||
self.zk.set('/nodes/{}/state'.format(self.name), 'start'.encode('ascii'))
|
self.zk.set('/nodes/{}/daemonstate'.format(self.name), 'start'.encode('ascii'))
|
||||||
else:
|
else:
|
||||||
self.state = 'flush'
|
self.daemon_state = 'start'
|
||||||
|
|
||||||
# Toggle state management of all VMs and remove any non-running VMs
|
# Toggle state management of all VMs and remove any non-running VMs
|
||||||
for domain, instance in self.s_domain.items():
|
for domain, instance in self.s_domain.items():
|
||||||
|
@ -188,9 +213,9 @@ class NodeInstance():
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# toggle state management of this node
|
# toggle state management of this node
|
||||||
if self.state == 'flush':
|
if self.domain_state == 'flush':
|
||||||
self.flush()
|
self.flush()
|
||||||
if self.state == 'unflush':
|
if self.domain_state == 'unflush':
|
||||||
self.unflush()
|
self.unflush()
|
||||||
|
|
||||||
# Set our information in zookeeper
|
# Set our information in zookeeper
|
||||||
|
@ -222,7 +247,7 @@ class NodeInstance():
|
||||||
# Update our local node lists
|
# Update our local node lists
|
||||||
for node_name in self.t_node:
|
for node_name in self.t_node:
|
||||||
try:
|
try:
|
||||||
node_state = self.zk.get('/nodes/{}/state'.format(node_name))[0].decode('ascii')
|
node_state = self.zk.get('/nodes/{}/daemonstate'.format(node_name))[0].decode('ascii')
|
||||||
node_keepalive = int(self.zk.get('/nodes/{}/keepalive'.format(node_name))[0].decode('ascii'))
|
node_keepalive = int(self.zk.get('/nodes/{}/keepalive'.format(node_name))[0].decode('ascii'))
|
||||||
except:
|
except:
|
||||||
node_state = 'unknown'
|
node_state = 'unknown'
|
||||||
|
@ -234,7 +259,7 @@ class NodeInstance():
|
||||||
node_deadtime = int(time.time()) - ( int(self.config['keepalive_interval']) * 6 )
|
node_deadtime = int(time.time()) - ( int(self.config['keepalive_interval']) * 6 )
|
||||||
if node_keepalive < node_deadtime and node_state == 'start':
|
if node_keepalive < node_deadtime and node_state == 'start':
|
||||||
ansiiprint.echo('Node {} is dead - performing fence operation in 3 seconds'.format(node_name), '', 'w')
|
ansiiprint.echo('Node {} is dead - performing fence operation in 3 seconds'.format(node_name), '', 'w')
|
||||||
self.zk.set('/nodes/{}/state'.format(node_name), 'dead'.encode('ascii'))
|
self.zk.set('/nodes/{}/daemonstate'.format(node_name), 'dead'.encode('ascii'))
|
||||||
fence_thread = threading.Thread(target=fencenode.fence, args=(node_name, self.zk), kwargs={})
|
fence_thread = threading.Thread(target=fencenode.fence, args=(node_name, self.zk), kwargs={})
|
||||||
fence_thread.start()
|
fence_thread.start()
|
||||||
|
|
||||||
|
|
6
pvcd.py
6
pvcd.py
|
@ -114,8 +114,7 @@ zk.add_listener(zk_listener)
|
||||||
def cleanup():
|
def cleanup():
|
||||||
try:
|
try:
|
||||||
update_timer.shutdown()
|
update_timer.shutdown()
|
||||||
if t_node[myhostname].getstate() != 'flush':
|
zk.set('/nodes/{}/daemonstate'.format(myhostname), 'stop'.encode('ascii'))
|
||||||
zk.set('/nodes/{}/state'.format(myhostname), 'stop'.encode('ascii'))
|
|
||||||
zk.stop()
|
zk.stop()
|
||||||
zk.close()
|
zk.close()
|
||||||
except:
|
except:
|
||||||
|
@ -134,7 +133,8 @@ else:
|
||||||
keepalive_time = int(time.time())
|
keepalive_time = int(time.time())
|
||||||
zk.create('/nodes/{}'.format(myhostname), 'hypervisor'.encode('ascii'))
|
zk.create('/nodes/{}'.format(myhostname), 'hypervisor'.encode('ascii'))
|
||||||
# Basic state information
|
# Basic state information
|
||||||
zk.create('/nodes/{}/state'.format(myhostname), 'stop'.encode('ascii'))
|
zk.create('/nodes/{}/daemonstate'.format(myhostname), 'stop'.encode('ascii'))
|
||||||
|
zk.create('/nodes/{}/domainstate'.format(myhostname), 'stop'.encode('ascii'))
|
||||||
zk.create('/nodes/{}/cpucount'.format(myhostname), '0'.encode('ascii'))
|
zk.create('/nodes/{}/cpucount'.format(myhostname), '0'.encode('ascii'))
|
||||||
zk.create('/nodes/{}/memfree'.format(myhostname), '0'.encode('ascii'))
|
zk.create('/nodes/{}/memfree'.format(myhostname), '0'.encode('ascii'))
|
||||||
zk.create('/nodes/{}/memused'.format(myhostname), '0'.encode('ascii'))
|
zk.create('/nodes/{}/memused'.format(myhostname), '0'.encode('ascii'))
|
||||||
|
|
Loading…
Reference in New Issue