New router daemon based on virtualization daemon
This commit is contained in:
parent
a1f4ba9c60
commit
062a46f48c
|
@ -21,19 +21,22 @@
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
import kazoo.client
|
import kazoo.client
|
||||||
|
import libvirt
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import signal
|
import signal
|
||||||
import socket
|
import socket
|
||||||
import psutil
|
import psutil
|
||||||
import configparser
|
|
||||||
import time
|
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import uuid
|
||||||
|
import time
|
||||||
|
import configparser
|
||||||
|
import apscheduler.schedulers.background
|
||||||
|
|
||||||
import daemon_lib.ansiiprint as ansiiprint
|
import daemon_lib.ansiiprint as ansiiprint
|
||||||
import daemon_lib.zkhandler as zkhandler
|
import daemon_lib.zkhandler as zkhandler
|
||||||
import daemon_lib.common as common
|
|
||||||
|
|
||||||
|
import pvcrd.RouterInstance as RouterInstance
|
||||||
import pvcrd.VXNetworkInstance as VXNetworkInstance
|
import pvcrd.VXNetworkInstance as VXNetworkInstance
|
||||||
|
|
||||||
print(ansiiprint.bold() + "pvcrd - Parallel Virtual Cluster router daemon" + ansiiprint.end())
|
print(ansiiprint.bold() + "pvcrd - Parallel Virtual Cluster router daemon" + ansiiprint.end())
|
||||||
|
@ -47,13 +50,17 @@ except:
|
||||||
|
|
||||||
myhostname = socket.gethostname()
|
myhostname = socket.gethostname()
|
||||||
myshorthostname = myhostname.split('.', 1)[0]
|
myshorthostname = myhostname.split('.', 1)[0]
|
||||||
mydomainname = ''.join(myhostname.split('.', 1)[1:])
|
mynetworkname = ''.join(myhostname.split('.', 1)[1:])
|
||||||
|
|
||||||
# Config values dictionary
|
# Config values dictionary
|
||||||
# Names of the settings that readConfig() requires for this host.
# NOTE: every entry needs its trailing comma; a missing comma makes Python
# silently concatenate two adjacent string literals into one bogus key.
config_values = [
    'zookeeper',
    'keepalive_interval',
    'vni_dev',
    'vni_dev_ip',
    'ipmi_hostname',
    'ipmi_username',
    'ipmi_password',
]
|
||||||
def readConfig(pvcrd_config_file, myhostname):
|
def readConfig(pvcrd_config_file, myhostname):
|
||||||
print('Loading configuration from file {}'.format(pvcrd_config_file))
|
print('Loading configuration from file {}'.format(pvcrd_config_file))
|
||||||
|
@ -67,7 +74,7 @@ def readConfig(pvcrd_config_file, myhostname):
|
||||||
except:
|
except:
|
||||||
try:
|
try:
|
||||||
entries = o_config['default']
|
entries = o_config['default']
|
||||||
except:
|
except Exception as e:
|
||||||
print('ERROR: Config file is not valid!')
|
print('ERROR: Config file is not valid!')
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
|
@ -81,23 +88,32 @@ def readConfig(pvcrd_config_file, myhostname):
|
||||||
print('ERROR: Config file missing required value "{}" for this host!'.format(entry))
|
print('ERROR: Config file missing required value "{}" for this host!'.format(entry))
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
|
# Handle an empty ipmi_hostname
|
||||||
|
if config['ipmi_hostname'] == '':
|
||||||
|
config['ipmi_hostname'] = myshorthostname + '-lom.' + mynetworkname
|
||||||
|
|
||||||
return config
|
return config
|
||||||
|
|
||||||
|
# Get config
|
||||||
config = readConfig(pvcrd_config_file, myhostname)
|
config = readConfig(pvcrd_config_file, myhostname)
|
||||||
|
|
||||||
|
# Connect to local zookeeper
|
||||||
zk_conn = kazoo.client.KazooClient(hosts=config['zookeeper'])
|
zk_conn = kazoo.client.KazooClient(hosts=config['zookeeper'])
|
||||||
try:
|
try:
|
||||||
print('Connecting to Zookeeper instance at {}'.format(config['zookeeper']))
|
print('Connecting to Zookeeper instance at {}'.format(config['zookeeper']))
|
||||||
zk_conn.start()
|
zk_conn.start()
|
||||||
except:
|
except:
|
||||||
print('ERROR: Failed to connect to Zookeeper!')
|
print('ERROR: Failed to connect to Zookeeper')
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
# Handle zookeeper failures gracefully
|
# Handle zookeeper failures
|
||||||
def zk_listener(state):
|
def zk_listener(state):
|
||||||
global zk_conn
|
global zk_conn, update_timer
|
||||||
if state == kazoo.client.KazooState.SUSPENDED:
|
if state == kazoo.client.KazooState.SUSPENDED:
|
||||||
ansiiprint.echo('Connection to Zookeeper list; retrying', '', 'e')
|
ansiiprint.echo('Connection to Zookeeper lost; retrying', '', 'e')
|
||||||
|
|
||||||
|
# Stop keepalive thread
|
||||||
|
stopKeepaliveTimer(update_timer)
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
_zk_conn = kazoo.client.KazooClient(hosts=config['zookeeper'])
|
_zk_conn = kazoo.client.KazooClient(hosts=config['zookeeper'])
|
||||||
|
@ -109,6 +125,9 @@ def zk_listener(state):
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
elif state == kazoo.client.KazooState.CONNECTED:
|
elif state == kazoo.client.KazooState.CONNECTED:
|
||||||
ansiiprint.echo('Connection to Zookeeper started', '', 'o')
|
ansiiprint.echo('Connection to Zookeeper started', '', 'o')
|
||||||
|
|
||||||
|
# Start keepalive thread
|
||||||
|
update_timer = createKeepaliveTimer()
|
||||||
else:
|
else:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -117,98 +136,117 @@ zk_conn.add_listener(zk_listener)
|
||||||
# Cleanup function
|
# Cleanup function
|
||||||
def cleanup(signum, frame):
    """Signal handler that shuts the daemon down in an orderly way.

    Order matters: the keepalive timer is stopped first, because its job
    (update_zookeeper) writes daemonstate 'run' whenever the stored state is
    not 'run'. Stopping it only after writing 'stop' would let a late tick
    overwrite the stop state or hit an already-closed connection.

    Args:
        signum: Number of the signal being handled (unused).
        frame: Interrupted stack frame (unused).

    Raises:
        SystemExit: Always, with exit status 0.
    """
    ansiiprint.echo('Terminating daemon', '', 'e')

    # Stop keepalive thread. Best effort: the signal may arrive before the
    # timer (or stopKeepaliveTimer itself) has been defined, or after the
    # Zookeeper listener has already shut the scheduler down.
    try:
        stopKeepaliveTimer(update_timer)
    except NameError:
        pass
    except Exception as e:
        ansiiprint.echo('Failed to stop keepalive timer: {}'.format(e), '', 'e')

    # Set stop state in Zookeeper; a failure here must not prevent exiting
    try:
        zkhandler.writedata(zk_conn, { '/routers/{}/daemonstate'.format(myhostname): 'stop' })
    except Exception as e:
        ansiiprint.echo('Failed to set stop state in Zookeeper: {}'.format(e), '', 'e')

    # Close the Zookeeper connection (best effort; we are exiting anyway)
    try:
        zk_conn.stop()
        zk_conn.close()
    except Exception:
        pass

    # Exit
    sys.exit(0)
|
||||||
|
|
||||||
# Handle signals with cleanup
|
# Handle signals gracefully
|
||||||
signal.signal(signal.SIGTERM, cleanup)
|
signal.signal(signal.SIGTERM, cleanup)
|
||||||
signal.signal(signal.SIGINT, cleanup)
|
signal.signal(signal.SIGINT, cleanup)
|
||||||
signal.signal(signal.SIGQUIT, cleanup)
|
signal.signal(signal.SIGQUIT, cleanup)
|
||||||
|
|
||||||
# What this daemon does:
|
# Gather useful data about our host for staticdata
|
||||||
# 1. Configure public networks dynamically on startup (e.g. bonding, vlans, etc.) from config
|
# Static data format: 'cpu_count', 'kernel', 'os', 'arch'
|
||||||
# * no /etc/network/interfaces config for these - just mgmt interface via DHCP!
|
staticdata = []
|
||||||
# 2. Watch ZK /networks
|
staticdata.append(str(psutil.cpu_count()))
|
||||||
# 3. Provision required network interfaces when a network is added
|
staticdata.append(subprocess.run(['uname', '-r'], stdout=subprocess.PIPE).stdout.decode('ascii').strip())
|
||||||
# a. create vxlan interface targeting local dev from config
|
staticdata.append(subprocess.run(['uname', '-o'], stdout=subprocess.PIPE).stdout.decode('ascii').strip())
|
||||||
# b. create bridge interface
|
staticdata.append(subprocess.run(['uname', '-m'], stdout=subprocess.PIPE).stdout.decode('ascii').strip())
|
||||||
# c. add vxlan to bridge
|
# Print static data on start
|
||||||
# d. set interfaces up
|
|
||||||
# e. add corosync config for virtual gateway IP
|
|
||||||
# 4. Remove network interfaces when network disapears
|
|
||||||
|
|
||||||
# Zookeeper schema:
|
print('{0}Router hostname:{1} {2}'.format(ansiiprint.bold(), ansiiprint.end(), myhostname))
|
||||||
# networks/
|
print('{0}IPMI hostname:{1} {2}'.format(ansiiprint.bold(), ansiiprint.end(), config['ipmi_hostname']))
|
||||||
# <VXLANID>/
|
print('{0}Machine details:{1}'.format(ansiiprint.bold(), ansiiprint.end()))
|
||||||
# ipnet <NETWORK-CIDR> e.g. 10.101.0.0/24
|
print(' {0}CPUs:{1} {2}'.format(ansiiprint.bold(), ansiiprint.end(), staticdata[0]))
|
||||||
# gateway <IPADDR> e.g. 10.101.0.1 [1]
|
print(' {0}Arch:{1} {2}'.format(ansiiprint.bold(), ansiiprint.end(), staticdata[3]))
|
||||||
# dhcp <YES/NO> e.g. YES [2]
|
print(' {0}OS:{1} {2}'.format(ansiiprint.bold(), ansiiprint.end(), staticdata[2]))
|
||||||
# reservations/
|
print(' {0}Kernel:{1} {2}'.format(ansiiprint.bold(), ansiiprint.end(), staticdata[1]))
|
||||||
# <HOSTNAME/DESCRIPTION>/
|
|
||||||
# address <IPADDR> e.g. 10.101.0.30
|
|
||||||
# mac <MACADDR> e.g. ff:ff:fe:ab:cd:ef
|
|
||||||
# fwrules/
|
|
||||||
# <RULENAME>/
|
|
||||||
# description <DESCRIPTION> e.g. Allow HTTP from any to this net
|
|
||||||
# src <HOSTNAME/IPADDR/SUBNET/"any"/"this"> e.g. any
|
|
||||||
# dest <HOSTNAME/IPADDR/SUBNET/"any"/"this"> e.g. this
|
|
||||||
# port <PORT/RANGE/"any"> e.g. 80
|
|
||||||
|
|
||||||
# Notes:
|
# Check if our router exists in Zookeeper, and create it if not
|
||||||
# [1] becomes a VIP between the pair of routers in multi-router envs
|
if zk_conn.exists('/routers/{}'.format(myhostname)):
|
||||||
# [2] enables or disables a DHCP subnet definition for the network
|
print("Router is " + ansiiprint.green() + "present" + ansiiprint.end() + " in Zookeeper")
|
||||||
|
# Update static data just in case it's changed
|
||||||
|
zkhandler.writedata(zk_conn, { '/routers/{}/staticdata'.format(myhostname): ' '.join(staticdata) })
|
||||||
# Enable routing
|
|
||||||
common.run_os_command('sysctl sysctl net.ipv4.ip_forward=1')
|
|
||||||
common.run_os_command('sysctl sysctl net.ipv6.ip_forward=1')
|
|
||||||
common.run_os_command('sysctl sysctl net.ipv4.all.send_redirects=1')
|
|
||||||
common.run_os_command('sysctl sysctl net.ipv6.all.send_redirects=1')
|
|
||||||
common.run_os_command('sysctl sysctl net.ipv4.all.accept_source_route=1')
|
|
||||||
common.run_os_command('sysctl sysctl net.ipv6.all.accept_source_route=1')
|
|
||||||
|
|
||||||
# Prepare underlying interface
|
|
||||||
if config['vni_dev_ip'] == 'dhcp':
|
|
||||||
vni_dev = config['vni_dev']
|
|
||||||
ansiiprint.echo('Configuring VNI parent device {} with DHCP IP'.format(vni_dev), '', 'o')
|
|
||||||
common.run_os_command('ip link set {0} up'.format(vni_dev))
|
|
||||||
common.run_os_command('dhclient {0}'.format(vni_dev))
|
|
||||||
else:
|
else:
|
||||||
vni_dev = config['vni_dev']
|
print("Router is " + ansiiprint.red() + "absent" + ansiiprint.end() + " in Zookeeper; adding new router")
|
||||||
vni_dev_ip = config['vni_dev_ip']
|
keepalive_time = int(time.time())
|
||||||
ansiiprint.echo('Configuring VNI parent device {} with IP {}'.format(vni_dev, vni_dev_ip), '', 'o')
|
transaction = zk_conn.transaction()
|
||||||
common.run_os_command('ip link set {0} up'.format(vni_dev))
|
transaction.create('/routers/{}'.format(myhostname), 'hypervisor'.encode('ascii'))
|
||||||
common.run_os_command('ip address add {0} dev {1}'.format(vni_dev_ip, vni_dev))
|
# Basic state information
|
||||||
|
transaction.create('/routers/{}/daemonstate'.format(myhostname), 'stop'.encode('ascii'))
|
||||||
|
transaction.create('/routers/{}/networkstate'.format(myhostname), 'ready'.encode('ascii'))
|
||||||
|
transaction.create('/routers/{}/staticdata'.format(myhostname), ' '.join(staticdata).encode('ascii'))
|
||||||
|
# Keepalives and fencing information
|
||||||
|
transaction.create('/routers/{}/keepalive'.format(myhostname), str(keepalive_time).encode('ascii'))
|
||||||
|
transaction.create('/routers/{}/ipmihostname'.format(myhostname), config['ipmi_hostname'].encode('ascii'))
|
||||||
|
transaction.create('/routers/{}/ipmiusername'.format(myhostname), config['ipmi_username'].encode('ascii'))
|
||||||
|
transaction.create('/routers/{}/ipmipassword'.format(myhostname), config['ipmi_password'].encode('ascii'))
|
||||||
|
transaction.commit()
|
||||||
|
|
||||||
# Disable stonith in corosync
|
zkhandler.writedata(zk_conn, { '/routers/{}/daemonstate'.format(myhostname): 'init' })
|
||||||
common.run_os_command('crm configure property stonith-enabled="false"')
|
|
||||||
|
|
||||||
# Prepare VNI list
|
t_router = dict()
|
||||||
t_vni = dict()
|
s_network = dict()
|
||||||
vni_list = []
|
router_list = []
|
||||||
|
network_list = []
|
||||||
|
|
||||||
|
@zk_conn.ChildrenWatch('/routers')
def updaterouters(new_router_list):
    """Children watch on /routers: track the router list and its instances.

    Stores the new list in the module-level router_list, prints it, and then
    either refreshes the router table of an already-known RouterInstance or
    creates a RouterInstance for a router seen for the first time.
    """
    global router_list
    router_list = new_router_list

    joined_names = ' '.join(router_list)
    print(ansiiprint.blue() + 'Router list: ' + ansiiprint.end() + joined_names)

    for router in router_list:
        if router not in t_router:
            # First time we see this router: build its instance
            t_router[router] = RouterInstance.RouterInstance(myhostname, router, t_router, s_network, zk_conn, config)
            continue
        # Known router: hand it the current table
        t_router[router].updaterouterlist(t_router)
|
||||||
|
|
||||||
@zk_conn.ChildrenWatch('/networks')
def updatenetworks(new_network_list):
    """Children watch on /networks: provision new networks, remove deleted ones.

    Args:
        new_network_list: Current children of /networks as reported by
            Zookeeper.

    Side effects:
        Adds/removes entries in the module-level s_network dict, replaces the
        module-level network_list, and pushes the updated dict to every known
        router instance when something changed.
    """
    global network_list
    changed = False

    # Add networks that are in the new list but not instantiated yet.
    # (Iterating the *new* list is essential: the old list starts out empty,
    # so iterating it would never create anything.)
    for network in new_network_list:
        if network not in s_network:
            s_network[network] = VXNetworkInstance.VXNetworkInstance(network, zk_conn, config, t_router[myhostname])
            changed = True

    # Remove networks we knew about that have disappeared from Zookeeper
    for network in network_list:
        if network not in new_network_list and network in s_network:
            s_network[network].removeAddress()
            s_network[network].removeNetwork()
            # Drop the instance so a later re-add provisions it again
            del s_network[network]
            changed = True

    # Tell every known router instance about the new network table
    if changed:
        for router in router_list:
            if router in t_router:
                t_router[router].updatenetworklist(s_network)

    network_list = new_network_list
    print(ansiiprint.blue() + 'Network list: ' + ansiiprint.end() + '{}'.format(' '.join(network_list)))
|
||||||
|
|
||||||
# Remove deleted VNIs
|
# Set up our update function
|
||||||
for vni in vni_list:
|
this_router = t_router[myhostname]
|
||||||
if vni not in new_vni_list:
|
update_zookeeper = this_router.update_zookeeper
|
||||||
vni_list.remove(vni)
|
|
||||||
t_vni[vni].deprovision()
|
# Create timer to update this router in Zookeeper
|
||||||
|
def createKeepaliveTimer():
    """Create, start and return the scheduler that runs update_zookeeper.

    The job interval in seconds is read from config['keepalive_interval'].
    """
    keepalive_seconds = int(config['keepalive_interval'])
    ansiiprint.echo('Starting keepalive timer ({} second interval)'.format(keepalive_seconds), '', 'o')

    scheduler = apscheduler.schedulers.background.BackgroundScheduler()
    scheduler.add_job(update_zookeeper, 'interval', seconds=keepalive_seconds)
    scheduler.start()
    return scheduler
|
||||||
|
|
||||||
|
def stopKeepaliveTimer(update_timer):
    """Announce the stop and shut down the given keepalive scheduler.

    Args:
        update_timer: Scheduler object as returned by createKeepaliveTimer().
    """
    ansiiprint.echo('Stopping keepalive timer', '', 'c')
    update_timer.shutdown()
|
||||||
|
|
||||||
|
# Start keepalive thread
|
||||||
|
update_timer = createKeepaliveTimer()
|
||||||
|
|
||||||
# Tick loop
|
# Tick loop
|
||||||
while True:
|
while True:
|
||||||
|
|
|
@ -0,0 +1,247 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
# RouterInstance.py - Class implementing a PVC router and run by pvcrd
|
||||||
|
# Part of the Parallel Virtual Cluster (PVC) system
|
||||||
|
#
|
||||||
|
# Copyright (C) 2018 Joshua M. Boniface <joshua@boniface.me>
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
#
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import psutil
|
||||||
|
import socket
|
||||||
|
import time
|
||||||
|
import libvirt
|
||||||
|
import kazoo.client
|
||||||
|
import threading
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
import daemon_lib.ansiiprint as ansiiprint
|
||||||
|
import daemon_lib.zkhandler as zkhandler
|
||||||
|
|
||||||
|
class RouterInstance():
    """One PVC router as tracked by the local pvcrd daemon.

    An instance exists for every router listed under /routers in Zookeeper.
    The instance whose name equals this_router represents the local host; it
    is the one that reacts to network state changes and whose
    update_zookeeper() is run periodically as the keepalive job.
    """

    # Initialization function
    def __init__(self, this_router, name, t_router, s_network, zk_conn, config):
        """Store the passed-in state and register the Zookeeper data watches.

        Args:
            this_router: Name of the router this daemon is running on.
            name: Name of the router this instance represents.
            t_router: Dict mapping router name -> RouterInstance.
            s_network: Dict mapping network id -> network instance.
            zk_conn: Connected Kazoo client.
            config: Daemon configuration mapping.
        """
        # Passed-in variables on creation
        self.zk_conn = zk_conn
        self.config = config
        self.this_router = this_router
        self.name = name
        self.daemon_state = 'stop'
        self.network_state = 'primary'
        self.t_router = t_router
        self.primary_router_list = []
        self.secondary_router_list = []
        self.inactive_router_list = []
        self.s_network = s_network
        self.network_list = []
        self.ipmi_hostname = self.config['ipmi_hostname']
        # Host statistics shown by update_zookeeper(); initialised here so
        # they always exist.
        self.networks_count = 0
        self.memfree = 0
        self.memused = 0
        self.cpuload = 0.0

        # Zookeeper handlers for changed states
        @zk_conn.DataWatch('/routers/{}/daemonstate'.format(self.name))
        def watch_hypervisor_daemonstate(data, stat, event=""):
            try:
                self.daemon_state = data.decode('ascii')
            except AttributeError:
                # No data (None) is treated as a stopped daemon
                self.daemon_state = 'stop'

        @zk_conn.DataWatch('/routers/{}/networkstate'.format(self.name))
        def watch_hypervisor_networkstate(data, stat, event=""):
            try:
                new_network_state = data.decode('ascii')
            except AttributeError:
                # No data (None) is treated as secondary
                new_network_state = 'secondary'

            if new_network_state != self.network_state:
                self.network_state = new_network_state
                # toggle state management of this router
                if self.name == self.this_router:
                    if self.network_state == 'secondary':
                        self.set_secondary()
                    if self.network_state == 'primary':
                        self.set_primary()
                        # t_router maps names to instances, so iterate the
                        # values; iterating the dict itself yields name strings.
                        for router in list(self.t_router.values()):
                            if router.getname() != self.name:
                                router.set_secondary()

    # Get value functions
    def getname(self):
        """Return the name of the router this instance represents."""
        return self.name

    def getdaemonstate(self):
        """Return the last known daemon state of this router."""
        return self.daemon_state

    def getnetworkstate(self):
        """Return the last known network state of this router."""
        return self.network_state

    def getnetworklist(self):
        """Return the list of network ids last passed to updatenetworklist()."""
        return self.network_list

    # Update value functions
    def updaterouterlist(self, t_router):
        """Replace the router table (dict of router name -> RouterInstance)."""
        self.t_router = t_router

    def updatenetworklist(self, s_network):
        """Replace the network table and refresh the list of network ids."""
        self.s_network = s_network
        self.network_list = list(s_network)

    # Move this router to the secondary state
    def set_secondary(self):
        """Record the secondary state in Zookeeper and drop gateway addresses.

        The gateway addresses are only removed when this instance represents
        the local router: the network instances operate on local interfaces,
        so demoting a peer must not touch them.
        """
        ansiiprint.echo('Setting node {} to secondary state'.format(self.name), '', 'i')
        ansiiprint.echo('Network list: {}'.format(', '.join(self.network_list)), '', 'c')
        zkhandler.writedata(self.zk_conn, { '/routers/{}/networkstate'.format(self.name): 'secondary' })
        if self.name != self.this_router:
            return
        for network in list(self.s_network.values()):
            network.removeAddress()

    # Move this router to the primary state
    def set_primary(self):
        """Record the primary state in Zookeeper and create gateway addresses.

        As in set_secondary(), addresses are only touched for the local router.
        """
        ansiiprint.echo('Setting node {} to master state.'.format(self.name), '', 'i')
        ansiiprint.echo('Network list: {}'.format(', '.join(self.network_list)), '', 'c')
        zkhandler.writedata(self.zk_conn, { '/routers/{}/networkstate'.format(self.name): 'primary' })
        if self.name != self.this_router:
            return
        for network in list(self.s_network.values()):
            network.createAddress()

    def _assign_router_role(self, router_name, daemon_state, network_state):
        """Put router_name into exactly one of the three role lists.

        A router whose network state is 'secondary' is secondary; otherwise it
        is primary when its daemon state is 'run' and inactive when it is not.
        """
        if network_state == 'secondary':
            target = self.secondary_router_list
        elif daemon_state == 'run':
            target = self.primary_router_list
        else:
            target = self.inactive_router_list

        if router_name in target:
            return
        target.append(router_name)
        for role_list in (self.primary_router_list, self.secondary_router_list, self.inactive_router_list):
            if role_list is not target and router_name in role_list:
                role_list.remove(router_name)

    def update_zookeeper(self):
        """Keepalive job: publish our state, check peers and print a summary.

        Writes daemonstate 'run' (if needed) and the current keepalive
        timestamp for this router, then reads every known router's state,
        starts a fence thread for routers whose keepalive is too old while
        they claim to be running, and refreshes the role lists.
        """
        # Get past state and update if needed
        past_state = zkhandler.readdata(self.zk_conn, '/routers/{}/daemonstate'.format(self.name))
        if past_state != 'run':
            zkhandler.writedata(self.zk_conn, { '/routers/{}/daemonstate'.format(self.name): 'run' })
        self.daemon_state = 'run'

        # Set our information in zookeeper
        keepalive_time = int(time.time())
        try:
            zkhandler.writedata(self.zk_conn, {
                '/routers/{}/keepalive'.format(self.name): str(keepalive_time)
            })
        except Exception:
            ansiiprint.echo('Failed to set keepalive data', '', 'e')
            return

        # Gather the host statistics shown below
        self.networks_count = len(self.s_network)
        memory = psutil.virtual_memory()
        self.memfree = int(memory.free / 1024 / 1024)
        self.memused = int(memory.used / 1024 / 1024)
        self.cpuload = os.getloadavg()[0]

        # Display router information to the terminal
        ansiiprint.echo('{}{} keepalive{}'.format(ansiiprint.purple(), self.name, ansiiprint.end()), '', 't')
        ansiiprint.echo('{0}Active networks:{1} {2} {0}Free memory [MiB]:{1} {3} {0}Used memory [MiB]:{1} {4} {0}Load:{1} {5}'.format(ansiiprint.bold(), ansiiprint.end(), self.networks_count, self.memfree, self.memused, self.cpuload), '', 'c')

        # A router is considered dead when its keepalive is more than
        # fence_intervals * keepalive_interval seconds out of date while its
        # daemon state is 'run'. fence_intervals is optional; default is 6.
        fence_intervals = int(self.config.get('fence_intervals', 6))
        router_deadtime = int(time.time()) - ( int(self.config['keepalive_interval']) * fence_intervals )

        # Update our local router lists
        for router_name in list(self.t_router):
            try:
                router_daemon_state = zkhandler.readdata(self.zk_conn, '/routers/{}/daemonstate'.format(router_name))
                router_network_state = zkhandler.readdata(self.zk_conn, '/routers/{}/networkstate'.format(router_name))
                router_keepalive = int(zkhandler.readdata(self.zk_conn, '/routers/{}/keepalive'.format(router_name)))
            except Exception:
                router_daemon_state = 'unknown'
                router_network_state = 'unknown'
                router_keepalive = 0

            # Handle deadtime and fencing if needed
            if router_keepalive < router_deadtime and router_daemon_state == 'run':
                ansiiprint.echo('Router {} seems dead - starting monitor for fencing'.format(router_name), '', 'w')
                zkhandler.writedata(self.zk_conn, { '/routers/{}/daemonstate'.format(router_name): 'dead' })
                fence_thread = threading.Thread(target=fenceRouter, args=(router_name, self.zk_conn, self.config), kwargs={})
                fence_thread.start()

            # Update the arrays
            self._assign_router_role(router_name, router_daemon_state, router_network_state)

        # Display cluster information to the terminal
        ansiiprint.echo('{}Cluster status{}'.format(ansiiprint.purple(), ansiiprint.end()), '', 't')
        ansiiprint.echo('{}Primary router:{} {}'.format(ansiiprint.bold(), ansiiprint.end(), ' '.join(self.primary_router_list)), '', 'c')
        ansiiprint.echo('{}Secondary router:{} {}'.format(ansiiprint.bold(), ansiiprint.end(), ' '.join(self.secondary_router_list)), '', 'c')
        ansiiprint.echo('{}Inactive routers:{} {}'.format(ansiiprint.bold(), ansiiprint.end(), ' '.join(self.inactive_router_list)), '', 'c')
|
||||||
|
|
||||||
|
#
|
||||||
|
# Fence thread entry function
|
||||||
|
#
|
||||||
|
def fenceRouter(router_name, zk_conn, config):
    """Fence thread entry point: give a dead router three chances, then reboot it.

    Every 5 seconds the router's daemonstate is read from Zookeeper. If it is
    anything other than 'dead' the fence is cancelled. After three consecutive
    'dead' readings the router is power-reset via IPMI and, after a 3 second
    hold, its networkstate is set to 'secondary'.

    Args:
        router_name: Name of the router to monitor and fence.
        zk_conn: Zookeeper connection used for reads and writes.
        config: Daemon configuration (not used by this function).
    """
    state_key = '/routers/{}/daemonstate'.format(router_name)

    # We allow exactly 3 saving throws for the host to come back online
    for failcount in range(1, 4):
        # Wait 5 seconds, then get the state
        time.sleep(5)
        if zkhandler.readdata(zk_conn, state_key) != 'dead':
            # It changed back to something else so it must be alive
            ansiiprint.echo('Router "{}" passed a saving throw; canceling fence'.format(router_name), '', 'o')
            return
        # Still 'dead'
        ansiiprint.echo('Router "{}" failed {} saving throws'.format(router_name, failcount), '', 'w')

    ansiiprint.echo('Fencing router "{}" via IPMI reboot signal'.format(router_name), '', 'e')

    # Get IPMI information
    lom_host = zkhandler.readdata(zk_conn, '/routers/{}/ipmihostname'.format(router_name))
    lom_user = zkhandler.readdata(zk_conn, '/routers/{}/ipmiusername'.format(router_name))
    lom_password = zkhandler.readdata(zk_conn, '/routers/{}/ipmipassword'.format(router_name))

    # Shoot it in the head (the result is not acted upon)
    rebootViaIPMI(lom_host, lom_user, lom_password)
    # Hold to ensure the fence takes effect
    time.sleep(3)

    # Set router in secondary state
    zkhandler.writedata(zk_conn, { '/routers/{}/networkstate'.format(router_name): 'secondary' })
|
||||||
|
|
||||||
|
#
|
||||||
|
# Perform an IPMI fence
|
||||||
|
#
|
||||||
|
def rebootViaIPMI(ipmi_hostname, ipmi_user, ipmi_password):
    """Issue an IPMI 'chassis power reset' and report whether it succeeded.

    Args:
        ipmi_hostname: Host passed to ipmitool via -H.
        ipmi_user: User passed to ipmitool via -U.
        ipmi_password: Password passed to ipmitool via -P.

    Returns:
        True when ipmitool exits with status 0, False otherwise.
    """
    session_args = ['-I', 'lanplus', '-H', ipmi_hostname, '-U', ipmi_user, '-P', ipmi_password]
    result = subprocess.run(
        ['/usr/bin/ipmitool'] + session_args + ['chassis', 'power', 'reset'],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE
    )

    rebooted = (result.returncode == 0)
    if rebooted:
        ansiiprint.echo('Successfully rebooted dead router', '', 'o')
    else:
        ansiiprint.echo('Failed to reboot dead router', '', 'e')
    return rebooted
|
|
@ -23,8 +23,6 @@
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import subprocess
|
|
||||||
import apscheduler.schedulers.background
|
|
||||||
|
|
||||||
import daemon_lib.ansiiprint as ansiiprint
|
import daemon_lib.ansiiprint as ansiiprint
|
||||||
import daemon_lib.zkhandler as zkhandler
|
import daemon_lib.zkhandler as zkhandler
|
||||||
|
@ -32,9 +30,10 @@ import daemon_lib.common as common
|
||||||
|
|
||||||
class VXNetworkInstance():
|
class VXNetworkInstance():
|
||||||
# Initialization function
|
# Initialization function
|
||||||
def __init__ (self, vni, zk_conn, config):
|
def __init__ (self, vni, zk_conn, config, thisrouter):
|
||||||
self.vni = vni
|
self.vni = vni
|
||||||
self.zk_conn = zk_conn
|
self.zk_conn = zk_conn
|
||||||
|
self.thisrouter = thisrouter
|
||||||
self.vni_dev = config['vni_dev']
|
self.vni_dev = config['vni_dev']
|
||||||
|
|
||||||
self.old_description = zkhandler.readdata(self.zk_conn, '/networks/{}'.format(self.vni))
|
self.old_description = zkhandler.readdata(self.zk_conn, '/networks/{}'.format(self.vni))
|
||||||
|
@ -47,11 +46,7 @@ class VXNetworkInstance():
|
||||||
self.vxlan_nic = 'vxlan{}'.format(self.vni)
|
self.vxlan_nic = 'vxlan{}'.format(self.vni)
|
||||||
self.bridge_nic = 'br{}'.format(self.vni)
|
self.bridge_nic = 'br{}'.format(self.vni)
|
||||||
|
|
||||||
self.corosync_provisioned = False
|
self.createNetwork()
|
||||||
self.watch_change = False
|
|
||||||
|
|
||||||
self.update_timer = apscheduler.schedulers.background.BackgroundScheduler()
|
|
||||||
self.update_timer.add_job(self.updateCorosyncResource, 'interval', seconds=1)
|
|
||||||
|
|
||||||
# Zookeper handlers for changed states
|
# Zookeeper handlers for changed states
|
||||||
@zk_conn.DataWatch('/networks/{}'.format(self.vni))
|
@zk_conn.DataWatch('/networks/{}'.format(self.vni))
|
||||||
|
@ -59,7 +54,6 @@ class VXNetworkInstance():
|
||||||
if data != None and self.description != data.decode('ascii'):
|
if data != None and self.description != data.decode('ascii'):
|
||||||
self.old_description = self.description
|
self.old_description = self.description
|
||||||
self.description = data.decode('ascii')
|
self.description = data.decode('ascii')
|
||||||
self.watch_change = True
|
|
||||||
|
|
||||||
@zk_conn.DataWatch('/networks/{}/ip_network'.format(self.vni))
|
@zk_conn.DataWatch('/networks/{}/ip_network'.format(self.vni))
|
||||||
def watch_network_ip_network(data, stat, event=''):
|
def watch_network_ip_network(data, stat, event=''):
|
||||||
|
@ -67,40 +61,18 @@ class VXNetworkInstance():
|
||||||
ip_network = data.decode('ascii')
|
ip_network = data.decode('ascii')
|
||||||
self.ip_network = ip_network
|
self.ip_network = ip_network
|
||||||
self.ip_cidrnetmask = ip_network.split('/')[-1]
|
self.ip_cidrnetmask = ip_network.split('/')[-1]
|
||||||
self.watch_change = True
|
|
||||||
|
|
||||||
@zk_conn.DataWatch('/networks/{}/ip_gateway'.format(self.vni))
def watch_network_gateway(data, stat, event=''):
    """Data watch: re-address the network when its gateway IP changes.

    The address for the old gateway is removed before the new value is
    stored, then the address for the new gateway is created. The method that
    adds the address is createAddress(); there is no addAddress().
    """
    if data is None:
        return
    new_gateway = data.decode('ascii')
    if self.ip_gateway != new_gateway:
        self.removeAddress()
        self.ip_gateway = new_gateway
        self.createAddress()
|
||||||
|
|
||||||
@zk_conn.DataWatch('/networks/{}/dhcp_flag'.format(self.vni))
|
@zk_conn.DataWatch('/networks/{}/dhcp_flag'.format(self.vni))
|
||||||
def watch_network_dhcp_status(data, stat, event=''):
|
def watch_network_dhcp_status(data, stat, event=''):
|
||||||
if data != None and self.dhcp_flag != data.decode('ascii'):
|
if data != None and self.dhcp_flag != data.decode('ascii'):
|
||||||
self.dhcp_flag = ( data.decode('ascii') == 'True' )
|
self.dhcp_flag = ( data.decode('ascii') == 'True' )
|
||||||
self.watch_change = True
|
|
||||||
|
|
||||||
def createCorosyncResource(self):
|
|
||||||
ansiiprint.echo('Creating Corosync resource for network {} gateway {} on VNI {}'.format(self.description, self.ip_gateway, self.vni), '', 'o')
|
|
||||||
common.run_os_command('crm configure primitive vnivip_{0} ocf:heartbeat:IPaddr2 params ip={1} cidr_netmask={2} nic={3} op monitor interval=1s meta target-role=Stopped'.format(
|
|
||||||
self.description,
|
|
||||||
self.ip_gateway,
|
|
||||||
self.ip_cidrnetmask,
|
|
||||||
self.bridge_nic
|
|
||||||
))
|
|
||||||
common.run_os_command('crm configure location lvnivip_{0} vnivip_{0} 100: 1'.format(self.description))
|
|
||||||
common.run_os_command('crm resource start vnivip_{0}'.format(self.description))
|
|
||||||
common.run_os_command('crm resource refresh'.format(self.description))
|
|
||||||
|
|
||||||
self.watch_change = False
|
|
||||||
self.corosync_provisioned = True
|
|
||||||
|
|
||||||
def removeCorosyncResource(self):
|
|
||||||
ansiiprint.echo('Removing Corosync resource for network {} on VNI {}'.format(self.old_description, self.vni), '', 'o')
|
|
||||||
common.run_os_command('crm resource stop vnivip_{}'.format(self.old_description))
|
|
||||||
common.run_os_command('crm configure delete vnivip_{}'.format(self.old_description))
|
|
||||||
self.corosync_provisioned = False
|
|
||||||
|
|
||||||
def createNetwork(self):
|
def createNetwork(self):
|
||||||
ansiiprint.echo('Creating VNI {} device on interface {}'.format(self.vni, self.vni_dev), '', 'o')
|
ansiiprint.echo('Creating VNI {} device on interface {}'.format(self.vni, self.vni_dev), '', 'o')
|
||||||
|
@ -110,6 +82,11 @@ class VXNetworkInstance():
|
||||||
common.run_os_command('ip link set {} up'.format(self.vxlan_nic))
|
common.run_os_command('ip link set {} up'.format(self.vxlan_nic))
|
||||||
common.run_os_command('ip link set {} up'.format(self.bridge_nic))
|
common.run_os_command('ip link set {} up'.format(self.bridge_nic))
|
||||||
|
|
||||||
|
def createAddress(self):
    """Add the gateway address to the interface if the local router is primary.

    Does nothing when the local router's network state is not 'primary'.
    """
    # __init__ stores the local router instance as self.thisrouter
    if self.thisrouter.getnetworkstate() == 'primary':
        ansiiprint.echo('Creating gateway {} on interface {}'.format(self.ip_gateway, self.vni_dev), '', 'o')
        common.run_os_command('ip address add {}/{} dev {}'.format(self.ip_gateway, self.ip_cidrnetmask, self.vni_dev))
|
||||||
|
|
||||||
def removeNetwork(self):
|
def removeNetwork(self):
|
||||||
ansiiprint.echo('Removing VNI {} device on interface {}'.format(self.vni, self.vni_dev), '', 'o')
|
ansiiprint.echo('Removing VNI {} device on interface {}'.format(self.vni, self.vni_dev), '', 'o')
|
||||||
common.run_os_command('ip link set {} down'.format(self.bridge_nic))
|
common.run_os_command('ip link set {} down'.format(self.bridge_nic))
|
||||||
|
@ -118,20 +95,6 @@ class VXNetworkInstance():
|
||||||
common.run_os_command('brctl delbr {}'.format(self.bridge_nic))
|
common.run_os_command('brctl delbr {}'.format(self.bridge_nic))
|
||||||
common.run_os_command('ip link delete {}'.format(self.vxlan_nic))
|
common.run_os_command('ip link delete {}'.format(self.vxlan_nic))
|
||||||
|
|
||||||
def updateCorosyncResource(self):
|
def removeAddress(self):
    """Announce and delete the gateway address from the interface."""
    gateway = self.ip_gateway
    device = self.vni_dev
    ansiiprint.echo('Removing gateway {} from interface {}'.format(gateway, device), '', 'o')
    delete_command = 'ip address delete {}/{} dev {}'.format(gateway, self.ip_cidrnetmask, device)
    common.run_os_command(delete_command)
|
||||||
# Rebuild the resource
|
|
||||||
self.removeCorosyncResource()
|
|
||||||
self.createCorosyncResource()
|
|
||||||
|
|
||||||
def provision(self):
|
|
||||||
self.update_timer.start()
|
|
||||||
self.createNetwork()
|
|
||||||
time.sleep(0.1)
|
|
||||||
self.createCorosyncResource()
|
|
||||||
|
|
||||||
def deprovision(self):
|
|
||||||
self.update_timer.shutdown()
|
|
||||||
self.removeCorosyncResource()
|
|
||||||
self.removeNetwork()
|
|
||||||
|
|
Loading…
Reference in New Issue