New router daemon based on virtualization daemon

This commit is contained in:
Joshua Boniface 2018-09-24 01:03:16 -04:00
parent a1f4ba9c60
commit 062a46f48c
3 changed files with 381 additions and 133 deletions

View File

@ -21,19 +21,22 @@
############################################################################### ###############################################################################
import kazoo.client import kazoo.client
import libvirt
import sys import sys
import os import os
import signal import signal
import socket import socket
import psutil import psutil
import configparser
import time
import subprocess import subprocess
import uuid
import time
import configparser
import apscheduler.schedulers.background
import daemon_lib.ansiiprint as ansiiprint import daemon_lib.ansiiprint as ansiiprint
import daemon_lib.zkhandler as zkhandler import daemon_lib.zkhandler as zkhandler
import daemon_lib.common as common
import pvcrd.RouterInstance as RouterInstance
import pvcrd.VXNetworkInstance as VXNetworkInstance import pvcrd.VXNetworkInstance as VXNetworkInstance
print(ansiiprint.bold() + "pvcrd - Parallel Virtual Cluster router daemon" + ansiiprint.end()) print(ansiiprint.bold() + "pvcrd - Parallel Virtual Cluster router daemon" + ansiiprint.end())
@ -47,13 +50,17 @@ except:
myhostname = socket.gethostname() myhostname = socket.gethostname()
myshorthostname = myhostname.split('.', 1)[0] myshorthostname = myhostname.split('.', 1)[0]
mydomainname = ''.join(myhostname.split('.', 1)[1:]) mynetworkname = ''.join(myhostname.split('.', 1)[1:])
# Config values dictionary # Config values dictionary
config_values = [ config_values = [
'zookeeper', 'zookeeper',
'keepalive_interval',
'vni_dev', 'vni_dev',
'vni_dev_ip', 'vni_dev_ip'
'ipmi_hostname',
'ipmi_username',
'ipmi_password'
] ]
def readConfig(pvcrd_config_file, myhostname): def readConfig(pvcrd_config_file, myhostname):
print('Loading configuration from file {}'.format(pvcrd_config_file)) print('Loading configuration from file {}'.format(pvcrd_config_file))
@ -67,7 +74,7 @@ def readConfig(pvcrd_config_file, myhostname):
except: except:
try: try:
entries = o_config['default'] entries = o_config['default']
except: except Exception as e:
print('ERROR: Config file is not valid!') print('ERROR: Config file is not valid!')
exit(1) exit(1)
@ -81,23 +88,32 @@ def readConfig(pvcrd_config_file, myhostname):
print('ERROR: Config file missing required value "{}" for this host!'.format(entry)) print('ERROR: Config file missing required value "{}" for this host!'.format(entry))
exit(1) exit(1)
# Handle an empty ipmi_hostname
if config['ipmi_hostname'] == '':
config['ipmi_hostname'] = myshorthostname + '-lom.' + mynetworkname
return config return config
# Get config
config = readConfig(pvcrd_config_file, myhostname) config = readConfig(pvcrd_config_file, myhostname)
# Connect to local zookeeper
zk_conn = kazoo.client.KazooClient(hosts=config['zookeeper']) zk_conn = kazoo.client.KazooClient(hosts=config['zookeeper'])
try: try:
print('Connecting to Zookeeper instance at {}'.format(config['zookeeper'])) print('Connecting to Zookeeper instance at {}'.format(config['zookeeper']))
zk_conn.start() zk_conn.start()
except: except:
print('ERROR: Failed to connect to Zookeeper!') print('ERROR: Failed to connect to Zookeeper')
exit(1) exit(1)
# Handle zookeeper failures gracefully # Handle zookeeper failures
def zk_listener(state): def zk_listener(state):
global zk_conn global zk_conn, update_timer
if state == kazoo.client.KazooState.SUSPENDED: if state == kazoo.client.KazooState.SUSPENDED:
ansiiprint.echo('Connection to Zookeeper list; retrying', '', 'e') ansiiprint.echo('Connection to Zookeeper lost; retrying', '', 'e')
# Stop keepalive thread
stopKeepaliveTimer(update_timer)
while True: while True:
_zk_conn = kazoo.client.KazooClient(hosts=config['zookeeper']) _zk_conn = kazoo.client.KazooClient(hosts=config['zookeeper'])
@ -109,6 +125,9 @@ def zk_listener(state):
time.sleep(1) time.sleep(1)
elif state == kazoo.client.KazooState.CONNECTED: elif state == kazoo.client.KazooState.CONNECTED:
ansiiprint.echo('Connection to Zookeeper started', '', 'o') ansiiprint.echo('Connection to Zookeeper started', '', 'o')
# Start keepalive thread
update_timer = createKeepaliveTimer()
else: else:
pass pass
@ -117,98 +136,117 @@ zk_conn.add_listener(zk_listener)
# Cleanup function # Cleanup function
def cleanup(signum, frame):
    """Signal handler that shuts the daemon down.

    Stops the keepalive timer, records the 'stop' daemon state in Zookeeper,
    closes the Zookeeper connection and exits with status 0.

    signum and frame are the standard signal-handler arguments; neither is
    used.
    """
    ansiiprint.echo('Terminating daemon', '', 'e')

    # Stop keepalive thread first: a keepalive job that runs later would hit
    # a closed connection, or rewrite the daemon state back to 'run' after
    # the 'stop' state has been recorded below
    try:
        stopKeepaliveTimer(update_timer)
    except Exception as e:
        # Also covers a signal arriving before the timer was ever created
        ansiiprint.echo('Failed to stop keepalive timer: {}'.format(e), '', 'e')

    # Set stop state in Zookeeper; best-effort, the connection may be gone
    try:
        zkhandler.writedata(zk_conn, { '/routers/{}/daemonstate'.format(myhostname): 'stop' })
    except Exception as e:
        ansiiprint.echo('Failed to set stop state in Zookeeper: {}'.format(e), '', 'e')

    # Close the Zookeeper connection; best-effort since we exit regardless
    try:
        zk_conn.stop()
        zk_conn.close()
    except Exception:
        pass

    # Exit
    sys.exit(0)
# Handle signals with cleanup # Handle signals gracefully
signal.signal(signal.SIGTERM, cleanup) signal.signal(signal.SIGTERM, cleanup)
signal.signal(signal.SIGINT, cleanup) signal.signal(signal.SIGINT, cleanup)
signal.signal(signal.SIGQUIT, cleanup) signal.signal(signal.SIGQUIT, cleanup)
# What this daemon does: # Gather useful data about our host for staticdata
# 1. Configure public networks dynamically on startup (e.g. bonding, vlans, etc.) from config # Static data format: 'cpu_count', 'arch', 'os', 'kernel'
# * no /etc/network/interfaces config for these - just mgmt interface via DHCP! staticdata = []
# 2. Watch ZK /networks staticdata.append(str(psutil.cpu_count()))
# 3. Provision required network interfaces when a network is added staticdata.append(subprocess.run(['uname', '-r'], stdout=subprocess.PIPE).stdout.decode('ascii').strip())
# a. create vxlan interface targeting local dev from config staticdata.append(subprocess.run(['uname', '-o'], stdout=subprocess.PIPE).stdout.decode('ascii').strip())
# b. create bridge interface staticdata.append(subprocess.run(['uname', '-m'], stdout=subprocess.PIPE).stdout.decode('ascii').strip())
# c. add vxlan to bridge # Print static data on start
# d. set interfaces up
# e. add corosync config for virtual gateway IP
# 4. Remove network interfaces when network disapears
# Zookeeper schema: print('{0}Router hostname:{1} {2}'.format(ansiiprint.bold(), ansiiprint.end(), myhostname))
# networks/ print('{0}IPMI hostname:{1} {2}'.format(ansiiprint.bold(), ansiiprint.end(), config['ipmi_hostname']))
# <VXLANID>/ print('{0}Machine details:{1}'.format(ansiiprint.bold(), ansiiprint.end()))
# ipnet <NETWORK-CIDR> e.g. 10.101.0.0/24 print(' {0}CPUs:{1} {2}'.format(ansiiprint.bold(), ansiiprint.end(), staticdata[0]))
# gateway <IPADDR> e.g. 10.101.0.1 [1] print(' {0}Arch:{1} {2}'.format(ansiiprint.bold(), ansiiprint.end(), staticdata[3]))
# dhcp <YES/NO> e.g. YES [2] print(' {0}OS:{1} {2}'.format(ansiiprint.bold(), ansiiprint.end(), staticdata[2]))
# reservations/ print(' {0}Kernel:{1} {2}'.format(ansiiprint.bold(), ansiiprint.end(), staticdata[1]))
# <HOSTNAME/DESCRIPTION>/
# address <IPADDR> e.g. 10.101.0.30
# mac <MACADDR> e.g. ff:ff:fe:ab:cd:ef
# fwrules/
# <RULENAME>/
# description <DESCRIPTION> e.g. Allow HTTP from any to this net
# src <HOSTNAME/IPADDR/SUBNET/"any"/"this"> e.g. any
# dest <HOSTNAME/IPADDR/SUBNET/"any"/"this"> e.g. this
# port <PORT/RANGE/"any"> e.g. 80
# Notes: # Check if our router exists in Zookeeper, and create it if not
# [1] becomes a VIP between the pair of routers in multi-router envs if zk_conn.exists('/routers/{}'.format(myhostname)):
# [2] enables or disables a DHCP subnet definition for the network print("Router is " + ansiiprint.green() + "present" + ansiiprint.end() + " in Zookeeper")
# Update static data just in case it's changed
zkhandler.writedata(zk_conn, { '/routers/{}/staticdata'.format(myhostname): ' '.join(staticdata) })
# Enable routing
common.run_os_command('sysctl sysctl net.ipv4.ip_forward=1')
common.run_os_command('sysctl sysctl net.ipv6.ip_forward=1')
common.run_os_command('sysctl sysctl net.ipv4.all.send_redirects=1')
common.run_os_command('sysctl sysctl net.ipv6.all.send_redirects=1')
common.run_os_command('sysctl sysctl net.ipv4.all.accept_source_route=1')
common.run_os_command('sysctl sysctl net.ipv6.all.accept_source_route=1')
# Prepare underlying interface
if config['vni_dev_ip'] == 'dhcp':
vni_dev = config['vni_dev']
ansiiprint.echo('Configuring VNI parent device {} with DHCP IP'.format(vni_dev), '', 'o')
common.run_os_command('ip link set {0} up'.format(vni_dev))
common.run_os_command('dhclient {0}'.format(vni_dev))
else: else:
vni_dev = config['vni_dev'] print("Router is " + ansiiprint.red() + "absent" + ansiiprint.end() + " in Zookeeper; adding new router")
vni_dev_ip = config['vni_dev_ip'] keepalive_time = int(time.time())
ansiiprint.echo('Configuring VNI parent device {} with IP {}'.format(vni_dev, vni_dev_ip), '', 'o') transaction = zk_conn.transaction()
common.run_os_command('ip link set {0} up'.format(vni_dev)) transaction.create('/routers/{}'.format(myhostname), 'hypervisor'.encode('ascii'))
common.run_os_command('ip address add {0} dev {1}'.format(vni_dev_ip, vni_dev)) # Basic state information
transaction.create('/routers/{}/daemonstate'.format(myhostname), 'stop'.encode('ascii'))
transaction.create('/routers/{}/networkstate'.format(myhostname), 'ready'.encode('ascii'))
transaction.create('/routers/{}/staticdata'.format(myhostname), ' '.join(staticdata).encode('ascii'))
# Keepalives and fencing information
transaction.create('/routers/{}/keepalive'.format(myhostname), str(keepalive_time).encode('ascii'))
transaction.create('/routers/{}/ipmihostname'.format(myhostname), config['ipmi_hostname'].encode('ascii'))
transaction.create('/routers/{}/ipmiusername'.format(myhostname), config['ipmi_username'].encode('ascii'))
transaction.create('/routers/{}/ipmipassword'.format(myhostname), config['ipmi_password'].encode('ascii'))
transaction.commit()
# Disable stonith in corosync zkhandler.writedata(zk_conn, { '/routers/{}/daemonstate'.format(myhostname): 'init' })
common.run_os_command('crm configure property stonith-enabled="false"')
# Prepare VNI list t_router = dict()
t_vni = dict() s_network = dict()
vni_list = [] router_list = []
network_list = []
@zk_conn.ChildrenWatch('/routers')
def updaterouters(new_router_list):
    """Children watch on /routers: refresh the router list and instances.

    Replaces the module-level router_list with new_router_list, prints it,
    then creates a RouterInstance for every router not yet in t_router and
    hands the current t_router mapping to the ones that already exist.
    """
    global router_list
    router_list = new_router_list

    heading = ansiiprint.blue() + 'Router list: ' + ansiiprint.end()
    print(heading + '{}'.format(' '.join(router_list)))

    for router_name in router_list:
        if router_name not in t_router:
            # First time this router is seen: start tracking it
            t_router[router_name] = RouterInstance.RouterInstance(
                myhostname, router_name, t_router, s_network, zk_conn, config
            )
            continue
        # Already tracked: pass it the current router mapping
        t_router[router_name].updaterouterlist(t_router)
@zk_conn.ChildrenWatch('/networks')
def updatenetworks(new_network_list):
    """Children watch on /networks: create and remove network instances.

    Networks present in new_network_list but not yet in s_network get a new
    VXNetworkInstance. Networks in the previous network_list that are no
    longer in new_network_list have their address and network removed and
    are dropped from s_network. Every known router instance is then handed
    the updated s_network mapping.
    """
    global network_list

    # Additions must be derived from the NEW list; the previous list can
    # never contain a network that has only just appeared
    added = [network for network in new_network_list if network not in s_network]
    for network in added:
        s_network[network] = VXNetworkInstance.VXNetworkInstance(network, zk_conn, config, t_router[myhostname])

    # Removals are the networks we knew about that have now disappeared
    removed = [network for network in network_list if network not in new_network_list]
    for network in removed:
        # Drop the instance so that a later re-creation of the same network
        # is seen as new again
        instance = s_network.pop(network, None)
        if instance is not None:
            instance.removeAddress()
            instance.removeNetwork()

    if added or removed:
        for router in router_list:
            if router in t_router:
                t_router[router].updatenetworklist(s_network)

    network_list = new_network_list
    print(ansiiprint.blue() + 'Network list: ' + ansiiprint.end() + '{}'.format(' '.join(network_list)))
# Remove deleted VNIs # Set up our update function
for vni in vni_list: this_router = t_router[myhostname]
if vni not in new_vni_list: update_zookeeper = this_router.update_zookeeper
vni_list.remove(vni)
t_vni[vni].deprovision() # Create timer to update this router in Zookeeper
def createKeepaliveTimer():
    """Start a background scheduler that runs update_zookeeper periodically.

    The period is config['keepalive_interval'] converted to whole seconds.
    Returns the started scheduler so the caller can shut it down later.
    """
    keepalive_seconds = int(config['keepalive_interval'])
    ansiiprint.echo('Starting keepalive timer ({} second interval)'.format(keepalive_seconds), '', 'o')

    scheduler = apscheduler.schedulers.background.BackgroundScheduler()
    scheduler.add_job(update_zookeeper, 'interval', seconds=keepalive_seconds)
    scheduler.start()
    return scheduler
def stopKeepaliveTimer(update_timer):
    """Shut down the keepalive scheduler passed in as update_timer.

    Safe to call more than once: both the Zookeeper connection listener and
    the signal cleanup handler stop the timer, and shutting down a scheduler
    that is not running raises an error.
    """
    ansiiprint.echo('Stopping keepalive timer', '', 'c')
    if update_timer.running:
        update_timer.shutdown()
# Start keepalive thread
update_timer = createKeepaliveTimer()
# Tick loop # Tick loop
while True: while True:

View File

@ -0,0 +1,247 @@
#!/usr/bin/env python3
# RouterInstance.py - Class implementing a PVC router and run by pvcrd
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
import os
import sys
import psutil
import socket
import time
import libvirt
import kazoo.client
import threading
import subprocess
import daemon_lib.ansiiprint as ansiiprint
import daemon_lib.zkhandler as zkhandler
class RouterInstance():
    """Track one PVC router's state as stored under /routers/<name>.

    The daemon creates one instance per router listed in Zookeeper. The
    instance whose name equals this_router represents the local host; it is
    the only one that adds or removes gateway addresses on local networks.
    """

    # Missed keepalive intervals after which a router is considered dead,
    # used when the configuration has no 'fence_intervals' entry
    DEFAULT_FENCE_INTERVALS = 6

    # Initialization function
    def __init__(self, this_router, name, t_router, s_network, zk_conn, config):
        """Store the passed-in state and register the Zookeeper watches.

        this_router -- name of the router this daemon is running on
        name        -- name of the router this instance represents
        t_router    -- dict of router name -> RouterInstance (shared)
        s_network   -- dict of network -> VXNetworkInstance (shared)
        zk_conn     -- Zookeeper client connection
        config      -- daemon configuration mapping
        """
        # Passed-in variables on creation
        self.zk_conn = zk_conn
        self.config = config
        self.this_router = this_router
        self.name = name
        self.daemon_state = 'stop'
        self.network_state = 'primary'
        self.t_router = t_router
        self.primary_router_list = []
        self.secondary_router_list = []
        self.inactive_router_list = []
        self.s_network = s_network
        self.network_list = []
        self.ipmi_hostname = self.config['ipmi_hostname']

        # Zookeeper handlers for changed states
        @zk_conn.DataWatch('/routers/{}/daemonstate'.format(self.name))
        def watch_hypervisor_daemonstate(data, stat, event=""):
            try:
                self.daemon_state = data.decode('ascii')
            except AttributeError:
                # No data to decode; treat the daemon as stopped
                self.daemon_state = 'stop'

        @zk_conn.DataWatch('/routers/{}/networkstate'.format(self.name))
        def watch_hypervisor_networkstate(data, stat, event=""):
            try:
                new_network_state = data.decode('ascii')
            except AttributeError:
                # No data to decode; treat the router as secondary
                new_network_state = 'secondary'

            if new_network_state == self.network_state:
                return
            self.network_state = new_network_state

            # Only the instance for the local router toggles state management
            if self.name != self.this_router:
                return
            if self.network_state == 'secondary':
                self.set_secondary()
            if self.network_state == 'primary':
                self.set_primary()
                # Demote every other router. t_router maps names to
                # instances, so iterate the values; copy them first because
                # the mapping is shared and may gain entries meanwhile.
                for router in list(self.t_router.values()):
                    if router.getname() != self.name:
                        router.set_secondary()

    # Get value functions
    def getname(self):
        """Return the name of the router this instance represents."""
        return self.name

    def getdaemonstate(self):
        """Return the last daemon state seen for this router."""
        return self.daemon_state

    def getnetworkstate(self):
        """Return the last network state seen for this router."""
        return self.network_state

    def getnetworklist(self):
        """Return this instance's network list."""
        return self.network_list

    # Update value functions
    def updaterouterlist(self, t_router):
        """Replace the router name -> instance mapping."""
        self.t_router = t_router

    def updatenetworklist(self, s_network):
        """Replace the network -> instance mapping."""
        self.s_network = s_network

    # Demote this router to secondary
    def set_secondary(self):
        """Record the secondary state in Zookeeper for this router.

        When this instance represents the local router, the gateway address
        is also removed from every known network. Instances representing
        other routers only write the state; removing addresses here would
        strip the local gateways right after a local promotion.
        """
        ansiiprint.echo('Setting node {} to secondary state'.format(self.name), '', 'i')
        ansiiprint.echo('Network list: {}'.format(', '.join(self.network_list)), '', 'c')
        zkhandler.writedata(self.zk_conn, { '/routers/{}/networkstate'.format(self.name): 'secondary' })
        if self.name != self.this_router:
            return
        # s_network maps names to instances, so iterate the values
        for network in list(self.s_network.values()):
            network.removeAddress()

    # Promote this router to primary
    def set_primary(self):
        """Record the primary state in Zookeeper and create the gateway
        address on every known network."""
        ansiiprint.echo('Setting node {} to master state.'.format(self.name), '', 'i')
        ansiiprint.echo('Network list: {}'.format(', '.join(self.network_list)), '', 'c')
        zkhandler.writedata(self.zk_conn, { '/routers/{}/networkstate'.format(self.name): 'primary' })
        for network in list(self.s_network.values()):
            network.createAddress()

    # Move a router into the list matching its state
    def _classify_router(self, router_name, daemon_state, network_state):
        """Put router_name in exactly one of the primary, secondary and
        inactive lists: secondary if its network state is 'secondary',
        otherwise primary if its daemon state is 'run', otherwise inactive."""
        if network_state == 'secondary':
            target = self.secondary_router_list
        elif daemon_state == 'run':
            target = self.primary_router_list
        else:
            target = self.inactive_router_list

        for candidate in (self.primary_router_list, self.secondary_router_list, self.inactive_router_list):
            if candidate is target:
                if router_name not in candidate:
                    candidate.append(router_name)
            elif router_name in candidate:
                candidate.remove(router_name)

    # Keepalive function
    def update_zookeeper(self):
        """Write this router's keepalive and refresh the cluster view.

        Marks the daemon as 'run', writes the current time as the keepalive,
        prints local and cluster status, and starts a fence thread for any
        router in 'run' state whose keepalive is too old.
        """
        # Get past state and update if needed
        past_state = zkhandler.readdata(self.zk_conn, '/routers/{}/daemonstate'.format(self.name))
        self.daemon_state = 'run'
        if past_state != 'run':
            zkhandler.writedata(self.zk_conn, { '/routers/{}/daemonstate'.format(self.name): 'run' })

        # Set our information in zookeeper
        keepalive_time = int(time.time())
        try:
            zkhandler.writedata(self.zk_conn, {
                '/routers/{}/keepalive'.format(self.name): str(keepalive_time)
            })
        except Exception:
            ansiiprint.echo('Failed to set keepalive data', '', 'e')
            return

        # Gather the figures shown below; nothing else in this class sets them
        memory = psutil.virtual_memory()
        networks_count = len(self.s_network)
        memfree = int(memory.free / 1024 / 1024)
        memused = int(memory.used / 1024 / 1024)
        cpuload = os.getloadavg()[0]

        # Display router information to the terminal
        ansiiprint.echo('{}{} keepalive{}'.format(ansiiprint.purple(), self.name, ansiiprint.end()), '', 't')
        ansiiprint.echo('{0}Active networks:{1} {2} {0}Free memory [MiB]:{1} {3} {0}Used memory [MiB]:{1} {4} {0}Load:{1} {5}'.format(ansiiprint.bold(), ansiiprint.end(), networks_count, memfree, memused, cpuload), '', 'c')

        # A router is considered dead when its keepalive is more than
        # fence_intervals * keepalive_interval seconds out-of-date while in
        # 'run' state
        try:
            fence_intervals = int(self.config['fence_intervals'])
        except KeyError:
            fence_intervals = self.DEFAULT_FENCE_INTERVALS
        router_deadtime = int(time.time()) - ( int(self.config['keepalive_interval']) * fence_intervals )

        # Update our local router lists
        for router_name in list(self.t_router):
            try:
                router_daemon_state = zkhandler.readdata(self.zk_conn, '/routers/{}/daemonstate'.format(router_name))
                router_network_state = zkhandler.readdata(self.zk_conn, '/routers/{}/networkstate'.format(router_name))
                router_keepalive = int(zkhandler.readdata(self.zk_conn, '/routers/{}/keepalive'.format(router_name)))
            except Exception:
                router_daemon_state = 'unknown'
                router_network_state = 'unknown'
                router_keepalive = 0

            # Handle deadtime and fencing if needed
            if router_keepalive < router_deadtime and router_daemon_state == 'run':
                ansiiprint.echo('Router {} seems dead - starting monitor for fencing'.format(router_name), '', 'w')
                zkhandler.writedata(self.zk_conn, { '/routers/{}/daemonstate'.format(router_name): 'dead' })
                fence_thread = threading.Thread(target=fenceRouter, args=(router_name, self.zk_conn, self.config), kwargs={})
                fence_thread.start()
                # Classify with the state we just wrote, not the stale read
                router_daemon_state = 'dead'

            # Update the arrays
            self._classify_router(router_name, router_daemon_state, router_network_state)

        # Display cluster information to the terminal
        ansiiprint.echo('{}Cluster status{}'.format(ansiiprint.purple(), ansiiprint.end()), '', 't')
        ansiiprint.echo('{}Primary router:{} {}'.format(ansiiprint.bold(), ansiiprint.end(), ' '.join(self.primary_router_list)), '', 'c')
        ansiiprint.echo('{}Secondary router:{} {}'.format(ansiiprint.bold(), ansiiprint.end(), ' '.join(self.secondary_router_list)), '', 'c')
        ansiiprint.echo('{}Inactive routers:{} {}'.format(ansiiprint.bold(), ansiiprint.end(), ' '.join(self.inactive_router_list)), '', 'c')
#
# Fence thread entry function
#
def fenceRouter(router_name, zk_conn, config):
    """Fence thread entry point for a router that has been marked 'dead'.

    Re-reads the router's daemon state three times, five seconds apart. If
    any read is not 'dead' the fence is cancelled. Otherwise the router is
    rebooted via IPMI, using the credentials stored for it in Zookeeper, and
    its network state is set to 'secondary'.

    config is accepted but not used here.
    """
    # We allow exactly 3 saving throws for the host to come back online
    for failcount in range(1, 4):
        # Wait 5 seconds
        time.sleep(5)
        # Get the state
        current_state = zkhandler.readdata(zk_conn, '/routers/{}/daemonstate'.format(router_name))
        # It changed back to something else so it must be alive
        if current_state != 'dead':
            ansiiprint.echo('Router "{}" passed a saving throw; canceling fence'.format(router_name), '', 'o')
            return
        # Still 'dead'
        ansiiprint.echo('Router "{}" failed {} saving throws'.format(router_name, failcount), '', 'w')

    ansiiprint.echo('Fencing router "{}" via IPMI reboot signal'.format(router_name), '', 'e')

    # Get IPMI information
    host = zkhandler.readdata(zk_conn, '/routers/{}/ipmihostname'.format(router_name))
    user = zkhandler.readdata(zk_conn, '/routers/{}/ipmiusername'.format(router_name))
    secret = zkhandler.readdata(zk_conn, '/routers/{}/ipmipassword'.format(router_name))

    # Shoot it in the head; the result is not acted upon
    rebootViaIPMI(host, user, secret)

    # Hold to ensure the fence takes effect
    time.sleep(3)

    # Set router in secondary state
    zkhandler.writedata(zk_conn, { '/routers/{}/networkstate'.format(router_name): 'secondary' })
#
# Perform an IPMI fence
#
def rebootViaIPMI(ipmi_hostname, ipmi_user, ipmi_password):
    """Run ipmitool 'chassis power reset' against ipmi_hostname.

    Returns True when ipmitool exits with status 0, False otherwise, and
    logs the outcome either way.
    """
    # NOTE(review): the password is passed as a command-line argument, so it
    # is presumably visible in the process list while ipmitool runs
    argv = [
        '/usr/bin/ipmitool',
        '-I', 'lanplus',
        '-H', ipmi_hostname,
        '-U', ipmi_user,
        '-P', ipmi_password,
        'chassis', 'power', 'reset',
    ]
    result = subprocess.run(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    rebooted = result.returncode == 0
    if rebooted:
        ansiiprint.echo('Successfully rebooted dead router', '', 'o')
    else:
        ansiiprint.echo('Failed to reboot dead router', '', 'e')
    return rebooted

View File

@ -23,8 +23,6 @@
import os import os
import sys import sys
import time import time
import subprocess
import apscheduler.schedulers.background
import daemon_lib.ansiiprint as ansiiprint import daemon_lib.ansiiprint as ansiiprint
import daemon_lib.zkhandler as zkhandler import daemon_lib.zkhandler as zkhandler
@ -32,9 +30,10 @@ import daemon_lib.common as common
class VXNetworkInstance(): class VXNetworkInstance():
# Initialization function # Initialization function
def __init__ (self, vni, zk_conn, config): def __init__ (self, vni, zk_conn, config, thisrouter):
self.vni = vni self.vni = vni
self.zk_conn = zk_conn self.zk_conn = zk_conn
self.thisrouter = thisrouter
self.vni_dev = config['vni_dev'] self.vni_dev = config['vni_dev']
self.old_description = zkhandler.readdata(self.zk_conn, '/networks/{}'.format(self.vni)) self.old_description = zkhandler.readdata(self.zk_conn, '/networks/{}'.format(self.vni))
@ -47,11 +46,7 @@ class VXNetworkInstance():
self.vxlan_nic = 'vxlan{}'.format(self.vni) self.vxlan_nic = 'vxlan{}'.format(self.vni)
self.bridge_nic = 'br{}'.format(self.vni) self.bridge_nic = 'br{}'.format(self.vni)
self.corosync_provisioned = False self.createNetwork()
self.watch_change = False
self.update_timer = apscheduler.schedulers.background.BackgroundScheduler()
self.update_timer.add_job(self.updateCorosyncResource, 'interval', seconds=1)
# Zookeeper handlers for changed states # Zookeeper handlers for changed states
@zk_conn.DataWatch('/networks/{}'.format(self.vni)) @zk_conn.DataWatch('/networks/{}'.format(self.vni))
@ -59,7 +54,6 @@ class VXNetworkInstance():
if data != None and self.description != data.decode('ascii'): if data != None and self.description != data.decode('ascii'):
self.old_description = self.description self.old_description = self.description
self.description = data.decode('ascii') self.description = data.decode('ascii')
self.watch_change = True
@zk_conn.DataWatch('/networks/{}/ip_network'.format(self.vni)) @zk_conn.DataWatch('/networks/{}/ip_network'.format(self.vni))
def watch_network_ip_network(data, stat, event=''): def watch_network_ip_network(data, stat, event=''):
@ -67,40 +61,18 @@ class VXNetworkInstance():
ip_network = data.decode('ascii') ip_network = data.decode('ascii')
self.ip_network = ip_network self.ip_network = ip_network
self.ip_cidrnetmask = ip_network.split('/')[-1] self.ip_cidrnetmask = ip_network.split('/')[-1]
self.watch_change = True
@zk_conn.DataWatch('/networks/{}/ip_gateway'.format(self.vni)) @zk_conn.DataWatch('/networks/{}/ip_gateway'.format(self.vni))
def watch_network_gateway(data, stat, event=''): def watch_network_gateway(data, stat, event=''):
if data != None and self.ip_gateway != data.decode('ascii'): if data != None and self.ip_gateway != data.decode('ascii'):
self.removeAddress()
self.ip_gateway = data.decode('ascii') self.ip_gateway = data.decode('ascii')
self.watch_change = True self.addAddress()
@zk_conn.DataWatch('/networks/{}/dhcp_flag'.format(self.vni)) @zk_conn.DataWatch('/networks/{}/dhcp_flag'.format(self.vni))
def watch_network_dhcp_status(data, stat, event=''): def watch_network_dhcp_status(data, stat, event=''):
if data != None and self.dhcp_flag != data.decode('ascii'): if data != None and self.dhcp_flag != data.decode('ascii'):
self.dhcp_flag = ( data.decode('ascii') == 'True' ) self.dhcp_flag = ( data.decode('ascii') == 'True' )
self.watch_change = True
def createCorosyncResource(self):
ansiiprint.echo('Creating Corosync resource for network {} gateway {} on VNI {}'.format(self.description, self.ip_gateway, self.vni), '', 'o')
common.run_os_command('crm configure primitive vnivip_{0} ocf:heartbeat:IPaddr2 params ip={1} cidr_netmask={2} nic={3} op monitor interval=1s meta target-role=Stopped'.format(
self.description,
self.ip_gateway,
self.ip_cidrnetmask,
self.bridge_nic
))
common.run_os_command('crm configure location lvnivip_{0} vnivip_{0} 100: 1'.format(self.description))
common.run_os_command('crm resource start vnivip_{0}'.format(self.description))
common.run_os_command('crm resource refresh'.format(self.description))
self.watch_change = False
self.corosync_provisioned = True
def removeCorosyncResource(self):
ansiiprint.echo('Removing Corosync resource for network {} on VNI {}'.format(self.old_description, self.vni), '', 'o')
common.run_os_command('crm resource stop vnivip_{}'.format(self.old_description))
common.run_os_command('crm configure delete vnivip_{}'.format(self.old_description))
self.corosync_provisioned = False
def createNetwork(self): def createNetwork(self):
ansiiprint.echo('Creating VNI {} device on interface {}'.format(self.vni, self.vni_dev), '', 'o') ansiiprint.echo('Creating VNI {} device on interface {}'.format(self.vni, self.vni_dev), '', 'o')
@ -110,6 +82,11 @@ class VXNetworkInstance():
common.run_os_command('ip link set {} up'.format(self.vxlan_nic)) common.run_os_command('ip link set {} up'.format(self.vxlan_nic))
common.run_os_command('ip link set {} up'.format(self.bridge_nic)) common.run_os_command('ip link set {} up'.format(self.bridge_nic))
def createAddress(self):
    """Add the gateway address to the VNI device if this router is primary.

    Does nothing unless the router object given at construction reports the
    'primary' network state.
    """
    # The constructor stores the router as self.thisrouter; there is no
    # self.this_router attribute on this class
    if self.thisrouter.getnetworkstate() == 'primary':
        ansiiprint.echo('Creating gateway {} on interface {}'.format(self.ip_gateway, self.vni_dev), '', 'o')
        common.run_os_command('ip address add {}/{} dev {}'.format(self.ip_gateway, self.ip_cidrnetmask, self.vni_dev))
def removeNetwork(self): def removeNetwork(self):
ansiiprint.echo('Removing VNI {} device on interface {}'.format(self.vni, self.vni_dev), '', 'o') ansiiprint.echo('Removing VNI {} device on interface {}'.format(self.vni, self.vni_dev), '', 'o')
common.run_os_command('ip link set {} down'.format(self.bridge_nic)) common.run_os_command('ip link set {} down'.format(self.bridge_nic))
@ -118,20 +95,6 @@ class VXNetworkInstance():
common.run_os_command('brctl delbr {}'.format(self.bridge_nic)) common.run_os_command('brctl delbr {}'.format(self.bridge_nic))
common.run_os_command('ip link delete {}'.format(self.vxlan_nic)) common.run_os_command('ip link delete {}'.format(self.vxlan_nic))
def removeAddress(self):
    """Delete the gateway address from the VNI device."""
    gateway = self.ip_gateway
    device = self.vni_dev
    ansiiprint.echo('Removing gateway {} from interface {}'.format(gateway, device), '', 'o')
    delete_command = 'ip address delete {}/{} dev {}'.format(gateway, self.ip_cidrnetmask, device)
    common.run_os_command(delete_command)
# Rebuild the resource
self.removeCorosyncResource()
self.createCorosyncResource()
def provision(self):
self.update_timer.start()
self.createNetwork()
time.sleep(0.1)
self.createCorosyncResource()
def deprovision(self):
self.update_timer.shutdown()
self.removeCorosyncResource()
self.removeNetwork()