diff --git a/debian/control b/debian/control
index e52dc7de..12fcff74 100644
--- a/debian/control
+++ b/debian/control
@@ -33,6 +33,14 @@ Description: Parallel Virtual Cluster network daemon (Python 3)
  .
  This package installs the PVC network daemon
 
+Package: pvc-router-daemon
+Architecture: all
+Depends: pvc-daemon-common
+Description: Parallel Virtual Cluster router daemon (Python 3)
+ The Parallel Virtual Cluster provides a management solution for QEMU/KVM virtual clusters.
+ .
+ This package installs the PVC router daemon
+
 Package: pvc-client-common
 Architecture: all
 Depends: python3-kazoo, python3-psutil, python3-click, python3-lxml
diff --git a/debian/pvc-router-daemon.install b/debian/pvc-router-daemon.install
new file mode 100644
index 00000000..eca573c1
--- /dev/null
+++ b/debian/pvc-router-daemon.install
@@ -0,0 +1,4 @@
+router-daemon/pvcrd.py usr/share/pvc
+router-daemon/pvcrd.service lib/systemd/system
+router-daemon/pvcrd.conf.sample etc/pvc
+router-daemon/pvcrd usr/share/pvc
diff --git a/debian/pvc-router-daemon.postinst b/debian/pvc-router-daemon.postinst
new file mode 100644
index 00000000..b40df819
--- /dev/null
+++ b/debian/pvc-router-daemon.postinst
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+# Enable the router daemon unit shipped by this package (pvcrd, not the network daemon's pvcnd)
+systemctl enable /lib/systemd/system/pvcrd.service
+
+echo "The PVC router daemon has not been started. Create a config file at /etc/pvc/pvcrd.conf then start it."
diff --git a/debian/pvc-router-daemon.prerm b/debian/pvc-router-daemon.prerm
new file mode 100644
index 00000000..3f4fd8cb
--- /dev/null
+++ b/debian/pvc-router-daemon.prerm
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+# Disable the router daemon unit that the postinst enabled
+systemctl disable pvcrd.service
+
diff --git a/router-daemon/pvcrd.conf.sample b/router-daemon/pvcrd.conf.sample
new file mode 100644
index 00000000..72f69093
--- /dev/null
+++ b/router-daemon/pvcrd.conf.sample
@@ -0,0 +1,22 @@
+# pvcrd cluster configuration file example
+#
+# This configuration file specifies details for this node in PVC.
Multiple host
+# blocks can be added but only the one matching the current system hostname will
+# be used by the local daemon. Default values apply to all hosts for any value
+# not specifically overridden.
+#
+# The following values are required for each host or in a default section:
+#   zookeeper: the IP+port of the Zookeeper instance (defaults to 127.0.0.1:2181)
+#   vni_dev: the lower-level network device to bind VNI to
+#   vni_dev_ip: the IP address (CIDR) of the lower-level network device, used
+#               by FRR to communicate with the route reflectors and pass routes
+#               for VNI interfaces
+#
+# Copy this example to /etc/pvc/pvcrd.conf and edit to your needs
+
+[default]
+zookeeper = 127.0.0.1:2181
+
+[myhost]
+vni_dev = ens4
+vni_dev_ip = 10.255.0.3/24
diff --git a/router-daemon/pvcrd.py b/router-daemon/pvcrd.py
new file mode 100755
index 00000000..485c956d
--- /dev/null
+++ b/router-daemon/pvcrd.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python3
+
+# pvcrd.py - Router daemon startup stub
+# Part of the Parallel Virtual Cluster (PVC) system
+#
+# Copyright (C) 2018 Joshua M. Boniface
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+###############################################################################
+
+import pvcrd.Daemon
diff --git a/router-daemon/pvcrd.service b/router-daemon/pvcrd.service
new file mode 100644
index 00000000..d467e543
--- /dev/null
+++ b/router-daemon/pvcrd.service
@@ -0,0 +1,15 @@
+# Parallel Virtual Cluster router daemon unit file (pvcrd reads PVCRD_CONFIG_FILE from its environment)
+[Unit]
+Description = Parallel Virtual Cluster router daemon
+After = network-online.target frr.service
+
+[Service]
+Type = simple
+WorkingDirectory = /usr/share/pvc
+Environment = PYTHONUNBUFFERED=true
+Environment = PVCRD_CONFIG_FILE=/etc/pvc/pvcrd.conf
+ExecStart = /usr/share/pvc/pvcrd.py
+Restart = on-failure
+
+[Install]
+WantedBy = multi-user.target
diff --git a/router-daemon/pvcrd/Daemon.py b/router-daemon/pvcrd/Daemon.py
new file mode 100644
index 00000000..61b4b384
--- /dev/null
+++ b/router-daemon/pvcrd/Daemon.py
@@ -0,0 +1,222 @@
+#!/usr/bin/env python3
+
+# Daemon.py - PVC hypervisor router daemon
+# Part of the Parallel Virtual Cluster (PVC) system
+#
+# Copyright (C) 2018 Joshua M. Boniface
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################

import kazoo.client
import sys
import os
import signal
import socket
import psutil
import configparser
import time

import daemon_lib.ansiiprint as ansiiprint
import daemon_lib.zkhandler as zkhandler

import pvcrd.VXNetworkInstance as VXNetworkInstance

print(ansiiprint.bold() + "pvcrd - Parallel Virtual Cluster router daemon" + ansiiprint.end())

# Get the config file variable from the environment
try:
    pvcrd_config_file = os.environ['PVCRD_CONFIG_FILE']
except KeyError:
    print('ERROR: The "PVCRD_CONFIG_FILE" environment variable must be set before starting pvcrd.')
    sys.exit(1)

myhostname = socket.gethostname()
myshorthostname = myhostname.split('.', 1)[0]
mydomainname = ''.join(myhostname.split('.', 1)[1:])

# Names of the configuration values that must resolve for this host
config_values = [
    'zookeeper',
    'vni_dev',
    'vni_dev_ip',
]


def readConfig(pvcrd_config_file, myhostname):
    """Load the required configuration values for this host.

    The host section is looked up by the full hostname first, then by its
    short form (text before the first dot), then falls back to the
    "default" section. Each key listed in config_values is read from the
    chosen section, falling back to "default" when the section lacks it.

    Parameters:
        pvcrd_config_file: path of the INI-style configuration file.
        myhostname: hostname used to select the host section.

    Returns:
        dict mapping every name in config_values to its string value.

    Exits the process with status 1 when no usable section exists or a
    required value cannot be resolved.
    """
    print('Loading configuration from file {}'.format(pvcrd_config_file))

    o_config = configparser.ConfigParser()
    # read() silently skips unreadable files; that case is caught below
    # because no section will be found.
    o_config.read(pvcrd_config_file)

    entries = None
    for section in (myhostname, myhostname.split('.', 1)[0], 'default'):
        if section in o_config:
            entries = o_config[section]
            break
    if entries is None:
        print('ERROR: Config file is not valid!')
        sys.exit(1)

    defaults = o_config['default'] if 'default' in o_config else {}

    config = {}
    for entry in config_values:
        if entry in entries:
            config[entry] = entries[entry]
        elif entry in defaults:
            config[entry] = defaults[entry]
        else:
            print('ERROR: Config file missing required value "{}" for this host!'.format(entry))
            sys.exit(1)

    return config


config = readConfig(pvcrd_config_file, myhostname)

zk_conn = kazoo.client.KazooClient(hosts=config['zookeeper'])
try:
    print('Connecting to Zookeeper instance at {}'.format(config['zookeeper']))
    zk_conn.start()
except Exception:
    print('ERROR: Failed to connect to Zookeeper!')
    sys.exit(1)


# Handle zookeeper failures gracefully
def zk_listener(state):
    """React to Zookeeper connection state changes.

    On SUSPENDED, keep trying (once per second) to start a fresh client and
    rebind the module-level zk_conn to it; on CONNECTED, log the event.
    Other states are ignored.

    NOTE(review): this loop blocks the caller until a connection succeeds,
    and the replacement client carries neither this listener nor the
    watches registered on the original client, while existing
    VXNetworkInstance objects keep their reference to the old client.
    Confirm against the kazoo documentation whether relying on the
    client's own reconnection is the better approach.
    """
    global zk_conn
    if state == kazoo.client.KazooState.SUSPENDED:
        ansiiprint.echo('Connection to Zookeeper lost; retrying', '', 'e')

        while True:
            _zk_conn = kazoo.client.KazooClient(hosts=config['zookeeper'])
            try:
                _zk_conn.start()
                zk_conn = _zk_conn
                break
            except Exception:
                time.sleep(1)
    elif state == kazoo.client.KazooState.CONNECTED:
        ansiiprint.echo('Connection to Zookeeper started', '', 'o')


zk_conn.add_listener(zk_listener)


# Cleanup function
def cleanup(signum, frame):
    """Signal handler: close the Zookeeper connection and exit with status 0."""
    ansiiprint.echo('Terminating daemon', '', 'e')
    # Close the Zookeeper connection; best effort, since we are exiting
    # regardless of whether the connection can be shut down cleanly.
    try:
        zk_conn.stop()
        zk_conn.close()
    except Exception:
        pass
    # Exit
    sys.exit(0)


# Handle signals with cleanup
signal.signal(signal.SIGTERM, cleanup)
signal.signal(signal.SIGINT, cleanup)
signal.signal(signal.SIGQUIT, cleanup)

# What this daemon does:
#   1. Configure public networks dynamically on startup (e.g. bonding, vlans, etc.) from config
#       * no /etc/network/interfaces config for these - just mgmt interface via DHCP!
#   2. Watch ZK /networks
#   3. Provision required network interfaces when a network is added
#       a. create vxlan interface targeting local dev from config
#       b. create bridge interface
#       c. add vxlan to bridge
#       d. set interfaces up
#       e. add corosync config for virtual gateway IP
#   4. Remove network interfaces when network disappears

# Zookeeper schema (the first four keys are the ones VXNetworkInstance watches):
#   networks/
#       <VNI>/
#           description     e.g. my-network
#           ip_network      e.g. 10.101.0.0/24
#           ip_gateway      e.g. 10.101.0.1     [1]
#           dhcp_flag       e.g. True           [2]
#           reservations/
#               <reservation>/
#                   address e.g. 10.101.0.30
#                   mac     e.g. ff:ff:fe:ab:cd:ef
#           fwrules/
#               <rule>/
#                   description e.g. Allow HTTP from any to this net
#                   src         e.g. any
#                   dest        e.g. this
#                   port        e.g. 80

# Notes:
# [1] becomes a VIP between the pair of routers in multi-router envs
# [2] enables or disables a DHCP subnet definition for the network


# Prepare underlying interface
vni_dev = config['vni_dev']
vni_dev_ip = config['vni_dev_ip']
if vni_dev_ip == 'dhcp':
    ansiiprint.echo('Configuring VNI parent device {} with DHCP IP'.format(vni_dev), '', 'o')
    os.system('ip link set {0} up'.format(vni_dev))
    os.system('dhclient {0}'.format(vni_dev))
else:
    ansiiprint.echo('Configuring VNI parent device {} with IP {}'.format(vni_dev, vni_dev_ip), '', 'o')
    os.system('ip link set {0} up'.format(vni_dev))
    os.system('ip address add {0} dev {1}'.format(vni_dev_ip, vni_dev))

# Prepare VNI list
t_vni = dict()      # VNI -> VXNetworkInstance currently provisioned
vni_list = []       # VNIs currently known to this daemon


@zk_conn.ChildrenWatch('/networks')
def updatenetworks(new_vni_list):
    """Reconcile provisioned networks with the children of /networks.

    Provisions a VXNetworkInstance for every VNI that is new, and
    deprovisions and forgets every known VNI that is no longer listed.
    """
    global vni_list
    print(ansiiprint.blue() + 'Network list: ' + ansiiprint.end() + '{}'.format(' '.join(new_vni_list)))
    # Add new VNIs
    for vni in new_vni_list:
        if vni not in vni_list:
            vni_list.append(vni)
            t_vni[vni] = VXNetworkInstance.VXNetworkInstance(vni, zk_conn, config)
            t_vni[vni].provision()

    # Remove deleted VNIs. Iterate over a snapshot: removing from the list
    # being iterated would skip the element following each removal.
    for vni in list(vni_list):
        if vni not in new_vni_list:
            vni_list.remove(vni)
            t_vni.pop(vni).deprovision()


# Tick loop: keep the main thread alive; the signal handler ends the process
while True:
    try:
        time.sleep(0.1)
    except (KeyboardInterrupt, SystemExit):
        break

# ---------------------------------------------------------------------------
# Patch boundary: the text below belongs to the next file in this patch,
#   router-daemon/pvcrd/VXNetworkInstance.py
#   (new file mode 100644, index 00000000..bc1a9966, @@ -0,0 +1,196 @@)
# ---------------------------------------------------------------------------
#!/usr/bin/env python3

# VXNetworkInstance.py - Class implementing a PVC VM network (router-side) and run by pvcrd
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018 Joshua M.
# Boniface
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################

import os
import subprocess
import sys
import time
import apscheduler
# The scheduler submodule must be imported explicitly; "import apscheduler"
# alone does not make apscheduler.schedulers.background available.
import apscheduler.schedulers.background

import daemon_lib.ansiiprint as ansiiprint
import daemon_lib.zkhandler as zkhandler


class VXNetworkInstance():
    """Router-side representation of one PVC VM network (one VNI).

    Creates a VXLAN interface plus a bridge for the VNI, manages a
    cluster resource (through the crm shell) holding the network's
    gateway address on the bridge, and rebuilds that resource when the
    network's Zookeeper data changes.
    """

    # Initialization function
    def __init__(self, vni, zk_conn, config):
        """Set up names and state for the network and register the watches.

        Parameters:
            vni: the network's VNI, as found under /networks in Zookeeper.
            zk_conn: Zookeeper client used to register the data watches.
            config: daemon configuration; only 'vni_dev' is read here.
        """
        self.vni = vni
        self.zk_conn = zk_conn
        self.vni_dev = config['vni_dev']

        self.vxlan_nic = 'vxlan{}'.format(self.vni)
        self.bridge_nic = 'br{}'.format(self.vni)

        # Defaults so every attribute exists even if a watch has not fired
        self.description = self.vni
        self.ip_network = ''
        self.ip_cidrnetmask = ''
        self.ip_gateway = ''
        self.dhcp_flag = False

        self.corosync_provisioned = False
        self.watch_change = False

        self.update_timer = apscheduler.schedulers.background.BackgroundScheduler()
        # Must be the bound method; the bare name does not exist in this scope
        self.update_timer.add_job(self.updateCorosyncResource, 'interval', seconds=1)

        # Zookeeper handlers for changed states. Each stores the decoded
        # value (or a fallback when the node has no data, in which case
        # data is None and .decode raises AttributeError) and flags a change.
        @zk_conn.DataWatch('/networks/{}/description'.format(self.vni))
        def watch_network_description(data, stat, event=''):
            try:
                self.description = data.decode('ascii')
            except AttributeError:
                self.description = self.vni

            self.watch_change = True

        @zk_conn.DataWatch('/networks/{}/ip_network'.format(self.vni))
        def watch_network_ip_network(data, stat, event=''):
            try:
                ip_network = data.decode('ascii')
                self.ip_network = ip_network
                self.ip_cidrnetmask = ip_network.split('/')[-1]
            except AttributeError:
                self.ip_network = ''
                self.ip_cidrnetmask = ''

            self.watch_change = True

        @zk_conn.DataWatch('/networks/{}/ip_gateway'.format(self.vni))
        def watch_network_gateway(data, stat, event=''):
            try:
                self.ip_gateway = data.decode('ascii')
            except AttributeError:
                self.ip_gateway = ''

            self.watch_change = True

        @zk_conn.DataWatch('/networks/{}/dhcp_flag'.format(self.vni))
        def watch_network_dhcp_status(data, stat, event=''):
            try:
                dhcp_flag = data.decode('ascii')
                self.dhcp_flag = (dhcp_flag == 'True')
            except AttributeError:
                self.dhcp_flag = False

            self.watch_change = True

    def _run(self, argv, stdin_text=None):
        """Run an external command without a shell.

        Values read from Zookeeper are passed as discrete arguments (or on
        stdin) so they are never interpreted by a shell.

        Returns the command's exit status, or None if it could not be
        started (the failure is logged rather than raised).
        """
        try:
            return subprocess.run(argv, input=stdin_text, universal_newlines=True).returncode
        except OSError as e:
            ansiiprint.echo('Failed to run "{}": {}'.format(' '.join(argv), e), '', 'e')
            return None

    def _resource_name(self):
        """Return the cluster resource id for this network's gateway VIP.

        Derived from the VNI rather than the description: the description
        can change between creation and removal, which would make the
        removal target a resource that does not exist.
        """
        return 'vnivip_{}'.format(self.vni)

    def createCorosyncResource(self):
        """Create and start the gateway VIP resource on the bridge.

        Does nothing (beyond logging) while no gateway address or netmask
        is known for the network.
        """
        # Clear the marker before reading the values, so a change arriving
        # while crm runs triggers another rebuild instead of being lost.
        self.watch_change = False
        if not self.ip_gateway or not self.ip_cidrnetmask:
            ansiiprint.echo('No gateway configured for VNI {}; not creating Corosync resource'.format(self.vni), '', 'e')
            return

        self.corosync_provisioned = True
        ansiiprint.echo('Creating Corosync resource for gateway {} on interface {}'.format(self.ip_gateway, self.vni), '', 'o')
        crm_script = '\n'.join([
            'configure',
            'primitive {0} ocf:heartbeat:IPaddr2 params ip={1} cidr_netmask={2} nic={3} op monitor interval=1s'.format(
                self._resource_name(),
                self.ip_gateway,
                self.ip_cidrnetmask,
                self.bridge_nic
            ),
            'commit',
            'up',
            'resource',
            'start {0}'.format(self._resource_name()),
            '',
        ])
        self._run(['crm', '-f', '-'], stdin_text=crm_script)

    def removeCorosyncResource(self):
        """Stop and delete the gateway VIP resource."""
        ansiiprint.echo('Removing Corosync resource for gateway {} on interface {}'.format(self.ip_gateway, self.vni), '', 'o')
        crm_script = '\n'.join([
            'resource',
            'stop {0}'.format(self._resource_name()),
            'up',
            'configure',
            'delete {0}'.format(self._resource_name()),
            'commit',
            '',
        ])
        self._run(['crm', '-f', '-'], stdin_text=crm_script)
        self.corosync_provisioned = False

    def createNetwork(self):
        """Create the VXLAN interface and bridge, attach them, bring both up."""
        ansiiprint.echo('Creating VNI {} device on interface {}'.format(self.vni, self.vni_dev), '', 'o')
        self._run(['ip', 'link', 'add', self.vxlan_nic, 'type', 'vxlan',
                   'id', str(self.vni), 'dstport', '4789', 'dev', self.vni_dev])
        self._run(['brctl', 'addbr', self.bridge_nic])
        self._run(['brctl', 'addif', self.bridge_nic, self.vxlan_nic])
        self._run(['ip', 'link', 'set', self.vxlan_nic, 'up'])
        self._run(['ip', 'link', 'set', self.bridge_nic, 'up'])

    def removeNetwork(self):
        """Bring the bridge and VXLAN interface down and delete both."""
        ansiiprint.echo('Removing VNI {} device on interface {}'.format(self.vni, self.vni_dev), '', 'o')
        self._run(['ip', 'link', 'set', self.bridge_nic, 'down'])
        self._run(['ip', 'link', 'set', self.vxlan_nic, 'down'])
        self._run(['brctl', 'delif', self.bridge_nic, self.vxlan_nic])
        self._run(['brctl', 'delbr', self.bridge_nic])
        self._run(['ip', 'link', 'delete', self.vxlan_nic])

    def updateCorosyncResource(self):
        """Timer job: rebuild the VIP resource if watched data has changed."""
        if not self.watch_change:
            return
        # Rebuild the resource; createCorosyncResource clears watch_change
        if self.corosync_provisioned:
            self.removeCorosyncResource()
        self.createCorosyncResource()

    def provision(self):
        """Create the interfaces and VIP resource, then start the update timer."""
        self.createNetwork()
        self.createCorosyncResource()
        self.update_timer.start()

    def deprovision(self):
        """Stop the update timer, then remove the VIP resource and interfaces."""
        self.update_timer.shutdown()
        if self.corosync_provisioned:
            self.removeCorosyncResource()
        self.removeNetwork()

# ---------------------------------------------------------------------------
# Patch boundary: the remainder of this patch adds one more file,
#   router-daemon/pvcrd/__init__.py
#   (new file mode 100644, index 00000000..e69de29b, empty file)
# ---------------------------------------------------------------------------