Move to YAML config and allow split functions

1. Move to a YAML-based configuration format instead of the original
   INI-based configuration to facilitate better organization and
   readability.
2. Modify the daemon to be able to operate in several modes based
   on configuration flags. Either networking or storage functions
   can be disabled using the configuration, allowing the PVC system
   to be used only for hypervisor management if required.
This commit is contained in:
Joshua Boniface 2019-03-11 01:44:26 -04:00
parent 994315afa3
commit d90fb07240
11 changed files with 570 additions and 545 deletions

View File

@ -1 +1,2 @@
client-cli/pvc.py usr/share/pvc client-cli/pvc.py usr/share/pvc
client-cli/pvc_init.py usr/share/pvc

View File

@ -1,4 +1,4 @@
node-daemon/pvcd.py usr/share/pvc node-daemon/pvcd.py usr/share/pvc
node-daemon/pvcd.service lib/systemd/system node-daemon/pvcd.service lib/systemd/system
node-daemon/pvcd.conf.sample etc/pvc node-daemon/pvcd.sample.yaml etc/pvc
node-daemon/pvcd usr/share/pvc node-daemon/pvcd usr/share/pvc

2
debian/rules vendored
View File

@ -6,6 +6,8 @@
%: %:
dh $@ dh $@
override_dh_auto_clean:
find . -name "__pycache__" -exec rm -r {} \; || true
# If you need to rebuild the Sphinx documentation # If you need to rebuild the Sphinx documentation
# Add spinxdoc to the dh --with line # Add spinxdoc to the dh --with line

View File

@ -1 +1 @@
3.0 (quilt) 1.0

View File

@ -1,98 +0,0 @@
# pvcd cluster configuration file example
#
# This configuration file specifies details for this node in PVC. Multiple node
# blocks can be added but only the one matching the current system nodename will
# be used by the local daemon. Default values are not supported; the values in
# this sample configuration are considered defaults and, with adjustment of the
# nodename section and coordinators list, can be used as-is on a Debian system.
#
# The following values are required for each node or in a default section:
# coordinators: a CSV list of the short hostnames of the coordinator nodes; these nodes become
# members of the Zookeeper cluster, can act as routers, and perform additional
# special functions in a cluster; ideally there are 3 coordinators, though 5
# coordinators are supported
# cluster_domain: the node cluster domain, set during bootstrap
# storage_domain: the node storage domain, set during bootstrap
# dynamic_directory: the ramdisk directory for PVC to store its dynamic configurations,
# usually under /run or /var/run
# log_directory: the logging directory, usually under /var/log
# file_logging = whether to log daemon to a file (pvc.log under log_directory) in addition to
# normal stdout printing
# keepalive_interval: the interval between keepalives and for dead node timeout (defaults to 5)
# fence_intervals: the number of keepalive_intervals without Zookeeper contact before this node
# will consider another node dead and fence it (defaults to 6, i.e. 30s)
# suicide_intervals: the number of keepalive_intervals without Zookeeper contact before this
# node will consider itself failed and terminate all running VMs (defaults
# to 0, i.e. disabled); should be less than "fence_intervals"
# successful_fence: the action to take on a successful fencing operation; can be "none" or
# "migrate" (defaults to "migrate")
# failed_fence: the action to take on a failed fencing operation; can be "none" or "migrate"
# (defaults to "none"); "migrate" requires "suicide_intervals" to be set)
# NOTE: POTENTIALLY DANGEROUS - see README for details
# migration_target_selector: the method to use to select target nodes during a virtual machine
# flush action; can be "mem", "load", "vcpus", or "vms" (defaults
# to "mem"); the best choice based on this field is selected for
# each VM to be migrated
# pdns_mysql_host: the host address (usually "localhost") of the PowerDNS zone aggregator
# backend database
# pdns_mysql_port: the port (usually "3306") of the PowerDNS zone aggregator backend database
# pdns_mysql_dbname: the database name (usually "pvcdns") of the PowerDNS zone aggregator
# backend database
# pdns_mysql_user: the client username (usually "pvcdns") of the PowerDNS zone aggregator
# backend database
# pdns_mysql_password: the client user password (randomly generated at cluster bootstrap)
# of the PowerDNS zone aggregator backend database
# vni_floating_ip: the IP address (in CIDR format) for the floating IP on the VNI network,
# used to provide a consistent view of the dynamic primary node to other
# machines in the VNI network, e.g. for slaving DNS or sending in routes.
# upstream_floating_ip: the IP address (in CIDR format) for the floating IP on the upstream
# network, used to provide a consistent view of the dynamic primary
# node to machines in the upstream network, e.g. for slaving DNS or
# sending in routes.
# The following values are required for each node specifically (usually node-unique):
# vni_dev: the lower-level network device to bind VNI traffic to
# vni_dev_ip: the IP address (in CIDR format) of the lower-level network device, used by frr
# to communicate between nodes and pass routes between them.
# storage_dev: the lower-level network device to bind storage traffic to
# storage_dev_ip: the IP address (in CIDR format) of the lower-level network device, used by
# Ceph for storage traffic (both monitor and OSD).
# upstream_dev: the lower-level network device to bind coordinator upstream traffic to
# upstream_dev_ip: the IP address (in CIDR format) of the upstream network device, used by
# the system for upstream traffic flow.
# ipmi_hostname: the IPMI hostname for fencing (defaults to <shortname>-lom.<domain>)
# ipmi_username: username to connect to IPMI
# ipmi_password: password to connect to IPMI
#
# Copy this example to /etc/pvc/pvcd.conf and edit to your needs
[default]
coordinators = pvc-hv1,pvc-hv2,pvc-hv3
cluster_domain = i.bonilan.net
storage_domain = sx.bonilan.net
dynamic_directory = /run/pvc
log_directory = /var/log/pvc
file_logging = True
keepalive_interval = 5
fence_intervals = 6
suicide_intervals = 0
successful_fence = migrate
failed_fence = none
migration_target_selector = mem
pdns_mysql_host = localhost
pdns_mysql_port = 3306
pdns_mysql_dbname = pvcdns
pdns_mysql_user = pvcdns
pdns_mysql_password = pvcdns
vni_floating_ip = 10.255.0.254/24
upstream_floating_ip = 10.101.0.30/24
[pvc-hv1]
vni_dev = ens4
vni_dev_ip = 10.255.0.1/24
storage_dev = ens4
storage_dev_ip = 10.254.0.1/24
upstream_dev = ens2
upstream_dev_ip = 10.101.0.31/24
ipmi_username = admin
ipmi_password = Passw0rd
ipmi_hostname = pvc-hv1-lom

View File

@ -11,6 +11,15 @@
pvc: pvc:
# node: The (short) hostname of the node, set during provisioning # node: The (short) hostname of the node, set during provisioning
node: pvc-hv1 node: pvc-hv1
# functions: The daemon functions to enable
functions:
# enable_hypervisor: Enable or disable hypervisor functionality
# This should never be False except in very advanced usecases
enable_hypervisor: True
# enable_networking: Enable or disable virtual networking and routing functionality
enable_networking: True
# enable_storage: Enable or disable Ceph storage management functionality
enable_storage: True
# cluster: Cluster-level configuration # cluster: Cluster-level configuration
cluster: cluster:
# coordinators: The list of cluster coordinator hostnames # coordinators: The list of cluster coordinator hostnames
@ -19,6 +28,7 @@ pvc:
- pvc-hv2 - pvc-hv2
- pvc-hv3 - pvc-hv3
# networks: Cluster-level network configuration # networks: Cluster-level network configuration
# OPTIONAL if enable_networking: False
networks: networks:
# upstream: Upstream routed network for in- and out-bound upstream networking # upstream: Upstream routed network for in- and out-bound upstream networking
upstream: upstream:
@ -43,8 +53,9 @@ pvc:
# network: Cluster storage network block # network: Cluster storage network block
network: "10.254.0.0/24" network: "10.254.0.0/24"
# floating_ip: Cluster storage floating IP address for the primary coordinator # floating_ip: Cluster storage floating IP address for the primary coordinator
floating_ip: "10.255.0.254/24" floating_ip: "10.254.0.254/24"
# coordinator: Coordinator-specific configuration # coordinator: Coordinator-specific configuration
# OPTIONAL if enable_networking: False
coordinator: coordinator:
# dns: DNS aggregator subsystem # dns: DNS aggregator subsystem
dns: dns:
@ -105,6 +116,7 @@ pvc:
# stdout_logging: Enable or disable logging to stdout (i.e. journald) # stdout_logging: Enable or disable logging to stdout (i.e. journald)
stdout_logging: True stdout_logging: True
# networking: PVC networking configuration # networking: PVC networking configuration
# OPTIONAL if enable_networking: False
networking: networking:
# devices: Interface devices configuration # devices: Interface devices configuration
devices: devices:

View File

@ -7,7 +7,7 @@ After = network-online.target libvirtd.service zookeeper.service mariadb.service
Type = simple Type = simple
WorkingDirectory = /usr/share/pvc WorkingDirectory = /usr/share/pvc
Environment = PYTHONUNBUFFERED=true Environment = PYTHONUNBUFFERED=true
Environment = PVCD_CONFIG_FILE=/etc/pvc/pvcd.conf Environment = PVCD_CONFIG_FILE=/etc/pvc/pvcd.yaml
ExecStart = /usr/share/pvc/pvcd.py ExecStart = /usr/share/pvc/pvcd.py
Restart = never Restart = never

View File

@ -39,7 +39,9 @@ import time
import re import re
import configparser import configparser
import threading import threading
import yaml
import json import json
import ipaddress
import apscheduler.schedulers.background import apscheduler.schedulers.background
import pvcd.log as log import pvcd.log as log
@ -105,7 +107,10 @@ myfqdn = socket.gethostname()
#myfqdn = 'pvc-hv1.domain.net' #myfqdn = 'pvc-hv1.domain.net'
myhostname = myfqdn.split('.', 1)[0] myhostname = myfqdn.split('.', 1)[0]
mydomainname = ''.join(myfqdn.split('.', 1)[1:]) mydomainname = ''.join(myfqdn.split('.', 1)[1:])
mynodeid = re.findall(r'\d+', myhostname)[-1] try:
mynodeid = re.findall(r'\d+', myhostname)[-1]
except IndexError:
mynodeid = 1
# Gather useful data about our host # Gather useful data about our host
# Static data format: 'cpu_count', 'arch', 'os', 'kernel' # Static data format: 'cpu_count', 'arch', 'os', 'kernel'
@ -115,64 +120,116 @@ staticdata.append(subprocess.run(['uname', '-r'], stdout=subprocess.PIPE).stdout
staticdata.append(subprocess.run(['uname', '-o'], stdout=subprocess.PIPE).stdout.decode('ascii').strip()) staticdata.append(subprocess.run(['uname', '-o'], stdout=subprocess.PIPE).stdout.decode('ascii').strip())
staticdata.append(subprocess.run(['uname', '-m'], stdout=subprocess.PIPE).stdout.decode('ascii').strip()) staticdata.append(subprocess.run(['uname', '-m'], stdout=subprocess.PIPE).stdout.decode('ascii').strip())
# Config values dictionary
config_values = [
'coordinators',
'cluster_domain',
'storage_domain',
'dynamic_directory',
'log_directory',
'file_logging',
'keepalive_interval',
'fence_intervals',
'suicide_intervals',
'successful_fence',
'failed_fence',
'migration_target_selector',
'pdns_mysql_host',# = 'localhost'
'pdns_mysql_port',# = 3306
'pdns_mysql_dbname',# = 'pvcdns'
'pdns_mysql_user',# = 'pvcdns'
'pdns_mysql_password',# = 'pvcdns'
'vni_dev',
'vni_dev_ip',
'vni_floating_ip',
'storage_dev',
'storage_dev_ip',
'upstream_dev',
'upstream_dev_ip',
'upstream_floating_ip',
'ipmi_hostname',
'ipmi_username',
'ipmi_password'
]
# Read and parse the config file # Read and parse the config file
def readConfig(pvcd_config_file, myhostname): def readConfig(pvcd_config_file, myhostname):
print('Loading configuration from file "{}"'.format(pvcd_config_file)) print('Loading configuration from file "{}"'.format(pvcd_config_file))
o_config = configparser.ConfigParser() with open(pvcd_config_file, 'r') as cfgfile:
o_config.read(pvcd_config_file)
config = {}
try: try:
entries = o_config[myhostname] o_config = yaml.load(cfgfile)
except:
try:
entries = o_config['default']
except Exception as e: except Exception as e:
print('ERROR: Config file is not valid!') print('ERROR: Failed to parse configuration file: {}'.format(e))
exit(1) exit(1)
for entry in config_values: # Handle the basic config (hypervisor-only)
try: try:
config[entry] = entries[entry] config_general = {
except: 'coordinators': o_config['pvc']['cluster']['coordinators'],
try: 'enable_hypervisor': o_config['pvc']['functions']['enable_hypervisor'],
config[entry] = o_config['default'][entry] 'enable_networking': o_config['pvc']['functions']['enable_networking'],
except: 'enable_storage': o_config['pvc']['functions']['enable_storage'],
print('ERROR: Config file missing required value "{}" for this host!'.format(entry)) 'dynamic_directory': o_config['pvc']['system']['configuration']['directories']['dynamic_directory'],
'log_directory': o_config['pvc']['system']['configuration']['directories']['log_directory'],
'file_logging': o_config['pvc']['system']['configuration']['logging']['file_logging'],
'stdout_logging': o_config['pvc']['system']['configuration']['logging']['stdout_logging'],
'keepalive_interval': o_config['pvc']['system']['fencing']['intervals']['keepalive_interval'],
'fence_intervals': o_config['pvc']['system']['fencing']['intervals']['fence_intervals'],
'suicide_intervals': o_config['pvc']['system']['fencing']['intervals']['suicide_intervals'],
'successful_fence': o_config['pvc']['system']['fencing']['actions']['successful_fence'],
'failed_fence': o_config['pvc']['system']['fencing']['actions']['failed_fence'],
'migration_target_selector': o_config['pvc']['system']['migration']['target_selector'],
'ipmi_hostname': o_config['pvc']['system']['fencing']['ipmi']['host'],
'ipmi_username': o_config['pvc']['system']['fencing']['ipmi']['user'],
'ipmi_password': o_config['pvc']['system']['fencing']['ipmi']['pass']
}
except Exception as e:
print('ERROR: {}!'.format(e))
exit(1) exit(1)
config = config_general
# Handle the networking config
if config['enable_networking']:
try:
config_networking = {
'cluster_domain': o_config['pvc']['cluster']['networks']['cluster']['domain'],
'vni_floating_ip': o_config['pvc']['cluster']['networks']['cluster']['floating_ip'],
'vni_network': o_config['pvc']['cluster']['networks']['cluster']['network'],
'storage_domain': o_config['pvc']['cluster']['networks']['storage']['domain'],
'storage_floating_ip': o_config['pvc']['cluster']['networks']['storage']['floating_ip'],
'storage_network': o_config['pvc']['cluster']['networks']['storage']['network'],
'upstream_domain': o_config['pvc']['cluster']['networks']['upstream']['domain'],
'upstream_floating_ip': o_config['pvc']['cluster']['networks']['upstream']['floating_ip'],
'upstream_network': o_config['pvc']['cluster']['networks']['upstream']['network'],
'pdns_mysql_host': o_config['pvc']['coordinator']['dns']['database']['host'],
'pdns_mysql_port': o_config['pvc']['coordinator']['dns']['database']['port'],
'pdns_mysql_dbname': o_config['pvc']['coordinator']['dns']['database']['name'],
'pdns_mysql_user': o_config['pvc']['coordinator']['dns']['database']['user'],
'pdns_mysql_password': o_config['pvc']['coordinator']['dns']['database']['pass'],
'vni_dev': o_config['pvc']['system']['configuration']['networking']['devices']['cluster'],
'vni_dev_ip': o_config['pvc']['system']['configuration']['networking']['addresses']['cluster'],
'storage_dev': o_config['pvc']['system']['configuration']['networking']['devices']['storage'],
'storage_dev_ip': o_config['pvc']['system']['configuration']['networking']['addresses']['storage'],
'upstream_dev': o_config['pvc']['system']['configuration']['networking']['devices']['upstream'],
'upstream_dev_ip': o_config['pvc']['system']['configuration']['networking']['addresses']['upstream'],
}
except Exception as e:
print('ERROR: {}!'.format(e))
exit(1)
config = {**config, **config_networking}
# Create the by-id address entries
for net in [ 'vni',
'storage',
'upstream' ]:
address_key = '{}_dev_ip'.format(net)
floating_key = '{}_floating_ip'.format(net)
network_key = '{}_network'.format(net)
# Verify the network provided is valid
try:
network = ipaddress.ip_network(config[network_key])
except Exception as e:
print('ERROR: Network address {} for {} is not valid!'.format(config[network_key], network_key))
exit(1)
# If we should be autoselected
if config[address_key] == 'by-id':
# Construct an IP from the relevant network
# The NodeID starts at 1, but indexes start at 0
address_id = int(mynodeid) - 1
# Grab the nth address from the network
config[address_key] = list(network.hosts())[address_id]
# Verify that the floating IP is valid
try:
# Set the ipaddr
floating_addr = ipaddress.ip_address(config[floating_key].split('/')[0])
# Verify we're in the network
if not floating_addr in list(network.hosts()):
raise
except Exception as e:
print('ERROR: Floating address {} for {} is not valid!'.format(config[floating_key], floating_key))
exit(1)
# Handle the storage config
if config['enable_storage']:
try:
config_storage = dict()
except Exception as e:
print('ERROR: {}!'.format(e))
exit(1)
config = {**config, **config_storage}
# Handle an empty ipmi_hostname # Handle an empty ipmi_hostname
if config['ipmi_hostname'] == '': if config['ipmi_hostname'] == '':
@ -183,6 +240,11 @@ def readConfig(pvcd_config_file, myhostname):
# Get the config object from readConfig() # Get the config object from readConfig()
config = readConfig(pvcd_config_file, myhostname) config = readConfig(pvcd_config_file, myhostname)
# Handle the enable values
enable_hypervisor = config['enable_hypervisor']
enable_networking = config['enable_networking']
enable_storage = config['enable_storage']
############################################################################### ###############################################################################
# PHASE 1b - Prepare filesystem directories # PHASE 1b - Prepare filesystem directories
############################################################################### ###############################################################################
@ -242,32 +304,33 @@ logger.out('Starting pvcd on host {}'.format(myfqdn), state='s')
# PHASE 1d - Prepare sysctl for pvcd # PHASE 1d - Prepare sysctl for pvcd
############################################################################### ###############################################################################
# Enable routing functions if enable_networking:
common.run_os_command('sysctl net.ipv4.ip_forward=1') # Enable routing functions
common.run_os_command('sysctl net.ipv6.ip_forward=1') common.run_os_command('sysctl net.ipv4.ip_forward=1')
common.run_os_command('sysctl net.ipv6.ip_forward=1')
# Send redirects # Send redirects
common.run_os_command('sysctl net.ipv4.conf.all.send_redirects=1') common.run_os_command('sysctl net.ipv4.conf.all.send_redirects=1')
common.run_os_command('sysctl net.ipv4.conf.default.send_redirects=1') common.run_os_command('sysctl net.ipv4.conf.default.send_redirects=1')
common.run_os_command('sysctl net.ipv6.conf.all.send_redirects=1') common.run_os_command('sysctl net.ipv6.conf.all.send_redirects=1')
common.run_os_command('sysctl net.ipv6.conf.default.send_redirects=1') common.run_os_command('sysctl net.ipv6.conf.default.send_redirects=1')
# Accept source routes # Accept source routes
common.run_os_command('sysctl net.ipv4.conf.all.accept_source_route=1') common.run_os_command('sysctl net.ipv4.conf.all.accept_source_route=1')
common.run_os_command('sysctl net.ipv4.conf.default.accept_source_route=1') common.run_os_command('sysctl net.ipv4.conf.default.accept_source_route=1')
common.run_os_command('sysctl net.ipv6.conf.all.accept_source_route=1') common.run_os_command('sysctl net.ipv6.conf.all.accept_source_route=1')
common.run_os_command('sysctl net.ipv6.conf.default.accept_source_route=1') common.run_os_command('sysctl net.ipv6.conf.default.accept_source_route=1')
# Disable RP filtering on the VNI dev interface (to allow traffic pivoting from primary) # Disable RP filtering on the VNI dev interface (to allow traffic pivoting from primary)
common.run_os_command('sysctl net.ipv4.conf.{}.rp_filter=0'.format(config['vni_dev'])) common.run_os_command('sysctl net.ipv4.conf.{}.rp_filter=0'.format(config['vni_dev']))
common.run_os_command('sysctl net.ipv6.conf.{}.rp_filter=0'.format(config['vni_dev'])) common.run_os_command('sysctl net.ipv6.conf.{}.rp_filter=0'.format(config['vni_dev']))
############################################################################### ###############################################################################
# PHASE 2 - Determine coordinator mode and start Zookeeper on coordinators # PHASE 2 - Determine coordinator mode and start Zookeeper on coordinators
############################################################################### ###############################################################################
# What is the list of coordinator hosts # What is the list of coordinator hosts
coordinator_nodes = config['coordinators'].split(',') coordinator_nodes = config['coordinators']
if myhostname in coordinator_nodes: if myhostname in coordinator_nodes:
# We are indeed a coordinator host # We are indeed a coordinator host
@ -412,7 +475,11 @@ else:
transaction.commit() transaction.commit()
# Check that the primary key exists, and create it with us as master if not # Check that the primary key exists, and create it with us as master if not
current_primary = zkhandler.readdata(zk_conn, '/primary_node') try:
current_primary = zkhandler.readdata(zk_conn, '/primary_node')
except kazoo.exceptions.NoNodeError:
current_primary = 'none'
if current_primary and current_primary != 'none': if current_primary and current_primary != 'none':
logger.out('Current primary node is {}{}{}.'.format(logger.fmt_blue, current_primary, logger.fmt_end), state='i') logger.out('Current primary node is {}{}{}.'.format(logger.fmt_blue, current_primary, logger.fmt_end), state='i')
else: else:
@ -424,30 +491,31 @@ else:
# PHASE 6 - Create local IP addresses for static networks # PHASE 6 - Create local IP addresses for static networks
############################################################################### ###############################################################################
# VNI configuration if enable_networking:
vni_dev = config['vni_dev'] # VNI configuration
vni_dev_ip = config['vni_dev_ip'] vni_dev = config['vni_dev']
logger.out('Setting up VNI network interface {}'.format(vni_dev, vni_dev_ip), state='i') vni_dev_ip = config['vni_dev_ip']
common.run_os_command('ip link set {} mtu 9000 up'.format(vni_dev)) logger.out('Setting up VNI network interface {}'.format(vni_dev, vni_dev_ip), state='i')
common.run_os_command('ip link set {} mtu 9000 up'.format(vni_dev))
# Cluster bridge configuration # Cluster bridge configuration
logger.out('Setting up cluster network bridge on interface {} with IP {}'.format(vni_dev, vni_dev_ip), state='i') logger.out('Setting up cluster network bridge on interface {} with IP {}'.format(vni_dev, vni_dev_ip), state='i')
common.run_os_command('brctl addbr brcluster') common.run_os_command('brctl addbr brcluster')
common.run_os_command('brctl addif brcluster {}'.format(vni_dev)) common.run_os_command('brctl addif brcluster {}'.format(vni_dev))
common.run_os_command('ip link set brcluster mtu 9000 up') common.run_os_command('ip link set brcluster mtu 9000 up')
common.run_os_command('ip address add {} dev {}'.format(vni_dev_ip, 'brcluster')) common.run_os_command('ip address add {} dev {}'.format(vni_dev_ip, 'brcluster'))
# Storage configuration # Storage configuration
storage_dev = config['storage_dev'] storage_dev = config['storage_dev']
if storage_dev == vni_dev: if storage_dev == vni_dev:
storage_dev = 'brcluster' storage_dev = 'brcluster'
storage_dev_ip = config['storage_dev_ip'] storage_dev_ip = config['storage_dev_ip']
logger.out('Setting up Storage network on interface {} with IP {}'.format(storage_dev, storage_dev_ip), state='i') logger.out('Setting up Storage network on interface {} with IP {}'.format(storage_dev, storage_dev_ip), state='i')
common.run_os_command('ip link set {} mtu 9000 up'.format(storage_dev)) common.run_os_command('ip link set {} mtu 9000 up'.format(storage_dev))
common.run_os_command('ip address add {} dev {}'.format(storage_dev_ip, storage_dev)) common.run_os_command('ip address add {} dev {}'.format(storage_dev_ip, storage_dev))
# Upstream configuration # Upstream configuration
if config['daemon_mode'] == 'coordinator': if config['daemon_mode'] == 'coordinator':
upstream_dev = config['upstream_dev'] upstream_dev = config['upstream_dev']
upstream_dev_ip = config['upstream_dev_ip'] upstream_dev_ip = config['upstream_dev_ip']
logger.out('Setting up Upstream network on interface {} with IP {}'.format(upstream_dev, upstream_dev_ip), state='i') logger.out('Setting up Upstream network on interface {} with IP {}'.format(upstream_dev, upstream_dev_ip), state='i')
@ -458,18 +526,19 @@ if config['daemon_mode'] == 'coordinator':
# PHASE 7a - Ensure Libvirt is running on the local host # PHASE 7a - Ensure Libvirt is running on the local host
############################################################################### ###############################################################################
# Start the zookeeper service using systemctl if enable_hypervisor:
logger.out('Starting Libvirt daemon', state='i') # Start the zookeeper service using systemctl
common.run_os_command('systemctl start libvirtd.service') logger.out('Starting Libvirt daemon', state='i')
time.sleep(1) common.run_os_command('systemctl start libvirtd.service')
time.sleep(1)
# Check that libvirtd is listening TCP # Check that libvirtd is listening TCP
libvirt_check_name = "qemu+tcp://127.0.0.1:16509/system" libvirt_check_name = "qemu+tcp://127.0.0.1:16509/system"
logger.out('Connecting to Libvirt daemon at {}'.format(libvirt_check_name), state='i') logger.out('Connecting to Libvirt daemon at {}'.format(libvirt_check_name), state='i')
try: try:
lv_conn = libvirt.open(libvirt_check_name) lv_conn = libvirt.open(libvirt_check_name)
lv_conn.close() lv_conn.close()
except Exception as e: except Exception as e:
logger.out('ERROR: Failed to connect to Libvirt daemon: {}'.format(e), state='e') logger.out('ERROR: Failed to connect to Libvirt daemon: {}'.format(e), state='e')
exit(1) exit(1)
@ -477,7 +546,8 @@ except Exception as e:
# PHASE 7b - Ensure Ceph is running on the local host # PHASE 7b - Ensure Ceph is running on the local host
############################################################################### ###############################################################################
if config['daemon_mode'] == 'coordinator': if enable_storage:
if config['daemon_mode'] == 'coordinator':
common.run_os_command('systemctl start ceph-mon@{}'.format(myhostname)) common.run_os_command('systemctl start ceph-mon@{}'.format(myhostname))
common.run_os_command('systemctl start ceph-mgr@{}'.format(myhostname)) common.run_os_command('systemctl start ceph-mgr@{}'.format(myhostname))
@ -485,44 +555,45 @@ if config['daemon_mode'] == 'coordinator':
# PHASE 7c - Ensure NFT is running on the local host # PHASE 7c - Ensure NFT is running on the local host
############################################################################### ###############################################################################
logger.out("Creating NFT firewall configuration", state='i') if enable_networking:
logger.out("Creating NFT firewall configuration", state='i')
# Create our config dirs # Create our config dirs
common.run_os_command( common.run_os_command(
'/bin/mkdir --parents {}/networks'.format( '/bin/mkdir --parents {}/networks'.format(
config['nft_dynamic_directory'] config['nft_dynamic_directory']
) )
) )
common.run_os_command( common.run_os_command(
'/bin/mkdir --parents {}/static'.format( '/bin/mkdir --parents {}/static'.format(
config['nft_dynamic_directory'] config['nft_dynamic_directory']
) )
) )
common.run_os_command( common.run_os_command(
'/bin/mkdir --parents {}'.format( '/bin/mkdir --parents {}'.format(
config['nft_dynamic_directory'] config['nft_dynamic_directory']
) )
) )
# Set up the basic features of the nftables firewall # Set up the basic features of the nftables firewall
nftables_base_rules = """# Base rules nftables_base_rules = """# Base rules
flush ruleset flush ruleset
# Add the filter table and chains # Add the filter table and chains
add table inet filter add table inet filter
add chain inet filter forward {{ type filter hook forward priority 0; }} add chain inet filter forward {{ type filter hook forward priority 0; }}
add chain inet filter input {{ type filter hook input priority 0; }} add chain inet filter input {{ type filter hook input priority 0; }}
# Include static rules and network rules # Include static rules and network rules
include "{rulesdir}/static/*" include "{rulesdir}/static/*"
include "{rulesdir}/networks/*" include "{rulesdir}/networks/*"
""".format( """.format(
rulesdir=config['nft_dynamic_directory'] rulesdir=config['nft_dynamic_directory']
) )
# Write the basic firewall config # Write the basic firewall config
nftables_base_filename = '{}/base.nft'.format(config['nft_dynamic_directory']) nftables_base_filename = '{}/base.nft'.format(config['nft_dynamic_directory'])
with open(nftables_base_filename, 'w') as nfbasefile: with open(nftables_base_filename, 'w') as nfbasefile:
nfbasefile.write(nftables_base_rules) nfbasefile.write(nftables_base_rules)
common.reload_firewall_rules(logger, nftables_base_filename) common.reload_firewall_rules(logger, nftables_base_filename)
############################################################################### ###############################################################################
# PHASE 7d - Ensure DNSMASQ is not running # PHASE 7d - Ensure DNSMASQ is not running
@ -547,9 +618,12 @@ domain_list = []
osd_list = [] osd_list = []
pool_list = [] pool_list = []
# Create an instance of the DNS Aggregator if we're a coordinator if enable_networking:
if config['daemon_mode'] == 'coordinator': # Create an instance of the DNS Aggregator if we're a coordinator
if config['daemon_mode'] == 'coordinator':
dns_aggregator = DNSAggregatorInstance.DNSAggregatorInstance(zk_conn, config, logger) dns_aggregator = DNSAggregatorInstance.DNSAggregatorInstance(zk_conn, config, logger)
else:
dns_aggregator = None
else: else:
dns_aggregator = None dns_aggregator = None
@ -605,9 +679,10 @@ def update_primary(new_primary, stat, event=''):
for node in d_node: for node in d_node:
d_node[node].primary_node = new_primary d_node[node].primary_node = new_primary
# Network objects if enable_networking:
@zk_conn.ChildrenWatch('/networks') # Network objects
def update_networks(new_network_list): @zk_conn.ChildrenWatch('/networks')
def update_networks(new_network_list):
global network_list, d_network global network_list, d_network
# Add any missing networks to the list # Add any missing networks to the list
@ -644,9 +719,10 @@ def update_networks(new_network_list):
for node in d_node: for node in d_node:
d_node[node].update_network_list(d_network) d_node[node].update_network_list(d_network)
# VM domain objects if enable_hypervisor:
@zk_conn.ChildrenWatch('/domains') # VM domain objects
def update_domains(new_domain_list): @zk_conn.ChildrenWatch('/domains')
def update_domains(new_domain_list):
global domain_list, d_domain global domain_list, d_domain
# Add any missing domains to the list # Add any missing domains to the list
@ -668,9 +744,10 @@ def update_domains(new_domain_list):
for node in d_node: for node in d_node:
d_node[node].update_domain_list(d_domain) d_node[node].update_domain_list(d_domain)
# Ceph OSD provisioning key if enable_storage:
@zk_conn.DataWatch('/ceph/cmd') # Ceph OSD provisioning key
def cmd(data, stat, event=''): @zk_conn.DataWatch('/ceph/cmd')
def cmd(data, stat, event=''):
if data: if data:
data = data.decode('ascii') data = data.decode('ascii')
else: else:
@ -679,9 +756,9 @@ def cmd(data, stat, event=''):
if data: if data:
CephInstance.run_command(zk_conn, data, d_osd) CephInstance.run_command(zk_conn, data, d_osd)
# OSD objects # OSD objects
@zk_conn.ChildrenWatch('/ceph/osds') @zk_conn.ChildrenWatch('/ceph/osds')
def update_osds(new_osd_list): def update_osds(new_osd_list):
global osd_list, d_osd global osd_list, d_osd
# Add any missing OSDs to the list # Add any missing OSDs to the list
@ -699,9 +776,9 @@ def update_osds(new_osd_list):
osd_list = new_osd_list osd_list = new_osd_list
logger.out('{}OSD list:{} {}'.format(logger.fmt_blue, logger.fmt_end, ' '.join(osd_list)), state='i') logger.out('{}OSD list:{} {}'.format(logger.fmt_blue, logger.fmt_end, ' '.join(osd_list)), state='i')
# Pool objects # Pool objects
@zk_conn.ChildrenWatch('/ceph/pools') @zk_conn.ChildrenWatch('/ceph/pools')
def update_pools(new_pool_list): def update_pools(new_pool_list):
global pool_list, d_pool global pool_list, d_pool
# Add any missing Pools to the list # Add any missing Pools to the list
@ -742,6 +819,7 @@ def update_zookeeper():
if zkhandler.readdata(zk_conn, '/primary_node') != this_node.name: if zkhandler.readdata(zk_conn, '/primary_node') != this_node.name:
zkhandler.writedata(zk_conn, {'/primary_node': this_node.name}) zkhandler.writedata(zk_conn, {'/primary_node': this_node.name})
if enable_storage:
# Get Ceph cluster health (for local printing) # Get Ceph cluster health (for local printing)
if debug: if debug:
print("Get Ceph cluster health (for local printing)") print("Get Ceph cluster health (for local printing)")
@ -882,12 +960,12 @@ def update_zookeeper():
}) })
osds_this_node += 1 osds_this_node += 1
memalloc = 0
vcpualloc = 0
if enable_hypervisor:
# Toggle state management of dead VMs to restart them # Toggle state management of dead VMs to restart them
if debug: if debug:
print("Toggle state management of dead VMs to restart them") print("Toggle state management of dead VMs to restart them")
memalloc = 0
vcpualloc = 0
for domain, instance in this_node.d_domain.items(): for domain, instance in this_node.d_domain.items():
if domain in this_node.domain_list: if domain in this_node.domain_list:
# Add the allocated memory to our memalloc value # Add the allocated memory to our memalloc value
@ -929,7 +1007,10 @@ def update_zookeeper():
this_node.memalloc = memalloc this_node.memalloc = memalloc
this_node.vcpualloc = vcpualloc this_node.vcpualloc = vcpualloc
this_node.cpuload = os.getloadavg()[0] this_node.cpuload = os.getloadavg()[0]
if enable_hypervisor:
this_node.domains_count = len(lv_conn.listDomainsID()) this_node.domains_count = len(lv_conn.listDomainsID())
else:
this_node.domains_count = 0
keepalive_time = int(time.time()) keepalive_time = int(time.time())
try: try:
zkhandler.writedata(zk_conn, { zkhandler.writedata(zk_conn, {
@ -946,6 +1027,7 @@ def update_zookeeper():
logger.out('Failed to set keepalive data', state='e') logger.out('Failed to set keepalive data', state='e')
return return
if enable_hypervisor:
# Close the Libvirt connection # Close the Libvirt connection
lv_conn.close() lv_conn.close()
@ -1005,6 +1087,7 @@ def update_zookeeper():
netcount=len(network_list) netcount=len(network_list)
), ),
) )
if enable_storage:
logger.out( logger.out(
'{bold}Ceph cluster status:{nofmt} {health_colour}{health}{nofmt} ' '{bold}Ceph cluster status:{nofmt} {health_colour}{health}{nofmt} '
'{bold}Total OSDs:{nofmt} {total_osds} ' '{bold}Total OSDs:{nofmt} {total_osds} '

View File

@ -66,10 +66,18 @@ class NodeInstance(object):
self.memalloc = 0 self.memalloc = 0
self.vcpualloc = 0 self.vcpualloc = 0
# Floating upstreams # Floating upstreams
if self.config['enable_networking']:
self.vni_dev = self.config['vni_dev'] self.vni_dev = self.config['vni_dev']
self.vni_ipaddr, self.vni_cidrnetmask = self.config['vni_floating_ip'].split('/') self.vni_ipaddr, self.vni_cidrnetmask = self.config['vni_floating_ip'].split('/')
self.upstream_dev = self.config['upstream_dev'] self.upstream_dev = self.config['upstream_dev']
self.upstream_ipaddr, self.upstream_cidrnetmask = self.config['upstream_floating_ip'].split('/') self.upstream_ipaddr, self.upstream_cidrnetmask = self.config['upstream_floating_ip'].split('/')
else:
self.vni_dev = None
self.vni_ipaddr = None
self.vni_cidrnetmask = None
self.upstream_dev = None
self.upstream_ipaddr = None
self.upstream_cidrnetmask = None
# Flags # Flags
self.inflush = False self.inflush = False
@ -240,6 +248,7 @@ class NodeInstance(object):
# Routing primary/secondary states # Routing primary/secondary states
def become_secondary(self): def become_secondary(self):
if self.config['enable_networking']:
self.logger.out('Setting router {} to secondary state'.format(self.name), state='i') self.logger.out('Setting router {} to secondary state'.format(self.name), state='i')
self.logger.out('Network list: {}'.format(', '.join(self.network_list))) self.logger.out('Network list: {}'.format(', '.join(self.network_list)))
time.sleep(1) time.sleep(1)
@ -250,6 +259,7 @@ class NodeInstance(object):
self.dns_aggregator.stop_aggregator() self.dns_aggregator.stop_aggregator()
def become_primary(self): def become_primary(self):
if self.config['enable_networking']:
self.logger.out('Setting router {} to primary state'.format(self.name), state='i') self.logger.out('Setting router {} to primary state'.format(self.name), state='i')
self.logger.out('Network list: {}'.format(', '.join(self.network_list))) self.logger.out('Network list: {}'.format(', '.join(self.network_list)))
self.createFloatingAddresses() self.createFloatingAddresses()

13
node-daemon/pvcd/fixrbdlocks Executable file
View File

@ -0,0 +1,13 @@
#!/bin/bash
for disk in $( sudo rbd list ${BLSE_STORAGE_POOL_VM} | grep "^${vm}" ); do
echo -e " Disk: $disk"
locks="$( sudo rbd lock list ${BLSE_STORAGE_POOL_VM}/${disk} | grep '^client' )"
echo "${locks}"
if [[ -n "${locks}" ]]; then
echo -e " LOCK FOUND! Clearing."
locker="$( awk '{ print $1 }' <<<"${locks}" )"
id="$( awk '{ print $2" "$3 }' <<<"${locks}" )"
sudo rbd lock remove ${BLSE_STORAGE_POOL_VM}/${disk} "${id}" "${locker}"
fi
done

View File

@ -45,6 +45,8 @@ class Logger(object):
# We open the logfile for the duration of our session, but have a hup function # We open the logfile for the duration of our session, but have a hup function
self.writer = open(self.logfile, 'a', buffering=1) self.writer = open(self.logfile, 'a', buffering=1)
self.last_colour = self.fmt_cyan self.last_colour = self.fmt_cyan
else:
self.last_colour = ""
# Provide a hup function to close and reopen the writer # Provide a hup function to close and reopen the writer
def hup(self): def hup(self):