diff --git a/debian/pvc-client-cli.install b/debian/pvc-client-cli.install index b2ee9a60..91df889f 100644 --- a/debian/pvc-client-cli.install +++ b/debian/pvc-client-cli.install @@ -1 +1,2 @@ client-cli/pvc.py usr/share/pvc +client-cli/pvc_init.py usr/share/pvc diff --git a/debian/pvc-daemon.install b/debian/pvc-daemon.install index 067abc15..e7183947 100644 --- a/debian/pvc-daemon.install +++ b/debian/pvc-daemon.install @@ -1,4 +1,4 @@ node-daemon/pvcd.py usr/share/pvc node-daemon/pvcd.service lib/systemd/system -node-daemon/pvcd.conf.sample etc/pvc +node-daemon/pvcd.sample.yaml etc/pvc node-daemon/pvcd usr/share/pvc diff --git a/debian/rules b/debian/rules index da4dcf81..c155d70f 100755 --- a/debian/rules +++ b/debian/rules @@ -6,6 +6,8 @@ %: dh $@ +override_dh_auto_clean: + find . -name "__pycache__" -exec rm -r {} \; || true # If you need to rebuild the Sphinx documentation # Add spinxdoc to the dh --with line diff --git a/debian/source/format b/debian/source/format index 163aaf8d..d3827e75 100644 --- a/debian/source/format +++ b/debian/source/format @@ -1 +1 @@ -3.0 (quilt) +1.0 diff --git a/node-daemon/pvcd.conf.sample b/node-daemon/pvcd.conf.sample deleted file mode 100644 index d6646ddb..00000000 --- a/node-daemon/pvcd.conf.sample +++ /dev/null @@ -1,98 +0,0 @@ -# pvcd cluster configuration file example -# -# This configuration file specifies details for this node in PVC. Multiple node -# blocks can be added but only the one matching the current system nodename will -# be used by the local daemon. Default values are not supported; the values in -# this sample configuration are considered defaults and, with adjustment of the -# nodename section and coordinators list, can be used as-is on a Debian system. -# -# The following values are required for each node or in a default section: -# coordinators: a CSV list of the short hostnames of the coordinator nodes; these nodes become -# members of the Zookeeper cluster, can act as routers, and perform additional -# special functions in a cluster; ideally there are 3 coordinators, though 5 -# coordinators are supported -# cluster_domain: the node cluster domain, set during bootstrap -# storage_domain: the node storage domain, set during bootstrap -# dynamic_directory: the ramdisk directory for PVC to store its dynamic configurations, -# usually under /run or /var/run -# log_directory: the logging directory, usually under /var/log -# file_logging = whether to log daemon to a file (pvc.log under log_directory) in addition to -# normal stdout printing -# keepalive_interval: the interval between keepalives and for dead node timeout (defaults to 5) -# fence_intervals: the number of keepalive_intervals without Zookeeper contact before this node -# will consider another node dead and fence it (defaults to 6, i.e. 30s) -# suicide_intervals: the number of keepalive_intervals without Zookeeper contact before this -# node will consider itself failed and terminate all running VMs (defaults -# to 0, i.e. 
disabled); should be less than "fence_intervals" -# successful_fence: the action to take on a successful fencing operation; can be "none" or -# "migrate" (defaults to "migrate") -# failed_fence: the action to take on a failed fencing operation; can be "none" or "migrate" -# (defaults to "none"); "migrate" requires "suicide_intervals" to be set) -# NOTE: POTENTIALLY DANGEROUS - see README for details -# migration_target_selector: the method to use to select target nodes during a virtual machine -# flush action; can be "mem", "load", "vcpus", or "vms" (defaults -# to "mem"); the best choice based on this field is selected for -# each VM to be migrated -# pdns_mysql_host: the host address (usually "localhost") of the PowerDNS zone aggregator -# backend database -# pdns_mysql_port: the port (usually "3306") of the PowerDNS zone aggregator backend database -# pdns_mysql_dbname: the database name (usually "pvcdns") of the PowerDNS zone aggregator -# backend database -# pdns_mysql_user: the client username (usually "pvcdns") of the PowerDNS zone aggregator -# backend database -# pdns_mysql_password: the client user password (randomly generated at cluster bootstrap) -# of the PowerDNS zone aggregator backend database -# vni_floating_ip: the IP address (in CIDR format) for the floating IP on the VNI network, -# used to provide a consistent view of the dynamic primary node to other -# machines in the VNI network, e.g. for slaving DNS or sending in routes. -# upstream_floating_ip: the IP address (in CIDR format) for the floating IP on the upstream -# network, used to provide a consistent view of the dynamic primary -# node to machines in the upstream network, e.g. for slaving DNS or -# sending in routes. -# The following values are required for each node specifically (usually node-unique): -# vni_dev: the lower-level network device to bind VNI traffic to -# vni_dev_ip: the IP address (in CIDR format) of the lower-level network device, used by frr -# to communicate between nodes and pass routes between them. -# storage_dev: the lower-level network device to bind storage traffic to -# storage_dev_ip: the IP address (in CIDR format) of the lower-level network device, used by -# Ceph for storage traffic (both monitor and OSD). -# upstream_dev: the lower-level network device to bind coordinator upstream traffic to -# upstream_dev_ip: the IP address (in CIDR format) of the upstream network device, used by -# the system for upstream traffic flow. -# ipmi_hostname: the IPMI hostname for fencing (defaults to -lom.) 
-# ipmi_username: username to connect to IPMI -# ipmi_password: password to connect to IPMI -# -# Copy this example to /etc/pvc/pvcd.conf and edit to your needs - -[default] -coordinators = pvc-hv1,pvc-hv2,pvc-hv3 -cluster_domain = i.bonilan.net -storage_domain = sx.bonilan.net -dynamic_directory = /run/pvc -log_directory = /var/log/pvc -file_logging = True -keepalive_interval = 5 -fence_intervals = 6 -suicide_intervals = 0 -successful_fence = migrate -failed_fence = none -migration_target_selector = mem -pdns_mysql_host = localhost -pdns_mysql_port = 3306 -pdns_mysql_dbname = pvcdns -pdns_mysql_user = pvcdns -pdns_mysql_password = pvcdns -vni_floating_ip = 10.255.0.254/24 -upstream_floating_ip = 10.101.0.30/24 - -[pvc-hv1] -vni_dev = ens4 -vni_dev_ip = 10.255.0.1/24 -storage_dev = ens4 -storage_dev_ip = 10.254.0.1/24 -upstream_dev = ens2 -upstream_dev_ip = 10.101.0.31/24 -ipmi_username = admin -ipmi_password = Passw0rd -ipmi_hostname = pvc-hv1-lom diff --git a/node-daemon/pvcd.sample.yaml b/node-daemon/pvcd.sample.yaml index 64654da3..9a4bf83b 100644 --- a/node-daemon/pvcd.sample.yaml +++ b/node-daemon/pvcd.sample.yaml @@ -11,6 +11,15 @@ pvc: # node: The (short) hostname of the node, set during provisioning node: pvc-hv1 + # functions: The daemon functions to enable + functions: + # enable_hypervisor: Enable or disable hypervisor functionality + # This should never be False except in very advanced usecases + enable_hypervisor: True + # enable_networking: Enable or disable virtual networking and routing functionality + enable_networking: True + # enable_storage: Enable or disable Ceph storage management functionality + enable_storage: True # cluster: Cluster-level configuration cluster: # coordinators: The list of cluster coordinator hostnames @@ -19,6 +28,7 @@ pvc: - pvc-hv2 - pvc-hv3 # networks: Cluster-level network configuration + # OPTIONAL if enable_networking: False networks: # upstream: Upstream routed network for in- and out-bound upstream networking upstream: @@ -43,8 +53,9 @@ pvc: # network: Cluster storage network block network: "10.254.0.0/24" # floating_ip: Cluster storage floating IP address for the primary coordinator - floating_ip: "10.255.0.254/24" + floating_ip: "10.254.0.254/24" # coordinator: Coordinator-specific configuration + # OPTIONAL if enable_networking: False coordinator: # dns: DNS aggregator subsystem dns: @@ -105,6 +116,7 @@ pvc: # stdout_logging: Enable or disable logging to stdout (i.e. 
journald) stdout_logging: True # networking: PVC networking configuration + # OPTIONAL if enable_networking: False networking: # devices: Interface devices configuration devices: diff --git a/node-daemon/pvcd.service b/node-daemon/pvcd.service index f0769caf..8c2fbc27 100644 --- a/node-daemon/pvcd.service +++ b/node-daemon/pvcd.service @@ -7,7 +7,7 @@ After = network-online.target libvirtd.service zookeeper.service mariadb.service Type = simple WorkingDirectory = /usr/share/pvc Environment = PYTHONUNBUFFERED=true -Environment = PVCD_CONFIG_FILE=/etc/pvc/pvcd.conf +Environment = PVCD_CONFIG_FILE=/etc/pvc/pvcd.yaml ExecStart = /usr/share/pvc/pvcd.py Restart = never diff --git a/node-daemon/pvcd/Daemon.py b/node-daemon/pvcd/Daemon.py index 4174b331..461582bf 100644 --- a/node-daemon/pvcd/Daemon.py +++ b/node-daemon/pvcd/Daemon.py @@ -39,7 +39,9 @@ import time import re import configparser import threading +import yaml import json +import ipaddress import apscheduler.schedulers.background import pvcd.log as log @@ -105,7 +107,10 @@ myfqdn = socket.gethostname() #myfqdn = 'pvc-hv1.domain.net' myhostname = myfqdn.split('.', 1)[0] mydomainname = ''.join(myfqdn.split('.', 1)[1:]) -mynodeid = re.findall(r'\d+', myhostname)[-1] +try: + mynodeid = re.findall(r'\d+', myhostname)[-1] +except IndexError: + mynodeid = 1 # Gather useful data about our host # Static data format: 'cpu_count', 'arch', 'os', 'kernel' @@ -115,64 +120,116 @@ staticdata.append(subprocess.run(['uname', '-r'], stdout=subprocess.PIPE).stdout staticdata.append(subprocess.run(['uname', '-o'], stdout=subprocess.PIPE).stdout.decode('ascii').strip()) staticdata.append(subprocess.run(['uname', '-m'], stdout=subprocess.PIPE).stdout.decode('ascii').strip()) -# Config values dictionary -config_values = [ - 'coordinators', - 'cluster_domain', - 'storage_domain', - 'dynamic_directory', - 'log_directory', - 'file_logging', - 'keepalive_interval', - 'fence_intervals', - 'suicide_intervals', - 'successful_fence', - 'failed_fence', - 'migration_target_selector', - 'pdns_mysql_host',# = 'localhost' - 'pdns_mysql_port',# = 3306 - 'pdns_mysql_dbname',# = 'pvcdns' - 'pdns_mysql_user',# = 'pvcdns' - 'pdns_mysql_password',# = 'pvcdns' - 'vni_dev', - 'vni_dev_ip', - 'vni_floating_ip', - 'storage_dev', - 'storage_dev_ip', - 'upstream_dev', - 'upstream_dev_ip', - 'upstream_floating_ip', - 'ipmi_hostname', - 'ipmi_username', - 'ipmi_password' -] - # Read and parse the config file def readConfig(pvcd_config_file, myhostname): print('Loading configuration from file "{}"'.format(pvcd_config_file)) - o_config = configparser.ConfigParser() - o_config.read(pvcd_config_file) - config = {} - - try: - entries = o_config[myhostname] - except: + with open(pvcd_config_file, 'r') as cfgfile: try: - entries = o_config['default'] + o_config = yaml.load(cfgfile) except Exception as e: - print('ERROR: Config file is not valid!') + print('ERROR: Failed to parse configuration file: {}'.format(e)) exit(1) - for entry in config_values: + # Handle the basic config (hypervisor-only) + try: + config_general = { + 'coordinators': o_config['pvc']['cluster']['coordinators'], + 'enable_hypervisor': o_config['pvc']['functions']['enable_hypervisor'], + 'enable_networking': o_config['pvc']['functions']['enable_networking'], + 'enable_storage': o_config['pvc']['functions']['enable_storage'], + 'dynamic_directory': o_config['pvc']['system']['configuration']['directories']['dynamic_directory'], + 'log_directory': o_config['pvc']['system']['configuration']['directories']['log_directory'], + 
'file_logging': o_config['pvc']['system']['configuration']['logging']['file_logging'], + 'stdout_logging': o_config['pvc']['system']['configuration']['logging']['stdout_logging'], + 'keepalive_interval': o_config['pvc']['system']['fencing']['intervals']['keepalive_interval'], + 'fence_intervals': o_config['pvc']['system']['fencing']['intervals']['fence_intervals'], + 'suicide_intervals': o_config['pvc']['system']['fencing']['intervals']['suicide_intervals'], + 'successful_fence': o_config['pvc']['system']['fencing']['actions']['successful_fence'], + 'failed_fence': o_config['pvc']['system']['fencing']['actions']['failed_fence'], + 'migration_target_selector': o_config['pvc']['system']['migration']['target_selector'], + 'ipmi_hostname': o_config['pvc']['system']['fencing']['ipmi']['host'], + 'ipmi_username': o_config['pvc']['system']['fencing']['ipmi']['user'], + 'ipmi_password': o_config['pvc']['system']['fencing']['ipmi']['pass'] + } + except Exception as e: + print('ERROR: {}!'.format(e)) + exit(1) + config = config_general + + # Handle the networking config + if config['enable_networking']: try: - config[entry] = entries[entry] - except: + config_networking = { + 'cluster_domain': o_config['pvc']['cluster']['networks']['cluster']['domain'], + 'vni_floating_ip': o_config['pvc']['cluster']['networks']['cluster']['floating_ip'], + 'vni_network': o_config['pvc']['cluster']['networks']['cluster']['network'], + 'storage_domain': o_config['pvc']['cluster']['networks']['storage']['domain'], + 'storage_floating_ip': o_config['pvc']['cluster']['networks']['storage']['floating_ip'], + 'storage_network': o_config['pvc']['cluster']['networks']['storage']['network'], + 'upstream_domain': o_config['pvc']['cluster']['networks']['upstream']['domain'], + 'upstream_floating_ip': o_config['pvc']['cluster']['networks']['upstream']['floating_ip'], + 'upstream_network': o_config['pvc']['cluster']['networks']['upstream']['network'], + 'pdns_mysql_host': o_config['pvc']['coordinator']['dns']['database']['host'], + 'pdns_mysql_port': o_config['pvc']['coordinator']['dns']['database']['port'], + 'pdns_mysql_dbname': o_config['pvc']['coordinator']['dns']['database']['name'], + 'pdns_mysql_user': o_config['pvc']['coordinator']['dns']['database']['user'], + 'pdns_mysql_password': o_config['pvc']['coordinator']['dns']['database']['pass'], + 'vni_dev': o_config['pvc']['system']['configuration']['networking']['devices']['cluster'], + 'vni_dev_ip': o_config['pvc']['system']['configuration']['networking']['addresses']['cluster'], + 'storage_dev': o_config['pvc']['system']['configuration']['networking']['devices']['storage'], + 'storage_dev_ip': o_config['pvc']['system']['configuration']['networking']['addresses']['storage'], + 'upstream_dev': o_config['pvc']['system']['configuration']['networking']['devices']['upstream'], + 'upstream_dev_ip': o_config['pvc']['system']['configuration']['networking']['addresses']['upstream'], + } + except Exception as e: + print('ERROR: {}!'.format(e)) + exit(1) + config = {**config, **config_networking} + + # Create the by-id address entries + for net in [ 'vni', + 'storage', + 'upstream' ]: + address_key = '{}_dev_ip'.format(net) + floating_key = '{}_floating_ip'.format(net) + network_key = '{}_network'.format(net) + + # Verify the network provided is valid try: - config[entry] = o_config['default'][entry] - except: - print('ERROR: Config file missing required value "{}" for this host!'.format(entry)) + network = ipaddress.ip_network(config[network_key]) + except Exception as e: + 
print('ERROR: Network address {} for {} is not valid!'.format(config[network_key], network_key)) exit(1) + + # If we should be autoselected + if config[address_key] == 'by-id': + # Construct an IP from the relevant network + # The NodeID starts at 1, but indexes start at 0 + address_id = int(mynodeid) - 1 + # Grab the nth address from the network + config[address_key] = list(network.hosts())[address_id] + + # Verify that the floating IP is valid + + try: + # Set the ipaddr + floating_addr = ipaddress.ip_address(config[floating_key].split('/')[0]) + # Verify we're in the network + if not floating_addr in list(network.hosts()): + raise + except Exception as e: + print('ERROR: Floating address {} for {} is not valid!'.format(config[floating_key], floating_key)) + exit(1) + + # Handle the storage config + if config['enable_storage']: + try: + config_storage = dict() + except Exception as e: + print('ERROR: {}!'.format(e)) + exit(1) + config = {**config, **config_storage} # Handle an empty ipmi_hostname if config['ipmi_hostname'] == '': @@ -182,6 +239,11 @@ def readConfig(pvcd_config_file, myhostname): # Get the config object from readConfig() config = readConfig(pvcd_config_file, myhostname) + +# Handle the enable values +enable_hypervisor = config['enable_hypervisor'] +enable_networking = config['enable_networking'] +enable_storage = config['enable_storage'] ############################################################################### # PHASE 1b - Prepare filesystem directories @@ -242,32 +304,33 @@ logger.out('Starting pvcd on host {}'.format(myfqdn), state='s') # PHASE 1d - Prepare sysctl for pvcd ############################################################################### -# Enable routing functions -common.run_os_command('sysctl net.ipv4.ip_forward=1') -common.run_os_command('sysctl net.ipv6.ip_forward=1') - -# Send redirects -common.run_os_command('sysctl net.ipv4.conf.all.send_redirects=1') -common.run_os_command('sysctl net.ipv4.conf.default.send_redirects=1') -common.run_os_command('sysctl net.ipv6.conf.all.send_redirects=1') -common.run_os_command('sysctl net.ipv6.conf.default.send_redirects=1') - -# Accept source routes -common.run_os_command('sysctl net.ipv4.conf.all.accept_source_route=1') -common.run_os_command('sysctl net.ipv4.conf.default.accept_source_route=1') -common.run_os_command('sysctl net.ipv6.conf.all.accept_source_route=1') -common.run_os_command('sysctl net.ipv6.conf.default.accept_source_route=1') - -# Disable RP filtering on the VNI dev interface (to allow traffic pivoting from primary) -common.run_os_command('sysctl net.ipv4.conf.{}.rp_filter=0'.format(config['vni_dev'])) -common.run_os_command('sysctl net.ipv6.conf.{}.rp_filter=0'.format(config['vni_dev'])) +if enable_networking: + # Enable routing functions + common.run_os_command('sysctl net.ipv4.ip_forward=1') + common.run_os_command('sysctl net.ipv6.ip_forward=1') + + # Send redirects + common.run_os_command('sysctl net.ipv4.conf.all.send_redirects=1') + common.run_os_command('sysctl net.ipv4.conf.default.send_redirects=1') + common.run_os_command('sysctl net.ipv6.conf.all.send_redirects=1') + common.run_os_command('sysctl net.ipv6.conf.default.send_redirects=1') + + # Accept source routes + common.run_os_command('sysctl net.ipv4.conf.all.accept_source_route=1') + common.run_os_command('sysctl net.ipv4.conf.default.accept_source_route=1') + common.run_os_command('sysctl net.ipv6.conf.all.accept_source_route=1') + common.run_os_command('sysctl net.ipv6.conf.default.accept_source_route=1') + + # Disable RP 
filtering on the VNI dev interface (to allow traffic pivoting from primary) + common.run_os_command('sysctl net.ipv4.conf.{}.rp_filter=0'.format(config['vni_dev'])) + common.run_os_command('sysctl net.ipv6.conf.{}.rp_filter=0'.format(config['vni_dev'])) ############################################################################### # PHASE 2 - Determine coordinator mode and start Zookeeper on coordinators ############################################################################### # What is the list of coordinator hosts -coordinator_nodes = config['coordinators'].split(',') +coordinator_nodes = config['coordinators'] if myhostname in coordinator_nodes: # We are indeed a coordinator host @@ -412,7 +475,11 @@ else: transaction.commit() # Check that the primary key exists, and create it with us as master if not -current_primary = zkhandler.readdata(zk_conn, '/primary_node') +try: + current_primary = zkhandler.readdata(zk_conn, '/primary_node') +except kazoo.exceptions.NoNodeError: + current_primary = 'none' + if current_primary and current_primary != 'none': logger.out('Current primary node is {}{}{}.'.format(logger.fmt_blue, current_primary, logger.fmt_end), state='i') else: @@ -424,105 +491,109 @@ else: # PHASE 6 - Create local IP addresses for static networks ############################################################################### -# VNI configuration -vni_dev = config['vni_dev'] -vni_dev_ip = config['vni_dev_ip'] -logger.out('Setting up VNI network interface {}'.format(vni_dev, vni_dev_ip), state='i') -common.run_os_command('ip link set {} mtu 9000 up'.format(vni_dev)) +if enable_networking: + # VNI configuration + vni_dev = config['vni_dev'] + vni_dev_ip = config['vni_dev_ip'] + logger.out('Setting up VNI network interface {}'.format(vni_dev, vni_dev_ip), state='i') + common.run_os_command('ip link set {} mtu 9000 up'.format(vni_dev)) -# Cluster bridge configuration -logger.out('Setting up cluster network bridge on interface {} with IP {}'.format(vni_dev, vni_dev_ip), state='i') -common.run_os_command('brctl addbr brcluster') -common.run_os_command('brctl addif brcluster {}'.format(vni_dev)) -common.run_os_command('ip link set brcluster mtu 9000 up') -common.run_os_command('ip address add {} dev {}'.format(vni_dev_ip, 'brcluster')) + # Cluster bridge configuration + logger.out('Setting up cluster network bridge on interface {} with IP {}'.format(vni_dev, vni_dev_ip), state='i') + common.run_os_command('brctl addbr brcluster') + common.run_os_command('brctl addif brcluster {}'.format(vni_dev)) + common.run_os_command('ip link set brcluster mtu 9000 up') + common.run_os_command('ip address add {} dev {}'.format(vni_dev_ip, 'brcluster')) -# Storage configuration -storage_dev = config['storage_dev'] -if storage_dev == vni_dev: - storage_dev = 'brcluster' -storage_dev_ip = config['storage_dev_ip'] -logger.out('Setting up Storage network on interface {} with IP {}'.format(storage_dev, storage_dev_ip), state='i') -common.run_os_command('ip link set {} mtu 9000 up'.format(storage_dev)) -common.run_os_command('ip address add {} dev {}'.format(storage_dev_ip, storage_dev)) + # Storage configuration + storage_dev = config['storage_dev'] + if storage_dev == vni_dev: + storage_dev = 'brcluster' + storage_dev_ip = config['storage_dev_ip'] + logger.out('Setting up Storage network on interface {} with IP {}'.format(storage_dev, storage_dev_ip), state='i') + common.run_os_command('ip link set {} mtu 9000 up'.format(storage_dev)) + common.run_os_command('ip address add {} dev 
{}'.format(storage_dev_ip, storage_dev)) -# Upstream configuration -if config['daemon_mode'] == 'coordinator': - upstream_dev = config['upstream_dev'] - upstream_dev_ip = config['upstream_dev_ip'] - logger.out('Setting up Upstream network on interface {} with IP {}'.format(upstream_dev, upstream_dev_ip), state='i') - common.run_os_command('ip link set {} up'.format(upstream_dev)) - common.run_os_command('ip address add {} dev {}'.format(upstream_dev_ip, upstream_dev)) + # Upstream configuration + if config['daemon_mode'] == 'coordinator': + upstream_dev = config['upstream_dev'] + upstream_dev_ip = config['upstream_dev_ip'] + logger.out('Setting up Upstream network on interface {} with IP {}'.format(upstream_dev, upstream_dev_ip), state='i') + common.run_os_command('ip link set {} up'.format(upstream_dev)) + common.run_os_command('ip address add {} dev {}'.format(upstream_dev_ip, upstream_dev)) ############################################################################### # PHASE 7a - Ensure Libvirt is running on the local host ############################################################################### -# Start the zookeeper service using systemctl -logger.out('Starting Libvirt daemon', state='i') -common.run_os_command('systemctl start libvirtd.service') -time.sleep(1) +if enable_hypervisor: + # Start the zookeeper service using systemctl + logger.out('Starting Libvirt daemon', state='i') + common.run_os_command('systemctl start libvirtd.service') + time.sleep(1) -# Check that libvirtd is listening TCP -libvirt_check_name = "qemu+tcp://127.0.0.1:16509/system" -logger.out('Connecting to Libvirt daemon at {}'.format(libvirt_check_name), state='i') -try: - lv_conn = libvirt.open(libvirt_check_name) - lv_conn.close() -except Exception as e: - logger.out('ERROR: Failed to connect to Libvirt daemon: {}'.format(e), state='e') - exit(1) + # Check that libvirtd is listening TCP + libvirt_check_name = "qemu+tcp://127.0.0.1:16509/system" + logger.out('Connecting to Libvirt daemon at {}'.format(libvirt_check_name), state='i') + try: + lv_conn = libvirt.open(libvirt_check_name) + lv_conn.close() + except Exception as e: + logger.out('ERROR: Failed to connect to Libvirt daemon: {}'.format(e), state='e') + exit(1) ############################################################################### # PHASE 7b - Ensure Ceph is running on the local host ############################################################################### -if config['daemon_mode'] == 'coordinator': - common.run_os_command('systemctl start ceph-mon@{}'.format(myhostname)) - common.run_os_command('systemctl start ceph-mgr@{}'.format(myhostname)) +if enable_storage: + if config['daemon_mode'] == 'coordinator': + common.run_os_command('systemctl start ceph-mon@{}'.format(myhostname)) + common.run_os_command('systemctl start ceph-mgr@{}'.format(myhostname)) ############################################################################### # PHASE 7c - Ensure NFT is running on the local host ############################################################################### -logger.out("Creating NFT firewall configuration", state='i') +if enable_networking: + logger.out("Creating NFT firewall configuration", state='i') -# Create our config dirs -common.run_os_command( - '/bin/mkdir --parents {}/networks'.format( - config['nft_dynamic_directory'] - ) -) -common.run_os_command( - '/bin/mkdir --parents {}/static'.format( - config['nft_dynamic_directory'] - ) -) -common.run_os_command( - '/bin/mkdir --parents {}'.format( - 
config['nft_dynamic_directory'] - ) -) + # Create our config dirs + common.run_os_command( + '/bin/mkdir --parents {}/networks'.format( + config['nft_dynamic_directory'] + ) + ) + common.run_os_command( + '/bin/mkdir --parents {}/static'.format( + config['nft_dynamic_directory'] + ) + ) + common.run_os_command( + '/bin/mkdir --parents {}'.format( + config['nft_dynamic_directory'] + ) + ) -# Set up the basic features of the nftables firewall -nftables_base_rules = """# Base rules -flush ruleset -# Add the filter table and chains -add table inet filter -add chain inet filter forward {{ type filter hook forward priority 0; }} -add chain inet filter input {{ type filter hook input priority 0; }} -# Include static rules and network rules -include "{rulesdir}/static/*" -include "{rulesdir}/networks/*" -""".format( - rulesdir=config['nft_dynamic_directory'] -) + # Set up the basic features of the nftables firewall + nftables_base_rules = """# Base rules + flush ruleset + # Add the filter table and chains + add table inet filter + add chain inet filter forward {{ type filter hook forward priority 0; }} + add chain inet filter input {{ type filter hook input priority 0; }} + # Include static rules and network rules + include "{rulesdir}/static/*" + include "{rulesdir}/networks/*" + """.format( + rulesdir=config['nft_dynamic_directory'] + ) -# Write the basic firewall config -nftables_base_filename = '{}/base.nft'.format(config['nft_dynamic_directory']) -with open(nftables_base_filename, 'w') as nfbasefile: - nfbasefile.write(nftables_base_rules) -common.reload_firewall_rules(logger, nftables_base_filename) + # Write the basic firewall config + nftables_base_filename = '{}/base.nft'.format(config['nft_dynamic_directory']) + with open(nftables_base_filename, 'w') as nfbasefile: + nfbasefile.write(nftables_base_rules) + common.reload_firewall_rules(logger, nftables_base_filename) ############################################################################### # PHASE 7d - Ensure DNSMASQ is not running @@ -547,9 +618,12 @@ domain_list = [] osd_list = [] pool_list = [] -# Create an instance of the DNS Aggregator if we're a coordinator -if config['daemon_mode'] == 'coordinator': - dns_aggregator = DNSAggregatorInstance.DNSAggregatorInstance(zk_conn, config, logger) +if enable_networking: + # Create an instance of the DNS Aggregator if we're a coordinator + if config['daemon_mode'] == 'coordinator': + dns_aggregator = DNSAggregatorInstance.DNSAggregatorInstance(zk_conn, config, logger) + else: + dns_aggregator = None else: dns_aggregator = None @@ -605,119 +679,122 @@ def update_primary(new_primary, stat, event=''): for node in d_node: d_node[node].primary_node = new_primary -# Network objects -@zk_conn.ChildrenWatch('/networks') -def update_networks(new_network_list): - global network_list, d_network +if enable_networking: + # Network objects + @zk_conn.ChildrenWatch('/networks') + def update_networks(new_network_list): + global network_list, d_network - # Add any missing networks to the list - for network in new_network_list: - if not network in network_list: - d_network[network] = VXNetworkInstance.VXNetworkInstance(network, zk_conn, config, logger, this_node) - print(network) - if config['daemon_mode'] == 'coordinator': - dns_aggregator.add_network(d_network[network]) - # Start primary functionality - if this_node.router_state == 'primary': - d_network[network].createGateways() - d_network[network].startDHCPServer() + # Add any missing networks to the list + for network in new_network_list: + if not 
network in network_list: + d_network[network] = VXNetworkInstance.VXNetworkInstance(network, zk_conn, config, logger, this_node) + print(network) + if config['daemon_mode'] == 'coordinator': + dns_aggregator.add_network(d_network[network]) + # Start primary functionality + if this_node.router_state == 'primary': + d_network[network].createGateways() + d_network[network].startDHCPServer() - # Remove any deleted networks from the list - for network in network_list: - if not network in new_network_list: - # Stop primary functionality - if this_node.router_state == 'primary': - d_network[network].stopDHCPServer() - d_network[network].removeGateways() - dns_aggregator.remove_network(d_network[network]) - # Stop general functionality - d_network[network].removeFirewall() - d_network[network].removeNetwork() - # Delete the object - del(d_network[network]) + # Remove any deleted networks from the list + for network in network_list: + if not network in new_network_list: + # Stop primary functionality + if this_node.router_state == 'primary': + d_network[network].stopDHCPServer() + d_network[network].removeGateways() + dns_aggregator.remove_network(d_network[network]) + # Stop general functionality + d_network[network].removeFirewall() + d_network[network].removeNetwork() + # Delete the object + del(d_network[network]) - # Update and print new list - network_list = new_network_list - logger.out('{}Network list:{} {}'.format(logger.fmt_blue, logger.fmt_end, ' '.join(network_list)), state='i') + # Update and print new list + network_list = new_network_list + logger.out('{}Network list:{} {}'.format(logger.fmt_blue, logger.fmt_end, ' '.join(network_list)), state='i') - # Update node objects' list - for node in d_node: - d_node[node].update_network_list(d_network) + # Update node objects' list + for node in d_node: + d_node[node].update_network_list(d_network) -# VM domain objects -@zk_conn.ChildrenWatch('/domains') -def update_domains(new_domain_list): - global domain_list, d_domain +if enable_hypervisor: + # VM domain objects + @zk_conn.ChildrenWatch('/domains') + def update_domains(new_domain_list): + global domain_list, d_domain - # Add any missing domains to the list - for domain in new_domain_list: - if not domain in domain_list: - d_domain[domain] = DomainInstance.DomainInstance(domain, zk_conn, config, logger, this_node) + # Add any missing domains to the list + for domain in new_domain_list: + if not domain in domain_list: + d_domain[domain] = DomainInstance.DomainInstance(domain, zk_conn, config, logger, this_node) - # Remove any deleted domains from the list - for domain in domain_list: - if not domain in new_domain_list: - # Delete the object - del(d_domain[domain]) + # Remove any deleted domains from the list + for domain in domain_list: + if not domain in new_domain_list: + # Delete the object + del(d_domain[domain]) - # Update and print new list - domain_list = new_domain_list - logger.out('{}Domain list:{} {}'.format(logger.fmt_blue, logger.fmt_end, ' '.join(domain_list)), state='i') + # Update and print new list + domain_list = new_domain_list + logger.out('{}Domain list:{} {}'.format(logger.fmt_blue, logger.fmt_end, ' '.join(domain_list)), state='i') - # Update node objects' list - for node in d_node: - d_node[node].update_domain_list(d_domain) + # Update node objects' list + for node in d_node: + d_node[node].update_domain_list(d_domain) -# Ceph OSD provisioning key -@zk_conn.DataWatch('/ceph/cmd') -def cmd(data, stat, event=''): - if data: - data = data.decode('ascii') - else: - data 
= '' +if enable_storage: + # Ceph OSD provisioning key + @zk_conn.DataWatch('/ceph/cmd') + def cmd(data, stat, event=''): + if data: + data = data.decode('ascii') + else: + data = '' - if data: - CephInstance.run_command(zk_conn, data, d_osd) + if data: + CephInstance.run_command(zk_conn, data, d_osd) -# OSD objects -@zk_conn.ChildrenWatch('/ceph/osds') -def update_osds(new_osd_list): - global osd_list, d_osd + # OSD objects + @zk_conn.ChildrenWatch('/ceph/osds') + def update_osds(new_osd_list): + global osd_list, d_osd - # Add any missing OSDs to the list - for osd in new_osd_list: - if not osd in osd_list: - d_osd[osd] = CephInstance.CephOSDInstance(zk_conn, this_node, osd) + # Add any missing OSDs to the list + for osd in new_osd_list: + if not osd in osd_list: + d_osd[osd] = CephInstance.CephOSDInstance(zk_conn, this_node, osd) - # Remove any deleted OSDs from the list - for osd in osd_list: - if not osd in new_osd_list: - # Delete the object - del(d_osd[osd]) + # Remove any deleted OSDs from the list + for osd in osd_list: + if not osd in new_osd_list: + # Delete the object + del(d_osd[osd]) - # Update and print new list - osd_list = new_osd_list - logger.out('{}OSD list:{} {}'.format(logger.fmt_blue, logger.fmt_end, ' '.join(osd_list)), state='i') + # Update and print new list + osd_list = new_osd_list + logger.out('{}OSD list:{} {}'.format(logger.fmt_blue, logger.fmt_end, ' '.join(osd_list)), state='i') -# Pool objects -@zk_conn.ChildrenWatch('/ceph/pools') -def update_pools(new_pool_list): - global pool_list, d_pool + # Pool objects + @zk_conn.ChildrenWatch('/ceph/pools') + def update_pools(new_pool_list): + global pool_list, d_pool - # Add any missing Pools to the list - for pool in new_pool_list: - if not pool in pool_list: - d_pool[pool] = CephInstance.CephPoolInstance(zk_conn, this_node, pool) + # Add any missing Pools to the list + for pool in new_pool_list: + if not pool in pool_list: + d_pool[pool] = CephInstance.CephPoolInstance(zk_conn, this_node, pool) - # Remove any deleted Pools from the list - for pool in pool_list: - if not pool in new_pool_list: - # Delete the object - del(d_pool[pool]) + # Remove any deleted Pools from the list + for pool in pool_list: + if not pool in new_pool_list: + # Delete the object + del(d_pool[pool]) - # Update and print new list - pool_list = new_pool_list - logger.out('{}Pool list:{} {}'.format(logger.fmt_blue, logger.fmt_end, ' '.join(pool_list)), state='i') + # Update and print new list + pool_list = new_pool_list + logger.out('{}Pool list:{} {}'.format(logger.fmt_blue, logger.fmt_end, ' '.join(pool_list)), state='i') ############################################################################### # PHASE 9 - Run the daemon @@ -742,183 +819,184 @@ def update_zookeeper(): if zkhandler.readdata(zk_conn, '/primary_node') != this_node.name: zkhandler.writedata(zk_conn, {'/primary_node': this_node.name}) - # Get Ceph cluster health (for local printing) - if debug: - print("Get Ceph cluster health (for local printing)") - retcode, stdout, stderr = common.run_os_command('ceph health') - ceph_health = stdout.rstrip() - if 'HEALTH_OK' in ceph_health: - ceph_health_colour = logger.fmt_green - elif 'HEALTH_WARN' in ceph_health: - ceph_health_colour = logger.fmt_yellow - else: - ceph_health_colour = logger.fmt_red - - # Set ceph health information in zookeeper (primary only) - if this_node.router_state == 'primary': + if enable_storage: + # Get Ceph cluster health (for local printing) if debug: - print("Set ceph health information in zookeeper 
(primary only)") - # Get status info - retcode, stdout, stderr = common.run_os_command('ceph status') - ceph_status = stdout - try: - zkhandler.writedata(zk_conn, { - '/ceph': str(ceph_status) - }) - except: - logger.out('Failed to set Ceph status data', state='e') - return + print("Get Ceph cluster health (for local printing)") + retcode, stdout, stderr = common.run_os_command('ceph health') + ceph_health = stdout.rstrip() + if 'HEALTH_OK' in ceph_health: + ceph_health_colour = logger.fmt_green + elif 'HEALTH_WARN' in ceph_health: + ceph_health_colour = logger.fmt_yellow + else: + ceph_health_colour = logger.fmt_red - # Set pool information in zookeeper (primary only) - if this_node.router_state == 'primary': - if debug: - print("Set pool information in zookeeper (primary only)") - # Get pool info - pool_df = dict() - retcode, stdout, stderr = common.run_os_command('rados df --format json') - pool_df_raw = json.loads(stdout)['pools'] - for pool in pool_df_raw: - pool_df.update({ - str(pool['name']): { - 'id': pool['id'], - 'size_bytes': pool['size_bytes'], - 'num_objects': pool['num_objects'], - 'num_object_clones': pool['num_object_clones'], - 'num_object_copies': pool['num_object_copies'], - 'num_objects_missing_on_primary': pool['num_objects_missing_on_primary'], - 'num_objects_unfound': pool['num_objects_unfound'], - 'num_objects_degraded': pool['num_objects_degraded'], - 'read_ops': pool['read_ops'], - 'read_bytes': pool['read_bytes'], - 'write_ops': pool['write_ops'], - 'write_bytes': pool['write_bytes'] - } - }) + # Set ceph health information in zookeeper (primary only) + if this_node.router_state == 'primary': + if debug: + print("Set ceph health information in zookeeper (primary only)") + # Get status info + retcode, stdout, stderr = common.run_os_command('ceph status') + ceph_status = stdout + try: + zkhandler.writedata(zk_conn, { + '/ceph': str(ceph_status) + }) + except: + logger.out('Failed to set Ceph status data', state='e') + return + + # Set pool information in zookeeper (primary only) + if this_node.router_state == 'primary': + if debug: + print("Set pool information in zookeeper (primary only)") + # Get pool info + pool_df = dict() + retcode, stdout, stderr = common.run_os_command('rados df --format json') + pool_df_raw = json.loads(stdout)['pools'] + for pool in pool_df_raw: + pool_df.update({ + str(pool['name']): { + 'id': pool['id'], + 'size_bytes': pool['size_bytes'], + 'num_objects': pool['num_objects'], + 'num_object_clones': pool['num_object_clones'], + 'num_object_copies': pool['num_object_copies'], + 'num_objects_missing_on_primary': pool['num_objects_missing_on_primary'], + 'num_objects_unfound': pool['num_objects_unfound'], + 'num_objects_degraded': pool['num_objects_degraded'], + 'read_ops': pool['read_ops'], + 'read_bytes': pool['read_bytes'], + 'write_ops': pool['write_ops'], + 'write_bytes': pool['write_bytes'] + } + }) # Trigger updates for each OSD on this node for pool in pool_list: zkhandler.writedata(zk_conn, { '/ceph/pools/{}/stats'.format(pool): str(json.dumps(pool_df[pool])) }) - - # Get data from Ceph OSDs - if debug: - print("Get data from Ceph OSDs") - # Parse the dump data - osd_dump = dict() - retcode, stdout, stderr = common.run_os_command('ceph osd dump --format json') - osd_dump_raw = json.loads(stdout)['osds'] - for osd in osd_dump_raw: - osd_dump.update({ - str(osd['osd']): { - 'uuid': osd['uuid'], - 'up': osd['up'], - 'in': osd['in'], - 'primary_affinity': osd['primary_affinity'] - } - }) - # Parse the df data - osd_df = dict() - 
retcode, stdout, stderr = common.run_os_command('ceph osd df --format json') - osd_df_raw = json.loads(stdout)['nodes'] - for osd in osd_df_raw: - osd_df.update({ - str(osd['id']): { - 'utilization': osd['utilization'], - 'var': osd['var'], - 'pgs': osd['pgs'], - 'kb': osd['kb'], - 'weight': osd['crush_weight'], - 'reweight': osd['reweight'], - } - }) - # Parse the status data - osd_status = dict() - retcode, stdout, stderr = common.run_os_command('ceph osd status') - for line in stderr.split('\n'): - # Strip off colour - line = re.sub(r'\x1b(\[.*?[@-~]|\].*?(\x07|\x1b\\))', '', line) - # Split it for parsing - line = line.split() - if len(line) > 1 and line[1].isdigit(): - # This is an OSD line so parse it - osd_id = line[1] - node = line[3].split('.')[0] - used = line[5] - avail = line[7] - wr_ops = line[9] - wr_data = line[11] - rd_ops = line[13] - rd_data = line[15] - state = line[17] - osd_status.update({ - str(osd_id): { - 'node': node, - 'used': used, - 'avail': avail, - 'wr_ops': wr_ops, - 'wr_data': wr_data, - 'rd_ops': rd_ops, - 'rd_data': rd_data, - 'state': state + + # Get data from Ceph OSDs + if debug: + print("Get data from Ceph OSDs") + # Parse the dump data + osd_dump = dict() + retcode, stdout, stderr = common.run_os_command('ceph osd dump --format json') + osd_dump_raw = json.loads(stdout)['osds'] + for osd in osd_dump_raw: + osd_dump.update({ + str(osd['osd']): { + 'uuid': osd['uuid'], + 'up': osd['up'], + 'in': osd['in'], + 'primary_affinity': osd['primary_affinity'] } }) - # Merge them together into a single meaningful dict - osd_stats = dict() - for osd in osd_list: - this_dump = osd_dump[osd] - this_dump.update(osd_df[osd]) - this_dump.update(osd_status[osd]) - osd_stats[osd] = this_dump - - # Trigger updates for each OSD on this node - if debug: - print("Trigger updates for each OSD on this node") - osds_this_node = 0 - for osd in osd_list: - if d_osd[osd].node == myhostname: - zkhandler.writedata(zk_conn, { - '/ceph/osds/{}/stats'.format(osd): str(json.dumps(osd_stats[osd])) + # Parse the df data + osd_df = dict() + retcode, stdout, stderr = common.run_os_command('ceph osd df --format json') + osd_df_raw = json.loads(stdout)['nodes'] + for osd in osd_df_raw: + osd_df.update({ + str(osd['id']): { + 'utilization': osd['utilization'], + 'var': osd['var'], + 'pgs': osd['pgs'], + 'kb': osd['kb'], + 'weight': osd['crush_weight'], + 'reweight': osd['reweight'], + } }) - osds_this_node += 1 + # Parse the status data + osd_status = dict() + retcode, stdout, stderr = common.run_os_command('ceph osd status') + for line in stderr.split('\n'): + # Strip off colour + line = re.sub(r'\x1b(\[.*?[@-~]|\].*?(\x07|\x1b\\))', '', line) + # Split it for parsing + line = line.split() + if len(line) > 1 and line[1].isdigit(): + # This is an OSD line so parse it + osd_id = line[1] + node = line[3].split('.')[0] + used = line[5] + avail = line[7] + wr_ops = line[9] + wr_data = line[11] + rd_ops = line[13] + rd_data = line[15] + state = line[17] + osd_status.update({ + str(osd_id): { + 'node': node, + 'used': used, + 'avail': avail, + 'wr_ops': wr_ops, + 'wr_data': wr_data, + 'rd_ops': rd_ops, + 'rd_data': rd_data, + 'state': state + } + }) + # Merge them together into a single meaningful dict + osd_stats = dict() + for osd in osd_list: + this_dump = osd_dump[osd] + this_dump.update(osd_df[osd]) + this_dump.update(osd_status[osd]) + osd_stats[osd] = this_dump + # Trigger updates for each OSD on this node + if debug: + print("Trigger updates for each OSD on this node") + osds_this_node = 0 
+ for osd in osd_list: + if d_osd[osd].node == myhostname: + zkhandler.writedata(zk_conn, { + '/ceph/osds/{}/stats'.format(osd): str(json.dumps(osd_stats[osd])) + }) + osds_this_node += 1 - # Toggle state management of dead VMs to restart them - if debug: - print("Toggle state management of dead VMs to restart them") memalloc = 0 vcpualloc = 0 - for domain, instance in this_node.d_domain.items(): - if domain in this_node.domain_list: - # Add the allocated memory to our memalloc value - memalloc += instance.getmemory() - vcpualloc += instance.getvcpus() - if instance.getstate() == 'start' and instance.getnode() == this_node.name: - if instance.getdom() != None: - try: - if instance.getdom().state()[0] != libvirt.VIR_DOMAIN_RUNNING: - raise - except Exception as e: - # Toggle a state "change" - zkhandler.writedata(zk_conn, { '/domains/{}/state'.format(domain): instance.getstate() }) - - # Connect to libvirt - if debug: - print("Connect to libvirt") - libvirt_name = "qemu:///system" - lv_conn = libvirt.open(libvirt_name) - if lv_conn == None: - logger.out('Failed to open connection to "{}"'.format(libvirt_name), state='e') - return - - # Ensure that any running VMs are readded to the domain_list - if debug: - print("Ensure that any running VMs are readded to the domain_list") - running_domains = lv_conn.listAllDomains(libvirt.VIR_CONNECT_LIST_DOMAINS_ACTIVE) - for domain in running_domains: - domain_uuid = domain.UUIDString() - if domain_uuid not in this_node.domain_list: - this_node.domain_list.append(domain_uuid) + if enable_hypervisor: + # Toggle state management of dead VMs to restart them + if debug: + print("Toggle state management of dead VMs to restart them") + for domain, instance in this_node.d_domain.items(): + if domain in this_node.domain_list: + # Add the allocated memory to our memalloc value + memalloc += instance.getmemory() + vcpualloc += instance.getvcpus() + if instance.getstate() == 'start' and instance.getnode() == this_node.name: + if instance.getdom() != None: + try: + if instance.getdom().state()[0] != libvirt.VIR_DOMAIN_RUNNING: + raise + except Exception as e: + # Toggle a state "change" + zkhandler.writedata(zk_conn, { '/domains/{}/state'.format(domain): instance.getstate() }) + + # Connect to libvirt + if debug: + print("Connect to libvirt") + libvirt_name = "qemu:///system" + lv_conn = libvirt.open(libvirt_name) + if lv_conn == None: + logger.out('Failed to open connection to "{}"'.format(libvirt_name), state='e') + return + + # Ensure that any running VMs are readded to the domain_list + if debug: + print("Ensure that any running VMs are readded to the domain_list") + running_domains = lv_conn.listAllDomains(libvirt.VIR_CONNECT_LIST_DOMAINS_ACTIVE) + for domain in running_domains: + domain_uuid = domain.UUIDString() + if domain_uuid not in this_node.domain_list: + this_node.domain_list.append(domain_uuid) # Set our information in zookeeper if debug: @@ -929,7 +1007,10 @@ def update_zookeeper(): this_node.memalloc = memalloc this_node.vcpualloc = vcpualloc this_node.cpuload = os.getloadavg()[0] - this_node.domains_count = len(lv_conn.listDomainsID()) + if enable_hypervisor: + this_node.domains_count = len(lv_conn.listDomainsID()) + else: + this_node.domains_count = 0 keepalive_time = int(time.time()) try: zkhandler.writedata(zk_conn, { @@ -946,8 +1027,9 @@ def update_zookeeper(): logger.out('Failed to set keepalive data', state='e') return - # Close the Libvirt connection - lv_conn.close() + if enable_hypervisor: + # Close the Libvirt connection + lv_conn.close() 
# Look for dead nodes and fence them if debug: @@ -1005,20 +1087,21 @@ def update_zookeeper(): netcount=len(network_list) ), ) - logger.out( - '{bold}Ceph cluster status:{nofmt} {health_colour}{health}{nofmt} ' - '{bold}Total OSDs:{nofmt} {total_osds} ' - '{bold}Node OSDs:{nofmt} {node_osds} ' - '{bold}Pools:{nofmt} {total_pools} '.format( - bold=logger.fmt_bold, - health_colour=ceph_health_colour, - nofmt=logger.fmt_end, - health=ceph_health, - total_osds=len(osd_list), - node_osds=osds_this_node, - total_pools=len(pool_list) - ), - ) + if enable_storage: + logger.out( + '{bold}Ceph cluster status:{nofmt} {health_colour}{health}{nofmt} ' + '{bold}Total OSDs:{nofmt} {total_osds} ' + '{bold}Node OSDs:{nofmt} {node_osds} ' + '{bold}Pools:{nofmt} {total_pools} '.format( + bold=logger.fmt_bold, + health_colour=ceph_health_colour, + nofmt=logger.fmt_end, + health=ceph_health, + total_osds=len(osd_list), + node_osds=osds_this_node, + total_pools=len(pool_list) + ), + ) # Start keepalive thread and immediately update Zookeeper diff --git a/node-daemon/pvcd/NodeInstance.py b/node-daemon/pvcd/NodeInstance.py index 60b0064d..d4b98f16 100644 --- a/node-daemon/pvcd/NodeInstance.py +++ b/node-daemon/pvcd/NodeInstance.py @@ -66,10 +66,18 @@ class NodeInstance(object): self.memalloc = 0 self.vcpualloc = 0 # Floating upstreams - self.vni_dev = self.config['vni_dev'] - self.vni_ipaddr, self.vni_cidrnetmask = self.config['vni_floating_ip'].split('/') - self.upstream_dev = self.config['upstream_dev'] - self.upstream_ipaddr, self.upstream_cidrnetmask = self.config['upstream_floating_ip'].split('/') + if self.config['enable_networking']: + self.vni_dev = self.config['vni_dev'] + self.vni_ipaddr, self.vni_cidrnetmask = self.config['vni_floating_ip'].split('/') + self.upstream_dev = self.config['upstream_dev'] + self.upstream_ipaddr, self.upstream_cidrnetmask = self.config['upstream_floating_ip'].split('/') + else: + self.vni_dev = None + self.vni_ipaddr = None + self.vni_cidrnetmask = None + self.upstream_dev = None + self.upstream_ipaddr = None + self.upstream_cidrnetmask = None # Flags self.inflush = False @@ -240,25 +248,27 @@ class NodeInstance(object): # Routing primary/secondary states def become_secondary(self): - self.logger.out('Setting router {} to secondary state'.format(self.name), state='i') - self.logger.out('Network list: {}'.format(', '.join(self.network_list))) - time.sleep(1) - for network in self.d_network: - self.d_network[network].stopDHCPServer() - self.d_network[network].removeGateways() - self.removeFloatingAddresses() - self.dns_aggregator.stop_aggregator() + if self.config['enable_networking']: + self.logger.out('Setting router {} to secondary state'.format(self.name), state='i') + self.logger.out('Network list: {}'.format(', '.join(self.network_list))) + time.sleep(1) + for network in self.d_network: + self.d_network[network].stopDHCPServer() + self.d_network[network].removeGateways() + self.removeFloatingAddresses() + self.dns_aggregator.stop_aggregator() def become_primary(self): - self.logger.out('Setting router {} to primary state'.format(self.name), state='i') - self.logger.out('Network list: {}'.format(', '.join(self.network_list))) - self.createFloatingAddresses() - # Start up the gateways and DHCP servers - for network in self.d_network: - self.d_network[network].createGateways() - self.d_network[network].startDHCPServer() - time.sleep(0.5) - self.dns_aggregator.start_aggregator() + if self.config['enable_networking']: + self.logger.out('Setting router {} to primary 
state'.format(self.name), state='i') + self.logger.out('Network list: {}'.format(', '.join(self.network_list))) + self.createFloatingAddresses() + # Start up the gateways and DHCP servers + for network in self.d_network: + self.d_network[network].createGateways() + self.d_network[network].startDHCPServer() + time.sleep(0.5) + self.dns_aggregator.start_aggregator() def createFloatingAddresses(self): # VNI floating IP diff --git a/node-daemon/pvcd/fixrbdlocks b/node-daemon/pvcd/fixrbdlocks new file mode 100755 index 00000000..ad9b9da5 --- /dev/null +++ b/node-daemon/pvcd/fixrbdlocks @@ -0,0 +1,13 @@ +#!/bin/bash + +for disk in $( sudo rbd list ${BLSE_STORAGE_POOL_VM} | grep "^${vm}" ); do + echo -e " Disk: $disk" + locks="$( sudo rbd lock list ${BLSE_STORAGE_POOL_VM}/${disk} | grep '^client' )" + echo "${locks}" + if [[ -n "${locks}" ]]; then + echo -e " LOCK FOUND! Clearing." + locker="$( awk '{ print $1 }' <<<"${locks}" )" + id="$( awk '{ print $2" "$3 }' <<<"${locks}" )" + sudo rbd lock remove ${BLSE_STORAGE_POOL_VM}/${disk} "${id}" "${locker}" + fi +done diff --git a/node-daemon/pvcd/log.py b/node-daemon/pvcd/log.py index 7ba930f7..fc49402f 100644 --- a/node-daemon/pvcd/log.py +++ b/node-daemon/pvcd/log.py @@ -45,6 +45,8 @@ class Logger(object): # We open the logfile for the duration of our session, but have a hup function self.writer = open(self.logfile, 'a', buffering=1) self.last_colour = self.fmt_cyan + else: + self.last_colour = "" # Provide a hup function to close and reopen the writer def hup(self):
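
Note on the new configuration format: readConfig() now parses /etc/pvc/pvcd.yaml (the path set via PVCD_CONFIG_FILE in pvcd.service) and reads the per-subsystem enable flags from the pvc -> functions block introduced in pvcd.sample.yaml. The standalone sketch below shows the same lookup pattern; it deliberately uses yaml.safe_load rather than the bare yaml.load() call in the diff (safe_load avoids constructing arbitrary Python objects and is the usual choice for plain config files), and the file path and print output are illustrative only.

#!/usr/bin/env python3
# Illustrative sketch: load the new-style pvcd.yaml and read the subsystem
# enable flags that gate hypervisor, networking, and storage setup.
import yaml

with open('/etc/pvc/pvcd.yaml', 'r') as cfgfile:
    o_config = yaml.safe_load(cfgfile)      # safe_load: no arbitrary object construction

functions = o_config['pvc']['functions']
enable_hypervisor = functions['enable_hypervisor']
enable_networking = functions['enable_networking']
enable_storage = functions['enable_storage']

print(enable_hypervisor, enable_networking, enable_storage)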
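
The other non-obvious addition to readConfig() is the "by-id" address autoselection: when a per-node address is set to by-id, the daemon derives it from the corresponding network block and the node ID parsed from the hostname, then verifies that the network's floating IP falls inside the same block. A minimal standalone sketch of that logic follows; the hostname, network, and floating IP are example values, not taken from a real cluster.

#!/usr/bin/env python3
# Illustrative sketch of the "by-id" address autoselection and floating-IP check.
import ipaddress
import re

myhostname = 'pvc-hv1'                                   # example short hostname
try:
    mynodeid = int(re.findall(r'\d+', myhostname)[-1])   # trailing digits become the node ID
except IndexError:
    mynodeid = 1                                         # no digits in the hostname: fall back to 1

network = ipaddress.ip_network('10.254.0.0/24')          # e.g. the storage network block

# Node IDs start at 1, but the host list is zero-indexed,
# so node N receives the Nth usable address in the network.
node_address = list(network.hosts())[mynodeid - 1]
print(node_address)                                      # 10.254.0.1 for node 1

# The floating IP (CIDR form) must fall inside the same network.
floating_ip = '10.254.0.254/24'
floating_addr = ipaddress.ip_address(floating_ip.split('/')[0])
if floating_addr not in list(network.hosts()):
    raise ValueError('Floating address {} is not in {}'.format(floating_ip, network))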