Joshua M. Boniface f7a826bf52 Add handlers for client network MTUs
Refactors some of the code in VXNetworkInterface to handle MTUs in a
more streamlined fashion. Also fixes a bug whereby bridge client
networks were being explicitly given the cluster dev MTU which might not
be correct. Now adds support for this option explicitly in the configs,
and defaults to 1500 for safety (the standard Ethernet MTU).

Addresses #144
2021-10-09 17:02:27 -04:00

386 lines
15 KiB
Python

#!/usr/bin/env python3
# config.py - Utility functions for pvcnoded configuration parsing
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
import os
import subprocess
import yaml
from socket import gethostname
from re import findall
from psutil import cpu_count
from ipaddress import ip_address, ip_network
class MalformedConfigurationError(Exception):
"""
An except when parsing the PVC Node daemon configuration file
"""
def __init__(self, error=None):
self.msg = f'ERROR: Configuration file is malformed: {error}'
def __str__(self):
return str(self.msg)
def get_static_data():
"""
Data that is obtained once at node startup for use later
"""
staticdata = list()
staticdata.append(str(cpu_count())) # CPU count
staticdata.append(
subprocess.run(
['uname', '-r'], stdout=subprocess.PIPE
).stdout.decode('ascii').strip()
)
staticdata.append(
subprocess.run(
['uname', '-o'], stdout=subprocess.PIPE
).stdout.decode('ascii').strip()
)
staticdata.append(
subprocess.run(
['uname', '-m'], stdout=subprocess.PIPE
).stdout.decode('ascii').strip()
)
return staticdata
def get_configuration_path():
try:
return os.environ['PVCD_CONFIG_FILE']
except KeyError:
print('ERROR: The "PVCD_CONFIG_FILE" environment variable must be set.')
os._exit(1)
def get_hostname():
node_fqdn = gethostname()
node_hostname = node_fqdn.split('.', 1)[0]
node_domain = ''.join(node_fqdn.split('.', 1)[1:])
try:
node_id = findall(r'\d+', node_hostname)[-1]
except IndexError:
node_id = 0
return node_fqdn, node_hostname, node_domain, node_id
def validate_floating_ip(config, network):
if network not in ['cluster', 'storage', 'upstream']:
return False, f'Specified network type "{network}" is not valid'
floating_key = f'{network}_floating_ip'
network_key = f'{network}_network'
# Verify the network provided is valid
try:
network = ip_network(config[network_key])
except Exception:
return False, f'Network address {config[network_key]} for {network_key} is not valid'
# Verify that the floating IP is valid (and in the network)
try:
floating_address = ip_address(config[floating_key].split('/')[0])
if floating_address not in list(network.hosts()):
raise
except Exception:
return False, f'Floating address {config[floating_key]} for {floating_key} is not valid'
return True, ''
def get_configuration():
"""
Parse the configuration of the node daemon.
"""
pvcnoded_config_file = get_configuration_path()
print('Loading configuration from file "{}"'.format(pvcnoded_config_file))
with open(pvcnoded_config_file, 'r') as cfgfile:
try:
o_config = yaml.load(cfgfile, Loader=yaml.SafeLoader)
except Exception as e:
print('ERROR: Failed to parse configuration file: {}'.format(e))
os._exit(1)
node_fqdn, node_hostname, node_domain, node_id = get_hostname()
# Create the configuration dictionary
config = dict()
# Get the initial base configuration
try:
o_base = o_config['pvc']
o_cluster = o_config['pvc']['cluster']
except Exception as e:
raise MalformedConfigurationError(e)
config_general = {
'node': o_base.get('node', node_hostname),
'node_hostname': node_hostname,
'node_fqdn': node_fqdn,
'node_domain': node_domain,
'node_id': node_id,
'coordinators': o_cluster.get('coordinators', list()),
'debug': o_base.get('debug', False),
}
config = {**config, **config_general}
# Get the functions configuration
try:
o_functions = o_config['pvc']['functions']
except Exception as e:
raise MalformedConfigurationError(e)
config_functions = {
'enable_hypervisor': o_functions.get('enable_hypervisor', False),
'enable_networking': o_functions.get('enable_networking', False),
'enable_storage': o_functions.get('enable_storage', False),
'enable_api': o_functions.get('enable_api', False),
}
config = {**config, **config_functions}
# Get the directory configuration
try:
o_directories = o_config['pvc']['system']['configuration']['directories']
except Exception as e:
raise MalformedConfigurationError(e)
config_directories = {
'dynamic_directory': o_directories.get('dynamic_directory', None),
'log_directory': o_directories.get('log_directory', None),
'console_log_directory': o_directories.get('console_log_directory', None),
}
# Define our dynamic directory schema
config_directories['dnsmasq_dynamic_directory'] = config_directories['dynamic_directory'] + '/dnsmasq'
config_directories['pdns_dynamic_directory'] = config_directories['dynamic_directory'] + '/pdns'
config_directories['nft_dynamic_directory'] = config_directories['dynamic_directory'] + '/nft'
# Define our log directory schema
config_directories['dnsmasq_log_directory'] = config_directories['log_directory'] + '/dnsmasq'
config_directories['pdns_log_directory'] = config_directories['log_directory'] + '/pdns'
config_directories['nft_log_directory'] = config_directories['log_directory'] + '/nft'
config = {**config, **config_directories}
# Get the logging configuration
try:
o_logging = o_config['pvc']['system']['configuration']['logging']
except Exception as e:
raise MalformedConfigurationError(e)
config_logging = {
'file_logging': o_logging.get('file_logging', False),
'stdout_logging': o_logging.get('stdout_logging', False),
'zookeeper_logging': o_logging.get('zookeeper_logging', False),
'log_colours': o_logging.get('log_colours', False),
'log_dates': o_logging.get('log_dates', False),
'log_keepalives': o_logging.get('log_keepalives', False),
'log_keepalive_cluster_details': o_logging.get('log_keepalive_cluster_details', False),
'log_keepalive_storage_details': o_logging.get('log_keepalive_storage_details', False),
'console_log_lines': o_logging.get('console_log_lines', False),
'node_log_lines': o_logging.get('node_log_lines', False),
}
config = {**config, **config_logging}
# Get the interval configuration
try:
o_intervals = o_config['pvc']['system']['intervals']
except Exception as e:
raise MalformedConfigurationError(e)
config_intervals = {
'vm_shutdown_timeout': int(o_intervals.get('vm_shutdown_timeout', 60)),
'keepalive_interval': int(o_intervals.get('keepalive_interval', 5)),
'fence_intervals': int(o_intervals.get('fence_intervals', 6)),
'suicide_intervals': int(o_intervals.get('suicide_interval', 0)),
}
config = {**config, **config_intervals}
# Get the fencing configuration
try:
o_fencing = o_config['pvc']['system']['fencing']
o_fencing_actions = o_fencing['actions']
o_fencing_ipmi = o_fencing['ipmi']
except Exception as e:
raise MalformedConfigurationError(e)
config_fencing = {
'successful_fence': o_fencing_actions.get('successful_fence', None),
'failed_fence': o_fencing_actions.get('failed_fence', None),
'ipmi_hostname': o_fencing_ipmi.get('host', f'{node_hostname}-lom.{node_domain}'),
'ipmi_username': o_fencing_ipmi.get('user', 'null'),
'ipmi_password': o_fencing_ipmi.get('pass', 'null'),
}
config = {**config, **config_fencing}
# Get the migration configuration
try:
o_migration = o_config['pvc']['system']['migration']
except Exception as e:
raise MalformedConfigurationError(e)
config_migration = {
'migration_target_selector': o_migration.get('target_selector', 'mem'),
}
config = {**config, **config_migration}
if config['enable_networking']:
# Get the node networks configuration
try:
o_networks = o_config['pvc']['cluster']['networks']
o_network_cluster = o_networks['cluster']
o_network_storage = o_networks['storage']
o_network_upstream = o_networks['upstream']
o_sysnetworks = o_config['pvc']['system']['configuration']['networking']
o_sysnetwork_cluster = o_sysnetworks['cluster']
o_sysnetwork_storage = o_sysnetworks['storage']
o_sysnetwork_upstream = o_sysnetworks['upstream']
except Exception as e:
raise MalformedConfigurationError(e)
config_networks = {
'cluster_domain': o_network_cluster.get('domain', None),
'cluster_network': o_network_cluster.get('network', None),
'cluster_floating_ip': o_network_cluster.get('floating_ip', None),
'cluster_dev': o_sysnetwork_cluster.get('device', None),
'cluster_mtu': o_sysnetwork_cluster.get('mtu', None),
'cluster_dev_ip': o_sysnetwork_cluster.get('address', None),
'storage_domain': o_network_storage.get('domain', None),
'storage_network': o_network_storage.get('network', None),
'storage_floating_ip': o_network_storage.get('floating_ip', None),
'storage_dev': o_sysnetwork_storage.get('device', None),
'storage_mtu': o_sysnetwork_storage.get('mtu', None),
'storage_dev_ip': o_sysnetwork_storage.get('address', None),
'upstream_domain': o_network_upstream.get('domain', None),
'upstream_network': o_network_upstream.get('network', None),
'upstream_floating_ip': o_network_upstream.get('floating_ip', None),
'upstream_gateway': o_network_upstream.get('gateway', None),
'upstream_dev': o_sysnetwork_upstream.get('device', None),
'upstream_mtu': o_sysnetwork_upstream.get('mtu', None),
'upstream_dev_ip': o_sysnetwork_upstream.get('address', None),
'bridge_dev': o_sysnetworks.get('bridge_device', None),
'bridge_mtu': o_sysnetworks.get('bridge_mtu', 1500),
'enable_sriov': o_sysnetworks.get('sriov_enable', False),
'sriov_device': o_sysnetworks.get('sriov_device', list())
}
config = {**config, **config_networks}
for network_type in ['cluster', 'storage', 'upstream']:
result, msg = validate_floating_ip(config, network_type)
if not result:
raise MalformedConfigurationError(msg)
address_key = '{}_dev_ip'.format(network_type)
network_key = f'{network_type}_network'
network = ip_network(config[network_key])
# With autoselection of addresses, construct an IP from the relevant network
if config[address_key] == 'by-id':
# The NodeID starts at 1, but indexes start at 0
address_id = int(config['node_id']) - 1
# Grab the nth address from the network
config[address_key] = '{}/{}'.format(list(network.hosts())[address_id], network.prefixlen)
# Validate the provided IP instead
else:
try:
address = ip_address(config[address_key].split('/')[0])
if address not in list(network.hosts()):
raise
except Exception:
raise MalformedConfigurationError(
f'IP address {config[address_key]} for {address_key} is not valid'
)
# Get the PowerDNS aggregator database configuration
try:
o_pdnsdb = o_config['pvc']['coordinator']['dns']['database']
except Exception as e:
raise MalformedConfigurationError(e)
config_pdnsdb = {
'pdns_postgresql_host': o_pdnsdb.get('host', None),
'pdns_postgresql_port': o_pdnsdb.get('port', None),
'pdns_postgresql_dbname': o_pdnsdb.get('name', None),
'pdns_postgresql_user': o_pdnsdb.get('user', None),
'pdns_postgresql_password': o_pdnsdb.get('pass', None),
}
config = {**config, **config_pdnsdb}
# Get the Cloud-Init Metadata database configuration
try:
o_metadatadb = o_config['pvc']['coordinator']['metadata']['database']
except Exception as e:
raise MalformedConfigurationError(e)
config_metadatadb = {
'metadata_postgresql_host': o_metadatadb.get('host', None),
'metadata_postgresql_port': o_metadatadb.get('port', None),
'metadata_postgresql_dbname': o_metadatadb.get('name', None),
'metadata_postgresql_user': o_metadatadb.get('user', None),
'metadata_postgresql_password': o_metadatadb.get('pass', None),
}
config = {**config, **config_metadatadb}
if config['enable_storage']:
# Get the storage configuration
try:
o_storage = o_config['pvc']['system']['configuration']['storage']
except Exception as e:
raise MalformedConfigurationError(e)
config_storage = {
'ceph_config_file': o_storage.get('ceph_config_file', None),
'ceph_admin_keyring': o_storage.get('ceph_admin_keyring', None),
}
config = {**config, **config_storage}
# Add our node static data to the config
config['static_data'] = get_static_data()
return config
def validate_directories(config):
if not os.path.exists(config['dynamic_directory']):
os.makedirs(config['dynamic_directory'])
os.makedirs(config['dnsmasq_dynamic_directory'])
os.makedirs(config['pdns_dynamic_directory'])
os.makedirs(config['nft_dynamic_directory'])
if not os.path.exists(config['log_directory']):
os.makedirs(config['log_directory'])
os.makedirs(config['dnsmasq_log_directory'])
os.makedirs(config['pdns_log_directory'])
os.makedirs(config['nft_log_directory'])