2018-10-14 02:01:35 -04:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
# Daemon.py - Node daemon
|
|
|
|
# Part of the Parallel Virtual Cluster (PVC) system
|
|
|
|
#
|
2021-03-25 17:01:55 -04:00
|
|
|
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
|
2018-10-14 02:01:35 -04:00
|
|
|
#
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
2021-03-25 16:57:17 -04:00
|
|
|
# the Free Software Foundation, version 3.
|
2018-10-14 02:01:35 -04:00
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
#
|
|
|
|
###############################################################################
|
|
|
|
|
|
|
|
import kazoo.client
|
|
|
|
import libvirt
|
2021-06-14 17:10:21 -04:00
|
|
|
import sys
|
2018-10-14 02:01:35 -04:00
|
|
|
import os
|
|
|
|
import signal
|
|
|
|
import psutil
|
|
|
|
import subprocess
|
|
|
|
import time
|
|
|
|
import re
|
2019-03-11 01:44:26 -04:00
|
|
|
import yaml
|
2018-10-29 17:51:08 -04:00
|
|
|
import json
|
2018-10-14 02:01:35 -04:00
|
|
|
|
2020-08-11 11:46:41 -04:00
|
|
|
from socket import gethostname
|
|
|
|
from threading import Thread
|
|
|
|
from ipaddress import ip_address, ip_network
|
|
|
|
from apscheduler.schedulers.background import BackgroundScheduler
|
2020-01-09 10:53:27 -05:00
|
|
|
from distutils.util import strtobool
|
2020-06-06 13:23:24 -04:00
|
|
|
from queue import Queue
|
2020-06-06 15:31:26 -04:00
|
|
|
from xml.etree import ElementTree
|
2020-06-06 21:12:51 -04:00
|
|
|
from rados import Rados
|
2020-06-06 15:31:26 -04:00
|
|
|
|
2021-06-09 13:28:31 -04:00
|
|
|
from daemon_lib.zkhandler import ZKHandler
|
2021-05-30 14:48:41 -04:00
|
|
|
|
2020-02-08 19:16:19 -05:00
|
|
|
import pvcnoded.fencing as fencing
|
2021-06-01 18:50:26 -04:00
|
|
|
import daemon_lib.log as log
|
2021-06-01 12:17:25 -04:00
|
|
|
import daemon_lib.common as common
|
2020-02-08 19:16:19 -05:00
|
|
|
|
|
|
|
import pvcnoded.VMInstance as VMInstance
|
|
|
|
import pvcnoded.NodeInstance as NodeInstance
|
|
|
|
import pvcnoded.VXNetworkInstance as VXNetworkInstance
|
Implement SR-IOV PF and VF instances
Adds support for the node daemon managing SR-IOV PF and VF instances.
PFs are added to Zookeeper automatically based on the config at startup
during network configuration, and are otherwise completely static. PFs
are automatically removed from Zookeeper, along with all corresponding
VFs, should the PF phy device be removed from the configuration.
VFs are configured based on the (autocreated) VFs of each PF device,
added to Zookeeper, and then a new class instance, SRIOVVFInstance, is
used to watch them for configuration changes. This will enable the
runtime management of VF settings by the API. The set of keys ensures
that both configuration and details of the NIC can be tracked.
Most keys are self-explanatory, especially for PFs and the basic keys
for VFs. The configuration tree is also self-explanatory, being based
entirely on the options available in the `ip link set {dev} vf` command.
Two additional keys are also present: `used` and `used_by`, which will
be able to track the (boolean) state of usage, as well as the VM that
uses a given VIF. Since the VM side implementation will support both
macvtap and direct "hostdev" assignments, this will ensure that this
state can be tracked on both the VF and the VM side.
2021-06-17 01:01:23 -04:00
|
|
|
import pvcnoded.SRIOVVFInstance as SRIOVVFInstance
|
2020-02-08 19:16:19 -05:00
|
|
|
import pvcnoded.DNSAggregatorInstance as DNSAggregatorInstance
|
|
|
|
import pvcnoded.CephInstance as CephInstance
|
|
|
|
import pvcnoded.MetadataAPIInstance as MetadataAPIInstance
|
2018-10-14 02:01:35 -04:00
|
|
|
|
2020-11-06 20:39:18 -05:00
|
|
|
# Version string for startup output
# Must track the release version of the PVC suite as a whole
version = '0.9.23'
|
2020-11-06 20:39:18 -05:00
|
|
|
|
2018-10-14 02:01:35 -04:00
|
|
|
###############################################################################
|
|
|
|
# PVCD - node daemon startup program
|
|
|
|
###############################################################################
|
2019-06-18 12:36:53 -04:00
|
|
|
#
|
2018-10-14 02:01:35 -04:00
|
|
|
# The PVC daemon starts a node and configures all the required components for
|
|
|
|
# the node to run. It determines which of the 3 daemon modes it should be in
|
|
|
|
# during initial setup based on hostname and the config file, and then starts
|
|
|
|
# any required services. The 3 daemon modes are:
|
|
|
|
# * leader: the cluster leader, follows the Zookeeper leader
|
|
|
|
# * coordinator: a Zookeeper cluster member
|
|
|
|
# * hypervisor: a hypervisor without any cluster intelligence
|
|
|
|
#
|
|
|
|
###############################################################################
|
|
|
|
|
|
|
|
###############################################################################
|
|
|
|
# Daemon functions
|
|
|
|
###############################################################################
|
|
|
|
|
2021-06-08 23:17:07 -04:00
|
|
|
# Ensure the update_timer is None until it's set for real
# (startKeepaliveTimer() later replaces this with a BackgroundScheduler
# instance; stopKeepaliveTimer() tolerates it still being None)
update_timer = None
|
|
|
|
|
2020-11-07 14:45:24 -05:00
|
|
|
|
2018-10-14 02:01:35 -04:00
|
|
|
# Create timer to update this node in Zookeeper
def startKeepaliveTimer():
    """Start the background keepalive timer for this node.

    Creates a BackgroundScheduler that runs node_keepalive() every
    config['keepalive_interval'] seconds, runs one immediate keepalive so
    the node state is fresh at startup, and returns the scheduler.

    Also rebinds the module-level ``update_timer`` global: previously only
    a local name was assigned, so stopKeepaliveTimer() (which declares
    ``global update_timer``) could act on a stale or None timer unless the
    caller remembered to assign the return value to the global itself.
    Setting the global here makes the start/stop pair self-consistent
    while remaining backward compatible for callers using the return value.
    """
    global update_timer

    # Create our timer object
    update_timer = BackgroundScheduler()
    interval = int(config['keepalive_interval'])
    logger.out('Starting keepalive timer ({} second interval)'.format(interval), state='s')
    update_timer.add_job(node_keepalive, 'interval', seconds=interval)
    update_timer.start()
    # Fire one keepalive right away rather than waiting a full interval
    node_keepalive()
    return update_timer
|
2018-10-14 02:01:35 -04:00
|
|
|
|
2020-11-07 14:45:24 -05:00
|
|
|
|
2018-10-14 02:01:35 -04:00
|
|
|
def stopKeepaliveTimer():
    """Stop the global keepalive timer, ignoring any failure.

    Safe to call on any shutdown path: if the timer was never started
    (update_timer is still None) or has already been shut down, the
    resulting exception is deliberately swallowed.
    """
    global update_timer
    try:
        update_timer.shutdown()
        logger.out('Stopping keepalive timer', state='s')
    except Exception:
        # Timer not running (or update_timer is None); nothing to stop
        pass
|
|
|
|
|
2020-11-07 13:17:49 -05:00
|
|
|
|
2018-10-14 02:01:35 -04:00
|
|
|
###############################################################################
# PHASE 1a - Configuration parsing
###############################################################################

# Get the config file variable from the environment
try:
    pvcnoded_config_file = os.environ['PVCD_CONFIG_FILE']
except KeyError:
    # A missing key is the only failure mode of os.environ[...]; catching
    # KeyError specifically avoids masking unrelated errors behind this
    # generic message.
    print('ERROR: The "PVCD_CONFIG_FILE" environment variable must be set before starting pvcnoded.')
    exit(1)
|
|
|
|
|
|
|
|
# Set local hostname and domain variables
# myfqdn is the full name; myhostname is everything before the first dot,
# mydomainname everything after it (empty when the FQDN has no domain part).
myfqdn = gethostname()
myhostname, _, mydomainname = myfqdn.partition('.')
# Derive the node ID from the last run of digits in the short hostname
# (e.g. "pvchv3" -> "3"); fall back to 1 when the name contains no digits.
node_digit_groups = re.findall(r'\d+', myhostname)
mynodeid = node_digit_groups[-1] if node_digit_groups else 1
|
2018-10-14 02:01:35 -04:00
|
|
|
|
2020-01-09 10:53:27 -05:00
|
|
|
# Maintenance mode off by default; toggled at runtime elsewhere
maintenance = False
|
|
|
|
|
2018-10-14 02:01:35 -04:00
|
|
|
# Gather useful data about our host
# Static data format (by index): 0 = cpu_count, 1 = kernel (uname -r),
# 2 = os (uname -o), 3 = arch (uname -m); this ordering matches the
# startup-banner lines that read staticdata[0..3] below.
staticdata = [str(psutil.cpu_count())]
for uname_flag in ('-r', '-o', '-m'):
    uname_output = subprocess.run(['uname', uname_flag], stdout=subprocess.PIPE)
    staticdata.append(uname_output.stdout.decode('ascii').strip())
|
|
|
|
|
2020-11-07 14:45:24 -05:00
|
|
|
|
2018-10-14 02:01:35 -04:00
|
|
|
# Read and parse the config file
def readConfig(pvcnoded_config_file, myhostname):
    """Load the pvcnoded YAML configuration and flatten it into a dict.

    Reads the YAML file at pvcnoded_config_file, then builds a single flat
    ``config`` dict in sections: general (always), debug (optional,
    defaults to False), networking (only if enable_networking), and
    storage (only if enable_storage). Any parse or missing-key error
    prints a message and exits the process.

    Also uses the module globals ``mynodeid`` (for 'by-id' address
    autoselection) and ``mydomainname`` (for the default IPMI hostname).

    Returns the assembled config dict.
    """
    print('Loading configuration from file "{}"'.format(pvcnoded_config_file))

    with open(pvcnoded_config_file, 'r') as cfgfile:
        try:
            # SafeLoader: never construct arbitrary Python objects from config
            o_config = yaml.load(cfgfile, Loader=yaml.SafeLoader)
        except Exception as e:
            print('ERROR: Failed to parse configuration file: {}'.format(e))
            exit(1)

    # Handle the basic config (hypervisor-only)
    try:
        config_general = {
            'coordinators': o_config['pvc']['cluster']['coordinators'],
            'enable_hypervisor': o_config['pvc']['functions']['enable_hypervisor'],
            'enable_networking': o_config['pvc']['functions']['enable_networking'],
            'enable_storage': o_config['pvc']['functions']['enable_storage'],
            'enable_api': o_config['pvc']['functions']['enable_api'],
            'dynamic_directory': o_config['pvc']['system']['configuration']['directories']['dynamic_directory'],
            'log_directory': o_config['pvc']['system']['configuration']['directories']['log_directory'],
            'console_log_directory': o_config['pvc']['system']['configuration']['directories']['console_log_directory'],
            'file_logging': o_config['pvc']['system']['configuration']['logging']['file_logging'],
            'stdout_logging': o_config['pvc']['system']['configuration']['logging']['stdout_logging'],
            'log_colours': o_config['pvc']['system']['configuration']['logging']['log_colours'],
            'log_dates': o_config['pvc']['system']['configuration']['logging']['log_dates'],
            'log_keepalives': o_config['pvc']['system']['configuration']['logging']['log_keepalives'],
            'log_keepalive_cluster_details': o_config['pvc']['system']['configuration']['logging']['log_keepalive_cluster_details'],
            'log_keepalive_storage_details': o_config['pvc']['system']['configuration']['logging']['log_keepalive_storage_details'],
            'console_log_lines': o_config['pvc']['system']['configuration']['logging']['console_log_lines'],
            # Interval values are cast to int so downstream arithmetic is safe
            'vm_shutdown_timeout': int(o_config['pvc']['system']['intervals']['vm_shutdown_timeout']),
            'keepalive_interval': int(o_config['pvc']['system']['intervals']['keepalive_interval']),
            'fence_intervals': int(o_config['pvc']['system']['intervals']['fence_intervals']),
            'suicide_intervals': int(o_config['pvc']['system']['intervals']['suicide_intervals']),
            'successful_fence': o_config['pvc']['system']['fencing']['actions']['successful_fence'],
            'failed_fence': o_config['pvc']['system']['fencing']['actions']['failed_fence'],
            'migration_target_selector': o_config['pvc']['system']['migration']['target_selector'],
            'ipmi_hostname': o_config['pvc']['system']['fencing']['ipmi']['host'],
            'ipmi_username': o_config['pvc']['system']['fencing']['ipmi']['user'],
            'ipmi_password': o_config['pvc']['system']['fencing']['ipmi']['pass']
        }
    except Exception as e:
        print('ERROR: Failed to load configuration: {}'.format(e))
        exit(1)
    config = config_general

    # Handle debugging config; absent 'debug' key simply means debug off
    try:
        config_debug = {
            'debug': o_config['pvc']['debug']
        }
    except Exception:
        config_debug = {
            'debug': False
        }
    config = {**config, **config_debug}

    # Handle the networking config (only parsed when networking is enabled;
    # note that 'enable_sriov' and 'sriov_device' keys exist only in this case)
    if config['enable_networking']:
        try:
            config_networking = {
                'cluster_domain': o_config['pvc']['cluster']['networks']['cluster']['domain'],
                'vni_floating_ip': o_config['pvc']['cluster']['networks']['cluster']['floating_ip'],
                'vni_network': o_config['pvc']['cluster']['networks']['cluster']['network'],
                'storage_domain': o_config['pvc']['cluster']['networks']['storage']['domain'],
                'storage_floating_ip': o_config['pvc']['cluster']['networks']['storage']['floating_ip'],
                'storage_network': o_config['pvc']['cluster']['networks']['storage']['network'],
                'upstream_domain': o_config['pvc']['cluster']['networks']['upstream']['domain'],
                'upstream_floating_ip': o_config['pvc']['cluster']['networks']['upstream']['floating_ip'],
                'upstream_network': o_config['pvc']['cluster']['networks']['upstream']['network'],
                'upstream_gateway': o_config['pvc']['cluster']['networks']['upstream']['gateway'],
                'pdns_postgresql_host': o_config['pvc']['coordinator']['dns']['database']['host'],
                'pdns_postgresql_port': o_config['pvc']['coordinator']['dns']['database']['port'],
                'pdns_postgresql_dbname': o_config['pvc']['coordinator']['dns']['database']['name'],
                'pdns_postgresql_user': o_config['pvc']['coordinator']['dns']['database']['user'],
                'pdns_postgresql_password': o_config['pvc']['coordinator']['dns']['database']['pass'],
                'metadata_postgresql_host': o_config['pvc']['coordinator']['metadata']['database']['host'],
                'metadata_postgresql_port': o_config['pvc']['coordinator']['metadata']['database']['port'],
                'metadata_postgresql_dbname': o_config['pvc']['coordinator']['metadata']['database']['name'],
                'metadata_postgresql_user': o_config['pvc']['coordinator']['metadata']['database']['user'],
                'metadata_postgresql_password': o_config['pvc']['coordinator']['metadata']['database']['pass'],
                'bridge_dev': o_config['pvc']['system']['configuration']['networking']['bridge_device'],
                'vni_dev': o_config['pvc']['system']['configuration']['networking']['cluster']['device'],
                'vni_mtu': o_config['pvc']['system']['configuration']['networking']['cluster']['mtu'],
                'vni_dev_ip': o_config['pvc']['system']['configuration']['networking']['cluster']['address'],
                'storage_dev': o_config['pvc']['system']['configuration']['networking']['storage']['device'],
                'storage_mtu': o_config['pvc']['system']['configuration']['networking']['storage']['mtu'],
                'storage_dev_ip': o_config['pvc']['system']['configuration']['networking']['storage']['address'],
                'upstream_dev': o_config['pvc']['system']['configuration']['networking']['upstream']['device'],
                'upstream_mtu': o_config['pvc']['system']['configuration']['networking']['upstream']['mtu'],
                'upstream_dev_ip': o_config['pvc']['system']['configuration']['networking']['upstream']['address'],
            }

            # Check if SR-IOV is enabled and activate
            config_networking['enable_sriov'] = o_config['pvc']['system']['configuration']['networking'].get('sriov_enable', False)
            if config_networking['enable_sriov']:
                config_networking['sriov_device'] = list(o_config['pvc']['system']['configuration']['networking']['sriov_device'])

        except Exception as e:
            print('ERROR: Failed to load configuration: {}'.format(e))
            exit(1)
        config = {**config, **config_networking}

        # Create the by-id address entries
        for net in ['vni', 'storage', 'upstream']:
            address_key = '{}_dev_ip'.format(net)
            floating_key = '{}_floating_ip'.format(net)
            network_key = '{}_network'.format(net)

            # Verify the network provided is valid
            try:
                network = ip_network(config[network_key])
            except Exception:
                print('ERROR: Network address {} for {} is not valid!'.format(config[network_key], network_key))
                exit(1)

            # If we should be autoselected ('by-id' placeholder in config)
            if config[address_key] == 'by-id':
                # Construct an IP from the relevant network
                # The NodeID starts at 1, but indexes start at 0
                address_id = int(mynodeid) - 1
                # Grab the nth address from the network, in CIDR form
                config[address_key] = '{}/{}'.format(list(network.hosts())[address_id], network.prefixlen)

            # Verify that the floating IP is valid
            try:
                # Set the ipaddr (strip any /prefix from the configured value)
                floating_addr = ip_address(config[floating_key].split('/')[0])
                # Verify we're in the network; the bare raise (no active
                # exception) produces a RuntimeError which is caught below
                if floating_addr not in list(network.hosts()):
                    raise
            except Exception:
                print('ERROR: Floating address {} for {} is not valid!'.format(config[floating_key], floating_key))
                exit(1)

    # Handle the storage config (only parsed when storage is enabled)
    if config['enable_storage']:
        try:
            config_storage = {
                'ceph_config_file': o_config['pvc']['system']['configuration']['storage']['ceph_config_file'],
                'ceph_admin_keyring': o_config['pvc']['system']['configuration']['storage']['ceph_admin_keyring']
            }
        except Exception as e:
            print('ERROR: Failed to load configuration: {}'.format(e))
            exit(1)
        config = {**config, **config_storage}

    # Handle an empty ipmi_hostname: default to <host>-lom.<domain>
    if config['ipmi_hostname'] == '':
        config['ipmi_hostname'] = myhostname + '-lom.' + mydomainname

    return config
|
|
|
|
|
2020-11-07 13:17:49 -05:00
|
|
|
|
2018-10-14 02:01:35 -04:00
|
|
|
# Get the config object from readConfig()
config = readConfig(pvcnoded_config_file, myhostname)
debug = config['debug']
if debug:
    print('DEBUG MODE ENABLED')

# Handle the enable values
enable_hypervisor = config['enable_hypervisor']
enable_networking = config['enable_networking']
# readConfig() only sets 'enable_sriov' inside its networking branch, so on
# a node with enable_networking false the key is absent; default to False
# instead of raising KeyError at startup.
enable_sriov = config.get('enable_sriov', False)
enable_storage = config['enable_storage']
|
2018-10-14 02:01:35 -04:00
|
|
|
|
|
|
|
###############################################################################
# PHASE 1b - Prepare filesystem directories
###############################################################################

# Define our dynamic directory schema
# <dynamic_directory>/
#     dnsmasq/
#     pdns/
#     nft/
config['dnsmasq_dynamic_directory'] = config['dynamic_directory'] + '/dnsmasq'
config['pdns_dynamic_directory'] = config['dynamic_directory'] + '/pdns'
config['nft_dynamic_directory'] = config['dynamic_directory'] + '/nft'

# Create our dynamic directories if they don't exist.
# Previously the subdirectories were only created when the parent was
# missing, so a pre-existing parent with missing children was never
# repaired; exist_ok=True handles every combination idempotently.
for dynamic_dir_key in ('dynamic_directory', 'dnsmasq_dynamic_directory', 'pdns_dynamic_directory', 'nft_dynamic_directory'):
    os.makedirs(config[dynamic_dir_key], exist_ok=True)
|
|
|
|
|
|
|
|
# Define our log directory schema
# <log_directory>/
#     dnsmasq/
#     pdns/
#     nft/
config['dnsmasq_log_directory'] = config['log_directory'] + '/dnsmasq'
config['pdns_log_directory'] = config['log_directory'] + '/pdns'
config['nft_log_directory'] = config['log_directory'] + '/nft'

# Create our log directories if they don't exist.
# As with the dynamic directories above, the subdirectories were only
# created when the parent was missing; exist_ok=True creates whichever
# of them are absent without erroring on the ones that already exist.
for log_dir_key in ('log_directory', 'dnsmasq_log_directory', 'pdns_log_directory', 'nft_log_directory'):
    os.makedirs(config[log_dir_key], exist_ok=True)
|
|
|
|
|
|
|
|
###############################################################################
# PHASE 1c - Set up logging
###############################################################################

# Instantiate the daemon logger; it honours the file/stdout/colour/date
# options parsed into config above
logger = log.Logger(config)
|
|
|
|
|
|
|
|
# Print our startup messages: an ASCII-art banner followed by the version,
# debug flag, identity, and host details gathered into staticdata earlier.
# staticdata indices: 0 = cpu_count, 1 = kernel, 2 = os, 3 = arch.
logger.out('')
logger.out('|----------------------------------------------------------|')
logger.out('| |')
logger.out('| ███████████ ▜█▙ ▟█▛ █████ █ █ █ |')
logger.out('| ██ ▜█▙ ▟█▛ ██ |')
logger.out('| ███████████ ▜█▙ ▟█▛ ██ |')
logger.out('| ██ ▜█▙▟█▛ ███████████ |')
logger.out('| |')
logger.out('|----------------------------------------------------------|')
logger.out('| Parallel Virtual Cluster node daemon v{0: <18} |'.format(version))
logger.out('| Debug: {0: <49} |'.format(str(config['debug'])))
logger.out('| FQDN: {0: <50} |'.format(myfqdn))
logger.out('| Host: {0: <50} |'.format(myhostname))
logger.out('| ID: {0: <52} |'.format(mynodeid))
logger.out('| IPMI hostname: {0: <41} |'.format(config['ipmi_hostname']))
logger.out('| Machine details: |')
logger.out('| CPUs: {0: <48} |'.format(staticdata[0]))
logger.out('| Arch: {0: <48} |'.format(staticdata[3]))
logger.out('| OS: {0: <50} |'.format(staticdata[2]))
logger.out('| Kernel: {0: <46} |'.format(staticdata[1]))
logger.out('|----------------------------------------------------------|')
logger.out('')

logger.out('Starting pvcnoded on host {}'.format(myfqdn), state='s')
|
2018-10-14 02:01:35 -04:00
|
|
|
|
2019-07-10 21:39:25 -04:00
|
|
|
# Define some colours for future messages if applicable
# When log_colours is enabled, bind the logger's ANSI format strings;
# otherwise bind empty strings so formatted messages carry no escape codes.
colours_enabled = config['log_colours']
fmt_end = logger.fmt_end if colours_enabled else ''
fmt_bold = logger.fmt_bold if colours_enabled else ''
fmt_blue = logger.fmt_blue if colours_enabled else ''
fmt_cyan = logger.fmt_cyan if colours_enabled else ''
fmt_green = logger.fmt_green if colours_enabled else ''
fmt_yellow = logger.fmt_yellow if colours_enabled else ''
fmt_red = logger.fmt_red if colours_enabled else ''
fmt_purple = logger.fmt_purple if colours_enabled else ''
|
|
|
|
|
2018-10-14 02:58:02 -04:00
|
|
|
###############################################################################
# PHASE 2a - Activate SR-IOV support
###############################################################################

# This happens before other networking steps to enable using VFs for cluster functions.
if enable_networking and enable_sriov:
    logger.out('Setting up SR-IOV device support', state='i')
    # Enable unsafe interrupts for the vfio_iommu_type1 kernel module
    try:
        common.run_os_command('modprobe vfio_iommu_type1 allow_unsafe_interrupts=1')
        with open('/sys/module/vfio_iommu_type1/parameters/allow_unsafe_interrupts', 'w') as mfh:
            mfh.write('Y')
    except Exception:
        logger.out('Failed to enable kernel modules; SR-IOV may fail.', state='w')

    # Loop through our SR-IOV NICs and enable the numvfs for each
    for device in config['sriov_device']:
        logger.out('Preparing SR-IOV PF {} with {} VFs'.format(device['phy'], device['vfcount']), state='i')
        # Initialize before the try so the OSError handler below cannot hit
        # a NameError if the initial read-open itself raises an OSError
        # other than FileNotFoundError.
        current_sriov_count = None
        try:
            with open('/sys/class/net/{}/device/sriov_numvfs'.format(device['phy']), 'r') as vfh:
                current_sriov_count = vfh.read().strip()
            with open('/sys/class/net/{}/device/sriov_numvfs'.format(device['phy']), 'w') as vfh:
                vfh.write(str(device['vfcount']))
        except FileNotFoundError:
            logger.out('Failed to open SR-IOV configuration for PF {}; device may not support SR-IOV.'.format(device), state='w')
        except OSError:
            # The kernel rejects writes to sriov_numvfs while VFs are active
            logger.out('Failed to set SR-IOV VF count for PF {} to {}; already set to {}.'.format(device['phy'], device['vfcount'], current_sriov_count), state='w')

        # Optionally raise the PF MTU (VFs inherit a compatible maximum)
        if device.get('mtu', None) is not None:
            logger.out('Setting SR-IOV PF {} to MTU {}'.format(device['phy'], device['mtu']), state='i')
            common.run_os_command('ip link set {} mtu {} up'.format(device['phy'], device['mtu']))
|
|
|
|
|
|
|
|
|
|
|
|
###############################################################################
# PHASE 2b - Create local IP addresses for static networks
###############################################################################

if enable_networking:
    # VNI configuration
    vni_dev = config['vni_dev']
    vni_mtu = config['vni_mtu']
    vni_dev_ip = config['vni_dev_ip']
    logger.out('Setting up VNI network interface {} with MTU {}'.format(vni_dev, vni_mtu), state='i')
    common.run_os_command('ip link set {} mtu {} up'.format(vni_dev, vni_mtu))

    # Cluster bridge configuration
    logger.out('Setting up Cluster network bridge on interface {} with IP {}'.format(vni_dev, vni_dev_ip), state='i')
    common.run_os_command('brctl addbr brcluster')
    common.run_os_command('brctl addif brcluster {}'.format(vni_dev))
    common.run_os_command('ip link set brcluster mtu {} up'.format(vni_mtu))
    common.run_os_command('ip address add {} dev {}'.format(vni_dev_ip, 'brcluster'))

    # Storage configuration
    storage_dev = config['storage_dev']
    storage_mtu = config['storage_mtu']
    storage_dev_ip = config['storage_dev_ip']
    # BUGFIX: log previously reported vni_mtu; the command below uses storage_mtu
    logger.out('Setting up Storage network interface {} with MTU {}'.format(storage_dev, storage_mtu), state='i')
    common.run_os_command('ip link set {} mtu {} up'.format(storage_dev, storage_mtu))

    # Storage bridge configuration
    if storage_dev == vni_dev:
        # Shared physical device: just add the Storage IP to the Cluster bridge
        logger.out('Adding Storage network IP {} to VNI Cluster bridge brcluster'.format(storage_dev_ip), state='i')
        common.run_os_command('ip address add {} dev {}'.format(storage_dev_ip, 'brcluster'))
    else:
        # BUGFIX: log previously reported the VNI device/IP instead of Storage's
        logger.out('Setting up Storage network bridge on interface {} with IP {}'.format(storage_dev, storage_dev_ip), state='i')
        common.run_os_command('brctl addbr brstorage')
        common.run_os_command('brctl addif brstorage {}'.format(storage_dev))
        common.run_os_command('ip link set brstorage mtu {} up'.format(storage_mtu))
        common.run_os_command('ip address add {} dev {}'.format(storage_dev_ip, 'brstorage'))

    # Upstream configuration
    upstream_dev = config['upstream_dev']
    upstream_mtu = config['upstream_mtu']
    upstream_dev_ip = config['upstream_dev_ip']
    logger.out('Setting up Upstream network interface {} with MTU {}'.format(upstream_dev, upstream_mtu), state='i')
    common.run_os_command('ip link set {} mtu {} up'.format(upstream_dev, upstream_mtu))

    # Upstream bridge configuration
    if upstream_dev == vni_dev:
        # Shared physical device: just add the Upstream IP to the Cluster bridge
        logger.out('Adding Upstream network IP {} to VNI Cluster bridge brcluster'.format(upstream_dev_ip), state='i')
        common.run_os_command('ip address add {} dev {}'.format(upstream_dev_ip, 'brcluster'))
    else:
        # BUGFIX: log previously reported the VNI device/IP instead of Upstream's
        logger.out('Setting up Upstream network bridge on interface {} with IP {}'.format(upstream_dev, upstream_dev_ip), state='i')
        common.run_os_command('brctl addbr brupstream')
        common.run_os_command('brctl addif brupstream {}'.format(upstream_dev))
        common.run_os_command('ip link set brupstream mtu {} up'.format(upstream_mtu))
        common.run_os_command('ip address add {} dev {}'.format(upstream_dev_ip, 'brupstream'))

    # Add upstream default gateway, if one is configured
    upstream_gateway = config.get('upstream_gateway', None)
    if upstream_gateway:
        logger.out('Setting up Upstream default gateway IP {}'.format(upstream_gateway), state='i')
        if upstream_dev == vni_dev:
            common.run_os_command('ip route add default via {} dev {}'.format(upstream_gateway, 'brcluster'))
        else:
            common.run_os_command('ip route add default via {} dev {}'.format(upstream_gateway, 'brupstream'))
|
2019-03-17 00:53:11 -04:00
|
|
|
|
2019-03-17 20:05:58 -04:00
|
|
|
###############################################################################
# PHASE 2c - Prepare sysctl for pvcnoded
###############################################################################

if enable_networking:
    # Enable routing functions
    common.run_os_command('sysctl net.ipv4.ip_forward=1')
    # BUGFIX: 'net.ipv6.ip_forward' is not a valid Linux sysctl key and the
    # previous command failed silently; IPv6 forwarding is controlled by
    # net.ipv6.conf.all.forwarding.
    common.run_os_command('sysctl net.ipv6.conf.all.forwarding=1')

    # Send redirects
    common.run_os_command('sysctl net.ipv4.conf.all.send_redirects=1')
    common.run_os_command('sysctl net.ipv4.conf.default.send_redirects=1')
    common.run_os_command('sysctl net.ipv6.conf.all.send_redirects=1')
    common.run_os_command('sysctl net.ipv6.conf.default.send_redirects=1')

    # Accept source routes
    common.run_os_command('sysctl net.ipv4.conf.all.accept_source_route=1')
    common.run_os_command('sysctl net.ipv4.conf.default.accept_source_route=1')
    common.run_os_command('sysctl net.ipv6.conf.all.accept_source_route=1')
    common.run_os_command('sysctl net.ipv6.conf.default.accept_source_route=1')

    # Disable RP filtering on the VNI Cluster and Upstream interfaces (to allow traffic pivoting)
    common.run_os_command('sysctl net.ipv4.conf.{}.rp_filter=0'.format(config['vni_dev']))
    common.run_os_command('sysctl net.ipv4.conf.{}.rp_filter=0'.format(config['upstream_dev']))
    common.run_os_command('sysctl net.ipv4.conf.brcluster.rp_filter=0')
    common.run_os_command('sysctl net.ipv4.conf.brupstream.rp_filter=0')
    common.run_os_command('sysctl net.ipv6.conf.{}.rp_filter=0'.format(config['vni_dev']))
    common.run_os_command('sysctl net.ipv6.conf.{}.rp_filter=0'.format(config['upstream_dev']))
    common.run_os_command('sysctl net.ipv6.conf.brcluster.rp_filter=0')
    common.run_os_command('sysctl net.ipv6.conf.brupstream.rp_filter=0')
|
2019-03-17 20:05:58 -04:00
|
|
|
|
2019-03-17 00:53:11 -04:00
|
|
|
###############################################################################
# PHASE 3a - Determine coordinator mode
###############################################################################

# What is the list of coordinator hosts
coordinator_nodes = config['coordinators']

if myhostname in coordinator_nodes:
    # We are indeed a coordinator host
    config['daemon_mode'] = 'coordinator'
    # NOTE: the Zookeeper/Patroni/etc. services themselves are started later,
    # in PHASE 3b, based on this daemon_mode value
    logger.out('Node is a ' + fmt_blue + 'coordinator' + fmt_end, state='i')
else:
    # Not listed as a coordinator: run as a plain hypervisor node
    config['daemon_mode'] = 'hypervisor'
|
|
|
|
|
2019-03-17 01:45:17 -04:00
|
|
|
###############################################################################
# PHASE 3b - Start system daemons
###############################################################################

# Coordinators run the Zookeeper ensemble member locally
if config['daemon_mode'] == 'coordinator':
    logger.out('Starting Zookeeper daemon', state='i')
    common.run_os_command('systemctl start zookeeper.service')

# Libvirt is only needed when this node hosts VMs
if enable_hypervisor:
    logger.out('Starting Libvirt daemon', state='i')
    common.run_os_command('systemctl start libvirtd.service')

# Coordinators additionally run the PostgreSQL (Patroni) and routing (FRR)
# services backing the managed networks
if enable_networking:
    if config['daemon_mode'] == 'coordinator':
        logger.out('Starting Patroni daemon', state='i')
        common.run_os_command('systemctl start patroni.service')
        logger.out('Starting FRRouting daemon', state='i')
        common.run_os_command('systemctl start frr.service')

# Coordinators run the Ceph monitor and manager for the storage cluster
if enable_storage:
    if config['daemon_mode'] == 'coordinator':
        logger.out('Starting Ceph monitor daemon', state='i')
        common.run_os_command('systemctl start ceph-mon@{}'.format(myhostname))
        logger.out('Starting Ceph manager daemon', state='i')
        common.run_os_command('systemctl start ceph-mgr@{}'.format(myhostname))

# Fixed settle delay so the services above are accepting connections before
# the Zookeeper client connection in PHASE 4
logger.out('Waiting 5s for daemons to start', state='s')
time.sleep(5)
|
2019-03-17 01:45:17 -04:00
|
|
|
|
2018-10-14 02:01:35 -04:00
|
|
|
###############################################################################
# PHASE 4 - Attempt to connect to the coordinators and start zookeeper client
###############################################################################

# Create an instance of the handler
zkhandler = ZKHandler(config, logger=logger)

try:
    logger.out('Connecting to Zookeeper cluster nodes {}'.format(config['coordinators']), state='i')
    # Start connection; persistent=True presumably keeps the session retrying
    # across drops — see ZKHandler for the exact semantics
    zkhandler.connect(persistent=True)
except Exception as e:
    # Without Zookeeper the daemon cannot function at all; abort startup
    logger.out('ERROR: Failed to connect to Zookeeper cluster: {}'.format(e), state='e')
    exit(1)
|
|
|
|
|
2021-06-08 22:19:47 -04:00
|
|
|
logger.out('Validating Zookeeper schema', state='i')

# Determine the schema version this node is actively running. If the node has
# never recorded one (first boot), seed it from the cluster-wide version.
try:
    node_schema_version = int(zkhandler.read(('node.data.active_schema', myhostname)))
except Exception:
    # BUGFIX: read the raw value before converting. Previously the value was
    # wrapped in int(...) immediately, which made the subsequent None check
    # unreachable and raised TypeError if the base key was missing/empty.
    node_schema_version = zkhandler.read('base.schema.version')
    if node_schema_version is None:
        node_schema_version = 0
    node_schema_version = int(node_schema_version)
    zkhandler.write([
        (('node.data.active_schema', myhostname), node_schema_version)
    ])

# Load in the current node schema version
zkhandler.schema.load(node_schema_version)

# Record the latest installed schema version
latest_schema_version = zkhandler.schema.find_latest()
logger.out('Latest installed schema is {}'.format(latest_schema_version), state='i')
zkhandler.write([
    (('node.data.latest_schema', myhostname), latest_schema_version)
])
|
|
|
|
|
2020-11-07 14:45:24 -05:00
|
|
|
|
2021-06-08 23:17:07 -04:00
|
|
|
# Watch for a global schema update and fire
# This will only change by the API when triggered after seeing all nodes can update
@zkhandler.zk_conn.DataWatch(zkhandler.schema.path('base.schema.version'))
def update_schema(new_schema_version, stat, event=''):
    """Hot-update the local Zookeeper schema when the cluster version changes.

    Fired by the DataWatch on base.schema.version. The primary node performs
    the actual migration under an exclusive lock; other nodes wait on a read
    lock until the migration completes. Afterwards every node records the new
    active schema version, restarts dependent services, and re-execs itself
    so it starts fresh against the new schema.
    """
    global zkhandler, update_timer, node_schema_version

    # Watch payload is raw bytes (or None on delete); default to 0 if unparseable
    try:
        new_schema_version = int(new_schema_version.decode('ascii'))
    except Exception:
        new_schema_version = 0

    # No-op if we are already on the announced version
    if new_schema_version == node_schema_version:
        return True

    logger.out('Hot update of schema version started', state='s')
    logger.out('Current version: {} New version: {}'.format(node_schema_version, new_schema_version), state='s')

    # Prevent any keepalive updates while this happens
    if update_timer is not None:
        stopKeepaliveTimer()
        time.sleep(1)

    # Perform the migration (primary only)
    if zkhandler.read('base.config.primary_node') == myhostname:
        logger.out('Primary node acquiring exclusive lock', state='s')
        # Wait for things to settle
        time.sleep(0.5)
        # Acquire a write lock on the root key
        with zkhandler.exclusivelock('base.schema.version'):
            # Perform the schema migration tasks
            logger.out('Performing schema update', state='s')
            if new_schema_version > node_schema_version:
                zkhandler.schema.migrate(zkhandler, new_schema_version)
            if new_schema_version < node_schema_version:
                zkhandler.schema.rollback(zkhandler, new_schema_version)
    # Wait for the exclusive lock to be lifted
    else:
        logger.out('Non-primary node acquiring read lock', state='s')
        # Wait for things to settle
        time.sleep(1)
        # Wait for a read lock; acquisition blocks until the primary's
        # exclusive lock (migration) is released
        lock = zkhandler.readlock('base.schema.version')
        lock.acquire()
        # Wait a bit more for the primary to return to normal
        # NOTE(review): the read lock is never explicitly released; this is
        # presumably tolerable only because the process re-execs below
        time.sleep(1)

    # Update the local schema version
    logger.out('Updating node target schema version', state='s')
    zkhandler.write([
        (('node.data.active_schema', myhostname), new_schema_version)
    ])
    node_schema_version = new_schema_version

    # Restart the API daemons if applicable
    logger.out('Restarting services', state='s')
    common.run_os_command('systemctl restart pvcapid-worker.service')
    if zkhandler.read('base.config.primary_node') == myhostname:
        common.run_os_command('systemctl restart pvcapid.service')

    # Restart ourselves with the new schema
    logger.out('Reloading node daemon', state='s')
    # Best-effort disconnect; the exec below replaces the process regardless
    try:
        zkhandler.disconnect()
        del zkhandler
    except Exception:
        pass
    # Replace this process image in-place with a fresh daemon instance
    os.execv(sys.argv[0], sys.argv)
|
2021-06-14 12:52:43 -04:00
|
|
|
|
2021-06-08 23:17:07 -04:00
|
|
|
|
|
|
|
# If we are the last node to get a schema update, fire the master update
if latest_schema_version > node_schema_version:
    # Collect every node's latest installed schema version from Zookeeper
    installed_versions = [
        int(zkhandler.read(('node.data.latest_schema', node)))
        for node in zkhandler.children('base.node')
    ]

    # Fire the cluster-wide update only once every node reports the latest
    # schema as installed and ready to load.
    if all(installed == latest_schema_version for installed in installed_versions):
        zkhandler.write([
            ('base.schema.version', latest_schema_version)
        ])

# Validate our schema against the active version, applying any fixes needed
if zkhandler.schema.validate(zkhandler, logger):
    logger.out('Schema successfully validated', state='o')
else:
    logger.out('Found schema violations, applying', state='i')
    zkhandler.schema.apply(zkhandler)
|
|
|
|
2018-10-14 02:01:35 -04:00
|
|
|
###############################################################################
|
2019-03-17 00:53:11 -04:00
|
|
|
# PHASE 5 - Gracefully handle termination
|
2018-10-14 02:01:35 -04:00
|
|
|
###############################################################################
|
|
|
|
|
2020-11-07 14:45:24 -05:00
|
|
|
|
2018-10-14 02:01:35 -04:00
|
|
|
# Cleanup function
def cleanup():
    """Gracefully tear down the daemon and exit the process.

    Order matters here: announce shutdown, wait for in-flight flushes,
    stop console watchers, relinquish primary state, stop keepalives,
    send a final keepalive, mark the node stopped, then disconnect.
    Ends with os._exit(0) to terminate all threads unconditionally.
    """
    global zkhandler, update_timer, d_domain

    logger.out('Terminating pvcnoded and cleaning up', state='s')

    # Set shutdown state in Zookeeper
    zkhandler.write([
        (('node.state.daemon', myhostname), 'shutdown')
    ])

    # Waiting for any flushes to complete
    logger.out('Waiting for any active flushes', state='s')
    while this_node.flush_thread is not None:
        time.sleep(0.5)

    # Stop console logging on all VMs
    logger.out('Stopping domain console watchers', state='s')
    for domain in d_domain:
        if d_domain[domain].getnode() == myhostname:
            # Best-effort: the watcher instance may never have been created
            try:
                d_domain[domain].console_log_instance.stop()
            except NameError:
                pass
            except AttributeError:
                pass

    # Force into secondary coordinator state if needed
    try:
        if this_node.router_state == 'primary':
            # Setting primary_node to 'none' triggers contention elsewhere;
            # wait until another node has taken over before proceeding
            zkhandler.write([
                ('base.config.primary_node', 'none')
            ])
            logger.out('Waiting for primary migration', state='s')
            while this_node.router_state != 'secondary':
                time.sleep(0.5)
    except Exception:
        pass

    # Stop keepalive thread (may not exist if startup failed early)
    try:
        stopKeepaliveTimer()
    except NameError:
        pass
    except AttributeError:
        pass

    logger.out('Performing final keepalive update', state='s')
    node_keepalive()

    # Set stop state in Zookeeper
    zkhandler.write([
        (('node.state.daemon', myhostname), 'stop')
    ])

    # Forcibly terminate dnsmasq because it gets stuck sometimes
    common.run_os_command('killall dnsmasq')

    # Close the Zookeeper connection
    try:
        zkhandler.disconnect()
        del zkhandler
    except Exception:
        pass

    logger.out('Terminated pvc daemon', state='s')
    # Hard exit: skip atexit handlers and kill any lingering threads
    os._exit(0)
|
2018-10-14 02:01:35 -04:00
|
|
|
|
2020-11-07 14:45:24 -05:00
|
|
|
|
2018-10-14 02:01:35 -04:00
|
|
|
# Termination function
def term(signum='', frame=''):
    """Signal handler for SIGTERM/SIGINT/SIGQUIT; runs full cleanup and exits."""
    cleanup()
|
2018-10-14 02:01:35 -04:00
|
|
|
|
2020-11-07 14:45:24 -05:00
|
|
|
|
2019-07-10 22:20:24 -04:00
|
|
|
# Hangup (logrotate) function
def hup(signum='', frame=''):
    """SIGHUP handler: reopen the log file so logrotate can swap it out."""
    if not config['file_logging']:
        return
    logger.hup()
|
|
|
|
|
2020-11-07 13:17:49 -05:00
|
|
|
|
2018-10-14 02:01:35 -04:00
|
|
|
# Handle signals gracefully
signal.signal(signal.SIGTERM, term)  # systemctl stop / kill
signal.signal(signal.SIGINT, term)   # Ctrl+C when run in the foreground
signal.signal(signal.SIGQUIT, term)
signal.signal(signal.SIGHUP, hup)    # logrotate: reopen the log file
|
2018-10-14 02:01:35 -04:00
|
|
|
|
|
|
|
###############################################################################
# PHASE 6 - Prepare host in Zookeeper
###############################################################################

# Check if our node exists in Zookeeper, and create it if not
if config['daemon_mode'] == 'coordinator':
    # Coordinators start as secondary; one is promoted to primary below
    init_routerstate = 'secondary'
else:
    # Hypervisors do not participate in routing
    init_routerstate = 'client'

if zkhandler.exists(('node', myhostname)):
    logger.out("Node is " + fmt_green + "present" + fmt_end + " in Zookeeper", state='i')
    # Update static data just in case it's changed
    zkhandler.write([
        (('node', myhostname), config['daemon_mode']),
        (('node.mode', myhostname), config['daemon_mode']),
        (('node.state.daemon', myhostname), 'init'),
        (('node.state.router', myhostname), init_routerstate),
        (('node.data.static', myhostname), ' '.join(staticdata)),
        (('node.data.pvc_version', myhostname), version),
        (('node.ipmi.hostname', myhostname), config['ipmi_hostname']),
        (('node.ipmi.username', myhostname), config['ipmi_username']),
        (('node.ipmi.password', myhostname), config['ipmi_password']),
    ])
else:
    logger.out("Node is " + fmt_red + "absent" + fmt_end + " in Zookeeper; adding new node", state='i')
    keepalive_time = int(time.time())
    # Seed the full set of per-node keys, with zeroed statistics that the
    # keepalive loop will populate once running
    zkhandler.write([
        (('node', myhostname), config['daemon_mode']),
        (('node.keepalive', myhostname), str(keepalive_time)),
        (('node.mode', myhostname), config['daemon_mode']),
        (('node.state.daemon', myhostname), 'init'),
        (('node.state.domain', myhostname), 'flushed'),
        (('node.state.router', myhostname), init_routerstate),
        (('node.data.static', myhostname), ' '.join(staticdata)),
        (('node.data.pvc_version', myhostname), version),
        (('node.ipmi.hostname', myhostname), config['ipmi_hostname']),
        (('node.ipmi.username', myhostname), config['ipmi_username']),
        (('node.ipmi.password', myhostname), config['ipmi_password']),
        (('node.memory.total', myhostname), '0'),
        (('node.memory.used', myhostname), '0'),
        (('node.memory.free', myhostname), '0'),
        (('node.memory.allocated', myhostname), '0'),
        (('node.memory.provisioned', myhostname), '0'),
        (('node.vcpu.allocated', myhostname), '0'),
        (('node.cpu.load', myhostname), '0.0'),
        (('node.running_domains', myhostname), '0'),
        (('node.count.provisioned_domains', myhostname), '0'),
        (('node.count.networks', myhostname), '0'),
    ])

# Check that the primary key exists, and create it with us as master if not
try:
    current_primary = zkhandler.read('base.config.primary_node')
except kazoo.exceptions.NoNodeError:
    current_primary = 'none'

if current_primary and current_primary != 'none':
    logger.out('Current primary node is {}{}{}.'.format(fmt_blue, current_primary, fmt_end), state='i')
else:
    # No primary exists yet; only a coordinator may claim the role
    if config['daemon_mode'] == 'coordinator':
        logger.out('No primary node found; setting us as primary.', state='i')
        zkhandler.write([
            ('base.config.primary_node', myhostname)
        ])
|
2018-10-14 02:01:35 -04:00
|
|
|
|
|
|
|
###############################################################################
# PHASE 7a - Ensure IPMI is reachable and working
###############################################################################

# Non-fatal: warn only, since the node itself can still run; only fencing
# of this node by its peers would be impacted
if not fencing.verifyIPMI(config['ipmi_hostname'], config['ipmi_username'], config['ipmi_password']):
    logger.out('Our IPMI is not reachable; fencing of this node will likely fail', state='w')

###############################################################################
# PHASE 7b - Ensure Libvirt is working
###############################################################################

if enable_hypervisor:
    # Check that libvirtd is listening TCP
    libvirt_check_name = "qemu+tcp://{}:16509/system".format(myhostname)
    logger.out('Connecting to Libvirt daemon at {}'.format(libvirt_check_name), state='i')
    # Open and immediately close a throwaway connection purely as a probe
    try:
        lv_conn = libvirt.open(libvirt_check_name)
        lv_conn.close()
    except Exception as e:
        # A hypervisor node without Libvirt is useless; abort startup
        logger.out('ERROR: Failed to connect to Libvirt daemon: {}'.format(e), state='e')
        exit(1)
|
2018-10-14 02:01:35 -04:00
|
|
|
|
|
|
|
###############################################################################
# PHASE 7c - Ensure NFT is running on the local host
###############################################################################

if enable_networking:
    logger.out("Creating NFT firewall configuration", state='i')

    # Create our config dirs
    common.run_os_command(
        '/bin/mkdir --parents {}/networks'.format(
            config['nft_dynamic_directory']
        )
    )
    common.run_os_command(
        '/bin/mkdir --parents {}/static'.format(
            config['nft_dynamic_directory']
        )
    )
    common.run_os_command(
        '/bin/mkdir --parents {}'.format(
            config['nft_dynamic_directory']
        )
    )

    # Set up the basic features of the nftables firewall
    # NOTE: doubled braces ({{ }}) are literal braces escaped for .format()
    nftables_base_rules = """# Base rules
flush ruleset
# Add the filter table and chains
add table inet filter
add chain inet filter forward {{type filter hook forward priority 0; }}
add chain inet filter input {{type filter hook input priority 0; }}
# Include static rules and network rules
include "{rulesdir}/static/*"
include "{rulesdir}/networks/*"
""".format(
        rulesdir=config['nft_dynamic_directory']
    )

    # Write the basic firewall config, then load it into the kernel
    nftables_base_filename = '{}/base.nft'.format(config['nft_dynamic_directory'])
    with open(nftables_base_filename, 'w') as nfbasefile:
        nfbasefile.write(nftables_base_rules)
    common.reload_firewall_rules(nftables_base_filename, logger=logger)

###############################################################################
# PHASE 7d - Ensure DNSMASQ is not running
###############################################################################

# PVC manages its own per-network dnsmasq processes; a system-wide instance
# would conflict on port 53/67
common.run_os_command('systemctl stop dnsmasq.service')
|
|
|
|
|
2018-10-14 02:01:35 -04:00
|
|
|
###############################################################################
# PHASE 8 - Set up our objects
###############################################################################

logger.out('Setting up objects', state='i')

# Per-kind instance maps, keyed by object name/UUID; populated by the
# Zookeeper watch callbacks below
d_node = dict()
d_network = dict()
d_sriov_vf = dict()
d_domain = dict()
d_osd = dict()
d_pool = dict()
d_volume = dict()  # Dict of Dicts
# Flat name lists mirroring the maps above, used for membership checks
node_list = []
network_list = []
sriov_pf_list = []
sriov_vf_list = []
domain_list = []
osd_list = []
pool_list = []
volume_list = dict()  # Dict of Lists

if enable_networking:
    # Create an instance of the DNS Aggregator and Metadata API if we're a coordinator
    if config['daemon_mode'] == 'coordinator':
        dns_aggregator = DNSAggregatorInstance.DNSAggregatorInstance(config, logger)
        metadata_api = MetadataAPIInstance.MetadataAPIInstance(zkhandler, config, logger)
    else:
        dns_aggregator = None
        metadata_api = None
else:
    dns_aggregator = None
    metadata_api = None
|
2018-10-15 21:09:40 -04:00
|
|
|
|
2020-11-07 14:45:24 -05:00
|
|
|
|
2018-10-14 02:01:35 -04:00
|
|
|
# Node objects
@zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path('base.node'))
def update_nodes(new_node_list):
    """Keep node_list and the d_node instance map in sync with Zookeeper.

    Fired whenever the set of node children changes: instantiates a
    NodeInstance for newly-seen nodes, drops instances for removed ones,
    then hands every surviving instance the refreshed peer map.
    """
    global node_list, d_node

    # Instantiate an object for any node we have not seen before
    for new_node in new_node_list:
        if new_node in node_list:
            continue
        d_node[new_node] = NodeInstance.NodeInstance(new_node, myhostname, zkhandler, config, logger, d_node, d_network, d_domain, dns_aggregator, metadata_api)

    # Drop objects for nodes that disappeared from Zookeeper
    for old_node in node_list:
        if old_node not in new_node_list:
            d_node.pop(old_node)

    # Update and print new list
    node_list = new_node_list
    logger.out('{}Node list:{} {}'.format(fmt_blue, fmt_end, ' '.join(node_list)), state='i')

    # Update node objects' list
    for instance in d_node.values():
        instance.update_node_list(d_node)
|
|
|
|
|
2020-11-07 13:17:49 -05:00
|
|
|
|
2018-10-14 02:01:35 -04:00
|
|
|
# Alias for our local node (passed to network and domain objects)
# NOTE: relies on update_nodes having already fired for our own hostname
this_node = d_node[myhostname]
|
|
|
|
|
2020-11-07 14:45:24 -05:00
|
|
|
|
2020-01-09 10:53:27 -05:00
|
|
|
# Maintenance mode
@zkhandler.zk_conn.DataWatch(zkhandler.schema.path('base.config.maintenance'))
def set_maintenance(_maintenance, stat, event=''):
    """Track the cluster-wide maintenance flag from Zookeeper.

    The watch delivers the raw key bytes; any decode/parse failure (including
    a missing key) is treated as maintenance disabled.
    """
    global maintenance
    try:
        flag_text = _maintenance.decode('ascii')
        maintenance = bool(strtobool(flag_text))
    except Exception:
        maintenance = False
|
|
|
|
|
2020-11-07 14:45:24 -05:00
|
|
|
|
2018-10-21 22:08:23 -04:00
|
|
|
# Primary node
@zkhandler.zk_conn.DataWatch(zkhandler.schema.path('base.config.primary_node'))
def update_primary(new_primary, stat, event=''):
    """
    Zookeeper data watch: react to changes of the cluster primary node key.

    Coordinator behaviour:
      * key == 'none'       -> contend for primary state under an exclusive lock
      * key == our hostname -> begin takeover (if currently secondary)
      * key == another node -> begin relinquish (if currently primary)
    Non-coordinators simply (re)assert the 'client' router state.

    Contention uses an exclusive lock plus a stat-version failsafe so that
    only one secondary can ever win; the loser times out on the lock and
    passes without blocking the switchover (see the lock comments below).
    """
    try:
        new_primary = new_primary.decode('ascii')
    except AttributeError:
        # Key value was None (deleted key): treat as "no primary"
        new_primary = 'none'
    # Version of the key at the moment this event fired; used below as a
    # failsafe against another node having already acquired primary state.
    # NOTE(review): assumes stat is not None here (key exists) - confirm
    key_version = stat.version

    if new_primary != this_node.primary_node:
        if config['daemon_mode'] == 'coordinator':
            # We're a coordinator and there is no primary
            if new_primary == 'none':
                # Contend only when healthy and not already in a primary-ish state
                if this_node.daemon_state == 'run' and this_node.router_state not in ['primary', 'takeover', 'relinquish']:
                    logger.out('Contending for primary coordinator state', state='i')
                    # Acquire an exclusive lock on the primary_node key
                    primary_lock = zkhandler.exclusivelock('base.config.primary_node')
                    try:
                        # This lock times out after 0.4s, which is 0.1s less than the pre-takeover
                        # timeout below, thus ensuring that a primary takeover will not deadlock
                        # against a node that failed the contention
                        primary_lock.acquire(timeout=0.4)
                        # Ensure when we get the lock that the versions are still consistent and that
                        # another node hasn't already acquired primary state
                        if key_version == zkhandler.zk_conn.get(zkhandler.schema.path('base.config.primary_node'))[1].version:
                            zkhandler.write([
                                ('base.config.primary_node', myhostname)
                            ])
                        # Cleanly release the lock
                        primary_lock.release()
                    # We timed out acquiring a lock, which means we failed contention, so just pass
                    except Exception:
                        pass
            # The primary is (becoming) us: delay slightly longer than the lock
            # timeout above, then request the takeover router state
            elif new_primary == myhostname:
                if this_node.router_state == 'secondary':
                    time.sleep(0.5)
                    zkhandler.write([
                        (('node.state.router', myhostname), 'takeover')
                    ])
            # The primary is (becoming) another node: relinquish if we hold it
            else:
                if this_node.router_state == 'primary':
                    time.sleep(0.5)
                    zkhandler.write([
                        (('node.state.router', myhostname), 'relinquish')
                    ])
        else:
            # Non-coordinators are always plain clients
            zkhandler.write([
                (('node.state.router', myhostname), 'client')
            ])

    # Record the new primary on every node object (including ourselves)
    for node in d_node:
        d_node[node].primary_node = new_primary
|
|
|
|
|
2020-11-07 13:17:49 -05:00
|
|
|
|
2019-03-11 01:44:26 -04:00
|
|
|
if enable_networking:
    # Network objects
    @zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path('base.network'))
    def update_networks(new_network_list):
        """
        Zookeeper children watch: sync VXNetworkInstance objects with the
        cluster network list, handling DNS aggregation and (for managed
        networks on the primary) gateway/DHCP lifecycle.
        """
        global network_list, d_network

        # Add any missing networks to the list
        for network in new_network_list:
            if network not in network_list:
                d_network[network] = VXNetworkInstance.VXNetworkInstance(network, zkhandler, config, logger, this_node, dns_aggregator)
                # Only coordinators aggregate DNS, and only for managed networks
                if config['daemon_mode'] == 'coordinator' and d_network[network].nettype == 'managed':
                    try:
                        dns_aggregator.add_network(d_network[network])
                    except Exception as e:
                        # Best-effort: a failed aggregator add is logged but does not block the network
                        logger.out('Failed to create DNS Aggregator for network {}: {}'.format(network, e), 'w')
                # Start primary functionality
                if this_node.router_state == 'primary' and d_network[network].nettype == 'managed':
                    d_network[network].createGateways()
                    d_network[network].startDHCPServer()

        # Remove any deleted networks from the list
        for network in network_list:
            if network not in new_network_list:
                if d_network[network].nettype == 'managed':
                    # Stop primary functionality
                    if this_node.router_state == 'primary':
                        d_network[network].stopDHCPServer()
                        d_network[network].removeGateways()
                        dns_aggregator.remove_network(d_network[network])
                    # Stop general functionality
                    d_network[network].removeFirewall()
                d_network[network].removeNetwork()
                # Delete the object
                del(d_network[network])

        # Update and print new list
        network_list = new_network_list
        logger.out('{}Network list:{} {}'.format(fmt_blue, fmt_end, ' '.join(network_list)), state='i')

        # Update node objects' list
        for node in d_node:
            d_node[node].update_network_list(d_network)
|
|
|
|
|
Implement SR-IOV PF and VF instances
Adds support for the node daemon managing SR-IOV PF and VF instances.
PFs are added to Zookeeper automatically based on the config at startup
during network configuration, and are otherwise completely static. PFs
are automatically removed from Zookeeper, along with all coresponding
VFs, should the PF phy device be removed from the configuration.
VFs are configured based on the (autocreated) VFs of each PF device,
added to Zookeeper, and then a new class instance, SRIOVVFInstance, is
used to watch them for configuration changes. This will enable the
runtime management of VF settings by the API. The set of keys ensures
that both configuration and details of the NIC can be tracked.
Most keys are self-explanatory, especially for PFs and the basic keys
for VFs. The configuration tree is also self-explanatory, being based
entirely on the options available in the `ip link set {dev} vf` command.
Two additional keys are also present: `used` and `used_by`, which will
be able to track the (boolean) state of usage, as well as the VM that
uses a given VIF. Since the VM side implementation will support both
macvtap and direct "hostdev" assignments, this will ensure that this
state can be tracked on both the VF and the VM side.
2021-06-17 01:01:23 -04:00
|
|
|
# Add the SR-IOV PFs and VFs to Zookeeper
# These do not behave like the objects; they are not dynamic (the API cannot change them), and they
# exist for the lifetime of this Node instance. The objects are set here in Zookeeper on a per-node
# basis, under the Node configuration tree.
# MIGRATION: The schema.schema.get ensures that the current active Schema contains the required keys
if enable_sriov and zkhandler.schema.schema.get('sriov_pf', None) is not None:
    # NOTE(review): this initializer is dead - vf_list is reassigned per-PF inside the loop below
    vf_list = list()
    for device in config['sriov_device']:
        pf = device['phy']
        vfcount = device['vfcount']
        # Default the PF MTU to 1500 when not explicitly configured
        if device.get('mtu', None) is None:
            mtu = 1500
        else:
            mtu = device['mtu']

        # Create the PF device in Zookeeper
        zkhandler.write([
            (('node.sriov.pf', myhostname, 'sriov_pf', pf), ''),
            (('node.sriov.pf', myhostname, 'sriov_pf.mtu', pf), mtu),
            (('node.sriov.pf', myhostname, 'sriov_pf.vfcount', pf), vfcount),
        ])
        # Append the device to the list of PFs
        sriov_pf_list.append(pf)

        # Get the list of VFs from `ip link show`
        # NOTE(review): assumes index [1] of run_os_command's return tuple is stdout - confirm against common
        vf_list = json.loads(common.run_os_command('ip --json link show {}'.format(pf))[1])[0].get('vfinfo_list', [])
        for vf in vf_list:
            # Example vfinfo_list entry:
            # {
            #   'vf': 3,
            #   'link_type': 'ether',
            #   'address': '00:00:00:00:00:00',
            #   'broadcast': 'ff:ff:ff:ff:ff:ff',
            #   'vlan_list': [{'vlan': 101, 'qos': 2}],
            #   'rate': {'max_tx': 0, 'min_tx': 0},
            #   'spoofchk': True,
            #   'link_state': 'auto',
            #   'trust': False,
            #   'query_rss_en': False
            # }
            # Kernel VF interface naming convention: "<pf>v<index>"
            vfphy = '{}v{}'.format(pf, vf['vf'])

            # Get the PCIe bus information
            dev_pcie_path = None
            try:
                with open('/sys/class/net/{}/device/uevent'.format(vfphy)) as vfh:
                    dev_uevent = vfh.readlines()
                for line in dev_uevent:
                    if re.match(r'^PCI_SLOT_NAME=.*', line):
                        dev_pcie_path = line.rstrip().split('=')[-1]
            except FileNotFoundError:
                # Something must already be using the PCIe device
                pass

            # Add the VF to Zookeeper if it does not yet exist
            if not zkhandler.exists(('node.sriov.vf', myhostname, 'sriov_vf', vfphy)):
                if dev_pcie_path is not None:
                    # PCI_SLOT_NAME has the form "domain:bus:slot.function"
                    pcie_domain, pcie_bus, pcie_slot, pcie_function = re.split(r':|\.', dev_pcie_path)
                else:
                    # We can't add the device - for some reason we can't get any information on its PCIe bus path,
                    # so just ignore this one, and continue.
                    # This shouldn't happen under any real circumstances, unless the admin tries to attach a non-existent
                    # VF to a VM manually, then goes ahead and adds that VF to the system with the VM running.
                    continue

                # NOTE(review): the vlan_id/vlan_qos reads assume vlan_list always has
                # at least one entry in the `ip --json` output - confirm for VFs with no VLAN set
                zkhandler.write([
                    (('node.sriov.vf', myhostname, 'sriov_vf', vfphy), ''),
                    (('node.sriov.vf', myhostname, 'sriov_vf.pf', vfphy), pf),
                    (('node.sriov.vf', myhostname, 'sriov_vf.mtu', vfphy), mtu),
                    (('node.sriov.vf', myhostname, 'sriov_vf.mac', vfphy), vf['address']),
                    (('node.sriov.vf', myhostname, 'sriov_vf.phy_mac', vfphy), vf['address']),
                    (('node.sriov.vf', myhostname, 'sriov_vf.config', vfphy), ''),
                    (('node.sriov.vf', myhostname, 'sriov_vf.config.vlan_id', vfphy), vf['vlan_list'][0].get('vlan', '0')),
                    (('node.sriov.vf', myhostname, 'sriov_vf.config.vlan_qos', vfphy), vf['vlan_list'][0].get('qos', '0')),
                    (('node.sriov.vf', myhostname, 'sriov_vf.config.tx_rate_min', vfphy), vf['rate']['min_tx']),
                    (('node.sriov.vf', myhostname, 'sriov_vf.config.tx_rate_max', vfphy), vf['rate']['max_tx']),
                    (('node.sriov.vf', myhostname, 'sriov_vf.config.spoof_check', vfphy), vf['spoofchk']),
                    (('node.sriov.vf', myhostname, 'sriov_vf.config.link_state', vfphy), vf['link_state']),
                    (('node.sriov.vf', myhostname, 'sriov_vf.config.trust', vfphy), vf['trust']),
                    (('node.sriov.vf', myhostname, 'sriov_vf.config.query_rss', vfphy), vf['query_rss_en']),
                    (('node.sriov.vf', myhostname, 'sriov_vf.pci', vfphy), ''),
                    (('node.sriov.vf', myhostname, 'sriov_vf.pci.domain', vfphy), pcie_domain),
                    (('node.sriov.vf', myhostname, 'sriov_vf.pci.bus', vfphy), pcie_bus),
                    (('node.sriov.vf', myhostname, 'sriov_vf.pci.slot', vfphy), pcie_slot),
                    (('node.sriov.vf', myhostname, 'sriov_vf.pci.function', vfphy), pcie_function),
                    (('node.sriov.vf', myhostname, 'sriov_vf.used', vfphy), False),
                    (('node.sriov.vf', myhostname, 'sriov_vf.used_by', vfphy), ''),
                ])

            # Append the device to the list of VFs
            sriov_vf_list.append(vfphy)

    # Remove any obsolete PFs from Zookeeper if they go away
    for pf in zkhandler.children(('node.sriov.pf', myhostname)):
        if pf not in sriov_pf_list:
            zkhandler.delete([
                ('node.sriov.pf', myhostname, 'sriov_pf', pf)
            ])
    # Remove any obsolete VFs from Zookeeper if their PF goes away
    for vf in zkhandler.children(('node.sriov.vf', myhostname)):
        vf_pf = zkhandler.read(('node.sriov.vf', myhostname, 'sriov_vf.pf', vf))
        if vf_pf not in sriov_pf_list:
            zkhandler.delete([
                ('node.sriov.vf', myhostname, 'sriov_vf', vf)
            ])
|
|
|
|
|
|
|
|
# SR-IOV VF objects
# This is a ChildrenWatch just for consistency; the list never changes at runtime
@zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path('node.sriov.vf', myhostname))
def update_sriov_vfs(new_sriov_vf_list):
    """
    Zookeeper children watch: (re)build the SRIOVVFInstance map for this
    node's VFs and record the sorted VF name list.
    """
    global sriov_vf_list, d_sriov_vf

    # (Re)create an instance for each VF, in natural interface-name order
    for vf_name in common.sortInterfaceNames(new_sriov_vf_list):
        d_sriov_vf[vf_name] = SRIOVVFInstance.SRIOVVFInstance(vf_name, zkhandler, config, logger, this_node)

    # Store and report the new list
    sriov_vf_list = sorted(new_sriov_vf_list)
    logger.out('{}SR-IOV VF list:{} {}'.format(fmt_blue, fmt_end, ' '.join(sriov_vf_list)), state='i')
|
|
|
|
|
2019-03-11 01:44:26 -04:00
|
|
|
if enable_hypervisor:
    # VM command pipeline key
    @zkhandler.zk_conn.DataWatch(zkhandler.schema.path('base.cmd.domain'))
    def cmd_domains(data, stat, event=''):
        """Zookeeper data watch: dispatch a VM command written to the domain command pipeline."""
        # An empty or absent key is a no-op; only act on a real payload
        if not data:
            return
        VMInstance.run_command(zkhandler, logger, this_node, data.decode('ascii'))
|
2019-08-07 13:49:33 -04:00
|
|
|
|
2019-03-11 01:44:26 -04:00
|
|
|
# VM domain objects
@zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path('base.domain'))
def update_domains(new_domain_list):
    """
    Zookeeper children watch: sync VMInstance objects with the cluster
    domain list, then propagate the new map to every node object.
    """
    global domain_list, d_domain

    # Create an object for any newly-appeared domain
    for dom in new_domain_list:
        if dom in domain_list:
            continue
        d_domain[dom] = VMInstance.VMInstance(dom, zkhandler, config, logger, this_node)

    # Discard the object for any domain that was removed
    for dom in [candidate for candidate in domain_list if candidate not in new_domain_list]:
        del d_domain[dom]

    # Store and report the new list
    domain_list = new_domain_list
    logger.out('{}VM list:{} {}'.format(fmt_blue, fmt_end, ' '.join(domain_list)), state='i')

    # Propagate the updated domain map to each node object
    for node in d_node:
        d_node[node].update_domain_list(d_domain)
|
|
|
|
|
|
|
|
if enable_storage:
    # Ceph command pipeline key
    @zkhandler.zk_conn.DataWatch(zkhandler.schema.path('base.cmd.ceph'))
    def cmd_ceph(data, stat, event=''):
        """Zookeeper data watch: dispatch a Ceph command written to the storage command pipeline."""
        # An empty or absent key is a no-op; only act on a real payload
        if not data:
            return
        CephInstance.run_command(zkhandler, logger, this_node, data.decode('ascii'), d_osd)
|
2019-03-11 01:44:26 -04:00
|
|
|
|
|
|
|
# OSD objects
@zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path('base.osd'))
def update_osds(new_osd_list):
    """
    Zookeeper children watch: sync CephOSDInstance objects with the
    cluster OSD list.
    """
    global osd_list, d_osd

    # Create an object for any newly-appeared OSD
    for osd_id in new_osd_list:
        if osd_id in osd_list:
            continue
        d_osd[osd_id] = CephInstance.CephOSDInstance(zkhandler, this_node, osd_id)

    # Discard the object for any OSD that was removed
    for osd_id in [candidate for candidate in osd_list if candidate not in new_osd_list]:
        del d_osd[osd_id]

    # Store and report the new list
    osd_list = new_osd_list
    logger.out('{}OSD list:{} {}'.format(fmt_blue, fmt_end, ' '.join(osd_list)), state='i')
|
2019-03-11 01:44:26 -04:00
|
|
|
|
|
|
|
# Pool objects
@zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path('base.pool'))
def update_pools(new_pool_list):
    """
    Zookeeper children watch: sync CephPoolInstance objects with the cluster
    pool list, then (re)register a volume children watch for every current pool.
    """
    global pool_list, d_pool

    # Add any missing Pools to the list
    for pool in new_pool_list:
        if pool not in pool_list:
            d_pool[pool] = CephInstance.CephPoolInstance(zkhandler, this_node, pool)
            d_volume[pool] = dict()
            volume_list[pool] = []

    # Remove any deleted Pools from the list
    # NOTE(review): d_volume[pool] and volume_list[pool] are deliberately left in
    # place, since an already-registered volume watch for the removed pool may
    # still fire and reference them.
    for pool in pool_list:
        if pool not in new_pool_list:
            # Delete the object
            del(d_pool[pool])

    # Update and print new list
    pool_list = new_pool_list
    logger.out('{}Pool list:{} {}'.format(fmt_blue, fmt_end, ' '.join(pool_list)), state='i')

    # Volume objects in each pool
    for pool in pool_list:
        @zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path('volume', pool))
        def update_volumes(new_volume_list, pool=pool):
            """
            Zookeeper children watch: sync CephVolumeInstance objects for one pool.

            BUGFIX: 'pool' is bound as a default argument so each registered
            watch keeps the pool it was created for. Previously the callback
            closed over the loop variable, so by the time a watch fired (after
            the loop had finished) every callback acted on the *last* pool in
            pool_list, corrupting the volume maps of all other pools. The watch
            path itself was always correct, since the decorator argument is
            evaluated eagerly at registration time.
            """
            global volume_list, d_volume

            # Add any missing Volumes to the list
            for volume in new_volume_list:
                if volume not in volume_list[pool]:
                    d_volume[pool][volume] = CephInstance.CephVolumeInstance(zkhandler, this_node, pool, volume)

            # Remove any deleted Volumes from the list
            for volume in volume_list[pool]:
                if volume not in new_volume_list:
                    # Delete the object
                    del(d_volume[pool][volume])

            # Update and print new list
            volume_list[pool] = new_volume_list
            logger.out('{}Volume list [{pool}]:{} {plist}'.format(fmt_blue, fmt_end, pool=pool, plist=' '.join(volume_list[pool])), state='i')
|
2019-06-19 09:40:20 -04:00
|
|
|
|
2020-11-07 14:45:24 -05:00
|
|
|
|
2018-10-14 02:01:35 -04:00
|
|
|
###############################################################################
|
|
|
|
# PHASE 9 - Run the daemon
|
|
|
|
###############################################################################
|
|
|
|
|
2020-06-06 13:23:24 -04:00
|
|
|
# Ceph stats update function
def collect_ceph_stats(queue: Queue) -> None:
    """
    Gather Ceph cluster statistics and push node-relevant results onto `queue`.

    Run in a worker thread from the keepalive. Connects to the Ceph cluster
    via librados; when this node is the primary coordinator it additionally
    uploads cluster-wide status, utilization, pool, and OSD statistics to
    Zookeeper. On success it puts exactly three items on `queue`, in order:
    the health colour code, the health string, and the count of OSDs on this
    node. NOTE(review): the early `return` paths below exit without putting
    anything on `queue` (and the later ones without closing the Rados
    connection) — confirm the consumer tolerates an empty queue.
    """
    if debug:
        logger.out("Thread starting", state='d', prefix='ceph-thread')

    # Connect to the Ceph cluster using the admin keyring; a short (1s)
    # timeout keeps the keepalive from stalling on a dead cluster.
    try:
        ceph_conn = Rados(conffile=config['ceph_config_file'], conf=dict(keyring=config['ceph_admin_keyring']))
        if debug:
            logger.out("Connecting to cluster", state='d', prefix='ceph-thread')
        ceph_conn.connect(timeout=1)
    except Exception as e:
        logger.out('Failed to open connection to Ceph cluster: {}'.format(e), state='e')
        return

    if debug:
        logger.out("Getting health stats from monitor", state='d', prefix='ceph-thread')

    # Get Ceph cluster health for local status output
    command = {"prefix": "health", "format": "json"}
    try:
        health_status = json.loads(ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1])
        ceph_health = health_status['status']
    except Exception as e:
        logger.out('Failed to obtain Ceph health data: {}'.format(e), state='e')
        return

    # Map the health string onto a terminal colour for the keepalive display
    if ceph_health == 'HEALTH_OK':
        ceph_health_colour = fmt_green
    elif ceph_health == 'HEALTH_WARN':
        ceph_health_colour = fmt_yellow
    else:
        # HEALTH_ERR or anything unrecognized
        ceph_health_colour = fmt_red

    # Primary-only functions: only the primary coordinator uploads
    # cluster-wide data to Zookeeper, so it is written exactly once.
    if this_node.router_state == 'primary':
        if debug:
            logger.out("Set ceph health information in zookeeper (primary only)", state='d', prefix='ceph-thread')

        # Full human-readable cluster status, stored verbatim
        command = {"prefix": "status", "format": "pretty"}
        ceph_status = ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1].decode('ascii')
        try:
            zkhandler.write([
                ('base.storage', str(ceph_status))
            ])
        except Exception as e:
            logger.out('Failed to set Ceph status data: {}'.format(e), state='e')
            return

        if debug:
            logger.out("Set ceph rados df information in zookeeper (primary only)", state='d', prefix='ceph-thread')

        # Get rados df info (human-readable utilization summary, stored verbatim)
        command = {"prefix": "df", "format": "pretty"}
        ceph_df = ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1].decode('ascii')
        try:
            zkhandler.write([
                ('base.storage.util', str(ceph_df))
            ])
        except Exception as e:
            logger.out('Failed to set Ceph utilization data: {}'.format(e), state='e')
            return

        if debug:
            logger.out("Set pool information in zookeeper (primary only)", state='d', prefix='ceph-thread')

        # Get pool info from two sources: `ceph df` (capacity stats) and
        # `rados df` (I/O stats); the two are merged per-pool below.
        retcode, stdout, stderr = common.run_os_command('ceph df --format json', timeout=1)
        try:
            ceph_pool_df_raw = json.loads(stdout)['pools']
        except Exception as e:
            logger.out('Failed to obtain Pool data (ceph df): {}'.format(e), state='w')
            ceph_pool_df_raw = []

        retcode, stdout, stderr = common.run_os_command('rados df --format json', timeout=1)
        try:
            rados_pool_df_raw = json.loads(stdout)['pools']
        except Exception as e:
            logger.out('Failed to obtain Pool data (rados df): {}'.format(e), state='w')
            rados_pool_df_raw = []

        pool_count = len(ceph_pool_df_raw)
        if debug:
            logger.out("Getting info for {} pools".format(pool_count), state='d', prefix='ceph-thread')
        for pool_idx in range(0, pool_count):
            try:
                # Combine all the data for this pool; NOTE(review): this
                # assumes both command outputs list pools in the same order
                # at the same index — confirm against the Ceph version in use.
                ceph_pool_df = ceph_pool_df_raw[pool_idx]
                rados_pool_df = rados_pool_df_raw[pool_idx]
                pool = ceph_pool_df
                pool.update(rados_pool_df)

                # Ignore any pools that aren't in our pool list
                if pool['name'] not in pool_list:
                    if debug:
                        logger.out("Pool {} not in pool list {}".format(pool['name'], pool_list), state='d', prefix='ceph-thread')
                    continue
                else:
                    if debug:
                        logger.out("Parsing data for pool {}".format(pool['name']), state='d', prefix='ceph-thread')

                # Assemble a useful data structure from the merged stats
                pool_df = {
                    'id': pool['id'],
                    'stored_bytes': pool['stats']['stored'],
                    'free_bytes': pool['stats']['max_avail'],
                    'used_bytes': pool['stats']['bytes_used'],
                    'used_percent': pool['stats']['percent_used'],
                    'num_objects': pool['stats']['objects'],
                    'num_object_clones': pool['num_object_clones'],
                    'num_object_copies': pool['num_object_copies'],
                    'num_objects_missing_on_primary': pool['num_objects_missing_on_primary'],
                    'num_objects_unfound': pool['num_objects_unfound'],
                    'num_objects_degraded': pool['num_objects_degraded'],
                    'read_ops': pool['read_ops'],
                    'read_bytes': pool['read_bytes'],
                    'write_ops': pool['write_ops'],
                    'write_bytes': pool['write_bytes']
                }

                # Write the pool data to Zookeeper (JSON-encoded)
                zkhandler.write([
                    (('pool.stats', pool['name']), str(json.dumps(pool_df)))
                ])
            except Exception as e:
                # One or more of the status commands timed out, just continue
                logger.out('Failed to format and send pool data: {}'.format(e), state='w')
                pass

    # Only grab OSD stats if there are OSDs to grab (otherwise `ceph osd df` hangs)
    osds_this_node = 0
    if len(osd_list) > 0:
        # Get data from Ceph OSDs
        if debug:
            logger.out("Get data from Ceph OSDs", state='d', prefix='ceph-thread')

        # Parse the dump data (up/in/affinity per OSD, keyed by OSD id string)
        osd_dump = dict()

        command = {"prefix": "osd dump", "format": "json"}
        try:
            retcode, stdout, stderr = common.run_os_command('ceph osd dump --format json --connect-timeout 2', timeout=2)
            osd_dump_raw = json.loads(stdout)['osds']
        except Exception as e:
            logger.out('Failed to obtain OSD data: {}'.format(e), state='w')
            osd_dump_raw = []

        if debug:
            logger.out("Loop through OSD dump", state='d', prefix='ceph-thread')
        for osd in osd_dump_raw:
            osd_dump.update({
                str(osd['osd']): {
                    'uuid': osd['uuid'],
                    'up': osd['up'],
                    'in': osd['in'],
                    'primary_affinity': osd['primary_affinity']
                }
            })

        # Parse the df data (utilization/weight per OSD, keyed by OSD id string)
        if debug:
            logger.out("Parse the OSD df data", state='d', prefix='ceph-thread')

        osd_df = dict()

        command = {"prefix": "osd df", "format": "json"}
        try:
            osd_df_raw = json.loads(ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1])['nodes']
        except Exception as e:
            logger.out('Failed to obtain OSD data: {}'.format(e), state='w')
            osd_df_raw = []

        if debug:
            logger.out("Loop through OSD df", state='d', prefix='ceph-thread')
        for osd in osd_df_raw:
            osd_df.update({
                str(osd['id']): {
                    'utilization': osd['utilization'],
                    'var': osd['var'],
                    'pgs': osd['pgs'],
                    'kb': osd['kb'],
                    'weight': osd['crush_weight'],
                    'reweight': osd['reweight'],
                }
            })

        # Parse the status data; `osd status` has no JSON output here, so the
        # pretty (tabular) text is scraped column-by-column below.
        if debug:
            logger.out("Parse the OSD status data", state='d', prefix='ceph-thread')

        osd_status = dict()

        command = {"prefix": "osd status", "format": "pretty"}
        try:
            osd_status_raw = ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1].decode('ascii')
        except Exception as e:
            logger.out('Failed to obtain OSD status data: {}'.format(e), state='w')
            osd_status_raw = []

        if debug:
            logger.out("Loop through OSD status data", state='d', prefix='ceph-thread')

        for line in osd_status_raw.split('\n'):
            # Strip off colour (ANSI escape sequences)
            line = re.sub(r'\x1b(\[.*?[@-~]|\].*?(\x07|\x1b\\))', '', line)
            # Split it for parsing; the table separators ("|") become their
            # own tokens, hence the odd-numbered field indices below.
            line = line.split()
            if len(line) > 1 and line[1].isdigit():
                # This is an OSD line so parse it
                osd_id = line[1]
                node = line[3].split('.')[0]
                used = line[5]
                avail = line[7]
                wr_ops = line[9]
                wr_data = line[11]
                rd_ops = line[13]
                rd_data = line[15]
                state = line[17]
                osd_status.update({
                    str(osd_id): {
                        'node': node,
                        'used': used,
                        'avail': avail,
                        'wr_ops': wr_ops,
                        'wr_data': wr_data,
                        'rd_ops': rd_ops,
                        'rd_data': rd_data,
                        'state': state
                    }
                })

        # Merge them together into a single meaningful dict (dump + df +
        # status, per OSD), counting this node's OSDs along the way.
        if debug:
            logger.out("Merge OSD data together", state='d', prefix='ceph-thread')

        osd_stats = dict()

        for osd in osd_list:
            if d_osd[osd].node == myhostname:
                osds_this_node += 1
            try:
                this_dump = osd_dump[osd]
                this_dump.update(osd_df[osd])
                this_dump.update(osd_status[osd])
                osd_stats[osd] = this_dump
            except KeyError as e:
                # One or more of the status commands timed out, just continue
                logger.out('Failed to parse OSD stats into dictionary: {}'.format(e), state='w')

        # Upload OSD data for the cluster (primary-only)
        if this_node.router_state == 'primary':
            if debug:
                logger.out("Trigger updates for each OSD", state='d', prefix='ceph-thread')

            for osd in osd_list:
                try:
                    stats = json.dumps(osd_stats[osd])
                    zkhandler.write([
                        (('osd.stats', osd), str(stats))
                    ])
                except KeyError as e:
                    # One or more of the status commands timed out, just continue
                    logger.out('Failed to upload OSD stats from dictionary: {}'.format(e), state='w')

    # Done with librados for this pass
    ceph_conn.shutdown()

    # Hand results back to the keepalive thread (colour, health, OSD count —
    # consumed in this exact order)
    queue.put(ceph_health_colour)
    queue.put(ceph_health)
    queue.put(osds_this_node)

    if debug:
        logger.out("Thread finished", state='d', prefix='ceph-thread')
|
2020-06-06 15:31:26 -04:00
|
|
|
# State table for pretty stats
# Maps libvirt virDomainState numeric codes (0-7) to human-readable names.
libvirt_vm_states = dict(enumerate((
    "NOSTATE",
    "RUNNING",
    "BLOCKED",
    "PAUSED",
    "SHUTDOWN",
    "SHUTOFF",
    "CRASHED",
    "PMSUSPENDED",
)))
2020-11-07 14:45:24 -05:00
|
|
|
|
2020-06-06 15:31:26 -04:00
|
|
|
# VM stats update function
def collect_vm_stats(queue: Queue) -> None:
    """
    Gather statistics for this node's VMs via libvirt and push results onto `queue`.

    Run in a worker thread from the keepalive. Also performs two side tasks:
    (1) detects VMs that should be running here but are not, and rewrites
    their Zookeeper state to trigger a restart; (2) tallies allocated and
    provisioned memory and allocated vCPUs across this node's domains.
    On success it puts exactly four items on `queue`, in order: running
    domain count, memalloc, memprov, vcpualloc. NOTE(review): the early
    `return` on libvirt connection failure puts nothing on `queue` — confirm
    the consumer tolerates an empty queue.
    """
    if debug:
        logger.out("Thread starting", state='d', prefix='vm-thread')

    # Connect to libvirt (local QEMU system instance)
    libvirt_name = "qemu:///system"
    if debug:
        logger.out("Connecting to libvirt", state='d', prefix='vm-thread')
    lv_conn = libvirt.open(libvirt_name)
    if lv_conn is None:
        logger.out('Failed to open connection to "{}"'.format(libvirt_name), state='e')
        return

    # Accumulators for memory (MiB, per instance.getmemory()) and vCPUs
    memalloc = 0
    memprov = 0
    vcpualloc = 0
    # Toggle state management of dead VMs to restart them
    if debug:
        logger.out("Toggle state management of dead VMs to restart them", state='d', prefix='vm-thread')
    # Make a copy of the d_domain; if not, and it changes in flight, this can fail
    fixed_d_domain = this_node.d_domain.copy()
    for domain, instance in fixed_d_domain.items():
        if domain in this_node.domain_list:
            # Add the allocated memory to our memalloc value
            memalloc += instance.getmemory()
            memprov += instance.getmemory()
            vcpualloc += instance.getvcpus()
            if instance.getstate() == 'start' and instance.getnode() == this_node.name:
                if instance.getdom() is not None:
                    try:
                        # A domain that should be running but is in any other
                        # libvirt state is treated as failed; the bare `raise`
                        # funnels into the handler below.
                        if instance.getdom().state()[0] != libvirt.VIR_DOMAIN_RUNNING:
                            logger.out("VM {} has failed".format(instance.domname), state='w', prefix='vm-thread')
                            raise
                    except Exception:
                        # Toggle a state "change": rewriting the same state
                        # value fires the state watch and restarts the VM
                        logger.out("Resetting state to {} for VM {}".format(instance.getstate(), instance.domname), state='i', prefix='vm-thread')
                        zkhandler.write([
                            (('domain.state', domain), instance.getstate())
                        ])
        elif instance.getnode() == this_node.name:
            # Domain assigned here but not running: counts toward provisioned
            # memory only
            memprov += instance.getmemory()

    # Get list of running domains from Libvirt
    running_domains = lv_conn.listAllDomains(libvirt.VIR_CONNECT_LIST_DOMAINS_ACTIVE)

    # Get statistics from any running VMs
    for domain in running_domains:
        try:
            # Get basic information about the VM
            tree = ElementTree.fromstring(domain.XMLDesc())
            domain_uuid = domain.UUIDString()
            domain_name = domain.name()

            # Get all the raw information about the VM
            if debug:
                logger.out("Getting general statistics for VM {}".format(domain_name), state='d', prefix='vm-thread')
            domain_state, domain_maxmem, domain_mem, domain_vcpus, domain_cputime = domain.info()
            # We can't properly gather stats from a non-running VM, so continue
            if domain_state != libvirt.VIR_DOMAIN_RUNNING:
                continue
            domain_memory_stats = domain.memoryStats()
            domain_cpu_stats = domain.getCPUStats(True)[0]
        except Exception as e:
            if debug:
                # domain.name() itself can fail if the domain vanished, so
                # even the debug log is guarded
                try:
                    logger.out("Failed getting VM information for {}: {}".format(domain.name(), e), state='d', prefix='vm-thread')
                except Exception:
                    pass
            continue

        # Ensure VM is present in the domain_list
        if domain_uuid not in this_node.domain_list:
            this_node.domain_list.append(domain_uuid)

        if debug:
            logger.out("Getting disk statistics for VM {}".format(domain_name), state='d', prefix='vm-thread')
        # Per-disk I/O counters, from the domain XML plus blockStats()
        domain_disk_stats = []
        for disk in tree.findall('devices/disk'):
            # RBD disks carry "name"; file-backed disks carry "file"
            disk_name = disk.find('source').get('name')
            if not disk_name:
                disk_name = disk.find('source').get('file')
            disk_stats = domain.blockStats(disk.find('target').get('dev'))
            domain_disk_stats.append({
                "name": disk_name,
                "rd_req": disk_stats[0],
                "rd_bytes": disk_stats[1],
                "wr_req": disk_stats[2],
                "wr_bytes": disk_stats[3],
                "err": disk_stats[4]
            })

        if debug:
            logger.out("Getting network statistics for VM {}".format(domain_name), state='d', prefix='vm-thread')
        # Per-interface I/O counters; only bridge-type interfaces are counted
        domain_network_stats = []
        for interface in tree.findall('devices/interface'):
            interface_type = interface.get('type')
            if interface_type not in ['bridge']:
                continue
            interface_name = interface.find('target').get('dev')
            interface_bridge = interface.find('source').get('bridge')
            interface_stats = domain.interfaceStats(interface_name)
            domain_network_stats.append({
                "name": interface_name,
                "bridge": interface_bridge,
                "rd_bytes": interface_stats[0],
                "rd_packets": interface_stats[1],
                "rd_errors": interface_stats[2],
                "rd_drops": interface_stats[3],
                "wr_bytes": interface_stats[4],
                "wr_packets": interface_stats[5],
                "wr_errors": interface_stats[6],
                "wr_drops": interface_stats[7]
            })

        # Create the final dictionary
        domain_stats = {
            "state": libvirt_vm_states[domain_state],
            "maxmem": domain_maxmem,
            "livemem": domain_mem,
            "cpus": domain_vcpus,
            "cputime": domain_cputime,
            "mem_stats": domain_memory_stats,
            "cpu_stats": domain_cpu_stats,
            "disk_stats": domain_disk_stats,
            "net_stats": domain_network_stats
        }

        if debug:
            logger.out("Writing statistics for VM {} to Zookeeper".format(domain_name), state='d', prefix='vm-thread')

        # Best-effort upload: a failed write for one VM must not abort the
        # collection loop
        try:
            zkhandler.write([
                (('domain.stats', domain_uuid), str(json.dumps(domain_stats)))
            ])
        except Exception as e:
            if debug:
                logger.out("{}".format(e), state='d', prefix='vm-thread')

    # Close the Libvirt connection
    lv_conn.close()

    # Hand results back to the keepalive thread (count, memalloc, memprov,
    # vcpualloc — consumed in this exact order)
    queue.put(len(running_domains))
    queue.put(memalloc)
    queue.put(memprov)
    queue.put(vcpualloc)

    if debug:
        logger.out("Thread finished", state='d', prefix='vm-thread')
2020-11-07 14:45:24 -05:00
|
|
|
|
2020-06-06 12:48:44 -04:00
|
|
|
# Keepalive update function
|
2021-07-02 01:55:15 -04:00
|
|
|
@common.Profiler(config)
|
2020-06-06 12:48:44 -04:00
|
|
|
def node_keepalive():
|
2020-08-17 13:11:03 -04:00
|
|
|
if debug:
|
2020-08-17 14:30:21 -04:00
|
|
|
logger.out("Keepalive starting", state='d', prefix='main-thread')
|
2020-08-17 13:11:03 -04:00
|
|
|
|
2021-06-01 10:52:41 -04:00
|
|
|
# Set the migration selector in Zookeeper for clients to read
|
|
|
|
if config['enable_hypervisor']:
|
|
|
|
if this_node.router_state == 'primary':
|
|
|
|
try:
|
2021-06-09 13:28:31 -04:00
|
|
|
if zkhandler.read('base.config.migration_target_selector') != config['migration_target_selector']:
|
2021-06-01 10:52:41 -04:00
|
|
|
raise
|
|
|
|
except Exception:
|
|
|
|
zkhandler.write([
|
2021-06-09 13:28:31 -04:00
|
|
|
('base.config.migration_target_selector', config['migration_target_selector'])
|
2021-06-01 10:52:41 -04:00
|
|
|
])
|
|
|
|
|
2019-10-21 16:46:32 -04:00
|
|
|
# Set the upstream IP in Zookeeper for clients to read
|
|
|
|
if config['enable_networking']:
|
|
|
|
if this_node.router_state == 'primary':
|
|
|
|
try:
|
2021-06-09 13:28:31 -04:00
|
|
|
if zkhandler.read('base.config.upstream_ip') != config['upstream_floating_ip']:
|
2019-10-21 16:46:32 -04:00
|
|
|
raise
|
2020-11-06 18:55:10 -05:00
|
|
|
except Exception:
|
2021-05-30 14:48:41 -04:00
|
|
|
zkhandler.write([
|
2021-06-09 13:28:31 -04:00
|
|
|
('base.config.upstream_ip', config['upstream_floating_ip'])
|
2021-05-30 14:48:41 -04:00
|
|
|
])
|
2019-10-21 16:46:32 -04:00
|
|
|
|
2018-10-22 20:20:27 -04:00
|
|
|
# Get past state and update if needed
|
2018-11-27 22:15:19 -05:00
|
|
|
if debug:
|
2020-08-17 14:30:21 -04:00
|
|
|
logger.out("Get past state and update if needed", state='d', prefix='main-thread')
|
2021-06-09 13:28:31 -04:00
|
|
|
past_state = zkhandler.read(('node.state.daemon', this_node.name))
|
2018-10-22 20:20:27 -04:00
|
|
|
if past_state != 'run':
|
|
|
|
this_node.daemon_state = 'run'
|
2021-05-30 14:48:41 -04:00
|
|
|
zkhandler.write([
|
2021-06-09 13:28:31 -04:00
|
|
|
(('node.state.daemon', this_node.name), 'run')
|
2021-05-30 14:48:41 -04:00
|
|
|
])
|
2018-10-22 20:20:27 -04:00
|
|
|
else:
|
|
|
|
this_node.daemon_state = 'run'
|
|
|
|
|
|
|
|
# Ensure the primary key is properly set
|
2018-11-27 22:15:19 -05:00
|
|
|
if debug:
|
2020-08-17 14:30:21 -04:00
|
|
|
logger.out("Ensure the primary key is properly set", state='d', prefix='main-thread')
|
2018-10-22 20:20:27 -04:00
|
|
|
if this_node.router_state == 'primary':
|
2021-06-09 13:28:31 -04:00
|
|
|
if zkhandler.read('base.config.primary_node') != this_node.name:
|
2021-05-30 14:48:41 -04:00
|
|
|
zkhandler.write([
|
2021-06-09 13:28:31 -04:00
|
|
|
('base.config.primary_node', this_node.name)
|
2021-05-30 14:48:41 -04:00
|
|
|
])
|
2018-10-22 20:20:27 -04:00
|
|
|
|
2020-06-06 15:31:26 -04:00
|
|
|
# Run VM statistics collection in separate thread for parallelization
|
|
|
|
if enable_hypervisor:
|
|
|
|
vm_thread_queue = Queue()
|
2020-08-11 11:46:41 -04:00
|
|
|
vm_stats_thread = Thread(target=collect_vm_stats, args=(vm_thread_queue,), kwargs={})
|
2020-06-06 15:31:26 -04:00
|
|
|
vm_stats_thread.start()
|
2020-11-06 19:05:48 -05:00
|
|
|
|
2020-06-06 15:44:05 -04:00
|
|
|
# Run Ceph status collection in separate thread for parallelization
|
|
|
|
if enable_storage:
|
|
|
|
ceph_thread_queue = Queue()
|
2020-08-11 11:46:41 -04:00
|
|
|
ceph_stats_thread = Thread(target=collect_ceph_stats, args=(ceph_thread_queue,), kwargs={})
|
2020-06-06 15:44:05 -04:00
|
|
|
ceph_stats_thread.start()
|
2020-11-06 19:05:48 -05:00
|
|
|
|
2020-06-06 13:20:40 -04:00
|
|
|
# Get node performance statistics
|
2019-04-15 18:25:50 -04:00
|
|
|
this_node.memtotal = int(psutil.virtual_memory().total / 1024 / 1024)
|
2018-10-22 20:20:27 -04:00
|
|
|
this_node.memused = int(psutil.virtual_memory().used / 1024 / 1024)
|
|
|
|
this_node.memfree = int(psutil.virtual_memory().free / 1024 / 1024)
|
|
|
|
this_node.cpuload = os.getloadavg()[0]
|
2020-06-06 15:44:05 -04:00
|
|
|
|
|
|
|
# Join against running threads
|
2019-03-11 01:44:26 -04:00
|
|
|
if enable_hypervisor:
|
2020-08-11 11:37:26 -04:00
|
|
|
vm_stats_thread.join(timeout=4.0)
|
|
|
|
if vm_stats_thread.is_alive():
|
|
|
|
logger.out('VM stats gathering exceeded 4s timeout, continuing', state='w')
|
2020-06-06 15:44:05 -04:00
|
|
|
if enable_storage:
|
2020-08-11 11:37:26 -04:00
|
|
|
ceph_stats_thread.join(timeout=4.0)
|
|
|
|
if ceph_stats_thread.is_alive():
|
|
|
|
logger.out('Ceph stats gathering exceeded 4s timeout, continuing', state='w')
|
2020-06-06 15:44:05 -04:00
|
|
|
|
|
|
|
# Get information from thread queues
|
|
|
|
if enable_hypervisor:
|
2020-08-11 11:37:26 -04:00
|
|
|
try:
|
|
|
|
this_node.domains_count = vm_thread_queue.get()
|
|
|
|
this_node.memalloc = vm_thread_queue.get()
|
2020-10-18 14:02:34 -04:00
|
|
|
this_node.memprov = vm_thread_queue.get()
|
2020-08-11 11:37:26 -04:00
|
|
|
this_node.vcpualloc = vm_thread_queue.get()
|
2020-11-06 18:55:10 -05:00
|
|
|
except Exception:
|
2020-08-11 11:37:26 -04:00
|
|
|
pass
|
2019-03-11 01:44:26 -04:00
|
|
|
else:
|
|
|
|
this_node.domains_count = 0
|
2020-06-06 15:44:05 -04:00
|
|
|
this_node.memalloc = 0
|
2020-10-18 14:02:34 -04:00
|
|
|
this_node.memprov = 0
|
2020-06-06 15:44:05 -04:00
|
|
|
this_node.vcpualloc = 0
|
2020-06-06 13:18:37 -04:00
|
|
|
|
2020-06-06 13:23:24 -04:00
|
|
|
if enable_storage:
|
2020-08-11 11:37:26 -04:00
|
|
|
try:
|
|
|
|
ceph_health_colour = ceph_thread_queue.get()
|
|
|
|
ceph_health = ceph_thread_queue.get()
|
|
|
|
osds_this_node = ceph_thread_queue.get()
|
2020-11-06 18:55:10 -05:00
|
|
|
except Exception:
|
2020-08-11 11:37:26 -04:00
|
|
|
ceph_health_colour = fmt_cyan
|
|
|
|
ceph_health = 'UNKNOWN'
|
|
|
|
osds_this_node = '?'
|
2020-06-06 13:23:24 -04:00
|
|
|
|
2020-06-06 13:20:40 -04:00
|
|
|
# Set our information in zookeeper
|
2018-10-22 20:20:27 -04:00
|
|
|
keepalive_time = int(time.time())
|
2020-06-06 13:20:40 -04:00
|
|
|
if debug:
|
2020-08-17 14:30:21 -04:00
|
|
|
logger.out("Set our information in zookeeper", state='d', prefix='main-thread')
|
2018-10-22 20:20:27 -04:00
|
|
|
try:
|
2021-05-30 14:48:41 -04:00
|
|
|
zkhandler.write([
|
2021-06-09 13:28:31 -04:00
|
|
|
(('node.memory.total', this_node.name), str(this_node.memtotal)),
|
|
|
|
(('node.memory.used', this_node.name), str(this_node.memused)),
|
|
|
|
(('node.memory.free', this_node.name), str(this_node.memfree)),
|
|
|
|
(('node.memory.allocated', this_node.name), str(this_node.memalloc)),
|
|
|
|
(('node.memory.provisioned', this_node.name), str(this_node.memprov)),
|
|
|
|
(('node.vcpu.allocated', this_node.name), str(this_node.vcpualloc)),
|
|
|
|
(('node.cpu.load', this_node.name), str(this_node.cpuload)),
|
|
|
|
(('node.count.provisioned_domains', this_node.name), str(this_node.domains_count)),
|
|
|
|
(('node.running_domains', this_node.name), ' '.join(this_node.domain_list)),
|
|
|
|
(('node.keepalive', this_node.name), str(keepalive_time)),
|
2021-05-30 14:48:41 -04:00
|
|
|
])
|
2020-11-06 18:55:10 -05:00
|
|
|
except Exception:
|
2018-10-22 20:20:27 -04:00
|
|
|
logger.out('Failed to set keepalive data', state='e')
|
|
|
|
return
|
|
|
|
|
|
|
|
# Display node information to the terminal
|
2019-06-18 12:44:07 -04:00
|
|
|
if config['log_keepalives']:
|
2019-07-12 09:31:42 -04:00
|
|
|
if this_node.router_state == 'primary':
|
|
|
|
cst_colour = fmt_green
|
|
|
|
elif this_node.router_state == 'secondary':
|
|
|
|
cst_colour = fmt_blue
|
|
|
|
else:
|
|
|
|
cst_colour = fmt_cyan
|
2019-06-18 12:44:07 -04:00
|
|
|
logger.out(
|
2019-07-11 20:11:05 -04:00
|
|
|
'{}{} keepalive{} [{}{}{}]'.format(
|
2019-07-10 21:39:25 -04:00
|
|
|
fmt_purple,
|
2019-06-18 12:44:07 -04:00
|
|
|
myhostname,
|
2019-07-11 20:11:05 -04:00
|
|
|
fmt_end,
|
2019-07-12 09:31:42 -04:00
|
|
|
fmt_bold + cst_colour,
|
2019-07-11 20:11:05 -04:00
|
|
|
this_node.router_state,
|
2019-07-10 21:39:25 -04:00
|
|
|
fmt_end
|
2019-06-18 12:44:07 -04:00
|
|
|
),
|
|
|
|
state='t'
|
|
|
|
)
|
2019-06-18 19:54:42 -04:00
|
|
|
if config['log_keepalive_cluster_details']:
|
|
|
|
logger.out(
|
2020-01-09 10:53:27 -05:00
|
|
|
'{bold}Maintenance:{nofmt} {maint} '
|
2019-07-10 22:24:03 -04:00
|
|
|
'{bold}Active VMs:{nofmt} {domcount} '
|
2019-06-18 19:54:42 -04:00
|
|
|
'{bold}Networks:{nofmt} {netcount} '
|
2019-07-10 22:24:03 -04:00
|
|
|
'{bold}Load:{nofmt} {load} '
|
|
|
|
'{bold}Memory [MiB]: VMs:{nofmt} {allocmem} '
|
|
|
|
'{bold}Used:{nofmt} {usedmem} '
|
|
|
|
'{bold}Free:{nofmt} {freemem}'.format(
|
2019-07-10 21:39:25 -04:00
|
|
|
bold=fmt_bold,
|
|
|
|
nofmt=fmt_end,
|
2020-01-09 10:53:27 -05:00
|
|
|
maint=maintenance,
|
2019-06-18 19:54:42 -04:00
|
|
|
domcount=this_node.domains_count,
|
2019-07-10 22:24:03 -04:00
|
|
|
netcount=len(network_list),
|
|
|
|
load=this_node.cpuload,
|
2019-06-18 19:54:42 -04:00
|
|
|
freemem=this_node.memfree,
|
|
|
|
usedmem=this_node.memused,
|
|
|
|
allocmem=this_node.memalloc,
|
|
|
|
),
|
2019-07-10 22:24:03 -04:00
|
|
|
state='t'
|
2019-06-18 19:54:42 -04:00
|
|
|
)
|
|
|
|
if enable_storage and config['log_keepalive_storage_details']:
|
2019-06-18 12:44:07 -04:00
|
|
|
logger.out(
|
|
|
|
'{bold}Ceph cluster status:{nofmt} {health_colour}{health}{nofmt} '
|
|
|
|
'{bold}Total OSDs:{nofmt} {total_osds} '
|
|
|
|
'{bold}Node OSDs:{nofmt} {node_osds} '
|
|
|
|
'{bold}Pools:{nofmt} {total_pools} '.format(
|
2019-07-10 21:39:25 -04:00
|
|
|
bold=fmt_bold,
|
2019-06-18 12:44:07 -04:00
|
|
|
health_colour=ceph_health_colour,
|
2019-07-10 21:39:25 -04:00
|
|
|
nofmt=fmt_end,
|
2019-06-18 12:44:07 -04:00
|
|
|
health=ceph_health,
|
|
|
|
total_osds=len(osd_list),
|
|
|
|
node_osds=osds_this_node,
|
|
|
|
total_pools=len(pool_list)
|
|
|
|
),
|
2019-07-10 22:24:03 -04:00
|
|
|
state='t'
|
2019-06-18 12:44:07 -04:00
|
|
|
)
|
2018-10-27 18:24:27 -04:00
|
|
|
|
2020-06-06 13:19:11 -04:00
|
|
|
# Look for dead nodes and fence them
|
|
|
|
if not maintenance:
|
|
|
|
if debug:
|
2020-08-17 14:30:21 -04:00
|
|
|
logger.out("Look for dead nodes and fence them", state='d', prefix='main-thread')
|
2020-06-06 13:19:11 -04:00
|
|
|
if config['daemon_mode'] == 'coordinator':
|
|
|
|
for node_name in d_node:
|
|
|
|
try:
|
2021-06-09 13:28:31 -04:00
|
|
|
node_daemon_state = zkhandler.read(('node.state.daemon', node_name))
|
|
|
|
node_keepalive = int(zkhandler.read(('node.keepalive', node_name)))
|
2020-11-06 18:55:10 -05:00
|
|
|
except Exception:
|
2020-06-06 13:19:11 -04:00
|
|
|
node_daemon_state = 'unknown'
|
|
|
|
node_keepalive = 0
|
2020-11-06 19:05:48 -05:00
|
|
|
|
2020-06-06 13:19:11 -04:00
|
|
|
# Handle deadtime and fencing if needed
|
|
|
|
# (A node is considered dead when its keepalive timer is >6*keepalive_interval seconds
|
|
|
|
# out-of-date while in 'start' state)
|
2020-11-07 12:58:54 -05:00
|
|
|
node_deadtime = int(time.time()) - (int(config['keepalive_interval']) * int(config['fence_intervals']))
|
2020-06-06 13:19:11 -04:00
|
|
|
if node_keepalive < node_deadtime and node_daemon_state == 'run':
|
|
|
|
logger.out('Node {} seems dead - starting monitor for fencing'.format(node_name), state='w')
|
2021-06-09 13:28:31 -04:00
|
|
|
zk_lock = zkhandler.writelock(('node.state.daemon', node_name))
|
2020-06-06 13:19:11 -04:00
|
|
|
with zk_lock:
|
|
|
|
# Ensures that, if we lost the lock race and come out of waiting,
|
|
|
|
# we won't try to trigger our own fence thread.
|
2021-06-09 13:28:31 -04:00
|
|
|
if zkhandler.read(('node.state.daemon', node_name)) != 'dead':
|
2021-06-01 11:57:21 -04:00
|
|
|
fence_thread = Thread(target=fencing.fenceNode, args=(node_name, zkhandler, config, logger), kwargs={})
|
2020-06-06 13:19:11 -04:00
|
|
|
fence_thread.start()
|
2020-08-05 21:57:11 -04:00
|
|
|
# Write the updated data after we start the fence thread
|
2021-05-30 14:48:41 -04:00
|
|
|
zkhandler.write([
|
2021-06-09 13:28:31 -04:00
|
|
|
(('node.state.daemon', node_name), 'dead')
|
2021-05-30 14:48:41 -04:00
|
|
|
])
|
2020-06-06 13:19:11 -04:00
|
|
|
|
2020-08-17 13:11:03 -04:00
|
|
|
if debug:
|
2020-08-17 14:30:21 -04:00
|
|
|
logger.out("Keepalive finished", state='d', prefix='main-thread')
|
2018-10-22 20:20:27 -04:00
|
|
|
|
2020-11-07 13:17:49 -05:00
|
|
|
|
2019-03-17 12:52:23 -04:00
|
|
|
# Launch the background keepalive timer thread
update_timer = startKeepaliveTimer()

# Idle tick loop: every real task runs asynchronously (timers/threads),
# so the main thread only sleeps until the sleep itself fails.
keep_ticking = True
while keep_ticking:
    try:
        time.sleep(1)
    except Exception:
        # A failed sleep ends the loop and lets the daemon fall through
        keep_ticking = False
|