2018-10-14 02:01:35 -04:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
# Daemon.py - Node daemon
|
|
|
|
# Part of the Parallel Virtual Cluster (PVC) system
|
|
|
|
#
|
|
|
|
# Copyright (C) 2018 Joshua M. Boniface <joshua@boniface.me>
|
|
|
|
#
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
#
|
|
|
|
###############################################################################
|
|
|
|
|
2018-10-22 20:20:27 -04:00
|
|
|
# Version string for startup output
|
2018-10-14 02:01:35 -04:00
|
|
|
version = '0.4'
|
2018-11-27 22:15:19 -05:00
|
|
|
# Debugging output mode
|
2018-11-27 22:19:42 -05:00
|
|
|
debug = False
|
2018-10-14 02:01:35 -04:00
|
|
|
|
|
|
|
import kazoo.client
|
|
|
|
import libvirt
|
|
|
|
import sys
|
|
|
|
import os
|
|
|
|
import signal
|
|
|
|
import atexit
|
|
|
|
import socket
|
|
|
|
import psutil
|
|
|
|
import subprocess
|
|
|
|
import uuid
|
|
|
|
import time
|
|
|
|
import re
|
|
|
|
import configparser
|
2018-10-22 20:20:27 -04:00
|
|
|
import threading
|
2019-03-11 01:44:26 -04:00
|
|
|
import yaml
|
2018-10-29 17:51:08 -04:00
|
|
|
import json
|
2019-03-11 01:44:26 -04:00
|
|
|
import ipaddress
|
2018-10-14 02:01:35 -04:00
|
|
|
import apscheduler.schedulers.background
|
|
|
|
|
|
|
|
import pvcd.log as log
|
|
|
|
import pvcd.zkhandler as zkhandler
|
2018-10-22 20:20:27 -04:00
|
|
|
import pvcd.fencing as fencing
|
2018-10-14 02:01:35 -04:00
|
|
|
import pvcd.common as common
|
|
|
|
|
|
|
|
import pvcd.DomainInstance as DomainInstance
|
|
|
|
import pvcd.NodeInstance as NodeInstance
|
|
|
|
import pvcd.VXNetworkInstance as VXNetworkInstance
|
2018-10-15 21:09:40 -04:00
|
|
|
import pvcd.DNSAggregatorInstance as DNSAggregatorInstance
|
2018-10-29 17:51:08 -04:00
|
|
|
import pvcd.CephInstance as CephInstance
|
2018-10-14 02:01:35 -04:00
|
|
|
|
|
|
|
###############################################################################
|
|
|
|
# PVCD - node daemon startup program
|
|
|
|
###############################################################################
|
|
|
|
#
|
|
|
|
# The PVC daemon starts a node and configures all the required components for
|
|
|
|
# the node to run. It determines which of the 3 daemon modes it should be in
|
|
|
|
# during initial setup based on hostname and the config file, and then starts
|
|
|
|
# any required services. The 3 daemon modes are:
|
|
|
|
# * leader: the cluster leader, follows the Zookeeper leader
|
|
|
|
# * coordinator: a Zookeeper cluster member
|
|
|
|
# * hypervisor: a hypervisor without any cluster intelligence
|
|
|
|
#
|
|
|
|
###############################################################################
|
|
|
|
|
|
|
|
###############################################################################
|
|
|
|
# Daemon functions
|
|
|
|
###############################################################################
|
|
|
|
|
|
|
|
# Create timer to update this node in Zookeeper
|
|
|
|
def startKeepaliveTimer():
    """Create, start and return the background keepalive scheduler.

    The interval (in seconds) is read from config['keepalive_interval'].
    update_zookeeper() is scheduled at that interval and is also called once
    directly before returning.

    Returns:
        The started BackgroundScheduler instance.
    """
    keepalive_seconds = int(config['keepalive_interval'])
    scheduler = apscheduler.schedulers.background.BackgroundScheduler()

    logger.out('Starting keepalive timer ({} second interval)'.format(keepalive_seconds), state='s')
    scheduler.add_job(update_zookeeper, 'interval', seconds=keepalive_seconds)
    scheduler.start()

    # Push one update immediately rather than waiting for the first interval
    update_zookeeper()

    return scheduler
|
2018-10-14 02:01:35 -04:00
|
|
|
|
|
|
|
def stopKeepaliveTimer():
    """Shut down the global keepalive scheduler, if there is one.

    This is a best-effort operation and never raises: a missing or unset
    timer is ignored, and any other failure is reported as a warning instead
    of being silently discarded.
    """
    global update_timer
    try:
        update_timer.shutdown()
        logger.out('Stopping keepalive timer', state='s')
    except (NameError, AttributeError):
        # No timer has been created yet (name undefined or not a scheduler)
        pass
    except Exception as e:
        logger.out('Failed to stop keepalive timer: {}'.format(e), state='w')
|
|
|
|
|
|
|
|
###############################################################################
|
|
|
|
# PHASE 1a - Configuration parsing
|
|
|
|
###############################################################################
|
|
|
|
|
|
|
|
# Get the config file variable from the environment; pvcd cannot start
# without it, so a missing variable is a fatal error.
try:
    pvcd_config_file = os.environ['PVCD_CONFIG_FILE']
except KeyError:
    print('ERROR: The "PVCD_CONFIG_FILE" environment variable must be set before starting pvcd.')
    sys.exit(1)
|
|
|
|
|
|
|
|
# Set local hostname and domain variables
myfqdn = socket.gethostname()
# Short hostname: everything before the first dot
myhostname = myfqdn.split('.', 1)[0]
# Domain name: everything after the first dot ('' when there is no dot)
mydomainname = myfqdn.partition('.')[2]

# The node ID is the last run of digits in the short hostname. It is kept as
# a string in both branches; consumers convert it with int() where needed.
try:
    mynodeid = re.findall(r'\d+', myhostname)[-1]
except IndexError:
    # No digits in the hostname: fall back to node ID 1
    mynodeid = '1'
|
2018-10-14 02:01:35 -04:00
|
|
|
|
|
|
|
# Gather useful data about our host
# Static data format (list order): 'cpu_count', 'kernel', 'os', 'arch'
#   [0] logical CPU count as reported by psutil
#   [1] `uname -r` output
#   [2] `uname -o` output
#   [3] `uname -m` output
staticdata = [str(psutil.cpu_count())] + [
    subprocess.run(['uname', uname_flag], stdout=subprocess.PIPE).stdout.decode('ascii').strip()
    for uname_flag in ('-r', '-o', '-m')
]
|
|
|
|
|
|
|
|
# Read and parse the config file
|
2018-10-25 23:36:25 -04:00
|
|
|
def readConfig(pvcd_config_file, myhostname):
    """Load the pvcd YAML configuration and flatten it into a single dict.

    Args:
        pvcd_config_file: Path of the YAML configuration file.
        myhostname: Short hostname of this node; used to build the default
            IPMI hostname when the configured one is empty.

    Returns:
        A flat dict of configuration values. The general keys are always
        present; the networking keys are merged in only when
        'enable_networking' is true.

    An unreadable or unparseable file, a missing configuration key, or an
    invalid network/floating address prints an error and terminates the
    process with exit status 1. The module-level names 'mynodeid' and
    'mydomainname' are also read.
    """
    print('Loading configuration from file "{}"'.format(pvcd_config_file))

    # NOTE(review): yaml.load() without an explicit Loader can construct
    # arbitrary Python objects and is deprecated by newer PyYAML releases;
    # consider yaml.safe_load() if the file only uses plain YAML types.
    try:
        with open(pvcd_config_file, 'r') as cfgfile:
            o_config = yaml.load(cfgfile)
    except Exception as e:
        print('ERROR: Failed to parse configuration file: {}'.format(e))
        sys.exit(1)

    # Handle the basic config (hypervisor-only)
    try:
        o_functions = o_config['pvc']['functions']
        o_system = o_config['pvc']['system']
        o_directories = o_system['configuration']['directories']
        o_logging = o_system['configuration']['logging']
        o_fencing = o_system['fencing']

        config_general = {
            'coordinators': o_config['pvc']['cluster']['coordinators'],
            'enable_hypervisor': o_functions['enable_hypervisor'],
            'enable_networking': o_functions['enable_networking'],
            'enable_storage': o_functions['enable_storage'],
            'dynamic_directory': o_directories['dynamic_directory'],
            'log_directory': o_directories['log_directory'],
            'console_log_directory': o_directories['console_log_directory'],
            'file_logging': o_logging['file_logging'],
            'stdout_logging': o_logging['stdout_logging'],
            'console_log_lines': o_logging['console_log_lines'],
            'keepalive_interval': o_fencing['intervals']['keepalive_interval'],
            'fence_intervals': o_fencing['intervals']['fence_intervals'],
            'suicide_intervals': o_fencing['intervals']['suicide_intervals'],
            'successful_fence': o_fencing['actions']['successful_fence'],
            'failed_fence': o_fencing['actions']['failed_fence'],
            'migration_target_selector': o_system['migration']['target_selector'],
            'ipmi_hostname': o_fencing['ipmi']['host'],
            'ipmi_username': o_fencing['ipmi']['user'],
            'ipmi_password': o_fencing['ipmi']['pass'],
        }
    except Exception as e:
        print('ERROR: {}!'.format(e))
        sys.exit(1)
    config = config_general

    # Handle the networking config
    if config['enable_networking']:
        try:
            o_networks = o_config['pvc']['cluster']['networks']
            o_dns_db = o_config['pvc']['coordinator']['dns']['database']
            o_net_cfg = o_config['pvc']['system']['configuration']['networking']

            config_networking = {
                'cluster_domain': o_networks['cluster']['domain'],
                'vni_floating_ip': o_networks['cluster']['floating_ip'],
                'vni_network': o_networks['cluster']['network'],
                'storage_domain': o_networks['storage']['domain'],
                'storage_floating_ip': o_networks['storage']['floating_ip'],
                'storage_network': o_networks['storage']['network'],
                'upstream_domain': o_networks['upstream']['domain'],
                'upstream_floating_ip': o_networks['upstream']['floating_ip'],
                'upstream_network': o_networks['upstream']['network'],
                'upstream_gateway': o_networks['upstream']['gateway'],
                'pdns_postgresql_host': o_dns_db['host'],
                'pdns_postgresql_port': o_dns_db['port'],
                'pdns_postgresql_dbname': o_dns_db['name'],
                'pdns_postgresql_user': o_dns_db['user'],
                'pdns_postgresql_password': o_dns_db['pass'],
                'vni_dev': o_net_cfg['devices']['cluster'],
                'vni_dev_ip': o_net_cfg['addresses']['cluster'],
                'storage_dev': o_net_cfg['devices']['storage'],
                'storage_dev_ip': o_net_cfg['addresses']['storage'],
                'upstream_dev': o_net_cfg['devices']['upstream'],
                'upstream_dev_ip': o_net_cfg['addresses']['upstream'],
            }
        except Exception as e:
            print('ERROR: {}!'.format(e))
            sys.exit(1)
        config = {**config, **config_networking}

        # Create the by-id address entries
        for net in ('vni', 'storage', 'upstream'):
            address_key = '{}_dev_ip'.format(net)
            floating_key = '{}_floating_ip'.format(net)
            network_key = '{}_network'.format(net)

            # Verify the network provided is valid
            try:
                network = ipaddress.ip_network(config[network_key])
            except Exception:
                print('ERROR: Network address {} for {} is not valid!'.format(config[network_key], network_key))
                sys.exit(1)

            # Usable host addresses of this network, computed once per network
            network_hosts = list(network.hosts())

            # If we should be autoselected
            if config[address_key] == 'by-id':
                # Construct an IP from the relevant network
                # The NodeID starts at 1, but indexes start at 0
                address_id = int(mynodeid) - 1
                # Grab the nth address from the network
                try:
                    node_address = network_hosts[address_id]
                except IndexError:
                    print('ERROR: Node ID {} does not fit within network {} for {}!'.format(mynodeid, network, address_key))
                    sys.exit(1)
                config[address_key] = '{}/{}'.format(node_address, network.prefixlen)

            # Verify that the floating IP is valid
            try:
                # Set the ipaddr
                floating_addr = ipaddress.ip_address(config[floating_key].split('/')[0])
                # Verify we're in the network
                if floating_addr not in network_hosts:
                    raise ValueError('floating address is outside of the network')
            except Exception:
                print('ERROR: Floating address {} for {} is not valid!'.format(config[floating_key], floating_key))
                sys.exit(1)

    # Handle the storage config
    if config['enable_storage']:
        # No storage-specific keys are read yet; placeholder for future options
        config_storage = dict()
        config = {**config, **config_storage}

    # Handle an empty ipmi_hostname
    if config['ipmi_hostname'] == '':
        config['ipmi_hostname'] = myhostname + '-lom.' + mydomainname

    return config
|
|
|
|
|
|
|
|
# Get the config object from readConfig()
config = readConfig(pvcd_config_file, myhostname)

# Handle the enable values: convenience aliases for the three function toggles
enable_hypervisor, enable_networking, enable_storage = (
    config['enable_hypervisor'],
    config['enable_networking'],
    config['enable_storage'],
)
|
2018-10-14 02:01:35 -04:00
|
|
|
|
|
|
|
###############################################################################
|
|
|
|
# PHASE 1b - Prepare filesystem directories
|
|
|
|
###############################################################################
|
|
|
|
|
|
|
|
# Define our dynamic directory schema
# <dynamic_directory>/
#                     dnsmasq/
#                     pdns/
#                     nft/
config['dnsmasq_dynamic_directory'] = config['dynamic_directory'] + '/dnsmasq'
config['pdns_dynamic_directory'] = config['dynamic_directory'] + '/pdns'
config['nft_dynamic_directory'] = config['dynamic_directory'] + '/nft'

# Create our dynamic directories if they don't exist. Each directory is
# handled independently so that a pre-existing base directory with missing
# subdirectories is still completed.
os.makedirs(config['dynamic_directory'], exist_ok=True)
os.makedirs(config['dnsmasq_dynamic_directory'], exist_ok=True)
os.makedirs(config['pdns_dynamic_directory'], exist_ok=True)
os.makedirs(config['nft_dynamic_directory'], exist_ok=True)
|
|
|
|
|
|
|
|
# Define our log directory schema
# <log_directory>/
#                 dnsmasq/
#                 pdns/
#                 nft/
config['dnsmasq_log_directory'] = config['log_directory'] + '/dnsmasq'
config['pdns_log_directory'] = config['log_directory'] + '/pdns'
config['nft_log_directory'] = config['log_directory'] + '/nft'

# Create our log directories if they don't exist. Each directory is handled
# independently so that a pre-existing base directory with missing
# subdirectories is still completed.
os.makedirs(config['log_directory'], exist_ok=True)
os.makedirs(config['dnsmasq_log_directory'], exist_ok=True)
os.makedirs(config['pdns_log_directory'], exist_ok=True)
os.makedirs(config['nft_log_directory'], exist_ok=True)
|
|
|
|
|
|
|
|
###############################################################################
|
|
|
|
# PHASE 1c - Set up logging
|
|
|
|
###############################################################################
|
|
|
|
|
|
|
|
logger = log.Logger(config)

# Print our startup messages: identity of this node followed by the machine
# details gathered in staticdata ([0] CPUs, [1] kernel, [2] OS, [3] arch)
for startup_line in (
    'Parallel Virtual Cluster node daemon v{}'.format(version),
    'FQDN: {}'.format(myfqdn),
    'Host: {}'.format(myhostname),
    'ID: {}'.format(mynodeid),
    'IPMI hostname: {}'.format(config['ipmi_hostname']),
    'Machine details:',
    ' CPUs: {}'.format(staticdata[0]),
    ' Arch: {}'.format(staticdata[3]),
    ' OS: {}'.format(staticdata[2]),
    ' Kernel: {}'.format(staticdata[1]),
):
    logger.out(startup_line)
logger.out('Starting pvcd on host {}'.format(myfqdn), state='s')
|
|
|
|
|
2018-10-14 02:58:02 -04:00
|
|
|
###############################################################################
|
2019-03-17 20:05:58 -04:00
|
|
|
# PHASE 2a - Create local IP addresses for static networks
|
2019-03-17 00:53:11 -04:00
|
|
|
###############################################################################
|
|
|
|
|
|
|
|
if enable_networking:
    # VNI configuration: bring the underlying device up with jumbo frames
    vni_dev, vni_dev_ip = config['vni_dev'], config['vni_dev_ip']
    logger.out('Setting up VNI network interface {}'.format(vni_dev, vni_dev_ip), state='i')
    common.run_os_command('ip link set {} mtu 9000 up'.format(vni_dev))

    # Cluster bridge configuration: create brcluster on top of the VNI
    # device and put the cluster address on the bridge
    logger.out('Setting up Cluster network bridge on interface {} with IP {}'.format(vni_dev, vni_dev_ip), state='i')
    for bridge_command in (
        'brctl addbr brcluster',
        'brctl addif brcluster {}'.format(vni_dev),
        'ip link set brcluster mtu 9000 up',
        'ip address add {} dev {}'.format(vni_dev_ip, 'brcluster'),
    ):
        common.run_os_command(bridge_command)

    # Storage configuration: when storage shares the VNI device, the address
    # goes on the cluster bridge instead of the raw device
    storage_dev = 'brcluster' if config['storage_dev'] == vni_dev else config['storage_dev']
    storage_dev_ip = config['storage_dev_ip']
    logger.out('Setting up Storage network on interface {} with IP {}'.format(storage_dev, storage_dev_ip), state='i')
    common.run_os_command('ip link set {} mtu 9000 up'.format(storage_dev))
    common.run_os_command('ip address add {} dev {}'.format(storage_dev_ip, storage_dev))

    # Upstream configuration: only when an upstream device is configured
    if config['upstream_dev']:
        upstream_dev = config['upstream_dev']
        upstream_dev_ip = config['upstream_dev_ip']
        upstream_dev_gateway = config['upstream_gateway']
        logger.out('Setting up Upstream network on interface {} with IP {}'.format(upstream_dev, upstream_dev_ip), state='i')
        common.run_os_command('ip link set {} up'.format(upstream_dev))
        common.run_os_command('ip address add {} dev {}'.format(upstream_dev_ip, upstream_dev))
        # The default route is optional
        if upstream_dev_gateway:
            common.run_os_command('ip route add default via {} dev {}'.format(upstream_dev_gateway, upstream_dev))
|
|
|
|
|
2019-03-17 20:05:58 -04:00
|
|
|
###############################################################################
|
|
|
|
# PHASE 2b - Prepare sysctl for pvcd
|
|
|
|
###############################################################################
|
|
|
|
|
|
|
|
if enable_networking:
    # Enable routing functions
    common.run_os_command('sysctl net.ipv4.ip_forward=1')
    # IPv6 has no "ip_forward" key; forwarding is set through conf.*.forwarding
    common.run_os_command('sysctl net.ipv6.conf.all.forwarding=1')

    # Send redirects
    # NOTE(review): the net.ipv6.conf.*.send_redirects keys below do not
    # appear to exist on Linux; kept unchanged pending confirmation.
    common.run_os_command('sysctl net.ipv4.conf.all.send_redirects=1')
    common.run_os_command('sysctl net.ipv4.conf.default.send_redirects=1')
    common.run_os_command('sysctl net.ipv6.conf.all.send_redirects=1')
    common.run_os_command('sysctl net.ipv6.conf.default.send_redirects=1')

    # Accept source routes
    common.run_os_command('sysctl net.ipv4.conf.all.accept_source_route=1')
    common.run_os_command('sysctl net.ipv4.conf.default.accept_source_route=1')
    common.run_os_command('sysctl net.ipv6.conf.all.accept_source_route=1')
    common.run_os_command('sysctl net.ipv6.conf.default.accept_source_route=1')

    # Disable RP filtering on the VNI dev, upstream dev and bridge interfaces
    # (to allow traffic pivoting). The upstream device is optional, so its
    # keys are only touched when one is configured.
    # NOTE(review): rp_filter looks like an IPv4-only setting; the
    # net.ipv6.conf.*.rp_filter calls are kept unchanged pending confirmation.
    common.run_os_command('sysctl net.ipv4.conf.{}.rp_filter=0'.format(config['vni_dev']))
    if config['upstream_dev']:
        common.run_os_command('sysctl net.ipv4.conf.{}.rp_filter=0'.format(config['upstream_dev']))
    common.run_os_command('sysctl net.ipv4.conf.brcluster.rp_filter=0')
    common.run_os_command('sysctl net.ipv6.conf.{}.rp_filter=0'.format(config['vni_dev']))
    if config['upstream_dev']:
        common.run_os_command('sysctl net.ipv6.conf.{}.rp_filter=0'.format(config['upstream_dev']))
    common.run_os_command('sysctl net.ipv6.conf.brcluster.rp_filter=0')
|
|
|
|
|
2019-03-17 00:53:11 -04:00
|
|
|
###############################################################################
|
2019-03-17 01:45:17 -04:00
|
|
|
# PHASE 3a - Determine coordinator mode
|
2018-10-14 02:01:35 -04:00
|
|
|
###############################################################################
|
|
|
|
|
|
|
|
# What is the list of coordinator hosts
coordinator_nodes = config['coordinators']

# This node runs in 'coordinator' mode when its hostname is in that list,
# and in plain 'hypervisor' mode otherwise
config['daemon_mode'] = 'coordinator' if myhostname in coordinator_nodes else 'hypervisor'
if config['daemon_mode'] == 'coordinator':
    logger.out('Node is a ' + logger.fmt_blue + 'coordinator' + logger.fmt_end, state='i')
|
|
|
|
|
2019-03-17 01:45:17 -04:00
|
|
|
###############################################################################
|
|
|
|
# PHASE 3b - Start system daemons
|
|
|
|
###############################################################################
|
|
|
|
# Zookeeper runs on coordinators only
if config['daemon_mode'] == 'coordinator':
    logger.out('Starting Zookeeper daemon', state='i')
    common.run_os_command('systemctl start zookeeper.service')

# Libvirt runs wherever the hypervisor function is enabled
if enable_hypervisor:
    logger.out('Starting Libvirt daemon', state='i')
    common.run_os_command('systemctl start libvirtd.service')

# Patroni and FRRouting run on coordinators with networking enabled
if enable_networking and config['daemon_mode'] == 'coordinator':
    logger.out('Starting Patroni daemon', state='i')
    common.run_os_command('systemctl start patroni.service')
    logger.out('Starting FRRouting daemon', state='i')
    common.run_os_command('systemctl start frr.service')

# Ceph monitor and manager run on coordinators with storage enabled
if enable_storage and config['daemon_mode'] == 'coordinator':
    logger.out('Starting Ceph monitor daemon', state='i')
    common.run_os_command('systemctl start ceph-mon@{}'.format(myhostname))
    logger.out('Starting Ceph manager daemon', state='i')
    common.run_os_command('systemctl start ceph-mgr@{}'.format(myhostname))

# Brief pause before continuing with the next phase
time.sleep(1)
|
|
|
|
|
2018-10-14 02:01:35 -04:00
|
|
|
###############################################################################
|
2019-03-17 00:53:11 -04:00
|
|
|
# PHASE 4 - Attempt to connect to the coordinators and start zookeeper client
|
2018-10-14 02:01:35 -04:00
|
|
|
###############################################################################
|
|
|
|
|
|
|
|
# Build the Zookeeper client for the configured coordinators and open the
# initial connection; the daemon cannot run without it, so failure is fatal
zk_conn = kazoo.client.KazooClient(hosts=config['coordinators'])
try:
    logger.out(
        'Connecting to Zookeeper cluster nodes {}'.format(config['coordinators']),
        state='i'
    )
    zk_conn.start()
except Exception as e:
    logger.out(
        'ERROR: Failed to connect to Zookeeper cluster: {}'.format(e),
        state='e'
    )
    exit(1)
|
|
|
|
|
|
|
|
# Handle zookeeper failures
|
|
|
|
def zk_listener(state):
    """Connection-state listener registered on the Zookeeper client.

    Args:
        state: The new connection state passed by Kazoo.

    When the state is CONNECTED and a keepalive timer exists, a fresh timer
    is started. For any other state the keepalive timer is stopped and a new
    client is created once per second until one connects; that client then
    replaces the global zk_conn.
    """
    global zk_conn, update_timer

    if state == kazoo.client.KazooState.CONNECTED:
        logger.out('Connection to Zookeeper restarted', state='o')

        # Start keepalive thread
        if update_timer:
            update_timer = startKeepaliveTimer()
        return

    # Stop keepalive thread
    if update_timer:
        stopKeepaliveTimer()

    logger.out('Connection to Zookeeper lost; retrying', state='w')

    # NOTE(review): the replacement client created below does not get this
    # listener registered and the previous client is not stopped here;
    # confirm whether that is intended.
    while True:
        _zk_conn = kazoo.client.KazooClient(hosts=config['coordinators'])
        try:
            _zk_conn.start()
        except Exception:
            # Only ordinary errors are retried; SystemExit/KeyboardInterrupt
            # must be able to break out of this otherwise endless loop
            time.sleep(1)
        else:
            zk_conn = _zk_conn
            break


zk_conn.add_listener(zk_listener)
|
|
|
|
|
|
|
|
###############################################################################
|
2019-03-17 00:53:11 -04:00
|
|
|
# PHASE 5 - Gracefully handle termination
|
2018-10-14 02:01:35 -04:00
|
|
|
###############################################################################
|
|
|
|
|
|
|
|
# Cleanup function
|
|
|
|
def cleanup():
    """Shut the daemon down in an orderly way, then exit with status 0.

    Steps, in order: send a final keepalive, stop the keepalive timer, stop
    the console log watchers of domains on this node, give up the primary
    router state if this node holds it, mark the daemon as stopped in
    Zookeeper, kill dnsmasq, and close the Zookeeper connection.
    """
    global zk_conn, update_timer, d_domain

    logger.out('Performing final keepalive update', state='s')
    update_zookeeper()

    logger.out('Terminating pvcd and cleaning up', state='s')

    # Stop keepalive thread
    try:
        stopKeepaliveTimer()
    except (NameError, AttributeError):
        pass

    # Stop console logging on all VMs
    logger.out('Stopping domain console watchers', state='s')
    for domain in d_domain:
        if d_domain[domain].getnode() == myhostname:
            try:
                d_domain[domain].console_log_instance.stop()
            except (NameError, AttributeError):
                # Domain without a console log instance; nothing to stop
                pass

    # Force into secondary network state if needed
    if zkhandler.readdata(zk_conn, '/nodes/{}/routerstate'.format(myhostname)) == 'primary':
        zkhandler.writedata(zk_conn, {
            '/nodes/{}/routerstate'.format(myhostname): 'secondary',
            '/primary_node': 'none'
        })
        logger.out('Waiting 3 seconds for primary migration', state='s')
        time.sleep(3)

    # Set stop state in Zookeeper
    zkhandler.writedata(zk_conn, { '/nodes/{}/daemonstate'.format(myhostname): 'stop' })

    # Forcibly terminate dnsmasq because it gets stuck sometimes
    common.run_os_command('killall dnsmasq')

    # Close the Zookeeper connection (best-effort; we are exiting anyway)
    try:
        zk_conn.stop()
        zk_conn.close()
    except Exception:
        pass

    logger.out('Terminated pvc daemon', state='s')
    sys.exit(0)
|
2018-10-14 02:01:35 -04:00
|
|
|
|
|
|
|
# Termination function
|
|
|
|
def term(signum='', frame=''):
    """Signal handler entry point: run cleanup().

    Both arguments exist only to match the signal-handler call signature and
    are not used.
    """
    cleanup()
|
2018-10-14 02:01:35 -04:00
|
|
|
|
|
|
|
# Handle signals gracefully
|
|
|
|
# SIGTERM, SIGINT and SIGQUIT all trigger the same orderly shutdown
for handled_signal in (signal.SIGTERM, signal.SIGINT, signal.SIGQUIT):
    signal.signal(handled_signal, term)
|
|
|
|
|
|
|
|
###############################################################################
|
2019-03-17 00:53:11 -04:00
|
|
|
# PHASE 6 - Prepare host in Zookeeper
|
2018-10-14 02:01:35 -04:00
|
|
|
###############################################################################
|
|
|
|
|
|
|
|
# Check if our node exists in Zookeeper, and create it if not
|
|
|
|
if zk_conn.exists('/nodes/{}'.format(myhostname)):
    logger.out("Node is " + logger.fmt_green + "present" + logger.fmt_end + " in Zookeeper", state='i')
    zkhandler.writedata(zk_conn, { '/nodes/{}/daemonstate'.format(myhostname): 'init' })
    # Update static data just in case it's changed
    zkhandler.writedata(zk_conn, { '/nodes/{}/staticdata'.format(myhostname): ' '.join(staticdata) })
else:
    logger.out("Node is " + logger.fmt_red + "absent" + logger.fmt_end + " in Zookeeper; adding new node", state='i')
    keepalive_time = int(time.time())
    # NOTE(review): the values below are pre-encoded to bytes, while every
    # other zkhandler.writedata() call in this file passes plain strings;
    # confirm which form writedata() expects.
    zkhandler.writedata(zk_conn, {
        '/nodes/{}'.format(myhostname): config['daemon_mode'].encode('ascii'),
        # Basic state information
        '/nodes/{}/daemonmode'.format(myhostname): config['daemon_mode'].encode('ascii'),
        '/nodes/{}/daemonstate'.format(myhostname): 'init'.encode('ascii'),
        '/nodes/{}/routerstate'.format(myhostname): 'client'.encode('ascii'),
        '/nodes/{}/domainstate'.format(myhostname): 'flushed'.encode('ascii'),
        '/nodes/{}/staticdata'.format(myhostname): ' '.join(staticdata).encode('ascii'),
        '/nodes/{}/memtotal'.format(myhostname): '0'.encode('ascii'),
        '/nodes/{}/memfree'.format(myhostname): '0'.encode('ascii'),
        '/nodes/{}/memused'.format(myhostname): '0'.encode('ascii'),
        '/nodes/{}/memalloc'.format(myhostname): '0'.encode('ascii'),
        '/nodes/{}/vcpualloc'.format(myhostname): '0'.encode('ascii'),
        '/nodes/{}/cpuload'.format(myhostname): '0.0'.encode('ascii'),
        '/nodes/{}/networkscount'.format(myhostname): '0'.encode('ascii'),
        '/nodes/{}/domainscount'.format(myhostname): '0'.encode('ascii'),
        '/nodes/{}/runningdomains'.format(myhostname): ''.encode('ascii'),
        # Keepalives and fencing information
        '/nodes/{}/keepalive'.format(myhostname): str(keepalive_time).encode('ascii'),
        '/nodes/{}/ipmihostname'.format(myhostname): config['ipmi_hostname'].encode('ascii'),
        '/nodes/{}/ipmiusername'.format(myhostname): config['ipmi_username'].encode('ascii'),
        '/nodes/{}/ipmipassword'.format(myhostname): config['ipmi_password'].encode('ascii')
    })
|
2018-10-14 02:01:35 -04:00
|
|
|
|
|
|
|
# Check that the primary key exists, and create it with us as master if not
|
2019-03-11 01:44:26 -04:00
|
|
|
# Look up the current primary node; a missing key counts as 'none'
try:
    current_primary = zkhandler.readdata(zk_conn, '/primary_node')
except kazoo.exceptions.NoNodeError:
    current_primary = 'none'

if not current_primary or current_primary == 'none':
    # Nobody is primary: only a coordinator may claim the role
    if config['daemon_mode'] == 'coordinator':
        logger.out('No primary node found; creating with us as primary.', state='i')
        zkhandler.writedata(zk_conn, { '/primary_node': myhostname })
else:
    logger.out('Current primary node is {}{}{}.'.format(logger.fmt_blue, current_primary, logger.fmt_end), state='i')
|
2018-10-14 02:01:35 -04:00
|
|
|
|
|
|
|
###############################################################################
|
2019-03-17 01:45:17 -04:00
|
|
|
# PHASE 7 - Ensure Libvirt is working
|
2018-10-14 02:01:35 -04:00
|
|
|
###############################################################################
|
|
|
|
|
2019-03-11 01:44:26 -04:00
|
|
|
if enable_hypervisor:
    # Verify that libvirtd accepts TCP connections on the loopback address by
    # opening and immediately closing a connection; failure is fatal
    libvirt_check_name = "qemu+tcp://127.0.0.1:16509/system"
    logger.out(
        'Connecting to Libvirt daemon at {}'.format(libvirt_check_name),
        state='i'
    )
    try:
        lv_conn = libvirt.open(libvirt_check_name)
        lv_conn.close()
    except Exception as e:
        logger.out(
            'ERROR: Failed to connect to Libvirt daemon: {}'.format(e),
            state='e'
        )
        exit(1)
|
2018-10-14 02:01:35 -04:00
|
|
|
|
|
|
|
###############################################################################
|
|
|
|
# PHASE 7c - Ensure NFT is running on the local host
|
|
|
|
###############################################################################
|
|
|
|
|
2019-03-11 01:44:26 -04:00
|
|
|
if enable_networking:
    logger.out("Creating NFT firewall configuration", state='i')

    # Create our config dirs: networks/, static/ and the base directory
    for nft_subdir in ('/networks', '/static', ''):
        common.run_os_command(
            '/bin/mkdir --parents {}{}'.format(config['nft_dynamic_directory'], nft_subdir)
        )

    # Set up the basic features of the nftables firewall: flush everything,
    # create the inet filter table with forward and input chains, then pull
    # in the static and per-network rule files
    nftables_base_rules = """# Base rules
flush ruleset
# Add the filter table and chains
add table inet filter
add chain inet filter forward {{ type filter hook forward priority 0; }}
add chain inet filter input {{ type filter hook input priority 0; }}
# Include static rules and network rules
include "{rulesdir}/static/*"
include "{rulesdir}/networks/*"
""".format(
        rulesdir=config['nft_dynamic_directory']
    )

    # Write the basic firewall config, then load it
    nftables_base_filename = '{}/base.nft'.format(config['nft_dynamic_directory'])
    with open(nftables_base_filename, 'w') as nfbasefile:
        nfbasefile.write(nftables_base_rules)
    common.reload_firewall_rules(logger, nftables_base_filename)
|
2018-10-14 02:01:35 -04:00
|
|
|
|
2018-10-14 18:37:34 -04:00
|
|
|
###############################################################################
|
|
|
|
# PHASE 7d - Ensure DNSMASQ is not running
|
|
|
|
###############################################################################
|
|
|
|
|
|
|
|
common.run_os_command('systemctl stop dnsmasq.service')

###############################################################################
# PHASE 8 - Set up our objects
###############################################################################

logger.out('Setting up objects', state='i')

# Instance dictionaries and the matching name lists, all empty to start with
d_node, d_network, d_domain, d_osd, d_pool = {}, {}, {}, {}, {}
node_list, network_list, domain_list, osd_list, pool_list = [], [], [], [], []

# Create an instance of the DNS Aggregator if we're a coordinator with
# networking enabled; every other node has none
if enable_networking and config['daemon_mode'] == 'coordinator':
    dns_aggregator = DNSAggregatorInstance.DNSAggregatorInstance(zk_conn, config, logger)
else:
    dns_aggregator = None
|
|
|
|
|
2018-10-14 02:01:35 -04:00
|
|
|
# Node objects
|
|
|
|
@zk_conn.ChildrenWatch('/nodes')
def update_nodes(new_node_list):
    """Reconcile the node registry with the children of '/nodes'.

    new_node_list is the child-name list passed in by the children watch.
    Creates a NodeInstance for every name not in the previous list, deletes
    the object of every name that disappeared, stores the new list, logs it,
    and finally hands the refreshed registry to every node object.
    """
    global node_list, d_node

    # Create objects for names that were not in the previous list
    for node_name in new_node_list:
        if node_name not in node_list:
            d_node[node_name] = NodeInstance.NodeInstance(node_name, myhostname, zk_conn, config, logger, d_node, d_network, d_domain, dns_aggregator)

    # Drop objects for names that are gone from the new list
    for node_name in node_list:
        if node_name not in new_node_list:
            del d_node[node_name]

    # Remember the new list and report it
    node_list = new_node_list
    logger.out('{}Node list:{} {}'.format(logger.fmt_blue, logger.fmt_end, ' '.join(node_list)), state='i')

    # Let every node object see the updated registry
    for node_instance in d_node.values():
        node_instance.update_node_list(d_node)
|
|
|
|
|
|
|
|
# Alias for our local node (passed to network and domain objects)
|
|
|
|
# NOTE(review): this lookup presumably relies on the '/nodes' children watch above having
# already fired once and registered our own hostname in d_node; a missing entry raises
# KeyError here - confirm the watch is invoked at registration time.
this_node = d_node[myhostname]
|
|
|
|
|
2018-10-21 22:08:23 -04:00
|
|
|
# Primary node
|
|
|
|
@zk_conn.DataWatch('/primary_node')
def update_primary(new_primary, stat, event=''):
    """React to a change of the '/primary_node' key.

    new_primary is the raw key data from the data watch; stat and event are
    accepted but not used. When the decoded value differs from the primary
    recorded on this node, the node's router state is written to Zookeeper
    (or, on a coordinator with no primary set, this node contends for the
    role) and the new primary is recorded on every node object.
    """
    # Anything without a .decode() method (e.g. None) is treated as "no primary"
    try:
        new_primary = new_primary.decode('ascii')
    except AttributeError:
        new_primary = 'none'

    # Nothing to do when the recorded primary is already current
    if new_primary == this_node.primary_node:
        return

    routerstate_key = '/nodes/{}/routerstate'.format(myhostname)
    if config['daemon_mode'] != 'coordinator':
        # Non-coordinators are always plain clients
        zkhandler.writedata(zk_conn, {routerstate_key: 'client'})
    elif new_primary == 'none':
        # We're a coordinator and there is no primary: contend for it,
        # but only while running and not already primary
        if this_node.daemon_state == 'run' and this_node.router_state != 'primary':
            logger.out('Contending for primary routing state', state='i')
            zkhandler.writedata(zk_conn, {'/primary_node': myhostname})
    elif new_primary == myhostname:
        zkhandler.writedata(zk_conn, {routerstate_key: 'primary'})
    else:
        zkhandler.writedata(zk_conn, {routerstate_key: 'secondary'})

    # Record the new primary on every node object
    for node_instance in d_node.values():
        node_instance.primary_node = new_primary
|
|
|
|
|
2019-03-11 01:44:26 -04:00
|
|
|
if enable_networking:
    # Network objects
    @zk_conn.ChildrenWatch('/networks')
    def update_networks(new_network_list):
        """Reconcile the network registry with the children of '/networks'.

        new_network_list is the child-name list passed in by the children
        watch. New networks get a VXNetworkInstance (plus DNS aggregation on
        a coordinator and gateways/DHCP on the primary, for managed networks
        only); vanished networks are torn down and deleted. The new list is
        then stored, logged, and the registry handed to every node object.
        """
        global network_list, d_network

        # Bring up networks that were not in the previous list
        for net_id in new_network_list:
            if net_id in network_list:
                continue
            new_net = VXNetworkInstance.VXNetworkInstance(net_id, zk_conn, config, logger, this_node)
            d_network[net_id] = new_net
            if config['daemon_mode'] == 'coordinator' and new_net.nettype == 'managed':
                dns_aggregator.add_network(new_net)
            # Primary-only functionality
            if this_node.router_state == 'primary' and new_net.nettype == 'managed':
                new_net.createGateways()
                new_net.startDHCPServer()

        # Tear down networks that are gone from the new list
        for net_id in network_list:
            if net_id in new_network_list:
                continue
            if d_network[net_id].nettype == 'managed':
                # Primary-only functionality goes first
                # NOTE(review): the aggregator entry is added on any coordinator above
                # but removed only while primary here - confirm this is intended.
                if this_node.router_state == 'primary':
                    d_network[net_id].stopDHCPServer()
                    d_network[net_id].removeGateways()
                    dns_aggregator.remove_network(d_network[net_id])
                # Then the general functionality of a managed network
                d_network[net_id].removeFirewall()
            d_network[net_id].removeNetwork()
            # Finally drop the object itself
            del d_network[net_id]

        # Remember the new list and report it
        network_list = new_network_list
        logger.out('{}Network list:{} {}'.format(logger.fmt_blue, logger.fmt_end, ' '.join(network_list)), state='i')

        # Let every node object see the updated registry
        for node_instance in d_node.values():
            node_instance.update_network_list(d_network)
|
|
|
|
|
|
|
|
if enable_hypervisor:
    # VM domain objects
    @zk_conn.ChildrenWatch('/domains')
    def update_domains(new_domain_list):
        """Reconcile the domain registry with the children of '/domains'.

        new_domain_list is the child-name list passed in by the children
        watch. Creates a DomainInstance for every name not in the previous
        list, deletes the object of every name that disappeared, stores the
        new list, logs it, and hands the registry to every node object.
        """
        global domain_list, d_domain

        # Create objects for names that were not in the previous list
        for dom_name in new_domain_list:
            if dom_name not in domain_list:
                d_domain[dom_name] = DomainInstance.DomainInstance(dom_name, zk_conn, config, logger, this_node)

        # Drop objects for names that are gone from the new list
        for dom_name in domain_list:
            if dom_name not in new_domain_list:
                del d_domain[dom_name]

        # Remember the new list and report it
        domain_list = new_domain_list
        logger.out('{}Domain list:{} {}'.format(logger.fmt_blue, logger.fmt_end, ' '.join(domain_list)), state='i')

        # Let every node object see the updated registry
        for node_instance in d_node.values():
            node_instance.update_domain_list(d_domain)
|
|
|
|
|
|
|
|
if enable_storage:
    # Ceph OSD provisioning key
    @zk_conn.DataWatch('/ceph/cmd')
    def cmd(data, stat, event=''):
        """Pass a non-empty '/ceph/cmd' value on to CephInstance.run_command.

        data is the raw key data from the data watch; stat and event are
        accepted but not used. Empty or missing data is ignored.
        """
        command = data.decode('ascii') if data else ''
        if command:
            CephInstance.run_command(zk_conn, command, d_osd)

    # OSD objects
    @zk_conn.ChildrenWatch('/ceph/osds')
    def update_osds(new_osd_list):
        """Reconcile the OSD registry with the children of '/ceph/osds'.

        new_osd_list is the child-name list passed in by the children watch.
        """
        global osd_list, d_osd

        # Create objects for OSDs that were not in the previous list
        for osd_id in new_osd_list:
            if osd_id not in osd_list:
                d_osd[osd_id] = CephInstance.CephOSDInstance(zk_conn, this_node, osd_id)

        # Drop objects for OSDs that are gone from the new list
        for osd_id in osd_list:
            if osd_id not in new_osd_list:
                del d_osd[osd_id]

        # Remember the new list and report it
        osd_list = new_osd_list
        logger.out('{}OSD list:{} {}'.format(logger.fmt_blue, logger.fmt_end, ' '.join(osd_list)), state='i')

    # Pool objects
    @zk_conn.ChildrenWatch('/ceph/pools')
    def update_pools(new_pool_list):
        """Reconcile the pool registry with the children of '/ceph/pools'.

        new_pool_list is the child-name list passed in by the children watch.
        """
        global pool_list, d_pool

        # Create objects for pools that were not in the previous list
        for pool_name in new_pool_list:
            if pool_name not in pool_list:
                d_pool[pool_name] = CephInstance.CephPoolInstance(zk_conn, this_node, pool_name)

        # Drop objects for pools that are gone from the new list
        for pool_name in pool_list:
            if pool_name not in new_pool_list:
                del d_pool[pool_name]

        # Remember the new list and report it
        pool_list = new_pool_list
        logger.out('{}Pool list:{} {}'.format(logger.fmt_blue, logger.fmt_end, ' '.join(pool_list)), state='i')
|
2018-10-31 23:38:17 -04:00
|
|
|
|
2018-10-14 02:01:35 -04:00
|
|
|
###############################################################################
|
|
|
|
# PHASE 9 - Run the daemon
|
|
|
|
###############################################################################
|
|
|
|
|
2018-10-22 20:20:27 -04:00
|
|
|
# Zookeeper keepalive update function
|
|
|
|
def update_zookeeper():
    """Run one keepalive cycle for this node.

    In order, the cycle:
      1. marks this node's daemon state as 'run' in Zookeeper if it is not;
      2. re-asserts '/primary_node' when this node is the primary router;
      3. with storage enabled, gathers Ceph health, pool and OSD data and
         writes the status and pool stats (primary only) and the stats of
         the OSDs located on this node;
      4. with the hypervisor enabled, totals the memory/vCPUs of this node's
         VMs, re-triggers the state of VMs that should be started but are
         not running, and re-adds running libvirt domains to the node's
         domain list;
      5. writes this node's resource figures and keepalive timestamp;
      6. on a coordinator, starts a fence thread for every node whose
         keepalive is stale while its daemon state is still 'run';
      7. logs a summary of the node (and Ceph cluster) state.

    Returns None. The cycle is abandoned early (after logging an error) when
    the Ceph status or the keepalive data cannot be written, or when the
    libvirt connection cannot be opened.
    """
    # Get past state and update if needed
    if debug:
        print("Get past state and update if needed")
    daemonstate_key = '/nodes/{}/daemonstate'.format(this_node.name)
    past_state = zkhandler.readdata(zk_conn, daemonstate_key)
    this_node.daemon_state = 'run'
    if past_state != 'run':
        zkhandler.writedata(zk_conn, {daemonstate_key: 'run'})

    # Ensure the primary key is properly set
    if debug:
        print("Ensure the primary key is properly set")
    if this_node.router_state == 'primary':
        if zkhandler.readdata(zk_conn, '/primary_node') != this_node.name:
            zkhandler.writedata(zk_conn, {'/primary_node': this_node.name})

    if enable_storage:
        # Get Ceph cluster health (for local printing)
        if debug:
            print("Get Ceph cluster health (for local printing)")
        retcode, stdout, stderr = common.run_os_command('ceph health')
        ceph_health = stdout.rstrip()
        if 'HEALTH_OK' in ceph_health:
            ceph_health_colour = logger.fmt_green
        elif 'HEALTH_WARN' in ceph_health:
            ceph_health_colour = logger.fmt_yellow
        else:
            ceph_health_colour = logger.fmt_red

        # Set ceph health information in zookeeper (primary only)
        if this_node.router_state == 'primary':
            if debug:
                print("Set ceph health information in zookeeper (primary only)")
            # Get status info
            retcode, stdout, stderr = common.run_os_command('ceph status')
            ceph_status = stdout
            try:
                zkhandler.writedata(zk_conn, {
                    '/ceph': str(ceph_status)
                })
            except Exception:
                logger.out('Failed to set Ceph status data', state='e')
                return

        # Set pool information in zookeeper (primary only)
        if this_node.router_state == 'primary':
            if debug:
                print("Set pool information in zookeeper (primary only)")
            # Get pool info, keeping only the fields we publish
            pool_stat_keys = (
                'id',
                'size_bytes',
                'num_objects',
                'num_object_clones',
                'num_object_copies',
                'num_objects_missing_on_primary',
                'num_objects_unfound',
                'num_objects_degraded',
                'read_ops',
                'read_bytes',
                'write_ops',
                'write_bytes',
            )
            retcode, stdout, stderr = common.run_os_command('rados df --format json')
            pool_df_raw = json.loads(stdout)['pools']
            pool_df = {
                str(pool['name']): {key: pool[key] for key in pool_stat_keys}
                for pool in pool_df_raw
            }

            # Publish the stats of each known pool
            # NOTE(review): a pool listed in Zookeeper but absent from the
            # 'rados df' output raises KeyError here - confirm that is acceptable.
            for pool in pool_list:
                zkhandler.writedata(zk_conn, {
                    '/ceph/pools/{}/stats'.format(pool): str(json.dumps(pool_df[pool]))
                })

        # Get data from Ceph OSDs
        if debug:
            print("Get data from Ceph OSDs")
        # Parse the dump data
        retcode, stdout, stderr = common.run_os_command('ceph osd dump --format json')
        osd_dump_raw = json.loads(stdout)['osds']
        osd_dump = {
            str(osd['osd']): {
                'uuid': osd['uuid'],
                'up': osd['up'],
                'in': osd['in'],
                'primary_affinity': osd['primary_affinity'],
            }
            for osd in osd_dump_raw
        }
        # Parse the df data
        retcode, stdout, stderr = common.run_os_command('ceph osd df --format json')
        osd_df_raw = json.loads(stdout)['nodes']
        osd_df = {
            str(osd['id']): {
                'utilization': osd['utilization'],
                'var': osd['var'],
                'pgs': osd['pgs'],
                'kb': osd['kb'],
                'weight': osd['crush_weight'],
                'reweight': osd['reweight'],
            }
            for osd in osd_df_raw
        }
        # Parse the status data
        osd_status = dict()
        retcode, stdout, stderr = common.run_os_command('ceph osd status')
        # Matches terminal escape sequences so colour can be stripped from each line
        escape_re = re.compile(r'\x1b(\[.*?[@-~]|\].*?(\x07|\x1b\\))')
        # NOTE(review): the table is read from stderr, not stdout - presumably
        # that is where this Ceph release prints it; confirm.
        for line in stderr.split('\n'):
            # Strip off colour, then split into whitespace-separated fields
            fields = escape_re.sub('', line).split()
            if len(fields) > 1 and fields[1].isdigit():
                # This is an OSD line so parse it; every second field is taken,
                # the fields in between are presumably table separators
                osd_status[str(fields[1])] = {
                    'node': fields[3].split('.')[0],
                    'used': fields[5],
                    'avail': fields[7],
                    'wr_ops': fields[9],
                    'wr_data': fields[11],
                    'rd_ops': fields[13],
                    'rd_data': fields[15],
                    'state': fields[17],
                }
        # Merge them together into a single meaningful dict
        osd_stats = dict()
        for osd in osd_list:
            this_dump = osd_dump[osd]
            this_dump.update(osd_df[osd])
            this_dump.update(osd_status[osd])
            osd_stats[osd] = this_dump

        # Trigger updates for each OSD on this node
        if debug:
            print("Trigger updates for each OSD on this node")
        osds_this_node = 0
        for osd in osd_list:
            if d_osd[osd].node == myhostname:
                zkhandler.writedata(zk_conn, {
                    '/ceph/osds/{}/stats'.format(osd): str(json.dumps(osd_stats[osd]))
                })
                osds_this_node += 1

    memalloc = 0
    vcpualloc = 0
    domains_count = 0
    if enable_hypervisor:
        # Toggle state management of dead VMs to restart them
        if debug:
            print("Toggle state management of dead VMs to restart them")
        for domain, instance in this_node.d_domain.items():
            if domain not in this_node.domain_list:
                continue
            # Add the allocated memory and vCPUs to our running totals
            memalloc += instance.getmemory()
            vcpualloc += instance.getvcpus()
            if instance.getstate() != 'start' or instance.getnode() != this_node.name:
                continue
            dom = instance.getdom()
            if dom is None:
                continue
            # A VM whose state cannot be queried is treated like one that is not running
            try:
                is_running = dom.state()[0] == libvirt.VIR_DOMAIN_RUNNING
            except Exception:
                is_running = False
            if not is_running:
                # Toggle a state "change" by rewriting the current state
                zkhandler.writedata(zk_conn, {'/domains/{}/state'.format(domain): instance.getstate()})

        # Connect to libvirt
        if debug:
            print("Connect to libvirt")
        libvirt_name = "qemu:///system"
        # libvirt.open() signals failure by raising libvirtError
        try:
            lv_conn = libvirt.open(libvirt_name)
        except libvirt.libvirtError:
            lv_conn = None
        if lv_conn is None:
            logger.out('Failed to open connection to "{}"'.format(libvirt_name), state='e')
            return

        # Everything that needs the connection happens inside this block so
        # the connection is closed on every path, including errors
        try:
            # Ensure that any running VMs are readded to the domain_list
            if debug:
                print("Ensure that any running VMs are readded to the domain_list")
            running_domains = lv_conn.listAllDomains(libvirt.VIR_CONNECT_LIST_DOMAINS_ACTIVE)
            for lv_domain in running_domains:
                domain_uuid = lv_domain.UUIDString()
                if domain_uuid not in this_node.domain_list:
                    this_node.domain_list.append(domain_uuid)
            domains_count = len(lv_conn.listDomainsID())
        finally:
            # Close the Libvirt connection
            lv_conn.close()

    # Set our information in zookeeper
    if debug:
        print("Set our information in zookeeper")
    # Take a single memory snapshot so the three figures are consistent
    vmem = psutil.virtual_memory()
    this_node.memtotal = int(vmem.total / 1024 / 1024)
    this_node.memused = int(vmem.used / 1024 / 1024)
    this_node.memfree = int(vmem.free / 1024 / 1024)
    this_node.memalloc = memalloc
    this_node.vcpualloc = vcpualloc
    this_node.cpuload = os.getloadavg()[0]
    this_node.domains_count = domains_count
    keepalive_time = int(time.time())
    try:
        zkhandler.writedata(zk_conn, {
            '/nodes/{}/memtotal'.format(this_node.name): str(this_node.memtotal),
            '/nodes/{}/memused'.format(this_node.name): str(this_node.memused),
            '/nodes/{}/memfree'.format(this_node.name): str(this_node.memfree),
            '/nodes/{}/memalloc'.format(this_node.name): str(this_node.memalloc),
            '/nodes/{}/vcpualloc'.format(this_node.name): str(this_node.vcpualloc),
            '/nodes/{}/cpuload'.format(this_node.name): str(this_node.cpuload),
            '/nodes/{}/domainscount'.format(this_node.name): str(this_node.domains_count),
            '/nodes/{}/runningdomains'.format(this_node.name): ' '.join(this_node.domain_list),
            '/nodes/{}/keepalive'.format(this_node.name): str(keepalive_time)
        })
    except Exception:
        logger.out('Failed to set keepalive data', state='e')
        return

    # Look for dead nodes and fence them
    if debug:
        print("Look for dead nodes and fence them")
    if config['daemon_mode'] == 'coordinator':
        for node_name in d_node:
            # Any read failure leaves the node in 'unknown' state, which never qualifies for fencing
            try:
                node_daemon_state = zkhandler.readdata(zk_conn, '/nodes/{}/daemonstate'.format(node_name))
                node_domain_state = zkhandler.readdata(zk_conn, '/nodes/{}/domainstate'.format(node_name))
                node_keepalive = int(zkhandler.readdata(zk_conn, '/nodes/{}/keepalive'.format(node_name)))
            except Exception:
                node_daemon_state = 'unknown'
                node_domain_state = 'unknown'
                node_keepalive = 0

            # Handle deadtime and fencing if needed
            # (A node is considered dead when its keepalive timestamp is more than
            # keepalive_interval * fence_intervals seconds out of date while its
            # daemon state is still 'run')
            node_deadtime = int(time.time()) - (int(config['keepalive_interval']) * int(config['fence_intervals']))
            if node_keepalive < node_deadtime and node_daemon_state == 'run':
                logger.out('Node {} seems dead - starting monitor for fencing'.format(node_name), state='w')
                zk_lock = zkhandler.writelock(zk_conn, '/nodes/{}/daemonstate'.format(node_name))
                with zk_lock:
                    # Ensures that, if we lost the lock race and come out of waiting,
                    # we won't try to trigger our own fence thread.
                    if zkhandler.readdata(zk_conn, '/nodes/{}/daemonstate'.format(node_name)) != 'dead':
                        fence_thread = threading.Thread(target=fencing.fenceNode, args=(node_name, zk_conn, config, logger), kwargs={})
                        fence_thread.start()
                        # Write the updated data after we start the fence thread
                        zkhandler.writedata(zk_conn, {'/nodes/{}/daemonstate'.format(node_name): 'dead'})

    # Display node information to the terminal
    logger.out(
        '{}{} keepalive{}'.format(
            logger.fmt_purple,
            myhostname,
            logger.fmt_end
        ),
        state='t'
    )
    logger.out(
        '{bold}Domains:{nofmt} {domcount}  '
        '{bold}Networks:{nofmt} {netcount}  '
        '{bold}VM memory [MiB]:{nofmt} {allocmem}  '
        '{bold}Free memory [MiB]:{nofmt} {freemem}  '
        '{bold}Used memory [MiB]:{nofmt} {usedmem}  '
        '{bold}Load:{nofmt} {load}'.format(
            bold=logger.fmt_bold,
            nofmt=logger.fmt_end,
            domcount=this_node.domains_count,
            freemem=this_node.memfree,
            usedmem=this_node.memused,
            load=this_node.cpuload,
            allocmem=this_node.memalloc,
            netcount=len(network_list)
        ),
    )
    if enable_storage:
        logger.out(
            '{bold}Ceph cluster status:{nofmt} {health_colour}{health}{nofmt}  '
            '{bold}Total OSDs:{nofmt} {total_osds}  '
            '{bold}Node OSDs:{nofmt} {node_osds}  '
            '{bold}Pools:{nofmt} {total_pools}  '.format(
                bold=logger.fmt_bold,
                health_colour=ceph_health_colour,
                nofmt=logger.fmt_end,
                health=ceph_health,
                total_osds=len(osd_list),
                node_osds=osds_this_node,
                total_pools=len(pool_list)
            ),
        )
|
2018-10-27 18:24:27 -04:00
|
|
|
|
2018-10-22 20:20:27 -04:00
|
|
|
|
2019-03-17 12:52:23 -04:00
|
|
|
# Start keepalive thread
|
2018-11-18 00:55:04 -05:00
|
|
|
update_timer = startKeepaliveTimer()

# Idle in the main thread, one second at a time; all real work happens
# asynchronously elsewhere. Anything raised while sleeping (this catches
# everything, exactly like a bare except) ends the loop.
while True:
    try:
        time.sleep(1)
    except BaseException:
        break
|