2018-10-14 02:01:35 -04:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Daemon.py - PVC Node daemon main entrypoint
|
2018-10-14 02:01:35 -04:00
|
|
|
# Part of the Parallel Virtual Cluster (PVC) system
|
|
|
|
#
|
2022-10-06 11:55:27 -04:00
|
|
|
# Copyright (C) 2018-2022 Joshua M. Boniface <joshua@boniface.me>
|
2018-10-14 02:01:35 -04:00
|
|
|
#
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
2021-03-25 16:57:17 -04:00
|
|
|
# the Free Software Foundation, version 3.
|
2018-10-14 02:01:35 -04:00
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
#
|
|
|
|
###############################################################################
|
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
import pvcnoded.util.keepalive
|
|
|
|
import pvcnoded.util.config
|
|
|
|
import pvcnoded.util.fencing
|
|
|
|
import pvcnoded.util.networking
|
|
|
|
import pvcnoded.util.services
|
|
|
|
import pvcnoded.util.libvirt
|
|
|
|
import pvcnoded.util.zookeeper
|
2018-10-14 02:01:35 -04:00
|
|
|
|
2023-02-13 03:06:06 -05:00
|
|
|
import pvcnoded.objects.MonitoringInstance as MonitoringInstance
|
2021-08-21 02:46:11 -04:00
|
|
|
import pvcnoded.objects.DNSAggregatorInstance as DNSAggregatorInstance
|
|
|
|
import pvcnoded.objects.MetadataAPIInstance as MetadataAPIInstance
|
|
|
|
import pvcnoded.objects.VMInstance as VMInstance
|
|
|
|
import pvcnoded.objects.NodeInstance as NodeInstance
|
|
|
|
import pvcnoded.objects.VXNetworkInstance as VXNetworkInstance
|
|
|
|
import pvcnoded.objects.SRIOVVFInstance as SRIOVVFInstance
|
|
|
|
import pvcnoded.objects.CephInstance as CephInstance
|
2021-05-30 14:48:41 -04:00
|
|
|
|
2021-06-01 18:50:26 -04:00
|
|
|
import daemon_lib.log as log
|
2021-06-01 12:17:25 -04:00
|
|
|
import daemon_lib.common as common
|
2020-02-08 19:16:19 -05:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
from time import sleep
|
|
|
|
from distutils.util import strtobool
|
2018-10-14 02:01:35 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
import signal
|
|
|
|
import re
|
|
|
|
import json
|
2020-11-07 14:45:24 -05:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Daemon version
|
2023-10-24 02:10:24 -04:00
|
|
|
version = "0.9.79"
|
2018-10-14 02:01:35 -04:00
|
|
|
|
2020-11-07 13:17:49 -05:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
##########################################################
|
|
|
|
# Entrypoint
|
|
|
|
##########################################################
|
2018-10-14 02:01:35 -04:00
|
|
|
|
2021-11-06 03:02:43 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
def entrypoint():
|
|
|
|
keepalive_timer = None
|
2023-02-13 03:06:06 -05:00
|
|
|
monitoring_instance = None
|
2018-10-14 02:01:35 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Get our configuration
|
|
|
|
config = pvcnoded.util.config.get_configuration()
|
2021-11-06 03:02:43 -04:00
|
|
|
config["pvcnoded_version"] = version
|
2019-03-11 01:44:26 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Set some useful booleans for later (fewer characters)
|
2021-11-06 03:02:43 -04:00
|
|
|
debug = config["debug"]
|
2021-08-21 02:46:11 -04:00
|
|
|
if debug:
|
2021-11-06 03:02:43 -04:00
|
|
|
print("DEBUG MODE ENABLED")
|
2021-08-21 02:46:11 -04:00
|
|
|
|
|
|
|
# Create and validate our directories
|
|
|
|
pvcnoded.util.config.validate_directories(config)
|
|
|
|
|
|
|
|
# Set up the logger instance
|
|
|
|
logger = log.Logger(config)
|
|
|
|
|
|
|
|
# Print our startup message
|
2021-11-06 03:02:43 -04:00
|
|
|
logger.out("")
|
|
|
|
logger.out("|----------------------------------------------------------|")
|
|
|
|
logger.out("| |")
|
|
|
|
logger.out("| ███████████ ▜█▙ ▟█▛ █████ █ █ █ |")
|
|
|
|
logger.out("| ██ ▜█▙ ▟█▛ ██ |")
|
|
|
|
logger.out("| ███████████ ▜█▙ ▟█▛ ██ |")
|
|
|
|
logger.out("| ██ ▜█▙▟█▛ ███████████ |")
|
|
|
|
logger.out("| |")
|
|
|
|
logger.out("|----------------------------------------------------------|")
|
|
|
|
logger.out("| Parallel Virtual Cluster node daemon v{0: <18} |".format(version))
|
|
|
|
logger.out("| Debug: {0: <49} |".format(str(config["debug"])))
|
|
|
|
logger.out("| FQDN: {0: <50} |".format(config["node_fqdn"]))
|
|
|
|
logger.out("| Host: {0: <50} |".format(config["node_hostname"]))
|
|
|
|
logger.out("| ID: {0: <52} |".format(config["node_id"]))
|
|
|
|
logger.out("| IPMI hostname: {0: <41} |".format(config["ipmi_hostname"]))
|
|
|
|
logger.out("| Machine details: |")
|
|
|
|
logger.out("| CPUs: {0: <48} |".format(config["static_data"][0]))
|
|
|
|
logger.out("| Arch: {0: <48} |".format(config["static_data"][3]))
|
|
|
|
logger.out("| OS: {0: <50} |".format(config["static_data"][2]))
|
|
|
|
logger.out("| Kernel: {0: <46} |".format(config["static_data"][1]))
|
|
|
|
logger.out("|----------------------------------------------------------|")
|
|
|
|
logger.out("")
|
|
|
|
logger.out(f'Starting pvcnoded on host {config["node_fqdn"]}', state="s")
|
|
|
|
|
|
|
|
if config["enable_networking"]:
|
|
|
|
if config["enable_sriov"]:
|
2021-08-21 02:46:11 -04:00
|
|
|
# Set up SR-IOV devices
|
|
|
|
pvcnoded.util.networking.setup_sriov(logger, config)
|
2018-10-14 02:01:35 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Set up our interfaces
|
|
|
|
pvcnoded.util.networking.setup_interfaces(logger, config)
|
2018-10-14 02:01:35 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Get list of coordinator nodes
|
2021-11-06 03:02:43 -04:00
|
|
|
coordinator_nodes = config["coordinators"]
|
2018-10-14 02:01:35 -04:00
|
|
|
|
2021-11-06 03:02:43 -04:00
|
|
|
if config["node_hostname"] in coordinator_nodes:
|
2021-08-21 02:46:11 -04:00
|
|
|
# We are indeed a coordinator node
|
2021-11-06 03:02:43 -04:00
|
|
|
config["daemon_mode"] = "coordinator"
|
|
|
|
logger.out(
|
|
|
|
f"This node is a {logger.fmt_blue}coordinator{logger.fmt_end}", state="i"
|
|
|
|
)
|
2021-08-21 02:46:11 -04:00
|
|
|
else:
|
|
|
|
# We are a hypervisor node
|
2021-11-06 03:02:43 -04:00
|
|
|
config["daemon_mode"] = "hypervisor"
|
|
|
|
logger.out(
|
|
|
|
f"This node is a {logger.fmt_cyan}hypervisor{logger.fmt_end}", state="i"
|
|
|
|
)
|
2018-10-14 02:01:35 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
pvcnoded.util.services.start_system_services(logger, config)
|
2019-07-10 21:39:25 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Connect to Zookeeper and return our handler and current schema version
|
|
|
|
zkhandler, node_schema_version = pvcnoded.util.zookeeper.connect(logger, config)
|
2021-06-15 22:42:59 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Watch for a global schema update and fire
|
|
|
|
# This will only change by the API when triggered after seeing all nodes can update
|
2021-11-06 03:02:43 -04:00
|
|
|
@zkhandler.zk_conn.DataWatch(zkhandler.schema.path("base.schema.version"))
|
|
|
|
def update_schema(new_schema_version, stat, event=""):
|
2021-08-21 02:46:11 -04:00
|
|
|
nonlocal zkhandler, keepalive_timer, node_schema_version
|
2021-06-15 22:42:59 -04:00
|
|
|
|
|
|
|
try:
|
2021-11-06 03:02:43 -04:00
|
|
|
new_schema_version = int(new_schema_version.decode("ascii"))
|
2021-08-21 02:46:11 -04:00
|
|
|
except Exception:
|
|
|
|
new_schema_version = 0
|
|
|
|
|
|
|
|
if new_schema_version == node_schema_version:
|
|
|
|
return True
|
|
|
|
|
2021-11-06 03:02:43 -04:00
|
|
|
logger.out("Hot update of schema version started", state="s")
|
|
|
|
logger.out(
|
|
|
|
f"Current version: {node_schema_version,} New version: {new_schema_version}",
|
|
|
|
state="s",
|
|
|
|
)
|
2021-08-21 02:46:11 -04:00
|
|
|
|
|
|
|
# Prevent any keepalive updates while this happens
|
|
|
|
if keepalive_timer is not None:
|
2021-10-07 14:41:12 -04:00
|
|
|
pvcnoded.util.keepalive.stop_keepalive_timer(logger, keepalive_timer)
|
2021-08-21 02:46:11 -04:00
|
|
|
sleep(1)
|
|
|
|
|
|
|
|
# Perform the migration (primary only)
|
2021-11-06 03:02:43 -04:00
|
|
|
if zkhandler.read("base.config.primary_node") == config["node_hostname"]:
|
|
|
|
logger.out("Primary node acquiring exclusive lock", state="s")
|
2021-08-21 02:46:11 -04:00
|
|
|
# Wait for things to settle
|
|
|
|
sleep(0.5)
|
|
|
|
# Acquire a write lock on the root key
|
2021-11-06 03:02:43 -04:00
|
|
|
with zkhandler.exclusivelock("base.schema.version"):
|
2021-08-21 02:46:11 -04:00
|
|
|
# Perform the schema migration tasks
|
2021-11-06 03:02:43 -04:00
|
|
|
logger.out("Performing schema update", state="s")
|
2021-08-21 02:46:11 -04:00
|
|
|
if new_schema_version > node_schema_version:
|
|
|
|
zkhandler.schema.migrate(zkhandler, new_schema_version)
|
|
|
|
if new_schema_version < node_schema_version:
|
|
|
|
zkhandler.schema.rollback(zkhandler, new_schema_version)
|
|
|
|
# Wait for the exclusive lock to be lifted
|
2020-01-12 19:04:31 -05:00
|
|
|
else:
|
2021-11-06 03:02:43 -04:00
|
|
|
logger.out("Non-primary node acquiring read lock", state="s")
|
2021-08-21 02:46:11 -04:00
|
|
|
# Wait for things to settle
|
|
|
|
sleep(1)
|
|
|
|
# Wait for a read lock
|
2021-11-06 03:02:43 -04:00
|
|
|
lock = zkhandler.readlock("base.schema.version")
|
2021-08-21 02:46:11 -04:00
|
|
|
lock.acquire()
|
|
|
|
# Wait a bit more for the primary to return to normal
|
|
|
|
sleep(1)
|
|
|
|
|
|
|
|
# Update the local schema version
|
2021-11-06 03:02:43 -04:00
|
|
|
logger.out("Updating node target schema version", state="s")
|
|
|
|
zkhandler.write(
|
|
|
|
[(("node.data.active_schema", config["node_hostname"]), new_schema_version)]
|
|
|
|
)
|
2021-08-21 02:46:11 -04:00
|
|
|
node_schema_version = new_schema_version
|
2019-03-17 01:45:17 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Restart the API daemons if applicable
|
2021-11-06 03:02:43 -04:00
|
|
|
logger.out("Restarting services", state="s")
|
|
|
|
common.run_os_command("systemctl restart pvcapid-worker.service")
|
|
|
|
if zkhandler.read("base.config.primary_node") == config["node_hostname"]:
|
|
|
|
common.run_os_command("systemctl restart pvcapid.service")
|
2018-10-14 02:01:35 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Restart ourselves with the new schema
|
2021-11-06 03:02:43 -04:00
|
|
|
logger.out("Reloading node daemon", state="s")
|
2021-08-21 02:46:11 -04:00
|
|
|
try:
|
|
|
|
zkhandler.disconnect(persistent=True)
|
|
|
|
del zkhandler
|
|
|
|
except Exception:
|
|
|
|
pass
|
|
|
|
os.execv(sys.argv[0], sys.argv)
|
2021-06-08 23:17:07 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Validate the schema
|
|
|
|
pvcnoded.util.zookeeper.validate_schema(logger, zkhandler)
|
2021-06-14 12:52:43 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Define a cleanup function
|
|
|
|
def cleanup(failure=False):
|
2023-02-13 03:06:06 -05:00
|
|
|
nonlocal logger, zkhandler, keepalive_timer, d_domain, monitoring_instance
|
2021-06-08 23:17:07 -04:00
|
|
|
|
2021-11-06 03:02:43 -04:00
|
|
|
logger.out("Terminating pvcnoded and cleaning up", state="s")
|
2021-06-08 23:17:07 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Set shutdown state in Zookeeper
|
2021-11-06 03:02:43 -04:00
|
|
|
zkhandler.write([(("node.state.daemon", config["node_hostname"]), "shutdown")])
|
2021-06-08 23:17:07 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Waiting for any flushes to complete
|
2021-11-06 03:02:43 -04:00
|
|
|
logger.out("Waiting for any active flushes", state="s")
|
2021-08-29 03:52:18 -04:00
|
|
|
try:
|
|
|
|
if this_node is not None:
|
|
|
|
while this_node.flush_thread is not None:
|
|
|
|
sleep(0.5)
|
|
|
|
except Exception:
|
|
|
|
# We really don't care here, just proceed
|
|
|
|
pass
|
2021-08-21 02:46:11 -04:00
|
|
|
|
|
|
|
# Stop console logging on all VMs
|
2021-11-06 03:02:43 -04:00
|
|
|
logger.out("Stopping domain console watchers", state="s")
|
2021-09-22 16:02:04 -04:00
|
|
|
try:
|
|
|
|
if d_domain is not None:
|
|
|
|
for domain in d_domain:
|
2021-11-06 03:02:43 -04:00
|
|
|
if d_domain[domain].getnode() == config["node_hostname"]:
|
2021-08-21 02:46:11 -04:00
|
|
|
d_domain[domain].console_log_instance.stop()
|
2021-09-22 16:02:04 -04:00
|
|
|
except Exception:
|
|
|
|
pass
|
2021-06-14 12:52:43 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Force into secondary coordinator state if needed
|
|
|
|
try:
|
2023-09-15 16:47:56 -04:00
|
|
|
if this_node.coordinator_state == "primary" and len(d_node) > 1:
|
2021-11-06 03:02:43 -04:00
|
|
|
zkhandler.write([("base.config.primary_node", "none")])
|
|
|
|
logger.out("Waiting for primary migration", state="s")
|
2021-12-28 03:06:03 -05:00
|
|
|
timeout = 240
|
|
|
|
count = 0
|
2023-09-15 16:47:56 -04:00
|
|
|
while this_node.coordinator_state != "secondary" and count < timeout:
|
2021-08-21 02:46:11 -04:00
|
|
|
sleep(0.5)
|
2021-12-28 03:06:03 -05:00
|
|
|
count += 1
|
2021-08-21 02:46:11 -04:00
|
|
|
except Exception:
|
|
|
|
pass
|
2021-06-14 12:52:43 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Stop keepalive thread
|
|
|
|
try:
|
|
|
|
pvcnoded.util.keepalive.stop_keepalive_timer(logger, keepalive_timer)
|
2018-10-14 02:01:35 -04:00
|
|
|
|
2021-11-06 03:02:43 -04:00
|
|
|
logger.out("Performing final keepalive update", state="s")
|
2021-08-21 02:46:11 -04:00
|
|
|
pvcnoded.util.keepalive.node_keepalive(logger, config, zkhandler, this_node)
|
|
|
|
except Exception:
|
|
|
|
pass
|
2020-11-07 14:45:24 -05:00
|
|
|
|
2023-09-15 16:27:41 -04:00
|
|
|
# Shut down the monitoring system
|
2023-02-13 03:06:06 -05:00
|
|
|
try:
|
2023-09-15 16:27:41 -04:00
|
|
|
logger.out("Shutting down monitoring subsystem", state="s")
|
|
|
|
monitoring_instance.shutdown()
|
2023-02-13 03:06:06 -05:00
|
|
|
except Exception:
|
|
|
|
pass
|
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Set stop state in Zookeeper
|
2021-11-06 03:02:43 -04:00
|
|
|
zkhandler.write([(("node.state.daemon", config["node_hostname"]), "stop")])
|
2018-10-14 02:01:35 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Forcibly terminate dnsmasq because it gets stuck sometimes
|
2021-11-06 03:02:43 -04:00
|
|
|
common.run_os_command("killall dnsmasq")
|
2019-05-23 23:18:43 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Close the Zookeeper connection
|
|
|
|
try:
|
|
|
|
zkhandler.disconnect(persistent=True)
|
|
|
|
del zkhandler
|
|
|
|
except Exception:
|
|
|
|
pass
|
2019-06-19 14:52:47 -04:00
|
|
|
|
2021-11-06 03:02:43 -04:00
|
|
|
logger.out("Terminated pvc daemon", state="s")
|
2021-08-21 02:46:11 -04:00
|
|
|
logger.terminate()
|
2020-04-08 21:58:19 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
if failure:
|
|
|
|
retcode = 1
|
|
|
|
else:
|
|
|
|
retcode = 0
|
2018-10-14 02:01:35 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
os._exit(retcode)
|
2018-10-22 20:20:27 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Termination function
|
2021-11-06 03:02:43 -04:00
|
|
|
def term(signum="", frame=""):
    """
    Signal handler: run the normal (non-failure) shutdown path.

    signum, frame: accepted so the function can be installed with
                   signal.signal(); neither is used
    """
    cleanup(failure=False)
|
2020-04-12 03:49:29 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Hangup (logrotate) function
|
2021-11-06 03:02:43 -04:00
|
|
|
def hup(signum="", frame=""):
    """
    Signal handler: forward a hangup to the logger when file logging is on.

    signum, frame: accepted so the function can be installed with
                   signal.signal(); neither is used
    """
    # Without file logging there is nothing to do
    if not config["file_logging"]:
        return
    logger.hup()
|
2020-04-12 03:49:29 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Handle signals gracefully
|
|
|
|
signal.signal(signal.SIGTERM, term)
|
|
|
|
signal.signal(signal.SIGINT, term)
|
|
|
|
signal.signal(signal.SIGQUIT, term)
|
|
|
|
signal.signal(signal.SIGHUP, hup)
|
2018-10-22 20:20:27 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Set up this node in Zookeeper
|
|
|
|
pvcnoded.util.zookeeper.setup_node(logger, config, zkhandler)
|
2018-10-14 02:01:35 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Check that the primary node key exists and create it with us as primary if not
|
2018-10-14 02:01:35 -04:00
|
|
|
try:
|
2021-11-06 03:02:43 -04:00
|
|
|
current_primary = zkhandler.read("base.config.primary_node")
|
2020-11-06 18:55:10 -05:00
|
|
|
except Exception:
|
2021-11-06 03:02:43 -04:00
|
|
|
current_primary = "none"
|
2018-10-14 02:01:35 -04:00
|
|
|
|
2021-11-06 03:02:43 -04:00
|
|
|
if current_primary and current_primary != "none":
|
|
|
|
logger.out(
|
|
|
|
f"Current primary node is {logger.fmt_blue}{current_primary}{logger.fmt_end}",
|
|
|
|
state="i",
|
|
|
|
)
|
2021-07-19 12:39:13 -04:00
|
|
|
else:
|
2021-11-06 03:02:43 -04:00
|
|
|
if config["daemon_mode"] == "coordinator":
|
|
|
|
logger.out("No primary node found; setting us as primary", state="i")
|
|
|
|
zkhandler.write([("base.config.primary_node", config["node_hostname"])])
|
2020-08-13 14:38:05 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Ensure that IPMI is reachable and working
|
2021-11-06 03:02:43 -04:00
|
|
|
if not pvcnoded.util.fencing.verify_ipmi(
|
|
|
|
config["ipmi_hostname"], config["ipmi_username"], config["ipmi_password"]
|
|
|
|
):
|
|
|
|
logger.out(
|
2023-09-16 22:41:58 -04:00
|
|
|
"Our IPMI interface is not reachable; fencing of this node will fail until corrected",
|
2021-11-06 03:02:43 -04:00
|
|
|
state="w",
|
|
|
|
)
|
2023-09-16 22:41:58 -04:00
|
|
|
else:
|
|
|
|
logger.out(
|
|
|
|
"Our IPMI interface is reachable; fencing of this node is possible",
|
|
|
|
state="o",
|
|
|
|
)
|
2018-10-14 02:01:35 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Validate libvirt
|
|
|
|
if not pvcnoded.util.libvirt.validate_libvirtd(logger, config):
|
2021-07-19 12:39:13 -04:00
|
|
|
cleanup(failure=True)
|
2018-10-14 02:01:35 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Set up NFT
|
|
|
|
pvcnoded.util.networking.create_nft_configuration(logger, config)
|
|
|
|
|
|
|
|
# Create our object dictionaries
|
2021-11-06 03:02:43 -04:00
|
|
|
logger.out("Setting up objects", state="s")
|
2021-08-21 02:46:11 -04:00
|
|
|
|
|
|
|
d_node = dict()
|
|
|
|
node_list = list()
|
|
|
|
d_network = dict()
|
|
|
|
network_list = list()
|
|
|
|
sriov_pf_list = list()
|
|
|
|
d_sriov_vf = dict()
|
|
|
|
sriov_vf_list = list()
|
|
|
|
d_domain = dict()
|
|
|
|
domain_list = list()
|
|
|
|
d_osd = dict()
|
|
|
|
osd_list = list()
|
|
|
|
d_pool = dict()
|
|
|
|
pool_list = list()
|
|
|
|
d_volume = dict()
|
|
|
|
volume_list = dict()
|
|
|
|
|
2021-11-06 03:02:43 -04:00
|
|
|
if config["enable_networking"] and config["daemon_mode"] == "coordinator":
|
2021-08-21 02:46:11 -04:00
|
|
|
# Create an instance of the DNS Aggregator and Metadata API if we're a coordinator
|
2021-05-31 19:53:29 -04:00
|
|
|
dns_aggregator = DNSAggregatorInstance.DNSAggregatorInstance(config, logger)
|
2021-11-06 03:02:43 -04:00
|
|
|
metadata_api = MetadataAPIInstance.MetadataAPIInstance(
|
|
|
|
zkhandler, config, logger
|
|
|
|
)
|
2019-03-11 01:44:26 -04:00
|
|
|
else:
|
|
|
|
dns_aggregator = None
|
2019-12-14 15:55:30 -05:00
|
|
|
metadata_api = None
|
2018-10-15 21:09:40 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
#
|
|
|
|
# Zookeeper watchers for objects
|
|
|
|
#
|
2020-11-07 14:45:24 -05:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Node objects
|
2021-11-06 03:02:43 -04:00
|
|
|
@zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path("base.node"))
|
2021-08-21 02:46:11 -04:00
|
|
|
def set_nodes(new_node_list):
|
|
|
|
nonlocal d_node, node_list
|
2018-10-14 02:01:35 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Add missing nodes to list
|
|
|
|
for node in [node for node in new_node_list if node not in node_list]:
|
2021-11-06 03:02:43 -04:00
|
|
|
d_node[node] = NodeInstance.NodeInstance(
|
|
|
|
node,
|
|
|
|
config["node_hostname"],
|
|
|
|
zkhandler,
|
|
|
|
config,
|
|
|
|
logger,
|
|
|
|
d_node,
|
|
|
|
d_network,
|
|
|
|
d_domain,
|
|
|
|
dns_aggregator,
|
|
|
|
metadata_api,
|
|
|
|
)
|
2018-10-14 02:01:35 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Remove deleted nodes from list
|
|
|
|
for node in [node for node in node_list if node not in new_node_list]:
|
2021-11-06 03:02:43 -04:00
|
|
|
del d_node[node]
|
2018-10-14 02:01:35 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
node_list = new_node_list
|
2021-11-06 03:02:43 -04:00
|
|
|
logger.out(
|
|
|
|
f'{logger.fmt_blue}Node list:{logger.fmt_end} {" ".join(node_list)}',
|
|
|
|
state="i",
|
|
|
|
)
|
2018-10-14 02:01:35 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Update node objects lists
|
|
|
|
for node in d_node:
|
|
|
|
d_node[node].update_node_list(d_node)
|
2020-11-07 14:45:24 -05:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Create helpful alias for this node
|
2021-11-06 03:02:43 -04:00
|
|
|
this_node = d_node[config["node_hostname"]]
|
2020-01-09 10:53:27 -05:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Maintenance status
|
2021-11-06 03:02:43 -04:00
|
|
|
@zkhandler.zk_conn.DataWatch(zkhandler.schema.path("base.config.maintenance"))
def update_maintenance(_maintenance, stat):
    """
    Mirror the cluster maintenance flag onto this node's object.

    Registered as a data watch on the "base.config.maintenance" key.

    _maintenance: raw key contents handed in by the watch; decoded as ASCII
                  and parsed with strtobool()
    stat: supplied by the watch, not used here
    """
    try:
        flag = bool(strtobool(_maintenance.decode("ascii")))
    except Exception:
        # Anything that cannot be decoded or parsed counts as "not in maintenance"
        flag = False

    this_node.maintenance = flag
|
2018-10-21 22:08:23 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Primary node
|
2021-11-06 03:02:43 -04:00
|
|
|
@zkhandler.zk_conn.DataWatch(zkhandler.schema.path("base.config.primary_node"))
def update_primary_node(new_primary, stat, event=""):
    """
    React to a change of the "base.config.primary_node" key.

    Registered as a data watch on that key.

    new_primary: raw key contents handed in by the watch; a value without a
                 decode() method (e.g. None) is treated as "none"
    stat: supplied by the watch; its version is used to detect a concurrent
          change of the key while contending
    event: supplied by the watch, not used here

    Nothing happens when the new value equals this_node.primary_node. Otherwise:
      * a non-coordinator sets its router state to "client";
      * a coordinator either contends for primary (value "none"), writes
        "takeover" (value is this node and we are secondary), or writes
        "relinquish" (value is another node and we are primary);
    and finally primary_node is updated on every object in d_node.
    """
    try:
        new_primary = new_primary.decode("ascii")
    except AttributeError:
        new_primary = "none"
    key_version = stat.version

    # TODO: Move this to the Node structure
    if new_primary != this_node.primary_node:
        if config["daemon_mode"] == "coordinator":
            # We're a coordinator and there's no primary
            if new_primary == "none":
                if (
                    this_node.daemon_state == "run"
                    and this_node.coordinator_state
                    not in ["primary", "takeover", "relinquish"]
                ):
                    logger.out(
                        "Contending for primary coordinator state", state="i"
                    )
                    # Acquire an exclusive lock on the primary_node key
                    primary_lock = zkhandler.exclusivelock(
                        "base.config.primary_node"
                    )
                    try:
                        # This lock times out after 0.4s, which is 0.1s less than the pre-takeover
                        # timeout below. This ensures a primary takeover will not deadlock against
                        # a node which has failed the contention
                        primary_lock.acquire(timeout=0.4)
                        try:
                            # Ensure that when we get the lock the versions are still consistent and
                            # that another node hasn't already acquired the primary state (maybe we're
                            # extremely slow to respond)
                            if (
                                key_version
                                == zkhandler.zk_conn.get(
                                    zkhandler.schema.path("base.config.primary_node")
                                )[1].version
                            ):
                                # Set the primary to us
                                logger.out(
                                    "Acquiring primary coordinator state", state="o"
                                )
                                zkhandler.write(
                                    [
                                        (
                                            "base.config.primary_node",
                                            config["node_hostname"],
                                        )
                                    ]
                                )
                        finally:
                            # Always release once acquired, even if the version check or
                            # the write raised; otherwise the lock would stay held
                            primary_lock.release()
                    # We timed out acquiring a lock, or failed to write, which means we failed the
                    # contention and should just log that
                    except Exception:
                        logger.out(
                            "Timed out contending for primary coordinator state",
                            state="i",
                        )
            elif new_primary == config["node_hostname"]:
                if this_node.coordinator_state == "secondary":
                    # Wait for 0.5s to ensure other contentions time out, then take over
                    sleep(0.5)
                    zkhandler.write(
                        [
                            (
                                ("node.state.router", config["node_hostname"]),
                                "takeover",
                            )
                        ]
                    )
            else:
                if this_node.coordinator_state == "primary":
                    # Wait for 0.5s to ensure other contentions time out, then relinquish
                    sleep(0.5)
                    zkhandler.write(
                        [
                            (
                                ("node.state.router", config["node_hostname"]),
                                "relinquish",
                            )
                        ]
                    )
        else:
            zkhandler.write(
                [(("node.state.router", config["node_hostname"]), "client")]
            )

        # TODO: Turn this into a function like the others for clarity
        for node in d_node:
            d_node[node].primary_node = new_primary
|
2020-11-07 13:17:49 -05:00
|
|
|
|
2021-11-06 03:02:43 -04:00
|
|
|
if config["enable_networking"]:
|
2021-08-21 02:46:11 -04:00
|
|
|
# Network objects
|
2021-11-06 03:02:43 -04:00
|
|
|
@zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path("base.network"))
|
2021-08-21 02:46:11 -04:00
|
|
|
def update_networks(new_network_list):
|
|
|
|
nonlocal network_list, d_network
|
2019-03-11 01:44:26 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Add any missing networks to the list
|
2021-11-06 03:02:43 -04:00
|
|
|
for network in [
|
|
|
|
network for network in new_network_list if network not in network_list
|
|
|
|
]:
|
|
|
|
d_network[network] = VXNetworkInstance.VXNetworkInstance(
|
|
|
|
network, zkhandler, config, logger, this_node, dns_aggregator
|
|
|
|
)
|
2021-08-21 02:46:11 -04:00
|
|
|
# TODO: Move this to the Network structure
|
2021-11-06 03:02:43 -04:00
|
|
|
if (
|
|
|
|
config["daemon_mode"] == "coordinator"
|
|
|
|
and d_network[network].nettype == "managed"
|
|
|
|
):
|
2019-08-07 11:46:58 -04:00
|
|
|
try:
|
|
|
|
dns_aggregator.add_network(d_network[network])
|
|
|
|
except Exception as e:
|
2021-11-06 03:02:43 -04:00
|
|
|
logger.out(
|
|
|
|
f"Failed to create DNS Aggregator for network {network}: {e}",
|
|
|
|
state="w",
|
|
|
|
)
|
2019-03-11 01:44:26 -04:00
|
|
|
# Start primary functionality
|
2021-11-06 03:02:43 -04:00
|
|
|
if (
|
2023-09-15 16:47:56 -04:00
|
|
|
this_node.coordinator_state == "primary"
|
2021-11-06 03:02:43 -04:00
|
|
|
and d_network[network].nettype == "managed"
|
|
|
|
):
|
2019-03-11 01:44:26 -04:00
|
|
|
d_network[network].createGateways()
|
|
|
|
d_network[network].startDHCPServer()
|
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Remove any missing networks from the list
|
2021-11-06 03:02:43 -04:00
|
|
|
for network in [
|
|
|
|
network for network in network_list if network not in new_network_list
|
|
|
|
]:
|
2021-08-21 02:46:11 -04:00
|
|
|
# TODO: Move this to the Network structure
|
2021-11-06 03:02:43 -04:00
|
|
|
if d_network[network].nettype == "managed":
|
2019-03-15 11:28:49 -04:00
|
|
|
# Stop primary functionality
|
2023-09-15 16:47:56 -04:00
|
|
|
if this_node.coordinator_state == "primary":
|
2019-03-15 11:28:49 -04:00
|
|
|
d_network[network].stopDHCPServer()
|
|
|
|
d_network[network].removeGateways()
|
|
|
|
dns_aggregator.remove_network(d_network[network])
|
2021-08-21 02:46:11 -04:00
|
|
|
# Stop firewalling
|
2019-03-15 11:28:49 -04:00
|
|
|
d_network[network].removeFirewall()
|
2021-08-21 02:46:11 -04:00
|
|
|
# Delete the network
|
2019-03-11 01:44:26 -04:00
|
|
|
d_network[network].removeNetwork()
|
2021-11-06 03:02:43 -04:00
|
|
|
del d_network[network]
|
2019-03-11 01:44:26 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Update the new list
|
|
|
|
network_list = new_network_list
|
2021-11-06 03:02:43 -04:00
|
|
|
logger.out(
|
|
|
|
f'{logger.fmt_blue}Network list:{logger.fmt_end} {" ".join(network_list)}',
|
|
|
|
state="i",
|
|
|
|
)
|
2021-08-21 02:46:11 -04:00
|
|
|
|
|
|
|
# Update node objects list
|
|
|
|
for node in d_node:
|
|
|
|
d_node[node].update_network_list(d_network)
|
|
|
|
|
|
|
|
# Add the SR-IOV PFs and VFs to Zookeeper
|
|
|
|
# These do not behave like the objects; they are not dynamic (the API cannot change them), and they
|
|
|
|
# exist for the lifetime of this Node instance. The objects are set here in Zookeeper on a per-node
|
|
|
|
# basis, under the Node configuration tree.
|
|
|
|
# MIGRATION: The schema.schema.get ensures that the current active Schema contains the required keys
|
2021-11-06 03:02:43 -04:00
|
|
|
# Register the configured SR-IOV PFs, and the VFs the kernel reports for them,
# under this node's tree in Zookeeper, then prune entries whose PF is no longer
# configured. Runs only when SR-IOV is enabled and the active schema has the
# "sriov_pf" key.
if (
    config["enable_sriov"]
    and zkhandler.schema.schema.get("sriov_pf", None) is not None
):
    vf_list = list()
    for device in config["sriov_device"]:
        pf = device["phy"]
        vfcount = device["vfcount"]
        # A missing (or explicitly null) MTU falls back to 1500
        mtu = device.get("mtu", None)
        if mtu is None:
            mtu = 1500

        # Create the PF device in Zookeeper
        zkhandler.write(
            [
                (("node.sriov.pf", config["node_hostname"], "sriov_pf", pf), ""),
                (("node.sriov.pf", config["node_hostname"], "sriov_pf.mtu", pf), mtu),
                (
                    ("node.sriov.pf", config["node_hostname"], "sriov_pf.vfcount", pf),
                    vfcount,
                ),
            ]
        )
        # Append the device to the list of PFs
        sriov_pf_list.append(pf)

        # Get the list of VFs from `ip link show`
        vf_list = json.loads(
            common.run_os_command(f"ip --json link show {pf}")[1]
        )[0].get("vfinfo_list", [])
        for vf in vf_list:
            # Each entry looks like:
            # {
            #   'vf': 3,
            #   'link_type': 'ether',
            #   'address': '00:00:00:00:00:00',
            #   'broadcast': 'ff:ff:ff:ff:ff:ff',
            #   'vlan_list': [{'vlan': 101, 'qos': 2}],
            #   'rate': {'max_tx': 0, 'min_tx': 0},
            #   'spoofchk': True,
            #   'link_state': 'auto',
            #   'trust': False,
            #   'query_rss_en': False
            # }
            vfphy = f'{pf}v{vf["vf"]}'

            # Get the PCIe bus information
            dev_pcie_path = None
            try:
                with open(f"/sys/class/net/{vfphy}/device/uevent") as vfh:
                    dev_uevent = vfh.readlines()
                for line in dev_uevent:
                    if line.startswith("PCI_SLOT_NAME="):
                        dev_pcie_path = line.rstrip().split("=")[-1]
            except FileNotFoundError:
                # Something must already be using the PCIe device
                pass

            # Add the VF to Zookeeper if it does not yet exist
            if not zkhandler.exists(
                ("node.sriov.vf", config["node_hostname"], "sriov_vf", vfphy)
            ):
                if dev_pcie_path is None:
                    # We can't add the device - for some reason we can't get any information on its PCIe bus path,
                    # so just ignore this one, and continue.
                    # This shouldn't happen under any real circumstances, unless the admin tries to attach a non-existent
                    # VF to a VM manually, then goes ahead and adds that VF to the system with the VM running.
                    continue

                pcie_domain, pcie_bus, pcie_slot, pcie_function = re.split(
                    r":|\.", dev_pcie_path
                )

                # Tolerate a missing or empty 'vlan_list' instead of raising
                # KeyError/IndexError; the "0" defaults below then apply.
                vlan_info = (vf.get("vlan_list") or [{}])[0]

                # Ordered so that parent keys are written before their children
                vf_fields = [
                    ("sriov_vf", ""),
                    ("sriov_vf.pf", pf),
                    ("sriov_vf.mtu", mtu),
                    ("sriov_vf.mac", vf["address"]),
                    ("sriov_vf.phy_mac", vf["address"]),
                    ("sriov_vf.config", ""),
                    ("sriov_vf.config.vlan_id", vlan_info.get("vlan", "0")),
                    ("sriov_vf.config.vlan_qos", vlan_info.get("qos", "0")),
                    ("sriov_vf.config.tx_rate_min", vf["rate"]["min_tx"]),
                    ("sriov_vf.config.tx_rate_max", vf["rate"]["max_tx"]),
                    ("sriov_vf.config.spoof_check", vf["spoofchk"]),
                    ("sriov_vf.config.link_state", vf["link_state"]),
                    ("sriov_vf.config.trust", vf["trust"]),
                    ("sriov_vf.config.query_rss", vf["query_rss_en"]),
                    ("sriov_vf.pci", ""),
                    ("sriov_vf.pci.domain", pcie_domain),
                    ("sriov_vf.pci.bus", pcie_bus),
                    ("sriov_vf.pci.slot", pcie_slot),
                    ("sriov_vf.pci.function", pcie_function),
                    ("sriov_vf.used", False),
                    ("sriov_vf.used_by", ""),
                ]
                zkhandler.write(
                    [
                        (
                            (
                                "node.sriov.vf",
                                config["node_hostname"],
                                schema_key,
                                vfphy,
                            ),
                            value,
                        )
                        for schema_key, value in vf_fields
                    ]
                )

            # Append the device to the list of VFs
            sriov_vf_list.append(vfphy)

    # Remove any obsolete PFs from Zookeeper if they go away
    for pf in zkhandler.children(("node.sriov.pf", config["node_hostname"])):
        if pf not in sriov_pf_list:
            zkhandler.delete(
                [("node.sriov.pf", config["node_hostname"], "sriov_pf", pf)]
            )
    # Remove any obsolete VFs from Zookeeper if their PF goes away
    for vf in zkhandler.children(("node.sriov.vf", config["node_hostname"])):
        vf_pf = zkhandler.read(
            ("node.sriov.vf", config["node_hostname"], "sriov_vf.pf", vf)
        )
        if vf_pf not in sriov_pf_list:
            zkhandler.delete(
                [("node.sriov.vf", config["node_hostname"], "sriov_vf", vf)]
            )
|
2021-06-21 20:49:45 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# SR-IOV VF objects
|
|
|
|
# This is a ChildrenWatch just for consistency; the list never changes at runtime
|
2021-11-06 03:02:43 -04:00
|
|
|
@zkhandler.zk_conn.ChildrenWatch(
|
|
|
|
zkhandler.schema.path("node.sriov.vf", config["node_hostname"])
|
|
|
|
)
|
2021-08-21 02:46:11 -04:00
|
|
|
def update_sriov_vfs(new_sriov_vf_list):
|
|
|
|
nonlocal sriov_vf_list, d_sriov_vf
|
Implement SR-IOV PF and VF instances
Adds support for the node daemon managing SR-IOV PF and VF instances.
PFs are added to Zookeeper automatically based on the config at startup
during network configuration, and are otherwise completely static. PFs
are automatically removed from Zookeeper, along with all coresponding
VFs, should the PF phy device be removed from the configuration.
VFs are configured based on the (autocreated) VFs of each PF device,
added to Zookeeper, and then a new class instance, SRIOVVFInstance, is
used to watch them for configuration changes. This will enable the
runtime management of VF settings by the API. The set of keys ensures
that both configuration and details of the NIC can be tracked.
Most keys are self-explanatory, especially for PFs and the basic keys
for VFs. The configuration tree is also self-explanatory, being based
entirely on the options available in the `ip link set {dev} vf` command.
Two additional keys are also present: `used` and `used_by`, which will
be able to track the (boolean) state of usage, as well as the VM that
uses a given VIF. Since the VM side implementation will support both
macvtap and direct "hostdev" assignments, this will ensure that this
state can be tracked on both the VF and the VM side.
2021-06-17 01:01:23 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Add VFs to the list
|
|
|
|
for vf in common.sortInterfaceNames(new_sriov_vf_list):
|
2021-11-06 03:02:43 -04:00
|
|
|
d_sriov_vf[vf] = SRIOVVFInstance.SRIOVVFInstance(
|
|
|
|
vf, zkhandler, config, logger, this_node
|
|
|
|
)
|
2021-08-21 02:46:11 -04:00
|
|
|
|
|
|
|
sriov_vf_list = sorted(new_sriov_vf_list)
|
2021-11-06 03:02:43 -04:00
|
|
|
logger.out(
|
|
|
|
f'{logger.fmt_blue}SR-IOV VF list:{logger.fmt_end} {" ".join(sriov_vf_list)}',
|
|
|
|
state="i",
|
|
|
|
)
|
Implement SR-IOV PF and VF instances
Adds support for the node daemon managing SR-IOV PF and VF instances.
PFs are added to Zookeeper automatically based on the config at startup
during network configuration, and are otherwise completely static. PFs
are automatically removed from Zookeeper, along with all corresponding
VFs, should the PF phy device be removed from the configuration.
VFs are configured based on the (autocreated) VFs of each PF device,
added to Zookeeper, and then a new class instance, SRIOVVFInstance, is
used to watch them for configuration changes. This will enable the
runtime management of VF settings by the API. The set of keys ensures
that both configuration and details of the NIC can be tracked.
Most keys are self-explanatory, especially for PFs and the basic keys
for VFs. The configuration tree is also self-explanatory, being based
entirely on the options available in the `ip link set {dev} vf` command.
Two additional keys are also present: `used` and `used_by`, which will
be able to track the (boolean) state of usage, as well as the VM that
uses a given VIF. Since the VM side implementation will support both
macvtap and direct "hostdev" assignments, this will ensure that this
state can be tracked on both the VF and the VM side.
2021-06-17 01:01:23 -04:00
|
|
|
|
2021-11-06 03:02:43 -04:00
|
|
|
if config["enable_hypervisor"]:
|
2021-08-21 02:46:11 -04:00
|
|
|
# VM command pipeline key
|
2021-11-06 03:02:43 -04:00
|
|
|
@zkhandler.zk_conn.DataWatch(zkhandler.schema.path("base.cmd.domain"))
def run_domain_command(data, stat, event=""):
    """
    Hand a non-empty "base.cmd.domain" payload to VMInstance.vm_command.

    data is decoded as ASCII before being passed on; stat and event are
    accepted from the DataWatch but not used here.
    """
    # Nothing to do for an empty or missing payload
    if not data:
        return

    command = data.decode("ascii")
    VMInstance.vm_command(zkhandler, logger, this_node, command)
|
2021-08-21 02:46:11 -04:00
|
|
|
|
|
|
|
# VM domain objects
|
2021-11-06 03:02:43 -04:00
|
|
|
@zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path("base.domain"))
|
2021-08-21 02:46:11 -04:00
|
|
|
def update_domains(new_domain_list):
|
|
|
|
nonlocal domain_list, d_domain
|
|
|
|
|
|
|
|
# Add missing domains to the list
|
2021-11-06 03:02:43 -04:00
|
|
|
for domain in [
|
|
|
|
domain for domain in new_domain_list if domain not in domain_list
|
|
|
|
]:
|
|
|
|
d_domain[domain] = VMInstance.VMInstance(
|
|
|
|
domain, zkhandler, config, logger, this_node
|
|
|
|
)
|
2019-03-11 01:44:26 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Remove any deleted domains from the list
|
2021-11-06 03:02:43 -04:00
|
|
|
for domain in [
|
|
|
|
domain for domain in domain_list if domain not in new_domain_list
|
|
|
|
]:
|
|
|
|
del d_domain[domain]
|
2019-03-11 01:44:26 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Update the new list
|
|
|
|
domain_list = new_domain_list
|
2021-11-06 03:02:43 -04:00
|
|
|
logger.out(
|
|
|
|
f'{logger.fmt_blue}Domain list:{logger.fmt_end} {" ".join(domain_list)}',
|
|
|
|
state="i",
|
|
|
|
)
|
2019-03-11 01:44:26 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Update node objects' list
|
|
|
|
for node in d_node:
|
|
|
|
d_node[node].update_domain_list(d_domain)
|
|
|
|
|
2021-11-06 03:02:43 -04:00
|
|
|
if config["enable_storage"]:
|
2021-08-21 02:46:11 -04:00
|
|
|
# Ceph command pipeline key
|
2021-11-06 03:02:43 -04:00
|
|
|
@zkhandler.zk_conn.DataWatch(zkhandler.schema.path("base.cmd.ceph"))
def run_ceph_command(data, stat, event=""):
    """
    Hand a non-empty "base.cmd.ceph" payload to CephInstance.ceph_command.

    data is decoded as ASCII before being passed on together with the OSD
    dictionary; stat and event are accepted from the DataWatch but not used.
    """
    # Nothing to do for an empty or missing payload
    if not data:
        return

    command = data.decode("ascii")
    CephInstance.ceph_command(zkhandler, logger, this_node, command, d_osd)
|
2021-08-21 02:46:11 -04:00
|
|
|
|
|
|
|
# OSD objects
|
2021-11-06 03:02:43 -04:00
|
|
|
@zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path("base.osd"))
|
2021-08-21 02:46:11 -04:00
|
|
|
def update_osds(new_osd_list):
|
|
|
|
nonlocal osd_list, d_osd
|
|
|
|
|
|
|
|
# Add any missing OSDs to the list
|
|
|
|
for osd in [osd for osd in new_osd_list if osd not in osd_list]:
|
2022-05-02 12:11:32 -04:00
|
|
|
d_osd[osd] = CephInstance.CephOSDInstance(
|
|
|
|
zkhandler, logger, this_node, osd
|
|
|
|
)
|
2019-03-11 01:44:26 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Remove any deleted OSDs from the list
|
|
|
|
for osd in [osd for osd in osd_list if osd not in new_osd_list]:
|
2021-11-06 03:02:43 -04:00
|
|
|
del d_osd[osd]
|
2019-03-11 01:44:26 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Update the new list
|
|
|
|
osd_list = new_osd_list
|
2021-11-06 03:02:43 -04:00
|
|
|
logger.out(
|
|
|
|
f'{logger.fmt_blue}OSD list:{logger.fmt_end} {" ".join(osd_list)}',
|
|
|
|
state="i",
|
|
|
|
)
|
2019-03-11 01:44:26 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Pool objects
|
2021-11-06 03:02:43 -04:00
|
|
|
@zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path("base.pool"))
|
2021-08-21 02:46:11 -04:00
|
|
|
def update_pools(new_pool_list):
|
|
|
|
nonlocal pool_list, d_pool, volume_list, d_volume
|
2019-03-11 01:44:26 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Add any missing pools to the list
|
|
|
|
for pool in [pool for pool in new_pool_list if pool not in pool_list]:
|
2022-05-02 12:11:32 -04:00
|
|
|
d_pool[pool] = CephInstance.CephPoolInstance(
|
|
|
|
zkhandler, logger, this_node, pool
|
|
|
|
)
|
2021-08-21 02:46:11 -04:00
|
|
|
# Prepare the volume components for this pool
|
|
|
|
volume_list[pool] = list()
|
2019-06-19 10:25:22 -04:00
|
|
|
d_volume[pool] = dict()
|
2019-03-11 01:44:26 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Remove any deleted pools from the list
|
|
|
|
for pool in [pool for pool in pool_list if pool not in new_pool_list]:
|
2021-11-06 03:02:43 -04:00
|
|
|
del d_pool[pool]
|
2018-10-31 23:38:17 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Update the new list
|
|
|
|
pool_list = new_pool_list
|
2021-11-06 03:02:43 -04:00
|
|
|
logger.out(
|
|
|
|
f'{logger.fmt_blue}Pool list:{logger.fmt_end} {" ".join(pool_list)}',
|
|
|
|
state="i",
|
|
|
|
)
|
2018-10-31 23:38:17 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Volume objects (in each pool)
|
|
|
|
for pool in pool_list:
|
2021-11-06 03:02:43 -04:00
|
|
|
|
|
|
|
@zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path("volume", pool))
|
2021-08-21 02:46:11 -04:00
|
|
|
def update_volumes(new_volume_list):
|
|
|
|
nonlocal volume_list, d_volume
|
2019-06-25 22:31:04 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Add any missing volumes to the list
|
2021-11-06 03:02:43 -04:00
|
|
|
for volume in [
|
|
|
|
volume
|
|
|
|
for volume in new_volume_list
|
|
|
|
if volume not in volume_list[pool]
|
|
|
|
]:
|
|
|
|
d_volume[pool][volume] = CephInstance.CephVolumeInstance(
|
2022-05-02 12:11:32 -04:00
|
|
|
zkhandler, logger, this_node, pool, volume
|
2021-11-06 03:02:43 -04:00
|
|
|
)
|
2019-06-25 22:31:04 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Remove any deleted volumes from the list
|
2021-11-06 03:02:43 -04:00
|
|
|
for volume in [
|
|
|
|
volume
|
|
|
|
for volume in volume_list[pool]
|
|
|
|
if volume not in new_volume_list
|
|
|
|
]:
|
|
|
|
del d_volume[pool][volume]
|
2019-06-19 10:25:22 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Update the new list
|
|
|
|
volume_list[pool] = new_volume_list
|
2021-11-06 03:02:43 -04:00
|
|
|
logger.out(
|
|
|
|
f'{logger.fmt_blue}Volume list [{pool}]:{logger.fmt_end} {" ".join(volume_list[pool])}',
|
|
|
|
state="i",
|
|
|
|
)
|
2020-11-07 14:45:24 -05:00
|
|
|
|
2023-09-15 22:47:09 -04:00
|
|
|
# Set up the node monitoring instance and thread
|
2023-02-13 03:06:06 -05:00
|
|
|
monitoring_instance = MonitoringInstance.MonitoringInstance(
|
|
|
|
zkhandler, config, logger, this_node
|
|
|
|
)
|
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Start keepalived thread
|
2021-11-06 03:02:43 -04:00
|
|
|
keepalive_timer = pvcnoded.util.keepalive.start_keepalive_timer(
|
2023-09-15 22:47:09 -04:00
|
|
|
logger, config, zkhandler, this_node
|
2021-11-06 03:02:43 -04:00
|
|
|
)
|
2020-06-06 13:23:24 -04:00
|
|
|
|
2021-08-21 02:46:11 -04:00
|
|
|
# Tick loop; does nothing since everything is async
|
|
|
|
while True:
|
2020-06-06 13:23:24 -04:00
|
|
|
try:
|
2021-08-21 02:46:11 -04:00
|
|
|
sleep(1)
|
2020-11-06 18:55:10 -05:00
|
|
|
except Exception:
|
2021-08-21 02:46:11 -04:00
|
|
|
break
|