Reformat code with Black code formatter

Unify the code style according to PEP 8 and Black principles by running the Black tool.
This commit is contained in:
Joshua Boniface 2021-11-06 03:02:43 -04:00
parent 3779bc960e
commit c41664d2da
47 changed files with 15547 additions and 10151 deletions

View File

@ -2,6 +2,7 @@
<img alt="Logo banner" src="docs/images/pvc_logo_black.png"/>
<br/><br/>
<a href="https://github.com/parallelvirtualcluster/pvc"><img alt="License" src="https://img.shields.io/github/license/parallelvirtualcluster/pvc"/></a>
<a href="https://github.com/psf/black"><img alt="Code style: Black" src="https://img.shields.io/badge/code%20style-black-000000.svg"/></a>
<a href="https://github.com/parallelvirtualcluster/pvc/releases"><img alt="Release" src="https://img.shields.io/github/release-pre/parallelvirtualcluster/pvc"/></a>
<a href="https://parallelvirtualcluster.readthedocs.io/en/latest/?badge=latest"><img alt="Documentation Status" src="https://readthedocs.org/projects/parallelvirtualcluster/badge/?version=latest"/></a>
</p>

View File

@ -64,29 +64,35 @@ def install(**kwargs):
# The provisioner has already mounted the disks on kwargs['temporary_directory']
# by this point, so we can get right to running the debootstrap after setting
# some nicer variable names; you don't necessarily have to do this.
vm_name = kwargs['vm_name']
temporary_directory = kwargs['temporary_directory']
disks = kwargs['disks']
networks = kwargs['networks']
vm_name = kwargs["vm_name"]
temporary_directory = kwargs["temporary_directory"]
disks = kwargs["disks"]
networks = kwargs["networks"]
# Our own required arguments. We should, though are not required to, handle
# failures of these gracefully, should administrators forget to specify them.
try:
deb_release = kwargs['deb_release']
deb_release = kwargs["deb_release"]
except Exception:
deb_release = "stable"
try:
deb_mirror = kwargs['deb_mirror']
deb_mirror = kwargs["deb_mirror"]
except Exception:
deb_mirror = "http://ftp.debian.org/debian"
try:
deb_packages = kwargs['deb_packages'].split(',')
deb_packages = kwargs["deb_packages"].split(",")
except Exception:
deb_packages = ["linux-image-amd64", "grub-pc", "cloud-init", "python3-cffi-backend", "wget"]
deb_packages = [
"linux-image-amd64",
"grub-pc",
"cloud-init",
"python3-cffi-backend",
"wget",
]
# We need to know our root disk
root_disk = None
for disk in disks:
if disk['mountpoint'] == '/':
if disk["mountpoint"] == "/":
root_disk = disk
if not root_disk:
return
@ -95,9 +101,7 @@ def install(**kwargs):
# good idea to include if you plan to use anything that is not part of the
# base Debian host system, just in case the provisioner host is not properly
# configured already.
os.system(
"apt-get install -y debootstrap"
)
os.system("apt-get install -y debootstrap")
# Perform a debootstrap installation
os.system(
@ -105,16 +109,12 @@ def install(**kwargs):
suite=deb_release,
target=temporary_directory,
mirror=deb_mirror,
pkgs=','.join(deb_packages)
pkgs=",".join(deb_packages),
)
)
# Bind mount the devfs
os.system(
"mount --bind /dev {}/dev".format(
temporary_directory
)
)
os.system("mount --bind /dev {}/dev".format(temporary_directory))
# Create an fstab entry for each disk
fstab_file = "{}/etc/fstab".format(temporary_directory)
@ -130,11 +130,11 @@ def install(**kwargs):
options = "defaults,discard,noatime,nodiratime"
# The root, var, and log volumes have specific values
if disk['mountpoint'] == "/":
root_disk['scsi_id'] = disk_id
if disk["mountpoint"] == "/":
root_disk["scsi_id"] = disk_id
dump = 0
cpass = 1
elif disk['mountpoint'] == '/var' or disk['mountpoint'] == '/var/log':
elif disk["mountpoint"] == "/var" or disk["mountpoint"] == "/var/log":
dump = 0
cpass = 2
else:
@ -142,14 +142,14 @@ def install(**kwargs):
cpass = 0
# Append the fstab line
with open(fstab_file, 'a') as fh:
with open(fstab_file, "a") as fh:
data = "/dev/disk/by-id/scsi-0QEMU_QEMU_HARDDISK_drive-scsi0-0-0-{disk} {mountpoint} {filesystem} {options} {dump} {cpass}\n".format(
disk=disk_id,
mountpoint=disk['mountpoint'],
filesystem=disk['filesystem'],
mountpoint=disk["mountpoint"],
filesystem=disk["filesystem"],
options=options,
dump=dump,
cpass=cpass
cpass=cpass,
)
fh.write(data)
@ -158,12 +158,14 @@ def install(**kwargs):
# Write the hostname
hostname_file = "{}/etc/hostname".format(temporary_directory)
with open(hostname_file, 'w') as fh:
with open(hostname_file, "w") as fh:
fh.write("{}".format(vm_name))
# Fix the cloud-init.target since it's broken
cloudinit_target_file = "{}/etc/systemd/system/cloud-init.target".format(temporary_directory)
with open(cloudinit_target_file, 'w') as fh:
cloudinit_target_file = "{}/etc/systemd/system/cloud-init.target".format(
temporary_directory
)
with open(cloudinit_target_file, "w") as fh:
data = """[Install]
WantedBy=multi-user.target
[Unit]
@ -176,7 +178,7 @@ After=multi-user.target
# will always be on PCI bus ID 2, hence the name "ens2".
# Write a DHCP stanza for ens2
ens2_network_file = "{}/etc/network/interfaces.d/ens2".format(temporary_directory)
with open(ens2_network_file, 'w') as fh:
with open(ens2_network_file, "w") as fh:
data = """auto ens2
iface ens2 inet dhcp
"""
@ -184,25 +186,31 @@ iface ens2 inet dhcp
# Write the DHCP config for ens2
dhclient_file = "{}/etc/dhcp/dhclient.conf".format(temporary_directory)
with open(dhclient_file, 'w') as fh:
data = """# DHCP client configuration
with open(dhclient_file, "w") as fh:
data = (
"""# DHCP client configuration
# Written by the PVC provisioner
option rfc3442-classless-static-routes code 121 = array of unsigned integer 8;
interface "ens2" {
""" + """ send fqdn.fqdn = "{hostname}";
"""
+ """ send fqdn.fqdn = "{hostname}";
send host-name = "{hostname}";
""".format(hostname=vm_name) + """ request subnet-mask, broadcast-address, time-offset, routers,
""".format(
hostname=vm_name
)
+ """ request subnet-mask, broadcast-address, time-offset, routers,
domain-name, domain-name-servers, domain-search, host-name,
dhcp6.name-servers, dhcp6.domain-search, dhcp6.fqdn, dhcp6.sntp-servers,
netbios-name-servers, netbios-scope, interface-mtu,
rfc3442-classless-static-routes, ntp-servers;
}
"""
)
fh.write(data)
# Write the GRUB configuration
grubcfg_file = "{}/etc/default/grub".format(temporary_directory)
with open(grubcfg_file, 'w') as fh:
with open(grubcfg_file, "w") as fh:
data = """# Written by the PVC provisioner
GRUB_DEFAULT=0
GRUB_TIMEOUT=1
@ -212,35 +220,29 @@ GRUB_CMDLINE_LINUX=""
GRUB_TERMINAL=console
GRUB_SERIAL_COMMAND="serial --speed=115200 --unit=0 --word=8 --parity=no --stop=1"
GRUB_DISABLE_LINUX_UUID=false
""".format(root_disk=root_disk['scsi_id'])
""".format(
root_disk=root_disk["scsi_id"]
)
fh.write(data)
# Chroot, do some in-root tasks, then exit the chroot
with chroot_target(temporary_directory):
# Install and update GRUB
os.system(
"grub-install --force /dev/rbd/{}/{}_{}".format(root_disk['pool'], vm_name, root_disk['disk_id'])
"grub-install --force /dev/rbd/{}/{}_{}".format(
root_disk["pool"], vm_name, root_disk["disk_id"]
)
os.system(
"update-grub"
)
os.system("update-grub")
# Set a really dumb root password [TEMPORARY]
os.system(
"echo root:test123 | chpasswd"
)
os.system("echo root:test123 | chpasswd")
# Enable cloud-init target on (first) boot
# NOTE: Your user-data should handle this and disable it once done, or things get messy.
# That cloud-init won't run without this hack seems like a bug... but even the official
# Debian cloud images are affected, so who knows.
os.system(
"systemctl enable cloud-init.target"
)
os.system("systemctl enable cloud-init.target")
# Unmount the bound devfs
os.system(
"umount {}/dev".format(
temporary_directory
)
)
os.system("umount {}/dev".format(temporary_directory))
# Everything else is done via cloud-init user-data

View File

@ -35,9 +35,9 @@ def install(**kwargs):
# The provisioner has already mounted the disks on kwargs['temporary_directory']
# by this point, so we can get right to running the debootstrap after setting
# some nicer variable names; you don't necessarily have to do this.
vm_name = kwargs['vm_name']
temporary_directory = kwargs['temporary_directory']
disks = kwargs['disks']
networks = kwargs['networks']
vm_name = kwargs["vm_name"]
temporary_directory = kwargs["temporary_directory"]
disks = kwargs["disks"]
networks = kwargs["networks"]
# No operation - this script just returns
pass

View File

@ -28,7 +28,7 @@ from pvcapid.models import * # noqa F401,F403
migrate = Migrate(app, db)
manager = Manager(app)
manager.add_command('db', MigrateCommand)
manager.add_command("db", MigrateCommand)
if __name__ == '__main__':
if __name__ == "__main__":
manager.run()

View File

@ -25,7 +25,7 @@ import yaml
from distutils.util import strtobool as dustrtobool
# Daemon version
version = '0.9.42'
version = "0.9.42"
# API version
API_VERSION = 1.0
@ -35,6 +35,7 @@ API_VERSION = 1.0
# Helper Functions
##########################################################
def strtobool(stringv):
if stringv is None:
return False
@ -52,54 +53,64 @@ def strtobool(stringv):
# Parse the configuration file
try:
pvcapid_config_file = os.environ['PVC_CONFIG_FILE']
pvcapid_config_file = os.environ["PVC_CONFIG_FILE"]
except Exception:
print('Error: The "PVC_CONFIG_FILE" environment variable must be set before starting pvcapid.')
print(
'Error: The "PVC_CONFIG_FILE" environment variable must be set before starting pvcapid.'
)
exit(1)
print('Loading configuration from file "{}"'.format(pvcapid_config_file))
# Read in the config
try:
with open(pvcapid_config_file, 'r') as cfgfile:
with open(pvcapid_config_file, "r") as cfgfile:
o_config = yaml.load(cfgfile, Loader=yaml.BaseLoader)
except Exception as e:
print('ERROR: Failed to parse configuration file: {}'.format(e))
print("ERROR: Failed to parse configuration file: {}".format(e))
exit(1)
try:
# Create the config object
config = {
'debug': strtobool(o_config['pvc']['debug']),
'coordinators': o_config['pvc']['coordinators'],
'listen_address': o_config['pvc']['api']['listen_address'],
'listen_port': int(o_config['pvc']['api']['listen_port']),
'auth_enabled': strtobool(o_config['pvc']['api']['authentication']['enabled']),
'auth_secret_key': o_config['pvc']['api']['authentication']['secret_key'],
'auth_tokens': o_config['pvc']['api']['authentication']['tokens'],
'ssl_enabled': strtobool(o_config['pvc']['api']['ssl']['enabled']),
'ssl_key_file': o_config['pvc']['api']['ssl']['key_file'],
'ssl_cert_file': o_config['pvc']['api']['ssl']['cert_file'],
'database_host': o_config['pvc']['provisioner']['database']['host'],
'database_port': int(o_config['pvc']['provisioner']['database']['port']),
'database_name': o_config['pvc']['provisioner']['database']['name'],
'database_user': o_config['pvc']['provisioner']['database']['user'],
'database_password': o_config['pvc']['provisioner']['database']['pass'],
'queue_host': o_config['pvc']['provisioner']['queue']['host'],
'queue_port': o_config['pvc']['provisioner']['queue']['port'],
'queue_path': o_config['pvc']['provisioner']['queue']['path'],
'storage_hosts': o_config['pvc']['provisioner']['ceph_cluster']['storage_hosts'],
'storage_domain': o_config['pvc']['provisioner']['ceph_cluster']['storage_domain'],
'ceph_monitor_port': o_config['pvc']['provisioner']['ceph_cluster']['ceph_monitor_port'],
'ceph_storage_secret_uuid': o_config['pvc']['provisioner']['ceph_cluster']['ceph_storage_secret_uuid']
"debug": strtobool(o_config["pvc"]["debug"]),
"coordinators": o_config["pvc"]["coordinators"],
"listen_address": o_config["pvc"]["api"]["listen_address"],
"listen_port": int(o_config["pvc"]["api"]["listen_port"]),
"auth_enabled": strtobool(o_config["pvc"]["api"]["authentication"]["enabled"]),
"auth_secret_key": o_config["pvc"]["api"]["authentication"]["secret_key"],
"auth_tokens": o_config["pvc"]["api"]["authentication"]["tokens"],
"ssl_enabled": strtobool(o_config["pvc"]["api"]["ssl"]["enabled"]),
"ssl_key_file": o_config["pvc"]["api"]["ssl"]["key_file"],
"ssl_cert_file": o_config["pvc"]["api"]["ssl"]["cert_file"],
"database_host": o_config["pvc"]["provisioner"]["database"]["host"],
"database_port": int(o_config["pvc"]["provisioner"]["database"]["port"]),
"database_name": o_config["pvc"]["provisioner"]["database"]["name"],
"database_user": o_config["pvc"]["provisioner"]["database"]["user"],
"database_password": o_config["pvc"]["provisioner"]["database"]["pass"],
"queue_host": o_config["pvc"]["provisioner"]["queue"]["host"],
"queue_port": o_config["pvc"]["provisioner"]["queue"]["port"],
"queue_path": o_config["pvc"]["provisioner"]["queue"]["path"],
"storage_hosts": o_config["pvc"]["provisioner"]["ceph_cluster"][
"storage_hosts"
],
"storage_domain": o_config["pvc"]["provisioner"]["ceph_cluster"][
"storage_domain"
],
"ceph_monitor_port": o_config["pvc"]["provisioner"]["ceph_cluster"][
"ceph_monitor_port"
],
"ceph_storage_secret_uuid": o_config["pvc"]["provisioner"]["ceph_cluster"][
"ceph_storage_secret_uuid"
],
}
# Use coordinators as storage hosts if not explicitly specified
if not config['storage_hosts']:
config['storage_hosts'] = config['coordinators']
if not config["storage_hosts"]:
config["storage_hosts"] = config["coordinators"]
except Exception as e:
print('ERROR: Failed to load configuration: {}'.format(e))
print("ERROR: Failed to load configuration: {}".format(e))
exit(1)
@ -107,31 +118,41 @@ except Exception as e:
# Entrypoint
##########################################################
def entrypoint():
import pvcapid.flaskapi as pvc_api # noqa: E402
if config['ssl_enabled']:
context = (config['ssl_cert_file'], config['ssl_key_file'])
if config["ssl_enabled"]:
context = (config["ssl_cert_file"], config["ssl_key_file"])
else:
context = None
# Print our startup messages
print('')
print('|----------------------------------------------------------|')
print('| |')
print('| ███████████ ▜█▙ ▟█▛ █████ █ █ █ |')
print('| ██ ▜█▙ ▟█▛ ██ |')
print('| ███████████ ▜█▙ ▟█▛ ██ |')
print('| ██ ▜█▙▟█▛ ███████████ |')
print('| |')
print('|----------------------------------------------------------|')
print('| Parallel Virtual Cluster API daemon v{0: <19} |'.format(version))
print('| Debug: {0: <49} |'.format(str(config['debug'])))
print('| API version: v{0: <42} |'.format(API_VERSION))
print('| Listen: {0: <48} |'.format('{}:{}'.format(config['listen_address'], config['listen_port'])))
print('| SSL: {0: <51} |'.format(str(config['ssl_enabled'])))
print('| Authentication: {0: <40} |'.format(str(config['auth_enabled'])))
print('|----------------------------------------------------------|')
print('')
print("")
print("|----------------------------------------------------------|")
print("| |")
print("| ███████████ ▜█▙ ▟█▛ █████ █ █ █ |")
print("| ██ ▜█▙ ▟█▛ ██ |")
print("| ███████████ ▜█▙ ▟█▛ ██ |")
print("| ██ ▜█▙▟█▛ ███████████ |")
print("| |")
print("|----------------------------------------------------------|")
print("| Parallel Virtual Cluster API daemon v{0: <19} |".format(version))
print("| Debug: {0: <49} |".format(str(config["debug"])))
print("| API version: v{0: <42} |".format(API_VERSION))
print(
"| Listen: {0: <48} |".format(
"{}:{}".format(config["listen_address"], config["listen_port"])
)
)
print("| SSL: {0: <51} |".format(str(config["ssl_enabled"])))
print("| Authentication: {0: <40} |".format(str(config["auth_enabled"])))
print("|----------------------------------------------------------|")
print("")
pvc_api.app.run(config['listen_address'], config['listen_port'], threaded=True, ssl_context=context)
pvc_api.app.run(
config["listen_address"],
config["listen_port"],
threaded=True,
ssl_context=context,
)

View File

@ -39,7 +39,10 @@ class BenchmarkError(Exception):
"""
An exception that results from the Benchmark job.
"""
def __init__(self, message, job_name=None, db_conn=None, db_cur=None, zkhandler=None):
def __init__(
self, message, job_name=None, db_conn=None, db_cur=None, zkhandler=None
):
self.message = message
if job_name is not None:
# Clean up our dangling result
@ -54,6 +57,7 @@ class BenchmarkError(Exception):
def __str__(self):
return str(self.message)
#
# Common functions
#
@ -62,11 +66,11 @@ class BenchmarkError(Exception):
# Database connections
def open_database(config):
conn = psycopg2.connect(
host=config['database_host'],
port=config['database_port'],
dbname=config['database_name'],
user=config['database_user'],
password=config['database_password']
host=config["database_host"],
port=config["database_port"],
dbname=config["database_name"],
user=config["database_user"],
password=config["database_password"],
)
cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
return conn, cur
@ -81,10 +85,10 @@ def close_database(conn, cur, failed=False):
def list_benchmarks(job=None):
if job is not None:
query = "SELECT * FROM {} WHERE job = %s;".format('storage_benchmarks')
args = (job, )
query = "SELECT * FROM {} WHERE job = %s;".format("storage_benchmarks")
args = (job,)
else:
query = "SELECT * FROM {} ORDER BY id DESC;".format('storage_benchmarks')
query = "SELECT * FROM {} ORDER BY id DESC;".format("storage_benchmarks")
args = ()
conn, cur = open_database(config)
@ -93,23 +97,23 @@ def list_benchmarks(job=None):
data = list()
for benchmark in orig_data:
benchmark_data = dict()
benchmark_data['id'] = benchmark['id']
benchmark_data['job'] = benchmark['job']
benchmark_data['test_format'] = benchmark['test_format']
if benchmark['result'] == 'Running':
benchmark_data['benchmark_result'] = 'Running'
benchmark_data["id"] = benchmark["id"]
benchmark_data["job"] = benchmark["job"]
benchmark_data["test_format"] = benchmark["test_format"]
if benchmark["result"] == "Running":
benchmark_data["benchmark_result"] = "Running"
else:
try:
benchmark_data['benchmark_result'] = loads(benchmark['result'])
benchmark_data["benchmark_result"] = loads(benchmark["result"])
except Exception:
benchmark_data['benchmark_result'] = {}
benchmark_data["benchmark_result"] = {}
# Append the new data to our actual output structure
data.append(benchmark_data)
close_database(conn, cur)
if data:
return data, 200
else:
return {'message': 'No benchmark found.'}, 404
return {"message": "No benchmark found."}, 404
def run_benchmark(self, pool):
@ -126,46 +130,68 @@ def run_benchmark(self, pool):
try:
db_conn, db_cur = open_database(config)
except Exception:
print('FATAL - failed to connect to Postgres')
print("FATAL - failed to connect to Postgres")
raise Exception
try:
zkhandler = ZKHandler(config)
zkhandler.connect()
except Exception:
print('FATAL - failed to connect to Zookeeper')
print("FATAL - failed to connect to Zookeeper")
raise Exception
cur_time = datetime.now().isoformat(timespec='seconds')
cur_primary = zkhandler.read('base.config.primary_node')
job_name = '{}_{}'.format(cur_time, cur_primary)
cur_time = datetime.now().isoformat(timespec="seconds")
cur_primary = zkhandler.read("base.config.primary_node")
job_name = "{}_{}".format(cur_time, cur_primary)
print("Starting storage benchmark '{}' on pool '{}'".format(job_name, pool))
print("Storing running status for job '{}' in database".format(job_name))
try:
query = "INSERT INTO storage_benchmarks (job, test_format, result) VALUES (%s, %s, %s);"
args = (job_name, TEST_FORMAT, "Running",)
args = (
job_name,
TEST_FORMAT,
"Running",
)
db_cur.execute(query, args)
db_conn.commit()
except Exception as e:
raise BenchmarkError("Failed to store running status: {}".format(e), job_name=job_name, db_conn=db_conn, db_cur=db_cur, zkhandler=zkhandler)
raise BenchmarkError(
"Failed to store running status: {}".format(e),
job_name=job_name,
db_conn=db_conn,
db_cur=db_cur,
zkhandler=zkhandler,
)
# Phase 1 - volume preparation
self.update_state(state='RUNNING', meta={'current': 1, 'total': 3, 'status': 'Creating benchmark volume'})
self.update_state(
state="RUNNING",
meta={"current": 1, "total": 3, "status": "Creating benchmark volume"},
)
time.sleep(1)
volume = 'pvcbenchmark'
volume = "pvcbenchmark"
# Create the RBD volume
retcode, retmsg = pvc_ceph.add_volume(zkhandler, pool, volume, "8G")
if not retcode:
raise BenchmarkError('Failed to create volume "{}": {}'.format(volume, retmsg), job_name=job_name, db_conn=db_conn, db_cur=db_cur, zkhandler=zkhandler)
raise BenchmarkError(
'Failed to create volume "{}": {}'.format(volume, retmsg),
job_name=job_name,
db_conn=db_conn,
db_cur=db_cur,
zkhandler=zkhandler,
)
else:
print(retmsg)
# Phase 2 - benchmark run
self.update_state(state='RUNNING', meta={'current': 2, 'total': 3, 'status': 'Running fio benchmarks on volume'})
self.update_state(
state="RUNNING",
meta={"current": 2, "total": 3, "status": "Running fio benchmarks on volume"},
)
time.sleep(1)
# We run a total of 8 tests, to give a generalized idea of performance on the cluster:
@ -180,53 +206,43 @@ def run_benchmark(self, pool):
# Taken together, these 8 results should give a very good indication of the overall storage performance
# for a variety of workloads.
test_matrix = {
'seq_read': {
'direction': 'read',
'iodepth': '64',
'bs': '4M',
'rw': 'read'
"seq_read": {"direction": "read", "iodepth": "64", "bs": "4M", "rw": "read"},
"seq_write": {"direction": "write", "iodepth": "64", "bs": "4M", "rw": "write"},
"rand_read_4M": {
"direction": "read",
"iodepth": "64",
"bs": "4M",
"rw": "randread",
},
'seq_write': {
'direction': 'write',
'iodepth': '64',
'bs': '4M',
'rw': 'write'
"rand_write_4M": {
"direction": "write",
"iodepth": "64",
"bs": "4M",
"rw": "randwrite",
},
'rand_read_4M': {
'direction': 'read',
'iodepth': '64',
'bs': '4M',
'rw': 'randread'
"rand_read_4K": {
"direction": "read",
"iodepth": "64",
"bs": "4K",
"rw": "randread",
},
'rand_write_4M': {
'direction': 'write',
'iodepth': '64',
'bs': '4M',
'rw': 'randwrite'
"rand_write_4K": {
"direction": "write",
"iodepth": "64",
"bs": "4K",
"rw": "randwrite",
},
'rand_read_4K': {
'direction': 'read',
'iodepth': '64',
'bs': '4K',
'rw': 'randread'
"rand_read_4K_lowdepth": {
"direction": "read",
"iodepth": "1",
"bs": "4K",
"rw": "randread",
},
'rand_write_4K': {
'direction': 'write',
'iodepth': '64',
'bs': '4K',
'rw': 'randwrite'
},
'rand_read_4K_lowdepth': {
'direction': 'read',
'iodepth': '1',
'bs': '4K',
'rw': 'randread'
},
'rand_write_4K_lowdepth': {
'direction': 'write',
'iodepth': '1',
'bs': '4K',
'rw': 'randwrite'
"rand_write_4K_lowdepth": {
"direction": "write",
"iodepth": "1",
"bs": "4K",
"rw": "randwrite",
},
}
@ -253,25 +269,41 @@ def run_benchmark(self, pool):
test=test,
pool=pool,
volume=volume,
iodepth=test_matrix[test]['iodepth'],
bs=test_matrix[test]['bs'],
rw=test_matrix[test]['rw'])
iodepth=test_matrix[test]["iodepth"],
bs=test_matrix[test]["bs"],
rw=test_matrix[test]["rw"],
)
print("Running fio job: {}".format(' '.join(fio_cmd.split())))
print("Running fio job: {}".format(" ".join(fio_cmd.split())))
retcode, stdout, stderr = pvc_common.run_os_command(fio_cmd)
if retcode:
raise BenchmarkError("Failed to run fio test: {}".format(stderr), job_name=job_name, db_conn=db_conn, db_cur=db_cur, zkhandler=zkhandler)
raise BenchmarkError(
"Failed to run fio test: {}".format(stderr),
job_name=job_name,
db_conn=db_conn,
db_cur=db_cur,
zkhandler=zkhandler,
)
results[test] = loads(stdout)
# Phase 3 - cleanup
self.update_state(state='RUNNING', meta={'current': 3, 'total': 3, 'status': 'Cleaning up and storing results'})
self.update_state(
state="RUNNING",
meta={"current": 3, "total": 3, "status": "Cleaning up and storing results"},
)
time.sleep(1)
# Remove the RBD volume
retcode, retmsg = pvc_ceph.remove_volume(zkhandler, pool, volume)
if not retcode:
raise BenchmarkError('Failed to remove volume "{}": {}'.format(volume, retmsg), job_name=job_name, db_conn=db_conn, db_cur=db_cur, zkhandler=zkhandler)
raise BenchmarkError(
'Failed to remove volume "{}": {}'.format(volume, retmsg),
job_name=job_name,
db_conn=db_conn,
db_cur=db_cur,
zkhandler=zkhandler,
)
else:
print(retmsg)
@ -282,10 +314,20 @@ def run_benchmark(self, pool):
db_cur.execute(query, args)
db_conn.commit()
except Exception as e:
raise BenchmarkError("Failed to store test results: {}".format(e), job_name=job_name, db_conn=db_conn, db_cur=db_cur, zkhandler=zkhandler)
raise BenchmarkError(
"Failed to store test results: {}".format(e),
job_name=job_name,
db_conn=db_conn,
db_cur=db_cur,
zkhandler=zkhandler,
)
close_database(db_conn, db_cur)
zkhandler.disconnect()
del zkhandler
return {'status': "Storage benchmark '{}' completed successfully.", 'current': 3, 'total': 3}
return {
"status": "Storage benchmark '{}' completed successfully.",
"current": 3,
"total": 3,
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -23,7 +23,7 @@ from pvcapid.flaskapi import db
class DBSystemTemplate(db.Model):
__tablename__ = 'system_template'
__tablename__ = "system_template"
id = db.Column(db.Integer, primary_key=True)
name = db.Column(db.Text, nullable=False, unique=True)
@ -38,7 +38,20 @@ class DBSystemTemplate(db.Model):
migration_method = db.Column(db.Text)
ova = db.Column(db.Integer, db.ForeignKey("ova.id"), nullable=True)
def __init__(self, name, vcpu_count, vram_mb, serial, vnc, vnc_bind, node_limit, node_selector, node_autostart, migration_method, ova=None):
def __init__(
self,
name,
vcpu_count,
vram_mb,
serial,
vnc,
vnc_bind,
node_limit,
node_selector,
node_autostart,
migration_method,
ova=None,
):
self.name = name
self.vcpu_count = vcpu_count
self.vram_mb = vram_mb
@ -52,11 +65,11 @@ class DBSystemTemplate(db.Model):
self.ova = ova
def __repr__(self):
return '<id {}>'.format(self.id)
return "<id {}>".format(self.id)
class DBNetworkTemplate(db.Model):
__tablename__ = 'network_template'
__tablename__ = "network_template"
id = db.Column(db.Integer, primary_key=True)
name = db.Column(db.Text, nullable=False, unique=True)
@ -69,14 +82,16 @@ class DBNetworkTemplate(db.Model):
self.ova = ova
def __repr__(self):
return '<id {}>'.format(self.id)
return "<id {}>".format(self.id)
class DBNetworkElement(db.Model):
__tablename__ = 'network'
__tablename__ = "network"
id = db.Column(db.Integer, primary_key=True)
network_template = db.Column(db.Integer, db.ForeignKey("network_template.id"), nullable=False)
network_template = db.Column(
db.Integer, db.ForeignKey("network_template.id"), nullable=False
)
vni = db.Column(db.Text, nullable=False)
def __init__(self, network_template, vni):
@ -84,11 +99,11 @@ class DBNetworkElement(db.Model):
self.vni = vni
def __repr__(self):
return '<id {}>'.format(self.id)
return "<id {}>".format(self.id)
class DBStorageTemplate(db.Model):
__tablename__ = 'storage_template'
__tablename__ = "storage_template"
id = db.Column(db.Integer, primary_key=True)
name = db.Column(db.Text, nullable=False, unique=True)
@ -99,14 +114,16 @@ class DBStorageTemplate(db.Model):
self.ova = ova
def __repr__(self):
return '<id {}>'.format(self.id)
return "<id {}>".format(self.id)
class DBStorageElement(db.Model):
__tablename__ = 'storage'
__tablename__ = "storage"
id = db.Column(db.Integer, primary_key=True)
storage_template = db.Column(db.Integer, db.ForeignKey("storage_template.id"), nullable=False)
storage_template = db.Column(
db.Integer, db.ForeignKey("storage_template.id"), nullable=False
)
pool = db.Column(db.Text, nullable=False)
disk_id = db.Column(db.Text, nullable=False)
source_volume = db.Column(db.Text)
@ -115,7 +132,17 @@ class DBStorageElement(db.Model):
filesystem = db.Column(db.Text)
filesystem_args = db.Column(db.Text)
def __init__(self, storage_template, pool, disk_id, source_volume, disk_size_gb, mountpoint, filesystem, filesystem_args):
def __init__(
self,
storage_template,
pool,
disk_id,
source_volume,
disk_size_gb,
mountpoint,
filesystem,
filesystem_args,
):
self.storage_template = storage_template
self.pool = pool
self.disk_id = disk_id
@ -126,11 +153,11 @@ class DBStorageElement(db.Model):
self.filesystem_args = filesystem_args
def __repr__(self):
return '<id {}>'.format(self.id)
return "<id {}>".format(self.id)
class DBUserdata(db.Model):
__tablename__ = 'userdata'
__tablename__ = "userdata"
id = db.Column(db.Integer, primary_key=True)
name = db.Column(db.Text, nullable=False, unique=True)
@ -141,11 +168,11 @@ class DBUserdata(db.Model):
self.userdata = userdata
def __repr__(self):
return '<id {}>'.format(self.id)
return "<id {}>".format(self.id)
class DBScript(db.Model):
__tablename__ = 'script'
__tablename__ = "script"
id = db.Column(db.Integer, primary_key=True)
name = db.Column(db.Text, nullable=False, unique=True)
@ -156,11 +183,11 @@ class DBScript(db.Model):
self.script = script
def __repr__(self):
return '<id {}>'.format(self.id)
return "<id {}>".format(self.id)
class DBOva(db.Model):
__tablename__ = 'ova'
__tablename__ = "ova"
id = db.Column(db.Integer, primary_key=True)
name = db.Column(db.Text, nullable=False, unique=True)
@ -171,11 +198,11 @@ class DBOva(db.Model):
self.ovf = ovf
def __repr__(self):
return '<id {}>'.format(self.id)
return "<id {}>".format(self.id)
class DBOvaVolume(db.Model):
__tablename__ = 'ova_volume'
__tablename__ = "ova_volume"
id = db.Column(db.Integer, primary_key=True)
ova = db.Column(db.Integer, db.ForeignKey("ova.id"), nullable=False)
@ -194,11 +221,11 @@ class DBOvaVolume(db.Model):
self.disk_size_gb = disk_size_gb
def __repr__(self):
return '<id {}>'.format(self.id)
return "<id {}>".format(self.id)
class DBProfile(db.Model):
__tablename__ = 'profile'
__tablename__ = "profile"
id = db.Column(db.Integer, primary_key=True)
name = db.Column(db.Text, nullable=False, unique=True)
@ -211,7 +238,18 @@ class DBProfile(db.Model):
ova = db.Column(db.Integer, db.ForeignKey("ova.id"))
arguments = db.Column(db.Text)
def __init__(self, name, profile_type, system_template, network_template, storage_template, userdata, script, ova, arguments):
def __init__(
self,
name,
profile_type,
system_template,
network_template,
storage_template,
userdata,
script,
ova,
arguments,
):
self.name = name
self.profile_type = profile_type
self.system_template = system_template
@ -223,15 +261,15 @@ class DBProfile(db.Model):
self.arguments = arguments
def __repr__(self):
return '<id {}>'.format(self.id)
return "<id {}>".format(self.id)
class DBStorageBenchmarks(db.Model):
__tablename__ = 'storage_benchmarks'
__tablename__ = "storage_benchmarks"
id = db.Column(db.Integer, primary_key=True)
job = db.Column(db.Text, nullable=False)
test_format = db.Column(db.Integer, nullable=False, default=0, server_default='0')
test_format = db.Column(db.Integer, nullable=False, default=0, server_default="0")
result = db.Column(db.Text, nullable=False)
def __init__(self, job, result, test_format):
@ -240,4 +278,4 @@ class DBStorageBenchmarks(db.Model):
self.test_format = test_format
def __repr__(self):
return '<id {}>'.format(self.id)
return "<id {}>".format(self.id)

View File

@ -47,11 +47,11 @@ import pvcapid.provisioner as provisioner
# Database connections
def open_database(config):
conn = psycopg2.connect(
host=config['database_host'],
port=config['database_port'],
dbname=config['database_name'],
user=config['database_user'],
password=config['database_password']
host=config["database_host"],
port=config["database_port"],
dbname=config["database_name"],
user=config["database_user"],
password=config["database_password"],
)
cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
return conn, cur
@ -71,19 +71,19 @@ def list_ova(limit, is_fuzzy=True):
if limit:
if is_fuzzy:
# Handle fuzzy vs. non-fuzzy limits
if not re.match(r'\^.*', limit):
limit = '%' + limit
if not re.match(r"\^.*", limit):
limit = "%" + limit
else:
limit = limit[1:]
if not re.match(r'.*\$', limit):
limit = limit + '%'
if not re.match(r".*\$", limit):
limit = limit + "%"
else:
limit = limit[:-1]
query = "SELECT id, name FROM {} WHERE name LIKE %s;".format('ova')
args = (limit, )
query = "SELECT id, name FROM {} WHERE name LIKE %s;".format("ova")
args = (limit,)
else:
query = "SELECT id, name FROM {};".format('ova')
query = "SELECT id, name FROM {};".format("ova")
args = ()
conn, cur = open_database(config)
@ -94,34 +94,36 @@ def list_ova(limit, is_fuzzy=True):
ova_data = list()
for ova in data:
ova_id = ova.get('id')
ova_name = ova.get('name')
ova_id = ova.get("id")
ova_name = ova.get("name")
query = "SELECT pool, volume_name, volume_format, disk_id, disk_size_gb FROM {} WHERE ova = %s;".format('ova_volume')
query = "SELECT pool, volume_name, volume_format, disk_id, disk_size_gb FROM {} WHERE ova = %s;".format(
"ova_volume"
)
args = (ova_id,)
conn, cur = open_database(config)
cur.execute(query, args)
volumes = cur.fetchall()
close_database(conn, cur)
ova_data.append({'id': ova_id, 'name': ova_name, 'volumes': volumes})
ova_data.append({"id": ova_id, "name": ova_name, "volumes": volumes})
if ova_data:
return ova_data, 200
else:
return {'message': 'No OVAs found.'}, 404
return {"message": "No OVAs found."}, 404
@ZKConnection(config)
def delete_ova(zkhandler, name):
ova_data, retcode = list_ova(name, is_fuzzy=False)
if retcode != 200:
retmsg = {'message': 'The OVA "{}" does not exist.'.format(name)}
retmsg = {"message": 'The OVA "{}" does not exist.'.format(name)}
retcode = 400
return retmsg, retcode
conn, cur = open_database(config)
ova_id = ova_data[0].get('id')
ova_id = ova_data[0].get("id")
try:
# Get the list of volumes for this OVA
query = "SELECT pool, volume_name FROM ova_volume WHERE ova = %s;"
@ -131,7 +133,9 @@ def delete_ova(zkhandler, name):
# Remove each volume for this OVA
for volume in volumes:
pvc_ceph.remove_volume(zkhandler, volume.get('pool'), volume.get('volume_name'))
pvc_ceph.remove_volume(
zkhandler, volume.get("pool"), volume.get("volume_name")
)
# Delete the volume entries from the database
query = "DELETE FROM ova_volume WHERE ova = %s;"
@ -156,7 +160,7 @@ def delete_ova(zkhandler, name):
retmsg = {"message": 'Removed OVA image "{}".'.format(name)}
retcode = 200
except Exception as e:
retmsg = {'message': 'Failed to remove OVA "{}": {}'.format(name, e)}
retmsg = {"message": 'Failed to remove OVA "{}": {}'.format(name, e)}
retcode = 400
close_database(conn, cur)
return retmsg, retcode
@ -174,20 +178,22 @@ def upload_ova(zkhandler, pool, name, ova_size):
# Unmap the OVA temporary blockdev
retflag, retdata = pvc_ceph.unmap_volume(zkhandler, pool, "ova_{}".format(name))
# Remove the OVA temporary blockdev
retflag, retdata = pvc_ceph.remove_volume(zkhandler, pool, "ova_{}".format(name))
retflag, retdata = pvc_ceph.remove_volume(
zkhandler, pool, "ova_{}".format(name)
)
# Normalize the OVA size to bytes
ova_size_bytes = pvc_ceph.format_bytes_fromhuman(ova_size)
ova_size = '{}B'.format(ova_size_bytes)
ova_size = "{}B".format(ova_size_bytes)
# Verify that the cluster has enough space to store the OVA volumes (2x OVA size, temporarily, 1x permanently)
pool_information = pvc_ceph.getPoolInformation(zkhandler, pool)
pool_free_space_bytes = int(pool_information['stats']['free_bytes'])
pool_free_space_bytes = int(pool_information["stats"]["free_bytes"])
if ova_size_bytes * 2 >= pool_free_space_bytes:
output = {
'message': "The cluster does not have enough free space ({}) to store the OVA volume ({}).".format(
"message": "The cluster does not have enough free space ({}) to store the OVA volume ({}).".format(
pvc_ceph.format_bytes_tohuman(pool_free_space_bytes),
pvc_ceph.format_bytes_tohuman(ova_size_bytes)
pvc_ceph.format_bytes_tohuman(ova_size_bytes),
)
}
retcode = 400
@ -195,11 +201,11 @@ def upload_ova(zkhandler, pool, name, ova_size):
return output, retcode
# Create a temporary OVA blockdev
retflag, retdata = pvc_ceph.add_volume(zkhandler, pool, "ova_{}".format(name), ova_size)
retflag, retdata = pvc_ceph.add_volume(
zkhandler, pool, "ova_{}".format(name), ova_size
)
if not retflag:
output = {
'message': retdata.replace('\"', '\'')
}
output = {"message": retdata.replace('"', "'")}
retcode = 400
cleanup_ova_maps_and_volumes()
return output, retcode
@ -207,9 +213,7 @@ def upload_ova(zkhandler, pool, name, ova_size):
# Map the temporary OVA blockdev
retflag, retdata = pvc_ceph.map_volume(zkhandler, pool, "ova_{}".format(name))
if not retflag:
output = {
'message': retdata.replace('\"', '\'')
}
output = {"message": retdata.replace('"', "'")}
retcode = 400
cleanup_ova_maps_and_volumes()
return output, retcode
@ -221,13 +225,14 @@ def upload_ova(zkhandler, pool, name, ova_size):
# rather than the standard stream_factory which writes to a temporary file waiting
# on a save() call. This will break if the API ever uploaded multiple files, but
# this is an acceptable workaround.
def ova_stream_factory(total_content_length, filename, content_type, content_length=None):
return open(ova_blockdev, 'wb')
def ova_stream_factory(
total_content_length, filename, content_type, content_length=None
):
return open(ova_blockdev, "wb")
parse_form_data(flask.request.environ, stream_factory=ova_stream_factory)
except Exception:
output = {
'message': "Failed to upload or write OVA file to temporary volume."
}
output = {"message": "Failed to upload or write OVA file to temporary volume."}
retcode = 400
cleanup_ova_maps_and_volumes()
return output, retcode
@ -238,15 +243,13 @@ def upload_ova(zkhandler, pool, name, ova_size):
# Determine the files in the OVA
members = ova_archive.getmembers()
except tarfile.TarError:
output = {
'message': "The uploaded OVA file is not readable."
}
output = {"message": "The uploaded OVA file is not readable."}
retcode = 400
cleanup_ova_maps_and_volumes()
return output, retcode
# Parse through the members list and extract the OVF file
for element in set(x for x in members if re.match(r'.*\.ovf$', x.name)):
for element in set(x for x in members if re.match(r".*\.ovf$", x.name)):
ovf_file = ova_archive.extractfile(element)
# Parse the OVF file to get our VM details
@ -261,14 +264,14 @@ def upload_ova(zkhandler, pool, name, ova_size):
# Create and upload each disk volume
for idx, disk in enumerate(disk_map):
disk_identifier = "sd{}".format(chr(ord('a') + idx))
disk_identifier = "sd{}".format(chr(ord("a") + idx))
volume = "ova_{}_{}".format(name, disk_identifier)
dev_src = disk.get('src')
dev_src = disk.get("src")
dev_size_raw = ova_archive.getmember(dev_src).size
vm_volume_size = disk.get('capacity')
vm_volume_size = disk.get("capacity")
# Normalize the dev size to bytes
dev_size = '{}B'.format(pvc_ceph.format_bytes_fromhuman(dev_size_raw))
dev_size = "{}B".format(pvc_ceph.format_bytes_fromhuman(dev_size_raw))
def cleanup_img_maps():
# Unmap the temporary blockdev
@ -277,9 +280,7 @@ def upload_ova(zkhandler, pool, name, ova_size):
# Create the blockdev
retflag, retdata = pvc_ceph.add_volume(zkhandler, pool, volume, dev_size)
if not retflag:
output = {
'message': retdata.replace('\"', '\'')
}
output = {"message": retdata.replace('"', "'")}
retcode = 400
cleanup_img_maps()
cleanup_ova_maps_and_volumes()
@ -288,9 +289,7 @@ def upload_ova(zkhandler, pool, name, ova_size):
# Map the blockdev
retflag, retdata = pvc_ceph.map_volume(zkhandler, pool, volume)
if not retflag:
output = {
'message': retdata.replace('\"', '\'')
}
output = {"message": retdata.replace('"', "'")}
retcode = 400
cleanup_img_maps()
cleanup_ova_maps_and_volumes()
@ -299,10 +298,10 @@ def upload_ova(zkhandler, pool, name, ova_size):
try:
# Open (extract) the TAR archive file and seek to byte 0
vmdk_file = ova_archive.extractfile(disk.get('src'))
vmdk_file = ova_archive.extractfile(disk.get("src"))
vmdk_file.seek(0)
# Open the temporary blockdev and seek to byte 0
blk_file = open(temp_blockdev, 'wb')
blk_file = open(temp_blockdev, "wb")
blk_file.seek(0)
# Write the contents of vmdk_file into blk_file
blk_file.write(vmdk_file.read())
@ -311,10 +310,12 @@ def upload_ova(zkhandler, pool, name, ova_size):
# Close vmdk_file
vmdk_file.close()
# Perform an OS-level sync
pvc_common.run_os_command('sync')
pvc_common.run_os_command("sync")
except Exception:
output = {
'message': "Failed to write image file '{}' to temporary volume.".format(disk.get('src'))
"message": "Failed to write image file '{}' to temporary volume.".format(
disk.get("src")
)
}
retcode = 400
cleanup_img_maps()
@ -333,27 +334,25 @@ def upload_ova(zkhandler, pool, name, ova_size):
cur.execute(query, args)
close_database(conn, cur)
except Exception as e:
output = {
'message': 'Failed to create OVA entry "{}": {}'.format(name, e)
}
output = {"message": 'Failed to create OVA entry "{}": {}'.format(name, e)}
retcode = 400
close_database(conn, cur)
return output, retcode
# Get the OVA database id
query = "SELECT id FROM ova WHERE name = %s;"
args = (name, )
args = (name,)
conn, cur = open_database(config)
cur.execute(query, args)
ova_id = cur.fetchone()['id']
ova_id = cur.fetchone()["id"]
close_database(conn, cur)
# Prepare disk entries in ova_volume
for idx, disk in enumerate(disk_map):
disk_identifier = "sd{}".format(chr(ord('a') + idx))
volume_type = disk.get('src').split('.')[-1]
disk_identifier = "sd{}".format(chr(ord("a") + idx))
volume_type = disk.get("src").split(".")[-1]
volume = "ova_{}_{}".format(name, disk_identifier)
vm_volume_size = disk.get('capacity')
vm_volume_size = disk.get("capacity")
# The function always returns XXXXB, so strip off the B and convert to an integer
vm_volume_size_bytes = pvc_ceph.format_bytes_fromhuman(vm_volume_size)
@ -368,37 +367,49 @@ def upload_ova(zkhandler, pool, name, ova_size):
close_database(conn, cur)
except Exception as e:
output = {
'message': 'Failed to create OVA volume entry "{}": {}'.format(volume, e)
"message": 'Failed to create OVA volume entry "{}": {}'.format(
volume, e
)
}
retcode = 400
close_database(conn, cur)
return output, retcode
# Prepare a system_template for the OVA
vcpu_count = virtual_hardware.get('vcpus')
vram_mb = virtual_hardware.get('vram')
if virtual_hardware.get('graphics-controller') == 1:
vcpu_count = virtual_hardware.get("vcpus")
vram_mb = virtual_hardware.get("vram")
if virtual_hardware.get("graphics-controller") == 1:
vnc = True
serial = False
else:
vnc = False
serial = True
retdata, retcode = provisioner.create_template_system(name, vcpu_count, vram_mb, serial, vnc, vnc_bind=None, ova=ova_id)
retdata, retcode = provisioner.create_template_system(
name, vcpu_count, vram_mb, serial, vnc, vnc_bind=None, ova=ova_id
)
if retcode != 200:
return retdata, retcode
system_template, retcode = provisioner.list_template_system(name, is_fuzzy=False)
if retcode != 200:
return retdata, retcode
system_template_name = system_template[0].get('name')
system_template_name = system_template[0].get("name")
# Prepare a barebones profile for the OVA
retdata, retcode = provisioner.create_profile(name, 'ova', system_template_name, None, None, userdata=None, script=None, ova=name, arguments=None)
retdata, retcode = provisioner.create_profile(
name,
"ova",
system_template_name,
None,
None,
userdata=None,
script=None,
ova=name,
arguments=None,
)
if retcode != 200:
return retdata, retcode
output = {
'message': "Imported OVA image '{}'.".format(name)
}
output = {"message": "Imported OVA image '{}'.".format(name)}
retcode = 200
return output, retcode
@ -420,7 +431,7 @@ class OVFParser(object):
"20": "other-storage-device",
"23": "usb-controller",
"24": "graphics-controller",
"35": "sound-controller"
"35": "sound-controller",
}
def _getFilelist(self):
@ -438,7 +449,10 @@ class OVFParser(object):
cap_attr = "{{{schema}}}capacity".format(schema=self.OVF_SCHEMA)
cap_units = "{{{schema}}}capacityAllocationUnits".format(schema=self.OVF_SCHEMA)
current_list = self.xml.findall(path)
results = [(x.get(id_attr), x.get(ref_attr), x.get(cap_attr), x.get(cap_units)) for x in current_list]
results = [
(x.get(id_attr), x.get(ref_attr), x.get(cap_attr), x.get(cap_units))
for x in current_list
]
return results
def _getAttributes(self, virtual_system, path, attribute):
@ -451,36 +465,46 @@ class OVFParser(object):
# Define our schemas
envelope_tag = self.xml.find(".")
self.XML_SCHEMA = envelope_tag.nsmap.get('xsi')
self.OVF_SCHEMA = envelope_tag.nsmap.get('ovf')
self.RASD_SCHEMA = envelope_tag.nsmap.get('rasd')
self.SASD_SCHEMA = envelope_tag.nsmap.get('sasd')
self.VSSD_SCHEMA = envelope_tag.nsmap.get('vssd')
self.XML_SCHEMA = envelope_tag.nsmap.get("xsi")
self.OVF_SCHEMA = envelope_tag.nsmap.get("ovf")
self.RASD_SCHEMA = envelope_tag.nsmap.get("rasd")
self.SASD_SCHEMA = envelope_tag.nsmap.get("sasd")
self.VSSD_SCHEMA = envelope_tag.nsmap.get("vssd")
self.ovf_version = int(self.OVF_SCHEMA.split('/')[-1])
self.ovf_version = int(self.OVF_SCHEMA.split("/")[-1])
# Get the file and disk lists
self.filelist = self._getFilelist()
self.disklist = self._getDisklist()
def getVirtualSystems(self):
    """
    Return every VirtualSystem element that is a direct child of the parsed
    OVF document root, looked up under the OVF schema namespace.
    """
    # Namespace-qualified tag in "{namespace}Tag" form
    virtual_system_tag = "{{{schema}}}VirtualSystem".format(schema=self.OVF_SCHEMA)
    return self.xml.findall(virtual_system_tag)
def getXML(self):
    """
    Return the parsed OVF document serialised as a pretty-printed string.
    """
    # lxml returns bytes here; decode them for the caller
    serialised = lxml.etree.tostring(self.xml, pretty_print=True)
    return serialised.decode("utf8")
def getVirtualHardware(self, virtual_system):
hardware_list = virtual_system.findall(
"{{{schema}}}VirtualHardwareSection/{{{schema}}}Item".format(schema=self.OVF_SCHEMA)
"{{{schema}}}VirtualHardwareSection/{{{schema}}}Item".format(
schema=self.OVF_SCHEMA
)
)
virtual_hardware = {}
for item in hardware_list:
try:
item_type = self.RASD_TYPE[item.find("{{{rasd}}}ResourceType".format(rasd=self.RASD_SCHEMA)).text]
item_type = self.RASD_TYPE[
item.find(
"{{{rasd}}}ResourceType".format(rasd=self.RASD_SCHEMA)
).text
]
except Exception:
continue
quantity = item.find("{{{rasd}}}VirtualQuantity".format(rasd=self.RASD_SCHEMA))
quantity = item.find(
"{{{rasd}}}VirtualQuantity".format(rasd=self.RASD_SCHEMA)
)
if quantity is None:
virtual_hardware[item_type] = 1
else:
@ -492,11 +516,15 @@ class OVFParser(object):
# OVF v2 uses the StorageItem field, while v1 uses the normal Item field
if self.ovf_version < 2:
hardware_list = virtual_system.findall(
"{{{schema}}}VirtualHardwareSection/{{{schema}}}Item".format(schema=self.OVF_SCHEMA)
"{{{schema}}}VirtualHardwareSection/{{{schema}}}Item".format(
schema=self.OVF_SCHEMA
)
)
else:
hardware_list = virtual_system.findall(
"{{{schema}}}VirtualHardwareSection/{{{schema}}}StorageItem".format(schema=self.OVF_SCHEMA)
"{{{schema}}}VirtualHardwareSection/{{{schema}}}StorageItem".format(
schema=self.OVF_SCHEMA
)
)
disk_list = []
@ -504,38 +532,56 @@ class OVFParser(object):
item_type = None
if self.SASD_SCHEMA is not None:
item_type = self.RASD_TYPE[item.find("{{{sasd}}}ResourceType".format(sasd=self.SASD_SCHEMA)).text]
item_type = self.RASD_TYPE[
item.find(
"{{{sasd}}}ResourceType".format(sasd=self.SASD_SCHEMA)
).text
]
else:
item_type = self.RASD_TYPE[item.find("{{{rasd}}}ResourceType".format(rasd=self.RASD_SCHEMA)).text]
item_type = self.RASD_TYPE[
item.find(
"{{{rasd}}}ResourceType".format(rasd=self.RASD_SCHEMA)
).text
]
if item_type != 'disk':
if item_type != "disk":
continue
hostref = None
if self.SASD_SCHEMA is not None:
hostref = item.find("{{{sasd}}}HostResource".format(sasd=self.SASD_SCHEMA))
hostref = item.find(
"{{{sasd}}}HostResource".format(sasd=self.SASD_SCHEMA)
)
else:
hostref = item.find("{{{rasd}}}HostResource".format(rasd=self.RASD_SCHEMA))
hostref = item.find(
"{{{rasd}}}HostResource".format(rasd=self.RASD_SCHEMA)
)
if hostref is None:
continue
disk_res = hostref.text
# Determine which file this disk_res ultimately represents
(disk_id, disk_ref, disk_capacity, disk_capacity_unit) = [x for x in self.disklist if x[0] == disk_res.split('/')[-1]][0]
(disk_id, disk_ref, disk_capacity, disk_capacity_unit) = [
x for x in self.disklist if x[0] == disk_res.split("/")[-1]
][0]
(file_id, disk_src) = [x for x in self.filelist if x[0] == disk_ref][0]
if disk_capacity_unit is not None:
# Handle the unit conversion
base_unit, action, multiple = disk_capacity_unit.split()
multiple_base, multiple_exponent = multiple.split('^')
disk_capacity = int(disk_capacity) * (int(multiple_base) ** int(multiple_exponent))
multiple_base, multiple_exponent = multiple.split("^")
disk_capacity = int(disk_capacity) * (
int(multiple_base) ** int(multiple_exponent)
)
# Append the disk with all details to the list
disk_list.append({
disk_list.append(
{
"id": disk_id,
"ref": disk_ref,
"capacity": disk_capacity,
"src": disk_src
})
"src": disk_src,
}
)
return disk_list

File diff suppressed because it is too large Load Diff

View File

@ -24,74 +24,74 @@ import datetime
# ANSI colours for output
def red():
    """Return the ANSI escape sequence used for red output."""
    return "\033[91m"
def blue():
    """Return the ANSI escape sequence used for blue output."""
    return "\033[94m"
def cyan():
    """Return the ANSI escape sequence used for cyan output."""
    return "\033[96m"
def green():
    """Return the ANSI escape sequence used for green output."""
    return "\033[92m"
def yellow():
    """Return the ANSI escape sequence used for yellow output."""
    return "\033[93m"
def purple():
    """Return the ANSI escape sequence used for purple output."""
    return "\033[95m"
def bold():
    """Return the ANSI escape sequence used for bold output."""
    return "\033[1m"
def end():
    """Return the ANSI escape sequence that ends (resets) colour/bold output."""
    return "\033[0m"
# Print function
def echo(message, prefix, state):
    """
    Print a colour-coded status line to standard output.

    message: the text to print
    prefix: text placed before the message; a trailing space is added when it
            is not empty
    state: one-letter code selecting the prompt colour:
           "c" continuation (no timestamp, no colour, padding prompt),
           "o" OK, "e" error, "w" warning, "t" tick, "i" information;
           any other value is printed with the bold prompt
    """
    # Timestamp shown on every line except continuations
    timestamp = "{} - ".format(
        datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S.%f")
    )
    reset = end()

    if state == "c":
        # Continuation lines drop the timestamp and the coloured prompt
        timestamp = ""
        colour = ""
        prompt = " "
    else:
        # Ordered (state, colour function) pairs; compared with == so the
        # matching rules are the same as an if/elif chain
        colour_table = (
            ("o", green),
            ("e", red),
            ("w", yellow),
            ("t", purple),
            ("i", blue),
        )
        colour_function = next(
            (function for code, function in colour_table if state == code),
            bold,
        )
        colour = colour_function()
        prompt = ">>> "

    # Append space to prefix
    if prefix != "":
        prefix = prefix + " "

    print(colour + prompt + reset + timestamp + prefix + message)

File diff suppressed because it is too large Load Diff

View File

@ -33,18 +33,15 @@ def initialize(config, overwrite=False):
API arguments: overwrite, yes-i-really-mean-it
API schema: {json_data_object}
"""
params = {
'yes-i-really-mean-it': 'yes',
'overwrite': overwrite
}
response = call_api(config, 'post', '/initialize', params=params)
params = {"yes-i-really-mean-it": "yes", "overwrite": overwrite}
response = call_api(config, "post", "/initialize", params=params)
if response.status_code == 200:
retstatus = True
else:
retstatus = False
return retstatus, response.json().get('message', '')
return retstatus, response.json().get("message", "")
def backup(config):
@ -55,12 +52,12 @@ def backup(config):
API arguments:
API schema: {json_data_object}
"""
response = call_api(config, 'get', '/backup')
response = call_api(config, "get", "/backup")
if response.status_code == 200:
return True, response.json()
else:
return False, response.json().get('message', '')
return False, response.json().get("message", "")
def restore(config, cluster_data):
@ -73,20 +70,16 @@ def restore(config, cluster_data):
"""
cluster_data_json = json.dumps(cluster_data)
params = {
'yes-i-really-mean-it': 'yes'
}
data = {
'cluster_data': cluster_data_json
}
response = call_api(config, 'post', '/restore', params=params, data=data)
params = {"yes-i-really-mean-it": "yes"}
data = {"cluster_data": cluster_data_json}
response = call_api(config, "post", "/restore", params=params, data=data)
if response.status_code == 200:
retstatus = True
else:
retstatus = False
return retstatus, response.json().get('message', '')
return retstatus, response.json().get("message", "")
def maintenance_mode(config, state):
@ -97,17 +90,15 @@ def maintenance_mode(config, state):
API arguments: {state}={state}
API schema: {json_data_object}
"""
params = {
'state': state
}
response = call_api(config, 'post', '/status', params=params)
params = {"state": state}
response = call_api(config, "post", "/status", params=params)
if response.status_code == 200:
retstatus = True
else:
retstatus = False
return retstatus, response.json().get('message', '')
return retstatus, response.json().get("message", "")
def get_info(config):
@ -118,109 +109,216 @@ def get_info(config):
API arguments:
API schema: {json_data_object}
"""
response = call_api(config, 'get', '/status')
response = call_api(config, "get", "/status")
if response.status_code == 200:
return True, response.json()
else:
return False, response.json().get('message', '')
return False, response.json().get("message", "")
def _health_colour(status):
    """Return the ANSI colour used to display a cluster or storage health status."""
    if status == "Optimal":
        return ansiprint.green()
    if status == "Maintenance":
        return ansiprint.blue()
    return ansiprint.yellow()


def _format_health_section(cluster_information, health_colour, storage_health_colour):
    """Return the cluster and storage health lines shared by the short and full reports."""
    lines = [
        "{}Cluster health:{} {}{}{}".format(
            ansiprint.purple(),
            ansiprint.end(),
            health_colour,
            cluster_information["health"],
            ansiprint.end(),
        )
    ]
    if cluster_information["health_msg"]:
        for line in cluster_information["health_msg"]:
            lines.append(" > {}".format(line))
    lines.append(
        "{}Storage health:{} {}{}{}".format(
            ansiprint.purple(),
            ansiprint.end(),
            storage_health_colour,
            cluster_information["storage_health"],
            ansiprint.end(),
        )
    )
    if cluster_information["storage_health_msg"]:
        for line in cluster_information["storage_health_msg"]:
            lines.append(" > {}".format(line))
    return lines


def _degraded_state_colour(state):
    """Return the colour for any node/OSD state other than the healthy one."""
    return ansiprint.yellow()


def _vm_state_colour(state):
    """Return the colour for a non-running VM state (blue for administrative states)."""
    if state in ["disable", "migrate", "unmigrate", "provision"]:
        return ansiprint.blue()
    return ansiprint.yellow()


def _append_state_counts(summary, counts, healthy_state, colour_for):
    """Append a '<count>/<total> <state>' entry to summary for every state except total and healthy_state."""
    for state, count in counts.items():
        if state == "total" or state == healthy_state:
            continue
        summary += " {}/{} {}{}{}".format(
            count, counts["total"], colour_for(state), state, ansiprint.end()
        )
    return summary


def format_info(cluster_information, oformat):
    """
    Format the cluster status information for output.

    cluster_information: the cluster status dictionary; the human-readable
                         formats read its "health", "health_msg",
                         "storage_health" and "storage_health_msg" entries,
                         and the full report additionally reads
                         "primary_node", "upstream_ip", "nodes", "vms",
                         "networks", "osds", "pools", "volumes" and
                         "snapshots"
    oformat: "json" for compact JSON, "json-pretty" for JSON indented by 4,
             "short" for the health summary only; any other value produces
             the full human-readable report
    returns: the formatted string
    """
    if oformat == "json":
        return json.dumps(cluster_information)

    if oformat == "json-pretty":
        return json.dumps(cluster_information, indent=4)

    # Plain formatting, i.e. human-readable
    health_colour = _health_colour(cluster_information["health"])
    storage_health_colour = _health_colour(cluster_information["storage_health"])

    title = "{}PVC cluster status:{}".format(ansiprint.bold(), ansiprint.end())
    health_section = _format_health_section(
        cluster_information, health_colour, storage_health_colour
    )

    # The short report is just the title followed by the health section
    if oformat == "short":
        return "\n".join([title] + health_section)

    ainformation = [title, ""]
    ainformation.extend(health_section)

    ainformation.append("")
    ainformation.append(
        "{}Primary node:{} {}".format(
            ansiprint.purple(), ansiprint.end(), cluster_information["primary_node"]
        )
    )
    ainformation.append(
        "{}Cluster upstream IP:{} {}".format(
            ansiprint.purple(), ansiprint.end(), cluster_information["upstream_ip"]
        )
    )
    ainformation.append("")
    ainformation.append(
        "{}Total nodes:{} {}".format(
            ansiprint.purple(), ansiprint.end(), cluster_information["nodes"]["total"]
        )
    )
    ainformation.append(
        "{}Total VMs:{} {}".format(
            ansiprint.purple(), ansiprint.end(), cluster_information["vms"]["total"]
        )
    )
    ainformation.append(
        "{}Total networks:{} {}".format(
            ansiprint.purple(), ansiprint.end(), cluster_information["networks"]
        )
    )
    ainformation.append(
        "{}Total OSDs:{} {}".format(
            ansiprint.purple(), ansiprint.end(), cluster_information["osds"]["total"]
        )
    )
    ainformation.append(
        "{}Total pools:{} {}".format(
            ansiprint.purple(), ansiprint.end(), cluster_information["pools"]
        )
    )
    ainformation.append(
        "{}Total volumes:{} {}".format(
            ansiprint.purple(), ansiprint.end(), cluster_information["volumes"]
        )
    )
    ainformation.append(
        "{}Total snapshots:{} {}".format(
            ansiprint.purple(), ansiprint.end(), cluster_information["snapshots"]
        )
    )

    # Per-state node counts: the healthy state first, then every other state
    nodes_string = "{}Nodes:{} {}/{} {}ready,run{}".format(
        ansiprint.purple(),
        ansiprint.end(),
        cluster_information["nodes"].get("run,ready", 0),
        cluster_information["nodes"].get("total", 0),
        ansiprint.green(),
        ansiprint.end(),
    )
    nodes_string = _append_state_counts(
        nodes_string, cluster_information["nodes"], "run,ready", _degraded_state_colour
    )

    ainformation.append("")
    ainformation.append(nodes_string)

    # Per-state VM counts
    vms_string = "{}VMs:{} {}/{} {}start{}".format(
        ansiprint.purple(),
        ansiprint.end(),
        cluster_information["vms"].get("start", 0),
        cluster_information["vms"].get("total", 0),
        ansiprint.green(),
        ansiprint.end(),
    )
    vms_string = _append_state_counts(
        vms_string, cluster_information["vms"], "start", _vm_state_colour
    )

    ainformation.append("")
    ainformation.append(vms_string)

    # The OSD line is only shown when the cluster reports at least one OSD
    if cluster_information["osds"]["total"] > 0:
        osds_string = "{}Ceph OSDs:{} {}/{} {}up,in{}".format(
            ansiprint.purple(),
            ansiprint.end(),
            cluster_information["osds"].get("up,in", 0),
            cluster_information["osds"].get("total", 0),
            ansiprint.green(),
            ansiprint.end(),
        )
        osds_string = _append_state_counts(
            osds_string, cluster_information["osds"], "up,in", _degraded_state_colour
        )

        ainformation.append("")
        ainformation.append(osds_string)

    ainformation.append("")
    return "\n".join(ainformation)

View File

@ -29,42 +29,42 @@ from urllib3 import disable_warnings
def format_bytes(size_bytes):
    """
    Format a byte count as a short human-readable string.

    The value is divided by successively larger binary units (B, K, M, G, T,
    P), rounded up to a whole number, and the first unit that yields a value
    below 10000 is used.

    A value too large to fit below 10000 in the largest unit is reported in
    that largest unit instead of falling back to "0B".

    size_bytes: the size in bytes
    returns: a string such as "512B", "10K" or "3G"
    """
    byte_unit_matrix = {
        "B": 1,
        "K": 1024,
        "M": 1024 * 1024,
        "G": 1024 * 1024 * 1024,
        "T": 1024 * 1024 * 1024 * 1024,
        "P": 1024 * 1024 * 1024 * 1024 * 1024,
    }
    human_bytes = "0B"
    # Walk the units from smallest to largest
    for unit in sorted(byte_unit_matrix, key=byte_unit_matrix.get):
        formatted_bytes = int(math.ceil(size_bytes / byte_unit_matrix[unit]))
        # Record every candidate so that an overflow of the largest unit still
        # returns a meaningful value
        human_bytes = "{}{}".format(formatted_bytes, unit)
        if formatted_bytes < 10000:
            break
    return human_bytes
def format_metric(integer):
    """
    Format a count as a short human-readable string using decimal units.

    The value is divided by successively larger powers of 1000 (no suffix, K,
    M, B, T, Q), rounded up to a whole number, and the first unit that yields
    a value below 10000 is used.

    A value too large to fit below 10000 in the largest unit is reported in
    that largest unit instead of falling back to "0".

    integer: the number to format
    returns: a string such as "512", "10K" or "3M"
    """
    integer_unit_matrix = {
        "": 1,
        "K": 1000,
        "M": 1000 * 1000,
        "B": 1000 * 1000 * 1000,
        "T": 1000 * 1000 * 1000 * 1000,
        "Q": 1000 * 1000 * 1000 * 1000 * 1000,
    }
    human_integer = "0"
    # Walk the units from smallest to largest
    for unit in sorted(integer_unit_matrix, key=integer_unit_matrix.get):
        formatted_integer = int(math.ceil(integer / integer_unit_matrix[unit]))
        # Record every candidate so that an overflow of the largest unit still
        # returns a meaningful value
        human_integer = "{}{}".format(formatted_integer, unit)
        if formatted_integer < 10000:
            break
    return human_integer
class UploadProgressBar(object):
def __init__(self, filename, end_message='', end_nl=True):
def __init__(self, filename, end_message="", end_nl=True):
file_size = os.path.getsize(filename)
file_size_human = format_bytes(file_size)
click.echo("Uploading file (total size {})...".format(file_size_human))
@ -78,9 +78,9 @@ class UploadProgressBar(object):
self.end_message = end_message
self.end_nl = end_nl
if not self.end_nl:
self.end_suffix = ' '
self.end_suffix = " "
else:
self.end_suffix = ''
self.end_suffix = ""
self.bar = click.progressbar(length=self.length, show_eta=True)
@ -115,35 +115,34 @@ class ErrorResponse(requests.Response):
return self.json_data
def call_api(config, operation, request_uri, headers={}, params=None, data=None, files=None):
def call_api(
config, operation, request_uri, headers={}, params=None, data=None, files=None
):
# Craft the URI
uri = '{}://{}{}{}'.format(
config['api_scheme'],
config['api_host'],
config['api_prefix'],
request_uri
uri = "{}://{}{}{}".format(
config["api_scheme"], config["api_host"], config["api_prefix"], request_uri
)
# Default timeout is 3 seconds
timeout = 3
# Craft the authentication header if required
if config['api_key']:
headers['X-Api-Key'] = config['api_key']
if config["api_key"]:
headers["X-Api-Key"] = config["api_key"]
# Determine the request type and hit the API
disable_warnings()
try:
if operation == 'get':
if operation == "get":
response = requests.get(
uri,
timeout=timeout,
headers=headers,
params=params,
data=data,
verify=config['verify_ssl']
verify=config["verify_ssl"],
)
if operation == 'post':
if operation == "post":
response = requests.post(
uri,
timeout=timeout,
@ -151,9 +150,9 @@ def call_api(config, operation, request_uri, headers={}, params=None, data=None,
params=params,
data=data,
files=files,
verify=config['verify_ssl']
verify=config["verify_ssl"],
)
if operation == 'put':
if operation == "put":
response = requests.put(
uri,
timeout=timeout,
@ -161,35 +160,35 @@ def call_api(config, operation, request_uri, headers={}, params=None, data=None,
params=params,
data=data,
files=files,
verify=config['verify_ssl']
verify=config["verify_ssl"],
)
if operation == 'patch':
if operation == "patch":
response = requests.patch(
uri,
timeout=timeout,
headers=headers,
params=params,
data=data,
verify=config['verify_ssl']
verify=config["verify_ssl"],
)
if operation == 'delete':
if operation == "delete":
response = requests.delete(
uri,
timeout=timeout,
headers=headers,
params=params,
data=data,
verify=config['verify_ssl']
verify=config["verify_ssl"],
)
except Exception as e:
message = 'Failed to connect to the API: {}'.format(e)
response = ErrorResponse({'message': message}, 500)
message = "Failed to connect to the API: {}".format(e)
response = ErrorResponse({"message": message}, 500)
# Display debug output
if config['debug']:
click.echo('API endpoint: {}'.format(uri), err=True)
click.echo('Response code: {}'.format(response.status_code), err=True)
click.echo('Response headers: {}'.format(response.headers), err=True)
if config["debug"]:
click.echo("API endpoint: {}".format(uri), err=True)
click.echo("Response code: {}".format(response.status_code), err=True)
click.echo("Response headers: {}".format(response.headers), err=True)
click.echo(err=True)
# Return the response object

File diff suppressed because it is too large Load Diff

View File

@ -36,17 +36,20 @@ def node_coordinator_state(config, node, action):
API arguments: action={action}
API schema: {"message": "{data}"}
"""
params = {
'state': action
}
response = call_api(config, 'post', '/node/{node}/coordinator-state'.format(node=node), params=params)
params = {"state": action}
response = call_api(
config,
"post",
"/node/{node}/coordinator-state".format(node=node),
params=params,
)
if response.status_code == 200:
retstatus = True
else:
retstatus = False
return retstatus, response.json().get('message', '')
return retstatus, response.json().get("message", "")
def node_domain_state(config, node, action, wait):
@ -57,18 +60,17 @@ def node_domain_state(config, node, action, wait):
API arguments: action={action}, wait={wait}
API schema: {"message": "{data}"}
"""
params = {
'state': action,
'wait': str(wait).lower()
}
response = call_api(config, 'post', '/node/{node}/domain-state'.format(node=node), params=params)
params = {"state": action, "wait": str(wait).lower()}
response = call_api(
config, "post", "/node/{node}/domain-state".format(node=node), params=params
)
if response.status_code == 200:
retstatus = True
else:
retstatus = False
return retstatus, response.json().get('message', '')
return retstatus, response.json().get("message", "")
def view_node_log(config, node, lines=100):
@ -79,19 +81,19 @@ def view_node_log(config, node, lines=100):
API arguments: lines={lines}
API schema: {"name":"{node}","data":"{node_log}"}
"""
params = {
'lines': lines
}
response = call_api(config, 'get', '/node/{node}/log'.format(node=node), params=params)
params = {"lines": lines}
response = call_api(
config, "get", "/node/{node}/log".format(node=node), params=params
)
if response.status_code != 200:
return False, response.json().get('message', '')
return False, response.json().get("message", "")
node_log = response.json()['data']
node_log = response.json()["data"]
# Shrink the log buffer to length lines
shrunk_log = node_log.split('\n')[-lines:]
loglines = '\n'.join(shrunk_log)
shrunk_log = node_log.split("\n")[-lines:]
loglines = "\n".join(shrunk_log)
return True, loglines
@ -105,53 +107,55 @@ def follow_node_log(config, node, lines=10):
API schema: {"name":"{nodename}","data":"{node_log}"}
"""
# We always grab 200 to match the follow call, but only _show_ `lines` number
params = {
'lines': 200
}
response = call_api(config, 'get', '/node/{node}/log'.format(node=node), params=params)
params = {"lines": 200}
response = call_api(
config, "get", "/node/{node}/log".format(node=node), params=params
)
if response.status_code != 200:
return False, response.json().get('message', '')
return False, response.json().get("message", "")
# Shrink the log buffer to length lines
node_log = response.json()['data']
shrunk_log = node_log.split('\n')[-int(lines):]
loglines = '\n'.join(shrunk_log)
node_log = response.json()["data"]
shrunk_log = node_log.split("\n")[-int(lines) :]
loglines = "\n".join(shrunk_log)
# Print the initial data and begin following
print(loglines, end='')
print('\n', end='')
print(loglines, end="")
print("\n", end="")
while True:
# Grab the next line set (200 is a reasonable number of lines per half-second; any more are skipped)
try:
params = {
'lines': 200
}
response = call_api(config, 'get', '/node/{node}/log'.format(node=node), params=params)
new_node_log = response.json()['data']
params = {"lines": 200}
response = call_api(
config, "get", "/node/{node}/log".format(node=node), params=params
)
new_node_log = response.json()["data"]
except Exception:
break
# Split the new and old log strings into constituent lines
old_node_loglines = node_log.split('\n')
new_node_loglines = new_node_log.split('\n')
old_node_loglines = node_log.split("\n")
new_node_loglines = new_node_log.split("\n")
# Set the node log to the new log value for the next iteration
node_log = new_node_log
# Get the difference between the two sets of lines
old_node_loglines_set = set(old_node_loglines)
diff_node_loglines = [x for x in new_node_loglines if x not in old_node_loglines_set]
diff_node_loglines = [
x for x in new_node_loglines if x not in old_node_loglines_set
]
# If there's a difference, print it out
if len(diff_node_loglines) > 0:
print('\n'.join(diff_node_loglines), end='')
print('\n', end='')
print("\n".join(diff_node_loglines), end="")
print("\n", end="")
# Wait half a second
time.sleep(0.5)
return True, ''
return True, ""
def node_info(config, node):
@ -162,7 +166,7 @@ def node_info(config, node):
API arguments:
API schema: {json_data_object}
"""
response = call_api(config, 'get', '/node/{node}'.format(node=node))
response = call_api(config, "get", "/node/{node}".format(node=node))
if response.status_code == 200:
if isinstance(response.json(), list) and len(response.json()) != 1:
@ -176,10 +180,12 @@ def node_info(config, node):
else:
return True, response.json()
else:
return False, response.json().get('message', '')
return False, response.json().get("message", "")
def node_list(config, limit, target_daemon_state, target_coordinator_state, target_domain_state):
def node_list(
config, limit, target_daemon_state, target_coordinator_state, target_domain_state
):
"""
Get list information about nodes (limited by {limit})
@ -189,102 +195,202 @@ def node_list(config, limit, target_daemon_state, target_coordinator_state, targ
"""
params = dict()
if limit:
params['limit'] = limit
params["limit"] = limit
if target_daemon_state:
params['daemon_state'] = target_daemon_state
params["daemon_state"] = target_daemon_state
if target_coordinator_state:
params['coordinator_state'] = target_coordinator_state
params["coordinator_state"] = target_coordinator_state
if target_domain_state:
params['domain_state'] = target_domain_state
params["domain_state"] = target_domain_state
response = call_api(config, 'get', '/node', params=params)
response = call_api(config, "get", "/node", params=params)
if response.status_code == 200:
return True, response.json()
else:
return False, response.json().get('message', '')
return False, response.json().get("message", "")
#
# Output display functions
#
def getOutputColours(node_information):
    """
    Pick the display colours for one node's state and memory fields.

    Reads "daemon_state", "coordinator_state", "domain_state" and the
    "memory" sub-dictionary ("allocated", "provisioned", "total") from
    node_information.

    Returns a 5-tuple, in order: daemon state colour, coordinator state
    colour, domain state colour, allocated-memory colour and
    provisioned-memory colour. The two memory colours are an empty string
    unless the respective figure exceeds the node's total memory.
    """
    # Daemon state: known states map to a colour factory, anything else is blue
    daemon_colour_makers = {
        "run": ansiprint.green,
        "stop": ansiprint.red,
        "shutdown": ansiprint.yellow,
        "init": ansiprint.yellow,
        "dead": lambda: ansiprint.red() + ansiprint.bold(),
    }
    make_daemon_colour = daemon_colour_makers.get(
        node_information["daemon_state"], ansiprint.blue
    )
    daemon_state_colour = make_daemon_colour()

    # Coordinator state: primary and secondary are distinguished, the rest is cyan
    coordinator_state = node_information["coordinator_state"]
    if coordinator_state == "primary":
        coordinator_state_colour = ansiprint.green()
    elif coordinator_state == "secondary":
        coordinator_state_colour = ansiprint.blue()
    else:
        coordinator_state_colour = ansiprint.cyan()

    # Domain state: only "ready" is green
    domain_is_ready = node_information["domain_state"] == "ready"
    domain_state_colour = ansiprint.green() if domain_is_ready else ansiprint.blue()

    # Memory: highlight figures that exceed the node's total
    over_allocated = (
        node_information["memory"]["allocated"] > node_information["memory"]["total"]
    )
    mem_allocated_colour = ansiprint.yellow() if over_allocated else ""

    over_provisioned = (
        node_information["memory"]["provisioned"] > node_information["memory"]["total"]
    )
    mem_provisioned_colour = ansiprint.yellow() if over_provisioned else ""

    return (
        daemon_state_colour,
        coordinator_state_colour,
        domain_state_colour,
        mem_allocated_colour,
        mem_provisioned_colour,
    )
def format_info(node_information, long_output):
    """
    Build the multi-line detail view for a single node.

    Every line is a purple label followed by the value; state values and the
    allocated/provisioned memory values are additionally wrapped in the
    colours chosen by getOutputColours(). When long_output is truthy, the
    architecture, OS, kernel, CPU, load and memory details are appended
    after a blank separator line.

    Returns the lines joined with newlines, ending with a trailing newline.
    """

    def plain(template, value):
        # Purple label, uncoloured value
        return template.format(ansiprint.purple(), ansiprint.end(), value)

    def tinted(template, colour, value):
        # Purple label, value wrapped in its own colour and a reset
        return template.format(
            ansiprint.purple(), ansiprint.end(), colour, value, ansiprint.end()
        )

    (
        daemon_state_colour,
        coordinator_state_colour,
        domain_state_colour,
        mem_allocated_colour,
        mem_provisioned_colour,
    ) = getOutputColours(node_information)

    # Basic information, always shown
    report_lines = [
        plain("{}Name:{} {}", node_information["name"]),
        plain("{}PVC Version:{} {}", node_information["pvc_version"]),
        tinted(
            "{}Daemon State:{} {}{}{}",
            daemon_state_colour,
            node_information["daemon_state"],
        ),
        tinted(
            "{}Coordinator State:{} {}{}{}",
            coordinator_state_colour,
            node_information["coordinator_state"],
        ),
        tinted(
            "{}Domain State:{} {}{}{}",
            domain_state_colour,
            node_information["domain_state"],
        ),
        plain("{}Active VM Count:{} {}", node_information["domains_count"]),
    ]

    # Extended information, only for long output
    if long_output:
        report_lines.extend(
            [
                "",
                plain("{}Architecture:{} {}", node_information["arch"]),
                plain("{}Operating System:{} {}", node_information["os"]),
                plain("{}Kernel Version:{} {}", node_information["kernel"]),
                "",
                plain("{}Host CPUs:{} {}", node_information["vcpu"]["total"]),
                plain("{}vCPUs:{} {}", node_information["vcpu"]["allocated"]),
                plain("{}Load:{} {}", node_information["load"]),
                plain(
                    "{}Total RAM (MiB):{} {}", node_information["memory"]["total"]
                ),
                plain("{}Used RAM (MiB):{} {}", node_information["memory"]["used"]),
                plain("{}Free RAM (MiB):{} {}", node_information["memory"]["free"]),
                tinted(
                    "{}Allocated RAM (MiB):{} {}{}{}",
                    mem_allocated_colour,
                    node_information["memory"]["allocated"],
                ),
                tinted(
                    "{}Provisioned RAM (MiB):{} {}{}{}",
                    mem_provisioned_colour,
                    node_information["memory"]["provisioned"],
                ),
            ]
        )

    # The empty final element yields the trailing newline once joined
    report_lines.append("")
    return "\n".join(report_lines)
def format_list(node_list, raw):
if raw:
ainformation = list()
for node in sorted(item['name'] for item in node_list):
for node in sorted(item["name"] for item in node_list):
ainformation.append(node)
return '\n'.join(ainformation)
return "\n".join(ainformation)
node_list_output = []
@ -304,80 +410,126 @@ def format_list(node_list, raw):
mem_prov_length = 5
for node_information in node_list:
# node_name column
_node_name_length = len(node_information['name']) + 1
_node_name_length = len(node_information["name"]) + 1
if _node_name_length > node_name_length:
node_name_length = _node_name_length
# node_pvc_version column
_pvc_version_length = len(node_information.get('pvc_version', 'N/A')) + 1
_pvc_version_length = len(node_information.get("pvc_version", "N/A")) + 1
if _pvc_version_length > pvc_version_length:
pvc_version_length = _pvc_version_length
# daemon_state column
_daemon_state_length = len(node_information['daemon_state']) + 1
_daemon_state_length = len(node_information["daemon_state"]) + 1
if _daemon_state_length > daemon_state_length:
daemon_state_length = _daemon_state_length
# coordinator_state column
_coordinator_state_length = len(node_information['coordinator_state']) + 1
_coordinator_state_length = len(node_information["coordinator_state"]) + 1
if _coordinator_state_length > coordinator_state_length:
coordinator_state_length = _coordinator_state_length
# domain_state column
_domain_state_length = len(node_information['domain_state']) + 1
_domain_state_length = len(node_information["domain_state"]) + 1
if _domain_state_length > domain_state_length:
domain_state_length = _domain_state_length
# domains_count column
_domains_count_length = len(str(node_information['domains_count'])) + 1
_domains_count_length = len(str(node_information["domains_count"])) + 1
if _domains_count_length > domains_count_length:
domains_count_length = _domains_count_length
# cpu_count column
_cpu_count_length = len(str(node_information['cpu_count'])) + 1
_cpu_count_length = len(str(node_information["cpu_count"])) + 1
if _cpu_count_length > cpu_count_length:
cpu_count_length = _cpu_count_length
# load column
_load_length = len(str(node_information['load'])) + 1
_load_length = len(str(node_information["load"])) + 1
if _load_length > load_length:
load_length = _load_length
# mem_total column
_mem_total_length = len(str(node_information['memory']['total'])) + 1
_mem_total_length = len(str(node_information["memory"]["total"])) + 1
if _mem_total_length > mem_total_length:
mem_total_length = _mem_total_length
# mem_used column
_mem_used_length = len(str(node_information['memory']['used'])) + 1
_mem_used_length = len(str(node_information["memory"]["used"])) + 1
if _mem_used_length > mem_used_length:
mem_used_length = _mem_used_length
# mem_free column
_mem_free_length = len(str(node_information['memory']['free'])) + 1
_mem_free_length = len(str(node_information["memory"]["free"])) + 1
if _mem_free_length > mem_free_length:
mem_free_length = _mem_free_length
# mem_alloc column
_mem_alloc_length = len(str(node_information['memory']['allocated'])) + 1
_mem_alloc_length = len(str(node_information["memory"]["allocated"])) + 1
if _mem_alloc_length > mem_alloc_length:
mem_alloc_length = _mem_alloc_length
# mem_prov column
_mem_prov_length = len(str(node_information['memory']['provisioned'])) + 1
_mem_prov_length = len(str(node_information["memory"]["provisioned"])) + 1
if _mem_prov_length > mem_prov_length:
mem_prov_length = _mem_prov_length
# Format the string (header)
node_list_output.append(
'{bold}{node_header: <{node_header_length}} {state_header: <{state_header_length}} {resource_header: <{resource_header_length}} {memory_header: <{memory_header_length}}{end_bold}'.format(
"{bold}{node_header: <{node_header_length}} {state_header: <{state_header_length}} {resource_header: <{resource_header_length}} {memory_header: <{memory_header_length}}{end_bold}".format(
node_header_length=node_name_length + pvc_version_length + 1,
state_header_length=daemon_state_length + coordinator_state_length + domain_state_length + 2,
resource_header_length=domains_count_length + cpu_count_length + load_length + 2,
memory_header_length=mem_total_length + mem_used_length + mem_free_length + mem_alloc_length + mem_prov_length + 4,
state_header_length=daemon_state_length
+ coordinator_state_length
+ domain_state_length
+ 2,
resource_header_length=domains_count_length
+ cpu_count_length
+ load_length
+ 2,
memory_header_length=mem_total_length
+ mem_used_length
+ mem_free_length
+ mem_alloc_length
+ mem_prov_length
+ 4,
bold=ansiprint.bold(),
end_bold=ansiprint.end(),
node_header='Nodes ' + ''.join(['-' for _ in range(6, node_name_length + pvc_version_length)]),
state_header='States ' + ''.join(['-' for _ in range(7, daemon_state_length + coordinator_state_length + domain_state_length + 1)]),
resource_header='Resources ' + ''.join(['-' for _ in range(10, domains_count_length + cpu_count_length + load_length + 1)]),
memory_header='Memory (M) ' + ''.join(['-' for _ in range(11, mem_total_length + mem_used_length + mem_free_length + mem_alloc_length + mem_prov_length + 3)])
node_header="Nodes "
+ "".join(["-" for _ in range(6, node_name_length + pvc_version_length)]),
state_header="States "
+ "".join(
[
"-"
for _ in range(
7,
daemon_state_length
+ coordinator_state_length
+ domain_state_length
+ 1,
)
]
),
resource_header="Resources "
+ "".join(
[
"-"
for _ in range(
10, domains_count_length + cpu_count_length + load_length + 1
)
]
),
memory_header="Memory (M) "
+ "".join(
[
"-"
for _ in range(
11,
mem_total_length
+ mem_used_length
+ mem_free_length
+ mem_alloc_length
+ mem_prov_length
+ 3,
)
]
),
)
)
node_list_output.append(
'{bold}{node_name: <{node_name_length}} {node_pvc_version: <{pvc_version_length}} \
"{bold}{node_name: <{node_name_length}} {node_pvc_version: <{pvc_version_length}} \
{daemon_state_colour}{node_daemon_state: <{daemon_state_length}}{end_colour} {coordinator_state_colour}{node_coordinator_state: <{coordinator_state_length}}{end_colour} {domain_state_colour}{node_domain_state: <{domain_state_length}}{end_colour} \
{node_domains_count: <{domains_count_length}} {node_cpu_count: <{cpu_count_length}} {node_load: <{load_length}} \
{node_mem_total: <{mem_total_length}} {node_mem_used: <{mem_used_length}} {node_mem_free: <{mem_free_length}} {node_mem_allocated: <{mem_alloc_length}} {node_mem_provisioned: <{mem_prov_length}}{end_bold}'.format(
{node_mem_total: <{mem_total_length}} {node_mem_used: <{mem_used_length}} {node_mem_free: <{mem_free_length}} {node_mem_allocated: <{mem_alloc_length}} {node_mem_provisioned: <{mem_prov_length}}{end_bold}".format(
node_name_length=node_name_length,
pvc_version_length=pvc_version_length,
daemon_state_length=daemon_state_length,
@ -393,34 +545,40 @@ def format_list(node_list, raw):
mem_prov_length=mem_prov_length,
bold=ansiprint.bold(),
end_bold=ansiprint.end(),
daemon_state_colour='',
coordinator_state_colour='',
domain_state_colour='',
end_colour='',
node_name='Name',
node_pvc_version='Version',
node_daemon_state='Daemon',
node_coordinator_state='Coordinator',
node_domain_state='Domain',
node_domains_count='VMs',
node_cpu_count='vCPUs',
node_load='Load',
node_mem_total='Total',
node_mem_used='Used',
node_mem_free='Free',
node_mem_allocated='Alloc',
node_mem_provisioned='Prov'
daemon_state_colour="",
coordinator_state_colour="",
domain_state_colour="",
end_colour="",
node_name="Name",
node_pvc_version="Version",
node_daemon_state="Daemon",
node_coordinator_state="Coordinator",
node_domain_state="Domain",
node_domains_count="VMs",
node_cpu_count="vCPUs",
node_load="Load",
node_mem_total="Total",
node_mem_used="Used",
node_mem_free="Free",
node_mem_allocated="Alloc",
node_mem_provisioned="Prov",
)
)
# Format the string (elements)
for node_information in sorted(node_list, key=lambda n: n['name']):
daemon_state_colour, coordinator_state_colour, domain_state_colour, mem_allocated_colour, mem_provisioned_colour = getOutputColours(node_information)
for node_information in sorted(node_list, key=lambda n: n["name"]):
(
daemon_state_colour,
coordinator_state_colour,
domain_state_colour,
mem_allocated_colour,
mem_provisioned_colour,
) = getOutputColours(node_information)
node_list_output.append(
'{bold}{node_name: <{node_name_length}} {node_pvc_version: <{pvc_version_length}} \
"{bold}{node_name: <{node_name_length}} {node_pvc_version: <{pvc_version_length}} \
{daemon_state_colour}{node_daemon_state: <{daemon_state_length}}{end_colour} {coordinator_state_colour}{node_coordinator_state: <{coordinator_state_length}}{end_colour} {domain_state_colour}{node_domain_state: <{domain_state_length}}{end_colour} \
{node_domains_count: <{domains_count_length}} {node_cpu_count: <{cpu_count_length}} {node_load: <{load_length}} \
{node_mem_total: <{mem_total_length}} {node_mem_used: <{mem_used_length}} {node_mem_free: <{mem_free_length}} {mem_allocated_colour}{node_mem_allocated: <{mem_alloc_length}}{end_colour} {mem_provisioned_colour}{node_mem_provisioned: <{mem_prov_length}}{end_colour}{end_bold}'.format(
{node_mem_total: <{mem_total_length}} {node_mem_used: <{mem_used_length}} {node_mem_free: <{mem_free_length}} {mem_allocated_colour}{node_mem_allocated: <{mem_alloc_length}}{end_colour} {mem_provisioned_colour}{node_mem_provisioned: <{mem_prov_length}}{end_colour}{end_bold}".format(
node_name_length=node_name_length,
pvc_version_length=pvc_version_length,
daemon_state_length=daemon_state_length,
@ -434,28 +592,28 @@ def format_list(node_list, raw):
mem_free_length=mem_free_length,
mem_alloc_length=mem_alloc_length,
mem_prov_length=mem_prov_length,
bold='',
end_bold='',
bold="",
end_bold="",
daemon_state_colour=daemon_state_colour,
coordinator_state_colour=coordinator_state_colour,
domain_state_colour=domain_state_colour,
mem_allocated_colour=mem_allocated_colour,
mem_provisioned_colour=mem_allocated_colour,
end_colour=ansiprint.end(),
node_name=node_information['name'],
node_pvc_version=node_information.get('pvc_version', 'N/A'),
node_daemon_state=node_information['daemon_state'],
node_coordinator_state=node_information['coordinator_state'],
node_domain_state=node_information['domain_state'],
node_domains_count=node_information['domains_count'],
node_cpu_count=node_information['vcpu']['allocated'],
node_load=node_information['load'],
node_mem_total=node_information['memory']['total'],
node_mem_used=node_information['memory']['used'],
node_mem_free=node_information['memory']['free'],
node_mem_allocated=node_information['memory']['allocated'],
node_mem_provisioned=node_information['memory']['provisioned']
node_name=node_information["name"],
node_pvc_version=node_information.get("pvc_version", "N/A"),
node_daemon_state=node_information["daemon_state"],
node_coordinator_state=node_information["coordinator_state"],
node_domain_state=node_information["domain_state"],
node_domains_count=node_information["domains_count"],
node_cpu_count=node_information["vcpu"]["allocated"],
node_load=node_information["load"],
node_mem_total=node_information["memory"]["total"],
node_mem_used=node_information["memory"]["used"],
node_mem_free=node_information["memory"]["free"],
node_mem_allocated=node_information["memory"]["allocated"],
node_mem_provisioned=node_information["memory"]["provisioned"],
)
)
return '\n'.join(node_list_output)
return "\n".join(node_list_output)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -45,7 +45,7 @@ def deletekey(zk_conn, key, recursive=True):
# Data read function
def readdata(zk_conn, key):
    """
    Fetch the value stored under key and return it as a string.

    The first element of whatever zk_conn.get(key) returns is decoded as
    UTF-8; any further elements of that result are ignored.
    """
    payload = zk_conn.get(key)[0]
    return payload.decode("utf8")
@ -61,7 +61,7 @@ def writedata(zk_conn, kv):
# Check if this key already exists or not
if not zk_conn.exists(key):
# We're creating a new key
zk_transaction.create(key, str(data).encode('utf8'))
zk_transaction.create(key, str(data).encode("utf8"))
else:
# We're updating a key with version validation
orig_data = zk_conn.get(key)
@ -71,7 +71,7 @@ def writedata(zk_conn, kv):
new_version = version + 1
# Update the data
zk_transaction.set_data(key, str(data).encode('utf8'))
zk_transaction.set_data(key, str(data).encode("utf8"))
# Set up the check
try:
@ -91,12 +91,12 @@ def writedata(zk_conn, kv):
# Write lock function
def writelock(zk_conn, key):
    """
    Create a write lock object for key.

    A fresh uuid1-based identifier is generated for the lock, and the key is
    passed through str.format before being handed to zk_conn.WriteLock().
    Returns whatever zk_conn.WriteLock() returns; the lock is not acquired here.
    """
    identifier = str(uuid.uuid1())
    return zk_conn.WriteLock("{}".format(key), identifier)
# Read lock function
def readlock(zk_conn, key):
    """
    Create a read lock object for key.

    A fresh uuid1-based identifier is generated for the lock, and the key is
    passed through str.format before being handed to zk_conn.ReadLock().
    Returns whatever zk_conn.ReadLock() returns; the lock is not acquired here.
    """
    identifier = str(uuid.uuid1())
    return zk_conn.ReadLock("{}".format(key), identifier)

File diff suppressed because it is too large Load Diff

View File

@ -1,20 +1,20 @@
from setuptools import setup
# Runtime dependencies installed alongside the package
_INSTALL_REQUIRES = [
    "Click",
    "PyYAML",
    "lxml",
    "colorama",
    "requests",
    "requests-toolbelt",
]

# Console command exposed by the package: `pvc` runs pvc.pvc:cli
_ENTRY_POINTS = {
    "console_scripts": [
        "pvc = pvc.pvc:cli",
    ],
}

setup(
    name="pvc",
    version="0.9.42",
    packages=["pvc", "pvc.cli_lib"],
    install_requires=_INSTALL_REQUIRES,
    entry_points=_ENTRY_POINTS,
)

File diff suppressed because it is too large Load Diff

View File

@ -29,28 +29,24 @@ import daemon_lib.ceph as pvc_ceph
def set_maintenance(zkhandler, maint_state):
    """
    Switch the cluster between maintenance mode and normal mode.

    maint_state is compared against the value currently stored under
    "base.config.maintenance". If they are equal nothing is written;
    otherwise "true" is written when maint_state is "true" and "false" is
    written for any other value.

    Returns a (True, message) tuple describing what was done.
    """
    stored_state = zkhandler.read("base.config.maintenance")
    already_set = maint_state == stored_state
    want_maintenance = maint_state == "true"

    # Nothing to do: report the mode the cluster is already in
    if already_set and want_maintenance:
        return True, "Cluster is already in maintenance mode"
    if already_set:
        return True, "Cluster is already in normal mode"

    # State differs: persist the requested mode
    if want_maintenance:
        zkhandler.write([("base.config.maintenance", "true")])
        return True, "Successfully set cluster in maintenance mode"

    zkhandler.write([("base.config.maintenance", "false")])
    return True, "Successfully set cluster in normal mode"
def getClusterInformation(zkhandler):
# Get cluster maintenance state
maint_state = zkhandler.read('base.config.maintenance')
maint_state = zkhandler.read("base.config.maintenance")
# List of messages to display to the clients
cluster_health_msg = []
@ -69,7 +65,9 @@ def getClusterInformation(zkhandler):
retcode, ceph_osd_list = pvc_ceph.get_list_osd(zkhandler, None)
retcode, ceph_pool_list = pvc_ceph.get_list_pool(zkhandler, None)
retcode, ceph_volume_list = pvc_ceph.get_list_volume(zkhandler, None, None)
retcode, ceph_snapshot_list = pvc_ceph.get_list_snapshot(zkhandler, None, None, None)
retcode, ceph_snapshot_list = pvc_ceph.get_list_snapshot(
zkhandler, None, None, None
)
# Determine, for each subsection, the total count
node_count = len(node_list)
@ -91,8 +89,8 @@ def getClusterInformation(zkhandler):
node_largest_index = None
node_largest_count = 0
for index, node in enumerate(node_list):
node_mem_total = node['memory']['total']
node_mem_alloc = node['memory']['allocated']
node_mem_total = node["memory"]["total"]
node_mem_alloc = node["memory"]["allocated"]
alloc_total += node_mem_alloc
# Determine if this node is the largest seen so far
@ -105,32 +103,42 @@ def getClusterInformation(zkhandler):
continue
n_minus_1_node_list.append(node)
for index, node in enumerate(n_minus_1_node_list):
n_minus_1_total += node['memory']['total']
n_minus_1_total += node["memory"]["total"]
if alloc_total > n_minus_1_total:
cluster_healthy_status = False
cluster_health_msg.append("Total VM memory ({}) is overprovisioned (max {}) for (n-1) failure scenarios".format(alloc_total, n_minus_1_total))
cluster_health_msg.append(
"Total VM memory ({}) is overprovisioned (max {}) for (n-1) failure scenarios".format(
alloc_total, n_minus_1_total
)
)
# Determinations for node health
node_healthy_status = list(range(0, node_count))
node_report_status = list(range(0, node_count))
for index, node in enumerate(node_list):
daemon_state = node['daemon_state']
domain_state = node['domain_state']
if daemon_state != 'run' and domain_state != 'ready':
daemon_state = node["daemon_state"]
domain_state = node["domain_state"]
if daemon_state != "run" and domain_state != "ready":
node_healthy_status[index] = False
cluster_health_msg.append("Node '{}' in {},{} state".format(node['name'], daemon_state, domain_state))
cluster_health_msg.append(
"Node '{}' in {},{} state".format(
node["name"], daemon_state, domain_state
)
)
else:
node_healthy_status[index] = True
node_report_status[index] = daemon_state + ',' + domain_state
node_report_status[index] = daemon_state + "," + domain_state
# Determinations for VM health
vm_healthy_status = list(range(0, vm_count))
vm_report_status = list(range(0, vm_count))
for index, vm in enumerate(vm_list):
vm_state = vm['state']
if vm_state not in ['start', 'disable', 'migrate', 'unmigrate', 'provision']:
vm_state = vm["state"]
if vm_state not in ["start", "disable", "migrate", "unmigrate", "provision"]:
vm_healthy_status[index] = False
cluster_health_msg.append("VM '{}' in {} state".format(vm['name'], vm_state))
cluster_health_msg.append(
"VM '{}' in {} state".format(vm["name"], vm_state)
)
else:
vm_healthy_status[index] = True
vm_report_status[index] = vm_state
@ -140,70 +148,99 @@ def getClusterInformation(zkhandler):
ceph_osd_report_status = list(range(0, ceph_osd_count))
for index, ceph_osd in enumerate(ceph_osd_list):
try:
ceph_osd_up = ceph_osd['stats']['up']
ceph_osd_up = ceph_osd["stats"]["up"]
except KeyError:
ceph_osd_up = 0
try:
ceph_osd_in = ceph_osd['stats']['in']
ceph_osd_in = ceph_osd["stats"]["in"]
except KeyError:
ceph_osd_in = 0
up_texts = {1: 'up', 0: 'down'}
in_texts = {1: 'in', 0: 'out'}
up_texts = {1: "up", 0: "down"}
in_texts = {1: "in", 0: "out"}
if not ceph_osd_up or not ceph_osd_in:
ceph_osd_healthy_status[index] = False
cluster_health_msg.append('OSD {} in {},{} state'.format(ceph_osd['id'], up_texts[ceph_osd_up], in_texts[ceph_osd_in]))
cluster_health_msg.append(
"OSD {} in {},{} state".format(
ceph_osd["id"], up_texts[ceph_osd_up], in_texts[ceph_osd_in]
)
)
else:
ceph_osd_healthy_status[index] = True
ceph_osd_report_status[index] = up_texts[ceph_osd_up] + ',' + in_texts[ceph_osd_in]
ceph_osd_report_status[index] = (
up_texts[ceph_osd_up] + "," + in_texts[ceph_osd_in]
)
# Find out the overall cluster health; if any element of a healthy_status is false, it's unhealthy
if maint_state == 'true':
cluster_health = 'Maintenance'
elif cluster_healthy_status is False or False in node_healthy_status or False in vm_healthy_status or False in ceph_osd_healthy_status:
cluster_health = 'Degraded'
if maint_state == "true":
cluster_health = "Maintenance"
elif (
cluster_healthy_status is False
or False in node_healthy_status
or False in vm_healthy_status
or False in ceph_osd_healthy_status
):
cluster_health = "Degraded"
else:
cluster_health = 'Optimal'
cluster_health = "Optimal"
# Find out our storage health from Ceph
ceph_status = zkhandler.read('base.storage').split('\n')
ceph_status = zkhandler.read("base.storage").split("\n")
ceph_health = ceph_status[2].split()[-1]
# Parse the status output to get the health indicators
line_record = False
for index, line in enumerate(ceph_status):
if re.search('services:', line):
if re.search("services:", line):
line_record = False
if line_record and len(line.strip()) > 0:
storage_health_msg.append(line.strip())
if re.search('health:', line):
if re.search("health:", line):
line_record = True
if maint_state == 'true':
storage_health = 'Maintenance'
elif ceph_health != 'HEALTH_OK':
storage_health = 'Degraded'
if maint_state == "true":
storage_health = "Maintenance"
elif ceph_health != "HEALTH_OK":
storage_health = "Degraded"
else:
storage_health = 'Optimal'
storage_health = "Optimal"
# State lists
node_state_combinations = [
'run,ready', 'run,flush', 'run,flushed', 'run,unflush',
'init,ready', 'init,flush', 'init,flushed', 'init,unflush',
'stop,ready', 'stop,flush', 'stop,flushed', 'stop,unflush',
'dead,ready', 'dead,flush', 'dead,flushed', 'dead,unflush'
"run,ready",
"run,flush",
"run,flushed",
"run,unflush",
"init,ready",
"init,flush",
"init,flushed",
"init,unflush",
"stop,ready",
"stop,flush",
"stop,flushed",
"stop,unflush",
"dead,ready",
"dead,flush",
"dead,flushed",
"dead,unflush",
]
vm_state_combinations = [
'start', 'restart', 'shutdown', 'stop', 'disable', 'fail', 'migrate', 'unmigrate', 'provision'
]
ceph_osd_state_combinations = [
'up,in', 'up,out', 'down,in', 'down,out'
"start",
"restart",
"shutdown",
"stop",
"disable",
"fail",
"migrate",
"unmigrate",
"provision",
]
ceph_osd_state_combinations = ["up,in", "up,out", "down,in", "down,out"]
# Format the Node states
formatted_node_states = {'total': node_count}
formatted_node_states = {"total": node_count}
for state in node_state_combinations:
state_count = 0
for node_state in node_report_status:
@ -213,7 +250,7 @@ def getClusterInformation(zkhandler):
formatted_node_states[state] = state_count
# Format the VM states
formatted_vm_states = {'total': vm_count}
formatted_vm_states = {"total": vm_count}
for state in vm_state_combinations:
state_count = 0
for vm_state in vm_report_status:
@ -223,7 +260,7 @@ def getClusterInformation(zkhandler):
formatted_vm_states[state] = state_count
# Format the OSD states
formatted_osd_states = {'total': ceph_osd_count}
formatted_osd_states = {"total": ceph_osd_count}
for state in ceph_osd_state_combinations:
state_count = 0
for ceph_osd_state in ceph_osd_report_status:
@ -234,19 +271,19 @@ def getClusterInformation(zkhandler):
# Format the status data
cluster_information = {
'health': cluster_health,
'health_msg': cluster_health_msg,
'storage_health': storage_health,
'storage_health_msg': storage_health_msg,
'primary_node': common.getPrimaryNode(zkhandler),
'upstream_ip': zkhandler.read('base.config.upstream_ip'),
'nodes': formatted_node_states,
'vms': formatted_vm_states,
'networks': network_count,
'osds': formatted_osd_states,
'pools': ceph_pool_count,
'volumes': ceph_volume_count,
'snapshots': ceph_snapshot_count
"health": cluster_health,
"health_msg": cluster_health_msg,
"storage_health": storage_health,
"storage_health_msg": storage_health_msg,
"primary_node": common.getPrimaryNode(zkhandler),
"upstream_ip": zkhandler.read("base.config.upstream_ip"),
"nodes": formatted_node_states,
"vms": formatted_vm_states,
"networks": network_count,
"osds": formatted_osd_states,
"pools": ceph_pool_count,
"volumes": ceph_volume_count,
"snapshots": ceph_snapshot_count,
}
return cluster_information
@ -258,29 +295,32 @@ def get_info(zkhandler):
if cluster_information:
return True, cluster_information
else:
return False, 'ERROR: Failed to obtain cluster information!'
return False, "ERROR: Failed to obtain cluster information!"
def cluster_initialize(zkhandler, overwrite=False):
    """
    Initialize the cluster's Zookeeper database from the schema.

    Returns a (success, message) tuple. If the "base.config.primary_node" key
    already exists and overwrite is not set, nothing is changed and a failure
    is returned. With overwrite set, every key listed by the schema under
    "base" (except "root") is recursively deleted first; a failed delete aborts
    with a failure before the schema is applied.
    """
    # Abort if we've initialized the cluster before
    if zkhandler.exists("base.config.primary_node") and not overwrite:
        return False, "ERROR: Cluster contains data and overwrite not set."

    if overwrite:
        # Delete the existing keys
        for key in zkhandler.schema.keys("base"):
            if key == "root":
                # Don't delete the root key
                continue

            status = zkhandler.delete("base.{}".format(key), recursive=True)
            if not status:
                return (
                    False,
                    "ERROR: Failed to delete data in cluster; running nodes perhaps?",
                )

    # Create the root keys
    zkhandler.schema.apply(zkhandler)

    return True, "Successfully initialized cluster"
def cluster_backup(zkhandler):
@ -294,25 +334,25 @@ def cluster_backup(zkhandler):
cluster_data[path] = data
if children:
if path == '/':
child_prefix = '/'
if path == "/":
child_prefix = "/"
else:
child_prefix = path + '/'
child_prefix = path + "/"
for child in children:
if child_prefix + child == '/zookeeper':
if child_prefix + child == "/zookeeper":
# We must skip the built-in /zookeeper tree
continue
if child_prefix + child == '/patroni':
if child_prefix + child == "/patroni":
# We must skip the /patroni tree
continue
get_data(child_prefix + child)
try:
get_data('/')
get_data("/")
except Exception as e:
return False, 'ERROR: Failed to obtain backup: {}'.format(e)
return False, "ERROR: Failed to obtain backup: {}".format(e)
return True, cluster_data
@ -322,18 +362,23 @@ def cluster_restore(zkhandler, cluster_data):
kv = []
schema_version = None
for key in cluster_data:
if key == zkhandler.schema.path('base.schema.version'):
if key == zkhandler.schema.path("base.schema.version"):
schema_version = cluster_data[key]
data = cluster_data[key]
kv.append((key, data))
if int(schema_version) != int(zkhandler.schema.version):
return False, 'ERROR: Schema version of backup ({}) does not match cluster schema version ({}).'.format(schema_version, zkhandler.schema.version)
return (
False,
"ERROR: Schema version of backup ({}) does not match cluster schema version ({}).".format(
schema_version, zkhandler.schema.version
),
)
# Close the Zookeeper connection
result = zkhandler.write(kv)
if result:
return True, 'Restore completed successfully.'
return True, "Restore completed successfully."
else:
return False, 'Restore failed.'
return False, "Restore failed."

View File

@ -40,8 +40,8 @@ from functools import wraps
# Get performance statistics on a function or class
class Profiler(object):
def __init__(self, config):
self.is_debug = config['debug']
self.pvc_logdir = '/var/log/pvc'
self.is_debug = config["debug"]
self.pvc_logdir = "/var/log/pvc"
def __call__(self, function):
if not callable(function):
@ -58,11 +58,15 @@ class Profiler(object):
from datetime import datetime
if not path.exists(self.pvc_logdir):
print('Profiler: Requested profiling of {} but no log dir present; printing instead.'.format(str(function.__name__)))
print(
"Profiler: Requested profiling of {} but no log dir present; printing instead.".format(
str(function.__name__)
)
)
log_result = False
else:
log_result = True
profiler_logdir = '{}/profiler'.format(self.pvc_logdir)
profiler_logdir = "{}/profiler".format(self.pvc_logdir)
if not path.exists(profiler_logdir):
makedirs(profiler_logdir)
@ -76,12 +80,23 @@ class Profiler(object):
stats.sort_stats(pstats.SortKey.TIME)
if log_result:
stats.dump_stats(filename='{}/{}_{}.log'.format(profiler_logdir, str(function.__name__), str(datetime.now()).replace(' ', '_')))
stats.dump_stats(
filename="{}/{}_{}.log".format(
profiler_logdir,
str(function.__name__),
str(datetime.now()).replace(" ", "_"),
)
)
else:
print('Profiler stats for function {} at {}:'.format(str(function.__name__), str(datetime.now())))
print(
"Profiler stats for function {} at {}:".format(
str(function.__name__), str(datetime.now())
)
)
stats.print_stats()
return ret
return profiler_wrapper
@ -97,7 +112,7 @@ class OSDaemon(object):
command = shlex_split(command_string)
# Set stdout to be a logfile if set
if logfile:
stdout = open(logfile, 'a')
stdout = open(logfile, "a")
else:
stdout = subprocess.PIPE
@ -112,10 +127,10 @@ class OSDaemon(object):
# Signal the process
def signal(self, sent_signal):
signal_map = {
'hup': signal.SIGHUP,
'int': signal.SIGINT,
'term': signal.SIGTERM,
'kill': signal.SIGKILL
"hup": signal.SIGHUP,
"int": signal.SIGINT,
"term": signal.SIGTERM,
"kill": signal.SIGKILL,
}
self.proc.send_signal(signal_map[sent_signal])
@ -131,6 +146,7 @@ def run_os_daemon(command_string, environment=None, logfile=None):
def run_os_command(command_string, background=False, environment=None, timeout=None):
command = shlex_split(command_string)
if background:
def runcmd():
try:
subprocess.run(
@ -142,6 +158,7 @@ def run_os_command(command_string, background=False, environment=None, timeout=N
)
except subprocess.TimeoutExpired:
pass
thread = Thread(target=runcmd, args=())
thread.start()
return 0, None, None
@ -161,13 +178,13 @@ def run_os_command(command_string, background=False, environment=None, timeout=N
retcode = 255
try:
stdout = command_output.stdout.decode('ascii')
stdout = command_output.stdout.decode("ascii")
except Exception:
stdout = ''
stdout = ""
try:
stderr = command_output.stderr.decode('ascii')
stderr = command_output.stderr.decode("ascii")
except Exception:
stderr = ''
stderr = ""
return retcode, stdout, stderr
@ -187,7 +204,7 @@ def validateUUID(dom_uuid):
#
def getDomainXML(zkhandler, dom_uuid):
try:
xml = zkhandler.read(('domain.xml', dom_uuid))
xml = zkhandler.read(("domain.xml", dom_uuid))
except Exception:
return None
@ -208,16 +225,20 @@ def getDomainMainDetails(parsed_xml):
ddescription = "N/A"
dname = str(parsed_xml.name)
dmemory = str(parsed_xml.memory)
dmemory_unit = str(parsed_xml.memory.attrib.get('unit'))
if dmemory_unit == 'KiB':
dmemory_unit = str(parsed_xml.memory.attrib.get("unit"))
if dmemory_unit == "KiB":
dmemory = int(int(dmemory) / 1024)
elif dmemory_unit == 'GiB':
elif dmemory_unit == "GiB":
dmemory = int(int(dmemory) * 1024)
dvcpu = str(parsed_xml.vcpu)
try:
dvcputopo = '{}/{}/{}'.format(parsed_xml.cpu.topology.attrib.get('sockets'), parsed_xml.cpu.topology.attrib.get('cores'), parsed_xml.cpu.topology.attrib.get('threads'))
dvcputopo = "{}/{}/{}".format(
parsed_xml.cpu.topology.attrib.get("sockets"),
parsed_xml.cpu.topology.attrib.get("cores"),
parsed_xml.cpu.topology.attrib.get("threads"),
)
except Exception:
dvcputopo = 'N/A'
dvcputopo = "N/A"
return duuid, dname, ddescription, dmemory, dvcpu, dvcputopo
@ -227,9 +248,9 @@ def getDomainMainDetails(parsed_xml):
#
def getDomainExtraDetails(parsed_xml):
    """
    Extract secondary details from a parsed domain XML object.

    Returns a (type, arch, machine, console, emulator) tuple of strings, read
    from the <os><type> element and its "arch"/"machine" attributes, the
    "type" attribute of <devices><console>, and <devices><emulator>.

    Raises KeyError if the "arch", "machine" or console "type" attribute is
    absent, since those are read by subscript.
    """
    os_type = parsed_xml.os.type
    devices = parsed_xml.devices

    dtype = str(os_type)
    darch = str(os_type.attrib["arch"])
    dmachine = str(os_type.attrib["machine"])
    dconsole = str(devices.console.attrib["type"])
    demulator = str(devices.emulator)

    return dtype, darch, dmachine, dconsole, demulator
@ -255,37 +276,41 @@ def getDomainCPUFeatures(parsed_xml):
def getDomainDisks(parsed_xml, stats_data):
ddisks = []
for device in parsed_xml.devices.getchildren():
if device.tag == 'disk':
if device.tag == "disk":
disk_attrib = device.source.attrib
disk_target = device.target.attrib
disk_type = device.attrib.get('type')
disk_stats_list = [x for x in stats_data.get('disk_stats', []) if x.get('name') == disk_attrib.get('name')]
disk_type = device.attrib.get("type")
disk_stats_list = [
x
for x in stats_data.get("disk_stats", [])
if x.get("name") == disk_attrib.get("name")
]
try:
disk_stats = disk_stats_list[0]
except Exception:
disk_stats = {}
if disk_type == 'network':
if disk_type == "network":
disk_obj = {
'type': disk_attrib.get('protocol'),
'name': disk_attrib.get('name'),
'dev': disk_target.get('dev'),
'bus': disk_target.get('bus'),
'rd_req': disk_stats.get('rd_req', 0),
'rd_bytes': disk_stats.get('rd_bytes', 0),
'wr_req': disk_stats.get('wr_req', 0),
'wr_bytes': disk_stats.get('wr_bytes', 0)
"type": disk_attrib.get("protocol"),
"name": disk_attrib.get("name"),
"dev": disk_target.get("dev"),
"bus": disk_target.get("bus"),
"rd_req": disk_stats.get("rd_req", 0),
"rd_bytes": disk_stats.get("rd_bytes", 0),
"wr_req": disk_stats.get("wr_req", 0),
"wr_bytes": disk_stats.get("wr_bytes", 0),
}
elif disk_type == 'file':
elif disk_type == "file":
disk_obj = {
'type': 'file',
'name': disk_attrib.get('file'),
'dev': disk_target.get('dev'),
'bus': disk_target.get('bus'),
'rd_req': disk_stats.get('rd_req', 0),
'rd_bytes': disk_stats.get('rd_bytes', 0),
'wr_req': disk_stats.get('wr_req', 0),
'wr_bytes': disk_stats.get('wr_bytes', 0)
"type": "file",
"name": disk_attrib.get("file"),
"dev": disk_target.get("dev"),
"bus": disk_target.get("bus"),
"rd_req": disk_stats.get("rd_req", 0),
"rd_bytes": disk_stats.get("rd_bytes", 0),
"wr_req": disk_stats.get("wr_req", 0),
"wr_bytes": disk_stats.get("wr_bytes", 0),
}
else:
disk_obj = {}
@ -300,8 +325,8 @@ def getDomainDisks(parsed_xml, stats_data):
def getDomainDiskList(zkhandler, dom_uuid):
    """
    Return the list of disk names for the domain with the given UUID.

    The names are the "name" values of the entries under the "disks" key of
    the dict returned by getInformationFromXML().
    """
    domain_information = getInformationFromXML(zkhandler, dom_uuid)
    return [disk["name"] for disk in domain_information["disks"]]
@ -317,10 +342,14 @@ def getDomainTags(zkhandler, dom_uuid):
"""
tags = list()
for tag in zkhandler.children(('domain.meta.tags', dom_uuid)):
tag_type = zkhandler.read(('domain.meta.tags', dom_uuid, 'tag.type', tag))
protected = bool(strtobool(zkhandler.read(('domain.meta.tags', dom_uuid, 'tag.protected', tag))))
tags.append({'name': tag, 'type': tag_type, 'protected': protected})
for tag in zkhandler.children(("domain.meta.tags", dom_uuid)):
tag_type = zkhandler.read(("domain.meta.tags", dom_uuid, "tag.type", tag))
protected = bool(
strtobool(
zkhandler.read(("domain.meta.tags", dom_uuid, "tag.protected", tag))
)
)
tags.append({"name": tag, "type": tag_type, "protected": protected})
return tags
@ -334,20 +363,25 @@ def getDomainMetadata(zkhandler, dom_uuid):
The UUID must be validated before calling this function!
"""
domain_node_limit = zkhandler.read(('domain.meta.node_limit', dom_uuid))
domain_node_selector = zkhandler.read(('domain.meta.node_selector', dom_uuid))
domain_node_autostart = zkhandler.read(('domain.meta.autostart', dom_uuid))
domain_migration_method = zkhandler.read(('domain.meta.migrate_method', dom_uuid))
domain_node_limit = zkhandler.read(("domain.meta.node_limit", dom_uuid))
domain_node_selector = zkhandler.read(("domain.meta.node_selector", dom_uuid))
domain_node_autostart = zkhandler.read(("domain.meta.autostart", dom_uuid))
domain_migration_method = zkhandler.read(("domain.meta.migrate_method", dom_uuid))
if not domain_node_limit:
domain_node_limit = None
else:
domain_node_limit = domain_node_limit.split(',')
domain_node_limit = domain_node_limit.split(",")
if not domain_node_autostart:
domain_node_autostart = None
return domain_node_limit, domain_node_selector, domain_node_autostart, domain_migration_method
return (
domain_node_limit,
domain_node_selector,
domain_node_autostart,
domain_migration_method,
)
#
@ -358,25 +392,30 @@ def getInformationFromXML(zkhandler, uuid):
Gather information about a VM from the Libvirt XML configuration in the Zookeeper database
and return a dict() containing it.
"""
domain_state = zkhandler.read(('domain.state', uuid))
domain_node = zkhandler.read(('domain.node', uuid))
domain_lastnode = zkhandler.read(('domain.last_node', uuid))
domain_failedreason = zkhandler.read(('domain.failed_reason', uuid))
domain_state = zkhandler.read(("domain.state", uuid))
domain_node = zkhandler.read(("domain.node", uuid))
domain_lastnode = zkhandler.read(("domain.last_node", uuid))
domain_failedreason = zkhandler.read(("domain.failed_reason", uuid))
domain_node_limit, domain_node_selector, domain_node_autostart, domain_migration_method = getDomainMetadata(zkhandler, uuid)
(
domain_node_limit,
domain_node_selector,
domain_node_autostart,
domain_migration_method,
) = getDomainMetadata(zkhandler, uuid)
domain_tags = getDomainTags(zkhandler, uuid)
domain_profile = zkhandler.read(('domain.profile', uuid))
domain_profile = zkhandler.read(("domain.profile", uuid))
domain_vnc = zkhandler.read(('domain.console.vnc', uuid))
domain_vnc = zkhandler.read(("domain.console.vnc", uuid))
if domain_vnc:
domain_vnc_listen, domain_vnc_port = domain_vnc.split(':')
domain_vnc_listen, domain_vnc_port = domain_vnc.split(":")
else:
domain_vnc_listen = 'None'
domain_vnc_port = 'None'
domain_vnc_listen = "None"
domain_vnc_port = "None"
parsed_xml = getDomainXML(zkhandler, uuid)
stats_data = zkhandler.read(('domain.stats', uuid))
stats_data = zkhandler.read(("domain.stats", uuid))
if stats_data is not None:
try:
stats_data = loads(stats_data)
@ -385,54 +424,66 @@ def getInformationFromXML(zkhandler, uuid):
else:
stats_data = {}
domain_uuid, domain_name, domain_description, domain_memory, domain_vcpu, domain_vcputopo = getDomainMainDetails(parsed_xml)
(
domain_uuid,
domain_name,
domain_description,
domain_memory,
domain_vcpu,
domain_vcputopo,
) = getDomainMainDetails(parsed_xml)
domain_networks = getDomainNetworks(parsed_xml, stats_data)
domain_type, domain_arch, domain_machine, domain_console, domain_emulator = getDomainExtraDetails(parsed_xml)
(
domain_type,
domain_arch,
domain_machine,
domain_console,
domain_emulator,
) = getDomainExtraDetails(parsed_xml)
domain_features = getDomainCPUFeatures(parsed_xml)
domain_disks = getDomainDisks(parsed_xml, stats_data)
domain_controllers = getDomainControllers(parsed_xml)
if domain_lastnode:
domain_migrated = 'from {}'.format(domain_lastnode)
domain_migrated = "from {}".format(domain_lastnode)
else:
domain_migrated = 'no'
domain_migrated = "no"
domain_information = {
'name': domain_name,
'uuid': domain_uuid,
'state': domain_state,
'node': domain_node,
'last_node': domain_lastnode,
'migrated': domain_migrated,
'failed_reason': domain_failedreason,
'node_limit': domain_node_limit,
'node_selector': domain_node_selector,
'node_autostart': bool(strtobool(domain_node_autostart)),
'migration_method': domain_migration_method,
'tags': domain_tags,
'description': domain_description,
'profile': domain_profile,
'memory': int(domain_memory),
'memory_stats': stats_data.get('mem_stats', {}),
'vcpu': int(domain_vcpu),
'vcpu_topology': domain_vcputopo,
'vcpu_stats': stats_data.get('cpu_stats', {}),
'networks': domain_networks,
'type': domain_type,
'arch': domain_arch,
'machine': domain_machine,
'console': domain_console,
'vnc': {
'listen': domain_vnc_listen,
'port': domain_vnc_port
},
'emulator': domain_emulator,
'features': domain_features,
'disks': domain_disks,
'controllers': domain_controllers,
'xml': lxml.etree.tostring(parsed_xml, encoding='ascii', method='xml').decode().replace('\"', '\'')
"name": domain_name,
"uuid": domain_uuid,
"state": domain_state,
"node": domain_node,
"last_node": domain_lastnode,
"migrated": domain_migrated,
"failed_reason": domain_failedreason,
"node_limit": domain_node_limit,
"node_selector": domain_node_selector,
"node_autostart": bool(strtobool(domain_node_autostart)),
"migration_method": domain_migration_method,
"tags": domain_tags,
"description": domain_description,
"profile": domain_profile,
"memory": int(domain_memory),
"memory_stats": stats_data.get("mem_stats", {}),
"vcpu": int(domain_vcpu),
"vcpu_topology": domain_vcputopo,
"vcpu_stats": stats_data.get("cpu_stats", {}),
"networks": domain_networks,
"type": domain_type,
"arch": domain_arch,
"machine": domain_machine,
"console": domain_console,
"vnc": {"listen": domain_vnc_listen, "port": domain_vnc_port},
"emulator": domain_emulator,
"features": domain_features,
"disks": domain_disks,
"controllers": domain_controllers,
"xml": lxml.etree.tostring(parsed_xml, encoding="ascii", method="xml")
.decode()
.replace('"', "'"),
}
return domain_information
@ -444,14 +495,14 @@ def getInformationFromXML(zkhandler, uuid):
def getDomainNetworks(parsed_xml, stats_data):
dnets = []
for device in parsed_xml.devices.getchildren():
if device.tag == 'interface':
if device.tag == "interface":
try:
net_type = device.attrib.get('type')
net_type = device.attrib.get("type")
except Exception:
net_type = None
try:
net_mac = device.mac.attrib.get('address')
net_mac = device.mac.attrib.get("address")
except Exception:
net_mac = None
@ -461,48 +512,52 @@ def getDomainNetworks(parsed_xml, stats_data):
net_bridge = None
try:
net_model = device.model.attrib.get('type')
net_model = device.model.attrib.get("type")
except Exception:
net_model = None
try:
net_stats_list = [x for x in stats_data.get('net_stats', []) if x.get('bridge') == net_bridge]
net_stats_list = [
x
for x in stats_data.get("net_stats", [])
if x.get("bridge") == net_bridge
]
net_stats = net_stats_list[0]
except Exception:
net_stats = {}
net_rd_bytes = net_stats.get('rd_bytes', 0)
net_rd_packets = net_stats.get('rd_packets', 0)
net_rd_errors = net_stats.get('rd_errors', 0)
net_rd_drops = net_stats.get('rd_drops', 0)
net_wr_bytes = net_stats.get('wr_bytes', 0)
net_wr_packets = net_stats.get('wr_packets', 0)
net_wr_errors = net_stats.get('wr_errors', 0)
net_wr_drops = net_stats.get('wr_drops', 0)
net_rd_bytes = net_stats.get("rd_bytes", 0)
net_rd_packets = net_stats.get("rd_packets", 0)
net_rd_errors = net_stats.get("rd_errors", 0)
net_rd_drops = net_stats.get("rd_drops", 0)
net_wr_bytes = net_stats.get("wr_bytes", 0)
net_wr_packets = net_stats.get("wr_packets", 0)
net_wr_errors = net_stats.get("wr_errors", 0)
net_wr_drops = net_stats.get("wr_drops", 0)
if net_type == 'direct':
net_vni = 'macvtap:' + device.source.attrib.get('dev')
net_bridge = device.source.attrib.get('dev')
elif net_type == 'hostdev':
net_vni = 'hostdev:' + str(device.sriov_device)
if net_type == "direct":
net_vni = "macvtap:" + device.source.attrib.get("dev")
net_bridge = device.source.attrib.get("dev")
elif net_type == "hostdev":
net_vni = "hostdev:" + str(device.sriov_device)
net_bridge = str(device.sriov_device)
else:
net_vni = re_match(r'[vm]*br([0-9a-z]+)', net_bridge).group(1)
net_vni = re_match(r"[vm]*br([0-9a-z]+)", net_bridge).group(1)
net_obj = {
'type': net_type,
'vni': net_vni,
'mac': net_mac,
'source': net_bridge,
'model': net_model,
'rd_bytes': net_rd_bytes,
'rd_packets': net_rd_packets,
'rd_errors': net_rd_errors,
'rd_drops': net_rd_drops,
'wr_bytes': net_wr_bytes,
'wr_packets': net_wr_packets,
'wr_errors': net_wr_errors,
'wr_drops': net_wr_drops
"type": net_type,
"vni": net_vni,
"mac": net_mac,
"source": net_bridge,
"model": net_model,
"rd_bytes": net_rd_bytes,
"rd_packets": net_rd_packets,
"rd_errors": net_rd_errors,
"rd_drops": net_rd_drops,
"wr_bytes": net_wr_bytes,
"wr_packets": net_wr_packets,
"wr_errors": net_wr_errors,
"wr_drops": net_wr_drops,
}
dnets.append(net_obj)
@ -515,13 +570,13 @@ def getDomainNetworks(parsed_xml, stats_data):
def getDomainControllers(parsed_xml):
    """
    Return a list of {"type", "model"} dicts, one per <controller> device.

    Children of parsed_xml.devices with any other tag are ignored. A
    controller without a "model" attribute is reported with model "none".
    """
    dcontrollers = []
    for device in parsed_xml.devices.getchildren():
        if device.tag != "controller":
            continue

        controller_type = device.attrib.get("type")
        # .get() returns None rather than raising KeyError for a missing
        # attribute, so the "none" fallback has to be passed as its default;
        # wrapping the call in try/except KeyError never triggers it.
        controller_model = device.attrib.get("model", "none")
        dcontrollers.append({"type": controller_type, "model": controller_model})

    return dcontrollers
@ -531,7 +586,7 @@ def getDomainControllers(parsed_xml):
# Verify node is valid in cluster
#
def verifyNode(zkhandler, node):
    """
    Check whether a node is present in the cluster.

    Returns the result of zkhandler.exists() for the ("node", <node>) key.
    """
    return zkhandler.exists(("node", node))
#
@ -541,11 +596,11 @@ def getPrimaryNode(zkhandler):
failcount = 0
while True:
try:
primary_node = zkhandler.read('base.config.primary_node')
primary_node = zkhandler.read("base.config.primary_node")
except Exception:
primary_node == 'none'
primary_node == "none"
if primary_node == 'none':
if primary_node == "none":
raise
time.sleep(1)
failcount += 1
@ -565,7 +620,7 @@ def getPrimaryNode(zkhandler):
def findTargetNode(zkhandler, dom_uuid):
# Determine VM node limits; set config value if read fails
try:
node_limit = zkhandler.read(('domain.meta.node_limit', dom_uuid)).split(',')
node_limit = zkhandler.read(("domain.meta.node_limit", dom_uuid)).split(",")
if not any(node_limit):
node_limit = None
except Exception:
@ -573,22 +628,22 @@ def findTargetNode(zkhandler, dom_uuid):
# Determine VM search field or use default; set config value if read fails
try:
search_field = zkhandler.read(('domain.meta.node_selector', dom_uuid))
search_field = zkhandler.read(("domain.meta.node_selector", dom_uuid))
except Exception:
search_field = None
# If our search field is invalid, use the default
if search_field is None or search_field == 'None':
search_field = zkhandler.read('base.config.migration_target_selector')
if search_field is None or search_field == "None":
search_field = zkhandler.read("base.config.migration_target_selector")
# Execute the search
if search_field == 'mem':
if search_field == "mem":
return findTargetNodeMem(zkhandler, node_limit, dom_uuid)
if search_field == 'load':
if search_field == "load":
return findTargetNodeLoad(zkhandler, node_limit, dom_uuid)
if search_field == 'vcpus':
if search_field == "vcpus":
return findTargetNodeVCPUs(zkhandler, node_limit, dom_uuid)
if search_field == 'vms':
if search_field == "vms":
return findTargetNodeVMs(zkhandler, node_limit, dom_uuid)
# Nothing was found
@ -600,20 +655,20 @@ def findTargetNode(zkhandler, dom_uuid):
#
def getNodes(zkhandler, node_limit, dom_uuid):
valid_node_list = []
full_node_list = zkhandler.children('base.node')
current_node = zkhandler.read(('domain.node', dom_uuid))
full_node_list = zkhandler.children("base.node")
current_node = zkhandler.read(("domain.node", dom_uuid))
for node in full_node_list:
if node_limit and node not in node_limit:
continue
daemon_state = zkhandler.read(('node.state.daemon', node))
domain_state = zkhandler.read(('node.state.domain', node))
daemon_state = zkhandler.read(("node.state.daemon", node))
domain_state = zkhandler.read(("node.state.domain", node))
if node == current_node:
continue
if daemon_state != 'run' or domain_state != 'ready':
if daemon_state != "run" or domain_state != "ready":
continue
valid_node_list.append(node)
@ -630,9 +685,9 @@ def findTargetNodeMem(zkhandler, node_limit, dom_uuid):
node_list = getNodes(zkhandler, node_limit, dom_uuid)
for node in node_list:
memprov = int(zkhandler.read(('node.memory.provisioned', node)))
memused = int(zkhandler.read(('node.memory.used', node)))
memfree = int(zkhandler.read(('node.memory.free', node)))
memprov = int(zkhandler.read(("node.memory.provisioned", node)))
memused = int(zkhandler.read(("node.memory.used", node)))
memfree = int(zkhandler.read(("node.memory.free", node)))
memtotal = memused + memfree
provfree = memtotal - memprov
@ -652,7 +707,7 @@ def findTargetNodeLoad(zkhandler, node_limit, dom_uuid):
node_list = getNodes(zkhandler, node_limit, dom_uuid)
for node in node_list:
load = float(zkhandler.read(('node.cpu.load', node)))
load = float(zkhandler.read(("node.cpu.load", node)))
if load < least_load:
least_load = load
@ -670,7 +725,7 @@ def findTargetNodeVCPUs(zkhandler, node_limit, dom_uuid):
node_list = getNodes(zkhandler, node_limit, dom_uuid)
for node in node_list:
vcpus = int(zkhandler.read(('node.vcpu.allocated', node)))
vcpus = int(zkhandler.read(("node.vcpu.allocated", node)))
if vcpus < least_vcpus:
least_vcpus = vcpus
@ -688,7 +743,7 @@ def findTargetNodeVMs(zkhandler, node_limit, dom_uuid):
node_list = getNodes(zkhandler, node_limit, dom_uuid)
for node in node_list:
vms = int(zkhandler.read(('node.count.provisioned_domains', node)))
vms = int(zkhandler.read(("node.count.provisioned_domains", node)))
if vms < least_vms:
least_vms = vms
@ -710,25 +765,33 @@ def runRemoteCommand(node, command, become=False):
class DnssecPolicy(paramiko.client.MissingHostKeyPolicy):
def missing_host_key(self, client, hostname, key):
sshfp_expect = hashlib.sha1(key.asbytes()).hexdigest()
ans = dns.resolver.query(hostname, 'SSHFP')
ans = dns.resolver.query(hostname, "SSHFP")
if not ans.response.flags & dns.flags.DO:
raise AssertionError('Answer is not DNSSEC signed')
raise AssertionError("Answer is not DNSSEC signed")
for answer in ans.response.answer:
for item in answer.items:
if sshfp_expect in item.to_text():
client._log(paramiko.common.DEBUG, 'Found {} in SSHFP for host {}'.format(key.get_name(), hostname))
client._log(
paramiko.common.DEBUG,
"Found {} in SSHFP for host {}".format(
key.get_name(), hostname
),
)
return
raise AssertionError('SSHFP not published in DNS')
raise AssertionError("SSHFP not published in DNS")
if become:
command = 'sudo ' + command
command = "sudo " + command
ssh_client = paramiko.client.SSHClient()
ssh_client.load_system_host_keys()
ssh_client.set_missing_host_key_policy(DnssecPolicy())
ssh_client.connect(node)
stdin, stdout, stderr = ssh_client.exec_command(command)
return stdout.read().decode('ascii').rstrip(), stderr.read().decode('ascii').rstrip()
return (
stdout.read().decode("ascii").rstrip(),
stderr.read().decode("ascii").rstrip(),
)
#
@ -736,29 +799,20 @@ def runRemoteCommand(node, command, become=False):
#
def reload_firewall_rules(rules_file, logger=None):
    """
    Reload the nftables ruleset from a rules file.

    Runs "/usr/sbin/nft -f <rules_file>" through run_os_command. If a logger is
    given, the attempt is announced (state "o") and a non-zero exit status is
    reported together with the command's stderr (state "e"). Without a logger a
    failure is silent. Nothing is returned.
    """
    if logger is not None:
        logger.out("Reloading firewall configuration", state="o")

    retcode, stdout, stderr = run_os_command("/usr/sbin/nft -f {}".format(rules_file))
    if retcode != 0 and logger is not None:
        logger.out("Failed to reload configuration: {}".format(stderr), state="e")
#
# Create an IP address
#
def createIPAddress(ipaddr, cidrnetmask, dev):
    """
    Add the address ipaddr/cidrnetmask to the interface dev.

    After adding the address, arping is run on the same interface with the
    address as both source (-S) and target. The results of both commands are
    ignored and nothing is returned.
    """
    run_os_command("ip address add {}/{} dev {}".format(ipaddr, cidrnetmask, dev))
    # NOTE(review): presumably an unsolicited ARP announcement so that neighbours
    # learn the new address quickly; confirm against the arping variant in use.
    run_os_command(
        "arping -P -U -W 0.02 -c 2 -i {dev} -S {ip} {ip}".format(dev=dev, ip=ipaddr)
    )
@ -766,13 +820,7 @@ def createIPAddress(ipaddr, cidrnetmask, dev):
# Remove an IP address
#
def removeIPAddress(ipaddr, cidrnetmask, dev):
    """
    Delete the address ipaddr/cidrnetmask from the interface dev.

    The result of the "ip address delete" command is ignored and nothing is
    returned.
    """
    run_os_command("ip address delete {}/{} dev {}".format(ipaddr, cidrnetmask, dev))
#
@ -792,6 +840,6 @@ def sortInterfaceNames(interface_names):
http://nedbatchelder.com/blog/200712/human_sorting.html
(See Toothy's implementation in the comments)
"""
return [atoi(c) for c in re_split(r'(\d+)', text)]
return [atoi(c) for c in re_split(r"(\d+)", text)]
return sorted(interface_names, key=natural_keys)

View File

@ -34,56 +34,56 @@ class Logger(object):
# formatted in various ways based off secondary characteristics.
# ANSI colours for output
fmt_red = '\033[91m'
fmt_green = '\033[92m'
fmt_yellow = '\033[93m'
fmt_blue = '\033[94m'
fmt_purple = '\033[95m'
fmt_cyan = '\033[96m'
fmt_white = '\033[97m'
fmt_bold = '\033[1m'
fmt_end = '\033[0m'
fmt_red = "\033[91m"
fmt_green = "\033[92m"
fmt_yellow = "\033[93m"
fmt_blue = "\033[94m"
fmt_purple = "\033[95m"
fmt_cyan = "\033[96m"
fmt_white = "\033[97m"
fmt_bold = "\033[1m"
fmt_end = "\033[0m"
last_colour = ''
last_prompt = ''
last_colour = ""
last_prompt = ""
# Format maps
format_map_colourized = {
# Colourized formatting with chevron prompts (log_colours = True)
'o': {'colour': fmt_green, 'prompt': '>>> '},
'e': {'colour': fmt_red, 'prompt': '>>> '},
'w': {'colour': fmt_yellow, 'prompt': '>>> '},
't': {'colour': fmt_purple, 'prompt': '>>> '},
'i': {'colour': fmt_blue, 'prompt': '>>> '},
's': {'colour': fmt_cyan, 'prompt': '>>> '},
'd': {'colour': fmt_white, 'prompt': '>>> '},
'x': {'colour': last_colour, 'prompt': last_prompt}
"o": {"colour": fmt_green, "prompt": ">>> "},
"e": {"colour": fmt_red, "prompt": ">>> "},
"w": {"colour": fmt_yellow, "prompt": ">>> "},
"t": {"colour": fmt_purple, "prompt": ">>> "},
"i": {"colour": fmt_blue, "prompt": ">>> "},
"s": {"colour": fmt_cyan, "prompt": ">>> "},
"d": {"colour": fmt_white, "prompt": ">>> "},
"x": {"colour": last_colour, "prompt": last_prompt},
}
format_map_textual = {
# Uncolourized formatting with text prompts (log_colours = False)
'o': {'colour': '', 'prompt': 'ok: '},
'e': {'colour': '', 'prompt': 'failed: '},
'w': {'colour': '', 'prompt': 'warning: '},
't': {'colour': '', 'prompt': 'tick: '},
'i': {'colour': '', 'prompt': 'info: '},
's': {'colour': '', 'prompt': 'system: '},
'd': {'colour': '', 'prompt': 'debug: '},
'x': {'colour': '', 'prompt': last_prompt}
"o": {"colour": "", "prompt": "ok: "},
"e": {"colour": "", "prompt": "failed: "},
"w": {"colour": "", "prompt": "warning: "},
"t": {"colour": "", "prompt": "tick: "},
"i": {"colour": "", "prompt": "info: "},
"s": {"colour": "", "prompt": "system: "},
"d": {"colour": "", "prompt": "debug: "},
"x": {"colour": "", "prompt": last_prompt},
}
# Initialization of instance
def __init__(self, config):
    """
    Set up the logger according to the given configuration mapping.

    Keys read here:
      file_logging       if true, open <log_directory>/pvc.log for appending
      log_directory      directory of the log file (only read with file_logging)
      zookeeper_logging  if true, create a message queue and start a
                         ZookeeperLogger thread consuming it
    """
    self.config = config

    if self.config["file_logging"]:
        self.logfile = self.config["log_directory"] + "/pvc.log"
        # We open the logfile for the duration of our session, but have a hup function.
        # Text-mode files cannot be unbuffered in Python 3 (buffering=0 raises
        # ValueError), so use line buffering: every message written ends in a
        # newline and is therefore flushed immediately.
        self.writer = open(self.logfile, "a", buffering=1)

    self.last_colour = ""
    self.last_prompt = ""

    if self.config["zookeeper_logging"]:
        self.zookeeper_queue = Queue()
        self.zookeeper_logger = ZookeeperLogger(self.config, self.zookeeper_queue)
        self.zookeeper_logger.start()
@ -91,14 +91,14 @@ class Logger(object):
# Provide a hup function to close and reopen the writer
def hup(self):
    """
    Close and reopen the log file writer.

    Reopening by path means a log file that was moved or replaced is picked up
    again. Does nothing when file logging is disabled, since no writer exists
    in that case.
    """
    if not self.config["file_logging"]:
        return

    self.writer.close()
    # Line-buffered: unbuffered (buffering=0) text-mode files are rejected by
    # Python 3 with a ValueError.
    self.writer = open(self.logfile, "a", buffering=1)
# Provide a termination function so all messages are flushed before terminating the main daemon
def terminate(self):
if self.config['file_logging']:
if self.config["file_logging"]:
self.writer.close()
if self.config['zookeeper_logging']:
self.out("Waiting 15s for Zookeeper message queue to drain", state='s')
if self.config["zookeeper_logging"]:
self.out("Waiting 15s for Zookeeper message queue to drain", state="s")
tick_count = 0
while not self.zookeeper_queue.empty():
@ -111,48 +111,48 @@ class Logger(object):
self.zookeeper_logger.join()
# Output function
def out(self, message, state=None, prefix=''):
def out(self, message, state=None, prefix=""):
# Get the date
if self.config['log_dates']:
date = '{} '.format(datetime.now().strftime('%Y/%m/%d %H:%M:%S.%f'))
if self.config["log_dates"]:
date = "{} ".format(datetime.now().strftime("%Y/%m/%d %H:%M:%S.%f"))
else:
date = ''
date = ""
# Get the format map
if self.config['log_colours']:
if self.config["log_colours"]:
format_map = self.format_map_colourized
endc = Logger.fmt_end
else:
format_map = self.format_map_textual
endc = ''
endc = ""
# Define an undefined state as 'x'; no date in these prompts
if not state:
state = 'x'
date = ''
state = "x"
date = ""
# Get colour and prompt from the map
colour = format_map[state]['colour']
prompt = format_map[state]['prompt']
colour = format_map[state]["colour"]
prompt = format_map[state]["prompt"]
# Append space and separator to prefix
if prefix != '':
prefix = prefix + ' - '
if prefix != "":
prefix = prefix + " - "
# Assemble message string
message = colour + prompt + endc + date + prefix + message
# Log to stdout
if self.config['stdout_logging']:
if self.config["stdout_logging"]:
print(message)
# Log to file
if self.config['file_logging']:
self.writer.write(message + '\n')
if self.config["file_logging"]:
self.writer.write(message + "\n")
# Log to Zookeeper
if self.config['zookeeper_logging']:
if self.config["zookeeper_logging"]:
self.zookeeper_queue.put(message)
# Set last message variables
@ -165,10 +165,11 @@ class ZookeeperLogger(Thread):
Defines a threaded writer for Zookeeper logs. Threading prevents the blocking of other
daemon events while the records are written. They will be eventually-consistent
"""
def __init__(self, config, zookeeper_queue):
self.config = config
self.node = self.config['node']
self.max_lines = self.config['node_log_lines']
self.node = self.config["node"]
self.max_lines = self.config["node_log_lines"]
self.zookeeper_queue = zookeeper_queue
self.connected = False
self.running = False
@ -195,10 +196,7 @@ class ZookeeperLogger(Thread):
self.connected = True
# Ensure the root keys for this are instantiated
self.zkhandler.write([
('base.logs', ''),
(('logs', self.node), '')
])
self.zkhandler.write([("base.logs", ""), (("logs", self.node), "")])
def run(self):
while not self.connected:
@ -207,10 +205,10 @@ class ZookeeperLogger(Thread):
self.running = True
# Get the logs that are currently in Zookeeper and populate our deque
raw_logs = self.zkhandler.read(('logs.messages', self.node))
raw_logs = self.zkhandler.read(("logs.messages", self.node))
if raw_logs is None:
raw_logs = ''
logs = deque(raw_logs.split('\n'), self.max_lines)
raw_logs = ""
logs = deque(raw_logs.split("\n"), self.max_lines)
while self.running:
# Get a new message
try:
@ -220,19 +218,21 @@ class ZookeeperLogger(Thread):
except Exception:
continue
if not self.config['log_dates']:
if not self.config["log_dates"]:
# We want to log dates here, even if the log_dates config is not set
date = '{} '.format(datetime.now().strftime('%Y/%m/%d %H:%M:%S.%f'))
date = "{} ".format(datetime.now().strftime("%Y/%m/%d %H:%M:%S.%f"))
else:
date = ''
date = ""
# Add the message to the deque
logs.append(f'{date}{message}')
logs.append(f"{date}{message}")
tick_count = 0
while True:
try:
# Write the updated messages into Zookeeper
self.zkhandler.write([(('logs.messages', self.node), '\n'.join(logs))])
self.zkhandler.write(
[(("logs.messages", self.node), "\n".join(logs))]
)
break
except Exception:
# The write failed (connection loss, etc.) so retry for 15 seconds

File diff suppressed because it is too large Load Diff

View File

@ -29,50 +29,49 @@ def getNodeInformation(zkhandler, node_name):
"""
Gather information about a node from the Zookeeper database and return a dict() containing it.
"""
node_daemon_state = zkhandler.read(('node.state.daemon', node_name))
node_coordinator_state = zkhandler.read(('node.state.router', node_name))
node_domain_state = zkhandler.read(('node.state.domain', node_name))
node_static_data = zkhandler.read(('node.data.static', node_name)).split()
node_pvc_version = zkhandler.read(('node.data.pvc_version', node_name))
node_daemon_state = zkhandler.read(("node.state.daemon", node_name))
node_coordinator_state = zkhandler.read(("node.state.router", node_name))
node_domain_state = zkhandler.read(("node.state.domain", node_name))
node_static_data = zkhandler.read(("node.data.static", node_name)).split()
node_pvc_version = zkhandler.read(("node.data.pvc_version", node_name))
node_cpu_count = int(node_static_data[0])
node_kernel = node_static_data[1]
node_os = node_static_data[2]
node_arch = node_static_data[3]
node_vcpu_allocated = int(zkhandler.read(('node.vcpu.allocated', node_name)))
node_mem_total = int(zkhandler.read(('node.memory.total', node_name)))
node_mem_allocated = int(zkhandler.read(('node.memory.allocated', node_name)))
node_mem_provisioned = int(zkhandler.read(('node.memory.provisioned', node_name)))
node_mem_used = int(zkhandler.read(('node.memory.used', node_name)))
node_mem_free = int(zkhandler.read(('node.memory.free', node_name)))
node_load = float(zkhandler.read(('node.cpu.load', node_name)))
node_domains_count = int(zkhandler.read(('node.count.provisioned_domains', node_name)))
node_running_domains = zkhandler.read(('node.running_domains', node_name)).split()
node_vcpu_allocated = int(zkhandler.read(("node.vcpu.allocated", node_name)))
node_mem_total = int(zkhandler.read(("node.memory.total", node_name)))
node_mem_allocated = int(zkhandler.read(("node.memory.allocated", node_name)))
node_mem_provisioned = int(zkhandler.read(("node.memory.provisioned", node_name)))
node_mem_used = int(zkhandler.read(("node.memory.used", node_name)))
node_mem_free = int(zkhandler.read(("node.memory.free", node_name)))
node_load = float(zkhandler.read(("node.cpu.load", node_name)))
node_domains_count = int(
zkhandler.read(("node.count.provisioned_domains", node_name))
)
node_running_domains = zkhandler.read(("node.running_domains", node_name)).split()
# Construct a data structure to represent the data
node_information = {
'name': node_name,
'daemon_state': node_daemon_state,
'coordinator_state': node_coordinator_state,
'domain_state': node_domain_state,
'pvc_version': node_pvc_version,
'cpu_count': node_cpu_count,
'kernel': node_kernel,
'os': node_os,
'arch': node_arch,
'load': node_load,
'domains_count': node_domains_count,
'running_domains': node_running_domains,
'vcpu': {
'total': node_cpu_count,
'allocated': node_vcpu_allocated
"name": node_name,
"daemon_state": node_daemon_state,
"coordinator_state": node_coordinator_state,
"domain_state": node_domain_state,
"pvc_version": node_pvc_version,
"cpu_count": node_cpu_count,
"kernel": node_kernel,
"os": node_os,
"arch": node_arch,
"load": node_load,
"domains_count": node_domains_count,
"running_domains": node_running_domains,
"vcpu": {"total": node_cpu_count, "allocated": node_vcpu_allocated},
"memory": {
"total": node_mem_total,
"allocated": node_mem_allocated,
"provisioned": node_mem_provisioned,
"used": node_mem_used,
"free": node_mem_free,
},
'memory': {
'total': node_mem_total,
'allocated': node_mem_allocated,
'provisioned': node_mem_provisioned,
'used': node_mem_used,
'free': node_mem_free
}
}
return node_information
@ -83,27 +82,32 @@ def getNodeInformation(zkhandler, node_name):
def secondary_node(zkhandler, node):
    """
    Request that a coordinator node leave the primary router role.

    Returns a (success, message) tuple. Fails if the node does not exist, is in
    "hypervisor" mode, or its daemon state is not "run". Succeeds without
    writing anything if the node's router state is already "secondary";
    otherwise the cluster's primary node key is set to "none".
    """
    # Verify node is valid
    if not common.verifyNode(zkhandler, node):
        return False, 'ERROR: No node named "{}" is present in the cluster.'.format(
            node
        )

    # Ensure node is a coordinator
    daemon_mode = zkhandler.read(("node.mode", node))
    if daemon_mode == "hypervisor":
        return (
            False,
            'ERROR: Cannot change router mode on non-coordinator node "{}"'.format(
                node
            ),
        )

    # Ensure node is in run daemonstate
    daemon_state = zkhandler.read(("node.state.daemon", node))
    if daemon_state != "run":
        return False, 'ERROR: Node "{}" is not active'.format(node)

    # Get current state
    current_state = zkhandler.read(("node.state.router", node))
    if current_state == "secondary":
        return True, 'Node "{}" is already in secondary router mode.'.format(node)

    retmsg = "Setting node {} in secondary router mode.".format(node)
    # Clearing the configured primary node is what triggers the change
    zkhandler.write([("base.config.primary_node", "none")])

    return True, retmsg
@ -111,27 +115,32 @@ def secondary_node(zkhandler, node):
def primary_node(zkhandler, node):
    """
    Request that a coordinator node take the primary router role.

    Returns a (success, message) tuple. Fails if the node does not exist, is in
    "hypervisor" mode, or its daemon state is not "run". Succeeds without
    writing anything if the node's router state is already "primary";
    otherwise the cluster's primary node key is set to this node's name.
    """
    # Verify node is valid
    if not common.verifyNode(zkhandler, node):
        return False, 'ERROR: No node named "{}" is present in the cluster.'.format(
            node
        )

    # Ensure node is a coordinator
    daemon_mode = zkhandler.read(("node.mode", node))
    if daemon_mode == "hypervisor":
        return (
            False,
            'ERROR: Cannot change router mode on non-coordinator node "{}"'.format(
                node
            ),
        )

    # Ensure node is in run daemonstate
    daemon_state = zkhandler.read(("node.state.daemon", node))
    if daemon_state != "run":
        return False, 'ERROR: Node "{}" is not active'.format(node)

    # Get current state
    current_state = zkhandler.read(("node.state.router", node))
    if current_state == "primary":
        return True, 'Node "{}" is already in primary router mode.'.format(node)

    retmsg = "Setting node {} in primary router mode.".format(node)
    # Naming this node as the configured primary is what triggers the change
    zkhandler.write([("base.config.primary_node", node)])

    return True, retmsg
@ -139,22 +148,22 @@ def primary_node(zkhandler, node):
def flush_node(zkhandler, node, wait=False):
    """
    Request that a node be flushed of its running VMs.

    Sets the node's domain state to "flush". With wait=True, polls once per
    second until the domain state is no longer "flush" before returning (there
    is no timeout). Returns a (success, message) tuple; fails only if the node
    does not exist, and succeeds immediately if the state is already "flushed".
    """
    # Verify node is valid
    if not common.verifyNode(zkhandler, node):
        return False, 'ERROR: No node named "{}" is present in the cluster.'.format(
            node
        )

    if zkhandler.read(("node.state.domain", node)) == "flushed":
        return True, "Hypervisor {} is already flushed.".format(node)

    retmsg = "Flushing hypervisor {} of running VMs.".format(node)

    # Request the flush by setting the node's domain state in Zookeeper
    zkhandler.write([(("node.state.domain", node), "flush")])

    if wait:
        while zkhandler.read(("node.state.domain", node)) == "flush":
            time.sleep(1)
        retmsg = "Flushed hypervisor {} of running VMs.".format(node)

    return True, retmsg
@ -162,22 +171,22 @@ def flush_node(zkhandler, node, wait=False):
def ready_node(zkhandler, node, wait=False):
    """
    Request that a node be restored to active service.

    Sets the node's domain state to "unflush". With wait=True, polls once per
    second until the domain state is no longer "unflush" before returning
    (there is no timeout). Returns a (success, message) tuple; fails only if
    the node does not exist, and succeeds immediately if the state is already
    "ready".
    """
    # Verify node is valid
    if not common.verifyNode(zkhandler, node):
        return False, 'ERROR: No node named "{}" is present in the cluster.'.format(
            node
        )

    if zkhandler.read(("node.state.domain", node)) == "ready":
        return True, "Hypervisor {} is already ready.".format(node)

    retmsg = "Restoring hypervisor {} to active service.".format(node)

    # Request the unflush by setting the node's domain state in Zookeeper
    zkhandler.write([(("node.state.domain", node), "unflush")])

    if wait:
        while zkhandler.read(("node.state.domain", node)) == "unflush":
            time.sleep(1)
        retmsg = "Restored hypervisor {} to active service.".format(node)

    return True, retmsg
@ -185,17 +194,19 @@ def ready_node(zkhandler, node, wait=False):
def get_node_log(zkhandler, node, lines=2000):
    """
    Return the last lines of a node's log as stored in Zookeeper.

    Returns a (success, text) tuple: (False, error message) if the node does
    not exist, (True, "") if no log is stored, otherwise (True, the final
    `lines` newline-separated entries joined back with newlines).

    NOTE: lines=0 yields the entire log, because the slice [-0:] selects the
    whole list.
    """
    # Verify node is valid
    if not common.verifyNode(zkhandler, node):
        return False, 'ERROR: No node named "{}" is present in the cluster.'.format(
            node
        )

    # Get the data from ZK
    node_log = zkhandler.read(("logs.messages", node))

    if node_log is None:
        return True, ""

    # Shrink the log buffer to length lines
    shrunk_log = node_log.split("\n")[-lines:]
    loglines = "\n".join(shrunk_log)

    return True, loglines
@ -203,7 +214,9 @@ def get_node_log(zkhandler, node, lines=2000):
def get_info(zkhandler, node):
# Verify node is valid
if not common.verifyNode(zkhandler, node):
return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node)
return False, 'ERROR: No node named "{}" is present in the cluster.'.format(
node
)
# Get information about node in a pretty format
node_information = getNodeInformation(zkhandler, node)
@ -213,20 +226,27 @@ def get_info(zkhandler, node):
return True, node_information
def get_list(zkhandler, limit, daemon_state=None, coordinator_state=None, domain_state=None, is_fuzzy=True):
def get_list(
zkhandler,
limit,
daemon_state=None,
coordinator_state=None,
domain_state=None,
is_fuzzy=True,
):
node_list = []
full_node_list = zkhandler.children('base.node')
full_node_list = zkhandler.children("base.node")
for node in full_node_list:
if limit:
try:
if not is_fuzzy:
limit = '^' + limit + '$'
limit = "^" + limit + "$"
if re.match(limit, node):
node_list.append(getNodeInformation(zkhandler, node))
except Exception as e:
return False, 'Regex Error: {}'.format(e)
return False, "Regex Error: {}".format(e)
else:
node_list.append(getNodeInformation(zkhandler, node))
@ -234,11 +254,11 @@ def get_list(zkhandler, limit, daemon_state=None, coordinator_state=None, domain
limited_node_list = []
for node in node_list:
add_node = False
if daemon_state and node['daemon_state'] == daemon_state:
if daemon_state and node["daemon_state"] == daemon_state:
add_node = True
if coordinator_state and node['coordinator_state'] == coordinator_state:
if coordinator_state and node["coordinator_state"] == coordinator_state:
add_node = True
if domain_state and node['domain_state'] == domain_state:
if domain_state and node["domain_state"] == domain_state:
add_node = True
if add_node:
limited_node_list.append(node)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -35,67 +35,77 @@ import yaml
def get_zookeeper_key():
    """
    Build the Zookeeper key that holds the DHCPv4 leases for this network.

    The bridge interface name is read from the DNSMASQ_BRIDGE_INTERFACE
    environment variable (passed by dnsmasq); the first run of digits in it is
    used as the network ID.

    Returns the key as a string, "/networks/<id>/dhcp4_leases". Prints an error
    to stderr and exits with status 1 if the variable is unset or contains no
    digits.
    """
    # Get the interface from environment (passed by dnsmasq)
    try:
        interface = os.environ["DNSMASQ_BRIDGE_INTERFACE"]
    except KeyError as e:
        print(
            "ERROR: DNSMASQ_BRIDGE_INTERFACE environment variable not found: {}".format(
                e
            ),
            file=sys.stderr,
        )
        sys.exit(1)

    # Get the ID of the interface (the digits)
    vni_match = re.search(r"\d+", interface)
    if vni_match is None:
        print(
            "ERROR: No network ID found in DNSMASQ_BRIDGE_INTERFACE value: {}".format(
                interface
            ),
            file=sys.stderr,
        )
        sys.exit(1)
    network_vni = vni_match.group(0)

    # Create the key
    zookeeper_key = "/networks/{}/dhcp4_leases".format(network_vni)
    return zookeeper_key
def get_lease_expiry():
    """
    Return the lease expiry from the DNSMASQ_LEASE_EXPIRES environment variable.

    The value is returned as the string found in the environment; "0" is
    returned when the variable is not set.
    """
    return os.environ.get("DNSMASQ_LEASE_EXPIRES", "0")
def get_client_id():
    """
    Return the DHCP client ID from the DNSMASQ_CLIENT_ID environment variable.

    "*" is returned when the variable is not set.
    """
    return os.environ.get("DNSMASQ_CLIENT_ID", "*")
def connect_zookeeper():
    """
    Open a Zookeeper connection using the coordinators from the node config.

    The configuration file path is taken from the PVCD_CONFIG_FILE environment
    variable, falling back to /etc/pvc/pvcnoded.yaml. The coordinator hosts are
    read from the pvc -> cluster -> coordinators entry of that YAML file.

    Returns the started KazooClient. Prints an error to stderr and exits with
    status 1 if the file cannot be parsed or the connection cannot be started.
    A missing or unreadable file is not handled here and propagates from open().
    """
    # We expect the environ to contain the config file; otherwise use the default place
    pvcnoded_config_file = os.environ.get("PVCD_CONFIG_FILE", "/etc/pvc/pvcnoded.yaml")

    with open(pvcnoded_config_file, "r") as cfgfile:
        try:
            # An explicit Loader is mandatory on PyYAML >= 6; SafeLoader also
            # avoids constructing arbitrary Python objects from the file.
            o_config = yaml.load(cfgfile, Loader=yaml.SafeLoader)
        except Exception as e:
            print(
                "ERROR: Failed to parse configuration file: {}".format(e),
                file=sys.stderr,
            )
            sys.exit(1)

    try:
        zk_conn = kazoo.client.KazooClient(
            hosts=o_config["pvc"]["cluster"]["coordinators"]
        )
        zk_conn.start()
    except Exception as e:
        print("ERROR: Failed to connect to Zookeeper: {}".format(e), file=sys.stderr)
        sys.exit(1)

    return zk_conn
def read_data(zk_conn, key):
    """
    Return the value stored at key, decoded from ASCII bytes to a string.

    Only the first element of the tuple returned by zk_conn.get() is used.
    """
    return zk_conn.get(key)[0].decode("ascii")
def get_lease(zk_conn, zk_leases_key, macaddr):
    """
    Read one lease record from Zookeeper.

    The fields are read from the expiry, ipaddr, hostname and clientid children
    of <zk_leases_key>/<macaddr>.

    Returns the tuple (expiry, ipaddr, hostname, clientid), each a string.
    """
    expiry = read_data(zk_conn, "{}/{}/expiry".format(zk_leases_key, macaddr))
    ipaddr = read_data(zk_conn, "{}/{}/ipaddr".format(zk_leases_key, macaddr))
    hostname = read_data(zk_conn, "{}/{}/hostname".format(zk_leases_key, macaddr))
    clientid = read_data(zk_conn, "{}/{}/clientid".format(zk_leases_key, macaddr))
    return expiry, ipaddr, hostname, clientid
@ -107,38 +117,50 @@ def read_lease_database(zk_conn, zk_leases_key):
output_list = []
for macaddr in leases_list:
expiry, ipaddr, hostname, clientid = get_lease(zk_conn, zk_leases_key, macaddr)
data_string = '{} {} {} {} {}'.format(expiry, macaddr, ipaddr, hostname, clientid)
print('Reading lease from Zookeeper: {}'.format(data_string), file=sys.stderr)
output_list.append('{}'.format(data_string))
data_string = "{} {} {} {} {}".format(
expiry, macaddr, ipaddr, hostname, clientid
)
print("Reading lease from Zookeeper: {}".format(data_string), file=sys.stderr)
output_list.append("{}".format(data_string))
# Output list
print('\n'.join(output_list))
print("\n".join(output_list))
def add_lease(zk_conn, zk_leases_key, expiry, macaddr, ipaddr, hostname, clientid):
    """
    Store a lease record in Zookeeper in a single transaction.

    Creates <zk_leases_key>/<macaddr> with an empty value, plus its expiry,
    ipaddr, hostname and clientid children holding the ASCII-encoded values.
    A missing hostname (None or empty) is stored as the empty string. The
    result of the transaction commit is not inspected.
    """
    if not hostname:
        hostname = ""

    transaction = zk_conn.transaction()
    transaction.create("{}/{}".format(zk_leases_key, macaddr), "".encode("ascii"))
    transaction.create(
        "{}/{}/expiry".format(zk_leases_key, macaddr), expiry.encode("ascii")
    )
    transaction.create(
        "{}/{}/ipaddr".format(zk_leases_key, macaddr), ipaddr.encode("ascii")
    )
    transaction.create(
        "{}/{}/hostname".format(zk_leases_key, macaddr), hostname.encode("ascii")
    )
    transaction.create(
        "{}/{}/clientid".format(zk_leases_key, macaddr), clientid.encode("ascii")
    )
    transaction.commit()
def del_lease(zk_conn, zk_leases_key, macaddr, expiry):
    """
    Remove a lease record, <zk_leases_key>/<macaddr>, and all of its children.

    The expiry argument is accepted but not used.
    """
    zk_conn.delete("{}/{}".format(zk_leases_key, macaddr), recursive=True)
#
# Instantiate the parser
#
parser = argparse.ArgumentParser(description='Store or retrieve dnsmasq leases in Zookeeper')
parser.add_argument('action', type=str, help='Action')
parser.add_argument('macaddr', type=str, help='MAC Address', nargs='?', default=None)
parser.add_argument('ipaddr', type=str, help='IP Address', nargs='?', default=None)
parser.add_argument('hostname', type=str, help='Hostname', nargs='?', default=None)
parser = argparse.ArgumentParser(
description="Store or retrieve dnsmasq leases in Zookeeper"
)
parser.add_argument("action", type=str, help="Action")
parser.add_argument("macaddr", type=str, help="MAC Address", nargs="?", default=None)
parser.add_argument("ipaddr", type=str, help="IP Address", nargs="?", default=None)
parser.add_argument("hostname", type=str, help="Hostname", nargs="?", default=None)
args = parser.parse_args()
action = args.action
@ -149,7 +171,7 @@ hostname = args.hostname
zk_conn = connect_zookeeper()
zk_leases_key = get_zookeeper_key()
if action == 'init':
if action == "init":
read_lease_database(zk_conn, zk_leases_key)
exit(0)
@ -159,10 +181,13 @@ clientid = get_client_id()
#
# Choose action
#
print('Lease action - {} {} {} {}'.format(action, macaddr, ipaddr, hostname), file=sys.stderr)
if action == 'add':
print(
"Lease action - {} {} {} {}".format(action, macaddr, ipaddr, hostname),
file=sys.stderr,
)
if action == "add":
add_lease(zk_conn, zk_leases_key, expiry, macaddr, ipaddr, hostname, clientid)
elif action == 'del':
elif action == "del":
del_lease(zk_conn, zk_leases_key, macaddr, expiry)
elif action == 'old':
elif action == "old":
pass

View File

@ -38,63 +38,73 @@ class CephOSDInstance(object):
self.size = None
self.stats = dict()
@self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('osd.node', self.osd_id))
def watch_osd_node(data, stat, event=''):
if event and event.type == 'DELETED':
@self.zkhandler.zk_conn.DataWatch(
self.zkhandler.schema.path("osd.node", self.osd_id)
)
def watch_osd_node(data, stat, event=""):
if event and event.type == "DELETED":
# The key has been deleted after existing before; terminate this watcher
# because this class instance is about to be reaped in Daemon.py
return False
try:
data = data.decode('ascii')
data = data.decode("ascii")
except AttributeError:
data = ''
data = ""
if data and data != self.node:
self.node = data
@self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('osd.stats', self.osd_id))
def watch_osd_stats(data, stat, event=''):
if event and event.type == 'DELETED':
@self.zkhandler.zk_conn.DataWatch(
self.zkhandler.schema.path("osd.stats", self.osd_id)
)
def watch_osd_stats(data, stat, event=""):
if event and event.type == "DELETED":
# The key has been deleted after existing before; terminate this watcher
# because this class instance is about to be reaped in Daemon.py
return False
try:
data = data.decode('ascii')
data = data.decode("ascii")
except AttributeError:
data = ''
data = ""
if data and data != self.stats:
self.stats = json.loads(data)
@staticmethod
def add_osd(zkhandler, logger, node, device, weight, ext_db_flag=False, ext_db_ratio=0.05):
def add_osd(
zkhandler, logger, node, device, weight, ext_db_flag=False, ext_db_ratio=0.05
):
# We are ready to create a new OSD on this node
logger.out('Creating new OSD disk on block device {}'.format(device), state='i')
logger.out("Creating new OSD disk on block device {}".format(device), state="i")
try:
# 1. Create an OSD; we do this so we know what ID will be gen'd
retcode, stdout, stderr = common.run_os_command('ceph osd create')
retcode, stdout, stderr = common.run_os_command("ceph osd create")
if retcode:
print('ceph osd create')
print("ceph osd create")
print(stdout)
print(stderr)
raise Exception
osd_id = stdout.rstrip()
# 2. Remove that newly-created OSD
retcode, stdout, stderr = common.run_os_command('ceph osd rm {}'.format(osd_id))
retcode, stdout, stderr = common.run_os_command(
"ceph osd rm {}".format(osd_id)
)
if retcode:
print('ceph osd rm')
print("ceph osd rm")
print(stdout)
print(stderr)
raise Exception
# 3a. Zap the disk to ensure it is ready to go
logger.out('Zapping disk {}'.format(device), state='i')
retcode, stdout, stderr = common.run_os_command('ceph-volume lvm zap --destroy {}'.format(device))
logger.out("Zapping disk {}".format(device), state="i")
retcode, stdout, stderr = common.run_os_command(
"ceph-volume lvm zap --destroy {}".format(device)
)
if retcode:
print('ceph-volume lvm zap')
print("ceph-volume lvm zap")
print(stdout)
print(stderr)
raise Exception
@ -103,9 +113,13 @@ class CephOSDInstance(object):
# 3b. Prepare the logical volume if ext_db_flag
if ext_db_flag:
_, osd_size_bytes, _ = common.run_os_command('blockdev --getsize64 {}'.format(device))
_, osd_size_bytes, _ = common.run_os_command(
"blockdev --getsize64 {}".format(device)
)
osd_size_bytes = int(osd_size_bytes)
result = CephOSDInstance.create_osd_db_lv(zkhandler, logger, osd_id, ext_db_ratio, osd_size_bytes)
result = CephOSDInstance.create_osd_db_lv(
zkhandler, logger, osd_id, ext_db_ratio, osd_size_bytes
)
if not result:
raise Exception
db_device = "osd-db/osd-{}".format(osd_id)
@ -114,63 +128,67 @@ class CephOSDInstance(object):
db_device = ""
# 3c. Create the OSD for real
logger.out('Preparing LVM for new OSD disk with ID {} on {}'.format(osd_id, device), state='i')
logger.out(
"Preparing LVM for new OSD disk with ID {} on {}".format(
osd_id, device
),
state="i",
)
retcode, stdout, stderr = common.run_os_command(
'ceph-volume lvm prepare --bluestore {devices}'.format(
osdid=osd_id,
devices=dev_flags
"ceph-volume lvm prepare --bluestore {devices}".format(
osdid=osd_id, devices=dev_flags
)
)
if retcode:
print('ceph-volume lvm prepare')
print("ceph-volume lvm prepare")
print(stdout)
print(stderr)
raise Exception
# 4a. Get OSD FSID
logger.out('Getting OSD FSID for ID {} on {}'.format(osd_id, device), state='i')
logger.out(
"Getting OSD FSID for ID {} on {}".format(osd_id, device), state="i"
)
retcode, stdout, stderr = common.run_os_command(
'ceph-volume lvm list {device}'.format(
osdid=osd_id,
device=device
"ceph-volume lvm list {device}".format(osdid=osd_id, device=device)
)
)
for line in stdout.split('\n'):
if 'osd fsid' in line:
for line in stdout.split("\n"):
if "osd fsid" in line:
osd_fsid = line.split()[-1]
if not osd_fsid:
print('ceph-volume lvm list')
print('Could not find OSD fsid in data:')
print("ceph-volume lvm list")
print("Could not find OSD fsid in data:")
print(stdout)
print(stderr)
raise Exception
# 4b. Activate the OSD
logger.out('Activating new OSD disk with ID {}'.format(osd_id, device), state='i')
logger.out(
"Activating new OSD disk with ID {}".format(osd_id, device), state="i"
)
retcode, stdout, stderr = common.run_os_command(
'ceph-volume lvm activate --bluestore {osdid} {osdfsid}'.format(
osdid=osd_id,
osdfsid=osd_fsid
"ceph-volume lvm activate --bluestore {osdid} {osdfsid}".format(
osdid=osd_id, osdfsid=osd_fsid
)
)
if retcode:
print('ceph-volume lvm activate')
print("ceph-volume lvm activate")
print(stdout)
print(stderr)
raise Exception
# 5. Add it to the crush map
logger.out('Adding new OSD disk with ID {} to CRUSH map'.format(osd_id), state='i')
logger.out(
"Adding new OSD disk with ID {} to CRUSH map".format(osd_id), state="i"
)
retcode, stdout, stderr = common.run_os_command(
'ceph osd crush add osd.{osdid} {weight} root=default host={node}'.format(
osdid=osd_id,
weight=weight,
node=node
"ceph osd crush add osd.{osdid} {weight} root=default host={node}".format(
osdid=osd_id, weight=weight, node=node
)
)
if retcode:
print('ceph osd crush add')
print("ceph osd crush add")
print(stdout)
print(stderr)
raise Exception
@ -178,65 +196,73 @@ class CephOSDInstance(object):
# 6. Verify it started
retcode, stdout, stderr = common.run_os_command(
'systemctl status ceph-osd@{osdid}'.format(
osdid=osd_id
)
"systemctl status ceph-osd@{osdid}".format(osdid=osd_id)
)
if retcode:
print('systemctl status')
print("systemctl status")
print(stdout)
print(stderr)
raise Exception
# 7. Add the new OSD to the list
logger.out('Adding new OSD disk with ID {} to Zookeeper'.format(osd_id), state='i')
zkhandler.write([
(('osd', osd_id), ''),
(('osd.node', osd_id), node),
(('osd.device', osd_id), device),
(('osd.db_device', osd_id), db_device),
(('osd.stats', osd_id), '{}'),
])
logger.out(
"Adding new OSD disk with ID {} to Zookeeper".format(osd_id), state="i"
)
zkhandler.write(
[
(("osd", osd_id), ""),
(("osd.node", osd_id), node),
(("osd.device", osd_id), device),
(("osd.db_device", osd_id), db_device),
(("osd.stats", osd_id), "{}"),
]
)
# Log it
logger.out('Created new OSD disk with ID {}'.format(osd_id), state='o')
logger.out("Created new OSD disk with ID {}".format(osd_id), state="o")
return True
except Exception as e:
# Log it
logger.out('Failed to create new OSD disk: {}'.format(e), state='e')
logger.out("Failed to create new OSD disk: {}".format(e), state="e")
return False
@staticmethod
def remove_osd(zkhandler, logger, osd_id, osd_obj):
logger.out('Removing OSD disk {}'.format(osd_id), state='i')
logger.out("Removing OSD disk {}".format(osd_id), state="i")
try:
# 1. Verify the OSD is present
retcode, stdout, stderr = common.run_os_command('ceph osd ls')
osd_list = stdout.split('\n')
retcode, stdout, stderr = common.run_os_command("ceph osd ls")
osd_list = stdout.split("\n")
if osd_id not in osd_list:
logger.out('Could not find OSD {} in the cluster'.format(osd_id), state='e')
logger.out(
"Could not find OSD {} in the cluster".format(osd_id), state="e"
)
return True
# 1. Set the OSD out so it will flush
logger.out('Setting out OSD disk with ID {}'.format(osd_id), state='i')
retcode, stdout, stderr = common.run_os_command('ceph osd out {}'.format(osd_id))
logger.out("Setting out OSD disk with ID {}".format(osd_id), state="i")
retcode, stdout, stderr = common.run_os_command(
"ceph osd out {}".format(osd_id)
)
if retcode:
print('ceph osd out')
print("ceph osd out")
print(stdout)
print(stderr)
raise Exception
# 2. Wait for the OSD to flush
logger.out('Flushing OSD disk with ID {}'.format(osd_id), state='i')
logger.out("Flushing OSD disk with ID {}".format(osd_id), state="i")
osd_string = str()
while True:
try:
retcode, stdout, stderr = common.run_os_command('ceph pg dump osds --format json')
retcode, stdout, stderr = common.run_os_command(
"ceph pg dump osds --format json"
)
dump_string = json.loads(stdout)
for osd in dump_string:
if str(osd['osd']) == osd_id:
if str(osd["osd"]) == osd_id:
osd_string = osd
num_pgs = osd_string['num_pgs']
num_pgs = osd_string["num_pgs"]
if num_pgs > 0:
time.sleep(5)
else:
@ -245,10 +271,12 @@ class CephOSDInstance(object):
break
# 3. Stop the OSD process and wait for it to be terminated
logger.out('Stopping OSD disk with ID {}'.format(osd_id), state='i')
retcode, stdout, stderr = common.run_os_command('systemctl stop ceph-osd@{}'.format(osd_id))
logger.out("Stopping OSD disk with ID {}".format(osd_id), state="i")
retcode, stdout, stderr = common.run_os_command(
"systemctl stop ceph-osd@{}".format(osd_id)
)
if retcode:
print('systemctl stop')
print("systemctl stop")
print(stdout)
print(stderr)
raise Exception
@ -257,161 +285,213 @@ class CephOSDInstance(object):
while True:
is_osd_up = False
# Find if there is a process named ceph-osd with arg '--id {id}'
for p in psutil.process_iter(attrs=['name', 'cmdline']):
if 'ceph-osd' == p.info['name'] and '--id {}'.format(osd_id) in ' '.join(p.info['cmdline']):
for p in psutil.process_iter(attrs=["name", "cmdline"]):
if "ceph-osd" == p.info["name"] and "--id {}".format(
osd_id
) in " ".join(p.info["cmdline"]):
is_osd_up = True
# If there isn't, continue
if not is_osd_up:
break
# 4. Determine the block devices
retcode, stdout, stderr = common.run_os_command('readlink /var/lib/ceph/osd/ceph-{}/block'.format(osd_id))
vg_name = stdout.split('/')[-2] # e.g. /dev/ceph-<uuid>/osd-block-<uuid>
retcode, stdout, stderr = common.run_os_command('vgs --separator , --noheadings -o pv_name {}'.format(vg_name))
retcode, stdout, stderr = common.run_os_command(
"readlink /var/lib/ceph/osd/ceph-{}/block".format(osd_id)
)
vg_name = stdout.split("/")[-2] # e.g. /dev/ceph-<uuid>/osd-block-<uuid>
retcode, stdout, stderr = common.run_os_command(
"vgs --separator , --noheadings -o pv_name {}".format(vg_name)
)
pv_block = stdout.strip()
# 5. Zap the volumes
logger.out('Zapping OSD disk with ID {} on {}'.format(osd_id, pv_block), state='i')
retcode, stdout, stderr = common.run_os_command('ceph-volume lvm zap --destroy {}'.format(pv_block))
logger.out(
"Zapping OSD disk with ID {} on {}".format(osd_id, pv_block), state="i"
)
retcode, stdout, stderr = common.run_os_command(
"ceph-volume lvm zap --destroy {}".format(pv_block)
)
if retcode:
print('ceph-volume lvm zap')
print("ceph-volume lvm zap")
print(stdout)
print(stderr)
raise Exception
# 6. Purge the OSD from Ceph
logger.out('Purging OSD disk with ID {}'.format(osd_id), state='i')
retcode, stdout, stderr = common.run_os_command('ceph osd purge {} --yes-i-really-mean-it'.format(osd_id))
logger.out("Purging OSD disk with ID {}".format(osd_id), state="i")
retcode, stdout, stderr = common.run_os_command(
"ceph osd purge {} --yes-i-really-mean-it".format(osd_id)
)
if retcode:
print('ceph osd purge')
print("ceph osd purge")
print(stdout)
print(stderr)
raise Exception
# 7. Remove the DB device
if zkhandler.exists(('osd.db_device', osd_id)):
db_device = zkhandler.read(('osd.db_device', osd_id))
logger.out('Removing OSD DB logical volume "{}"'.format(db_device), state='i')
retcode, stdout, stderr = common.run_os_command('lvremove --yes --force {}'.format(db_device))
if zkhandler.exists(("osd.db_device", osd_id)):
db_device = zkhandler.read(("osd.db_device", osd_id))
logger.out(
'Removing OSD DB logical volume "{}"'.format(db_device), state="i"
)
retcode, stdout, stderr = common.run_os_command(
"lvremove --yes --force {}".format(db_device)
)
# 8. Delete OSD from ZK
logger.out('Deleting OSD disk with ID {} from Zookeeper'.format(osd_id), state='i')
zkhandler.delete(('osd', osd_id), recursive=True)
logger.out(
"Deleting OSD disk with ID {} from Zookeeper".format(osd_id), state="i"
)
zkhandler.delete(("osd", osd_id), recursive=True)
# Log it
logger.out('Removed OSD disk with ID {}'.format(osd_id), state='o')
logger.out("Removed OSD disk with ID {}".format(osd_id), state="o")
return True
except Exception as e:
# Log it
logger.out('Failed to purge OSD disk with ID {}: {}'.format(osd_id, e), state='e')
logger.out(
"Failed to purge OSD disk with ID {}: {}".format(osd_id, e), state="e"
)
return False
@staticmethod
def add_db_vg(zkhandler, logger, device):
    """
    Create the "osd-db" LVM volume group on a block device.

    The device's partition table is cleared, a single partition with sgdisk
    typecode 8e00 is created on it, a PV is created on that partition, and
    the "osd-db" VG is created on the PV.

    Args:
        zkhandler: Not used by this method.
        logger: Object whose out(message, state=...) method receives the
            progress and error messages.
        device: Path of the block device, e.g. /dev/sda, /dev/nvme0n1 or a
            /dev/disk/by-path or /dev/disk/by-id link.

    Returns:
        True if the VG was created. False if the existence check did not
        report the VG as missing, or if any step failed; failures inside
        the try block are logged rather than propagated.
    """
    logger.out(
        "Creating new OSD database volume group on block device {}".format(device),
        state="i",
    )

    def run_step(label, command):
        # Run one provisioning command; on a non-zero return code dump its
        # output and abort with a message that names the failed step, so the
        # final error log line is not empty.
        retcode, stdout, stderr = common.run_os_command(command)
        if retcode:
            print(label)
            print(stdout)
            print(stderr)
            raise Exception(
                "{} failed with return code {}".format(label, retcode)
            )

    try:
        # 0. Check if an existing volume group exists
        # Return code 5 is treated as "VG not found"; any other code
        # (including 0) is reported as the VG already existing.
        retcode, stdout, stderr = common.run_os_command("vgdisplay osd-db")
        if retcode != 5:
            logger.out('Ceph OSD database VG "osd-db" already exists', state="e")
            return False

        # 1. Create an empty partition table and a single PV partition
        logger.out(
            "Creating partitions on block device {}".format(device), state="i"
        )
        run_step("sgdisk create partition table", "sgdisk --clear {}".format(device))
        run_step(
            "sgdisk create pv partition",
            "sgdisk --new 1:: --typecode 1:8e00 {}".format(device),
        )

        # Handle the partition ID portion
        if "by-path" in device or "by-id" in device:
            # /dev/disk/by-path/pci-0000:03:00.0-scsi-0:1:0:0 -> pci-0000:03:00.0-scsi-0:1:0:0-part1
            partition = "{}-part1".format(device)
        elif "nvme" in device:
            # /dev/nvme0n1 -> nvme0n1p1
            partition = "{}p1".format(device)
        else:
            # /dev/sda -> sda1
            # No other '/dev/disk/by-*' types are valid for raw block devices anyways
            partition = "{}1".format(device)

        # 2. Create the PV
        logger.out("Creating PV on block device {}".format(partition), state="i")
        run_step("pv creation", "pvcreate --force {}".format(partition))

        # 3. Create the VG (named 'osd-db')
        logger.out(
            'Creating VG "osd-db" on block device {}'.format(partition), state="i"
        )
        run_step("vg creation", "vgcreate --force osd-db {}".format(partition))

        # Log it
        logger.out(
            "Created new OSD database volume group on block device {}".format(
                device
            ),
            state="o",
        )
        return True
    except Exception as e:
        # Log it
        logger.out(
            "Failed to create OSD database volume group: {}".format(e), state="e"
        )
        return False
@staticmethod
def create_osd_db_lv(zkhandler, logger, osd_id, ext_db_ratio, osd_size_bytes):
    """
    Create the database logical volume "osd-<osd_id>" in the "osd-db" VG.

    The LV size is osd_size_bytes * ext_db_ratio, converted to whole
    mebibytes (truncated).

    Args:
        zkhandler: Not used by this method.
        logger: Object whose out(message, state=...) method receives the
            progress and error messages.
        osd_id: ID of the OSD the LV belongs to; used to build the LV name.
        ext_db_ratio: Fraction of the OSD size to allocate to the DB LV.
        osd_size_bytes: Size of the OSD block device in bytes.

    Returns:
        True if the LV was created. False if the existence check did not
        report the LV as missing, or if creation failed; failures inside
        the try block are logged rather than propagated.
    """
    # Build the name once so the existence check, the creation command and
    # the log messages all refer to the same LV. The check previously looked
    # for "osd-db/osd<id>" (no dash) and so never matched the created LV.
    lv_name = "osd-{}".format(osd_id)
    lv_path = "osd-db/{}".format(lv_name)

    logger.out(
        "Creating new OSD database logical volume for OSD ID {}".format(osd_id),
        state="i",
    )
    try:
        # 0. Check if an existing logical volume exists
        # Return code 5 is treated as "LV not found"; any other code
        # (including 0) is reported as the LV already existing.
        retcode, stdout, stderr = common.run_os_command(
            "lvdisplay {}".format(lv_path)
        )
        if retcode != 5:
            logger.out(
                'Ceph OSD database LV "{}" already exists'.format(lv_path),
                state="e",
            )
            return False

        # 1. Determine LV sizing (bytes -> whole MiB)
        osd_db_size = int(osd_size_bytes * ext_db_ratio / 1024 / 1024)

        # 2. Create the LV
        logger.out(
            'Creating DB LV "{}" of {}M ({} * {})'.format(
                lv_path, osd_db_size, osd_size_bytes, ext_db_ratio
            ),
            state="i",
        )
        # NOTE(review): --size is passed without a unit suffix; the log line
        # above assumes lvcreate reads it as megabytes - confirm against
        # lvcreate(8).
        retcode, stdout, stderr = common.run_os_command(
            "lvcreate --yes --name {} --size {} osd-db".format(lv_name, osd_db_size)
        )
        if retcode:
            print("db lv creation")
            print(stdout)
            print(stderr)
            # Carry the reason so the failure log below is not empty
            raise Exception(
                "db lv creation failed with return code {}".format(retcode)
            )

        # Log it
        logger.out(
            'Created new OSD database logical volume "{}"'.format(lv_path),
            state="o",
        )
        return True
    except Exception as e:
        # Log it
        logger.out(
            "Failed to create OSD database logical volume: {}".format(e), state="e"
        )
        return False
@ -420,35 +500,39 @@ class CephPoolInstance(object):
self.zkhandler = zkhandler
self.this_node = this_node
self.name = name
self.pgs = ''
self.pgs = ""
self.stats = dict()
@self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('pool.pgs', self.name))
def watch_pool_node(data, stat, event=''):
if event and event.type == 'DELETED':
@self.zkhandler.zk_conn.DataWatch(
self.zkhandler.schema.path("pool.pgs", self.name)
)
def watch_pool_node(data, stat, event=""):
if event and event.type == "DELETED":
# The key has been deleted after existing before; terminate this watcher
# because this class instance is about to be reaped in Daemon.py
return False
try:
data = data.decode('ascii')
data = data.decode("ascii")
except AttributeError:
data = ''
data = ""
if data and data != self.pgs:
self.pgs = data
@self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('pool.stats', self.name))
def watch_pool_stats(data, stat, event=''):
if event and event.type == 'DELETED':
@self.zkhandler.zk_conn.DataWatch(
self.zkhandler.schema.path("pool.stats", self.name)
)
def watch_pool_stats(data, stat, event=""):
if event and event.type == "DELETED":
# The key has been deleted after existing before; terminate this watcher
# because this class instance is about to be reaped in Daemon.py
return False
try:
data = data.decode('ascii')
data = data.decode("ascii")
except AttributeError:
data = ''
data = ""
if data and data != self.stats:
self.stats = json.loads(data)
@ -462,17 +546,19 @@ class CephVolumeInstance(object):
self.name = name
self.stats = dict()
@self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('volume.stats', f'{self.pool}/{self.name}'))
def watch_volume_stats(data, stat, event=''):
if event and event.type == 'DELETED':
@self.zkhandler.zk_conn.DataWatch(
self.zkhandler.schema.path("volume.stats", f"{self.pool}/{self.name}")
)
def watch_volume_stats(data, stat, event=""):
if event and event.type == "DELETED":
# The key has been deleted after existing before; terminate this watcher
# because this class instance is about to be reaped in Daemon.py
return False
try:
data = data.decode('ascii')
data = data.decode("ascii")
except AttributeError:
data = ''
data = ""
if data and data != self.stats:
self.stats = json.loads(data)
@ -487,17 +573,21 @@ class CephSnapshotInstance(object):
self.name = name
self.stats = dict()
@self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('snapshot.stats', f'{self.pool}/{self.volume}/{self.name}'))
def watch_snapshot_stats(data, stat, event=''):
if event and event.type == 'DELETED':
@self.zkhandler.zk_conn.DataWatch(
self.zkhandler.schema.path(
"snapshot.stats", f"{self.pool}/{self.volume}/{self.name}"
)
)
def watch_snapshot_stats(data, stat, event=""):
if event and event.type == "DELETED":
# The key has been deleted after existing before; terminate this watcher
# because this class instance is about to be reaped in Daemon.py
return False
try:
data = data.decode('ascii')
data = data.decode("ascii")
except AttributeError:
data = ''
data = ""
if data and data != self.stats:
self.stats = json.loads(data)
@ -510,77 +600,69 @@ def ceph_command(zkhandler, logger, this_node, data, d_osd):
command, args = data.split()
# Adding a new OSD
if command == 'osd_add':
node, device, weight, ext_db_flag, ext_db_ratio = args.split(',')
if command == "osd_add":
node, device, weight, ext_db_flag, ext_db_ratio = args.split(",")
ext_db_flag = bool(strtobool(ext_db_flag))
ext_db_ratio = float(ext_db_ratio)
if node == this_node.name:
# Lock the command queue
zk_lock = zkhandler.writelock('base.cmd.ceph')
zk_lock = zkhandler.writelock("base.cmd.ceph")
with zk_lock:
# Add the OSD
result = CephOSDInstance.add_osd(zkhandler, logger, node, device, weight, ext_db_flag, ext_db_ratio)
result = CephOSDInstance.add_osd(
zkhandler, logger, node, device, weight, ext_db_flag, ext_db_ratio
)
# Command succeeded
if result:
# Update the command queue
zkhandler.write([
('base.cmd.ceph', 'success-{}'.format(data))
])
zkhandler.write([("base.cmd.ceph", "success-{}".format(data))])
# Command failed
else:
# Update the command queue
zkhandler.write([
('base.cmd.ceph', 'failure-{}'.format(data))
])
zkhandler.write([("base.cmd.ceph", "failure-{}".format(data))])
# Wait 1 seconds before we free the lock, to ensure the client hits the lock
time.sleep(1)
# Removing an OSD
elif command == 'osd_remove':
elif command == "osd_remove":
osd_id = args
# Verify osd_id is in the list
if d_osd[osd_id] and d_osd[osd_id].node == this_node.name:
# Lock the command queue
zk_lock = zkhandler.writelock('base.cmd.ceph')
zk_lock = zkhandler.writelock("base.cmd.ceph")
with zk_lock:
# Remove the OSD
result = CephOSDInstance.remove_osd(zkhandler, logger, osd_id, d_osd[osd_id])
result = CephOSDInstance.remove_osd(
zkhandler, logger, osd_id, d_osd[osd_id]
)
# Command succeeded
if result:
# Update the command queue
zkhandler.write([
('base.cmd.ceph', 'success-{}'.format(data))
])
zkhandler.write([("base.cmd.ceph", "success-{}".format(data))])
# Command failed
else:
# Update the command queue
zkhandler.write([
('base.cmd.ceph', 'failure-{}'.format(data))
])
zkhandler.write([("base.cmd.ceph", "failure-{}".format(data))])
# Wait 1 seconds before we free the lock, to ensure the client hits the lock
time.sleep(1)
# Adding a new DB VG
elif command == 'db_vg_add':
node, device = args.split(',')
elif command == "db_vg_add":
node, device = args.split(",")
if node == this_node.name:
# Lock the command queue
zk_lock = zkhandler.writelock('base.cmd.ceph')
zk_lock = zkhandler.writelock("base.cmd.ceph")
with zk_lock:
# Add the VG
result = CephOSDInstance.add_db_vg(zkhandler, logger, device)
# Command succeeded
if result:
# Update the command queue
zkhandler.write([
('base.cmd.ceph', 'success-{}'.format(data))
])
zkhandler.write([("base.cmd.ceph", "success-{}".format(data))])
# Command failed
else:
# Update the command queue
zkhandler.write([
('base.cmd.ceph', 'failure={}'.format(data))
])
zkhandler.write([("base.cmd.ceph", "failure={}".format(data))])
# Wait 1 seconds before we free the lock, to ensure the client hits the lock
time.sleep(1)

View File

@ -74,69 +74,71 @@ class PowerDNSInstance(object):
self.dns_server_daemon = None
# Floating upstreams
self.cluster_floatingipaddr, self.cluster_cidrnetmask = self.config['cluster_floating_ip'].split('/')
self.upstream_floatingipaddr, self.upstream_cidrnetmask = self.config['upstream_floating_ip'].split('/')
self.cluster_floatingipaddr, self.cluster_cidrnetmask = self.config[
"cluster_floating_ip"
].split("/")
self.upstream_floatingipaddr, self.upstream_cidrnetmask = self.config[
"upstream_floating_ip"
].split("/")
def start(self):
    """
    Launch the PowerDNS zone aggregator daemon.

    Assembles the pdns_server command line from the floating IP addresses
    and the pdns_* entries of self.config, starts it via
    common.run_os_daemon, and stores the returned handle in
    self.dns_server_daemon. A success message is logged only when that
    handle is truthy.
    """
    self.logger.out("Starting PowerDNS zone aggregator", state="i")

    cfg = self.config
    listen_addrs = f"{self.cluster_floatingipaddr},{self.upstream_floatingipaddr}"

    # Server behaviour and listener options
    pdns_args = [
        "--no-config",
        "--daemon=no",  # Start directly
        "--guardian=yes",  # Use a guardian
        "--disable-syslog=yes",  # Log only to stdout (which is then captured)
        "--disable-axfr=no",  # Allow AXFRs
        "--allow-axfr-ips=0.0.0.0/0",  # Allow AXFRs to anywhere
        f"--local-address={listen_addrs}",  # Listen on floating IPs
        "--local-port=53",  # On port 53
        "--log-dns-details=on",  # Log details
        "--loglevel=3",  # Log info
        "--master=yes",  # Enable master mode
        "--slave=yes",  # Enable slave mode
        "--slave-renotify=yes",  # Renotify out for our slaved zones
        "--version-string=powerdns",  # Set the version string
        "--default-soa-name=dns.pvc.local",  # Override dnsmasq's invalid name
        f"--socket-dir={cfg['pdns_dynamic_directory']}",  # Standard socket directory
        "--launch=gpgsql",  # Use the PostgreSQL backend
    ]

    # PostgreSQL backend connection options, one per pdns_postgresql_* key:
    # instance host, port, database name, user name and user password
    pdns_args.extend(
        f"--gpgsql-{field}={cfg['pdns_postgresql_' + field]}"
        for field in ("host", "port", "dbname", "user", "password")
    )
    pdns_args.append("--gpgsql-dnssec=no")  # Do DNSSEC elsewhere

    # Start the pdns process in a thread
    command = "/usr/sbin/pdns_server " + " ".join(pdns_args)
    log_path = f"{cfg['pdns_log_directory']}/pdns-aggregator.log"
    self.dns_server_daemon = common.run_os_daemon(
        command,
        environment=None,
        logfile=log_path,
    )

    if self.dns_server_daemon:
        self.logger.out("Successfully started PowerDNS zone aggregator", state="o")
def stop(self):
    """
    Stop the PowerDNS zone aggregator daemon, if one was started.

    Sends the daemon handle a "term" signal, waits 0.2 seconds, then sends
    "kill". Does nothing when self.dns_server_daemon is falsy.
    """
    daemon = self.dns_server_daemon
    if not daemon:
        # Nothing was started, so there is nothing to stop
        return

    self.logger.out("Stopping PowerDNS zone aggregator", state="i")

    # Terminate, then kill
    daemon.signal("term")
    time.sleep(0.2)
    daemon.signal("kill")

    self.logger.out("Successfully stopped PowerDNS zone aggregator", state="o")
class DNSNetworkInstance(object):
@ -153,29 +155,24 @@ class DNSNetworkInstance(object):
network_domain = self.network.domain
self.logger.out(
'Adding entry for client domain {}'.format(
network_domain
),
prefix='DNS aggregator',
state='o'
"Adding entry for client domain {}".format(network_domain),
prefix="DNS aggregator",
state="o",
)
# Connect to the database
self.sql_conn = psycopg2.connect(
"host='{}' port='{}' dbname='{}' user='{}' password='{}' sslmode='disable'".format(
self.config['pdns_postgresql_host'],
self.config['pdns_postgresql_port'],
self.config['pdns_postgresql_dbname'],
self.config['pdns_postgresql_user'],
self.config['pdns_postgresql_password']
self.config["pdns_postgresql_host"],
self.config["pdns_postgresql_port"],
self.config["pdns_postgresql_dbname"],
self.config["pdns_postgresql_user"],
self.config["pdns_postgresql_password"],
)
)
sql_curs = self.sql_conn.cursor()
# Try to access the domains entry
sql_curs.execute(
"SELECT * FROM domains WHERE name=%s",
(network_domain,)
)
sql_curs.execute("SELECT * FROM domains WHERE name=%s", (network_domain,))
results = sql_curs.fetchone()
# If we got back a result, don't try to add the domain to the DB
@ -188,14 +185,11 @@ class DNSNetworkInstance(object):
if self.aggregator.is_active and write_domain:
sql_curs.execute(
"INSERT INTO domains (name, type, account, notified_serial) VALUES (%s, 'MASTER', 'internal', 0)",
(network_domain,)
(network_domain,),
)
self.sql_conn.commit()
sql_curs.execute(
"SELECT id FROM domains WHERE name=%s",
(network_domain,)
)
sql_curs.execute("SELECT id FROM domains WHERE name=%s", (network_domain,))
domain_id = sql_curs.fetchone()
sql_curs.execute(
@ -203,13 +197,22 @@ class DNSNetworkInstance(object):
INSERT INTO records (domain_id, name, content, type, ttl, prio) VALUES
(%s, %s, %s, %s, %s, %s)
""",
(domain_id, network_domain, 'nsX.{d} root.{d} 1 10800 1800 86400 86400'.format(d=self.config['upstream_domain']), 'SOA', 86400, 0)
(
domain_id,
network_domain,
"nsX.{d} root.{d} 1 10800 1800 86400 86400".format(
d=self.config["upstream_domain"]
),
"SOA",
86400,
0,
),
)
if self.network.name_servers:
ns_servers = self.network.name_servers
else:
ns_servers = ['pvc-dns.{}'.format(self.config['upstream_domain'])]
ns_servers = ["pvc-dns.{}".format(self.config["upstream_domain"])]
for ns_server in ns_servers:
sql_curs.execute(
@ -217,7 +220,7 @@ class DNSNetworkInstance(object):
INSERT INTO records (domain_id, name, content, type, ttl, prio) VALUES
(%s, %s, %s, %s, %s, %s)
""",
(domain_id, network_domain, ns_server, 'NS', 86400, 0)
(domain_id, network_domain, ns_server, "NS", 86400, 0),
)
self.sql_conn.commit()
@ -229,42 +232,31 @@ class DNSNetworkInstance(object):
network_domain = self.network.domain
self.logger.out(
'Removing entry for client domain {}'.format(
network_domain
),
prefix='DNS aggregator',
state='o'
"Removing entry for client domain {}".format(network_domain),
prefix="DNS aggregator",
state="o",
)
# Connect to the database
self.sql_conn = psycopg2.connect(
"host='{}' port='{}' dbname='{}' user='{}' password='{}' sslmode='disable'".format(
self.config['pdns_postgresql_host'],
self.config['pdns_postgresql_port'],
self.config['pdns_postgresql_dbname'],
self.config['pdns_postgresql_user'],
self.config['pdns_postgresql_password']
self.config["pdns_postgresql_host"],
self.config["pdns_postgresql_port"],
self.config["pdns_postgresql_dbname"],
self.config["pdns_postgresql_user"],
self.config["pdns_postgresql_password"],
)
)
sql_curs = self.sql_conn.cursor()
# Get the domain ID
sql_curs.execute(
"SELECT id FROM domains WHERE name=%s",
(network_domain,)
)
sql_curs.execute("SELECT id FROM domains WHERE name=%s", (network_domain,))
domain_id = sql_curs.fetchone()
# Delete the domain from the database if we're active
if self.aggregator.is_active and domain_id:
sql_curs.execute(
"DELETE FROM domains WHERE id=%s",
(domain_id,)
)
sql_curs.execute(
"DELETE FROM records WHERE domain_id=%s",
(domain_id,)
)
sql_curs.execute("DELETE FROM domains WHERE id=%s", (domain_id,))
sql_curs.execute("DELETE FROM records WHERE domain_id=%s", (domain_id,))
self.sql_conn.commit()
self.sql_conn.close()
@ -295,11 +287,11 @@ class AXFRDaemonInstance(object):
# after the leader transitions
self.sql_conn = psycopg2.connect(
"host='{}' port='{}' dbname='{}' user='{}' password='{}' sslmode='disable'".format(
self.config['pdns_postgresql_host'],
self.config['pdns_postgresql_port'],
self.config['pdns_postgresql_dbname'],
self.config['pdns_postgresql_user'],
self.config['pdns_postgresql_password']
self.config["pdns_postgresql_host"],
self.config["pdns_postgresql_port"],
self.config["pdns_postgresql_dbname"],
self.config["pdns_postgresql_user"],
self.config["pdns_postgresql_password"],
)
)
@ -328,7 +320,7 @@ class AXFRDaemonInstance(object):
# Set up our basic variables
domain = network.domain
if network.ip4_gateway != 'None':
if network.ip4_gateway != "None":
dnsmasq_ip = network.ip4_gateway
else:
dnsmasq_ip = network.ip6_gateway
@ -341,53 +333,67 @@ class AXFRDaemonInstance(object):
z = dns.zone.from_xfr(axfr)
records_raw = [z[n].to_text(n) for n in z.nodes.keys()]
except Exception as e:
if self.config['debug']:
self.logger.out('{} {} ({})'.format(e, dnsmasq_ip, domain), state='d', prefix='dns-aggregator')
if self.config["debug"]:
self.logger.out(
"{} {} ({})".format(e, dnsmasq_ip, domain),
state="d",
prefix="dns-aggregator",
)
continue
# Fix the formatting because it's useless
# reference: ['@ 600 IN SOA . . 4 1200 180 1209600 600\n@ 600 IN NS .', 'test3 600 IN A 10.1.1.203\ntest3 600 IN AAAA 2001:b23e:1113:0:5054:ff:fe5c:f131', etc.]
# We don't really care about dnsmasq's terrible SOA or NS records which are in [0]
string_records = '\n'.join(records_raw[1:])
string_records = "\n".join(records_raw[1:])
# Split into individual records
records_new = list()
for element in string_records.split('\n'):
for element in string_records.split("\n"):
if element:
record = element.split()
# Handle space-containing data elements
if domain not in record[0]:
name = '{}.{}'.format(record[0], domain)
name = "{}.{}".format(record[0], domain)
else:
name = record[0]
entry = '{} {} IN {} {}'.format(name, record[1], record[3], ' '.join(record[4:]))
entry = "{} {} IN {} {}".format(
name, record[1], record[3], " ".join(record[4:])
)
records_new.append(entry)
#
# Get the current zone from the database
#
try:
sql_curs.execute(
"SELECT id FROM domains WHERE name=%s",
(domain,)
)
sql_curs.execute("SELECT id FROM domains WHERE name=%s", (domain,))
domain_id = sql_curs.fetchone()
sql_curs.execute(
"SELECT * FROM records WHERE domain_id=%s",
(domain_id,)
"SELECT * FROM records WHERE domain_id=%s", (domain_id,)
)
results = list(sql_curs.fetchall())
if self.config['debug']:
self.logger.out('SQL query results: {}'.format(results), state='d', prefix='dns-aggregator')
if self.config["debug"]:
self.logger.out(
"SQL query results: {}".format(results),
state="d",
prefix="dns-aggregator",
)
except Exception as e:
self.logger.out('ERROR: Failed to obtain DNS records from database: {}'.format(e))
self.logger.out(
"ERROR: Failed to obtain DNS records from database: {}".format(
e
)
)
# Fix the formatting because it's useless for comparison
# reference: ((10, 28, 'testnet01.i.bonilan.net', 'SOA', 'nsX.pvc.local root.pvc.local 1 10800 1800 86400 86400', 86400, 0, None, 0, None, 1), etc.)
records_old = list()
records_old_ids = list()
if not results:
if self.config['debug']:
self.logger.out('No results found, skipping.', state='d', prefix='dns-aggregator')
if self.config["debug"]:
self.logger.out(
"No results found, skipping.",
state="d",
prefix="dns-aggregator",
)
continue
for record in results:
# Skip the non-A
@ -397,14 +403,24 @@ class AXFRDaemonInstance(object):
r_type = record[3]
r_data = record[4]
# Assemble a list element in the same format as the AXFR data
entry = '{} {} IN {} {}'.format(r_name, r_ttl, r_type, r_data)
if self.config['debug']:
self.logger.out('Found record: {}'.format(entry), state='d', prefix='dns-aggregator')
entry = "{} {} IN {} {}".format(r_name, r_ttl, r_type, r_data)
if self.config["debug"]:
self.logger.out(
"Found record: {}".format(entry),
state="d",
prefix="dns-aggregator",
)
# Skip non-A or AAAA records
if r_type != 'A' and r_type != 'AAAA':
if self.config['debug']:
self.logger.out('Skipping record {}, not A or AAAA: "{}"'.format(entry, r_type), state='d', prefix='dns-aggregator')
if r_type != "A" and r_type != "AAAA":
if self.config["debug"]:
self.logger.out(
'Skipping record {}, not A or AAAA: "{}"'.format(
entry, r_type
),
state="d",
prefix="dns-aggregator",
)
continue
records_old.append(entry)
@ -413,9 +429,17 @@ class AXFRDaemonInstance(object):
records_new.sort()
records_old.sort()
if self.config['debug']:
self.logger.out('New: {}'.format(records_new), state='d', prefix='dns-aggregator')
self.logger.out('Old: {}'.format(records_old), state='d', prefix='dns-aggregator')
if self.config["debug"]:
self.logger.out(
"New: {}".format(records_new),
state="d",
prefix="dns-aggregator",
)
self.logger.out(
"Old: {}".format(records_old),
state="d",
prefix="dns-aggregator",
)
# Find the differences between the lists
# Basic check one: are they completely equal
@ -426,9 +450,17 @@ class AXFRDaemonInstance(object):
in_new_not_in_old = in_new - in_old
in_old_not_in_new = in_old - in_new
if self.config['debug']:
self.logger.out('New but not old: {}'.format(in_new_not_in_old), state='d', prefix='dns-aggregator')
self.logger.out('Old but not new: {}'.format(in_old_not_in_new), state='d', prefix='dns-aggregator')
if self.config["debug"]:
self.logger.out(
"New but not old: {}".format(in_new_not_in_old),
state="d",
prefix="dns-aggregator",
)
self.logger.out(
"Old but not new: {}".format(in_old_not_in_new),
state="d",
prefix="dns-aggregator",
)
# Go through the old list
remove_records = list() # list of database IDs
@ -445,18 +477,24 @@ class AXFRDaemonInstance(object):
for newrecord in in_new_not_in_old:
splitnewrecord = newrecord.split()
# If there's a name and type match with different content, remove the old one
if splitrecord[0] == splitnewrecord[0] and splitrecord[3] == splitnewrecord[3]:
if (
splitrecord[0] == splitnewrecord[0]
and splitrecord[3] == splitnewrecord[3]
):
remove_records.append(record_id)
changed = False
if len(remove_records) > 0:
# Remove the invalid old records
for record_id in remove_records:
if self.config['debug']:
self.logger.out('Removing record: {}'.format(record_id), state='d', prefix='dns-aggregator')
if self.config["debug"]:
self.logger.out(
"Removing record: {}".format(record_id),
state="d",
prefix="dns-aggregator",
)
sql_curs.execute(
"DELETE FROM records WHERE id=%s",
(record_id,)
"DELETE FROM records WHERE id=%s", (record_id,)
)
changed = True
@ -469,53 +507,81 @@ class AXFRDaemonInstance(object):
r_ttl = record[1]
r_type = record[3]
r_data = record[4]
if self.config['debug']:
self.logger.out('Add record: {}'.format(name), state='d', prefix='dns-aggregator')
if self.config["debug"]:
self.logger.out(
"Add record: {}".format(name),
state="d",
prefix="dns-aggregator",
)
try:
sql_curs.execute(
"INSERT INTO records (domain_id, name, ttl, type, prio, content) VALUES (%s, %s, %s, %s, %s, %s)",
(domain_id, r_name, r_ttl, r_type, 0, r_data)
(domain_id, r_name, r_ttl, r_type, 0, r_data),
)
changed = True
except psycopg2.IntegrityError as e:
if self.config['debug']:
self.logger.out('Failed to add record due to {}: {}'.format(e, name), state='d', prefix='dns-aggregator')
if self.config["debug"]:
self.logger.out(
"Failed to add record due to {}: {}".format(
e, name
),
state="d",
prefix="dns-aggregator",
)
except psycopg2.errors.InFailedSqlTransaction as e:
if self.config['debug']:
self.logger.out('Failed to add record due to {}: {}'.format(e, name), state='d', prefix='dns-aggregator')
if self.config["debug"]:
self.logger.out(
"Failed to add record due to {}: {}".format(
e, name
),
state="d",
prefix="dns-aggregator",
)
if changed:
# Increase SOA serial
sql_curs.execute(
"SELECT content FROM records WHERE domain_id=%s AND type='SOA'",
(domain_id,)
(domain_id,),
)
soa_record = list(sql_curs.fetchone())[0].split()
current_serial = int(soa_record[2])
new_serial = current_serial + 1
soa_record[2] = str(new_serial)
if self.config['debug']:
self.logger.out('Records changed; bumping SOA: {}'.format(new_serial), state='d', prefix='dns-aggregator')
if self.config["debug"]:
self.logger.out(
"Records changed; bumping SOA: {}".format(new_serial),
state="d",
prefix="dns-aggregator",
)
sql_curs.execute(
"UPDATE records SET content=%s WHERE domain_id=%s AND type='SOA'",
(' '.join(soa_record), domain_id)
(" ".join(soa_record), domain_id),
)
# Commit all the previous changes
if self.config['debug']:
self.logger.out('Committing database changes and reloading PDNS', state='d', prefix='dns-aggregator')
if self.config["debug"]:
self.logger.out(
"Committing database changes and reloading PDNS",
state="d",
prefix="dns-aggregator",
)
try:
self.sql_conn.commit()
except Exception as e:
self.logger.out('ERROR: Failed to commit DNS aggregator changes: {}'.format(e), state='e')
self.logger.out(
"ERROR: Failed to commit DNS aggregator changes: {}".format(
e
),
state="e",
)
# Reload the domain
common.run_os_command(
'/usr/bin/pdns_control --socket-dir={} reload {}'.format(
self.config['pdns_dynamic_directory'],
domain
"/usr/bin/pdns_control --socket-dir={} reload {}".format(
self.config["pdns_dynamic_directory"], domain
),
background=False
background=False,
)
# Wait for 10 seconds

View File

@ -46,45 +46,52 @@ class MetadataAPIInstance(object):
# Add flask routes inside our instance
def add_routes(self):
    """Register the Metadata API's Flask routes on ``self.mdapi``.

    Every per-VM route identifies the caller by the request's
    ``REMOTE_ADDR`` and resolves it with ``self.get_vm_details``.
    """

    def requesting_vm_details():
        # Look up the VM that issued the current request by its source address
        source_address = flask.request.environ["REMOTE_ADDR"]
        return self.get_vm_details(source_address)

    @self.mdapi.route("/", methods=["GET"])
    def api_root():
        # NOTE(review): 209 is not a registered HTTP status code; confirm
        # whether 200 was intended before changing it.
        return (
            flask.jsonify({"message": "PVC Provisioner Metadata API version 1"}),
            209,
        )

    @self.mdapi.route("/<version>/meta-data/", methods=["GET"])
    def api_metadata_root(version):
        metadata = """instance-id\nname\nprofile"""
        return metadata, 200

    # NOTE(review): in the three routes below the returned value is None when
    # the looked-up details carry no such key; confirm how Flask should
    # respond in that case.
    @self.mdapi.route("/<version>/meta-data/instance-id", methods=["GET"])
    def api_metadata_instanceid(version):
        instance_id = requesting_vm_details().get("uuid", None)
        return instance_id, 200

    @self.mdapi.route("/<version>/meta-data/name", methods=["GET"])
    def api_metadata_hostname(version):
        vm_name = requesting_vm_details().get("name", None)
        return vm_name, 200

    @self.mdapi.route("/<version>/meta-data/profile", methods=["GET"])
    def api_metadata_profile(version):
        vm_profile = requesting_vm_details().get("profile", None)
        return vm_profile, 200

    @self.mdapi.route("/<version>/user-data", methods=["GET"])
    def api_userdata(version):
        vm_profile = requesting_vm_details().get("profile", None)
        # Get the userdata
        if vm_profile:
            userdata = self.get_profile_userdata(vm_profile)
            self.logger.out(
                "Returning userdata for profile {}".format(vm_profile),
                state="i",
                prefix="Metadata API",
            )
        else:
            userdata = None
        return flask.Response(userdata)
@ -92,46 +99,46 @@ class MetadataAPIInstance(object):
def launch_wsgi(self):
    """Create the gevent WSGI server for the Metadata API and serve requests.

    The server is stored on ``self.md_http_server``. Any exception raised
    while creating or running it is logged rather than propagated.
    """
    try:
        self.md_http_server = gevent.pywsgi.WSGIServer(
            ("169.254.169.254", 80),
            self.mdapi,
            log=sys.stdout,
            error_log=sys.stdout,
        )
        self.md_http_server.serve_forever()
    except Exception as e:
        self.logger.out("Error starting Metadata API: {}".format(e), state="e")
# WSGI start/stop
def start(self):
    """Launch the Metadata API by running ``self.launch_wsgi`` in a new thread.

    The thread object is kept on ``self.thread``.
    """
    # Launch Metadata API
    self.logger.out("Starting Metadata API at 169.254.169.254:80", state="i")
    self.thread = Thread(target=self.launch_wsgi)
    self.thread.start()
    self.logger.out("Successfully started Metadata API thread", state="o")
def stop(self):
    """Stop and close the Metadata API server, if one is set.

    Does nothing when ``self.md_http_server`` is falsy. On success the
    attribute is reset to ``None``; any exception raised while stopping is
    logged rather than propagated.
    """
    if not self.md_http_server:
        return

    self.logger.out("Stopping Metadata API at 169.254.169.254:80", state="i")
    try:
        self.md_http_server.stop()
        time.sleep(0.1)
        self.md_http_server.close()
        time.sleep(0.1)
        self.md_http_server = None
        self.logger.out("Successfully stopped Metadata API", state="o")
    except Exception as e:
        self.logger.out("Error stopping Metadata API: {}".format(e), state="e")
# Helper functions
def open_database(self):
    """Open a PostgreSQL connection using the ``metadata_postgresql_*`` settings.

    Returns:
        A ``(connection, cursor)`` tuple; the cursor is created with
        ``RealDictCursor`` as its cursor factory.
    """
    conn = psycopg2.connect(
        host=self.config["metadata_postgresql_host"],
        port=self.config["metadata_postgresql_port"],
        dbname=self.config["metadata_postgresql_dbname"],
        user=self.config["metadata_postgresql_user"],
        password=self.config["metadata_postgresql_password"],
    )
    cur = conn.cursor(cursor_factory=RealDictCursor)
    return conn, cur
@ -153,7 +160,7 @@ class MetadataAPIInstance(object):
data_raw = cur.fetchone()
self.close_database(conn, cur)
if data_raw is not None:
data = data_raw.get('userdata', None)
data = data_raw.get("userdata", None)
return data
else:
return None
@ -165,27 +172,31 @@ class MetadataAPIInstance(object):
# Figure out which server this is via the DHCP address
host_information = dict()
networks_managed = (x for x in networks if x.get('type') == 'managed')
networks_managed = (x for x in networks if x.get("type") == "managed")
for network in networks_managed:
network_leases = pvc_network.getNetworkDHCPLeases(self.zkhandler, network.get('vni'))
network_leases = pvc_network.getNetworkDHCPLeases(
self.zkhandler, network.get("vni")
)
for network_lease in network_leases:
information = pvc_network.getDHCPLeaseInformation(self.zkhandler, network.get('vni'), network_lease)
information = pvc_network.getDHCPLeaseInformation(
self.zkhandler, network.get("vni"), network_lease
)
try:
if information.get('ip4_address', None) == source_address:
if information.get("ip4_address", None) == source_address:
host_information = information
except Exception:
pass
# Get our real information on the host; now we can start querying about it
client_macaddr = host_information.get('mac_address', None)
client_macaddr = host_information.get("mac_address", None)
# Find the VM with that MAC address - we can't assume that the hostname is actually right
_discard, vm_list = pvc_vm.get_list(self.zkhandler, None, None, None, None)
vm_details = dict()
for vm in vm_list:
try:
for network in vm.get('networks'):
if network.get('mac', None) == client_macaddr:
for network in vm.get("networks"):
if network.get("mac", None) == client_macaddr:
vm_details = vm
except Exception:
pass

File diff suppressed because it is too large Load Diff

View File

@ -23,10 +23,10 @@ import daemon_lib.common as common
def boolToOnOff(state):
    """Translate a boolean-like value into ``"on"`` or ``"off"``.

    Only a truthy value whose string form is exactly ``"True"`` (the bool
    ``True`` or the string ``"True"``) yields ``"on"``; everything else,
    including other truthy values, yields ``"off"``.
    """
    if state and str(state) == "True":
        return "on"
    return "off"
class SRIOVVFInstance(object):
@ -39,12 +39,20 @@ class SRIOVVFInstance(object):
self.this_node = this_node
self.myhostname = self.this_node.name
self.pf = self.zkhandler.read(('node.sriov.vf', self.myhostname, 'sriov_vf.pf', self.vf))
self.mtu = self.zkhandler.read(('node.sriov.vf', self.myhostname, 'sriov_vf.mtu', self.vf))
self.vfid = self.vf.replace('{}v'.format(self.pf), '')
self.pf = self.zkhandler.read(
("node.sriov.vf", self.myhostname, "sriov_vf.pf", self.vf)
)
self.mtu = self.zkhandler.read(
("node.sriov.vf", self.myhostname, "sriov_vf.mtu", self.vf)
)
self.vfid = self.vf.replace("{}v".format(self.pf), "")
self.logger.out('Setting MTU to {}'.format(self.mtu), state='i', prefix='SR-IOV VF {}'.format(self.vf))
common.run_os_command('ip link set {} mtu {}'.format(self.vf, self.mtu))
self.logger.out(
"Setting MTU to {}".format(self.mtu),
state="i",
prefix="SR-IOV VF {}".format(self.vf),
)
common.run_os_command("ip link set {} mtu {}".format(self.vf, self.mtu))
# These properties are set via the DataWatch functions, to ensure they are configured on the system
self.mac = None
@ -58,153 +66,244 @@ class SRIOVVFInstance(object):
self.query_rss = None
# Zookeeper handlers for changed configs
@self.zkhandler.zk_conn.DataWatch(
    self.zkhandler.schema.path("node.sriov.vf", self.myhostname)
    + self.zkhandler.schema.path("sriov_vf.mac", self.vf)
)
def watch_vf_mac(data, stat, event=""):
    """Mirror the watched MAC address key into ``self.mac``."""
    if event and event.type == "DELETED":
        # The key has been deleted after existing before; terminate this watcher
        # because this class instance is about to be reaped in Daemon.py
        return False

    try:
        data = data.decode("ascii")
    except AttributeError:
        # Nothing decodable was delivered; fall back to the all-zero MAC
        data = "00:00:00:00:00:00"

    if data != self.mac:
        self.mac = data
@self.zkhandler.zk_conn.DataWatch(
    self.zkhandler.schema.path("node.sriov.vf", self.myhostname)
    + self.zkhandler.schema.path("sriov_vf.config.vlan_id", self.vf)
)
def watch_vf_vlan_id(data, stat, event=""):
    """Apply a changed vLAN ID for this VF via ``ip link``."""
    if event and event.type == "DELETED":
        # The key has been deleted after existing before; terminate this watcher
        # because this class instance is about to be reaped in Daemon.py
        return False

    try:
        data = data.decode("ascii")
    except AttributeError:
        # Nothing decodable was delivered; fall back to "0"
        data = "0"

    if data != self.vlan_id:
        self.vlan_id = data
        self.logger.out(
            "Setting vLAN ID to {}".format(self.vlan_id),
            state="i",
            prefix="SR-IOV VF {}".format(self.vf),
        )
        # The vLAN ID and QoS are always passed together in one command
        common.run_os_command(
            "ip link set {} vf {} vlan {} qos {}".format(
                self.pf, self.vfid, self.vlan_id, self.vlan_qos
            )
        )
@self.zkhandler.zk_conn.DataWatch(
    self.zkhandler.schema.path("node.sriov.vf", self.myhostname)
    + self.zkhandler.schema.path("sriov_vf.config.vlan_qos", self.vf)
)
def watch_vf_vlan_qos(data, stat, event=""):
    """Apply a changed vLAN QoS value for this VF via ``ip link``."""
    if event and event.type == "DELETED":
        # The key has been deleted after existing before; terminate this watcher
        # because this class instance is about to be reaped in Daemon.py
        return False

    try:
        data = data.decode("ascii")
    except AttributeError:
        # Nothing decodable was delivered; fall back to "0"
        data = "0"

    if data != self.vlan_qos:
        self.vlan_qos = data
        self.logger.out(
            "Setting vLAN QOS to {}".format(self.vlan_qos),
            state="i",
            prefix="SR-IOV VF {}".format(self.vf),
        )
        # The vLAN ID and QoS are always passed together in one command
        common.run_os_command(
            "ip link set {} vf {} vlan {} qos {}".format(
                self.pf, self.vfid, self.vlan_id, self.vlan_qos
            )
        )
@self.zkhandler.zk_conn.DataWatch(
    self.zkhandler.schema.path("node.sriov.vf", self.myhostname)
    + self.zkhandler.schema.path("sriov_vf.config.tx_rate_min", self.vf)
)
def watch_vf_tx_rate_min(data, stat, event=""):
    """Apply a changed minimum TX rate for this VF via ``ip link``."""
    if event and event.type == "DELETED":
        # The key has been deleted after existing before; terminate this watcher
        # because this class instance is about to be reaped in Daemon.py
        return False

    try:
        data = data.decode("ascii")
    except AttributeError:
        # Nothing decodable was delivered; fall back to "0"
        data = "0"

    if data != self.tx_rate_min:
        self.tx_rate_min = data
        self.logger.out(
            "Setting minimum TX rate to {}".format(self.tx_rate_min),
            state="i",
            prefix="SR-IOV VF {}".format(self.vf),
        )
        common.run_os_command(
            "ip link set {} vf {} min_tx_rate {}".format(
                self.pf, self.vfid, self.tx_rate_min
            )
        )
@self.zkhandler.zk_conn.DataWatch(
    self.zkhandler.schema.path("node.sriov.vf", self.myhostname)
    + self.zkhandler.schema.path("sriov_vf.config.tx_rate_max", self.vf)
)
def watch_vf_tx_rate_max(data, stat, event=""):
    """Apply a changed maximum TX rate for this VF via ``ip link``."""
    if event and event.type == "DELETED":
        # The key has been deleted after existing before; terminate this watcher
        # because this class instance is about to be reaped in Daemon.py
        return False

    try:
        data = data.decode("ascii")
    except AttributeError:
        # Nothing decodable was delivered; fall back to "0"
        data = "0"

    if data != self.tx_rate_max:
        self.tx_rate_max = data
        self.logger.out(
            "Setting maximum TX rate to {}".format(self.tx_rate_max),
            state="i",
            prefix="SR-IOV VF {}".format(self.vf),
        )
        common.run_os_command(
            "ip link set {} vf {} max_tx_rate {}".format(
                self.pf, self.vfid, self.tx_rate_max
            )
        )
@self.zkhandler.zk_conn.DataWatch(
    self.zkhandler.schema.path("node.sriov.vf", self.myhostname)
    + self.zkhandler.schema.path("sriov_vf.config.spoof_check", self.vf)
)
def watch_vf_spoof_check(data, stat, event=""):
    """Apply a changed spoof-checking setting for this VF via ``ip link``."""
    if event and event.type == "DELETED":
        # The key has been deleted after existing before; terminate this watcher
        # because this class instance is about to be reaped in Daemon.py
        return False

    try:
        data = data.decode("ascii")
    except AttributeError:
        # Nothing decodable was delivered; fall back to "0"
        data = "0"

    if data != self.spoof_check:
        self.spoof_check = data
        self.logger.out(
            "Setting spoof checking {}".format(boolToOnOff(self.spoof_check)),
            state="i",
            prefix="SR-IOV VF {}".format(self.vf),
        )
        # The stored value is translated to on/off for the command
        common.run_os_command(
            "ip link set {} vf {} spoofchk {}".format(
                self.pf, self.vfid, boolToOnOff(self.spoof_check)
            )
        )
@self.zkhandler.zk_conn.DataWatch(
    self.zkhandler.schema.path("node.sriov.vf", self.myhostname)
    + self.zkhandler.schema.path("sriov_vf.config.link_state", self.vf)
)
def watch_vf_link_state(data, stat, event=""):
    """Apply a changed link state for this VF via ``ip link``."""
    if event and event.type == "DELETED":
        # The key has been deleted after existing before; terminate this watcher
        # because this class instance is about to be reaped in Daemon.py
        return False

    try:
        data = data.decode("ascii")
    except AttributeError:
        # Nothing decodable was delivered; fall back to "on"
        data = "on"

    if data != self.link_state:
        self.link_state = data
        # Log the same raw value that is handed to the command below.
        # Routing it through boolToOnOff() reported "off" for every value
        # other than True/"True", contradicting what was actually applied.
        self.logger.out(
            "Setting link state to {}".format(self.link_state),
            state="i",
            prefix="SR-IOV VF {}".format(self.vf),
        )
        common.run_os_command(
            "ip link set {} vf {} state {}".format(
                self.pf, self.vfid, self.link_state
            )
        )
@self.zkhandler.zk_conn.DataWatch(
    self.zkhandler.schema.path("node.sriov.vf", self.myhostname)
    + self.zkhandler.schema.path("sriov_vf.config.trust", self.vf)
)
def watch_vf_trust(data, stat, event=""):
    """Apply a changed trust mode for this VF via ``ip link``."""
    if event and event.type == "DELETED":
        # The key has been deleted after existing before; terminate this watcher
        # because this class instance is about to be reaped in Daemon.py
        return False

    try:
        data = data.decode("ascii")
    except AttributeError:
        # Nothing decodable was delivered; fall back to "off"
        data = "off"

    if data != self.trust:
        self.trust = data
        self.logger.out(
            "Setting trust mode {}".format(boolToOnOff(self.trust)),
            state="i",
            prefix="SR-IOV VF {}".format(self.vf),
        )
        # The stored value is translated to on/off for the command
        common.run_os_command(
            "ip link set {} vf {} trust {}".format(
                self.pf, self.vfid, boolToOnOff(self.trust)
            )
        )
@self.zkhandler.zk_conn.DataWatch(
    self.zkhandler.schema.path("node.sriov.vf", self.myhostname)
    + self.zkhandler.schema.path("sriov_vf.config.query_rss", self.vf)
)
def watch_vf_query_rss(data, stat, event=""):
    """Apply a changed RSS query setting for this VF via ``ip link``."""
    if event and event.type == "DELETED":
        # The key has been deleted after existing before; terminate this watcher
        # because this class instance is about to be reaped in Daemon.py
        return False

    try:
        data = data.decode("ascii")
    except AttributeError:
        # Nothing decodable was delivered; fall back to "off"
        data = "off"

    if data != self.query_rss:
        self.query_rss = data
        self.logger.out(
            "Setting RSS query ability {}".format(boolToOnOff(self.query_rss)),
            state="i",
            prefix="SR-IOV VF {}".format(self.vf),
        )
        # The stored value is translated to on/off for the command
        common.run_os_command(
            "ip link set {} vf {} query_rss {}".format(
                self.pf, self.vfid, boolToOnOff(self.query_rss)
            )
        )

View File

@ -33,22 +33,26 @@ class VMConsoleWatcherInstance(object):
self.domname = domname
self.zkhandler = zkhandler
self.config = config
self.logfile = '{}/{}.log'.format(config['console_log_directory'], self.domname)
self.console_log_lines = config['console_log_lines']
self.logfile = "{}/{}.log".format(config["console_log_directory"], self.domname)
self.console_log_lines = config["console_log_lines"]
self.logger = logger
self.this_node = this_node
# Try to append (create) the logfile and set its permissions
open(self.logfile, 'a').close()
open(self.logfile, "a").close()
os.chmod(self.logfile, 0o600)
try:
self.logdeque = deque(open(self.logfile), self.console_log_lines)
except UnicodeDecodeError:
# There is corruption in the log file; overwrite it
self.logger.out('Failed to decode console log file; clearing existing file', state='w', prefix='Domain {}'.format(self.domuuid))
with open(self.logfile, 'w') as lfh:
lfh.write('\n')
self.logger.out(
"Failed to decode console log file; clearing existing file",
state="w",
prefix="Domain {}".format(self.domuuid),
)
with open(self.logfile, "w") as lfh:
lfh.write("\n")
self.logdeque = deque(open(self.logfile), self.console_log_lines)
self.stamp = None
@ -66,13 +70,19 @@ class VMConsoleWatcherInstance(object):
def start(self):
    """Clear the stop flag and run ``self.run`` in a new thread.

    The thread object is kept on ``self.thread``.
    """
    self.thread_stopper.clear()
    self.thread = Thread(target=self.run, args=(), kwargs={})
    self.logger.out(
        "Starting VM log parser", state="i", prefix="Domain {}".format(self.domuuid)
    )
    self.thread.start()
# Stop execution thread
def stop(self):
if self.thread and self.thread.is_alive():
self.logger.out('Stopping VM log parser', state='i', prefix='Domain {}'.format(self.domuuid))
self.logger.out(
"Stopping VM log parser",
state="i",
prefix="Domain {}".format(self.domuuid),
)
self.thread_stopper.set()
# Do one final flush
self.update()
@ -91,11 +101,11 @@ class VMConsoleWatcherInstance(object):
self.fetch_lines()
# Update Zookeeper with the new loglines if they changed
if self.loglines != self.last_loglines:
self.zkhandler.write([
(('domain.console.log', self.domuuid), self.loglines)
])
self.zkhandler.write(
[(("domain.console.log", self.domuuid), self.loglines)]
)
self.last_loglines = self.loglines
def fetch_lines(self):
    """Reload the last ``self.console_log_lines`` lines of the console log.

    The lines are stored in ``self.logdeque`` and, joined into a single
    string, in ``self.loglines``.
    """
    # Close the file deterministically instead of leaking the handle
    with open(self.logfile) as logfile:
        self.logdeque = deque(logfile, self.console_log_lines)
    self.loglines = "".join(self.logdeque)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -36,8 +36,9 @@ class MalformedConfigurationError(Exception):
"""
An except when parsing the PVC Node daemon configuration file
"""
def __init__(self, error=None):
    """Build the error message from the underlying parse error.

    Args:
        error: The exception or description of what was malformed.
    """
    # Initialise the base Exception so ``args`` carries the original error
    super().__init__(error)
    self.msg = f"ERROR: Configuration file is malformed: {error}"
def __str__(self):
    """Return the formatted error message stored in ``self.msg``."""
    return str(self.msg)
@ -50,19 +51,19 @@ def get_static_data():
staticdata = list()
staticdata.append(str(cpu_count())) # CPU count
staticdata.append(
subprocess.run(
['uname', '-r'], stdout=subprocess.PIPE
).stdout.decode('ascii').strip()
subprocess.run(["uname", "-r"], stdout=subprocess.PIPE)
.stdout.decode("ascii")
.strip()
)
staticdata.append(
subprocess.run(
['uname', '-o'], stdout=subprocess.PIPE
).stdout.decode('ascii').strip()
subprocess.run(["uname", "-o"], stdout=subprocess.PIPE)
.stdout.decode("ascii")
.strip()
)
staticdata.append(
subprocess.run(
['uname', '-m'], stdout=subprocess.PIPE
).stdout.decode('ascii').strip()
subprocess.run(["uname", "-m"], stdout=subprocess.PIPE)
.stdout.decode("ascii")
.strip()
)
return staticdata
@ -70,7 +71,7 @@ def get_static_data():
def get_configuration_path():
    """Return the configuration file path from ``PVCD_CONFIG_FILE``.

    When the environment variable is unset, an error is printed and the
    process is terminated with exit status 1 via ``os._exit``.
    """
    try:
        return os.environ["PVCD_CONFIG_FILE"]
    except KeyError:
        print('ERROR: The "PVCD_CONFIG_FILE" environment variable must be set.')
        os._exit(1)
@ -78,10 +79,10 @@ def get_configuration_path():
def get_hostname():
node_fqdn = gethostname()
node_hostname = node_fqdn.split('.', 1)[0]
node_domain = ''.join(node_fqdn.split('.', 1)[1:])
node_hostname = node_fqdn.split(".", 1)[0]
node_domain = "".join(node_fqdn.split(".", 1)[1:])
try:
node_id = findall(r'\d+', node_hostname)[-1]
node_id = findall(r"\d+", node_hostname)[-1]
except IndexError:
node_id = 0
@ -89,27 +90,33 @@ def get_hostname():
def validate_floating_ip(config, network):
    """Check that the floating IP configured for a network type is usable.

    Args:
        config: Mapping holding the ``<network>_network`` and
            ``<network>_floating_ip`` entries.
        network: One of ``"cluster"``, ``"storage"`` or ``"upstream"``.

    Returns:
        ``(True, "")`` when the floating address is a host address of the
        configured network, otherwise ``(False, message)``.
    """
    if network not in ["cluster", "storage", "upstream"]:
        return False, f'Specified network type "{network}" is not valid'

    floating_key = f"{network}_floating_ip"
    network_key = f"{network}_network"

    # Read both values once with .get() so that a missing key is reported as
    # an invalid value instead of raising KeyError from the error paths below
    network_value = config.get(network_key)
    floating_value = config.get(floating_key)

    # Verify the network provided is valid
    try:
        subnet = ip_network(network_value)
    except Exception:
        return (
            False,
            f"Network address {network_value} for {network_key} is not valid",
        )

    # Verify that the floating IP is valid (and in the network)
    try:
        floating_address = ip_address(floating_value.split("/")[0])
        # Membership is tested against hosts() directly, without first
        # materialising every host address into a list
        is_host = floating_address in subnet.hosts()
    except Exception:
        is_host = False

    if not is_host:
        return (
            False,
            f"Floating address {floating_value} for {floating_key} is not valid",
        )

    return True, ""
def get_configuration():
@ -120,11 +127,11 @@ def get_configuration():
print('Loading configuration from file "{}"'.format(pvcnoded_config_file))
with open(pvcnoded_config_file, 'r') as cfgfile:
with open(pvcnoded_config_file, "r") as cfgfile:
try:
o_config = yaml.load(cfgfile, Loader=yaml.SafeLoader)
except Exception as e:
print('ERROR: Failed to parse configuration file: {}'.format(e))
print("ERROR: Failed to parse configuration file: {}".format(e))
os._exit(1)
node_fqdn, node_hostname, node_domain, node_id = get_hostname()
@ -134,263 +141,287 @@ def get_configuration():
# Get the initial base configuration
try:
o_base = o_config['pvc']
o_cluster = o_config['pvc']['cluster']
o_base = o_config["pvc"]
o_cluster = o_config["pvc"]["cluster"]
except Exception as e:
raise MalformedConfigurationError(e)
config_general = {
'node': o_base.get('node', node_hostname),
'node_hostname': node_hostname,
'node_fqdn': node_fqdn,
'node_domain': node_domain,
'node_id': node_id,
'coordinators': o_cluster.get('coordinators', list()),
'debug': o_base.get('debug', False),
"node": o_base.get("node", node_hostname),
"node_hostname": node_hostname,
"node_fqdn": node_fqdn,
"node_domain": node_domain,
"node_id": node_id,
"coordinators": o_cluster.get("coordinators", list()),
"debug": o_base.get("debug", False),
}
config = {**config, **config_general}
# Get the functions configuration
try:
o_functions = o_config['pvc']['functions']
o_functions = o_config["pvc"]["functions"]
except Exception as e:
raise MalformedConfigurationError(e)
config_functions = {
'enable_hypervisor': o_functions.get('enable_hypervisor', False),
'enable_networking': o_functions.get('enable_networking', False),
'enable_storage': o_functions.get('enable_storage', False),
'enable_api': o_functions.get('enable_api', False),
"enable_hypervisor": o_functions.get("enable_hypervisor", False),
"enable_networking": o_functions.get("enable_networking", False),
"enable_storage": o_functions.get("enable_storage", False),
"enable_api": o_functions.get("enable_api", False),
}
config = {**config, **config_functions}
# Get the directory configuration
try:
o_directories = o_config['pvc']['system']['configuration']['directories']
o_directories = o_config["pvc"]["system"]["configuration"]["directories"]
except Exception as e:
raise MalformedConfigurationError(e)
config_directories = {
'dynamic_directory': o_directories.get('dynamic_directory', None),
'log_directory': o_directories.get('log_directory', None),
'console_log_directory': o_directories.get('console_log_directory', None),
"dynamic_directory": o_directories.get("dynamic_directory", None),
"log_directory": o_directories.get("log_directory", None),
"console_log_directory": o_directories.get("console_log_directory", None),
}
# Define our dynamic directory schema
config_directories['dnsmasq_dynamic_directory'] = config_directories['dynamic_directory'] + '/dnsmasq'
config_directories['pdns_dynamic_directory'] = config_directories['dynamic_directory'] + '/pdns'
config_directories['nft_dynamic_directory'] = config_directories['dynamic_directory'] + '/nft'
config_directories["dnsmasq_dynamic_directory"] = (
config_directories["dynamic_directory"] + "/dnsmasq"
)
config_directories["pdns_dynamic_directory"] = (
config_directories["dynamic_directory"] + "/pdns"
)
config_directories["nft_dynamic_directory"] = (
config_directories["dynamic_directory"] + "/nft"
)
# Define our log directory schema
config_directories['dnsmasq_log_directory'] = config_directories['log_directory'] + '/dnsmasq'
config_directories['pdns_log_directory'] = config_directories['log_directory'] + '/pdns'
config_directories['nft_log_directory'] = config_directories['log_directory'] + '/nft'
config_directories["dnsmasq_log_directory"] = (
config_directories["log_directory"] + "/dnsmasq"
)
config_directories["pdns_log_directory"] = (
config_directories["log_directory"] + "/pdns"
)
config_directories["nft_log_directory"] = (
config_directories["log_directory"] + "/nft"
)
config = {**config, **config_directories}
# Get the logging configuration
try:
o_logging = o_config['pvc']['system']['configuration']['logging']
o_logging = o_config["pvc"]["system"]["configuration"]["logging"]
except Exception as e:
raise MalformedConfigurationError(e)
config_logging = {
'file_logging': o_logging.get('file_logging', False),
'stdout_logging': o_logging.get('stdout_logging', False),
'zookeeper_logging': o_logging.get('zookeeper_logging', False),
'log_colours': o_logging.get('log_colours', False),
'log_dates': o_logging.get('log_dates', False),
'log_keepalives': o_logging.get('log_keepalives', False),
'log_keepalive_cluster_details': o_logging.get('log_keepalive_cluster_details', False),
'log_keepalive_storage_details': o_logging.get('log_keepalive_storage_details', False),
'console_log_lines': o_logging.get('console_log_lines', False),
'node_log_lines': o_logging.get('node_log_lines', False),
"file_logging": o_logging.get("file_logging", False),
"stdout_logging": o_logging.get("stdout_logging", False),
"zookeeper_logging": o_logging.get("zookeeper_logging", False),
"log_colours": o_logging.get("log_colours", False),
"log_dates": o_logging.get("log_dates", False),
"log_keepalives": o_logging.get("log_keepalives", False),
"log_keepalive_cluster_details": o_logging.get(
"log_keepalive_cluster_details", False
),
"log_keepalive_storage_details": o_logging.get(
"log_keepalive_storage_details", False
),
"console_log_lines": o_logging.get("console_log_lines", False),
"node_log_lines": o_logging.get("node_log_lines", False),
}
config = {**config, **config_logging}
# Get the interval configuration
try:
o_intervals = o_config['pvc']['system']['intervals']
o_intervals = o_config["pvc"]["system"]["intervals"]
except Exception as e:
raise MalformedConfigurationError(e)
config_intervals = {
'vm_shutdown_timeout': int(o_intervals.get('vm_shutdown_timeout', 60)),
'keepalive_interval': int(o_intervals.get('keepalive_interval', 5)),
'fence_intervals': int(o_intervals.get('fence_intervals', 6)),
'suicide_intervals': int(o_intervals.get('suicide_interval', 0)),
"vm_shutdown_timeout": int(o_intervals.get("vm_shutdown_timeout", 60)),
"keepalive_interval": int(o_intervals.get("keepalive_interval", 5)),
"fence_intervals": int(o_intervals.get("fence_intervals", 6)),
"suicide_intervals": int(o_intervals.get("suicide_interval", 0)),
}
config = {**config, **config_intervals}
# Get the fencing configuration
try:
o_fencing = o_config['pvc']['system']['fencing']
o_fencing_actions = o_fencing['actions']
o_fencing_ipmi = o_fencing['ipmi']
o_fencing = o_config["pvc"]["system"]["fencing"]
o_fencing_actions = o_fencing["actions"]
o_fencing_ipmi = o_fencing["ipmi"]
except Exception as e:
raise MalformedConfigurationError(e)
config_fencing = {
'successful_fence': o_fencing_actions.get('successful_fence', None),
'failed_fence': o_fencing_actions.get('failed_fence', None),
'ipmi_hostname': o_fencing_ipmi.get('host', f'{node_hostname}-lom.{node_domain}'),
'ipmi_username': o_fencing_ipmi.get('user', 'null'),
'ipmi_password': o_fencing_ipmi.get('pass', 'null'),
"successful_fence": o_fencing_actions.get("successful_fence", None),
"failed_fence": o_fencing_actions.get("failed_fence", None),
"ipmi_hostname": o_fencing_ipmi.get(
"host", f"{node_hostname}-lom.{node_domain}"
),
"ipmi_username": o_fencing_ipmi.get("user", "null"),
"ipmi_password": o_fencing_ipmi.get("pass", "null"),
}
config = {**config, **config_fencing}
# Get the migration configuration
try:
o_migration = o_config['pvc']['system']['migration']
o_migration = o_config["pvc"]["system"]["migration"]
except Exception as e:
raise MalformedConfigurationError(e)
config_migration = {
'migration_target_selector': o_migration.get('target_selector', 'mem'),
"migration_target_selector": o_migration.get("target_selector", "mem"),
}
config = {**config, **config_migration}
if config['enable_networking']:
if config["enable_networking"]:
# Get the node networks configuration
try:
o_networks = o_config['pvc']['cluster']['networks']
o_network_cluster = o_networks['cluster']
o_network_storage = o_networks['storage']
o_network_upstream = o_networks['upstream']
o_sysnetworks = o_config['pvc']['system']['configuration']['networking']
o_sysnetwork_cluster = o_sysnetworks['cluster']
o_sysnetwork_storage = o_sysnetworks['storage']
o_sysnetwork_upstream = o_sysnetworks['upstream']
o_networks = o_config["pvc"]["cluster"]["networks"]
o_network_cluster = o_networks["cluster"]
o_network_storage = o_networks["storage"]
o_network_upstream = o_networks["upstream"]
o_sysnetworks = o_config["pvc"]["system"]["configuration"]["networking"]
o_sysnetwork_cluster = o_sysnetworks["cluster"]
o_sysnetwork_storage = o_sysnetworks["storage"]
o_sysnetwork_upstream = o_sysnetworks["upstream"]
except Exception as e:
raise MalformedConfigurationError(e)
config_networks = {
'cluster_domain': o_network_cluster.get('domain', None),
'cluster_network': o_network_cluster.get('network', None),
'cluster_floating_ip': o_network_cluster.get('floating_ip', None),
'cluster_dev': o_sysnetwork_cluster.get('device', None),
'cluster_mtu': o_sysnetwork_cluster.get('mtu', None),
'cluster_dev_ip': o_sysnetwork_cluster.get('address', None),
'storage_domain': o_network_storage.get('domain', None),
'storage_network': o_network_storage.get('network', None),
'storage_floating_ip': o_network_storage.get('floating_ip', None),
'storage_dev': o_sysnetwork_storage.get('device', None),
'storage_mtu': o_sysnetwork_storage.get('mtu', None),
'storage_dev_ip': o_sysnetwork_storage.get('address', None),
'upstream_domain': o_network_upstream.get('domain', None),
'upstream_network': o_network_upstream.get('network', None),
'upstream_floating_ip': o_network_upstream.get('floating_ip', None),
'upstream_gateway': o_network_upstream.get('gateway', None),
'upstream_dev': o_sysnetwork_upstream.get('device', None),
'upstream_mtu': o_sysnetwork_upstream.get('mtu', None),
'upstream_dev_ip': o_sysnetwork_upstream.get('address', None),
'bridge_dev': o_sysnetworks.get('bridge_device', None),
'bridge_mtu': o_sysnetworks.get('bridge_mtu', None),
'enable_sriov': o_sysnetworks.get('sriov_enable', False),
'sriov_device': o_sysnetworks.get('sriov_device', list())
"cluster_domain": o_network_cluster.get("domain", None),
"cluster_network": o_network_cluster.get("network", None),
"cluster_floating_ip": o_network_cluster.get("floating_ip", None),
"cluster_dev": o_sysnetwork_cluster.get("device", None),
"cluster_mtu": o_sysnetwork_cluster.get("mtu", None),
"cluster_dev_ip": o_sysnetwork_cluster.get("address", None),
"storage_domain": o_network_storage.get("domain", None),
"storage_network": o_network_storage.get("network", None),
"storage_floating_ip": o_network_storage.get("floating_ip", None),
"storage_dev": o_sysnetwork_storage.get("device", None),
"storage_mtu": o_sysnetwork_storage.get("mtu", None),
"storage_dev_ip": o_sysnetwork_storage.get("address", None),
"upstream_domain": o_network_upstream.get("domain", None),
"upstream_network": o_network_upstream.get("network", None),
"upstream_floating_ip": o_network_upstream.get("floating_ip", None),
"upstream_gateway": o_network_upstream.get("gateway", None),
"upstream_dev": o_sysnetwork_upstream.get("device", None),
"upstream_mtu": o_sysnetwork_upstream.get("mtu", None),
"upstream_dev_ip": o_sysnetwork_upstream.get("address", None),
"bridge_dev": o_sysnetworks.get("bridge_device", None),
"bridge_mtu": o_sysnetworks.get("bridge_mtu", None),
"enable_sriov": o_sysnetworks.get("sriov_enable", False),
"sriov_device": o_sysnetworks.get("sriov_device", list()),
}
if config_networks['bridge_mtu'] is None:
if config_networks["bridge_mtu"] is None:
# Read the current MTU of bridge_dev and set bridge_mtu to it; avoids weird resets
retcode, stdout, stderr = common.run_os_command(f"ip -json link show dev {config_networks['bridge_dev']}")
current_bridge_mtu = loads(stdout)[0]['mtu']
print(f"Config key bridge_mtu not explicitly set; using live MTU {current_bridge_mtu} from {config_networks['bridge_dev']}")
config_networks['bridge_mtu'] = current_bridge_mtu
retcode, stdout, stderr = common.run_os_command(
f"ip -json link show dev {config_networks['bridge_dev']}"
)
current_bridge_mtu = loads(stdout)[0]["mtu"]
print(
f"Config key bridge_mtu not explicitly set; using live MTU {current_bridge_mtu} from {config_networks['bridge_dev']}"
)
config_networks["bridge_mtu"] = current_bridge_mtu
config = {**config, **config_networks}
for network_type in ['cluster', 'storage', 'upstream']:
for network_type in ["cluster", "storage", "upstream"]:
result, msg = validate_floating_ip(config, network_type)
if not result:
raise MalformedConfigurationError(msg)
address_key = '{}_dev_ip'.format(network_type)
network_key = f'{network_type}_network'
address_key = "{}_dev_ip".format(network_type)
network_key = f"{network_type}_network"
network = ip_network(config[network_key])
# With autoselection of addresses, construct an IP from the relevant network
if config[address_key] == 'by-id':
if config[address_key] == "by-id":
# The NodeID starts at 1, but indexes start at 0
address_id = int(config['node_id']) - 1
address_id = int(config["node_id"]) - 1
# Grab the nth address from the network
config[address_key] = '{}/{}'.format(list(network.hosts())[address_id], network.prefixlen)
config[address_key] = "{}/{}".format(
list(network.hosts())[address_id], network.prefixlen
)
# Validate the provided IP instead
else:
try:
address = ip_address(config[address_key].split('/')[0])
address = ip_address(config[address_key].split("/")[0])
if address not in list(network.hosts()):
raise
except Exception:
raise MalformedConfigurationError(
f'IP address {config[address_key]} for {address_key} is not valid'
f"IP address {config[address_key]} for {address_key} is not valid"
)
# Get the PowerDNS aggregator database configuration
try:
o_pdnsdb = o_config['pvc']['coordinator']['dns']['database']
o_pdnsdb = o_config["pvc"]["coordinator"]["dns"]["database"]
except Exception as e:
raise MalformedConfigurationError(e)
config_pdnsdb = {
'pdns_postgresql_host': o_pdnsdb.get('host', None),
'pdns_postgresql_port': o_pdnsdb.get('port', None),
'pdns_postgresql_dbname': o_pdnsdb.get('name', None),
'pdns_postgresql_user': o_pdnsdb.get('user', None),
'pdns_postgresql_password': o_pdnsdb.get('pass', None),
"pdns_postgresql_host": o_pdnsdb.get("host", None),
"pdns_postgresql_port": o_pdnsdb.get("port", None),
"pdns_postgresql_dbname": o_pdnsdb.get("name", None),
"pdns_postgresql_user": o_pdnsdb.get("user", None),
"pdns_postgresql_password": o_pdnsdb.get("pass", None),
}
config = {**config, **config_pdnsdb}
# Get the Cloud-Init Metadata database configuration
try:
o_metadatadb = o_config['pvc']['coordinator']['metadata']['database']
o_metadatadb = o_config["pvc"]["coordinator"]["metadata"]["database"]
except Exception as e:
raise MalformedConfigurationError(e)
config_metadatadb = {
'metadata_postgresql_host': o_metadatadb.get('host', None),
'metadata_postgresql_port': o_metadatadb.get('port', None),
'metadata_postgresql_dbname': o_metadatadb.get('name', None),
'metadata_postgresql_user': o_metadatadb.get('user', None),
'metadata_postgresql_password': o_metadatadb.get('pass', None),
"metadata_postgresql_host": o_metadatadb.get("host", None),
"metadata_postgresql_port": o_metadatadb.get("port", None),
"metadata_postgresql_dbname": o_metadatadb.get("name", None),
"metadata_postgresql_user": o_metadatadb.get("user", None),
"metadata_postgresql_password": o_metadatadb.get("pass", None),
}
config = {**config, **config_metadatadb}
if config['enable_storage']:
if config["enable_storage"]:
# Get the storage configuration
try:
o_storage = o_config['pvc']['system']['configuration']['storage']
o_storage = o_config["pvc"]["system"]["configuration"]["storage"]
except Exception as e:
raise MalformedConfigurationError(e)
config_storage = {
'ceph_config_file': o_storage.get('ceph_config_file', None),
'ceph_admin_keyring': o_storage.get('ceph_admin_keyring', None),
"ceph_config_file": o_storage.get("ceph_config_file", None),
"ceph_admin_keyring": o_storage.get("ceph_admin_keyring", None),
}
config = {**config, **config_storage}
# Add our node static data to the config
config['static_data'] = get_static_data()
config["static_data"] = get_static_data()
return config
def validate_directories(config):
    """Ensure the dynamic and log directory trees exist.

    Every directory is created independently with ``exist_ok=True``, so a
    base directory that already exists but lacks one of its subdirectories
    is completed, and calling this repeatedly is harmless.

    Args:
        config: Mapping holding the base "dynamic_directory" and
            "log_directory" paths plus their dnsmasq, pdns and nft
            subdirectory paths.
    """
    directory_keys = (
        "dynamic_directory",
        "dnsmasq_dynamic_directory",
        "pdns_dynamic_directory",
        "nft_dynamic_directory",
        "log_directory",
        "dnsmasq_log_directory",
        "pdns_log_directory",
        "nft_log_directory",
    )
    for directory_key in directory_keys:
        os.makedirs(config[directory_key], exist_ok=True)

View File

@ -35,74 +35,89 @@ def fence_node(node_name, zkhandler, config, logger):
failcount = 0
while failcount < failcount_limit:
# Wait 5 seconds
time.sleep(config['keepalive_interval'])
time.sleep(config["keepalive_interval"])
# Get the state
node_daemon_state = zkhandler.read(('node.state.daemon', node_name))
node_daemon_state = zkhandler.read(("node.state.daemon", node_name))
# Is it still 'dead'
if node_daemon_state == 'dead':
if node_daemon_state == "dead":
failcount += 1
logger.out('Node "{}" failed {}/{} saving throws'.format(node_name, failcount, failcount_limit), state='s')
logger.out(
'Node "{}" failed {}/{} saving throws'.format(
node_name, failcount, failcount_limit
),
state="s",
)
# It changed back to something else so it must be alive
else:
logger.out('Node "{}" passed a saving throw; canceling fence'.format(node_name), state='o')
logger.out(
'Node "{}" passed a saving throw; canceling fence'.format(node_name),
state="o",
)
return
logger.out('Fencing node "{}" via IPMI reboot signal'.format(node_name), state='s')
logger.out('Fencing node "{}" via IPMI reboot signal'.format(node_name), state="s")
# Get IPMI information
ipmi_hostname = zkhandler.read(('node.ipmi.hostname', node_name))
ipmi_username = zkhandler.read(('node.ipmi.username', node_name))
ipmi_password = zkhandler.read(('node.ipmi.password', node_name))
ipmi_hostname = zkhandler.read(("node.ipmi.hostname", node_name))
ipmi_username = zkhandler.read(("node.ipmi.username", node_name))
ipmi_password = zkhandler.read(("node.ipmi.password", node_name))
# Shoot it in the head
fence_status = reboot_via_ipmi(ipmi_hostname, ipmi_username, ipmi_password, logger)
# Hold to ensure the fence takes effect and system stabilizes
logger.out('Waiting {}s for fence of node "{}" to take effect'.format(config['keepalive_interval'], node_name), state='i')
time.sleep(config['keepalive_interval'])
logger.out(
'Waiting {}s for fence of node "{}" to take effect'.format(
config["keepalive_interval"], node_name
),
state="i",
)
time.sleep(config["keepalive_interval"])
if fence_status:
logger.out('Marking node "{}" as fenced'.format(node_name), state='i')
logger.out('Marking node "{}" as fenced'.format(node_name), state="i")
while True:
try:
zkhandler.write([
(('node.state.daemon', node_name), 'fenced')
])
zkhandler.write([(("node.state.daemon", node_name), "fenced")])
break
except Exception:
continue
# Force into secondary network state if needed
if node_name in config['coordinators']:
logger.out('Forcing secondary status for node "{}"'.format(node_name), state='i')
zkhandler.write([
(('node.state.router', node_name), 'secondary')
])
if zkhandler.read('base.config.primary_node') == node_name:
zkhandler.write([
('base.config.primary_node', 'none')
])
if node_name in config["coordinators"]:
logger.out(
'Forcing secondary status for node "{}"'.format(node_name), state="i"
)
zkhandler.write([(("node.state.router", node_name), "secondary")])
if zkhandler.read("base.config.primary_node") == node_name:
zkhandler.write([("base.config.primary_node", "none")])
# If the fence succeeded and successful_fence is migrate
if fence_status and config['successful_fence'] == 'migrate':
if fence_status and config["successful_fence"] == "migrate":
migrateFromFencedNode(zkhandler, node_name, config, logger)
# If the fence failed and failed_fence is migrate
if not fence_status and config['failed_fence'] == 'migrate' and config['suicide_intervals'] != '0':
if (
not fence_status
and config["failed_fence"] == "migrate"
and config["suicide_intervals"] != "0"
):
migrateFromFencedNode(zkhandler, node_name, config, logger)
# Migrate hosts away from a fenced node
def migrateFromFencedNode(zkhandler, node_name, config, logger):
logger.out('Migrating VMs from dead node "{}" to new hosts'.format(node_name), state='i')
logger.out(
'Migrating VMs from dead node "{}" to new hosts'.format(node_name), state="i"
)
# Get the list of VMs
dead_node_running_domains = zkhandler.read(('node.running_domains', node_name)).split()
dead_node_running_domains = zkhandler.read(
("node.running_domains", node_name)
).split()
# Set the node to a custom domainstate so we know what's happening
zkhandler.write([
(('node.state.domain', node_name), 'fence-flush')
])
zkhandler.write([(("node.state.domain", node_name), "fence-flush")])
# Migrate a VM after a flush
def fence_migrate_vm(dom_uuid):
@ -111,28 +126,40 @@ def migrateFromFencedNode(zkhandler, node_name, config, logger):
target_node = common.findTargetNode(zkhandler, dom_uuid)
if target_node is not None:
logger.out('Migrating VM "{}" to node "{}"'.format(dom_uuid, target_node), state='i')
zkhandler.write([
(('domain.state', dom_uuid), 'start'),
(('domain.node', dom_uuid), target_node),
(('domain.last_node', dom_uuid), node_name),
])
logger.out(
'Migrating VM "{}" to node "{}"'.format(dom_uuid, target_node),
state="i",
)
zkhandler.write(
[
(("domain.state", dom_uuid), "start"),
(("domain.node", dom_uuid), target_node),
(("domain.last_node", dom_uuid), node_name),
]
)
else:
logger.out('No target node found for VM "{}"; VM will autostart on next unflush/ready of current node'.format(dom_uuid), state='i')
zkhandler.write({
(('domain.state', dom_uuid), 'stopped'),
(('domain.meta.autostart', dom_uuid), 'True'),
})
logger.out(
'No target node found for VM "{}"; VM will autostart on next unflush/ready of current node'.format(
dom_uuid
),
state="i",
)
zkhandler.write(
{
(("domain.state", dom_uuid), "stopped"),
(("domain.meta.autostart", dom_uuid), "True"),
}
)
# Loop through the VMs
for dom_uuid in dead_node_running_domains:
fence_migrate_vm(dom_uuid)
# Set node in flushed state for easy remigrating when it comes back
zkhandler.write([
(('node.state.domain', node_name), 'flushed')
])
logger.out('All VMs flushed from dead node "{}" to new hosts'.format(node_name), state='i')
zkhandler.write([(("node.state.domain", node_name), "flushed")])
logger.out(
'All VMs flushed from dead node "{}" to new hosts'.format(node_name), state="i"
)
#
@ -140,68 +167,100 @@ def migrateFromFencedNode(zkhandler, node_name, config, logger):
#
def reboot_via_ipmi(ipmi_hostname, ipmi_user, ipmi_password, logger):
    """Power-cycle a dead node through IPMI and report whether it is fenced.

    The sequence is: power off, wait 5 seconds, check and log the power
    state, power on, wait 2 seconds, check the power state again. The final
    state, combined with whether the power-off command succeeded, decides
    the result.

    Args:
        ipmi_hostname: Host passed to ipmitool via ``-H``.
        ipmi_user: User passed to ipmitool via ``-U``.
        ipmi_password: Password passed to ipmitool via ``-P``.
        logger: Object exposing ``out(message, state=...)``.

    Returns:
        True when the fence is considered successful: the power-off
        succeeded and the chassis then reports on or off, or the power-off
        failed but the chassis reports off. False in every other case.
    """
    # Power off the node
    logger.out("Sending power off to dead node", state="i")
    ipmi_stop_retcode, _, ipmi_stop_stderr = _run_ipmi_chassis_power(
        ipmi_hostname, ipmi_user, ipmi_password, "off"
    )
    if ipmi_stop_retcode != 0:
        logger.out(f"Failed to power off dead node: {ipmi_stop_stderr}", state="e")

    time.sleep(5)

    # Check the chassis power state
    logger.out("Checking power state of dead node", state="i")
    ipmi_status_retcode, ipmi_status_stdout, _ = _run_ipmi_chassis_power(
        ipmi_hostname, ipmi_user, ipmi_password, "status"
    )
    if ipmi_status_retcode == 0:
        logger.out(
            f"Current chassis power state is: {ipmi_status_stdout.strip()}", state="i"
        )
    else:
        logger.out("Current chassis power state is: Unknown", state="w")

    # Power on the node
    logger.out("Sending power on to dead node", state="i")
    ipmi_start_retcode, _, ipmi_start_stderr = _run_ipmi_chassis_power(
        ipmi_hostname, ipmi_user, ipmi_password, "on"
    )
    if ipmi_start_retcode != 0:
        logger.out(f"Failed to power on dead node: {ipmi_start_stderr}", state="w")

    time.sleep(2)

    # Check the chassis power state
    logger.out("Checking power state of dead node", state="i")
    _, ipmi_status_stdout, _ = _run_ipmi_chassis_power(
        ipmi_hostname, ipmi_user, ipmi_password, "status"
    )
    power_state = ipmi_status_stdout.strip()

    if ipmi_stop_retcode == 0:
        if power_state == "Chassis Power is on":
            # We successfully rebooted the node and it is powered on; this is a successful fence
            logger.out("Successfully rebooted dead node", state="o")
            return True
        if power_state == "Chassis Power is off":
            # We successfully rebooted the node but it is powered off; this might be expected or not, but the node is confirmed off so we can call it a successful fence
            logger.out(
                "Chassis power is in confirmed off state after successfuly IPMI reboot; proceeding with fence-flush",
                state="o",
            )
            return True
        # We successfully rebooted the node but it is in some unknown power state; since this might indicate a silent failure, we must call it a failed fence
        logger.out(
            "Chassis power is in an unknown state ({}) after successful IPMI reboot; not performing fence-flush".format(
                power_state
            ),
            state="e",
        )
        return False

    if power_state == "Chassis Power is off":
        # We failed to reboot the node but it is powered off; it has probably suffered a serious hardware failure, but the node is confirmed off so we can call it a successful fence
        logger.out(
            "Chassis power is in confirmed off state after failed IPMI reboot; proceeding with fence-flush",
            state="o",
        )
        return True

    # We failed to reboot the node but it is in some unknown power state (including "on"); since this might indicate a silent failure, we must call it a failed fence
    logger.out(
        "Chassis power is not in confirmed off state after failed IPMI reboot; not performing fence-flush",
        state="e",
    )
    return False


def _run_ipmi_chassis_power(ipmi_hostname, ipmi_user, ipmi_password, action):
    """Run ``ipmitool ... chassis power <action>`` and return its result.

    Returns whatever ``common.run_os_command`` returns for the command,
    which callers unpack as ``(retcode, stdout, stderr)``.
    """
    # NOTE(review): the credentials are interpolated unquoted into the command
    # string and end up on the ipmitool command line; confirm how
    # common.run_os_command tokenizes its argument before changing this.
    ipmi_command = (
        "/usr/bin/ipmitool -I lanplus -H {} -U {} -P {} chassis power {}".format(
            ipmi_hostname, ipmi_user, ipmi_password, action
        )
    )
    return common.run_os_command(ipmi_command)
@ -209,7 +268,7 @@ def reboot_via_ipmi(ipmi_hostname, ipmi_user, ipmi_password, logger):
# Verify that IPMI connectivity to this host exists (used during node init)
#
def verify_ipmi(ipmi_hostname, ipmi_user, ipmi_password):
ipmi_command = f'/usr/bin/ipmitool -I lanplus -H {ipmi_hostname} -U {ipmi_user} -P {ipmi_password} chassis power status'
ipmi_command = f"/usr/bin/ipmitool -I lanplus -H {ipmi_hostname} -U {ipmi_user} -P {ipmi_password} chassis power status"
retcode, stdout, stderr = common.run_os_command(ipmi_command, timeout=2)
if retcode == 0 and stdout.strip() == "Chassis Power is on":
return True

File diff suppressed because it is too large Load Diff

View File

@ -23,14 +23,14 @@ import libvirt
def validate_libvirtd(logger, config):
    """Confirm the local Libvirt daemon accepts connections.

    When the hypervisor function is disabled in ``config`` nothing is
    checked and the result is True. Otherwise a connection to
    ``qemu+tcp://<node_hostname>/system`` is opened and closed again.

    Args:
        logger: Object exposing ``out(message, state=...)``.
        config: Mapping with "enable_hypervisor" and "node_hostname".

    Returns:
        False if opening or closing the connection raised, True otherwise.
    """
    # Nothing to validate on nodes that do not run the hypervisor function
    if not config["enable_hypervisor"]:
        return True

    libvirt_uri = f'qemu+tcp://{config["node_hostname"]}/system'
    logger.out(f"Connecting to Libvirt daemon at {libvirt_uri}", state="i")
    try:
        connection = libvirt.open(libvirt_uri)
        connection.close()
    except Exception as err:
        logger.out(f"Failed to connect to Libvirt daemon: {err}", state="e")
        return False

    return True

View File

@ -26,141 +26,192 @@ from os import makedirs
def setup_sriov(logger, config):
    """Enable SR-IOV virtual functions on the configured physical NICs.

    First tries to load ``vfio_iommu_type1`` with unsafe interrupts allowed
    (a failure is only logged as a warning). Then, for each entry in
    ``config["sriov_device"]``, writes the requested VF count to the
    device's ``sriov_numvfs`` sysfs file and, if the entry has an "mtu",
    brings the PF up with that MTU.

    Args:
        logger: Object exposing ``out(message, state=...)``.
        config: Mapping with a "sriov_device" list; each entry is a mapping
            with "phy", "vfcount" and optionally "mtu".
    """
    logger.out("Setting up SR-IOV device support", state="i")

    # Enable unsafe interrupts for the vfio_iommu_type1 kernel module
    try:
        common.run_os_command("modprobe vfio_iommu_type1 allow_unsafe_interrupts=1")
        with open(
            "/sys/module/vfio_iommu_type1/parameters/allow_unsafe_interrupts", "w"
        ) as mfh:
            mfh.write("Y")
    except Exception:
        logger.out(
            "Failed to enable vfio_iommu_type1 kernel module; SR-IOV may fail",
            state="w",
        )

    # Loop through our SR-IOV NICs and enable the numvfs for each
    for device in config["sriov_device"]:
        logger.out(
            f'Preparing SR-IOV PF {device["phy"]} with {device["vfcount"]} VFs',
            state="i",
        )
        numvfs_path = f'/sys/class/net/{device["phy"]}/device/sriov_numvfs'

        # Reset per device so the OSError message below never hits an unbound
        # name (read failed with e.g. PermissionError) or reports the count
        # left over from the previous device.
        current_vf_count = "unknown"
        try:
            with open(numvfs_path, "r") as vfh:
                current_vf_count = vfh.read().strip()
            with open(numvfs_path, "w") as vfh:
                vfh.write(str(device["vfcount"]))
        except FileNotFoundError:
            logger.out(
                f'Failed to open SR-IOV configuration for PF {device["phy"]}; device may not support SR-IOV',
                state="w",
            )
        except OSError:
            logger.out(
                f'Failed to set SR-IOV VF count for PF {device["phy"]} to {device["vfcount"]}; already set to {current_vf_count}',
                state="w",
            )

        if device.get("mtu", None) is not None:
            logger.out(
                f'Setting SR-IOV PF {device["phy"]} to MTU {device["mtu"]}', state="i"
            )
            common.run_os_command(f'ip link set {device["phy"]} mtu {device["mtu"]} up')
def _create_bridge(bridge, device, mtu, address):
    """Create `bridge`, attach `device`, bring it up at `mtu` and add `address`."""
    common.run_os_command(f"brctl addbr {bridge}")
    common.run_os_command(f"brctl addif {bridge} {device}")
    common.run_os_command(f"ip link set {bridge} mtu {mtu} up")
    common.run_os_command(f"ip address add {address} dev {bridge}")


def setup_interfaces(logger, config):
    """Bring up the Cluster, Storage and Upstream networks and apply sysctl tweaks.

    Reads the `<net>_dev`, `<net>_mtu` and `<net>_dev_ip` keys for each of the
    cluster, storage and upstream networks, plus `upstream_gateway`, from
    `config`. Every network gets its physical interface brought up at the
    configured MTU. The Cluster network always gets the `brcluster` bridge;
    Storage and Upstream get their own bridge (`brstorage`/`brupstream`) only
    when they use a different device than the Cluster network, otherwise their
    IP (if different) is added to `brcluster`.

    All commands go through `common.run_os_command`; this function does not
    inspect their results. It finishes by stopping dnsmasq and sleeping for
    3 seconds.
    """
    # Set up the Cluster interface
    cluster_dev = config["cluster_dev"]
    cluster_mtu = config["cluster_mtu"]
    cluster_dev_ip = config["cluster_dev_ip"]

    logger.out(
        f"Setting up Cluster network interface {cluster_dev} with MTU {cluster_mtu}",
        state="i",
    )
    common.run_os_command(f"ip link set {cluster_dev} mtu {cluster_mtu} up")

    logger.out(
        f"Setting up Cluster network bridge on interface {cluster_dev} with IP {cluster_dev_ip}",
        state="i",
    )
    _create_bridge("brcluster", cluster_dev, cluster_mtu, cluster_dev_ip)

    # Set up the Storage interface
    storage_dev = config["storage_dev"]
    storage_mtu = config["storage_mtu"]
    storage_dev_ip = config["storage_dev_ip"]

    logger.out(
        f"Setting up Storage network interface {storage_dev} with MTU {storage_mtu}",
        state="i",
    )
    common.run_os_command(f"ip link set {storage_dev} mtu {storage_mtu} up")

    if storage_dev == cluster_dev:
        # Shared device: reuse the Cluster bridge, adding the IP only if distinct
        if storage_dev_ip != cluster_dev_ip:
            logger.out(
                f"Setting up Storage network on Cluster network bridge with IP {storage_dev_ip}",
                state="i",
            )
            common.run_os_command(f"ip address add {storage_dev_ip} dev brcluster")
    else:
        logger.out(
            f"Setting up Storage network bridge on interface {storage_dev} with IP {storage_dev_ip}",
            state="i",
        )
        _create_bridge("brstorage", storage_dev, storage_mtu, storage_dev_ip)

    # Set up the Upstream interface
    upstream_dev = config["upstream_dev"]
    upstream_mtu = config["upstream_mtu"]
    upstream_dev_ip = config["upstream_dev_ip"]

    logger.out(
        f"Setting up Upstream network interface {upstream_dev} with MTU {upstream_mtu}",
        state="i",
    )
    # Previously this step was logged but never executed, leaving the Upstream
    # device at its prior MTU/link state; mirror the Cluster and Storage handling.
    common.run_os_command(f"ip link set {upstream_dev} mtu {upstream_mtu} up")

    if upstream_dev == cluster_dev:
        # Shared device: reuse the Cluster bridge, adding the IP only if distinct
        if upstream_dev_ip != cluster_dev_ip:
            logger.out(
                f"Setting up Upstream network on Cluster network bridge with IP {upstream_dev_ip}",
                state="i",
            )
            common.run_os_command(f"ip address add {upstream_dev_ip} dev brcluster")
    else:
        logger.out(
            f"Setting up Upstream network bridge on interface {upstream_dev} with IP {upstream_dev_ip}",
            state="i",
        )
        _create_bridge("brupstream", upstream_dev, upstream_mtu, upstream_dev_ip)

    upstream_gateway = config["upstream_gateway"]
    if upstream_gateway is not None:
        logger.out(
            f"Setting up Upstream network default gateway IP {upstream_gateway}",
            state="i",
        )
        # The default route goes out whichever bridge carries the Upstream network
        gateway_bridge = "brcluster" if upstream_dev == cluster_dev else "brupstream"
        common.run_os_command(
            f"ip route add default via {upstream_gateway} dev {gateway_bridge}"
        )

    # Set up sysctl tweaks to optimize networking
    # NOTE(review): several of the net.ipv6 keys below (ip_forward,
    # send_redirects, rp_filter) do not appear to exist in the kernel's
    # ip-sysctl documentation and are presumably no-ops; they are kept
    # unchanged here - confirm and replace or remove.
    # Enable routing functions
    common.run_os_command("sysctl net.ipv4.ip_forward=1")
    common.run_os_command("sysctl net.ipv6.ip_forward=1")

    # Enable send redirects
    common.run_os_command("sysctl net.ipv4.conf.all.send_redirects=1")
    common.run_os_command("sysctl net.ipv4.conf.default.send_redirects=1")
    common.run_os_command("sysctl net.ipv6.conf.all.send_redirects=1")
    common.run_os_command("sysctl net.ipv6.conf.default.send_redirects=1")

    # Accept source routes
    common.run_os_command("sysctl net.ipv4.conf.all.accept_source_route=1")
    common.run_os_command("sysctl net.ipv4.conf.default.accept_source_route=1")
    common.run_os_command("sysctl net.ipv6.conf.all.accept_source_route=1")
    common.run_os_command("sysctl net.ipv6.conf.default.accept_source_route=1")

    # Disable RP filtering on Cluster and Upstream interfaces (to allow traffic pivoting)
    # The slash-separated key form is used so that interface names containing
    # a dot (e.g. VLAN devices like "bond0.100") are not split into extra
    # key components by sysctl.
    for family in ("ipv4", "ipv6"):
        for interface in (cluster_dev, "brcluster", upstream_dev, "brupstream"):
            common.run_os_command(f"sysctl net/{family}/conf/{interface}/rp_filter=0")

    # Stop DNSMasq if it is running
    common.run_os_command("systemctl stop dnsmasq.service")

    logger.out("Waiting 3 seconds for networking to come up", state="s")
    sleep(3)
def create_nft_configuration(logger, config):
if config['enable_networking']:
logger.out('Creating NFT firewall configuration', state='i')
if config["enable_networking"]:
logger.out("Creating NFT firewall configuration", state="i")
dynamic_directory = config['nft_dynamic_directory']
dynamic_directory = config["nft_dynamic_directory"]
# Create directories
makedirs(f'{dynamic_directory}/networks', exist_ok=True)
makedirs(f'{dynamic_directory}/static', exist_ok=True)
makedirs(f"{dynamic_directory}/networks", exist_ok=True)
makedirs(f"{dynamic_directory}/static", exist_ok=True)
# Set up the base rules
nftables_base_rules = f"""# Base rules
@ -175,7 +226,7 @@ def create_nft_configuration(logger, config):
"""
# Write the base firewall config
nftables_base_filename = f'{dynamic_directory}/base.nft'
with open(nftables_base_filename, 'w') as nftfh:
nftables_base_filename = f"{dynamic_directory}/base.nft"
with open(nftables_base_filename, "w") as nftfh:
nftfh.write(nftables_base_rules)
common.reload_firewall_rules(nftables_base_filename, logger)

View File

@ -24,45 +24,49 @@ from time import sleep
def start_zookeeper(logger, config):
    """Start the Zookeeper systemd unit, but only on coordinator nodes."""
    if config["daemon_mode"] != "coordinator":
        return

    logger.out("Starting Zookeeper daemon", state="i")
    # TODO: Move our handling out of Systemd and integrate it directly as a subprocess?
    common.run_os_command("systemctl start zookeeper.service")
def start_libvirtd(logger, config):
    """Start the Libvirt systemd unit when the hypervisor role is enabled."""
    if not config["enable_hypervisor"]:
        return

    logger.out("Starting Libvirt daemon", state="i")
    # TODO: Move our handling out of Systemd and integrate it directly as a subprocess?
    common.run_os_command("systemctl start libvirtd.service")
def start_patroni(logger, config):
    """Start the Patroni systemd unit on coordinators with networking enabled."""
    if not config["enable_networking"] or config["daemon_mode"] != "coordinator":
        return

    logger.out("Starting Patroni daemon", state="i")
    # TODO: Move our handling out of Systemd and integrate it directly as a subprocess?
    common.run_os_command("systemctl start patroni.service")
def start_frrouting(logger, config):
    """Start the FRRouting systemd unit on coordinators with networking enabled."""
    if not config["enable_networking"] or config["daemon_mode"] != "coordinator":
        return

    logger.out("Starting FRRouting daemon", state="i")
    # TODO: Move our handling out of Systemd and integrate it directly as a subprocess?
    common.run_os_command("systemctl start frr.service")
def start_ceph_mon(logger, config):
    """Start this node's Ceph Monitor unit on coordinators with storage enabled."""
    if not config["enable_storage"] or config["daemon_mode"] != "coordinator":
        return

    logger.out("Starting Ceph Monitor daemon", state="i")
    # TODO: Move our handling out of Systemd and integrate it directly as a subprocess?
    # The unit is a per-host template instance named after node_hostname
    monitor_unit = f'ceph-mon@{config["node_hostname"]}.service'
    common.run_os_command(f"systemctl start {monitor_unit}")
def start_ceph_mgr(logger, config):
    """Start this node's Ceph Manager unit on coordinators with storage enabled."""
    if not config["enable_storage"] or config["daemon_mode"] != "coordinator":
        return

    logger.out("Starting Ceph Manager daemon", state="i")
    # TODO: Move our handling out of Systemd and integrate it directly as a subprocess?
    # The unit is a per-host template instance named after node_hostname
    manager_unit = f'ceph-mgr@{config["node_hostname"]}.service'
    common.run_os_command(f"systemctl start {manager_unit}")
def start_system_services(logger, config):
@ -73,5 +77,5 @@ def start_system_services(logger, config):
start_ceph_mon(logger, config)
start_ceph_mgr(logger, config)
logger.out('Waiting 3 seconds for daemons to start', state='s')
logger.out("Waiting 3 seconds for daemons to start", state="s")
sleep(3)

View File

@ -31,45 +31,61 @@ def connect(logger, config):
zkhandler = ZKHandler(config, logger)
try:
logger.out('Connecting to Zookeeper on coordinator nodes {}'.format(config['coordinators']), state='i')
logger.out(
"Connecting to Zookeeper on coordinator nodes {}".format(
config["coordinators"]
),
state="i",
)
# Start connection
zkhandler.connect(persistent=True)
except Exception as e:
logger.out('ERROR: Failed to connect to Zookeeper cluster: {}'.format(e), state='e')
logger.out(
"ERROR: Failed to connect to Zookeeper cluster: {}".format(e), state="e"
)
os._exit(1)
logger.out('Validating Zookeeper schema', state='i')
logger.out("Validating Zookeeper schema", state="i")
try:
node_schema_version = int(zkhandler.read(('node.data.active_schema', config['node_hostname'])))
node_schema_version = int(
zkhandler.read(("node.data.active_schema", config["node_hostname"]))
)
except Exception:
node_schema_version = int(zkhandler.read('base.schema.version'))
zkhandler.write([
(('node.data.active_schema', config['node_hostname']), node_schema_version)
])
node_schema_version = int(zkhandler.read("base.schema.version"))
zkhandler.write(
[
(
("node.data.active_schema", config["node_hostname"]),
node_schema_version,
)
]
)
# Load in the current node schema version
zkhandler.schema.load(node_schema_version)
# Record the latest installed schema version
latest_schema_version = zkhandler.schema.find_latest()
logger.out('Latest installed schema is {}'.format(latest_schema_version), state='i')
zkhandler.write([
(('node.data.latest_schema', config['node_hostname']), latest_schema_version)
])
logger.out("Latest installed schema is {}".format(latest_schema_version), state="i")
zkhandler.write(
[(("node.data.latest_schema", config["node_hostname"]), latest_schema_version)]
)
# If we are the last node to get a schema update, fire the master update
if latest_schema_version > node_schema_version:
node_latest_schema_version = list()
for node in zkhandler.children('base.node'):
node_latest_schema_version.append(int(zkhandler.read(('node.data.latest_schema', node))))
for node in zkhandler.children("base.node"):
node_latest_schema_version.append(
int(zkhandler.read(("node.data.latest_schema", node)))
)
# This is true if all elements of the latest schema version are identical to the latest version,
# i.e. they have all had the latest schema installed and ready to load.
if node_latest_schema_version.count(latest_schema_version) == len(node_latest_schema_version):
zkhandler.write([
('base.schema.version', latest_schema_version)
])
if node_latest_schema_version.count(latest_schema_version) == len(
node_latest_schema_version
):
zkhandler.write([("base.schema.version", latest_schema_version)])
return zkhandler, node_schema_version
@ -77,56 +93,95 @@ def connect(logger, config):
def validate_schema(logger, zkhandler):
    """Validate the Zookeeper schema and apply it if validation fails.

    Calls `zkhandler.schema.validate(zkhandler, logger)`; on a falsy result
    the schema is applied via `zkhandler.schema.apply(zkhandler)`.
    """
    # Validate our schema against the active version
    if zkhandler.schema.validate(zkhandler, logger):
        logger.out("Schema successfully validated", state="o")
        return

    logger.out("Found schema violations, applying", state="i")
    zkhandler.schema.apply(zkhandler)
def setup_node(logger, config, zkhandler):
# Check if our node exists in Zookeeper, and create it if not
if config['daemon_mode'] == 'coordinator':
init_routerstate = 'secondary'
if config["daemon_mode"] == "coordinator":
init_routerstate = "secondary"
else:
init_routerstate = 'client'
init_routerstate = "client"
if zkhandler.exists(('node', config['node_hostname'])):
logger.out(f'Node is {logger.fmt_green}present{logger.fmt_end} in Zookeeper', state='i')
if zkhandler.exists(("node", config["node_hostname"])):
logger.out(
f"Node is {logger.fmt_green}present{logger.fmt_end} in Zookeeper", state="i"
)
# Update static data just in case it's changed
zkhandler.write([
(('node', config['node_hostname']), config['daemon_mode']),
(('node.mode', config['node_hostname']), config['daemon_mode']),
(('node.state.daemon', config['node_hostname']), 'init'),
(('node.state.router', config['node_hostname']), init_routerstate),
(('node.data.static', config['node_hostname']), ' '.join(config['static_data'])),
(('node.data.pvc_version', config['node_hostname']), config['pvcnoded_version']),
(('node.ipmi.hostname', config['node_hostname']), config['ipmi_hostname']),
(('node.ipmi.username', config['node_hostname']), config['ipmi_username']),
(('node.ipmi.password', config['node_hostname']), config['ipmi_password']),
])
zkhandler.write(
[
(("node", config["node_hostname"]), config["daemon_mode"]),
(("node.mode", config["node_hostname"]), config["daemon_mode"]),
(("node.state.daemon", config["node_hostname"]), "init"),
(("node.state.router", config["node_hostname"]), init_routerstate),
(
("node.data.static", config["node_hostname"]),
" ".join(config["static_data"]),
),
(
("node.data.pvc_version", config["node_hostname"]),
config["pvcnoded_version"],
),
(
("node.ipmi.hostname", config["node_hostname"]),
config["ipmi_hostname"],
),
(
("node.ipmi.username", config["node_hostname"]),
config["ipmi_username"],
),
(
("node.ipmi.password", config["node_hostname"]),
config["ipmi_password"],
),
]
)
else:
logger.out(f'Node is {logger.fmt_red}absent{logger.fmt_end} in Zookeeper; adding new node', state='i')
logger.out(
f"Node is {logger.fmt_red}absent{logger.fmt_end} in Zookeeper; adding new node",
state="i",
)
keepalive_time = int(time.time())
zkhandler.write([
(('node', config['node_hostname']), config['daemon_mode']),
(('node.keepalive', config['node_hostname']), str(keepalive_time)),
(('node.mode', config['node_hostname']), config['daemon_mode']),
(('node.state.daemon', config['node_hostname']), 'init'),
(('node.state.domain', config['node_hostname']), 'flushed'),
(('node.state.router', config['node_hostname']), init_routerstate),
(('node.data.static', config['node_hostname']), ' '.join(config['static_data'])),
(('node.data.pvc_version', config['node_hostname']), config['pvcnoded_version']),
(('node.ipmi.hostname', config['node_hostname']), config['ipmi_hostname']),
(('node.ipmi.username', config['node_hostname']), config['ipmi_username']),
(('node.ipmi.password', config['node_hostname']), config['ipmi_password']),
(('node.memory.total', config['node_hostname']), '0'),
(('node.memory.used', config['node_hostname']), '0'),
(('node.memory.free', config['node_hostname']), '0'),
(('node.memory.allocated', config['node_hostname']), '0'),
(('node.memory.provisioned', config['node_hostname']), '0'),
(('node.vcpu.allocated', config['node_hostname']), '0'),
(('node.cpu.load', config['node_hostname']), '0.0'),
(('node.running_domains', config['node_hostname']), '0'),
(('node.count.provisioned_domains', config['node_hostname']), '0'),
(('node.count.networks', config['node_hostname']), '0'),
])
zkhandler.write(
[
(("node", config["node_hostname"]), config["daemon_mode"]),
(("node.keepalive", config["node_hostname"]), str(keepalive_time)),
(("node.mode", config["node_hostname"]), config["daemon_mode"]),
(("node.state.daemon", config["node_hostname"]), "init"),
(("node.state.domain", config["node_hostname"]), "flushed"),
(("node.state.router", config["node_hostname"]), init_routerstate),
(
("node.data.static", config["node_hostname"]),
" ".join(config["static_data"]),
),
(
("node.data.pvc_version", config["node_hostname"]),
config["pvcnoded_version"],
),
(
("node.ipmi.hostname", config["node_hostname"]),
config["ipmi_hostname"],
),
(
("node.ipmi.username", config["node_hostname"]),
config["ipmi_username"],
),
(
("node.ipmi.password", config["node_hostname"]),
config["ipmi_password"],
),
(("node.memory.total", config["node_hostname"]), "0"),
(("node.memory.used", config["node_hostname"]), "0"),
(("node.memory.free", config["node_hostname"]), "0"),
(("node.memory.allocated", config["node_hostname"]), "0"),
(("node.memory.provisioned", config["node_hostname"]), "0"),
(("node.vcpu.allocated", config["node_hostname"]), "0"),
(("node.cpu.load", config["node_hostname"]), "0.0"),
(("node.running_domains", config["node_hostname"]), "0"),
(("node.count.provisioned_domains", config["node_hostname"]), "0"),
(("node.count.networks", config["node_hostname"]), "0"),
]
)