From 1126382ac9fe8c3bf919d858eb8e9e456bea9eb9 Mon Sep 17 00:00:00 2001 From: Joshua Boniface Date: Thu, 21 Mar 2019 11:19:28 -0400 Subject: [PATCH 01/13] Initial API including fixes to common functons Some functions were doing `click.echo` inside themselves; don't do that as it's not API compatible, just return everything. --- client-api/client_lib | 1 + client-api/pvc.py | 1398 +++++++++++++++++++++++++++++++++++++++++ client-api/pvcapi.py | 149 +++++ client-common/node.py | 25 +- 4 files changed, 1563 insertions(+), 10 deletions(-) create mode 120000 client-api/client_lib create mode 100755 client-api/pvc.py create mode 100755 client-api/pvcapi.py diff --git a/client-api/client_lib b/client-api/client_lib new file mode 120000 index 00000000..37daac79 --- /dev/null +++ b/client-api/client_lib @@ -0,0 +1 @@ +../client-common \ No newline at end of file diff --git a/client-api/pvc.py b/client-api/pvc.py new file mode 100755 index 00000000..696652a2 --- /dev/null +++ b/client-api/pvc.py @@ -0,0 +1,1398 @@ +#!/usr/bin/env python3 + +# pvcd.py - PVC client command-line interface +# Part of the Parallel Virtual Cluster (PVC) system +# +# Copyright (C) 2018 Joshua M. Boniface +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +############################################################################### + +import flask + +app = flask.Flask(__name__) +app.config["DEBUG"] = True + + +@app.route('/', methods=['GET']) +def home(): + return "

Distant Reading Archive

This site is a prototype API for distant reading of science fiction novels.

" + +app.run() + +exit(0) + +import socket +import click +import tempfile +import os +import subprocess +import difflib +import re +import colorama +import yaml + +import client_lib.common as pvc_common +import client_lib.node as pvc_node +import client_lib.vm as pvc_vm +import client_lib.network as pvc_network +import client_lib.ceph as pvc_ceph +#import client_lib.provisioner as pvc_provisioner + +myhostname = socket.gethostname() +zk_host = '' + +CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'], max_content_width=120) + +def cleanup(retcode, retmsg, zk_conn): + pvc_common.stopZKConnection(zk_conn) + if retcode == True: + if retmsg != '': + click.echo(retmsg) + exit(0) + else: + if retmsg != '': + click.echo(retmsg) + exit(1) + +############################################################################### +# pvc node +############################################################################### +@click.group(name='node', short_help='Manage a PVC node.', context_settings=CONTEXT_SETTINGS) +def cli_node(): + """ + Manage the state of a node in the PVC cluster. + """ + pass + +############################################################################### +# pvc node secondary +############################################################################### +@click.command(name='secondary', short_help='Set a node in secondary node status.') +@click.argument( + 'node' +) +def node_secondary(node): + """ + Take NODE out of primary router mode. + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_node.secondary_node(zk_conn, node) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc node primary +############################################################################### +@click.command(name='primary', short_help='Set a node in primary status.') +@click.argument( + 'node' +) +def node_primary(node): + """ + Put NODE into primary router mode. + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_node.primary_node(zk_conn, node) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc node flush +############################################################################### +@click.command(name='flush', short_help='Take a node out of service.') +@click.option( + '-w', '--wait', 'wait', is_flag=True, default=False, + help='Wait for migrations to complete before returning.' +) +@click.argument( + 'node', default=myhostname +) +def node_flush(node, wait): + """ + Take NODE out of active service and migrate away all VMs. If unspecified, defaults to this host. + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_node.flush_node(zk_conn, node, wait) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc node ready/unflush +############################################################################### +@click.command(name='ready', short_help='Restore node to service.') +@click.argument( + 'node', default=myhostname +) +def node_ready(node): + """ + Restore NODE to active service and migrate back all VMs. If unspecified, defaults to this host. + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_node.ready_node(zk_conn, node) + cleanup(retcode, retmsg, zk_conn) + +@click.command(name='unflush', short_help='Restore node to service.') +@click.argument( + 'node', default=myhostname +) +def node_unflush(node): + """ + Restore NODE to active service and migrate back all VMs. If unspecified, defaults to this host. + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_node.ready_node(zk_conn, node) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc node info +############################################################################### +@click.command(name='info', short_help='Show details of a node object.') +@click.argument( + 'node', default=myhostname +) +@click.option( + '-l', '--long', 'long_output', is_flag=True, default=False, + help='Display more detailed information.' +) +def node_info(node, long_output): + """ + Show information about node NODE. If unspecified, defaults to this host. + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_node.get_info(zk_conn, node, long_output) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc node list +############################################################################### +@click.command(name='list', short_help='List all node objects.') +@click.argument( + 'limit', default=None, required=False +) +def node_list(limit): + """ + List all nodes in the cluster; optionally only match names matching regex LIMIT. + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_node.get_list(zk_conn, limit) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc vm +############################################################################### +@click.group(name='vm', short_help='Manage a PVC virtual machine.', context_settings=CONTEXT_SETTINGS) +def cli_vm(): + """ + Manage the state of a virtual machine in the PVC cluster. + """ + pass + +############################################################################### +# pvc vm add +############################################################################### +@click.command(name='add', short_help='Add a new virtual machine to the provisioning queue.') +@click.option( + '--target', 'target_node', + help='Home node for this domain; autodetect if unspecified.' +) +@click.option( + '--cluster', 'is_cluster', + is_flag=True, + help='Create a cluster VM.' +) +@click.option( + '--system-template', 'system_template', + required=True, + help='System resource template for this domain.' +) +@click.option( + '--network-template', 'network_template', + required=True, + help='Network resource template for this domain.' +) +@click.option( + '--storage-template', 'storage_template', + required=True, + help='Storage resource template for this domain.' +) +@click.argument( + 'vmname' +) +def vm_add(vmname, target_node, is_cluster, system_template, network_template, storage_template): + """ + Add a new VM VMNAME to the provisioning queue. + + Note: Cluster VMs are those which will only run on Coordinator hosts. Usually, these VMs will use the 'cluster' network template, or possibly a custom template including the upstream network as well. Use these sparingly, as they are designed primarily for cluster control or upstream bridge VMs. + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_provisioner.add_vm( + zk_conn, + vmname=vmname, + target_node=target_node, + is_cluster=is_cluster, + system_template=system_template, + network_template=network_template, + storage_template=storage_template + ) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc vm define +############################################################################### +@click.command(name='define', short_help='Define a new virtual machine from a Libvirt XML file.') +@click.option( + '-t', '--target', 'target_node', + help='Home node for this domain; autodetect if unspecified.' +) +@click.option( + '-s', '--selector', 'selector', default='mem', show_default=True, + type=click.Choice(['mem','load','vcpus','vms']), + help='Method to determine optimal target node during autodetect.' +) +@click.argument( + 'config', type=click.File() +) +def vm_define(config, target_node, selector): + """ + Define a new virtual machine from Libvirt XML configuration file CONFIG. + """ + + # Open the XML file + config_data = config.read() + config.close() + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_vm.define_vm(zk_conn, config_data, target_node, selector) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc vm modify +############################################################################### +@click.command(name='modify', short_help='Modify an existing VM configuration.') +@click.option( + '-e', '--editor', 'editor', is_flag=True, + help='Use local editor to modify existing config.' +) +@click.option( + '-r', '--restart', 'restart', is_flag=True, + help='Immediately restart VM to apply new config.' +) +@click.argument( + 'domain' +) +@click.argument( + 'config', type=click.File(), default=None, required=False +) +def vm_modify(domain, config, editor, restart): + """ + Modify existing virtual machine DOMAIN, either in-editor or with replacement CONFIG. DOMAIN may be a UUID or name. + """ + + if editor == False and config == None: + cleanup(False, 'Either an XML config file or the "--editor" option must be specified.') + + zk_conn = pvc_common.startZKConnection(zk_host) + + if editor == True: + dom_uuid = pvc_vm.getDomainUUID(zk_conn, domain) + if dom_uuid == None: + cleanup(False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)) + dom_name = pvc_vm.getDomainName(zk_conn, dom_uuid) + + # Grab the current config + current_vm_config = zk_conn.get('/domains/{}/xml'.format(dom_uuid))[0].decode('ascii') + + # Write it to a tempfile + fd, path = tempfile.mkstemp() + fw = os.fdopen(fd, 'w') + fw.write(current_vm_config) + fw.close() + + # Edit it + editor = os.getenv('EDITOR', 'vi') + subprocess.call('%s %s' % (editor, path), shell=True) + + # Open the tempfile to read + with open(path, 'r') as fr: + new_vm_config = fr.read() + fr.close() + + # Delete the tempfile + os.unlink(path) + + # Show a diff and confirm + diff = list(difflib.unified_diff(current_vm_config.split('\n'), new_vm_config.split('\n'), fromfile='current', tofile='modified', fromfiledate='', tofiledate='', n=3, lineterm='')) + if len(diff) < 1: + click.echo('Aborting with no modifications.') + exit(0) + + click.echo('Pending modifications:') + click.echo('') + for line in diff: + if re.match('^\+', line) != None: + click.echo(colorama.Fore.GREEN + line + colorama.Fore.RESET) + elif re.match('^\-', line) != None: + click.echo(colorama.Fore.RED + line + colorama.Fore.RESET) + elif re.match('^\^', line) != None: + click.echo(colorama.Fore.BLUE + line + colorama.Fore.RESET) + else: + click.echo(line) + click.echo('') + + click.confirm('Write modifications to Zookeeper?', abort=True) + + if restart: + click.echo('Writing modified config of VM "{}" and restarting.'.format(dom_name)) + else: + click.echo('Writing modified config of VM "{}".'.format(dom_name)) + + # We're operating in replace mode + else: + # Open the XML file + new_vm_config = config.read() + config.close() + + if restart: + click.echo('Replacing config of VM "{}" with file "{}" and restarting.'.format(dom_name, config)) + else: + click.echo('Replacing config of VM "{}" with file "{}".'.format(dom_name, config)) + + retcode, retmsg = pvc_vm.modify_vm(zk_conn, domain, restart, new_vm_config) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc vm undefine +############################################################################### +@click.command(name='undefine', short_help='Undefine and stop a virtual machine.') +@click.argument( + 'domain' +) +def vm_undefine(domain): + """ + Stop virtual machine DOMAIN and remove it from the cluster database. DOMAIN may be a UUID or name. + """ + + # Ensure at least one search method is set + if domain == None: + click.echo("ERROR: You must specify either a name or UUID value.") + exit(1) + + # Open a Zookeeper connection + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_vm.undefine_vm(zk_conn, domain) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc vm dump +############################################################################### +@click.command(name='dump', short_help='Dump a virtual machine XML to stdout.') +@click.argument( + 'domain' +) +def vm_dump(domain): + """ + Dump the Libvirt XML definition of virtual machine DOMAIN to stdout. DOMAIN may be a UUID or name. + """ + + # Ensure at least one search method is set + if domain == None: + click.echo("ERROR: You must specify either a name or UUID value.") + exit(1) + + # Open a Zookeeper connection + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_vm.dump_vm(zk_conn, domain) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc vm start +############################################################################### +@click.command(name='start', short_help='Start up a defined virtual machine.') +@click.argument( + 'domain' +) +def vm_start(domain): + """ + Start virtual machine DOMAIN on its configured node. DOMAIN may be a UUID or name. + """ + + # Open a Zookeeper connection + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_vm.start_vm(zk_conn, domain) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc vm restart +############################################################################### +@click.command(name='restart', short_help='Restart a running virtual machine.') +@click.argument( + 'domain' +) +def vm_restart(domain): + """ + Restart running virtual machine DOMAIN. DOMAIN may be a UUID or name. + """ + + # Open a Zookeeper connection + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_vm.restart_vm(zk_conn, domain) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc vm shutdown +############################################################################### +@click.command(name='shutdown', short_help='Gracefully shut down a running virtual machine.') +@click.argument( + 'domain' +) +def vm_shutdown(domain): + """ + Gracefully shut down virtual machine DOMAIN. DOMAIN may be a UUID or name. + """ + + # Open a Zookeeper connection + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_vm.shutdown_vm(zk_conn, domain) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc vm stop +############################################################################### +@click.command(name='stop', short_help='Forcibly halt a running virtual machine.') +@click.argument( + 'domain' +) +def vm_stop(domain): + """ + Forcibly halt (destroy) running virtual machine DOMAIN. DOMAIN may be a UUID or name. + """ + + # Open a Zookeeper connection + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_vm.stop_vm(zk_conn, domain) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc vm move +############################################################################### +@click.command(name='move', short_help='Permanently move a virtual machine to another node.') +@click.argument( + 'domain' +) +@click.option( + '-t', '--target', 'target_node', default=None, + help='Target node to migrate to; autodetect if unspecified.' +) +@click.option( + '-s', '--selector', 'selector', default='mem', show_default=True, + type=click.Choice(['mem','load','vcpus','vms']), + help='Method to determine optimal target node during autodetect.' +) +def vm_move(domain, target_node, selector): + """ + Permanently move virtual machine DOMAIN, via live migration if running and possible, to another node. DOMAIN may be a UUID or name. + """ + + # Open a Zookeeper connection + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_vm.move_vm(zk_conn, domain, target_node, selector) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc vm migrate +############################################################################### +@click.command(name='migrate', short_help='Temporarily migrate a virtual machine to another node.') +@click.argument( + 'domain' +) +@click.option( + '-t', '--target', 'target_node', default=None, + help='Target node to migrate to; autodetect if unspecified.' +) +@click.option( + '-s', '--selector', 'selector', default='mem', show_default=True, + type=click.Choice(['mem','load','vcpus','vms']), + help='Method to determine optimal target node during autodetect.' +) +@click.option( + '-f', '--force', 'force_migrate', is_flag=True, default=False, + help='Force migrate an already migrated VM.' +) +def vm_migrate(domain, target_node, selector, force_migrate): + """ + Temporarily migrate running virtual machine DOMAIN, via live migration if possible, to another node. DOMAIN may be a UUID or name. If DOMAIN is not running, it will be started on the target node. + """ + + # Open a Zookeeper connection + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_vm.migrate_vm(zk_conn, domain, target_node, selector, force_migrate) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc vm unmigrate +############################################################################### +@click.command(name='unmigrate', short_help='Restore a migrated virtual machine to its original node.') +@click.argument( + 'domain' +) +def vm_unmigrate(domain): + """ + Restore previously migrated virtual machine DOMAIN, via live migration if possible, to its original node. DOMAIN may be a UUID or name. If DOMAIN is not running, it will be started on the target node. + """ + + # Open a Zookeeper connection + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_vm.unmigrate_vm(zk_conn, domain) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc vm info +############################################################################### +@click.command(name='info', short_help='Show details of a VM object.') +@click.argument( + 'domain' +) +@click.option( + '-l', '--long', 'long_output', is_flag=True, default=False, + help='Display more detailed information.' +) +def vm_info(domain, long_output): + """ + Show information about virtual machine DOMAIN. DOMAIN may be a UUID or name. + """ + + # Open a Zookeeper connection + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_vm.get_info(zk_conn, domain, long_output) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc vm list +############################################################################### +@click.command(name='list', short_help='List all VM objects.') +@click.argument( + 'limit', default=None, required=False +) +@click.option( + '-t', '--target', 'target_node', default=None, + help='Limit list to VMs on the specified node.' +) +@click.option( + '-s', '--state', 'target_state', default=None, + help='Limit list to VMs in the specified state.' +) +@click.option( + '-r', '--raw', 'raw', is_flag=True, default=False, + help='Display the raw list of VM names only.' +) +def vm_list(target_node, target_state, limit, raw): + """ + List all virtual machines in the cluster; optionally only match names matching regex LIMIT. + + NOTE: Red-coloured network lists indicate one or more configured networks are missing/invalid. + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_vm.get_list(zk_conn, target_node, target_state, limit, raw) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc network +############################################################################### +@click.group(name='network', short_help='Manage a PVC virtual network.', context_settings=CONTEXT_SETTINGS) +def cli_network(): + """ + Manage the state of a VXLAN network in the PVC cluster. + """ + pass + +############################################################################### +# pvc network add +############################################################################### +@click.command(name='add', short_help='Add a new virtual network to the cluster.') +@click.option( + '-d', '--description', 'description', + required=True, + help='Description of the network; must be unique and not contain whitespace.' +) +@click.option( + '-p', '--type', 'nettype', + required=True, + type=click.Choice(['managed', 'bridged']), + help='Network type; managed networks control IP addressing; bridged networks are simple vLAN bridges. All subsequent options are unused for bridged networks.' +) +@click.option( + '-n', '--domain', 'domain', + default=None, + help='Domain name of the network.' +) +@click.option( + '-i', '--ipnet', 'ip_network', + default=None, + help='CIDR-format IPv4 network address for subnet.' +) +@click.option( + '-i6', '--ipnet6', 'ip6_network', + default=None, + help='CIDR-format IPv6 network address for subnet; should be /64 or larger ending "::/YY".' +) +@click.option( + '-g', '--gateway', 'ip_gateway', + default=None, + help='Default IPv4 gateway address for subnet.' +) +@click.option( + '-g6', '--gateway6', 'ip6_gateway', + default=None, + help='Default IPv6 gateway address for subnet. [default: "X::1"]' +) +@click.option( + '--dhcp/--no-dhcp', 'dhcp_flag', + is_flag=True, + default=False, + help='Enable/disable IPv4 DHCP for clients on subnet.' +) +@click.option( + '--dhcp-start', 'dhcp_start', + default=None, + help='IPv4 DHCP range start address.' +) +@click.option( + '--dhcp-end', 'dhcp_end', + default=None, + help='IPv4 DHCP range end address.' +) +@click.argument( + 'vni' +) +def net_add(vni, description, nettype, domain, ip_network, ip_gateway, ip6_network, ip6_gateway, dhcp_flag, dhcp_start, dhcp_end): + """ + Add a new virtual network with VXLAN identifier VNI to the cluster. + + Examples: + + pvc network add 101 --type bridged + + > Creates vLAN 101 and a simple bridge on the VNI dev interface. + + pvc network add 1001 --type managed --domain test.local --ipnet 10.1.1.0/24 --gateway 10.1.1.1 + + > Creates a VXLAN with ID 1001 on the VNI dev interface, with IPv4 managed networking. + + IPv6 is fully supported with --ipnet6 and --gateway6 in addition to or instead of IPv4. PVC will configure DHCPv6 in a semi-managed configuration for the network if set. + """ + + if nettype == 'managed' and not ip_network and not ip6_network: + click.echo('Error: At least one of "-i" / "--ipnet" or "-i6" / "--ipnet6" must be specified.') + exit(1) + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_network.add_network(zk_conn, vni, description, nettype, domain, ip_network, ip_gateway, ip6_network, ip6_gateway, dhcp_flag, dhcp_start, dhcp_end) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc network modify +############################################################################### +@click.command(name='modify', short_help='Modify an existing virtual network.') +@click.option( + '-d', '--description', 'description', + default=None, + help='Description of the network; must be unique and not contain whitespace.' +) +@click.option( + '-n', '--domain', 'domain', + default=None, + help='Domain name of the network.' +) +@click.option( + '-i', '--ipnet', 'ip4_network', + default=None, + help='CIDR-format IPv4 network address for subnet.' +) +@click.option( + '-i6', '--ipnet6', 'ip6_network', + default=None, + help='CIDR-format IPv6 network address for subnet.' +) +@click.option( + '-g', '--gateway', 'ip4_gateway', + default=None, + help='Default IPv4 gateway address for subnet.' +) +@click.option( + '-g6', '--gateway6', 'ip6_gateway', + default=None, + help='Default IPv6 gateway address for subnet.' +) +@click.option( + '--dhcp/--no-dhcp', 'dhcp_flag', + is_flag=True, + default=None, + help='Enable/disable DHCP for clients on subnet.' +) +@click.option( + '--dhcp-start', 'dhcp_start', + default=None, + help='DHCP range start address.' +) +@click.option( + '--dhcp-end', 'dhcp_end', + default=None, + help='DHCP range end address.' +) +@click.argument( + 'vni' +) +def net_modify(vni, description, domain, ip6_network, ip6_gateway, ip4_network, ip4_gateway, dhcp_flag, dhcp_start, dhcp_end): + """ + Modify details of virtual network VNI. All fields optional; only specified fields will be updated. + + Example: + pvc network modify 1001 --gateway 10.1.1.1 --dhcp + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_network.modify_network(zk_conn, vni, description=description, domain=domain, ip6_network=ip6_network, ip6_gateway=ip6_gateway, ip4_network=ip4_network, ip4_gateway=ip4_gateway, dhcp_flag=dhcp_flag, dhcp_start=dhcp_start, dhcp_end=dhcp_end) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc network remove +############################################################################### +@click.command(name='remove', short_help='Remove a virtual network from the cluster.') +@click.argument( + 'net' +) +def net_remove(net): + """ + Remove an existing virtual network NET from the cluster; NET can be either a VNI or description. + + WARNING: PVC does not verify whether clients are still present in this network. Before removing, ensure + that all client VMs have been removed from the network or undefined behaviour may occur. + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_network.remove_network(zk_conn, net) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc network info +############################################################################### +@click.command(name='info', short_help='Show details of a network.') +@click.argument( + 'vni' +) +@click.option( + '-l', '--long', 'long_output', is_flag=True, default=False, + help='Display more detailed information.' +) +def net_info(vni, long_output): + """ + Show information about virtual network VNI. + """ + + # Open a Zookeeper connection + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_network.get_info(zk_conn, vni, long_output) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc network list +############################################################################### +@click.command(name='list', short_help='List all VM objects.') +@click.argument( + 'limit', default=None, required=False +) +def net_list(limit): + """ + List all virtual networks in the cluster; optionally only match VNIs or Descriptions matching regex LIMIT. + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_network.get_list(zk_conn, limit) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc network dhcp +############################################################################### +@click.group(name='dhcp', short_help='Manage IPv4 DHCP leases in a PVC virtual network.', context_settings=CONTEXT_SETTINGS) +def net_dhcp(): + """ + Manage host IPv4 DHCP leases of a VXLAN network in the PVC cluster. + """ + pass + +############################################################################### +# pvc network dhcp list +############################################################################### +@click.command(name='list', short_help='List active DHCP leases.') +@click.argument( + 'net' +) +@click.argument( + 'limit', default=None, required=False +) +def net_dhcp_list(net, limit): + """ + List all DHCP leases in virtual network NET; optionally only match elements matching regex LIMIT; NET can be either a VNI or description. + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_network.get_list_dhcp(zk_conn, net, limit, only_static=False) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc network dhcp static +############################################################################### +@click.group(name='static', short_help='Manage DHCP static reservations in a PVC virtual network.', context_settings=CONTEXT_SETTINGS) +def net_dhcp_static(): + """ + Manage host DHCP static reservations of a VXLAN network in the PVC cluster. + """ + pass + +############################################################################### +# pvc network dhcp static add +############################################################################### +@click.command(name='add', short_help='Add a DHCP static reservation.') +@click.argument( + 'net' +) +@click.argument( + 'ipaddr' +) +@click.argument( + 'hostname' +) +@click.argument( + 'macaddr' +) +def net_dhcp_static_add(net, ipaddr, macaddr, hostname): + """ + Add a new DHCP static reservation of IP address IPADDR with hostname HOSTNAME for MAC address MACADDR to virtual network NET; NET can be either a VNI or description. + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_network.add_dhcp_reservation(zk_conn, net, ipaddr, macaddr, hostname) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc network dhcp static remove +############################################################################### +@click.command(name='remove', short_help='Remove a DHCP static reservation.') +@click.argument( + 'net' +) +@click.argument( + 'reservation' +) +def net_dhcp_static_remove(net, reservation): + """ + Remove a DHCP static reservation RESERVATION from virtual network NET; RESERVATION can be either a MAC address, an IP address, or a hostname; NET can be either a VNI or description. + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_network.remove_dhcp_reservation(zk_conn, net, reservation) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc network dhcp static list +############################################################################### +@click.command(name='list', short_help='List DHCP static reservations.') +@click.argument( + 'net' +) +@click.argument( + 'limit', default=None, required=False +) +def net_dhcp_static_list(net, limit): + """ + List all DHCP static reservations in virtual network NET; optionally only match elements matching regex LIMIT; NET can be either a VNI or description. + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_network.get_list_dhcp(zk_conn, net, limit, only_static=True) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc network acl +############################################################################### +@click.group(name='acl', short_help='Manage a PVC virtual network firewall ACL rule.', context_settings=CONTEXT_SETTINGS) +def net_acl(): + """ + Manage firewall ACLs of a VXLAN network in the PVC cluster. + """ + pass + +############################################################################### +# pvc network acl add +############################################################################### +@click.command(name='add', short_help='Add firewall ACL.') +@click.option( + '--in/--out', 'direction', + is_flag=True, + required=True, + default=None, + help='Inbound or outbound ruleset.' +) +@click.option( + '-d', '--description', 'description', + required=True, + help='Description of the ACL; must be unique and not contain whitespace.' +) +@click.option( + '-r', '--rule', 'rule', + required=True, + help='NFT firewall rule.' +) +@click.option( + '-o', '--order', 'order', + default=None, + help='Order of rule in the chain (see "list"); defaults to last.' +) +@click.argument( + 'net' +) +def net_acl_add(net, direction, description, rule, order): + """ + Add a new NFT firewall rule to network NET; the rule is a literal NFT rule belonging to the forward table for the client network; NET can be either a VNI or description. + + NOTE: All client networks are default-allow in both directions; deny rules MUST be added here at the end of the sequence for a default-deny setup. + + NOTE: Ordering places the rule at the specified ID, not before it; the old rule of that ID and all subsequent rules will be moved down. + + Example: + + pvc network acl add 1001 --in --rule "tcp dport 22 ct state new accept" --description "ssh-in" --order 3 + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_network.add_acl(zk_conn, net, direction, description, rule, order) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc network acl remove +############################################################################### +@click.command(name='remove', short_help='Remove firewall ACL.') +@click.option( + '--in/--out', 'direction', + is_flag=True, + required=True, + default=None, + help='Inbound or outbound rule set.' +) +@click.argument( + 'net' +) +@click.argument( + 'rule', +) +def net_acl_remove(net, rule, direction): + """ + Remove an NFT firewall rule RULE from network NET; RULE can be either a sequence order identifier or description; NET can be either a VNI or description." + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_network.remove_acl(zk_conn, net, rule, direction) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc network acl list +############################################################################### +@click.command(name='list', short_help='List firewall ACLs.') +@click.option( + '--in/--out', 'direction', + is_flag=True, + required=False, + default=None, + help='Inbound or outbound rule set only.' +) +@click.argument( + 'net' +) +@click.argument( + 'limit', default=None, required=False +) +def net_acl_list(net, limit, direction): + """ + List all NFT firewall rules in network NET; optionally only match elements matching description regex LIMIT; NET can be either a VNI or description. + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_network.get_list_acl(zk_conn, net, limit, direction) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc ceph +############################################################################### +@click.group(name='ceph', short_help='Manage the PVC Ceph storage cluster.', context_settings=CONTEXT_SETTINGS) +def cli_ceph(): + """ + Manage the Ceph storage of the PVC cluster. + + NOTE: The PVC Ceph interface is limited to the most common tasks. Any other administrative tasks must be performed on a node directly. + """ + pass + +############################################################################### +# pvc ceph status +############################################################################### +@click.command(name='status', short_help='Show storage cluster status.') +def ceph_status(): + """ + Show detailed status of the storage cluster. + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_ceph.get_status(zk_conn) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc ceph osd +############################################################################### +@click.group(name='osd', short_help='Manage OSDs in the PVC storage cluster.', context_settings=CONTEXT_SETTINGS) +def ceph_osd(): + """ + Manage the Ceph OSDs of the PVC cluster. + """ + pass + +############################################################################### +# pvc ceph osd add +############################################################################### +@click.command(name='add', short_help='Add new OSD.') +@click.argument( + 'node' +) +@click.argument( + 'device' +) +@click.option( + '-w', '--weight', 'weight', + default=1.0, show_default=True, + help='Weight of the OSD within the CRUSH map.' +) +def ceph_osd_add(node, device, weight): + """ + Add a new Ceph OSD on node NODE with block device DEVICE to the cluster. + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_ceph.add_osd(zk_conn, node, device, weight) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc ceph osd remove +############################################################################### +@click.command(name='remove', short_help='Remove OSD.') +@click.argument( + 'osdid' +) +def ceph_osd_remove(osdid): + """ + Remove a Ceph OSD with ID OSDID from the cluster. + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_ceph.remove_osd(zk_conn, osdid) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc ceph osd in +############################################################################### +@click.command(name='in', short_help='Online OSD.') +@click.argument( + 'osdid' +) +def ceph_osd_in(osdid): + """ + Set a Ceph OSD with ID OSDID online in the cluster. + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_ceph.in_osd(zk_conn, osdid) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc ceph osd out +############################################################################### +@click.command(name='out', short_help='Offline OSD.') +@click.argument( + 'osdid' +) +def ceph_osd_out(osdid): + """ + Set a Ceph OSD with ID OSDID offline in the cluster. + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_ceph.out_osd(zk_conn, osdid) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc ceph osd set +############################################################################### +@click.command(name='set', short_help='Set property.') +@click.argument( + 'osd_property' +) +def ceph_osd_set(osd_property): + """ + Set a Ceph OSD property OSD_PROPERTY on the cluster. + + Valid properties are: + + full|pause|noup|nodown|noout|noin|nobackfill|norebalance|norecover|noscrub|nodeep-scrub|notieragent|sortbitwise|recovery_deletes|require_jewel_osds|require_kraken_osds + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_ceph.set_osd(zk_conn, osd_property) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc ceph osd unset +############################################################################### +@click.command(name='unset', short_help='Unset property.') +@click.argument( + 'osd_property' +) +def ceph_osd_unset(osd_property): + """ + Unset a Ceph OSD property OSD_PROPERTY on the cluster. + + Valid properties are: + + full|pause|noup|nodown|noout|noin|nobackfill|norebalance|norecover|noscrub|nodeep-scrub|notieragent|sortbitwise|recovery_deletes|require_jewel_osds|require_kraken_osds + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_ceph.unset_osd(zk_conn, osd_property) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc ceph osd list +############################################################################### +@click.command(name='list', short_help='List cluster OSDs.') +@click.argument( + 'limit', default=None, required=False +) +def ceph_osd_list(limit): + """ + List all Ceph OSDs in the cluster; optinally only match elements matching ID regex LIMIT. + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_ceph.get_list_osd(zk_conn, limit) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc ceph pool +############################################################################### +@click.group(name='pool', short_help='Manage RBD pools in the PVC storage cluster.', context_settings=CONTEXT_SETTINGS) +def ceph_pool(): + """ + Manage the Ceph RBD pools of the PVC cluster. + """ + pass + +############################################################################### +# pvc ceph pool add +############################################################################### +@click.command(name='add', short_help='Add new RBD pool.') +@click.argument( + 'name' +) +@click.argument( + 'pgs' +) +def ceph_pool_add(name, pgs): + """ + Add a new Ceph RBD pool with name NAME and PGS placement groups. + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_ceph.add_pool(zk_conn, name, pgs) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc ceph pool remove +############################################################################### +@click.command(name='remove', short_help='Remove RBD pool.') +@click.argument( + 'name' +) +@click.option('--yes', is_flag=True, + expose_value=False, + prompt='DANGER: This command will destroy this pool and all volumes. Do you want to continue?' +) +def ceph_pool_remove(name): + """ + Remove a Ceph RBD pool with name NAME and all volumes on it. + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_ceph.remove_pool(zk_conn, name) + cleanup(retcode, retmsg, zk_conn) + +############################################################################### +# pvc ceph pool list +############################################################################### +@click.command(name='list', short_help='List cluster RBD pools.') +@click.argument( + 'limit', default=None, required=False +) +def ceph_pool_list(limit): + """ + List all Ceph RBD pools in the cluster; optinally only match elements matching name regex LIMIT. + """ + + zk_conn = pvc_common.startZKConnection(zk_host) + retcode, retmsg = pvc_ceph.get_list_pool(zk_conn, limit) + cleanup(retcode, retmsg, zk_conn) + + +############################################################################### +# pvc init +############################################################################### + +@click.command(name='init', short_help='Initialize a new cluster.') +def init_cluster(): + """ + Perform initialization of a new PVC cluster. + """ + + import pvc_init +# pvc_init.run() + pvc_init.init_zookeeper(zk_host) + + +############################################################################### +# pvc +############################################################################### +@click.group(context_settings=CONTEXT_SETTINGS) +@click.option( + '-z', '--zookeeper', '_zk_host', envvar='PVC_ZOOKEEPER', default=None, + help='Zookeeper connection string.' +) +def cli(_zk_host): + """ + Parallel Virtual Cluster CLI management tool + + Environment variables: + + "PVC_ZOOKEEPER": Set the cluster Zookeeper address instead of using "--zookeeper". + + If no PVC_ZOOKEEPER/--zookeeper is specified, attempts to load coordinators list from /etc/pvc/pvcd.yaml. + """ + + # If no zk_host was passed, try to read from /etc/pvc/pvcd.yaml; otherwise fail + if _zk_host is None: + try: + cfgfile = '/etc/pvc/pvcd.yaml' + with open(cfgfile) as cfgf: + o_config = yaml.load(cfgf) + _zk_host = o_config['pvc']['cluster']['coordinators'] + except: + _zk_host = None + + if _zk_host is None: + print('ERROR: Must specify a PVC_ZOOKEEPER value or have a coordinator set configured in /etc/pvc/pvcd.yaml.') + exit(1) + + global zk_host + zk_host = _zk_host + + +# +# Click command tree +# +cli_node.add_command(node_secondary) +cli_node.add_command(node_primary) +cli_node.add_command(node_flush) +cli_node.add_command(node_ready) +cli_node.add_command(node_unflush) +cli_node.add_command(node_info) +cli_node.add_command(node_list) + +cli_vm.add_command(vm_add) +cli_vm.add_command(vm_define) +cli_vm.add_command(vm_modify) +cli_vm.add_command(vm_undefine) +cli_vm.add_command(vm_dump) +cli_vm.add_command(vm_start) +cli_vm.add_command(vm_restart) +cli_vm.add_command(vm_shutdown) +cli_vm.add_command(vm_stop) +cli_vm.add_command(vm_move) +cli_vm.add_command(vm_migrate) +cli_vm.add_command(vm_unmigrate) +cli_vm.add_command(vm_info) +cli_vm.add_command(vm_list) + +cli_network.add_command(net_add) +cli_network.add_command(net_modify) +cli_network.add_command(net_remove) +cli_network.add_command(net_info) +cli_network.add_command(net_list) +cli_network.add_command(net_dhcp) +cli_network.add_command(net_acl) + +net_dhcp.add_command(net_dhcp_list) +net_dhcp.add_command(net_dhcp_static) + +net_dhcp_static.add_command(net_dhcp_static_add) +net_dhcp_static.add_command(net_dhcp_static_remove) +net_dhcp_static.add_command(net_dhcp_static_list) + +net_acl.add_command(net_acl_add) +net_acl.add_command(net_acl_remove) +net_acl.add_command(net_acl_list) + +ceph_osd.add_command(ceph_osd_add) +ceph_osd.add_command(ceph_osd_remove) +ceph_osd.add_command(ceph_osd_in) +ceph_osd.add_command(ceph_osd_out) +ceph_osd.add_command(ceph_osd_set) +ceph_osd.add_command(ceph_osd_unset) +ceph_osd.add_command(ceph_osd_list) + +ceph_pool.add_command(ceph_pool_add) +ceph_pool.add_command(ceph_pool_remove) +ceph_pool.add_command(ceph_pool_list) + +cli_ceph.add_command(ceph_status) +cli_ceph.add_command(ceph_osd) +cli_ceph.add_command(ceph_pool) + +cli.add_command(cli_node) +cli.add_command(cli_vm) +cli.add_command(cli_network) +cli.add_command(cli_ceph) +cli.add_command(init_cluster) + +# +# Main entry point +# +def main(): + return cli(obj={}) + +if __name__ == '__main__': + main() + diff --git a/client-api/pvcapi.py b/client-api/pvcapi.py new file mode 100755 index 00000000..2d0073df --- /dev/null +++ b/client-api/pvcapi.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python3 + +# pvcapi.py - PVC HTTP API interface +# Part of the Parallel Virtual Cluster (PVC) system +# +# Copyright (C) 2018 Joshua M. Boniface +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +############################################################################### + +import flask + +import client_lib.common as pvc_common +import client_lib.node as pvc_node +import client_lib.vm as pvc_vm +import client_lib.network as pvc_network +import client_lib.ceph as pvc_ceph + +zk_host = "hv1:2181,hv2:2181,hv3:2181" + +pvcapi = flask.Flask(__name__) +pvcapi.config["DEBUG"] = True + +@pvcapi.route('/api/v1', methods=['GET']) +def api_root(): + print(flask.request) + print(flask.request.args) + return "", 200 + +@pvcapi.route('/api/v1/node', methods=['GET']) +def api_node(): + """ + Manage the state of a node in the PVC cluster + """ + return "Manage the state of a node in the PVC cluster", 209 + +@pvcapi.route('/api/v1/node/secondary', methods=['GET']) +def api_node_secondary(): + """ + Take NODE out of primary router mode. + """ + # Mandatory args + if 'node' in flask.request.args: + node = flask.request.args['node'] + else: + return "Error: No node provided. Please specify a node.", 510 + + zk_conn = pvc_common.startZKConnection(zk_host) + retflag, retmsg = pvc_node.secondary_node(zk_conn, node) + if retflag: + retcode = 200 + else: + retcode = 510 + + pvc_common.stopZKConnection(zk_conn) + output = { + 'message': retmsg, + } + return flask.jsonify(output), retcode + +@pvcapi.route('/api/v1/node/primary', methods=['GET']) +def api_node_primary(): + """ + Set NODE to primary router mode. + """ + # Mandatory args + if 'node' in flask.request.args: + node = flask.request.args['node'] + else: + return "Error: No node provided. Please specify a node.", 510 + + zk_conn = pvc_common.startZKConnection(zk_host) + retflag, retmsg = pvc_node.primary_node(zk_conn, node) + if retflag: + retcode = 200 + else: + retcode = 510 + + pvc_common.stopZKConnection(zk_conn) + output = { + 'message': retmsg, + } + return flask.jsonify(output), retcode + +#@pvcapi.route('/api/v1/node/flush', methods=['GET']) +#@pvcapi.route('/api/v1/node/unflush', methods=['GET']) +#@pvcapi.route('/api/v1/node/ready', methods=['GET']) +#@pvcapi.route('/api/v1/node/info', methods=['GET']) +#@pvcapi.route('/api/v1/node/list', methods=['GET']) +#@pvcapi.route('/api/v1/vm', methods=['GET']) +#@pvcapi.route('/api/v1/vm/add', methods=['GET']) +#@pvcapi.route('/api/v1/vm/define', methods=['GET']) +#@pvcapi.route('/api/v1/vm/modify', methods=['GET']) +#@pvcapi.route('/api/v1/vm/undefine', methods=['GET']) +#@pvcapi.route('/api/v1/vm/dump', methods=['GET']) +#@pvcapi.route('/api/v1/vm/start', methods=['GET']) +#@pvcapi.route('/api/v1/vm/restart', methods=['GET']) +#@pvcapi.route('/api/v1/vm/shutdown', methods=['GET']) +#@pvcapi.route('/api/v1/vm/stop', methods=['GET']) +#@pvcapi.route('/api/v1/vm/move', methods=['GET']) +#@pvcapi.route('/api/v1/vm/migrate', methods=['GET']) +#@pvcapi.route('/api/v1/vm/unmigrate', methods=['GET']) +#@pvcapi.route('/api/v1/vm/info', methods=['GET']) +#@pvcapi.route('/api/v1/vm/list', methods=['GET']) +#@pvcapi.route('/api/v1/network', methods=['GET']) +#@pvcapi.route('/api/v1/network/add', methods=['GET']) +#@pvcapi.route('/api/v1/network/modify', methods=['GET']) +#@pvcapi.route('/api/v1/network/remove', methods=['GET']) +#@pvcapi.route('/api/v1/network/info', methods=['GET']) +#@pvcapi.route('/api/v1/network/list', methods=['GET']) +#@pvcapi.route('/api/v1/network/dhcp', methods=['GET']) +#@pvcapi.route('/api/v1/network/dhcp/list', methods=['GET']) +#@pvcapi.route('/api/v1/network/dhcp/static', methods=['GET']) +#@pvcapi.route('/api/v1/network/dhcp/static/add', methods=['GET']) +#@pvcapi.route('/api/v1/network/dhcp/static/remove', methods=['GET']) +#@pvcapi.route('/api/v1/network/dhcp/static/list', methods=['GET']) +#@pvcapi.route('/api/v1/network/acl', methods=['GET']) +#@pvcapi.route('/api/v1/network/acl/add', methods=['GET']) +#@pvcapi.route('/api/v1/network/acl/remove', methods=['GET']) +#@pvcapi.route('/api/v1/network/acl/list', methods=['GET']) +# Ceph endpoints +#@pvcapi.route('/api/v1/ceph', methods=['GET']) +#@pvcapi.route('/api/v1/ceph/status', methods=['GET']) +#@pvcapi.route('/api/v1/ceph/osd', methods=['GET']) +#@pvcapi.route('/api/v1/ceph/osd/add', methods=['GET']) +#@pvcapi.route('/api/v1/ceph/osd/remove', methods=['GET']) +#@pvcapi.route('/api/v1/ceph/osd/in', methods=['GET']) +#@pvcapi.route('/api/v1/ceph/osd/out', methods=['GET']) +#@pvcapi.route('/api/v1/ceph/osd/set', methods=['GET']) +#@pvcapi.route('/api/v1/ceph/osd/unset', methods=['GET']) +#@pvcapi.route('/api/v1/ceph/osd/list', methods=['GET']) +#@pvcapi.route('/api/v1/ceph/pool', methods=['GET']) +#@pvcapi.route('/api/v1/ceph/pool/add', methods=['GET']) +#@pvcapi.route('/api/v1/ceph/pool/remove', methods=['GET']) +#@pvcapi.route('/api/v1/ceph/pool/list', methods=['GET']) + +pvcapi.run() diff --git a/client-common/node.py b/client-common/node.py index fdf98675..000f33e8 100644 --- a/client-common/node.py +++ b/client-common/node.py @@ -122,14 +122,14 @@ def secondary_node(zk_conn, node): # Get current state current_state = zkhandler.readdata(zk_conn, '/nodes/{}/routerstate'.format(node)) if current_state == 'primary': - click.echo('Setting node {} in secondary router mode.'.format(node)) + retmsg = 'Setting node {} in secondary router mode.'.format(node) zkhandler.writedata(zk_conn, { '/primary_node': 'none' }) else: - click.echo('Node {} is already in secondary router mode.'.format(node)) + return False, 'Node {} is already in secondary router mode.'.format(node) - return True, '' + return True, retmsg def primary_node(zk_conn, node): # Verify node is valid @@ -144,21 +144,26 @@ def primary_node(zk_conn, node): # Get current state current_state = zkhandler.readdata(zk_conn, '/nodes/{}/routerstate'.format(node)) if current_state == 'secondary': - click.echo('Setting node {} in primary router mode.'.format(node)) + retmsg = 'Setting node {} in primary router mode.'.format(node) zkhandler.writedata(zk_conn, { '/primary_node': node }) else: - click.echo('Node {} is already in primary router mode.'.format(node)) + return False, 'Node {} is already in primary router mode.'.format(node) - return True, '' + return True, retmsg def flush_node(zk_conn, node, wait): # Verify node is valid if not common.verifyNode(zk_conn, node): return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node) - click.echo('Flushing hypervisor {} of running VMs.'.format(node)) + retmsg = 'Flushing hypervisor {} of running VMs.'.format(node) + + # Wait cannot be triggered from the API + if wait: + click.echo(retmsg) + retmsg = "" # Add the new domain to Zookeeper zkhandler.writedata(zk_conn, { @@ -172,21 +177,21 @@ def flush_node(zk_conn, node, wait): if node_state == "flushed": break - return True, '' + return True, retmsg def ready_node(zk_conn, node): # Verify node is valid if not common.verifyNode(zk_conn, node): return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node) - click.echo('Restoring hypervisor {} to active service.'.format(node)) + retmsg = 'Restoring hypervisor {} to active service.'.format(node) # Add the new domain to Zookeeper zkhandler.writedata(zk_conn, { '/nodes/{}/domainstate'.format(node): 'unflush' }) - return True, '' + return True, retmsg def get_info(zk_conn, node, long_output): # Verify node is valid From e41b3053f704163914d4af1159ab01cb2958c141 Mon Sep 17 00:00:00 2001 From: Joshua Boniface Date: Fri, 12 Apr 2019 12:27:49 -0400 Subject: [PATCH 02/13] Update methods to POST where relevant --- client-api/pvcapi.py | 64 +++++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/client-api/pvcapi.py b/client-api/pvcapi.py index 2d0073df..cec66116 100755 --- a/client-api/pvcapi.py +++ b/client-api/pvcapi.py @@ -46,7 +46,7 @@ def api_node(): """ return "Manage the state of a node in the PVC cluster", 209 -@pvcapi.route('/api/v1/node/secondary', methods=['GET']) +@pvcapi.route('/api/v1/node/secondary', methods=['POST']) def api_node_secondary(): """ Take NODE out of primary router mode. @@ -70,7 +70,7 @@ def api_node_secondary(): } return flask.jsonify(output), retcode -@pvcapi.route('/api/v1/node/primary', methods=['GET']) +@pvcapi.route('/api/v1/node/primary', methods=['POST']) def api_node_primary(): """ Set NODE to primary router mode. @@ -94,56 +94,58 @@ def api_node_primary(): } return flask.jsonify(output), retcode -#@pvcapi.route('/api/v1/node/flush', methods=['GET']) -#@pvcapi.route('/api/v1/node/unflush', methods=['GET']) -#@pvcapi.route('/api/v1/node/ready', methods=['GET']) +#@pvcapi.route('/api/v1/node/flush', methods=['POST']) +#@pvcapi.route('/api/v1/node/unflush', methods=['POST']) +#@pvcapi.route('/api/v1/node/ready', methods=['POST']) #@pvcapi.route('/api/v1/node/info', methods=['GET']) #@pvcapi.route('/api/v1/node/list', methods=['GET']) +# VM endpoints #@pvcapi.route('/api/v1/vm', methods=['GET']) -#@pvcapi.route('/api/v1/vm/add', methods=['GET']) -#@pvcapi.route('/api/v1/vm/define', methods=['GET']) -#@pvcapi.route('/api/v1/vm/modify', methods=['GET']) -#@pvcapi.route('/api/v1/vm/undefine', methods=['GET']) +#@pvcapi.route('/api/v1/vm/add', methods=['POST']) +#@pvcapi.route('/api/v1/vm/define', methods=['POST']) +#@pvcapi.route('/api/v1/vm/modify', methods=['POST']) +#@pvcapi.route('/api/v1/vm/undefine', methods=['POST']) #@pvcapi.route('/api/v1/vm/dump', methods=['GET']) -#@pvcapi.route('/api/v1/vm/start', methods=['GET']) -#@pvcapi.route('/api/v1/vm/restart', methods=['GET']) -#@pvcapi.route('/api/v1/vm/shutdown', methods=['GET']) -#@pvcapi.route('/api/v1/vm/stop', methods=['GET']) -#@pvcapi.route('/api/v1/vm/move', methods=['GET']) -#@pvcapi.route('/api/v1/vm/migrate', methods=['GET']) -#@pvcapi.route('/api/v1/vm/unmigrate', methods=['GET']) +#@pvcapi.route('/api/v1/vm/start', methods=['POST']) +#@pvcapi.route('/api/v1/vm/restart', methods=['POST']) +#@pvcapi.route('/api/v1/vm/shutdown', methods=['POST']) +#@pvcapi.route('/api/v1/vm/stop', methods=['POST']) +#@pvcapi.route('/api/v1/vm/move', methods=['POST']) +#@pvcapi.route('/api/v1/vm/migrate', methods=['POST']) +#@pvcapi.route('/api/v1/vm/unmigrate', methods=['POST']) #@pvcapi.route('/api/v1/vm/info', methods=['GET']) #@pvcapi.route('/api/v1/vm/list', methods=['GET']) +# Network endpoints #@pvcapi.route('/api/v1/network', methods=['GET']) -#@pvcapi.route('/api/v1/network/add', methods=['GET']) -#@pvcapi.route('/api/v1/network/modify', methods=['GET']) -#@pvcapi.route('/api/v1/network/remove', methods=['GET']) +#@pvcapi.route('/api/v1/network/add', methods=['POST']) +#@pvcapi.route('/api/v1/network/modify', methods=['POST']) +#@pvcapi.route('/api/v1/network/remove', methods=['POST']) #@pvcapi.route('/api/v1/network/info', methods=['GET']) #@pvcapi.route('/api/v1/network/list', methods=['GET']) #@pvcapi.route('/api/v1/network/dhcp', methods=['GET']) #@pvcapi.route('/api/v1/network/dhcp/list', methods=['GET']) #@pvcapi.route('/api/v1/network/dhcp/static', methods=['GET']) -#@pvcapi.route('/api/v1/network/dhcp/static/add', methods=['GET']) -#@pvcapi.route('/api/v1/network/dhcp/static/remove', methods=['GET']) +#@pvcapi.route('/api/v1/network/dhcp/static/add', methods=['POST']) +#@pvcapi.route('/api/v1/network/dhcp/static/remove', methods=['POST']) #@pvcapi.route('/api/v1/network/dhcp/static/list', methods=['GET']) #@pvcapi.route('/api/v1/network/acl', methods=['GET']) -#@pvcapi.route('/api/v1/network/acl/add', methods=['GET']) -#@pvcapi.route('/api/v1/network/acl/remove', methods=['GET']) +#@pvcapi.route('/api/v1/network/acl/add', methods=['POST']) +#@pvcapi.route('/api/v1/network/acl/remove', methods=['POST']) #@pvcapi.route('/api/v1/network/acl/list', methods=['GET']) # Ceph endpoints #@pvcapi.route('/api/v1/ceph', methods=['GET']) #@pvcapi.route('/api/v1/ceph/status', methods=['GET']) #@pvcapi.route('/api/v1/ceph/osd', methods=['GET']) -#@pvcapi.route('/api/v1/ceph/osd/add', methods=['GET']) -#@pvcapi.route('/api/v1/ceph/osd/remove', methods=['GET']) -#@pvcapi.route('/api/v1/ceph/osd/in', methods=['GET']) -#@pvcapi.route('/api/v1/ceph/osd/out', methods=['GET']) -#@pvcapi.route('/api/v1/ceph/osd/set', methods=['GET']) -#@pvcapi.route('/api/v1/ceph/osd/unset', methods=['GET']) +#@pvcapi.route('/api/v1/ceph/osd/add', methods=['POST']) +#@pvcapi.route('/api/v1/ceph/osd/remove', methods=['POST']) +#@pvcapi.route('/api/v1/ceph/osd/in', methods=['POST']) +#@pvcapi.route('/api/v1/ceph/osd/out', methods=['POST']) +#@pvcapi.route('/api/v1/ceph/osd/set', methods=['POST']) +#@pvcapi.route('/api/v1/ceph/osd/unset', methods=['POST']) #@pvcapi.route('/api/v1/ceph/osd/list', methods=['GET']) #@pvcapi.route('/api/v1/ceph/pool', methods=['GET']) -#@pvcapi.route('/api/v1/ceph/pool/add', methods=['GET']) -#@pvcapi.route('/api/v1/ceph/pool/remove', methods=['GET']) +#@pvcapi.route('/api/v1/ceph/pool/add', methods=['POST']) +#@pvcapi.route('/api/v1/ceph/pool/remove', methods=['POST']) #@pvcapi.route('/api/v1/ceph/pool/list', methods=['GET']) pvcapi.run() From 989c5f6bed71901fbacea1d1cfc912606daeb9b0 Mon Sep 17 00:00:00 2001 From: Joshua Boniface Date: Tue, 9 Apr 2019 08:18:11 -0400 Subject: [PATCH 03/13] Don't depend start on mariadb --- node-daemon/pvcd.service | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node-daemon/pvcd.service b/node-daemon/pvcd.service index 60dfcce4..74dfa8d2 100644 --- a/node-daemon/pvcd.service +++ b/node-daemon/pvcd.service @@ -1,7 +1,7 @@ # Parallel Virtual Cluster node daemon unit file [Unit] Description = Parallel Virtual Cluster node daemon -After = network-online.target libvirtd.service zookeeper.service mariadb.service +After = network-online.target libvirtd.service zookeeper.service [Service] Type = simple From ba4a44f5e70f02dd29b9cf741dc09688af602988 Mon Sep 17 00:00:00 2001 From: Joshua Boniface Date: Tue, 9 Apr 2019 16:25:45 -0400 Subject: [PATCH 04/13] Allow the zk_conn to be none properly --- client-cli/pvc.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/client-cli/pvc.py b/client-cli/pvc.py index fa7a06da..f2603f56 100755 --- a/client-cli/pvc.py +++ b/client-cli/pvc.py @@ -42,8 +42,9 @@ zk_host = '' CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'], max_content_width=120) -def cleanup(retcode, retmsg, zk_conn): - pvc_common.stopZKConnection(zk_conn) +def cleanup(retcode, retmsg, zk_conn=None): + if zk_conn: + pvc_common.stopZKConnection(zk_conn) if retcode == True: if retmsg != '': click.echo(retmsg) From 9809b798cc0345be9d6afd82a3b3cc82dd81f70d Mon Sep 17 00:00:00 2001 From: Joshua Boniface Date: Wed, 10 Apr 2019 16:18:18 -0400 Subject: [PATCH 05/13] Correct bug in non-editor modify --- client-cli/pvc.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/client-cli/pvc.py b/client-cli/pvc.py index f2603f56..ab35bb72 100755 --- a/client-cli/pvc.py +++ b/client-cli/pvc.py @@ -298,12 +298,12 @@ def vm_modify(domain, config, editor, restart): zk_conn = pvc_common.startZKConnection(zk_host) - if editor == True: - dom_uuid = pvc_vm.getDomainUUID(zk_conn, domain) - if dom_uuid == None: - cleanup(False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)) - dom_name = pvc_vm.getDomainName(zk_conn, dom_uuid) + dom_uuid = pvc_vm.getDomainUUID(zk_conn, domain) + if dom_uuid == None: + cleanup(False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)) + dom_name = pvc_vm.getDomainName(zk_conn, dom_uuid) + if editor == True: # Grab the current config current_vm_config = zk_conn.get('/domains/{}/xml'.format(dom_uuid))[0].decode('ascii') From 5ad2dda6d4feb7518f00474adc3165b52fdebaec Mon Sep 17 00:00:00 2001 From: Joshua Boniface Date: Wed, 10 Apr 2019 16:29:26 -0400 Subject: [PATCH 06/13] Only print file name --- client-cli/pvc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/client-cli/pvc.py b/client-cli/pvc.py index ab35bb72..e0e20774 100755 --- a/client-cli/pvc.py +++ b/client-cli/pvc.py @@ -358,9 +358,9 @@ def vm_modify(domain, config, editor, restart): config.close() if restart: - click.echo('Replacing config of VM "{}" with file "{}" and restarting.'.format(dom_name, config)) + click.echo('Replacing config of VM "{}" with file "{}" and restarting.'.format(dom_name, config.name)) else: - click.echo('Replacing config of VM "{}" with file "{}".'.format(dom_name, config)) + click.echo('Replacing config of VM "{}" with file "{}".'.format(dom_name, config.name)) retcode, retmsg = pvc_vm.modify_vm(zk_conn, domain, restart, new_vm_config) cleanup(retcode, retmsg, zk_conn) From b6ecd36588d5fabd10fa03e22262257d0dadb555 Mon Sep 17 00:00:00 2001 From: Joshua Boniface Date: Thu, 11 Apr 2019 19:06:06 -0400 Subject: [PATCH 07/13] Implement domain log watching Implements the ability for a client to watch almost-live domain console logs from the hypervisors. It does this using a deque-based "tail -f" mechanism (with a configurable buffer per-VM) that watches the domain console logfile in the (configurable) directory every half-second. It then stores the current buffer in Zookeeper when changed, where a client can then request it, either as a static piece of text in the `less` pager, or via a similar "tail -f" functionality implemented using fixed line splitting and comparison to provide a generally-seamless output. Enabling this feature requires each guest VM to implement a Libvirt serial log and write its (text) console to it, for example using the default logging directory: ``` ``` The append mode can be either on or off; on grows files unbounded, off causes the log (and hence the PVC log data) to be truncated on initial VM startup from offline. The administrator must choose how they best want to handle this until Libvirt implements their own clog-type logging format. --- client-cli/pvc.py | 32 ++++++ client-common/vm.py | 90 ++++++++++++++-- client-common/zkhandler.py | 6 +- node-daemon/pvcd.sample.yaml | 4 + node-daemon/pvcd/Daemon.py | 38 ++++--- .../pvcd/DomainConsoleWatcherInstance.py | 100 ++++++++++++++++++ node-daemon/pvcd/DomainInstance.py | 49 ++++++++- node-daemon/pvcd/zkhandler.py | 6 +- 8 files changed, 289 insertions(+), 36 deletions(-) create mode 100644 node-daemon/pvcd/DomainConsoleWatcherInstance.py diff --git a/client-cli/pvc.py b/client-cli/pvc.py index e0e20774..4dff0075 100755 --- a/client-cli/pvc.py +++ b/client-cli/pvc.py @@ -571,6 +571,37 @@ def vm_info(domain, long_output): retcode, retmsg = pvc_vm.get_info(zk_conn, domain, long_output) cleanup(retcode, retmsg, zk_conn) +############################################################################### +# pvc vm log +############################################################################### +@click.command(name='log', short_help='Show console logs of a VM object.') +@click.argument( + 'domain' +) +@click.option( + '-l', '--lines', 'lines', default=1000, show_default=True, + help='Display this many log lines from the end of the log buffer.' +) +@click.option( + '-f', '--follow', 'follow', is_flag=True, default=False, + help='Follow the log buffer; output may be delayed by a few seconds relative to the live system. The --lines value defaults to 10 for the initial output.' +) +def vm_log(domain, lines, follow): + """ + Show console logs of virtual machine DOMAIN on its current node in the 'less' pager or continuously. DOMAIN may be a UUID or name. Note that migrating a VM to a different node will cause the log buffer to be overwritten by entries from the new node. + """ + + # Open a Zookeeper connection + zk_conn = pvc_common.startZKConnection(zk_host) + if follow: + # Handle the "new" default of the follow + if lines == 1000: + lines = 10 + retcode, retmsg = pvc_vm.follow_console_log(zk_conn, domain, lines) + else: + retcode, retmsg = pvc_vm.get_console_log(zk_conn, domain, lines) + cleanup(retcode, retmsg, zk_conn) + ############################################################################### # pvc vm list ############################################################################### @@ -1331,6 +1362,7 @@ cli_vm.add_command(vm_move) cli_vm.add_command(vm_migrate) cli_vm.add_command(vm_unmigrate) cli_vm.add_command(vm_info) +cli_vm.add_command(vm_log) cli_vm.add_command(vm_list) cli_network.add_command(net_add) diff --git a/client-common/vm.py b/client-common/vm.py index fd2808d2..12931eb8 100644 --- a/client-common/vm.py +++ b/client-common/vm.py @@ -33,6 +33,8 @@ import lxml.objectify import configparser import kazoo.client +from collections import deque + import client_lib.ansiprint as ansiprint import client_lib.zkhandler as zkhandler import client_lib.common as common @@ -226,6 +228,7 @@ def define_vm(zk_conn, config_data, target_node, selector): '/domains/{}/node'.format(dom_uuid): target_node, '/domains/{}/lastnode'.format(dom_uuid): '', '/domains/{}/failedreason'.format(dom_uuid): '', + '/domains/{}/consolelog'.format(dom_uuid): '', '/domains/{}/xml'.format(dom_uuid): config_data }) @@ -233,7 +236,7 @@ def define_vm(zk_conn, config_data, target_node, selector): def modify_vm(zk_conn, domain, restart, new_vm_config): dom_uuid = getDomainUUID(zk_conn, domain) - if dom_uuid == None: + if not dom_uuid: return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain) dom_name = getDomainName(zk_conn, domain) @@ -250,7 +253,7 @@ def modify_vm(zk_conn, domain, restart, new_vm_config): def dump_vm(zk_conn, domain): dom_uuid = getDomainUUID(zk_conn, domain) - if dom_uuid == None: + if not dom_uuid: return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain) # Gram the domain XML and dump it to stdout @@ -262,7 +265,7 @@ def dump_vm(zk_conn, domain): def undefine_vm(zk_conn, domain): # Validate and obtain alternate passed value dom_uuid = getDomainUUID(zk_conn, domain) - if dom_uuid == None: + if not dom_uuid: common.stopZKConnection(zk_conn) return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain) @@ -300,7 +303,7 @@ def undefine_vm(zk_conn, domain): def start_vm(zk_conn, domain): # Validate and obtain alternate passed value dom_uuid = getDomainUUID(zk_conn, domain) - if dom_uuid == None: + if not dom_uuid: common.stopZKConnection(zk_conn) return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain) @@ -313,7 +316,7 @@ def start_vm(zk_conn, domain): def restart_vm(zk_conn, domain): # Validate and obtain alternate passed value dom_uuid = getDomainUUID(zk_conn, domain) - if dom_uuid == None: + if not dom_uuid: common.stopZKConnection(zk_conn) return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain) @@ -332,7 +335,7 @@ def restart_vm(zk_conn, domain): def shutdown_vm(zk_conn, domain): # Validate and obtain alternate passed value dom_uuid = getDomainUUID(zk_conn, domain) - if dom_uuid == None: + if not dom_uuid: return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain) # Get state and verify we're OK to proceed @@ -350,7 +353,7 @@ def shutdown_vm(zk_conn, domain): def stop_vm(zk_conn, domain): # Validate and obtain alternate passed value dom_uuid = getDomainUUID(zk_conn, domain) - if dom_uuid == None: + if not dom_uuid: common.stopZKConnection(zk_conn) return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain) @@ -366,7 +369,7 @@ def stop_vm(zk_conn, domain): def move_vm(zk_conn, domain, target_node, selector): # Validate and obtain alternate passed value dom_uuid = getDomainUUID(zk_conn, domain) - if dom_uuid == None: + if not dom_uuid: common.stopZKConnection(zk_conn) return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain) @@ -402,7 +405,7 @@ def move_vm(zk_conn, domain, target_node, selector): def migrate_vm(zk_conn, domain, target_node, selector, force_migrate): # Validate and obtain alternate passed value dom_uuid = getDomainUUID(zk_conn, domain) - if dom_uuid == None: + if not dom_uuid: common.stopZKConnection(zk_conn) return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain) @@ -446,7 +449,7 @@ def migrate_vm(zk_conn, domain, target_node, selector, force_migrate): def unmigrate_vm(zk_conn, domain): # Validate and obtain alternate passed value dom_uuid = getDomainUUID(zk_conn, domain) - if dom_uuid == None: + if not dom_uuid: common.stopZKConnection(zk_conn) return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain) @@ -476,7 +479,7 @@ def unmigrate_vm(zk_conn, domain): def get_info(zk_conn, domain, long_output): # Validate and obtain alternate passed value dom_uuid = getDomainUUID(zk_conn, domain) - if dom_uuid == None: + if not dom_uuid: common.stopZKConnection(zk_conn) return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain) @@ -494,6 +497,71 @@ def get_info(zk_conn, domain, long_output): return True, '' +def get_console_log(zk_conn, domain, lines=1000): + # Validate and obtain alternate passed value + dom_uuid = getDomainUUID(zk_conn, domain) + if not dom_uuid: + return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain) + + # Get the data from ZK + console_log = zkhandler.readdata(zk_conn, '/domains/{}/consolelog'.format(dom_uuid)) + + # Shrink the log buffer to length lines + shrunk_log = console_log.split('\n')[-lines:] + loglines = '\n'.join(shrunk_log) + + # Show it in the pager (less) + try: + pager = subprocess.Popen(['less', '-R'], stdin=subprocess.PIPE) + pager.communicate(input=loglines.encode('utf8')) + except FileNotFoundError: + return False, 'ERROR: The "less" pager is required to view console logs.' + + return True, '' + +def follow_console_log(zk_conn, domain, lines=10): + # Validate and obtain alternate passed value + dom_uuid = getDomainUUID(zk_conn, domain) + if not dom_uuid: + return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain) + + # Get the initial data from ZK + console_log = zkhandler.readdata(zk_conn, '/domains/{}/consolelog'.format(dom_uuid)) + + # Shrink the log buffer to length lines + shrunk_log = console_log.split('\n')[-lines:] + loglines = '\n'.join(shrunk_log) + + # Print the initial data and begin following + print(loglines, end='') + + while True: + # Grab the next line set + new_console_log = zkhandler.readdata(zk_conn, '/domains/{}/consolelog'.format(dom_uuid)) + # Split the new and old log strings into constitutent lines + old_console_loglines = console_log.split('\n') + new_console_loglines = new_console_log.split('\n') + # Set the console log to the new log value for the next iteration + console_log = new_console_log + # Remove the lines from the old log until we hit the first line of the new log; this + # ensures that the old log is a string that we can remove from the new log entirely + for index, line in enumerate(old_console_loglines, start=0): + if line == new_console_loglines[0]: + del old_console_loglines[0:index] + break + # Rejoin the log lines into strings + old_console_log = '\n'.join(old_console_loglines) + new_console_log = '\n'.join(new_console_loglines) + # Remove the old lines from the new log + diff_console_log = new_console_log.replace(old_console_log, "") + # If there's a difference, print it out + if diff_console_log != "": + print(diff_console_log, end='') + # Wait a second + time.sleep(1) + + return True, '' + def get_list(zk_conn, node, state, limit, raw): if node != None: # Verify node is valid diff --git a/client-common/zkhandler.py b/client-common/zkhandler.py index 6e7d0b8c..a07e57f4 100644 --- a/client-common/zkhandler.py +++ b/client-common/zkhandler.py @@ -45,7 +45,7 @@ def deletekey(zk_conn, key, recursive=True): # Data read function def readdata(zk_conn, key): data_raw = zk_conn.get(key) - data = data_raw[0].decode('ascii') + data = data_raw[0].decode('utf8') meta = data_raw[1] return data @@ -61,7 +61,7 @@ def writedata(zk_conn, kv): # Check if this key already exists or not if not zk_conn.exists(key): # We're creating a new key - zk_transaction.create(key, str(data).encode('ascii')) + zk_transaction.create(key, str(data).encode('utf8')) else: # We're updating a key with version validation orig_data = zk_conn.get(key) @@ -71,7 +71,7 @@ def writedata(zk_conn, kv): new_version = version + 1 # Update the data - zk_transaction.set_data(key, str(data).encode('ascii')) + zk_transaction.set_data(key, str(data).encode('utf8')) # Set up the check try: diff --git a/node-daemon/pvcd.sample.yaml b/node-daemon/pvcd.sample.yaml index da033858..e85e66d3 100644 --- a/node-daemon/pvcd.sample.yaml +++ b/node-daemon/pvcd.sample.yaml @@ -111,12 +111,16 @@ pvc: dynamic_directory: "/run/pvc" # log_directory: Logging directory log_directory: "/var/log/pvc" + # console_log_directory: Libvirt console logging directory + console_log_directory: "/var/log/libvirt" # logging: PVC logging configuration logging: # file_logging: Enable or disable logging to files under log_directory file_logging: True # stdout_logging: Enable or disable logging to stdout (i.e. journald) stdout_logging: True + # console_log_lines: Number of console log lines to store in Zookeeper per VM + console_log_lines: 1000 # networking: PVC networking configuration # OPTIONAL if enable_networking: False networking: diff --git a/node-daemon/pvcd/Daemon.py b/node-daemon/pvcd/Daemon.py index 44be8781..d0201dfc 100644 --- a/node-daemon/pvcd/Daemon.py +++ b/node-daemon/pvcd/Daemon.py @@ -141,8 +141,10 @@ def readConfig(pvcd_config_file, myhostname): 'enable_storage': o_config['pvc']['functions']['enable_storage'], 'dynamic_directory': o_config['pvc']['system']['configuration']['directories']['dynamic_directory'], 'log_directory': o_config['pvc']['system']['configuration']['directories']['log_directory'], + 'console_log_directory': o_config['pvc']['system']['configuration']['directories']['console_log_directory'], 'file_logging': o_config['pvc']['system']['configuration']['logging']['file_logging'], 'stdout_logging': o_config['pvc']['system']['configuration']['logging']['stdout_logging'], + 'console_log_lines': o_config['pvc']['system']['configuration']['logging']['console_log_lines'], 'keepalive_interval': o_config['pvc']['system']['fencing']['intervals']['keepalive_interval'], 'fence_intervals': o_config['pvc']['system']['fencing']['intervals']['fence_intervals'], 'suicide_intervals': o_config['pvc']['system']['fencing']['intervals']['suicide_intervals'], @@ -457,12 +459,28 @@ zk_conn.add_listener(zk_listener) # Cleanup function def cleanup(): - global zk_conn, update_timer + logger.out('Terminating pvcd and cleaning up', state='s') + + global zk_conn, update_timer, d_domains # Stop keepalive thread - stopKeepaliveTimer() + try: + stopKeepaliveTimer() + except NameError: + pass + except AttributeError: + pass - logger.out('Terminating pvcd and cleaning up', state='s') + # Stop console logging on all VMs + logger.out('Stopping domain console watchers', state='s') + for domain in d_domain: + if d_domain[domain].getnode() == myhostname: + try: + d_domain[domain].console_log_instance.stop() + except NameError as e: + pass + except AttributeError as e: + pass # Force into secondary network state if needed if zkhandler.readdata(zk_conn, '/nodes/{}/routerstate'.format(myhostname)) == 'primary': @@ -471,12 +489,7 @@ def cleanup(): '/nodes/{}/routerstate'.format(myhostname): 'secondary', '/primary_node': 'none' }) - else: - is_primary = False - - # Wait for things to flush - if is_primary: - logger.out('Waiting for primary migration', state='s') + logger.out('Waiting 3 seconds for primary migration', state='s') time.sleep(3) # Set stop state in Zookeeper @@ -493,14 +506,11 @@ def cleanup(): pass logger.out('Terminated pvc daemon', state='s') - -# Handle exit gracefully -atexit.register(cleanup) + sys.exit(0) # Termination function def term(signum='', frame=''): - # Exit - sys.exit(0) + cleanup() # Handle signals gracefully signal.signal(signal.SIGTERM, term) diff --git a/node-daemon/pvcd/DomainConsoleWatcherInstance.py b/node-daemon/pvcd/DomainConsoleWatcherInstance.py new file mode 100644 index 00000000..36376f1a --- /dev/null +++ b/node-daemon/pvcd/DomainConsoleWatcherInstance.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 + +# DomainConsoleWatcherInstance.py - Class implementing a console log watcher for PVC domains +# Part of the Parallel Virtual Cluster (PVC) system +# +# Copyright (C) 2018 Joshua M. Boniface +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +############################################################################### + +import os +import sys +import uuid +import time +import threading +import libvirt + +from collections import deque + +import fcntl +import signal + +import pvcd.log as log +import pvcd.zkhandler as zkhandler + +class DomainConsoleWatcherInstance(object): + # Initialization function + def __init__(self, domuuid, domname, zk_conn, config, logger, this_node): + self.domuuid = domuuid + self.domname = domname + self.zk_conn = zk_conn + self.config = config + self.logfile = '{}/{}.log'.format(config['console_log_directory'], self.domname) + self.console_log_lines = config['console_log_lines'] + self.logger = logger + self.this_node = this_node + + # Try to append (create) the logfile and set its permissions + open(self.logfile, 'a').close() + os.chmod(self.logfile, 0o600) + + self.logdeque = deque(open(self.logfile), self.console_log_lines) + + self.stamp = None + self.cached_stamp = None + + # Set up the deque with the current contents of the log + self.last_loglines = None + self.loglines = None + + # Thread options + self.thread = None + self.thread_stopper = threading.Event() + + # Start execution thread + def start(self): + self.thread_stopper.clear() + self.thread = threading.Thread(target=self.run, args=(), kwargs={}) + self.logger.out('Starting VM log parser', state='i', prefix='Domain {}:'.format(self.domuuid)) + self.thread.start() + + # Stop execution thread + def stop(self): + self.logger.out('Stopping VM log parser', state='i', prefix='Domain {}:'.format(self.domuuid)) + self.thread_stopper.set() + # Do one final flush + self.update() + + # Main entrypoint + def run(self): + # Main loop + while not self.thread_stopper.is_set(): + self.update() + time.sleep(0.5) + + def update(self): + self.stamp = os.stat(self.logfile).st_mtime + if self.stamp != self.cached_stamp: + self.cached_stamp = self.stamp + self.fetch_lines() + # Update Zookeeper with the new loglines if they changed + if self.loglines != self.last_loglines: + zkhandler.writedata(self.zk_conn, { '/domains/{}/consolelog'.format(self.domuuid): self.loglines }) + self.last_loglines = self.loglines + + def fetch_lines(self): + self.logdeque = deque(open(self.logfile), self.console_log_lines) + self.loglines = ''.join(self.logdeque) diff --git a/node-daemon/pvcd/DomainInstance.py b/node-daemon/pvcd/DomainInstance.py index 910f9a7e..32a1ece3 100644 --- a/node-daemon/pvcd/DomainInstance.py +++ b/node-daemon/pvcd/DomainInstance.py @@ -32,11 +32,14 @@ import kazoo.client import pvcd.log as log import pvcd.zkhandler as zkhandler +import pvcd.DomainConsoleWatcherInstance as DomainConsoleWatcherInstance + class DomainInstance(object): # Initialization function def __init__(self, domuuid, zk_conn, config, logger, this_node): # Passed-in variables on creation self.domuuid = domuuid + self.domname = zkhandler.readdata(zk_conn, '/domains/{}'.format(domuuid)) self.zk_conn = zk_conn self.config = config self.logger = logger @@ -52,8 +55,12 @@ class DomainInstance(object): self.inshutdown = False self.instop = False + # Libvirt domuuid self.dom = self.lookupByUUID(self.domuuid) + # Log watcher instance + self.console_log_instance = DomainConsoleWatcherInstance.DomainConsoleWatcherInstance(self.domuuid, self.domname, self.zk_conn, self.config, self.logger, self.this_node) + # Watch for changes to the state field in Zookeeper @self.zk_conn.DataWatch('/domains/{}/state'.format(self.domuuid)) def watch_state(data, stat, event=""): @@ -69,6 +76,7 @@ class DomainInstance(object): else: self.manage_vm_state() + # Get data functions def getstate(self): return self.state @@ -118,6 +126,9 @@ class DomainInstance(object): # Start up the VM def start_vm(self): + # Start the log watcher + self.console_log_instance.start() + self.logger.out('Starting VM', state='i', prefix='Domain {}:'.format(self.domuuid)) self.instart = True @@ -157,6 +168,7 @@ class DomainInstance(object): self.dom = None lv_conn.close() + self.instart = False # Restart the VM @@ -193,6 +205,9 @@ class DomainInstance(object): self.dom = None self.instop = False + # Stop the log watcher + self.console_log_instance.stop() + # Stop the VM forcibly def stop_vm(self): self.logger.out('Forcibly stopping VM', state='i', prefix='Domain {}:'.format(self.domuuid)) @@ -210,6 +225,9 @@ class DomainInstance(object): self.dom = None self.instop = False + # Stop the log watcher + self.console_log_instance.stop() + # Shutdown the VM gracefully def shutdown_vm(self): self.logger.out('Gracefully stopping VM', state='i', prefix='Domain {}:'.format(self.domuuid)) @@ -238,6 +256,9 @@ class DomainInstance(object): self.dom = None self.inshutdown = False + # Stop the log watcher + self.console_log_instance.stop() + def live_migrate_vm(self, dest_node): try: dest_lv_conn = libvirt.open('qemu+tcp://{}/system'.format(self.node)) @@ -245,7 +266,7 @@ class DomainInstance(object): raise except: self.logger.out('Failed to open connection to qemu+tcp://{}/system; aborting migration.'.format(self.node), state='e', prefix='Domain {}:'.format(self.domuuid)) - return 1 + return False try: target_dom = self.dom.migrate(dest_lv_conn, libvirt.VIR_MIGRATE_LIVE, None, None, 0) @@ -255,10 +276,10 @@ class DomainInstance(object): except: dest_lv_conn.close() - return 1 + return False dest_lv_conn.close() - return 0 + return True # Migrate the VM to a target host def migrate_vm(self): @@ -268,9 +289,9 @@ class DomainInstance(object): try: migrate_ret = self.live_migrate_vm(self.node) except: - migrate_ret = 0 + migrate_ret = True - if migrate_ret != 0: + if not migrate_ret: self.logger.out('Could not live migrate VM; shutting down to migrate instead', state='e', prefix='Domain {}:'.format(self.domuuid)) self.shutdown_vm() time.sleep(1) @@ -281,8 +302,14 @@ class DomainInstance(object): zkhandler.writedata(self.zk_conn, { '/domains/{}/state'.format(self.domuuid): 'start' }) self.inmigrate = False + # Stop the log watcher + self.console_log_instance.stop() + # Receive the migration from another host (wait until VM is running) def receive_migrate(self): + # Start the log watcher + self.console_log_instance.start() + self.inreceive = True self.logger.out('Receiving migration', state='i', prefix='Domain {}:'.format(self.domuuid)) while True: @@ -360,10 +387,16 @@ class DomainInstance(object): if running == libvirt.VIR_DOMAIN_RUNNING: # VM is already running and should be if self.state == "start": + # Start the log watcher + self.console_log_instance.start() + # Add domain to running list self.addDomainToList() # VM is already running and should be but stuck in migrate state elif self.state == "migrate": + # Start the log watcher + self.console_log_instance.start() zkhandler.writedata(self.zk_conn, { '/domains/{}/state'.format(self.domuuid): 'start' }) + # Add domain to running list self.addDomainToList() # VM should be restarted elif self.state == "restart": @@ -377,9 +410,11 @@ class DomainInstance(object): else: # VM should be started if self.state == "start": + # Start the domain self.start_vm() # VM should be migrated to this node elif self.state == "migrate": + # Receive the migration self.receive_migrate() # VM should be restarted (i.e. started since it isn't running) if self.state == "restart": @@ -387,9 +422,13 @@ class DomainInstance(object): # VM should be shut down; ensure it's gone from this node's domain_list elif self.state == "shutdown": self.removeDomainFromList() + # Stop the log watcher + self.console_log_instance.stop() # VM should be stoped; ensure it's gone from this node's domain_list elif self.state == "stop": self.removeDomainFromList() + # Stop the log watcher + self.console_log_instance.stop() else: # Conditional pass three - Is this VM currently running on this node diff --git a/node-daemon/pvcd/zkhandler.py b/node-daemon/pvcd/zkhandler.py index 279495cb..b1d351ce 100644 --- a/node-daemon/pvcd/zkhandler.py +++ b/node-daemon/pvcd/zkhandler.py @@ -37,7 +37,7 @@ def deletekey(zk_conn, key, recursive=True): # Data read function def readdata(zk_conn, key): data_raw = zk_conn.get(key) - data = data_raw[0].decode('ascii') + data = data_raw[0].decode('utf8') meta = data_raw[1] return data @@ -55,7 +55,7 @@ def writedata(zk_conn, kv): # Check if this key already exists or not if not zk_conn.exists(key): # We're creating a new key - zk_transaction.create(key, str(data).encode('ascii')) + zk_transaction.create(key, str(data).encode('utf8')) else: # We're updating a key with version validation orig_data = zk_conn.get(key) @@ -65,7 +65,7 @@ def writedata(zk_conn, kv): new_version = version + 1 # Update the data - zk_transaction.set_data(key, str(data).encode('ascii')) + zk_transaction.set_data(key, str(data).encode('utf8')) # Set up the check try: From 41d3e79187e575e3a263db28127344a5a5d76caf Mon Sep 17 00:00:00 2001 From: Joshua Boniface Date: Fri, 12 Apr 2019 11:18:53 -0400 Subject: [PATCH 08/13] Add pause between stop/start on restart --- node-daemon/pvcd/DomainInstance.py | 1 + 1 file changed, 1 insertion(+) diff --git a/node-daemon/pvcd/DomainInstance.py b/node-daemon/pvcd/DomainInstance.py index 32a1ece3..daa5c804 100644 --- a/node-daemon/pvcd/DomainInstance.py +++ b/node-daemon/pvcd/DomainInstance.py @@ -185,6 +185,7 @@ class DomainInstance(object): return self.shutdown_vm() + time.sleep(1) self.start_vm() self.addDomainToList() From ae1650759ba16f4c0d68b88c5c418386e30d12b0 Mon Sep 17 00:00:00 2001 From: Joshua Boniface Date: Fri, 12 Apr 2019 16:59:26 -0400 Subject: [PATCH 09/13] Add new endpoints --- client-api/pvcapi.py | 80 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 68 insertions(+), 12 deletions(-) diff --git a/client-api/pvcapi.py b/client-api/pvcapi.py index cec66116..7418cef0 100755 --- a/client-api/pvcapi.py +++ b/client-api/pvcapi.py @@ -21,6 +21,7 @@ ############################################################################### import flask +import json import client_lib.common as pvc_common import client_lib.node as pvc_node @@ -44,18 +45,20 @@ def api_node(): """ Manage the state of a node in the PVC cluster """ - return "Manage the state of a node in the PVC cluster", 209 + return "Manage the state of a node in the PVC cluster.\n", 209 @pvcapi.route('/api/v1/node/secondary', methods=['POST']) def api_node_secondary(): """ Take NODE out of primary router mode. """ - # Mandatory args - if 'node' in flask.request.args: - node = flask.request.args['node'] + # Get data from flask + data = json.loads(flask.request.data.decode('utf8')) + # Get node + if 'node' in data: + node = data['node'] else: - return "Error: No node provided. Please specify a node.", 510 + return "Error: No node provided. Please specify a node.\n", 510 zk_conn = pvc_common.startZKConnection(zk_host) retflag, retmsg = pvc_node.secondary_node(zk_conn, node) @@ -75,11 +78,13 @@ def api_node_primary(): """ Set NODE to primary router mode. """ - # Mandatory args - if 'node' in flask.request.args: - node = flask.request.args['node'] + # Get data from flask + data = json.loads(flask.request.data.decode('utf8')) + # Get node + if 'node' in data: + node = data['node'] else: - return "Error: No node provided. Please specify a node.", 510 + return "Error: No node provided. Please specify a node.\n", 510 zk_conn = pvc_common.startZKConnection(zk_host) retflag, retmsg = pvc_node.primary_node(zk_conn, node) @@ -94,9 +99,60 @@ def api_node_primary(): } return flask.jsonify(output), retcode -#@pvcapi.route('/api/v1/node/flush', methods=['POST']) -#@pvcapi.route('/api/v1/node/unflush', methods=['POST']) -#@pvcapi.route('/api/v1/node/ready', methods=['POST']) +@pvcapi.route('/api/v1/node/flush', methods=['POST']) +def api_node_flush(): + """ + Flush NODE of running VMs. + """ + # Get data from flask + data = json.loads(flask.request.data.decode('utf8')) + # Get node + if 'node' in data: + node = data['node'] + else: + return "Error: No node provided. Please specify a node.\n", 510 + + zk_conn = pvc_common.startZKConnection(zk_host) + retflag, retmsg = pvc_node.flush_node(zk_conn, node, False) + if retflag: + retcode = 200 + else: + retcode = 510 + + pvc_common.stopZKConnection(zk_conn) + output = { + 'message': retmsg, + } + return flask.jsonify(output), retcode + +@pvcapi.route('/api/v1/node/unflush', methods=['POST']) +@pvcapi.route('/api/v1/node/ready', methods=['POST']) +def api_node_ready(): + """ + Restore NODE to active service. + """ + # Get data from flask + data = json.loads(flask.request.data.decode('utf8')) + # Get node + if 'node' in data: + node = data['node'] + else: + return "Error: No node provided. Please specify a node.\n", 510 + + zk_conn = pvc_common.startZKConnection(zk_host) + retflag, retmsg = pvc_node.ready_node(zk_conn, node) + if retflag: + retcode = 200 + else: + retcode = 510 + + pvc_common.stopZKConnection(zk_conn) + output = { + 'message': retmsg, + } + return flask.jsonify(output), retcode + + #@pvcapi.route('/api/v1/node/info', methods=['GET']) #@pvcapi.route('/api/v1/node/list', methods=['GET']) # VM endpoints From 7416d440d5be33fed365cff359fc11be7ef3ff6c Mon Sep 17 00:00:00 2001 From: Joshua Boniface Date: Mon, 15 Apr 2019 18:24:00 -0400 Subject: [PATCH 10/13] Use zkhandler when writing initial node config --- node-daemon/pvcd/Daemon.py | 41 +++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/node-daemon/pvcd/Daemon.py b/node-daemon/pvcd/Daemon.py index d0201dfc..21e74151 100644 --- a/node-daemon/pvcd/Daemon.py +++ b/node-daemon/pvcd/Daemon.py @@ -530,28 +530,29 @@ if zk_conn.exists('/nodes/{}'.format(myhostname)): else: logger.out("Node is " + logger.fmt_red + "absent" + logger.fmt_end + " in Zookeeper; adding new node", state='i') keepalive_time = int(time.time()) - transaction = zk_conn.transaction() - transaction.create('/nodes/{}'.format(myhostname), config['daemon_mode'].encode('ascii')) + zkhander.writedata(zk_conn, { + '/nodes/{}'.format(myhostname): config['daemon_mode'].encode('ascii'), # Basic state information - transaction.create('/nodes/{}/daemonmode'.format(myhostname), config['daemon_mode'].encode('ascii')) - transaction.create('/nodes/{}/daemonstate'.format(myhostname), 'init'.encode('ascii')) - transaction.create('/nodes/{}/routerstate'.format(myhostname), 'client'.encode('ascii')) - transaction.create('/nodes/{}/domainstate'.format(myhostname), 'flushed'.encode('ascii')) - transaction.create('/nodes/{}/staticdata'.format(myhostname), ' '.join(staticdata).encode('ascii')) - transaction.create('/nodes/{}/memfree'.format(myhostname), '0'.encode('ascii')) - transaction.create('/nodes/{}/memused'.format(myhostname), '0'.encode('ascii')) - transaction.create('/nodes/{}/memalloc'.format(myhostname), '0'.encode('ascii')) - transaction.create('/nodes/{}/vcpualloc'.format(myhostname), '0'.encode('ascii')) - transaction.create('/nodes/{}/cpuload'.format(myhostname), '0.0'.encode('ascii')) - transaction.create('/nodes/{}/networkscount'.format(myhostname), '0'.encode('ascii')) - transaction.create('/nodes/{}/domainscount'.format(myhostname), '0'.encode('ascii')) - transaction.create('/nodes/{}/runningdomains'.format(myhostname), ''.encode('ascii')) + '/nodes/{}/daemonmode'.format(myhostname): config['daemon_mode'].encode('ascii'), + '/nodes/{}/daemonstate'.format(myhostname): 'init'.encode('ascii'), + '/nodes/{}/routerstate'.format(myhostname): 'client'.encode('ascii'), + '/nodes/{}/domainstate'.format(myhostname): 'flushed'.encode('ascii'), + '/nodes/{}/staticdata'.format(myhostname): ' '.join(staticdata).encode('ascii'), + '/nodes/{}/memtotal'.format(myhostname): '0'.encode('ascii'), + '/nodes/{}/memfree'.format(myhostname): '0'.encode('ascii'), + '/nodes/{}/memused'.format(myhostname): '0'.encode('ascii'), + '/nodes/{}/memalloc'.format(myhostname): '0'.encode('ascii'), + '/nodes/{}/vcpualloc'.format(myhostname): '0'.encode('ascii'), + '/nodes/{}/cpuload'.format(myhostname): '0.0'.encode('ascii'), + '/nodes/{}/networkscount'.format(myhostname): '0'.encode('ascii'), + '/nodes/{}/domainscount'.format(myhostname): '0'.encode('ascii'), + '/nodes/{}/runningdomains'.format(myhostname): ''.encode('ascii'), # Keepalives and fencing information - transaction.create('/nodes/{}/keepalive'.format(myhostname), str(keepalive_time).encode('ascii')) - transaction.create('/nodes/{}/ipmihostname'.format(myhostname), config['ipmi_hostname'].encode('ascii')) - transaction.create('/nodes/{}/ipmiusername'.format(myhostname), config['ipmi_username'].encode('ascii')) - transaction.create('/nodes/{}/ipmipassword'.format(myhostname), config['ipmi_password'].encode('ascii')) - transaction.commit() + '/nodes/{}/keepalive'.format(myhostname): str(keepalive_time).encode('ascii'), + '/nodes/{}/ipmihostname'.format(myhostname): config['ipmi_hostname'].encode('ascii'), + '/nodes/{}/ipmiusername'.format(myhostname): config['ipmi_username'].encode('ascii'), + '/nodes/{}/ipmipassword'.format(myhostname): config['ipmi_password'].encode('ascii') + }) # Check that the primary key exists, and create it with us as master if not try: From 2151566b747fad9ec4df667458acea53b8bbc448 Mon Sep 17 00:00:00 2001 From: Joshua Boniface Date: Mon, 15 Apr 2019 18:25:50 -0400 Subject: [PATCH 11/13] Send total memory via ZK so its accurate --- node-daemon/pvcd/Daemon.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/node-daemon/pvcd/Daemon.py b/node-daemon/pvcd/Daemon.py index 21e74151..a0763d1b 100644 --- a/node-daemon/pvcd/Daemon.py +++ b/node-daemon/pvcd/Daemon.py @@ -1033,6 +1033,7 @@ def update_zookeeper(): if debug: print("Set our information in zookeeper") #this_node.name = lv_conn.getHostname() + this_node.memtotal = int(psutil.virtual_memory().total / 1024 / 1024) this_node.memused = int(psutil.virtual_memory().used / 1024 / 1024) this_node.memfree = int(psutil.virtual_memory().free / 1024 / 1024) this_node.memalloc = memalloc @@ -1045,6 +1046,7 @@ def update_zookeeper(): keepalive_time = int(time.time()) try: zkhandler.writedata(zk_conn, { + '/nodes/{}/memtotal'.format(this_node.name): str(this_node.memtotal), '/nodes/{}/memused'.format(this_node.name): str(this_node.memused), '/nodes/{}/memfree'.format(this_node.name): str(this_node.memfree), '/nodes/{}/memalloc'.format(this_node.name): str(this_node.memalloc), From 045ad131afc7dbd875bc646065f0ce09a880128c Mon Sep 17 00:00:00 2001 From: Joshua Boniface Date: Fri, 10 May 2019 23:25:06 -0400 Subject: [PATCH 12/13] Update node.py to allow API use Addresses #33 --- client-cli/pvc.py | 20 +++- client-common/node.py | 265 +++++++++++++++++++----------------------- 2 files changed, 135 insertions(+), 150 deletions(-) diff --git a/client-cli/pvc.py b/client-cli/pvc.py index 4dff0075..c63cf4ef 100755 --- a/client-cli/pvc.py +++ b/client-cli/pvc.py @@ -30,6 +30,7 @@ import re import colorama import yaml +import client_lib.ansiprint as ansiprint import client_lib.common as pvc_common import client_lib.node as pvc_node import client_lib.vm as pvc_vm @@ -162,8 +163,16 @@ def node_info(node, long_output): """ zk_conn = pvc_common.startZKConnection(zk_host) - retcode, retmsg = pvc_node.get_info(zk_conn, node, long_output) - cleanup(retcode, retmsg, zk_conn) + retcode, retdata = pvc_node.get_info(zk_conn, node) + if retcode: + pvc_node.format_info(zk_conn, retdata, long_output) + if long_output: + click.echo('{}Virtual machines on node:{}'.format(ansiprint.bold(), ansiprint.end())) + click.echo('') + pvc_vm.get_list(zk_conn, node, None, None, None) + click.echo('') + retdata = '' + cleanup(retcode, retdata, zk_conn) ############################################################################### # pvc node list @@ -178,8 +187,11 @@ def node_list(limit): """ zk_conn = pvc_common.startZKConnection(zk_host) - retcode, retmsg = pvc_node.get_list(zk_conn, limit) - cleanup(retcode, retmsg, zk_conn) + retcode, retdata = pvc_node.get_list(zk_conn, limit) + if retcode: + pvc_node.format_list(retdata) + retdata = '' + cleanup(retcode, retdata, zk_conn) ############################################################################### # pvc vm diff --git a/client-common/node.py b/client-common/node.py index 000f33e8..154c3a58 100644 --- a/client-common/node.py +++ b/client-common/node.py @@ -39,7 +39,10 @@ import client_lib.zkhandler as zkhandler import client_lib.common as common import client_lib.vm as pvc_vm -def getInformationFromNode(zk_conn, node_name, long_output): +def getInformationFromNode(zk_conn, node_name): + """ + Gather information about a node from the Zookeeper database and return a dict() containing it. + """ node_daemon_state = zkhandler.readdata(zk_conn, '/nodes/{}/daemonstate'.format(node_name)) node_coordinator_state = zkhandler.readdata(zk_conn, '/nodes/{}/routerstate'.format(node_name)) node_domain_state = zkhandler.readdata(zk_conn, '/nodes/{}/domainstate'.format(node_name)) @@ -48,63 +51,36 @@ def getInformationFromNode(zk_conn, node_name, long_output): node_kernel = node_static_data[1] node_os = node_static_data[2] node_arch = node_static_data[3] + node_mem_total = int(zkhandler.readdata(zk_conn, '/nodes/{}/memtotal'.format(node_name))) node_mem_allocated = int(zkhandler.readdata(zk_conn, '/nodes/{}/memalloc'.format(node_name))) node_mem_used = int(zkhandler.readdata(zk_conn, '/nodes/{}/memused'.format(node_name))) node_mem_free = int(zkhandler.readdata(zk_conn, '/nodes/{}/memfree'.format(node_name))) - node_mem_total = node_mem_used + node_mem_free node_load = zkhandler.readdata(zk_conn, '/nodes/{}/cpuload'.format(node_name)) node_domains_count = zkhandler.readdata(zk_conn, '/nodes/{}/domainscount'.format(node_name)) node_running_domains = zkhandler.readdata(zk_conn, '/nodes/{}/runningdomains'.format(node_name)).split() - if node_daemon_state == 'run': - daemon_state_colour = ansiprint.green() - elif node_daemon_state == 'stop': - daemon_state_colour = ansiprint.red() - elif node_daemon_state == 'init': - daemon_state_colour = ansiprint.yellow() - elif node_daemon_state == 'dead': - daemon_state_colour = ansiprint.red() + ansiprint.bold() - else: - daemon_state_colour = ansiprint.blue() + # Construct a data structure to represent the data + node_information = { + 'name': node_name, + 'daemon_state': node_daemon_state, + 'coordinator_state': node_coordinator_state, + 'domain_state': node_domain_state, + 'cpu_count': node_cpu_count, + 'kernel': node_kernel, + 'os': node_os, + 'arch': node_arch, + 'load': node_load, + 'domains_count': node_domains_count, + 'running_domains': node_running_domains, + 'memory': { + 'total': node_mem_total, + 'allocated': node_mem_allocated, + 'used': node_mem_used, + 'free': node_mem_free + } + } - if node_coordinator_state == 'primary': - coordinator_state_colour = ansiprint.green() - elif node_coordinator_state == 'secondary': - coordinator_state_colour = ansiprint.blue() - else: - coordinator_state_colour = ansiprint.purple() - - if node_domain_state == 'ready': - domain_state_colour = ansiprint.green() - else: - domain_state_colour = ansiprint.blue() - - # Format a nice output; do this line-by-line then concat the elements at the end - ainformation = [] - ainformation.append('{}Node information:{}'.format(ansiprint.bold(), ansiprint.end())) - ainformation.append('') - # Basic information - ainformation.append('{}Name:{} {}'.format(ansiprint.purple(), ansiprint.end(), node_name)) - ainformation.append('{}Daemon State:{} {}{}{}'.format(ansiprint.purple(), ansiprint.end(), daemon_state_colour, node_daemon_state, ansiprint.end())) - ainformation.append('{}Coordinator State:{} {}{}{}'.format(ansiprint.purple(), ansiprint.end(), coordinator_state_colour, node_coordinator_state, ansiprint.end())) - ainformation.append('{}Domain State:{} {}{}{}'.format(ansiprint.purple(), ansiprint.end(), domain_state_colour, node_domain_state, ansiprint.end())) - ainformation.append('{}Active VM Count:{} {}'.format(ansiprint.purple(), ansiprint.end(), node_domains_count)) - if long_output == True: - ainformation.append('') - ainformation.append('{}Architecture:{} {}'.format(ansiprint.purple(), ansiprint.end(), node_arch)) - ainformation.append('{}Operating System:{} {}'.format(ansiprint.purple(), ansiprint.end(), node_os)) - ainformation.append('{}Kernel Version:{} {}'.format(ansiprint.purple(), ansiprint.end(), node_kernel)) - ainformation.append('') - ainformation.append('{}CPUs:{} {}'.format(ansiprint.purple(), ansiprint.end(), node_cpu_count)) - ainformation.append('{}Load:{} {}'.format(ansiprint.purple(), ansiprint.end(), node_load)) - ainformation.append('{}Total RAM (MiB):{} {}'.format(ansiprint.purple(), ansiprint.end(), node_mem_total)) - ainformation.append('{}Used RAM (MiB):{} {}'.format(ansiprint.purple(), ansiprint.end(), node_mem_used)) - ainformation.append('{}Free RAM (MiB):{} {}'.format(ansiprint.purple(), ansiprint.end(), node_mem_free)) - ainformation.append('{}Allocated RAM (MiB):{} {}'.format(ansiprint.purple(), ansiprint.end(), node_mem_allocated)) - - # Join it all together - information = '\n'.join(ainformation) - return information + return node_information # # Direct Functions @@ -193,29 +169,17 @@ def ready_node(zk_conn, node): return True, retmsg -def get_info(zk_conn, node, long_output): +def get_info(zk_conn, node): # Verify node is valid if not common.verifyNode(zk_conn, node): return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node) # Get information about node in a pretty format - information = getInformationFromNode(zk_conn, node, long_output) - - if information == None: + node_information = getInformationFromNode(zk_conn, node) + if node_information == None: return False, 'ERROR: Could not find a node matching that name.' - click.echo(information) - - if long_output == True: - click.echo('') - click.echo('{}Virtual machines on node:{}'.format(ansiprint.bold(), ansiprint.end())) - click.echo('') - # List all VMs on this node - pvc_vm.get_list(zk_conn, node, None) - - click.echo('') - - return True, '' + return True, node_information def get_list(zk_conn, limit): # Match our limit @@ -231,41 +195,77 @@ def get_list(zk_conn, limit): limit = limit + '.*' if re.match(limit, node) != None: - node_list.append(node) + node_list.append(getInformationFromNode(zk_conn, node)) except Exception as e: return False, 'Regex Error: {}'.format(e) else: - node_list.append(node) + node_list.append(getInformationFromNode(zk_conn, node)) + return True, node_list + +# +# CLI-specific functions +# +def getOutputColours(node_information): + if node_information['daemon_state'] == 'run': + daemon_state_colour = ansiprint.green() + elif node_information['daemon_state'] == 'stop': + daemon_state_colour = ansiprint.red() + elif node_information['daemon_state'] == 'init': + daemon_state_colour = ansiprint.yellow() + elif node_information['daemon_state'] == 'dead': + daemon_state_colour = ansiprint.red() + ansiprint.bold() + else: + daemon_state_colour = ansiprint.blue() + + if node_information['coordinator_state'] == 'primary': + coordinator_state_colour = ansiprint.green() + elif node_information['coordinator_state'] == 'secondary': + coordinator_state_colour = ansiprint.blue() + else: + coordinator_state_colour = ansiprint.purple() + + if node_information['domain_state'] == 'ready': + domain_state_colour = ansiprint.green() + else: + domain_state_colour = ansiprint.blue() + + return daemon_state_colour, coordinator_state_colour, domain_state_colour + +def format_info(zk_conn, node_information, long_output): + daemon_state_colour, coordinator_state_colour, domain_state_colour = getOutputColours(node_information) + + # Format a nice output; do this line-by-line then concat the elements at the end + ainformation = [] + # Basic information + ainformation.append('{}Name:{} {}'.format(ansiprint.purple(), ansiprint.end(), node_information['name'])) + ainformation.append('{}Daemon State:{} {}{}{}'.format(ansiprint.purple(), ansiprint.end(), daemon_state_colour, node_information['daemon_state'], ansiprint.end())) + ainformation.append('{}Coordinator State:{} {}{}{}'.format(ansiprint.purple(), ansiprint.end(), coordinator_state_colour, node_information['coordinator_state'], ansiprint.end())) + ainformation.append('{}Domain State:{} {}{}{}'.format(ansiprint.purple(), ansiprint.end(), domain_state_colour, node_information['domain_state'], ansiprint.end())) + ainformation.append('{}Active VM Count:{} {}'.format(ansiprint.purple(), ansiprint.end(), node_information['domains_count'])) + if long_output: + ainformation.append('') + ainformation.append('{}Architecture:{} {}'.format(ansiprint.purple(), ansiprint.end(), node_information['arch'])) + ainformation.append('{}Operating System:{} {}'.format(ansiprint.purple(), ansiprint.end(), node_information['os'])) + ainformation.append('{}Kernel Version:{} {}'.format(ansiprint.purple(), ansiprint.end(), node_information['kernel'])) + ainformation.append('') + ainformation.append('{}CPUs:{} {}'.format(ansiprint.purple(), ansiprint.end(), node_information['cpu_count'])) + ainformation.append('{}Load:{} {}'.format(ansiprint.purple(), ansiprint.end(), node_information['load'])) + ainformation.append('{}Total RAM (MiB):{} {}'.format(ansiprint.purple(), ansiprint.end(), node_information['memory']['total'])) + ainformation.append('{}Used RAM (MiB):{} {}'.format(ansiprint.purple(), ansiprint.end(), node_information['memory']['used'])) + ainformation.append('{}Free RAM (MiB):{} {}'.format(ansiprint.purple(), ansiprint.end(), node_information['memory']['free'])) + ainformation.append('{}Allocated RAM (MiB):{} {}'.format(ansiprint.purple(), ansiprint.end(), node_information['memory']['allocated'])) + + # Join it all together + information = '\n'.join(ainformation) + click.echo(information) + + click.echo('') + +def format_list(node_list): node_list_output = [] - node_daemon_state = {} - node_coordinator_state = {} - node_domain_state = {} - node_cpu_count = {} - node_mem_used = {} - node_mem_free = {} - node_mem_total = {} - node_mem_allocated = {} - node_domains_count = {} - node_running_domains = {} - node_load = {} - - # Gather information for printing - for node_name in node_list: - node_daemon_state[node_name] = zkhandler.readdata(zk_conn, '/nodes/{}/daemonstate'.format(node_name)) - node_coordinator_state[node_name] = zkhandler.readdata(zk_conn, '/nodes/{}/routerstate'.format(node_name)) - node_domain_state[node_name] = zkhandler.readdata(zk_conn, '/nodes/{}/domainstate'.format(node_name)) - node_cpu_count[node_name] = zkhandler.readdata(zk_conn, '/nodes/{}/staticdata'.format(node_name)).split()[0] - node_mem_allocated[node_name] = int(zkhandler.readdata(zk_conn, '/nodes/{}/memalloc'.format(node_name))) - node_mem_used[node_name] = int(zkhandler.readdata(zk_conn, '/nodes/{}/memused'.format(node_name))) - node_mem_free[node_name] = int(zkhandler.readdata(zk_conn, '/nodes/{}/memfree'.format(node_name))) - node_mem_total[node_name] = node_mem_used[node_name] + node_mem_free[node_name] - node_load[node_name] = zkhandler.readdata(zk_conn, '/nodes/{}/cpuload'.format(node_name)) - node_domains_count[node_name] = zkhandler.readdata(zk_conn, '/nodes/{}/domainscount'.format(node_name)) - node_running_domains[node_name] = zkhandler.readdata(zk_conn, '/nodes/{}/runningdomains'.format(node_name)).split() # Determine optimal column widths - # Dynamic columns: node_name, daemon_state, network_state, domain_state, load node_name_length = 5 daemon_state_length = 7 coordinator_state_length = 12 @@ -277,49 +277,49 @@ def get_list(zk_conn, limit): mem_used_length = 5 mem_free_length = 5 mem_alloc_length = 4 - for node_name in node_list: + for node_information in node_list: # node_name column - _node_name_length = len(node_name) + 1 + _node_name_length = len(node_information['name']) + 1 if _node_name_length > node_name_length: node_name_length = _node_name_length # daemon_state column - _daemon_state_length = len(node_daemon_state[node_name]) + 1 + _daemon_state_length = len(node_information['daemon_state']) + 1 if _daemon_state_length > daemon_state_length: daemon_state_length = _daemon_state_length # coordinator_state column - _coordinator_state_length = len(node_coordinator_state[node_name]) + 1 + _coordinator_state_length = len(node_information['coordinator_state']) + 1 if _coordinator_state_length > coordinator_state_length: coordinator_state_length = _coordinator_state_length # domain_state column - _domain_state_length = len(node_domain_state[node_name]) + 1 + _domain_state_length = len(node_information['domain_state']) + 1 if _domain_state_length > domain_state_length: domain_state_length = _domain_state_length # domains_count column - _domains_count_length = len(node_domains_count[node_name]) + 1 + _domains_count_length = len(node_information['domains_count']) + 1 if _domains_count_length > domains_count_length: domains_count_length = _domains_count_length # cpu_count column - _cpu_count_length = len(node_cpu_count[node_name]) + 1 + _cpu_count_length = len(node_information['cpu_count']) + 1 if _cpu_count_length > cpu_count_length: cpu_count_length = _cpu_count_length # load column - _load_length = len(node_load[node_name]) + 1 + _load_length = len(node_information['load']) + 1 if _load_length > load_length: load_length = _load_length # mem_total column - _mem_total_length = len(str(node_mem_total[node_name])) + 1 + _mem_total_length = len(str(node_information['memory']['total'])) + 1 if _mem_total_length > mem_total_length: mem_total_length = _mem_total_length # mem_used column - _mem_used_length = len(str(node_mem_used[node_name])) + 1 + _mem_used_length = len(str(node_information['memory']['used'])) + 1 if _mem_used_length > mem_used_length: mem_used_length = _mem_used_length # mem_free column - _mem_free_length = len(str(node_mem_free[node_name])) + 1 + _mem_free_length = len(str(node_information['memory']['free'])) + 1 if _mem_free_length > mem_free_length: mem_free_length = _mem_free_length # mem_alloc column - _mem_alloc_length = len(str(node_mem_allocated[node_name])) + 1 + _mem_alloc_length = len(str(node_information['memory']['allocated'])) + 1 if _mem_alloc_length > mem_alloc_length: mem_alloc_length = _mem_alloc_length @@ -361,33 +361,8 @@ Mem (M): {node_mem_total: <{mem_total_length}} {node_mem_used: <{mem_used_length ) # Format the string (elements) - for node_name in node_list: - if node_daemon_state[node_name] == 'run': - daemon_state_colour = ansiprint.green() - elif node_daemon_state[node_name] == 'stop': - daemon_state_colour = ansiprint.red() - elif node_daemon_state[node_name] == 'init': - daemon_state_colour = ansiprint.yellow() - elif node_daemon_state[node_name] == 'dead': - daemon_state_colour = ansiprint.red() + ansiprint.bold() - else: - daemon_state_colour = ansiprint.blue() - - if node_coordinator_state[node_name] == 'primary': - coordinator_state_colour = ansiprint.green() - elif node_coordinator_state[node_name] == 'secondary': - coordinator_state_colour = ansiprint.blue() - else: - coordinator_state_colour = ansiprint.purple() - - if node_mem_allocated[node_name] != 0 and node_mem_allocated[node_name] >= node_mem_total[node_name]: - node_domain_state[node_name] = 'overprov' - domain_state_colour = ansiprint.yellow() - elif node_domain_state[node_name] == 'ready': - domain_state_colour = ansiprint.green() - else: - domain_state_colour = ansiprint.blue() - + for node_information in node_list: + daemon_state_colour, coordinator_state_colour, domain_state_colour = getOutputColours(node_information) node_list_output.append( '{bold}{node_name: <{node_name_length}} \ {daemon_state_colour}{node_daemon_state: <{daemon_state_length}}{end_colour} {coordinator_state_colour}{node_coordinator_state: <{coordinator_state_length}}{end_colour} {domain_state_colour}{node_domain_state: <{domain_state_length}}{end_colour} \ @@ -410,20 +385,18 @@ Mem (M): {node_mem_total: <{mem_total_length}} {node_mem_used: <{mem_used_length coordinator_state_colour=coordinator_state_colour, domain_state_colour=domain_state_colour, end_colour=ansiprint.end(), - node_name=node_name, - node_daemon_state=node_daemon_state[node_name], - node_coordinator_state=node_coordinator_state[node_name], - node_domain_state=node_domain_state[node_name], - node_domains_count=node_domains_count[node_name], - node_cpu_count=node_cpu_count[node_name], - node_load=node_load[node_name], - node_mem_total=node_mem_total[node_name], - node_mem_used=node_mem_used[node_name], - node_mem_free=node_mem_free[node_name], - node_mem_allocated=node_mem_allocated[node_name] + node_name=node_information['name'], + node_daemon_state=node_information['daemon_state'], + node_coordinator_state=node_information['coordinator_state'], + node_domain_state=node_information['domain_state'], + node_domains_count=node_information['domains_count'], + node_cpu_count=node_information['cpu_count'], + node_load=node_information['load'], + node_mem_total=node_information['memory']['total'], + node_mem_used=node_information['memory']['used'], + node_mem_free=node_information['memory']['free'], + node_mem_allocated=node_information['memory']['allocated'] ) ) click.echo('\n'.join(sorted(node_list_output))) - - return True, '' From c19902d952da4bdaf9e3594a73bd2c45077a4e84 Mon Sep 17 00:00:00 2001 From: Joshua Boniface Date: Fri, 10 May 2019 23:52:24 -0400 Subject: [PATCH 13/13] Implement flush locking for nodes Implements a locking mechanism to prevent clobbering of node flushes. When a flush begins, a global cluster lock is placed which is freed once the flush completes. While the lock is in place, other flush events queue waiting for the lock to free before proceeding. Modifies the CLI output flow when the `--wait` option is specified. First, if a lock exists when running the command, the message is tweaked to indicate this, and the client will wait first for the lock to free, and then for the flush as normal. Second, the wait depends on the active lock rather than the domain_status for consistency purposes. Closes #32 --- client-cli/pvc_init.py | 2 ++ client-common/node.py | 23 ++++++++++++++++------- node-daemon/pvcd/NodeInstance.py | 16 ++++++++++++++++ 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/client-cli/pvc_init.py b/client-cli/pvc_init.py index 154c9d0b..7cd93f08 100755 --- a/client-cli/pvc_init.py +++ b/client-cli/pvc_init.py @@ -650,6 +650,8 @@ def init_zookeeper(zk_host): transaction.create('/ceph', ''.encode('ascii')) transaction.create('/ceph/osds', ''.encode('ascii')) transaction.create('/ceph/pools', ''.encode('ascii')) + transaction.create('/locks', ''.encode('ascii')) + transaction.create('/locks/flush_lock', 'False'.encode('ascii')) transaction.commit() # Close the Zookeeper connection diff --git a/client-common/node.py b/client-common/node.py index 154c3a58..7b0f44a4 100644 --- a/client-common/node.py +++ b/client-common/node.py @@ -134,24 +134,33 @@ def flush_node(zk_conn, node, wait): if not common.verifyNode(zk_conn, node): return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node) - retmsg = 'Flushing hypervisor {} of running VMs.'.format(node) - + if zkhandler.readdata(zk_conn, '/locks/flush_lock') == 'True': + retmsg = 'Flushing hypervisor {} of running VMs. A flush lock currently exists; flush will continue once the lock is freed.'.format(node) + lock_wait = True + else: + retmsg = 'Flushing hypervisor {} of running VMs.'.format(node) + lock_wait = False + # Wait cannot be triggered from the API if wait: click.echo(retmsg) retmsg = "" + if lock_wait: + time.sleep(1) + while zkhandler.readdata(zk_conn, '/locks/flush_lock') == 'True': + time.sleep(1) + click.echo('Previous flush completed. Proceeding with flush.') # Add the new domain to Zookeeper zkhandler.writedata(zk_conn, { '/nodes/{}/domainstate'.format(node): 'flush' }) - if wait == True: - while True: + # Wait cannot be triggered from the API + if wait: + time.sleep(1) + while zkhandler.readdata(zk_conn, '/locks/flush_lock') == 'True': time.sleep(1) - node_state = zkhandler.readdata(zk_conn, '/nodes/{}/domainstate'.format(node)) - if node_state == "flushed": - break return True, retmsg diff --git a/node-daemon/pvcd/NodeInstance.py b/node-daemon/pvcd/NodeInstance.py index 5ac366e2..839e1299 100644 --- a/node-daemon/pvcd/NodeInstance.py +++ b/node-daemon/pvcd/NodeInstance.py @@ -316,6 +316,17 @@ class NodeInstance(object): # Flush all VMs on the host def flush(self): + # Wait indefinitely for the flush_lock to be freed + time.sleep(0.5) + while zkhandler.readdata(self.zk_conn, '/locks/flush_lock') == 'True': + time.sleep(2) + + # Acquire the flush lock + zkhandler.writedata(self.zk_conn, { + '/locks/flush_lock'.format(node): 'True' + }) + + # Begin flush self.inflush = True self.logger.out('Flushing node "{}" of running VMs'.format(self.name), state='i') self.logger.out('Domain list: {}'.format(', '.join(self.domain_list))) @@ -347,6 +358,11 @@ class NodeInstance(object): zkhandler.writedata(self.zk_conn, { '/nodes/{}/domainstate'.format(self.name): 'flushed' }) self.inflush = False + # Release the flush lock + zkhandler.writedata(self.zk_conn, { + '/locks/flush_lock'.format(node): 'False' + }) + def unflush(self): self.inflush = True self.logger.out('Restoring node {} to active service.'.format(self.name), state='i')