From 4ac020888b04eb10cbe2095a0f789ab5fbca91ed Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Wed, 14 Jul 2021 15:02:03 -0400 Subject: [PATCH 01/13] Add some tag tests to test-cluster.sh --- test-cluster.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test-cluster.sh b/test-cluster.sh index 0bc05990..d9c9988c 100755 --- a/test-cluster.sh +++ b/test-cluster.sh @@ -38,7 +38,7 @@ sleep 30 _pvc vm stop --yes testX _pvc vm disable testX _pvc vm undefine --yes testX -_pvc vm define --target hv3 ${vm_tmp} +_pvc vm define --target hv3 --tag pvc-test ${vm_tmp} _pvc vm start testX sleep 30 _pvc vm restart --yes --wait testX @@ -50,6 +50,10 @@ sleep 5 _pvc vm move --wait --target hv1 testX sleep 5 _pvc vm meta testX --limit hv1 --selector vms --method live --profile test --no-autostart +_pvc vm tag add testX mytag +_pvc vm tag get testX +_pvc vm list --tag mytag +_pvc vm tag remove testX mytag _pvc vm network get testX _pvc vm vcpu set testX 4 _pvc vm vcpu get testX From cd1db3d5872b0a1afa69b60dcef3ce806299bdb9 Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Sun, 18 Jul 2021 16:38:58 -0400 Subject: [PATCH 02/13] Ensure node name is part of confing --- node-daemon/pvcnoded/Daemon.py | 1 + 1 file changed, 1 insertion(+) diff --git a/node-daemon/pvcnoded/Daemon.py b/node-daemon/pvcnoded/Daemon.py index c96c04e2..4423f688 100644 --- a/node-daemon/pvcnoded/Daemon.py +++ b/node-daemon/pvcnoded/Daemon.py @@ -147,6 +147,7 @@ def readConfig(pvcnoded_config_file, myhostname): # Handle the basic config (hypervisor-only) try: config_general = { + 'node': o_config['pvc']['node'], 'coordinators': o_config['pvc']['cluster']['coordinators'], 'enable_hypervisor': o_config['pvc']['functions']['enable_hypervisor'], 'enable_networking': o_config['pvc']['functions']['enable_networking'], From 323c7c41aedd0051c7a1a574a14edc2289c6ff11 Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Sun, 18 Jul 2021 17:03:57 -0400 Subject: [PATCH 03/13] Implement node logging into Zookeeper Adds the ability to send node daemon logs to Zookeeper to facilitate a command like "pvc node log", similar to "pvc vm log". Each node stores its logs in a separate tree under "/logs" which can then be combined or queried. By default, set by config, only 2000 lines are kept. --- daemon-common/log.py | 62 ++++++++++++++++++++++-- daemon-common/migrations/versions/4.json | 1 + daemon-common/zkhandler.py | 12 +++-- node-daemon/pvcnoded.sample.yaml | 6 ++- node-daemon/pvcnoded/Daemon.py | 2 + 5 files changed, 76 insertions(+), 7 deletions(-) create mode 100644 daemon-common/migrations/versions/4.json diff --git a/daemon-common/log.py b/daemon-common/log.py index 9451a8bd..dc856b94 100644 --- a/daemon-common/log.py +++ b/daemon-common/log.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# log.py - Output (stdout + logfile) functions +# log.py - PVC daemon logger functions # Part of the Parallel Virtual Cluster (PVC) system # # Copyright (C) 2018-2021 Joshua M. Boniface @@ -19,7 +19,12 @@ # ############################################################################### -import datetime +from collections import deque +from threading import Thread +from queue import Queue +from datetime import datetime + +from daemon_lib.zkhandler import ZKHandler class Logger(object): @@ -77,6 +82,10 @@ class Logger(object): self.last_colour = '' self.last_prompt = '' + if self.config['zookeeper_logging']: + self.zookeeper_logger = ZookeeperLogger(config) + self.zookeeper_logger.start() + # Provide a hup function to close and reopen the writer def hup(self): self.writer.close() @@ -87,7 +96,7 @@ class Logger(object): # Get the date if self.config['log_dates']: - date = '{} - '.format(datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S.%f')) + date = '{} - '.format(datetime.now().strftime('%Y/%m/%d %H:%M:%S.%f')) else: date = '' @@ -123,6 +132,53 @@ class Logger(object): if self.config['file_logging']: self.writer.write(message + '\n') + # Log to Zookeeper + if self.config['zookeeper_logging']: + self.zookeeper_logger.queue.put(message) + # Set last message variables self.last_colour = colour self.last_prompt = prompt + + +class ZookeeperLogger(Thread): + """ + Defines a threaded writer for Zookeeper locks. Threading prevents the blocking of other + daemon events while the records are written. They will be eventually-consistent + """ + def __init__(self, config): + self.config = config + self.node = self.config['node'] + self.max_lines = self.config['node_log_lines'] + self.queue = Queue() + self.zkhandler = None + self.start_zkhandler() + self.zkhandler.write([(('logs', self.node), '')]) + Thread.__init__(self, args=(), kwargs=None) + + def start_zkhandler(self): + # We must open our own dedicated Zookeeper instance because we can't guarantee one already exists when this starts + if self.zkhandler is not None: + try: + self.zkhandler.disconnect() + except Exception: + pass + self.zkhandler = ZKHandler(self.config, logger=None) + self.zkhandler.connect(persistent=True) + + def run(self): + while True: + message = self.queue.get() + self.write_log(message) + + def write_log(self, message): + # Log to Zookeeper + with self.zkhandler.writelock(('logs.messages', self.node)): + logs = deque(self.zkhandler.read(('logs.messages', self.node)).split('\n'), self.max_lines) + if not self.config['log_dates']: + # We want to log dates here, even if the log_dates config is not set + date = '{} - '.format(datetime.now().strftime('%Y/%m/%d %H:%M:%S.%f')) + else: + date = '' + logs.append(f'{date}{message}') + self.zkhandler.write([(('logs.messages', self.node), '\n'.join(logs))]) diff --git a/daemon-common/migrations/versions/4.json b/daemon-common/migrations/versions/4.json new file mode 100644 index 00000000..951950ca --- /dev/null +++ b/daemon-common/migrations/versions/4.json @@ -0,0 +1 @@ +{"version": "4", "root": "", "base": {"root": "", "schema": "/schema", "schema.version": "/schema/version", "config": "/config", "config.maintenance": "/config/maintenance", "config.primary_node": "/config/primary_node", "config.primary_node.sync_lock": "/config/primary_node/sync_lock", "config.upstream_ip": "/config/upstream_ip", "config.migration_target_selector": "/config/migration_target_selector", "cmd": "/cmd", "cmd.node": "/cmd/nodes", "cmd.domain": "/cmd/domains", "cmd.ceph": "/cmd/ceph", "logs": "/logs", "node": "/nodes", "domain": "/domains", "network": "/networks", "storage": "/ceph", "storage.util": "/ceph/util", "osd": "/ceph/osds", "pool": "/ceph/pools", "volume": "/ceph/volumes", "snapshot": "/ceph/snapshots"}, "logs": {"node": "", "messages": "/messages"}, "node": {"name": "", "keepalive": "/keepalive", "mode": "/daemonmode", "data.active_schema": "/activeschema", "data.latest_schema": "/latestschema", "data.static": "/staticdata", "data.pvc_version": "/pvcversion", "running_domains": "/runningdomains", "count.provisioned_domains": "/domainscount", "count.networks": "/networkscount", "state.daemon": "/daemonstate", "state.router": "/routerstate", "state.domain": "/domainstate", "cpu.load": "/cpuload", "vcpu.allocated": "/vcpualloc", "memory.total": "/memtotal", "memory.used": "/memused", "memory.free": "/memfree", "memory.allocated": "/memalloc", "memory.provisioned": "/memprov", "ipmi.hostname": "/ipmihostname", "ipmi.username": "/ipmiusername", "ipmi.password": "/ipmipassword", "sriov": "/sriov", "sriov.pf": "/sriov/pf", "sriov.vf": "/sriov/vf"}, "sriov_pf": {"phy": "", "mtu": "/mtu", "vfcount": "/vfcount"}, "sriov_vf": {"phy": "", "pf": "/pf", "mtu": "/mtu", "mac": "/mac", "phy_mac": "/phy_mac", "config": "/config", "config.vlan_id": "/config/vlan_id", "config.vlan_qos": "/config/vlan_qos", "config.tx_rate_min": "/config/tx_rate_min", "config.tx_rate_max": "/config/tx_rate_max", "config.spoof_check": "/config/spoof_check", "config.link_state": "/config/link_state", "config.trust": "/config/trust", "config.query_rss": "/config/query_rss", "pci": "/pci", "pci.domain": "/pci/domain", "pci.bus": "/pci/bus", "pci.slot": "/pci/slot", "pci.function": "/pci/function", "used": "/used", "used_by": "/used_by"}, "domain": {"name": "", "xml": "/xml", "state": "/state", "profile": "/profile", "stats": "/stats", "node": "/node", "last_node": "/lastnode", "failed_reason": "/failedreason", "storage.volumes": "/rbdlist", "console.log": "/consolelog", "console.vnc": "/vnc", "meta.autostart": "/node_autostart", "meta.migrate_method": "/migration_method", "meta.node_selector": "/node_selector", "meta.node_limit": "/node_limit", "meta.tags": "/tags", "migrate.sync_lock": "/migrate_sync_lock"}, "tag": {"name": "", "type": "/type", "protected": "/protected"}, "network": {"vni": "", "type": "/nettype", "rule": "/firewall_rules", "rule.in": "/firewall_rules/in", "rule.out": "/firewall_rules/out", "nameservers": "/name_servers", "domain": "/domain", "reservation": "/dhcp4_reservations", "lease": "/dhcp4_leases", "ip4.gateway": "/ip4_gateway", "ip4.network": "/ip4_network", "ip4.dhcp": "/dhcp4_flag", "ip4.dhcp_start": "/dhcp4_start", "ip4.dhcp_end": "/dhcp4_end", "ip6.gateway": "/ip6_gateway", "ip6.network": "/ip6_network", "ip6.dhcp": "/dhcp6_flag"}, "reservation": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname"}, "lease": {"mac": "", "ip": "/ipaddr", "hostname": "/hostname", "expiry": "/expiry", "client_id": "/clientid"}, "rule": {"description": "", "rule": "/rule", "order": "/order"}, "osd": {"id": "", "node": "/node", "device": "/device", "stats": "/stats"}, "pool": {"name": "", "pgs": "/pgs", "stats": "/stats"}, "volume": {"name": "", "stats": "/stats"}, "snapshot": {"name": "", "stats": "/stats"}} \ No newline at end of file diff --git a/daemon-common/zkhandler.py b/daemon-common/zkhandler.py index c0af11cf..90ed324e 100644 --- a/daemon-common/zkhandler.py +++ b/daemon-common/zkhandler.py @@ -466,7 +466,7 @@ class ZKHandler(object): # class ZKSchema(object): # Current version - _version = 3 + _version = 4 # Root for doing nested keys _schema_root = '' @@ -490,6 +490,7 @@ class ZKSchema(object): 'cmd.node': f'{_schema_root}/cmd/nodes', 'cmd.domain': f'{_schema_root}/cmd/domains', 'cmd.ceph': f'{_schema_root}/cmd/ceph', + 'logs': '/logs', 'node': f'{_schema_root}/nodes', 'domain': f'{_schema_root}/domains', 'network': f'{_schema_root}/networks', @@ -500,6 +501,11 @@ class ZKSchema(object): 'volume': f'{_schema_root}/ceph/volumes', 'snapshot': f'{_schema_root}/ceph/snapshots', }, + # The schema of an individual logs entry (/logs/{node_name}) + 'logs': { + 'node': '', # The root key + 'messages': '/messages', + }, # The schema of an individual node entry (/nodes/{node_name}) 'node': { 'name': '', # The root key @@ -771,7 +777,7 @@ class ZKSchema(object): logger.out(f'Key not found: {self.path(kpath)}', state='w') result = False - for elem in ['node', 'domain', 'network', 'osd', 'pool']: + for elem in ['logs', 'node', 'domain', 'network', 'osd', 'pool']: # First read all the subelements of the key class for child in zkhandler.zk_conn.get_children(self.path(f'base.{elem}')): # For each key in the schema for that particular elem @@ -850,7 +856,7 @@ class ZKSchema(object): data = '' zkhandler.zk_conn.create(self.path(kpath), data.encode(zkhandler.encoding)) - for elem in ['node', 'domain', 'network', 'osd', 'pool']: + for elem in ['logs', 'node', 'domain', 'network', 'osd', 'pool']: # First read all the subelements of the key class for child in zkhandler.zk_conn.get_children(self.path(f'base.{elem}')): # For each key in the schema for that particular elem diff --git a/node-daemon/pvcnoded.sample.yaml b/node-daemon/pvcnoded.sample.yaml index b728c84b..37097538 100644 --- a/node-daemon/pvcnoded.sample.yaml +++ b/node-daemon/pvcnoded.sample.yaml @@ -140,6 +140,8 @@ pvc: file_logging: True # stdout_logging: Enable or disable logging to stdout (i.e. journald) stdout_logging: True + # zookeeper_logging: Enable ot disable logging to Zookeeper (for `pvc node log` functionality) + zookeeper_logging: True # log_colours: Enable or disable ANSI colours in log output log_colours: True # log_dates: Enable or disable date strings in log output @@ -152,10 +154,12 @@ pvc: log_keepalive_storage_details: True # console_log_lines: Number of console log lines to store in Zookeeper per VM console_log_lines: 1000 + # node_log_lines: Number of node log lines to store in Zookeeper per node + node_log_lines: 2000 # networking: PVC networking configuration # OPTIONAL if enable_networking: False networking: - # bridge_device: Underlying device to use for bridged vLAN networks; usually the device underlying + # bridge_device: Underlying device to use for bridged vLAN networks; usually the device of bridge_device: ens4 # sriov_enable: Enable or disable (default if absent) SR-IOV network support sriov_enable: False diff --git a/node-daemon/pvcnoded/Daemon.py b/node-daemon/pvcnoded/Daemon.py index 4423f688..6d0e10f8 100644 --- a/node-daemon/pvcnoded/Daemon.py +++ b/node-daemon/pvcnoded/Daemon.py @@ -158,12 +158,14 @@ def readConfig(pvcnoded_config_file, myhostname): 'console_log_directory': o_config['pvc']['system']['configuration']['directories']['console_log_directory'], 'file_logging': o_config['pvc']['system']['configuration']['logging']['file_logging'], 'stdout_logging': o_config['pvc']['system']['configuration']['logging']['stdout_logging'], + 'zookeeper_logging': o_config['pvc']['system']['configuration']['logging'].get('zookeeper_logging', False), 'log_colours': o_config['pvc']['system']['configuration']['logging']['log_colours'], 'log_dates': o_config['pvc']['system']['configuration']['logging']['log_dates'], 'log_keepalives': o_config['pvc']['system']['configuration']['logging']['log_keepalives'], 'log_keepalive_cluster_details': o_config['pvc']['system']['configuration']['logging']['log_keepalive_cluster_details'], 'log_keepalive_storage_details': o_config['pvc']['system']['configuration']['logging']['log_keepalive_storage_details'], 'console_log_lines': o_config['pvc']['system']['configuration']['logging']['console_log_lines'], + 'node_log_lines': o_config['pvc']['system']['configuration']['logging'].get('node_log_lines', 0), 'vm_shutdown_timeout': int(o_config['pvc']['system']['intervals']['vm_shutdown_timeout']), 'keepalive_interval': int(o_config['pvc']['system']['intervals']['keepalive_interval']), 'fence_intervals': int(o_config['pvc']['system']['intervals']['fence_intervals']), From a088aa44843d788fbc1bf31446d94daf4772af5e Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Sun, 18 Jul 2021 17:37:49 -0400 Subject: [PATCH 04/13] Add node log functions to API and CLI --- api-daemon/pvcapid/flaskapi.py | 46 ++++++++++++++++++ api-daemon/pvcapid/helper.py | 28 +++++++++++ client-cli/pvc/cli_lib/node.py | 87 ++++++++++++++++++++++++++++++++++ client-cli/pvc/pvc.py | 38 +++++++++++++++ daemon-common/node.py | 18 +++++++ docs/manuals/swagger.json | 45 ++++++++++++++++++ 6 files changed, 262 insertions(+) diff --git a/api-daemon/pvcapid/flaskapi.py b/api-daemon/pvcapid/flaskapi.py index a2e63796..5e54d541 100755 --- a/api-daemon/pvcapid/flaskapi.py +++ b/api-daemon/pvcapid/flaskapi.py @@ -834,6 +834,52 @@ class API_Node_DomainState(Resource): api.add_resource(API_Node_DomainState, '/node//domain-state') +# /node//log') + + ########################################################## # Client API - VM ########################################################## diff --git a/api-daemon/pvcapid/helper.py b/api-daemon/pvcapid/helper.py index cb37e2ec..3fcd8f5a 100755 --- a/api-daemon/pvcapid/helper.py +++ b/api-daemon/pvcapid/helper.py @@ -307,6 +307,34 @@ def node_ready(zkhandler, node, wait): return output, retcode +@ZKConnection(config) +def node_log(zkhandler, node, lines=None): + """ + Return the current logs for Node. + """ + # Default to 10 lines of log if not set + try: + lines = int(lines) + except TypeError: + lines = 10 + + retflag, retdata = pvc_node.get_node_log(zkhandler, node, lines) + + if retflag: + retcode = 200 + retdata = { + 'name': node, + 'data': retdata + } + else: + retcode = 400 + retdata = { + 'message': retdata + } + + return retdata, retcode + + # # VM functions # diff --git a/client-cli/pvc/cli_lib/node.py b/client-cli/pvc/cli_lib/node.py index 99f91be2..ced6c269 100644 --- a/client-cli/pvc/cli_lib/node.py +++ b/client-cli/pvc/cli_lib/node.py @@ -19,6 +19,8 @@ # ############################################################################### +import time + import pvc.cli_lib.ansiprint as ansiprint from pvc.cli_lib.common import call_api @@ -69,6 +71,91 @@ def node_domain_state(config, node, action, wait): return retstatus, response.json().get('message', '') +def view_node_log(config, node, lines=100): + """ + Return node log lines from the API (and display them in a pager in the main CLI) + + API endpoint: GET /node/{node}/log + API arguments: lines={lines} + API schema: {"name":"{node}","data":"{node_log}"} + """ + params = { + 'lines': lines + } + response = call_api(config, 'get', '/node/{node}/log'.format(node=node), params=params) + + if response.status_code != 200: + return False, response.json().get('message', '') + + node_log = response.json()['data'] + + # Shrink the log buffer to length lines + shrunk_log = node_log.split('\n')[-lines:] + loglines = '\n'.join(shrunk_log) + + return True, loglines + + +def follow_node_log(config, node, lines=10): + """ + Return and follow node log lines from the API + + API endpoint: GET /node/{node}/log + API arguments: lines={lines} + API schema: {"name":"{nodename}","data":"{node_log}"} + """ + # We always grab 500 to match the follow call, but only _show_ `lines` number + params = { + 'lines': 500 + } + response = call_api(config, 'get', '/node/{node}/log'.format(node=node), params=params) + + if response.status_code != 200: + return False, response.json().get('message', '') + + # Shrink the log buffer to length lines + node_log = response.json()['data'] + shrunk_log = node_log.split('\n')[-int(lines):] + loglines = '\n'.join(shrunk_log) + + # Print the initial data and begin following + print(loglines, end='') + + while True: + # Grab the next line set (500 is a reasonable number of lines per second; any more are skipped) + try: + params = { + 'lines': 500 + } + response = call_api(config, 'get', '/node/{node}/log'.format(node=node), params=params) + new_node_log = response.json()['data'] + except Exception: + break + # Split the new and old log strings into constitutent lines + old_node_loglines = node_log.split('\n') + new_node_loglines = new_node_log.split('\n') + # Set the node log to the new log value for the next iteration + node_log = new_node_log + # Remove the lines from the old log until we hit the first line of the new log; this + # ensures that the old log is a string that we can remove from the new log entirely + for index, line in enumerate(old_node_loglines, start=0): + if line == new_node_loglines[0]: + del old_node_loglines[0:index] + break + # Rejoin the log lines into strings + old_node_log = '\n'.join(old_node_loglines) + new_node_log = '\n'.join(new_node_loglines) + # Remove the old lines from the new log + diff_node_log = new_node_log.replace(old_node_log, "") + # If there's a difference, print it out + if diff_node_log: + print(diff_node_log, end='') + # Wait a second + time.sleep(1) + + return True, '' + + def node_info(config, node): """ Get information about node diff --git a/client-cli/pvc/pvc.py b/client-cli/pvc/pvc.py index 80832654..bcf9c250 100755 --- a/client-cli/pvc/pvc.py +++ b/client-cli/pvc/pvc.py @@ -540,6 +540,43 @@ def node_unflush(node, wait): cleanup(retcode, retmsg) +############################################################################### +# pvc node log +############################################################################### +@click.command(name='log', short_help='Show logs of a node.') +@click.argument( + 'node' +) +@click.option( + '-l', '--lines', 'lines', default=None, show_default=False, + help='Display this many log lines from the end of the log buffer. [default: 1000; with follow: 10]' +) +@click.option( + '-f', '--follow', 'follow', is_flag=True, default=False, + help='Follow the log buffer; output may be delayed by a few seconds relative to the live system. The --lines value defaults to 10 for the initial output.' +) +@cluster_req +def node_log(node, lines, follow): + """ + Show node logs of virtual machine DOMAIN on its current node in a pager or continuously. DOMAIN may be a UUID or name. Note that migrating a VM to a different node will cause the log buffer to be overwritten by entries from the new node. + """ + + # Set the default here so we can handle it + if lines is None: + if follow: + lines = 10 + else: + lines = 1000 + + if follow: + retcode, retmsg = pvc_node.follow_node_log(config, node, lines) + else: + retcode, retmsg = pvc_node.view_node_log(config, node, lines) + click.echo_via_pager(retmsg) + retmsg = '' + cleanup(retcode, retmsg) + + ############################################################################### # pvc node info ############################################################################### @@ -4707,6 +4744,7 @@ cli_node.add_command(node_primary) cli_node.add_command(node_flush) cli_node.add_command(node_ready) cli_node.add_command(node_unflush) +cli_node.add_command(node_log) cli_node.add_command(node_info) cli_node.add_command(node_list) diff --git a/daemon-common/node.py b/daemon-common/node.py index 82f43ea2..11bc5529 100644 --- a/daemon-common/node.py +++ b/daemon-common/node.py @@ -182,6 +182,24 @@ def ready_node(zkhandler, node, wait=False): return True, retmsg +def get_node_log(zkhandler, node, lines=2000): + # Verify node is valid + if not common.verifyNode(zkhandler, node): + return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node) + + # Get the data from ZK + node_log = zkhandler.read(('logs.messages', node)) + + if node_log is None: + return True, '' + + # Shrink the log buffer to length lines + shrunk_log = node_log.split('\n')[-lines:] + loglines = '\n'.join(shrunk_log) + + return True, loglines + + def get_info(zkhandler, node): # Verify node is valid if not common.verifyNode(zkhandler, node): diff --git a/docs/manuals/swagger.json b/docs/manuals/swagger.json index 6770b32a..14c103ce 100644 --- a/docs/manuals/swagger.json +++ b/docs/manuals/swagger.json @@ -144,6 +144,19 @@ }, "type": "object" }, + "NodeLog": { + "properties": { + "data": { + "description": "The recent log text", + "type": "string" + }, + "name": { + "description": "The name of the Node", + "type": "string" + } + }, + "type": "object" + }, "VMLog": { "properties": { "data": { @@ -2665,6 +2678,38 @@ ] } }, + "/api/v1/node/{node}/log": { + "get": { + "description": "", + "parameters": [ + { + "description": "The number of lines to retrieve", + "in": "query", + "name": "lines", + "required": false, + "type": "integer" + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/NodeLog" + } + }, + "404": { + "description": "Node not found", + "schema": { + "$ref": "#/definitions/Message" + } + } + }, + "summary": "Return the recent logs of {node}", + "tags": [ + "node" + ] + } + }, "/api/v1/provisioner/create": { "post": { "description": "Note: Starts a background job in the pvc-provisioner-worker Celery worker while returning a task ID; the task ID can be used to query the \"GET /provisioner/status/\" endpoint for the job status", From 3a2478ee0cd719be79161ed2c36f916d58bc3bfc Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Sun, 18 Jul 2021 18:57:44 -0400 Subject: [PATCH 05/13] Cleanly terminate logger on cleanup --- node-daemon/pvcnoded/Daemon.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/node-daemon/pvcnoded/Daemon.py b/node-daemon/pvcnoded/Daemon.py index 6d0e10f8..781ea8e1 100644 --- a/node-daemon/pvcnoded/Daemon.py +++ b/node-daemon/pvcnoded/Daemon.py @@ -697,7 +697,7 @@ else: # Cleanup function def cleanup(): - global zkhandler, update_timer, d_domain + global logger, zkhandler, update_timer, d_domain logger.out('Terminating pvcnoded and cleaning up', state='s') @@ -761,6 +761,8 @@ def cleanup(): pass logger.out('Terminated pvc daemon', state='s') + del logger + os._exit(0) From 982dfd52c6f11abad8b6e78d191c79b6f8b3e240 Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Sun, 18 Jul 2021 19:00:54 -0400 Subject: [PATCH 06/13] Adjust date output format --- daemon-common/log.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/daemon-common/log.py b/daemon-common/log.py index dc856b94..6cb34f26 100644 --- a/daemon-common/log.py +++ b/daemon-common/log.py @@ -96,7 +96,7 @@ class Logger(object): # Get the date if self.config['log_dates']: - date = '{} - '.format(datetime.now().strftime('%Y/%m/%d %H:%M:%S.%f')) + date = '{} '.format(datetime.now().strftime('%Y/%m/%d %H:%M:%S.%f')) else: date = '' @@ -177,7 +177,7 @@ class ZookeeperLogger(Thread): logs = deque(self.zkhandler.read(('logs.messages', self.node)).split('\n'), self.max_lines) if not self.config['log_dates']: # We want to log dates here, even if the log_dates config is not set - date = '{} - '.format(datetime.now().strftime('%Y/%m/%d %H:%M:%S.%f')) + date = '{} '.format(datetime.now().strftime('%Y/%m/%d %H:%M:%S.%f')) else: date = '' logs.append(f'{date}{message}') From e23a65128a36178812449cb4feceb32970564c01 Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Sun, 18 Jul 2021 19:03:47 -0400 Subject: [PATCH 07/13] Remove del of logger item --- node-daemon/pvcnoded/Daemon.py | 1 - 1 file changed, 1 deletion(-) diff --git a/node-daemon/pvcnoded/Daemon.py b/node-daemon/pvcnoded/Daemon.py index 781ea8e1..56fa6a65 100644 --- a/node-daemon/pvcnoded/Daemon.py +++ b/node-daemon/pvcnoded/Daemon.py @@ -761,7 +761,6 @@ def cleanup(): pass logger.out('Terminated pvc daemon', state='s') - del logger os._exit(0) From b770e15a91d9f97216a06d766f5d3eba91cf5b9e Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Sun, 18 Jul 2021 19:53:00 -0400 Subject: [PATCH 08/13] Fix final termination of logger We need to do a bit more finagling with the logger on termination to ensure that all messages are written and the queue drained before actually terminating. --- daemon-common/log.py | 36 +++++++++++++++++++++++++++------- node-daemon/pvcnoded/Daemon.py | 1 + 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/daemon-common/log.py b/daemon-common/log.py index 6cb34f26..04e811b2 100644 --- a/daemon-common/log.py +++ b/daemon-common/log.py @@ -91,6 +91,17 @@ class Logger(object): self.writer.close() self.writer = open(self.logfile, 'a', buffering=0) + # Provide a termination function so all messages are flushed before terminating the main daemon + def terminate(self): + if self.config['file_logging']: + self.writer.close() + if self.config['zookeeper_logging']: + self.out("Waiting for Zookeeper message queue to drain", state='s') + while not self.zookeeper_logger.queue.empty(): + pass + self.zookeeper_logger.stop() + self.zookeeper_logger.join() + # Output function def out(self, message, state=None, prefix=''): @@ -154,6 +165,7 @@ class ZookeeperLogger(Thread): self.zkhandler = None self.start_zkhandler() self.zkhandler.write([(('logs', self.node), '')]) + self.running = False Thread.__init__(self, args=(), kwargs=None) def start_zkhandler(self): @@ -167,18 +179,28 @@ class ZookeeperLogger(Thread): self.zkhandler.connect(persistent=True) def run(self): - while True: - message = self.queue.get() - self.write_log(message) + self.running = True + # Get the logs that are currently in Zookeeper and populate our deque + logs = deque(self.zkhandler.read(('logs.messages', self.node)).split('\n'), self.max_lines) + while self.running: + # Get a new message + try: + message = self.queue.get(block=False) + if not message: + continue + except Exception: + continue - def write_log(self, message): - # Log to Zookeeper - with self.zkhandler.writelock(('logs.messages', self.node)): - logs = deque(self.zkhandler.read(('logs.messages', self.node)).split('\n'), self.max_lines) if not self.config['log_dates']: # We want to log dates here, even if the log_dates config is not set date = '{} '.format(datetime.now().strftime('%Y/%m/%d %H:%M:%S.%f')) else: date = '' + # Add the message to the deque logs.append(f'{date}{message}') + # Write the updated messages into Zookeeper self.zkhandler.write([(('logs.messages', self.node), '\n'.join(logs))]) + return + + def stop(self): + self.running = False diff --git a/node-daemon/pvcnoded/Daemon.py b/node-daemon/pvcnoded/Daemon.py index 56fa6a65..1a13c947 100644 --- a/node-daemon/pvcnoded/Daemon.py +++ b/node-daemon/pvcnoded/Daemon.py @@ -761,6 +761,7 @@ def cleanup(): pass logger.out('Terminated pvc daemon', state='s') + logger.terminate() os._exit(0) From ba257048ad52ee2f5f80f5bdd604ca7df9e25a9c Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Sun, 18 Jul 2021 20:06:08 -0400 Subject: [PATCH 09/13] Improve output formatting of node logs --- client-cli/pvc/cli_lib/node.py | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/client-cli/pvc/cli_lib/node.py b/client-cli/pvc/cli_lib/node.py index ced6c269..62de5627 100644 --- a/client-cli/pvc/cli_lib/node.py +++ b/client-cli/pvc/cli_lib/node.py @@ -120,6 +120,7 @@ def follow_node_log(config, node, lines=10): # Print the initial data and begin following print(loglines, end='') + print('\n', end='') while True: # Grab the next line set (500 is a reasonable number of lines per second; any more are skipped) @@ -134,24 +135,22 @@ def follow_node_log(config, node, lines=10): # Split the new and old log strings into constitutent lines old_node_loglines = node_log.split('\n') new_node_loglines = new_node_log.split('\n') + # Set the node log to the new log value for the next iteration node_log = new_node_log - # Remove the lines from the old log until we hit the first line of the new log; this - # ensures that the old log is a string that we can remove from the new log entirely - for index, line in enumerate(old_node_loglines, start=0): - if line == new_node_loglines[0]: - del old_node_loglines[0:index] - break - # Rejoin the log lines into strings - old_node_log = '\n'.join(old_node_loglines) - new_node_log = '\n'.join(new_node_loglines) - # Remove the old lines from the new log - diff_node_log = new_node_log.replace(old_node_log, "") + + # Get the set difference between the two sets of lines + # We can safely use sets here since, due to the timestamps, we can guarantee that every + # individual log line is distinct, and similarly we can re-sort the diff afterwards. + diff_node_loglines = sorted(list(set(new_node_loglines) - set(old_node_loglines))) + # If there's a difference, print it out - if diff_node_log: - print(diff_node_log, end='') - # Wait a second - time.sleep(1) + if len(diff_node_loglines) > 0: + print('\n'.join(diff_node_loglines), end='') + print('\n', end='') + + # Wait half a second + time.sleep(0.5) return True, '' From 55f2b00366745d1713ed523cfcec5cb63799f883 Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Sun, 18 Jul 2021 20:18:23 -0400 Subject: [PATCH 10/13] Add some spaces for better readability --- client-cli/pvc/cli_lib/vm.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/client-cli/pvc/cli_lib/vm.py b/client-cli/pvc/cli_lib/vm.py index c45bc6bc..47e74e05 100644 --- a/client-cli/pvc/cli_lib/vm.py +++ b/client-cli/pvc/cli_lib/vm.py @@ -1245,8 +1245,10 @@ def follow_console_log(config, vm, lines=10): # Split the new and old log strings into constitutent lines old_console_loglines = console_log.split('\n') new_console_loglines = new_console_log.split('\n') + # Set the console log to the new log value for the next iteration console_log = new_console_log + # Remove the lines from the old log until we hit the first line of the new log; this # ensures that the old log is a string that we can remove from the new log entirely for index, line in enumerate(old_console_loglines, start=0): From 2d48127e9c26316a87b65e54b07aa99ae606c788 Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Sun, 18 Jul 2021 20:18:35 -0400 Subject: [PATCH 11/13] Use even better/faster set comparison --- client-cli/pvc/cli_lib/node.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/client-cli/pvc/cli_lib/node.py b/client-cli/pvc/cli_lib/node.py index 62de5627..58751b79 100644 --- a/client-cli/pvc/cli_lib/node.py +++ b/client-cli/pvc/cli_lib/node.py @@ -139,10 +139,9 @@ def follow_node_log(config, node, lines=10): # Set the node log to the new log value for the next iteration node_log = new_node_log - # Get the set difference between the two sets of lines - # We can safely use sets here since, due to the timestamps, we can guarantee that every - # individual log line is distinct, and similarly we can re-sort the diff afterwards. - diff_node_loglines = sorted(list(set(new_node_loglines) - set(old_node_loglines))) + # Get the difference between the two sets of lines + old_node_loglines_set = set(old_node_loglines) + diff_node_loglines = [x for x in new_node_loglines if x not in old_node_loglines_set] # If there's a difference, print it out if len(diff_node_loglines) > 0: From a0e9b57d3922ae9632cc03d4c2a3f0c12929c368 Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Sun, 18 Jul 2021 20:19:59 -0400 Subject: [PATCH 12/13] Increase log line frequency --- client-cli/pvc/cli_lib/vm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/client-cli/pvc/cli_lib/vm.py b/client-cli/pvc/cli_lib/vm.py index 47e74e05..cbf3ea7b 100644 --- a/client-cli/pvc/cli_lib/vm.py +++ b/client-cli/pvc/cli_lib/vm.py @@ -1263,8 +1263,8 @@ def follow_console_log(config, vm, lines=10): # If there's a difference, print it out if diff_console_log: print(diff_console_log, end='') - # Wait a second - time.sleep(1) + # Wait half a second + time.sleep(0.5) return True, '' From 2358ad6bbe95085f0acc31809e0fd4ba9fa732cb Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Sun, 18 Jul 2021 20:23:45 -0400 Subject: [PATCH 13/13] Reduce the number of lines per call 500 was a lot every half second; 200 seems more reasonable. Even a fast kernel boot should generate < 200 lines in half a second. --- client-cli/pvc/cli_lib/node.py | 8 ++++---- client-cli/pvc/cli_lib/vm.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/client-cli/pvc/cli_lib/node.py b/client-cli/pvc/cli_lib/node.py index 58751b79..136beb9e 100644 --- a/client-cli/pvc/cli_lib/node.py +++ b/client-cli/pvc/cli_lib/node.py @@ -104,9 +104,9 @@ def follow_node_log(config, node, lines=10): API arguments: lines={lines} API schema: {"name":"{nodename}","data":"{node_log}"} """ - # We always grab 500 to match the follow call, but only _show_ `lines` number + # We always grab 200 to match the follow call, but only _show_ `lines` number params = { - 'lines': 500 + 'lines': 200 } response = call_api(config, 'get', '/node/{node}/log'.format(node=node), params=params) @@ -123,10 +123,10 @@ def follow_node_log(config, node, lines=10): print('\n', end='') while True: - # Grab the next line set (500 is a reasonable number of lines per second; any more are skipped) + # Grab the next line set (200 is a reasonable number of lines per half-second; any more are skipped) try: params = { - 'lines': 500 + 'lines': 200 } response = call_api(config, 'get', '/node/{node}/log'.format(node=node), params=params) new_node_log = response.json()['data'] diff --git a/client-cli/pvc/cli_lib/vm.py b/client-cli/pvc/cli_lib/vm.py index cbf3ea7b..16a81a21 100644 --- a/client-cli/pvc/cli_lib/vm.py +++ b/client-cli/pvc/cli_lib/vm.py @@ -1215,9 +1215,9 @@ def follow_console_log(config, vm, lines=10): API arguments: lines={lines} API schema: {"name":"{vmname}","data":"{console_log}"} """ - # We always grab 500 to match the follow call, but only _show_ `lines` number + # We always grab 200 to match the follow call, but only _show_ `lines` number params = { - 'lines': 500 + 'lines': 200 } response = call_api(config, 'get', '/vm/{vm}/console'.format(vm=vm), params=params) @@ -1233,10 +1233,10 @@ def follow_console_log(config, vm, lines=10): print(loglines, end='') while True: - # Grab the next line set (500 is a reasonable number of lines per second; any more are skipped) + # Grab the next line set (200 is a reasonable number of lines per half-second; any more are skipped) try: params = { - 'lines': 500 + 'lines': 200 } response = call_api(config, 'get', '/vm/{vm}/console'.format(vm=vm), params=params) new_console_log = response.json()['data']