From 534c7cd7f049e29330240142dc81569124601128 Mon Sep 17 00:00:00 2001
From: "Joshua M. Boniface" <joshua@boniface.me>
Date: Sat, 21 Aug 2021 02:46:11 -0400
Subject: [PATCH] Refactor pvcnoded to reduce Daemon.py size

This branch commit refactors the pvcnoded component to better adhere to
good programming practices. The previous Daemon.py was a massive file
which contained almost 2000 lines of direct, root-level code which was
directly imported. Not only was this poor practice, but it also resulted
in a nigh-unmaintainable file which was hard even for me to understand.

This refactoring splits a large section of the code from Daemon.py into
separate small modules and functions in the `util/` directory. This will
hopefully make most of the functionality easy to find and modify without
having to dig through a single large file.

Further, the existing subcomponents have been moved to the `objects/`
directory which clearly separates them.

Finally, the Daemon.py code has mostly been moved into a function,
`entrypoint()`, which is then called from the `pvcnoded.py` stub.

An additional item is that most format strings have been replaced by
f-strings to make use of the Python 3.6 features in Daemon.py and the
utility files.
---
 lint                                          |    2 +-
 node-daemon/pvcnoded.py                       |    2 +
 node-daemon/pvcnoded.sample.yaml              |    8 +-
 node-daemon/pvcnoded/CephInstance.py          |  428 ---
 node-daemon/pvcnoded/Daemon.py                | 2397 ++++-------------
 .../pvcnoded/dnsmasq-zookeeper-leases.py      |    2 +-
 node-daemon/pvcnoded/objects/CephInstance.py  |  428 +++
 .../{ => objects}/DNSAggregatorInstance.py    |    4 +-
 .../{ => objects}/MetadataAPIInstance.py      |    0
 .../pvcnoded/{ => objects}/NodeInstance.py    |   26 +-
 .../pvcnoded/{ => objects}/SRIOVVFInstance.py |    0
 .../{ => objects}/VMConsoleWatcherInstance.py |    0
 .../pvcnoded/{ => objects}/VMInstance.py      |  155 +-
 .../{ => objects}/VXNetworkInstance.py        |   16 +-
 node-daemon/pvcnoded/objects/__init__.py      |    0
 node-daemon/pvcnoded/util/__init__.py         |    0
 node-daemon/pvcnoded/util/config.py           |  384 +++
 node-daemon/pvcnoded/{ => util}/fencing.py    |   28 +-
 node-daemon/pvcnoded/util/keepalive.py        |  718 +++++
 node-daemon/pvcnoded/util/libvirt.py          |   36 +
 node-daemon/pvcnoded/util/networking.py       |  181 ++
 node-daemon/pvcnoded/util/services.py         |   77 +
 node-daemon/pvcnoded/util/zookeeper.py        |  132 +
 test-cluster.sh                               |   76 +-
 24 files changed, 2667 insertions(+), 2433 deletions(-)
 delete mode 100644 node-daemon/pvcnoded/CephInstance.py
 create mode 100644 node-daemon/pvcnoded/objects/CephInstance.py
 rename node-daemon/pvcnoded/{ => objects}/DNSAggregatorInstance.py (98%)
 rename node-daemon/pvcnoded/{ => objects}/MetadataAPIInstance.py (100%)
 rename node-daemon/pvcnoded/{ => objects}/NodeInstance.py (97%)
 rename node-daemon/pvcnoded/{ => objects}/SRIOVVFInstance.py (100%)
 rename node-daemon/pvcnoded/{ => objects}/VMConsoleWatcherInstance.py (100%)
 rename node-daemon/pvcnoded/{ => objects}/VMInstance.py (92%)
 rename node-daemon/pvcnoded/{ => objects}/VXNetworkInstance.py (99%)
 create mode 100644 node-daemon/pvcnoded/objects/__init__.py
 create mode 100644 node-daemon/pvcnoded/util/__init__.py
 create mode 100644 node-daemon/pvcnoded/util/config.py
 rename node-daemon/pvcnoded/{ => util}/fencing.py (89%)
 create mode 100644 node-daemon/pvcnoded/util/keepalive.py
 create mode 100644 node-daemon/pvcnoded/util/libvirt.py
 create mode 100644 node-daemon/pvcnoded/util/networking.py
 create mode 100644 node-daemon/pvcnoded/util/services.py
 create mode 100644 node-daemon/pvcnoded/util/zookeeper.py

diff --git a/lint b/lint
index a7638001..808e3e6b 100755
--- a/lint
+++ b/lint
@@ -6,7 +6,7 @@ if ! which flake8 &>/dev/null; then
 fi
 
 flake8 \
-    --ignore=E501 \
+    --ignore=E501,E241 \
     --exclude=debian,api-daemon/migrations/versions,api-daemon/provisioner/examples
 ret=$?
 if [[ $ret -eq 0 ]]; then
diff --git a/node-daemon/pvcnoded.py b/node-daemon/pvcnoded.py
index 20c1734d..49dba9c2 100755
--- a/node-daemon/pvcnoded.py
+++ b/node-daemon/pvcnoded.py
@@ -20,3 +20,5 @@
 ###############################################################################
 
 import pvcnoded.Daemon  # noqa: F401
+
+pvcnoded.Daemon.entrypoint()
diff --git a/node-daemon/pvcnoded.sample.yaml b/node-daemon/pvcnoded.sample.yaml
index 37097538..360a14b8 100644
--- a/node-daemon/pvcnoded.sample.yaml
+++ b/node-daemon/pvcnoded.sample.yaml
@@ -182,15 +182,15 @@ pvc:
           device: ens4
           # mtu: Upstream interface MTU; use 9000 for jumbo frames (requires switch support)
           mtu: 1500
-          # address: Upstream interface IP address, options: None, by-id, <static>/<mask>
-          address: None
+          # address: Upstream interface IP address, options: by-id, <static>/<mask>
+          address: by-id
         # cluster: Cluster (VNIC) physical interface device
         cluster:
           # device: Cluster (VNIC) interface device name
           device: ens4
           # mtu: Cluster (VNIC) interface MTU; use 9000 for jumbo frames (requires switch support)
           mtu: 1500
-          # address: Cluster (VNIC) interface IP address, options: None, by-id, <static>/<mask>
+          # address: Cluster (VNIC) interface IP address, options: by-id, <static>/<mask>
           address: by-id
         # storage: Storage (Ceph OSD) physical interface device
         storage:
@@ -198,7 +198,7 @@ pvc:
           device: ens4
           # mtu: Storage (Ceph OSD) interface MTU; use 9000 for jumbo frames (requires switch support)
           mtu: 1500
-          # address: Storage (Ceph OSD) interface IP address, options: None, by-id, <static>/<mask>
+          # address: Storage (Ceph OSD) interface IP address, options: by-id, <static>/<mask>
           address: by-id
       # storage; PVC storage configuration
       # OPTIONAL if enable_storage: False
diff --git a/node-daemon/pvcnoded/CephInstance.py b/node-daemon/pvcnoded/CephInstance.py
deleted file mode 100644
index f7214302..00000000
--- a/node-daemon/pvcnoded/CephInstance.py
+++ /dev/null
@@ -1,428 +0,0 @@
-#!/usr/bin/env python3
-
-# CephInstance.py - Class implementing a PVC node Ceph instance
-# Part of the Parallel Virtual Cluster (PVC) system
-#
-#    Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
-#
-#    This program is free software: you can redistribute it and/or modify
-#    it under the terms of the GNU General Public License as published by
-#    the Free Software Foundation, version 3.
-#
-#    This program is distributed in the hope that it will be useful,
-#    but WITHOUT ANY WARRANTY; without even the implied warranty of
-#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-#    GNU General Public License for more details.
-#
-#    You should have received a copy of the GNU General Public License
-#    along with this program.  If not, see <https://www.gnu.org/licenses/>.
-#
-###############################################################################
-
-import time
-import json
-import psutil
-
-import daemon_lib.common as common
-
-
-class CephOSDInstance(object):
-    def __init__(self, zkhandler, this_node, osd_id):
-        self.zkhandler = zkhandler
-        self.this_node = this_node
-        self.osd_id = osd_id
-        self.node = None
-        self.size = None
-        self.stats = dict()
-
-        @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('osd.node', self.osd_id))
-        def watch_osd_node(data, stat, event=''):
-            if event and event.type == 'DELETED':
-                # The key has been deleted after existing before; terminate this watcher
-                # because this class instance is about to be reaped in Daemon.py
-                return False
-
-            try:
-                data = data.decode('ascii')
-            except AttributeError:
-                data = ''
-
-            if data and data != self.node:
-                self.node = data
-
-        @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('osd.stats', self.osd_id))
-        def watch_osd_stats(data, stat, event=''):
-            if event and event.type == 'DELETED':
-                # The key has been deleted after existing before; terminate this watcher
-                # because this class instance is about to be reaped in Daemon.py
-                return False
-
-            try:
-                data = data.decode('ascii')
-            except AttributeError:
-                data = ''
-
-            if data and data != self.stats:
-                self.stats = json.loads(data)
-
-
-def add_osd(zkhandler, logger, node, device, weight):
-    # We are ready to create a new OSD on this node
-    logger.out('Creating new OSD disk on block device {}'.format(device), state='i')
-    try:
-        # 1. Create an OSD; we do this so we know what ID will be gen'd
-        retcode, stdout, stderr = common.run_os_command('ceph osd create')
-        if retcode:
-            print('ceph osd create')
-            print(stdout)
-            print(stderr)
-            raise
-        osd_id = stdout.rstrip()
-
-        # 2. Remove that newly-created OSD
-        retcode, stdout, stderr = common.run_os_command('ceph osd rm {}'.format(osd_id))
-        if retcode:
-            print('ceph osd rm')
-            print(stdout)
-            print(stderr)
-            raise
-
-        # 3a. Zap the disk to ensure it is ready to go
-        logger.out('Zapping disk {}'.format(device), state='i')
-        retcode, stdout, stderr = common.run_os_command('ceph-volume lvm zap --destroy {}'.format(device))
-        if retcode:
-            print('ceph-volume lvm zap')
-            print(stdout)
-            print(stderr)
-            raise
-
-        # 3b. Create the OSD for real
-        logger.out('Preparing LVM for new OSD disk with ID {} on {}'.format(osd_id, device), state='i')
-        retcode, stdout, stderr = common.run_os_command(
-            'ceph-volume lvm prepare --bluestore --data {device}'.format(
-                osdid=osd_id,
-                device=device
-            )
-        )
-        if retcode:
-            print('ceph-volume lvm prepare')
-            print(stdout)
-            print(stderr)
-            raise
-
-        # 4a. Get OSD FSID
-        logger.out('Getting OSD FSID for ID {} on {}'.format(osd_id, device), state='i')
-        retcode, stdout, stderr = common.run_os_command(
-            'ceph-volume lvm list {device}'.format(
-                osdid=osd_id,
-                device=device
-            )
-        )
-        for line in stdout.split('\n'):
-            if 'osd fsid' in line:
-                osd_fsid = line.split()[-1]
-
-        if not osd_fsid:
-            print('ceph-volume lvm list')
-            print('Could not find OSD fsid in data:')
-            print(stdout)
-            print(stderr)
-            raise
-
-        # 4b. Activate the OSD
-        logger.out('Activating new OSD disk with ID {}'.format(osd_id, device), state='i')
-        retcode, stdout, stderr = common.run_os_command(
-            'ceph-volume lvm activate --bluestore {osdid} {osdfsid}'.format(
-                osdid=osd_id,
-                osdfsid=osd_fsid
-            )
-        )
-        if retcode:
-            print('ceph-volume lvm activate')
-            print(stdout)
-            print(stderr)
-            raise
-
-        # 5. Add it to the crush map
-        logger.out('Adding new OSD disk with ID {} to CRUSH map'.format(osd_id), state='i')
-        retcode, stdout, stderr = common.run_os_command(
-            'ceph osd crush add osd.{osdid} {weight} root=default host={node}'.format(
-                osdid=osd_id,
-                weight=weight,
-                node=node
-            )
-        )
-        if retcode:
-            print('ceph osd crush add')
-            print(stdout)
-            print(stderr)
-            raise
-        time.sleep(0.5)
-
-        # 6. Verify it started
-        retcode, stdout, stderr = common.run_os_command(
-            'systemctl status ceph-osd@{osdid}'.format(
-                osdid=osd_id
-            )
-        )
-        if retcode:
-            print('systemctl status')
-            print(stdout)
-            print(stderr)
-            raise
-
-        # 7. Add the new OSD to the list
-        logger.out('Adding new OSD disk with ID {} to Zookeeper'.format(osd_id), state='i')
-        zkhandler.write([
-            (('osd', osd_id), ''),
-            (('osd.node', osd_id), node),
-            (('osd.device', osd_id), device),
-            (('osd.stats', osd_id), '{}'),
-        ])
-
-        # Log it
-        logger.out('Created new OSD disk with ID {}'.format(osd_id), state='o')
-        return True
-    except Exception as e:
-        # Log it
-        logger.out('Failed to create new OSD disk: {}'.format(e), state='e')
-        return False
-
-
-def remove_osd(zkhandler, logger, osd_id, osd_obj):
-    logger.out('Removing OSD disk {}'.format(osd_id), state='i')
-    try:
-        # 1. Verify the OSD is present
-        retcode, stdout, stderr = common.run_os_command('ceph osd ls')
-        osd_list = stdout.split('\n')
-        if osd_id not in osd_list:
-            logger.out('Could not find OSD {} in the cluster'.format(osd_id), state='e')
-            return True
-
-        # 1. Set the OSD out so it will flush
-        logger.out('Setting out OSD disk with ID {}'.format(osd_id), state='i')
-        retcode, stdout, stderr = common.run_os_command('ceph osd out {}'.format(osd_id))
-        if retcode:
-            print('ceph osd out')
-            print(stdout)
-            print(stderr)
-            raise
-
-        # 2. Wait for the OSD to flush
-        logger.out('Flushing OSD disk with ID {}'.format(osd_id), state='i')
-        osd_string = str()
-        while True:
-            try:
-                retcode, stdout, stderr = common.run_os_command('ceph pg dump osds --format json')
-                dump_string = json.loads(stdout)
-                for osd in dump_string:
-                    if str(osd['osd']) == osd_id:
-                        osd_string = osd
-                num_pgs = osd_string['num_pgs']
-                if num_pgs > 0:
-                    time.sleep(5)
-                else:
-                    raise
-            except Exception:
-                break
-
-        # 3. Stop the OSD process and wait for it to be terminated
-        logger.out('Stopping OSD disk with ID {}'.format(osd_id), state='i')
-        retcode, stdout, stderr = common.run_os_command('systemctl stop ceph-osd@{}'.format(osd_id))
-        if retcode:
-            print('systemctl stop')
-            print(stdout)
-            print(stderr)
-            raise
-
-        # FIXME: There has to be a better way to do this /shrug
-        while True:
-            is_osd_up = False
-            # Find if there is a process named ceph-osd with arg '--id {id}'
-            for p in psutil.process_iter(attrs=['name', 'cmdline']):
-                if 'ceph-osd' == p.info['name'] and '--id {}'.format(osd_id) in ' '.join(p.info['cmdline']):
-                    is_osd_up = True
-            # If there isn't, continue
-            if not is_osd_up:
-                break
-
-        # 4. Determine the block devices
-        retcode, stdout, stderr = common.run_os_command('readlink /var/lib/ceph/osd/ceph-{}/block'.format(osd_id))
-        vg_name = stdout.split('/')[-2]  # e.g. /dev/ceph-<uuid>/osd-block-<uuid>
-        retcode, stdout, stderr = common.run_os_command('vgs --separator , --noheadings -o pv_name {}'.format(vg_name))
-        pv_block = stdout.strip()
-
-        # 5. Zap the volumes
-        logger.out('Zapping OSD disk with ID {} on {}'.format(osd_id, pv_block), state='i')
-        retcode, stdout, stderr = common.run_os_command('ceph-volume lvm zap --destroy {}'.format(pv_block))
-        if retcode:
-            print('ceph-volume lvm zap')
-            print(stdout)
-            print(stderr)
-            raise
-
-        # 6. Purge the OSD from Ceph
-        logger.out('Purging OSD disk with ID {}'.format(osd_id), state='i')
-        retcode, stdout, stderr = common.run_os_command('ceph osd purge {} --yes-i-really-mean-it'.format(osd_id))
-        if retcode:
-            print('ceph osd purge')
-            print(stdout)
-            print(stderr)
-            raise
-
-        # 7. Delete OSD from ZK
-        logger.out('Deleting OSD disk with ID {} from Zookeeper'.format(osd_id), state='i')
-        zkhandler.delete(('osd', osd_id), recursive=True)
-
-        # Log it
-        logger.out('Removed OSD disk with ID {}'.format(osd_id), state='o')
-        return True
-    except Exception as e:
-        # Log it
-        logger.out('Failed to purge OSD disk with ID {}: {}'.format(osd_id, e), state='e')
-        return False
-
-
-class CephPoolInstance(object):
-    def __init__(self, zkhandler, this_node, name):
-        self.zkhandler = zkhandler
-        self.this_node = this_node
-        self.name = name
-        self.pgs = ''
-        self.stats = dict()
-
-        @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('pool.pgs', self.name))
-        def watch_pool_node(data, stat, event=''):
-            if event and event.type == 'DELETED':
-                # The key has been deleted after existing before; terminate this watcher
-                # because this class instance is about to be reaped in Daemon.py
-                return False
-
-            try:
-                data = data.decode('ascii')
-            except AttributeError:
-                data = ''
-
-            if data and data != self.pgs:
-                self.pgs = data
-
-        @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('pool.stats', self.name))
-        def watch_pool_stats(data, stat, event=''):
-            if event and event.type == 'DELETED':
-                # The key has been deleted after existing before; terminate this watcher
-                # because this class instance is about to be reaped in Daemon.py
-                return False
-
-            try:
-                data = data.decode('ascii')
-            except AttributeError:
-                data = ''
-
-            if data and data != self.stats:
-                self.stats = json.loads(data)
-
-
-class CephVolumeInstance(object):
-    def __init__(self, zkhandler, this_node, pool, name):
-        self.zkhandler = zkhandler
-        self.this_node = this_node
-        self.pool = pool
-        self.name = name
-        self.stats = dict()
-
-        @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('volume.stats', f'{self.pool}/{self.name}'))
-        def watch_volume_stats(data, stat, event=''):
-            if event and event.type == 'DELETED':
-                # The key has been deleted after existing before; terminate this watcher
-                # because this class instance is about to be reaped in Daemon.py
-                return False
-
-            try:
-                data = data.decode('ascii')
-            except AttributeError:
-                data = ''
-
-            if data and data != self.stats:
-                self.stats = json.loads(data)
-
-
-class CephSnapshotInstance(object):
-    def __init__(self, zkhandler, this_node, pool, volume, name):
-        self.zkhandler = zkhandler
-        self.this_node = this_node
-        self.pool = pool
-        self.volume = volume
-        self.name = name
-        self.stats = dict()
-
-        @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('snapshot.stats', f'{self.pool}/{self.volume}/{self.name}'))
-        def watch_snapshot_stats(data, stat, event=''):
-            if event and event.type == 'DELETED':
-                # The key has been deleted after existing before; terminate this watcher
-                # because this class instance is about to be reaped in Daemon.py
-                return False
-
-            try:
-                data = data.decode('ascii')
-            except AttributeError:
-                data = ''
-
-            if data and data != self.stats:
-                self.stats = json.loads(data)
-
-
-# Primary command function
-# This command pipe is only used for OSD adds and removes
-def run_command(zkhandler, logger, this_node, data, d_osd):
-    # Get the command and args
-    command, args = data.split()
-
-    # Adding a new OSD
-    if command == 'osd_add':
-        node, device, weight = args.split(',')
-        if node == this_node.name:
-            # Lock the command queue
-            zk_lock = zkhandler.writelock('base.cmd.ceph')
-            with zk_lock:
-                # Add the OSD
-                result = add_osd(zkhandler, logger, node, device, weight)
-                # Command succeeded
-                if result:
-                    # Update the command queue
-                    zkhandler.write([
-                        ('base.cmd.ceph', 'success-{}'.format(data))
-                    ])
-                # Command failed
-                else:
-                    # Update the command queue
-                    zkhandler.write([
-                        ('base.cmd.ceph', 'failure-{}'.format(data))
-                    ])
-                # Wait 1 seconds before we free the lock, to ensure the client hits the lock
-                time.sleep(1)
-
-    # Removing an OSD
-    elif command == 'osd_remove':
-        osd_id = args
-
-        # Verify osd_id is in the list
-        if d_osd[osd_id] and d_osd[osd_id].node == this_node.name:
-            # Lock the command queue
-            zk_lock = zkhandler.writelock('base.cmd.ceph')
-            with zk_lock:
-                # Remove the OSD
-                result = remove_osd(zkhandler, logger, osd_id, d_osd[osd_id])
-                # Command succeeded
-                if result:
-                    # Update the command queue
-                    zkhandler.write([
-                        ('base.cmd.ceph', 'success-{}'.format(data))
-                    ])
-                # Command failed
-                else:
-                    # Update the command queue
-                    zkhandler.write([
-                        ('base.cmd.ceph', 'failure-{}'.format(data))
-                    ])
-                # Wait 1 seconds before we free the lock, to ensure the client hits the lock
-                time.sleep(1)
diff --git a/node-daemon/pvcnoded/Daemon.py b/node-daemon/pvcnoded/Daemon.py
index 01d4cf32..baf9def6 100644
--- a/node-daemon/pvcnoded/Daemon.py
+++ b/node-daemon/pvcnoded/Daemon.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Daemon.py - Node daemon
+# Daemon.py - PVC Node daemon main entrypoint
 # Part of the Parallel Virtual Cluster (PVC) system
 #
 #    Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
@@ -19,1975 +19,680 @@
 #
 ###############################################################################
 
-import kazoo.client
-import libvirt
-import sys
-import os
-import signal
-import psutil
-import subprocess
-import time
-import re
-import yaml
-import json
+import pvcnoded.util.keepalive
+import pvcnoded.util.config
+import pvcnoded.util.fencing
+import pvcnoded.util.networking
+import pvcnoded.util.services
+import pvcnoded.util.libvirt
+import pvcnoded.util.zookeeper
 
-from socket import gethostname
-from datetime import datetime
-from threading import Thread
-from ipaddress import ip_address, ip_network
-from apscheduler.schedulers.background import BackgroundScheduler
-from distutils.util import strtobool
-from queue import Queue
-from xml.etree import ElementTree
-from rados import Rados
+import pvcnoded.objects.DNSAggregatorInstance as DNSAggregatorInstance
+import pvcnoded.objects.MetadataAPIInstance as MetadataAPIInstance
+import pvcnoded.objects.VMInstance as VMInstance
+import pvcnoded.objects.NodeInstance as NodeInstance
+import pvcnoded.objects.VXNetworkInstance as VXNetworkInstance
+import pvcnoded.objects.SRIOVVFInstance as SRIOVVFInstance
+import pvcnoded.objects.CephInstance as CephInstance
 
-from daemon_lib.zkhandler import ZKHandler
-
-import pvcnoded.fencing as fencing
 import daemon_lib.log as log
 import daemon_lib.common as common
 
-import pvcnoded.VMInstance as VMInstance
-import pvcnoded.NodeInstance as NodeInstance
-import pvcnoded.VXNetworkInstance as VXNetworkInstance
-import pvcnoded.SRIOVVFInstance as SRIOVVFInstance
-import pvcnoded.DNSAggregatorInstance as DNSAggregatorInstance
-import pvcnoded.CephInstance as CephInstance
-import pvcnoded.MetadataAPIInstance as MetadataAPIInstance
+from time import sleep
+from distutils.util import strtobool
 
-# Version string for startup output
+import os
+import sys
+import signal
+import re
+import json
+
+# Daemon version
 version = '0.9.32'
 
-###############################################################################
-# PVCD - node daemon startup program
-###############################################################################
-#
-# The PVC daemon starts a node and configures all the required components for
-# the node to run. It determines which of the 3 daemon modes it should be in
-# during initial setup based on hostname and the config file, and then starts
-# any required services. The 3 daemon modes are:
-#  * leader: the cluster leader, follows the Zookeeper leader
-#  * coordinator: a Zookeeper cluster member
-#  * hypervisor: a hypervisor without any cluster intelligence
-#
-###############################################################################
 
-###############################################################################
-# Daemon functions
-###############################################################################
+##########################################################
+# Entrypoint
+##########################################################
 
-# Ensure update_timer, this_node, and d_domain are None until they're set for real
-# Ensures cleanup() doesn't fail due to these items not being created yet
-update_timer = None
-this_node = None
-d_domain = None
+def entrypoint():
+    keepalive_timer = None
 
+    # Get our configuration
+    config = pvcnoded.util.config.get_configuration()
+    config['pvcnoded_version'] = version
 
-# Create timer to update this node in Zookeeper
-def startKeepaliveTimer():
-    # Create our timer object
-    update_timer = BackgroundScheduler()
-    interval = int(config['keepalive_interval'])
-    logger.out('Starting keepalive timer ({} second interval)'.format(interval), state='s')
-    update_timer.add_job(node_keepalive, 'interval', seconds=interval)
-    update_timer.start()
-    node_keepalive()
-    return update_timer
+    # Set some useful booleans for later (fewer characters)
+    debug = config['debug']
+    if debug:
+        print('DEBUG MODE ENABLED')
 
+    # Create and validate our directories
+    pvcnoded.util.config.validate_directories(config)
 
-def stopKeepaliveTimer():
-    global update_timer
-    try:
-        update_timer.shutdown()
-        logger.out('Stopping keepalive timer', state='s')
-    except Exception:
-        pass
+    # Set up the logger instance
+    logger = log.Logger(config)
 
+    # Print our startup message
+    logger.out('')
+    logger.out('|----------------------------------------------------------|')
+    logger.out('|                                                          |')
+    logger.out('|           ███████████ ▜█▙      ▟█▛ █████ █ █ █           |')
+    logger.out('|                    ██  ▜█▙    ▟█▛  ██                    |')
+    logger.out('|           ███████████   ▜█▙  ▟█▛   ██                    |')
+    logger.out('|           ██             ▜█▙▟█▛    ███████████           |')
+    logger.out('|                                                          |')
+    logger.out('|----------------------------------------------------------|')
+    logger.out('| Parallel Virtual Cluster node daemon v{0: <18} |'.format(version))
+    logger.out('| Debug: {0: <49} |'.format(str(config['debug'])))
+    logger.out('| FQDN: {0: <50} |'.format(config['node_fqdn']))
+    logger.out('| Host: {0: <50} |'.format(config['node_hostname']))
+    logger.out('| ID: {0: <52} |'.format(config['node_id']))
+    logger.out('| IPMI hostname: {0: <41} |'.format(config['ipmi_hostname']))
+    logger.out('| Machine details:                                         |')
+    logger.out('|   CPUs: {0: <48} |'.format(config['static_data'][0]))
+    logger.out('|   Arch: {0: <48} |'.format(config['static_data'][3]))
+    logger.out('|   OS: {0: <50} |'.format(config['static_data'][2]))
+    logger.out('|   Kernel: {0: <46} |'.format(config['static_data'][1]))
+    logger.out('|----------------------------------------------------------|')
+    logger.out('')
+    logger.out(f'Starting pvcnoded on host {config["node_fqdn"]}', state='s')
 
-###############################################################################
-# PHASE 1a - Configuration parsing
-###############################################################################
-
-# Get the config file variable from the environment
-try:
-    pvcnoded_config_file = os.environ['PVCD_CONFIG_FILE']
-except Exception:
-    print('ERROR: The "PVCD_CONFIG_FILE" environment variable must be set before starting pvcnoded.')
-    os._exit(1)
-
-# Set local hostname and domain variables
-myfqdn = gethostname()
-myhostname = myfqdn.split('.', 1)[0]
-mydomainname = ''.join(myfqdn.split('.', 1)[1:])
-try:
-    mynodeid = re.findall(r'\d+', myhostname)[-1]
-except IndexError:
-    mynodeid = 1
-
-# Maintenance mode off by default
-maintenance = False
-
-# Gather useful data about our host
-# Static data format: 'cpu_count', 'arch', 'os', 'kernel'
-staticdata = []
-staticdata.append(str(psutil.cpu_count()))
-staticdata.append(subprocess.run(['uname', '-r'], stdout=subprocess.PIPE).stdout.decode('ascii').strip())
-staticdata.append(subprocess.run(['uname', '-o'], stdout=subprocess.PIPE).stdout.decode('ascii').strip())
-staticdata.append(subprocess.run(['uname', '-m'], stdout=subprocess.PIPE).stdout.decode('ascii').strip())
-
-
-# Read and parse the config file
-def readConfig(pvcnoded_config_file, myhostname):
-    print('Loading configuration from file "{}"'.format(pvcnoded_config_file))
-
-    with open(pvcnoded_config_file, 'r') as cfgfile:
-        try:
-            o_config = yaml.load(cfgfile, Loader=yaml.SafeLoader)
-        except Exception as e:
-            print('ERROR: Failed to parse configuration file: {}'.format(e))
-            os._exit(1)
-
-    # Handle the basic config (hypervisor-only)
-    try:
-        config_general = {
-            'node': o_config['pvc']['node'],
-            'coordinators': o_config['pvc']['cluster']['coordinators'],
-            'enable_hypervisor': o_config['pvc']['functions']['enable_hypervisor'],
-            'enable_networking': o_config['pvc']['functions']['enable_networking'],
-            'enable_storage': o_config['pvc']['functions']['enable_storage'],
-            'enable_api': o_config['pvc']['functions']['enable_api'],
-            'dynamic_directory': o_config['pvc']['system']['configuration']['directories']['dynamic_directory'],
-            'log_directory': o_config['pvc']['system']['configuration']['directories']['log_directory'],
-            'console_log_directory': o_config['pvc']['system']['configuration']['directories']['console_log_directory'],
-            'file_logging': o_config['pvc']['system']['configuration']['logging']['file_logging'],
-            'stdout_logging': o_config['pvc']['system']['configuration']['logging']['stdout_logging'],
-            'zookeeper_logging': o_config['pvc']['system']['configuration']['logging'].get('zookeeper_logging', False),
-            'log_colours': o_config['pvc']['system']['configuration']['logging']['log_colours'],
-            'log_dates': o_config['pvc']['system']['configuration']['logging']['log_dates'],
-            'log_keepalives': o_config['pvc']['system']['configuration']['logging']['log_keepalives'],
-            'log_keepalive_cluster_details': o_config['pvc']['system']['configuration']['logging']['log_keepalive_cluster_details'],
-            'log_keepalive_storage_details': o_config['pvc']['system']['configuration']['logging']['log_keepalive_storage_details'],
-            'console_log_lines': o_config['pvc']['system']['configuration']['logging']['console_log_lines'],
-            'node_log_lines': o_config['pvc']['system']['configuration']['logging'].get('node_log_lines', 0),
-            'vm_shutdown_timeout': int(o_config['pvc']['system']['intervals']['vm_shutdown_timeout']),
-            'keepalive_interval': int(o_config['pvc']['system']['intervals']['keepalive_interval']),
-            'fence_intervals': int(o_config['pvc']['system']['intervals']['fence_intervals']),
-            'suicide_intervals': int(o_config['pvc']['system']['intervals']['suicide_intervals']),
-            'successful_fence': o_config['pvc']['system']['fencing']['actions']['successful_fence'],
-            'failed_fence': o_config['pvc']['system']['fencing']['actions']['failed_fence'],
-            'migration_target_selector': o_config['pvc']['system']['migration']['target_selector'],
-            'ipmi_hostname': o_config['pvc']['system']['fencing']['ipmi']['host'],
-            'ipmi_username': o_config['pvc']['system']['fencing']['ipmi']['user'],
-            'ipmi_password': o_config['pvc']['system']['fencing']['ipmi']['pass']
-        }
-    except Exception as e:
-        print('ERROR: Failed to load configuration: {}'.format(e))
-        cleanup(failure=True)
-    config = config_general
-
-    # Handle debugging config
-    try:
-        config_debug = {
-            'debug': o_config['pvc']['debug']
-        }
-    except Exception:
-        config_debug = {
-            'debug': False
-        }
-    config = {**config, **config_debug}
-
-    # Handle the networking config
     if config['enable_networking']:
-        try:
-            config_networking = {
-                'cluster_domain': o_config['pvc']['cluster']['networks']['cluster']['domain'],
-                'vni_floating_ip': o_config['pvc']['cluster']['networks']['cluster']['floating_ip'],
-                'vni_network': o_config['pvc']['cluster']['networks']['cluster']['network'],
-                'storage_domain': o_config['pvc']['cluster']['networks']['storage']['domain'],
-                'storage_floating_ip': o_config['pvc']['cluster']['networks']['storage']['floating_ip'],
-                'storage_network': o_config['pvc']['cluster']['networks']['storage']['network'],
-                'upstream_domain': o_config['pvc']['cluster']['networks']['upstream']['domain'],
-                'upstream_floating_ip': o_config['pvc']['cluster']['networks']['upstream']['floating_ip'],
-                'upstream_network': o_config['pvc']['cluster']['networks']['upstream']['network'],
-                'upstream_gateway': o_config['pvc']['cluster']['networks']['upstream']['gateway'],
-                'pdns_postgresql_host': o_config['pvc']['coordinator']['dns']['database']['host'],
-                'pdns_postgresql_port': o_config['pvc']['coordinator']['dns']['database']['port'],
-                'pdns_postgresql_dbname': o_config['pvc']['coordinator']['dns']['database']['name'],
-                'pdns_postgresql_user': o_config['pvc']['coordinator']['dns']['database']['user'],
-                'pdns_postgresql_password': o_config['pvc']['coordinator']['dns']['database']['pass'],
-                'metadata_postgresql_host': o_config['pvc']['coordinator']['metadata']['database']['host'],
-                'metadata_postgresql_port': o_config['pvc']['coordinator']['metadata']['database']['port'],
-                'metadata_postgresql_dbname': o_config['pvc']['coordinator']['metadata']['database']['name'],
-                'metadata_postgresql_user': o_config['pvc']['coordinator']['metadata']['database']['user'],
-                'metadata_postgresql_password': o_config['pvc']['coordinator']['metadata']['database']['pass'],
-                'bridge_dev': o_config['pvc']['system']['configuration']['networking']['bridge_device'],
-                'vni_dev': o_config['pvc']['system']['configuration']['networking']['cluster']['device'],
-                'vni_mtu': o_config['pvc']['system']['configuration']['networking']['cluster']['mtu'],
-                'vni_dev_ip': o_config['pvc']['system']['configuration']['networking']['cluster']['address'],
-                'storage_dev': o_config['pvc']['system']['configuration']['networking']['storage']['device'],
-                'storage_mtu': o_config['pvc']['system']['configuration']['networking']['storage']['mtu'],
-                'storage_dev_ip': o_config['pvc']['system']['configuration']['networking']['storage']['address'],
-                'upstream_dev': o_config['pvc']['system']['configuration']['networking']['upstream']['device'],
-                'upstream_mtu': o_config['pvc']['system']['configuration']['networking']['upstream']['mtu'],
-                'upstream_dev_ip': o_config['pvc']['system']['configuration']['networking']['upstream']['address'],
-            }
+        if config['enable_sriov']:
+            # Set up SR-IOV devices
+            pvcnoded.util.networking.setup_sriov(logger, config)
 
-            # Check if SR-IOV is enabled and activate
-            config_networking['enable_sriov'] = o_config['pvc']['system']['configuration']['networking'].get('sriov_enable', False)
-            if config_networking['enable_sriov']:
-                config_networking['sriov_device'] = list(o_config['pvc']['system']['configuration']['networking']['sriov_device'])
+        # Set up our interfaces
+        pvcnoded.util.networking.setup_interfaces(logger, config)
 
-        except Exception as e:
-            print('ERROR: Failed to load configuration: {}'.format(e))
-            cleanup(failure=True)
-        config = {**config, **config_networking}
+    # Get list of coordinator nodes
+    coordinator_nodes = config['coordinators']
 
-        # Create the by-id address entries
-        for net in ['vni', 'storage', 'upstream']:
-            address_key = '{}_dev_ip'.format(net)
-            floating_key = '{}_floating_ip'.format(net)
-            network_key = '{}_network'.format(net)
-
-            # Verify the network provided is valid
-            try:
-                network = ip_network(config[network_key])
-            except Exception:
-                print('ERROR: Network address {} for {} is not valid!'.format(config[network_key], network_key))
-                cleanup(failure=True)
-
-            # If we should be autoselected
-            if config[address_key] == 'by-id':
-                # Construct an IP from the relevant network
-                # The NodeID starts at 1, but indexes start at 0
-                address_id = int(mynodeid) - 1
-                # Grab the nth address from the network
-                config[address_key] = '{}/{}'.format(list(network.hosts())[address_id], network.prefixlen)
-
-            # Verify that the floating IP is valid
-
-            try:
-                # Set the ipaddr
-                floating_addr = ip_address(config[floating_key].split('/')[0])
-                # Verify we're in the network
-                if floating_addr not in list(network.hosts()):
-                    raise
-            except Exception:
-                print('ERROR: Floating address {} for {} is not valid!'.format(config[floating_key], floating_key))
-                cleanup(failure=True)
-
-    # Handle the storage config
-    if config['enable_storage']:
-        try:
-            config_storage = {
-                'ceph_config_file': o_config['pvc']['system']['configuration']['storage']['ceph_config_file'],
-                'ceph_admin_keyring': o_config['pvc']['system']['configuration']['storage']['ceph_admin_keyring']
-            }
-        except Exception as e:
-            print('ERROR: Failed to load configuration: {}'.format(e))
-            cleanup(failure=True)
-        config = {**config, **config_storage}
-
-    # Handle an empty ipmi_hostname
-    if config['ipmi_hostname'] == '':
-        config['ipmi_hostname'] = myhostname + '-lom.' + mydomainname
-
-    return config
-
-
-# Get the config object from readConfig()
-config = readConfig(pvcnoded_config_file, myhostname)
-debug = config['debug']
-if debug:
-    print('DEBUG MODE ENABLED')
-
-# Handle the enable values
-enable_hypervisor = config['enable_hypervisor']
-enable_networking = config['enable_networking']
-enable_sriov = config['enable_sriov']
-enable_storage = config['enable_storage']
-
-###############################################################################
-# PHASE 1b - Prepare filesystem directories
-###############################################################################
-
-# Define our dynamic directory schema
-# <dynamic_directory>/
-#                     dnsmasq/
-#                     pdns/
-#                     nft/
-config['dnsmasq_dynamic_directory'] = config['dynamic_directory'] + '/dnsmasq'
-config['pdns_dynamic_directory'] = config['dynamic_directory'] + '/pdns'
-config['nft_dynamic_directory'] = config['dynamic_directory'] + '/nft'
-
-# Create our dynamic directories if they don't exist
-if not os.path.exists(config['dynamic_directory']):
-    os.makedirs(config['dynamic_directory'])
-    os.makedirs(config['dnsmasq_dynamic_directory'])
-    os.makedirs(config['pdns_dynamic_directory'])
-    os.makedirs(config['nft_dynamic_directory'])
-
-# Define our log directory schema
-# <log_directory>/
-#                 dnsmasq/
-#                 pdns/
-#                 nft/
-config['dnsmasq_log_directory'] = config['log_directory'] + '/dnsmasq'
-config['pdns_log_directory'] = config['log_directory'] + '/pdns'
-config['nft_log_directory'] = config['log_directory'] + '/nft'
-
-# Create our log directories if they don't exist
-if not os.path.exists(config['log_directory']):
-    os.makedirs(config['log_directory'])
-    os.makedirs(config['dnsmasq_log_directory'])
-    os.makedirs(config['pdns_log_directory'])
-    os.makedirs(config['nft_log_directory'])
-
-###############################################################################
-# PHASE 1c - Set up logging
-###############################################################################
-
-logger = log.Logger(config)
-
-# Print our startup messages
-logger.out('')
-logger.out('|----------------------------------------------------------|')
-logger.out('|                                                          |')
-logger.out('|           ███████████ ▜█▙      ▟█▛ █████ █ █ █           |')
-logger.out('|                    ██  ▜█▙    ▟█▛  ██                    |')
-logger.out('|           ███████████   ▜█▙  ▟█▛   ██                    |')
-logger.out('|           ██             ▜█▙▟█▛    ███████████           |')
-logger.out('|                                                          |')
-logger.out('|----------------------------------------------------------|')
-logger.out('| Parallel Virtual Cluster node daemon v{0: <18} |'.format(version))
-logger.out('| Debug: {0: <49} |'.format(str(config['debug'])))
-logger.out('| FQDN: {0: <50} |'.format(myfqdn))
-logger.out('| Host: {0: <50} |'.format(myhostname))
-logger.out('| ID: {0: <52} |'.format(mynodeid))
-logger.out('| IPMI hostname: {0: <41} |'.format(config['ipmi_hostname']))
-logger.out('| Machine details:                                         |')
-logger.out('|   CPUs: {0: <48} |'.format(staticdata[0]))
-logger.out('|   Arch: {0: <48} |'.format(staticdata[3]))
-logger.out('|   OS: {0: <50} |'.format(staticdata[2]))
-logger.out('|   Kernel: {0: <46} |'.format(staticdata[1]))
-logger.out('|----------------------------------------------------------|')
-logger.out('')
-
-logger.out('Starting pvcnoded on host {}'.format(myfqdn), state='s')
-
-# Define some colours for future messages if applicable
-if config['log_colours']:
-    fmt_end = logger.fmt_end
-    fmt_bold = logger.fmt_bold
-    fmt_blue = logger.fmt_blue
-    fmt_cyan = logger.fmt_cyan
-    fmt_green = logger.fmt_green
-    fmt_yellow = logger.fmt_yellow
-    fmt_red = logger.fmt_red
-    fmt_purple = logger.fmt_purple
-else:
-    fmt_end = ''
-    fmt_bold = ''
-    fmt_blue = ''
-    fmt_cyan = ''
-    fmt_green = ''
-    fmt_yellow = ''
-    fmt_red = ''
-    fmt_purple = ''
-
-###############################################################################
-# PHASE 2a - Activate SR-IOV support
-###############################################################################
-
-# This happens before other networking steps to enable using VFs for cluster functions.
-if enable_networking and enable_sriov:
-    logger.out('Setting up SR-IOV device support', state='i')
-    # Enable unsafe interruptts for the vfio_iommu_type1 kernel module
-    try:
-        common.run_os_command('modprobe vfio_iommu_type1 allow_unsafe_interrupts=1')
-        with open('/sys/module/vfio_iommu_type1/parameters/allow_unsafe_interrupts', 'w') as mfh:
-            mfh.write('Y')
-    except Exception:
-        logger.out('Failed to enable kernel modules; SR-IOV may fail.', state='w')
-
-    # Loop through our SR-IOV NICs and enable the numvfs for each
-    for device in config['sriov_device']:
-        logger.out('Preparing SR-IOV PF {} with {} VFs'.format(device['phy'], device['vfcount']), state='i')
-        try:
-            with open('/sys/class/net/{}/device/sriov_numvfs'.format(device['phy']), 'r') as vfh:
-                current_sriov_count = vfh.read().strip()
-            with open('/sys/class/net/{}/device/sriov_numvfs'.format(device['phy']), 'w') as vfh:
-                vfh.write(str(device['vfcount']))
-        except FileNotFoundError:
-            logger.out('Failed to open SR-IOV configuration for PF {}; device may not support SR-IOV.'.format(device), state='w')
-        except OSError:
-            logger.out('Failed to set SR-IOV VF count for PF {} to {}; already set to {}.'.format(device['phy'], device['vfcount'], current_sriov_count), state='w')
-
-        if device.get('mtu', None) is not None:
-            logger.out('Setting SR-IOV PF {} to MTU {}'.format(device['phy'], device['mtu']), state='i')
-            common.run_os_command('ip link set {} mtu {} up'.format(device['phy'], device['mtu']))
-
-
-###############################################################################
-# PHASE 2b - Create local IP addresses for static networks
-###############################################################################
-
-if enable_networking:
-    # VNI configuration
-    vni_dev = config['vni_dev']
-    vni_mtu = config['vni_mtu']
-    vni_dev_ip = config['vni_dev_ip']
-    logger.out('Setting up VNI network interface {} with MTU {}'.format(vni_dev, vni_mtu), state='i')
-    common.run_os_command('ip link set {} mtu {} up'.format(vni_dev, vni_mtu))
-
-    # Cluster bridge configuration
-    logger.out('Setting up Cluster network bridge on interface {} with IP {}'.format(vni_dev, vni_dev_ip), state='i')
-    common.run_os_command('brctl addbr brcluster')
-    common.run_os_command('brctl addif brcluster {}'.format(vni_dev))
-    common.run_os_command('ip link set brcluster mtu {} up'.format(vni_mtu))
-    common.run_os_command('ip address add {} dev {}'.format(vni_dev_ip, 'brcluster'))
-
-    # Storage configuration
-    storage_dev = config['storage_dev']
-    storage_mtu = config['storage_mtu']
-    storage_dev_ip = config['storage_dev_ip']
-    logger.out('Setting up Storage network interface {} with MTU {}'.format(storage_dev, vni_mtu), state='i')
-    common.run_os_command('ip link set {} mtu {} up'.format(storage_dev, storage_mtu))
-
-    # Storage bridge configuration
-    if storage_dev == vni_dev:
-        logger.out('Adding Storage network IP {} to VNI Cluster bridge brcluster'.format(storage_dev_ip), state='i')
-        common.run_os_command('ip address add {} dev {}'.format(storage_dev_ip, 'brcluster'))
+    if config['node_hostname'] in coordinator_nodes:
+        # We are indeed a coordinator node
+        config['daemon_mode'] = 'coordinator'
+        logger.out(f'This node is a {logger.fmt_blue}coordinator{logger.fmt_end}', state='i')
     else:
-        logger.out('Setting up Storage network bridge on interface {} with IP {}'.format(vni_dev, vni_dev_ip), state='i')
-        common.run_os_command('brctl addbr brstorage')
-        common.run_os_command('brctl addif brstorage {}'.format(storage_dev))
-        common.run_os_command('ip link set brstorage mtu {} up'.format(storage_mtu))
-        common.run_os_command('ip address add {} dev {}'.format(storage_dev_ip, 'brstorage'))
+        # We are a hypervisor node
+        config['daemon_mode'] = 'hypervisor'
+        logger.out(f'This node is a {logger.fmt_cyan}hypervisor{logger.fmt_end}', state='i')
 
-    # Upstream configuration
-    upstream_dev = config['upstream_dev']
-    upstream_mtu = config['upstream_mtu']
-    upstream_dev_ip = config['upstream_dev_ip']
-    logger.out('Setting up Upstream network interface {} with MTU {}'.format(upstream_dev, upstream_mtu), state='i')
-    common.run_os_command('ip link set {} mtu {} up'.format(upstream_dev, upstream_mtu))
+    pvcnoded.util.services.start_system_services(logger, config)
 
-    # Upstream bridge configuration
-    if upstream_dev == vni_dev:
-        logger.out('Adding Upstream network IP {} to VNI Cluster bridge brcluster'.format(upstream_dev_ip), state='i')
-        common.run_os_command('ip address add {} dev {}'.format(upstream_dev_ip, 'brcluster'))
-    else:
-        logger.out('Setting up Upstream network bridge on interface {} with IP {}'.format(vni_dev, vni_dev_ip), state='i')
-        common.run_os_command('brctl addbr brupstream')
-        common.run_os_command('brctl addif brupstream {}'.format(upstream_dev))
-        common.run_os_command('ip link set brupstream mtu {} up'.format(upstream_mtu))
-        common.run_os_command('ip address add {} dev {}'.format(upstream_dev_ip, 'brupstream'))
+    # Connect to Zookeeper and return our handler and current schema version
+    zkhandler, node_schema_version = pvcnoded.util.zookeeper.connect(logger, config)
 
-    # Add upstream default gateway
-    upstream_gateway = config.get('upstream_gateway', None)
-    if upstream_gateway:
-        logger.out('Setting up Upstream default gateway IP {}'.format(upstream_gateway), state='i')
-        if upstream_dev == vni_dev:
-            common.run_os_command('ip route add default via {} dev {}'.format(upstream_gateway, 'brcluster'))
+    # Watch the global schema version key and hot-update this node when it changes
+    # This key is only changed by the API, once it has seen that all nodes can update
+    @zkhandler.zk_conn.DataWatch(zkhandler.schema.path('base.schema.version'))
+    def update_schema(new_schema_version, stat, event=''):
+        nonlocal zkhandler, keepalive_timer, node_schema_version
+
+        try:
+            new_schema_version = int(new_schema_version.decode('ascii'))
+        except Exception:  # undecodable or non-numeric key data is treated as version 0
+            new_schema_version = 0
+
+        if new_schema_version == node_schema_version:
+            return True  # already running this schema version; nothing to do
+
+        logger.out('Hot update of schema version started', state='s')
+        logger.out(f'Current version: {node_schema_version}  New version: {new_schema_version}', state='s')
+
+        # Prevent any keepalive updates while this happens
+        if keepalive_timer is not None:
+            pvcnoded.util.keepalive.stop_keepalive_timer(logger, keepalive_timer)
+            sleep(1)
+
+        # Perform the migration (primary only)
+        if zkhandler.read('base.config.primary_node') == config['node_hostname']:
+            logger.out('Primary node acquiring exclusive lock', state='s')
+            # Wait for things to settle
+            sleep(0.5)
+            # Acquire a write lock on the root key
+            with zkhandler.exclusivelock('base.schema.version'):
+                # Migrate forward or roll back, depending on the direction of the change
+                logger.out('Performing schema update', state='s')
+                if new_schema_version > node_schema_version:
+                    zkhandler.schema.migrate(zkhandler, new_schema_version)
+                if new_schema_version < node_schema_version:
+                    zkhandler.schema.rollback(zkhandler, new_schema_version)
+        # Wait for the exclusive lock to be lifted
         else:
-            common.run_os_command('ip route add default via {} dev {}'.format(upstream_gateway, 'brupstream'))
+            logger.out('Non-primary node acquiring read lock', state='s')
+            # Wait for things to settle
+            sleep(1)
+            # Wait for a read lock
+            lock = zkhandler.readlock('base.schema.version')
+            lock.acquire()
+            # Wait a bit more for the primary to return to normal
+            sleep(1)
 
-    logger.out('Waiting 3s for networking to come up', state='s')
-    time.sleep(3)
-
-###############################################################################
-# PHASE 2c - Prepare sysctl for pvcnoded
-###############################################################################
-
-if enable_networking:
-    # Enable routing functions
-    common.run_os_command('sysctl net.ipv4.ip_forward=1')
-    common.run_os_command('sysctl net.ipv6.ip_forward=1')
-
-    # Send redirects
-    common.run_os_command('sysctl net.ipv4.conf.all.send_redirects=1')
-    common.run_os_command('sysctl net.ipv4.conf.default.send_redirects=1')
-    common.run_os_command('sysctl net.ipv6.conf.all.send_redirects=1')
-    common.run_os_command('sysctl net.ipv6.conf.default.send_redirects=1')
-
-    # Accept source routes
-    common.run_os_command('sysctl net.ipv4.conf.all.accept_source_route=1')
-    common.run_os_command('sysctl net.ipv4.conf.default.accept_source_route=1')
-    common.run_os_command('sysctl net.ipv6.conf.all.accept_source_route=1')
-    common.run_os_command('sysctl net.ipv6.conf.default.accept_source_route=1')
-
-    # Disable RP filtering on the VNI Cluster and Upstream interfaces (to allow traffic pivoting)
-    common.run_os_command('sysctl net.ipv4.conf.{}.rp_filter=0'.format(config['vni_dev']))
-    common.run_os_command('sysctl net.ipv4.conf.{}.rp_filter=0'.format(config['upstream_dev']))
-    common.run_os_command('sysctl net.ipv4.conf.brcluster.rp_filter=0')
-    common.run_os_command('sysctl net.ipv4.conf.brupstream.rp_filter=0')
-    common.run_os_command('sysctl net.ipv6.conf.{}.rp_filter=0'.format(config['vni_dev']))
-    common.run_os_command('sysctl net.ipv6.conf.{}.rp_filter=0'.format(config['upstream_dev']))
-    common.run_os_command('sysctl net.ipv6.conf.brcluster.rp_filter=0')
-    common.run_os_command('sysctl net.ipv6.conf.brupstream.rp_filter=0')
-
-###############################################################################
-# PHASE 3a - Determine coordinator mode
-###############################################################################
-
-# What is the list of coordinator hosts
-coordinator_nodes = config['coordinators']
-
-if myhostname in coordinator_nodes:
-    # We are indeed a coordinator host
-    config['daemon_mode'] = 'coordinator'
-    # Start the zookeeper service using systemctl
-    logger.out('Node is a ' + fmt_blue + 'coordinator' + fmt_end, state='i')
-else:
-    config['daemon_mode'] = 'hypervisor'
-
-###############################################################################
-# PHASE 3b - Start system daemons
-###############################################################################
-if config['daemon_mode'] == 'coordinator':
-    logger.out('Starting Zookeeper daemon', state='i')
-    common.run_os_command('systemctl start zookeeper.service')
-
-if enable_hypervisor:
-    logger.out('Starting Libvirt daemon', state='i')
-    common.run_os_command('systemctl start libvirtd.service')
-
-if enable_networking:
-    if config['daemon_mode'] == 'coordinator':
-        logger.out('Starting Patroni daemon', state='i')
-        common.run_os_command('systemctl start patroni.service')
-        logger.out('Starting FRRouting daemon', state='i')
-        common.run_os_command('systemctl start frr.service')
-
-if enable_storage:
-    if config['daemon_mode'] == 'coordinator':
-        logger.out('Starting Ceph monitor daemon', state='i')
-        common.run_os_command('systemctl start ceph-mon@{}'.format(myhostname))
-        logger.out('Starting Ceph manager daemon', state='i')
-        common.run_os_command('systemctl start ceph-mgr@{}'.format(myhostname))
-
-logger.out('Waiting 3s for daemons to start', state='s')
-time.sleep(3)
-
-###############################################################################
-# PHASE 4 - Attempt to connect to the coordinators and start zookeeper client
-###############################################################################
-
-# Create an instance of the handler
-zkhandler = ZKHandler(config, logger=logger)
-
-try:
-    logger.out('Connecting to Zookeeper cluster nodes {}'.format(config['coordinators']), state='i')
-    # Start connection
-    zkhandler.connect(persistent=True)
-except Exception as e:
-    logger.out('ERROR: Failed to connect to Zookeeper cluster: {}'.format(e), state='e')
-    os._exit(1)
-
-logger.out('Validating Zookeeper schema', state='i')
-
-try:
-    node_schema_version = int(zkhandler.read(('node.data.active_schema', myhostname)))
-except Exception:
-    node_schema_version = int(zkhandler.read('base.schema.version'))
-    if node_schema_version is None:
-        node_schema_version = 0
-    zkhandler.write([
-        (('node.data.active_schema', myhostname), node_schema_version)
-    ])
-
-# Load in the current node schema version
-zkhandler.schema.load(node_schema_version)
-
-# Record the latest intalled schema version
-latest_schema_version = zkhandler.schema.find_latest()
-logger.out('Latest installed schema is {}'.format(latest_schema_version), state='i')
-zkhandler.write([
-    (('node.data.latest_schema', myhostname), latest_schema_version)
-])
-
-
-# Watch for a global schema update and fire
-# This will only change by the API when triggered after seeing all nodes can update
-@zkhandler.zk_conn.DataWatch(zkhandler.schema.path('base.schema.version'))
-def update_schema(new_schema_version, stat, event=''):
-    global zkhandler, update_timer, node_schema_version
-
-    try:
-        new_schema_version = int(new_schema_version.decode('ascii'))
-    except Exception:
-        new_schema_version = 0
-
-    if new_schema_version == node_schema_version:
-        return True
-
-    logger.out('Hot update of schema version started', state='s')
-    logger.out('Current version: {}  New version: {}'.format(node_schema_version, new_schema_version), state='s')
-
-    # Prevent any keepalive updates while this happens
-    if update_timer is not None:
-        stopKeepaliveTimer()
-        time.sleep(1)
-
-    # Perform the migration (primary only)
-    if zkhandler.read('base.config.primary_node') == myhostname:
-        logger.out('Primary node acquiring exclusive lock', state='s')
-        # Wait for things to settle
-        time.sleep(0.5)
-        # Acquire a write lock on the root key
-        with zkhandler.exclusivelock('base.schema.version'):
-            # Perform the schema migration tasks
-            logger.out('Performing schema update', state='s')
-            if new_schema_version > node_schema_version:
-                zkhandler.schema.migrate(zkhandler, new_schema_version)
-            if new_schema_version < node_schema_version:
-                zkhandler.schema.rollback(zkhandler, new_schema_version)
-    # Wait for the exclusive lock to be lifted
-    else:
-        logger.out('Non-primary node acquiring read lock', state='s')
-        # Wait for things to settle
-        time.sleep(1)
-        # Wait for a read lock
-        lock = zkhandler.readlock('base.schema.version')
-        lock.acquire()
-        # Wait a bit more for the primary to return to normal
-        time.sleep(1)
-
-    # Update the local schema version
-    logger.out('Updating node target schema version', state='s')
-    zkhandler.write([
-        (('node.data.active_schema', myhostname), new_schema_version)
-    ])
-    node_schema_version = new_schema_version
-
-    # Restart the API daemons if applicable
-    logger.out('Restarting services', state='s')
-    common.run_os_command('systemctl restart pvcapid-worker.service')
-    if zkhandler.read('base.config.primary_node') == myhostname:
-        common.run_os_command('systemctl restart pvcapid.service')
-
-    # Restart ourselves with the new schema
-    logger.out('Reloading node daemon', state='s')
-    try:
-        zkhandler.disconnect(persistent=True)
-        del zkhandler
-    except Exception:
-        pass
-    os.execv(sys.argv[0], sys.argv)
-
-
-# If we are the last node to get a schema update, fire the master update
-if latest_schema_version > node_schema_version:
-    node_latest_schema_version = list()
-    for node in zkhandler.children('base.node'):
-        node_latest_schema_version.append(int(zkhandler.read(('node.data.latest_schema', node))))
-
-    # This is true if all elements of the latest schema version are identical to the latest version,
-    # i.e. they have all had the latest schema installed and ready to load.
-    if node_latest_schema_version.count(latest_schema_version) == len(node_latest_schema_version):
+        # Update the local schema version
+        logger.out('Updating node target schema version', state='s')
         zkhandler.write([
-            ('base.schema.version', latest_schema_version)
+            (('node.data.active_schema', config['node_hostname']), new_schema_version)
+        ])
+        node_schema_version = new_schema_version
+
+        # Restart the API daemons if applicable
+        logger.out('Restarting services', state='s')
+        common.run_os_command('systemctl restart pvcapid-worker.service')
+        if zkhandler.read('base.config.primary_node') == config['node_hostname']:
+            common.run_os_command('systemctl restart pvcapid.service')
+
+        # Restart ourselves with the new schema
+        logger.out('Reloading node daemon', state='s')
+        try:
+            zkhandler.disconnect(persistent=True)
+            del zkhandler
+        except Exception:
+            pass
+        os.execv(sys.argv[0], sys.argv)
+
+    # Validate the schema
+    pvcnoded.util.zookeeper.validate_schema(logger, zkhandler)
+
+    # Define a cleanup function
+    def cleanup(failure=False):
+        nonlocal logger, zkhandler, keepalive_timer, d_domain
+
+        logger.out('Terminating pvcnoded and cleaning up', state='s')
+
+        # Set shutdown state in Zookeeper
+        zkhandler.write([
+            (('node.state.daemon', config['node_hostname']), 'shutdown')
         ])
 
-# Validate our schema against the active version
-if not zkhandler.schema.validate(zkhandler, logger):
-    logger.out('Found schema violations, applying', state='i')
-    zkhandler.schema.apply(zkhandler)
-else:
-    logger.out('Schema successfully validated', state='o')
+        # Waiting for any flushes to complete
+        logger.out('Waiting for any active flushes', state='s')
+        if this_node is not None:
+            while this_node.flush_thread is not None:
+                sleep(0.5)
 
+        # Stop console logging on all VMs
+        logger.out('Stopping domain console watchers', state='s')
+        if d_domain is not None:
+            for domain in d_domain:
+                if d_domain[domain].getnode() == config['node_hostname']:
+                    try:
+                        d_domain[domain].console_log_instance.stop()
+                    except Exception:
+                        pass
 
-###############################################################################
-# PHASE 5 - Gracefully handle termination
-###############################################################################
+        # Force into secondary coordinator state if needed
+        try:
+            if this_node.router_state == 'primary':
+                zkhandler.write([
+                    ('base.config.primary_node', 'none')
+                ])
+                logger.out('Waiting for primary migration', state='s')
+                while this_node.router_state != 'secondary':
+                    sleep(0.5)
+        except Exception:
+            pass
 
+        # Stop keepalive thread
+        try:
+            pvcnoded.util.keepalive.stop_keepalive_timer(logger, keepalive_timer)
 
-# Cleanup function
-def cleanup(failure=False):
-    global logger, zkhandler, update_timer, d_domain
+            logger.out('Performing final keepalive update', state='s')
+            pvcnoded.util.keepalive.node_keepalive(logger, config, zkhandler, this_node)
+        except Exception:
+            pass
 
-    logger.out('Terminating pvcnoded and cleaning up', state='s')
+        # Set stop state in Zookeeper
+        zkhandler.write([
+            (('node.state.daemon', config['node_hostname']), 'stop')
+        ])
 
-    # Set shutdown state in Zookeeper
-    zkhandler.write([
-        (('node.state.daemon', myhostname), 'shutdown')
-    ])
+        # Forcibly terminate dnsmasq because it gets stuck sometimes
+        common.run_os_command('killall dnsmasq')
 
-    # Waiting for any flushes to complete
-    logger.out('Waiting for any active flushes', state='s')
-    if this_node is not None:
-        while this_node.flush_thread is not None:
-            time.sleep(0.5)
+        # Close the Zookeeper connection
+        try:
+            zkhandler.disconnect(persistent=True)
+            del zkhandler
+        except Exception:
+            pass
 
-    # Stop console logging on all VMs
-    logger.out('Stopping domain console watchers', state='s')
-    if d_domain is not None:
-        for domain in d_domain:
-            if d_domain[domain].getnode() == myhostname:
-                try:
-                    d_domain[domain].console_log_instance.stop()
-                except Exception:
-                    pass
+        logger.out('Terminated pvc daemon', state='s')
+        logger.terminate()
 
-    # Force into secondary coordinator state if needed
+        if failure:
+            retcode = 1
+        else:
+            retcode = 0
+
+        os._exit(retcode)
+
+    # Termination function
+    def term(signum='', frame=''):
+        cleanup(failure=False)
+
+    # Hangup (logrotate) function
+    def hup(signum='', frame=''):
+        if config['file_logging']:
+            logger.hup()
+
+    # Handle signals gracefully
+    signal.signal(signal.SIGTERM, term)
+    signal.signal(signal.SIGINT, term)
+    signal.signal(signal.SIGQUIT, term)
+    signal.signal(signal.SIGHUP, hup)
+
+    # Set up this node in Zookeeper
+    pvcnoded.util.zookeeper.setup_node(logger, config, zkhandler)
+
+    # Check that the primary node key exists and create it with us as primary if not
     try:
-        if this_node.router_state == 'primary':
+        current_primary = zkhandler.read('base.config.primary_node')
+    except Exception:
+        current_primary = 'none'
+
+    if current_primary and current_primary != 'none':
+        logger.out(f'Current primary node is {logger.fmt_blue}{current_primary}{logger.fmt_end}', state='i')
+    else:
+        if config['daemon_mode'] == 'coordinator':
+            logger.out('No primary node found; setting us as primary', state='i')
             zkhandler.write([
-                ('base.config.primary_node', 'none')
+                ('base.config.primary_node', config['node_hostname'])
             ])
-            logger.out('Waiting for primary migration', state='s')
-            while this_node.router_state != 'secondary':
-                time.sleep(0.5)
-    except Exception:
-        pass
 
-    # Stop keepalive thread
-    try:
-        stopKeepaliveTimer()
+    # Ensure that IPMI is reachable and working
+    if not pvcnoded.util.fencing.verify_ipmi(config['ipmi_hostname'], config['ipmi_username'], config['ipmi_password']):
+        logger.out('Our IPMI is not reachable; fencing of this node will likely fail', state='w')
 
-        logger.out('Performing final keepalive update', state='s')
-        node_keepalive()
-    except Exception:
-        pass
-
-    # Set stop state in Zookeeper
-    zkhandler.write([
-        (('node.state.daemon', myhostname), 'stop')
-    ])
-
-    # Forcibly terminate dnsmasq because it gets stuck sometimes
-    common.run_os_command('killall dnsmasq')
-
-    # Close the Zookeeper connection
-    try:
-        zkhandler.disconnect(persistent=True)
-        del zkhandler
-    except Exception:
-        pass
-
-    logger.out('Terminated pvc daemon', state='s')
-    logger.terminate()
-
-    if failure:
-        retcode = 1
-    else:
-        retcode = 0
-
-    os._exit(retcode)
-
-
-# Termination function
-def term(signum='', frame=''):
-    cleanup(failure=False)
-
-
-# Hangup (logrotate) function
-def hup(signum='', frame=''):
-    if config['file_logging']:
-        logger.hup()
-
-
-# Handle signals gracefully
-signal.signal(signal.SIGTERM, term)
-signal.signal(signal.SIGINT, term)
-signal.signal(signal.SIGQUIT, term)
-signal.signal(signal.SIGHUP, hup)
-
-###############################################################################
-# PHASE 6 - Prepare host in Zookeeper
-###############################################################################
-
-# Check if our node exists in Zookeeper, and create it if not
-if config['daemon_mode'] == 'coordinator':
-    init_routerstate = 'secondary'
-else:
-    init_routerstate = 'client'
-
-if zkhandler.exists(('node', myhostname)):
-    logger.out("Node is " + fmt_green + "present" + fmt_end + " in Zookeeper", state='i')
-    # Update static data just in case it's changed
-    zkhandler.write([
-        (('node', myhostname), config['daemon_mode']),
-        (('node.mode', myhostname), config['daemon_mode']),
-        (('node.state.daemon', myhostname), 'init'),
-        (('node.state.router', myhostname), init_routerstate),
-        (('node.data.static', myhostname), ' '.join(staticdata)),
-        (('node.data.pvc_version', myhostname), version),
-        (('node.ipmi.hostname', myhostname), config['ipmi_hostname']),
-        (('node.ipmi.username', myhostname), config['ipmi_username']),
-        (('node.ipmi.password', myhostname), config['ipmi_password']),
-    ])
-else:
-    logger.out("Node is " + fmt_red + "absent" + fmt_end + " in Zookeeper; adding new node", state='i')
-    keepalive_time = int(time.time())
-    zkhandler.write([
-        (('node', myhostname), config['daemon_mode']),
-        (('node.keepalive', myhostname), str(keepalive_time)),
-        (('node.mode', myhostname), config['daemon_mode']),
-        (('node.state.daemon', myhostname), 'init'),
-        (('node.state.domain', myhostname), 'flushed'),
-        (('node.state.router', myhostname), init_routerstate),
-        (('node.data.static', myhostname), ' '.join(staticdata)),
-        (('node.data.pvc_version', myhostname), version),
-        (('node.ipmi.hostname', myhostname), config['ipmi_hostname']),
-        (('node.ipmi.username', myhostname), config['ipmi_username']),
-        (('node.ipmi.password', myhostname), config['ipmi_password']),
-        (('node.memory.total', myhostname), '0'),
-        (('node.memory.used', myhostname), '0'),
-        (('node.memory.free', myhostname), '0'),
-        (('node.memory.allocated', myhostname), '0'),
-        (('node.memory.provisioned', myhostname), '0'),
-        (('node.vcpu.allocated', myhostname), '0'),
-        (('node.cpu.load', myhostname), '0.0'),
-        (('node.running_domains', myhostname), '0'),
-        (('node.count.provisioned_domains', myhostname), '0'),
-        (('node.count.networks', myhostname), '0'),
-    ])
-
-# Check that the primary key exists, and create it with us as master if not
-try:
-    current_primary = zkhandler.read('base.config.primary_node')
-except kazoo.exceptions.NoNodeError:
-    current_primary = 'none'
-
-if current_primary and current_primary != 'none':
-    logger.out('Current primary node is {}{}{}.'.format(fmt_blue, current_primary, fmt_end), state='i')
-else:
-    if config['daemon_mode'] == 'coordinator':
-        logger.out('No primary node found; setting us as primary.', state='i')
-        zkhandler.write([
-            ('base.config.primary_node', myhostname)
-        ])
-
-###############################################################################
-# PHASE 7a - Ensure IPMI is reachable and working
-###############################################################################
-if not fencing.verifyIPMI(config['ipmi_hostname'], config['ipmi_username'], config['ipmi_password']):
-    logger.out('Our IPMI is not reachable; fencing of this node will likely fail', state='w')
-
-###############################################################################
-# PHASE 7b - Ensure Libvirt is working
-###############################################################################
-
-if enable_hypervisor:
-    # Check that libvirtd is listening TCP
-    libvirt_check_name = "qemu+tcp://{}:16509/system".format(myhostname)
-    logger.out('Connecting to Libvirt daemon at {}'.format(libvirt_check_name), state='i')
-    try:
-        lv_conn = libvirt.open(libvirt_check_name)
-        lv_conn.close()
-    except Exception as e:
-        logger.out('ERROR: Failed to connect to Libvirt daemon: {}'.format(e), state='e')
+    # Validate libvirt
+    if not pvcnoded.util.libvirt.validate_libvirtd(logger, config):
         cleanup(failure=True)
 
-###############################################################################
-# PHASE 7c - Ensure NFT is running on the local host
-###############################################################################
+    # Set up NFT
+    pvcnoded.util.networking.create_nft_configuration(logger, config)
 
-if enable_networking:
-    logger.out("Creating NFT firewall configuration", state='i')
+    # Create our object dictionaries
+    logger.out('Setting up objects', state='i')
 
-    # Create our config dirs
-    common.run_os_command(
-        '/bin/mkdir --parents {}/networks'.format(
-            config['nft_dynamic_directory']
-        )
-    )
-    common.run_os_command(
-        '/bin/mkdir --parents {}/static'.format(
-            config['nft_dynamic_directory']
-        )
-    )
-    common.run_os_command(
-        '/bin/mkdir --parents {}'.format(
-            config['nft_dynamic_directory']
-        )
-    )
+    d_node = dict()
+    node_list = list()
+    d_network = dict()
+    network_list = list()
+    sriov_pf_list = list()
+    d_sriov_vf = dict()
+    sriov_vf_list = list()
+    d_domain = dict()
+    domain_list = list()
+    d_osd = dict()
+    osd_list = list()
+    d_pool = dict()
+    pool_list = list()
+    d_volume = dict()
+    volume_list = dict()
 
-    # Set up the basic features of the nftables firewall
-    nftables_base_rules = """# Base rules
-    flush ruleset
-    # Add the filter table and chains
-    add table inet filter
-    add chain inet filter forward {{type filter hook forward priority 0; }}
-    add chain inet filter input {{type filter hook input priority 0; }}
-    # Include static rules and network rules
-    include "{rulesdir}/static/*"
-    include "{rulesdir}/networks/*"
-    """.format(
-        rulesdir=config['nft_dynamic_directory']
-    )
-
-    # Write the basic firewall config
-    nftables_base_filename = '{}/base.nft'.format(config['nft_dynamic_directory'])
-    with open(nftables_base_filename, 'w') as nfbasefile:
-        nfbasefile.write(nftables_base_rules)
-    common.reload_firewall_rules(nftables_base_filename, logger=logger)
-
-###############################################################################
-# PHASE 7d - Ensure DNSMASQ is not running
-###############################################################################
-
-common.run_os_command('systemctl stop dnsmasq.service')
-
-###############################################################################
-# PHASE 8 - Set up our objects
-###############################################################################
-
-logger.out('Setting up objects', state='i')
-
-d_node = dict()
-d_network = dict()
-d_sriov_vf = dict()
-d_domain = dict()
-d_osd = dict()
-d_pool = dict()
-d_volume = dict()  # Dict of Dicts
-node_list = []
-network_list = []
-sriov_pf_list = []
-sriov_vf_list = []
-domain_list = []
-osd_list = []
-pool_list = []
-volume_list = dict()  # Dict of Lists
-
-if enable_networking:
-    # Create an instance of the DNS Aggregator and Metadata API if we're a coordinator
-    if config['daemon_mode'] == 'coordinator':
+    if config['enable_networking'] and config['daemon_mode'] == 'coordinator':
+        # Create an instance of the DNS Aggregator and Metadata API if we're a coordinator
         dns_aggregator = DNSAggregatorInstance.DNSAggregatorInstance(config, logger)
         metadata_api = MetadataAPIInstance.MetadataAPIInstance(zkhandler, config, logger)
     else:
         dns_aggregator = None
         metadata_api = None
-else:
-    dns_aggregator = None
-    metadata_api = None
 
+    #
+    # Zookeeper watchers for objects
+    #
 
-# Node objects
-@zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path('base.node'))
-def update_nodes(new_node_list):
-    global node_list, d_node
+    # Node objects
+    @zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path('base.node'))
+    def set_nodes(new_node_list):
+        nonlocal d_node, node_list
 
-    # Add any missing nodes to the list
-    for node in new_node_list:
-        if node not in node_list:
-            d_node[node] = NodeInstance.NodeInstance(node, myhostname, zkhandler, config, logger, d_node, d_network, d_domain, dns_aggregator, metadata_api)
+        # Add missing nodes to list
+        for node in [node for node in new_node_list if node not in node_list]:
+            d_node[node] = NodeInstance.NodeInstance(node, config['node_hostname'], zkhandler, config, logger, d_node, d_network, d_domain, dns_aggregator, metadata_api)
 
-    # Remove any deleted nodes from the list
-    for node in node_list:
-        if node not in new_node_list:
-            # Delete the object
+        # Remove deleted nodes from list
+        for node in [node for node in node_list if node not in new_node_list]:
             del(d_node[node])
 
-    # Update and print new list
-    node_list = new_node_list
-    logger.out('{}Node list:{} {}'.format(fmt_blue, fmt_end, ' '.join(node_list)), state='i')
-
-    # Update node objects' list
-    for node in d_node:
-        d_node[node].update_node_list(d_node)
-
-
-# Alias for our local node (passed to network and domain objects)
-this_node = d_node[myhostname]
-
-
-# Maintenance mode
-@zkhandler.zk_conn.DataWatch(zkhandler.schema.path('base.config.maintenance'))
-def set_maintenance(_maintenance, stat, event=''):
-    global maintenance
-    try:
-        maintenance = bool(strtobool(_maintenance.decode('ascii')))
-    except Exception:
-        maintenance = False
-
-
-# Primary node
-@zkhandler.zk_conn.DataWatch(zkhandler.schema.path('base.config.primary_node'))
-def update_primary(new_primary, stat, event=''):
-    try:
-        new_primary = new_primary.decode('ascii')
-    except AttributeError:
-        new_primary = 'none'
-    key_version = stat.version
-
-    if new_primary != this_node.primary_node:
-        if config['daemon_mode'] == 'coordinator':
-            # We're a coordinator and there is no primary
-            if new_primary == 'none':
-                if this_node.daemon_state == 'run' and this_node.router_state not in ['primary', 'takeover', 'relinquish']:
-                    logger.out('Contending for primary coordinator state', state='i')
-                    # Acquire an exclusive lock on the primary_node key
-                    primary_lock = zkhandler.exclusivelock('base.config.primary_node')
-                    try:
-                        # This lock times out after 0.4s, which is 0.1s less than the pre-takeover
-                        # timeout below, thus ensuring that a primary takeover will not deadlock
-                        # against a node that failed the contention
-                        primary_lock.acquire(timeout=0.4)
-                        # Ensure when we get the lock that the versions are still consistent and that
-                        # another node hasn't already acquired primary state
-                        if key_version == zkhandler.zk_conn.get(zkhandler.schema.path('base.config.primary_node'))[1].version:
-                            zkhandler.write([
-                                ('base.config.primary_node', myhostname)
-                            ])
-                        # Cleanly release the lock
-                        primary_lock.release()
-                    # We timed out acquiring a lock, which means we failed contention, so just pass
-                    except Exception:
-                        pass
-            elif new_primary == myhostname:
-                if this_node.router_state == 'secondary':
-                    time.sleep(0.5)
-                    zkhandler.write([
-                        (('node.state.router', myhostname), 'takeover')
-                    ])
-            else:
-                if this_node.router_state == 'primary':
-                    time.sleep(0.5)
-                    zkhandler.write([
-                        (('node.state.router', myhostname), 'relinquish')
-                    ])
-        else:
-            zkhandler.write([
-                (('node.state.router', myhostname), 'client')
-            ])
+        node_list = new_node_list
+        logger.out(f'{logger.fmt_blue}Node list:{logger.fmt_end} {" ".join(node_list)}', state='i')
 
+        # Update node objects lists
         for node in d_node:
-            d_node[node].primary_node = new_primary
+            d_node[node].update_node_list(d_node)
 
+    # Create helpful alias for this node
+    this_node = d_node[config['node_hostname']]
 
-if enable_networking:
-    # Network objects
-    @zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path('base.network'))
-    def update_networks(new_network_list):
-        global network_list, d_network
+    # Maintenance status
+    @zkhandler.zk_conn.DataWatch(zkhandler.schema.path('base.config.maintenance'))
+    def update_maintenance(_maintenance, stat):
+        try:
+            maintenance = bool(strtobool(_maintenance.decode('ascii')))
+        except Exception:
+            maintenance = False
 
-        # Add any missing networks to the list
-        for network in new_network_list:
-            if network not in network_list:
+        this_node.maintenance = maintenance
+
+    # Primary node
+    @zkhandler.zk_conn.DataWatch(zkhandler.schema.path('base.config.primary_node'))
+    def update_primary_node(new_primary, stat, event=''):
+        try:
+            new_primary = new_primary.decode('ascii')
+        except AttributeError:
+            new_primary = 'none'
+        key_version = stat.version
+
+        # TODO: Move this to the Node structure
+        if new_primary != this_node.primary_node:
+            if config['daemon_mode'] == 'coordinator':
+                # We're a coordinator and there's no primary
+                if new_primary == 'none':
+                    if this_node.daemon_state == 'run' and this_node.router_state not in ['primary', 'takeover', 'relinquish']:
+                        logger.out('Contending for primary coordinator state', state='i')
+                        # Acquire an exclusive lock on the primary_node key
+                        primary_lock = zkhandler.exclusivelock('base.config.primary_node')
+                        try:
+                            # This lock times out after 0.4s, which is 0.1s less than the pre-takeover
+                            # timeout below. This ensures a primary takeover will not deadlock against
+                            # a node which has failed the contention
+                            primary_lock.acquire(timeout=0.4)
+                            # Ensure that when we get the lock the versions are still consistent and
+                            # that another node hasn't already acquired the primary state (maybe we're
+                            # extremely slow to respond)
+                            if key_version == zkhandler.zk_conn.get(zkhandler.schema.path('base.config.primary_node'))[1].version:
+                                # Set the primary to us
+                                logger.out('Acquiring primary coordinator state', state='o')
+                                zkhandler.write([
+                                    ('base.config.primary_node', config['node_hostname'])
+                                ])
+                            # Cleanly release the lock
+                            primary_lock.release()
+                        # We timed out acquiring a lock, or failed to write, which means we failed the
+                        # contention and should just log that
+                        except Exception:
+                            logger.out('Timed out contending for primary coordinator state', state='i')
+                elif new_primary == config['node_hostname']:
+                    if this_node.router_state == 'secondary':
+                        # Wait for 0.5s to ensure other contentions time out, then take over
+                        sleep(0.5)
+                        zkhandler.write([
+                            (('node.state.router', config['node_hostname']), 'takeover')
+                        ])
+                else:
+                    if this_node.router_state == 'primary':
+                        # Wait for 0.5s to ensure other contentions time out, then relinquish
+                        sleep(0.5)
+                        zkhandler.write([
+                            (('node.state.router', config['node_hostname']), 'relinquish')
+                        ])
+            else:
+                zkhandler.write([
+                    (('node.state.router', config['node_hostname']), 'client')
+                ])
+
+            # TODO: Turn this into a function like the others for clarity
+            for node in d_node:
+                d_node[node].primary_node = new_primary
+
+    if config['enable_networking']:
+        # Network objects
+        @zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path('base.network'))
+        def update_networks(new_network_list):
+            nonlocal network_list, d_network
+
+            # Add any missing networks to the list
+            for network in [network for network in new_network_list if network not in network_list]:
                 d_network[network] = VXNetworkInstance.VXNetworkInstance(network, zkhandler, config, logger, this_node, dns_aggregator)
+                # TODO: Move this to the Network structure
                 if config['daemon_mode'] == 'coordinator' and d_network[network].nettype == 'managed':
                     try:
                         dns_aggregator.add_network(d_network[network])
                     except Exception as e:
-                        logger.out('Failed to create DNS Aggregator for network {}: {}'.format(network, e), 'w')
+                        logger.out(f'Failed to create DNS Aggregator for network {network}: {e}', state='w')
                 # Start primary functionality
                 if this_node.router_state == 'primary' and d_network[network].nettype == 'managed':
                     d_network[network].createGateways()
                     d_network[network].startDHCPServer()
 
-        # Remove any deleted networks from the list
-        for network in network_list:
-            if network not in new_network_list:
+            # Remove any deleted networks from the list
+            for network in [network for network in network_list if network not in new_network_list]:
+                # TODO: Move this to the Network structure
                 if d_network[network].nettype == 'managed':
                     # Stop primary functionality
                     if this_node.router_state == 'primary':
                         d_network[network].stopDHCPServer()
                         d_network[network].removeGateways()
                         dns_aggregator.remove_network(d_network[network])
-                    # Stop general functionality
+                    # Stop firewalling
                     d_network[network].removeFirewall()
+                # Delete the network
                 d_network[network].removeNetwork()
-                # Delete the object
                 del(d_network[network])
 
-        # Update and print new list
-        network_list = new_network_list
-        logger.out('{}Network list:{} {}'.format(fmt_blue, fmt_end, ' '.join(network_list)), state='i')
+            # Update the new list
+            network_list = new_network_list
+            logger.out(f'{logger.fmt_blue}Network list:{logger.fmt_end} {" ".join(network_list)}', state='i')
 
-        # Update node objects' list
-        for node in d_node:
-            d_node[node].update_network_list(d_network)
+            # Update node objects list
+            for node in d_node:
+                d_node[node].update_network_list(d_network)
 
-    # Add the SR-IOV PFs and VFs to Zookeeper
-    # These do not behave like the objects; they are not dynamic (the API cannot change them), and they
-    # exist for the lifetime of this Node instance. The objects are set here in Zookeeper on a per-node
-    # basis, under the Node configuration tree.
-    # MIGRATION: The schema.schema.get ensures that the current active Schema contains the required keys
-    if enable_sriov and zkhandler.schema.schema.get('sriov_pf', None) is not None:
-        vf_list = list()
-        for device in config['sriov_device']:
-            pf = device['phy']
-            vfcount = device['vfcount']
-            if device.get('mtu', None) is None:
-                mtu = 1500
-            else:
-                mtu = device['mtu']
+        # Add the SR-IOV PFs and VFs to Zookeeper
+        # These do not behave like the objects; they are not dynamic (the API cannot change them), and they
+        # exist for the lifetime of this Node instance. The objects are set here in Zookeeper on a per-node
+        # basis, under the Node configuration tree.
+        # MIGRATION: The schema.schema.get ensures that the current active Schema contains the required keys
+        if config['enable_sriov'] and zkhandler.schema.schema.get('sriov_pf', None) is not None:
+            vf_list = list()
+            for device in config['sriov_device']:
+                pf = device['phy']
+                vfcount = device['vfcount']
+                if device.get('mtu', None) is None:
+                    mtu = 1500
+                else:
+                    mtu = device['mtu']
 
-            # Create the PF device in Zookeeper
-            zkhandler.write([
-                (('node.sriov.pf', myhostname, 'sriov_pf', pf), ''),
-                (('node.sriov.pf', myhostname, 'sriov_pf.mtu', pf), mtu),
-                (('node.sriov.pf', myhostname, 'sriov_pf.vfcount', pf), vfcount),
-            ])
-            # Append the device to the list of PFs
-            sriov_pf_list.append(pf)
+                # Create the PF device in Zookeeper
+                zkhandler.write([
+                    (('node.sriov.pf', config['node_hostname'], 'sriov_pf', pf), ''),
+                    (('node.sriov.pf', config['node_hostname'], 'sriov_pf.mtu', pf), mtu),
+                    (('node.sriov.pf', config['node_hostname'], 'sriov_pf.vfcount', pf), vfcount),
+                ])
+                # Append the device to the list of PFs
+                sriov_pf_list.append(pf)
 
-            # Get the list of VFs from `ip link show`
-            vf_list = json.loads(common.run_os_command('ip --json link show {}'.format(pf))[1])[0].get('vfinfo_list', [])
-            for vf in vf_list:
-                # {
-                #   'vf': 3,
-                #   'link_type': 'ether',
-                #   'address': '00:00:00:00:00:00',
-                #   'broadcast': 'ff:ff:ff:ff:ff:ff',
-                #   'vlan_list': [{'vlan': 101, 'qos': 2}],
-                #   'rate': {'max_tx': 0, 'min_tx': 0},
-                #   'spoofchk': True,
-                #   'link_state': 'auto',
-                #   'trust': False,
-                #   'query_rss_en': False
-                # }
-                vfphy = '{}v{}'.format(pf, vf['vf'])
+                # Get the list of VFs from `ip link show`
+                vf_list = json.loads(common.run_os_command(f'ip --json link show {pf}')[1])[0].get('vfinfo_list', [])
+                for vf in vf_list:
+                    # {
+                    #   'vf': 3,
+                    #   'link_type': 'ether',
+                    #   'address': '00:00:00:00:00:00',
+                    #   'broadcast': 'ff:ff:ff:ff:ff:ff',
+                    #   'vlan_list': [{'vlan': 101, 'qos': 2}],
+                    #   'rate': {'max_tx': 0, 'min_tx': 0},
+                    #   'spoofchk': True,
+                    #   'link_state': 'auto',
+                    #   'trust': False,
+                    #   'query_rss_en': False
+                    # }
+                    vfphy = f'{pf}v{vf["vf"]}'
 
-                # Get the PCIe bus information
-                dev_pcie_path = None
-                try:
-                    with open('/sys/class/net/{}/device/uevent'.format(vfphy)) as vfh:
-                        dev_uevent = vfh.readlines()
-                    for line in dev_uevent:
-                        if re.match(r'^PCI_SLOT_NAME=.*', line):
-                            dev_pcie_path = line.rstrip().split('=')[-1]
-                except FileNotFoundError:
-                    # Something must already be using the PCIe device
-                    pass
+                    # Get the PCIe bus information
+                    dev_pcie_path = None
+                    try:
+                        with open(f'/sys/class/net/{vfphy}/device/uevent') as vfh:
+                            dev_uevent = vfh.readlines()
+                        for line in dev_uevent:
+                            if re.match(r'^PCI_SLOT_NAME=.*', line):
+                                dev_pcie_path = line.rstrip().split('=')[-1]
+                    except FileNotFoundError:
+                        # Something must already be using the PCIe device
+                        pass
 
-                # Add the VF to Zookeeper if it does not yet exist
-                if not zkhandler.exists(('node.sriov.vf', myhostname, 'sriov_vf', vfphy)):
-                    if dev_pcie_path is not None:
-                        pcie_domain, pcie_bus, pcie_slot, pcie_function = re.split(r':|\.', dev_pcie_path)
-                    else:
-                        # We can't add the device - for some reason we can't get any information on its PCIe bus path,
-                        # so just ignore this one, and continue.
-                        # This shouldn't happen under any real circumstances, unless the admin tries to attach a non-existent
-                        # VF to a VM manually, then goes ahead and adds that VF to the system with the VM running.
-                        continue
+                    # Add the VF to Zookeeper if it does not yet exist
+                    if not zkhandler.exists(('node.sriov.vf', config['node_hostname'], 'sriov_vf', vfphy)):
+                        if dev_pcie_path is not None:
+                            pcie_domain, pcie_bus, pcie_slot, pcie_function = re.split(r':|\.', dev_pcie_path)
+                        else:
+                            # We can't add the device - for some reason we can't get any information on its PCIe bus path,
+                            # so just ignore this one, and continue.
+                            # This shouldn't happen under any real circumstances, unless the admin tries to attach a non-existent
+                            # VF to a VM manually, then goes ahead and adds that VF to the system with the VM running.
+                            continue
 
-                    zkhandler.write([
-                        (('node.sriov.vf', myhostname, 'sriov_vf', vfphy), ''),
-                        (('node.sriov.vf', myhostname, 'sriov_vf.pf', vfphy), pf),
-                        (('node.sriov.vf', myhostname, 'sriov_vf.mtu', vfphy), mtu),
-                        (('node.sriov.vf', myhostname, 'sriov_vf.mac', vfphy), vf['address']),
-                        (('node.sriov.vf', myhostname, 'sriov_vf.phy_mac', vfphy), vf['address']),
-                        (('node.sriov.vf', myhostname, 'sriov_vf.config', vfphy), ''),
-                        (('node.sriov.vf', myhostname, 'sriov_vf.config.vlan_id', vfphy), vf['vlan_list'][0].get('vlan', '0')),
-                        (('node.sriov.vf', myhostname, 'sriov_vf.config.vlan_qos', vfphy), vf['vlan_list'][0].get('qos', '0')),
-                        (('node.sriov.vf', myhostname, 'sriov_vf.config.tx_rate_min', vfphy), vf['rate']['min_tx']),
-                        (('node.sriov.vf', myhostname, 'sriov_vf.config.tx_rate_max', vfphy), vf['rate']['max_tx']),
-                        (('node.sriov.vf', myhostname, 'sriov_vf.config.spoof_check', vfphy), vf['spoofchk']),
-                        (('node.sriov.vf', myhostname, 'sriov_vf.config.link_state', vfphy), vf['link_state']),
-                        (('node.sriov.vf', myhostname, 'sriov_vf.config.trust', vfphy), vf['trust']),
-                        (('node.sriov.vf', myhostname, 'sriov_vf.config.query_rss', vfphy), vf['query_rss_en']),
-                        (('node.sriov.vf', myhostname, 'sriov_vf.pci', vfphy), ''),
-                        (('node.sriov.vf', myhostname, 'sriov_vf.pci.domain', vfphy), pcie_domain),
-                        (('node.sriov.vf', myhostname, 'sriov_vf.pci.bus', vfphy), pcie_bus),
-                        (('node.sriov.vf', myhostname, 'sriov_vf.pci.slot', vfphy), pcie_slot),
-                        (('node.sriov.vf', myhostname, 'sriov_vf.pci.function', vfphy), pcie_function),
-                        (('node.sriov.vf', myhostname, 'sriov_vf.used', vfphy), False),
-                        (('node.sriov.vf', myhostname, 'sriov_vf.used_by', vfphy), ''),
+                        zkhandler.write([
+                            (('node.sriov.vf', config['node_hostname'], 'sriov_vf', vfphy), ''),
+                            (('node.sriov.vf', config['node_hostname'], 'sriov_vf.pf', vfphy), pf),
+                            (('node.sriov.vf', config['node_hostname'], 'sriov_vf.mtu', vfphy), mtu),
+                            (('node.sriov.vf', config['node_hostname'], 'sriov_vf.mac', vfphy), vf['address']),
+                            (('node.sriov.vf', config['node_hostname'], 'sriov_vf.phy_mac', vfphy), vf['address']),
+                            (('node.sriov.vf', config['node_hostname'], 'sriov_vf.config', vfphy), ''),
+                            (('node.sriov.vf', config['node_hostname'], 'sriov_vf.config.vlan_id', vfphy), vf['vlan_list'][0].get('vlan', '0')),
+                            (('node.sriov.vf', config['node_hostname'], 'sriov_vf.config.vlan_qos', vfphy), vf['vlan_list'][0].get('qos', '0')),
+                            (('node.sriov.vf', config['node_hostname'], 'sriov_vf.config.tx_rate_min', vfphy), vf['rate']['min_tx']),
+                            (('node.sriov.vf', config['node_hostname'], 'sriov_vf.config.tx_rate_max', vfphy), vf['rate']['max_tx']),
+                            (('node.sriov.vf', config['node_hostname'], 'sriov_vf.config.spoof_check', vfphy), vf['spoofchk']),
+                            (('node.sriov.vf', config['node_hostname'], 'sriov_vf.config.link_state', vfphy), vf['link_state']),
+                            (('node.sriov.vf', config['node_hostname'], 'sriov_vf.config.trust', vfphy), vf['trust']),
+                            (('node.sriov.vf', config['node_hostname'], 'sriov_vf.config.query_rss', vfphy), vf['query_rss_en']),
+                            (('node.sriov.vf', config['node_hostname'], 'sriov_vf.pci', vfphy), ''),
+                            (('node.sriov.vf', config['node_hostname'], 'sriov_vf.pci.domain', vfphy), pcie_domain),
+                            (('node.sriov.vf', config['node_hostname'], 'sriov_vf.pci.bus', vfphy), pcie_bus),
+                            (('node.sriov.vf', config['node_hostname'], 'sriov_vf.pci.slot', vfphy), pcie_slot),
+                            (('node.sriov.vf', config['node_hostname'], 'sriov_vf.pci.function', vfphy), pcie_function),
+                            (('node.sriov.vf', config['node_hostname'], 'sriov_vf.used', vfphy), False),
+                            (('node.sriov.vf', config['node_hostname'], 'sriov_vf.used_by', vfphy), ''),
+                        ])
+
+                    # Append the device to the list of VFs
+                    sriov_vf_list.append(vfphy)
+
+            # Remove any obsolete PFs from Zookeeper if they go away
+            for pf in zkhandler.children(('node.sriov.pf', config['node_hostname'])):
+                if pf not in sriov_pf_list:
+                    zkhandler.delete([
+                        ('node.sriov.pf', config['node_hostname'], 'sriov_pf', pf)
+                    ])
+            # Remove any obsolete VFs from Zookeeper if their PF goes away
+            for vf in zkhandler.children(('node.sriov.vf', config['node_hostname'])):
+                vf_pf = zkhandler.read(('node.sriov.vf', config['node_hostname'], 'sriov_vf.pf', vf))
+                if vf_pf not in sriov_pf_list:
+                    zkhandler.delete([
+                        ('node.sriov.vf', config['node_hostname'], 'sriov_vf', vf)
                     ])
 
-                # Append the device to the list of VFs
-                sriov_vf_list.append(vfphy)
+            # SR-IOV VF objects
+            # This is a ChildrenWatch just for consistency; the list never changes at runtime
+            @zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path('node.sriov.vf', config['node_hostname']))
+            def update_sriov_vfs(new_sriov_vf_list):
+                nonlocal sriov_vf_list, d_sriov_vf
 
-        # Remove any obsolete PFs from Zookeeper if they go away
-        for pf in zkhandler.children(('node.sriov.pf', myhostname)):
-            if pf not in sriov_pf_list:
-                zkhandler.delete([
-                    ('node.sriov.pf', myhostname, 'sriov_pf', pf)
-                ])
-        # Remove any obsolete VFs from Zookeeper if their PF goes away
-        for vf in zkhandler.children(('node.sriov.vf', myhostname)):
-            vf_pf = zkhandler.read(('node.sriov.vf', myhostname, 'sriov_vf.pf', vf))
-            if vf_pf not in sriov_pf_list:
-                zkhandler.delete([
-                    ('node.sriov.vf', myhostname, 'sriov_vf', vf)
-                ])
+                # Add VFs to the list
+                for vf in common.sortInterfaceNames(new_sriov_vf_list):
+                    d_sriov_vf[vf] = SRIOVVFInstance.SRIOVVFInstance(vf, zkhandler, config, logger, this_node)
 
-        # SR-IOV VF objects
-        # This is a ChildrenWatch just for consistency; the list never changes at runtime
-        @zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path('node.sriov.vf', myhostname))
-        def update_sriov_vfs(new_sriov_vf_list):
-            global sriov_vf_list, d_sriov_vf
+                sriov_vf_list = sorted(new_sriov_vf_list)
+                logger.out(f'{logger.fmt_blue}SR-IOV VF list:{logger.fmt_end} {" ".join(sriov_vf_list)}', state='i')
 
-            # Add VFs to the list
-            for vf in common.sortInterfaceNames(new_sriov_vf_list):
-                d_sriov_vf[vf] = SRIOVVFInstance.SRIOVVFInstance(vf, zkhandler, config, logger, this_node)
+    if config['enable_hypervisor']:
+        # VM command pipeline key
+        @zkhandler.zk_conn.DataWatch(zkhandler.schema.path('base.cmd.domain'))
+        def run_domain_command(data, stat, event=''):
+            if data:
+                VMInstance.vm_command(zkhandler, logger, this_node, data.decode('ascii'))
 
-            sriov_vf_list = sorted(new_sriov_vf_list)
-            logger.out('{}SR-IOV VF list:{} {}'.format(fmt_blue, fmt_end, ' '.join(sriov_vf_list)), state='i')
+        # VM domain objects
+        @zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path('base.domain'))
+        def update_domains(new_domain_list):
+            nonlocal domain_list, d_domain
 
-if enable_hypervisor:
-    # VM command pipeline key
-    @zkhandler.zk_conn.DataWatch(zkhandler.schema.path('base.cmd.domain'))
-    def cmd_domains(data, stat, event=''):
-        if data:
-            VMInstance.run_command(zkhandler, logger, this_node, data.decode('ascii'))
-
-    # VM domain objects
-    @zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path('base.domain'))
-    def update_domains(new_domain_list):
-        global domain_list, d_domain
-
-        # Add any missing domains to the list
-        for domain in new_domain_list:
-            if domain not in domain_list:
+            # Add missing domains to the list
+            for domain in [domain for domain in new_domain_list if domain not in domain_list]:
                 d_domain[domain] = VMInstance.VMInstance(domain, zkhandler, config, logger, this_node)
 
-        # Remove any deleted domains from the list
-        for domain in domain_list:
-            if domain not in new_domain_list:
-                # Delete the object
+            # Remove any deleted domains from the list
+            for domain in [domain for domain in domain_list if domain not in new_domain_list]:
                 del(d_domain[domain])
 
-        # Update and print new list
-        domain_list = new_domain_list
-        logger.out('{}VM list:{} {}'.format(fmt_blue, fmt_end, ' '.join(domain_list)), state='i')
+            # Update the new list
+            domain_list = new_domain_list
+            logger.out(f'{logger.fmt_blue}Domain list:{logger.fmt_end} {" ".join(domain_list)}', state='i')
 
-        # Update node objects' list
-        for node in d_node:
-            d_node[node].update_domain_list(d_domain)
+            # Update node objects' list
+            for node in d_node:
+                d_node[node].update_domain_list(d_domain)
 
-if enable_storage:
-    # Ceph command pipeline key
-    @zkhandler.zk_conn.DataWatch(zkhandler.schema.path('base.cmd.ceph'))
-    def cmd_ceph(data, stat, event=''):
-        if data:
-            CephInstance.run_command(zkhandler, logger, this_node, data.decode('ascii'), d_osd)
+    if config['enable_storage']:
+        # Ceph command pipeline key
+        @zkhandler.zk_conn.DataWatch(zkhandler.schema.path('base.cmd.ceph'))
+        def run_ceph_command(data, stat, event=''):
+            if data:
+                CephInstance.ceph_command(zkhandler, logger, this_node, data.decode('ascii'), d_osd)
 
-    # OSD objects
-    @zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path('base.osd'))
-    def update_osds(new_osd_list):
-        global osd_list, d_osd
+        # OSD objects
+        @zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path('base.osd'))
+        def update_osds(new_osd_list):
+            nonlocal osd_list, d_osd
 
-        # Add any missing OSDs to the list
-        for osd in new_osd_list:
-            if osd not in osd_list:
+            # Add any missing OSDs to the list
+            for osd in [osd for osd in new_osd_list if osd not in osd_list]:
                 d_osd[osd] = CephInstance.CephOSDInstance(zkhandler, this_node, osd)
 
-        # Remove any deleted OSDs from the list
-        for osd in osd_list:
-            if osd not in new_osd_list:
-                # Delete the object
+            # Remove any deleted OSDs from the list
+            for osd in [osd for osd in osd_list if osd not in new_osd_list]:
                 del(d_osd[osd])
 
-        # Update and print new list
-        osd_list = new_osd_list
-        logger.out('{}OSD list:{} {}'.format(fmt_blue, fmt_end, ' '.join(osd_list)), state='i')
+            # Update the new list
+            osd_list = new_osd_list
+            logger.out(f'{logger.fmt_blue}OSD list:{logger.fmt_end} {" ".join(osd_list)}', state='i')
 
-    # Pool objects
-    @zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path('base.pool'))
-    def update_pools(new_pool_list):
-        global pool_list, d_pool
+        # Pool objects
+        @zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path('base.pool'))
+        def update_pools(new_pool_list):
+            nonlocal pool_list, d_pool, volume_list, d_volume
 
-        # Add any missing Pools to the list
-        for pool in new_pool_list:
-            if pool not in pool_list:
+            # Add any missing pools to the list
+            for pool in [pool for pool in new_pool_list if pool not in pool_list]:
                 d_pool[pool] = CephInstance.CephPoolInstance(zkhandler, this_node, pool)
+                # Prepare the volume components for this pool
+                volume_list[pool] = list()
                 d_volume[pool] = dict()
-                volume_list[pool] = []
 
-        # Remove any deleted Pools from the list
-        for pool in pool_list:
-            if pool not in new_pool_list:
-                # Delete the object
+            # Remove any deleted pools from the list
+            for pool in [pool for pool in pool_list if pool not in new_pool_list]:
                 del(d_pool[pool])
 
-        # Update and print new list
-        pool_list = new_pool_list
-        logger.out('{}Pool list:{} {}'.format(fmt_blue, fmt_end, ' '.join(pool_list)), state='i')
+            # Update the new list
+            pool_list = new_pool_list
+            logger.out(f'{logger.fmt_blue}Pool list:{logger.fmt_end} {" ".join(pool_list)}', state='i')
 
-        # Volume objects in each pool
-        for pool in pool_list:
-            @zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path('volume', pool))
-            def update_volumes(new_volume_list):
-                global volume_list, d_volume
+            # Volume objects (in each pool); pool is bound as a default argument so each watch keeps its own pool
+            for pool in pool_list:
+                @zkhandler.zk_conn.ChildrenWatch(zkhandler.schema.path('volume', pool))
+                def update_volumes(new_volume_list, pool=pool):
+                    nonlocal volume_list, d_volume
 
-                # Add any missing Volumes to the list
-                for volume in new_volume_list:
-                    if volume not in volume_list[pool]:
+                    # Add any missing volumes to the list
+                    for volume in [volume for volume in new_volume_list if volume not in volume_list[pool]]:
                         d_volume[pool][volume] = CephInstance.CephVolumeInstance(zkhandler, this_node, pool, volume)
 
-                # Remove any deleted Volumes from the list
-                for volume in volume_list[pool]:
-                    if volume not in new_volume_list:
-                        # Delete the object
+                    # Remove any deleted volumes from the list
+                    for volume in [volume for volume in volume_list[pool] if volume not in new_volume_list]:
                         del(d_volume[pool][volume])
 
-                # Update and print new list
-                volume_list[pool] = new_volume_list
-                logger.out('{}Volume list [{pool}]:{} {plist}'.format(fmt_blue, fmt_end, pool=pool, plist=' '.join(volume_list[pool])), state='i')
+                    # Update the new list
+                    volume_list[pool] = new_volume_list
+                    logger.out(f'{logger.fmt_blue}Volume list [{pool}]:{logger.fmt_end} {" ".join(volume_list[pool])}', state='i')
 
+    # Start the keepalive timer
+    keepalive_timer = pvcnoded.util.keepalive.start_keepalive_timer(logger, config, zkhandler, this_node)
 
-###############################################################################
-# PHASE 9 - Run the daemon
-###############################################################################
-
-# Ceph stats update function
-def collect_ceph_stats(queue):
-    if debug:
-        logger.out("Thread starting", state='d', prefix='ceph-thread')
-
-    # Connect to the Ceph cluster
-    try:
-        ceph_conn = Rados(conffile=config['ceph_config_file'], conf=dict(keyring=config['ceph_admin_keyring']))
-        if debug:
-            logger.out("Connecting to cluster", state='d', prefix='ceph-thread')
-        ceph_conn.connect(timeout=1)
-    except Exception as e:
-        logger.out('Failed to open connection to Ceph cluster: {}'.format(e), state='e')
-        return
-
-    if debug:
-        logger.out("Getting health stats from monitor", state='d', prefix='ceph-thread')
-
-    # Get Ceph cluster health for local status output
-    command = {"prefix": "health", "format": "json"}
-    try:
-        health_status = json.loads(ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1])
-        ceph_health = health_status['status']
-    except Exception as e:
-        logger.out('Failed to obtain Ceph health data: {}'.format(e), state='e')
-        ceph_health = 'HEALTH_UNKN'
-
-    if ceph_health in ['HEALTH_OK']:
-        ceph_health_colour = fmt_green
-    elif ceph_health in ['HEALTH_UNKN']:
-        ceph_health_colour = fmt_cyan
-    elif ceph_health in ['HEALTH_WARN']:
-        ceph_health_colour = fmt_yellow
-    else:
-        ceph_health_colour = fmt_red
-
-    # Primary-only functions
-    if this_node.router_state == 'primary':
-        if debug:
-            logger.out("Set ceph health information in zookeeper (primary only)", state='d', prefix='ceph-thread')
-
-        command = {"prefix": "status", "format": "pretty"}
-        ceph_status = ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1].decode('ascii')
+    # Tick loop; does nothing since everything is async
+    while True:
         try:
-            zkhandler.write([
-                ('base.storage', str(ceph_status))
-            ])
-        except Exception as e:
-            logger.out('Failed to set Ceph status data: {}'.format(e), state='e')
-
-        if debug:
-            logger.out("Set ceph rados df information in zookeeper (primary only)", state='d', prefix='ceph-thread')
-
-        # Get rados df info
-        command = {"prefix": "df", "format": "pretty"}
-        ceph_df = ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1].decode('ascii')
-        try:
-            zkhandler.write([
-                ('base.storage.util', str(ceph_df))
-            ])
-        except Exception as e:
-            logger.out('Failed to set Ceph utilization data: {}'.format(e), state='e')
-
-        if debug:
-            logger.out("Set pool information in zookeeper (primary only)", state='d', prefix='ceph-thread')
-
-        # Get pool info
-        command = {"prefix": "df", "format": "json"}
-        ceph_df_output = ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1].decode('ascii')
-        try:
-            ceph_pool_df_raw = json.loads(ceph_df_output)['pools']
-        except Exception as e:
-            logger.out('Failed to obtain Pool data (ceph df): {}'.format(e), state='w')
-            ceph_pool_df_raw = []
-
-        retcode, stdout, stderr = common.run_os_command('rados df --format json', timeout=1)
-        try:
-            rados_pool_df_raw = json.loads(stdout)['pools']
-        except Exception as e:
-            logger.out('Failed to obtain Pool data (rados df): {}'.format(e), state='w')
-            rados_pool_df_raw = []
-
-        pool_count = len(ceph_pool_df_raw)
-        if debug:
-            logger.out("Getting info for {} pools".format(pool_count), state='d', prefix='ceph-thread')
-        for pool_idx in range(0, pool_count):
-            try:
-                # Combine all the data for this pool
-                ceph_pool_df = ceph_pool_df_raw[pool_idx]
-                rados_pool_df = rados_pool_df_raw[pool_idx]
-                pool = ceph_pool_df
-                pool.update(rados_pool_df)
-
-                # Ignore any pools that aren't in our pool list
-                if pool['name'] not in pool_list:
-                    if debug:
-                        logger.out("Pool {} not in pool list {}".format(pool['name'], pool_list), state='d', prefix='ceph-thread')
-                    continue
-                else:
-                    if debug:
-                        logger.out("Parsing data for pool {}".format(pool['name']), state='d', prefix='ceph-thread')
-
-                # Assemble a useful data structure
-                pool_df = {
-                    'id': pool['id'],
-                    'stored_bytes': pool['stats']['stored'],
-                    'free_bytes': pool['stats']['max_avail'],
-                    'used_bytes': pool['stats']['bytes_used'],
-                    'used_percent': pool['stats']['percent_used'],
-                    'num_objects': pool['stats']['objects'],
-                    'num_object_clones': pool['num_object_clones'],
-                    'num_object_copies': pool['num_object_copies'],
-                    'num_objects_missing_on_primary': pool['num_objects_missing_on_primary'],
-                    'num_objects_unfound': pool['num_objects_unfound'],
-                    'num_objects_degraded': pool['num_objects_degraded'],
-                    'read_ops': pool['read_ops'],
-                    'read_bytes': pool['read_bytes'],
-                    'write_ops': pool['write_ops'],
-                    'write_bytes': pool['write_bytes']
-                }
-
-                # Write the pool data to Zookeeper
-                zkhandler.write([
-                    (('pool.stats', pool['name']), str(json.dumps(pool_df)))
-                ])
-            except Exception as e:
-                # One or more of the status commands timed out, just continue
-                logger.out('Failed to format and send pool data: {}'.format(e), state='w')
-                pass
-
-    # Only grab OSD stats if there are OSDs to grab (otherwise `ceph osd df` hangs)
-    osds_this_node = 0
-    if len(osd_list) > 0:
-        # Get data from Ceph OSDs
-        if debug:
-            logger.out("Get data from Ceph OSDs", state='d', prefix='ceph-thread')
-
-        # Parse the dump data
-        osd_dump = dict()
-
-        command = {"prefix": "osd dump", "format": "json"}
-        osd_dump_output = ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1].decode('ascii')
-        try:
-            osd_dump_raw = json.loads(osd_dump_output)['osds']
-        except Exception as e:
-            logger.out('Failed to obtain OSD data: {}'.format(e), state='w')
-            osd_dump_raw = []
-
-        if debug:
-            logger.out("Loop through OSD dump", state='d', prefix='ceph-thread')
-        for osd in osd_dump_raw:
-            osd_dump.update({
-                str(osd['osd']): {
-                    'uuid': osd['uuid'],
-                    'up': osd['up'],
-                    'in': osd['in'],
-                    'primary_affinity': osd['primary_affinity']
-                }
-            })
-
-        # Parse the df data
-        if debug:
-            logger.out("Parse the OSD df data", state='d', prefix='ceph-thread')
-
-        osd_df = dict()
-
-        command = {"prefix": "osd df", "format": "json"}
-        try:
-            osd_df_raw = json.loads(ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1])['nodes']
-        except Exception as e:
-            logger.out('Failed to obtain OSD data: {}'.format(e), state='w')
-            osd_df_raw = []
-
-        if debug:
-            logger.out("Loop through OSD df", state='d', prefix='ceph-thread')
-        for osd in osd_df_raw:
-            osd_df.update({
-                str(osd['id']): {
-                    'utilization': osd['utilization'],
-                    'var': osd['var'],
-                    'pgs': osd['pgs'],
-                    'kb': osd['kb'],
-                    'weight': osd['crush_weight'],
-                    'reweight': osd['reweight'],
-                }
-            })
-
-        # Parse the status data
-        if debug:
-            logger.out("Parse the OSD status data", state='d', prefix='ceph-thread')
-
-        osd_status = dict()
-
-        command = {"prefix": "osd status", "format": "pretty"}
-        try:
-            osd_status_raw = ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1].decode('ascii')
-        except Exception as e:
-            logger.out('Failed to obtain OSD status data: {}'.format(e), state='w')
-            osd_status_raw = []
-
-        if debug:
-            logger.out("Loop through OSD status data", state='d', prefix='ceph-thread')
-
-        for line in osd_status_raw.split('\n'):
-            # Strip off colour
-            line = re.sub(r'\x1b(\[.*?[@-~]|\].*?(\x07|\x1b\\))', '', line)
-            # Split it for parsing
-            line = line.split()
-            if len(line) > 1 and line[1].isdigit():
-                # This is an OSD line so parse it
-                osd_id = line[1]
-                node = line[3].split('.')[0]
-                used = line[5]
-                avail = line[7]
-                wr_ops = line[9]
-                wr_data = line[11]
-                rd_ops = line[13]
-                rd_data = line[15]
-                state = line[17]
-                osd_status.update({
-                    str(osd_id): {
-                        'node': node,
-                        'used': used,
-                        'avail': avail,
-                        'wr_ops': wr_ops,
-                        'wr_data': wr_data,
-                        'rd_ops': rd_ops,
-                        'rd_data': rd_data,
-                        'state': state
-                    }
-                })
-
-        # Merge them together into a single meaningful dict
-        if debug:
-            logger.out("Merge OSD data together", state='d', prefix='ceph-thread')
-
-        osd_stats = dict()
-
-        for osd in osd_list:
-            if d_osd[osd].node == myhostname:
-                osds_this_node += 1
-            try:
-                this_dump = osd_dump[osd]
-                this_dump.update(osd_df[osd])
-                this_dump.update(osd_status[osd])
-                osd_stats[osd] = this_dump
-            except KeyError as e:
-                # One or more of the status commands timed out, just continue
-                logger.out('Failed to parse OSD stats into dictionary: {}'.format(e), state='w')
-
-        # Upload OSD data for the cluster (primary-only)
-        if this_node.router_state == 'primary':
-            if debug:
-                logger.out("Trigger updates for each OSD", state='d', prefix='ceph-thread')
-
-            for osd in osd_list:
-                try:
-                    stats = json.dumps(osd_stats[osd])
-                    zkhandler.write([
-                        (('osd.stats', osd), str(stats))
-                    ])
-                except KeyError as e:
-                    # One or more of the status commands timed out, just continue
-                    logger.out('Failed to upload OSD stats from dictionary: {}'.format(e), state='w')
-
-    ceph_conn.shutdown()
-
-    queue.put(ceph_health_colour)
-    queue.put(ceph_health)
-    queue.put(osds_this_node)
-
-    if debug:
-        logger.out("Thread finished", state='d', prefix='ceph-thread')
-
-
-# State table for pretty stats
-libvirt_vm_states = {
-    0: "NOSTATE",
-    1: "RUNNING",
-    2: "BLOCKED",
-    3: "PAUSED",
-    4: "SHUTDOWN",
-    5: "SHUTOFF",
-    6: "CRASHED",
-    7: "PMSUSPENDED"
-}
-
-
-# VM stats update function
-def collect_vm_stats(queue):
-    if debug:
-        logger.out("Thread starting", state='d', prefix='vm-thread')
-
-    # Connect to libvirt
-    libvirt_name = "qemu:///system"
-    if debug:
-        logger.out("Connecting to libvirt", state='d', prefix='vm-thread')
-    lv_conn = libvirt.open(libvirt_name)
-    if lv_conn is None:
-        logger.out('Failed to open connection to "{}"'.format(libvirt_name), state='e')
-
-    memalloc = 0
-    memprov = 0
-    vcpualloc = 0
-    # Toggle state management of dead VMs to restart them
-    if debug:
-        logger.out("Toggle state management of dead VMs to restart them", state='d', prefix='vm-thread')
-    # Make a copy of the d_domain; if not, and it changes in flight, this can fail
-    fixed_d_domain = this_node.d_domain.copy()
-    for domain, instance in fixed_d_domain.items():
-        if domain in this_node.domain_list:
-            # Add the allocated memory to our memalloc value
-            memalloc += instance.getmemory()
-            memprov += instance.getmemory()
-            vcpualloc += instance.getvcpus()
-            if instance.getstate() == 'start' and instance.getnode() == this_node.name:
-                if instance.getdom() is not None:
-                    try:
-                        if instance.getdom().state()[0] != libvirt.VIR_DOMAIN_RUNNING:
-                            logger.out("VM {} has failed".format(instance.domname), state='w', prefix='vm-thread')
-                            raise
-                    except Exception:
-                        # Toggle a state "change"
-                        logger.out("Resetting state to {} for VM {}".format(instance.getstate(), instance.domname), state='i', prefix='vm-thread')
-                        zkhandler.write([
-                            (('domain.state', domain), instance.getstate())
-                        ])
-        elif instance.getnode() == this_node.name:
-            memprov += instance.getmemory()
-
-    # Get list of running domains from Libvirt
-    running_domains = lv_conn.listAllDomains(libvirt.VIR_CONNECT_LIST_DOMAINS_ACTIVE)
-
-    # Get statistics from any running VMs
-    for domain in running_domains:
-        try:
-            # Get basic information about the VM
-            tree = ElementTree.fromstring(domain.XMLDesc())
-            domain_uuid = domain.UUIDString()
-            domain_name = domain.name()
-
-            # Get all the raw information about the VM
-            if debug:
-                logger.out("Getting general statistics for VM {}".format(domain_name), state='d', prefix='vm-thread')
-            domain_state, domain_maxmem, domain_mem, domain_vcpus, domain_cputime = domain.info()
-            # We can't properly gather stats from a non-running VMs so continue
-            if domain_state != libvirt.VIR_DOMAIN_RUNNING:
-                continue
-            domain_memory_stats = domain.memoryStats()
-            domain_cpu_stats = domain.getCPUStats(True)[0]
-        except Exception as e:
-            logger.out("Failed getting VM information for {}: {}".format(domain.name(), e), state='w', prefix='vm-thread')
-            continue
-
-        # Ensure VM is present in the domain_list
-        if domain_uuid not in this_node.domain_list:
-            this_node.domain_list.append(domain_uuid)
-
-        if debug:
-            logger.out("Getting disk statistics for VM {}".format(domain_name), state='d', prefix='vm-thread')
-        domain_disk_stats = []
-        try:
-            for disk in tree.findall('devices/disk'):
-                disk_name = disk.find('source').get('name')
-                if not disk_name:
-                    disk_name = disk.find('source').get('file')
-                disk_stats = domain.blockStats(disk.find('target').get('dev'))
-                domain_disk_stats.append({
-                    "name": disk_name,
-                    "rd_req": disk_stats[0],
-                    "rd_bytes": disk_stats[1],
-                    "wr_req": disk_stats[2],
-                    "wr_bytes": disk_stats[3],
-                    "err": disk_stats[4]
-                })
-        except Exception as e:
-            logger.out("Failed to get disk stats for VM {}: {}".format(domain_name, e), state='w', prefix='vm-thread')
-            continue
-
-        if debug:
-            logger.out("Getting network statistics for VM {}".format(domain_name), state='d', prefix='vm-thread')
-        domain_network_stats = []
-        try:
-            for interface in tree.findall('devices/interface'):
-                interface_type = interface.get('type')
-                if interface_type not in ['bridge']:
-                    continue
-                interface_name = interface.find('target').get('dev')
-                interface_bridge = interface.find('source').get('bridge')
-                interface_stats = domain.interfaceStats(interface_name)
-                domain_network_stats.append({
-                    "name": interface_name,
-                    "bridge": interface_bridge,
-                    "rd_bytes": interface_stats[0],
-                    "rd_packets": interface_stats[1],
-                    "rd_errors": interface_stats[2],
-                    "rd_drops": interface_stats[3],
-                    "wr_bytes": interface_stats[4],
-                    "wr_packets": interface_stats[5],
-                    "wr_errors": interface_stats[6],
-                    "wr_drops": interface_stats[7]
-                })
-        except Exception as e:
-            logger.out("Failed to get network stats for VM {}: {}".format(domain_name, e), state='w', prefix='vm-thread')
-            continue
-
-        # Create the final dictionary
-        domain_stats = {
-            "state": libvirt_vm_states[domain_state],
-            "maxmem": domain_maxmem,
-            "livemem": domain_mem,
-            "cpus": domain_vcpus,
-            "cputime": domain_cputime,
-            "mem_stats": domain_memory_stats,
-            "cpu_stats": domain_cpu_stats,
-            "disk_stats": domain_disk_stats,
-            "net_stats": domain_network_stats
-        }
-
-        if debug:
-            logger.out("Writing statistics for VM {} to Zookeeper".format(domain_name), state='d', prefix='vm-thread')
-
-        try:
-            zkhandler.write([
-                (('domain.stats', domain_uuid), str(json.dumps(domain_stats)))
-            ])
-        except Exception as e:
-            if debug:
-                logger.out("{}".format(e), state='d', prefix='vm-thread')
-
-    # Close the Libvirt connection
-    lv_conn.close()
-
-    queue.put(len(running_domains))
-    queue.put(memalloc)
-    queue.put(memprov)
-    queue.put(vcpualloc)
-
-    if debug:
-        logger.out("Thread finished", state='d', prefix='vm-thread')
-
-
-# Keepalive update function
-@common.Profiler(config)
-def node_keepalive():
-    if debug:
-        logger.out("Keepalive starting", state='d', prefix='main-thread')
-
-    # Set the migration selector in Zookeeper for clients to read
-    if config['enable_hypervisor']:
-        if this_node.router_state == 'primary':
-            try:
-                if zkhandler.read('base.config.migration_target_selector') != config['migration_target_selector']:
-                    raise
-            except Exception:
-                zkhandler.write([
-                    ('base.config.migration_target_selector', config['migration_target_selector'])
-                ])
-
-    # Set the upstream IP in Zookeeper for clients to read
-    if config['enable_networking']:
-        if this_node.router_state == 'primary':
-            try:
-                if zkhandler.read('base.config.upstream_ip') != config['upstream_floating_ip']:
-                    raise
-            except Exception:
-                zkhandler.write([
-                    ('base.config.upstream_ip', config['upstream_floating_ip'])
-                ])
-
-    # Get past state and update if needed
-    if debug:
-        logger.out("Get past state and update if needed", state='d', prefix='main-thread')
-
-    past_state = zkhandler.read(('node.state.daemon', this_node.name))
-    if past_state != 'run' and past_state != 'shutdown':
-        this_node.daemon_state = 'run'
-        zkhandler.write([
-            (('node.state.daemon', this_node.name), 'run')
-        ])
-    else:
-        this_node.daemon_state = 'run'
-
-    # Ensure the primary key is properly set
-    if debug:
-        logger.out("Ensure the primary key is properly set", state='d', prefix='main-thread')
-    if this_node.router_state == 'primary':
-        if zkhandler.read('base.config.primary_node') != this_node.name:
-            zkhandler.write([
-                ('base.config.primary_node', this_node.name)
-            ])
-
-    # Run VM statistics collection in separate thread for parallelization
-    if enable_hypervisor:
-        vm_thread_queue = Queue()
-        vm_stats_thread = Thread(target=collect_vm_stats, args=(vm_thread_queue,), kwargs={})
-        vm_stats_thread.start()
-
-    # Run Ceph status collection in separate thread for parallelization
-    if enable_storage:
-        ceph_thread_queue = Queue()
-        ceph_stats_thread = Thread(target=collect_ceph_stats, args=(ceph_thread_queue,), kwargs={})
-        ceph_stats_thread.start()
-
-    # Get node performance statistics
-    this_node.memtotal = int(psutil.virtual_memory().total / 1024 / 1024)
-    this_node.memused = int(psutil.virtual_memory().used / 1024 / 1024)
-    this_node.memfree = int(psutil.virtual_memory().free / 1024 / 1024)
-    this_node.cpuload = os.getloadavg()[0]
-
-    # Join against running threads
-    if enable_hypervisor:
-        vm_stats_thread.join(timeout=4.0)
-        if vm_stats_thread.is_alive():
-            logger.out('VM stats gathering exceeded 4s timeout, continuing', state='w')
-    if enable_storage:
-        ceph_stats_thread.join(timeout=4.0)
-        if ceph_stats_thread.is_alive():
-            logger.out('Ceph stats gathering exceeded 4s timeout, continuing', state='w')
-
-    # Get information from thread queues
-    if enable_hypervisor:
-        try:
-            this_node.domains_count = vm_thread_queue.get()
-            this_node.memalloc = vm_thread_queue.get()
-            this_node.memprov = vm_thread_queue.get()
-            this_node.vcpualloc = vm_thread_queue.get()
+            sleep(1)
         except Exception:
-            pass
-    else:
-        this_node.domains_count = 0
-        this_node.memalloc = 0
-        this_node.memprov = 0
-        this_node.vcpualloc = 0
-
-    if enable_storage:
-        try:
-            ceph_health_colour = ceph_thread_queue.get()
-            ceph_health = ceph_thread_queue.get()
-            osds_this_node = ceph_thread_queue.get()
-        except Exception:
-            ceph_health_colour = fmt_cyan
-            ceph_health = 'UNKNOWN'
-            osds_this_node = '?'
-
-    # Set our information in zookeeper
-    keepalive_time = int(time.time())
-    if debug:
-        logger.out("Set our information in zookeeper", state='d', prefix='main-thread')
-    try:
-        zkhandler.write([
-            (('node.memory.total', this_node.name), str(this_node.memtotal)),
-            (('node.memory.used', this_node.name), str(this_node.memused)),
-            (('node.memory.free', this_node.name), str(this_node.memfree)),
-            (('node.memory.allocated', this_node.name), str(this_node.memalloc)),
-            (('node.memory.provisioned', this_node.name), str(this_node.memprov)),
-            (('node.vcpu.allocated', this_node.name), str(this_node.vcpualloc)),
-            (('node.cpu.load', this_node.name), str(this_node.cpuload)),
-            (('node.count.provisioned_domains', this_node.name), str(this_node.domains_count)),
-            (('node.running_domains', this_node.name), ' '.join(this_node.domain_list)),
-            (('node.keepalive', this_node.name), str(keepalive_time)),
-        ])
-    except Exception:
-        logger.out('Failed to set keepalive data', state='e')
-
-    # Display node information to the terminal
-    if config['log_keepalives']:
-        if this_node.router_state == 'primary':
-            cst_colour = fmt_green
-        elif this_node.router_state == 'secondary':
-            cst_colour = fmt_blue
-        else:
-            cst_colour = fmt_cyan
-        logger.out(
-            '{}{} keepalive @ {}{} [{}{}{}]'.format(
-                fmt_purple,
-                myhostname,
-                datetime.now(),
-                fmt_end,
-                fmt_bold + cst_colour,
-                this_node.router_state,
-                fmt_end
-            ),
-            state='t'
-        )
-        if config['log_keepalive_cluster_details']:
-            logger.out(
-                '{bold}Maintenance:{nofmt} {maint}  '
-                '{bold}Active VMs:{nofmt} {domcount}  '
-                '{bold}Networks:{nofmt} {netcount}  '
-                '{bold}Load:{nofmt} {load}  '
-                '{bold}Memory [MiB]: VMs:{nofmt} {allocmem}  '
-                '{bold}Used:{nofmt} {usedmem}  '
-                '{bold}Free:{nofmt} {freemem}'.format(
-                    bold=fmt_bold,
-                    nofmt=fmt_end,
-                    maint=maintenance,
-                    domcount=this_node.domains_count,
-                    netcount=len(network_list),
-                    load=this_node.cpuload,
-                    freemem=this_node.memfree,
-                    usedmem=this_node.memused,
-                    allocmem=this_node.memalloc,
-                ),
-                state='t'
-            )
-        if enable_storage and config['log_keepalive_storage_details']:
-            logger.out(
-                '{bold}Ceph cluster status:{nofmt} {health_colour}{health}{nofmt}  '
-                '{bold}Total OSDs:{nofmt} {total_osds}  '
-                '{bold}Node OSDs:{nofmt} {node_osds}  '
-                '{bold}Pools:{nofmt} {total_pools}  '.format(
-                    bold=fmt_bold,
-                    health_colour=ceph_health_colour,
-                    nofmt=fmt_end,
-                    health=ceph_health,
-                    total_osds=len(osd_list),
-                    node_osds=osds_this_node,
-                    total_pools=len(pool_list)
-                ),
-                state='t'
-            )
-
-    # Look for dead nodes and fence them
-    if not maintenance:
-        if debug:
-            logger.out("Look for dead nodes and fence them", state='d', prefix='main-thread')
-        if config['daemon_mode'] == 'coordinator':
-            for node_name in d_node:
-                try:
-                    node_daemon_state = zkhandler.read(('node.state.daemon', node_name))
-                    node_keepalive = int(zkhandler.read(('node.keepalive', node_name)))
-                except Exception:
-                    node_daemon_state = 'unknown'
-                    node_keepalive = 0
-
-                # Handle deadtime and fencng if needed
-                # (A node is considered dead when its keepalive timer is >6*keepalive_interval seconds
-                # out-of-date while in 'start' state)
-                node_deadtime = int(time.time()) - (int(config['keepalive_interval']) * int(config['fence_intervals']))
-                if node_keepalive < node_deadtime and node_daemon_state == 'run':
-                    logger.out('Node {} seems dead - starting monitor for fencing'.format(node_name), state='w')
-                    zk_lock = zkhandler.writelock(('node.state.daemon', node_name))
-                    with zk_lock:
-                        # Ensures that, if we lost the lock race and come out of waiting,
-                        # we won't try to trigger our own fence thread.
-                        if zkhandler.read(('node.state.daemon', node_name)) != 'dead':
-                            fence_thread = Thread(target=fencing.fenceNode, args=(node_name, zkhandler, config, logger), kwargs={})
-                            fence_thread.start()
-                            # Write the updated data after we start the fence thread
-                            zkhandler.write([
-                                (('node.state.daemon', node_name), 'dead')
-                            ])
-
-    if debug:
-        logger.out("Keepalive finished", state='d', prefix='main-thread')
-
-
-# Start keepalive thread
-update_timer = startKeepaliveTimer()
-
-# Tick loop; does nothing since everything else is async
-while True:
-    try:
-        time.sleep(1)
-    except Exception:
-        break
+            break
diff --git a/node-daemon/pvcnoded/dnsmasq-zookeeper-leases.py b/node-daemon/pvcnoded/dnsmasq-zookeeper-leases.py
index 4ada3d89..4a643077 100755
--- a/node-daemon/pvcnoded/dnsmasq-zookeeper-leases.py
+++ b/node-daemon/pvcnoded/dnsmasq-zookeeper-leases.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python3
+#!/usr/bin/env python3
 
 # dnsmasq-zookeeper-leases.py - DNSMASQ leases script for Zookeeper
 # Part of the Parallel Virtual Cluster (PVC) system
diff --git a/node-daemon/pvcnoded/objects/CephInstance.py b/node-daemon/pvcnoded/objects/CephInstance.py
new file mode 100644
index 00000000..89a4c5c9
--- /dev/null
+++ b/node-daemon/pvcnoded/objects/CephInstance.py
@@ -0,0 +1,428 @@
+#!/usr/bin/env python3
+
+# CephInstance.py - Class implementing a PVC node Ceph instance
+# Part of the Parallel Virtual Cluster (PVC) system
+#
+#    Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
+#
+#    This program is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, version 3.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+#
+###############################################################################
+
+import time
+import json
+import psutil
+
+import daemon_lib.common as common
+
+
+class CephOSDInstance(object):
+    def __init__(self, zkhandler, this_node, osd_id):
+        self.zkhandler = zkhandler
+        self.this_node = this_node
+        self.osd_id = osd_id
+        self.node = None  # last value seen on the 'osd.node' key
+        self.size = None
+        self.stats = dict()  # last JSON document decoded from the 'osd.stats' key
+
+        @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('osd.node', self.osd_id))
+        def watch_osd_node(data, stat, event=''):
+            if event and event.type == 'DELETED':
+                # The key existed and has now been deleted; return False to end this
+                # watcher, since this class instance is about to be reaped in Daemon.py
+                return False
+
+            try:
+                data = data.decode('ascii')
+            except AttributeError:  # data lacks .decode(); presumably None for an empty/missing key
+                data = ''
+
+            if data and data != self.node:
+                self.node = data
+
+        @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('osd.stats', self.osd_id))
+        def watch_osd_stats(data, stat, event=''):
+            if event and event.type == 'DELETED':
+                # The key existed and has now been deleted; return False to end this
+                # watcher, since this class instance is about to be reaped in Daemon.py
+                return False
+
+            try:
+                data = data.decode('ascii')
+            except AttributeError:  # data lacks .decode(); presumably None for an empty/missing key
+                data = ''
+
+            if data and data != self.stats:
+                self.stats = json.loads(data)
+
+    @staticmethod
+    def add_osd(zkhandler, logger, node, device, weight):
+        # Create, activate and register a new OSD on `device`; returns True on success, False on any failure
+        logger.out('Creating new OSD disk on block device {}'.format(device), state='i')
+        try:
+            # 1. Create an OSD; we do this so we know what ID will be gen'd
+            retcode, stdout, stderr = common.run_os_command('ceph osd create')
+            if retcode:
+                print('ceph osd create')
+                print(stdout)
+                print(stderr)
+                raise RuntimeError('"ceph osd create" failed with return code {}'.format(retcode))
+            osd_id = stdout.rstrip()
+
+            # 2. Remove that newly-created OSD
+            retcode, stdout, stderr = common.run_os_command('ceph osd rm {}'.format(osd_id))
+            if retcode:
+                print('ceph osd rm')
+                print(stdout)
+                print(stderr)
+                raise RuntimeError('"ceph osd rm" failed with return code {}'.format(retcode))
+
+            # 3a. Zap the disk to ensure it is ready to go
+            logger.out('Zapping disk {}'.format(device), state='i')
+            retcode, stdout, stderr = common.run_os_command('ceph-volume lvm zap --destroy {}'.format(device))
+            if retcode:
+                print('ceph-volume lvm zap')
+                print(stdout)
+                print(stderr)
+                raise RuntimeError('"ceph-volume lvm zap" failed with return code {}'.format(retcode))
+
+            # 3b. Create the OSD for real
+            logger.out('Preparing LVM for new OSD disk with ID {} on {}'.format(osd_id, device), state='i')
+            retcode, stdout, stderr = common.run_os_command(
+                'ceph-volume lvm prepare --bluestore --data {device}'.format(
+                    osdid=osd_id,
+                    device=device
+                )
+            )
+            if retcode:
+                print('ceph-volume lvm prepare')
+                print(stdout)
+                print(stderr)
+                raise RuntimeError('"ceph-volume lvm prepare" failed with return code {}'.format(retcode))
+
+            # 4a. Get OSD FSID
+            logger.out('Getting OSD FSID for ID {} on {}'.format(osd_id, device), state='i')
+            retcode, stdout, stderr = common.run_os_command(
+                'ceph-volume lvm list {device}'.format(
+                    device=device
+                )
+            )
+            osd_fsid = None
+            for line in stdout.split('\n'):
+                if 'osd fsid' in line:
+                    osd_fsid = line.split()[-1]
+
+            if not osd_fsid:
+                print('ceph-volume lvm list')
+                print('Could not find OSD fsid in data:')
+                print(stdout)
+                print(stderr)
+                raise RuntimeError('could not find the OSD fsid for {} in "ceph-volume lvm list" output'.format(device))
+
+            # 4b. Activate the OSD
+            logger.out('Activating new OSD disk with ID {}'.format(osd_id), state='i')
+            retcode, stdout, stderr = common.run_os_command(
+                'ceph-volume lvm activate --bluestore {osdid} {osdfsid}'.format(
+                    osdid=osd_id,
+                    osdfsid=osd_fsid
+                )
+            )
+            if retcode:
+                print('ceph-volume lvm activate')
+                print(stdout)
+                print(stderr)
+                raise RuntimeError('"ceph-volume lvm activate" failed with return code {}'.format(retcode))
+
+            # 5. Add it to the crush map
+            logger.out('Adding new OSD disk with ID {} to CRUSH map'.format(osd_id), state='i')
+            retcode, stdout, stderr = common.run_os_command(
+                'ceph osd crush add osd.{osdid} {weight} root=default host={node}'.format(
+                    osdid=osd_id,
+                    weight=weight,
+                    node=node
+                )
+            )
+            if retcode:
+                print('ceph osd crush add')
+                print(stdout)
+                print(stderr)
+                raise RuntimeError('"ceph osd crush add" failed with return code {}'.format(retcode))
+            time.sleep(0.5)
+
+            # 6. Verify it started
+            retcode, stdout, stderr = common.run_os_command(
+                'systemctl status ceph-osd@{osdid}'.format(
+                    osdid=osd_id
+                )
+            )
+            if retcode:
+                print('systemctl status')
+                print(stdout)
+                print(stderr)
+                raise RuntimeError('"systemctl status ceph-osd@{}" failed with return code {}'.format(osd_id, retcode))
+
+            # 7. Add the new OSD to the list
+            logger.out('Adding new OSD disk with ID {} to Zookeeper'.format(osd_id), state='i')
+            zkhandler.write([
+                (('osd', osd_id), ''),
+                (('osd.node', osd_id), node),
+                (('osd.device', osd_id), device),
+                (('osd.stats', osd_id), '{}'),
+            ])
+
+            # Log it
+            logger.out('Created new OSD disk with ID {}'.format(osd_id), state='o')
+            return True
+        except Exception as e:
+            # Log it; every failed step above lands here with a message naming the step
+            logger.out('Failed to create new OSD disk: {}'.format(e), state='e')
+            return False
+
+    @staticmethod
+    def remove_osd(zkhandler, logger, osd_id, osd_obj):  # True on success or if the OSD is already absent, else False
+        logger.out('Removing OSD disk {}'.format(osd_id), state='i')
+        try:
+            # 0. Verify the OSD is present
+            retcode, stdout, stderr = common.run_os_command('ceph osd ls')
+            osd_list = stdout.split('\n')
+            if osd_id not in osd_list:
+                logger.out('Could not find OSD {} in the cluster'.format(osd_id), state='e')
+                return True
+
+            # 1. Set the OSD out so it will flush
+            logger.out('Setting out OSD disk with ID {}'.format(osd_id), state='i')
+            retcode, stdout, stderr = common.run_os_command('ceph osd out {}'.format(osd_id))
+            if retcode:
+                print('ceph osd out')
+                print(stdout)
+                print(stderr)
+                raise RuntimeError('"ceph osd out" failed with return code {}'.format(retcode))
+
+            # 2. Wait for the OSD to flush
+            logger.out('Flushing OSD disk with ID {}'.format(osd_id), state='i')
+            osd_string = str()
+            while True:
+                try:
+                    retcode, stdout, stderr = common.run_os_command('ceph pg dump osds --format json')
+                    dump_string = json.loads(stdout)
+                    for osd in dump_string:
+                        if str(osd['osd']) == osd_id:
+                            osd_string = osd
+                    num_pgs = osd_string['num_pgs']
+                    if num_pgs > 0:
+                        time.sleep(5)
+                    else:
+                        break
+                except Exception:
+                    break
+
+            # 3. Stop the OSD process and wait for it to be terminated
+            logger.out('Stopping OSD disk with ID {}'.format(osd_id), state='i')
+            retcode, stdout, stderr = common.run_os_command('systemctl stop ceph-osd@{}'.format(osd_id))
+            if retcode:
+                print('systemctl stop')
+                print(stdout)
+                print(stderr)
+                raise RuntimeError('"systemctl stop ceph-osd@{}" failed with return code {}'.format(osd_id, retcode))
+
+            # FIXME: There has to be a better way to do this /shrug
+            while True:
+                # Find if there is a process named ceph-osd with arg '--id {id}' (trailing space so ID 1 != ID 10)
+                is_osd_up = any(
+                    'ceph-osd' == p.info['name'] and '--id {} '.format(osd_id) in ' '.join(p.info['cmdline'] + [''])
+                    for p in psutil.process_iter(attrs=['name', 'cmdline'])
+                )
+                if not is_osd_up:
+                    break
+                time.sleep(0.5)  # Poll instead of spinning a CPU core while the OSD exits
+
+            # 4. Determine the block devices
+            retcode, stdout, stderr = common.run_os_command('readlink /var/lib/ceph/osd/ceph-{}/block'.format(osd_id))
+            vg_name = stdout.split('/')[-2]  # e.g. /dev/ceph-<uuid>/osd-block-<uuid>
+            retcode, stdout, stderr = common.run_os_command('vgs --separator , --noheadings -o pv_name {}'.format(vg_name))
+            pv_block = stdout.strip()
+
+            # 5. Zap the volumes
+            logger.out('Zapping OSD disk with ID {} on {}'.format(osd_id, pv_block), state='i')
+            retcode, stdout, stderr = common.run_os_command('ceph-volume lvm zap --destroy {}'.format(pv_block))
+            if retcode:
+                print('ceph-volume lvm zap')
+                print(stdout)
+                print(stderr)
+                raise RuntimeError('"ceph-volume lvm zap" failed with return code {}'.format(retcode))
+
+            # 6. Purge the OSD from Ceph
+            logger.out('Purging OSD disk with ID {}'.format(osd_id), state='i')
+            retcode, stdout, stderr = common.run_os_command('ceph osd purge {} --yes-i-really-mean-it'.format(osd_id))
+            if retcode:
+                print('ceph osd purge')
+                print(stdout)
+                print(stderr)
+                raise RuntimeError('"ceph osd purge" failed with return code {}'.format(retcode))
+
+            # 7. Delete OSD from ZK
+            logger.out('Deleting OSD disk with ID {} from Zookeeper'.format(osd_id), state='i')
+            zkhandler.delete(('osd', osd_id), recursive=True)
+
+            # Log it
+            logger.out('Removed OSD disk with ID {}'.format(osd_id), state='o')
+            return True
+        except Exception as e:
+            # Log it; every failed step above lands here with a message naming the step
+            logger.out('Failed to purge OSD disk with ID {}: {}'.format(osd_id, e), state='e')
+            return False
+
+
+class CephPoolInstance(object):
+    def __init__(self, zkhandler, this_node, name):
+        self.zkhandler = zkhandler
+        self.this_node = this_node
+        self.name = name
+        self.pgs = ''  # last value seen on the 'pool.pgs' key
+        self.stats = dict()  # last JSON document decoded from the 'pool.stats' key
+
+        @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('pool.pgs', self.name))
+        def watch_pool_node(data, stat, event=''):  # NOTE: despite its name, this watches 'pool.pgs'
+            if event and event.type == 'DELETED':
+                # The key existed and has now been deleted; return False to end this
+                # watcher, since this class instance is about to be reaped in Daemon.py
+                return False
+
+            try:
+                data = data.decode('ascii')
+            except AttributeError:  # data lacks .decode(); presumably None for an empty/missing key
+                data = ''
+
+            if data and data != self.pgs:
+                self.pgs = data
+
+        @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('pool.stats', self.name))
+        def watch_pool_stats(data, stat, event=''):
+            if event and event.type == 'DELETED':
+                # The key existed and has now been deleted; return False to end this
+                # watcher, since this class instance is about to be reaped in Daemon.py
+                return False
+
+            try:
+                data = data.decode('ascii')
+            except AttributeError:  # data lacks .decode(); presumably None for an empty/missing key
+                data = ''
+
+            if data and data != self.stats:  # NOTE(review): str vs decoded JSON, so this looks always-unequal
+                self.stats = json.loads(data)
+
+
+class CephVolumeInstance(object):
+    def __init__(self, zkhandler, this_node, pool, name):
+        self.zkhandler = zkhandler
+        self.this_node = this_node
+        self.pool = pool
+        self.name = name
+        self.stats = dict()  # last JSON document decoded from the 'volume.stats' key
+
+        @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('volume.stats', f'{self.pool}/{self.name}'))
+        def watch_volume_stats(data, stat, event=''):
+            if event and event.type == 'DELETED':
+                # The key existed and has now been deleted; return False to end this
+                # watcher, since this class instance is about to be reaped in Daemon.py
+                return False
+
+            try:
+                data = data.decode('ascii')
+            except AttributeError:  # data lacks .decode(); presumably None for an empty/missing key
+                data = ''
+
+            if data and data != self.stats:  # NOTE(review): str vs decoded JSON, so this looks always-unequal
+                self.stats = json.loads(data)
+
+
+class CephSnapshotInstance(object):
+    def __init__(self, zkhandler, this_node, pool, volume, name):
+        self.zkhandler = zkhandler
+        self.this_node = this_node
+        self.pool = pool
+        self.volume = volume
+        self.name = name
+        self.stats = dict()  # last JSON document decoded from the 'snapshot.stats' key
+
+        @self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('snapshot.stats', f'{self.pool}/{self.volume}/{self.name}'))
+        def watch_snapshot_stats(data, stat, event=''):
+            if event and event.type == 'DELETED':
+                # The key existed and has now been deleted; return False to end this
+                # watcher, since this class instance is about to be reaped in Daemon.py
+                return False
+
+            try:
+                data = data.decode('ascii')
+            except AttributeError:  # data lacks .decode(); presumably None for an empty/missing key
+                data = ''
+
+            if data and data != self.stats:  # NOTE(review): str vs decoded JSON, so this looks always-unequal
+                self.stats = json.loads(data)
+
+
+# Primary command function: splits data into "<command> <args>" and runs it only if it targets this node
+# This command pipe is only used for OSD adds ('osd_add node,device,weight') and removes ('osd_remove osd_id')
+def ceph_command(zkhandler, logger, this_node, data, d_osd):
+    # Get the command and args
+    command, args = data.split()
+
+    # Adding a new OSD
+    if command == 'osd_add':
+        node, device, weight = args.split(',')
+        if node == this_node.name:
+            # Lock the command queue
+            zk_lock = zkhandler.writelock('base.cmd.ceph')
+            with zk_lock:
+                # Add the OSD
+                result = CephOSDInstance.add_osd(zkhandler, logger, node, device, weight)
+                # Command succeeded
+                if result:
+                    # Update the command queue
+                    zkhandler.write([
+                        ('base.cmd.ceph', 'success-{}'.format(data))
+                    ])
+                # Command failed
+                else:
+                    # Update the command queue
+                    zkhandler.write([
+                        ('base.cmd.ceph', 'failure-{}'.format(data))
+                    ])
+                # Wait 1 second before we free the lock, to ensure the client hits the lock
+                time.sleep(1)
+
+    # Removing an OSD
+    elif command == 'osd_remove':
+        osd_id = args
+
+        # Verify osd_id is in the list (membership test, so an unknown ID is ignored instead of raising KeyError)
+        if osd_id in d_osd and d_osd[osd_id].node == this_node.name:
+            # Lock the command queue
+            zk_lock = zkhandler.writelock('base.cmd.ceph')
+            with zk_lock:
+                # Remove the OSD
+                result = CephOSDInstance.remove_osd(zkhandler, logger, osd_id, d_osd[osd_id])
+                # Command succeeded
+                if result:
+                    # Update the command queue
+                    zkhandler.write([
+                        ('base.cmd.ceph', 'success-{}'.format(data))
+                    ])
+                # Command failed
+                else:
+                    # Update the command queue
+                    zkhandler.write([
+                        ('base.cmd.ceph', 'failure-{}'.format(data))
+                    ])
+                # Wait 1 second before we free the lock, to ensure the client hits the lock
+                time.sleep(1)
diff --git a/node-daemon/pvcnoded/DNSAggregatorInstance.py b/node-daemon/pvcnoded/objects/DNSAggregatorInstance.py
similarity index 98%
rename from node-daemon/pvcnoded/DNSAggregatorInstance.py
rename to node-daemon/pvcnoded/objects/DNSAggregatorInstance.py
index 9a2f94d0..e3e3bc8e 100644
--- a/node-daemon/pvcnoded/DNSAggregatorInstance.py
+++ b/node-daemon/pvcnoded/objects/DNSAggregatorInstance.py
@@ -74,7 +74,7 @@ class PowerDNSInstance(object):
         self.dns_server_daemon = None
 
         # Floating upstreams
-        self.vni_floatingipaddr, self.vni_cidrnetmask = self.config['vni_floating_ip'].split('/')
+        self.cluster_floatingipaddr, self.cluster_cidrnetmask = self.config['cluster_floating_ip'].split('/')
         self.upstream_floatingipaddr, self.upstream_cidrnetmask = self.config['upstream_floating_ip'].split('/')
 
     def start(self):
@@ -91,7 +91,7 @@ class PowerDNSInstance(object):
             '--disable-syslog=yes',              # Log only to stdout (which is then captured)
             '--disable-axfr=no',                 # Allow AXFRs
             '--allow-axfr-ips=0.0.0.0/0',        # Allow AXFRs to anywhere
-            '--local-address={},{}'.format(self.vni_floatingipaddr, self.upstream_floatingipaddr),  # Listen on floating IPs
+            '--local-address={},{}'.format(self.cluster_floatingipaddr, self.upstream_floatingipaddr),  # Listen on floating IPs
             '--local-port=53',                   # On port 53
             '--log-dns-details=on',              # Log details
             '--loglevel=3',                      # Log info
diff --git a/node-daemon/pvcnoded/MetadataAPIInstance.py b/node-daemon/pvcnoded/objects/MetadataAPIInstance.py
similarity index 100%
rename from node-daemon/pvcnoded/MetadataAPIInstance.py
rename to node-daemon/pvcnoded/objects/MetadataAPIInstance.py
diff --git a/node-daemon/pvcnoded/NodeInstance.py b/node-daemon/pvcnoded/objects/NodeInstance.py
similarity index 97%
rename from node-daemon/pvcnoded/NodeInstance.py
rename to node-daemon/pvcnoded/objects/NodeInstance.py
index 43c4df41..72f4f50f 100644
--- a/node-daemon/pvcnoded/NodeInstance.py
+++ b/node-daemon/pvcnoded/objects/NodeInstance.py
@@ -65,9 +65,9 @@ class NodeInstance(object):
             self.upstream_dev = self.config['upstream_dev']
             self.upstream_floatingipaddr = self.config['upstream_floating_ip'].split('/')[0]
             self.upstream_ipaddr, self.upstream_cidrnetmask = self.config['upstream_dev_ip'].split('/')
-            self.vni_dev = self.config['vni_dev']
-            self.vni_floatingipaddr = self.config['vni_floating_ip'].split('/')[0]
-            self.vni_ipaddr, self.vni_cidrnetmask = self.config['vni_dev_ip'].split('/')
+            self.cluster_dev = self.config['cluster_dev']
+            self.cluster_floatingipaddr = self.config['cluster_floating_ip'].split('/')[0]
+            self.cluster_ipaddr, self.cluster_cidrnetmask = self.config['cluster_dev_ip'].split('/')
             self.storage_dev = self.config['storage_dev']
             self.storage_floatingipaddr = self.config['storage_floating_ip'].split('/')[0]
             self.storage_ipaddr, self.storage_cidrnetmask = self.config['storage_dev_ip'].split('/')
@@ -76,10 +76,10 @@ class NodeInstance(object):
             self.upstream_floatingipaddr = None
             self.upstream_ipaddr = None
             self.upstream_cidrnetmask = None
-            self.vni_dev = None
-            self.vni_floatingipaddr = None
-            self.vni_ipaddr = None
-            self.vni_cidrnetmask = None
+            self.cluster_dev = None
+            self.cluster_floatingipaddr = None
+            self.cluster_ipaddr = None
+            self.cluster_cidrnetmask = None
             self.storage_dev = None
             self.storage_floatingipaddr = None
             self.storage_ipaddr = None
@@ -387,13 +387,13 @@ class NodeInstance(object):
         # 2. Add Cluster & Storage floating IP
         self.logger.out(
             'Creating floating management IP {}/{} on interface {}'.format(
-                self.vni_floatingipaddr,
-                self.vni_cidrnetmask,
+                self.cluster_floatingipaddr,
+                self.cluster_cidrnetmask,
                 'brcluster'
             ),
             state='o'
         )
-        common.createIPAddress(self.vni_floatingipaddr, self.vni_cidrnetmask, 'brcluster')
+        common.createIPAddress(self.cluster_floatingipaddr, self.cluster_cidrnetmask, 'brcluster')
         self.logger.out(
             'Creating floating storage IP {}/{} on interface {}'.format(
                 self.storage_floatingipaddr,
@@ -599,13 +599,13 @@ class NodeInstance(object):
         # 6. Remove Cluster & Storage floating IP
         self.logger.out(
             'Removing floating management IP {}/{} from interface {}'.format(
-                self.vni_floatingipaddr,
-                self.vni_cidrnetmask,
+                self.cluster_floatingipaddr,
+                self.cluster_cidrnetmask,
                 'brcluster'
             ),
             state='o'
         )
-        common.removeIPAddress(self.vni_floatingipaddr, self.vni_cidrnetmask, 'brcluster')
+        common.removeIPAddress(self.cluster_floatingipaddr, self.cluster_cidrnetmask, 'brcluster')
         self.logger.out(
             'Removing floating storage IP {}/{} from interface {}'.format(
                 self.storage_floatingipaddr,
diff --git a/node-daemon/pvcnoded/SRIOVVFInstance.py b/node-daemon/pvcnoded/objects/SRIOVVFInstance.py
similarity index 100%
rename from node-daemon/pvcnoded/SRIOVVFInstance.py
rename to node-daemon/pvcnoded/objects/SRIOVVFInstance.py
diff --git a/node-daemon/pvcnoded/VMConsoleWatcherInstance.py b/node-daemon/pvcnoded/objects/VMConsoleWatcherInstance.py
similarity index 100%
rename from node-daemon/pvcnoded/VMConsoleWatcherInstance.py
rename to node-daemon/pvcnoded/objects/VMConsoleWatcherInstance.py
diff --git a/node-daemon/pvcnoded/VMInstance.py b/node-daemon/pvcnoded/objects/VMInstance.py
similarity index 92%
rename from node-daemon/pvcnoded/VMInstance.py
rename to node-daemon/pvcnoded/objects/VMInstance.py
index 7a30fe65..0a56fbfa 100644
--- a/node-daemon/pvcnoded/VMInstance.py
+++ b/node-daemon/pvcnoded/objects/VMInstance.py
@@ -30,86 +30,11 @@ from xml.etree import ElementTree
 
 import daemon_lib.common as common
 
-import pvcnoded.VMConsoleWatcherInstance as VMConsoleWatcherInstance
+import pvcnoded.objects.VMConsoleWatcherInstance as VMConsoleWatcherInstance
 
 import daemon_lib.common as daemon_common
 
 
-def flush_locks(zkhandler, logger, dom_uuid, this_node=None):
-    logger.out('Flushing RBD locks for VM "{}"'.format(dom_uuid), state='i')
-    # Get the list of RBD images
-    rbd_list = zkhandler.read(('domain.storage.volumes', dom_uuid)).split(',')
-
-    for rbd in rbd_list:
-        # Check if a lock exists
-        lock_list_retcode, lock_list_stdout, lock_list_stderr = common.run_os_command('rbd lock list --format json {}'.format(rbd))
-        if lock_list_retcode != 0:
-            logger.out('Failed to obtain lock list for volume "{}"'.format(rbd), state='e')
-            continue
-
-        try:
-            lock_list = json.loads(lock_list_stdout)
-        except Exception as e:
-            logger.out('Failed to parse lock list for volume "{}": {}'.format(rbd, e), state='e')
-            continue
-
-        # If there's at least one lock
-        if lock_list:
-            # Loop through the locks
-            for lock in lock_list:
-                if this_node is not None and zkhandler.read(('domain.state', dom_uuid)) != 'stop' and lock['address'].split(':')[0] != this_node.storage_ipaddr:
-                    logger.out('RBD lock does not belong to this host (lock owner: {}): freeing this lock would be unsafe, aborting'.format(lock['address'].split(':')[0], state='e'))
-                    zkhandler.write([
-                        (('domain.state', dom_uuid), 'fail'),
-                        (('domain.failed_reason', dom_uuid), 'Could not safely free RBD lock {} ({}) on volume {}; stop VM and flush locks manually'.format(lock['id'], lock['address'], rbd)),
-                    ])
-                    break
-                # Free the lock
-                lock_remove_retcode, lock_remove_stdout, lock_remove_stderr = common.run_os_command('rbd lock remove {} "{}" "{}"'.format(rbd, lock['id'], lock['locker']))
-                if lock_remove_retcode != 0:
-                    logger.out('Failed to free RBD lock "{}" on volume "{}": {}'.format(lock['id'], rbd, lock_remove_stderr), state='e')
-                    zkhandler.write([
-                        (('domain.state', dom_uuid), 'fail'),
-                        (('domain.failed_reason', dom_uuid), 'Could not free RBD lock {} ({}) on volume {}: {}'.format(lock['id'], lock['address'], rbd, lock_remove_stderr)),
-                    ])
-                    break
-                logger.out('Freed RBD lock "{}" on volume "{}"'.format(lock['id'], rbd), state='o')
-
-    return True
-
-
-# Primary command function
-def run_command(zkhandler, logger, this_node, data):
-    # Get the command and args
-    command, args = data.split()
-
-    # Flushing VM RBD locks
-    if command == 'flush_locks':
-        dom_uuid = args
-
-        # Verify that the VM is set to run on this node
-        if this_node.d_domain[dom_uuid].getnode() == this_node.name:
-            # Lock the command queue
-            zk_lock = zkhandler.writelock('base.cmd.domain')
-            with zk_lock:
-                # Flush the lock
-                result = flush_locks(zkhandler, logger, dom_uuid, this_node)
-                # Command succeeded
-                if result:
-                    # Update the command queue
-                    zkhandler.write([
-                        ('base.cmd.domain', 'success-{}'.format(data))
-                    ])
-                # Command failed
-                else:
-                    # Update the command queue
-                    zkhandler.write([
-                        ('base.cmd.domain', 'failure-{}'.format(data))
-                    ])
-                # Wait 1 seconds before we free the lock, to ensure the client hits the lock
-                time.sleep(1)
-
-
 class VMInstance(object):
     # Initialization function
     def __init__(self, domuuid, zkhandler, config, logger, this_node):
@@ -265,7 +190,7 @@ class VMInstance(object):
         if self.getdom() is None or self.getdom().state()[0] != libvirt.VIR_DOMAIN_RUNNING:
             # Flush locks
             self.logger.out('Flushing RBD locks', state='i', prefix='Domain {}'.format(self.domuuid))
-            flush_locks(self.zkhandler, self.logger, self.domuuid, self.this_node)
+            VMInstance.flush_locks(self.zkhandler, self.logger, self.domuuid, self.this_node)
             if self.zkhandler.read(('domain.state', self.domuuid)) == 'fail':
                 lv_conn.close()
                 self.dom = None
@@ -877,3 +802,79 @@ class VMInstance(object):
 
         # Return the dom object (or None)
         return dom
+
+    # Flush the locks of a VM based on UUID
+    @staticmethod
+    def flush_locks(zkhandler, logger, dom_uuid, this_node=None):
+        """
+        Free the RBD locks on each storage volume of VM dom_uuid. Problems are logged (and may set the domain state to 'fail'), but the return value is always True.
+        """
+        logger.out('Flushing RBD locks for VM "{}"'.format(dom_uuid), state='i')
+        rbd_list = zkhandler.read(('domain.storage.volumes', dom_uuid)).split(',')
+
+        for rbd in rbd_list:
+            lock_list_retcode, lock_list_stdout, lock_list_stderr = common.run_os_command('rbd lock list --format json {}'.format(rbd))
+            if lock_list_retcode != 0:
+                logger.out('Failed to obtain lock list for volume "{}"'.format(rbd), state='e')
+                continue
+
+            try:
+                lock_list = json.loads(lock_list_stdout)
+            except Exception as e:
+                logger.out('Failed to parse lock list for volume "{}": {}'.format(rbd, e), state='e')
+                continue
+
+            if lock_list:
+                for lock in lock_list:
+                    # With a node given and the VM not in state 'stop', only free locks whose address is this node's storage IP
+                    if this_node is not None and zkhandler.read(('domain.state', dom_uuid)) != 'stop' and lock['address'].split(':')[0] != this_node.storage_ipaddr:
+                        logger.out('RBD lock does not belong to this host (lock owner: {}): freeing this lock would be unsafe, aborting'.format(lock['address'].split(':')[0]), state='e')
+                        zkhandler.write([
+                            (('domain.state', dom_uuid), 'fail'),
+                            (('domain.failed_reason', dom_uuid), 'Could not safely free RBD lock {} ({}) on volume {}; stop VM and flush locks manually'.format(lock['id'], lock['address'], rbd)),
+                        ])
+                        break
+                    # Free the lock
+                    lock_remove_retcode, lock_remove_stdout, lock_remove_stderr = common.run_os_command('rbd lock remove {} "{}" "{}"'.format(rbd, lock['id'], lock['locker']))
+                    if lock_remove_retcode != 0:
+                        logger.out('Failed to free RBD lock "{}" on volume "{}": {}'.format(lock['id'], rbd, lock_remove_stderr), state='e')
+                        zkhandler.write([
+                            (('domain.state', dom_uuid), 'fail'),
+                            (('domain.failed_reason', dom_uuid), 'Could not free RBD lock {} ({}) on volume {}: {}'.format(lock['id'], lock['address'], rbd, lock_remove_stderr)),
+                        ])
+                        break
+                    logger.out('Freed RBD lock "{}" on volume "{}"'.format(lock['id'], rbd), state='o')
+
+        return True
+
+
+# Primary command function
+def vm_command(zkhandler, logger, this_node, data):
+    """
+    Handle a VM command string of the form "<command> <argument>".
+
+    Only "flush_locks <domain UUID>" is acted on, and only when that domain's
+    getnode() matches this node's name; the result is then written to the
+    'base.cmd.domain' key as "success-<data>" or "failure-<data>".
+    """
+    # Get the command and args
+    command, args = data.split()
+
+    # Any other command is ignored
+    if command != 'flush_locks':
+        return
+
+    dom_uuid = args
+    # Verify that the VM is set to run on this node
+    if this_node.d_domain[dom_uuid].getnode() != this_node.name:
+        return
+
+    # Lock the command queue for the flush and the result update
+    with zkhandler.writelock('base.cmd.domain'):
+        flushed = VMInstance.flush_locks(zkhandler, logger, dom_uuid, this_node)
+        result_value = 'success-{}'.format(data) if flushed else 'failure-{}'.format(data)
+        zkhandler.write([
+            ('base.cmd.domain', result_value)
+        ])
+        # Wait 1 seconds before we free the lock, to ensure the client hits the lock
+        time.sleep(1)
diff --git a/node-daemon/pvcnoded/VXNetworkInstance.py b/node-daemon/pvcnoded/objects/VXNetworkInstance.py
similarity index 99%
rename from node-daemon/pvcnoded/VXNetworkInstance.py
rename to node-daemon/pvcnoded/objects/VXNetworkInstance.py
index 5af83173..35680d99 100644
--- a/node-daemon/pvcnoded/VXNetworkInstance.py
+++ b/node-daemon/pvcnoded/objects/VXNetworkInstance.py
@@ -36,8 +36,8 @@ class VXNetworkInstance(object):
         self.logger = logger
         self.this_node = this_node
         self.dns_aggregator = dns_aggregator
-        self.vni_dev = config['vni_dev']
-        self.vni_mtu = config['vni_mtu']
+        self.cluster_dev = config['cluster_dev']
+        self.cluster_mtu = config['cluster_mtu']
         self.bridge_dev = config['bridge_dev']
 
         self.nettype = self.zkhandler.read(('network.type', self.vni))
@@ -481,7 +481,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
         )
 
         # Set MTU of vLAN and bridge NICs
-        vx_mtu = self.vni_mtu
+        vx_mtu = self.cluster_mtu
         common.run_os_command(
             'ip link set {} mtu {} up'.format(
                 self.vlan_nic,
@@ -521,7 +521,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
     def createNetworkManaged(self):
         self.logger.out(
             'Creating VXLAN device on interface {}'.format(
-                self.vni_dev
+                self.cluster_dev
             ),
             prefix='VNI {}'.format(self.vni),
             state='o'
@@ -532,7 +532,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
             'ip link add {} type vxlan id {} dstport 4789 dev {}'.format(
                 self.vxlan_nic,
                 self.vni,
-                self.vni_dev
+                self.cluster_dev
             )
         )
         # Create bridge interface
@@ -543,7 +543,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
         )
 
         # Set MTU of VXLAN and bridge NICs
-        vx_mtu = self.vni_mtu - 50
+        vx_mtu = self.cluster_mtu - 50
         common.run_os_command(
             'ip link set {} mtu {} up'.format(
                 self.vxlan_nic,
@@ -716,7 +716,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
     def removeNetworkBridged(self):
         self.logger.out(
             'Removing VNI device on interface {}'.format(
-                self.vni_dev
+                self.cluster_dev
             ),
             prefix='VNI {}'.format(self.vni),
             state='o'
@@ -752,7 +752,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
     def removeNetworkManaged(self):
         self.logger.out(
             'Removing VNI device on interface {}'.format(
-                self.vni_dev
+                self.cluster_dev
             ),
             prefix='VNI {}'.format(self.vni),
             state='o'
diff --git a/node-daemon/pvcnoded/objects/__init__.py b/node-daemon/pvcnoded/objects/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/node-daemon/pvcnoded/util/__init__.py b/node-daemon/pvcnoded/util/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/node-daemon/pvcnoded/util/config.py b/node-daemon/pvcnoded/util/config.py
new file mode 100644
index 00000000..8561cd5b
--- /dev/null
+++ b/node-daemon/pvcnoded/util/config.py
@@ -0,0 +1,384 @@
+#!/usr/bin/env python3
+
+# config.py - Utility functions for pvcnoded configuration parsing
+# Part of the Parallel Virtual Cluster (PVC) system
+#
+#    Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
+#
+#    This program is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, version 3.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+#
+###############################################################################
+
+import os
+import subprocess
+import yaml
+from socket import gethostname
+from re import findall
+from psutil import cpu_count
+from ipaddress import ip_address, ip_network
+
+
+class MalformedConfigurationError(Exception):
+    """Raised when the PVC node daemon configuration file cannot be interpreted."""
+
+    def __init__(self, error=None):
+        reason = error  # the underlying exception or text describing the problem
+        self.msg = f'ERROR: Configuration file is malformed: {reason}'
+
+    def __str__(self):
+        return str(self.msg)
+
+
+def get_static_data():
+    """
+    Collect the node facts that are gathered once at startup for later use.
+
+    Returns a list of four strings, in this order: the CPU count reported by
+    cpu_count(), then the stripped output of "uname -r", "uname -o" and
+    "uname -m".
+    """
+
+    def uname_field(flag):
+        # Run "uname <flag>" and return its captured output as stripped text
+        completed = subprocess.run(['uname', flag], stdout=subprocess.PIPE)
+        return completed.stdout.decode('ascii').strip()
+
+    # Keep the established ordering: CPU count first, then the three uname fields
+    static_facts = [
+        str(cpu_count()),
+        uname_field('-r'),
+        uname_field('-o'),
+        uname_field('-m'),
+    ]
+
+    return static_facts
+
+
+def get_configuration_path():
+    """Return the configuration file path from PVCD_CONFIG_FILE; if it is unset, print an error and exit with status 1."""
+    if 'PVCD_CONFIG_FILE' in os.environ:
+        return os.environ['PVCD_CONFIG_FILE']
+    print('ERROR: The "PVCD_CONFIG_FILE" environment variable must be set.', flush=True)
+    os._exit(1)  # exits without flushing stdio buffers, hence flush=True above
+
+
+def get_hostname():
+    """
+    Return (fqdn, hostname, domain, node_id) for this host; node_id is the last run of digits in the short hostname (as a string), or the integer 0 if it has none.
+    """
+    node_fqdn = gethostname()
+    node_hostname, _, node_domain = node_fqdn.partition('.')
+    digit_runs = findall(r'\d+', node_hostname)
+    node_id = digit_runs[-1] if digit_runs else 0
+
+    return node_fqdn, node_hostname, node_domain, node_id
+
+
+def validate_floating_ip(config, network):
+    """Check that config's "<network>_floating_ip" is a host address within "<network>_network"; return (ok, error message)."""
+    if network not in ['cluster', 'storage', 'upstream']:
+        return False, f'Specified network type "{network}" is not valid'
+
+    floating_key = f'{network}_floating_ip'
+    network_key = f'{network}_network'
+
+    try:
+        subnet = ip_network(config[network_key])
+    except Exception:
+        return False, f'Network address {config[network_key]} for {network_key} is not valid'
+
+    # Verify that the floating IP is a host of the network; hosts() is scanned lazily, never copied into a list
+    try:
+        floating_address = ip_address(config[floating_key].split('/')[0])
+        if floating_address not in subnet or floating_address not in subnet.hosts():
+            raise ValueError(f'{floating_address} is not a host address of {subnet}')
+    except Exception:
+        return False, f'Floating address {config[floating_key]} for {floating_key} is not valid'
+
+    return True, ''
+
+
+def get_configuration():
+    """
+    Parse the YAML file named by PVCD_CONFIG_FILE into one flat dict of settings; raises MalformedConfigurationError when a required section is missing or a network address is invalid.
+    """
+    pvcnoded_config_file = get_configuration_path()
+
+    print('Loading configuration from file "{}"'.format(pvcnoded_config_file))
+
+    with open(pvcnoded_config_file, 'r') as cfgfile:
+        try:
+            o_config = yaml.load(cfgfile, Loader=yaml.SafeLoader)
+        except Exception as e:
+            print('ERROR: Failed to parse configuration file: {}'.format(e), flush=True)
+            os._exit(1)  # exits without flushing stdio buffers, hence flush=True above
+
+    node_fqdn, node_hostname, node_domain, node_id = get_hostname()
+
+    # Create the configuration dictionary
+    config = dict()
+
+    # Get the initial base configuration
+    try:
+        o_base = o_config['pvc']
+        o_cluster = o_config['pvc']['cluster']
+    except Exception as e:
+        raise MalformedConfigurationError(e)
+
+    config_general = {
+        'node':          o_base.get('node', node_hostname),
+        'node_hostname': node_hostname,
+        'node_fqdn':     node_fqdn,
+        'node_domain':   node_domain,
+        'node_id':       node_id,
+        'coordinators':  o_cluster.get('coordinators', list()),
+        'debug':         o_base.get('debug', False),
+    }
+
+    config = {**config, **config_general}
+
+    # Get the functions configuration
+    try:
+        o_functions = o_config['pvc']['functions']
+    except Exception as e:
+        raise MalformedConfigurationError(e)
+
+    config_functions = {
+        'enable_hypervisor': o_functions.get('enable_hypervisor', False),
+        'enable_networking': o_functions.get('enable_networking', False),
+        'enable_storage':    o_functions.get('enable_storage', False),
+        'enable_api':        o_functions.get('enable_api', False),
+    }
+
+    config = {**config, **config_functions}
+
+    # Get the directory configuration
+    try:
+        o_directories = o_config['pvc']['system']['configuration']['directories']
+    except Exception as e:
+        raise MalformedConfigurationError(e)
+
+    config_directories = {
+        'dynamic_directory':     o_directories.get('dynamic_directory', None),
+        'log_directory':         o_directories.get('log_directory', None),
+        'console_log_directory': o_directories.get('console_log_directory', None),
+    }
+
+    # Define our dynamic directory schema
+    config_directories['dnsmasq_dynamic_directory'] = config_directories['dynamic_directory'] + '/dnsmasq'
+    config_directories['pdns_dynamic_directory'] = config_directories['dynamic_directory'] + '/pdns'
+    config_directories['nft_dynamic_directory'] = config_directories['dynamic_directory'] + '/nft'
+
+    # Define our log directory schema
+    config_directories['dnsmasq_log_directory'] = config_directories['log_directory'] + '/dnsmasq'
+    config_directories['pdns_log_directory'] = config_directories['log_directory'] + '/pdns'
+    config_directories['nft_log_directory'] = config_directories['log_directory'] + '/nft'
+
+    config = {**config, **config_directories}
+
+    # Get the logging configuration
+    try:
+        o_logging = o_config['pvc']['system']['configuration']['logging']
+    except Exception as e:
+        raise MalformedConfigurationError(e)
+
+    config_logging = {
+        'file_logging':                  o_logging.get('file_logging', False),
+        'stdout_logging':                o_logging.get('stdout_logging', False),
+        'zookeeper_logging':             o_logging.get('zookeeper_logging', False),
+        'log_colours':                   o_logging.get('log_colours', False),
+        'log_dates':                     o_logging.get('log_dates', False),
+        'log_keepalives':                o_logging.get('log_keepalives', False),
+        'log_keepalive_cluster_details': o_logging.get('log_keepalive_cluster_details', False),
+        'log_keepalive_storage_details': o_logging.get('log_keepalive_storage_details', False),
+        'console_log_lines':             o_logging.get('console_log_lines', False),
+        'node_log_lines':                o_logging.get('node_log_lines', False),
+    }
+
+    config = {**config, **config_logging}
+
+    # Get the interval configuration ("suicide_interval" is still accepted as a fallback for "suicide_intervals")
+    try:
+        o_intervals = o_config['pvc']['system']['intervals']
+    except Exception as e:
+        raise MalformedConfigurationError(e)
+
+    config_intervals = {
+        'vm_shutdown_timeout':  int(o_intervals.get('vm_shutdown_timeout', 60)),
+        'keepalive_interval':   int(o_intervals.get('keepalive_interval', 5)),
+        'fence_intervals':      int(o_intervals.get('fence_intervals', 6)),
+        'suicide_intervals':    int(o_intervals.get('suicide_intervals', o_intervals.get('suicide_interval', 0))),
+    }
+
+    config = {**config, **config_intervals}
+
+    # Get the fencing configuration
+    try:
+        o_fencing = o_config['pvc']['system']['fencing']
+        o_fencing_actions = o_fencing['actions']
+        o_fencing_ipmi = o_fencing['ipmi']
+    except Exception as e:
+        raise MalformedConfigurationError(e)
+
+    config_fencing = {
+        'successful_fence': o_fencing_actions.get('successful_fence', None),
+        'failed_fence':     o_fencing_actions.get('failed_fence', None),
+        'ipmi_hostname':    o_fencing_ipmi.get('host', f'{node_hostname}-lom.{node_domain}'),
+        'ipmi_username':    o_fencing_ipmi.get('user', 'null'),
+        'ipmi_password':    o_fencing_ipmi.get('pass', 'null'),
+    }
+
+    config = {**config, **config_fencing}
+
+    # Get the migration configuration
+    try:
+        o_migration = o_config['pvc']['system']['migration']
+    except Exception as e:
+        raise MalformedConfigurationError(e)
+
+    config_migration = {
+        'migration_target_selector': o_migration.get('target_selector', 'mem'),
+    }
+
+    config = {**config, **config_migration}
+
+    if config['enable_networking']:
+        # Get the node networks configuration
+        try:
+            o_networks = o_config['pvc']['cluster']['networks']
+            o_network_cluster = o_networks['cluster']
+            o_network_storage = o_networks['storage']
+            o_network_upstream = o_networks['upstream']
+            o_sysnetworks = o_config['pvc']['system']['configuration']['networking']
+            o_sysnetwork_cluster = o_sysnetworks['cluster']
+            o_sysnetwork_storage = o_sysnetworks['storage']
+            o_sysnetwork_upstream = o_sysnetworks['upstream']
+        except Exception as e:
+            raise MalformedConfigurationError(e)
+
+        config_networks = {
+            'cluster_domain':       o_network_cluster.get('domain', None),
+            'cluster_network':      o_network_cluster.get('network', None),
+            'cluster_floating_ip':  o_network_cluster.get('floating_ip', None),
+            'cluster_dev':          o_sysnetwork_cluster.get('device', None),
+            'cluster_mtu':          o_sysnetwork_cluster.get('mtu', None),
+            'cluster_dev_ip':       o_sysnetwork_cluster.get('address', None),
+            'storage_domain':       o_network_storage.get('domain', None),
+            'storage_network':      o_network_storage.get('network', None),
+            'storage_floating_ip':  o_network_storage.get('floating_ip', None),
+            'storage_dev':          o_sysnetwork_storage.get('device', None),
+            'storage_mtu':          o_sysnetwork_storage.get('mtu', None),
+            'storage_dev_ip':       o_sysnetwork_storage.get('address', None),
+            'upstream_domain':      o_network_upstream.get('domain', None),
+            'upstream_network':     o_network_upstream.get('network', None),
+            'upstream_floating_ip': o_network_upstream.get('floating_ip', None),
+            'upstream_gateway':     o_network_upstream.get('gateway', None),
+            'upstream_dev':         o_sysnetwork_upstream.get('device', None),
+            'upstream_mtu':         o_sysnetwork_upstream.get('mtu', None),
+            'upstream_dev_ip':      o_sysnetwork_upstream.get('address', None),
+            'bridge_dev':           o_sysnetworks.get('bridge_device', None),
+            'enable_sriov':         o_sysnetworks.get('sriov_enable', False),
+            'sriov_device':         o_sysnetworks.get('sriov_device', list())
+        }
+
+        config = {**config, **config_networks}
+
+        for network_type in ['cluster', 'storage', 'upstream']:
+            result, msg = validate_floating_ip(config, network_type)
+            if not result:
+                raise MalformedConfigurationError(msg)
+
+            address_key = '{}_dev_ip'.format(network_type)
+            network_key = f'{network_type}_network'
+            network = ip_network(config[network_key])
+            # With autoselection of addresses, construct an IP from the relevant network
+            if config[address_key] == 'by-id':
+                # The NodeID starts at 1, but indexes start at 0
+                address_id = int(config['node_id']) - 1
+                # Grab the nth address from the network
+                config[address_key] = '{}/{}'.format(list(network.hosts())[address_id], network.prefixlen)
+            # Validate the provided IP instead
+            else:
+                try:
+                    address = ip_address(config[address_key].split('/')[0])
+                    if address not in network or address not in network.hosts():
+                        raise ValueError(address)
+                except Exception:
+                    raise MalformedConfigurationError(
+                        f'IP address {config[address_key]} for {address_key} is not valid'
+                    )
+
+        # Get the PowerDNS aggregator database configuration
+        try:
+            o_pdnsdb = o_config['pvc']['coordinator']['dns']['database']
+        except Exception as e:
+            raise MalformedConfigurationError(e)
+
+        config_pdnsdb = {
+            'pdns_postgresql_host':     o_pdnsdb.get('host', None),
+            'pdns_postgresql_port':     o_pdnsdb.get('port', None),
+            'pdns_postgresql_dbname':   o_pdnsdb.get('name', None),
+            'pdns_postgresql_user':     o_pdnsdb.get('user', None),
+            'pdns_postgresql_password': o_pdnsdb.get('pass', None),
+        }
+
+        config = {**config, **config_pdnsdb}
+
+        # Get the Cloud-Init Metadata database configuration
+        try:
+            o_metadatadb = o_config['pvc']['coordinator']['metadata']['database']
+        except Exception as e:
+            raise MalformedConfigurationError(e)
+
+        config_metadatadb = {
+            'metadata_postgresql_host':     o_metadatadb.get('host', None),
+            'metadata_postgresql_port':     o_metadatadb.get('port', None),
+            'metadata_postgresql_dbname':   o_metadatadb.get('name', None),
+            'metadata_postgresql_user':     o_metadatadb.get('user', None),
+            'metadata_postgresql_password': o_metadatadb.get('pass', None),
+        }
+
+        config = {**config, **config_metadatadb}
+
+    if config['enable_storage']:
+        # Get the storage configuration
+        try:
+            o_storage = o_config['pvc']['system']['configuration']['storage']
+        except Exception as e:
+            raise MalformedConfigurationError(e)
+
+        config_storage = {
+            'ceph_config_file':   o_storage.get('ceph_config_file', None),
+            'ceph_admin_keyring': o_storage.get('ceph_admin_keyring', None),
+        }
+
+        config = {**config, **config_storage}
+
+    # Add our node static data to the config (not storage-specific, so set regardless of enable_storage)
+    config['static_data'] = get_static_data()
+
+    return config
+
+
+def validate_directories(config):
+    """Create the dynamic and log directories and their dnsmasq/pdns/nft subdirectories if they do not exist."""
+    # Every path is checked individually, so a subdirectory missing from an
+    # already-existing parent is still created (exist_ok makes this idempotent)
+    directory_keys = (
+        'dynamic_directory',
+        'dnsmasq_dynamic_directory', 'pdns_dynamic_directory', 'nft_dynamic_directory',
+        'log_directory',
+        'dnsmasq_log_directory', 'pdns_log_directory', 'nft_log_directory',
+    )
+    for directory_key in directory_keys:
+        os.makedirs(config[directory_key], exist_ok=True)
diff --git a/node-daemon/pvcnoded/fencing.py b/node-daemon/pvcnoded/util/fencing.py
similarity index 89%
rename from node-daemon/pvcnoded/fencing.py
rename to node-daemon/pvcnoded/util/fencing.py
index c04050ee..b519956f 100644
--- a/node-daemon/pvcnoded/fencing.py
+++ b/node-daemon/pvcnoded/util/fencing.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# fencing.py - PVC daemon function library, node fencing functions
+# fencing.py - Utility functions for pvcnoded fencing
 # Part of the Parallel Virtual Cluster (PVC) system
 #
 #    Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
@@ -22,13 +22,14 @@
 import time
 
 import daemon_lib.common as common
-import pvcnoded.VMInstance as VMInstance
+
+from pvcnoded.objects.VMInstance import VMInstance
 
 
 #
 # Fence thread entry function
 #
-def fenceNode(node_name, zkhandler, config, logger):
+def fence_node(node_name, zkhandler, config, logger):
     # We allow exactly 6 saving throws (30 seconds) for the host to come back online or we kill it
     failcount_limit = 6
     failcount = 0
@@ -40,13 +41,13 @@ def fenceNode(node_name, zkhandler, config, logger):
         # Is it still 'dead'
         if node_daemon_state == 'dead':
             failcount += 1
-            logger.out('Node "{}" failed {}/{} saving throws'.format(node_name, failcount, failcount_limit), state='w')
+            logger.out('Node "{}" failed {}/{} saving throws'.format(node_name, failcount, failcount_limit), state='s')
         # It changed back to something else so it must be alive
         else:
             logger.out('Node "{}" passed a saving throw; canceling fence'.format(node_name), state='o')
             return
 
-    logger.out('Fencing node "{}" via IPMI reboot signal'.format(node_name), state='w')
+    logger.out('Fencing node "{}" via IPMI reboot signal'.format(node_name), state='s')
 
     # Get IPMI information
     ipmi_hostname = zkhandler.read(('node.ipmi.hostname', node_name))
@@ -54,7 +55,7 @@ def fenceNode(node_name, zkhandler, config, logger):
     ipmi_password = zkhandler.read(('node.ipmi.password', node_name))
 
     # Shoot it in the head
-    fence_status = rebootViaIPMI(ipmi_hostname, ipmi_username, ipmi_password, logger)
+    fence_status = reboot_via_ipmi(ipmi_hostname, ipmi_username, ipmi_password, logger)
     # Hold to ensure the fence takes effect and system stabilizes
     time.sleep(config['keepalive_interval'] * 2)
 
@@ -123,7 +124,7 @@ def migrateFromFencedNode(zkhandler, node_name, config, logger):
 #
 # Perform an IPMI fence
 #
-def rebootViaIPMI(ipmi_hostname, ipmi_user, ipmi_password, logger):
+def reboot_via_ipmi(ipmi_hostname, ipmi_user, ipmi_password, logger):
     # Forcibly reboot the node
     ipmi_command_reset = '/usr/bin/ipmitool -I lanplus -H {} -U {} -P {} chassis power reset'.format(
         ipmi_hostname, ipmi_user, ipmi_password
@@ -131,8 +132,7 @@ def rebootViaIPMI(ipmi_hostname, ipmi_user, ipmi_password, logger):
     ipmi_reset_retcode, ipmi_reset_stdout, ipmi_reset_stderr = common.run_os_command(ipmi_command_reset)
 
     if ipmi_reset_retcode != 0:
-        logger.out('Failed to reboot dead node', state='e')
-        print(ipmi_reset_stderr)
+        logger.out(f'Failed to reboot dead node: {ipmi_reset_stderr}', state='e')
 
     time.sleep(1)
 
@@ -178,12 +178,10 @@ def rebootViaIPMI(ipmi_hostname, ipmi_user, ipmi_password, logger):
 #
 # Verify that IPMI connectivity to this host exists (used during node init)
 #
-def verifyIPMI(ipmi_hostname, ipmi_user, ipmi_password):
-    ipmi_command_status = '/usr/bin/ipmitool -I lanplus -H {} -U {} -P {} chassis power status'.format(
-        ipmi_hostname, ipmi_user, ipmi_password
-    )
-    ipmi_status_retcode, ipmi_status_stdout, ipmi_status_stderr = common.run_os_command(ipmi_command_status, timeout=2)
-    if ipmi_status_retcode == 0 and ipmi_status_stdout != "Chassis Power is on":
+def verify_ipmi(ipmi_hostname, ipmi_user, ipmi_password):
+    ipmi_command = f'/usr/bin/ipmitool -I lanplus -H {ipmi_hostname} -U {ipmi_user} -P {ipmi_password} chassis power status'
+    retcode, stdout, stderr = common.run_os_command(ipmi_command, timeout=2)
+    if retcode == 0 and stdout.strip() == "Chassis Power is on":  # strip surrounding whitespace (e.g. a trailing newline) before comparing
         return True
     else:
         return False
diff --git a/node-daemon/pvcnoded/util/keepalive.py b/node-daemon/pvcnoded/util/keepalive.py
new file mode 100644
index 00000000..fd2168e0
--- /dev/null
+++ b/node-daemon/pvcnoded/util/keepalive.py
@@ -0,0 +1,718 @@
+#!/usr/bin/env python3
+
+# keepalive.py - Utility functions for pvcnoded Keepalives
+# Part of the Parallel Virtual Cluster (PVC) system
+#
+#    Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
+#
+#    This program is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, version 3.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+#
+###############################################################################
+
+import pvcnoded.util.fencing
+
+import daemon_lib.common as common
+
+from apscheduler.schedulers.background import BackgroundScheduler
+from rados import Rados
+from xml.etree import ElementTree
+from queue import Queue
+from threading import Thread
+from datetime import datetime
+
+import json
+import re
+import libvirt
+import psutil
+import os
+import time
+
+
+# State table for pretty stats
+libvirt_vm_states = {
+    0: "NOSTATE",
+    1: "RUNNING",
+    2: "BLOCKED",
+    3: "PAUSED",
+    4: "SHUTDOWN",
+    5: "SHUTOFF",
+    6: "CRASHED",
+    7: "PMSUSPENDED"
+}
+
+
+def start_keepalive_timer(logger, config, zkhandler, this_node):
+    """
+    Start and return a scheduler that runs node_keepalive every config['keepalive_interval'] seconds.
+    """
+    keepalive_interval = config['keepalive_interval']
+    logger.out(f'Starting keepalive timer ({keepalive_interval} second interval)', state='s')
+    job_args = (logger, config, zkhandler, this_node)
+    scheduler = BackgroundScheduler()
+    scheduler.add_job(node_keepalive, args=job_args, trigger='interval', seconds=keepalive_interval)
+    scheduler.start()
+    return scheduler
+
+
+def stop_keepalive_timer(logger, keepalive_timer):
+    try:  # best-effort: a failed shutdown is logged as a warning, never raised
+        keepalive_timer.shutdown()
+        logger.out('Stopping keepalive timer', state='s')  # only logged once shutdown() has returned
+    except Exception:
+        logger.out('Failed to stop keepalive timer', state='w')
+
+
+# Ceph stats update function
+def collect_ceph_stats(logger, config, zkhandler, this_node, queue):
+    """Gather Ceph health, pool and OSD stats; put (health colour, health, OSD count on this node) on queue."""
+    pool_list = zkhandler.children('base.pool')
+    osd_list = zkhandler.children('base.osd')
+
+    debug = config['debug']
+    if debug:
+        logger.out("Thread starting", state='d', prefix='ceph-thread')
+
+    # Connect to the Ceph cluster
+    try:
+        ceph_conn = Rados(conffile=config['ceph_config_file'], conf=dict(keyring=config['ceph_admin_keyring']))
+        if debug:
+            logger.out("Connecting to cluster", state='d', prefix='ceph-thread')
+        ceph_conn.connect(timeout=1)
+    except Exception as e:
+        logger.out('Failed to open connection to Ceph cluster: {}'.format(e), state='e')
+        return
+
+    if debug:
+        logger.out("Getting health stats from monitor", state='d', prefix='ceph-thread')
+
+    # Get Ceph cluster health for local status output
+    command = {"prefix": "health", "format": "json"}
+    try:
+        health_status = json.loads(ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1])
+        ceph_health = health_status['status']
+    except Exception as e:
+        logger.out('Failed to obtain Ceph health data: {}'.format(e), state='e')
+        ceph_health = 'HEALTH_UNKN'
+
+    if ceph_health in ['HEALTH_OK']:
+        ceph_health_colour = logger.fmt_green
+    elif ceph_health in ['HEALTH_UNKN']:
+        ceph_health_colour = logger.fmt_cyan
+    elif ceph_health in ['HEALTH_WARN']:
+        ceph_health_colour = logger.fmt_yellow
+    else:
+        ceph_health_colour = logger.fmt_red
+
+    # Primary-only functions
+    if this_node.router_state == 'primary':
+        if debug:
+            logger.out("Set ceph health information in zookeeper (primary only)", state='d', prefix='ceph-thread')
+
+        command = {"prefix": "status", "format": "pretty"}
+        ceph_status = ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1].decode('ascii')
+        try:
+            zkhandler.write([
+                ('base.storage', str(ceph_status))
+            ])
+        except Exception as e:
+            logger.out('Failed to set Ceph status data: {}'.format(e), state='e')
+
+        if debug:
+            logger.out("Set ceph rados df information in zookeeper (primary only)", state='d', prefix='ceph-thread')
+
+        # Get rados df info
+        command = {"prefix": "df", "format": "pretty"}
+        ceph_df = ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1].decode('ascii')
+        try:
+            zkhandler.write([
+                ('base.storage.util', str(ceph_df))
+            ])
+        except Exception as e:
+            logger.out('Failed to set Ceph utilization data: {}'.format(e), state='e')
+
+        if debug:
+            logger.out("Set pool information in zookeeper (primary only)", state='d', prefix='ceph-thread')
+
+        # Get pool info
+        command = {"prefix": "df", "format": "json"}
+        ceph_df_output = ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1].decode('ascii')
+        try:
+            ceph_pool_df_raw = json.loads(ceph_df_output)['pools']
+        except Exception as e:
+            logger.out('Failed to obtain Pool data (ceph df): {}'.format(e), state='w')
+            ceph_pool_df_raw = []
+
+        retcode, stdout, stderr = common.run_os_command('rados df --format json', timeout=1)
+        try:
+            rados_pool_df_raw = json.loads(stdout)['pools']
+        except Exception as e:
+            logger.out('Failed to obtain Pool data (rados df): {}'.format(e), state='w')
+            rados_pool_df_raw = []
+
+        pool_count = len(ceph_pool_df_raw)
+        if debug:
+            logger.out("Getting info for {} pools".format(pool_count), state='d', prefix='ceph-thread')
+        for pool_idx in range(0, pool_count):
+            try:
+                # Combine all the data for this pool
+                ceph_pool_df = ceph_pool_df_raw[pool_idx]
+                rados_pool_df = rados_pool_df_raw[pool_idx]
+                pool = ceph_pool_df
+                pool.update(rados_pool_df)
+
+                # Ignore any pools that aren't in our pool list
+                if pool['name'] not in pool_list:
+                    if debug:
+                        logger.out("Pool {} not in pool list {}".format(pool['name'], pool_list), state='d', prefix='ceph-thread')
+                    continue
+                else:
+                    if debug:
+                        logger.out("Parsing data for pool {}".format(pool['name']), state='d', prefix='ceph-thread')
+
+                # Assemble a useful data structure
+                pool_df = {
+                    'id': pool['id'],
+                    'stored_bytes': pool['stats']['stored'],
+                    'free_bytes': pool['stats']['max_avail'],
+                    'used_bytes': pool['stats']['bytes_used'],
+                    'used_percent': pool['stats']['percent_used'],
+                    'num_objects': pool['stats']['objects'],
+                    'num_object_clones': pool['num_object_clones'],
+                    'num_object_copies': pool['num_object_copies'],
+                    'num_objects_missing_on_primary': pool['num_objects_missing_on_primary'],
+                    'num_objects_unfound': pool['num_objects_unfound'],
+                    'num_objects_degraded': pool['num_objects_degraded'],
+                    'read_ops': pool['read_ops'],
+                    'read_bytes': pool['read_bytes'],
+                    'write_ops': pool['write_ops'],
+                    'write_bytes': pool['write_bytes']
+                }
+
+                # Write the pool data to Zookeeper
+                zkhandler.write([
+                    (('pool.stats', pool['name']), str(json.dumps(pool_df)))
+                ])
+            except Exception as e:
+                # Missing or mismatched pool data (e.g. a status command timed out); skip this pool
+                logger.out('Failed to format and send pool data: {}'.format(e), state='w')
+
+    # Only grab OSD stats if there are OSDs to grab (otherwise `ceph osd df` hangs)
+    osds_this_node = 0
+    if len(osd_list) > 0:
+        # Get data from Ceph OSDs
+        if debug:
+            logger.out("Get data from Ceph OSDs", state='d', prefix='ceph-thread')
+
+        # Parse the dump data
+        osd_dump = dict()
+
+        command = {"prefix": "osd dump", "format": "json"}
+        osd_dump_output = ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1].decode('ascii')
+        try:
+            osd_dump_raw = json.loads(osd_dump_output)['osds']
+        except Exception as e:
+            logger.out('Failed to obtain OSD data: {}'.format(e), state='w')
+            osd_dump_raw = []
+
+        if debug:
+            logger.out("Loop through OSD dump", state='d', prefix='ceph-thread')
+        for osd in osd_dump_raw:
+            osd_dump.update({
+                str(osd['osd']): {
+                    'uuid': osd['uuid'],
+                    'up': osd['up'],
+                    'in': osd['in'],
+                    'primary_affinity': osd['primary_affinity']
+                }
+            })
+
+        # Parse the df data
+        if debug:
+            logger.out("Parse the OSD df data", state='d', prefix='ceph-thread')
+
+        osd_df = dict()
+
+        command = {"prefix": "osd df", "format": "json"}
+        try:
+            osd_df_raw = json.loads(ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1])['nodes']
+        except Exception as e:
+            logger.out('Failed to obtain OSD data: {}'.format(e), state='w')
+            osd_df_raw = []
+
+        if debug:
+            logger.out("Loop through OSD df", state='d', prefix='ceph-thread')
+        for osd in osd_df_raw:
+            osd_df.update({
+                str(osd['id']): {
+                    'utilization': osd['utilization'],
+                    'var': osd['var'],
+                    'pgs': osd['pgs'],
+                    'kb': osd['kb'],
+                    'weight': osd['crush_weight'],
+                    'reweight': osd['reweight'],
+                }
+            })
+
+        # Parse the status data
+        if debug:
+            logger.out("Parse the OSD status data", state='d', prefix='ceph-thread')
+
+        osd_status = dict()
+
+        command = {"prefix": "osd status", "format": "pretty"}
+        try:
+            osd_status_raw = ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1].decode('ascii')
+        except Exception as e:
+            logger.out('Failed to obtain OSD status data: {}'.format(e), state='w')
+            osd_status_raw = ''  # must stay a str: it is split on newlines just below
+
+        if debug:
+            logger.out("Loop through OSD status data", state='d', prefix='ceph-thread')
+
+        for line in osd_status_raw.split('\n'):
+            # Strip off colour
+            line = re.sub(r'\x1b(\[.*?[@-~]|\].*?(\x07|\x1b\\))', '', line)
+            # Split it for parsing
+            line = line.split()
+            if len(line) > 1 and line[1].isdigit():
+                # This is an OSD line so parse it
+                osd_id = line[1]
+                node = line[3].split('.')[0]
+                used = line[5]
+                avail = line[7]
+                wr_ops = line[9]
+                wr_data = line[11]
+                rd_ops = line[13]
+                rd_data = line[15]
+                state = line[17]
+                osd_status.update({
+                    str(osd_id): {
+                        'node': node,
+                        'used': used,
+                        'avail': avail,
+                        'wr_ops': wr_ops,
+                        'wr_data': wr_data,
+                        'rd_ops': rd_ops,
+                        'rd_data': rd_data,
+                        'state': state
+                    }
+                })
+
+        # Merge them together into a single meaningful dict
+        if debug:
+            logger.out("Merge OSD data together", state='d', prefix='ceph-thread')
+
+        osd_stats = dict()
+
+        for osd in osd_list:
+            if zkhandler.read(('osd.node', osd)) == config['node_hostname']:
+                osds_this_node += 1
+            try:
+                this_dump = osd_dump[osd]
+                this_dump.update(osd_df[osd])
+                this_dump.update(osd_status[osd])
+                osd_stats[osd] = this_dump
+            except KeyError as e:
+                # One or more of the status commands timed out, just continue
+                logger.out('Failed to parse OSD stats into dictionary: {}'.format(e), state='w')
+
+        # Upload OSD data for the cluster (primary-only)
+        if this_node.router_state == 'primary':
+            if debug:
+                logger.out("Trigger updates for each OSD", state='d', prefix='ceph-thread')
+
+            for osd in osd_list:
+                try:
+                    stats = json.dumps(osd_stats[osd])
+                    zkhandler.write([
+                        (('osd.stats', osd), str(stats))
+                    ])
+                except KeyError as e:
+                    # One or more of the status commands timed out, just continue
+                    logger.out('Failed to upload OSD stats from dictionary: {}'.format(e), state='w')
+
+    ceph_conn.shutdown()
+
+    queue.put(ceph_health_colour)
+    queue.put(ceph_health)
+    queue.put(osds_this_node)
+
+    if debug:
+        logger.out("Thread finished", state='d', prefix='ceph-thread')
+
+
+# VM stats update function
+def collect_vm_stats(logger, config, zkhandler, this_node, queue):  # results are handed back through queue (see the puts at the end)
+    debug = config['debug']
+    if debug:
+        logger.out("Thread starting", state='d', prefix='vm-thread')
+
+    # Connect to the local libvirt daemon
+    libvirt_name = "qemu:///system"
+    if debug:
+        logger.out("Connecting to libvirt", state='d', prefix='vm-thread')
+    lv_conn = libvirt.open(libvirt_name)
+    if lv_conn is None:
+        logger.out('Failed to open connection to "{}"'.format(libvirt_name), state='e')
+
+    memalloc = 0
+    memprov = 0
+    vcpualloc = 0
+    # Sum up memory/vCPU totals and re-trigger the state of any VM that should be running here but is not
+    if debug:
+        logger.out("Toggle state management of dead VMs to restart them", state='d', prefix='vm-thread')
+    # Iterate over a copy of d_domain; if the original changes in flight, iterating it directly can fail
+    fixed_d_domain = this_node.d_domain.copy()
+    for domain, instance in fixed_d_domain.items():
+        if domain in this_node.domain_list:
+            # VMs in this node's domain_list count towards allocated memory, provisioned memory and vCPUs
+            memalloc += instance.getmemory()
+            memprov += instance.getmemory()
+            vcpualloc += instance.getvcpus()
+            if instance.getstate() == 'start' and instance.getnode() == this_node.name:
+                if instance.getdom() is not None:
+                    try:
+                        if instance.getdom().state()[0] != libvirt.VIR_DOMAIN_RUNNING:
+                            logger.out("VM {} has failed".format(instance.domname), state='w', prefix='vm-thread')
+                            raise  # no exception is active here, so this raises RuntimeError, handled just below
+                    except Exception:
+                        # Toggle a state "change" by rewriting the VM's current state to Zookeeper
+                        logger.out("Resetting state to {} for VM {}".format(instance.getstate(), instance.domname), state='i', prefix='vm-thread')
+                        zkhandler.write([
+                            (('domain.state', domain), instance.getstate())
+                        ])
+        elif instance.getnode() == this_node.name:
+            memprov += instance.getmemory()  # assigned to this node but not in domain_list: provisioned only
+
+    # Get list of running domains from Libvirt
+    running_domains = lv_conn.listAllDomains(libvirt.VIR_CONNECT_LIST_DOMAINS_ACTIVE)
+
+    # Get statistics from any running VMs
+    for domain in running_domains:
+        try:
+            # Get basic information about the VM
+            tree = ElementTree.fromstring(domain.XMLDesc())
+            domain_uuid = domain.UUIDString()
+            domain_name = domain.name()
+
+            # Get all the raw information about the VM
+            if debug:
+                logger.out("Getting general statistics for VM {}".format(domain_name), state='d', prefix='vm-thread')
+            domain_state, domain_maxmem, domain_mem, domain_vcpus, domain_cputime = domain.info()
+            # We can't properly gather stats from a non-running VM, so skip it
+            if domain_state != libvirt.VIR_DOMAIN_RUNNING:
+                continue
+            domain_memory_stats = domain.memoryStats()
+            domain_cpu_stats = domain.getCPUStats(True)[0]
+        except Exception as e:
+            if debug:
+                try:
+                    logger.out("Failed getting VM information for {}: {}".format(domain.name(), e), state='d', prefix='vm-thread')
+                except Exception:
+                    pass  # domain.name() itself can fail here; the debug message is then dropped
+            continue
+
+        # Ensure VM is present in the domain_list
+        if domain_uuid not in this_node.domain_list:
+            this_node.domain_list.append(domain_uuid)
+
+        if debug:
+            logger.out("Getting disk statistics for VM {}".format(domain_name), state='d', prefix='vm-thread')
+        domain_disk_stats = []
+        for disk in tree.findall('devices/disk'):
+            disk_name = disk.find('source').get('name')
+            if not disk_name:
+                disk_name = disk.find('source').get('file')  # fall back to the 'file' attribute when 'name' is absent
+            disk_stats = domain.blockStats(disk.find('target').get('dev'))
+            domain_disk_stats.append({
+                "name": disk_name,
+                "rd_req": disk_stats[0],
+                "rd_bytes": disk_stats[1],
+                "wr_req": disk_stats[2],
+                "wr_bytes": disk_stats[3],
+                "err": disk_stats[4]
+            })
+
+        if debug:
+            logger.out("Getting network statistics for VM {}".format(domain_name), state='d', prefix='vm-thread')
+        domain_network_stats = []
+        for interface in tree.findall('devices/interface'):
+            interface_type = interface.get('type')
+            if interface_type not in ['bridge']:
+                continue  # only bridge-type interfaces are reported
+            interface_name = interface.find('target').get('dev')
+            interface_bridge = interface.find('source').get('bridge')
+            interface_stats = domain.interfaceStats(interface_name)
+            domain_network_stats.append({
+                "name": interface_name,
+                "bridge": interface_bridge,
+                "rd_bytes": interface_stats[0],
+                "rd_packets": interface_stats[1],
+                "rd_errors": interface_stats[2],
+                "rd_drops": interface_stats[3],
+                "wr_bytes": interface_stats[4],
+                "wr_packets": interface_stats[5],
+                "wr_errors": interface_stats[6],
+                "wr_drops": interface_stats[7]
+            })
+
+        # Create the final dictionary
+        domain_stats = {
+            "state": libvirt_vm_states[domain_state],
+            "maxmem": domain_maxmem,
+            "livemem": domain_mem,
+            "cpus": domain_vcpus,
+            "cputime": domain_cputime,
+            "mem_stats": domain_memory_stats,
+            "cpu_stats": domain_cpu_stats,
+            "disk_stats": domain_disk_stats,
+            "net_stats": domain_network_stats
+        }
+
+        if debug:
+            logger.out("Writing statistics for VM {} to Zookeeper".format(domain_name), state='d', prefix='vm-thread')
+
+        try:
+            zkhandler.write([
+                (('domain.stats', domain_uuid), str(json.dumps(domain_stats)))
+            ])
+        except Exception as e:
+            if debug:
+                logger.out("{}".format(e), state='d', prefix='vm-thread')
+
+    # Close the Libvirt connection
+    lv_conn.close()
+
+    queue.put(len(running_domains))  # order matters: node_keepalive reads these four values back in this sequence
+    queue.put(memalloc)
+    queue.put(memprov)
+    queue.put(vcpualloc)
+
+    if debug:
+        logger.out("Thread finished", state='d', prefix='vm-thread')
+
+
+# Keepalive update function
+def node_keepalive(logger, config, zkhandler, this_node):
+    """Run one keepalive cycle: publish this node's stats to Zookeeper and, on coordinators, start fencing of dead nodes."""
+    debug = config['debug']
+    if debug:
+        logger.out("Keepalive starting", state='d', prefix='main-thread')
+
+    # Set the migration selector in Zookeeper for clients to read
+    if config['enable_hypervisor']:
+        if this_node.router_state == 'primary':
+            try:
+                if zkhandler.read('base.config.migration_target_selector') != config['migration_target_selector']:
+                    raise
+            except Exception:
+                zkhandler.write([
+                    ('base.config.migration_target_selector', config['migration_target_selector'])
+                ])
+
+    # Set the upstream IP in Zookeeper for clients to read
+    if config['enable_networking']:
+        if this_node.router_state == 'primary':
+            try:
+                if zkhandler.read('base.config.upstream_ip') != config['upstream_floating_ip']:
+                    raise
+            except Exception:
+                zkhandler.write([
+                    ('base.config.upstream_ip', config['upstream_floating_ip'])
+                ])
+
+    # Get past state and update if needed
+    if debug:
+        logger.out("Get past state and update if needed", state='d', prefix='main-thread')
+
+    past_state = zkhandler.read(('node.state.daemon', this_node.name))
+    this_node.daemon_state = 'run'
+    if past_state not in ('run', 'shutdown'):
+        zkhandler.write([
+            (('node.state.daemon', this_node.name), 'run')
+        ])
+
+    # Ensure the primary key is properly set
+    if debug:
+        logger.out("Ensure the primary key is properly set", state='d', prefix='main-thread')
+    if this_node.router_state == 'primary':
+        if zkhandler.read('base.config.primary_node') != this_node.name:
+            zkhandler.write([
+                ('base.config.primary_node', this_node.name)
+            ])
+
+    # Run VM statistics collection in separate thread for parallelization
+    if config['enable_hypervisor']:
+        vm_thread_queue = Queue()
+        vm_stats_thread = Thread(target=collect_vm_stats, args=(logger, config, zkhandler, this_node, vm_thread_queue), kwargs={})
+        vm_stats_thread.start()
+
+    # Run Ceph status collection in separate thread for parallelization
+    if config['enable_storage']:
+        ceph_thread_queue = Queue()
+        ceph_stats_thread = Thread(target=collect_ceph_stats, args=(logger, config, zkhandler, this_node, ceph_thread_queue), kwargs={})
+        ceph_stats_thread.start()
+
+    # Get node performance statistics
+    this_node.memtotal = int(psutil.virtual_memory().total / 1024 / 1024)
+    this_node.memused = int(psutil.virtual_memory().used / 1024 / 1024)
+    this_node.memfree = int(psutil.virtual_memory().free / 1024 / 1024)
+    this_node.cpuload = os.getloadavg()[0]
+
+    # Join against running threads
+    if config['enable_hypervisor']:
+        vm_stats_thread.join(timeout=4.0)
+        if vm_stats_thread.is_alive():
+            logger.out('VM stats gathering exceeded 4s timeout, continuing', state='w')
+    if config['enable_storage']:
+        ceph_stats_thread.join(timeout=4.0)
+        if ceph_stats_thread.is_alive():
+            logger.out('Ceph stats gathering exceeded 4s timeout, continuing', state='w')
+
+    # Get information from thread queues
+    # (bounded waits: a stats thread that died or returned early never fills its queue)
+    if config['enable_hypervisor']:
+        try:
+            this_node.domains_count = vm_thread_queue.get(timeout=config['keepalive_interval'])
+            this_node.memalloc = vm_thread_queue.get(timeout=config['keepalive_interval'])
+            this_node.memprov = vm_thread_queue.get(timeout=config['keepalive_interval'])
+            this_node.vcpualloc = vm_thread_queue.get(timeout=config['keepalive_interval'])
+        except Exception:
+            logger.out('VM stats queue get exceeded timeout, continuing', state='w')
+    else:
+        this_node.domains_count = 0
+        this_node.memalloc = 0
+        this_node.memprov = 0
+        this_node.vcpualloc = 0
+
+    if config['enable_storage']:
+        try:
+            ceph_health_colour = ceph_thread_queue.get(timeout=config['keepalive_interval'])
+            ceph_health = ceph_thread_queue.get(timeout=config['keepalive_interval'])
+            osds_this_node = ceph_thread_queue.get(timeout=config['keepalive_interval'])
+        except Exception:
+            ceph_health_colour = logger.fmt_cyan
+            ceph_health = 'UNKNOWN'
+            osds_this_node = '?'
+
+    # Set our information in zookeeper
+    keepalive_time = int(time.time())
+    if debug:
+        logger.out("Set our information in zookeeper", state='d', prefix='main-thread')
+    try:
+        zkhandler.write([
+            (('node.memory.total', this_node.name), str(this_node.memtotal)),
+            (('node.memory.used', this_node.name), str(this_node.memused)),
+            (('node.memory.free', this_node.name), str(this_node.memfree)),
+            (('node.memory.allocated', this_node.name), str(this_node.memalloc)),
+            (('node.memory.provisioned', this_node.name), str(this_node.memprov)),
+            (('node.vcpu.allocated', this_node.name), str(this_node.vcpualloc)),
+            (('node.cpu.load', this_node.name), str(this_node.cpuload)),
+            (('node.count.provisioned_domains', this_node.name), str(this_node.domains_count)),
+            (('node.running_domains', this_node.name), ' '.join(this_node.domain_list)),
+            (('node.keepalive', this_node.name), str(keepalive_time)),
+        ])
+    except Exception:
+        logger.out('Failed to set keepalive data', state='e')
+
+    # Display node information to the terminal
+    if config['log_keepalives']:
+        if this_node.router_state == 'primary':
+            cst_colour = logger.fmt_green
+        elif this_node.router_state == 'secondary':
+            cst_colour = logger.fmt_blue
+        else:
+            cst_colour = logger.fmt_cyan
+        logger.out(
+            '{}{} keepalive @ {}{} [{}{}{}]'.format(
+                logger.fmt_purple,
+                config['node_hostname'],
+                datetime.now(),
+                logger.fmt_end,
+                logger.fmt_bold + cst_colour,
+                this_node.router_state,
+                logger.fmt_end
+            ),
+            state='t'
+        )
+        if config['log_keepalive_cluster_details']:
+            logger.out(
+                '{bold}Maintenance:{nofmt} {maint}  '
+                '{bold}Active VMs:{nofmt} {domcount}  '
+                '{bold}Networks:{nofmt} {netcount}  '
+                '{bold}Load:{nofmt} {load}  '
+                '{bold}Memory [MiB]: VMs:{nofmt} {allocmem}  '
+                '{bold}Used:{nofmt} {usedmem}  '
+                '{bold}Free:{nofmt} {freemem}'.format(
+                    bold=logger.fmt_bold,
+                    nofmt=logger.fmt_end,
+                    maint=this_node.maintenance,
+                    domcount=this_node.domains_count,
+                    netcount=len(zkhandler.children('base.network')),
+                    load=this_node.cpuload,
+                    freemem=this_node.memfree,
+                    usedmem=this_node.memused,
+                    allocmem=this_node.memalloc,
+                ),
+                state='t'
+            )
+        if config['enable_storage'] and config['log_keepalive_storage_details']:
+            logger.out(
+                '{bold}Ceph cluster status:{nofmt} {health_colour}{health}{nofmt}  '
+                '{bold}Total OSDs:{nofmt} {total_osds}  '
+                '{bold}Node OSDs:{nofmt} {node_osds}  '
+                '{bold}Pools:{nofmt} {total_pools}  '.format(
+                    bold=logger.fmt_bold,
+                    health_colour=ceph_health_colour,
+                    nofmt=logger.fmt_end,
+                    health=ceph_health,
+                    total_osds=len(zkhandler.children('base.osd')),
+                    node_osds=osds_this_node,
+                    total_pools=len(zkhandler.children('base.pool'))
+                ),
+                state='t'
+            )
+
+    # Look for dead nodes and fence them
+    if not this_node.maintenance:
+        if debug:
+            logger.out("Look for dead nodes and fence them", state='d', prefix='main-thread')
+        if config['daemon_mode'] == 'coordinator':
+            for node_name in zkhandler.children('base.node'):
+                try:
+                    node_daemon_state = zkhandler.read(('node.state.daemon', node_name))
+                    node_keepalive_time = int(zkhandler.read(('node.keepalive', node_name)))
+                except Exception:
+                    node_daemon_state = 'unknown'
+                    node_keepalive_time = 0
+
+                # Handle deadtime and fencing if needed
+                # (A node is considered dead when its keepalive timestamp is more than
+                # keepalive_interval * fence_intervals seconds out-of-date while in 'run' state)
+                node_deadtime = int(time.time()) - (int(config['keepalive_interval']) * int(config['fence_intervals']))
+                if node_keepalive_time < node_deadtime and node_daemon_state == 'run':
+                    logger.out('Node {} seems dead - starting monitor for fencing'.format(node_name), state='w')
+                    zk_lock = zkhandler.writelock(('node.state.daemon', node_name))
+                    with zk_lock:
+                        # Ensures that, if we lost the lock race and come out of waiting,
+                        # we won't try to trigger our own fence thread.
+                        if zkhandler.read(('node.state.daemon', node_name)) != 'dead':
+                            fence_thread = Thread(target=pvcnoded.util.fencing.fence_node, args=(node_name, zkhandler, config, logger), kwargs={})
+                            fence_thread.start()
+                            # Write the updated data after we start the fence thread
+                            zkhandler.write([
+                                (('node.state.daemon', node_name), 'dead')
+                            ])
+
+    if debug:
+        logger.out("Keepalive finished", state='d', prefix='main-thread')
diff --git a/node-daemon/pvcnoded/util/libvirt.py b/node-daemon/pvcnoded/util/libvirt.py
new file mode 100644
index 00000000..f6572b58
--- /dev/null
+++ b/node-daemon/pvcnoded/util/libvirt.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python3
+
+# libvirt.py - Utility functions for pvcnoded libvirt
+# Part of the Parallel Virtual Cluster (PVC) system
+#
+#    Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
+#
+#    This program is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, version 3.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+#
+###############################################################################
+
+import libvirt
+
+
+def validate_libvirtd(logger, config):
+    """Return True if libvirtd accepts a connection (or the hypervisor is disabled), else False."""
+    if not config['enable_hypervisor']:
+        return True
+    libvirt_uri = f'qemu+tcp://{config["node_hostname"]}/system'
+    logger.out(f'Connecting to Libvirt daemon at {libvirt_uri}', state='i')
+    try:
+        libvirt.open(libvirt_uri).close()
+    except Exception as e:
+        logger.out(f'Failed to connect to Libvirt daemon: {e}', state='e')
+        return False
+    return True
diff --git a/node-daemon/pvcnoded/util/networking.py b/node-daemon/pvcnoded/util/networking.py
new file mode 100644
index 00000000..4a70371b
--- /dev/null
+++ b/node-daemon/pvcnoded/util/networking.py
@@ -0,0 +1,181 @@
+#!/usr/bin/env python3
+
+# networking.py - Utility functions for pvcnoded networking
+# Part of the Parallel Virtual Cluster (PVC) system
+#
+#    Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
+#
+#    This program is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, version 3.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+#
+###############################################################################
+
+import daemon_lib.common as common
+
+from time import sleep
+from os import makedirs
+
+
+def setup_sriov(logger, config):
+    """Load vfio_iommu_type1 with unsafe interrupts, then set each configured PF's VF count and optional MTU."""
+    logger.out('Setting up SR-IOV device support', state='i')
+    # Enable unsafe interrupts for the vfio_iommu_type1 kernel module
+    try:
+        common.run_os_command('modprobe vfio_iommu_type1 allow_unsafe_interrupts=1')
+        with open('/sys/module/vfio_iommu_type1/parameters/allow_unsafe_interrupts', 'w') as mfh:
+            mfh.write('Y')
+    except Exception:
+        logger.out('Failed to enable vfio_iommu_type1 kernel module; SR-IOV may fail', state='w')
+
+    # Loop through our SR-IOV NICs and enable the numvfs for each
+    for device in config['sriov_device']:
+        logger.out(f'Preparing SR-IOV PF {device["phy"]} with {device["vfcount"]} VFs', state='i')
+        current_vf_count = 'unknown'  # reset per PF so the OSError handler never sees an unbound or stale value
+        try:
+            with open(f'/sys/class/net/{device["phy"]}/device/sriov_numvfs', 'r') as vfh:
+                current_vf_count = vfh.read().strip()
+            with open(f'/sys/class/net/{device["phy"]}/device/sriov_numvfs', 'w') as vfh:
+                vfh.write(str(device['vfcount']))
+        except FileNotFoundError:
+            logger.out(f'Failed to open SR-IOV configuration for PF {device["phy"]}; device may not support SR-IOV', state='w')
+        except OSError:
+            logger.out(f'Failed to set SR-IOV VF count for PF {device["phy"]} to {device["vfcount"]}; already set to {current_vf_count}', state='w')
+        if device.get('mtu', None) is not None:
+            logger.out(f'Setting SR-IOV PF {device["phy"]} to MTU {device["mtu"]}', state='i')
+            common.run_os_command(f'ip link set {device["phy"]} mtu {device["mtu"]} up')
+
+
+def setup_interfaces(logger, config):
+    """Bring up the Cluster, Storage and Upstream devices/bridges, the default route and network sysctls."""
+    # Set up the Cluster interface
+    cluster_dev = config['cluster_dev']
+    cluster_mtu = config['cluster_mtu']
+    cluster_dev_ip = config['cluster_dev_ip']
+
+    logger.out(f'Setting up Cluster network interface {cluster_dev} with MTU {cluster_mtu}', state='i')
+    common.run_os_command(f'ip link set {cluster_dev} mtu {cluster_mtu} up')
+
+    logger.out(f'Setting up Cluster network bridge on interface {cluster_dev} with IP {cluster_dev_ip}', state='i')
+    common.run_os_command('brctl addbr brcluster')
+    common.run_os_command(f'brctl addif brcluster {cluster_dev}')
+    common.run_os_command(f'ip link set brcluster mtu {cluster_mtu} up')
+    common.run_os_command(f'ip address add {cluster_dev_ip} dev brcluster')
+
+    # Set up the Storage interface
+    storage_dev = config['storage_dev']
+    storage_mtu = config['storage_mtu']
+    storage_dev_ip = config['storage_dev_ip']
+
+    logger.out(f'Setting up Storage network interface {storage_dev} with MTU {storage_mtu}', state='i')
+    common.run_os_command(f'ip link set {storage_dev} mtu {storage_mtu} up')
+
+    if storage_dev == cluster_dev:
+        # Shared with the Cluster device: reuse brcluster and only add the address if it differs
+        if storage_dev_ip != cluster_dev_ip:
+            logger.out(f'Setting up Storage network on Cluster network bridge with IP {storage_dev_ip}', state='i')
+            common.run_os_command(f'ip address add {storage_dev_ip} dev brcluster')
+    else:
+        logger.out(f'Setting up Storage network bridge on interface {storage_dev} with IP {storage_dev_ip}', state='i')
+        common.run_os_command('brctl addbr brstorage')
+        common.run_os_command(f'brctl addif brstorage {storage_dev}')
+        common.run_os_command(f'ip link set brstorage mtu {storage_mtu} up')
+        common.run_os_command(f'ip address add {storage_dev_ip} dev brstorage')
+
+    # Set up the Upstream interface
+    upstream_dev = config['upstream_dev']
+    upstream_mtu = config['upstream_mtu']
+    upstream_dev_ip = config['upstream_dev_ip']
+
+    logger.out(f'Setting up Upstream network interface {upstream_dev} with MTU {upstream_mtu}', state='i')
+    # Apply the MTU announced above, as is already done for the Cluster and Storage devices
+    common.run_os_command(f'ip link set {upstream_dev} mtu {upstream_mtu} up')
+
+    if upstream_dev == cluster_dev:
+        # Shared with the Cluster device: reuse brcluster and only add the address if it differs
+        if upstream_dev_ip != cluster_dev_ip:
+            logger.out(f'Setting up Upstream network on Cluster network bridge with IP {upstream_dev_ip}', state='i')
+            common.run_os_command(f'ip address add {upstream_dev_ip} dev brcluster')
+    else:
+        logger.out(f'Setting up Upstream network bridge on interface {upstream_dev} with IP {upstream_dev_ip}', state='i')
+        common.run_os_command('brctl addbr brupstream')
+        common.run_os_command(f'brctl addif brupstream {upstream_dev}')
+        common.run_os_command(f'ip link set brupstream mtu {upstream_mtu} up')
+        common.run_os_command(f'ip address add {upstream_dev_ip} dev brupstream')
+
+    upstream_gateway = config['upstream_gateway']
+    if upstream_gateway is not None:
+        logger.out(f'Setting up Upstream network default gateway IP {upstream_gateway}', state='i')
+        if upstream_dev == cluster_dev:
+            common.run_os_command(f'ip route add default via {upstream_gateway} dev brcluster')
+        else:
+            common.run_os_command(f'ip route add default via {upstream_gateway} dev brupstream')
+
+    # Set up sysctl tweaks to optimize networking
+    # NOTE(review): net.ipv6.ip_forward and the net.ipv6.conf.* send_redirects/rp_filter keys below do not look
+    # like valid Linux sysctls (IPv6 forwarding is net.ipv6.conf.all.forwarding) - confirm before changing them
+    # Enable routing functions
+    common.run_os_command('sysctl net.ipv4.ip_forward=1')
+    common.run_os_command('sysctl net.ipv6.ip_forward=1')
+    # Enable send redirects
+    common.run_os_command('sysctl net.ipv4.conf.all.send_redirects=1')
+    common.run_os_command('sysctl net.ipv4.conf.default.send_redirects=1')
+    common.run_os_command('sysctl net.ipv6.conf.all.send_redirects=1')
+    common.run_os_command('sysctl net.ipv6.conf.default.send_redirects=1')
+    # Accept source routes
+    common.run_os_command('sysctl net.ipv4.conf.all.accept_source_route=1')
+    common.run_os_command('sysctl net.ipv4.conf.default.accept_source_route=1')
+    common.run_os_command('sysctl net.ipv6.conf.all.accept_source_route=1')
+    common.run_os_command('sysctl net.ipv6.conf.default.accept_source_route=1')
+    # Disable RP filtering on Cluster and Upstream interfaces (to allow traffic pivoting)
+    common.run_os_command(f'sysctl net.ipv4.conf.{cluster_dev}.rp_filter=0')
+    common.run_os_command('sysctl net.ipv4.conf.brcluster.rp_filter=0')
+    common.run_os_command(f'sysctl net.ipv4.conf.{upstream_dev}.rp_filter=0')
+    common.run_os_command('sysctl net.ipv4.conf.brupstream.rp_filter=0')
+    common.run_os_command(f'sysctl net.ipv6.conf.{cluster_dev}.rp_filter=0')
+    common.run_os_command('sysctl net.ipv6.conf.brcluster.rp_filter=0')
+    common.run_os_command(f'sysctl net.ipv6.conf.{upstream_dev}.rp_filter=0')
+    common.run_os_command('sysctl net.ipv6.conf.brupstream.rp_filter=0')
+
+    # Stop DNSMasq if it is running
+    common.run_os_command('systemctl stop dnsmasq.service')
+
+    logger.out('Waiting 3 seconds for networking to come up', state='s')
+    sleep(3)
+
+
+def create_nft_configuration(logger, config):
+    """When networking is enabled, write the base nftables ruleset and reload the firewall from it."""
+    if config['enable_networking']:
+        logger.out('Creating NFT firewall configuration', state='i')
+        nft_root = config['nft_dynamic_directory']
+
+        # Make sure the directories named by the include lines below exist
+        for subdirectory in ('networks', 'static'):
+            makedirs(f'{nft_root}/{subdirectory}', exist_ok=True)
+
+        # Base ruleset; the text, including its leading indentation, is written out verbatim
+        base_ruleset = f"""# Base rules
+        flush ruleset
+        # Add the filter table and chains
+        add table inet filter
+        add chain inet filter forward {{ type filter hook forward priority 0; }}
+        add chain inet filter input {{ type filter hook input priority 0; }}
+        # Include static rules and network rules
+        include "{nft_root}/static/*"
+        include "{nft_root}/networks/*"
+        """
+
+        # Write the base firewall config
+        base_ruleset_path = f'{nft_root}/base.nft'
+        with open(base_ruleset_path, 'w') as nftfh:
+            nftfh.write(base_ruleset)
+        common.reload_firewall_rules(base_ruleset_path, logger)
diff --git a/node-daemon/pvcnoded/util/services.py b/node-daemon/pvcnoded/util/services.py
new file mode 100644
index 00000000..c7574a27
--- /dev/null
+++ b/node-daemon/pvcnoded/util/services.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+
+# services.py - Utility functions for pvcnoded external services
+# Part of the Parallel Virtual Cluster (PVC) system
+#
+#    Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
+#
+#    This program is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, version 3.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+#
+###############################################################################
+
+import daemon_lib.common as common
+from time import sleep
+
+
+def _start_unit(logger, description, unit):
+    """Log that the `description` daemon is starting, then start its systemd `unit`."""
+    logger.out(f'Starting {description} daemon', state='i')
+    # TODO: Move our handling out of Systemd and integrate it directly as a subprocess?
+    common.run_os_command(f'systemctl start {unit}')
+
+
+def start_zookeeper(logger, config):
+    """Start Zookeeper when this node is a coordinator."""
+    if config['daemon_mode'] == 'coordinator':
+        _start_unit(logger, 'Zookeeper', 'zookeeper.service')
+
+
+def start_libvirtd(logger, config):
+    """Start Libvirt when the hypervisor is enabled."""
+    if config['enable_hypervisor']:
+        _start_unit(logger, 'Libvirt', 'libvirtd.service')
+
+
+def start_patroni(logger, config):
+    """Start Patroni when networking is enabled and this node is a coordinator."""
+    if config['enable_networking'] and config['daemon_mode'] == 'coordinator':
+        _start_unit(logger, 'Patroni', 'patroni.service')
+
+
+def start_frrouting(logger, config):
+    """Start FRRouting when networking is enabled and this node is a coordinator."""
+    if config['enable_networking'] and config['daemon_mode'] == 'coordinator':
+        _start_unit(logger, 'FRRouting', 'frr.service')
+
+
+def start_ceph_mon(logger, config):
+    """Start this node's Ceph Monitor when storage is enabled and this node is a coordinator."""
+    if config['enable_storage'] and config['daemon_mode'] == 'coordinator':
+        _start_unit(logger, 'Ceph Monitor', f'ceph-mon@{config["node_hostname"]}.service')
+
+
+def start_ceph_mgr(logger, config):
+    """Start this node's Ceph Manager when storage is enabled and this node is a coordinator."""
+    if config['enable_storage'] and config['daemon_mode'] == 'coordinator':
+        _start_unit(logger, 'Ceph Manager', f'ceph-mgr@{config["node_hostname"]}.service')
+
+
+def start_system_services(logger, config):
+    """Run every service starter in order, then pause so the daemons can come up."""
+    # Each starter checks the configuration itself and does nothing when it does not apply
+    starters = (start_zookeeper, start_libvirtd, start_patroni, start_frrouting, start_ceph_mon, start_ceph_mgr)
+    for starter in starters:
+        starter(logger, config)
+
+    logger.out('Waiting 3 seconds for daemons to start', state='s')
+    sleep(3)
diff --git a/node-daemon/pvcnoded/util/zookeeper.py b/node-daemon/pvcnoded/util/zookeeper.py
new file mode 100644
index 00000000..933b70da
--- /dev/null
+++ b/node-daemon/pvcnoded/util/zookeeper.py
@@ -0,0 +1,132 @@
+#!/usr/bin/env python3
+
+# zookeeper.py - Utility functions for pvcnoded Zookeeper connections
+# (connecting, schema validation, and initial node registration)
+# Part of the Parallel Virtual Cluster (PVC) system
+#
+#    Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
+#
+#    This program is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, version 3.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+#
+##############################################################################
+
+from daemon_lib.zkhandler import ZKHandler
+
+import os
+import time
+
+
+def connect(logger, config):
+    """Connect to Zookeeper, load this node's active schema and publish its latest installed schema.
+
+    Returns a (zkhandler, node_schema_version) tuple; exits the process if the connection fails.
+    """
+    zkhandler = ZKHandler(config, logger)
+
+    try:
+        logger.out(f'Connecting to Zookeeper on coordinator nodes {config["coordinators"]}', state='i')
+        # Start connection
+        zkhandler.connect(persistent=True)
+    except Exception as err:
+        logger.out(f'ERROR: Failed to connect to Zookeeper cluster: {err}', state='e')
+        os._exit(1)
+
+    logger.out('Validating Zookeeper schema', state='i')
+
+    try:
+        node_schema_version = int(zkhandler.read(('node.data.active_schema', config['node_hostname'])))
+    except Exception:
+        # No usable per-node value: adopt the cluster-wide version and record it for this node
+        node_schema_version = int(zkhandler.read('base.schema.version'))
+        zkhandler.write([
+            (('node.data.active_schema', config['node_hostname']), node_schema_version)
+        ])
+
+    # Load in the current node schema version
+    zkhandler.schema.load(node_schema_version)
+
+    # Record the latest installed schema version
+    latest_schema_version = zkhandler.schema.find_latest()
+    logger.out(f'Latest installed schema is {latest_schema_version}', state='i')
+    zkhandler.write([
+        (('node.data.latest_schema', config['node_hostname']), latest_schema_version)
+    ])
+
+    # If we are the last node to get a schema update, fire the master update
+    if latest_schema_version > node_schema_version:
+        installed_versions = [int(zkhandler.read(('node.data.latest_schema', peer))) for peer in zkhandler.children('base.node')]
+        # Only bump the cluster-wide version once every node reports the latest schema as installed
+        if all(version == latest_schema_version for version in installed_versions):
+            zkhandler.write([
+                ('base.schema.version', latest_schema_version)
+            ])
+
+    return zkhandler, node_schema_version
+
+
+def validate_schema(logger, zkhandler):
+    """Validate Zookeeper against the loaded schema, applying the schema when validation fails."""
+    if zkhandler.schema.validate(zkhandler, logger):
+        logger.out('Schema successfully validated', state='o')
+        return
+    logger.out('Found schema violations, applying', state='i')
+    zkhandler.schema.apply(zkhandler)
+
+
+def setup_node(logger, config, zkhandler):
+    """Create this node's Zookeeper entries if it is absent, otherwise refresh its static data."""
+    init_routerstate = 'secondary' if config['daemon_mode'] == 'coordinator' else 'client'
+    hostname = config['node_hostname']
+
+    if zkhandler.exists(('node', hostname)):
+        logger.out(f'Node is {logger.fmt_green}present{logger.fmt_end} in Zookeeper', state='i')
+        # Update static data just in case it's changed
+        fields = [
+            ('node', config['daemon_mode']),
+            ('node.mode', config['daemon_mode']),
+            ('node.state.daemon', 'init'),
+            ('node.state.router', init_routerstate),
+            ('node.data.static', ' '.join(config['static_data'])),
+            ('node.data.pvc_version', config['pvcnoded_version']),
+            ('node.ipmi.hostname', config['ipmi_hostname']),
+            ('node.ipmi.username', config['ipmi_username']),
+            ('node.ipmi.password', config['ipmi_password']),
+        ]
+    else:
+        logger.out(f'Node is {logger.fmt_red}absent{logger.fmt_end} in Zookeeper; adding new node', state='i')
+        fields = [
+            ('node', config['daemon_mode']),
+            ('node.keepalive', str(int(time.time()))),
+            ('node.mode', config['daemon_mode']),
+            ('node.state.daemon', 'init'),
+            ('node.state.domain', 'flushed'),
+            ('node.state.router', init_routerstate),
+            ('node.data.static', ' '.join(config['static_data'])),
+            ('node.data.pvc_version', config['pvcnoded_version']),
+            ('node.ipmi.hostname', config['ipmi_hostname']),
+            ('node.ipmi.username', config['ipmi_username']),
+            ('node.ipmi.password', config['ipmi_password']),
+            ('node.memory.total', '0'),
+            ('node.memory.used', '0'),
+            ('node.memory.free', '0'),
+            ('node.memory.allocated', '0'),
+            ('node.memory.provisioned', '0'),
+            ('node.vcpu.allocated', '0'),
+            ('node.cpu.load', '0.0'),
+            ('node.running_domains', '0'),
+            ('node.count.provisioned_domains', '0'),
+            ('node.count.networks', '0'),
+        ]
+
+    # Every key above is scoped to this node's hostname; written in one call, in the order listed
+    zkhandler.write([((key, hostname), value) for key, value in fields])
diff --git a/test-cluster.sh b/test-cluster.sh
index d9c9988c..2f847f03 100755
--- a/test-cluster.sh
+++ b/test-cluster.sh
@@ -26,44 +26,44 @@ rm ${backup_tmp} || true
 
 # Provisioner tests
 _pvc provisioner profile list test
-_pvc provisioner create --wait testX test
+_pvc provisioner create --wait testx test
 sleep 30
 
 # VM tests
 vm_tmp=$(mktemp)
-_pvc vm dump testX --file ${vm_tmp}
-_pvc vm shutdown --yes --wait testX
-_pvc vm start testX
+_pvc vm dump testx --file ${vm_tmp}
+_pvc vm shutdown --yes --wait testx
+_pvc vm start testx
 sleep 30
-_pvc vm stop --yes testX
-_pvc vm disable testX
-_pvc vm undefine --yes testX
+_pvc vm stop --yes testx
+_pvc vm disable testx
+_pvc vm undefine --yes testx
 _pvc vm define --target hv3 --tag pvc-test ${vm_tmp}
-_pvc vm start testX
+_pvc vm start testx
 sleep 30
-_pvc vm restart --yes --wait testX
+_pvc vm restart --yes --wait testx
 sleep 30
-_pvc vm migrate --wait testX
+_pvc vm migrate --wait testx
 sleep 5
-_pvc vm unmigrate --wait testX
+_pvc vm unmigrate --wait testx
 sleep 5
-_pvc vm move --wait --target hv1 testX
+_pvc vm move --wait --target hv1 testx
 sleep 5
-_pvc vm meta testX --limit hv1 --selector vms --method live --profile test --no-autostart
-_pvc vm tag add testX mytag
-_pvc vm tag get testX
+_pvc vm meta testx --limit hv1 --selector vms --method live --profile test --no-autostart
+_pvc vm tag add testx mytag
+_pvc vm tag get testx
 _pvc vm list --tag mytag
-_pvc vm tag remove testX mytag
-_pvc vm network get testX
-_pvc vm vcpu set testX 4
-_pvc vm vcpu get testX
-_pvc vm memory set testX 4096
-_pvc vm memory get testX
-_pvc vm vcpu set testX 2
-_pvc vm memory set testX 2048 --restart --yes
+_pvc vm tag remove testx mytag
+_pvc vm network get testx
+_pvc vm vcpu set testx 4
+_pvc vm vcpu get testx
+_pvc vm memory set testx 4096
+_pvc vm memory get testx
+_pvc vm vcpu set testx 2
+_pvc vm memory set testx 2048 --restart --yes
 sleep 5
-_pvc vm list testX
-_pvc vm info --long testX
+_pvc vm list testx
+_pvc vm info --long testx
 rm ${vm_tmp} || true
 
 # Node tests
@@ -81,9 +81,9 @@ _pvc node info hv1
 # Network tests
 _pvc network add 10001 --description testing --type managed --domain testing.local --ipnet 10.100.100.0/24 --gateway 10.100.100.1 --dhcp --dhcp-start 10.100.100.100 --dhcp-end 10.100.100.199
 sleep 5
-_pvc vm network add --restart --yes testX 10001
+_pvc vm network add --restart --yes testx 10001
 sleep 30
-_pvc vm network remove --restart --yes testX 10001
+_pvc vm network remove --restart --yes testx 10001
 sleep 5
 
 _pvc network acl add 10001 --in --description test-acl --order 0 --rule "'ip daddr 10.0.0.0/8 counter'"
@@ -98,10 +98,10 @@ _pvc network list
 _pvc network info --long 10001
 
 # Network-VM interaction tests
-_pvc vm network add testX 10001 --model virtio --restart --yes
+_pvc vm network add testx 10001 --model virtio --restart --yes
 sleep 30
-_pvc vm network get testX
-_pvc vm network remove testX 10001 --restart --yes
+_pvc vm network get testx
+_pvc vm network remove testx 10001 --restart --yes
 sleep 5
 
 _pvc network remove --yes 10001
@@ -117,9 +117,9 @@ _pvc storage osd list
 _pvc storage pool add testing 64 --replcfg "copies=3,mincopies=2"
 sleep 5
 _pvc storage pool list
-_pvc storage volume add testing testX 1G
-_pvc storage volume resize testing testX 2G
-_pvc storage volume rename testing testX testerX
+_pvc storage volume add testing testx 1G
+_pvc storage volume resize testing testx 2G
+_pvc storage volume rename testing testx testerX
 _pvc storage volume clone testing testerX testerY
 _pvc storage volume list --pool testing
 _pvc storage volume snapshot add testing testerX asnapshotX
@@ -128,10 +128,10 @@ _pvc storage volume snapshot list
 _pvc storage volume snapshot remove --yes testing testerX asnapshotY
 
 # Storage-VM interaction tests
-_pvc vm volume add testX --type rbd --disk-id sdh --bus scsi testing/testerY --restart --yes
+_pvc vm volume add testx --type rbd --disk-id sdh --bus scsi testing/testerY --restart --yes
 sleep 30
-_pvc vm volume get testX
-_pvc vm volume remove testX testing/testerY --restart --yes
+_pvc vm volume get testx
+_pvc vm volume remove testx testing/testerY --restart --yes
 sleep 5
 
 _pvc storage volume remove --yes testing testerY
@@ -139,8 +139,8 @@ _pvc storage volume remove --yes testing testerX
 _pvc storage pool remove --yes testing
 
 # Remove the VM
-_pvc vm stop --yes testX
-_pvc vm remove --yes testX
+_pvc vm stop --yes testx
+_pvc vm remove --yes testx
 
 time_end=$(date +%s)