#!/usr/bin/env python3

# nics.py - PVC Monitoring example plugin for NIC interfaces
# Part of the Parallel Virtual Cluster (PVC) system
#
#    Copyright (C) 2018-2022 Joshua M. Boniface <joshua@boniface.me>
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, version 3.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################

# This script provides an example of a PVC monitoring plugin script. It will create
# a simple plugin to check the network interfaces of the host, specifically for speed
# and 802.3ad status (if applicable).

# This script can thus be used as an example or reference implementation of a
# PVC monitoring plugin script and expanded upon as required.

# A monitoring plugin script must implement the class "MonitoringPluginScript" which
# extends "MonitoringPlugin", providing the 3 functions indicated. Detailed explanation
# of the role of each function is provided in context of the example; see the other
# examples for more potential uses.

# WARNING:
#
# This script will run in the context of the node daemon keepalives as root.
# DO NOT install untrusted, unvetted plugins under any circumstances.


# This import is always required here, as MonitoringPlugin is used by the
# MonitoringPluginScript class
from pvcnoded.objects.MonitoringInstance import MonitoringPlugin


# A monitoring plugin script must always expose its nice name, which must be identical to
# the file name without its ".py" extension (this file is nics.py, so the name is "nics")
PLUGIN_NAME = "nics"


# The MonitoringPluginScript class must be named as such, and extend MonitoringPlugin.
class MonitoringPluginScript(MonitoringPlugin):
    """
    Example monitoring plugin checking the node's core network interfaces.

    For each of the configured bridge, upstream, cluster, and storage devices
    (with VLAN devices resolved to their parent device), run() verifies that
    bonds have all of their slaves up and that the device is running at the
    highest link speed ethtool reports as supported.
    """

    def setup(self):
        """
        setup(): Perform special setup steps during node daemon startup

        This step is optional and should be used sparingly.

        If you wish for the plugin to not load in certain conditions, do any checks here
        and return a non-None failure message to indicate the error.
        """

        pass

    @staticmethod
    def _nic_devtype(dev):
        """
        Return the DEVTYPE reported in /sys/class/net/{dev}/uevent, or None if absent

        Only the explicit "DEVTYPE=" key is consulted, so an interface without one
        (e.g. a plain physical NIC) is never mistaken for a bond or VLAN because of
        whatever happens to be on the first line of the file.

        Raises OSError if the uevent file cannot be read (e.g. the device does not exist).
        """

        with open(f'/sys/class/net/{dev}/uevent', 'r') as uevent:
            for line in uevent:
                key, sep, value = line.partition('=')
                if sep and key.strip() == 'DEVTYPE':
                    return value.strip()
        return None

    @staticmethod
    def _nic_vlan_parent(dev):
        """
        Return the underlying device of VLAN device {dev} from /proc/net/vlan/{dev}

        Falls back to {dev} itself if no "Device:" line is present.

        Raises OSError if the VLAN information file cannot be read.
        """

        parent = dev
        with open(f"/proc/net/vlan/{dev}") as devfh:
            for line in devfh:
                if line.startswith('Device:'):
                    fields = line.split()
                    # Guard against a bare "Device:" line with no value
                    if len(fields) > 1:
                        parent = fields[-1]
        return parent

    @staticmethod
    def _nic_parse_bond_slaves(bonding_stats):
        """
        Parse the contents of a /proc/net/bonding/<bond> file into slave tuples

        Returns a list of (name, MII status, speed in Mbps, duplex) tuples, one per
        blank-line-separated section that contains a "Slave Interface:" line. Any
        other section (driver banner, bond details, mode-specific information, empty
        trailing sections) is ignored, so the result does not depend on how many
        header sections a given bonding mode emits.
        """

        slave_interfaces = list()
        for section in bonding_stats.split('\n\n'):
            # Reset per-slave values so nothing leaks over from the previous slave
            interface_name = None
            interface_status = 'unknown'
            interface_speed_mbps = 0
            interface_duplex = 'unknown'

            for line in section.split('\n'):
                fields = line.split()
                if line.startswith('Slave Interface:'):
                    interface_name = fields[-1]
                elif line.startswith('MII Status:'):
                    interface_status = fields[-1]
                elif line.startswith('Speed:'):
                    # "Speed: 1000 Mbps" yields 1000; "Speed: Unknown" yields 0
                    try:
                        interface_speed_mbps = int(fields[-2])
                    except (ValueError, IndexError):
                        interface_speed_mbps = 0
                elif line.startswith('Duplex:'):
                    interface_duplex = fields[-1]

            if interface_name is not None:
                slave_interfaces.append(
                    (interface_name, interface_status, interface_speed_mbps, interface_duplex)
                )

        return slave_interfaces

    @staticmethod
    def _nic_parse_supported_speeds(ethtool_stdout):
        """
        Parse the "Supported link modes" section of ethtool output into a set of speeds

        Every token of the form "<digits>base<anything>" (e.g. "1000baseT/Full",
        "10000baseSR/Full") between the "Supported link modes:" line and the
        "Supported pause frame use:" line contributes its numeric prefix, in Mbps.
        Tokens without such a prefix (e.g. "Not reported") are skipped, so the
        returned set may be empty.
        """

        supported_link_speeds = set()
        in_modes = False
        for line in ethtool_stdout.split('\n'):
            if 'Supported link modes:' in line:
                in_modes = True
            if 'Supported pause frame use:' in line:
                break
            if not in_modes:
                continue
            for token in line.split():
                speed, sep, _ = token.partition('base')
                if sep and speed.isdigit():
                    supported_link_speeds.add(int(speed))
        return supported_link_speeds

    def run(self, coordinator_state=None):
        """
        run(): Perform the check actions and return a PluginResult object

        The {coordinator_state} can be used to check if this is a "primary" coordinator, "secondary" coordinator, or "client" (non-coordinator)
        It is not used by this plugin.

        Each unique core device adds 10 to the health delta if it is a bond without
        all of its slaves up, and a further 10 if it is running below the maximum
        link speed supported by the device (or by its slaves, for a bond).

        Raises OSError if the sysfs/procfs description of a configured device cannot
        be read (e.g. the device does not exist).
        """

        # Run any imports first
        import daemon_lib.common as common

        messages = list()
        health_delta = 0

        # Get a list of the various underlying devices; a set is used as several
        # roles may share one device
        _core_nics = set()

        for dev in [
                self.config['bridge_dev'],
                self.config['upstream_dev'],
                self.config['cluster_dev'],
                self.config['storage_dev'],
        ]:
            # VLAN devices are checked via the device they sit on top of
            if self._nic_devtype(dev) == 'vlan':
                dev = self._nic_vlan_parent(dev)

            _core_nics.add(dev)

        for dev in sorted(_core_nics):
            if self._nic_devtype(dev) == "bond":
                with open(f"/proc/net/bonding/{dev}") as devfh:
                    slave_interfaces = self._nic_parse_bond_slaves(devfh.read())

                # Ensure all slave interfaces are up; a bond without any slaves is
                # never considered healthy
                slave_interface_up_count = sum(
                    1 for slave_interface in slave_interfaces if slave_interface[1] == 'up'
                )
                if not slave_interfaces or slave_interface_up_count < len(slave_interfaces):
                    messages.append(f"{dev} DEGRADED with {slave_interface_up_count} active slaves")
                    health_delta += 10
                else:
                    messages.append(f"{dev} OK with {slave_interface_up_count} active slaves")

                # Supported speeds for a bond come from its slave interfaces
                ethtool_devs = [slave_interface[0] for slave_interface in slave_interfaces]
            else:
                ethtool_devs = [dev]

            # Get ethtool supported speeds for the interface(s)
            supported_link_speeds = set()
            for ethtool_dev in ethtool_devs:
                _, ethtool_stdout, _ = common.run_os_command(f"ethtool {ethtool_dev}")
                supported_link_speeds |= self._nic_parse_supported_speeds(ethtool_stdout)

            # 0 means "unknown": no supported speed could be determined
            max_supported_link_speed = max(supported_link_speeds, default=0)

            # Ensure interface is running at its maximum speed; an unreadable or
            # unparseable speed is treated as 0, matching the bond slave parsing
            try:
                with open(f"/sys/class/net/{dev}/speed") as devfh:
                    dev_speed = int(devfh.read())
            except (OSError, ValueError):
                dev_speed = 0

            # A non-positive speed means the current speed is unknown, which is
            # reported as degraded even if the supported maximum is unknown too
            if dev_speed <= 0 or dev_speed < max_supported_link_speed:
                messages.append(f"{dev} DEGRADED at {dev_speed} Mbps")
                health_delta += 10
            else:
                messages.append(f"{dev} OK at {dev_speed} Mbps")

        # Set the health delta in our local PluginResult object
        self.plugin_result.set_health_delta(health_delta)

        # Set the message in our local PluginResult object
        self.plugin_result.set_message(', '.join(messages))

        # Return our local PluginResult object
        return self.plugin_result

    def cleanup(self):
        """
        cleanup(): Perform special cleanup steps during node daemon termination

        This step is optional and should be used sparingly.
        """

        pass