177 lines
		
	
	
		
			4.7 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			177 lines
		
	
	
		
			4.7 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
#!/bin/bash
 | 
						|
# -*- sh -*-
 | 
						|
 | 
						|
: << =cut
 | 
						|
 | 
						|
=head1 NAME
 | 
						|
 | 
						|
pvc - Plugin to monitor a PVC cluster.
 | 
						|
 | 
						|
=head1 CONFIGURATION
 | 
						|
 | 
						|
Note that due to how Munin thresholds work, these values must always be slightly less than 1 or 2 respectively,
 | 
						|
or the alerts will never be triggered.
 | 
						|
 | 
						|
Defaults (no config required):
 | 
						|
 | 
						|
[pvc]
 | 
						|
env.warning 1.99
 | 
						|
env.critical 1.99
 | 
						|
 | 
						|
Make degraded cluster WARN only (max value is 2, so 3 effectively disables):
 | 
						|
 | 
						|
[pvc]
 | 
						|
env.pvc_cluster_warning 1.99
 | 
						|
env.pvc_cluster_critical 3
 | 
						|
 | 
						|
=head1 AUTHOR
 | 
						|
 | 
						|
Joshua Boniface <joshua@boniface.me>
 | 
						|
 | 
						|
=head1 LICENSE
 | 
						|
 | 
						|
GPLv3
 | 
						|
 | 
						|
=head1 BUGS
 | 
						|
 | 
						|
=back
 | 
						|
 | 
						|
=head1 MAGIC MARKERS
 | 
						|
 | 
						|
 #%# family=auto
 | 
						|
 #%# capabilities=autoconf
 | 
						|
 | 
						|
=cut
 | 
						|
 | 
						|
. "$MUNIN_LIBDIR/plugins/plugin.sh"
 | 
						|
 | 
						|
# Default alert thresholds. A value already present in the environment
# (Munin exports "env.warning" / "env.critical" plugin settings as the
# variables "warning" / "critical") is kept; 1.99 is only the fallback.
warning="${warning:-1.99}"
critical="${critical:-1.99}"

# Exported so that the pvc client invoked below can see it.
export PVC_CLIENT_DIR="/run/shm/munin-pvc"
# Full command line (command plus arguments) that prints the cluster status
# as JSON; callers expand it unquoted so that it word-splits.
PVC_CMD="/usr/bin/pvc --quiet --cluster local status --format json-pretty"
# Path of the jq binary used to pick fields out of that JSON.
JQ_CMD="/usr/bin/jq"
 | 
						|
 | 
						|
# Print a description of what the plugin reports to stdout.
# Returns: 0. The function deliberately does not terminate the script, so
#          that the caller decides the exit status.
# NOTE(review): the text mentions a 0.99 WARN limit while the script's
# default is warning=1.99 - confirm which one is intended.
output_usage() {
    echo "This plugin outputs numerical values based on the health of the PVC cluster."
    echo
    echo "There are separate outputs for both the PVC cluster itself as well as the Ceph storage cluster."
    echo "In normal operation, i.e. when both clusters are in 'Optimal' state, the plugin returns 0 for"
    echo "each cluster. When the cluster is placed into 'Maintenance' mode,the plugin returns 1 for each"
    echo "cluster, and goes into WARN state (limit 0.99); this can be adjusted by overriding the WARNING"
    echo "threshold of the plugin to something other than 0.99 - note that due to Munin's alerting design,"
    echo "the warning value must always be very slightly below the whole number. When either cluster"
    echo "element becomes 'Degraded', the plugin returns 2 for the relevant cluster, which is treated as a"
    echo "critical. Like the WARNING threshold, this can be overridden, and with the same caveat about limit."
    # 'return' rather than 'exit': an 'exit 0' here would make the
    # 'exit 1' that callers place after this function unreachable.
    return 0
}
 | 
						|
 | 
						|
# Answer Munin's "autoconf" query: print "yes" when both the pvc status
# command and jq run successfully, otherwise "no (<reason>)".
# Globals: PVC_CMD (read), JQ_CMD (read)
# Outputs: a single line on stdout
output_autoconf() {
    local pvc_ret jq_ret

    # PVC_CMD holds a command plus its arguments in one string, so it is
    # expanded unquoted on purpose.
    $PVC_CMD &>/dev/null
    pvc_ret=$?
    $JQ_CMD --version &>/dev/null
    jq_ret=$?

    # A pvc failure is reported first, also when jq fails as well.
    if [[ ${pvc_ret} -ne 0 ]]; then
        echo "no (no 'pvc' command found or local cluster not usable)"
    elif [[ ${jq_ret} -ne 0 ]]; then
        echo "no (no 'jq' command found)"
    else
        echo "yes"
    fi
}
 | 
						|
 | 
						|
# Answer Munin's "config" query: print the graph attributes and the
# definition of the two fields (pvc_cluster, pvc_storage), then exit 0.
# print_warning / print_critical are not defined in this file; presumably
# they come from the sourced Munin plugin.sh helper library.
output_config() {
    # Attributes of the graph as a whole.
    printf '%s\n' \
        'graph_title PVC Clusters' \
        'graph_args --base 1000' \
        'graph_vlabel Count' \
        'graph_category pvc' \
        'graph_period second' \
        'graph_info This graph shows the nodes in the PVC cluster.'

    # Field describing the PVC cluster itself.
    printf '%s\n' \
        'pvc_cluster.label Cluster Degradation' \
        'pvc_cluster.type GAUGE' \
        'pvc_cluster.max 2' \
        'pvc_cluster.info Whether the PVC cluster is in a degraded state.'
    print_warning pvc_cluster
    print_critical pvc_cluster

    # Field describing the storage cluster.
    printf '%s\n' \
        'pvc_storage.label Storage Degradation' \
        'pvc_storage.type GAUGE' \
        'pvc_storage.max 2' \
        'pvc_storage.info Whether the storage cluster is in a degraded state.'
    print_warning pvc_storage
    print_critical pvc_storage

    exit 0
}
 | 
						|
 | 
						|
# Translate a PVC health string into the value graphed by Munin.
# Arguments: $1 - health string taken from the status JSON
# Outputs:   0 for Optimal, 1 for Maintenance, 2 for Degraded, U otherwise
_pvc_health_to_value() {
    case "$1" in
        "Optimal")
            echo "0"
            ;;
        "Maintenance")
            echo "1"
            ;;
        "Degraded")
            echo "2"
            ;;
        *)
            # Missing or unrecognised health (e.g. the status command
            # failed): report Munin's "unknown" marker instead of printing
            # an empty value.
            echo "U"
            ;;
    esac
}

# Print the ".value" line for one field, plus an ".extinfo" line when the
# field is in maintenance (1) or degraded (2).
# Globals:   JQ_CMD (read)
# Arguments: $1 - Munin field name
#            $2 - status JSON text
#            $3 - jq filter selecting the health string
#            $4 - jq filter selecting the list of health messages
_pvc_report_field() {
    local field="$1" status_json="$2" health_filter="$3" msg_filter="$4"
    local health failed_reason value

    health="$( $JQ_CMD -r "${health_filter}" <<<"${status_json}" )"
    # Join the message list as CSV, then strip the CSV quoting and put a
    # space after each comma to make it readable.
    failed_reason="$( $JQ_CMD -r "${msg_filter} | @csv" <<<"${status_json}" | tr -d '"' | sed 's/,/, /g' )"
    value="$( _pvc_health_to_value "${health}" )"

    echo "${field}.value ${value}"
    case "${value}" in
        1)
            echo "${field}.extinfo Cluster in maintenance mode"
            ;;
        2)
            echo "${field}.extinfo ${failed_reason}"
            ;;
    esac
}

# Default action: query the cluster status once and print the current value
# of the pvc_cluster and pvc_storage fields.
# Globals: PVC_CMD (read), JQ_CMD (read)
# Outputs: Munin "<field>.value" / "<field>.extinfo" lines on stdout
output_values() {
    local pvc_output

    # PVC_CMD holds a command plus its arguments in one string, so it is
    # expanded unquoted on purpose.
    pvc_output="$( $PVC_CMD )"

    _pvc_report_field pvc_cluster "${pvc_output}" '.health' '.health_msg'
    _pvc_report_field pvc_storage "${pvc_output}" '.storage_health' '.storage_health_msg'
}
 | 
						|
 | 
						|
# Entry point: pick the action from the command-line arguments.
#   no argument  -> print the current values
#   "autoconf"   -> print the autoconf answer
#   "config"     -> print the graph configuration
#   anything else (unknown word, or more than one argument) -> usage, exit 1
if [[ $# -eq 0 ]]; then
    output_values
elif [[ $# -eq 1 && $1 == "autoconf" ]]; then
    output_autoconf
elif [[ $# -eq 1 && $1 == "config" ]]; then
    output_config
else
    output_usage
    exit 1
fi
 |