monitoring-plugins-qlogic_s.../check_qlogic_sanbox

503 lines
15 KiB
Perl

#!/usr/bin/perl -w
# nagios: -epn
#
# Copyright (c) 2012, SUSE Linux Products GmbH
# Author: Lars Vogdt <lrupp@suse.de>
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the Novell nor the names of its contributors may be
# used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
use strict;
use Net::SNMP;
use Getopt::Long;
use Net::Ping;
use Pod::Usage;
use bignum qw/hex/;
# Allow single-letter options to be bundled on the command line.
Getopt::Long::Configure('bundling');
# cleanup the environment
# The PATH must not end in ':' - an empty PATH element is interpreted as the
# current working directory, which would defeat the purpose of the cleanup.
$ENV{'PATH'} = '/bin:/usr/bin:/sbin:/usr/sbin';
$ENV{'BASH_ENV'} = '';
$ENV{'ENV'} = '';
# Built-in defaults. Command line options and (optionally) an ini file
# overwrite the user-tunable entries later on.
our $conf = {
    'VERSION'       => '1.4',
    'PROGNAME'      => 'check_qlogic_sanbox',
    # seconds; used for the alarm() watchdog and as SNMP session timeout
    'timeout'       => '60',
    # temperature thresholds for all sensors except sensor 4
    'critical'      => '45',
    'warning'       => '35',
    # separate temperature thresholds for sensor 4
    'sensor_4_warn' => '70',
    'sensor_4_crit' => '75',
    'debug'         => 0,
    'hostname'      => 'localhost',
    'community'     => 'public',
    'snmp_version'  => '2c',
    'snmp_port'     => '161',
    # handed to Net::SNMP as -maxmsgsize
    'maxmsgsize'    => '65535',
};
# Set by the -v/--version and -h/--help options.
our $print_version = 0;
our $print_help = 0;
# Plugin result: exit code and the text printed in front of the perfdata.
our $exitcode = 0;
our $output = '';
# The Name_Identifier of the Fabric Element
our $fcFeElementName = '.1.3.6.1.2.1.75.1.1.2.0';
# The basic information about the Fabric Element
our $base_table_query = '.1.3.6.1.2.1.1';
# Plugin exit codes, keyed by state name.
our %ERRORS = (
    'OK'        => 0,
    'WARNING'   => 1,
    'CRITICAL'  => 2,
    'UNKNOWN'   => 3,
    'DEPENDENT' => 4
);
# Entries below $base_table_query that are appended to the plugin output.
our %BASE_OIDs = (
    'sysDescr'    => '.1.3.6.1.2.1.1.1.0',
    'sysUpTime'   => '.1.3.6.1.2.1.1.3.0',
    'sysName'     => '.1.3.6.1.2.1.1.5.0',
    'sysContact'  => '.1.3.6.1.2.1.1.4.0',
    'sysLocation' => '.1.3.6.1.2.1.1.6.0',
);
#######################################################################
# Functions
#######################################################################
# Fetch the SNMP table rooted at $query through $session.
#
# Parameters:
#   $conf    - configuration hash ref; only 'snmp_version' is read, for the
#              error message
#   $session - session object providing get_table(), error() and close()
#   $query   - OID of the table to retrieve
#
# Returns whatever get_table() returned when that is defined.
# On an undefined result the session is closed, a CRITICAL line is printed
# and the plugin exits with $ERRORS{'CRITICAL'}.
sub snmp_get_table($$$){
    my ($conf, $session, $query) = @_;
    my $table = $session->get_table($query);
    return $table if defined($table);

    # Fetch the error text before closing the session.
    my $reason = $session->error;
    $session->close;
    print "CRITICAL: no response with SNMP version $conf->{'snmp_version'} for get_table on $query (error $reason)\n";
    exit $ERRORS{'CRITICAL'};
}
# Fetch the single OID $query through $session.
#
# Parameters:
#   $conf    - configuration hash ref; only 'snmp_version' is read, for the
#              error message
#   $session - session object providing get_request(), error() and close()
#   $query   - OID to retrieve
#
# Returns the entry stored under $query in the get_request() result.
# On an undefined result the session is closed, a CRITICAL line is printed
# and the plugin exits with $ERRORS{'CRITICAL'}.
sub snmp_get_request($$$){
    my ($conf, $session, $query) = @_;
    my $result = $session->get_request($query);
    return($result->{$query}) if defined($result);

    # Fetch the error text before closing the session.
    my $reason = $session->error;
    $session->close;
    print "CRITICAL: no response with SNMP version $conf->{'snmp_version'} for get_request on $query (error $reason)\n";
    exit $ERRORS{'CRITICAL'};
}
# Print $message with a "DEBUG: " prefix on STDOUT, but only when the
# global configuration has 'debug' switched on.
sub DEBUG($) {
    my $message = shift;
    $conf->{'debug'} and print "DEBUG: $message\n";
}
# Print "<command name> v<revision>" followed by a newline on STDOUT.
#
# Parameters:
#   $commandName    - name of the plugin
#   $pluginRevision - version string of the plugin
sub print_myrevision ($$) {
    my $commandName    = shift;
    my $pluginRevision = shift;
    printf "%s v%s\n", $commandName, $pluginRevision;
}
#######################################################################
# Main
#######################################################################
# Command line handling. Short options may be bundled; where an option has
# a short and a long spelling, both store into the same variable.
Getopt::Long::Configure('bundling');
GetOptions(
    'H|hostname=s'     => \$conf->{'hostname'},
    'f|filename=s'     => \$conf->{'filename'},
    'v|version'        => \$print_version,
    'h|help'           => \$print_help,
    'd|debug'          => \$conf->{'debug'},
    'w|warning=f'      => \$conf->{'warning'},
    'c|critical=f'     => \$conf->{'critical'},
    'sensor4_warn=f'   => \$conf->{'sensor_4_warn'},
    'sensor4_crit=f'   => \$conf->{'sensor_4_crit'},
    't|timeout=i'      => \$conf->{'timeout'},
    'C|community=s'    => \$conf->{'community'},
    'p|port=i'         => \$conf->{'snmp_port'},
    'V|snmp-version=s' => \$conf->{'snmp_version'},
) or pod2usage(2);

# --help: show the complete POD and leave with exit status 0.
if ($print_help) {
    pod2usage(
        -exitstatus => 0,
        -verbose    => 2,
    );
}

# Watchdog so that a hanging SNMP query cannot block Nagios: once the
# configured timeout has passed the plugin reports UNKNOWN and exits.
$SIG{'ALRM'} = sub {
    print "UNKNOWN: No snmp response from $conf->{'hostname'} (alarm timeout)\n";
    exit $ERRORS{'UNKNOWN'};
};
alarm( $conf->{'timeout'} );

# --version: print name and version, then leave with OK.
if ($print_version) {
    print_myrevision( $conf->{'PROGNAME'}, $conf->{'VERSION'} );
    exit $ERRORS{'OK'};
}
#
# Check the given options...
#
# The hostname is the SNMP target and also the name of the ini section, so
# it has to be validated before the ini file is read.
if ( !defined($conf->{'hostname'}) || $conf->{'hostname'} eq '' ) {
    alarm(0);
    print "ERROR - no hostname given\n";
    # Print the usage text without exiting: pod2usage(2) would leave with
    # exit status 2 and the UNKNOWN exit below would never be reached.
    pod2usage( -exitval => 'NOEXIT', -verbose => 0, -output => \*STDERR );
    exit $ERRORS{'UNKNOWN'};
}
# Optional ini file: values found in the section named after the hostname
# replace the defaults and whatever was given on the command line.
if (defined($conf->{'filename'})){
    use Config::IniFiles;
    my $ini = Config::IniFiles->new( -file => "$conf->{'filename'}" );
    if( ! $ini ){
        print "ERROR: Could not open $conf->{'filename'} : $!\n";
        exit $ERRORS{'UNKNOWN'};
    }
    if (! $ini->SectionExists($conf->{'hostname'})){
        print "ERROR: Could not find section [$conf->{'hostname'}] in $conf->{'filename'}\n";
        exit $ERRORS{'UNKNOWN'};
    }
    # ini parameter name => key in $conf
    my %conf_key_for = (
        'community'    => 'community',
        'snmp_version' => 'snmp_version',
        'snmp_port'    => 'snmp_port',
        'warning'      => 'warning',
        'critical'     => 'critical',
        'sensor4_warn' => 'sensor_4_warn',
        'sensor4_crit' => 'sensor_4_crit',
    );
    foreach my $ini_name (sort(keys(%conf_key_for))){
        my $ini_value = $ini->val($conf->{'hostname'}, $ini_name);
        $conf->{$conf_key_for{$ini_name}} = $ini_value if (defined($ini_value));
    }
}
# Thresholds are compared numerically below, so refuse anything that is not
# a number (the ini file is not type-checked the way the command line is).
use Scalar::Util qw(looks_like_number);
foreach my $threshold ('warning', 'critical', 'sensor_4_warn', 'sensor_4_crit'){
    if ( ! looks_like_number($conf->{$threshold}) ){
        alarm(0);
        print "ERROR: $threshold level ($conf->{$threshold}) is not a number\n";
        exit $ERRORS{'UNKNOWN'};
    }
}
# Numeric comparison: with the string operator 'gt', '100' sorts before '45'
# and '9' after it, so valid settings were rejected and invalid ones accepted.
if ($conf->{'warning'} > $conf->{'critical'}){
    alarm(0);
    print "ERROR: warning level ($conf->{'warning'}) should not be greater than critical level ($conf->{'critical'})\n";
    pod2usage( -exitval => 'NOEXIT', -verbose => 0, -output => \*STDERR );
    exit $ERRORS{'UNKNOWN'};
}
if ($conf->{'sensor_4_warn'} > $conf->{'sensor_4_crit'}){
    alarm(0);
    print "ERROR: sensor 4 warning level ($conf->{'sensor_4_warn'}) should not be greater than sensor 4 critical level ($conf->{'sensor_4_crit'})\n";
    pod2usage( -exitval => 'NOEXIT', -verbose => 0, -output => \*STDERR );
    exit $ERRORS{'UNKNOWN'};
}
# In debug mode show the effective configuration on STDERR.
if ($conf->{'debug'}){
    use Data::Dumper;
    print STDERR "Internal configuration:\n".Data::Dumper->Dump([$conf]);
}
#
# Basic defines / checks
#
my $error='UNKNOWN';
my $session;
# First try to ping the Sanbox, this gives faster results
# (Net::Ping is used with its default settings.)
my $p = Net::Ping->new();
if (! $p->ping($conf->{'hostname'}) ){
    $p->close();
    print "CRITICAL: could not ping your Sanbox ($conf->{'hostname'})\n";
    exit $ERRORS{'CRITICAL'};
}
$p->close();
#
# Prepare SNMP Session
#
# session() is called in list context: it returns the session object and
# an error string; the object is undefined when the session failed.
($session, $error) = Net::SNMP->session(
    -hostname   => $conf->{'hostname'},
    -community  => $conf->{'community'},
    -port       => $conf->{'snmp_port'},
    -version    => $conf->{'snmp_version'},
    -maxmsgsize => $conf->{'maxmsgsize'},
    -timeout    => $conf->{'timeout'},
);
if (!defined($session)){
    # Terminate the line: plugin output has to be one complete line.
    print "ERROR: $error\n";
    exit $ERRORS{'UNKNOWN'};
}
#
# Query SNMP informations from the single switch
#
# First get the WWPN of the SANBox. Its last four octets, written as dotted
# decimal numbers, are part of the status and firmware OIDs built below.
my $WWPN=snmp_get_request($conf,$session,$fcFeElementName);
# NOTE(review): the WWPN is expected to arrive as a string of hex digits,
# presumably with a leading "0x" - confirm against a real switch.
my $wwpn_hex = defined($WWPN) ? "$WWPN" : '';
$wwpn_hex =~ s/\A0x//i;
# At least four complete octets are needed; anything else used to end in a
# raw Perl error from splice() or in queries for a nonsense OID.
if ( $wwpn_hex !~ /\A(?:[0-9a-fA-F][0-9a-fA-F]){4,}\z/ ){
    $session->close;
    alarm(0);
    print "UNKNOWN: unexpected WWPN '$wwpn_hex' returned by $conf->{'hostname'}\n";
    exit $ERRORS{'UNKNOWN'};
}
my @array = unpack ("C*", pack ("H*", $wwpn_hex));
my $sanbox_id=join(".",splice(@array,-4,4));
my $snmp_status_query=".1.3.6.1.3.94.1.8.1.6.16.0.0.192.$sanbox_id.0.0.0.0.0.0.0.0";
# The installed Firmware of the Fabric Element
our $firmware_oid = ".1.3.6.1.3.94.1.7.1.3.16.0.0.192.$sanbox_id.0.0.0.0.0.0.0.0.1";
# Now get some basic parameters
my $baseinfo=snmp_get_table($conf,$session,$base_table_query);
# and also the currently running Firmware
my $firmware=snmp_get_request($conf,$session,$firmware_oid);
# Get the status output from the switch
my $status=snmp_get_table($conf,$session,$snmp_status_query);
# Get the names for the status output
my $snmp_status_names='.1.3.6.1.3.94.1.8.1.3.16.0.0.192.'.$sanbox_id.'.0.0.0.0.0.0.0.0';
my $status_names_ref=snmp_get_table($conf,$session,$snmp_status_names);
my %status_names=%$status_names_ref;
#
# Close the session and reset the alarm
#
$session->close;
alarm(0);
# In debug mode dump everything that was fetched on STDERR.
if ($conf->{'debug'}){
    print STDERR "\nSanbox ID: $sanbox_id ($WWPN)\n";
    print STDERR "Firmware : $firmware\n";
    print STDERR "Basic information:\n";
    print STDERR Data::Dumper->Dump([$baseinfo]);
    print STDERR "\nQuerying : $snmp_status_query";
    print STDERR "\nStatus Names Table:\n";
    print STDERR Data::Dumper->Dump([$status_names_ref]);
    print STDERR "\nStatus Table:\n";
    print STDERR Data::Dumper->Dump([$status]);
}
#
# Analyze the tables...
#
# Every entry of the names table is matched with the entry of the status
# table that has the same index. Temperature values are checked against the
# thresholds, the status entries against their expected "good" text.
my $perfdata='';
# Alert texts are collected per severity and put in front of the output at
# the end, so that the first text always matches the final exit code.
my @critical_alerts;
my @warning_alerts;
foreach my $name_oid (sort(keys(%status_names))){
    # A plain "my (...) = 0" would initialise only the first flag.
    my ($is_temp, $is_temp_status, $is_status)=(0,0,0);
    my ($sensor_number,$sensor_name);
    my $name=$status_names{$name_oid};
    $name='' if (!defined($name));
    $output.="$name: ";
    if ( $name =~ /.*Temperature Sensor (.*) Value/ ){
        $sensor_number=$1;
        $perfdata.="'Temp ".$sensor_number."'=";
        $is_temp=1;
    }
    elsif ( $name =~ /Temperature Status/ ){
        $is_temp_status=1;
        $sensor_name=$name;
    }
    elsif ( $name =~ /Power supply (.*) Status/ ){
        $sensor_number=$1;
        $is_status=1;
        $sensor_name=$name;
    }
    elsif ( $name =~ /Fan (.*) Status/ ){
        $sensor_number=$1;
        $is_status=1;
        $sensor_name=$name;
    }
    # convert the OID for the name to match the OID for the value:
    # only the column number changes (3 -> 6); dots are matched literally.
    (my $value_oid=$name_oid) =~ s/\A(\.?1\.3\.6\.1\.3\.94\.1\.8\.1)\.3(\.16\.0\.0\.192)/$1.6$2/;
    my $value=$status->{$value_oid};
    $value='' if (!defined($value));
    $output.="$value; ";
    # now check the results depending on the sensors:
    if ($is_temp){
        my $conf_warn = $conf->{'warning'};
        my $conf_crit = $conf->{'critical'};
        if ($sensor_number =~ /\A\d+\z/ && $sensor_number == 4) {
            $conf_warn = $conf->{'sensor_4_warn'};
            $conf_crit = $conf->{'sensor_4_crit'};
        }
        # Take the first number of the value; stripping all non-digits
        # would turn a reading like "32.5" into 325.
        my ($temp) = $value =~ /(-?\d+(?:\.\d+)?)/;
        if (defined($temp)){
            $perfdata.="$temp;$conf_warn;$conf_crit;; ";
            # Numeric comparison: as strings '9' would exceed '45'.
            if ($temp > $conf_crit){
                $exitcode=$ERRORS{'CRITICAL'};
                push(@critical_alerts,"CRITICAL: temperatur $value on sensor $sensor_number exceeds critical level ($conf_crit); ");
            }
            elsif ($temp > $conf_warn){
                # never downgrade a CRITICAL found on an earlier sensor
                $exitcode=$ERRORS{'WARNING'} if ($exitcode < $ERRORS{'WARNING'});
                push(@warning_alerts,"WARNING: temperatur $value on sensor $sensor_number exceeds warning level ($conf_warn); ");
            }
        }
        else {
            # no readable temperature: 'U' marks an unknown perfdata value
            $perfdata.="U;$conf_warn;$conf_crit;; ";
        }
    }
    if ($is_temp_status){
        if ($value !~ /Normal/){
            $exitcode=$ERRORS{'CRITICAL'};
            push(@critical_alerts,"CRITICAL: $sensor_name is $value; ");
        }
    }
    if ($is_status){
        if ($value !~ /Good/){
            $exitcode=$ERRORS{'CRITICAL'};
            push(@critical_alerts,"CRITICAL: $sensor_name is $value; ");
        }
    }
}
# Most severe alerts first, followed by the complete sensor listing.
$output=join('',@critical_alerts,@warning_alerts).$output;
# Append the basic system information and the firmware version.
foreach my $value (sort(keys(%BASE_OIDs))){
    my $info=$baseinfo->{$BASE_OIDs{$value}};
    $info='' if (!defined($info));
    $output.="$value: $info; ";
}
$output.="Firmware: $firmware; ";
#
# Finished : output what we have so far...
#
print "$output | $perfdata\n";
exit $exitcode;
__END__
=head1 NAME
check_qlogic_sanbox - check health of QLogic SANbox FC switch
=head1 SYNOPSIS
check_qlogic_sanbox -H $HOSTNAME$ [ options ]
Options:
-H <HOSTNAME> | --hostname <HOSTNAME>
-w <int> | --warning <int>
-c <int> | --critical <int>
-t <int> | --timeout <int>
-C <string> | --community <string>
-V <string> | --snmp-version <string>
-p <int> | --port <int>
-f <file> | --filename <file>
--sensor4_warn <int>
--sensor4_crit <int>
-h | --help
-v | --version
-d | --debug
=head1 OPTIONS
=over 8
=item B<--hostname> I<hostname>
The hostname/IP of the SANbox. Default: localhost (which makes no sense).
=item B<--critical> I<int>
Critical temperature level. Default: 45 degrees Celsius.
=item B<--warning> I<int>
Warning temperature level. Default: 35 C
=item B<--timeout> I<int>
Number of seconds after which the plugin gives up and exits with result UNKNOWN. Default: 60.
=item B<--community> I<string>
SNMP community string to use. Default: public.
=item B<--port> I<int>
SNMP port to use. Default: 161.
=item B<--snmp-version> I<string>
SNMP version to use. Default: 2c.
=item B<--filename> F<path_to_file>
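You can put the configuration in a file with ini-file syntax. Each section is named after the hostname/IP given with B<--hostname>. The recognized keys are: community, snmp_version, snmp_port, warning, critical, sensor4_warn and sensor4_crit. Values found in the file replace the ones given on the command line.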
Example:
[192.168.0.1]
warning=37
critical=39
community=public
sensor4_warn=50
sensor4_crit=55
[192.168.0.2]
warning=40
critical=45
community=secure
sensor4_warn=60
sensor4_crit=65
The script should be called with the hostname/IP and the filename option like:
C<$USER1$/check_qlogic_sanbox -H 192.168.0.1 --filename /etc/nagios-plugins/sanboxes.ini>
=item B<--sensor4_warn> I<int>
Warning temperature level for the CPU sensor (Sensor 4). Default: 70 C
=item B<--sensor4_crit> I<int>
Critical temperature level for the CPU sensor (Sensor 4). Default: 75 C
=item B<--help>
Produces this output.
=item B<--debug>
Print debug output on STDERR.
=back
=head1 DESCRIPTION
check_qlogic_sanbox is a Nagios plugin which checks QLogic SANbox Fibre Channel
switches via SNMP.
This plugin has been tested with the following QLogic switches:
=over 4
=item SANbox 5200 FC Switch
=item SANbox 5202 FC Switch
=item SANbox 5600 FC Switch
=item SANbox 5602 FC Switch
=item SANbox 5800 FC Switch
=back
=head1 AUTHORS
Written by Lars Vogdt <lrupp@suse.de>
=head1 SUPPORT
Please use https://bugzilla.opensuse.org to submit patches or suggest improvements.
Include version information with all correspondence (when possible use output from
the --version option of the plugin itself).