#!/usr/bin/perl -w
# nagios: -epn
#
# Copyright (c) 2012, SUSE Linux Products GmbH
# Author: Lars Vogdt
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# * Neither the name of the Novell nor the names of its contributors may be
#   used to endorse or promote products derived from this software without
#   specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
use strict;
use warnings;
use Net::SNMP;
use Getopt::Long;
use Net::Ping;
use Pod::Usage;
use Data::Dumper;
use Scalar::Util qw(looks_like_number);
use bignum qw/hex/;

Getopt::Long::Configure('bundling');

# cleanup the environment
$ENV{'PATH'}     = '/bin:/usr/bin:/sbin:/usr/sbin:';
$ENV{'BASH_ENV'} = '';
$ENV{'ENV'}      = '';

# Default settings; overridden by command line options and, if --filename
# is given, by the matching section of the ini file (the ini file wins).
our $conf = {
    'VERSION'       => '1.4',
    'PROGNAME'      => 'check_qlogic_sanbox',
    'timeout'       => '60',
    'critical'      => '45',
    'warning'       => '35',
    'sensor_4_warn' => '70',
    'sensor_4_crit' => '75',
    'debug'         => 0,
    'hostname'      => 'localhost',
    'community'     => 'public',
    'snmp_version'  => '2c',
    'snmp_port'     => '161',
    'maxmsgsize'    => '65535',
};
our $print_version = 0;
our $print_help    = 0;
our $exitcode      = 0;
our $output        = '';

# The Name_Identifier of the Fabric Element
our $fcFeElementName = '.1.3.6.1.2.1.75.1.1.2.0';

# The basic information about the Fabric Element
our $base_table_query = '.1.3.6.1.2.1.1';

# Table prefixes used below: the first yields the sensor names, the second
# the matching sensor values. Both tables share the same index suffix.
my $sensor_name_base  = '.1.3.6.1.3.94.1.8.1.3';
my $sensor_value_base = '.1.3.6.1.3.94.1.8.1.6';

our %ERRORS = (
    'OK'        => 0,
    'WARNING'   => 1,
    'CRITICAL'  => 2,
    'UNKNOWN'   => 3,
    'DEPENDENT' => 4
);

our %BASE_OIDs = (
    'sysDescr'    => '.1.3.6.1.2.1.1.1.0',
    'sysUpTime'   => '.1.3.6.1.2.1.1.3.0',
    'sysName'     => '.1.3.6.1.2.1.1.5.0',
    'sysContact'  => '.1.3.6.1.2.1.1.4.0',
    'sysLocation' => '.1.3.6.1.2.1.1.6.0',
);

#######################################################################
# Functions
#######################################################################

# snmp_get_table($conf, $session, $query)
#   Fetch the SNMP table below OID $query via $session->get_table.
#   Returns the response hash reference (OID => value).
#   On failure closes the session, prints a CRITICAL line and exits
#   with the CRITICAL exit code.
sub snmp_get_table {
    my ( $conf, $session, $query ) = @_;
    my $response = $session->get_table($query);
    if ( !defined $response ) {
        my $answer = $session->error;
        $session->close;
        print "CRITICAL: no response with SNMP version $conf->{'snmp_version'} for get_table on $query (error $answer)\n";
        exit $ERRORS{'CRITICAL'};
    }
    return $response;
}

# snmp_get_request($conf, $session, $query)
#   Fetch the single OID $query via $session->get_request.
#   Returns the value stored under $query in the response.
#   On failure closes the session, prints a CRITICAL line and exits
#   with the CRITICAL exit code.
sub snmp_get_request {
    my ( $conf, $session, $query ) = @_;
    my $response = $session->get_request($query);
    if ( !defined $response ) {
        my $answer = $session->error;
        $session->close;
        print "CRITICAL: no response with SNMP version $conf->{'snmp_version'} for get_request on $query (error $answer)\n";
        exit $ERRORS{'CRITICAL'};
    }
    return $response->{$query};
}

# DEBUG($message)
#   Print "DEBUG: $message" when the debug option is set.
sub DEBUG {
    my ($message) = @_;
    print "DEBUG: $message\n" if ( $conf->{'debug'} );
    return;
}

# print_myrevision($commandName, $pluginRevision)
#   Print the plugin name and version.
sub print_myrevision {
    my ( $commandName, $pluginRevision ) = @_;
    print "$commandName v$pluginRevision\n";
    return;
}

# escalate_exitcode($level)
#   Raise the global $exitcode to $level, but never lower it: a WARNING
#   found on a later sensor must not hide a CRITICAL found earlier.
sub escalate_exitcode {
    my ($level) = @_;
    $exitcode = $level if ( $level > $exitcode );
    return $exitcode;
}

# usage_error($message)
#   Print $message on STDOUT, print the synopsis and exit with UNKNOWN.
#   The exit value is passed as a plain string because numeric values in
#   this file are objects (use bignum), which pod2usage() would not accept
#   as an exit value.
sub usage_error {
    my ($message) = @_;
    print "$message\n";
    alarm(0);
    pod2usage(
        -exitval => "$ERRORS{'UNKNOWN'}",
        -verbose => 0,
    );
    return;
}

#######################################################################
# Main
#######################################################################

GetOptions(
    "H=s"            => \$conf->{'hostname'},
    "hostname=s"     => \$conf->{'hostname'},
    "f=s"            => \$conf->{'filename'},
    "filename=s"     => \$conf->{'filename'},
    "v"              => \$print_version,
    "version"        => \$print_version,
    "h"              => \$print_help,
    "help"           => \$print_help,
    "d"              => \$conf->{'debug'},
    "debug"          => \$conf->{'debug'},
    "w=f"            => \$conf->{'warning'},
    "warning=f"      => \$conf->{'warning'},
    "c=f"            => \$conf->{'critical'},
    "critical=f"     => \$conf->{'critical'},
    "sensor4_warn=f" => \$conf->{'sensor_4_warn'},
    "sensor4_crit=f" => \$conf->{'sensor_4_crit'},
    "t=i"            => \$conf->{'timeout'},
    "timeout=i"      => \$conf->{'timeout'},
    "C=s"            => \$conf->{'community'},
    "community=s"    => \$conf->{'community'},
    "p=i"            => \$conf->{'snmp_port'},
    "port=i"         => \$conf->{'snmp_port'},
    "V=s"            => \$conf->{'snmp_version'},
    "snmp-version=s" => \$conf->{'snmp_version'},
) or pod2usage(2);

pod2usage(
    -exitstatus => 0,
    -verbose    => 2,    # 2 to print full pod
) if $print_help;

# Just in case of problems, let's not hang Nagios
$SIG{'ALRM'} = sub {
    print "UNKNOWN: No snmp response from " . $conf->{'hostname'} . " (alarm timeout)\n";
    exit $ERRORS{'UNKNOWN'};
};
alarm( $conf->{'timeout'} );

if ($print_version) {
    print_myrevision( $conf->{'PROGNAME'}, $conf->{'VERSION'} );
    exit $ERRORS{'OK'};
}

#
# Check the given options...
#
if ( defined( $conf->{'filename'} ) ) {

    # Only needed when a configuration file is used, so load it on demand.
    require Config::IniFiles;
    my $ini = Config::IniFiles->new( -file => "$conf->{'filename'}" );
    if ( !$ini ) {
        print "ERROR: Could not open $conf->{'filename'} : $!\n";
        exit $ERRORS{'UNKNOWN'};
    }
    if ( !$ini->SectionExists( $conf->{'hostname'} ) ) {
        print "ERROR: Could not find section [$conf->{'hostname'}] in $conf->{'filename'}\n";
        exit $ERRORS{'UNKNOWN'};
    }

    # internal configuration key => key name inside the ini file
    my %ini_key_for = (
        'community'     => 'community',
        'snmp_version'  => 'snmp_version',
        'snmp_port'     => 'snmp_port',
        'warning'       => 'warning',
        'critical'      => 'critical',
        'sensor_4_warn' => 'sensor4_warn',
        'sensor_4_crit' => 'sensor4_crit',
    );
    foreach my $conf_key ( sort keys %ini_key_for ) {
        my $ini_value = $ini->val( $conf->{'hostname'}, $ini_key_for{$conf_key} );
        $conf->{$conf_key} = $ini_value if ( defined $ini_value );
    }
}

if ( !defined( $conf->{'hostname'} ) || $conf->{'hostname'} eq '' ) {
    usage_error("ERROR - no hostname given");
}

# Values from the ini file are not type-checked by Getopt::Long, so make
# sure every threshold is a number before comparing numerically.
foreach my $threshold (qw(warning critical sensor_4_warn sensor_4_crit)) {
    if ( !looks_like_number( $conf->{$threshold} ) ) {
        my $shown = defined $conf->{$threshold} ? $conf->{$threshold} : '';
        usage_error("ERROR: value '$shown' for $threshold is not a number");
    }
}

# Thresholds are numbers: compare them numerically ("9" gt "45" is true).
if ( $conf->{'warning'} > $conf->{'critical'} ) {
    usage_error("ERROR: warning level ($conf->{'warning'}) should not be greater than critical level ($conf->{'critical'})");
}
if ( $conf->{'sensor_4_warn'} > $conf->{'sensor_4_crit'} ) {
    usage_error("ERROR: sensor 4 warning level ($conf->{'sensor_4_warn'}) should not be greater than sensor 4 critical level ($conf->{'sensor_4_crit'})");
}

if ( $conf->{'debug'} ) {
    print STDERR "Internal configuration:\n" . Data::Dumper->Dump( [$conf] );
}

#
# Basic defines / checks
#
my $error = 'UNKNOWN';
my $session;

# First try to ping the Sanbox, this gives faster results
my $p = Net::Ping->new();
if ( !$p->ping( $conf->{'hostname'} ) ) {
    print "CRITICAL: could not ping your Sanbox ($conf->{'hostname'})\n";
    exit $ERRORS{'CRITICAL'};
}
$p->close();

#
# Prepare SNMP Session
#
( $session, $error ) = Net::SNMP->session(
    -hostname   => $conf->{'hostname'},
    -community  => $conf->{'community'},
    -port       => $conf->{'snmp_port'},
    -version    => $conf->{'snmp_version'},
    -maxmsgsize => $conf->{'maxmsgsize'},
    -timeout    => $conf->{'timeout'},
);

if ( !defined($session) ) {
    print "ERROR: $error\n";
    exit $ERRORS{'UNKNOWN'};
}

#
# Query SNMP informations from the single switch
#
# first get the WWPN of the SANBox and turn its last four bytes into the
# dotted decimal form used as part of the table index
my $WWPN      = snmp_get_request( $conf, $session, $fcFeElementName );
my @array     = unpack( "C*", pack( "H*", $WWPN ) );
my $sanbox_id = join( ".", splice( @array, -4, 4 ) );

# index suffix shared by all per-switch tables queried below
my $unit_index = "16.0.0.192.$sanbox_id.0.0.0.0.0.0.0.0";

my $snmp_status_query = "$sensor_value_base.$unit_index";

# The installed Firmware of the Fabric Element
our $firmware_oid = ".1.3.6.1.3.94.1.7.1.3.$unit_index.1";

# Now get some basic parameters
my $baseinfo = snmp_get_table( $conf, $session, $base_table_query );

# and also the currently running Firmware
my $firmware = snmp_get_request( $conf, $session, $firmware_oid );

# Get the status output from the switch
my $status = snmp_get_table( $conf, $session, $snmp_status_query );

# Get the names for the status output
my $snmp_status_names = "$sensor_name_base.$unit_index";
my $status_names_ref  = snmp_get_table( $conf, $session, $snmp_status_names );
my %status_names      = %$status_names_ref;

#
# Close the session and reset the alarm
#
$session->close;
alarm(0);

if ( $conf->{'debug'} ) {
    print STDERR "\nSanbox ID: $sanbox_id ($WWPN)\n";
    print STDERR "Firmware : $firmware\n";
    print STDERR "Basic information:\n";
    print STDERR Data::Dumper->Dump( [$baseinfo] );
    print STDERR "\nQuerying : $snmp_status_query";
    print STDERR "\nStatus Names Table:\n";
    print STDERR Data::Dumper->Dump( [$status_names_ref] );
    print STDERR "\nStatus Table:\n";
    print STDERR Data::Dumper->Dump( [$status] );
}

#
# Analyze the tables...
#
my $perfdata = '';
foreach my $name_oid ( sort( keys(%status_names) ) ) {
    my $sensor_name = $status_names{$name_oid};
    my $kind        = '';    # 'temperature', 'temp_status', 'status' or ''
    my $sensor_number;

    if ( $sensor_name =~ /.*Temperature Sensor (.*) Value/ ) {
        $sensor_number = $1;
        $kind          = 'temperature';
    }
    elsif ( $sensor_name =~ /Temperature Status/ ) {
        $kind = 'temp_status';
    }
    elsif ( $sensor_name =~ /Power supply (.*) Status/ ) {
        $sensor_number = $1;
        $kind          = 'status';
    }
    elsif ( $sensor_name =~ /Fan (.*) Status/ ) {
        $sensor_number = $1;
        $kind          = 'status';
    }

    # convert the OID for the name to match the OID for the value
    # (anchored and quoted, so the dots only match literal dots)
    ( my $value_oid = $name_oid ) =~ s/^\Q$sensor_name_base\E\./${sensor_value_base}./;

    # a name without a matching value must not produce undef warnings
    my $reading = defined $status->{$value_oid} ? $status->{$value_oid} : 'unknown';

    $output .= "$sensor_name: $reading; ";

    # now check the results depending on the sensors:
    if ( $kind eq 'temperature' ) {
        ( my $temp = $reading ) =~ s/\D//g;

        my $conf_warn = $conf->{'warning'};
        my $conf_crit = $conf->{'critical'};
        if ( $sensor_number =~ /^\d+$/ && $sensor_number == 4 ) {
            $conf_warn = $conf->{'sensor_4_warn'};
            $conf_crit = $conf->{'sensor_4_crit'};
        }

        # "U" marks an unknown value in performance data
        my $perf_value = $temp ne '' ? $temp : 'U';
        $perfdata .= "'Temp $sensor_number'=$perf_value;$conf_warn;$conf_crit;; ";

        # Temperatures are numbers: a string comparison would rate
        # "100" as lower than "45".
        if ( $temp ne '' ) {
            if ( $temp > $conf_crit ) {
                escalate_exitcode( $ERRORS{'CRITICAL'} );
                $output = "CRITICAL: temperatur $reading on sensor $sensor_number exceeds critical level ($conf_crit); " . $output;
            }
            elsif ( $temp > $conf_warn ) {
                escalate_exitcode( $ERRORS{'WARNING'} );
                $output = "WARNING: temperatur $reading on sensor $sensor_number exceeds warning level ($conf_warn); " . $output;
            }
        }
    }
    elsif ( $kind eq 'temp_status' ) {
        if ( $reading !~ /Normal/ ) {
            escalate_exitcode( $ERRORS{'CRITICAL'} );
            $output = "CRITICAL: $sensor_name is $reading; " . $output;
        }
    }
    elsif ( $kind eq 'status' ) {
        if ( $reading !~ /Good/ ) {
            escalate_exitcode( $ERRORS{'CRITICAL'} );
            $output = "CRITICAL: $sensor_name is $reading; " . $output;
        }
    }
}

foreach my $value ( sort( keys(%BASE_OIDs) ) ) {
    my $info = defined $baseinfo->{ $BASE_OIDs{$value} } ? $baseinfo->{ $BASE_OIDs{$value} } : '';
    $output .= "$value: $info; ";
}
$output .= "Firmware: $firmware; ";

#
# Finished : output what we have so far...
#
print "$output | $perfdata\n";
exit $exitcode;

__END__

=head1 NAME

check_qlogic_sanbox - check health of QLogic SANbox FC switch

=head1 SYNOPSIS

check_qlogic_sanbox -H $HOSTNAME$ [ options ]

 Options:
    -H | --hostname
    -w | --warning
    -c | --critical
    -t | --timeout
    -C | --community
    -V | --snmp-version
    -p | --port
    -f | --filename
         --sensor4_warn
         --sensor4_crit
    -v | --version
    -h | --help
    -d | --debug

=head1 OPTIONS

=over 8

=item B<--hostname> I<hostname>

The hostname/IP of the SANbox. Default: localhost (which makes no sense).

=item B<--critical> I<degrees>

Critical temperature level. Default: 45 degrees Celsius.

=item B<--warning> I<degrees>

Warning temperature level. Default: 35 C

=item B<--timeout> I<seconds>

Timeout until the plugin times out with result UNKNOWN.

=item B<--community> I<string>

SNMP community string to use. Default: public.

=item B<--port> I<port>

SNMP port to use. Default: 161.

=item B<--snmp-version> I<version>

SNMP version to use. Default: 2c.

=item B<--filename> F<filename>

You can put the configuration in a file with ini-file syntax (using the
hostname as section separator) using the long format of the available
options.

Example:

 [192.168.0.1]
 warning=37
 critical=39
 community=public
 sensor4_warn=50
 sensor4_crit=55

 [192.168.0.2]
 warning=40
 critical=45
 community=secure
 sensor4_warn=60
 sensor4_crit=65

The script should be called with the hostname/IP and the filename option
like:

C<$USER1$/check_qlogic_sanbox -H 192.168.0.1 --filename /etc/nagios-plugins/sanboxes.ini>

=item B<--sensor4_warn> I<degrees>

Warning temperature level for the CPU sensor (Sensor 4). Default: 70 C

=item B<--sensor4_crit> I<degrees>

Critical temperature level for the CPU sensor (Sensor 4). Default: 75 C

=item B<--version>

Print the plugin name and version.

=item B<--help>

Produces this output.

=item B<--debug>

Print debug output on STDERR.

=back

=head1 DESCRIPTION

check_qlogic_sanbox is a Nagios plugin which checks QLogic SANbox Fibre
Channel switches via SNMP.

This plugin has been tested with the following QLogic switches:

=over 4

=item SANbox 5200 FC Switch

=item SANbox 5202 FC Switch

=item SANbox 5600 FC Switch

=item SANbox 5602 FC Switch

=item SANbox 5800 FC Switch

=back

=head1 AUTHORS

Written by Lars Vogdt

=head1 SUPPORT

Please use https://bugzilla.opensuse.org to submit patches or suggest
improvements.

Include version information with all correspondence (when possible use
output from the --version option of the plugin itself).