From cd21084e3c409a3c2416b0cfab4563380e6dbf62 Mon Sep 17 00:00:00 2001 From: Elias Ohm Date: Mon, 20 Jul 2020 02:39:00 +0200 Subject: [PATCH] spare disks in shared mode, optional aggregate spare-count cluster- or node-wide, fix filter to query only disks from home-node * add handling of spare disks in "shared" mode (partitioned or formatted disks); these are logically not assigned to a specific node's spare disk capacity, so they are shown at a _SHARED_ node and not shown in single-node mode * add a suboption "aggregate" that allows summing up/aggregating all spare disks in the cluster (if run against a specific "node", the "shared" mode disks are accounted to that node) * fix query-building to find only disks for the specific home-node (the error was not visible before because it was post-filtered in the result function) * fix broken branching conditions within these functions (see also #86) --- check_netapp_ontap.pl | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/check_netapp_ontap.pl b/check_netapp_ontap.pl index 42defd6..ec05249 100644 --- a/check_netapp_ontap.pl +++ b/check_netapp_ontap.pl @@ -174,19 +174,26 @@ sub calc_disk_health { ############################################## sub get_spare_info { - my ($nahStorage, $strVHost, $strWarning, $strCritical) = @_; + my ($nahStorage, $strVHost, $strWarning, $strCritical, $strSuboption) = @_; my $nahSpareIterator = NaElement->new("storage-disk-get-iter"); my $nahQuery = NaElement->new("query"); my $nahSpareInfo = NaElement->new("storage-disk-info"); my $nahSpareOwnerInfo = NaElement->new("disk-ownership-info"); my $strActiveTag = ""; my %hshSpareInfo; + my $aggregateNode = undef; + + if (defined($strSuboption)) { + my @arySuboption = split(",",$strSuboption); + if ("aggregate" ~~ @arySuboption) { $aggregateNode = '_ALL_'; } + } if (defined($strVHost)) { $nahSpareIterator->child_add($nahQuery); $nahQuery->child_add($nahSpareInfo); 
$nahSpareInfo->child_add($nahSpareOwnerInfo); - $nahSpareOwnerInfo->child_add_string("home-node", $strVHost); + $nahSpareOwnerInfo->child_add_string("home-node-name", $strVHost); + if (defined($aggregateNode)) { $aggregateNode = $strVHost; } } while(defined($strActiveTag)) { @@ -218,8 +225,15 @@ sub get_spare_info { my $spareInfo = $raidInfo->child_get("disk-spare-info"); my $zeroed = $spareInfo->child_get_string('is-zeroed'); - $hshSpareInfo{$nodeName}{$strSpareName}{'status'} = $containertype; - $hshSpareInfo{$nodeName}{$strSpareName}{'zeroed'} = $zeroed; + $hshSpareInfo{$aggregateNode||$nodeName}{$strSpareName}{'status'} = $containertype; + $hshSpareInfo{$aggregateNode||$nodeName}{$strSpareName}{'zeroed'} = $zeroed; + } elsif ($containertype eq "shared") { + my $sharedInfo = $raidInfo->child_get("disk-shared-info"); + my $aggregateList = $sharedInfo->child_get("aggregate-list"); + next SPARE if (defined($aggregateList)); + + $hshSpareInfo{$aggregateNode||'_SHARED_'}{$strSpareName}{'status'} = $containertype; + $hshSpareInfo{$aggregateNode||'_SHARED_'}{$strSpareName}{'zeroed'} = undef; } else { next SPARE; } @@ -242,7 +256,7 @@ sub calc_spare_health { next NODE; } - my ($spareCount, $unassignedCount, $unknownCount, $notZeroedCount) = (0, 0, 0, 0); + my ($spareCount, $unassigneSpareCount, $sharedSpareCount, $unassignedCount, $unknownCount, $notZeroedCount) = (0, 0, 0, 0, 0, 0); my $strNewMessage; foreach my $strSpare (keys %{$hrefSpareInfo->{$node}}) { @@ -253,14 +267,17 @@ sub calc_spare_health { $notZeroedCount++; } my $status = $hrefSpareInfo->{$node}->{$strSpare}->{'status'}; - if (defined($status && $status eq "spare")) { - $spareCount++; - } elsif (defined($status && $status eq "unassigned")) { + if (defined($status) && $status eq "spare") { + $unassigneSpareCount++; + } elsif (defined($status) && $status eq "unassigned") { $unassignedCount++; + } elsif (defined($status) && $status eq "shared") { + $sharedSpareCount++; } else { $unknownCount++; } } + 
$spareCount = $unassigneSpareCount + $sharedSpareCount; if ($spareCount < $strCritical) { $critStatus++; @@ -272,7 +289,7 @@ sub calc_spare_health { $unknownStatus++; } - $strNewMessage = sprintf("%s: %d spare disks (%s not zeroed) and %s unassigned", $node, $spareCount, $notZeroedCount, $unassignedCount); + $strNewMessage = sprintf("%s: %d spare (%d shared, %s not zeroed) and %s unassigned", $node, $spareCount, $sharedSpareCount, $notZeroedCount, $unassignedCount); $strOutput = get_nagios_description($strOutput, $strNewMessage); } @@ -1968,6 +1985,8 @@ sub help { desc: Check the number of spare disks thresh: Warning / critical required spare disks. Default thresholds are 2 / 1. node: The node option restricts this check by cluster-node name. + (The spare disks already assigned to shared pools do not belong to a specific nodes aggregate, thus are not shown then.) + suboption: aggregate (sums up all spare disks in a cluster or on a node, includes also the shared disks on their home-node) * For keyword thresholds, if you want to ignore alerts for that particular keyword you set it at the same threshold that the alert defaults to. @@ -2362,7 +2381,7 @@ sub filter_object { $strWarning //= 2; $strCritical //= 1; - my $hrefSpareInfo = get_spare_info($nahStorage, $strVHost, $strWarning, $strCritical); + my $hrefSpareInfo = get_spare_info($nahStorage, $strVHost, $strWarning, $strCritical, $strSuboption); if (defined($strModifier)) { $hrefSpareInfo = filter_object($hrefSpareInfo, $strModifier);