Skip to content

Commit

Permalink
spare disks in shared mode, optional aggregate spare-count spares clus…
Browse files Browse the repository at this point in the history
…ter- or node wide, fix filter to query only disks from home-node

* add handling of spare disks in "shared" mode (partitioned or formatted disks)
  these are logically not assigned to a specific node's spare disk capacity, so they are shown under a _SHARED_ node and are not shown in single-node mode
* add a suboption "aggregate" that allows summing up/aggregating all disks in the cluster (if run against a specific "node", the "shared"-mode disks are counted towards that node)
* fix query-building to find only disks for the specific home-node (the error was not visible before because the results were post-filtered in the result function)
* fix broken branching conditions within these functions (see also district09#86)
  • Loading branch information
Elias481 committed Sep 24, 2020
1 parent 849fc7d commit f0f011c
Showing 1 changed file with 29 additions and 10 deletions.
39 changes: 29 additions & 10 deletions check_netapp_ontap.pl
Original file line number Diff line number Diff line change
Expand Up @@ -174,19 +174,26 @@ sub calc_disk_health {
##############################################

sub get_spare_info {
my ($nahStorage, $strVHost, $strWarning, $strCritical) = @_;
my ($nahStorage, $strVHost, $strWarning, $strCritical, $strSuboption) = @_;
my $nahSpareIterator = NaElement->new("storage-disk-get-iter");
my $nahQuery = NaElement->new("query");
my $nahSpareInfo = NaElement->new("storage-disk-info");
my $nahSpareOwnerInfo = NaElement->new("disk-ownership-info");
my $strActiveTag = "";
my %hshSpareInfo;
my $aggregateNode = undef;

if (defined($strSuboption)) {
my @arySuboption = split(",",$strSuboption);
if ("aggregate" ~~ @arySuboption) { $aggregateNode = '_ALL_'; }
}

if (defined($strVHost)) {
$nahSpareIterator->child_add($nahQuery);
$nahQuery->child_add($nahSpareInfo);
$nahSpareInfo->child_add($nahSpareOwnerInfo);
$nahSpareOwnerInfo->child_add_string("home-node", $strVHost);
$nahSpareOwnerInfo->child_add_string("home-node-name", $strVHost);
if (defined($aggregateNode)) { $aggregateNode = $strVHost; }
}

while(defined($strActiveTag)) {
Expand Down Expand Up @@ -218,8 +225,15 @@ sub get_spare_info {
my $spareInfo = $raidInfo->child_get("disk-spare-info");
my $zeroed = $spareInfo->child_get_string('is-zeroed');

$hshSpareInfo{$nodeName}{$strSpareName}{'status'} = $containertype;
$hshSpareInfo{$nodeName}{$strSpareName}{'zeroed'} = $zeroed;
$hshSpareInfo{$aggregateNode||$nodeName}{$strSpareName}{'status'} = $containertype;
$hshSpareInfo{$aggregateNode||$nodeName}{$strSpareName}{'zeroed'} = $zeroed;
} elsif ($containertype eq "shared") {
my $sharedInfo = $raidInfo->child_get("disk-shared-info");
my $aggregateList = $sharedInfo->child_get("aggregate-list");
next SPARE if (defined($aggregateList));

$hshSpareInfo{$aggregateNode||'_SHARED_'}{$strSpareName}{'status'} = $containertype;
$hshSpareInfo{$aggregateNode||'_SHARED_'}{$strSpareName}{'zeroed'} = undef;
} else {
next SPARE;
}
Expand All @@ -242,7 +256,7 @@ sub calc_spare_health {
next NODE;
}

my ($spareCount, $unassignedCount, $unknownCount, $notZeroedCount) = (0, 0, 0, 0);
my ($spareCount, $unassigneSpareCount, $sharedSpareCount, $unassignedCount, $unknownCount, $notZeroedCount) = (0, 0, 0, 0, 0, 0);
my $strNewMessage;

foreach my $strSpare (keys %{$hrefSpareInfo->{$node}}) {
Expand All @@ -253,14 +267,17 @@ sub calc_spare_health {
$notZeroedCount++;
}
my $status = $hrefSpareInfo->{$node}->{$strSpare}->{'status'};
if (defined($status && $status eq "spare")) {
$spareCount++;
} elsif (defined($status && $status eq "unassigned")) {
if (defined($status) && $status eq "spare") {
$unassigneSpareCount++;
} elsif (defined($status) && $status eq "unassigned") {
$unassignedCount++;
} elsif (defined($status) && $status eq "shared") {
$sharedSpareCount++;
} else {
$unknownCount++;
}
}
$spareCount = $unassigneSpareCount + $sharedSpareCount;

if ($spareCount < $strCritical) {
$critStatus++;
Expand All @@ -272,7 +289,7 @@ sub calc_spare_health {
$unknownStatus++;
}

$strNewMessage = sprintf("%s: %d spare disks (%s not zeroed) and %s unassigned", $node, $spareCount, $notZeroedCount, $unassignedCount);
$strNewMessage = sprintf("%s: %d spare (%d shared, %s not zeroed) and %s unassigned", $node, $spareCount, $sharedSpareCount, $notZeroedCount, $unassignedCount);
$strOutput = get_nagios_description($strOutput, $strNewMessage);
}

Expand Down Expand Up @@ -1968,6 +1985,8 @@ sub help {
desc: Check the number of spare disks
thresh: Warning / critical required spare disks. Default thresholds are 2 / 1.
node: The node option restricts this check by cluster-node name.
(The spare disks already assigned to shared pools do not belong to a specific nodes aggregate, thus are not shown then.)
suboption: aggregate (sums up all spare disks in a cluster or on a node, includes also the shared disks on their home-node)
* For keyword thresholds, if you want to ignore alerts for that particular keyword you set it at the same threshold that the alert defaults to.
Expand Down Expand Up @@ -2362,7 +2381,7 @@ sub filter_object {
$strWarning //= 2;
$strCritical //= 1;

my $hrefSpareInfo = get_spare_info($nahStorage, $strVHost, $strWarning, $strCritical);
my $hrefSpareInfo = get_spare_info($nahStorage, $strVHost, $strWarning, $strCritical, $strSuboption);

if (defined($strModifier)) {
$hrefSpareInfo = filter_object($hrefSpareInfo, $strModifier);
Expand Down

0 comments on commit f0f011c

Please sign in to comment.