[7.x] [ML] add GET _cat/ml/datafeeds (elastic#51500) (elastic#51829)

* [ML] add GET _cat/ml/datafeeds (elastic#51500) This adds GET _cat/ml/datafeeds && _cat/ml/datafeeds/{datafeed_id} * fixing for java8 compilation
jtibshirani · Feb 3, 2020 · d293980 · d293980
1 parent cff3fdb
commit d293980
Show file tree

Hide file tree

Showing 5 changed files with 335 additions and 2 deletions.
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java
@@ -254,6 +254,7 @@
 import org.elasticsearch.xpack.ml.rest.calendar.RestPostCalendarEventAction;
 import org.elasticsearch.xpack.ml.rest.calendar.RestPutCalendarAction;
 import org.elasticsearch.xpack.ml.rest.calendar.RestPutCalendarJobAction;
+import org.elasticsearch.xpack.ml.rest.cat.RestCatDatafeedsAction;
 import org.elasticsearch.xpack.ml.rest.cat.RestCatJobsAction;
 import org.elasticsearch.xpack.ml.rest.datafeeds.RestDeleteDatafeedAction;
 import org.elasticsearch.xpack.ml.rest.datafeeds.RestGetDatafeedStatsAction;
@@ -784,7 +785,8 @@ public List<RestHandler> getRestHandlers(Settings settings, RestController restC
             new RestGetTrainedModelsStatsAction(restController),
             new RestPutTrainedModelAction(restController),
             // CAT Handlers
-            new RestCatJobsAction(restController)
+            new RestCatJobsAction(restController),
+            new RestCatDatafeedsAction(restController)
         );
     }
 

diff --git a/...k/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatDatafeedsAction.java b/...k/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatDatafeedsAction.java
@@ -0,0 +1,150 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.ml.rest.cat;
+
+import org.elasticsearch.client.node.NodeClient;
+import org.elasticsearch.cluster.node.DiscoveryNode;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.Table;
+import org.elasticsearch.common.unit.TimeValue;
+import org.elasticsearch.rest.RestController;
+import org.elasticsearch.rest.RestRequest;
+import org.elasticsearch.rest.RestResponse;
+import org.elasticsearch.rest.action.RestResponseListener;
+import org.elasticsearch.rest.action.cat.AbstractCatAction;
+import org.elasticsearch.rest.action.cat.RestTable;
+import org.elasticsearch.xpack.core.ml.action.GetDatafeedsStatsAction;
+import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig;
+import org.elasticsearch.xpack.core.ml.datafeed.DatafeedTimingStats;
+
+import static org.elasticsearch.rest.RestRequest.Method.GET;
+
+public class RestCatDatafeedsAction extends AbstractCatAction {
+
+    public RestCatDatafeedsAction(RestController controller) {
+        controller.registerHandler(GET, "_cat/ml/datafeeds/{" + DatafeedConfig.ID.getPreferredName() + "}", this);
+        controller.registerHandler(GET, "_cat/ml/datafeeds", this);
+    }
+
+    @Override
+    public String getName() {
+        return "cat_ml_get_datafeeds_action";
+    }
+
+    @Override
+    protected RestChannelConsumer doCatRequest(RestRequest restRequest, NodeClient client) {
+        String datafeedId = restRequest.param(DatafeedConfig.ID.getPreferredName());
+        if (Strings.isNullOrEmpty(datafeedId)) {
+            datafeedId = GetDatafeedsStatsAction.ALL;
+        }
+        GetDatafeedsStatsAction.Request request = new GetDatafeedsStatsAction.Request(datafeedId);
+        request.setAllowNoDatafeeds(restRequest.paramAsBoolean(GetDatafeedsStatsAction.Request.ALLOW_NO_DATAFEEDS.getPreferredName(),
+            request.allowNoDatafeeds()));
+        return channel -> client.execute(GetDatafeedsStatsAction.INSTANCE,
+            request,
+            new RestResponseListener<GetDatafeedsStatsAction.Response>(channel) {
+            @Override
+            public RestResponse buildResponse(GetDatafeedsStatsAction.Response getDatafeedsStatsRespons) throws Exception {
+                return RestTable.buildResponse(buildTable(restRequest, getDatafeedsStatsRespons), channel);
+            }
+        });
+    }
+
+    @Override
+    protected void documentation(StringBuilder sb) {
+        sb.append("/_cat/ml/datafeeds\n");
+        sb.append("/_cat/ml/datafeeds/{datafeed_id}\n");
+    }
+
+    @Override
+    protected Table getTableWithHeader(RestRequest request) {
+        Table table = new Table();
+        table.startHeaders();
+
+        // Datafeed Info
+        table.addCell("id", TableColumnAttributeBuilder.builder().setDescription("the datafeed_id").build());
+        table.addCell("state", TableColumnAttributeBuilder.builder()
+            .setDescription("the datafeed state")
+            .setAliases("s")
+            .setTextAlignment(TableColumnAttributeBuilder.TextAlign.RIGHT)
+            .build());
+        table.addCell("assignment_explanation",
+            TableColumnAttributeBuilder.builder("why the datafeed is or is not assigned to a node", false)
+                .setAliases("ae")
+                .build());
+
+        // Timing stats
+        table.addCell("bucket.count",
+            TableColumnAttributeBuilder.builder("bucket count")
+                .setAliases("bc", "bucketCount")
+                .build());
+        table.addCell("search.count",
+            TableColumnAttributeBuilder.builder("number of searches ran by the datafeed")
+                .setAliases("sc", "searchCount")
+                .build());
+        table.addCell("search.time",
+            TableColumnAttributeBuilder.builder("the total search time", false)
+                .setAliases("st", "searchTime")
+                .build());
+        table.addCell("search.bucket_avg",
+            TableColumnAttributeBuilder.builder("the average search time per bucket (millisecond)", false)
+                .setAliases("sba", "bucketTimeMin")
+                .build());
+        table.addCell("search.exp_avg_hour",
+            TableColumnAttributeBuilder.builder("the exponential average search time per hour (millisecond)", false)
+                .setAliases("seah", "searchExpAvgHour")
+                .build());
+
+        //Node info
+        table.addCell("node.id",
+            TableColumnAttributeBuilder.builder("id of the assigned node", false)
+                .setAliases("ni", "nodeId")
+                .build());
+        table.addCell("node.name",
+            TableColumnAttributeBuilder.builder("name of the assigned node", false)
+                .setAliases("nn", "nodeName")
+                .build());
+        table.addCell("node.ephemeral_id",
+            TableColumnAttributeBuilder.builder("ephemeral id of the assigned node", false)
+                .setAliases("ne", "nodeEphemeralId")
+                .build());
+        table.addCell("node.address",
+            TableColumnAttributeBuilder.builder("network address of the assigned node", false)
+                .setAliases("na", "nodeAddress")
+                .build());
+
+        table.endHeaders();
+        return table;
+    }
+
+    private Table buildTable(RestRequest request, GetDatafeedsStatsAction.Response dfStats) {
+        Table table = getTableWithHeader(request);
+        dfStats.getResponse().results().forEach(df -> {
+            table.startRow();
+            table.addCell(df.getDatafeedId());
+            table.addCell(df.getDatafeedState().toString());
+            table.addCell(df.getAssignmentExplanation());
+
+            DatafeedTimingStats timingStats = df.getTimingStats();
+            table.addCell(timingStats == null ? 0 : timingStats.getBucketCount());
+            table.addCell(timingStats == null ? 0 : timingStats.getSearchCount());
+            table.addCell(timingStats == null ?
+                TimeValue.timeValueMillis(0) :
+                TimeValue.timeValueMillis((long)timingStats.getTotalSearchTimeMs()));
+            table.addCell(timingStats == null || timingStats.getBucketCount() == 0 ? 0.0 : timingStats.getAvgSearchTimePerBucketMs());
+            table.addCell(timingStats == null ? 0.0 : timingStats.getExponentialAvgSearchTimePerHourMs());
+
+            DiscoveryNode node = df.getNode();
+            table.addCell(node == null ? null : node.getId());
+            table.addCell(node == null ? null : node.getName());
+            table.addCell(node == null ? null : node.getEphemeralId());
+            table.addCell(node == null ? null : node.getAddress().toString());
+
+            table.endRow();
+        });
+        return table;
+    }
+}
diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/api/cat.ml.datafeeds.json b/x-pack/plugin/src/test/resources/rest-api-spec/api/cat.ml.datafeeds.json
@@ -0,0 +1,72 @@
+{
+  "cat.ml.datafeeds":{
+    "documentation":{
+      "url":"http://www.elastic.co/guide/en/elasticsearch/reference/current/ml-get-datafeed-stats.html"
+    },
+    "stability":"stable",
+    "url":{
+      "paths":[
+        {
+          "path":"/_cat/ml/datafeeds",
+          "methods":[
+            "GET"
+          ]
+        },
+        {
+          "path":"/_cat/ml/datafeeds/{datafeed_id}",
+          "methods":[
+            "GET"
+          ],
+          "parts":{
+            "datafeed_id":{
+              "type":"string",
+              "description":"The ID of the datafeeds stats to fetch"
+            }
+          }
+        }
+      ]
+    },
+    "params":{
+      "allow_no_datafeeds":{
+        "type":"boolean",
+        "required":false,
+        "description":"Whether to ignore if a wildcard expression matches no datafeeds. (This includes `_all` string or when no datafeeds have been specified)"
+      },
+      "format":{
+        "type":"string",
+        "description":"a short version of the Accept header, e.g. json, yaml"
+      },
+      "h":{
+        "type":"list",
+        "description":"Comma-separated list of column names to display"
+      },
+      "help":{
+        "type":"boolean",
+        "description":"Return help information",
+        "default":false
+      },
+      "s":{
+        "type":"list",
+        "description":"Comma-separated list of column names or column aliases to sort by"
+      },
+      "time":{
+        "type":"enum",
+        "description":"The unit in which to display time values",
+        "options":[
+          "d (Days)",
+          "h (Hours)",
+          "m (Minutes)",
+          "s (Seconds)",
+          "ms (Milliseconds)",
+          "micros (Microseconds)",
+          "nanos (Nanoseconds)"
+        ]
+      },
+      "v":{
+        "type":"boolean",
+        "description":"Verbose mode. Display column headers",
+        "default":false
+      }
+    }
+  }
+}
diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/datafeed_cat_apis.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/datafeed_cat_apis.yml
@@ -0,0 +1,109 @@
+setup:
+  - skip:
+      features: headers
+  - do:
+      indices.create:
+        index: airline-data
+        body:
+          mappings:
+            properties:
+              time:
+                type: date
+              airline:
+                type: keyword
+              responsetime:
+                type: float
+  - do:
+      headers:
+        Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser
+      ml.put_job:
+        job_id: job-stats-test
+        body:  >
+          {
+            "job_id":"job-stats-test",
+            "description":"Analysis of response time by airline",
+            "analysis_config" : {
+                "bucket_span": "1h",
+                "detectors" :[{"function":"metric","field_name":"responsetime","by_field_name":"airline"}]
+            },
+            "analysis_limits" : {
+                "model_memory_limit": "10mb"
+            },
+            "data_description" : {
+                "time_field":"time",
+                "time_format":"epoch"
+            }
+          }
+
+  - do:
+      headers:
+        Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser
+      ml.put_datafeed:
+        datafeed_id: datafeed-job-stats-test
+        body:  >
+          {
+            "job_id":"job-stats-test",
+            "indexes":["airline-data"]
+          }
+
+  - do:
+      headers:
+        Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser
+      ml.put_job:
+        job_id: jobs-get-stats-datafeed-job
+        body:  >
+          {
+            "job_id":"jobs-get-stats-datafeed-job",
+            "description":"A job with a datafeed",
+            "analysis_config" : {
+                "bucket_span": "1h",
+                "detectors" :[{"function":"metric","field_name":"responsetime","by_field_name":"airline"}]
+            },
+            "analysis_limits" : {
+                "model_memory_limit": "10mb"
+            },
+            "data_description" : {
+                "time_field":"time",
+                "time_format":"yyyy-MM-dd'T'HH:mm:ssX"
+            }
+          }
+  - do:
+      headers:
+        Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser
+      ml.put_datafeed:
+        datafeed_id: datafeed-jobs-get-stats-datafeed-job
+        body:  >
+          {
+            "job_id":"jobs-get-stats-datafeed-job",
+            "indexes":["airline-data"]
+          }
+
+---
+"Test cat datafeeds":
+
+  - do:
+      cat.ml.datafeeds:
+        datafeed_id: datafeed-job-stats-test
+  - match:
+      $body: |
+        / #id                             state    bucket.count     search.count
+        ^ (datafeed\-job\-stats\-test \s+ \w+ \s+  \d+         \s+  \d+         \n)+  $/
+
+  - do:
+      cat.ml.datafeeds:
+        v: true
+        datafeed_id: datafeed-job-stats-test
+  - match:
+      $body: |
+        /^  id                          \s+  state \s+ bucket\.count \s+ search\.count \n
+           (datafeed\-job\-stats\-test  \s+  \w+   \s+ \d+           \s+ \d+           \n)+  $/
+
+  - do:
+      cat.ml.datafeeds:
+        h: id,search.count,search.time,search.bucket_avg
+        v: true
+  - match:
+      $body: |
+        /^  id                                         \s+  search\.count \s+ search\.time \s+ search\.bucket_avg \n
+           (datafeed\-job\-stats\-test                 \s+  \d+           \s+ \w+          \s+ .*?                \n)+
+           (datafeed\-jobs\-get\-stats\-datafeed\-job  \s+  \d+           \s+ \w+          \s+ .*?                \n)+  $/
diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/job_cat_apis.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/job_cat_apis.yml
@@ -70,7 +70,7 @@ setup:
           }
 
 ---
-"Test get job stats after uploading data prompting the creation of some stats":
+"Test cat anomaly detector jobs":
 
   - do:
       ml.post_data: