From d147cd72cc730d543081a9241006cdf2d654800c Mon Sep 17 00:00:00 2001 From: Ed Savage <32410745+edsavage@users.noreply.github.com> Date: Mon, 13 Aug 2018 10:31:17 +0100 Subject: [PATCH] [ML] Partition-wise maximum scores (#32748) Added infrastructure to push through the 'person name field value' to the normalizer process. This is required by the normalizer to retrieve the maximum scores for individual partitions. --- .../BucketInfluencerNormalizable.java | 5 +++++ .../normalizer/BucketNormalizable.java | 5 +++++ .../normalizer/InfluencerNormalizable.java | 5 +++++ .../MultiplyingNormalizerProcess.java | 9 ++++---- .../job/process/normalizer/Normalizable.java | 2 ++ .../ml/job/process/normalizer/Normalizer.java | 2 ++ .../process/normalizer/NormalizerResult.java | 22 ++++++++++++++++++- .../PartitionScoreNormalizable.java | 5 +++++ .../normalizer/RecordNormalizable.java | 6 +++++ .../BucketInfluencerNormalizableTests.java | 8 +++++++ .../normalizer/BucketNormalizableTests.java | 4 ++++ .../InfluencerNormalizableTests.java | 4 ++++ .../normalizer/NormalizerResultTests.java | 2 ++ 13 files changed, 74 insertions(+), 5 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketInfluencerNormalizable.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketInfluencerNormalizable.java index e55e5ac0346d8..8ee5d1ad6e2d3 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketInfluencerNormalizable.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketInfluencerNormalizable.java @@ -46,6 +46,11 @@ public String getPersonFieldName() { return bucketInfluencer.getInfluencerFieldName(); } + @Override + public String getPersonFieldValue() { + return null; + } + @Override public String getFunctionName() { return null; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketNormalizable.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketNormalizable.java index 1ba2e77040897..7ef23cb513b7f 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketNormalizable.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketNormalizable.java @@ -64,6 +64,11 @@ public String getPersonFieldName() { return null; } + @Override + public String getPersonFieldValue() { + return null; + } + @Override public String getFunctionName() { return null; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/InfluencerNormalizable.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/InfluencerNormalizable.java index 74cb86a3fdfdd..bc1567ac00a14 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/InfluencerNormalizable.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/InfluencerNormalizable.java @@ -44,6 +44,11 @@ public String getPersonFieldName() { return influencer.getInfluencerFieldName(); } + @Override + public String getPersonFieldValue() { + return influencer.getInfluencerFieldValue(); + } + @Override public String getFunctionName() { return null; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/MultiplyingNormalizerProcess.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/MultiplyingNormalizerProcess.java index fc7bd35188473..8aa266e15d22e 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/MultiplyingNormalizerProcess.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/MultiplyingNormalizerProcess.java @@ -63,10 +63,11 @@ public void writeRecord(String[] record) throws IOException { result.setPartitionFieldName(record[1]); result.setPartitionFieldValue(record[2]); result.setPersonFieldName(record[3]); - result.setFunctionName(record[4]); - result.setValueFieldName(record[5]); - result.setProbability(Double.parseDouble(record[6])); - result.setNormalizedScore(factor * Double.parseDouble(record[7])); + result.setPersonFieldValue(record[4]); + result.setFunctionName(record[5]); + result.setValueFieldName(record[6]); + result.setProbability(Double.parseDouble(record[7])); + result.setNormalizedScore(factor * Double.parseDouble(record[8])); } catch (NumberFormatException | ArrayIndexOutOfBoundsException e) { throw new IOException("Unable to write to no-op normalizer", e); } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/Normalizable.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/Normalizable.java index 606be98ae10e7..7efadf2961308 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/Normalizable.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/Normalizable.java @@ -44,6 +44,8 @@ public Normalizable(String indexName) { abstract String getPersonFieldName(); + abstract String getPersonFieldValue(); + abstract String getFunctionName(); abstract String getValueFieldName(); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/Normalizer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/Normalizer.java index 2c929ff4f1ae4..2d4e2135478f3 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/Normalizer.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/Normalizer.java @@ -70,6 +70,7 @@ public void normalize(Integer bucketSpan, boolean perPartitionNormalization, NormalizerResult.PARTITION_FIELD_NAME_FIELD.getPreferredName(), NormalizerResult.PARTITION_FIELD_VALUE_FIELD.getPreferredName(), NormalizerResult.PERSON_FIELD_NAME_FIELD.getPreferredName(), + NormalizerResult.PERSON_FIELD_VALUE_FIELD.getPreferredName(), NormalizerResult.FUNCTION_NAME_FIELD.getPreferredName(), NormalizerResult.VALUE_FIELD_NAME_FIELD.getPreferredName(), NormalizerResult.PROBABILITY_FIELD.getPreferredName(), @@ -108,6 +109,7 @@ private static void writeNormalizableAndChildrenRecursively(Normalizable normali Strings.coalesceToEmpty(normalizable.getPartitionFieldName()), Strings.coalesceToEmpty(normalizable.getPartitionFieldValue()), Strings.coalesceToEmpty(normalizable.getPersonFieldName()), + Strings.coalesceToEmpty(normalizable.getPersonFieldValue()), Strings.coalesceToEmpty(normalizable.getFunctionName()), Strings.coalesceToEmpty(normalizable.getValueFieldName()), Double.toString(normalizable.getProbability()), diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/NormalizerResult.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/NormalizerResult.java index 5cb2932a28ba5..269792dbe7797 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/NormalizerResult.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/NormalizerResult.java @@ -5,6 +5,7 @@ */ package org.elasticsearch.xpack.ml.job.process.normalizer; +import org.elasticsearch.Version; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; @@ -26,6 +27,7 @@ public class NormalizerResult implements ToXContentObject, Writeable { static final ParseField PARTITION_FIELD_NAME_FIELD = new ParseField("partition_field_name"); static final ParseField PARTITION_FIELD_VALUE_FIELD = new ParseField("partition_field_value"); static final ParseField PERSON_FIELD_NAME_FIELD = new ParseField("person_field_name"); + static final ParseField PERSON_FIELD_VALUE_FIELD = new ParseField("person_field_value"); static final ParseField FUNCTION_NAME_FIELD = new ParseField("function_name"); static final ParseField VALUE_FIELD_NAME_FIELD = new ParseField("value_field_name"); static final ParseField PROBABILITY_FIELD = new ParseField("probability"); @@ -39,6 +41,7 @@ public class NormalizerResult implements ToXContentObject, Writeable { PARSER.declareString(NormalizerResult::setPartitionFieldName, PARTITION_FIELD_NAME_FIELD); PARSER.declareString(NormalizerResult::setPartitionFieldValue, PARTITION_FIELD_VALUE_FIELD); PARSER.declareString(NormalizerResult::setPersonFieldName, PERSON_FIELD_NAME_FIELD); + PARSER.declareString(NormalizerResult::setPersonFieldValue, PERSON_FIELD_VALUE_FIELD); PARSER.declareString(NormalizerResult::setFunctionName, FUNCTION_NAME_FIELD); PARSER.declareString(NormalizerResult::setValueFieldName, VALUE_FIELD_NAME_FIELD); PARSER.declareDouble(NormalizerResult::setProbability, PROBABILITY_FIELD); @@ -49,6 +52,7 @@ public class NormalizerResult implements ToXContentObject, Writeable { private String partitionFieldName; private String partitionFieldValue; private String personFieldName; + private String personFieldValue; private String functionName; private String valueFieldName; private double probability; @@ -62,6 +66,9 @@ public NormalizerResult(StreamInput in) throws IOException { partitionFieldName = in.readOptionalString(); partitionFieldValue = in.readOptionalString(); personFieldName = in.readOptionalString(); + if (in.getVersion().onOrAfter(Version.V_6_5_0)) { + personFieldValue = in.readOptionalString(); + } functionName = in.readOptionalString(); valueFieldName = in.readOptionalString(); probability = in.readDouble(); @@ -74,6 +81,9 @@ public void writeTo(StreamOutput out) throws IOException { out.writeOptionalString(partitionFieldName); out.writeOptionalString(partitionFieldValue); out.writeOptionalString(personFieldName); + if (out.getVersion().onOrAfter(Version.V_6_5_0)) { + out.writeOptionalString(personFieldValue); + } out.writeOptionalString(functionName); out.writeOptionalString(valueFieldName); out.writeDouble(probability); @@ -87,6 +97,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(PARTITION_FIELD_NAME_FIELD.getPreferredName(), partitionFieldName); builder.field(PARTITION_FIELD_VALUE_FIELD.getPreferredName(), partitionFieldValue); builder.field(PERSON_FIELD_NAME_FIELD.getPreferredName(), personFieldName); + builder.field(PERSON_FIELD_VALUE_FIELD.getPreferredName(), personFieldValue); builder.field(FUNCTION_NAME_FIELD.getPreferredName(), functionName); builder.field(VALUE_FIELD_NAME_FIELD.getPreferredName(), valueFieldName); builder.field(PROBABILITY_FIELD.getPreferredName(), probability); @@ -127,6 +138,14 @@ public void setPersonFieldName(String personFieldName) { this.personFieldName = personFieldName; } + public String getPersonFieldValue() { + return personFieldValue; + } + + public void setPersonFieldValue(String personFieldValue) { + this.personFieldValue = personFieldValue; + } + public String getFunctionName() { return functionName; } @@ -161,7 +180,7 @@ public void setNormalizedScore(double normalizedScore) { @Override public int hashCode() { - return Objects.hash(level, partitionFieldName, partitionFieldValue, personFieldName, + return Objects.hash(level, partitionFieldName, partitionFieldValue, personFieldName, personFieldValue, functionName, valueFieldName, probability, normalizedScore); } @@ -184,6 +203,7 @@ public boolean equals(Object other) { && Objects.equals(this.partitionFieldName, that.partitionFieldName) && Objects.equals(this.partitionFieldValue, that.partitionFieldValue) && Objects.equals(this.personFieldName, that.personFieldName) + && Objects.equals(this.personFieldValue, that.personFieldValue) && Objects.equals(this.functionName, that.functionName) && Objects.equals(this.valueFieldName, that.valueFieldName) && this.probability == that.probability diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/PartitionScoreNormalizable.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/PartitionScoreNormalizable.java index 4d5d91aa12f8c..91b2a7a505e35 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/PartitionScoreNormalizable.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/PartitionScoreNormalizable.java @@ -45,6 +45,11 @@ public String getPersonFieldName() { return null; } + @Override + public String getPersonFieldValue() { + return null; + } + @Override public String getFunctionName() { return null; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/RecordNormalizable.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/RecordNormalizable.java index 97114130c84d9..f3f32cb526ead 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/RecordNormalizable.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/RecordNormalizable.java @@ -46,6 +46,12 @@ public String getPersonFieldName() { return over != null ? over : record.getByFieldName(); } + @Override + public String getPersonFieldValue() { + String over = record.getOverFieldValue(); + return over != null ? over : record.getByFieldValue(); + } + @Override public String getFunctionName() { return record.getFunction(); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketInfluencerNormalizableTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketInfluencerNormalizableTests.java index f83d51d84b009..bde5c3e44f9b4 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketInfluencerNormalizableTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketInfluencerNormalizableTests.java @@ -43,10 +43,18 @@ public void testGetPartitionFieldName() { assertNull(new BucketInfluencerNormalizable(bucketInfluencer, INDEX_NAME).getPartitionFieldName()); } + public void testGetPartitionFieldValue() { + assertNull(new BucketInfluencerNormalizable(bucketInfluencer, INDEX_NAME).getPartitionFieldValue()); + } + public void testGetPersonFieldName() { assertEquals("airline", new BucketInfluencerNormalizable(bucketInfluencer, INDEX_NAME).getPersonFieldName()); } + public void testGetPersonFieldValue() { + assertNull(new BucketInfluencerNormalizable(bucketInfluencer, INDEX_NAME).getPersonFieldValue()); + } + public void testGetFunctionName() { assertNull(new BucketInfluencerNormalizable(bucketInfluencer, INDEX_NAME).getFunctionName()); } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketNormalizableTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketNormalizableTests.java index 630bffe11129a..4436fcc7026fe 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketNormalizableTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketNormalizableTests.java @@ -73,6 +73,10 @@ public void testGetPersonFieldName() { assertNull(new BucketNormalizable(bucket, INDEX_NAME).getPersonFieldName()); } + public void testGetPersonFieldValue() { + assertNull(new BucketNormalizable(bucket, INDEX_NAME).getPersonFieldValue()); + } + public void testGetFunctionName() { assertNull(new BucketNormalizable(bucket, INDEX_NAME).getFunctionName()); } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/InfluencerNormalizableTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/InfluencerNormalizableTests.java index 215f88ad33224..ee5518b9c12d5 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/InfluencerNormalizableTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/InfluencerNormalizableTests.java @@ -44,6 +44,10 @@ public void testGetPersonFieldName() { assertEquals("airline", new InfluencerNormalizable(influencer, INDEX_NAME).getPersonFieldName()); } + public void testGetPersonFieldValue() { + assertEquals("AAL", new InfluencerNormalizable(influencer, INDEX_NAME).getPersonFieldValue()); + } + public void testGetFunctionName() { assertNull(new InfluencerNormalizable(influencer, INDEX_NAME).getFunctionName()); } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/NormalizerResultTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/NormalizerResultTests.java index ecaea449f95c1..af35c01aa871d 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/NormalizerResultTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/NormalizerResultTests.java @@ -19,6 +19,7 @@ public void testDefaultConstructor() { assertNull(msg.getPartitionFieldName()); assertNull(msg.getPartitionFieldValue()); assertNull(msg.getPersonFieldName()); + assertNull(msg.getPersonFieldValue()); assertNull(msg.getFunctionName()); assertNull(msg.getValueFieldName()); assertEquals(0.0, msg.getProbability(), EPSILON); @@ -32,6 +33,7 @@ protected NormalizerResult createTestInstance() { msg.setPartitionFieldName("part"); msg.setPartitionFieldValue("something"); msg.setPersonFieldName("person"); + msg.setPersonFieldValue("fred"); msg.setFunctionName("mean"); msg.setValueFieldName("value"); msg.setProbability(0.005);