-
Notifications
You must be signed in to change notification settings - Fork 8.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[ML] Filtering runtime mappings in anomaly detection wizards (#91534)
* [ML] Filtering runtime mappings in anomaly detection wizards * updating tests * adding check for null when parsing aggs * removing async from tests
- Loading branch information
1 parent
a32f86d
commit d890d22
Showing
3 changed files
with
316 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
183 changes: 183 additions & 0 deletions
183
...l/public/application/jobs/new_job/common/job_creator/util/filter_runtime_mappings.test.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,183 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
import { Job, Datafeed } from '../../../../../../../common/types/anomaly_detection_jobs'; | ||
import { filterRuntimeMappings } from './filter_runtime_mappings'; | ||
|
||
/**
 * Builds a fresh anomaly detection job fixture.
 *
 * The job has a single `mean` detector on the plain `responsetime` field and
 * no influencers, so by default it does not reference any runtime mapping.
 * A new object is returned on every call so tests can mutate it freely.
 */
function getJob(): Job {
  return {
    job_id: 'test',
    description: '',
    groups: [],
    analysis_config: {
      bucket_span: '15m',
      detectors: [
        {
          function: 'mean',
          field_name: 'responsetime',
        },
      ],
      influencers: [],
    },
    data_description: {
      time_field: '@timestamp',
    },
    analysis_limits: {
      model_memory_limit: '11MB',
    },
    model_plot_config: {
      enabled: false,
      annotations_enabled: false,
    },
  };
}
|
||
/**
 * Builds a fresh datafeed fixture.
 *
 * The datafeed declares two runtime mappings (`responsetime_big` and
 * `airline_lower`) and has no aggregations; individual tests add aggregations
 * or remove the mappings as needed. A new object is returned on every call.
 */
function getDatafeed(): Datafeed {
  return {
    datafeed_id: 'datafeed-test',
    job_id: 'dds',
    indices: ['farequote-*'],
    query: {
      bool: {
        must: [
          {
            match_all: {},
          },
        ],
      },
    },
    runtime_mappings: {
      responsetime_big: {
        type: 'double',
        script: {
          source: "emit(doc['responsetime'].value * 100.0)",
        },
      },
      airline_lower: {
        type: 'keyword',
        script: {
          source: "emit(doc['airline'].value.toLowerCase())",
        },
      },
    },
  };
}
|
||
/**
 * Builds a fresh datafeed aggregation fixture.
 *
 * The date histogram contains an `avg` sub-aggregation on the
 * `responsetime_big` field and a `max` on `@timestamp`. A new object is
 * returned on every call so tests can mutate it.
 */
function getAggs() {
  return {
    buckets: {
      date_histogram: {
        field: '@timestamp',
        fixed_interval: '90000ms',
      },
      aggregations: {
        responsetime: {
          avg: {
            field: 'responsetime_big',
          },
        },
        '@timestamp': {
          max: {
            field: '@timestamp',
          },
        },
      },
    },
  };
}
|
||
// Exercises filterRuntimeMappings() against the job/datafeed fixtures above.
// Each test starts from a fresh job (mean detector on `responsetime`) and a
// fresh datafeed (two runtime mappings, no aggregations).
describe('filter_runtime_mappings', () => {
  describe('filterRuntimeMappings()', () => {
    let job: Job;
    let datafeed: Datafeed;
    beforeEach(() => {
      job = getJob();
      datafeed = getDatafeed();
    });

    test('returns no runtime mappings, no mappings in aggs', () => {
      const resp = filterRuntimeMappings(job, datafeed);
      expect(Object.keys(resp.runtime_mappings).length).toEqual(0);

      expect(Object.keys(resp.discarded_mappings).length).toEqual(2);
      expect(resp.discarded_mappings.responsetime_big).not.toEqual(undefined);
      expect(resp.discarded_mappings.airline_lower).not.toEqual(undefined);
    });

    test('returns no runtime mappings, no runtime mappings in datafeed', () => {
      datafeed.runtime_mappings = undefined;
      const resp = filterRuntimeMappings(job, datafeed);
      expect(Object.keys(resp.runtime_mappings).length).toEqual(0);
      expect(resp.runtime_mappings.responsetime_big).toEqual(undefined);

      expect(Object.keys(resp.discarded_mappings).length).toEqual(0);
      expect(resp.discarded_mappings.airline_lower).toEqual(undefined);
    });

    test('return one runtime mapping and one unused mapping, mappings in aggs', () => {
      datafeed.aggregations = getAggs();
      const resp = filterRuntimeMappings(job, datafeed);
      expect(Object.keys(resp.runtime_mappings).length).toEqual(1);
      expect(resp.runtime_mappings.responsetime_big).not.toEqual(undefined);

      expect(Object.keys(resp.discarded_mappings).length).toEqual(1);
      expect(resp.discarded_mappings.airline_lower).not.toEqual(undefined);
    });

    test('return no runtime mappings, aggs do not reference a runtime mapping', () => {
      datafeed.aggregations = getAggs();
      // point the avg agg at the plain field instead of the runtime mapping
      datafeed.aggregations!.buckets!.aggregations!.responsetime!.avg!.field! = 'responsetime';

      const resp = filterRuntimeMappings(job, datafeed);
      expect(Object.keys(resp.runtime_mappings).length).toEqual(0);

      expect(Object.keys(resp.discarded_mappings).length).toEqual(2);
      expect(resp.discarded_mappings.responsetime_big).not.toEqual(undefined);
      expect(resp.discarded_mappings.airline_lower).not.toEqual(undefined);
    });

    test('return one runtime mapping and one unused mapping, no mappings in aggs', () => {
      // set the detector field to be a runtime mapping
      job.analysis_config.detectors[0].field_name = 'responsetime_big';
      const resp = filterRuntimeMappings(job, datafeed);
      expect(Object.keys(resp.runtime_mappings).length).toEqual(1);
      expect(resp.runtime_mappings.responsetime_big).not.toEqual(undefined);

      expect(Object.keys(resp.discarded_mappings).length).toEqual(1);
      expect(resp.discarded_mappings.airline_lower).not.toEqual(undefined);
    });

    test('return two runtime mappings, no mappings in aggs', () => {
      // set the detector field to be a runtime mapping
      job.analysis_config.detectors[0].field_name = 'responsetime_big';
      // set the detector by field to be a runtime mapping
      job.analysis_config.detectors[0].by_field_name = 'airline_lower';
      const resp = filterRuntimeMappings(job, datafeed);
      expect(Object.keys(resp.runtime_mappings).length).toEqual(2);
      expect(resp.runtime_mappings.responsetime_big).not.toEqual(undefined);
      expect(resp.runtime_mappings.airline_lower).not.toEqual(undefined);

      expect(Object.keys(resp.discarded_mappings).length).toEqual(0);
    });

    test('return one runtime mapping and one unused mapping, no mappings in aggs, categorization job', () => {
      job.analysis_config.detectors[0].function = 'count';
      // a count detector has no field of its own
      job.analysis_config.detectors[0].field_name = undefined;
      // split by mlcategory, which is not one of the datafeed's runtime mappings
      job.analysis_config.detectors[0].by_field_name = 'mlcategory';
      // the categorization field is the only reference to a runtime mapping
      job.analysis_config.categorization_field_name = 'airline_lower';

      const resp = filterRuntimeMappings(job, datafeed);
      expect(Object.keys(resp.runtime_mappings).length).toEqual(1);
      expect(resp.runtime_mappings.airline_lower).not.toEqual(undefined);

      expect(Object.keys(resp.discarded_mappings).length).toEqual(1);
      expect(resp.discarded_mappings.responsetime_big).not.toEqual(undefined);
    });
  });
});
104 changes: 104 additions & 0 deletions
104
...ins/ml/public/application/jobs/new_job/common/job_creator/util/filter_runtime_mappings.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0; you may not use this file except in compliance with the Elastic License | ||
* 2.0. | ||
*/ | ||
|
||
import type { RuntimeMappings } from '../../../../../../../common/types/fields'; | ||
import type { Datafeed, Job } from '../../../../../../../common/types/anomaly_detection_jobs'; | ||
|
||
/**
 * Result of splitting a datafeed's runtime mappings by whether the job
 * references them.
 */
interface Response {
  /** Runtime mappings whose name is referenced by the job or the datafeed aggregations. */
  runtime_mappings: RuntimeMappings;
  /** Runtime mappings declared in the datafeed but not referenced anywhere. */
  discarded_mappings: RuntimeMappings;
}
|
||
/**
 * Splits the datafeed's runtime mappings into those that are referenced by
 * the job configuration or the datafeed aggregations, and those that are not.
 *
 * @param job the anomaly detection job whose detectors, influencers,
 *   categorization field and summary count field are inspected
 * @param datafeed the datafeed holding the runtime mappings and, optionally,
 *   aggregations
 * @returns the used mappings as `runtime_mappings` and the unused ones as
 *   `discarded_mappings`; both are empty when the datafeed has no runtime
 *   mappings
 */
export function filterRuntimeMappings(job: Job, datafeed: Datafeed): Response {
  // Treat null like undefined: a null value would otherwise reach
  // createMappings() and throw when its keys are read.
  if (datafeed.runtime_mappings === undefined || datafeed.runtime_mappings === null) {
    return {
      runtime_mappings: {},
      discarded_mappings: {},
    };
  }

  const usedFields = findFieldsInJob(job, datafeed);

  const { runtimeMappings, discardedMappings } = createMappings(
    datafeed.runtime_mappings,
    usedFields
  );

  return { runtime_mappings: runtimeMappings, discarded_mappings: discardedMappings };
}
|
||
/**
 * Collects the name of every field referenced by the job and the datafeed.
 *
 * Looks at each detector's field, by, over and partition field, the
 * categorization field, the summary count field, the influencers, and any
 * `field` entries found in the datafeed aggregations.
 *
 * @returns the distinct field names in the order they were first seen
 */
function findFieldsInJob(job: Job, datafeed: Datafeed) {
  const usedFields = new Set<string>();
  job.analysis_config.detectors.forEach((d) => {
    if (d.field_name !== undefined) {
      usedFields.add(d.field_name);
    }
    if (d.by_field_name !== undefined) {
      usedFields.add(d.by_field_name);
    }
    if (d.over_field_name !== undefined) {
      usedFields.add(d.over_field_name);
    }
    if (d.partition_field_name !== undefined) {
      usedFields.add(d.partition_field_name);
    }
  });

  if (job.analysis_config.categorization_field_name !== undefined) {
    usedFields.add(job.analysis_config.categorization_field_name);
  }

  if (job.analysis_config.summary_count_field_name !== undefined) {
    usedFields.add(job.analysis_config.summary_count_field_name);
  }

  if (job.analysis_config.influencers !== undefined) {
    job.analysis_config.influencers.forEach((i) => usedFields.add(i));
  }

  // `??` falls through on null as well as undefined, so `aggs` can still be
  // null here when both properties are null; skip it rather than walking null.
  const aggs = datafeed.aggregations ?? datafeed.aggs;
  if (aggs !== undefined && aggs !== null) {
    findFieldsInAgg(aggs).forEach((f) => usedFields.add(f));
  }

  return [...usedFields];
}
|
||
/**
 * Recursively walks an aggregation object and returns the value of every
 * string-valued `field` property found at any depth.
 *
 * Null values are skipped. The result keeps traversal order and may contain
 * duplicates.
 */
function findFieldsInAgg(obj: Record<string, any>) {
  const fields: string[] = [];
  for (const [key, val] of Object.entries(obj)) {
    if (typeof val === 'object' && val !== null) {
      // descend into nested aggregation objects (and arrays)
      for (const nested of findFieldsInAgg(val)) {
        fields.push(nested);
      }
    } else if (typeof val === 'string' && key === 'field') {
      fields.push(val);
    }
  }
  return fields;
}
|
||
/**
 * Partitions runtime mappings by whether their name appears in the list of
 * used field names.
 *
 * @param rm all runtime mappings declared in the datafeed
 * @param usedFieldNames names of the fields referenced by the job or aggs
 * @returns `runtimeMappings` holding the mappings that are used (in the order
 *   of `usedFieldNames`) and `discardedMappings` holding the rest
 */
function createMappings(rm: RuntimeMappings, usedFieldNames: string[]) {
  const usedFieldSet = new Set(usedFieldNames);

  // Only own properties count as mappings. A plain `rm[name] !== undefined`
  // check would also match inherited members such as `toString` or
  // `constructor` when a field happens to have one of those names.
  const isDefinedMapping = (name: string) =>
    Object.prototype.hasOwnProperty.call(rm, name) && rm[name] !== undefined;

  return {
    runtimeMappings: usedFieldNames.reduce((acc, cur) => {
      if (isDefinedMapping(cur)) {
        acc[cur] = rm[cur];
      }
      return acc;
    }, {} as RuntimeMappings),
    discardedMappings: Object.keys(rm).reduce((acc, cur) => {
      if (usedFieldSet.has(cur) === false && rm[cur] !== undefined) {
        acc[cur] = rm[cur];
      }
      return acc;
    }, {} as RuntimeMappings),
  };
}