From 295a267dae19dea205cb20ddfef548ccd9c091f5 Mon Sep 17 00:00:00 2001 From: Melissa Alvarez Date: Tue, 15 Nov 2022 10:33:28 -0500 Subject: [PATCH] [ML] Data Frame Analytics: Highlight filtered data in scatterplot charts (#144871) ## Summary Related meta issue: https://github.com/elastic/kibana/issues/131551 This PR adds functionality to the scatterplot charts to show the full data sample and, when the user has added a filter/query in the query bar, the portion of the data reflecting the filter is highlighted so it can be differentiated from the background data. Classification results view with query for `AvgTicketPrice > 400` image Outlier detection results view with same filter image Regression results view with same filter image Help text: image ### Checklist Delete any items that are not applicable to this PR. - [ ] Any text added follows [EUI's writing guidelines](https://elastic.github.io/eui/#/guidelines/writing), uses sentence case text and includes [i18n support](https://github.com/elastic/kibana/blob/main/packages/kbn-i18n/README.md) - [ ] [Documentation](https://www.elastic.co/guide/en/kibana/master/development-documentation.html) was added for features that require explanation or tutorials - [ ] [Unit or functional tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html) were updated or added to match the most common scenarios - [ ] Any UI touched in this PR is usable by keyboard only (learn more about [keyboard accessibility](https://webaim.org/techniques/keyboard/)) - [ ] Any UI touched in this PR does not create any new axe failures (run axe in browser: [FF](https://addons.mozilla.org/en-US/firefox/addon/axe-devtools/), [Chrome](https://chrome.google.com/webstore/detail/axe-web-accessibility-tes/lhdoppojpmngadmnindnejefpokejbdd?hl=en-US)) - [ ] If a plugin configuration key changed, check if it needs to be allowlisted in the cloud and added to the [docker list](https://github.com/elastic/kibana/blob/main/src/dev/build/tasks/os_packages/docker_generator/resources/base/bin/kibana-docker) - [ ] This renders correctly on smaller devices using a responsive layout. (You can test this [in your browser](https://www.browserstack.com/guide/responsive-testing-on-local-server)) - [ ] This was checked for [cross-browser compatibility](https://www.elastic.co/support/matrix#matrix_browsers) Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com> --- .../scatterplot_matrix/scatterplot_matrix.tsx | 131 ++++++-- .../scatterplot_matrix_vega_lite_spec.test.ts | 42 ++- .../scatterplot_matrix_vega_lite_spec.ts | 300 ++++++++++-------- 3 files changed, 302 insertions(+), 171 deletions(-) diff --git a/x-pack/plugins/ml/public/application/components/scatterplot_matrix/scatterplot_matrix.tsx b/x-pack/plugins/ml/public/application/components/scatterplot_matrix/scatterplot_matrix.tsx index ee9cf024a67497..e822d2ebd91d76 100644 --- a/x-pack/plugins/ml/public/application/components/scatterplot_matrix/scatterplot_matrix.tsx +++ b/x-pack/plugins/ml/public/application/components/scatterplot_matrix/scatterplot_matrix.tsx @@ -29,7 +29,6 @@ import { stringHash } from '@kbn/ml-string-hash'; import { extractErrorMessage } from '../../../../common'; import { isRuntimeMappings } from '../../../../common/util/runtime_field_utils'; import { RuntimeMappings } from '../../../../common/types/fields'; -import type { ResultsSearchQuery } from '../../data_frame_analytics/common/analytics'; import { getCombinedRuntimeMappings } from '../data_grid'; import { useMlApiContext } from '../../contexts/kibana'; @@ -81,13 +80,25 @@ const OptionLabelWithIconTip: FC = ({ label, toolti ); +function filterChartableItems(items: estypes.SearchHit[], resultsField?: string) { + return ( + items + .map((d) => + getProcessedFields(d.fields ?? {}, (key: string) => + key.startsWith(`${resultsField}.feature_importance`) + ) + ) + .filter((d) => !Object.keys(d).some((field) => Array.isArray(d[field]))) ?? [] + ); +} + export interface ScatterplotMatrixProps { fields: string[]; index: string; resultsField?: string; color?: string; legendType?: LegendType; - searchQuery?: ResultsSearchQuery; + searchQuery?: estypes.QueryDslQueryContainer; runtimeMappings?: RuntimeMappings; indexPattern?: DataView; } @@ -128,7 +139,7 @@ export const ScatterplotMatrix: FC = ({ // contains the fetched documents and columns to be passed on to the Vega spec. const [splom, setSplom] = useState< - { items: any[]; columns: string[]; messages: string[] } | undefined + { items: any[]; backgroundItems: any[]; columns: string[]; messages: string[] } | undefined >(); // formats the array of field names for EuiComboBox @@ -165,7 +176,7 @@ export const ScatterplotMatrix: FC = ({ useEffect(() => { if (fields.length === 0) { - setSplom({ columns: [], items: [], messages: [] }); + setSplom({ columns: [], items: [], backgroundItems: [], messages: [] }); setIsLoading(false); return; } @@ -184,7 +195,7 @@ export const ScatterplotMatrix: FC = ({ ...(includeOutlierScoreField ? [outlierScoreField] : []), ]; - const query = randomizeQuery + const foregroundQuery = randomizeQuery ? { function_score: { query: searchQuery, @@ -193,33 +204,65 @@ export const ScatterplotMatrix: FC = ({ } : searchQuery; + let backgroundQuery; + // If it's not the default query then we do a background search excluding the current query + if ( + searchQuery && + ((searchQuery.match_all && Object.keys(searchQuery.match_all).length > 0) || + (searchQuery.bool && Object.keys(searchQuery.bool).length > 0)) + ) { + backgroundQuery = randomizeQuery + ? { + function_score: { + query: { bool: { must_not: [searchQuery] } }, + random_score: { seed: 10, field: '_seq_no' }, + }, + } + : { bool: { must_not: [searchQuery] } }; + } + const combinedRuntimeMappings = indexPattern && getCombinedRuntimeMappings(indexPattern, runtimeMappings); - const resp: estypes.SearchResponse = await esSearch({ - index, - body: { - fields: queryFields, - _source: false, - query, - from: 0, - size: fetchSize, - ...(isRuntimeMappings(combinedRuntimeMappings) - ? { runtime_mappings: combinedRuntimeMappings } - : {}), - }, - }); + const body = { + fields: queryFields, + _source: false, + query: foregroundQuery, + from: 0, + size: fetchSize, + ...(isRuntimeMappings(combinedRuntimeMappings) + ? { runtime_mappings: combinedRuntimeMappings } + : {}), + }; + + const promises = [ + esSearch({ + index, + body, + }), + ]; + + if (backgroundQuery) { + promises.push( + esSearch({ + index, + body: { ...body, query: backgroundQuery }, + }) + ); + } + + const [foregroundResp, backgroundResp] = await Promise.all( + promises + ); if (!options.didCancel) { - const items = resp.hits.hits - .map((d) => - getProcessedFields(d.fields ?? {}, (key: string) => - key.startsWith(`${resultsField}.feature_importance`) - ) - ) - .filter((d) => !Object.keys(d).some((field) => Array.isArray(d[field]))); - - const originalDocsCount = resp.hits.hits.length; + const items = filterChartableItems(foregroundResp.hits.hits, resultsField); + const backgroundItems = filterChartableItems( + backgroundResp?.hits.hits ?? [], + resultsField + ); + + const originalDocsCount = foregroundResp.hits.hits.length; const filteredDocsCount = originalDocsCount - items.length; if (originalDocsCount === filteredDocsCount) { @@ -229,7 +272,7 @@ export const ScatterplotMatrix: FC = ({ 'All fetched documents included fields with arrays of values and cannot be visualized.', }) ); - } else if (resp.hits.hits.length !== items.length) { + } else if (foregroundResp.hits.hits.length !== items.length) { messages.push( i18n.translate('xpack.ml.splom.arrayFieldsWarningMessage', { defaultMessage: @@ -242,12 +285,17 @@ export const ScatterplotMatrix: FC = ({ ); } - setSplom({ columns: fields, items, messages }); + setSplom({ columns: fields, items, backgroundItems, messages }); setIsLoading(false); } } catch (e) { setIsLoading(false); - setSplom({ columns: [], items: [], messages: [extractErrorMessage(e)] }); + setSplom({ + columns: [], + items: [], + backgroundItems: [], + messages: [extractErrorMessage(e)], + }); } } @@ -265,10 +313,11 @@ export const ScatterplotMatrix: FC = ({ return; } - const { items, columns } = splom; + const { items, backgroundItems, columns } = splom; return getScatterplotMatrixVegaLiteSpec( items, + backgroundItems, columns, euiTheme, resultsField, @@ -409,7 +458,25 @@ export const ScatterplotMatrix: FC = ({ )} - {splom.items.length > 0 && } + {splom.items.length > 0 && ( + <> + + {splom.backgroundItems.length ? ( + <> + + + <> + + + ) : null} + + )} )} diff --git a/x-pack/plugins/ml/public/application/components/scatterplot_matrix/scatterplot_matrix_vega_lite_spec.test.ts b/x-pack/plugins/ml/public/application/components/scatterplot_matrix/scatterplot_matrix_vega_lite_spec.test.ts index 89ee0add9966ef..0fbe08dd24af7d 100644 --- a/x-pack/plugins/ml/public/application/components/scatterplot_matrix/scatterplot_matrix_vega_lite_spec.test.ts +++ b/x-pack/plugins/ml/public/application/components/scatterplot_matrix/scatterplot_matrix_vega_lite_spec.test.ts @@ -73,7 +73,8 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => { it('should return the default spec for non-outliers without a legend', () => { const data = [{ x: 1, y: 1 }]; - const vegaLiteSpec = getScatterplotMatrixVegaLiteSpec(data, ['x', 'y'], euiThemeLight); + const vegaLiteSpec = getScatterplotMatrixVegaLiteSpec(data, [], ['x', 'y'], euiThemeLight); + const specForegroundLayer = vegaLiteSpec.spec.layer[0]; // A valid Vega Lite spec shouldn't throw an error when compiled. expect(() => compile(vegaLiteSpec)).not.toThrow(); @@ -82,17 +83,17 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => { column: ['x', 'y'], row: ['y', 'x'], }); - expect(vegaLiteSpec.spec.data.values).toEqual(data); - expect(vegaLiteSpec.spec.mark).toEqual({ + expect(specForegroundLayer.data.values).toEqual(data); + expect(specForegroundLayer.mark).toEqual({ opacity: 0.75, size: 8, type: 'circle', }); - expect(vegaLiteSpec.spec.encoding.color).toEqual({ + expect(specForegroundLayer.encoding.color).toEqual({ condition: [{ selection: USER_SELECTION }, { selection: SINGLE_POINT_CLICK }], value: COLOR_BLUR, }); - expect(vegaLiteSpec.spec.encoding.tooltip).toEqual([ + expect(specForegroundLayer.encoding.tooltip).toEqual([ { field: 'x', type: 'quantitative' }, { field: 'y', type: 'quantitative' }, ]); @@ -101,7 +102,14 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => { it('should return the spec for outliers', () => { const data = [{ x: 1, y: 1 }]; - const vegaLiteSpec = getScatterplotMatrixVegaLiteSpec(data, ['x', 'y'], euiThemeLight, 'ml'); + const vegaLiteSpec = getScatterplotMatrixVegaLiteSpec( + data, + [], + ['x', 'y'], + euiThemeLight, + 'ml' + ); + const specForegroundLayer = vegaLiteSpec.spec.layer[0]; // A valid Vega Lite spec shouldn't throw an error when compiled. expect(() => compile(vegaLiteSpec)).not.toThrow(); @@ -110,13 +118,13 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => { column: ['x', 'y'], row: ['y', 'x'], }); - expect(vegaLiteSpec.spec.data.values).toEqual(data); - expect(vegaLiteSpec.spec.mark).toEqual({ + expect(specForegroundLayer.data.values).toEqual(data); + expect(specForegroundLayer.mark).toEqual({ opacity: 0.75, size: 8, type: 'circle', }); - expect(vegaLiteSpec.spec.encoding.color).toEqual({ + expect(specForegroundLayer.encoding.color).toEqual({ condition: { selection: USER_SELECTION, field: 'is_outlier', @@ -127,7 +135,7 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => { }, value: COLOR_BLUR, }); - expect(vegaLiteSpec.spec.encoding.tooltip).toEqual([ + expect(specForegroundLayer.encoding.tooltip).toEqual([ { field: 'x', type: 'quantitative' }, { field: 'y', type: 'quantitative' }, { @@ -144,12 +152,14 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => { const vegaLiteSpec = getScatterplotMatrixVegaLiteSpec( data, + [], ['x', 'y'], euiThemeLight, undefined, 'the-color-field', LEGEND_TYPES.NOMINAL ); + const specForegroundLayer = vegaLiteSpec.spec.layer[0]; // A valid Vega Lite spec shouldn't throw an error when compiled. expect(() => compile(vegaLiteSpec)).not.toThrow(); @@ -158,13 +168,13 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => { column: ['x', 'y'], row: ['y', 'x'], }); - expect(vegaLiteSpec.spec.data.values).toEqual(data); - expect(vegaLiteSpec.spec.mark).toEqual({ + expect(specForegroundLayer.data.values).toEqual(data); + expect(specForegroundLayer.mark).toEqual({ opacity: 0.75, size: 8, type: 'circle', }); - expect(vegaLiteSpec.spec.encoding.color).toEqual({ + expect(specForegroundLayer.encoding.color).toEqual({ condition: { selection: USER_SELECTION, field: 'the-color-field', @@ -175,7 +185,7 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => { }, value: COLOR_BLUR, }); - expect(vegaLiteSpec.spec.encoding.tooltip).toEqual([ + expect(specForegroundLayer.encoding.tooltip).toEqual([ { field: 'the-color-field', type: 'nominal' }, { field: 'x', type: 'quantitative' }, { field: 'y', type: 'quantitative' }, @@ -187,12 +197,14 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => { const vegaLiteSpec = getScatterplotMatrixVegaLiteSpec( data, + [], ['x.a', 'y[a]'], euiThemeLight, undefined, 'the-color-field', LEGEND_TYPES.NOMINAL ); + const specForegroundLayer = vegaLiteSpec.spec.layer[0]; // column values should be escaped expect(vegaLiteSpec.repeat).toEqual({ @@ -200,6 +212,6 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => { row: ['y\\[a\\]', 'x\\.a'], }); // raw data should not be escaped - expect(vegaLiteSpec.spec.data.values).toEqual(data); + expect(specForegroundLayer.data.values).toEqual(data); }); }); diff --git a/x-pack/plugins/ml/public/application/components/scatterplot_matrix/scatterplot_matrix_vega_lite_spec.ts b/x-pack/plugins/ml/public/application/components/scatterplot_matrix/scatterplot_matrix_vega_lite_spec.ts index 50b9fbf76daf29..de29332f57ef68 100644 --- a/x-pack/plugins/ml/public/application/components/scatterplot_matrix/scatterplot_matrix_vega_lite_spec.ts +++ b/x-pack/plugins/ml/public/application/components/scatterplot_matrix/scatterplot_matrix_vega_lite_spec.ts @@ -75,6 +75,151 @@ export const getColorSpec = ( }; }; +const getVegaSpecLayer = ( + isBackground: boolean, + values: VegaValue[], + colorSpec: any, + escapedOutlierScoreField: string, + outliers: boolean, + dynamicSize: boolean, + vegaColumns: string[], + color?: string +) => { + const selection = outliers + ? { + selection: { + [USER_SELECTION]: { type: 'interval' }, + [SINGLE_POINT_CLICK]: { type: 'single' }, + mlOutlierScoreThreshold: { + type: 'single', + fields: ['cutoff'], + bind: { + input: 'range', + max: 1, + min: 0, + name: i18n.translate('xpack.ml.splomSpec.outlierScoreThresholdName', { + defaultMessage: 'Outlier score threshold: ', + }), + step: 0.01, + }, + init: { cutoff: 0.99 }, + }, + }, + } + : { + selection: { + // Always allow user selection + [USER_SELECTION]: { + type: 'interval', + }, + [SINGLE_POINT_CLICK]: { type: 'single', empty: 'none' }, + }, + }; + + return { + data: { values: [...values] }, + mark: { + ...(outliers && dynamicSize + ? { + type: 'circle', + strokeWidth: 1.2, + strokeOpacity: 0.75, + fillOpacity: 0.1, + } + : { type: 'circle', opacity: 0.75, size: 8 }), + }, + // transformation to apply outlier threshold as category + ...(outliers + ? { + transform: [ + { + calculate: `datum['${escapedOutlierScoreField}'] >= mlOutlierScoreThreshold.cutoff`, + as: 'is_outlier', + }, + ], + } + : {}), + encoding: { + color: isBackground ? { value: COLOR_BLUR } : colorSpec, + opacity: { + condition: { + selection: USER_SELECTION, + value: 0.8, + }, + value: 0.5, + }, + ...(dynamicSize + ? { + stroke: colorSpec, + opacity: { + condition: { + value: 1, + test: `(datum['${escapedOutlierScoreField}'] >= mlOutlierScoreThreshold.cutoff)`, + }, + value: 0.5, + }, + } + : {}), + ...(outliers + ? { + order: { field: escapedOutlierScoreField }, + size: { + ...(!dynamicSize + ? { + condition: { + value: 40, + test: `(datum['${escapedOutlierScoreField}'] >= mlOutlierScoreThreshold.cutoff)`, + }, + value: 8, + } + : { + type: LEGEND_TYPES.QUANTITATIVE, + field: escapedOutlierScoreField, + scale: { + type: 'linear', + range: [8, 200], + domain: [0, 1], + }, + }), + }, + } + : {}), + x: { + type: LEGEND_TYPES.QUANTITATIVE, + field: { repeat: 'column' }, + scale: { zero: false }, + }, + y: { + type: LEGEND_TYPES.QUANTITATIVE, + field: { repeat: 'row' }, + scale: { zero: false }, + }, + tooltip: [ + ...(color !== undefined + ? // @ts-ignore + [{ type: colorSpec.condition.type, field: getEscapedVegaFieldName(color) }] + : []), + ...vegaColumns.map((d) => ({ + type: LEGEND_TYPES.QUANTITATIVE, + field: d, + })), + ...(outliers + ? [ + { + type: LEGEND_TYPES.QUANTITATIVE, + field: escapedOutlierScoreField, + format: '.3f', + }, + ] + : []), + ], + }, + ...(isBackground ? {} : selection), + width: SCATTERPLOT_SIZE, + height: SCATTERPLOT_SIZE, + }; +}; + // Escapes the characters .[] in field names with double backslashes // since VEGA treats dots/brackets in field names as nested values. // See https://vega.github.io/vega-lite/docs/field.html for details. @@ -86,6 +231,7 @@ type VegaValue = Record; export const getScatterplotMatrixVegaLiteSpec = ( values: VegaValue[], + backgroundValues: VegaValue[], columns: string[], euiTheme: typeof euiThemeLight, resultsField?: string, @@ -106,7 +252,7 @@ export const getScatterplotMatrixVegaLiteSpec = ( legendType ); - return { + const schema: TopLevelSpec = { $schema: 'https://vega.github.io/schema/vega-lite/v4.17.0.json', background: 'transparent', // There seems to be a bug in Vega which doesn't propagate these settings @@ -134,129 +280,35 @@ export const getScatterplotMatrixVegaLiteSpec = ( row: vegaColumns.slice().reverse(), }, spec: { - data: { values: [...vegaValues] }, - mark: { - ...(outliers && dynamicSize - ? { - type: 'circle', - strokeWidth: 1.2, - strokeOpacity: 0.75, - fillOpacity: 0.1, - } - : { type: 'circle', opacity: 0.75, size: 8 }), - }, - // transformation to apply outlier threshold as category - ...(outliers - ? { - transform: [ - { - calculate: `datum['${escapedOutlierScoreField}'] >= mlOutlierScoreThreshold.cutoff`, - as: 'is_outlier', - }, - ], - } - : {}), - encoding: { - color: colorSpec, - opacity: { - condition: { - selection: USER_SELECTION, - value: 0.8, - }, - value: 0.5, - }, - ...(dynamicSize - ? { - stroke: colorSpec, - opacity: { - condition: { - value: 1, - test: `(datum['${escapedOutlierScoreField}'] >= mlOutlierScoreThreshold.cutoff)`, - }, - value: 0.5, - }, - } - : {}), - ...(outliers - ? { - order: { field: escapedOutlierScoreField }, - size: { - ...(!dynamicSize - ? { - condition: { - value: 40, - test: `(datum['${escapedOutlierScoreField}'] >= mlOutlierScoreThreshold.cutoff)`, - }, - value: 8, - } - : { - type: LEGEND_TYPES.QUANTITATIVE, - field: escapedOutlierScoreField, - scale: { - type: 'linear', - range: [8, 200], - domain: [0, 1], - }, - }), - }, - } - : {}), - x: { - type: LEGEND_TYPES.QUANTITATIVE, - field: { repeat: 'column' }, - scale: { zero: false }, - }, - y: { - type: LEGEND_TYPES.QUANTITATIVE, - field: { repeat: 'row' }, - scale: { zero: false }, - }, - tooltip: [ - ...(color !== undefined - ? // @ts-ignore - [{ type: colorSpec.condition.type, field: getEscapedVegaFieldName(color) }] - : []), - ...vegaColumns.map((d) => ({ - type: LEGEND_TYPES.QUANTITATIVE, - field: d, - })), - ...(outliers - ? [{ type: LEGEND_TYPES.QUANTITATIVE, field: escapedOutlierScoreField, format: '.3f' }] - : []), - ], - }, - ...(outliers - ? { - selection: { - [USER_SELECTION]: { type: 'interval' }, - [SINGLE_POINT_CLICK]: { type: 'single' }, - mlOutlierScoreThreshold: { - type: 'single', - fields: ['cutoff'], - bind: { - input: 'range', - max: 1, - min: 0, - name: i18n.translate('xpack.ml.splomSpec.outlierScoreThresholdName', { - defaultMessage: 'Outlier score threshold: ', - }), - step: 0.01, - }, - init: { cutoff: 0.99 }, - }, - }, - } - : { - selection: { - // Always allow user selection - [USER_SELECTION]: { - type: 'interval', - }, - [SINGLE_POINT_CLICK]: { type: 'single', empty: 'none' }, - }, - }), - width: SCATTERPLOT_SIZE, - height: SCATTERPLOT_SIZE, + layer: [ + getVegaSpecLayer( + false, + vegaValues, + colorSpec, + escapedOutlierScoreField, + outliers, + !!dynamicSize, + vegaColumns, + color + ), + ], }, }; + + if (backgroundValues.length) { + schema.spec.layer.unshift( + getVegaSpecLayer( + true, + backgroundValues, + colorSpec, + escapedOutlierScoreField, + outliers, + !!dynamicSize, + vegaColumns, + color + ) + ); + } + + return schema; };