Skip to content

Commit

Permalink
[ML] Data Frame Analytics: Highlight filtered data in scatterplot charts (#144871)
Browse files Browse the repository at this point in the history

## Summary

Related meta issue: #131551

This PR updates the scatterplot charts so that they always show the full
data sample. When the user has added a filter/query in the query bar, the
portion of the data matching that filter is highlighted so it can be
distinguished from the background data.

Classification results view with query for `AvgTicketPrice > 400`

<img width="1032" alt="image"
src="https://user-images.githubusercontent.com/6446462/200716771-b2012e9b-c620-46a8-9dc3-92df23ef4476.png">

Outlier detection results view with same filter

<img width="1026" alt="image"
src="https://user-images.githubusercontent.com/6446462/200716858-01407906-34de-43d6-892b-7bbfede05eac.png">

Regression results view with same filter

<img width="1007" alt="image"
src="https://user-images.githubusercontent.com/6446462/200716910-41165b81-a300-420c-8976-47a0ea9612bf.png">

Help text:

<img width="1005" alt="image"
src="https://user-images.githubusercontent.com/6446462/201484563-9f4ca87b-3025-485f-ac0e-4a30deee847f.png">




### Checklist

Delete any items that are not applicable to this PR.

- [ ] Any text added follows [EUI's writing
guidelines](https://elastic.github.io/eui/#/guidelines/writing), uses
sentence case text and includes [i18n
support](https://github.com/elastic/kibana/blob/main/packages/kbn-i18n/README.md)
- [ ]
[Documentation](https://www.elastic.co/guide/en/kibana/master/development-documentation.html)
was added for features that require explanation or tutorials
- [ ] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios
- [ ] Any UI touched in this PR is usable by keyboard only (learn more
about [keyboard accessibility](https://webaim.org/techniques/keyboard/))
- [ ] Any UI touched in this PR does not create any new axe failures
(run axe in browser:
[FF](https://addons.mozilla.org/en-US/firefox/addon/axe-devtools/),
[Chrome](https://chrome.google.com/webstore/detail/axe-web-accessibility-tes/lhdoppojpmngadmnindnejefpokejbdd?hl=en-US))
- [ ] If a plugin configuration key changed, check if it needs to be
allowlisted in the cloud and added to the [docker
list](https://github.com/elastic/kibana/blob/main/src/dev/build/tasks/os_packages/docker_generator/resources/base/bin/kibana-docker)
- [ ] This renders correctly on smaller devices using a responsive
layout. (You can test this [in your
browser](https://www.browserstack.com/guide/responsive-testing-on-local-server))
- [ ] This was checked for [cross-browser
compatibility](https://www.elastic.co/support/matrix#matrix_browsers)

Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
  • Loading branch information
alvarezmelissa87 and kibanamachine authored Nov 15, 2022
1 parent 75ce1e3 commit 295a267
Show file tree
Hide file tree
Showing 3 changed files with 302 additions and 171 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ import { stringHash } from '@kbn/ml-string-hash';
import { extractErrorMessage } from '../../../../common';
import { isRuntimeMappings } from '../../../../common/util/runtime_field_utils';
import { RuntimeMappings } from '../../../../common/types/fields';
import type { ResultsSearchQuery } from '../../data_frame_analytics/common/analytics';
import { getCombinedRuntimeMappings } from '../data_grid';

import { useMlApiContext } from '../../contexts/kibana';
Expand Down Expand Up @@ -81,13 +80,25 @@ const OptionLabelWithIconTip: FC<OptionLabelWithIconTipProps> = ({ label, toolti
</>
);

/**
 * Turns raw search hits into the rows that can be plotted in the scatterplot matrix.
 *
 * Each hit's `fields` (or an empty object when the hit has none) is run through
 * `getProcessedFields`, together with a predicate that matches keys starting with
 * `${resultsField}.feature_importance`. Any resulting row that still holds an
 * array under one of its keys is discarded.
 *
 * NOTE(review): what `getProcessedFields` does with keys matched by the predicate
 * is not visible here - confirm against its implementation.
 *
 * @param items - Search hits as returned in `hits.hits` of a search response.
 * @param resultsField - Results field name used to build the feature importance key prefix.
 * @returns The processed rows that contain no array values.
 */
function filterChartableItems(items: estypes.SearchHit[], resultsField?: string) {
  const featureImportancePrefix = `${resultsField}.feature_importance`;
  const isFeatureImportanceKey = (key: string) => key.startsWith(featureImportancePrefix);

  const processedRows = items.map((hit) =>
    getProcessedFields(hit.fields ?? {}, isFeatureImportanceKey)
  );

  // Keep a row only if none of its values is an array.
  return processedRows.filter((row) =>
    Object.keys(row).every((field) => !Array.isArray(row[field]))
  );
}

export interface ScatterplotMatrixProps {
fields: string[];
index: string;
resultsField?: string;
color?: string;
legendType?: LegendType;
searchQuery?: ResultsSearchQuery;
searchQuery?: estypes.QueryDslQueryContainer;
runtimeMappings?: RuntimeMappings;
indexPattern?: DataView;
}
Expand Down Expand Up @@ -128,7 +139,7 @@ export const ScatterplotMatrix: FC<ScatterplotMatrixProps> = ({

// contains the fetched documents and columns to be passed on to the Vega spec.
const [splom, setSplom] = useState<
{ items: any[]; columns: string[]; messages: string[] } | undefined
{ items: any[]; backgroundItems: any[]; columns: string[]; messages: string[] } | undefined
>();

// formats the array of field names for EuiComboBox
Expand Down Expand Up @@ -165,7 +176,7 @@ export const ScatterplotMatrix: FC<ScatterplotMatrixProps> = ({

useEffect(() => {
if (fields.length === 0) {
setSplom({ columns: [], items: [], messages: [] });
setSplom({ columns: [], items: [], backgroundItems: [], messages: [] });
setIsLoading(false);
return;
}
Expand All @@ -184,7 +195,7 @@ export const ScatterplotMatrix: FC<ScatterplotMatrixProps> = ({
...(includeOutlierScoreField ? [outlierScoreField] : []),
];

const query = randomizeQuery
const foregroundQuery = randomizeQuery
? {
function_score: {
query: searchQuery,
Expand All @@ -193,33 +204,65 @@ export const ScatterplotMatrix: FC<ScatterplotMatrixProps> = ({
}
: searchQuery;

let backgroundQuery;
// If it's not the default query then we do a background search excluding the current query
if (
searchQuery &&
((searchQuery.match_all && Object.keys(searchQuery.match_all).length > 0) ||
(searchQuery.bool && Object.keys(searchQuery.bool).length > 0))
) {
backgroundQuery = randomizeQuery
? {
function_score: {
query: { bool: { must_not: [searchQuery] } },
random_score: { seed: 10, field: '_seq_no' },
},
}
: { bool: { must_not: [searchQuery] } };
}

const combinedRuntimeMappings =
indexPattern && getCombinedRuntimeMappings(indexPattern, runtimeMappings);

const resp: estypes.SearchResponse = await esSearch({
index,
body: {
fields: queryFields,
_source: false,
query,
from: 0,
size: fetchSize,
...(isRuntimeMappings(combinedRuntimeMappings)
? { runtime_mappings: combinedRuntimeMappings }
: {}),
},
});
const body = {
fields: queryFields,
_source: false,
query: foregroundQuery,
from: 0,
size: fetchSize,
...(isRuntimeMappings(combinedRuntimeMappings)
? { runtime_mappings: combinedRuntimeMappings }
: {}),
};

const promises = [
esSearch({
index,
body,
}),
];

if (backgroundQuery) {
promises.push(
esSearch({
index,
body: { ...body, query: backgroundQuery },
})
);
}

const [foregroundResp, backgroundResp] = await Promise.all<estypes.SearchResponse>(
promises
);

if (!options.didCancel) {
const items = resp.hits.hits
.map((d) =>
getProcessedFields(d.fields ?? {}, (key: string) =>
key.startsWith(`${resultsField}.feature_importance`)
)
)
.filter((d) => !Object.keys(d).some((field) => Array.isArray(d[field])));

const originalDocsCount = resp.hits.hits.length;
const items = filterChartableItems(foregroundResp.hits.hits, resultsField);
const backgroundItems = filterChartableItems(
backgroundResp?.hits.hits ?? [],
resultsField
);

const originalDocsCount = foregroundResp.hits.hits.length;
const filteredDocsCount = originalDocsCount - items.length;

if (originalDocsCount === filteredDocsCount) {
Expand All @@ -229,7 +272,7 @@ export const ScatterplotMatrix: FC<ScatterplotMatrixProps> = ({
'All fetched documents included fields with arrays of values and cannot be visualized.',
})
);
} else if (resp.hits.hits.length !== items.length) {
} else if (foregroundResp.hits.hits.length !== items.length) {
messages.push(
i18n.translate('xpack.ml.splom.arrayFieldsWarningMessage', {
defaultMessage:
Expand All @@ -242,12 +285,17 @@ export const ScatterplotMatrix: FC<ScatterplotMatrixProps> = ({
);
}

setSplom({ columns: fields, items, messages });
setSplom({ columns: fields, items, backgroundItems, messages });
setIsLoading(false);
}
} catch (e) {
setIsLoading(false);
setSplom({ columns: [], items: [], messages: [extractErrorMessage(e)] });
setSplom({
columns: [],
items: [],
backgroundItems: [],
messages: [extractErrorMessage(e)],
});
}
}

Expand All @@ -265,10 +313,11 @@ export const ScatterplotMatrix: FC<ScatterplotMatrixProps> = ({
return;
}

const { items, columns } = splom;
const { items, backgroundItems, columns } = splom;

return getScatterplotMatrixVegaLiteSpec(
items,
backgroundItems,
columns,
euiTheme,
resultsField,
Expand Down Expand Up @@ -409,7 +458,25 @@ export const ScatterplotMatrix: FC<ScatterplotMatrixProps> = ({
</>
)}

{splom.items.length > 0 && <VegaChart vegaSpec={vegaSpec} />}
{splom.items.length > 0 && (
<>
<VegaChart vegaSpec={vegaSpec} />
{splom.backgroundItems.length ? (
<>
<EuiSpacer size="s" />
<EuiFormRow
fullWidth
helpText={i18n.translate('xpack.ml.splom.backgroundLayerHelpText', {
defaultMessage:
"If the data points match your filter, they're shown in color; otherwise, they're blurred gray.",
})}
>
<></>
</EuiFormRow>
</>
) : null}
</>
)}
</div>
)}
</>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => {
it('should return the default spec for non-outliers without a legend', () => {
const data = [{ x: 1, y: 1 }];

const vegaLiteSpec = getScatterplotMatrixVegaLiteSpec(data, ['x', 'y'], euiThemeLight);
const vegaLiteSpec = getScatterplotMatrixVegaLiteSpec(data, [], ['x', 'y'], euiThemeLight);
const specForegroundLayer = vegaLiteSpec.spec.layer[0];

// A valid Vega Lite spec shouldn't throw an error when compiled.
expect(() => compile(vegaLiteSpec)).not.toThrow();
Expand All @@ -82,17 +83,17 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => {
column: ['x', 'y'],
row: ['y', 'x'],
});
expect(vegaLiteSpec.spec.data.values).toEqual(data);
expect(vegaLiteSpec.spec.mark).toEqual({
expect(specForegroundLayer.data.values).toEqual(data);
expect(specForegroundLayer.mark).toEqual({
opacity: 0.75,
size: 8,
type: 'circle',
});
expect(vegaLiteSpec.spec.encoding.color).toEqual({
expect(specForegroundLayer.encoding.color).toEqual({
condition: [{ selection: USER_SELECTION }, { selection: SINGLE_POINT_CLICK }],
value: COLOR_BLUR,
});
expect(vegaLiteSpec.spec.encoding.tooltip).toEqual([
expect(specForegroundLayer.encoding.tooltip).toEqual([
{ field: 'x', type: 'quantitative' },
{ field: 'y', type: 'quantitative' },
]);
Expand All @@ -101,7 +102,14 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => {
it('should return the spec for outliers', () => {
const data = [{ x: 1, y: 1 }];

const vegaLiteSpec = getScatterplotMatrixVegaLiteSpec(data, ['x', 'y'], euiThemeLight, 'ml');
const vegaLiteSpec = getScatterplotMatrixVegaLiteSpec(
data,
[],
['x', 'y'],
euiThemeLight,
'ml'
);
const specForegroundLayer = vegaLiteSpec.spec.layer[0];

// A valid Vega Lite spec shouldn't throw an error when compiled.
expect(() => compile(vegaLiteSpec)).not.toThrow();
Expand All @@ -110,13 +118,13 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => {
column: ['x', 'y'],
row: ['y', 'x'],
});
expect(vegaLiteSpec.spec.data.values).toEqual(data);
expect(vegaLiteSpec.spec.mark).toEqual({
expect(specForegroundLayer.data.values).toEqual(data);
expect(specForegroundLayer.mark).toEqual({
opacity: 0.75,
size: 8,
type: 'circle',
});
expect(vegaLiteSpec.spec.encoding.color).toEqual({
expect(specForegroundLayer.encoding.color).toEqual({
condition: {
selection: USER_SELECTION,
field: 'is_outlier',
Expand All @@ -127,7 +135,7 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => {
},
value: COLOR_BLUR,
});
expect(vegaLiteSpec.spec.encoding.tooltip).toEqual([
expect(specForegroundLayer.encoding.tooltip).toEqual([
{ field: 'x', type: 'quantitative' },
{ field: 'y', type: 'quantitative' },
{
Expand All @@ -144,12 +152,14 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => {

const vegaLiteSpec = getScatterplotMatrixVegaLiteSpec(
data,
[],
['x', 'y'],
euiThemeLight,
undefined,
'the-color-field',
LEGEND_TYPES.NOMINAL
);
const specForegroundLayer = vegaLiteSpec.spec.layer[0];

// A valid Vega Lite spec shouldn't throw an error when compiled.
expect(() => compile(vegaLiteSpec)).not.toThrow();
Expand All @@ -158,13 +168,13 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => {
column: ['x', 'y'],
row: ['y', 'x'],
});
expect(vegaLiteSpec.spec.data.values).toEqual(data);
expect(vegaLiteSpec.spec.mark).toEqual({
expect(specForegroundLayer.data.values).toEqual(data);
expect(specForegroundLayer.mark).toEqual({
opacity: 0.75,
size: 8,
type: 'circle',
});
expect(vegaLiteSpec.spec.encoding.color).toEqual({
expect(specForegroundLayer.encoding.color).toEqual({
condition: {
selection: USER_SELECTION,
field: 'the-color-field',
Expand All @@ -175,7 +185,7 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => {
},
value: COLOR_BLUR,
});
expect(vegaLiteSpec.spec.encoding.tooltip).toEqual([
expect(specForegroundLayer.encoding.tooltip).toEqual([
{ field: 'the-color-field', type: 'nominal' },
{ field: 'x', type: 'quantitative' },
{ field: 'y', type: 'quantitative' },
Expand All @@ -187,19 +197,21 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => {

const vegaLiteSpec = getScatterplotMatrixVegaLiteSpec(
data,
[],
['x.a', 'y[a]'],
euiThemeLight,
undefined,
'the-color-field',
LEGEND_TYPES.NOMINAL
);
const specForegroundLayer = vegaLiteSpec.spec.layer[0];

// column values should be escaped
expect(vegaLiteSpec.repeat).toEqual({
column: ['x\\.a', 'y\\[a\\]'],
row: ['y\\[a\\]', 'x\\.a'],
});
// raw data should not be escaped
expect(vegaLiteSpec.spec.data.values).toEqual(data);
expect(specForegroundLayer.data.values).toEqual(data);
});
});
Loading

0 comments on commit 295a267

Please sign in to comment.