Skip to content

Commit

Permalink
[lens] Add significant terms support as ranking function in Top Values (elastic#158962)
Browse files Browse the repository at this point in the history

Part of elastic#154307
Closes elastic#116421

This PR adds a `Significance` option to the `Rank by` select. When it is selected, top
values are fetched with the [significant_terms
aggregation](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-significantterms-aggregation.html).

<img width="600" alt="Screen Shot 2023-06-02 at 12 56 29 PM"
src="https://github.com/elastic/kibana/assets/373691/b6a4f70a-4121-4c38-b6d5-7d46dc48ab1a">

---------

Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
  • Loading branch information
nreese and kibanamachine authored Jun 8, 2023
1 parent 737cf26 commit 59eb4ea
Show file tree
Hide file tree
Showing 7 changed files with 157 additions and 16 deletions.
11 changes: 9 additions & 2 deletions src/plugins/data/common/search/aggs/buckets/significant_terms.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@ const significantTermsTitle = i18n.translate('data.search.aggs.buckets.significa
export interface AggParamsSignificantTerms extends BaseAggParams {
field: string;
size?: number;
exclude?: string;
include?: string;
shardSize?: number;
exclude?: string | string[];
include?: string | string[];
}

export const getSignificantTermsBucketAgg = () =>
Expand Down Expand Up @@ -52,6 +53,12 @@ export const getSignificantTermsBucketAgg = () =>
name: 'size',
default: '',
},
{
name: 'shardSize',
write: (aggConfig, output) => {
output.params.shard_size = aggConfig.params.shardSize;
},
},
{
name: 'exclude',
displayName: i18n.translate('data.search.aggs.buckets.significantTerms.excludeLabel', {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ describe('agg_expression_functions', () => {
"field": "machine.os.keyword",
"include": undefined,
"json": undefined,
"shardSize": undefined,
"size": undefined,
},
"schema": undefined,
Expand Down Expand Up @@ -59,6 +60,7 @@ describe('agg_expression_functions', () => {
"field": "machine.os.keyword",
"include": "win",
"json": undefined,
"shardSize": undefined,
"size": 6,
},
"schema": "whatever",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,17 +64,25 @@ export const aggSignificantTerms = (): FunctionDefinition => ({
defaultMessage: 'Max number of buckets to retrieve',
}),
},
shardSize: {
types: ['number'],
help: i18n.translate('data.search.aggs.buckets.significantTerms.shardSize.help', {
defaultMessage: 'Number of terms provided by each shard and returned to coordinating node',
}),
},
exclude: {
types: ['string'],
help: i18n.translate('data.search.aggs.buckets.significantTerms.exclude.help', {
defaultMessage: 'Specific bucket values to exclude from results',
}),
multi: true,
},
include: {
types: ['string'],
help: i18n.translate('data.search.aggs.buckets.significantTerms.include.help', {
defaultMessage: 'Specific bucket values to include in results',
}),
multi: true,
},
json: {
types: ['string'],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,9 @@ export function FieldInputs({
defaultMessage: 'Add field',
})}
isDisabled={
column.params.orderBy.type === 'rare' || localValues.length > MAX_MULTI_FIELDS_SIZE
column.params.orderBy.type === 'rare' ||
column.params.orderBy.type === 'significant' ||
localValues.length > MAX_MULTI_FIELDS_SIZE
}
/>
</>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ export function supportsRarityRanking(field?: IndexPatternField) {
['double', 'float', 'half_float', 'scaled_float'].includes(esType)
);
}
/**
 * Reports whether a field can be ranked by significance.
 *
 * @param field - Field to inspect; may be omitted, e.g. when the source field
 * could not be resolved.
 * @returns `true` when at least one of the field's `esTypes` is `keyword`;
 * `false` otherwise, including when `field` or its `esTypes` is missing.
 */
export function supportsSignificantRanking(field?: IndexPatternField): boolean {
  // The optional chain yields `undefined` for a missing field/esTypes;
  // normalize it so callers always receive a real boolean.
  return field?.esTypes?.some((esType) => esType === 'keyword') ?? false;
}
/**
 * Tells whether a terms column is ranked by rarity or by significance.
 *
 * @param orderBy - The `orderBy` setting of a terms column.
 * @returns `true` for the `rare` and `significant` ranking types, `false` for
 * every other type.
 */
function isRareOrSignificant(orderBy: TermsIndexPatternColumn['params']['orderBy']) {
  switch (orderBy.type) {
    case 'rare':
    case 'significant':
      return true;
    default:
      return false;
  }
}
export type { TermsIndexPatternColumn } from './types';

const missingFieldLabel = i18n.translate('xpack.lens.indexPattern.missingFieldLabel', {
Expand All @@ -70,6 +76,7 @@ function ofName(
name?: string,
secondaryFieldsCount: number = 0,
rare: boolean = false,
significant: boolean = false,
termsSize: number = 0
) {
if (rare) {
Expand All @@ -80,6 +87,14 @@ function ofName(
},
});
}
if (significant) {
return i18n.translate('xpack.lens.indexPattern.significantTermsOf', {
defaultMessage: 'Significant values of {name}',
values: {
name: name ?? missingFieldLabel,
},
});
}
if (secondaryFieldsCount) {
return i18n.translate('xpack.lens.indexPattern.multipleTermsOf', {
defaultMessage: 'Top values of {name} + {count} {count, plural, one {other} other {others}}',
Expand Down Expand Up @@ -149,7 +164,7 @@ export const termsOperation: OperationDefinition<
return ret;
},
canAddNewField: ({ targetColumn, sourceColumn, field, indexPattern }) => {
if (targetColumn.params.orderBy.type === 'rare') {
if (isRareOrSignificant(targetColumn.params.orderBy)) {
return false;
}
// collect the fields from the targetColumn
Expand Down Expand Up @@ -258,6 +273,30 @@ export const termsOperation: OperationDefinition<
max_doc_count: column.params.orderBy.maxDocCount,
}).toAst();
}

// To get more accurate results, we set shard_size to a minimum of 1000
// The other calculation matches the current Elasticsearch shard_size default,
// but they may diverge in the future
const shardSize = column.params.accuracyMode
? Math.max(1000, column.params.size * 1.5 + 10)
: undefined;

if (column.params?.orderBy.type === 'significant') {
return buildExpressionFunction<AggFunctionsMapping['aggSignificantTerms']>(
'aggSignificantTerms',
{
id: columnId,
enabled: true,
schema: 'segment',
field: column.sourceField,
size: column.params.size,
shardSize,
...(column.params.include?.length && { include: column.params.include as string[] }),
...(column.params.exclude?.length && { exclude: column.params.exclude as string[] }),
}
).toAst();
}

let orderBy = '_key';

if (column.params?.orderBy.type === 'column') {
Expand All @@ -269,13 +308,6 @@ export const termsOperation: OperationDefinition<
}
}

// To get more accurate results, we set shard_size to a minimum of 1000
// The other calculation matches the current Elasticsearch shard_size default,
// but they may diverge in the future
const shardSize = column.params.accuracyMode
? Math.max(1000, column.params.size * 1.5 + 10)
: undefined;

const orderAggColumn = column.params.orderAgg;
let orderAgg;
if (orderAggColumn) {
Expand Down Expand Up @@ -346,6 +378,7 @@ export const termsOperation: OperationDefinition<
indexPattern.getFieldByName(column.sourceField)?.displayName,
column.params.secondaryFields?.length,
column.params.orderBy.type === 'rare',
column.params.orderBy.type === 'significant',
column.params.size
),
onFieldChange: (oldColumn, field, params) => {
Expand All @@ -358,7 +391,10 @@ export const termsOperation: OperationDefinition<
delete newParams.format;
}
newParams.parentFormat = getParentFormatter(newParams);
if (!supportsRarityRanking(field) && newParams.orderBy.type === 'rare') {
if (
(!supportsRarityRanking(field) && newParams.orderBy.type === 'rare') ||
(!supportsSignificantRanking(field) && newParams.orderBy.type === 'significant')
) {
newParams.orderBy = { type: 'alphabetical' };
}

Expand All @@ -371,6 +407,7 @@ export const termsOperation: OperationDefinition<
field.displayName,
newParams.secondaryFields?.length,
newParams.orderBy.type === 'rare',
newParams.orderBy.type === 'significant',
newParams.size
),
sourceField: field.name,
Expand Down Expand Up @@ -438,7 +475,10 @@ export const termsOperation: OperationDefinition<
delete newParams.format;
}
const mainField = indexPattern.getFieldByName(sourcefield);
if (!supportsRarityRanking(mainField) && newParams.orderBy.type === 'rare') {
if (
(!supportsRarityRanking(mainField) && newParams.orderBy.type === 'rare') ||
(!supportsSignificantRanking(mainField) && newParams.orderBy.type === 'significant')
) {
newParams.orderBy = { type: 'alphabetical' };
}
// in single field mode, allow the automatic switch of the function to
Expand Down Expand Up @@ -476,6 +516,7 @@ export const termsOperation: OperationDefinition<
mainField?.displayName,
fields.length - 1,
newParams.orderBy.type === 'rare',
newParams.orderBy.type === 'significant',
newParams.size
),
params: {
Expand Down Expand Up @@ -588,6 +629,9 @@ The top values of a specified field ranked by the chosen metric.
if (value === 'rare') {
return { type: 'rare', maxDocCount: DEFAULT_MAX_DOC_COUNT };
}
if (value === 'significant') {
return { type: 'significant' };
}
if (value === 'custom') {
return { type: 'custom' };
}
Expand Down Expand Up @@ -623,6 +667,17 @@ The top values of a specified field ranked by the chosen metric.
}),
});
}
if (
!currentColumn.params.secondaryFields?.length &&
supportsSignificantRanking(indexPattern.getFieldByName(currentColumn.sourceField))
) {
orderOptions.push({
value: toValue({ type: 'significant' }),
text: i18n.translate('xpack.lens.indexPattern.terms.orderSignificant', {
defaultMessage: 'Significance',
}),
});
}
orderOptions.push({
value: toValue({ type: 'custom' }),
text: i18n.translate('xpack.lens.indexPattern.terms.orderCustomMetric', {
Expand Down Expand Up @@ -654,6 +709,7 @@ The top values of a specified field ranked by the chosen metric.
indexPattern.getFieldByName(currentColumn.sourceField)?.displayName,
secondaryFieldsCount,
currentColumn.params.orderBy.type === 'rare',
currentColumn.params.orderBy.type === 'significant',
value
),
params: {
Expand Down Expand Up @@ -889,7 +945,7 @@ The top values of a specified field ranked by the chosen metric.
aria-label={i18n.translate('xpack.lens.indexPattern.terms.orderDirection', {
defaultMessage: 'Rank direction',
})}
isDisabled={currentColumn.params.orderBy.type === 'rare'}
isDisabled={isRareOrSignificant(currentColumn.params.orderBy)}
options={[
{
id: `${idPrefix}asc`,
Expand Down Expand Up @@ -964,7 +1020,7 @@ The top values of a specified field ranked by the chosen metric.
disabled={
!currentColumn.params.otherBucket ||
indexPattern.getFieldByName(currentColumn.sourceField)?.type !== 'string' ||
currentColumn.params.orderBy.type === 'rare'
isRareOrSignificant(currentColumn.params.orderBy)
}
data-test-subj="indexPattern-terms-missing-bucket"
checked={Boolean(currentColumn.params.missingBucket)}
Expand All @@ -991,7 +1047,7 @@ The top values of a specified field ranked by the chosen metric.
compressed
data-test-subj="indexPattern-terms-other-bucket"
checked={Boolean(currentColumn.params.otherBucket)}
disabled={currentColumn.params.orderBy.type === 'rare'}
disabled={isRareOrSignificant(currentColumn.params.orderBy)}
onChange={(e: EuiSwitchEvent) =>
paramEditorUpdater(
updateColumnParam({
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,39 @@ describe('terms', () => {
);
});

// Verifies that a terms column ranked by significance is converted by
// `toEsAggsFn` into an `aggSignificantTerms` expression function carrying the
// field, size, shard size and include/exclude filters.
it('should return significant terms expression when ordered by significance', () => {
const termsColumn = layer.columns.col1 as TermsIndexPatternColumn;
const esAggsFn = termsOperation.toEsAggsFn(
{
...termsColumn,
params: {
...termsColumn.params,
// Accuracy mode is what makes toEsAggsFn compute an explicit shardSize.
accuracyMode: true,
include: ['C.'],
exclude: ['U.'],
orderBy: { type: 'significant' },
},
},
'col1',
{} as IndexPattern,
layer,
uiSettingsMock,
[]
);
// objectContaining keeps the check focused on the arguments relevant here;
// any other arguments of the generated function are not constrained.
expect(esAggsFn).toEqual(
expect.objectContaining({
function: 'aggSignificantTerms',
arguments: expect.objectContaining({
field: ['source'],
size: [3],
// Math.max(1000, size * 1.5 + 10) evaluates to 1000 for a size of 3.
shardSize: [1000],
include: ['C.'],
exclude: ['U.'],
}),
})
);
});

it('should pass orderAgg correctly', () => {
const termsColumn = layer.columns.col1 as TermsIndexPatternColumn;
const esAggsFn = termsOperation.toEsAggsFn(
Expand Down Expand Up @@ -1973,6 +2006,37 @@ describe('terms', () => {
expect(select2.prop('disabled')).toEqual(true);
});

// Verifies that both the "missing bucket" and "other bucket" switches are
// rendered disabled when the terms column is ranked by significance.
it('should disable missing bucket and other bucket setting when ordered by significance', () => {
const updateLayerSpy = jest.fn();
const instance = shallow(
<InlineOptions
{...defaultProps}
layer={layer}
paramEditorUpdater={updateLayerSpy}
columnId="col1"
currentColumn={{
...(layer.columns.col1 as TermsIndexPatternColumn),
params: {
...(layer.columns.col1 as TermsIndexPatternColumn).params,
// Only the ranking type is overridden; all other params come from the fixture.
orderBy: { type: 'significant' },
},
}}
/>
);

// Despite the variable names, select1/select2 are the EuiSwitch elements
// located through their data-test-subj attributes.
const select1 = instance
.find('[data-test-subj="indexPattern-terms-missing-bucket"]')
.find(EuiSwitch);

// NOTE(review): the missing-bucket switch is also disabled when otherBucket is
// off or the source field is not of type 'string', so this assertion may pass
// for reasons unrelated to the ranking type — confirm against the fixture.
expect(select1.prop('disabled')).toEqual(true);

const select2 = instance
.find('[data-test-subj="indexPattern-terms-other-bucket"]')
.find(EuiSwitch);

expect(select2.prop('disabled')).toEqual(true);
});

describe('accuracy mode', () => {
const renderWithAccuracy = (accuracy: boolean, rareTerms: boolean) =>
shallow(
Expand Down Expand Up @@ -2180,6 +2244,7 @@ describe('terms', () => {
'column$$$col2',
'alphabetical',
'rare',
'significant',
'custom',
]);
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ export interface TermsIndexPatternColumn extends FieldBasedIndexPatternColumn {
orderBy:
| { type: 'alphabetical'; fallback?: boolean }
| { type: 'rare'; maxDocCount: number }
| { type: 'significant' }
| { type: 'column'; columnId: string }
| { type: 'custom' };
orderAgg?: FieldBasedIndexPatternColumn;
Expand Down

0 comments on commit 59eb4ea

Please sign in to comment.