Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[lens] Add significant terms support as ranking function in Top Values #158962

Merged
merged 15 commits into from
Jun 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@ const significantTermsTitle = i18n.translate('data.search.aggs.buckets.significa
export interface AggParamsSignificantTerms extends BaseAggParams {
field: string;
size?: number;
exclude?: string;
include?: string;
shardSize?: number;
exclude?: string | string[];
include?: string | string[];
}

export const getSignificantTermsBucketAgg = () =>
Expand Down Expand Up @@ -52,6 +53,12 @@ export const getSignificantTermsBucketAgg = () =>
name: 'size',
default: '',
},
{
name: 'shardSize',
write: (aggConfig, output) => {
output.params.shard_size = aggConfig.params.shardSize;
},
},
{
name: 'exclude',
displayName: i18n.translate('data.search.aggs.buckets.significantTerms.excludeLabel', {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ describe('agg_expression_functions', () => {
"field": "machine.os.keyword",
"include": undefined,
"json": undefined,
"shardSize": undefined,
"size": undefined,
},
"schema": undefined,
Expand Down Expand Up @@ -59,6 +60,7 @@ describe('agg_expression_functions', () => {
"field": "machine.os.keyword",
"include": "win",
"json": undefined,
"shardSize": undefined,
"size": 6,
},
"schema": "whatever",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,17 +64,25 @@ export const aggSignificantTerms = (): FunctionDefinition => ({
defaultMessage: 'Max number of buckets to retrieve',
}),
},
shardSize: {
types: ['number'],
help: i18n.translate('data.search.aggs.buckets.significantTerms.shardSize.help', {
defaultMessage: 'Number of terms provided by each shard and returned to coordinating node',
}),
},
exclude: {
types: ['string'],
help: i18n.translate('data.search.aggs.buckets.significantTerms.exclude.help', {
defaultMessage: 'Specific bucket values to exclude from results',
}),
multi: true,
},
include: {
types: ['string'],
help: i18n.translate('data.search.aggs.buckets.significantTerms.include.help', {
defaultMessage: 'Specific bucket values to include in results',
}),
multi: true,
},
json: {
types: ['string'],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,9 @@ export function FieldInputs({
defaultMessage: 'Add field',
})}
isDisabled={
column.params.orderBy.type === 'rare' || localValues.length > MAX_MULTI_FIELDS_SIZE
column.params.orderBy.type === 'rare' ||
column.params.orderBy.type === 'significant' ||
localValues.length > MAX_MULTI_FIELDS_SIZE
}
/>
</>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ export function supportsRarityRanking(field?: IndexPatternField) {
['double', 'float', 'half_float', 'scaled_float'].includes(esType)
);
}
/**
 * Tells whether the given field can be ranked by significance.
 *
 * The check passes when `keyword` is among the field's Elasticsearch types.
 * When the field or its `esTypes` list is absent the optional chain
 * short-circuits and the result is `undefined` (falsy), not `false`.
 *
 * @param field - field to inspect; may be omitted
 * @returns `true` if `keyword` is one of the field's `esTypes`, `false` if it
 *   is not, `undefined` if there is no field or no `esTypes`
 */
export function supportsSignificantRanking(field?: IndexPatternField) {
  return field?.esTypes?.includes('keyword');
}
/**
 * Reports whether an `orderBy` setting selects the `rare` or the
 * `significant` ranking mode.
 *
 * @param orderBy - the `orderBy` params of a terms column
 * @returns `true` for `rare` and `significant`, `false` for every other type
 */
function isRareOrSignificant(orderBy: TermsIndexPatternColumn['params']['orderBy']) {
  switch (orderBy.type) {
    case 'rare':
    case 'significant':
      return true;
    default:
      return false;
  }
}
export type { TermsIndexPatternColumn } from './types';

const missingFieldLabel = i18n.translate('xpack.lens.indexPattern.missingFieldLabel', {
Expand All @@ -70,6 +76,7 @@ function ofName(
name?: string,
secondaryFieldsCount: number = 0,
rare: boolean = false,
significant: boolean = false,
termsSize: number = 0
) {
if (rare) {
Expand All @@ -80,6 +87,14 @@ function ofName(
},
});
}
if (significant) {
return i18n.translate('xpack.lens.indexPattern.significantTermsOf', {
defaultMessage: 'Significant values of {name}',
values: {
name: name ?? missingFieldLabel,
},
});
}
if (secondaryFieldsCount) {
return i18n.translate('xpack.lens.indexPattern.multipleTermsOf', {
defaultMessage: 'Top values of {name} + {count} {count, plural, one {other} other {others}}',
Expand Down Expand Up @@ -149,7 +164,7 @@ export const termsOperation: OperationDefinition<
return ret;
},
canAddNewField: ({ targetColumn, sourceColumn, field, indexPattern }) => {
if (targetColumn.params.orderBy.type === 'rare') {
if (isRareOrSignificant(targetColumn.params.orderBy)) {
return false;
}
// collect the fields from the targetColumn
Expand Down Expand Up @@ -258,6 +273,30 @@ export const termsOperation: OperationDefinition<
max_doc_count: column.params.orderBy.maxDocCount,
}).toAst();
}

// To get more accurate results, we set shard_size to a minimum of 1000
// The other calculation matches the current Elasticsearch shard_size default,
// but they may diverge in the future
const shardSize = column.params.accuracyMode
? Math.max(1000, column.params.size * 1.5 + 10)
: undefined;

if (column.params?.orderBy.type === 'significant') {
return buildExpressionFunction<AggFunctionsMapping['aggSignificantTerms']>(
'aggSignificantTerms',
{
id: columnId,
enabled: true,
schema: 'segment',
field: column.sourceField,
size: column.params.size,
shardSize,
...(column.params.include?.length && { include: column.params.include as string[] }),
...(column.params.exclude?.length && { exclude: column.params.exclude as string[] }),
}
).toAst();
}

let orderBy = '_key';

if (column.params?.orderBy.type === 'column') {
Expand All @@ -269,13 +308,6 @@ export const termsOperation: OperationDefinition<
}
}

// To get more accurate results, we set shard_size to a minimum of 1000
// The other calculation matches the current Elasticsearch shard_size default,
// but they may diverge in the future
const shardSize = column.params.accuracyMode
? Math.max(1000, column.params.size * 1.5 + 10)
: undefined;

const orderAggColumn = column.params.orderAgg;
let orderAgg;
if (orderAggColumn) {
Expand Down Expand Up @@ -346,6 +378,7 @@ export const termsOperation: OperationDefinition<
indexPattern.getFieldByName(column.sourceField)?.displayName,
column.params.secondaryFields?.length,
column.params.orderBy.type === 'rare',
column.params.orderBy.type === 'significant',
column.params.size
),
onFieldChange: (oldColumn, field, params) => {
Expand All @@ -358,7 +391,10 @@ export const termsOperation: OperationDefinition<
delete newParams.format;
}
newParams.parentFormat = getParentFormatter(newParams);
if (!supportsRarityRanking(field) && newParams.orderBy.type === 'rare') {
if (
(!supportsRarityRanking(field) && newParams.orderBy.type === 'rare') ||
(!supportsSignificantRanking(field) && newParams.orderBy.type === 'significant')
) {
newParams.orderBy = { type: 'alphabetical' };
}

Expand All @@ -371,6 +407,7 @@ export const termsOperation: OperationDefinition<
field.displayName,
newParams.secondaryFields?.length,
newParams.orderBy.type === 'rare',
newParams.orderBy.type === 'significant',
newParams.size
),
sourceField: field.name,
Expand Down Expand Up @@ -438,7 +475,10 @@ export const termsOperation: OperationDefinition<
delete newParams.format;
}
const mainField = indexPattern.getFieldByName(sourcefield);
if (!supportsRarityRanking(mainField) && newParams.orderBy.type === 'rare') {
if (
(!supportsRarityRanking(mainField) && newParams.orderBy.type === 'rare') ||
(!supportsSignificantRanking(mainField) && newParams.orderBy.type === 'significant')
) {
newParams.orderBy = { type: 'alphabetical' };
}
// in single field mode, allow the automatic switch of the function to
Expand Down Expand Up @@ -476,6 +516,7 @@ export const termsOperation: OperationDefinition<
mainField?.displayName,
fields.length - 1,
newParams.orderBy.type === 'rare',
newParams.orderBy.type === 'significant',
newParams.size
),
params: {
Expand Down Expand Up @@ -588,6 +629,9 @@ The top values of a specified field ranked by the chosen metric.
if (value === 'rare') {
return { type: 'rare', maxDocCount: DEFAULT_MAX_DOC_COUNT };
}
if (value === 'significant') {
return { type: 'significant' };
}
if (value === 'custom') {
return { type: 'custom' };
}
Expand Down Expand Up @@ -623,6 +667,17 @@ The top values of a specified field ranked by the chosen metric.
}),
});
}
if (
!currentColumn.params.secondaryFields?.length &&
supportsSignificantRanking(indexPattern.getFieldByName(currentColumn.sourceField))
) {
orderOptions.push({
value: toValue({ type: 'significant' }),
text: i18n.translate('xpack.lens.indexPattern.terms.orderSignificant', {
defaultMessage: 'Significance',
}),
});
}
orderOptions.push({
value: toValue({ type: 'custom' }),
text: i18n.translate('xpack.lens.indexPattern.terms.orderCustomMetric', {
Expand Down Expand Up @@ -654,6 +709,7 @@ The top values of a specified field ranked by the chosen metric.
indexPattern.getFieldByName(currentColumn.sourceField)?.displayName,
secondaryFieldsCount,
currentColumn.params.orderBy.type === 'rare',
currentColumn.params.orderBy.type === 'significant',
value
),
params: {
Expand Down Expand Up @@ -889,7 +945,7 @@ The top values of a specified field ranked by the chosen metric.
aria-label={i18n.translate('xpack.lens.indexPattern.terms.orderDirection', {
defaultMessage: 'Rank direction',
})}
isDisabled={currentColumn.params.orderBy.type === 'rare'}
isDisabled={isRareOrSignificant(currentColumn.params.orderBy)}
options={[
{
id: `${idPrefix}asc`,
Expand Down Expand Up @@ -964,7 +1020,7 @@ The top values of a specified field ranked by the chosen metric.
disabled={
!currentColumn.params.otherBucket ||
indexPattern.getFieldByName(currentColumn.sourceField)?.type !== 'string' ||
currentColumn.params.orderBy.type === 'rare'
isRareOrSignificant(currentColumn.params.orderBy)
}
data-test-subj="indexPattern-terms-missing-bucket"
checked={Boolean(currentColumn.params.missingBucket)}
Expand All @@ -991,7 +1047,7 @@ The top values of a specified field ranked by the chosen metric.
compressed
data-test-subj="indexPattern-terms-other-bucket"
checked={Boolean(currentColumn.params.otherBucket)}
disabled={currentColumn.params.orderBy.type === 'rare'}
disabled={isRareOrSignificant(currentColumn.params.orderBy)}
onChange={(e: EuiSwitchEvent) =>
paramEditorUpdater(
updateColumnParam({
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,39 @@ describe('terms', () => {
);
});

// Verifies that a terms column ordered by significance is translated into an
// `aggSignificantTerms` expression function instead of the regular terms agg.
it('should return significant terms expression when ordered by significance', () => {
const termsColumn = layer.columns.col1 as TermsIndexPatternColumn;
const esAggsFn = termsOperation.toEsAggsFn(
{
...termsColumn,
params: {
...termsColumn.params,
// accuracy mode is on so that a shardSize value is expected in the output
accuracyMode: true,
include: ['C.'],
exclude: ['U.'],
orderBy: { type: 'significant' },
},
},
'col1',
{} as IndexPattern,
layer,
uiSettingsMock,
[]
);
// Only the listed arguments are checked (objectContaining); `field` and `size`
// are expected to come from the col1 fixture, which is defined outside this test.
expect(esAggsFn).toEqual(
expect.objectContaining({
function: 'aggSignificantTerms',
arguments: expect.objectContaining({
field: ['source'],
size: [3],
// with a size of 3, the expected shard size is the 1000 minimum
shardSize: [1000],
// include/exclude are expected to be forwarded unchanged
include: ['C.'],
exclude: ['U.'],
}),
})
);
});

it('should pass orderAgg correctly', () => {
const termsColumn = layer.columns.col1 as TermsIndexPatternColumn;
const esAggsFn = termsOperation.toEsAggsFn(
Expand Down Expand Up @@ -1973,6 +2006,37 @@ describe('terms', () => {
expect(select2.prop('disabled')).toEqual(true);
});

// Verifies that both the "missing bucket" and "other bucket" switches are
// rendered as disabled when the column's orderBy type is 'significant'.
it('should disable missing bucket and other bucket setting when ordered by significance', () => {
const updateLayerSpy = jest.fn();
// Render the inline options for col1 with its orderBy overridden to 'significant';
// all other params are taken from the layer fixture.
const instance = shallow(
<InlineOptions
{...defaultProps}
layer={layer}
paramEditorUpdater={updateLayerSpy}
columnId="col1"
currentColumn={{
...(layer.columns.col1 as TermsIndexPatternColumn),
params: {
...(layer.columns.col1 as TermsIndexPatternColumn).params,
orderBy: { type: 'significant' },
},
}}
/>
);

// switch located through the missing-bucket test subject
const select1 = instance
.find('[data-test-subj="indexPattern-terms-missing-bucket"]')
.find(EuiSwitch);

expect(select1.prop('disabled')).toEqual(true);

// switch located through the other-bucket test subject
const select2 = instance
.find('[data-test-subj="indexPattern-terms-other-bucket"]')
.find(EuiSwitch);

expect(select2.prop('disabled')).toEqual(true);
});

describe('accuracy mode', () => {
const renderWithAccuracy = (accuracy: boolean, rareTerms: boolean) =>
shallow(
Expand Down Expand Up @@ -2180,6 +2244,7 @@ describe('terms', () => {
'column$$$col2',
'alphabetical',
'rare',
'significant',
'custom',
]);
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ export interface TermsIndexPatternColumn extends FieldBasedIndexPatternColumn {
orderBy:
| { type: 'alphabetical'; fallback?: boolean }
| { type: 'rare'; maxDocCount: number }
| { type: 'significant' }
| { type: 'column'; columnId: string }
| { type: 'custom' };
orderAgg?: FieldBasedIndexPatternColumn;
Expand Down