Skip to content

Commit

Permalink
[Ingest Pipelines] Add attachment processor (elastic#155226)
Browse files Browse the repository at this point in the history
  • Loading branch information
sabarasaba authored and nikitaindik committed Apr 25, 2023
1 parent 814cc45 commit fbcd6db
Show file tree
Hide file tree
Showing 5 changed files with 348 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { act } from 'react-dom/test-utils';
import { setup, SetupResult, getProcessorValue, setupEnvironment } from './processor.helpers';

// Processor type under test: selected in the "add processor" flyout and used as the key
// when reading the saved processor back from the onUpdate mock.
const ATTACHMENT_TYPE = 'attachment';

// Covers the attachment processor form: required-field validation, saving with only the
// required field, and saving with every optional parameter filled in.
describe('Processor: Attachment', () => {
let onUpdate: jest.Mock;
let testBed: SetupResult;
const { httpSetup } = setupEnvironment();

// The whole suite runs on Jest's legacy fake timers; real timers are restored in afterAll.
beforeAll(() => {
jest.useFakeTimers({ legacyFakeTimers: true });
});

afterAll(() => {
jest.useRealTimers();
});

beforeEach(async () => {
// Fresh mock per test so each test only inspects the value it saved itself.
onUpdate = jest.fn();

// Start from an empty processors list.
await act(async () => {
testBed = await setup(httpSetup, {
value: {
processors: [],
},
onFlyoutOpen: jest.fn(),
onUpdate,
});
});

const { component, actions } = testBed;

component.update();

// Open flyout to add new processor
actions.addProcessor();
// Add type (the other fields are not visible until a type is selected)
await actions.addProcessorType(ATTACHMENT_TYPE);
});

test('prevents form submission if required fields are not provided', async () => {
const {
actions: { saveNewProcessor },
form,
} = testBed;

// Click submit button with only the type defined
await saveNewProcessor();

// Expect form error as "field" is a required parameter
expect(form.getErrorsMessages()).toEqual([
'A field value is required.', // "Field" input
]);
});

test('saves with default parameter values', async () => {
const {
actions: { saveNewProcessor },
form,
} = testBed;

// Add "field" value
form.setInputValue('fieldNameField.input', 'test_attachment_processor');

// Save the field
await saveNewProcessor();

const processors = getProcessorValue(onUpdate, ATTACHMENT_TYPE);

// Only the required "field" is expected in the saved processor.
expect(processors[0][ATTACHMENT_TYPE]).toEqual({
field: 'test_attachment_processor',
});
});

test('saves with optional parameter values', async () => {
const {
actions: { saveNewProcessor },
form,
find,
component,
} = testBed;

// Add required fields
form.setInputValue('fieldNameField.input', 'test_attachment_processor');

// Add optional fields
form.setInputValue('targetField.input', 'test_target');
form.setInputValue('indexedCharsField.input', '123456');
form.setInputValue('indexedCharsFieldField.input', 'indexed_chars_field');
form.toggleEuiSwitch('removeBinaryField.input');
form.setInputValue('resourceNameField.input', 'resource_name_field');

// Select a "properties" value (optional) through the combo box
await act(async () => {
find('propertiesField').simulate('change', [{ label: 'content' }]);
});
component.update();

// Save the field
await saveNewProcessor();

const processors = getProcessorValue(onUpdate, ATTACHMENT_TYPE);

// NOTE(review): indexed_chars is asserted as the string typed into the input ('123456'),
// not as a number — confirm this is the intended serialized form.
expect(processors[0][ATTACHMENT_TYPE]).toEqual({
field: 'test_attachment_processor',
target_field: 'test_target',
properties: ['content'],
indexed_chars: '123456',
indexed_chars_field: 'indexed_chars_field',
remove_binary: true,
resource_name: 'resource_name_field',
});
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,11 @@ type TestSubject =
| 'droppableList.input-2'
| 'prefixField.input'
| 'suffixField.input'
| 'indexedCharsField.input'
| 'indexedCharsFieldField.input'
| 'removeBinaryField.input'
| 'resourceNameField.input'
| 'propertiesField'
| 'tileTypeField'
| 'targetFormatField'
| 'parentField.input'
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import React, { FunctionComponent } from 'react';
import { i18n } from '@kbn/i18n';
import { FormattedMessage } from '@kbn/i18n-react';
import { EuiCode, EuiFlexGroup, EuiFlexItem, EuiSpacer } from '@elastic/eui';

import {
ComboBoxField,
FIELD_TYPES,
UseField,
ToggleField,
Field,
} from '../../../../../../shared_imports';

import { FieldNameField } from './common_fields/field_name_field';
import { TargetField } from './common_fields/target_field';
import { IgnoreMissingField } from './common_fields/ignore_missing_field';
import { FieldsConfig, to, from } from './shared';

/**
 * Property names offered as selectable options in the attachment processor's
 * "Properties" combo box. Order here is the order the options are listed in.
 */
const propertyValues: string[] = [
  // Document body and descriptive metadata
  'content', 'title', 'author', 'keywords', 'date',
  // Technical metadata
  'content_type', 'content_length', 'language',
];

/**
 * Form field configuration for the optional parameters of the attachment processor,
 * keyed by the processor option name. Each entry is handed to a `UseField` in the
 * `Attachment` component through its `config` prop.
 *
 * The text and number inputs share the `from.emptyStringToUndefined` serializer, the
 * combo box uses the array (de)serializers, and the toggle uses
 * `from.undefinedIfValue(false)`.
 */
const fieldsConfig: FieldsConfig = {
  /* Optional field configs */
  indexed_chars: {
    type: FIELD_TYPES.NUMBER,
    serializer: from.emptyStringToUndefined,
    label: i18n.translate(
      'xpack.ingestPipelines.pipelineEditor.attachment.indexedCharsFieldLabel',
      {
        defaultMessage: 'Indexed chars (optional)',
      }
    ),
    helpText: (
      <FormattedMessage
        id="xpack.ingestPipelines.pipelineEditor.attachment.indexedCharsFieldHelperText"
        defaultMessage="The number of chars being used for extraction to prevent huge fields. Use -1 for no limit. Defaults to {field}."
        values={{ field: <EuiCode>{'100000'}</EuiCode> }}
      />
    ),
  },
  indexed_chars_field: {
    type: FIELD_TYPES.TEXT,
    serializer: from.emptyStringToUndefined,
    label: i18n.translate(
      'xpack.ingestPipelines.pipelineEditor.attachment.indexedCharsFieldFieldLabel',
      {
        defaultMessage: 'Indexed chars field (optional)',
      }
    ),
    helpText: (
      <FormattedMessage
        id="xpack.ingestPipelines.pipelineEditor.attachment.indexedCharsFieldFieldHelperText"
        defaultMessage="Field name from which you can overwrite the number of chars being used for extraction. Defaults to {field}."
        values={{ field: <EuiCode>{'null'}</EuiCode> }}
      />
    ),
  },
  properties: {
    type: FIELD_TYPES.COMBO_BOX,
    deserializer: to.arrayOfStrings,
    serializer: from.optionalArrayOfStrings,
    label: i18n.translate('xpack.ingestPipelines.pipelineEditor.attachment.propertiesFieldLabel', {
      defaultMessage: 'Properties (optional)',
    }),
    helpText: (
      <FormattedMessage
        id="xpack.ingestPipelines.pipelineEditor.attachment.propertiesFieldHelperText"
        // No leading whitespace: the message is rendered verbatim as the field's help text.
        defaultMessage="Array of properties to select to be stored. Defaults to {field}."
        values={{ field: <EuiCode>{'all'}</EuiCode> }}
      />
    ),
  },
  remove_binary: {
    type: FIELD_TYPES.TOGGLE,
    // Off by default; `false` is serialized to undefined by the serializer below.
    defaultValue: false,
    deserializer: to.booleanOrUndef,
    serializer: from.undefinedIfValue(false),
    label: i18n.translate(
      'xpack.ingestPipelines.pipelineEditor.attachment.removeBinaryFieldLabel',
      {
        defaultMessage: 'Remove binary',
      }
    ),
    helpText: i18n.translate(
      'xpack.ingestPipelines.pipelineEditor.attachment.removeBinaryFieldHelpText',
      {
        defaultMessage: 'If enabled, the binary field will be removed from the document.',
      }
    ),
  },
  resource_name: {
    type: FIELD_TYPES.TEXT,
    serializer: from.emptyStringToUndefined,
    label: i18n.translate(
      'xpack.ingestPipelines.pipelineEditor.attachment.resourceNameFieldLabel',
      {
        defaultMessage: 'Resource name (optional)',
      }
    ),
    helpText: (
      <FormattedMessage
        id="xpack.ingestPipelines.pipelineEditor.attachment.resourceNameFieldHelperText"
        defaultMessage="Field containing the name of the resource to decode. If specified, the processor passes this resource name to the underlying Tika library to enable Resource Name Based Detection."
      />
    ),
  },
};

/**
 * Form fields for the "attachment" ingest processor.
 *
 * Renders, in order: the source field and target field side by side, the
 * `indexed_chars` and `indexed_chars_field` inputs side by side, the `properties`
 * combo box, the `remove_binary` toggle, the `resource_name` input and the shared
 * "ignore missing" field.
 */
export const Attachment: FunctionComponent = () => {
  // One combo box option per known property name; rebuilt on every render.
  const propertyOptions = propertyValues.map((label) => ({ label }));

  const sourceFieldHelpText = (
    <FormattedMessage
      id="xpack.ingestPipelines.pipelineEditor.attachment.fieldHelpText"
      defaultMessage="The field to get the base64 encoded field from."
    />
  );

  const targetFieldHelpText = (
    <FormattedMessage
      id="xpack.ingestPipelines.pipelineEditor.attachment.targetHelpText"
      defaultMessage="The field that will hold the attachment information. Defaults to {field}."
      values={{ field: <EuiCode>{'attachment'}</EuiCode> }}
    />
  );

  return (
    <>
      {/* Source and target fields */}
      <EuiFlexGroup>
        <EuiFlexItem>
          <FieldNameField helpText={sourceFieldHelpText} />
        </EuiFlexItem>
        <EuiFlexItem>
          <TargetField helpText={targetFieldHelpText} />
        </EuiFlexItem>
      </EuiFlexGroup>

      <EuiSpacer size="m" />

      {/* Extraction limits */}
      <EuiFlexGroup>
        <EuiFlexItem>
          <UseField
            path="fields.indexed_chars"
            component={Field}
            config={fieldsConfig.indexed_chars}
            data-test-subj="indexedCharsField"
          />
        </EuiFlexItem>
        <EuiFlexItem>
          <UseField
            path="fields.indexed_chars_field"
            component={Field}
            config={fieldsConfig.indexed_chars_field}
            data-test-subj="indexedCharsFieldField"
          />
        </EuiFlexItem>
      </EuiFlexGroup>

      <EuiSpacer size="m" />

      <UseField
        path="fields.properties"
        component={ComboBoxField}
        config={fieldsConfig.properties}
        euiFieldProps={{
          'data-test-subj': 'propertiesField',
          noSuggestions: false,
          options: propertyOptions,
        }}
      />

      <UseField
        path="fields.remove_binary"
        component={ToggleField}
        config={fieldsConfig.remove_binary}
        data-test-subj="removeBinaryField"
      />

      <UseField
        path="fields.resource_name"
        component={Field}
        config={fieldsConfig.resource_name}
        data-test-subj="resourceNameField"
      />

      <IgnoreMissingField />
    </>
  );
};
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
// please try to keep this list sorted by module name (e.g. './bar' before './foo')

export { Append } from './append';
export { Attachment } from './attachment';
export { Bytes } from './bytes';
export { Circle } from './circle';
export { CommunityId } from './community_id';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import { LicenseType } from '../../../../../types';

import {
Append,
Attachment,
Bytes,
Circle,
CommunityId,
Expand Down Expand Up @@ -100,6 +101,23 @@ export const mapProcessorTypeToDescriptor: MapProcessorTypeToDescriptor = {
},
}),
},
attachment: {
FieldsComponent: Attachment,
docLinkPath: '/attachment.html',
label: i18n.translate('xpack.ingestPipelines.processors.label.attachment', {
defaultMessage: 'Attachment',
}),
typeDescription: i18n.translate('xpack.ingestPipelines.processors.description.attachment', {
defaultMessage: 'Extract file attachments in common formats (such as PPT, XLS, and PDF).',
}),
getDefaultDescription: ({ field }) =>
i18n.translate('xpack.ingestPipelines.processors.defaultDescription.attachment', {
defaultMessage: 'Extracts attachment from "{field}"',
values: {
field,
},
}),
},
bytes: {
FieldsComponent: Bytes,
docLinkPath: '/bytes-processor.html',
Expand Down

0 comments on commit fbcd6db

Please sign in to comment.