From 57fa5a00eb286758dff6bcf7b592f10dd5399905 Mon Sep 17 00:00:00 2001 From: Dima Arnautov Date: Mon, 14 Sep 2020 10:52:58 +0200 Subject: [PATCH] [ML] improve query string parsing --- .../application/util/custom_url_utils.test.ts | 3 +- .../application/util/custom_url_utils.ts | 133 +++++++++++------- 2 files changed, 80 insertions(+), 56 deletions(-) diff --git a/x-pack/plugins/ml/public/application/util/custom_url_utils.test.ts b/x-pack/plugins/ml/public/application/util/custom_url_utils.test.ts index 1ac6c1207a4500..1e19fb3e997902 100644 --- a/x-pack/plugins/ml/public/application/util/custom_url_utils.test.ts +++ b/x-pack/plugins/ml/public/application/util/custom_url_utils.test.ts @@ -268,7 +268,6 @@ describe('ML - custom URL utils', () => { ); }); - /** FIXME */ test.skip('returns expected URL for a Kibana Discover type URL when record field contains special characters', () => { expect(getUrlForRecord(TEST_DISCOVER_URL, TEST_RECORD_SPECIAL_CHARS)).toBe( "discover#/?_g=(time:(from:'2017-02-09T15:10:00.000Z',mode:absolute,to:'2017-02-09T17:15:00.000Z'))&_a=(index:bf6e5860-9404-11e8-8d4c-593f69c47267,query:(language:kuery,query:'airline:\"%3C%3E%3A%3B%5B%7D%5C%22)\" and odd:field,name:>:&12<''))" @@ -459,7 +458,7 @@ describe('ML - custom URL utils', () => { }; expect(getUrlForRecord(urlConfig, testRecords)).toBe( - "security/hosts/ml-hosts?_g=()&query=(query:'process.name:\"seq\"',language:kuery)&timerange=(global:(linkTo:!(timeline),timerange:(from:'2019-02-01T16:00:00.000Z',kind:absolute,to:'2019-02-01T18:59:59.999Z')),timeline:(linkTo:!(global),timerange:(from:'2019-02-01T16%3A00%3A00.000Z',kind:absolute,to:'2019-02-01T18%3A59%3A59.999Z')))" + "security/hosts/ml-hosts?_g=()&query=(language:kuery,query:'process.name:\"seq\"')&timerange=(global:(linkTo:!(timeline),timerange:(from:'2019-02-01T16:00:00.000Z',kind:absolute,to:'2019-02-01T18:59:59.999Z')),timeline:(linkTo:!(global),timerange:(from:'2019-02-01T16%3A00%3A00.000Z',kind:absolute,to:'2019-02-01T18%3A59%3A59.999Z')))" ); }); diff --git a/x-pack/plugins/ml/public/application/util/custom_url_utils.ts b/x-pack/plugins/ml/public/application/util/custom_url_utils.ts index e538008a24a8a2..4f8e27993c878a 100644 --- a/x-pack/plugins/ml/public/application/util/custom_url_utils.ts +++ b/x-pack/plugins/ml/public/application/util/custom_url_utils.ts @@ -8,6 +8,7 @@ import { get, flow } from 'lodash'; import moment from 'moment'; +import rison, { RisonObject, RisonValue } from 'rison-node'; import { parseInterval } from '../../../common/util/parse_interval'; import { escapeForElasticsearchQuery, replaceStringTokens } from './string_utils'; @@ -131,13 +132,70 @@ function escapeForKQL(value: string | number): string { type GetResultTokenValue = (v: string) => string; +export const isRisonObject = (value: RisonValue): value is RisonObject => { + return value !== null && typeof value === 'object'; +}; + +const getQueryStringResultProvider = ( + record: CustomUrlAnomalyRecordDoc, + getResultTokenValue: GetResultTokenValue +) => (resultPrefix: string, queryString: string, resultPostfix: string): string => { + const URL_LENGTH_LIMIT = 2000; + + let availableCharactersLeft = URL_LENGTH_LIMIT - resultPrefix.length - resultPostfix.length; + + // URL template might contain encoded characters + const queryFields = queryString + // Split query string by AND operator. + .split(/\sand\s/i) + // Get property name from `influencerField:$influencerField$` string. + .map((v) => String(v.split(/:(.+)?\$/)[0]).trim()); + + const queryParts: string[] = []; + const joinOperator = ' AND '; + + fieldsLoop: for (let i = 0; i < queryFields.length; i++) { + const field = queryFields[i]; + // Use lodash get to allow nested JSON fields to be retrieved. + let tokenValues: string[] | string | null = get(record, field) || null; + if (tokenValues === null) { + continue; + } + tokenValues = Array.isArray(tokenValues) ? tokenValues : [tokenValues]; + + // Create a pair `influencerField:value`. + // In cases where there are multiple influencer field values for an anomaly + // combine values with OR operator e.g. `(influencerField:value or influencerField:another_value)`. + let result = ''; + for (let j = 0; j < tokenValues.length; j++) { + const part = `${j > 0 ? ' OR ' : ''}${field}:"${getResultTokenValue(tokenValues[j])}"`; + + // Build up a URL string which is not longer than the allowed length and isn't corrupted by invalid query. + if (availableCharactersLeft < part.length) { + if (result.length > 0) { + queryParts.push(j > 0 ? `(${result})` : result); + } + break fieldsLoop; + } + + result += part; + + availableCharactersLeft -= result.length; + } + + if (result.length > 0) { + queryParts.push(tokenValues.length > 1 ? `(${result})` : result); + } + } + return queryParts.join(joinOperator); +}; + /** * Builds a Kibana dashboard or Discover URL from the supplied config, with any * dollar delimited tokens substituted from the supplied anomaly record. */ function buildKibanaUrl(urlConfig: UrlConfig, record: CustomUrlAnomalyRecordDoc) { const urlValue = urlConfig.url_value; - const URL_LENGTH_LIMIT = 2000; const isLuceneQueryLanguage = urlValue.includes('language:lucene'); @@ -172,67 +230,34 @@ function buildKibanaUrl(urlConfig: UrlConfig, record: CustomUrlAnomalyRecordDoc) return flow( (str: string) => str.replace('$earliest$', record.earliest).replace('$latest$', record.latest), // Process query string content of the URL + decodeURIComponent, (str: string) => { const getResultTokenValue: GetResultTokenValue = flow( queryLanguageEscapeCallback, commonEscapeCallback ); + + const getQueryStringResult = getQueryStringResultProvider(record, getResultTokenValue); + + const match = str.match(/(.+)(\(.*\blanguage:(?:lucene|kuery)\b.*?\))(.+)/); + + if (match !== null && match[2] !== undefined) { + const [, prefix, queryDef, postfix] = match; + + const q = rison.decode(queryDef); + + if (isRisonObject(q) && q.hasOwnProperty('query')) { + const [resultPrefix, resultPostfix] = [prefix, postfix].map(replaceSingleTokenValues); + const resultQuery = getQueryStringResult(resultPrefix, q.query as string, resultPostfix); + return `${resultPrefix}${rison.encode({ ...q, query: resultQuery })}${resultPostfix}`; + } + } + return str.replace( - /(.+query:'|.+&kuery=)([^']*)(['&].+)/, + /(.+&kuery=)(.*?)[^!](&.+)/, (fullMatch, prefix: string, queryString: string, postfix: string) => { const [resultPrefix, resultPostfix] = [prefix, postfix].map(replaceSingleTokenValues); - - let availableCharactersLeft = - URL_LENGTH_LIMIT - resultPrefix.length - resultPostfix.length; - - // URL template might contain encoded characters - const queryFields = decodeURIComponent(queryString) - // Split query string by AND operator. - .split(/\sand\s/i) - // Get property name from `influencerField:$influencerField$` string. - .map((v) => String(v.split(/:(.+)?\$/)[0]).trim()); - - const queryParts: string[] = []; - const joinOperator = ' AND '; - - fieldsLoop: for (let i = 0; i < queryFields.length; i++) { - const field = queryFields[i]; - // Use lodash get to allow nested JSON fields to be retrieved. - let tokenValues: string[] | string | null = get(record, field) || null; - if (tokenValues === null) { - continue; - } - tokenValues = Array.isArray(tokenValues) ? tokenValues : [tokenValues]; - - // Create a pair `influencerField:value`. - // In cases where there are multiple influencer field values for an anomaly - // combine values with OR operator e.g. `(influencerField:value or influencerField:another_value)`. - let result = ''; - for (let j = 0; j < tokenValues.length; j++) { - const part = `${j > 0 ? ' OR ' : ''}${field}:"${getResultTokenValue( - tokenValues[j] - )}"`; - - // Build up a URL string which is not longer than the allowed length and isn't corrupted by invalid query. - if (availableCharactersLeft < part.length) { - if (result.length > 0) { - queryParts.push(j > 0 ? `(${result})` : result); - } - break fieldsLoop; - } - - result += part; - - availableCharactersLeft -= result.length; - } - - if (result.length > 0) { - queryParts.push(tokenValues.length > 1 ? `(${result})` : result); - } - } - - const resultQuery = queryParts.join(joinOperator); - + const resultQuery = getQueryStringResult(resultPrefix, queryString, resultPostfix); return `${resultPrefix}${resultQuery}${resultPostfix}`; } );