From 2111045cf92726dd082c5e00cef339c96c9819a2 Mon Sep 17 00:00:00 2001 From: Aleh Zasypkin Date: Wed, 15 May 2024 15:44:52 +0200 Subject: [PATCH] Use spec-compliant URL parser to parse `next` URL parameter. --- .github/CODEOWNERS | 4 ++ packages/kbn-std/src/is_internal_url.test.ts | 35 +++++++++++++- packages/kbn-std/src/is_internal_url.ts | 50 +++++++++++++++----- 3 files changed, 77 insertions(+), 12 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index ed4759629b65924..3b20bef783f9117 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1278,6 +1278,10 @@ x-pack/plugins/cloud_integrations/cloud_full_story/server/config.ts @elastic/kib /src/dev/eslint/security_eslint_rule_tests.ts @elastic/kibana-security /src/core/server/integration_tests/config/check_dynamic_config.test.ts @elastic/kibana-security /src/plugins/telemetry/server/config/telemetry_labels.ts @elastic/kibana-security +/packages/kbn-std/src/is_internal_url.test.ts @elastic/kibana-core @elastic/kibana-security +/packages/kbn-std/src/is_internal_url.ts @elastic/kibana-core @elastic/kibana-security +/packages/kbn-std/src/parse_next_url.test.ts @elastic/kibana-core @elastic/kibana-security +/packages/kbn-std/src/parse_next_url.ts @elastic/kibana-core @elastic/kibana-security /test/interactive_setup_api_integration/ @elastic/kibana-security /test/interactive_setup_functional/ @elastic/kibana-security /test/plugin_functional/test_suites/core_plugins/rendering.ts @elastic/kibana-security diff --git a/packages/kbn-std/src/is_internal_url.test.ts b/packages/kbn-std/src/is_internal_url.test.ts index cc1f5a32d7db42f..b9098cf806ce181 100644 --- a/packages/kbn-std/src/is_internal_url.test.ts +++ b/packages/kbn-std/src/is_internal_url.test.ts @@ -6,7 +6,7 @@ * Side Public License, v 1. */ -import { isInternalURL } from './is_internal_url'; +import { CUSTOM_BASE_URL, isInternalURL } from './is_internal_url'; describe('isInternalURL', () => { describe('with basePath defined', () => { @@ -61,6 +61,22 @@ describe('isInternalURL', () => { const href = `${basePath}/app/kibana/../../../management`; expect(isInternalURL(href, basePath)).toBe(false); }); + + it('should return `false` if absolute URL contains tabs or new lines', () => { + for (const char of ['\t', '\n', '\r', '\t\n\r']) { + for (const url of [ + `/${char}/${basePath}app/kibana`, + `/${char}//${basePath}app/kibana`, + `//${char}/${basePath}app/kibana`, + `htt${char}ps://example.com${basePath}`, + CUSTOM_BASE_URL, + `${CUSTOM_BASE_URL}${basePath}`, + `${CUSTOM_BASE_URL}example.com${basePath}`, + ]) { + expect(isInternalURL(url, basePath)).toBe(false); + } + } + }); }); describe('without basePath defined', () => { @@ -86,5 +102,22 @@ describe('isInternalURL', () => { const hrefWithThreeSlashes = `///app/kibana`; expect(isInternalURL(hrefWithThreeSlashes)).toBe(false); }); + + it('should return `false` if absolute URL contains tabs or new lines', () => { + for (const char of ['\t', '\n', '\r', '\t\n\r']) { + for (const url of [ + `/${char}/app/kibana`, + `/${char}//app/kibana`, + `//${char}/app/kibana`, + `htt${char}ps://example.com`, + `java${char}script:alert(1)`, + CUSTOM_BASE_URL, + `${CUSTOM_BASE_URL}app/kibana`, + `${CUSTOM_BASE_URL}example.com/path`, + ]) { + expect(isInternalURL(url)).toBe(false); + } + } + }); }); }); diff --git a/packages/kbn-std/src/is_internal_url.ts b/packages/kbn-std/src/is_internal_url.ts index 434fa6eaf7c274a..ce2c3ccdd9aad9f 100644 --- a/packages/kbn-std/src/is_internal_url.ts +++ b/packages/kbn-std/src/is_internal_url.ts @@ -8,35 +8,63 @@ import { parse as parseUrl } from 'url'; +// Used as the dummy base URL for WHATWG URL parser only. We use custom `estc` protocol since it allows us to use empty +// host: https://url.spec.whatwg.org/#empty-host. +export const CUSTOM_BASE_URL = 'estc://'; + +// The regex to match characters that are automatically removed from the URL by WHATWG URL parser: +// https://infra.spec.whatwg.org/#ascii-tab-or-newline +const TAB_AND_NEW_LINE_REGEX = /[\r\n\t]/g; + /** * Determine if url is outside of this Kibana install. */ export function isInternalURL(url: string, basePath = '') { + // First, perform a basic check for the absolute URLs since only such URLs can be parsed without base URL. Also, if + // the URL cannot be parsed with a custom base URL, then it's not a valid URL, and we should also return `false`. + // @ts-expect-error `canParse` isn't defined in types yet. + if (URL.canParse(url) || !URL.canParse(url, CUSTOM_BASE_URL)) { + return false; + } + + // Now, let's use the WHATWG URL parser, the one used by browsers, to parse the URL. Since WHATWG URL parser doesn't + // support parsing relative URLs as is, we have to use a custom base URL with an empty host. If the parsed URL has a + // non-empty host, then the provided URL is either an absolute or scheme-relative (//) URL. + const normalizedURL = new URL(url, CUSTOM_BASE_URL); + if (normalizedURL.host) { + return false; + } + + // The fact that we have to use a custom base URL with an empty host doesn't allow us to distinguish proper relative + // URLs from scheme-relative URLs with an empty host (///), since both of these URLs will be parsed as URLs with an + // empty host and custom protocol. Due to backward compatibility reasons, browsers can process such URLs in unexpected + // ways (e.g., browser treat `///abc.com` as just `abc.com`). To account for that, we resort to a legacy Node.js URL + // parser that will set `hostname` to `null` only if the URL is truly relative. However, unlike the WHATWG URL parser, + // the Node.js legacy URL parser isn't spec-compliant and doesn't remove tab and newline characters from the URL. + // Instead, it URL-encodes them, preventing the parser from interpreting scheme-relative URLs and empty hosts as + // defined in the specification. But since this behavior is clearly defined in the specification, we can safely remove + // these characters manually to reduce inconsistencies in how Node.js and browsers parse URLs. For more details, see + // URL specification at https://url.spec.whatwg.org/#concept-basic-url-parser. + const urlWithoutTabsAndNewlines = url.replace(TAB_AND_NEW_LINE_REGEX, ''); const { protocol, hostname, port, pathname } = parseUrl( - url, + urlWithoutTabsAndNewlines, false /* parseQueryString */, true /* slashesDenoteHost */ ); - // We should explicitly compare `protocol`, `port` and `hostname` to null to make sure these are not - // detected in the URL at all. For example `hostname` can be empty string for Node URL parser, but - // browser (because of various bwc reasons) processes URL differently (e.g. `///abc.com` - for browser - // hostname is `abc.com`, but for Node hostname is an empty string i.e. everything between schema (`//`) - // and the first slash that belongs to path. + // As explained above, we should explicitly compare `protocol`, `port` and `hostname` to null to make sure these are + // not detected in the URL at all. if (protocol !== null || hostname !== null || port !== null) { return false; } + // Now we need to normalize URL to make sure any relative path segments (`..`) cannot escape expected base path. if (basePath) { - // Now we need to normalize URL to make sure any relative path segments (`..`) cannot escape expected - // base path. We can rely on `URL` with a localhost to automatically "normalize" the URL. - const normalizedPathname = new URL(String(pathname), 'https://localhost').pathname; - return ( // Normalized pathname can add a leading slash, but we should also make sure it's included in // the original URL too pathname?.startsWith('/') && - (normalizedPathname === basePath || normalizedPathname.startsWith(`${basePath}/`)) + (normalizedURL.pathname === basePath || normalizedURL.pathname.startsWith(`${basePath}/`)) ); }