Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Percent decode #1361

Merged
merged 13 commits into from
Aug 15, 2024
136 changes: 115 additions & 21 deletions packages/core-js/modules/web.url-search-params.constructor.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
'use strict';
// TODO: in core-js@4, move /modules/ dependencies to public entries for better optimization by tools like `preset-env`
require('../modules/es.array.iterator');
require('../modules/es.string.from-code-point');
var $ = require('../internals/export');
var globalThis = require('../internals/global-this');
var safeGetBuiltIn = require('../internals/safe-get-built-in');
var getBuiltIn = require('../internals/get-built-in');
var call = require('../internals/function-call');
var uncurryThis = require('../internals/function-uncurry-this');
var DESCRIPTORS = require('../internals/descriptors');
Expand Down Expand Up @@ -43,10 +45,12 @@ var NativeRequest = safeGetBuiltIn('Request');
// Fetch-related built-ins looked up through `safeGetBuiltIn`. The `&&` guards below keep the
// `.prototype` reads from throwing when a lookup produced a falsy value.
var Headers = safeGetBuiltIn('Headers');
var RequestPrototype = NativeRequest && NativeRequest.prototype;
var HeadersPrototype = Headers && Headers.prototype;
// Local references to globals, captured once at module evaluation.
var RegExp = globalThis.RegExp;
var TypeError = globalThis.TypeError;
var decodeURIComponent = globalThis.decodeURIComponent;
var encodeURIComponent = globalThis.encodeURIComponent;
// Character builders used by `decode`: `fromCharCode` for single-octet values,
// `fromCodePoint` for code points assembled by `utf8Decode`.
var fromCharCode = String.fromCharCode;
var fromCodePoint = getBuiltIn('String', 'fromCodePoint');
// `$`-prefixed aliases of the global `isNaN` / `parseInt` functions.
var $isNaN = isNaN;
var $parseInt = parseInt;
// NOTE(review): `uncurryThis` presumably turns a method into a function that takes the receiver
// as its first argument - consistent with call sites such as `charAt(input, i)` and
// `push(octets, nextByte)`; confirm against ../internals/function-uncurry-this.
var charAt = uncurryThis(''.charAt);
var join = uncurryThis([].join);
var push = uncurryThis([].push);
Expand All @@ -55,33 +59,123 @@ var shift = uncurryThis([].shift);
// More `uncurryThis` wrappers; call sites pass the receiver first,
// e.g. `stringSlice(string, start, start + 2)` and `exec(VALID_HEX, substr)`.
var splice = uncurryThis([].splice);
var split = uncurryThis(''.split);
var stringSlice = uncurryThis(''.slice);
var exec = uncurryThis(/./.exec);

// Matches every `+`; `decode` replaces each match with a space before percent-decoding.
var plus = /\+/g;
// Four cache slots, indexed by `bytes - 1` in `percentSequence`.
var sequences = Array(4);
// U+FFFD REPLACEMENT CHARACTER, appended by `decode` for byte sequences it cannot decode.
var FALLBACK_REPLACER = '\uFFFD';
// One or more hexadecimal digits, case-insensitive; `parseHexOctet` tests its slice against it.
var VALID_HEX = /^[0-9a-f]+$/i;

var percentSequence = function (bytes) {
return sequences[bytes - 1] || (sequences[bytes - 1] = RegExp('((?:%[\\da-f]{2}){' + bytes + '})', 'gi'));
// Reads the two characters of `string` that start at index `start` as one hexadecimal octet.
// Returns the octet value (0..255), or NaN when fewer than two characters remain
// or when either character is not a hexadecimal digit.
var parseHexOctet = function (string, start) {
  var substr = stringSlice(string, start, start + 2);
  // `VALID_HEX` accepts one or more digits, so a slice cut short by the end of the string
  // (a single hex digit) has to be rejected explicitly - it is not a complete octet
  if (substr.length !== 2 || !exec(VALID_HEX, substr)) return NaN;

  return $parseInt(substr, 16);
};

// Counts the consecutive 1 bits at the top of the low byte of `octet`, starting from bit 7.
// `decode` uses the result as the length announced by the first octet of a byte sequence.
var getLeadingOnes = function (octet) {
  var bit = 7;
  // walk down from the most significant bit of the byte until a 0 bit (or the end of the byte)
  while (bit >= 0 && ((octet >> bit) & 1) === 1) {
    bit--;
  }
  return 7 - bit;
};
slowcheetah marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Awesome -)


var percentDecode = function (sequence) {
try {
return decodeURIComponent(sequence);
} catch (error) {
return sequence;
// Combines the octets of one UTF-8 byte sequence (1 to 4 octets) into a code point.
// The caller is expected to have checked the lead / continuation bit patterns already;
// only the payload bits of each octet are used here.
// Returns the code point, or `null` when the sequence does not encode a Unicode scalar value:
//   - unsupported length (0 or more than 4 octets),
//   - overlong encoding (the value would fit into a shorter sequence),
//   - UTF-16 surrogate code point (U+D800..U+DFFF),
//   - value above U+10FFFF.
var utf8Decode = function (octets) {
  var codePoint = null;
  // smallest code point that legitimately requires a sequence of this length
  var minimum = 0;

  switch (octets.length) {
    case 1:
      codePoint = octets[0];
      break;
    case 2:
      codePoint = (octets[0] & 0x1F) << 6 | (octets[1] & 0x3F);
      minimum = 0x80;
      break;
    case 3:
      codePoint = (octets[0] & 0x0F) << 12 | (octets[1] & 0x3F) << 6 | (octets[2] & 0x3F);
      minimum = 0x800;
      break;
    case 4:
      codePoint = (octets[0] & 0x07) << 18 | (octets[1] & 0x3F) << 12 | (octets[2] & 0x3F) << 6 | (octets[3] & 0x3F);
      minimum = 0x10000;
      break;
  }

  if (codePoint === null) return null;
  // overlong forms such as C0 80 must not decode to the short code point they smuggle in
  if (codePoint < minimum) return null;
  // surrogates are not scalar values and are never valid in UTF-8
  if (codePoint >= 0xD800 && codePoint <= 0xDFFF) return null;

  return codePoint > 0x10FFFF ? null : codePoint;
};

var deserialize = function (it) {
var result = replace(it, plus, ' ');
var bytes = 4;
try {
return decodeURIComponent(result);
} catch (error) {
while (bytes) {
result = replace(result, percentSequence(bytes--), percentDecode);
// Decodes one URLSearchParams key or value:
//   1. every `+` becomes a space;
//   2. each `%XX` escape is turned into an octet, and runs of octets are decoded as UTF-8;
//   3. a `%` that is not followed by two hex digits is kept literally;
//   4. an octet run that is not a decodable UTF-8 sequence yields one U+FFFD (`FALLBACK_REPLACER`),
//      and decoding resumes at the first character that did not belong to the run.
// Never throws on malformed input. Returns the decoded string.
var decode = function (input) {
  input = replace(input, plus, ' ');
  var length = input.length;
  var result = '';
  var i = 0;

  while (i < length) {
    var decodedChar = charAt(input, i);

    if (decodedChar === '%') {
      // `%%` or a `%` too close to the end to carry two hex digits - keep the `%` as is
      if (charAt(input, i + 1) === '%' || i + 3 > length) {
        result += '%';
        i++;
        continue;
      }

      var octet = parseHexOctet(input, i + 1);

      // not a valid escape - keep the `%`, the following characters are handled as plain text
      if ($isNaN(octet)) {
        result += decodedChar;
        i++;
        continue;
      }

      // `i` now points at the second hex digit of the escape
      i += 2;
      var byteSequenceLength = getLeadingOnes(octet);

      if (byteSequenceLength === 0) {
        // single-octet (ASCII) value
        decodedChar = fromCharCode(octet);
      } else {
        // a stray continuation octet (10xxxxxx) or a lead octet announcing more than 4 octets
        if (byteSequenceLength === 1 || byteSequenceLength > 4) {
          result += FALLBACK_REPLACER;
          i++;
          continue;
        }

        var octets = [octet];
        var sequenceIndex = 1;

        // collect the announced number of continuation octets (10xxxxxx, i.e. 128..191)
        while (sequenceIndex < byteSequenceLength) {
          i++;
          if (i + 3 > length || charAt(input, i) !== '%') break;

          var nextByte = parseHexOctet(input, i + 1);

          // `%` followed by non-hex characters: stop here WITHOUT consuming it, so the outer loop
          // re-reads these characters and emits them literally instead of dropping them
          if ($isNaN(nextByte)) break;
          if (nextByte > 191 || nextByte < 128) break;

          push(octets, nextByte);
          i += 2;
          sequenceIndex++;
        }

        // truncated sequence: `i` already points at the first unconsumed character
        if (octets.length !== byteSequenceLength) {
          result += FALLBACK_REPLACER;
          continue;
        }

        var codePoint = utf8Decode(octets);
        if (codePoint === null) {
          result += FALLBACK_REPLACER;
        } else {
          decodedChar = fromCodePoint(codePoint);
        }
      }
    }

    result += decodedChar;
    i++;
  }

  return result;
};

var find = /[!'()~]|%20/g;
Expand Down Expand Up @@ -174,8 +268,8 @@ URLSearchParamsState.prototype = {
if (attribute.length) {
entry = split(attribute, '=');
push(entries, {
key: deserialize(shift(entry)),
value: deserialize(join(entry, '='))
key: decode(shift(entry)),
value: decode(join(entry, '='))
});
}
}
Expand Down
30 changes: 30 additions & 0 deletions tests/unit-global/web.url-search-params.js
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,36 @@ QUnit.test('URLSearchParams', assert => {
params = new URLSearchParams(params.toString());
assert.same(params.get('query'), '+15555555555', 'parse encoded +');

params = new URLSearchParams('b=%2sf%2a');
assert.same(params.get('b'), '%2sf*', 'parse encoded %2sf%2a');
params = new URLSearchParams('b=%%2a');
assert.same(params.get('b'), '%*', 'parse encoded b=%%2a');

params = new URLSearchParams('a=b\u2384');
assert.same(params.get('a'), 'b\u2384', 'parse \u2384');
params = new URLSearchParams('a\u2384b=c');
assert.same(params.get('a\u2384b'), 'c', 'parse \u2384');

params = new URLSearchParams('a=b%e2%8e%84');
assert.same(params.get('a'), 'b\u2384', 'parse b%e2%8e%84');
params = new URLSearchParams('a%e2%8e%84b=c');
assert.same(params.get('a\u2384b'), 'c', 'parse b%e2%8e%84');

params = new URLSearchParams('a=b\uD83D\uDCA9c');
assert.same(params.get('a'), 'b\uD83D\uDCA9c', 'parse \uD83D\uDCA9');
params = new URLSearchParams('a\uD83D\uDCA9b=c');
assert.same(params.get('a\uD83D\uDCA9b'), 'c', 'parse \uD83D\uDCA9');

params = new URLSearchParams('a=b%f0%9f%92%a9c');
assert.same(params.get('a'), 'b\uD83D\uDCA9c', 'parse %f0%9f%92%a9');
params = new URLSearchParams('a%f0%9f%92%a9b=c');
assert.same(params.get('a\uD83D\uDCA9b'), 'c', 'parse %f0%9f%92%a9');

assert.same(String(new URLSearchParams('%C2')), '%EF%BF%BD=');
assert.same(String(new URLSearchParams('%F0%9F%D0%90')), '%EF%BF%BD%D0%90=');
assert.same(String(new URLSearchParams('%25')), '%25=');
assert.same(String(new URLSearchParams('%4')), '%254=');

const testData = [
{ input: '?a=%', output: [['a', '%']], name: 'handling %' },
{ input: { '+': '%C2' }, output: [['+', '%C2']], name: 'object with +' },
Expand Down
30 changes: 30 additions & 0 deletions tests/unit-pure/web.url-search-params.js
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,36 @@ QUnit.test('URLSearchParams', assert => {
params = new URLSearchParams(params.toString());
assert.same(params.get('query'), '+15555555555', 'parse encoded +');

params = new URLSearchParams('b=%2sf%2a');
assert.same(params.get('b'), '%2sf*', 'parse encoded %2sf%2a');
params = new URLSearchParams('b=%%2a');
assert.same(params.get('b'), '%*', 'parse encoded b=%%2a');

params = new URLSearchParams('a=b\u2384');
assert.same(params.get('a'), 'b\u2384', 'parse \u2384');
params = new URLSearchParams('a\u2384b=c');
assert.same(params.get('a\u2384b'), 'c', 'parse \u2384');

params = new URLSearchParams('a=b%e2%8e%84');
assert.same(params.get('a'), 'b\u2384', 'parse b%e2%8e%84');
params = new URLSearchParams('a%e2%8e%84b=c');
assert.same(params.get('a\u2384b'), 'c', 'parse b%e2%8e%84');

params = new URLSearchParams('a=b\uD83D\uDCA9c');
assert.same(params.get('a'), 'b\uD83D\uDCA9c', 'parse \uD83D\uDCA9');
params = new URLSearchParams('a\uD83D\uDCA9b=c');
assert.same(params.get('a\uD83D\uDCA9b'), 'c', 'parse \uD83D\uDCA9');

params = new URLSearchParams('a=b%f0%9f%92%a9c');
assert.same(params.get('a'), 'b\uD83D\uDCA9c', 'parse %f0%9f%92%a9');
params = new URLSearchParams('a%f0%9f%92%a9b=c');
assert.same(params.get('a\uD83D\uDCA9b'), 'c', 'parse %f0%9f%92%a9');

assert.same(String(new URLSearchParams('%C2')), '%EF%BF%BD=');
assert.same(String(new URLSearchParams('%F0%9F%D0%90')), '%EF%BF%BD%D0%90=');
assert.same(String(new URLSearchParams('%25')), '%25=');
assert.same(String(new URLSearchParams('%4')), '%254=');

const testData = [
{ input: '?a=%', output: [['a', '%']], name: 'handling %' },
{ input: { '+': '%C2' }, output: [['+', '%C2']], name: 'object with +' },
Expand Down