Skip to content

Commit

Permalink
write data up until error in setFromX methods (#58)
Browse files Browse the repository at this point in the history
  • Loading branch information
bakkot committed Jun 11, 2024
1 parent 58eaa4c commit 66fa8c8
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 63 deletions.
90 changes: 57 additions & 33 deletions playground/polyfill-core.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ function skipAsciiWhitespace(string, index) {

function fromBase64(string, alphabet, lastChunkHandling, maxLength) {
if (maxLength === 0) {
return { read: 0, bytes: [] };
return { read: 0, bytes: [], error: null };
}

let read = 0;
Expand All @@ -138,62 +138,69 @@ function fromBase64(string, alphabet, lastChunkHandling, maxLength) {
if (index === string.length) {
if (chunk.length > 0) {
if (lastChunkHandling === 'stop-before-partial') {
return { bytes, read };
return { bytes, read, error: null };
} else if (lastChunkHandling === 'loose') {
if (chunk.length === 1) {
throw new SyntaxError('malformed padding: exactly one additional character');
let error = new SyntaxError('malformed padding: exactly one additional character');
return { bytes, read, error };
}
bytes.push(...decodeBase64Chunk(chunk, false));
} else {
assert(lastChunkHandling === 'strict');
throw new SyntaxError('missing padding');
let error = new SyntaxError('missing padding');
return { bytes, read, error };
}
}
return { bytes, read: string.length };
return { bytes, read: string.length, error: null };
}
let char = string[index];
++index;
if (char === '=') {
if (chunk.length < 2) {
throw new SyntaxError('padding is too early');
let error = new SyntaxError('padding is too early');
return { bytes, read, error };
}
index = skipAsciiWhitespace(string, index);
if (chunk.length === 2) {
if (index === string.length) {
if (lastChunkHandling === 'stop-before-partial') {
// two characters then `=` then EOS: this is, technically, a partial chunk
return { bytes, read };
return { bytes, read, error: null };
}
throw new SyntaxError('malformed padding - only one =');
let error = new SyntaxError('malformed padding - only one =');
return { bytes, read, error };
}
if (string[index] === '=') {
++index;
index = skipAsciiWhitespace(string, index);
}
}
if (index < string.length) {
throw new SyntaxError('unexpected character after padding');
let error = new SyntaxError('unexpected character after padding');
return { bytes, read, error };
}
bytes.push(...decodeBase64Chunk(chunk, lastChunkHandling === 'strict'));
assert(bytes.length <= maxLength);
return { bytes, read: string.length };
return { bytes, read: string.length, error: null };
}
if (alphabet === 'base64url') {
if (char === '+' || char === '/') {
throw new SyntaxError(`unexpected character ${JSON.stringify(char)}`);
let error = new SyntaxError(`unexpected character ${JSON.stringify(char)}`);
return { bytes, read, error };
} else if (char === '-') {
char = '+';
} else if (char === '_') {
char = '/';
}
}
if (!base64Characters.includes(char)) {
throw new SyntaxError(`unexpected character ${JSON.stringify(char)}`);
let error = new SyntaxError(`unexpected character ${JSON.stringify(char)}`);
return { bytes, read, error };
}
let remainingBytes = maxLength - bytes.length;
if (remainingBytes === 1 && chunk.length === 2 || remainingBytes === 2 && chunk.length === 3) {
// special case: we can fit exactly the number of bytes currently represented by chunk, so we were just checking for `=`
return { bytes, read };
return { bytes, read, error: null };
}

chunk += char;
Expand All @@ -203,7 +210,7 @@ function fromBase64(string, alphabet, lastChunkHandling, maxLength) {
read = index;
assert(bytes.length <= maxLength);
if (bytes.length === maxLength) {
return { bytes, read };
return { bytes, read, error: null };
}
}
}
Expand Down Expand Up @@ -231,14 +238,21 @@ export function base64ToUint8Array(string, options, into) {

let maxLength = into ? into.length : 2 ** 53 - 1;

let { bytes, read } = fromBase64(string, alphabet, lastChunkHandling, maxLength);
let { bytes, read, error } = fromBase64(string, alphabet, lastChunkHandling, maxLength);
if (error && !into) {
throw error;
}

bytes = new Uint8Array(bytes);
if (into && bytes.length > 0) {
assert(bytes.length <= into.length);
into.set(bytes);
}

if (error) {
throw error;
}

return { read, bytes };
}

Expand All @@ -254,6 +268,26 @@ export function uint8ArrayToHex(arr) {
return out;
}

/**
 * Decodes a string of two-character hex pairs into byte values without throwing.
 *
 * Failures are reported through the `error` field of the result instead of being
 * thrown, so the caller decides when to throw and can first make use of the bytes
 * that were decoded before the failure.
 *
 * @param {string} string - hex text to decode.
 * @param {number} maxLength - maximum number of bytes to produce; 0 produces none.
 * @returns {{ read: number, bytes: number[], error: SyntaxError | null }}
 *   `read` is the number of characters consumed, `bytes` the values decoded so far,
 *   and `error` is `null` on success.
 */
function fromHex(string, maxLength) {
  const bytes = [];
  let read = 0;

  // An odd-length string can never be split into whole hexit pairs. Reject it up
  // front; otherwise the trailing single character would be decoded as a byte and
  // `read` would end up larger than the string length.
  if (string.length % 2 !== 0) {
    const error = new SyntaxError('string should be an even number of characters');
    return { read, bytes, error };
  }

  // Built once, outside the loop; no `g`/`y` flag, so `.test` carries no state.
  const nonHexCharacter = /[^0-9a-fA-F]/;

  while (read < string.length && bytes.length < maxLength) {
    const hexits = string.slice(read, read + 2);
    if (nonHexCharacter.test(hexits)) {
      // `read` still points at the start of the offending pair.
      const error = new SyntaxError('string should only contain hex characters');
      return { read, bytes, error };
    }
    bytes.push(Number.parseInt(hexits, 16));
    read += 2;
  }

  return { read, bytes, error: null };
}

export function hexToUint8Array(string, into) {
if (typeof string !== 'string') {
throw new TypeError('expected string to be a string');
Expand All @@ -266,23 +300,9 @@ export function hexToUint8Array(string, into) {
}

let maxLength = into ? into.length : 2 ** 53 - 1;

// TODO should hex allow whitespace?
// TODO should hex support lastChunkHandling? (only 'strict' or 'stop-before-partial')
let bytes = [];
let index = 0;
if (maxLength > 0) {
while (index < string.length) {
let hexits = string.slice(index, index + 2);
if (/[^0-9a-fA-F]/.test(hexits)) {
throw new SyntaxError('string should only contain hex characters');
}
bytes.push(parseInt(hexits, 16));
index += 2;
if (bytes.length === maxLength) {
break;
}
}
let { read, bytes, error } = fromHex(string, maxLength);
if (error && !into) {
throw error;
}

bytes = new Uint8Array(bytes);
Expand All @@ -291,5 +311,9 @@ export function hexToUint8Array(string, into) {
into.set(bytes);
}

return { read: index, bytes };
if (error) {
throw error;
}

return { read, bytes };
}
87 changes: 57 additions & 30 deletions spec.html
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,9 @@ <h1>Uint8Array.fromBase64 ( _string_ [ , _options_ ] )</h1>
1. Let _lastChunkHandling_ be ? Get(_opts_, *"lastChunkHandling"*).
1. If _lastChunkHandling_ is *undefined*, set _lastChunkHandling_ to *"loose"*.
1. If _lastChunkHandling_ is not one of *"loose"*, *"strict"*, or *"stop-before-partial"*, throw a *TypeError* exception.
1. Let _result_ be ? FromBase64(_string_, _alphabet_, _lastChunkHandling_).
1. Let _result_ be FromBase64(_string_, _alphabet_, _lastChunkHandling_).
1. If _result_.[[Error]] is not ~none~, then
1. Throw _result_.[[Error]].
1. Let _resultLength_ be the length of _result_.[[Bytes]].
1. Let _ta_ be ? <emu-meta suppress-effects="user-code">AllocateTypedArray(*"Uint8Array"*, %Uint8Array%, *"%Uint8Array.prototype%"*, _resultLength_)</emu-meta>.
1. Set the value at each index of _ta_.[[ViewedArrayBuffer]].[[ArrayBufferData]] to the value at the corresponding index of _result_.[[Bytes]].
Expand All @@ -84,12 +86,14 @@ <h1>Uint8Array.prototype.setFromBase64 ( _string_ [ , _options_ ] )</h1>
1. Let _taRecord_ be MakeTypedArrayWithBufferWitnessRecord(_into_, ~seq-cst~).
1. If IsTypedArrayOutOfBounds(_taRecord_) is *true*, throw a *TypeError* exception.
1. Let _byteLength_ be TypedArrayLength(_taRecord_).
1. Let _result_ be ? FromBase64(_string_, _alphabet_, _lastChunkHandling_, _byteLength_).
1. Let _result_ be FromBase64(_string_, _alphabet_, _lastChunkHandling_, _byteLength_).
1. Let _bytes_ be _result_.[[Bytes]].
1. Let _written_ be the length of _bytes_.
1. NOTE: FromBase64 does not invoke any user code, so the ArrayBuffer backing _into_ cannot have been detached or shrunk.
1. Assert: _written_ ≤ _byteLength_.
1. Perform SetUint8ArrayBytes(_into_, _bytes_).
1. If _result_.[[Error]] is not ~none~, then
1. Throw _result_.[[Error]].
1. Let _resultObject_ be OrdinaryObjectCreate(%Object.prototype%).
1. Perform ! CreateDataPropertyOrThrow(_resultObject_, *"read"*, 𝔽(_result_.[[Read]])).
1. Perform ! CreateDataPropertyOrThrow(_resultObject_, *"written"*, 𝔽(_written_)).
Expand All @@ -101,7 +105,9 @@ <h1>Uint8Array.prototype.setFromBase64 ( _string_ [ , _options_ ] )</h1>
<h1>Uint8Array.fromHex ( _string_ )</h1>
<emu-alg>
1. If _string_ is not a String, throw a *TypeError* exception.
1. Let _result_ be ? FromHex(_string_).
1. Let _result_ be FromHex(_string_).
1. If _result_.[[Error]] is not ~none~, then
1. Throw _result_.[[Error]].
1. Let _resultLength_ be the length of _result_.[[Bytes]].
1. Let _ta_ be ? <emu-meta suppress-effects="user-code">AllocateTypedArray(*"Uint8Array"*, %Uint8Array%, *"%Uint8Array.prototype%"*, _resultLength_)</emu-meta>.
1. Set the value at each index of _ta_.[[ViewedArrayBuffer]].[[ArrayBufferData]] to the value at the corresponding index of _result_.[[Bytes]].
Expand All @@ -118,12 +124,14 @@ <h1>Uint8Array.prototype.setFromHex ( _string_ )</h1>
1. Let _taRecord_ be MakeTypedArrayWithBufferWitnessRecord(_into_, ~seq-cst~).
1. If IsTypedArrayOutOfBounds(_taRecord_) is *true*, throw a *TypeError* exception.
1. Let _byteLength_ be TypedArrayLength(_taRecord_).
1. Let _result_ be ? FromHex(_string_, _byteLength_).
1. Let _result_ be FromHex(_string_, _byteLength_).
1. Let _bytes_ be _result_.[[Bytes]].
1. Let _written_ be the length of _bytes_.
1. NOTE: FromHex does not invoke any user code, so the ArrayBuffer backing _into_ cannot have been detached or shrunk.
1. Assert: _written_ ≤ _byteLength_.
1. Perform SetUint8ArrayBytes(_into_, _bytes_).
1. If _result_.[[Error]] is not ~none~, then
1. Throw _result_.[[Error]].
1. Let _resultObject_ be OrdinaryObjectCreate(%Object.prototype%).
1. Perform ! CreateDataPropertyOrThrow(_resultObject_, *"read"*, 𝔽(_result_.[[Read]])).
1. Perform ! CreateDataPropertyOrThrow(_resultObject_, *"written"*, 𝔽(_written_)).
Expand Down Expand Up @@ -254,7 +262,7 @@ <h1>
_alphabet_: *"base64"* or *"base64url"*,
_lastChunkHandling_: *"loose"*, *"strict"*, or *"stop-before-partial"*,
optional _maxLength_: a non-negative integer,
): either a normal completion containing a Record with fields [[Read]] (an integral Number) and [[Bytes]] (a List of byte values), or a throw completion
): a Record with fields [[Read]] (an integral Number), [[Bytes]] (a List of byte values), and [[Error]] (either ~none~ or a throw completion)
</h1>
<dl class="header">
</dl>
Expand All @@ -264,7 +272,7 @@ <h1>
1. NOTE: Because the input is a string, the length of strings is limited to 2<sup>53</sup> - 1 characters, and the output requires no more bytes than the input has characters, this limit can never be reached. However, it is editorially convenient to use a finite value here.
1. NOTE: The order of validation and decoding in the algorithm below is not observable. Implementations are encouraged to perform them in whatever order is most efficient, possibly interleaving validation with decoding, as long as the behaviour is observably equivalent.
1. If _maxLength_ is 0, then
1. Return the Record { [[Read]]: 0, [[Bytes]]: « » }.
1. Return the Record { [[Read]]: 0, [[Bytes]]: « », [[Error]]: ~none~ }.
1. Let _read_ be 0.
1. Let _bytes_ be « ».
1. Let _chunk_ be the empty String.
Expand All @@ -276,43 +284,58 @@ <h1>
1. If _index_ = _length_, then
1. If _chunkLength_ > 0, then
1. If _lastChunkHandling_ is *"stop-before-partial"*, then
1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_ }.
1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: ~none~ }.
1. Else if _lastChunkHandling_ is *"loose"*, then
1. If _chunkLength_ is 1, then
1. Throw a *SyntaxError* exception.
1. Let _error_ be a new *SyntaxError* exception.
1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: _error_ }.
1. Set _bytes_ to the list-concatenation of _bytes_ and ! DecodeBase64Chunk(_chunk_, *false*).
1. Else,
1. Assert: _lastChunkHandling_ is *"strict"*.
1. Throw a *SyntaxError* exception.
1. Return the Record { [[Read]]: _length_, [[Bytes]]: _bytes_ }.
1. Let _error_ be a new *SyntaxError* exception.
1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: _error_ }.
1. Return the Record { [[Read]]: _length_, [[Bytes]]: _bytes_, [[Error]]: ~none~ }.
1. Let _char_ be the substring of _string_ from _index_ to _index_ + 1.
1. Set _index_ to _index_ + 1.
1. If _char_ is *"="*, then
1. If _chunkLength_ < 2, then
1. Throw a *SyntaxError* exception.
1. Let _error_ be a new *SyntaxError* exception.
1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: _error_ }.
1. Set _index_ to SkipAsciiWhitespace(_string_, _index_).
1. If _chunkLength_ = 2, then
1. If _index_ = _length_, then
1. If _lastChunkHandling_ is *"stop-before-partial"*, then
1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_ }.
1. Throw a *SyntaxError* exception.
1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: ~none~ }.
1. Let _error_ be a new *SyntaxError* exception.
1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: _error_ }.
1. Set _char_ to the substring of _string_ from _index_ to _index_ + 1.
1. If _char_ is *"="*, then
1. Set _index_ to SkipAsciiWhitespace(_string_, _index_ + 1).
1. If _index_ < _length_, then
1. Throw a *SyntaxError* exception.
1. Let _error_ be a new *SyntaxError* exception.
1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: _error_ }.
1. If _lastChunkHandling_ is *"strict"*, let _throwOnExtraBits_ be *true*.
1. Else, let _throwOnExtraBits_ be *false*.
1. Set _bytes_ to the list-concatenation of _bytes_ and ? DecodeBase64Chunk(_chunk_, _throwOnExtraBits_).
1. Return the Record { [[Read]]: _length_, [[Bytes]]: _bytes_ }.
1. Let _decodeResult_ be Completion(DecodeBase64Chunk(_chunk_, _throwOnExtraBits_)).
1. If _decodeResult_ is an abrupt completion, then
1. Let _error_ be _decodeResult_.[[Value]].
1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: _error_ }.
1. Set _bytes_ to the list-concatenation of _bytes_ and ! _decodeResult_.
1. Return the Record { [[Read]]: _length_, [[Bytes]]: _bytes_, [[Error]]: ~none~ }.
1. If _alphabet_ is *"base64url"*, then
1. If _char_ is either *"+"* or *"/"*, throw a *SyntaxError* exception.
1. Else if _char_ is *"-"*, set _char_ to *"+"*.
1. Else if _char_ is *"_"*, set _char_ to *"/"*.
1. If the sole code unit of _char_ is not an element of the standard base64 alphabet, throw a *SyntaxError* exception.
1. If _char_ is either *"+"* or *"/"*, then
1. Let _error_ be a new *SyntaxError* exception.
1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: _error_ }.
1. Else if _char_ is *"-"*, then
1. Set _char_ to *"+"*.
1. Else if _char_ is *"_"*, then
1. Set _char_ to *"/"*.
1. If the sole code unit of _char_ is not an element of the standard base64 alphabet, then
1. Let _error_ be a new *SyntaxError* exception.
1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: _error_ }.
1. Let _remaining_ be _maxLength_ - the length of _bytes_.
1. If _remaining_ = 1 and _chunkLength_ = 2, or if _remaining_ = 2 and _chunkLength_ = 3, then
1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_ }.
1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: ~none~ }.
1. Set _chunk_ to the string-concatenation of _chunk_ and _char_.
1. Set _chunkLength_ to the length of _chunk_.
1. If _chunkLength_ = 4, then
Expand All @@ -321,7 +344,7 @@ <h1>
1. Set _chunkLength_ to 0.
1. Set _read_ to _index_.
1. If the length of _bytes_ = _maxLength_, then
1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_ }.
1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: ~none~ }.
</emu-alg>
</emu-clause>

Expand All @@ -330,23 +353,27 @@ <h1>
FromHex (
_string_: a string,
optional _maxLength_: a non-negative integer,
): either a normal completion containing a Record with fields [[Read]] (an integral Number) and [[Bytes]] (a List of byte values), or a throw completion
): a Record with fields [[Read]] (an integral Number), [[Bytes]] (a List of byte values), and [[Error]] (either ~none~ or a throw completion)
</h1>
<dl class="header">
</dl>
<emu-alg>
1. If _maxLength_ is not present, let _maxLength_ be 2<sup>53</sup> - 1.
1. Let _length_ be the length of _string_.
1. If _length_ modulo 2 is not 0, throw a *SyntaxError* exception.
1. Let _bytes_ be « ».
1. Let _index_ be 0.
1. Repeat, while _index_ &lt; _length_ and the length of _bytes_ &lt; _maxLength_,
1. Let _hexits_ be the substring of _string_ from _index_ to _index_ + 2.
1. If _hexits_ contains any code units which are not in *"0123456789abcdefABCDEF"*, throw a *SyntaxError* exception.
1. Set _index_ to _index_ + 2.
1. Let _read_ be 0.
1. If _length_ modulo 2 is not 0, then
1. Let _error_ be a new *SyntaxError* exception.
1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: _error_ }.
1. Repeat, while _read_ &lt; _length_ and the length of _bytes_ &lt; _maxLength_,
1. Let _hexits_ be the substring of _string_ from _read_ to _read_ + 2.
1. If _hexits_ contains any code units which are not in *"0123456789abcdefABCDEF"*, then
1. Let _error_ be a new *SyntaxError* exception.
1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: _error_ }.
1. Set _read_ to _read_ + 2.
1. Let _byte_ be the integer value represented by _hexits_ in base-16 notation, using the letters A-F and a-f for digits with values 10 through 15.
1. Append _byte_ to _bytes_.
1. Return the Record { [[Read]]: _index_, [[Bytes]]: _bytes_ }.
1. Return the Record { [[Read]]: _read_, [[Bytes]]: _bytes_, [[Error]]: ~none~ }.
</emu-alg>
</emu-clause>

Expand Down
Loading

0 comments on commit 66fa8c8

Please sign in to comment.