Skip to content

Commit

Permalink
[node:buffer] Implement isUtf8 and isAscii (#3498)
Browse files Browse the repository at this point in the history
Co-authored-by: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com>
  • Loading branch information
Jarred-Sumner and Jarred-Sumner committed Jul 3, 2023
1 parent 975362a commit 6528f5c
Show file tree
Hide file tree
Showing 3 changed files with 177 additions and 1 deletion.
24 changes: 24 additions & 0 deletions packages/bun-types/buffer.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2084,6 +2084,30 @@ declare module "buffer" {
values(): IterableIterator<number>;
}
var Buffer: BufferConstructor;

/**
 * Returns `true` if `input` contains only valid UTF-8-encoded data,
 * including the case in which `input` is empty.
 *
 * Throws if the `input` is a detached array buffer.
 * @since Bun v0.6.13
 * @param input The input to validate.
 * @returns `true` when `input` is empty or consists solely of valid UTF-8,
 * otherwise `false`.
 * @throws {TypeError} If `input` is detached.
 */
export function isUtf8(
input: TypedArray | ArrayBufferLike | DataView,
): boolean;

/**
 * Returns `true` if `input` contains only valid ASCII-encoded data,
 * including the case in which `input` is empty.
 *
 * Throws if the `input` is a detached array buffer.
 * @since Bun v0.6.13
 * @param input The input to validate.
 * @returns `true` when `input` is empty or consists solely of ASCII bytes,
 * otherwise `false`.
 * @throws {TypeError} If `input` is detached.
 */
export function isAscii(
input: TypedArray | ArrayBufferLike | DataView,
): boolean;
}
}
declare module "node:buffer" {
Expand Down
130 changes: 130 additions & 0 deletions src/bun.js/modules/BufferModule.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,127 @@
#include "../bindings/ZigGlobalObject.h"
#include "JavaScriptCore/JSGlobalObject.h"
#include "JavaScriptCore/ObjectConstructor.h"
#include "simdutf.h"

namespace Zig {
using namespace WebCore;
using namespace JSC;

// TODO: Add DOMJIT fast path
//
// Native implementation of `isUtf8(input)`.
//
// Accepts either an ArrayBufferView or an ArrayBuffer as the first argument
// and answers whether its bytes are valid UTF-8 according to simdutf.
//   - Zero-length input (or an ArrayBuffer without an impl) yields `true`.
//   - A detached view/buffer raises a TypeError.
//   - Any other argument type raises a TypeError.
JSC_DEFINE_HOST_FUNCTION(jsBufferConstructorFunction_isUtf8,
                         (JSC::JSGlobalObject * lexicalGlobalObject,
                          JSC::CallFrame *callframe)) {
  auto throwScope = DECLARE_THROW_SCOPE(lexicalGlobalObject->vm());

  auto input = callframe->argument(0);
  const char *bytes = nullptr;
  size_t length = 0;

  if (auto *view = JSC::jsDynamicCast<JSC::JSArrayBufferView *>(input)) {
    // Typed arrays, DataViews and Buffers land here.
    if (UNLIKELY(view->isDetached())) {
      throwTypeError(lexicalGlobalObject, throwScope,
                     "ArrayBufferView is detached"_s);
      return JSValue::encode({});
    }

    length = view->byteLength();
    if (!length)
      return JSValue::encode(jsBoolean(true));

    bytes = reinterpret_cast<const char *>(view->vector());
  } else if (auto *jsArrayBuffer =
                 JSC::jsDynamicCast<JSC::JSArrayBuffer *>(input)) {
    // Raw ArrayBuffer: validate its whole backing store.
    auto *contents = jsArrayBuffer->impl();
    if (!contents)
      return JSValue::encode(jsBoolean(true));

    if (UNLIKELY(contents->isDetached())) {
      throwTypeError(lexicalGlobalObject, throwScope,
                     "ArrayBuffer is detached"_s);
      return JSValue::encode({});
    }

    length = contents->byteLength();
    if (!length)
      return JSValue::encode(jsBoolean(true));

    bytes = reinterpret_cast<const char *>(contents->data());
  } else {
    // Neither a view nor an ArrayBuffer.
    throwVMError(
        lexicalGlobalObject, throwScope,
        createTypeError(lexicalGlobalObject,
                        "First argument must be an ArrayBufferView"_s));
    return JSValue::encode({});
  }

  RELEASE_AND_RETURN(throwScope,
                     JSValue::encode(jsBoolean(
                         simdutf::validate_utf8(bytes, length))));
}

// TODO: Add DOMJIT fast path
//
// Native implementation of `isAscii(input)`.
//
// Accepts either an ArrayBufferView or an ArrayBuffer as the first argument
// and answers whether its bytes are all valid ASCII according to simdutf.
//   - Zero-length input (or an ArrayBuffer without an impl) yields `true`.
//   - A detached view/buffer raises a TypeError.
//   - Any other argument type raises a TypeError.
JSC_DEFINE_HOST_FUNCTION(jsBufferConstructorFunction_isAscii,
                         (JSC::JSGlobalObject * lexicalGlobalObject,
                          JSC::CallFrame *callframe)) {
  auto throwScope = DECLARE_THROW_SCOPE(lexicalGlobalObject->vm());

  auto buffer = callframe->argument(0);
  auto *bufferView = JSC::jsDynamicCast<JSC::JSArrayBufferView *>(buffer);
  const char *ptr = nullptr;
  size_t byteLength = 0;
  if (bufferView) {
    if (UNLIKELY(bufferView->isDetached())) {
      throwTypeError(lexicalGlobalObject, throwScope,
                     "ArrayBufferView is detached"_s);
      return JSValue::encode({});
    }

    byteLength = bufferView->byteLength();

    // An empty view is trivially ASCII; skip touching its storage.
    if (byteLength == 0) {
      return JSValue::encode(jsBoolean(true));
    }

    ptr = reinterpret_cast<const char *>(bufferView->vector());
  } else if (auto *arrayBuffer =
                 JSC::jsDynamicCast<JSC::JSArrayBuffer *>(buffer)) {
    auto *impl = arrayBuffer->impl();

    // The null check must come before any dereference of `impl`
    // (previously `impl->isDetached()` ran first, making this check dead
    // code and risking a null dereference). Mirrors the order in isUtf8.
    if (!impl) {
      return JSValue::encode(jsBoolean(true));
    }

    if (UNLIKELY(impl->isDetached())) {
      throwTypeError(lexicalGlobalObject, throwScope,
                     "ArrayBuffer is detached"_s);
      return JSValue::encode({});
    }

    byteLength = impl->byteLength();

    // An empty buffer is trivially ASCII.
    if (byteLength == 0) {
      return JSValue::encode(jsBoolean(true));
    }

    ptr = reinterpret_cast<const char *>(impl->data());
  } else {
    // Neither a view nor an ArrayBuffer.
    throwVMError(
        lexicalGlobalObject, throwScope,
        createTypeError(lexicalGlobalObject,
                        "First argument must be an ArrayBufferView"_s));
    return JSValue::encode({});
  }

  RELEASE_AND_RETURN(
      throwScope,
      JSValue::encode(jsBoolean(simdutf::validate_ascii(ptr, byteLength))));
}

JSC_DEFINE_HOST_FUNCTION(jsFunctionNotImplemented,
(JSGlobalObject * globalObject,
CallFrame *callFrame)) {
Expand Down Expand Up @@ -106,6 +222,20 @@ inline void generateBufferSourceCode(JSC::JSGlobalObject *lexicalGlobalObject,
exportProperty(JSC::Identifier::fromString(vm, "resolveObjectURL"_s),
resolveObjectURL);

// Export the native ASCII validator as `isAscii` (declared length 1).
// Both the call and the construct entry points use the ASCII host function;
// the construct slot previously pointed at the UTF-8 validator (copy-paste),
// which would make `new isAscii(x)` perform UTF-8 validation instead.
exportProperty(JSC::Identifier::fromString(vm, "isAscii"_s),
               JSC::JSFunction::create(vm, globalObject, 1, "isAscii"_s,
                                       jsBufferConstructorFunction_isAscii,
                                       ImplementationVisibility::Public,
                                       NoIntrinsic,
                                       jsBufferConstructorFunction_isAscii));

// Export the native UTF-8 validator as `isUtf8` (declared length 1).
// NOTE(review): the trailing argument is presumably the native constructor
// entry point for JSFunction::create — confirm against the JSC header.
exportProperty(JSC::Identifier::fromString(vm, "isUtf8"_s),
JSC::JSFunction::create(vm, globalObject, 1, "isUtf8"_s,
jsBufferConstructorFunction_isUtf8,
ImplementationVisibility::Public,
NoIntrinsic,
jsBufferConstructorFunction_isUtf8));

exportNames.append(vm.propertyNames->defaultKeyword);
exportValues.append(defaultObject);
}
Expand Down
24 changes: 23 additions & 1 deletion test/js/node/buffer.test.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { Buffer, SlowBuffer } from "buffer";
import { Buffer, SlowBuffer, isAscii, isUtf8 } from "buffer";
import { describe, it, expect, beforeEach, afterEach } from "bun:test";
import { gc } from "harness";

Expand All @@ -7,6 +7,28 @@ const BufferModule = await import("buffer");
beforeEach(() => gc());
afterEach(() => gc());

it("isAscii", () => {
  // Buffer (ArrayBufferView) inputs.
  const plainText = new Buffer("abc");
  expect(isAscii(plainText)).toBeTrue();

  const emptyView = new Buffer("");
  expect(isAscii(emptyView)).toBeTrue();

  const withHighByte = new Buffer([32, 32, 128]);
  expect(isAscii(withHighByte)).toBeFalse();

  const withEmoji = new Buffer("What did the 🦊 say?");
  expect(isAscii(withEmoji)).toBeFalse();

  // Underlying ArrayBuffer inputs.
  const emptyArrayBuffer = new Buffer("").buffer;
  expect(isAscii(emptyArrayBuffer)).toBeTrue();

  const highByteArrayBuffer = new Buffer([32, 32, 128]).buffer;
  expect(isAscii(highByteArrayBuffer)).toBeFalse();
});

it("isUtf8", () => {
  // Buffer (ArrayBufferView) inputs.
  expect(isUtf8(new Buffer("abc"))).toBeTrue();
  // Empty input counts as valid UTF-8. This line previously called isAscii,
  // so the isUtf8 empty-view path was never exercised.
  expect(isUtf8(new Buffer(""))).toBeTrue();
  expect(isUtf8(new Buffer("What did the 🦊 say?"))).toBeTrue();
  // Lone continuation bytes are not valid UTF-8.
  expect(isUtf8(new Buffer([129, 129, 129]))).toBeFalse();

  // Underlying ArrayBuffer inputs.
  expect(isUtf8(new Buffer("abc").buffer)).toBeTrue();
  // Previously called isAscii; now covers the isUtf8 empty-ArrayBuffer path.
  expect(isUtf8(new Buffer("").buffer)).toBeTrue();
  expect(isUtf8(new Buffer("What did the 🦊 say?").buffer)).toBeTrue();
  expect(isUtf8(new Buffer([129, 129, 129]).buffer)).toBeFalse();
});

// https://github.com/oven-sh/bun/issues/2052
it("Buffer global is settable", () => {
var prevBuffer = globalThis.Buffer;
Expand Down

0 comments on commit 6528f5c

Please sign in to comment.