Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Wasm jiterpreter cleanup and bug fixes pt. 3 #78782

Merged
merged 12 commits into from
Dec 2, 2022
6 changes: 3 additions & 3 deletions src/mono/mono/mini/interp/interp.c
Original file line number Diff line number Diff line change
Expand Up @@ -2672,7 +2672,7 @@ do_jit_call (ThreadContext *context, stackval *ret_sp, stackval *sp, InterpFrame
goto epilogue;
} else {
int count = cinfo->hit_count;
if (count == mono_opt_jiterpreter_jit_call_trampoline_hit_count) {
if (count == (mono_opt_interp_tier_instantly ? 1 : mono_opt_jiterpreter_jit_call_trampoline_hit_count)) {
void *fn = cinfo->no_wrapper ? cinfo->addr : cinfo->wrapper;
mono_interp_jit_wasm_jit_call_trampoline (
rmethod, cinfo, fn, rmethod->hasthis, rmethod->param_count,
Expand Down Expand Up @@ -7250,15 +7250,15 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;

MINT_IN_CASE(MINT_TIER_ENTER_METHOD) {
frame->imethod->entry_count++;
if (frame->imethod->entry_count > INTERP_TIER_ENTRY_LIMIT && !clause_args)
if (frame->imethod->entry_count > (mono_opt_interp_tier_instantly ? INTERP_TIER_ENTRY_LIMIT_LOW : INTERP_TIER_ENTRY_LIMIT) && !clause_args)
ip = mono_interp_tier_up_frame_enter (frame, context);
else
ip++;
MINT_IN_BREAK;
}
MINT_IN_CASE(MINT_TIER_PATCHPOINT) {
frame->imethod->entry_count++;
if (frame->imethod->entry_count > INTERP_TIER_ENTRY_LIMIT && !clause_args)
if (frame->imethod->entry_count > (mono_opt_interp_tier_instantly ? INTERP_TIER_ENTRY_LIMIT_LOW : INTERP_TIER_ENTRY_LIMIT) && !clause_args)
kg marked this conversation as resolved.
Show resolved Hide resolved
ip = mono_interp_tier_up_frame_patchpoint (frame, context, ip [1]);
else
ip += 2;
Expand Down
40 changes: 39 additions & 1 deletion src/mono/mono/mini/interp/jiterpreter.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ void jiterp_preserve_module (void);
#include "interp-intrins.h"
#include "tiering.h"

#include <mono/utils/mono-math.h>
#include <mono/mini/mini.h>
#include <mono/mini/mini-runtime.h>
#include <mono/mini/aot-runtime.h>
Expand Down Expand Up @@ -140,7 +141,7 @@ mono_jiterp_increase_entry_count (void *_imethod) {
InterpMethod *imethod = (InterpMethod*)_imethod;
imethod->entry_count++;
// Return whether the trace should bail out because the method needs to be tiered
return imethod->entry_count >= INTERP_TIER_ENTRY_LIMIT;
return imethod->entry_count >= (mono_opt_interp_tier_instantly ? INTERP_TIER_ENTRY_LIMIT_LOW : INTERP_TIER_ENTRY_LIMIT);
}

EMSCRIPTEN_KEEPALIVE void*
Expand Down Expand Up @@ -437,6 +438,43 @@ mono_jiterp_conv_ovf (void *dest, void *src, int opcode) {
return 0;
}

#define JITERP_RELOP(opcode, type, op, noorder) \
case opcode: \
{ \
if (is_unordered) \
return noorder; \
else \
return ((type)lhs op (type)rhs); \
}

EMSCRIPTEN_KEEPALIVE int
mono_jiterp_relop_fp (double lhs, double rhs, int opcode) {
gboolean is_unordered = mono_isunordered (lhs, rhs);
switch (opcode) {
JITERP_RELOP(MINT_CEQ_R4, float, ==, 0);
BrzVlad marked this conversation as resolved.
Show resolved Hide resolved
JITERP_RELOP(MINT_CEQ_R8, double, ==, 0);
JITERP_RELOP(MINT_CNE_R4, float, !=, 1);
JITERP_RELOP(MINT_CNE_R8, double, !=, 1);
JITERP_RELOP(MINT_CGT_R4, float, >, 0);
JITERP_RELOP(MINT_CGT_R8, double, >, 0);
JITERP_RELOP(MINT_CGE_R4, float, >=, 0);
JITERP_RELOP(MINT_CGE_R8, double, >=, 0);
JITERP_RELOP(MINT_CGT_UN_R4, float, >, 1);
JITERP_RELOP(MINT_CGT_UN_R8, double, >, 1);
JITERP_RELOP(MINT_CLT_R4, float, <, 0);
JITERP_RELOP(MINT_CLT_R8, double, <, 0);
JITERP_RELOP(MINT_CLT_UN_R4, float, <, 1);
JITERP_RELOP(MINT_CLT_UN_R8, double, <, 1);
JITERP_RELOP(MINT_CLE_R4, float, <=, 0);
JITERP_RELOP(MINT_CLE_R8, double, <=, 0);

default:
g_assert_not_reached();
}
}

#undef JITERP_RELOP

// we use these helpers at JIT time to figure out where to do memory loads and stores
EMSCRIPTEN_KEEPALIVE size_t
mono_jiterp_get_offset_of_vtable_initialized_flag () {
Expand Down
1 change: 1 addition & 0 deletions src/mono/mono/mini/interp/tiering.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include "interp-internals.h"

#define INTERP_TIER_ENTRY_LIMIT_LOW 2
kg marked this conversation as resolved.
Show resolved Hide resolved
#define INTERP_TIER_ENTRY_LIMIT 1000

void
Expand Down
1 change: 1 addition & 0 deletions src/mono/mono/utils/options-def.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ DEFINE_BOOL_READONLY(readonly_flag, "readonly-flag", FALSE, "Example")
DEFINE_BOOL(wasm_exceptions, "wasm-exceptions", FALSE, "Enable codegen for WASM exceptions")
DEFINE_BOOL(wasm_gc_safepoints, "wasm-gc-safepoints", FALSE, "Use GC safepoints on WASM")
DEFINE_BOOL(aot_lazy_assembly_load, "aot-lazy-assembly-load", FALSE, "Load assemblies referenced by AOT images lazily")
DEFINE_BOOL(interp_tier_instantly, "interp-tier-instantly", FALSE, "Immediately tier up and optimize interpreter methods")

#if HOST_BROWSER

Expand Down
140 changes: 100 additions & 40 deletions src/mono/wasm/runtime/jiterpreter-support.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ import { WasmOpcode } from "./jiterpreter-opcodes";
import cwraps from "./cwraps";

export const maxFailures = 2,
    // try_append_memset_fast refuses to unroll fills of this many bytes or more
    maxMemsetSize = 64,
    // try_append_memmove_fast refuses to unroll copies of this many bytes or more
    maxMemmoveSize = 64;

// uint16
export declare interface MintOpcodePtr extends NativePointer {
Expand Down Expand Up @@ -661,13 +662,14 @@ export function try_append_memset_fast (builder: WasmBuilder, localOffset: numbe
if (count >= maxMemsetSize)
return false;

const destLocal = destOnStack ? "math_lhs32" : "pLocals";
if (destOnStack)
builder.local("math_lhs32", WasmOpcode.set_local);

let offset = destOnStack ? 0 : localOffset;
// Do blocks of 8-byte sets first for smaller/faster code
while (count >= 8) {
builder.local(destOnStack ? "math_lhs32" : "pLocals");
builder.local(destLocal);
builder.i52_const(0);
builder.appendU8(WasmOpcode.i64_store);
builder.appendMemarg(offset, 0);
Expand All @@ -677,7 +679,7 @@ export function try_append_memset_fast (builder: WasmBuilder, localOffset: numbe

// Then set the remaining 0-7 bytes
while (count >= 1) {
builder.local(destOnStack ? "math_lhs32" : "pLocals");
builder.local(destLocal);
builder.i32_const(0);
let localCount = count % 4;
switch (localCount) {
Expand Down Expand Up @@ -716,45 +718,100 @@ export function append_memset_dest (builder: WasmBuilder, value: number, count:
builder.appendU8(0);
}

/**
 * Tries to emit a fully unrolled sequence of load/store pairs that copies a
 * compile-time-known number of bytes.
 *
 * @param builder the wasm builder the instructions are appended to
 * @param destLocalOffset byte offset of the destination, used as the store memarg
 *  offset relative to the "pLocals" local when addressesOnStack is false
 * @param srcLocalOffset byte offset of the source, used as the load memarg offset
 *  relative to the "pLocals" local when addressesOnStack is false
 * @param count number of bytes to copy
 * @param addressesOnStack when true, the destination and then the source address
 *  are expected on the wasm stack; they are moved into the "math_lhs32" and
 *  "math_rhs32" locals and both offsets start at 0
 * @returns true if code was emitted (or there was nothing to copy); false, with
 *  nothing emitted, when count is maxMemmoveSize or larger
 */
export function try_append_memmove_fast (
    builder: WasmBuilder, destLocalOffset: number, srcLocalOffset: number,
    count: number, addressesOnStack: boolean
) {
    // Empty copy: the only work is discarding the two addresses the caller pushed
    if (count <= 0) {
        if (addressesOnStack) {
            builder.appendU8(WasmOpcode.drop);
            builder.appendU8(WasmOpcode.drop);
        }
        return true;
    }

    if (count >= maxMemmoveSize)
        return false;

    const destLocal = addressesOnStack ? "math_lhs32" : "pLocals";
    const srcLocal = addressesOnStack ? "math_rhs32" : "pLocals";
    if (addressesOnStack) {
        // Source is on top of the stack, so it has to be stored first
        builder.local(srcLocal, WasmOpcode.set_local);
        builder.local(destLocal, WasmOpcode.set_local);
    }

    let destOffset = addressesOnStack ? 0 : destLocalOffset;
    let srcOffset = addressesOnStack ? 0 : srcLocalOffset;
    let remaining = count;

    // Emits one load/store pair moving `size` bytes, then advances both offsets
    const emitCopy = (loadOp: WasmOpcode, storeOp: WasmOpcode, size: number) => {
        builder.local(destLocal);
        builder.local(srcLocal);
        builder.appendU8(loadOp);
        builder.appendMemarg(srcOffset, 0);
        builder.appendU8(storeOp);
        builder.appendMemarg(destOffset, 0);
        srcOffset += size;
        destOffset += size;
        remaining -= size;
    };

    // 8-byte chunks first for smaller/faster code
    while (remaining >= 8)
        emitCopy(WasmOpcode.i64_load, WasmOpcode.i64_store, 8);

    // Then the trailing 0-7 bytes, sized by the remainder modulo 4
    while (remaining >= 1) {
        const tail = remaining % 4;
        if (tail === 0) {
            // A remainder of 0 here means a whole 4-byte word is left
            emitCopy(WasmOpcode.i32_load, WasmOpcode.i32_store, 4);
        } else if (tail === 2 || tail === 3) {
            // 3 bytes are handled as a 2-byte copy now and a 1-byte copy next pass
            emitCopy(WasmOpcode.i32_load16_s, WasmOpcode.i32_store16, 2);
        } else {
            emitCopy(WasmOpcode.i32_load8_s, WasmOpcode.i32_store8, 1);
        }
    }

    return true;
}

/**
 * Emits code that copies `count` bytes between two addresses.
 * Expects dest then source to have been pushed onto the wasm stack.
 *
 * @param builder the wasm builder the instructions are appended to
 * @param count number of bytes to copy
 * @returns always true
 */
export function append_memmove_dest_src (builder: WasmBuilder, count: number) {
    // Prefer the unrolled load/store sequence. When it declines (count is
    // maxMemmoveSize or larger) it returns before emitting anything, so dest
    // and source are still on the stack for the fallback below.
    if (try_append_memmove_fast(builder, 0, 0, count, true))
        return true;

    // spec: pop n, pop s, pop d, copy n bytes from s to d
    builder.i32_const(count);
    // Prefix byte, sub-opcode 10, then two zero bytes; this should be the
    // bulk-memory memory.copy encoding with both memory indices 0 (confirm against spec)
    builder.appendU8(WasmOpcode.PREFIX_sat);
    builder.appendU8(10);
    builder.appendU8(0);
    builder.appendU8(0);
    return true;
}

export function recordFailure () : void {
Expand Down Expand Up @@ -800,6 +857,8 @@ export type JiterpreterOptions = {
countBailouts: boolean;
// Dump the wasm blob for all compiled traces
dumpTraces: boolean;
// Instantly tiers up methods and traces
tierInstantly: boolean;
minimumTraceLength: number;
minimumTraceHitCount: number;
}
Expand All @@ -818,6 +877,7 @@ const optionNames : { [jsName: string] : string } = {
"dumpTraces": "jiterpreter-dump-traces",
"minimumTraceLength": "jiterpreter-minimum-trace-length",
"minimumTraceHitCount": "jiterpreter-minimum-trace-hit-count",
"tierInstantly": "interp-tier-instantly",
};

let optionsVersion = -1;
Expand Down
Loading