Skip to content

Commit

Permalink
[mono][interp] Add 16 byte default alignment to interp frames (#80700)
Browse files Browse the repository at this point in the history
* [mono][interp] Remove MINT_VT_ALIGNMENT

It serves no purpose nowadays. All vars are aligned to MINT_STACK_SLOT_SIZE.

* [mono][interp] Add 16 byte default alignment to optimized code

This will enable us to control alignment of Vector128 vars at compile time.

* [mono][interp] Add stack alignment for unoptimized code

For normal calls, we introduce a new opcode before the call that moves all the arguments to an aligned stack location. When emitting the code for the call, we directly emit the aligned call args offset instead. Unoptimized code also has opcodes of its own that move the params; we tweak them to copy the params into an aligned location.
  • Loading branch information
BrzVlad committed Jan 18, 2023
1 parent 6e773f2 commit 5a17537
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 26 deletions.
3 changes: 2 additions & 1 deletion src/mono/mono/mini/interp/interp-internals.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@
#define TRACING_FLAG 0x1
#define PROFILING_FLAG 0x2

#define MINT_VT_ALIGNMENT 8
#define MINT_STACK_SLOT_SIZE (sizeof (stackval))
// This alignment provides us with straightforward support for Vector128
#define MINT_STACK_ALIGNMENT (2 * MINT_STACK_SLOT_SIZE)

#define INTERP_STACK_SIZE (1024*1024)
#define INTERP_REDZONE_SIZE (8*1024)
Expand Down
39 changes: 28 additions & 11 deletions src/mono/mono/mini/interp/interp.c
Original file line number Diff line number Diff line change
Expand Up @@ -403,11 +403,11 @@ get_context (void)
ThreadContext *context = (ThreadContext *) mono_native_tls_get_value (thread_context_id);
if (context == NULL) {
context = g_new0 (ThreadContext, 1);
context->stack_start = (guchar*)mono_valloc (0, INTERP_STACK_SIZE, MONO_MMAP_READ | MONO_MMAP_WRITE, MONO_MEM_ACCOUNT_INTERP_STACK);
context->stack_start = (guchar*)mono_valloc_aligned (INTERP_STACK_SIZE, MINT_STACK_ALIGNMENT, MONO_MMAP_READ | MONO_MMAP_WRITE, MONO_MEM_ACCOUNT_INTERP_STACK);
context->stack_end = context->stack_start + INTERP_STACK_SIZE - INTERP_REDZONE_SIZE;
context->stack_real_end = context->stack_start + INTERP_STACK_SIZE;
/* We reserve a stack slot at the top of the interp stack to make temp objects visible to GC */
context->stack_pointer = context->stack_start + MINT_STACK_SLOT_SIZE;
context->stack_pointer = context->stack_start + MINT_STACK_ALIGNMENT;

frame_data_allocator_init (&context->data_stack, 8192);
/* Make sure all data is initialized before publishing the context */
Expand Down Expand Up @@ -2226,6 +2226,7 @@ interp_entry (InterpEntryData *data)
sp_args = STACK_ADD_BYTES (sp_args, size);
}
}
sp_args = (stackval*)ALIGN_TO (sp_args, MINT_STACK_ALIGNMENT);

InterpFrame frame = {0};
frame.imethod = data->rmethod;
Expand Down Expand Up @@ -2600,7 +2601,7 @@ init_jit_call_info (InterpMethod *rmethod, MonoError *error)
* that could end up doing a jit call.
*/
gint32 size = mono_class_value_size (klass, NULL);
cinfo->res_size = ALIGN_TO (size, MINT_VT_ALIGNMENT);
cinfo->res_size = ALIGN_TO (size, MINT_STACK_SLOT_SIZE);
} else {
cinfo->res_size = MINT_STACK_SLOT_SIZE;
}
Expand Down Expand Up @@ -3103,6 +3104,7 @@ interp_entry_from_trampoline (gpointer ccontext_untyped, gpointer rmethod_untype
}
newsp = STACK_ADD_BYTES (newsp, size);
}
newsp = (stackval*)ALIGN_TO (newsp, MINT_STACK_ALIGNMENT);
context->stack_pointer = (guchar*)newsp;
g_assert (context->stack_pointer < context->stack_end);

Expand Down Expand Up @@ -3965,6 +3967,15 @@ mono_interp_exec_method (InterpFrame *frame, ThreadContext *context, FrameClause
ip = frame->imethod->code;
MINT_IN_BREAK;
}
// Shifts a call's argument area up by one stack slot ahead of the call itself.
// The transform pass inserts this opcode, for unoptimized code only, when the
// call's stack offset is not a multiple of MINT_STACK_ALIGNMENT.
MINT_IN_CASE(MINT_CALL_ALIGN_STACK) {
// ip [1]: byte offset into locals where the arguments currently start
int call_offset = ip [1];
// Destination is exactly one slot higher.
// NOTE(review): this lands on an aligned offset only if call_offset is slot-aligned
// and MINT_STACK_ALIGNMENT is two slots -- confirm both still hold if either changes
int aligned_call_offset = call_offset + MINT_STACK_SLOT_SIZE;
// ip [2]: number of bytes of arguments to move
int params_stack_size = ip [2];

// Source and destination are only one slot apart, so the ranges overlap whenever
// more than one slot is moved; memmove handles that, memcpy would not
memmove (locals + aligned_call_offset, locals + call_offset, params_stack_size);
// Skip the opcode and its two operands
ip += 3;
MINT_IN_BREAK;
}
MINT_IN_CASE(MINT_CALL_DELEGATE) {
// FIXME We don't need to encode the whole signature, just param_count
MonoMethodSignature *csignature = (MonoMethodSignature*)frame->imethod->data_items [ip [4]];
Expand Down Expand Up @@ -4215,6 +4226,8 @@ mono_interp_exec_method (InterpFrame *frame, ThreadContext *context, FrameClause
}

context->stack_pointer = (guchar*)frame->stack + cmethod->alloca_size;
g_assert_checked (((gsize)context->stack_pointer % MINT_STACK_ALIGNMENT) == 0);

if (G_UNLIKELY (context->stack_pointer >= context->stack_end)) {
context->stack_end = context->stack_real_end;
THROW_EX (mono_domain_get ()->stack_overflow_ex, ip);
Expand Down Expand Up @@ -5592,10 +5605,12 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
cmethod = (InterpMethod*)frame->imethod->data_items [ip [2]];
return_offset = ip [1];
call_args_offset = ip [1];
int aligned_call_args_offset = ALIGN_TO (call_args_offset, MINT_STACK_ALIGNMENT);

int param_size = ip [3];
if (param_size)
memmove (locals + call_args_offset + MINT_STACK_SLOT_SIZE, locals + call_args_offset, param_size);
memmove (locals + aligned_call_args_offset + MINT_STACK_SLOT_SIZE, locals + call_args_offset, param_size);
call_args_offset = aligned_call_args_offset;
LOCAL_VAR (call_args_offset, gpointer) = NULL;
ip += 4;
goto call;
Expand Down Expand Up @@ -5711,19 +5726,21 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
gboolean is_vt = ret_size != 0;
if (!is_vt)
ret_size = MINT_STACK_SLOT_SIZE;
return_offset = call_args_offset;

cmethod = (InterpMethod*)frame->imethod->data_items [ip [2]];

MonoClass *newobj_class = cmethod->method->klass;

call_args_offset = ALIGN_TO (call_args_offset + ret_size, MINT_STACK_ALIGNMENT);
// We allocate space on the stack for return value and for this pointer, that is passed to ctor
// Here we use return_offset as meaning original call_args_offset
if (param_size)
memmove (locals + call_args_offset + ret_size + MINT_STACK_SLOT_SIZE, locals + call_args_offset, param_size);
memmove (locals + call_args_offset + MINT_STACK_SLOT_SIZE, locals + return_offset, param_size);

if (is_vt) {
this_ptr = locals + call_args_offset;
this_ptr = locals + return_offset;
memset (this_ptr, 0, ret_size);
call_args_offset += ret_size;
} else {
// FIXME push/pop LMF
MonoVTable *vtable = mono_class_vtable_checked (newobj_class, error);
Expand All @@ -5735,11 +5752,9 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
error_init_reuse (error);
this_ptr = mono_object_new_checked (newobj_class, error);
mono_interp_error_cleanup (error); // FIXME: do not swallow the error
LOCAL_VAR (call_args_offset, gpointer) = this_ptr; // return value
call_args_offset += MINT_STACK_SLOT_SIZE;
LOCAL_VAR (return_offset, gpointer) = this_ptr; // return value
}
LOCAL_VAR (call_args_offset, gpointer) = this_ptr;
return_offset = call_args_offset; // unused, prevent warning
ip += 5;
goto call;
}
Expand Down Expand Up @@ -7340,7 +7355,8 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
int len = LOCAL_VAR (ip [2], gint32);
gpointer mem;
if (len > 0) {
mem = frame_data_allocator_alloc (&context->data_stack, frame, ALIGN_TO (len, MINT_VT_ALIGNMENT));
// We align len to 8 so we can safely load all primitive types on all platforms
mem = frame_data_allocator_alloc (&context->data_stack, frame, ALIGN_TO (len, sizeof (gint64)));

if (frame->imethod->init_locals)
memset (mem, 0, len);
Expand Down Expand Up @@ -7940,6 +7956,7 @@ interp_run_clause_with_il_state (gpointer il_state_ptr, int clause_index, MonoOb
}
findex ++;
}
sp_args = (stackval*)ALIGN_TO (sp_args, MINT_STACK_ALIGNMENT);

/* Allocate frame */
InterpFrame frame = {0};
Expand Down
2 changes: 1 addition & 1 deletion src/mono/mono/mini/interp/jiterpreter.c
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ mono_jiterp_localloc (gpointer *destination, gint32 len, InterpFrame *frame)
ThreadContext *context = mono_jiterp_get_context();
gpointer mem;
if (len > 0) {
mem = mono_jiterp_frame_data_allocator_alloc (&context->data_stack, frame, ALIGN_TO (len, MINT_VT_ALIGNMENT));
mem = mono_jiterp_frame_data_allocator_alloc (&context->data_stack, frame, ALIGN_TO (len, sizeof (gint64)));

if (frame->imethod->init_locals)
memset (mem, 0, len);
Expand Down
2 changes: 2 additions & 0 deletions src/mono/mono/mini/interp/mintops.def
Original file line number Diff line number Diff line change
Expand Up @@ -682,6 +682,8 @@ OPDEF(MINT_STRLEN, "strlen", 3, 1, 1, MintOpNoArgs)
OPDEF(MINT_ARRAY_RANK, "array_rank", 3, 1, 1, MintOpNoArgs)
OPDEF(MINT_ARRAY_ELEMENT_SIZE, "array_element_size", 3, 1, 1, MintOpNoArgs)

OPDEF(MINT_CALL_ALIGN_STACK, "call_align_stack", 3, 1, 0, MintOpShortInt)

/* Calls */
OPDEF(MINT_CALL, "call", 4, 1, 1, MintOpMethodToken)
OPDEF(MINT_CALLVIRT_FAST, "callvirt.fast", 5, 1, 1, MintOpMethodToken)
Expand Down
49 changes: 36 additions & 13 deletions src/mono/mono/mini/interp/transform.c
Original file line number Diff line number Diff line change
Expand Up @@ -357,10 +357,11 @@ interp_last_ins (InterpBasicBlock *bb)
return ret; \
} while (0)

// We want to allow any block of stack slots to get moved in order for them to be aligned to MINT_STACK_ALIGNMENT
#define ENSURE_STACK_SIZE(td, size) \
do { \
if ((size) > td->max_stack_size) \
td->max_stack_size = size; \
if ((size) >= td->max_stack_size) \
td->max_stack_size = ALIGN_TO (size + MINT_STACK_ALIGNMENT - MINT_STACK_SLOT_SIZE, MINT_STACK_ALIGNMENT); \
} while (0)

#define ENSURE_I4(td, sp_off) \
Expand Down Expand Up @@ -3238,7 +3239,6 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target
MonoMethodSignature *csignature;
int is_virtual = *td->ip == CEE_CALLVIRT;
int calli = *td->ip == CEE_CALLI || *td->ip == CEE_MONO_CALLI_EXTRA_ARG;
guint32 res_size = 0;
int op = -1;
int native = 0;
int need_null_check = is_virtual;
Expand Down Expand Up @@ -3529,19 +3529,18 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target
MonoClass *klass = mono_class_from_mono_type_internal (csignature->ret);

if (mt == MINT_TYPE_VT) {
guint32 res_size;
if (csignature->pinvoke && !csignature->marshalling_disabled && method->wrapper_type != MONO_WRAPPER_NONE)
res_size = mono_class_native_size (klass, NULL);
else
res_size = mono_class_value_size (klass, NULL);
push_type_vt (td, klass, res_size);
res_size = ALIGN_TO (res_size, MINT_VT_ALIGNMENT);
if (mono_class_has_failure (klass)) {
mono_error_set_for_class_failure (error, klass);
return FALSE;
}
} else {
push_type (td, stack_type[mt], klass);
res_size = MINT_STACK_SLOT_SIZE;
}
dreg = td->sp [-1].local;
} else {
Expand Down Expand Up @@ -3667,7 +3666,23 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target
td->last_ins->flags |= INTERP_INST_FLAG_CALL;
}
td->ip += 5;
td->last_ins->info.call_args = call_args;
if (td->last_ins->flags & INTERP_INST_FLAG_CALL) {
td->last_ins->info.call_args = call_args;
if (!td->optimized) {
int call_dreg = td->last_ins->dreg;
int call_offset = td->locals [call_dreg].stack_offset;
if ((call_offset % MINT_STACK_ALIGNMENT) != 0) {
InterpInst *align_ins = interp_insert_ins_bb (td, td->cbb, interp_prev_ins (td->last_ins), MINT_CALL_ALIGN_STACK);
interp_ins_set_dreg (align_ins, call_dreg);
align_ins->data [0] = params_stack_size;
if (calli) {
// fp_sreg is at the top of the stack, make sure it is not overwritten by MINT_CALL_ALIGN_STACK
int offset = ALIGN_TO (call_offset, MINT_STACK_ALIGNMENT) - call_offset;
td->locals [fp_sreg].stack_offset += offset;
}
}
}
}

return TRUE;
}
Expand Down Expand Up @@ -4100,8 +4115,6 @@ interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMet
td->locals_capacity = td->locals_size;
offset = 0;

g_assert (MINT_STACK_SLOT_SIZE == MINT_VT_ALIGNMENT);

/*
* We will load arguments as if they are locals. Unlike normal locals, every argument
* is stored in a stackval sized slot and valuetypes have special semantics since we
Expand Down Expand Up @@ -4129,6 +4142,7 @@ interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMet
offset += MINT_STACK_SLOT_SIZE;
}
}
offset = ALIGN_TO (offset, MINT_STACK_ALIGNMENT);

td->il_locals_offset = offset;
for (int i = 0; i < num_il_locals; ++i) {
Expand Down Expand Up @@ -4156,7 +4170,8 @@ interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMet
// Every local takes a MINT_STACK_SLOT_SIZE so IL locals have same behavior as execution locals
offset += ALIGN_TO (size, MINT_STACK_SLOT_SIZE);
}
offset = ALIGN_TO (offset, MINT_VT_ALIGNMENT);
offset = ALIGN_TO (offset, MINT_STACK_ALIGNMENT);

td->il_locals_size = offset - td->il_locals_offset;
td->total_locals_size = offset;

Expand Down Expand Up @@ -6285,8 +6300,6 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
MonoClass *field_klass = mono_class_from_mono_type_internal (ftype);
mt = mint_type (ftype);
int field_size = mono_class_value_size (field_klass, NULL);
int obj_size = mono_class_value_size (klass, NULL);
obj_size = ALIGN_TO (obj_size, MINT_VT_ALIGNMENT);

{
if (is_static) {
Expand Down Expand Up @@ -8046,6 +8059,9 @@ compute_native_offset_estimates (TransformData *td)
foreach_local_var (td, ins, NULL, alloc_unopt_global_local);
}
}

if (!td->optimized)
td->total_locals_size = ALIGN_TO (td->total_locals_size, MINT_STACK_ALIGNMENT);
return noe;
}

Expand Down Expand Up @@ -8289,8 +8305,11 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in
// same offset. Use the dreg offset so we don't need to rely on existing call_args.
if (td->optimized)
offset = get_local_offset (td, ins->info.call_args [0]);
else
else if (opcode == MINT_NEWOBJ_ARRAY || opcode == MINT_LDELEMA_TC || opcode == MINT_LDELEMA)
// no alignment required since this is not a real call
offset = get_local_offset (td, ins->dreg);
else
offset = ALIGN_TO (get_local_offset (td, ins->dreg), MINT_STACK_ALIGNMENT);
*ip++ = GINT_TO_UINT16 (offset);
} else {
*ip++ = GINT_TO_UINT16 (get_local_offset (td, ins->sregs [i]));
Expand Down Expand Up @@ -10106,6 +10125,7 @@ initialize_global_vars (TransformData *td)
foreach_local_var (td, ins, (gpointer)(gsize)bb->index, initialize_global_var_cb);
}
}
td->total_locals_size = ALIGN_TO (td->total_locals_size, MINT_STACK_ALIGNMENT);
}

// Data structure used for offset allocation of call args
Expand Down Expand Up @@ -10152,6 +10172,7 @@ get_call_param_size (TransformData *td, InterpInst *call)
call_args++;
var = *call_args;
}
param_size = ALIGN_TO (param_size, MINT_STACK_ALIGNMENT);
return param_size;
}

Expand Down Expand Up @@ -10473,6 +10494,7 @@ interp_alloc_offsets (TransformData *td)
ins_index++;
}
}
final_total_locals_size = ALIGN_TO (final_total_locals_size, MINT_STACK_ALIGNMENT);

// Iterate over all call args locals, update their final offset (aka add td->total_locals_size to them)
// then also update td->total_locals_size to account for this space.
Expand All @@ -10484,7 +10506,7 @@ interp_alloc_offsets (TransformData *td)
final_total_locals_size = MAX (td->locals [i].offset + td->locals [i].size, final_total_locals_size);
}
}
td->total_locals_size = ALIGN_TO (final_total_locals_size, MINT_STACK_SLOT_SIZE);
td->total_locals_size = ALIGN_TO (final_total_locals_size, MINT_STACK_ALIGNMENT);
}

/*
Expand Down Expand Up @@ -10696,6 +10718,7 @@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG
// When unoptimized, the param area is stored in the same order, within the IL execution stack.
g_assert (!td->optimized || !td->max_stack_size);
rtm->alloca_size = td->total_locals_size + td->max_stack_size;
g_assert ((rtm->alloca_size % MINT_STACK_ALIGNMENT) == 0);
rtm->locals_size = td->optimized ? td->param_area_offset : td->total_locals_size;
rtm->data_items = (gpointer*)mono_mem_manager_alloc0 (td->mem_manager, td->n_data_items * sizeof (td->data_items [0]));
memcpy (rtm->data_items, td->data_items, td->n_data_items * sizeof (td->data_items [0]));
Expand Down

0 comments on commit 5a17537

Please sign in to comment.