diff --git a/src/mono/mono/mini/interp/interp-internals.h b/src/mono/mono/mini/interp/interp-internals.h
index b3af834109f0d..6671c04837b24 100644
--- a/src/mono/mono/mini/interp/interp-internals.h
+++ b/src/mono/mono/mini/interp/interp-internals.h
@@ -26,8 +26,9 @@
 #define TRACING_FLAG 0x1
 #define PROFILING_FLAG 0x2
 
-#define MINT_VT_ALIGNMENT 8
 #define MINT_STACK_SLOT_SIZE (sizeof (stackval))
+// This alignment provides us with straightforward support for Vector128
+#define MINT_STACK_ALIGNMENT (2 * MINT_STACK_SLOT_SIZE)
 
 #define INTERP_STACK_SIZE (1024*1024)
 #define INTERP_REDZONE_SIZE (8*1024)
diff --git a/src/mono/mono/mini/interp/interp.c b/src/mono/mono/mini/interp/interp.c
index 5f58f2148436a..9a6014e9c3ced 100644
--- a/src/mono/mono/mini/interp/interp.c
+++ b/src/mono/mono/mini/interp/interp.c
@@ -403,11 +403,11 @@ get_context (void)
 	ThreadContext *context = (ThreadContext *) mono_native_tls_get_value (thread_context_id);
 	if (context == NULL) {
 		context = g_new0 (ThreadContext, 1);
-		context->stack_start = (guchar*)mono_valloc (0, INTERP_STACK_SIZE, MONO_MMAP_READ | MONO_MMAP_WRITE, MONO_MEM_ACCOUNT_INTERP_STACK);
+		context->stack_start = (guchar*)mono_valloc_aligned (INTERP_STACK_SIZE, MINT_STACK_ALIGNMENT, MONO_MMAP_READ | MONO_MMAP_WRITE, MONO_MEM_ACCOUNT_INTERP_STACK);
 		context->stack_end = context->stack_start + INTERP_STACK_SIZE - INTERP_REDZONE_SIZE;
 		context->stack_real_end = context->stack_start + INTERP_STACK_SIZE;
 		/* We reserve a stack slot at the top of the interp stack to make temp objects visible to GC */
-		context->stack_pointer = context->stack_start + MINT_STACK_SLOT_SIZE;
+		context->stack_pointer = context->stack_start + MINT_STACK_ALIGNMENT;
 
 		frame_data_allocator_init (&context->data_stack, 8192);
 		/* Make sure all data is initialized before publishing the context */
@@ -2226,6 +2226,7 @@ interp_entry (InterpEntryData *data)
 			sp_args = STACK_ADD_BYTES (sp_args, size);
 		}
 	}
+	sp_args = (stackval*)ALIGN_TO (sp_args, MINT_STACK_ALIGNMENT);
 
 	InterpFrame frame = {0};
 	frame.imethod = data->rmethod;
@@ -2600,7 +2601,7 @@ init_jit_call_info (InterpMethod *rmethod, MonoError *error)
 		 * that could end up doing a jit call.
 		 */
 		gint32 size = mono_class_value_size (klass, NULL);
-		cinfo->res_size = ALIGN_TO (size, MINT_VT_ALIGNMENT);
+		cinfo->res_size = ALIGN_TO (size, MINT_STACK_SLOT_SIZE);
 	} else {
 		cinfo->res_size = MINT_STACK_SLOT_SIZE;
 	}
@@ -3103,6 +3104,7 @@ interp_entry_from_trampoline (gpointer ccontext_untyped, gpointer rmethod_untype
 		}
 		newsp = STACK_ADD_BYTES (newsp, size);
 	}
+	newsp = (stackval*)ALIGN_TO (newsp, MINT_STACK_ALIGNMENT);
 	context->stack_pointer = (guchar*)newsp;
 	g_assert (context->stack_pointer < context->stack_end);
 
@@ -3965,6 +3967,15 @@ mono_interp_exec_method (InterpFrame *frame, ThreadContext *context, FrameClause
 			ip = frame->imethod->code;
 			MINT_IN_BREAK;
 		}
+		MINT_IN_CASE(MINT_CALL_ALIGN_STACK) {
+			int call_offset = ip [1];
+			int aligned_call_offset = call_offset + MINT_STACK_SLOT_SIZE;
+			int params_stack_size = ip [2];
+
+			memmove (locals + aligned_call_offset, locals + call_offset, params_stack_size);
+			ip += 3;
+			MINT_IN_BREAK;
+		}
 		MINT_IN_CASE(MINT_CALL_DELEGATE) {
 			// FIXME We don't need to encode the whole signature, just param_count
 			MonoMethodSignature *csignature = (MonoMethodSignature*)frame->imethod->data_items [ip [4]];
@@ -4215,6 +4226,8 @@ mono_interp_exec_method (InterpFrame *frame, ThreadContext *context, FrameClause
 			}
 
 			context->stack_pointer = (guchar*)frame->stack + cmethod->alloca_size;
+			g_assert_checked (((gsize)context->stack_pointer % MINT_STACK_ALIGNMENT) == 0);
+
 			if (G_UNLIKELY (context->stack_pointer >= context->stack_end)) {
 				context->stack_end = context->stack_real_end;
 				THROW_EX (mono_domain_get ()->stack_overflow_ex, ip);
@@ -5592,10 +5605,12 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
 			cmethod = (InterpMethod*)frame->imethod->data_items [ip [2]];
 			return_offset = ip [1];
 			call_args_offset = ip [1];
+			int aligned_call_args_offset = ALIGN_TO (call_args_offset, MINT_STACK_ALIGNMENT);
 
 			int param_size = ip [3];
 			if (param_size)
-				memmove (locals + call_args_offset + MINT_STACK_SLOT_SIZE, locals + call_args_offset, param_size);
+				memmove (locals + aligned_call_args_offset + MINT_STACK_SLOT_SIZE, locals + call_args_offset, param_size);
+			call_args_offset = aligned_call_args_offset;
 			LOCAL_VAR (call_args_offset, gpointer) = NULL;
 			ip += 4;
 			goto call;
@@ -5711,19 +5726,21 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
 			gboolean is_vt = ret_size != 0;
 			if (!is_vt)
 				ret_size = MINT_STACK_SLOT_SIZE;
+			return_offset = call_args_offset;
 
 			cmethod = (InterpMethod*)frame->imethod->data_items [ip [2]];
 
 			MonoClass *newobj_class = cmethod->method->klass;
+			call_args_offset = ALIGN_TO (call_args_offset + ret_size, MINT_STACK_ALIGNMENT);
 
 			// We allocate space on the stack for return value and for this pointer, that is passed to ctor
+			// Here we use return_offset as meaning original call_args_offset
 			if (param_size)
-				memmove (locals + call_args_offset + ret_size + MINT_STACK_SLOT_SIZE, locals + call_args_offset, param_size);
+				memmove (locals + call_args_offset + MINT_STACK_SLOT_SIZE, locals + return_offset, param_size);
 
 			if (is_vt) {
-				this_ptr = locals + call_args_offset;
+				this_ptr = locals + return_offset;
 				memset (this_ptr, 0, ret_size);
-				call_args_offset += ret_size;
 			} else {
 				// FIXME push/pop LMF
 				MonoVTable *vtable = mono_class_vtable_checked (newobj_class, error);
@@ -5735,11 +5752,9 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
 				error_init_reuse (error);
 				this_ptr = mono_object_new_checked (newobj_class, error);
 				mono_interp_error_cleanup (error); // FIXME: do not swallow the error
-				LOCAL_VAR (call_args_offset, gpointer) = this_ptr; // return value
-				call_args_offset += MINT_STACK_SLOT_SIZE;
+				LOCAL_VAR (return_offset, gpointer) = this_ptr; // return value
 			}
 			LOCAL_VAR (call_args_offset, gpointer) = this_ptr;
-			return_offset = call_args_offset; // unused, prevent warning
 			ip += 5;
 			goto call;
 		}
@@ -7340,7 +7355,8 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
 			int len = LOCAL_VAR (ip [2], gint32);
 			gpointer mem;
 			if (len > 0) {
-				mem = frame_data_allocator_alloc (&context->data_stack, frame, ALIGN_TO (len, MINT_VT_ALIGNMENT));
+				// We align len to 8 so we can safely load all primitive types on all platforms
+				mem = frame_data_allocator_alloc (&context->data_stack, frame, ALIGN_TO (len, sizeof (gint64)));
 
 				if (frame->imethod->init_locals)
 					memset (mem, 0, len);
@@ -7940,6 +7956,7 @@ interp_run_clause_with_il_state (gpointer il_state_ptr, int clause_index, MonoOb
 		}
 		findex ++;
 	}
+	sp_args = (stackval*)ALIGN_TO (sp_args, MINT_STACK_ALIGNMENT);
 
 	/* Allocate frame */
 	InterpFrame frame = {0};
diff --git a/src/mono/mono/mini/interp/jiterpreter.c b/src/mono/mono/mini/interp/jiterpreter.c
index 759235247fedd..971e6e8d49a51 100644
--- a/src/mono/mono/mini/interp/jiterpreter.c
+++ b/src/mono/mono/mini/interp/jiterpreter.c
@@ -346,7 +346,7 @@ mono_jiterp_localloc (gpointer *destination, gint32 len, InterpFrame *frame)
 	ThreadContext *context = mono_jiterp_get_context();
 	gpointer mem;
 	if (len > 0) {
-		mem = mono_jiterp_frame_data_allocator_alloc (&context->data_stack, frame, ALIGN_TO (len, MINT_VT_ALIGNMENT));
+		mem = mono_jiterp_frame_data_allocator_alloc (&context->data_stack, frame, ALIGN_TO (len, sizeof (gint64)));
 
 		if (frame->imethod->init_locals)
 			memset (mem, 0, len);
diff --git a/src/mono/mono/mini/interp/mintops.def b/src/mono/mono/mini/interp/mintops.def
index 444ceb417b59d..0fc232f837b32 100644
--- a/src/mono/mono/mini/interp/mintops.def
+++ b/src/mono/mono/mini/interp/mintops.def
@@ -682,6 +682,8 @@ OPDEF(MINT_STRLEN, "strlen", 3, 1, 1, MintOpNoArgs)
 OPDEF(MINT_ARRAY_RANK, "array_rank", 3, 1, 1, MintOpNoArgs)
 OPDEF(MINT_ARRAY_ELEMENT_SIZE, "array_element_size", 3, 1, 1, MintOpNoArgs)
 
+OPDEF(MINT_CALL_ALIGN_STACK, "call_align_stack", 3, 1, 0, MintOpShortInt)
+
 /* Calls */
 OPDEF(MINT_CALL, "call", 4, 1, 1, MintOpMethodToken)
 OPDEF(MINT_CALLVIRT_FAST, "callvirt.fast", 5, 1, 1, MintOpMethodToken)
diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c
index 432a23c390966..4cdcd53efd979 100644
--- a/src/mono/mono/mini/interp/transform.c
+++ b/src/mono/mono/mini/interp/transform.c
@@ -357,10 +357,11 @@ interp_last_ins (InterpBasicBlock *bb)
 		return ret; \
 	} while (0)
 
+// We want to allow any block of stack slots to get moved in order for them to be aligned to MINT_STACK_ALIGNMENT
 #define ENSURE_STACK_SIZE(td, size) \
 	do { \
-		if ((size) > td->max_stack_size) \
-			td->max_stack_size = size; \
+		if ((size) >= td->max_stack_size) \
+			td->max_stack_size = ALIGN_TO (size + MINT_STACK_ALIGNMENT - MINT_STACK_SLOT_SIZE, MINT_STACK_ALIGNMENT); \
 	} while (0)
 
 #define ENSURE_I4(td, sp_off) \
@@ -3238,7 +3239,6 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target
 	MonoMethodSignature *csignature;
 	int is_virtual = *td->ip == CEE_CALLVIRT;
 	int calli = *td->ip == CEE_CALLI || *td->ip == CEE_MONO_CALLI_EXTRA_ARG;
-	guint32 res_size = 0;
 	int op = -1;
 	int native = 0;
 	int need_null_check = is_virtual;
@@ -3529,19 +3529,18 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target
 		MonoClass *klass = mono_class_from_mono_type_internal (csignature->ret);
 
 		if (mt == MINT_TYPE_VT) {
+			guint32 res_size;
 			if (csignature->pinvoke && !csignature->marshalling_disabled && method->wrapper_type != MONO_WRAPPER_NONE)
 				res_size = mono_class_native_size (klass, NULL);
 			else
 				res_size = mono_class_value_size (klass, NULL);
 			push_type_vt (td, klass, res_size);
-			res_size = ALIGN_TO (res_size, MINT_VT_ALIGNMENT);
 			if (mono_class_has_failure (klass)) {
 				mono_error_set_for_class_failure (error, klass);
 				return FALSE;
 			}
 		} else {
 			push_type (td, stack_type[mt], klass);
-			res_size = MINT_STACK_SLOT_SIZE;
 		}
 		dreg = td->sp [-1].local;
 	} else {
@@ -3667,7 +3666,23 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target
 		td->last_ins->flags |= INTERP_INST_FLAG_CALL;
 	}
 	td->ip += 5;
-	td->last_ins->info.call_args = call_args;
+	if (td->last_ins->flags & INTERP_INST_FLAG_CALL) {
+		td->last_ins->info.call_args = call_args;
+		if (!td->optimized) {
+			int call_dreg = td->last_ins->dreg;
+			int call_offset = td->locals [call_dreg].stack_offset;
+			if ((call_offset % MINT_STACK_ALIGNMENT) != 0) {
+				InterpInst *align_ins = interp_insert_ins_bb (td, td->cbb, interp_prev_ins (td->last_ins), MINT_CALL_ALIGN_STACK);
+				interp_ins_set_dreg (align_ins, call_dreg);
+				align_ins->data [0] = params_stack_size;
+				if (calli) {
+					// fp_sreg is at the top of the stack, make sure it is not overwritten by MINT_CALL_ALIGN_STACK
+					int offset = ALIGN_TO (call_offset, MINT_STACK_ALIGNMENT) - call_offset;
+					td->locals [fp_sreg].stack_offset += offset;
+				}
+			}
+		}
+	}
 
 	return TRUE;
 }
@@ -4100,8 +4115,6 @@ interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMet
 	td->locals_capacity = td->locals_size;
 	offset = 0;
 
-	g_assert (MINT_STACK_SLOT_SIZE == MINT_VT_ALIGNMENT);
-
 	/*
 	 * We will load arguments as if they are locals. Unlike normal locals, every argument
 	 * is stored in a stackval sized slot and valuetypes have special semantics since we
@@ -4129,6 +4142,7 @@ interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMet
 			offset += MINT_STACK_SLOT_SIZE;
 		}
 	}
+	offset = ALIGN_TO (offset, MINT_STACK_ALIGNMENT);
 
 	td->il_locals_offset = offset;
 	for (int i = 0; i < num_il_locals; ++i) {
@@ -4156,7 +4170,8 @@ interp_method_compute_offsets (TransformData *td, InterpMethod *imethod, MonoMet
 		// Every local takes a MINT_STACK_SLOT_SIZE so IL locals have same behavior as execution locals
 		offset += ALIGN_TO (size, MINT_STACK_SLOT_SIZE);
 	}
-	offset = ALIGN_TO (offset, MINT_VT_ALIGNMENT);
+	offset = ALIGN_TO (offset, MINT_STACK_ALIGNMENT);
+
 	td->il_locals_size = offset - td->il_locals_offset;
 	td->total_locals_size = offset;
 
@@ -6285,8 +6300,6 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
 			MonoClass *field_klass = mono_class_from_mono_type_internal (ftype);
 			mt = mint_type (ftype);
 			int field_size = mono_class_value_size (field_klass, NULL);
-			int obj_size = mono_class_value_size (klass, NULL);
-			obj_size = ALIGN_TO (obj_size, MINT_VT_ALIGNMENT);
 
 			{
 				if (is_static) {
@@ -8046,6 +8059,9 @@ compute_native_offset_estimates (TransformData *td)
 				foreach_local_var (td, ins, NULL, alloc_unopt_global_local);
 		}
 	}
+
+	if (!td->optimized)
+		td->total_locals_size = ALIGN_TO (td->total_locals_size, MINT_STACK_ALIGNMENT);
 	return noe;
 }
 
@@ -8289,8 +8305,11 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in
 			// same offset. Use the dreg offset so we don't need to rely on existing call_args.
 			if (td->optimized)
 				offset = get_local_offset (td, ins->info.call_args [0]);
-			else
+			else if (opcode == MINT_NEWOBJ_ARRAY || opcode == MINT_LDELEMA_TC || opcode == MINT_LDELEMA)
+				// no alignment required since this is not a real call
 				offset = get_local_offset (td, ins->dreg);
+			else
+				offset = ALIGN_TO (get_local_offset (td, ins->dreg), MINT_STACK_ALIGNMENT);
 			*ip++ = GINT_TO_UINT16 (offset);
 		} else {
 			*ip++ = GINT_TO_UINT16 (get_local_offset (td, ins->sregs [i]));
@@ -10106,6 +10125,7 @@ initialize_global_vars (TransformData *td)
 			foreach_local_var (td, ins, (gpointer)(gsize)bb->index, initialize_global_var_cb);
 		}
 	}
+	td->total_locals_size = ALIGN_TO (td->total_locals_size, MINT_STACK_ALIGNMENT);
 }
 
 // Data structure used for offset allocation of call args
@@ -10152,6 +10172,7 @@ get_call_param_size (TransformData *td, InterpInst *call)
 		call_args++;
 		var = *call_args;
 	}
+	param_size = ALIGN_TO (param_size, MINT_STACK_ALIGNMENT);
 	return param_size;
 }
 
@@ -10473,6 +10494,7 @@ interp_alloc_offsets (TransformData *td)
 			ins_index++;
 		}
 	}
+	final_total_locals_size = ALIGN_TO (final_total_locals_size, MINT_STACK_ALIGNMENT);
 
 	// Iterate over all call args locals, update their final offset (aka add td->total_locals_size to them)
 	// then also update td->total_locals_size to account for this space.
@@ -10484,7 +10506,7 @@ interp_alloc_offsets (TransformData *td)
 			final_total_locals_size = MAX (td->locals [i].offset + td->locals [i].size, final_total_locals_size);
 		}
 	}
-	td->total_locals_size = ALIGN_TO (final_total_locals_size, MINT_STACK_SLOT_SIZE);
+	td->total_locals_size = ALIGN_TO (final_total_locals_size, MINT_STACK_ALIGNMENT);
 }
 
 /*
@@ -10696,6 +10718,7 @@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG
 	// When unoptimized, the param area is stored in the same order, within the IL execution stack.
 	g_assert (!td->optimized || !td->max_stack_size);
 	rtm->alloca_size = td->total_locals_size + td->max_stack_size;
+	g_assert ((rtm->alloca_size % MINT_STACK_ALIGNMENT) == 0);
 	rtm->locals_size = td->optimized ? td->param_area_offset : td->total_locals_size;
 	rtm->data_items = (gpointer*)mono_mem_manager_alloc0 (td->mem_manager, td->n_data_items * sizeof (td->data_items [0]));
 	memcpy (rtm->data_items, td->data_items, td->n_data_items * sizeof (td->data_items [0]));
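
Reviewer note (not part of the patch): the standalone sketch below mirrors the alignment arithmetic the patch relies on. ALIGN_TO is redefined locally, the slot size is hard-coded to 8 bytes, and the offsets are made-up example values, purely for illustration; the real definitions come from interp-internals.h.

/* Standalone sketch of the stack alignment arithmetic used in this patch.
 * ALIGN_TO and the constants are redefined locally so the example compiles
 * on its own; the real definitions live in the interpreter headers. */
#include <stdio.h>

#define ALIGN_TO(val, align) (((val) + (align) - 1) & ~((align) - 1))

#define MINT_STACK_SLOT_SIZE 8                            /* stands in for sizeof (stackval) */
#define MINT_STACK_ALIGNMENT (2 * MINT_STACK_SLOT_SIZE)   /* 16 bytes, enough for Vector128 */

int
main (void)
{
	/* Hypothetical unoptimized call whose arguments start at an odd slot. */
	int call_offset = 24;        /* multiple of the slot size, not of the alignment */
	int params_stack_size = 40;  /* bytes of already materialized arguments */

	if ((call_offset % MINT_STACK_ALIGNMENT) != 0) {
		/* With a two-slot alignment, an unaligned slot offset is exactly one
		 * slot short, so the runtime opcode only shifts the args up by one slot. */
		int aligned_call_offset = call_offset + MINT_STACK_SLOT_SIZE;
		printf ("move %d bytes: offset %d -> %d\n",
				params_stack_size, call_offset, aligned_call_offset);
	}

	/* Sizes handed to the frame allocator are rounded up the same way. */
	printf ("ALIGN_TO (52, %d) = %d\n", MINT_STACK_ALIGNMENT, ALIGN_TO (52, MINT_STACK_ALIGNMENT));
	return 0;
}

Picking the alignment as exactly two stack slots is what allows the single-slot shift above instead of a general ALIGN_TO at run time, which matches how the new opcode computes its aligned offset in the diff.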