Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[mono][jit] Transition the x86 backend to use SSE for fp arithmetic. #65723

Merged
merged 10 commits into from
Aug 8, 2022
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,6 @@ public static void op_Increment(float value)
[InlineData(0.0f, 3.14f)]
[InlineData(4567.0f, -3.14f)]
[InlineData(4567.89101f, -3.14569f)]
[ActiveIssue("https://github.com/dotnet/runtime/issues/65557", typeof(PlatformDetection), nameof(PlatformDetection.IsAndroid), nameof(PlatformDetection.Is32BitProcess))]
public static void op_Addition(float left, float right)
{
NFloat result = new NFloat(left) + new NFloat(right);
Expand All @@ -253,7 +252,6 @@ public static void op_Addition(float left, float right)
[InlineData(0.0f, 3.14f)]
[InlineData(4567.0f, -3.14f)]
[InlineData(4567.89101f, -3.14569f)]
[ActiveIssue("https://github.com/dotnet/runtime/issues/65557", typeof(PlatformDetection), nameof(PlatformDetection.IsAndroid), nameof(PlatformDetection.Is32BitProcess))]
public static void op_Subtraction(float left, float right)
{
NFloat result = new NFloat(left) - new NFloat(right);
Expand All @@ -274,7 +272,6 @@ public static void op_Subtraction(float left, float right)
[InlineData(0.0f, 3.14f)]
[InlineData(4567.0f, -3.14f)]
[InlineData(4567.89101f, -3.14569f)]
[ActiveIssue("https://github.com/dotnet/runtime/issues/65557", typeof(PlatformDetection), nameof(PlatformDetection.IsAndroid), nameof(PlatformDetection.Is32BitProcess))]
public static void op_Multiply(float left, float right)
{
NFloat result = new NFloat(left) * new NFloat(right);
Expand All @@ -295,7 +292,6 @@ public static void op_Multiply(float left, float right)
[InlineData(0.0f, 3.14f)]
[InlineData(4567.0f, -3.14f)]
[InlineData(4567.89101f, -3.14569f)]
[ActiveIssue("https://github.com/dotnet/runtime/issues/65557", typeof(PlatformDetection), nameof(PlatformDetection.IsAndroid), nameof(PlatformDetection.Is32BitProcess))]
public static void op_Division(float left, float right)
{
NFloat result = new NFloat(left) / new NFloat(right);
Expand Down
509 changes: 500 additions & 9 deletions src/mono/mono/arch/x86/x86-codegen.h

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion src/mono/mono/mini/aot-compiler.c
Original file line number Diff line number Diff line change
Expand Up @@ -12638,7 +12638,8 @@ compile_asm (MonoAotCompile *acfg)
#define LD_NAME "clang"
#define LD_OPTIONS "-m32 -dynamiclib"
#elif defined(TARGET_X86) && !defined(TARGET_MACH)
#define LD_OPTIONS "-m elf_i386 -Bsymbolic"
#define LD_NAME "ld"
#define LD_OPTIONS "--shared -m elf_i386"
#elif defined(TARGET_ARM) && !defined(TARGET_ANDROID)
#define LD_NAME "gcc"
#define LD_OPTIONS "--shared -Wl,-Bsymbolic"
Expand Down
77 changes: 56 additions & 21 deletions src/mono/mono/mini/cpu-x86.mdesc
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ endfinally: len:16
endfilter: src1:a len:16
get_ex_obj: dest:a len:16

ckfinite: dest:f src1:f len:32
ckfinite: dest:f src1:f len:40
ceq: dest:y len:6
cgt: dest:y len:6
cgt_un: dest:y len:6
Expand All @@ -153,14 +153,18 @@ localloc: dest:i src1:i len:120
compare: src1:i src2:i len:2
compare_imm: src1:i len:6
fcompare: src1:f src2:f clob:a len:9
rcompare: src1:f src2:f clob:a len:13
arglist: src1:b len:10
check_this: src1:b len:3
voidcall: len:17 clob:c
voidcall_reg: src1:i len:11 clob:c
voidcall_membase: src1:b len:16 clob:c
fcall: dest:f len:17 clob:c
fcall_reg: dest:f src1:i len:11 clob:c
fcall_membase: dest:f src1:b len:16 clob:c
fcall: dest:f len:28 clob:c
fcall_reg: dest:f src1:i len:28 clob:c
fcall_membase: dest:f src1:b len:28 clob:c
rcall: dest:f len:28 clob:c
rcall_reg: dest:f src1:i len:28 clob:c
rcall_membase: dest:f src1:b len:28 clob:c
lcall: dest:l len:17 clob:c
lcall_reg: dest:l src1:i len:11 clob:c
lcall_membase: dest:l src1:b len:16 clob:c
Expand All @@ -170,8 +174,8 @@ vcall_membase: src1:b len:16 clob:c
call_reg: dest:a src1:i len:11 clob:c
call_membase: dest:a src1:b len:16 clob:c
iconst: dest:i len:5
r4const: dest:f len:15
r8const: dest:f len:16
r4const: dest:f len:24
r8const: dest:f len:24
store_membase_imm: dest:b len:11
store_membase_reg: dest:b src1:i len:7
storei1_membase_imm: dest:b len:10
Expand All @@ -182,8 +186,8 @@ storei4_membase_imm: dest:b len:10
storei4_membase_reg: dest:b src1:i len:7
storei8_membase_imm: dest:b
storei8_membase_reg: dest:b src1:i
storer4_membase_reg: dest:b src1:f len:7
storer8_membase_reg: dest:b src1:f len:7
storer4_membase_reg: dest:b src1:f len:9
storer8_membase_reg: dest:b src1:f len:9
load_membase: dest:i src1:b len:7
loadi1_membase: dest:y src1:b len:7
loadu1_membase: dest:y src1:b len:7
Expand All @@ -192,8 +196,8 @@ loadu2_membase: dest:i src1:b len:7
loadi4_membase: dest:i src1:b len:7
loadu4_membase: dest:i src1:b len:7
loadi8_membase: dest:i src1:b
loadr4_membase: dest:f src1:b len:7
loadr8_membase: dest:f src1:b len:7
loadr4_membase: dest:f src1:b len:9
loadr8_membase: dest:f src1:b len:9
loadu4_mem: dest:i len:9
move: dest:i src1:i len:2
addcc_imm: dest:i src1:i len:6 clob:1
Expand Down Expand Up @@ -237,25 +241,26 @@ float_bge: len:22
float_bge_un: len:12
float_ble: len:22
float_ble_un: len:12
float_add: dest:f src1:f src2:f len:2
float_sub: dest:f src1:f src2:f len:2
float_mul: dest:f src1:f src2:f len:2
float_div: dest:f src1:f src2:f len:2
float_div_un: dest:f src1:f src2:f len:2
float_add: dest:f src1:f src2:f len:8
float_sub: dest:f src1:f src2:f len:8
float_mul: dest:f src1:f src2:f len:8
float_div: dest:f src1:f src2:f len:8
float_div_un: dest:f src1:f src2:f len:8
float_rem: dest:f src1:f src2:f len:17
float_rem_un: dest:f src1:f src2:f len:17
float_neg: dest:f src1:f len:2
float_neg: dest:f src1:f len:24
float_not: dest:f src1:f len:2
float_conv_to_i1: dest:y src1:f len:39
float_conv_to_i2: dest:y src1:f len:39
float_conv_to_i4: dest:i src1:f len:39
float_conv_to_i8: dest:L src1:f len:39
float_conv_to_i8: dest:L src1:f len:50
float_conv_to_u4: dest:i src1:f len:39
float_conv_to_u8: dest:L src1:f len:39
float_conv_to_u2: dest:y src1:f len:39
float_conv_to_u1: dest:y src1:f len:39
float_conv_to_ovf_i: dest:a src1:f len:30
float_conv_to_ovd_u: dest:a src1:f len:30
float_conv_to_r4: dest:f src1:f len:17
float_mul_ovf:
float_ceq: dest:y src1:f src2:f len:25
float_cgt: dest:y src1:f src2:f len:25
Expand Down Expand Up @@ -312,7 +317,7 @@ sbb_imm: dest:i src1:i len:6 clob:1
br_reg: src1:i len:2
sin: dest:f src1:f len:6
cos: dest:f src1:f len:6
abs: dest:f src1:f len:2
abs: dest:f src1:f clob:1 len:16
tan: dest:f src1:f len:49
atan: dest:f src1:f len:8
sqrt: dest:f src1:f len:2
Expand Down Expand Up @@ -423,11 +428,12 @@ cmov_ile_un: dest:i src1:i src2:i len:16 clob:1
cmov_ilt_un: dest:i src1:i src2:i len:16 clob:1

long_conv_to_ovf_i4_2: dest:i src1:i src2:i len:30
long_conv_to_r8_2: dest:f src1:i src2:i len:14
long_conv_to_r4_2: dest:f src1:i src2:i len:14
long_conv_to_r8_2: dest:f src1:i src2:i len:24
long_conv_to_r4_2: dest:f src1:i src2:i len:24
long_conv_to_r_un_2: dest:f src1:i src2:i len:40

fmove: dest:f src1:f
fmove: dest:f src1:f len:4
rmove: dest:f src1:f len:4
move_f_to_i4: dest:i src1:f len:17
move_i4_to_f: dest:f src1:i len:17
float_conv_to_r4: dest:f src1:f len:12
Expand Down Expand Up @@ -671,3 +677,32 @@ set_sp: src1:i len:6
fill_prof_call_ctx: src1:i len:128

get_last_error: dest:i len:32

x86_move_r8_to_fpstack: src1:f len:16
x86_move_r4_to_fpstack: src1:f len:16
iconv_to_r4_raw: dest:f src1:i len:10

# R4 opcodes
r4_conv_to_i1: dest:y src1:f len:32
r4_conv_to_u1: dest:y src1:f len:32
r4_conv_to_i2: dest:y src1:f len:32
r4_conv_to_u2: dest:y src1:f len:32
r4_conv_to_i4: dest:i src1:f len:16
r4_conv_to_u4: dest:i src1:f len:32
r4_conv_to_i8: dest:L src1:f len:64
r4_conv_to_i: dest:i src1:f len:32
r4_conv_to_r8: dest:f src1:f len:17
r4_conv_to_r4: dest:f src1:f len:17
r4_add: dest:f src1:f src2:f clob:1 len:5
r4_sub: dest:f src1:f src2:f clob:1 len:5
r4_mul: dest:f src1:f src2:f clob:1 len:5
r4_div: dest:f src1:f src2:f clob:1 len:5
r4_neg: dest:f src1:f clob:1 len:23
r4_ceq: dest:y src1:f src2:f len:35
r4_cgt: dest:y src1:f src2:f len:35
r4_cgt_un: dest:y src1:f src2:f len:48
r4_clt: dest:y src1:f src2:f len:35
r4_clt_un: dest:y src1:f src2:f len:42
r4_cneq: dest:y src1:f src2:f len:42
r4_cge: dest:y src1:f src2:f len:35
r4_cle: dest:y src1:f src2:f len:35
17 changes: 2 additions & 15 deletions src/mono/mono/mini/local-propagation.c
Original file line number Diff line number Diff line change
Expand Up @@ -623,7 +623,6 @@ mono_local_cprop (MonoCompile *cfg)
/* This avoids propagating local vregs across calls */
((get_vreg_to_inst (cfg, def->sreg1) || !defs [def->sreg1] || (def_index [def->sreg1] >= last_call_index) || (def->opcode == OP_VMOVE))) &&
!(defs [def->sreg1] && mono_inst_next (defs [def->sreg1], filter) == def) &&
(!MONO_ARCH_USE_FPSTACK || (def->opcode != OP_FMOVE)) &&
(def->opcode != OP_FMOVE)) {
int vreg = def->sreg1;

Expand All @@ -640,7 +639,7 @@ mono_local_cprop (MonoCompile *cfg)
/* is_inst_imm is only needed for binops */
if ((((def->opcode == OP_ICONST) || ((sizeof (gpointer) == 8) && (def->opcode == OP_I8CONST)) || (def->opcode == OP_PCONST)))
||
(!MONO_ARCH_USE_FPSTACK && (def->opcode == OP_R8CONST))) {
(def->opcode == OP_R8CONST)) {
guint32 opcode2;

/* srcindex == 1 -> binop, ins->sreg2 == -1 -> unop */
Expand Down Expand Up @@ -815,17 +814,6 @@ mono_local_cprop (MonoCompile *cfg)
}
}

/*
 * reg_is_softreg_no_fpstack:
 *
 *   Return whether REG counts as a soft register for the register bank
 * selected by the spec character SPEC:
 *   'i' - REG is at or above MONO_MAX_IREGS.
 *   'f' - REG is at or above MONO_MAX_FREGS, but never when
 *         MONO_ARCH_USE_FPSTACK is set.
 *   'x' - REG is at or above MONO_MAX_XREGS (only compiled in when
 *         MONO_ARCH_SIMD_INTRINSICS is defined).
 *   'v' - always.
 * Any other spec character yields a false result.
 */
static gboolean
reg_is_softreg_no_fpstack (int reg, const char spec)
{
	/* 'v' qualifies regardless of the register number. */
	gboolean is_soft = (spec == 'v');

	if (spec == 'i')
		is_soft = (reg >= MONO_MAX_IREGS);
	else if (spec == 'f')
		/* Float registers are excluded entirely on fp-stack architectures. */
		is_soft = (reg >= MONO_MAX_FREGS) && !MONO_ARCH_USE_FPSTACK;
#ifdef MONO_ARCH_SIMD_INTRINSICS
	else if (spec == 'x')
		is_soft = (reg >= MONO_MAX_XREGS);
#endif

	return is_soft;
}

static gboolean
reg_is_softreg (int reg, const char spec)
{
Expand Down Expand Up @@ -953,8 +941,7 @@ mono_local_deadce (MonoCompile *cfg)
}
}

/* Enabling this on x86 could screw up the fp stack */
if (reg_is_softreg_no_fpstack (ins->dreg, spec [MONO_INST_DEST])) {
if (reg_is_softreg (ins->dreg, spec [MONO_INST_DEST])) {
/*
* Assignments to global vregs can only be eliminated if there is another
* assignment to the same vreg later in the same bblock.
Expand Down
10 changes: 2 additions & 8 deletions src/mono/mono/mini/method-to-ir.c
Original file line number Diff line number Diff line change
Expand Up @@ -7181,12 +7181,6 @@ mono_method_to_ir (MonoCompile *cfg, MonoMethod *method, MonoBasicBlock *start_b
}
case MONO_CEE_POP:
--sp;

#ifdef TARGET_X86
if (sp [0]->type == STACK_R8)
/* we need to pop the value from the x86 FP stack */
MONO_EMIT_NEW_UNALU (cfg, OP_X86_FPOP, -1, sp [0]->dreg);
#endif
break;
case MONO_CEE_JMP: {
MonoCallInst *call;
Expand Down Expand Up @@ -13057,7 +13051,7 @@ mono_spill_global_vars (MonoCompile *cfg, gboolean *need_local_opts)
* sregs could use it. So set a flag, and do it after
* the sregs.
*/
if ((!cfg->backend->use_fpstack || ((store_opcode != OP_STORER8_MEMBASE_REG) && (store_opcode != OP_STORER4_MEMBASE_REG))) && !((var)->flags & (MONO_INST_VOLATILE|MONO_INST_INDIRECT)))
if (!((var)->flags & (MONO_INST_VOLATILE|MONO_INST_INDIRECT)))
dest_has_lvreg = TRUE;
}
}
Expand Down Expand Up @@ -13147,7 +13141,7 @@ mono_spill_global_vars (MonoCompile *cfg, gboolean *need_local_opts)

sreg = alloc_dreg (cfg, stacktypes [regtype]);

if ((!cfg->backend->use_fpstack || ((load_opcode != OP_LOADR8_MEMBASE) && (load_opcode != OP_LOADR4_MEMBASE))) && !((var)->flags & (MONO_INST_VOLATILE|MONO_INST_INDIRECT)) && !no_lvreg) {
if (!((var)->flags & (MONO_INST_VOLATILE|MONO_INST_INDIRECT)) && !no_lvreg) {
if (var->dreg == prev_dreg) {
/*
* sreg refers to the value loaded by the load
Expand Down
4 changes: 0 additions & 4 deletions src/mono/mono/mini/mini-amd64.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,6 @@ struct sigcontext {
#define MONO_ARCH_USE_SHARED_FP_SIMD_BANK 1
#endif



#if defined(__APPLE__)
#define MONO_ARCH_SIGNAL_STACK_SIZE MINSIGSTKSZ
#else
Expand Down Expand Up @@ -164,8 +162,6 @@ struct sigcontext {
#define MONO_ARCH_CALLEE_REGS AMD64_CALLEE_REGS
#define MONO_ARCH_CALLEE_SAVED_REGS AMD64_CALLEE_SAVED_REGS

#define MONO_ARCH_USE_FPSTACK FALSE

#define MONO_ARCH_INST_FIXED_REG(desc) ((desc == '\0') ? -1 : ((desc == 'i' ? -1 : ((desc == 'a') ? AMD64_RAX : ((desc == 's') ? AMD64_RCX : ((desc == 'd') ? AMD64_RDX : ((desc == 'A') ? MONO_AMD64_ARG_REG1 : -1)))))))

/* RDX is clobbered by the opcode implementation before accessing sreg2 */
Expand Down
2 changes: 0 additions & 2 deletions src/mono/mono/mini/mini-arm.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,6 @@
#define MONO_ARCH_CALLEE_SAVED_FREGS 0x00000000
#endif

#define MONO_ARCH_USE_FPSTACK FALSE

#define MONO_ARCH_INST_SREG2_MASK(ins) (0)

#define MONO_ARCH_INST_FIXED_REG(desc) \
Expand Down
4 changes: 0 additions & 4 deletions src/mono/mono/mini/mini-arm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,6 @@

#define MONO_ARCH_CALLEE_XREGS MONO_ARCH_CALLEE_FREGS

#define MONO_ARCH_USE_FPSTACK FALSE

#define MONO_ARCH_INST_SREG2_MASK(ins) (0)

#define MONO_ARCH_INST_FIXED_REG(desc) ((desc) == 'a' ? ARMREG_R0 : -1)
Expand All @@ -68,8 +66,6 @@

#define MONO_ARCH_INST_REGPAIR_REG2(desc,hreg1) (-1)

#define MONO_ARCH_USE_FPSTACK FALSE

#define MONO_ARCH_FRAME_ALIGNMENT 16

#define MONO_ARCH_CODE_ALIGNMENT 32
Expand Down
Loading