Skip to content

Commit

Permalink
[Arm64] Implement MultiplyHigh
Browse files Browse the repository at this point in the history
Closes dotnet/runtime#43106

In addition to implementing the intrinsics, I have updated the `System.Math:BigMul(long,long,byref):long` implementation in System.Private.CoreLib. The following is the resulting codegen for these methods (the first listing uses `umulh`, the second uses `smulh`):
```asm
; Assembly listing for method System.Math:BigMul(long,long,byref):long
; Emitting BLENDED_CODE for generic ARM64 CPU - Windows
; ReadyToRun compilation
; optimized code
; fp based frame
; partially interruptible
; Final local variable assignments
;
;  V00 arg0         [V00,T00] (  4,  4   )    long  ->   x0
;  V01 arg1         [V01,T01] (  4,  4   )    long  ->   x1
;  V02 arg2         [V02,T02] (  3,  3   )   byref  ->   x2
;# V03 OutArgs      [V03    ] (  1,  1   )  lclBlk ( 0) [sp+0x00]   "OutgoingArgSpace"
;
; Lcl frame size = 0

G_M18264_IG01:              ;; offset=0000H
        A9BF7BFD          stp     fp, lr, [sp,#-16]!
        910003FD          mov     fp, sp
						;; bbWeight=1    PerfScore 1.50
G_M18264_IG02:              ;; offset=0008H
        9B017C03          mul     x3, x0, x1
        F9000043          str     x3, [x2]
        9BC17C00          umulh   x0, x0, x1
						;; bbWeight=1    PerfScore 8.00
G_M18264_IG03:              ;; offset=0014H
        A8C17BFD          ldp     fp, lr, [sp],#16
        D65F03C0          ret     lr
						;; bbWeight=1    PerfScore 2.00

; Total bytes of code 28, prolog size 8, PerfScore 14.30, instruction count 7, allocated bytes for code 28 (MethodHash=96edb8a7) for method System.Math:BigMul(long,long,byref):long
; ============================================================

; Assembly listing for method System.Math:BigMul(long,long,byref):long
; Emitting BLENDED_CODE for generic ARM64 CPU - Windows
; ReadyToRun compilation
; optimized code
; fp based frame
; partially interruptible
; Final local variable assignments
;
;  V00 arg0         [V00,T00] (  4,  4   )    long  ->   x0
;  V01 arg1         [V01,T01] (  4,  4   )    long  ->   x1
;  V02 arg2         [V02,T02] (  3,  3   )   byref  ->   x2
;* V03 loc0         [V03    ] (  0,  0   )    long  ->  zero-ref
;* V04 loc1         [V04    ] (  0,  0   )    long  ->  zero-ref    ld-addr-op
;# V05 OutArgs      [V05    ] (  1,  1   )  lclBlk ( 0) [sp+0x00]   "OutgoingArgSpace"
;
; Lcl frame size = 0

G_M18264_IG01:              ;; offset=0000H
        A9BF7BFD          stp     fp, lr, [sp,#-16]!
        910003FD          mov     fp, sp
						;; bbWeight=1    PerfScore 1.50
G_M18264_IG02:              ;; offset=0008H
        9B017C03          mul     x3, x0, x1
        F9000043          str     x3, [x2]
        9B417C00          smulh   x0, x0, x1
						;; bbWeight=1    PerfScore 8.00
G_M18264_IG03:              ;; offset=0014H
        A8C17BFD          ldp     fp, lr, [sp],#16
        D65F03C0          ret     lr
						;; bbWeight=1    PerfScore 2.00

; Total bytes of code 28, prolog size 8, PerfScore 14.30, instruction count 7, allocated bytes for code 28 (MethodHash=96edb8a7) for method System.Math:BigMul(long,long,byref):long
; ============================================================
```
  • Loading branch information
echesakovMSFT authored and echesakovMSFT committed Jan 27, 2021
1 parent 62d990e commit 211da55
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 0 deletions.
16 changes: 16 additions & 0 deletions mono/mini/mini-llvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -9118,6 +9118,22 @@ process_bb (EmitContext *ctx, MonoBasicBlock *bb)
values [ins->dreg] = LLVMBuildCall (builder, get_intrins (ctx, ins->opcode == OP_LSCNT32 ? INTRINS_CTLZ_I32 : INTRINS_CTLZ_I64), args, 2, "");
break;
}
/*
 * Multiply-high: produce the upper 64 bits of the full-width product of
 * lhs and rhs. Both operands are widened to i128 (sign-extended for
 * OP_ARM64_SMULH, zero-extended for OP_ARM64_UMULH), multiplied, and the
 * product is shifted right by 64 and truncated back to i64.
 */
case OP_ARM64_SMULH:
case OP_ARM64_UMULH: {
LLVMValueRef op1, op2;
/* The extension kind is the only difference between the signed and unsigned variants. */
if (ins->opcode == OP_ARM64_SMULH) {
op1 = LLVMBuildSExt (builder, lhs, LLVMInt128Type (), "");
op2 = LLVMBuildSExt (builder, rhs, LLVMInt128Type (), "");
} else {
op1 = LLVMBuildZExt (builder, lhs, LLVMInt128Type (), "");
op2 = LLVMBuildZExt (builder, rhs, LLVMInt128Type (), "");
}
LLVMValueRef mul = LLVMBuildMul (builder, op1, op2, "");
/*
 * A logical shift is sufficient for the signed case as well: after the
 * truncation below only bits 64..127 of the product remain, and those are
 * the same for a logical and an arithmetic shift by 64.
 */
LLVMValueRef hi64 = LLVMBuildLShr (builder, mul,
LLVMConstInt (LLVMInt128Type (), 64, FALSE), "");
values [ins->dreg] = LLVMBuildTrunc (builder, hi64, LLVMInt64Type (), "");
break;
}
#endif

case OP_DUMMY_USE:
Expand Down
2 changes: 2 additions & 0 deletions mono/mini/mini-ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -1579,4 +1579,6 @@ MINI_OP(OP_POPCNT64, "popcnt64", LREG, LREG, NONE)
#ifdef TARGET_ARM64
MINI_OP(OP_LSCNT32, "lscnt32", IREG, IREG, NONE)
MINI_OP(OP_LSCNT64, "lscnt64", LREG, LREG, NONE)
MINI_OP(OP_ARM64_SMULH, "arm64_smulh", LREG, LREG, LREG)
MINI_OP(OP_ARM64_UMULH, "arm64_umulh", LREG, LREG, LREG)
#endif // TARGET_ARM64
4 changes: 4 additions & 0 deletions mono/mini/simd-intrinsics-netcore.c
Original file line number Diff line number Diff line change
Expand Up @@ -807,6 +807,7 @@ emit_invalid_operation (MonoCompile *cfg, const char* message)
static SimdIntrinsic armbase_methods [] = {
{SN_LeadingSignCount},
{SN_LeadingZeroCount},
{SN_MultiplyHigh},
{SN_ReverseElementBits},
{SN_get_IsSupported}
};
Expand Down Expand Up @@ -870,6 +871,9 @@ emit_arm64_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignatur
return emit_simd_ins_for_sig (cfg, klass, arg0_i32 ? OP_LZCNT32 : OP_LZCNT64, 0, arg0_type, fsig, args);
case SN_LeadingSignCount:
return emit_simd_ins_for_sig (cfg, klass, arg0_i32 ? OP_LSCNT32 : OP_LSCNT64, 0, arg0_type, fsig, args);
case SN_MultiplyHigh:
return emit_simd_ins_for_sig (cfg, klass,
(arg0_type == MONO_TYPE_I8 ? OP_ARM64_SMULH : OP_ARM64_UMULH), 0, arg0_type, fsig, args);
case SN_ReverseElementBits:
return emit_simd_ins_for_sig (cfg, klass,
(is_64bit ? OP_XOP_I8_I8 : OP_XOP_I4_I4),
Expand Down

0 comments on commit 211da55

Please sign in to comment.