Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ISimdVector issue (GVM resolution doesn't report [Intrinsic]?) #93174

Open
EgorBo opened this issue Oct 7, 2023 · 3 comments
Open

ISimdVector issue (GVM resolution doesn't report [Intrinsic]?) #93174

EgorBo opened this issue Oct 7, 2023 · 3 comments
Assignees
Labels
area-CodeGen-coreclr CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI
Milestone

Comments

@EgorBo
Copy link
Member

EgorBo commented Oct 7, 2023

[MethodImpl(MethodImplOptions.NoInlining)]
private static bool Foo1<TVector>(ref int t1, ref int t2)
    where TVector : struct, ISimdVector<TVector, int>
{
    TVector vec1 = TVector.LoadUnsafe(ref t1);
    TVector vec2 = TVector.LoadUnsafe(ref t2);
    return vec1 == vec2;
}

[MethodImpl(MethodImplOptions.NoInlining)]
private static bool Foo2<TVector>(ref int t1, ref int t2)
    where TVector : struct, ISimdVector<TVector, int>
{
    TVector vec1 = TVector.LoadUnsafe(ref t1);
    TVector vec2 = TVector.LoadUnsafe(ref t2);
    return vec1.Equals(vec2);
}

Current codegen for FooX<Vector128<int>>(...):

; Assembly listing for method Foo1
G_M44695_IG01:  ;; offset=0x0000
       sub      rsp, 72
       vzeroupper 
G_M44695_IG02:  ;; offset=0x0007
       vmovups  xmm0, xmmword ptr [rcx]
       vmovaps  xmmword ptr [rsp+0x30], xmm0
       vmovups  xmm0, xmmword ptr [rdx]
       vmovaps  xmmword ptr [rsp+0x20], xmm0
       mov      rax, qword ptr [rsp+0x30]
       mov      qword ptr [rsp+0x18], rax
       mov      rax, qword ptr [rsp+0x20]
       mov      qword ptr [rsp+0x10], rax
       mov      eax, dword ptr [rsp+0x18]
       cmp      dword ptr [rsp+0x10], eax
       jne      SHORT G_M44695_IG08
G_M44695_IG03:  ;; offset=0x0039
       mov      eax, dword ptr [rsp+0x1C]
       cmp      dword ptr [rsp+0x14], eax
       jne      SHORT G_M44695_IG08
G_M44695_IG04:  ;; offset=0x0043
       mov      rax, qword ptr [rsp+0x38]
       mov      qword ptr [rsp+0x08], rax
       mov      rax, qword ptr [rsp+0x28]
       mov      qword ptr [rsp], rax
       mov      eax, dword ptr [rsp+0x08]
       cmp      dword ptr [rsp], eax
       jne      SHORT G_M44695_IG06
G_M44695_IG05:  ;; offset=0x005F
       mov      eax, dword ptr [rsp+0x0C]
       cmp      dword ptr [rsp+0x04], eax
       jne      SHORT G_M44695_IG06
       mov      eax, 1
       jmp      SHORT G_M44695_IG07
G_M44695_IG06:  ;; offset=0x0070
       xor      eax, eax
G_M44695_IG07:  ;; offset=0x0072
       jmp      SHORT G_M44695_IG09
G_M44695_IG08:  ;; offset=0x0074
       xor      eax, eax
G_M44695_IG09:  ;; offset=0x0076
       add      rsp, 72
       ret      
; Total bytes of code 123


; Assembly listing for method Foo2
G_M34100_IG01:  ;; offset=0x0000
       vzeroupper 
G_M34100_IG02:  ;; offset=0x0003
       vmovups  xmm0, xmmword ptr [rcx]
       vpcmpd   k1, xmm0, xmmword ptr [rdx], 4
       kortestb k1, k1
       sete     al
       movzx    rax, al
G_M34100_IG03:  ;; offset=0x0018
       ret      
; Total bytes of code 25

Both Foo1 and Foo2 are expected to produce the same codegen. It looks like vec1 == vec2; was inlined into slow path of Vector128.op_Equality as if [Intrinsic] didn't exist on it. Equals works fine because it's a non-Intrinsic wrapper around intrinsic ==.

cc @tannergooding @dotnet/jit-contrib

We need to fix this issue before we start adopting ISimdVector

@ghost ghost added the untriaged New issue has not been triaged by the area owner label Oct 7, 2023
@dotnet-issue-labeler dotnet-issue-labeler bot added the area-CodeGen-coreclr CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI label Oct 7, 2023
@ghost
Copy link

ghost commented Oct 7, 2023

Tagging subscribers to this area: @JulieLeeMSFT, @jakobbotsch
See info in area-owners.md if you want to be subscribed.

Issue Details
[MethodImpl(MethodImplOptions.NoInlining)]
private static bool Foo1<TVector>(ref int t1, ref int t2)
    where TVector : struct, ISimdVector<TVector, int>
{
    TVector vec1 = TVector.LoadUnsafe(ref t1);
    TVector vec2 = TVector.LoadUnsafe(ref t2);
    return vec1 == vec2;
}

[MethodImpl(MethodImplOptions.NoInlining)]
private static bool Foo2<TVector>(ref int t1, ref int t2)
    where TVector : struct, ISimdVector<TVector, int>
{
    TVector vec1 = TVector.LoadUnsafe(ref t1);
    TVector vec2 = TVector.LoadUnsafe(ref t2);
    return vec1.Equals(vec2);
}

Current codegen for FooX<Vector128<int>>(...):

; Assembly listing for method Foo1
G_M44695_IG01:  ;; offset=0x0000
       sub      rsp, 72
       vzeroupper 
G_M44695_IG02:  ;; offset=0x0007
       vmovups  xmm0, xmmword ptr [rcx]
       vmovaps  xmmword ptr [rsp+0x30], xmm0
       vmovups  xmm0, xmmword ptr [rdx]
       vmovaps  xmmword ptr [rsp+0x20], xmm0
       mov      rax, qword ptr [rsp+0x30]
       mov      qword ptr [rsp+0x18], rax
       mov      rax, qword ptr [rsp+0x20]
       mov      qword ptr [rsp+0x10], rax
       mov      eax, dword ptr [rsp+0x18]
       cmp      dword ptr [rsp+0x10], eax
       jne      SHORT G_M44695_IG08
G_M44695_IG03:  ;; offset=0x0039
       mov      eax, dword ptr [rsp+0x1C]
       cmp      dword ptr [rsp+0x14], eax
       jne      SHORT G_M44695_IG08
G_M44695_IG04:  ;; offset=0x0043
       mov      rax, qword ptr [rsp+0x38]
       mov      qword ptr [rsp+0x08], rax
       mov      rax, qword ptr [rsp+0x28]
       mov      qword ptr [rsp], rax
       mov      eax, dword ptr [rsp+0x08]
       cmp      dword ptr [rsp], eax
       jne      SHORT G_M44695_IG06
G_M44695_IG05:  ;; offset=0x005F
       mov      eax, dword ptr [rsp+0x0C]
       cmp      dword ptr [rsp+0x04], eax
       jne      SHORT G_M44695_IG06
       mov      eax, 1
       jmp      SHORT G_M44695_IG07
G_M44695_IG06:  ;; offset=0x0070
       xor      eax, eax
G_M44695_IG07:  ;; offset=0x0072
       jmp      SHORT G_M44695_IG09
G_M44695_IG08:  ;; offset=0x0074
       xor      eax, eax
G_M44695_IG09:  ;; offset=0x0076
       add      rsp, 72
       ret      
; Total bytes of code 123


; Assembly listing for method Foo2
G_M34100_IG01:  ;; offset=0x0000
       vzeroupper 
G_M34100_IG02:  ;; offset=0x0003
       vmovups  xmm0, xmmword ptr [rcx]
       vpcmpd   k1, xmm0, xmmword ptr [rdx], 4
       kortestb k1, k1
       sete     al
       movzx    rax, al
G_M34100_IG03:  ;; offset=0x0018
       ret      
; Total bytes of code 25

Both Foo1 and Foo2 are expected to produce the same codegen. It looks like vec1 == vec2; was inlined into slow path of Vector128.op_Equality as if [Intrinsic] didn't exist on it. Equals works fine because it's a non-Intrinsic wrapper around intrinsic ==.

cc @tannergooding @dotnet/jit-contrib

We need to fix this issue before we start adopting ISimdVector

Author: EgorBo
Assignees: -
Labels:

area-CodeGen-coreclr, untriaged

Milestone: -

@EgorBo EgorBo added this to the 9.0.0 milestone Oct 7, 2023
@EgorBo EgorBo removed the untriaged New issue has not been triaged by the area owner label Oct 7, 2023
@ghost ghost added the in-pr There is an active PR which will close this issue when it is merged label Oct 7, 2023
@ghost ghost removed the in-pr There is an active PR which will close this issue when it is merged label Feb 4, 2024
@JulieLeeMSFT JulieLeeMSFT assigned EgorBo and unassigned BruceForstall May 7, 2024
@EgorBo EgorBo modified the milestones: 9.0.0, 10.0.0 Jul 26, 2024
@EgorBo
Copy link
Member Author

EgorBo commented Jul 26, 2024

Just an optimization for an internal feature - moving to 10.0

@tannergooding
Copy link
Member

This should be improved by #104983, which ensures that even explicitly implemented APIs can get treated as intrinsic.

I've also marked that PR for .NET 10, however, as its an optimization for an internal feature as you indicated

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
area-CodeGen-coreclr CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI
Projects
None yet
Development

Successfully merging a pull request may close this issue.

3 participants