Skip to content

Commit

Permalink
JIT: Handle half accesses for SIMDs in local morph (#89520)
Browse files Browse the repository at this point in the history
While it's not generally expected that halves can be accessed directly (ending
up with LCL_FLD), it sometimes happens in some of the SW implementations of
Vector256/Vector512 methods. In rare cases the JIT still falls back to these
even with there is HW acceleration. In those cases we want to avoid DNER'ing the
involved locals, so expand the existing recognition with these patterns.

Also add a check to the existing SIMD16 -> SIMD12 to verify the source is a
SIMD16.

Fix #85359
Fix #89456

Some size wise regressions are expected, which come down to

- A large number of similar looking tests end up now enregistering some locals
  that cause new upper half saves/restores to be required. This accounts for
  most of the size-wise regressions.
- The expansions often do not result in smaller code because loading/storing the
  halves directly from/to stack is smaller code than the vector equivalent with
  extraction/insertion.

Many of the regressions are in SW implementations of Vector256/Vector512 methods
that we usually do not expect to see called with HW acceleration supported.
  • Loading branch information
jakobbotsch committed Jul 27, 2023
1 parent 7f513e6 commit 87526fb
Show file tree
Hide file tree
Showing 3 changed files with 144 additions and 29 deletions.
129 changes: 100 additions & 29 deletions src/coreclr/jit/lclmorph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -936,18 +936,45 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
var_types elementType = indir->TypeGet();
lclNode = indir->gtGetOp1()->BashToLclVar(m_compiler, lclNum);

if (elementType == TYP_FLOAT)
switch (elementType)
{
GenTree* indexNode = m_compiler->gtNewIconNode(offset / genTypeSize(elementType));
hwiNode = m_compiler->gtNewSimdGetElementNode(elementType, lclNode, indexNode, CORINFO_TYPE_FLOAT,
genTypeSize(varDsc));
}
else
{
assert(elementType == TYP_SIMD12);
assert(genTypeSize(varDsc) == 16);
hwiNode = m_compiler->gtNewSimdHWIntrinsicNode(elementType, lclNode, NI_Vector128_AsVector3,
CORINFO_TYPE_FLOAT, 16);
case TYP_FLOAT:
{
GenTree* indexNode = m_compiler->gtNewIconNode(offset / genTypeSize(elementType));
hwiNode = m_compiler->gtNewSimdGetElementNode(elementType, lclNode, indexNode,
CORINFO_TYPE_FLOAT, genTypeSize(varDsc));
break;
}
case TYP_SIMD12:
{
assert(genTypeSize(varDsc) == 16);
hwiNode = m_compiler->gtNewSimdHWIntrinsicNode(elementType, lclNode, NI_Vector128_AsVector3,
CORINFO_TYPE_FLOAT, 16);
break;
}
case TYP_SIMD8:
#if defined(FEATURE_SIMD) && defined(TARGET_XARCH)
case TYP_SIMD16:
case TYP_SIMD32:
#endif
{
assert(genTypeSize(elementType) * 2 == genTypeSize(varDsc));
if (offset == 0)
{
hwiNode = m_compiler->gtNewSimdGetLowerNode(elementType, lclNode, CORINFO_TYPE_FLOAT,
genTypeSize(varDsc));
}
else
{
assert(offset == genTypeSize(elementType));
hwiNode = m_compiler->gtNewSimdGetUpperNode(elementType, lclNode, CORINFO_TYPE_FLOAT,
genTypeSize(varDsc));
}

break;
}
default:
unreached();
}

indir = hwiNode;
Expand All @@ -962,23 +989,50 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
GenTree* simdLclNode = m_compiler->gtNewLclVarNode(lclNum);
GenTree* elementNode = indir->AsIndir()->Data();

if (elementType == TYP_FLOAT)
switch (elementType)
{
GenTree* indexNode = m_compiler->gtNewIconNode(offset / genTypeSize(elementType));
hwiNode =
m_compiler->gtNewSimdWithElementNode(varDsc->TypeGet(), simdLclNode, indexNode, elementNode,
CORINFO_TYPE_FLOAT, genTypeSize(varDsc));
}
else
{
assert(elementType == TYP_SIMD12);
assert(varDsc->TypeGet() == TYP_SIMD16);

// We inverse the operands here and take elementNode as the main value and simdLclNode[3] as the
// new value. This gives us a new TYP_SIMD16 with all elements in the right spots
GenTree* indexNode = m_compiler->gtNewIconNode(3, TYP_INT);
hwiNode = m_compiler->gtNewSimdWithElementNode(TYP_SIMD16, elementNode, indexNode, simdLclNode,
CORINFO_TYPE_FLOAT, 16);
case TYP_FLOAT:
{
GenTree* indexNode = m_compiler->gtNewIconNode(offset / genTypeSize(elementType));
hwiNode =
m_compiler->gtNewSimdWithElementNode(varDsc->TypeGet(), simdLclNode, indexNode, elementNode,
CORINFO_TYPE_FLOAT, genTypeSize(varDsc));
break;
}
case TYP_SIMD12:
{
assert(varDsc->TypeGet() == TYP_SIMD16);

// We inverse the operands here and take elementNode as the main value and simdLclNode[3] as the
// new value. This gives us a new TYP_SIMD16 with all elements in the right spots
GenTree* indexNode = m_compiler->gtNewIconNode(3, TYP_INT);
hwiNode = m_compiler->gtNewSimdWithElementNode(TYP_SIMD16, elementNode, indexNode, simdLclNode,
CORINFO_TYPE_FLOAT, 16);
break;
}
case TYP_SIMD8:
#if defined(FEATURE_SIMD) && defined(TARGET_XARCH)
case TYP_SIMD16:
case TYP_SIMD32:
#endif
{
assert(genTypeSize(elementType) * 2 == genTypeSize(varDsc));
if (offset == 0)
{
hwiNode = m_compiler->gtNewSimdWithLowerNode(varDsc->TypeGet(), simdLclNode, elementNode,
CORINFO_TYPE_FLOAT, genTypeSize(varDsc));
}
else
{
assert(offset == genTypeSize(elementType));
hwiNode = m_compiler->gtNewSimdWithUpperNode(varDsc->TypeGet(), simdLclNode, elementNode,
CORINFO_TYPE_FLOAT, genTypeSize(varDsc));
}

break;
}
default:
unreached();
}

indir->ChangeType(varDsc->TypeGet());
Expand Down Expand Up @@ -1112,9 +1166,10 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
#ifdef FEATURE_HW_INTRINSICS
if (varTypeIsSIMD(varDsc))
{
// We have two cases we want to handle:
// We have three cases we want to handle:
// 1. Vector2/3/4 and Quaternion where we have 4x float fields
// 2. Plane where we have 1x Vector3 and 1x float field
// 3. Accesses of halves of larger SIMD types

if (indir->TypeIs(TYP_FLOAT))
{
Expand All @@ -1125,11 +1180,27 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
}
else if (indir->TypeIs(TYP_SIMD12))
{
if ((offset == 0) && m_compiler->IsBaselineSimdIsaSupported())
if ((offset == 0) && (varDsc->TypeGet() == TYP_SIMD16) && m_compiler->IsBaselineSimdIsaSupported())
{
return isDef ? IndirTransform::WithElement : IndirTransform::GetElement;
}
}
#ifdef TARGET_ARM64
else if (indir->TypeIs(TYP_SIMD8))
{
if ((varDsc->TypeGet() == TYP_SIMD16) && ((offset % 8) == 0))
{
return isDef ? IndirTransform::WithElement : IndirTransform::GetElement;
}
}
#endif
#if defined(FEATURE_SIMD) && defined(TARGET_XARCH)
else if (indir->TypeIs(TYP_SIMD16, TYP_SIMD32) && (genTypeSize(indir) * 2 == genTypeSize(varDsc)) &&
((offset % genTypeSize(indir)) == 0))
{
return isDef ? IndirTransform::WithElement : IndirTransform::GetElement;
}
#endif // FEATURE_SIMD && TARGET_XARCH
}
#endif // FEATURE_HW_INTRINSICS

Expand Down
36 changes: 36 additions & 0 deletions src/tests/JIT/Regression/JitBlue/Runtime_89456/Runtime_89456.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using Xunit;

public class Runtime_89456
{
[MethodImpl(MethodImplOptions.NoInlining)]
private static Vector3 Reinterp128(Vector128<float> v)
{
return Unsafe.As<Vector128<float>, Vector3>(ref v);
}

[MethodImpl(MethodImplOptions.NoInlining)]
private static Vector3 Reinterp256(Vector256<float> v)
{
return Unsafe.As<Vector256<float>, Vector3>(ref v);
}

[MethodImpl(MethodImplOptions.NoInlining)]
private static Vector3 Reinterp512(Vector512<float> v)
{
return Unsafe.As<Vector512<float>, Vector3>(ref v);
}

[Fact]
public static void TestEntryPoint()
{
Reinterp128(default);
Reinterp256(default);
Reinterp512(default);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<Optimize>True</Optimize>
</PropertyGroup>
<ItemGroup>
<Compile Include="$(MSBuildProjectName).cs" />
</ItemGroup>
</Project>

0 comments on commit 87526fb

Please sign in to comment.