Skip to content

Commit

Permalink
'cmeq' and 'fcmeq' Vector64<T>.Zero/Vector128<T>.Zero ARM64 containme…
Browse files Browse the repository at this point in the history
…nt optimizations (dotnet#62933)

* Initial work

* Added a comma to display

* Cleanup

* Fixing build

* More cleanup

* Update comment

* Update comment

* Added CompareEqual Vector64/128 with Zero tests

* Do not contain op1 for now

* Wrong intrinsic id used

* Removing generated tests

* Removing generated tests

* Added CompareEqual tests

* Supporting containment for first operand

* Fix test build

* Passing correct register

* Check IsVectorZero before not allocing a register

* Update comment

* Fixing test

* Minor format change

* Fixed formatting

* Renamed test

* Adding AdvSimd_Arm64 tests:

* Adding support for rest of 'cmeq' and 'fcmeq' instructions

* Removing github csproj

* Minor test fix

* Fixed tests

* Fix print

* Minor format change

* Fixing test

* Added some emitter tests

* Feedback

* Update emitarm64.cpp

* Feedback
  • Loading branch information
TIHan committed Jan 20, 2022
1 parent 68a923a commit a5158df
Show file tree
Hide file tree
Showing 13 changed files with 785 additions and 54 deletions.
5 changes: 4 additions & 1 deletion src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6922,9 +6922,12 @@ void CodeGen::genArm64EmitterUnitTests()

#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
//
// R_R fmov/fcmp/fcvt
// R_R cmeq/fmov/fcmp/fcvt
//

// cmeq scalar
theEmitter->emitIns_R_R(INS_cmeq, EA_8BYTE, REG_V0, REG_V1);

// fmov to vector to vector
theEmitter->emitIns_Mov(INS_fmov, EA_8BYTE, REG_V0, REG_V2, /* canSkip */ false);
theEmitter->emitIns_Mov(INS_fmov, EA_4BYTE, REG_V1, REG_V3, /* canSkip */ false);
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/codegenlinear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1657,7 +1657,7 @@ void CodeGen::genConsumeRegs(GenTree* tree)
#ifdef FEATURE_SIMD
// (In)Equality operation that produces bool result, when compared
// against Vector zero, marks its Vector Zero operand as contained.
assert(tree->OperIsLeaf() || tree->IsSIMDZero());
assert(tree->OperIsLeaf() || tree->IsSIMDZero() || tree->IsVectorZero());
#else
assert(tree->OperIsLeaf());
#endif
Expand Down
23 changes: 19 additions & 4 deletions src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4732,19 +4732,19 @@ void emitter::emitIns_R_R(
assert(isVectorRegister(reg1));
assert(isVectorRegister(reg2));

if (isValidVectorDatasize(size))
if (insOptsAnyArrangement(opt))
{
// Vector operation
assert(insOptsAnyArrangement(opt));
assert(isValidVectorDatasize(size));
assert(isValidArrangement(size, opt));
elemsize = optGetElemsize(opt);
fmt = IF_DV_2M;
}
else
{
NYI("Untested");
// Scalar operation
assert(size == EA_8BYTE); // Only Double supported
assert(size == EA_8BYTE);
assert(insOptsNone(opt));
fmt = IF_DV_2L;
}
break;
Expand Down Expand Up @@ -12971,6 +12971,11 @@ void emitter::emitDispIns(
emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
}
if (ins == INS_fcmeq)
{
printf(", ");
emitDispImm(0, false);
}
break;

case IF_DV_2P: // DV_2P ................ ......nnnnnddddd Vd Vn (aes*, sha1su1)
Expand All @@ -12990,6 +12995,11 @@ void emitter::emitDispIns(
emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
}
if (ins == INS_cmeq)
{
printf(", ");
emitDispImm(0, false);
}
break;

case IF_DV_2N: // DV_2N .........iiiiiii ......nnnnnddddd Vd Vn imm (shift - scalar)
Expand Down Expand Up @@ -13126,6 +13136,11 @@ void emitter::emitDispIns(
emitDispReg(id->idReg1(), size, true);
emitDispReg(id->idReg2(), size, false);
}
if (fmt == IF_DV_2L && ins == INS_cmeq)
{
printf(", ");
emitDispImm(0, false);
}
break;

case IF_DV_2H: // DV_2H X........X...... ......nnnnnddddd Rd Vn (fmov, fcvtXX - to general)
Expand Down
14 changes: 14 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17735,6 +17735,20 @@ bool GenTree::isContainableHWIntrinsic() const
return true;
}

default:
{
return false;
}
}
#elif TARGET_ARM64
switch (AsHWIntrinsic()->GetHWIntrinsicId())
{
case NI_Vector64_get_Zero:
case NI_Vector128_get_Zero:
{
return true;
}

default:
{
return false;
Expand Down
79 changes: 77 additions & 2 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1704,6 +1704,8 @@ struct GenTree
inline bool IsIntegralConst(ssize_t constVal) const;
inline bool IsIntegralConstVector(ssize_t constVal) const;
inline bool IsSIMDZero() const;
inline bool IsFloatPositiveZero() const;
inline bool IsVectorZero() const;

inline bool IsBoxedValue();

Expand Down Expand Up @@ -2097,7 +2099,7 @@ struct GenTree

inline bool IsCnsFltOrDbl() const;

inline bool IsCnsNonZeroFltOrDbl();
inline bool IsCnsNonZeroFltOrDbl() const;

bool IsIconHandle() const
{
Expand Down Expand Up @@ -7650,6 +7652,79 @@ inline bool GenTree::IsSIMDZero() const
return false;
}

//-------------------------------------------------------------------
// IsFloatPositiveZero: returns true if this is exactly a const float value of postive zero (+0.0)
//
// Returns:
// True if this represents a const floating-point value of exactly positive zero (+0.0).
// Will return false if the value is negative zero (-0.0).
//
inline bool GenTree::IsFloatPositiveZero() const
{
return !(IsCnsNonZeroFltOrDbl());
}

//-------------------------------------------------------------------
// IsVectorZero: returns true if this is an integral or floating-point (SIMD or HW intrinsic) vector
// with all its elements equal to zero.
//
// Returns:
// True if this represents an integral or floating-point const (SIMD or HW intrinsic) vector with all its elements
// equal to zero.
//
// TODO: We already have IsSIMDZero() and IsIntegralConstVector(0),
// however, IsSIMDZero() does not cover hardware intrinsics, and IsIntegralConstVector(0) does not cover floating
// point. In order to not risk adverse behaviour by modifying those, this function 'IsVectorZero' was introduced.
// At some point, it makes sense to normalize this logic to be a single function call rather than have several
// separate ones; preferably this one.
inline bool GenTree::IsVectorZero() const
{
#ifdef FEATURE_SIMD
if (gtOper == GT_SIMD)
{
const GenTreeSIMD* node = AsSIMD();

if (node->GetSIMDIntrinsicId() == SIMDIntrinsicInit)
{
return (node->Op(1)->IsIntegralConst(0) || node->Op(1)->IsFloatPositiveZero());
}
}
#endif

#ifdef FEATURE_HW_INTRINSICS
if (gtOper == GT_HWINTRINSIC)
{
const GenTreeHWIntrinsic* node = AsHWIntrinsic();
const var_types simdBaseType = node->GetSimdBaseType();

if (varTypeIsIntegral(simdBaseType) || varTypeIsFloating(simdBaseType))
{
const NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();

if (node->GetOperandCount() == 0)
{
#if defined(TARGET_XARCH)
return (intrinsicId == NI_Vector128_get_Zero) || (intrinsicId == NI_Vector256_get_Zero);
#elif defined(TARGET_ARM64)
return (intrinsicId == NI_Vector64_get_Zero) || (intrinsicId == NI_Vector128_get_Zero);
#endif // !TARGET_XARCH && !TARGET_ARM64
}
else if ((node->GetOperandCount() == 1) &&
(node->Op(1)->IsIntegralConst(0) || node->Op(1)->IsFloatPositiveZero()))
{
#if defined(TARGET_XARCH)
return (intrinsicId == NI_Vector128_Create) || (intrinsicId == NI_Vector256_Create);
#elif defined(TARGET_ARM64)
return (intrinsicId == NI_Vector64_Create) || (intrinsicId == NI_Vector128_Create);
#endif // !TARGET_XARCH && !TARGET_ARM64
}
}
}
#endif // FEATURE_HW_INTRINSICS

return false;
}

inline bool GenTree::IsBoxedValue()
{
assert(gtOper != GT_BOX || AsBox()->BoxOp() != nullptr);
Expand Down Expand Up @@ -8328,7 +8403,7 @@ inline bool GenTree::IsCnsFltOrDbl() const
return OperGet() == GT_CNS_DBL;
}

inline bool GenTree::IsCnsNonZeroFltOrDbl()
inline bool GenTree::IsCnsNonZeroFltOrDbl() const
{
if (OperGet() == GT_CNS_DBL)
{
Expand Down
23 changes: 23 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,29 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt);
break;

case NI_AdvSimd_CompareEqual:
case NI_AdvSimd_Arm64_CompareEqual:
case NI_AdvSimd_Arm64_CompareEqualScalar:
if (intrin.op1->isContained())
{
assert(HWIntrinsicInfo::SupportsContainment(intrin.id));
assert(intrin.op1->IsVectorZero());

GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op2Reg, opt);
}
else if (intrin.op2->isContained())
{
assert(HWIntrinsicInfo::SupportsContainment(intrin.id));
assert(intrin.op2->IsVectorZero());

GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt);
}
else
{
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt);
}
break;

case NI_AdvSimd_AbsoluteCompareLessThan:
case NI_AdvSimd_AbsoluteCompareLessThanOrEqual:
case NI_AdvSimd_CompareLessThan:
Expand Down
6 changes: 3 additions & 3 deletions src/coreclr/jit/hwintrinsiclistarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ HARDWARE_INTRINSIC(AdvSimd, BitwiseClear,
HARDWARE_INTRINSIC(AdvSimd, BitwiseSelect, -1, 3, {INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl}, HW_Category_SIMD, HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AdvSimd, Ceiling, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AdvSimd, CeilingScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp, INS_frintp}, HW_Category_SIMD, HW_Flag_SIMDScalar)
HARDWARE_INTRINSIC(AdvSimd, CompareEqual, -1, 2, {INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_invalid, INS_invalid, INS_fcmeq, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(AdvSimd, CompareEqual, -1, 2, {INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_invalid, INS_invalid, INS_fcmeq, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment)
HARDWARE_INTRINSIC(AdvSimd, CompareGreaterThan, -1, 2, {INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_invalid, INS_invalid, INS_fcmgt, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AdvSimd, CompareGreaterThanOrEqual, -1, 2, {INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_invalid, INS_invalid, INS_fcmge, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AdvSimd, CompareLessThan, -1, 2, {INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_invalid, INS_invalid, INS_fcmgt, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen)
Expand Down Expand Up @@ -492,8 +492,8 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, AddPairwiseScalar,
HARDWARE_INTRINSIC(AdvSimd_Arm64, AddSaturate, -1, 2, {INS_suqadd, INS_usqadd, INS_suqadd, INS_usqadd, INS_suqadd, INS_usqadd, INS_suqadd, INS_usqadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(AdvSimd_Arm64, AddSaturateScalar, 8, 2, {INS_sqadd, INS_uqadd, INS_sqadd, INS_uqadd, INS_sqadd, INS_uqadd, INS_suqadd, INS_usqadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AdvSimd_Arm64, Ceiling, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp}, HW_Category_SIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmeq, INS_cmeq, INS_invalid, INS_fcmeq}, HW_Category_SIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareEqualScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmeq, INS_cmeq, INS_fcmeq, INS_fcmeq}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar)
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmeq, INS_cmeq, INS_invalid, INS_fcmeq}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment)
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareEqualScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmeq, INS_cmeq, INS_fcmeq, INS_fcmeq}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment)
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmgt, INS_cmhi, INS_invalid, INS_fcmgt}, HW_Category_SIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_invalid, INS_fcmge}, HW_Category_SIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanOrEqualScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_fcmge, INS_fcmge}, HW_Category_SIMD, HW_Flag_SIMDScalar)
Expand Down
8 changes: 4 additions & 4 deletions src/coreclr/jit/instrsarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -312,8 +312,8 @@ INST4(neg, "neg", 0, IF_EN4G, 0x4B0003E0, 0x4B0003E0,
INST4(cmeq, "cmeq", 0, IF_EN4H, 0x7EE08C00, 0x2E208C00, 0x5E209800, 0x0E209800)
// cmeq Vd,Vn,Vm DV_3E 01111110111mmmmm 100011nnnnnddddd 7EE0 8C00 Vd,Vn,Vm (scalar)
// cmeq Vd,Vn,Vm DV_3A 0Q101110XX1mmmmm 100011nnnnnddddd 2E20 8C00 Vd,Vn,Vm (vector)
// cmeq Vd,Vn DV_2L 01011110XX100000 100110nnnnnddddd 5E20 9800 Vd,Vn (scalar)
// cmeq Vd,Vn DV_2M 0Q001110XX100000 100110nnnnnddddd 0E20 9800 Vd,Vn (vector)
// cmeq Vd,Vn,#0 DV_2L 01011110XX100000 100110nnnnnddddd 5E20 9800 Vd,Vn,#0 (scalar - with zero)
// cmeq Vd,Vn,#0 DV_2M 0Q001110XX100000 100110nnnnnddddd 0E20 9800 Vd,Vn,#0 (vector - with zero)

INST4(cmge, "cmge", 0, IF_EN4H, 0x5EE03C00, 0x0E203C00, 0x7E208800, 0x2E208800)
// cmge Vd,Vn,Vm DV_3E 01011110111mmmmm 001111nnnnnddddd 5EE0 3C00 Vd,Vn,Vm (scalar)
Expand All @@ -331,8 +331,8 @@ INST4(cmgt, "cmgt", 0, IF_EN4H, 0x5EE03400, 0x0E203400,
INST4(fcmeq, "fcmeq", 0, IF_EN4I, 0x5E20E400, 0x0E20E400, 0x5EA0D800, 0x0EA0D800)
// fcmeq Vd,Vn,Vm DV_3D 010111100X1mmmmm 111001nnnnnddddd 5E20 E400 Vd Vn Vm (scalar)
// fcmeq Vd,Vn,Vm DV_3B 0Q0011100X1mmmmm 111001nnnnnddddd 0E20 E400 Vd,Vn,Vm (vector)
// fcmeq Vd,Vn DV_2G 010111101X100000 110110nnnnnddddd 5EA0 D800 Vd Vn (scalar)
// fcmeq Vd,Vn DV_2A 0Q0011101X100000 110110nnnnnddddd 0EA0 D800 Vd Vn (vector)
// fcmeq Vd,Vn,#0 DV_2G 010111101X100000 110110nnnnnddddd 5EA0 D800 Vd Vn,#0 (scalar - with zero)
// fcmeq Vd,Vn,#0 DV_2A 0Q0011101X100000 110110nnnnnddddd 0EA0 D800 Vd Vn,#0 (vector - with zero)

INST4(fcmge, "fcmge", 0, IF_EN4I, 0x7E20E400, 0x2E20E400, 0x7EA0C800, 0x2EA0C800)
// fcmge Vd,Vn,Vm DV_3D 011111100X1mmmmm 111001nnnnnddddd 7E20 E400 Vd Vn Vm (scalar)
Expand Down
15 changes: 15 additions & 0 deletions src/coreclr/jit/lowerarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1842,6 +1842,21 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
MakeSrcContained(node, intrin.op4);
break;

case NI_AdvSimd_CompareEqual:
case NI_AdvSimd_Arm64_CompareEqual:
case NI_AdvSimd_Arm64_CompareEqualScalar:
{
if (intrin.op1->IsVectorZero())
{
MakeSrcContained(node, intrin.op1);
}
else if (intrin.op2->IsVectorZero())
{
MakeSrcContained(node, intrin.op2);
}
break;
}

case NI_Vector64_CreateScalarUnsafe:
case NI_Vector128_CreateScalarUnsafe:
case NI_AdvSimd_DuplicateToVector64:
Expand Down
Loading

0 comments on commit a5158df

Please sign in to comment.