diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index ae39217dc8ff8e..a04b5769f095fb 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -26535,6 +26535,219 @@ This function returns the tangent of the specified argument, returning the
 same values as the libm ``tan`` functions would, and handles error
 conditions in the same way.
 
+'``llvm.experimental.constrained.asin``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <type>
+      @llvm.experimental.constrained.asin(<type> <op1>,
+                                          metadata <rounding mode>,
+                                          metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.asin``' intrinsic returns the arcsine of the
+first operand.
+
+Arguments:
+""""""""""
+
+The first argument and the return type are floating-point numbers of the same
+type.
+
+The second and third arguments specify the rounding mode and exception
+behavior as described above.
+
+Semantics:
+""""""""""
+
+This function returns the arcsine of the specified operand, returning the
+same values as the libm ``asin`` functions would, and handles error
+conditions in the same way.
+
+
+'``llvm.experimental.constrained.acos``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <type>
+      @llvm.experimental.constrained.acos(<type> <op1>,
+                                          metadata <rounding mode>,
+                                          metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.acos``' intrinsic returns the arccosine of the
+first operand.
+
+Arguments:
+""""""""""
+
+The first argument and the return type are floating-point numbers of the same
+type.
+
+The second and third arguments specify the rounding mode and exception
+behavior as described above.
+
+Semantics:
+""""""""""
+
+This function returns the arccosine of the specified operand, returning the
+same values as the libm ``acos`` functions would, and handles error
+conditions in the same way.
+
+
+'``llvm.experimental.constrained.atan``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <type>
+      @llvm.experimental.constrained.atan(<type> <op1>,
+                                          metadata <rounding mode>,
+                                          metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.atan``' intrinsic returns the arctangent of the
+first operand.
+
+Arguments:
+""""""""""
+
+The first argument and the return type are floating-point numbers of the same
+type.
+
+The second and third arguments specify the rounding mode and exception
+behavior as described above.
+
+Semantics:
+""""""""""
+
+This function returns the arctangent of the specified operand, returning the
+same values as the libm ``atan`` functions would, and handles error
+conditions in the same way.
+
+'``llvm.experimental.constrained.sinh``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <type>
+      @llvm.experimental.constrained.sinh(<type> <op1>,
+                                          metadata <rounding mode>,
+                                          metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.sinh``' intrinsic returns the hyperbolic sine of the
+first operand.
+
+Arguments:
+""""""""""
+
+The first argument and the return type are floating-point numbers of the same
+type.
+
+The second and third arguments specify the rounding mode and exception
+behavior as described above.
+
+Semantics:
+""""""""""
+
+This function returns the hyperbolic sine of the specified operand, returning the
+same values as the libm ``sinh`` functions would, and handles error
+conditions in the same way.
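(Editorial aside, not part of this patch: for readers unfamiliar with the constrained-intrinsic form documented above, a minimal strict-FP use of one of the new intrinsics is sketched below. The function name ``@use_sinh`` is invented for illustration; the call shape mirrors the tests later in this patch.)

::

      define double @use_sinh(double %x) #0 {
      entry:
        ; Dynamic rounding mode and strict exception behavior, as in the
        ; fp-intrinsics tests added by this patch.
        %r = call double @llvm.experimental.constrained.sinh.f64(double %x,
                  metadata !"round.dynamic",
                  metadata !"fpexcept.strict") #0
        ret double %r
      }

      declare double @llvm.experimental.constrained.sinh.f64(double, metadata, metadata)

      attributes #0 = { strictfp }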
+
+
+'``llvm.experimental.constrained.cosh``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <type>
+      @llvm.experimental.constrained.cosh(<type> <op1>,
+                                          metadata <rounding mode>,
+                                          metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.cosh``' intrinsic returns the hyperbolic cosine of the
+first operand.
+
+Arguments:
+""""""""""
+
+The first argument and the return type are floating-point numbers of the same
+type.
+
+The second and third arguments specify the rounding mode and exception
+behavior as described above.
+
+Semantics:
+""""""""""
+
+This function returns the hyperbolic cosine of the specified operand, returning the
+same values as the libm ``cosh`` functions would, and handles error
+conditions in the same way.
+
+
+'``llvm.experimental.constrained.tanh``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <type>
+      @llvm.experimental.constrained.tanh(<type> <op1>,
+                                          metadata <rounding mode>,
+                                          metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.tanh``' intrinsic returns the hyperbolic tangent of the
+first operand.
+
+Arguments:
+""""""""""
+
+The first argument and the return type are floating-point numbers of the same
+type.
+
+The second and third arguments specify the rounding mode and exception
+behavior as described above.
+
+Semantics:
+""""""""""
+
+This function returns the hyperbolic tangent of the specified operand, returning the
+same values as the libm ``tanh`` functions would, and handles error
+conditions in the same way.
+
 '``llvm.experimental.constrained.exp``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 4f1dc9f991c065..5b9cc5dfeeadb8 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1979,6 +1979,24 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     case Intrinsic::tan:
       ISD = ISD::FTAN;
       break;
+    case Intrinsic::asin:
+      ISD = ISD::FASIN;
+      break;
+    case Intrinsic::acos:
+      ISD = ISD::FACOS;
+      break;
+    case Intrinsic::atan:
+      ISD = ISD::FATAN;
+      break;
+    case Intrinsic::sinh:
+      ISD = ISD::FSINH;
+      break;
+    case Intrinsic::cosh:
+      ISD = ISD::FCOSH;
+      break;
+    case Intrinsic::tanh:
+      ISD = ISD::FTANH;
+      break;
     case Intrinsic::exp:
       ISD = ISD::FEXP;
       break;
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 88e3339e2453f6..e6b10209b4767b 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -422,6 +422,12 @@ enum NodeType {
   STRICT_FSIN,
   STRICT_FCOS,
   STRICT_FTAN,
+  STRICT_FASIN,
+  STRICT_FACOS,
+  STRICT_FATAN,
+  STRICT_FSINH,
+  STRICT_FCOSH,
+  STRICT_FTANH,
   STRICT_FEXP,
   STRICT_FEXP2,
   STRICT_FLOG,
@@ -948,6 +954,12 @@ enum NodeType {
   FSIN,
   FCOS,
   FTAN,
+  FASIN,
+  FACOS,
+  FATAN,
+  FSINH,
+  FCOSH,
+  FTANH,
   FPOW,
   FPOWI,
   /// FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
diff --git a/llvm/include/llvm/IR/ConstrainedOps.def b/llvm/include/llvm/IR/ConstrainedOps.def
index a7b37c5cb204da..56304c377b8393 100644
--- a/llvm/include/llvm/IR/ConstrainedOps.def
+++ b/llvm/include/llvm/IR/ConstrainedOps.def
@@ -69,8 +69,12 @@ CMP_INSTRUCTION(FCmp, 2, 0, experimental_constrained_fcmps, FSETCCS
 
 // Theses are definitions for intrinsic functions, that are converted into
 // constrained intrinsics.
// +DAG_FUNCTION(acos, 1, 1, experimental_constrained_acos, FACOS) +DAG_FUNCTION(asin, 1, 1, experimental_constrained_asin, FASIN) +DAG_FUNCTION(atan, 1, 1, experimental_constrained_atan, FATAN) DAG_FUNCTION(ceil, 1, 0, experimental_constrained_ceil, FCEIL) DAG_FUNCTION(cos, 1, 1, experimental_constrained_cos, FCOS) +DAG_FUNCTION(cosh, 1, 1, experimental_constrained_cosh, FCOSH) DAG_FUNCTION(exp, 1, 1, experimental_constrained_exp, FEXP) DAG_FUNCTION(exp2, 1, 1, experimental_constrained_exp2, FEXP2) DAG_FUNCTION(floor, 1, 0, experimental_constrained_floor, FFLOOR) @@ -94,8 +98,10 @@ DAG_FUNCTION(rint, 1, 1, experimental_constrained_rint, FRINT) DAG_FUNCTION(round, 1, 0, experimental_constrained_round, FROUND) DAG_FUNCTION(roundeven, 1, 0, experimental_constrained_roundeven, FROUNDEVEN) DAG_FUNCTION(sin, 1, 1, experimental_constrained_sin, FSIN) +DAG_FUNCTION(sinh, 1, 1, experimental_constrained_sinh, FSINH) DAG_FUNCTION(sqrt, 1, 1, experimental_constrained_sqrt, FSQRT) DAG_FUNCTION(tan, 1, 1, experimental_constrained_tan, FTAN) +DAG_FUNCTION(tanh, 1, 1, experimental_constrained_tanh, FTANH) DAG_FUNCTION(trunc, 1, 0, experimental_constrained_trunc, FTRUNC) // This is definition for fmuladd intrinsic function, that is converted into diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 65a9b68b5229df..01e379dfcebcad 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1211,6 +1211,18 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn, IntrStrictFP] in llvm_anyint_ty, llvm_metadata_ty, llvm_metadata_ty ]>; + def int_experimental_constrained_asin : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_acos : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_atan : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; def int_experimental_constrained_sin : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, llvm_metadata_ty, @@ -1223,6 +1235,18 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn, IntrStrictFP] in [ LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; + def int_experimental_constrained_sinh : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_cosh : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_tanh : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; def int_experimental_constrained_pow : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def index d8eab80656c06a..89aaf6d1ad83f8 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.def +++ b/llvm/include/llvm/IR/RuntimeLibcalls.def @@ -202,6 +202,36 @@ HANDLE_LIBCALL(TAN_F64, "tan") HANDLE_LIBCALL(TAN_F80, "tanl") HANDLE_LIBCALL(TAN_F128,"tanl") HANDLE_LIBCALL(TAN_PPCF128, "tanl") +HANDLE_LIBCALL(SINH_F32, "sinhf") +HANDLE_LIBCALL(SINH_F64, "sinh") +HANDLE_LIBCALL(SINH_F80, "sinhl") +HANDLE_LIBCALL(SINH_F128, "sinhl") +HANDLE_LIBCALL(SINH_PPCF128, "sinhl") +HANDLE_LIBCALL(COSH_F32, "coshf") 
+HANDLE_LIBCALL(COSH_F64, "cosh")
+HANDLE_LIBCALL(COSH_F80, "coshl")
+HANDLE_LIBCALL(COSH_F128, "coshl")
+HANDLE_LIBCALL(COSH_PPCF128, "coshl")
+HANDLE_LIBCALL(TANH_F32, "tanhf")
+HANDLE_LIBCALL(TANH_F64, "tanh")
+HANDLE_LIBCALL(TANH_F80, "tanhl")
+HANDLE_LIBCALL(TANH_F128,"tanhl")
+HANDLE_LIBCALL(TANH_PPCF128, "tanhl")
+HANDLE_LIBCALL(ASIN_F32, "asinf")
+HANDLE_LIBCALL(ASIN_F64, "asin")
+HANDLE_LIBCALL(ASIN_F80, "asinl")
+HANDLE_LIBCALL(ASIN_F128, "asinl")
+HANDLE_LIBCALL(ASIN_PPCF128, "asinl")
+HANDLE_LIBCALL(ACOS_F32, "acosf")
+HANDLE_LIBCALL(ACOS_F64, "acos")
+HANDLE_LIBCALL(ACOS_F80, "acosl")
+HANDLE_LIBCALL(ACOS_F128, "acosl")
+HANDLE_LIBCALL(ACOS_PPCF128, "acosl")
+HANDLE_LIBCALL(ATAN_F32, "atanf")
+HANDLE_LIBCALL(ATAN_F64, "atan")
+HANDLE_LIBCALL(ATAN_F80, "atanl")
+HANDLE_LIBCALL(ATAN_F128,"atanl")
+HANDLE_LIBCALL(ATAN_PPCF128, "atanl")
 HANDLE_LIBCALL(SINCOS_F32, nullptr)
 HANDLE_LIBCALL(SINCOS_F64, nullptr)
 HANDLE_LIBCALL(SINCOS_F80, nullptr)
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 560d3b434d07d5..fbe551e1be9115 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -149,6 +149,12 @@ def : GINodeEquiv<G_FCEIL, fceil>;
def : GINodeEquiv<G_FCOS, fcos>;
 def : GINodeEquiv<G_FSIN, fsin>;
 def : GINodeEquiv<G_FTAN, ftan>;
+def : GINodeEquiv<G_FACOS, facos>;
+def : GINodeEquiv<G_FASIN, fasin>;
+def : GINodeEquiv<G_FATAN, fatan>;
+def : GINodeEquiv<G_FCOSH, fcosh>;
+def : GINodeEquiv<G_FSINH, fsinh>;
+def : GINodeEquiv<G_FTANH, ftanh>;
 def : GINodeEquiv<G_FSQRT, fsqrt>;
 def : GINodeEquiv<G_FFLOOR, ffloor>;
 def : GINodeEquiv<G_FRINT, frint>;
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 8cbf98cd58ca98..133c9b113e51b2 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -515,6 +515,12 @@ def fsqrt : SDNode<"ISD::FSQRT" , SDTFPUnaryOp>;
 def fsin : SDNode<"ISD::FSIN" , SDTFPUnaryOp>;
 def fcos : SDNode<"ISD::FCOS" , SDTFPUnaryOp>;
 def ftan : SDNode<"ISD::FTAN" , SDTFPUnaryOp>;
+def fasin : SDNode<"ISD::FASIN" , SDTFPUnaryOp>;
+def facos : SDNode<"ISD::FACOS" , SDTFPUnaryOp>;
+def fatan : SDNode<"ISD::FATAN" , SDTFPUnaryOp>;
+def fsinh : SDNode<"ISD::FSINH" , SDTFPUnaryOp>;
+def fcosh : SDNode<"ISD::FCOSH" , SDTFPUnaryOp>;
+def ftanh : SDNode<"ISD::FTANH" , SDTFPUnaryOp>;
 def fexp2 : SDNode<"ISD::FEXP2" , SDTFPUnaryOp>;
 def fexp10 : SDNode<"ISD::FEXP10" , SDTFPUnaryOp>;
 def fpow : SDNode<"ISD::FPOW" , SDTFPBinOp>;
@@ -570,11 +576,23 @@ def strict_fcos : SDNode<"ISD::STRICT_FCOS",
                          SDTFPUnaryOp, [SDNPHasChain]>;
 def strict_ftan : SDNode<"ISD::STRICT_FTAN",
                          SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fasin : SDNode<"ISD::STRICT_FASIN",
+                          SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_facos : SDNode<"ISD::STRICT_FACOS",
+                          SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fatan : SDNode<"ISD::STRICT_FATAN",
+                          SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fsinh : SDNode<"ISD::STRICT_FSINH",
+                          SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fcosh : SDNode<"ISD::STRICT_FCOSH",
+                          SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_ftanh : SDNode<"ISD::STRICT_FTANH",
+                          SDTFPUnaryOp, [SDNPHasChain]>;
 def strict_fexp2 : SDNode<"ISD::STRICT_FEXP2",
                           SDTFPUnaryOp, [SDNPHasChain]>;
 def strict_fpow : SDNode<"ISD::STRICT_FPOW",
                          SDTFPBinOp, [SDNPHasChain]>;
-def strict_fldexp : SDNode<"ISD::STRICT_FLDEXP",
+def strict_fldexp : SDNode<"ISD::STRICT_FLDEXP",
                            SDTFPExpOp, [SDNPHasChain]>;
 def strict_flog2 : SDNode<"ISD::STRICT_FLOG2",
                           SDTFPUnaryOp, [SDNPHasChain]>;
@@ -1528,6 +1546,24 @@ def any_fcos : PatFrags<(ops node:$src),
 def any_ftan
: PatFrags<(ops node:$src), [(strict_ftan node:$src), (ftan node:$src)]>; +def any_fasin : PatFrags<(ops node:$src), + [(strict_fasin node:$src), + (fasin node:$src)]>; +def any_facos : PatFrags<(ops node:$src), + [(strict_facos node:$src), + (facos node:$src)]>; +def any_fatan : PatFrags<(ops node:$src), + [(strict_fatan node:$src), + (fatan node:$src)]>; +def any_fsinh : PatFrags<(ops node:$src), + [(strict_fsinh node:$src), + (fsinh node:$src)]>; +def any_fcosh : PatFrags<(ops node:$src), + [(strict_fcosh node:$src), + (fcosh node:$src)]>; +def any_ftanh : PatFrags<(ops node:$src), + [(strict_ftanh node:$src), + (ftanh node:$src)]>; def any_fexp2 : PatFrags<(ops node:$src), [(strict_fexp2 node:$src), (fexp2 node:$src)]>; diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 3f1094e0ac703d..f717849317ba72 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -451,6 +451,18 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { RTLIBCASE(COS_F); case TargetOpcode::G_FTAN: RTLIBCASE(TAN_F); + case TargetOpcode::G_FASIN: + RTLIBCASE(ASIN_F); + case TargetOpcode::G_FACOS: + RTLIBCASE(ACOS_F); + case TargetOpcode::G_FATAN: + RTLIBCASE(ATAN_F); + case TargetOpcode::G_FSINH: + RTLIBCASE(SINH_F); + case TargetOpcode::G_FCOSH: + RTLIBCASE(COSH_F); + case TargetOpcode::G_FTANH: + RTLIBCASE(TANH_F); case TargetOpcode::G_FLOG10: RTLIBCASE(LOG10_F); case TargetOpcode::G_FLOG: @@ -1040,6 +1052,12 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { case TargetOpcode::G_FCOS: case TargetOpcode::G_FSIN: case TargetOpcode::G_FTAN: + case TargetOpcode::G_FACOS: + case TargetOpcode::G_FASIN: + case TargetOpcode::G_FATAN: + case TargetOpcode::G_FCOSH: + case TargetOpcode::G_FSINH: + case TargetOpcode::G_FTANH: case TargetOpcode::G_FLOG10: case TargetOpcode::G_FLOG: case TargetOpcode::G_FLOG2: @@ -2904,6 +2922,12 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_FCOS: case TargetOpcode::G_FSIN: case TargetOpcode::G_FTAN: + case TargetOpcode::G_FACOS: + case TargetOpcode::G_FASIN: + case TargetOpcode::G_FATAN: + case TargetOpcode::G_FCOSH: + case TargetOpcode::G_FSINH: + case TargetOpcode::G_FTANH: case TargetOpcode::G_FLOG10: case TargetOpcode::G_FLOG: case TargetOpcode::G_FLOG2: diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 328a1465804523..ee289674307379 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -834,6 +834,12 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, case TargetOpcode::G_FSIN: case TargetOpcode::G_FCOS: case TargetOpcode::G_FTAN: + case TargetOpcode::G_FACOS: + case TargetOpcode::G_FASIN: + case TargetOpcode::G_FATAN: + case TargetOpcode::G_FCOSH: + case TargetOpcode::G_FSINH: + case TargetOpcode::G_FTANH: case TargetOpcode::G_FMA: case TargetOpcode::G_FMAD: if (SNaN) @@ -1715,6 +1721,12 @@ bool llvm::isPreISelGenericFloatingPointOpcode(unsigned Opc) { case TargetOpcode::G_FRINT: case TargetOpcode::G_FSIN: case TargetOpcode::G_FTAN: + case TargetOpcode::G_FACOS: + case TargetOpcode::G_FASIN: + case TargetOpcode::G_FATAN: + case TargetOpcode::G_FCOSH: + case TargetOpcode::G_FSINH: + case TargetOpcode::G_FTANH: case TargetOpcode::G_FSQRT: case TargetOpcode::G_FSUB: case TargetOpcode::G_INTRINSIC_ROUND: diff --git 
a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index d6a0dd9ae9b208..1be93276b69613 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4537,6 +4537,36 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { ExpandFPLibCall(Node, RTLIB::TAN_F32, RTLIB::TAN_F64, RTLIB::TAN_F80, RTLIB::TAN_F128, RTLIB::TAN_PPCF128, Results); break; + case ISD::FASIN: + case ISD::STRICT_FASIN: + ExpandFPLibCall(Node, RTLIB::ASIN_F32, RTLIB::ASIN_F64, RTLIB::ASIN_F80, + RTLIB::ASIN_F128, RTLIB::ASIN_PPCF128, Results); + break; + case ISD::FACOS: + case ISD::STRICT_FACOS: + ExpandFPLibCall(Node, RTLIB::ACOS_F32, RTLIB::ACOS_F64, RTLIB::ACOS_F80, + RTLIB::ACOS_F128, RTLIB::ACOS_PPCF128, Results); + break; + case ISD::FATAN: + case ISD::STRICT_FATAN: + ExpandFPLibCall(Node, RTLIB::ATAN_F32, RTLIB::ATAN_F64, RTLIB::ATAN_F80, + RTLIB::ATAN_F128, RTLIB::ATAN_PPCF128, Results); + break; + case ISD::FSINH: + case ISD::STRICT_FSINH: + ExpandFPLibCall(Node, RTLIB::SINH_F32, RTLIB::SINH_F64, RTLIB::SINH_F80, + RTLIB::SINH_F128, RTLIB::SINH_PPCF128, Results); + break; + case ISD::FCOSH: + case ISD::STRICT_FCOSH: + ExpandFPLibCall(Node, RTLIB::COSH_F32, RTLIB::COSH_F64, RTLIB::COSH_F80, + RTLIB::COSH_F128, RTLIB::COSH_PPCF128, Results); + break; + case ISD::FTANH: + case ISD::STRICT_FTANH: + ExpandFPLibCall(Node, RTLIB::TANH_F32, RTLIB::TANH_F64, RTLIB::TANH_F80, + RTLIB::TANH_F128, RTLIB::TANH_PPCF128, Results); + break; case ISD::FSINCOS: // Expand into sincos libcall. ExpandSinCosLibCall(Node, Results); @@ -5510,6 +5540,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::FSIN: case ISD::FCOS: case ISD::FTAN: + case ISD::FASIN: + case ISD::FACOS: + case ISD::FATAN: + case ISD::FSINH: + case ISD::FCOSH: + case ISD::FTANH: case ISD::FLOG: case ISD::FLOG2: case ISD::FLOG10: @@ -5535,6 +5571,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::STRICT_FSIN: case ISD::STRICT_FCOS: case ISD::STRICT_FTAN: + case ISD::STRICT_FASIN: + case ISD::STRICT_FACOS: + case ISD::STRICT_FATAN: + case ISD::STRICT_FSINH: + case ISD::STRICT_FCOSH: + case ISD::STRICT_FTANH: case ISD::STRICT_FLOG: case ISD::STRICT_FLOG2: case ISD::STRICT_FLOG10: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index aa116c9de5d8c4..41fcc9afe4e905 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -76,12 +76,20 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break; case ISD::STRICT_FADD: case ISD::FADD: R = SoftenFloatRes_FADD(N); break; + case ISD::STRICT_FACOS: + case ISD::FACOS: R = SoftenFloatRes_FACOS(N); break; + case ISD::STRICT_FASIN: + case ISD::FASIN: R = SoftenFloatRes_FASIN(N); break; + case ISD::STRICT_FATAN: + case ISD::FATAN: R = SoftenFloatRes_FATAN(N); break; case ISD::FCBRT: R = SoftenFloatRes_FCBRT(N); break; case ISD::STRICT_FCEIL: case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; case ISD::STRICT_FCOS: case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break; + case ISD::STRICT_FCOSH: + case ISD::FCOSH: R = SoftenFloatRes_FCOSH(N); break; case ISD::STRICT_FDIV: case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break; case ISD::STRICT_FEXP: @@ -127,12 +135,16 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, 
unsigned ResNo) { case ISD::FROUNDEVEN: R = SoftenFloatRes_FROUNDEVEN(N); break; case ISD::STRICT_FSIN: case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break; + case ISD::STRICT_FSINH: + case ISD::FSINH: R = SoftenFloatRes_FSINH(N); break; case ISD::STRICT_FSQRT: case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; case ISD::STRICT_FSUB: case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; case ISD::STRICT_FTAN: case ISD::FTAN: R = SoftenFloatRes_FTAN(N); break; + case ISD::STRICT_FTANH: + case ISD::FTANH: R = SoftenFloatRes_FTANH(N); break; case ISD::STRICT_FTRUNC: case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break; @@ -320,6 +332,24 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { RTLIB::ADD_PPCF128)); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FACOS(SDNode *N) { + return SoftenFloatRes_Unary( + N, GetFPLibCall(N->getValueType(0), RTLIB::ACOS_F32, RTLIB::ACOS_F64, + RTLIB::ACOS_F80, RTLIB::ACOS_F128, RTLIB::ACOS_PPCF128)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FASIN(SDNode *N) { + return SoftenFloatRes_Unary( + N, GetFPLibCall(N->getValueType(0), RTLIB::ASIN_F32, RTLIB::ASIN_F64, + RTLIB::ASIN_F80, RTLIB::ASIN_F128, RTLIB::ASIN_PPCF128)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FATAN(SDNode *N) { + return SoftenFloatRes_Unary( + N, GetFPLibCall(N->getValueType(0), RTLIB::ATAN_F32, RTLIB::ATAN_F64, + RTLIB::ATAN_F80, RTLIB::ATAN_F128, RTLIB::ATAN_PPCF128)); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FCBRT(SDNode *N) { return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::CBRT_F32, @@ -395,6 +425,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { RTLIB::COS_PPCF128)); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOSH(SDNode *N) { + return SoftenFloatRes_Unary( + N, GetFPLibCall(N->getValueType(0), RTLIB::COSH_F32, RTLIB::COSH_F64, + RTLIB::COSH_F80, RTLIB::COSH_F128, RTLIB::COSH_PPCF128)); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), RTLIB::DIV_F32, @@ -758,6 +794,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { RTLIB::SIN_PPCF128)); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FSINH(SDNode *N) { + return SoftenFloatRes_Unary( + N, GetFPLibCall(N->getValueType(0), RTLIB::SINH_F32, RTLIB::SINH_F64, + RTLIB::SINH_F80, RTLIB::SINH_F128, RTLIB::SINH_PPCF128)); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::SQRT_F32, @@ -782,6 +824,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTAN(SDNode *N) { RTLIB::TAN_F80, RTLIB::TAN_F128, RTLIB::TAN_PPCF128)); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FTANH(SDNode *N) { + return SoftenFloatRes_Unary( + N, GetFPLibCall(N->getValueType(0), RTLIB::TANH_F32, RTLIB::TANH_F64, + RTLIB::TANH_F80, RTLIB::TANH_F128, RTLIB::TANH_PPCF128)); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::TRUNC_F32, @@ -1358,12 +1406,20 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::FMAXNUM: ExpandFloatRes_FMAXNUM(N, Lo, Hi); break; case ISD::STRICT_FADD: case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break; + case ISD::STRICT_FACOS: + case ISD::FACOS: ExpandFloatRes_FACOS(N, Lo, Hi); break; + case ISD::STRICT_FASIN: + case ISD::FASIN: ExpandFloatRes_FASIN(N, Lo, Hi); break; + case ISD::STRICT_FATAN: + case ISD::FATAN: ExpandFloatRes_FATAN(N, Lo, Hi); break; case 
ISD::FCBRT: ExpandFloatRes_FCBRT(N, Lo, Hi); break; case ISD::STRICT_FCEIL: case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break; case ISD::FCOPYSIGN: ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break; case ISD::STRICT_FCOS: case ISD::FCOS: ExpandFloatRes_FCOS(N, Lo, Hi); break; + case ISD::STRICT_FCOSH: + case ISD::FCOSH: ExpandFloatRes_FCOSH(N, Lo, Hi); break; case ISD::STRICT_FDIV: case ISD::FDIV: ExpandFloatRes_FDIV(N, Lo, Hi); break; case ISD::STRICT_FEXP: @@ -1403,12 +1459,16 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::FROUNDEVEN: ExpandFloatRes_FROUNDEVEN(N, Lo, Hi); break; case ISD::STRICT_FSIN: case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break; + case ISD::STRICT_FSINH: + case ISD::FSINH: ExpandFloatRes_FSINH(N, Lo, Hi); break; case ISD::STRICT_FSQRT: case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break; case ISD::STRICT_FSUB: case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break; case ISD::STRICT_FTAN: case ISD::FTAN: ExpandFloatRes_FTAN(N, Lo, Hi); break; + case ISD::STRICT_FTANH: + case ISD::FTANH: ExpandFloatRes_FTANH(N, Lo, Hi); break; case ISD::STRICT_FTRUNC: case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break; case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break; @@ -1509,6 +1569,33 @@ void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo, RTLIB::ADD_PPCF128), Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FACOS(SDNode *N, SDValue &Lo, + SDValue &Hi) { + ExpandFloatRes_Unary(N, + GetFPLibCall(N->getValueType(0), RTLIB::ACOS_F32, + RTLIB::ACOS_F64, RTLIB::ACOS_F80, + RTLIB::ACOS_F128, RTLIB::ACOS_PPCF128), + Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FASIN(SDNode *N, SDValue &Lo, + SDValue &Hi) { + ExpandFloatRes_Unary(N, + GetFPLibCall(N->getValueType(0), RTLIB::ASIN_F32, + RTLIB::ASIN_F64, RTLIB::ASIN_F80, + RTLIB::ASIN_F128, RTLIB::ASIN_PPCF128), + Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FATAN(SDNode *N, SDValue &Lo, + SDValue &Hi) { + ExpandFloatRes_Unary(N, + GetFPLibCall(N->getValueType(0), RTLIB::ATAN_F32, + RTLIB::ATAN_F64, RTLIB::ATAN_F80, + RTLIB::ATAN_F128, RTLIB::ATAN_PPCF128), + Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FCBRT(SDNode *N, SDValue &Lo, SDValue &Hi) { ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::CBRT_F32, @@ -1543,6 +1630,15 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N, RTLIB::COS_PPCF128), Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FCOSH(SDNode *N, SDValue &Lo, + SDValue &Hi) { + ExpandFloatRes_Unary(N, + GetFPLibCall(N->getValueType(0), RTLIB::COSH_F32, + RTLIB::COSH_F64, RTLIB::COSH_F80, + RTLIB::COSH_F128, RTLIB::COSH_PPCF128), + Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), @@ -1761,6 +1857,15 @@ void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N, RTLIB::SIN_PPCF128), Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FSINH(SDNode *N, SDValue &Lo, + SDValue &Hi) { + ExpandFloatRes_Unary(N, + GetFPLibCall(N->getValueType(0), RTLIB::SINH_F32, + RTLIB::SINH_F64, RTLIB::SINH_F80, + RTLIB::SINH_F128, RTLIB::SINH_PPCF128), + Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N, SDValue &Lo, SDValue &Hi) { ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), @@ -1788,6 +1893,15 @@ void DAGTypeLegalizer::ExpandFloatRes_FTAN(SDNode *N, SDValue &Lo, Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FTANH(SDNode *N, SDValue &Lo, + SDValue &Hi) { + ExpandFloatRes_Unary(N, + 
GetFPLibCall(N->getValueType(0), RTLIB::TANH_F32, + RTLIB::TANH_F64, RTLIB::TANH_F80, + RTLIB::TANH_F128, RTLIB::TANH_PPCF128), + Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N, SDValue &Lo, SDValue &Hi) { ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), @@ -2481,9 +2595,13 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { // Unary FP Operations case ISD::FABS: + case ISD::FACOS: + case ISD::FASIN: + case ISD::FATAN: case ISD::FCBRT: case ISD::FCEIL: case ISD::FCOS: + case ISD::FCOSH: case ISD::FEXP: case ISD::FEXP2: case ISD::FEXP10: @@ -2497,9 +2615,11 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { case ISD::FROUND: case ISD::FROUNDEVEN: case ISD::FSIN: + case ISD::FSINH: case ISD::FSQRT: case ISD::FTRUNC: case ISD::FTAN: + case ISD::FTANH: case ISD::FCANONICALIZE: R = PromoteFloatRes_UnaryOp(N); break; // Binary FP Operations @@ -2916,9 +3036,13 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { // Unary FP Operations case ISD::FABS: + case ISD::FACOS: + case ISD::FASIN: + case ISD::FATAN: case ISD::FCBRT: case ISD::FCEIL: case ISD::FCOS: + case ISD::FCOSH: case ISD::FEXP: case ISD::FEXP2: case ISD::FEXP10: @@ -2933,9 +3057,11 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { case ISD::FROUND: case ISD::FROUNDEVEN: case ISD::FSIN: + case ISD::FSINH: case ISD::FSQRT: case ISD::FTRUNC: case ISD::FTAN: + case ISD::FTANH: case ISD::FCANONICALIZE: R = SoftPromoteHalfRes_UnaryOp(N); break; // Binary FP Operations diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 15075bea104d3f..7af47ed250d91b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -559,6 +559,9 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftenFloatRes_EXTRACT_ELEMENT(SDNode *N); SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_FABS(SDNode *N); + SDValue SoftenFloatRes_FACOS(SDNode *N); + SDValue SoftenFloatRes_FASIN(SDNode *N); + SDValue SoftenFloatRes_FATAN(SDNode *N); SDValue SoftenFloatRes_FMINNUM(SDNode *N); SDValue SoftenFloatRes_FMAXNUM(SDNode *N); SDValue SoftenFloatRes_FADD(SDNode *N); @@ -566,6 +569,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftenFloatRes_FCEIL(SDNode *N); SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N); SDValue SoftenFloatRes_FCOS(SDNode *N); + SDValue SoftenFloatRes_FCOSH(SDNode *N); SDValue SoftenFloatRes_FDIV(SDNode *N); SDValue SoftenFloatRes_FEXP(SDNode *N); SDValue SoftenFloatRes_FEXP2(SDNode *N); @@ -591,9 +595,11 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftenFloatRes_FROUND(SDNode *N); SDValue SoftenFloatRes_FROUNDEVEN(SDNode *N); SDValue SoftenFloatRes_FSIN(SDNode *N); + SDValue SoftenFloatRes_FSINH(SDNode *N); SDValue SoftenFloatRes_FSQRT(SDNode *N); SDValue SoftenFloatRes_FSUB(SDNode *N); SDValue SoftenFloatRes_FTAN(SDNode *N); + SDValue SoftenFloatRes_FTANH(SDNode *N); SDValue SoftenFloatRes_FTRUNC(SDNode *N); SDValue SoftenFloatRes_LOAD(SDNode *N); SDValue SoftenFloatRes_ATOMIC_LOAD(SDNode *N); @@ -645,6 +651,9 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue &Lo, SDValue &Hi); // clang-format off void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FACOS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FASIN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FATAN (SDNode *N, 
SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -652,6 +661,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCOSH (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -674,9 +684,11 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void ExpandFloatRes_FROUND (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FROUNDEVEN(SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSINH (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FTAN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FTANH (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 14b147cc5b01be..307d1fc920d488 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -402,6 +402,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FSIN: case ISD::FCOS: case ISD::FTAN: + case ISD::FASIN: + case ISD::FACOS: + case ISD::FATAN: + case ISD::FSINH: + case ISD::FCOSH: + case ISD::FTANH: case ISD::FLDEXP: case ISD::FPOWI: case ISD::FPOW: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index bbf08e862da12f..dde7046e56e9c6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -85,8 +85,12 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: case ISD::FABS: + case ISD::FACOS: + case ISD::FASIN: + case ISD::FATAN: case ISD::FCEIL: case ISD::FCOS: + case ISD::FCOSH: case ISD::FEXP: case ISD::FEXP2: case ISD::FEXP10: @@ -107,8 +111,10 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FROUND: case ISD::FROUNDEVEN: case ISD::FSIN: + case ISD::FSINH: case ISD::FSQRT: case ISD::FTAN: + case ISD::FTANH: case ISD::FTRUNC: case ISD::SIGN_EXTEND: case ISD::SINT_TO_FP: @@ -1146,9 +1152,13 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::CTPOP: case ISD::VP_CTPOP: case ISD::FABS: case ISD::VP_FABS: + case ISD::FACOS: + case ISD::FASIN: + case ISD::FATAN: case ISD::FCEIL: case ISD::VP_FCEIL: case ISD::FCOS: + case ISD::FCOSH: case ISD::FEXP: case ISD::FEXP2: case ISD::FEXP10: @@ -1181,8 +1191,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FROUNDEVEN: case ISD::VP_FROUNDEVEN: case ISD::FSIN: + case ISD::FSINH: case ISD::FSQRT: case ISD::VP_SQRT: case 
ISD::FTAN: + case ISD::FTANH: case ISD::FTRUNC: case ISD::VP_FROUNDTOZERO: case ISD::SINT_TO_FP: @@ -4479,8 +4491,12 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { break; case ISD::FABS: + case ISD::FACOS: + case ISD::FASIN: + case ISD::FATAN: case ISD::FCEIL: case ISD::FCOS: + case ISD::FCOSH: case ISD::FEXP: case ISD::FEXP2: case ISD::FEXP10: @@ -4493,8 +4509,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FROUND: case ISD::FROUNDEVEN: case ISD::FSIN: + case ISD::FSINH: case ISD::FSQRT: case ISD::FTAN: + case ISD::FTANH: case ISD::FTRUNC: if (unrollExpandedOp()) break; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 79f90bae1d8d66..b335308844fe9d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5397,6 +5397,12 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const case ISD::FSIN: case ISD::FCOS: case ISD::FTAN: + case ISD::FASIN: + case ISD::FACOS: + case ISD::FATAN: + case ISD::FSINH: + case ISD::FCOSH: + case ISD::FTANH: case ISD::FMA: case ISD::FMAD: { if (SNaN) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 276d980c1dcca9..b0746014daf5ac 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6792,6 +6792,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::sin: case Intrinsic::cos: case Intrinsic::tan: + case Intrinsic::asin: + case Intrinsic::acos: + case Intrinsic::atan: + case Intrinsic::sinh: + case Intrinsic::cosh: + case Intrinsic::tanh: case Intrinsic::exp10: case Intrinsic::floor: case Intrinsic::ceil: @@ -6810,6 +6816,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::sin: Opcode = ISD::FSIN; break; case Intrinsic::cos: Opcode = ISD::FCOS; break; case Intrinsic::tan: Opcode = ISD::FTAN; break; + case Intrinsic::asin: Opcode = ISD::FASIN; break; + case Intrinsic::acos: Opcode = ISD::FACOS; break; + case Intrinsic::atan: Opcode = ISD::FATAN; break; + case Intrinsic::sinh: Opcode = ISD::FSINH; break; + case Intrinsic::cosh: Opcode = ISD::FCOSH; break; + case Intrinsic::tanh: Opcode = ISD::FTANH; break; case Intrinsic::exp10: Opcode = ISD::FEXP10; break; case Intrinsic::floor: Opcode = ISD::FFLOOR; break; case Intrinsic::ceil: Opcode = ISD::FCEIL; break; @@ -9261,6 +9273,42 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (visitUnaryFloatCall(I, ISD::FTAN)) return; break; + case LibFunc_asin: + case LibFunc_asinf: + case LibFunc_asinl: + if (visitUnaryFloatCall(I, ISD::FASIN)) + return; + break; + case LibFunc_acos: + case LibFunc_acosf: + case LibFunc_acosl: + if (visitUnaryFloatCall(I, ISD::FACOS)) + return; + break; + case LibFunc_atan: + case LibFunc_atanf: + case LibFunc_atanl: + if (visitUnaryFloatCall(I, ISD::FATAN)) + return; + break; + case LibFunc_sinh: + case LibFunc_sinhf: + case LibFunc_sinhl: + if (visitUnaryFloatCall(I, ISD::FSINH)) + return; + break; + case LibFunc_cosh: + case LibFunc_coshf: + case LibFunc_coshl: + if (visitUnaryFloatCall(I, ISD::FCOSH)) + return; + break; + case LibFunc_tanh: + case LibFunc_tanhf: + case LibFunc_tanhl: + if (visitUnaryFloatCall(I, ISD::FTANH)) + return; + break; case LibFunc_sqrt: case LibFunc_sqrtf: case LibFunc_sqrtl: diff --git 
a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index c1d2c095b103c2..cc8de3a217f826 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -214,6 +214,18 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FSINCOS: return "fsincos"; case ISD::FTAN: return "ftan"; case ISD::STRICT_FTAN: return "strict_ftan"; + case ISD::FASIN: return "fasin"; + case ISD::STRICT_FASIN: return "strict_fasin"; + case ISD::FACOS: return "facos"; + case ISD::STRICT_FACOS: return "strict_facos"; + case ISD::FATAN: return "fatan"; + case ISD::STRICT_FATAN: return "strict_fatan"; + case ISD::FSINH: return "fsinh"; + case ISD::STRICT_FSINH: return "strict_fsinh"; + case ISD::FCOSH: return "fcosh"; + case ISD::STRICT_FCOSH: return "strict_fcosh"; + case ISD::FTANH: return "ftanh"; + case ISD::STRICT_FTANH: return "strict_ftanh"; case ISD::FTRUNC: return "ftrunc"; case ISD::STRICT_FTRUNC: return "strict_ftrunc"; case ISD::FFLOOR: return "ffloor"; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 4a2db272defd11..8ea4dbdd3227ae 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -143,6 +143,12 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) { setLibcallName(RTLIB::COS_F128, "cosf128"); setLibcallName(RTLIB::TAN_F128, "tanf128"); setLibcallName(RTLIB::SINCOS_F128, "sincosf128"); + setLibcallName(RTLIB::ASIN_F128, "asinf128"); + setLibcallName(RTLIB::ACOS_F128, "acosf128"); + setLibcallName(RTLIB::ATAN_F128, "atanf128"); + setLibcallName(RTLIB::SINH_F128, "sinhf128"); + setLibcallName(RTLIB::COSH_F128, "coshf128"); + setLibcallName(RTLIB::TANH_F128, "tanhf128"); setLibcallName(RTLIB::POW_F128, "powf128"); setLibcallName(RTLIB::POW_FINITE_F128, "__powf128_finite"); setLibcallName(RTLIB::CEIL_F128, "ceilf128"); @@ -1102,7 +1108,8 @@ void TargetLoweringBase::initActions() { setOperationAction( {ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, ISD::ANY_EXTEND_VECTOR_INREG, ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG, - ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::FTAN}, + ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::FTAN, ISD::FACOS, + ISD::FASIN, ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH}, VT, Expand); // Constrained floating-point operations default to expand. @@ -1154,14 +1161,17 @@ void TargetLoweringBase::initActions() { Expand); // These library functions default to expand. - setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP, - ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR, ISD::FNEARBYINT, - ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, ISD::LROUND, - ISD::LLROUND, ISD::LRINT, ISD::LLRINT, ISD::FROUNDEVEN, - ISD::FTAN}, + setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, + ISD::FEXP, ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR, + ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, + ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT, + ISD::FROUNDEVEN, ISD::FTAN, ISD::FACOS, ISD::FASIN, + ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH}, {MVT::f32, MVT::f64, MVT::f128}, Expand); - setOperationAction(ISD::FTAN, MVT::f16, Promote); + setOperationAction({ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN, ISD::FCOSH, + ISD::FSINH, ISD::FTANH}, + MVT::f16, Promote); // Default ISD::TRAP to expand (which turns it into abort). 
setOperationAction(ISD::TRAP, MVT::Other, Expand); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index bfe17398879e3e..bf655fc533db75 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -833,6 +833,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FCOS , MVT::f80, Expand); setOperationAction(ISD::FSINCOS, MVT::f80, Expand); setOperationAction(ISD::FTAN , MVT::f80, Expand); + setOperationAction(ISD::FASIN , MVT::f80, Expand); + setOperationAction(ISD::FACOS , MVT::f80, Expand); + setOperationAction(ISD::FATAN , MVT::f80, Expand); + setOperationAction(ISD::FSINH , MVT::f80, Expand); + setOperationAction(ISD::FCOSH , MVT::f80, Expand); + setOperationAction(ISD::FTANH , MVT::f80, Expand); // clang-format on setOperationAction(ISD::FFLOOR, MVT::f80, Expand); diff --git a/llvm/test/Assembler/fp-intrinsics-attr.ll b/llvm/test/Assembler/fp-intrinsics-attr.ll index 613630e1a2b4d2..da6507f051766c 100644 --- a/llvm/test/Assembler/fp-intrinsics-attr.ll +++ b/llvm/test/Assembler/fp-intrinsics-attr.ll @@ -90,6 +90,36 @@ define void @func(double %a, double %b, double %c, i32 %i) strictfp { metadata !"round.dynamic", metadata !"fpexcept.strict") + %acos = call double @llvm.experimental.constrained.acos.f64( + double %a, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + + %asin = call double @llvm.experimental.constrained.asin.f64( + double %a, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + + %atan = call double @llvm.experimental.constrained.atan.f64( + double %a, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + + %cosh = call double @llvm.experimental.constrained.cosh.f64( + double %a, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + + %sinh = call double @llvm.experimental.constrained.sinh.f64( + double %a, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + + %tanh = call double @llvm.experimental.constrained.tanh.f64( + double %a, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %pow = call double @llvm.experimental.constrained.pow.f64( double %a, double %b, metadata !"round.dynamic", @@ -252,6 +282,24 @@ declare double @llvm.experimental.constrained.cos.f64(double, metadata, metadata declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata) ; CHECK: @llvm.experimental.constrained.tan.f64({{.*}}) #[[ATTR1]] +declare double @llvm.experimental.constrained.asin.f64(double, metadata, metadata) +; CHECK: @llvm.experimental.constrained.asin.f64({{.*}}) #[[ATTR1]] + +declare double @llvm.experimental.constrained.acos.f64(double, metadata, metadata) +; CHECK: @llvm.experimental.constrained.acos.f64({{.*}}) #[[ATTR1]] + +declare double @llvm.experimental.constrained.atan.f64(double, metadata, metadata) +; CHECK: @llvm.experimental.constrained.atan.f64({{.*}}) #[[ATTR1]] + +declare double @llvm.experimental.constrained.sinh.f64(double, metadata, metadata) +; CHECK: @llvm.experimental.constrained.sinh.f64({{.*}}) #[[ATTR1]] + +declare double @llvm.experimental.constrained.cosh.f64(double, metadata, metadata) +; CHECK: @llvm.experimental.constrained.cosh.f64({{.*}}) #[[ATTR1]] + +declare double @llvm.experimental.constrained.tanh.f64(double, metadata, metadata) +; CHECK: @llvm.experimental.constrained.tanh.f64({{.*}}) #[[ATTR1]] + declare double @llvm.experimental.constrained.pow.f64(double, double, metadata, metadata) ; CHECK: @llvm.experimental.constrained.pow.f64({{.*}}) 
#[[ATTR1]] diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll index 8c48e6f9da80a7..bb87252e0b9b08 100644 --- a/llvm/test/CodeGen/X86/fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll @@ -2809,6 +2809,311 @@ entry: ret double %result } +; Verify that acos(42.0) isn't simplified when the rounding mode is unknown. +define double @facos() #0 { +; X87-LABEL: facos: +; X87: # %bb.0: # %entry +; X87-NEXT: subl $12, %esp +; X87-NEXT: .cfi_def_cfa_offset 16 +; X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait +; X87-NEXT: calll acos +; X87-NEXT: addl $12, %esp +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl +; +; X86-SSE-LABEL: facos: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: subl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 16 +; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; X86-SSE-NEXT: movsd %xmm0, (%esp) +; X86-SSE-NEXT: calll acos +; X86-SSE-NEXT: addl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE-NEXT: retl +; +; SSE-LABEL: facos: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; SSE-NEXT: callq acos@PLT +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq +; +; AVX-LABEL: facos: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; AVX-NEXT: callq acos@PLT +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %result = call double @llvm.experimental.constrained.acos.f64(double 42.0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + +; Verify that asin(42.0) isn't simplified when the rounding mode is unknown. +define double @fasin() #0 { +; X87-LABEL: fasin: +; X87: # %bb.0: # %entry +; X87-NEXT: subl $12, %esp +; X87-NEXT: .cfi_def_cfa_offset 16 +; X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait +; X87-NEXT: calll asin +; X87-NEXT: addl $12, %esp +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl +; +; X86-SSE-LABEL: fasin: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: subl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 16 +; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; X86-SSE-NEXT: movsd %xmm0, (%esp) +; X86-SSE-NEXT: calll asin +; X86-SSE-NEXT: addl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE-NEXT: retl +; +; SSE-LABEL: fasin: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; SSE-NEXT: callq asin@PLT +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq +; +; AVX-LABEL: fasin: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; AVX-NEXT: callq asin@PLT +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %result = call double @llvm.experimental.constrained.asin.f64(double 42.0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + +; Verify that atan(42.0) isn't simplified when the rounding mode is unknown. 
+define double @fatan() #0 { +; X87-LABEL: fatan: +; X87: # %bb.0: # %entry +; X87-NEXT: subl $12, %esp +; X87-NEXT: .cfi_def_cfa_offset 16 +; X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait +; X87-NEXT: calll atan +; X87-NEXT: addl $12, %esp +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl +; +; X86-SSE-LABEL: fatan: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: subl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 16 +; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; X86-SSE-NEXT: movsd %xmm0, (%esp) +; X86-SSE-NEXT: calll atan +; X86-SSE-NEXT: addl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE-NEXT: retl +; +; SSE-LABEL: fatan: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; SSE-NEXT: callq atan@PLT +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq +; +; AVX-LABEL: fatan: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; AVX-NEXT: callq atan@PLT +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %result = call double @llvm.experimental.constrained.atan.f64(double 42.0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + +; Verify that cosh(42.0) isn't simplified when the rounding mode is unknown. +define double @fcosh() #0 { +; X87-LABEL: fcosh: +; X87: # %bb.0: # %entry +; X87-NEXT: subl $12, %esp +; X87-NEXT: .cfi_def_cfa_offset 16 +; X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait +; X87-NEXT: calll cosh +; X87-NEXT: addl $12, %esp +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl +; +; X86-SSE-LABEL: fcosh: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: subl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 16 +; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; X86-SSE-NEXT: movsd %xmm0, (%esp) +; X86-SSE-NEXT: calll cosh +; X86-SSE-NEXT: addl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE-NEXT: retl +; +; SSE-LABEL: fcosh: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; SSE-NEXT: callq cosh@PLT +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq +; +; AVX-LABEL: fcosh: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; AVX-NEXT: callq cosh@PLT +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %result = call double @llvm.experimental.constrained.cosh.f64(double 42.0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + +; Verify that sinh(42.0) isn't simplified when the rounding mode is unknown. 
+define double @fsinh() #0 { +; X87-LABEL: fsinh: +; X87: # %bb.0: # %entry +; X87-NEXT: subl $12, %esp +; X87-NEXT: .cfi_def_cfa_offset 16 +; X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait +; X87-NEXT: calll sinh +; X87-NEXT: addl $12, %esp +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl +; +; X86-SSE-LABEL: fsinh: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: subl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 16 +; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; X86-SSE-NEXT: movsd %xmm0, (%esp) +; X86-SSE-NEXT: calll sinh +; X86-SSE-NEXT: addl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE-NEXT: retl +; +; SSE-LABEL: fsinh: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; SSE-NEXT: callq sinh@PLT +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq +; +; AVX-LABEL: fsinh: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; AVX-NEXT: callq sinh@PLT +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %result = call double @llvm.experimental.constrained.sinh.f64(double 42.0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + +; Verify that tanh(42.0) isn't simplified when the rounding mode is unknown. +define double @ftanh() #0 { +; X87-LABEL: ftanh: +; X87: # %bb.0: # %entry +; X87-NEXT: subl $12, %esp +; X87-NEXT: .cfi_def_cfa_offset 16 +; X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait +; X87-NEXT: calll tanh +; X87-NEXT: addl $12, %esp +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl +; +; X86-SSE-LABEL: ftanh: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: subl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 16 +; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; X86-SSE-NEXT: movsd %xmm0, (%esp) +; X86-SSE-NEXT: calll tanh +; X86-SSE-NEXT: addl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE-NEXT: retl +; +; SSE-LABEL: ftanh: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; SSE-NEXT: callq tanh@PLT +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq +; +; AVX-LABEL: ftanh: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; AVX-NEXT: callq tanh@PLT +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %result = call double @llvm.experimental.constrained.tanh.f64(double 42.0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} attributes #0 = { strictfp } @@ -2824,6 +3129,12 @@ declare double @llvm.experimental.constrained.powi.f64(double, i32, metadata, me declare double @llvm.experimental.constrained.sin.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.cos.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.asin.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.acos.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.atan.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.sinh.f64(double, metadata, metadata) 
+declare double @llvm.experimental.constrained.cosh.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.tanh.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.exp.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.exp2.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.log.f64(double, metadata, metadata) diff --git a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll index bd51f553587db7..9e84dfa5c41ae6 100644 --- a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll +++ b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll @@ -347,6 +347,46 @@ entry: ret fp128 %ceil } +define fp128 @acos(fp128 %x) nounwind strictfp { +; ANDROID-LABEL: acos: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq acosl@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: acos: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq acosf128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq +; +; X86-LABEL: acos: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll acosl +; X86-NEXT: addl $28, %esp +; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: movaps %xmm0, (%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $24, %esp +; X86-NEXT: popl %esi +; X86-NEXT: retl $4 +entry: + %acos = call fp128 @llvm.experimental.constrained.acos.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret fp128 %acos +} + define fp128 @cos(fp128 %x) nounwind strictfp { ; ANDROID-LABEL: cos: ; ANDROID: # %bb.0: # %entry @@ -387,6 +427,46 @@ entry: ret fp128 %cos } +define fp128 @cosh(fp128 %x) nounwind strictfp { +; ANDROID-LABEL: cosh: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq coshl@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: cosh: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq coshf128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq +; +; X86-LABEL: cosh: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll coshl +; X86-NEXT: addl $28, %esp +; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: movaps %xmm0, (%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $24, %esp +; X86-NEXT: popl %esi +; X86-NEXT: retl $4 +entry: + %cosh = call fp128 @llvm.experimental.constrained.cosh.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret fp128 %cosh +} + define fp128 @exp(fp128 %x) nounwind strictfp { ; ANDROID-LABEL: exp: ; ANDROID: # %bb.0: # %entry @@ -967,6 +1047,46 @@ entry: ret fp128 %roundeven } +define fp128 @asin(fp128 %x) nounwind strictfp { +; ANDROID-LABEL: asin: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq asinl@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: asin: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; 
+; GNU-NEXT: popq %rax
+; GNU-NEXT: retq
+;
+; X86-LABEL: asin:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $24, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll asinl
+; X86-NEXT: addl $28, %esp
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: addl $24, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: retl $4
+entry:
+ %asin = call fp128 @llvm.experimental.constrained.asin.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ ret fp128 %asin
+}
+
 define fp128 @sin(fp128 %x) nounwind strictfp {
 ; ANDROID-LABEL: sin:
 ; ANDROID: # %bb.0: # %entry
 ; ANDROID-NEXT: pushq %rax
 ; ANDROID-NEXT: callq sinl@PLT
 ; ANDROID-NEXT: popq %rax
 ; ANDROID-NEXT: retq
 ;
@@ -1007,6 +1127,46 @@ entry:
 ret fp128 %sin
 }
 
+define fp128 @sinh(fp128 %x) nounwind strictfp {
+; ANDROID-LABEL: sinh:
+; ANDROID: # %bb.0: # %entry
+; ANDROID-NEXT: pushq %rax
+; ANDROID-NEXT: callq sinhl@PLT
+; ANDROID-NEXT: popq %rax
+; ANDROID-NEXT: retq
+;
+; GNU-LABEL: sinh:
+; GNU: # %bb.0: # %entry
+; GNU-NEXT: pushq %rax
+; GNU-NEXT: callq sinhf128@PLT
+; GNU-NEXT: popq %rax
+; GNU-NEXT: retq
+;
+; X86-LABEL: sinh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $24, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll sinhl
+; X86-NEXT: addl $28, %esp
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: addl $24, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: retl $4
+entry:
+ %sinh = call fp128 @llvm.experimental.constrained.sinh.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ ret fp128 %sinh
+}
+
 define fp128 @sqrt(fp128 %x) nounwind strictfp {
 ; ANDROID-LABEL: sqrt:
 ; ANDROID: # %bb.0: # %entry
 ; ANDROID-NEXT: pushq %rax
 ; ANDROID-NEXT: callq sqrtl@PLT
 ; ANDROID-NEXT: popq %rax
 ; ANDROID-NEXT: retq
 ;
@@ -1047,6 +1207,46 @@ entry:
 ret fp128 %sqrt
 }
 
+define fp128 @atan(fp128 %x) nounwind strictfp {
+; ANDROID-LABEL: atan:
+; ANDROID: # %bb.0: # %entry
+; ANDROID-NEXT: pushq %rax
+; ANDROID-NEXT: callq atanl@PLT
+; ANDROID-NEXT: popq %rax
+; ANDROID-NEXT: retq
+;
+; GNU-LABEL: atan:
+; GNU: # %bb.0: # %entry
+; GNU-NEXT: pushq %rax
+; GNU-NEXT: callq atanf128@PLT
+; GNU-NEXT: popq %rax
+; GNU-NEXT: retq
+;
+; X86-LABEL: atan:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $24, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll atanl
+; X86-NEXT: addl $28, %esp
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: addl $24, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: retl $4
+entry:
+ %atan = call fp128 @llvm.experimental.constrained.atan.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ ret fp128 %atan
+}
+
 define fp128 @tan(fp128 %x) nounwind strictfp {
 ; ANDROID-LABEL: tan:
 ; ANDROID: # %bb.0: # %entry
 ; ANDROID-NEXT: pushq %rax
 ; ANDROID-NEXT: callq tanl@PLT
 ; ANDROID-NEXT: popq %rax
 ; ANDROID-NEXT: retq
 ;
@@ -1087,6 +1287,46 @@ entry:
 ret fp128 %tan
 }
 
+define fp128 @tanh(fp128 %x) nounwind strictfp {
+; ANDROID-LABEL: tanh:
+; ANDROID: # %bb.0: # %entry
+; ANDROID-NEXT: pushq %rax
+; ANDROID-NEXT: callq tanhl@PLT
+; ANDROID-NEXT: popq %rax
+; ANDROID-NEXT: retq
+;
+; GNU-LABEL: tanh:
+; GNU: # %bb.0: # %entry
+; GNU-NEXT: pushq %rax
+; GNU-NEXT: callq tanhf128@PLT
+; GNU-NEXT: popq %rax
+; GNU-NEXT: retq
+;
+; X86-LABEL: tanh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $24, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll tanhl
+; X86-NEXT: addl $28, %esp
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: addl $24, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: retl $4
+entry:
+ %tanh = call fp128 @llvm.experimental.constrained.tanh.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ ret fp128 %tanh
+}
+
 define fp128 @trunc(fp128 %x) nounwind strictfp {
 ; ANDROID-LABEL: trunc:
 ; ANDROID: # %bb.0: # %entry
diff --git a/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll b/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll
index 89729975cfd61b..c14e99f3acb34e 100644
--- a/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll
+++ b/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll
@@ -89,6 +89,31 @@ entry:
 ret x86_fp80 %ceil
 }
 
+define x86_fp80 @acos(x86_fp80 %x) nounwind strictfp {
+; X86-LABEL: acos:
+; X86: # %bb.0: # %entry
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: fstpt (%esp)
+; X86-NEXT: wait
+; X86-NEXT: calll acosl
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: retl
+;
+; X64-LABEL: acos:
+; X64: # %bb.0: # %entry
+; X64-NEXT: subq $24, %rsp
+; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: fstpt (%rsp)
+; X64-NEXT: wait
+; X64-NEXT: callq acosl@PLT
+; X64-NEXT: addq $24, %rsp
+; X64-NEXT: retq
+entry:
+ %acos = call x86_fp80 @llvm.experimental.constrained.acos.f80(x86_fp80 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ ret x86_fp80 %acos
+}
+
 define x86_fp80 @cos(x86_fp80 %x) nounwind strictfp {
 ; X86-LABEL: cos:
 ; X86: # %bb.0: # %entry
@@ -114,6 +139,31 @@ entry:
 ret x86_fp80 %cos
 }
 
+define x86_fp80 @cosh(x86_fp80 %x) nounwind strictfp {
+; X86-LABEL: cosh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: fstpt (%esp)
+; X86-NEXT: wait
+; X86-NEXT: calll coshl
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: retl
+;
+; X64-LABEL: cosh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: subq $24, %rsp
+; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: fstpt (%rsp)
+; X64-NEXT: wait
+; X64-NEXT: callq coshl@PLT
+; X64-NEXT: addq $24, %rsp
+; X64-NEXT: retq
+entry:
+ %cosh = call x86_fp80 @llvm.experimental.constrained.cosh.f80(x86_fp80 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ ret x86_fp80 %cosh
+}
+
 define x86_fp80 @exp(x86_fp80 %x) nounwind strictfp {
 ; X86-LABEL: exp:
 ; X86: # %bb.0: # %entry
@@ -479,6 +529,31 @@ entry:
 ret x86_fp80 %roundeven
 }
 
+define x86_fp80 @asin(x86_fp80 %x) nounwind strictfp {
+; X86-LABEL: asin:
+; X86: # %bb.0: # %entry
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: fstpt (%esp)
+; X86-NEXT: wait
+; X86-NEXT: calll asinl
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: retl
+;
+; X64-LABEL: asin:
+; X64: # %bb.0: # %entry
+; X64-NEXT: subq $24, %rsp
+; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: fstpt (%rsp)
+; X64-NEXT: wait
+; X64-NEXT: callq asinl@PLT
+; X64-NEXT: addq $24, %rsp
+; X64-NEXT: retq
+entry:
+ %asin = call x86_fp80 @llvm.experimental.constrained.asin.f80(x86_fp80 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ ret x86_fp80 %asin
+}
+
 define x86_fp80 @sin(x86_fp80 %x) nounwind strictfp {
 ; X86-LABEL: sin:
 ; X86: # %bb.0: # %entry
@@ -504,6 +579,56 @@ entry:
 ret x86_fp80 %sin
 }
 
+define x86_fp80 @sinh(x86_fp80 %x) nounwind strictfp {
+; X86-LABEL: sinh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: fstpt (%esp)
+; X86-NEXT: wait
+; X86-NEXT: calll sinhl
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: retl
+;
+; X64-LABEL: sinh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: subq $24, %rsp
+; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: fstpt (%rsp)
+; X64-NEXT: wait
+; X64-NEXT: callq sinhl@PLT
+; X64-NEXT: addq $24, %rsp
+; X64-NEXT: retq
+entry:
+ %sinh = call x86_fp80 @llvm.experimental.constrained.sinh.f80(x86_fp80 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ ret x86_fp80 %sinh
+}
+
+define x86_fp80 @atan(x86_fp80 %x) nounwind strictfp {
+; X86-LABEL: atan:
+; X86: # %bb.0: # %entry
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: fstpt (%esp)
+; X86-NEXT: wait
+; X86-NEXT: calll atanl
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: retl
+;
+; X64-LABEL: atan:
+; X64: # %bb.0: # %entry
+; X64-NEXT: subq $24, %rsp
+; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: fstpt (%rsp)
+; X64-NEXT: wait
+; X64-NEXT: callq atanl@PLT
+; X64-NEXT: addq $24, %rsp
+; X64-NEXT: retq
+entry:
+ %atan = call x86_fp80 @llvm.experimental.constrained.atan.f80(x86_fp80 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ ret x86_fp80 %atan
+}
+
 define x86_fp80 @tan(x86_fp80 %x) nounwind strictfp {
 ; X86-LABEL: tan:
 ; X86: # %bb.0: # %entry
@@ -529,6 +654,31 @@ entry:
 ret x86_fp80 %tan
 }
 
+define x86_fp80 @tanh(x86_fp80 %x) nounwind strictfp {
+; X86-LABEL: tanh:
+; X86: # %bb.0: # %entry
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: fstpt (%esp)
+; X86-NEXT: wait
+; X86-NEXT: calll tanhl
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: retl
+;
+; X64-LABEL: tanh:
+; X64: # %bb.0: # %entry
+; X64-NEXT: subq $24, %rsp
+; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: fstpt (%rsp)
+; X64-NEXT: wait
+; X64-NEXT: callq tanhl@PLT
+; X64-NEXT: addq $24, %rsp
+; X64-NEXT: retq
+entry:
+ %tanh = call x86_fp80 @llvm.experimental.constrained.tanh.f80(x86_fp80 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ ret x86_fp80 %tanh
+}
+
 define x86_fp80 @trunc(x86_fp80 %x) nounwind strictfp {
 ; X86-LABEL: trunc:
 ; X86: # %bb.0: # %entry
diff --git a/llvm/test/CodeGen/X86/llvm.acos.ll b/llvm/test/CodeGen/X86/llvm.acos.ll
new file mode 100644
index 00000000000000..202fde8291930f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/llvm.acos.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+define half @use_acosf16(half %a) nounwind {
+; CHECK-LABEL: use_acosf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq __extendhfsf2@PLT
+; CHECK-NEXT: callq acosf@PLT
+; CHECK-NEXT: callq __truncsfhf2@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+ %x = call half @llvm.acos.f16(half %a)
+ ret half %x
+}
+
+define float @use_acosf32(float %a) nounwind {
+; CHECK-LABEL: use_acosf32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp acosf@PLT # TAILCALL
+ %x = call float @llvm.acos.f32(float %a)
+ ret float %x
+}
+
+define double @use_acosf64(double %a) nounwind {
+; CHECK-LABEL: use_acosf64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp acos@PLT # TAILCALL
+ %x = call double @llvm.acos.f64(double %a)
+ ret double %x
+}
+
+define x86_fp80 @use_acosf80(x86_fp80 %a) nounwind {
+; CHECK-LABEL: use_acosf80:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: fldt 32(%rsp)
+; CHECK-NEXT: fstpt (%rsp)
+; CHECK-NEXT: callq acosl@PLT
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: retq
+ %x = call x86_fp80 @llvm.acos.f80(x86_fp80 %a)
+ ret x86_fp80 %x
+}
+
+define fp128 @use_acosfp128(fp128 %a) nounwind {
+; CHECK-LABEL: use_acosfp128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp acosf128@PLT # TAILCALL
+ %x = call fp128 @llvm.acos.f128(fp128 %a)
+ ret fp128 %x
+}
+
+define ppc_fp128 @use_acosppc_fp128(ppc_fp128 %a) nounwind {
+; CHECK-LABEL: use_acosppc_fp128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq acosl@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+ %x = call ppc_fp128 @llvm.acos.ppcf128(ppc_fp128 %a)
+ ret ppc_fp128 %x
+}
+
+declare half @llvm.acos.f16(half)
+declare float @llvm.acos.f32(float)
+declare double @llvm.acos.f64(double)
+declare x86_fp80 @llvm.acos.f80(x86_fp80)
+declare fp128 @llvm.acos.f128(fp128)
+declare ppc_fp128 @llvm.acos.ppcf128(ppc_fp128)
diff --git a/llvm/test/CodeGen/X86/llvm.asin.ll b/llvm/test/CodeGen/X86/llvm.asin.ll
new file mode 100644
index 00000000000000..1e047d01c703c3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/llvm.asin.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+define half @use_asinf16(half %a) nounwind {
+; CHECK-LABEL: use_asinf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq __extendhfsf2@PLT
+; CHECK-NEXT: callq asinf@PLT
+; CHECK-NEXT: callq __truncsfhf2@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+ %x = call half @llvm.asin.f16(half %a)
+ ret half %x
+}
+
+define float @use_asinf32(float %a) nounwind {
+; CHECK-LABEL: use_asinf32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp asinf@PLT # TAILCALL
+ %x = call float @llvm.asin.f32(float %a)
+ ret float %x
+}
+
+define double @use_asinf64(double %a) nounwind {
+; CHECK-LABEL: use_asinf64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp asin@PLT # TAILCALL
+ %x = call double @llvm.asin.f64(double %a)
+ ret double %x
+}
+
+define x86_fp80 @use_asinf80(x86_fp80 %a) nounwind {
+; CHECK-LABEL: use_asinf80:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: fldt 32(%rsp)
+; CHECK-NEXT: fstpt (%rsp)
+; CHECK-NEXT: callq asinl@PLT
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: retq
+ %x = call x86_fp80 @llvm.asin.f80(x86_fp80 %a)
+ ret x86_fp80 %x
+}
+
+define fp128 @use_asinfp128(fp128 %a) nounwind {
+; CHECK-LABEL: use_asinfp128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp asinf128@PLT # TAILCALL
+ %x = call fp128 @llvm.asin.f128(fp128 %a)
+ ret fp128 %x
+}
+
+define ppc_fp128 @use_asinppc_fp128(ppc_fp128 %a) nounwind {
+; CHECK-LABEL: use_asinppc_fp128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq asinl@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+ %x = call ppc_fp128 @llvm.asin.ppcf128(ppc_fp128 %a)
+ ret ppc_fp128 %x
+}
+
+declare half @llvm.asin.f16(half)
+declare float @llvm.asin.f32(float)
+declare double @llvm.asin.f64(double)
+declare x86_fp80 @llvm.asin.f80(x86_fp80)
+declare fp128 @llvm.asin.f128(fp128)
+declare ppc_fp128 @llvm.asin.ppcf128(ppc_fp128)
diff --git a/llvm/test/CodeGen/X86/llvm.atan.ll b/llvm/test/CodeGen/X86/llvm.atan.ll
new file mode 100644
index 00000000000000..d33ef7fd3ac5f0
--- /dev/null
+++ b/llvm/test/CodeGen/X86/llvm.atan.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+define half @use_atanf16(half %a) nounwind {
+; CHECK-LABEL: use_atanf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq __extendhfsf2@PLT
+; CHECK-NEXT: callq atanf@PLT
+; CHECK-NEXT: callq __truncsfhf2@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+ %x = call half @llvm.atan.f16(half %a)
+ ret half %x
+}
+
+define float @use_atanf32(float %a) nounwind {
+; CHECK-LABEL: use_atanf32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp atanf@PLT # TAILCALL
+ %x = call float @llvm.atan.f32(float %a)
+ ret float %x
+}
+
+define double @use_atanf64(double %a) nounwind {
+; CHECK-LABEL: use_atanf64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp atan@PLT # TAILCALL
+ %x = call double @llvm.atan.f64(double %a)
+ ret double %x
+}
+
+define x86_fp80 @use_atanf80(x86_fp80 %a) nounwind {
+; CHECK-LABEL: use_atanf80:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: fldt 32(%rsp)
+; CHECK-NEXT: fstpt (%rsp)
+; CHECK-NEXT: callq atanl@PLT
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: retq
+ %x = call x86_fp80 @llvm.atan.f80(x86_fp80 %a)
+ ret x86_fp80 %x
+}
+
+define fp128 @use_atanfp128(fp128 %a) nounwind {
+; CHECK-LABEL: use_atanfp128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp atanf128@PLT # TAILCALL
+ %x = call fp128 @llvm.atan.f128(fp128 %a)
+ ret fp128 %x
+}
+
+define ppc_fp128 @use_atanppc_fp128(ppc_fp128 %a) nounwind {
+; CHECK-LABEL: use_atanppc_fp128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq atanl@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+ %x = call ppc_fp128 @llvm.atan.ppcf128(ppc_fp128 %a)
+ ret ppc_fp128 %x
+}
+
+declare half @llvm.atan.f16(half)
+declare float @llvm.atan.f32(float)
+declare double @llvm.atan.f64(double)
+declare x86_fp80 @llvm.atan.f80(x86_fp80)
+declare fp128 @llvm.atan.f128(fp128)
+declare ppc_fp128 @llvm.atan.ppcf128(ppc_fp128)
diff --git a/llvm/test/CodeGen/X86/llvm.cosh.ll b/llvm/test/CodeGen/X86/llvm.cosh.ll
new file mode 100644
index 00000000000000..5e7582c8f86a4e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/llvm.cosh.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+define half @use_coshf16(half %a) nounwind {
+; CHECK-LABEL: use_coshf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq __extendhfsf2@PLT
+; CHECK-NEXT: callq coshf@PLT
+; CHECK-NEXT: callq __truncsfhf2@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+ %x = call half @llvm.cosh.f16(half %a)
+ ret half %x
+}
+
+define float @use_coshf32(float %a) nounwind {
+; CHECK-LABEL: use_coshf32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp coshf@PLT # TAILCALL
+ %x = call float @llvm.cosh.f32(float %a)
+ ret float %x
+}
+
+define double @use_coshf64(double %a) nounwind {
+; CHECK-LABEL: use_coshf64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp cosh@PLT # TAILCALL
+ %x = call double @llvm.cosh.f64(double %a)
+ ret double %x
+}
+
+define x86_fp80 @use_coshf80(x86_fp80 %a) nounwind {
+; CHECK-LABEL: use_coshf80:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: fldt 32(%rsp)
+; CHECK-NEXT: fstpt (%rsp)
+; CHECK-NEXT: callq coshl@PLT
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: retq
+ %x = call x86_fp80 @llvm.cosh.f80(x86_fp80 %a)
+ ret x86_fp80 %x
+}
+
+define fp128 @use_coshfp128(fp128 %a) nounwind {
+; CHECK-LABEL: use_coshfp128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp coshf128@PLT # TAILCALL
+ %x = call fp128 @llvm.cosh.f128(fp128 %a)
+ ret fp128 %x
+}
+
+define ppc_fp128 @use_coshppc_fp128(ppc_fp128 %a) nounwind {
+; CHECK-LABEL: use_coshppc_fp128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq coshl@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+ %x = call ppc_fp128 @llvm.cosh.ppcf128(ppc_fp128 %a)
+ ret ppc_fp128 %x
+}
+
+declare half @llvm.cosh.f16(half)
+declare float @llvm.cosh.f32(float)
+declare double @llvm.cosh.f64(double)
+declare x86_fp80 @llvm.cosh.f80(x86_fp80)
+declare fp128 @llvm.cosh.f128(fp128)
+declare ppc_fp128 @llvm.cosh.ppcf128(ppc_fp128)
diff --git a/llvm/test/CodeGen/X86/llvm.sinh.ll b/llvm/test/CodeGen/X86/llvm.sinh.ll
new file mode 100644
index 00000000000000..ba228421117f03
--- /dev/null
+++ b/llvm/test/CodeGen/X86/llvm.sinh.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+define half @use_sinhf16(half %a) nounwind {
+; CHECK-LABEL: use_sinhf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq __extendhfsf2@PLT
+; CHECK-NEXT: callq sinhf@PLT
+; CHECK-NEXT: callq __truncsfhf2@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+ %x = call half @llvm.sinh.f16(half %a)
+ ret half %x
+}
+
+define float @use_sinhf32(float %a) nounwind {
+; CHECK-LABEL: use_sinhf32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp sinhf@PLT # TAILCALL
+ %x = call float @llvm.sinh.f32(float %a)
+ ret float %x
+}
+
+define double @use_sinhf64(double %a) nounwind {
+; CHECK-LABEL: use_sinhf64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp sinh@PLT # TAILCALL
+ %x = call double @llvm.sinh.f64(double %a)
+ ret double %x
+}
+
+define x86_fp80 @use_sinhf80(x86_fp80 %a) nounwind {
+; CHECK-LABEL: use_sinhf80:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: fldt 32(%rsp)
+; CHECK-NEXT: fstpt (%rsp)
+; CHECK-NEXT: callq sinhl@PLT
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: retq
+ %x = call x86_fp80 @llvm.sinh.f80(x86_fp80 %a)
+ ret x86_fp80 %x
+}
+
+define fp128 @use_sinhfp128(fp128 %a) nounwind {
+; CHECK-LABEL: use_sinhfp128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp sinhf128@PLT # TAILCALL
+ %x = call fp128 @llvm.sinh.f128(fp128 %a)
+ ret fp128 %x
+}
+
+define ppc_fp128 @use_sinhppc_fp128(ppc_fp128 %a) nounwind {
+; CHECK-LABEL: use_sinhppc_fp128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq sinhl@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+ %x = call ppc_fp128 @llvm.sinh.ppcf128(ppc_fp128 %a)
+ ret ppc_fp128 %x
+}
+
+declare half @llvm.sinh.f16(half)
+declare float @llvm.sinh.f32(float)
+declare double @llvm.sinh.f64(double)
+declare x86_fp80 @llvm.sinh.f80(x86_fp80)
+declare fp128 @llvm.sinh.f128(fp128)
+declare ppc_fp128 @llvm.sinh.ppcf128(ppc_fp128)
diff --git a/llvm/test/CodeGen/X86/llvm.tanh.ll b/llvm/test/CodeGen/X86/llvm.tanh.ll
new file mode 100644
index 00000000000000..7119c401c80400
--- /dev/null
+++ b/llvm/test/CodeGen/X86/llvm.tanh.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+define half @use_tanhf16(half %a) nounwind {
+; CHECK-LABEL: use_tanhf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq __extendhfsf2@PLT
+; CHECK-NEXT: callq tanhf@PLT
+; CHECK-NEXT: callq __truncsfhf2@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+ %x = call half @llvm.tanh.f16(half %a)
+ ret half %x
+}
+
+define float @use_tanhf32(float %a) nounwind {
+; CHECK-LABEL: use_tanhf32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp tanhf@PLT # TAILCALL
+ %x = call float @llvm.tanh.f32(float %a)
+ ret float %x
+}
+
+define double @use_tanhf64(double %a) nounwind {
+; CHECK-LABEL: use_tanhf64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp tanh@PLT # TAILCALL
+ %x = call double @llvm.tanh.f64(double %a)
+ ret double %x
+}
+
+define x86_fp80 @use_tanhf80(x86_fp80 %a) nounwind {
+; CHECK-LABEL: use_tanhf80:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: fldt 32(%rsp)
+; CHECK-NEXT: fstpt (%rsp)
+; CHECK-NEXT: callq tanhl@PLT
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: retq
+ %x = call x86_fp80 @llvm.tanh.f80(x86_fp80 %a)
+ ret x86_fp80 %x
+}
+
+define fp128 @use_tanhfp128(fp128 %a) nounwind {
+; CHECK-LABEL: use_tanhfp128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp tanhf128@PLT # TAILCALL
+ %x = call fp128 @llvm.tanh.f128(fp128 %a)
+ ret fp128 %x
+}
+
+define ppc_fp128 @use_tanhppc_fp128(ppc_fp128 %a) nounwind {
+; CHECK-LABEL: use_tanhppc_fp128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq tanhl@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+ %x = call ppc_fp128 @llvm.tanh.ppcf128(ppc_fp128 %a)
+ ret ppc_fp128 %x
+}
+
+declare half @llvm.tanh.f16(half)
+declare float @llvm.tanh.f32(float)
+declare double @llvm.tanh.f64(double)
+declare x86_fp80 @llvm.tanh.f80(x86_fp80)
+declare fp128 @llvm.tanh.f128(fp128)
+declare ppc_fp128 @llvm.tanh.ppcf128(ppc_fp128)
diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
index d71fd470651cf9..b486014678466e 100644
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -7997,7 +7997,1355 @@ entry:
 ret <4 x double> %tan
 }
 
+define <1 x float> @constrained_vector_acos_v1f32() #0 {
+; CHECK-LABEL: constrained_vector_acos_v1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq acosf@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_acos_v1f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq acosf@PLT
+; AVX-NEXT: popq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %acos = call <1 x float> @llvm.experimental.constrained.acos.v1f32(
+ <1 x float> <float 42.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <1 x float> %acos
+}
+
+define <2 x double> @constrained_vector_acos_v2f64() #0 {
+; CHECK-LABEL: constrained_vector_acos_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq acos@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq acos@PLT
+; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_acos_v2f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $24, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 32
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq acos@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq acos@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: addq $24, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %acos = call <2 x double> @llvm.experimental.constrained.acos.v2f64(
+ <2 x double> <double 42.0, double 42.1>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <2 x double> %acos
+}
+
+define <3 x float> @constrained_vector_acos_v3f32() #0 {
+; CHECK-LABEL: constrained_vector_acos_v3f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq acosf@PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq acosf@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq acosf@PLT
+; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_acos_v3f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq acosf@PLT
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq acosf@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq acosf@PLT
+; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %acos = call <3 x float> @llvm.experimental.constrained.acos.v3f32(
+ <3 x float> <float 42.0, float 43.0, float 44.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x float> %acos
+}
+
+define <3 x double> @constrained_vector_acos_v3f64() #0 {
+; CHECK-LABEL: constrained_vector_acos_v3f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq acos@PLT
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq acos@PLT
+; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: callq acos@PLT
+; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
+; CHECK-NEXT: # xmm1 = mem[0],zero
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_acos_v3f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq acos@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq acos@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: callq acos@PLT
+; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
+; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %acos = call <3 x double> @llvm.experimental.constrained.acos.v3f64(
+ <3 x double> <double 42.0, double 42.1, double 42.2>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x double> %acos
+}
+
+define <4 x double> @constrained_vector_acos_v4f64() #0 {
+; CHECK-LABEL: constrained_vector_acos_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq acos@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq acos@PLT
+; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
+; CHECK-NEXT: callq acos@PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: callq acos@PLT
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_acos_v4f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
+; AVX-NEXT: callq acos@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: callq acos@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq acos@PLT
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq acos@PLT
+; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %acos = call <4 x double> @llvm.experimental.constrained.acos.v4f64(
+ <4 x double> <double 42.0, double 42.1, double 42.2, double 42.3>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x double> %acos
+}
+
+define <1 x float> @constrained_vector_asin_v1f32() #0 {
+; CHECK-LABEL: constrained_vector_asin_v1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq asinf@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_asin_v1f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq asinf@PLT
+; AVX-NEXT: popq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %asin = call <1 x float> @llvm.experimental.constrained.asin.v1f32(
+ <1 x float> <float 42.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <1 x float> %asin
+}
+
+define <2 x double> @constrained_vector_asin_v2f64() #0 {
+; CHECK-LABEL: constrained_vector_asin_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq asin@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq asin@PLT
+; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_asin_v2f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $24, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 32
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq asin@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq asin@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: addq $24, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %asin = call <2 x double> @llvm.experimental.constrained.asin.v2f64(
+ <2 x double> <double 42.0, double 42.1>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <2 x double> %asin
+}
+
+define <3 x float> @constrained_vector_asin_v3f32() #0 {
+; CHECK-LABEL: constrained_vector_asin_v3f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq asinf@PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq asinf@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq asinf@PLT
+; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_asin_v3f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq asinf@PLT
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq asinf@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq asinf@PLT
+; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %asin = call <3 x float> @llvm.experimental.constrained.asin.v3f32(
+ <3 x float> <float 42.0, float 43.0, float 44.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x float> %asin
+}
+
+define <3 x double> @constrained_vector_asin_v3f64() #0 {
+; CHECK-LABEL: constrained_vector_asin_v3f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq asin@PLT
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq asin@PLT
+; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: callq asin@PLT
+; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
+; CHECK-NEXT: # xmm1 = mem[0],zero
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_asin_v3f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq asin@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq asin@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: callq asin@PLT
+; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
+; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %asin = call <3 x double> @llvm.experimental.constrained.asin.v3f64(
+ <3 x double> <double 42.0, double 42.1, double 42.2>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x double> %asin
+}
+
+define <4 x double> @constrained_vector_asin_v4f64() #0 {
+; CHECK-LABEL: constrained_vector_asin_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq asin@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq asin@PLT
+; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
+; CHECK-NEXT: callq asin@PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: callq asin@PLT
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_asin_v4f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
+; AVX-NEXT: callq asin@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: callq asin@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq asin@PLT
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq asin@PLT
+; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %asin = call <4 x double> @llvm.experimental.constrained.asin.v4f64(
+ <4 x double> <double 42.0, double 42.1, double 42.2, double 42.3>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x double> %asin
+}
+
+define <1 x float> @constrained_vector_atan_v1f32() #0 {
+; CHECK-LABEL: constrained_vector_atan_v1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq atanf@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_atan_v1f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq atanf@PLT
+; AVX-NEXT: popq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %atan = call <1 x float> @llvm.experimental.constrained.atan.v1f32(
+ <1 x float> <float 42.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <1 x float> %atan
+}
+
+define <2 x double> @constrained_vector_atan_v2f64() #0 {
+; CHECK-LABEL: constrained_vector_atan_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq atan@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq atan@PLT
+; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_atan_v2f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $24, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 32
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq atan@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq atan@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: addq $24, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %atan = call <2 x double> @llvm.experimental.constrained.atan.v2f64(
+ <2 x double> <double 42.0, double 42.1>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <2 x double> %atan
+}
+
+define <3 x float> @constrained_vector_atan_v3f32() #0 {
+; CHECK-LABEL: constrained_vector_atan_v3f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq atanf@PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq atanf@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq atanf@PLT
+; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_atan_v3f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq atanf@PLT
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq atanf@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq atanf@PLT
+; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %atan = call <3 x float> @llvm.experimental.constrained.atan.v3f32(
+ <3 x float> <float 42.0, float 43.0, float 44.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x float> %atan
+}
+
+define <3 x double> @constrained_vector_atan_v3f64() #0 {
+; CHECK-LABEL: constrained_vector_atan_v3f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq atan@PLT
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq atan@PLT
+; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: callq atan@PLT
+; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
+; CHECK-NEXT: # xmm1 = mem[0],zero
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_atan_v3f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq atan@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq atan@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: callq atan@PLT
+; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
+; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %atan = call <3 x double> @llvm.experimental.constrained.atan.v3f64(
+ <3 x double> <double 42.0, double 42.1, double 42.2>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x double> %atan
+}
+
+define <4 x double> @constrained_vector_atan_v4f64() #0 {
+; CHECK-LABEL: constrained_vector_atan_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq atan@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq atan@PLT
+; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
+; CHECK-NEXT: callq atan@PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: callq atan@PLT
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_atan_v4f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
+; AVX-NEXT: callq atan@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: callq atan@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq atan@PLT
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq atan@PLT
+; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %atan = call <4 x double> @llvm.experimental.constrained.atan.v4f64(
+ <4 x double> <double 42.0, double 42.1, double 42.2, double 42.3>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x double> %atan
+}
+
+define <1 x float> @constrained_vector_cosh_v1f32() #0 {
+; CHECK-LABEL: constrained_vector_cosh_v1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq coshf@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_cosh_v1f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq coshf@PLT
+; AVX-NEXT: popq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %cosh = call <1 x float> @llvm.experimental.constrained.cosh.v1f32(
+ <1 x float> <float 42.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <1 x float> %cosh
+}
+
+define <2 x double> @constrained_vector_cosh_v2f64() #0 {
+; CHECK-LABEL: constrained_vector_cosh_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq cosh@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq cosh@PLT
+; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_cosh_v2f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $24, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 32
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq cosh@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq cosh@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: addq $24, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %cosh = call <2 x double> @llvm.experimental.constrained.cosh.v2f64(
+ <2 x double> <double 42.0, double 42.1>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <2 x double> %cosh
+}
+
+define <3 x float> @constrained_vector_cosh_v3f32() #0 {
+; CHECK-LABEL: constrained_vector_cosh_v3f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq coshf@PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq coshf@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq coshf@PLT
+; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_cosh_v3f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq coshf@PLT
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq coshf@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq coshf@PLT
+; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %cosh = call <3 x float> @llvm.experimental.constrained.cosh.v3f32(
+ <3 x float> <float 42.0, float 43.0, float 44.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x float> %cosh
+}
+
+define <3 x double> @constrained_vector_cosh_v3f64() #0 {
+; CHECK-LABEL: constrained_vector_cosh_v3f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq cosh@PLT
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq cosh@PLT
+; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: callq cosh@PLT
+; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
+; CHECK-NEXT: # xmm1 = mem[0],zero
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_cosh_v3f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq cosh@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq cosh@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: callq cosh@PLT
+; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
+; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %cosh = call <3 x double> @llvm.experimental.constrained.cosh.v3f64(
+ <3 x double> <double 42.0, double 42.1, double 42.2>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x double> %cosh
+}
+
+define <4 x double> @constrained_vector_cosh_v4f64() #0 {
+; CHECK-LABEL: constrained_vector_cosh_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq cosh@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq cosh@PLT
+; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
+; CHECK-NEXT: callq cosh@PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: callq cosh@PLT
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_cosh_v4f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
+; AVX-NEXT: callq cosh@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: callq cosh@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq cosh@PLT
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq cosh@PLT
+; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %cosh = call <4 x double> @llvm.experimental.constrained.cosh.v4f64(
+ <4 x double> <double 42.0, double 42.1, double 42.2, double 42.3>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x double> %cosh
+}
+
+define <1 x float> @constrained_vector_sinh_v1f32() #0 {
+; CHECK-LABEL: constrained_vector_sinh_v1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq sinhf@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_sinh_v1f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq sinhf@PLT
+; AVX-NEXT: popq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %sinh = call <1 x float> @llvm.experimental.constrained.sinh.v1f32(
+ <1 x float> <float 42.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <1 x float> %sinh
+}
+
+define <2 x double> @constrained_vector_sinh_v2f64() #0 {
+; CHECK-LABEL: constrained_vector_sinh_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq sinh@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq sinh@PLT
+; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_sinh_v2f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $24, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 32
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq sinh@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq sinh@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: addq $24, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %sinh = call <2 x double> @llvm.experimental.constrained.sinh.v2f64(
+ <2 x double> <double 42.0, double 42.1>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <2 x double> %sinh
+}
+
+define <3 x float> @constrained_vector_sinh_v3f32() #0 {
+; CHECK-LABEL: constrained_vector_sinh_v3f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq sinhf@PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq sinhf@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq sinhf@PLT
+; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_sinh_v3f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq sinhf@PLT
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq sinhf@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq sinhf@PLT
+; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %sinh = call <3 x float> @llvm.experimental.constrained.sinh.v3f32(
+ <3 x float> <float 42.0, float 43.0, float 44.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x float> %sinh
+}
+
+define <3 x double> @constrained_vector_sinh_v3f64() #0 {
+; CHECK-LABEL: constrained_vector_sinh_v3f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq sinh@PLT
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq sinh@PLT
+; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: callq sinh@PLT
+; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
+; CHECK-NEXT: # xmm1 = mem[0],zero
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_sinh_v3f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq sinh@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq sinh@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: callq sinh@PLT
+; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
+; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %sinh = call <3 x double> @llvm.experimental.constrained.sinh.v3f64(
+ <3 x double> <double 42.0, double 42.1, double 42.2>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x double> %sinh
+}
+
+define <4 x double> @constrained_vector_sinh_v4f64() #0 {
+; CHECK-LABEL: constrained_vector_sinh_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq sinh@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq sinh@PLT
+; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
+; CHECK-NEXT: callq sinh@PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: callq sinh@PLT
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_sinh_v4f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
+; AVX-NEXT: callq sinh@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: callq sinh@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq sinh@PLT
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq sinh@PLT
+; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %sinh = call <4 x double> @llvm.experimental.constrained.sinh.v4f64(
+ <4 x double> <double 42.0, double 42.1, double 42.2, double 42.3>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x double> %sinh
+}
+
+define <1 x float> @constrained_vector_tanh_v1f32() #0 {
+; CHECK-LABEL: constrained_vector_tanh_v1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq tanhf@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_tanh_v1f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq tanhf@PLT
+; AVX-NEXT: popq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+  %tanh = call <1 x float> @llvm.experimental.constrained.tanh.v1f32(
+             <1 x float> <float 42.0>,
+             metadata !"round.dynamic",
+             metadata !"fpexcept.strict") #0
+  ret <1 x float> %tanh
+}
+
+define <2 x double> @constrained_vector_tanh_v2f64() #0 {
+; CHECK-LABEL: constrained_vector_tanh_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq tanh@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq tanh@PLT
+; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_tanh_v2f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $24, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 32
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq tanh@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq tanh@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: addq $24, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+  %tanh = call <2 x double> @llvm.experimental.constrained.tanh.v2f64(
+             <2 x double> <double 42.0, double 42.1>,
+             metadata !"round.dynamic",
+             metadata !"fpexcept.strict") #0
+  ret <2 x double> %tanh
+}
+
+define <3 x float> @constrained_vector_tanh_v3f32() #0 {
+; CHECK-LABEL: constrained_vector_tanh_v3f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq tanhf@PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq tanhf@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq tanhf@PLT
+; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_tanh_v3f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq tanhf@PLT
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq tanhf@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq tanhf@PLT
+; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+  %tanh = call <3 x float> @llvm.experimental.constrained.tanh.v3f32(
+             <3 x float> <float 42.0, float 43.0, float 44.0>,
+             metadata !"round.dynamic",
+             metadata !"fpexcept.strict") #0
+  ret <3 x float> %tanh
+}
+
+define <3 x double> @constrained_vector_tanh_v3f64() #0 {
+; CHECK-LABEL: constrained_vector_tanh_v3f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq tanh@PLT
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq tanh@PLT
+; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: callq tanh@PLT
+; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
+; CHECK-NEXT: # xmm1 = mem[0],zero
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_tanh_v3f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq tanh@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq tanh@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: callq tanh@PLT
+; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
+; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+  %tanh = call <3 x double> @llvm.experimental.constrained.tanh.v3f64(
+             <3 x double> <double 42.0, double 42.1, double 42.2>,
+             metadata !"round.dynamic",
+             metadata !"fpexcept.strict") #0
+  ret <3 x double> %tanh
+}
+
+define <4 x double> @constrained_vector_tanh_v4f64() #0 {
+; CHECK-LABEL: constrained_vector_tanh_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq tanh@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq tanh@PLT
+; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
+; CHECK-NEXT: callq tanh@PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: callq tanh@PLT
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_tanh_v4f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
+; AVX-NEXT: callq tanh@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: callq tanh@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq tanh@PLT
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq tanh@PLT
+; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+  %tanh = call <4 x double> @llvm.experimental.constrained.tanh.v4f64(
+             <4 x double> <double 42.0, double 42.1, double 42.2, double 42.3>,
+             metadata !"round.dynamic",
+             metadata !"fpexcept.strict") #0
+  ret <4 x double> %tanh
+}
 
 declare <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float>, <16 x float>, metadata, metadata)
@@ -8015,6 +9363,12 @@ declare <2 x double> @llvm.experimental.constrained.powi.v2f64(<2 x double>, i32
 declare <2 x double> @llvm.experimental.constrained.sin.v2f64(<2 x double>, metadata, metadata)
 declare <2 x double> @llvm.experimental.constrained.cos.v2f64(<2 x double>, metadata, metadata)
 declare <2 x double> @llvm.experimental.constrained.tan.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.asin.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.acos.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.atan.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.sinh.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.cosh.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.tanh.v2f64(<2 x double>, metadata, metadata)
 declare <2 x double> @llvm.experimental.constrained.exp.v2f64(<2 x double>, metadata, metadata)
 declare <2 x double> @llvm.experimental.constrained.exp2.v2f64(<2 x double>, metadata, metadata)
 declare <2 x double> @llvm.experimental.constrained.log.v2f64(<2 x double>, metadata, metadata)
@@ -8059,6 +9413,12 @@ declare <1 x float> @llvm.experimental.constrained.powi.v1f32(<1 x float>, i32,
 declare <1 x float> @llvm.experimental.constrained.sin.v1f32(<1 x float>, metadata, metadata)
 declare <1 x float> @llvm.experimental.constrained.cos.v1f32(<1 x float>, metadata, metadata)
 declare <1 x float> @llvm.experimental.constrained.tan.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.asin.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.acos.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.atan.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.sinh.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.cosh.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.tanh.v1f32(<1 x float>, metadata, metadata)
 declare <1 x float> @llvm.experimental.constrained.exp.v1f32(<1 x float>, metadata, metadata)
 declare <1 x float> @llvm.experimental.constrained.exp2.v1f32(<1 x float>, metadata, metadata)
 declare <1 x float> @llvm.experimental.constrained.log.v1f32(<1 x float>, metadata, metadata)
@@ -8114,6 +9474,18 @@ declare <3 x float> @llvm.experimental.constrained.cos.v3f32(<3 x float>, metada
 declare <3 x double> @llvm.experimental.constrained.cos.v3f64(<3 x double>, metadata, metadata)
 declare <3 x float> @llvm.experimental.constrained.tan.v3f32(<3 x float>, metadata, metadata)
 declare <3 x double> @llvm.experimental.constrained.tan.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.asin.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.asin.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.acos.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.acos.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.atan.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.atan.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.sinh.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.sinh.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.cosh.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.cosh.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.tanh.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.tanh.v3f64(<3 x double>, metadata, metadata)
 declare <3 x float> @llvm.experimental.constrained.exp.v3f32(<3 x float>, metadata, metadata)
 declare <3 x double> @llvm.experimental.constrained.exp.v3f64(<3 x double>, metadata, metadata)
 declare <3 x float> @llvm.experimental.constrained.exp2.v3f32(<3 x float>, metadata, metadata)
@@ -8171,6 +9543,12 @@ declare <4 x double> @llvm.experimental.constrained.powi.v4f64(<4 x double>, i32
 declare <4 x double> @llvm.experimental.constrained.sin.v4f64(<4 x double>, metadata, metadata)
 declare <4 x double> @llvm.experimental.constrained.cos.v4f64(<4 x double>, metadata, metadata)
 declare <4 x double> @llvm.experimental.constrained.tan.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.asin.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.acos.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.atan.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.sinh.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.cosh.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.tanh.v4f64(<4 x double>, metadata, metadata)
 declare <4 x double> @llvm.experimental.constrained.exp.v4f64(<4 x double>, metadata, metadata)
 declare <4 x double> @llvm.experimental.constrained.exp2.v4f64(<4 x double>, metadata, metadata)
 declare <4 x double> @llvm.experimental.constrained.log.v4f64(<4 x double>, metadata, metadata)
diff --git a/llvm/test/Feature/fp-intrinsics.ll b/llvm/test/Feature/fp-intrinsics.ll
index 78275a16d3e8f7..80f8b15abfaabe 100644
--- a/llvm/test/Feature/fp-intrinsics.ll
+++ b/llvm/test/Feature/fp-intrinsics.ll
@@ -162,6 +162,72 @@ entry:
   ret double %result
 }
 
+; Verify that acos(42.0) isn't simplified when the rounding mode is unknown.
+; CHECK-LABEL: facos
+; CHECK: call double @llvm.experimental.constrained.acos
+define double @facos() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.acos.f64(double 42.0,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+; Verify that asin(42.0) isn't simplified when the rounding mode is unknown.
+; CHECK-LABEL: fasin
+; CHECK: call double @llvm.experimental.constrained.asin
+define double @fasin() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.asin.f64(double 42.0,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+; Verify that atan(42.0) isn't simplified when the rounding mode is unknown.
+; CHECK-LABEL: fatan
+; CHECK: call double @llvm.experimental.constrained.atan
+define double @fatan() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.atan.f64(double 42.0,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+; Verify that cosh(42.0) isn't simplified when the rounding mode is unknown.
+; CHECK-LABEL: fcosh
+; CHECK: call double @llvm.experimental.constrained.cosh
+define double @fcosh() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.cosh.f64(double 42.0,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+; Verify that sinh(42.0) isn't simplified when the rounding mode is unknown.
+; CHECK-LABEL: fsinh
+; CHECK: call double @llvm.experimental.constrained.sinh
+define double @fsinh() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.sinh.f64(double 42.0,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+; Verify that tanh(42.0) isn't simplified when the rounding mode is unknown.
+; CHECK-LABEL: ftanh
+; CHECK: call double @llvm.experimental.constrained.tanh
+define double @ftanh() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.tanh.f64(double 42.0,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
 ; Verify that exp(42.0) isn't simplified when the rounding mode is unknown.
 ; CHECK-LABEL: f10
 ; CHECK: call double @llvm.experimental.constrained.exp