From 0b58f34c98e4715c3c920820d79e53e8d99c1b59 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <1802579+farzonl@users.noreply.github.com>
Date: Thu, 11 Jul 2024 15:58:43 -0400
Subject: [PATCH] [X86][CodeGen] Add base trig intrinsic lowerings (#96222)

This change is an implementation of the investigation in
https://github.com/llvm/llvm-project/issues/87367 into supporting IEEE math
operations as intrinsics, which was discussed in this RFC:
https://discourse.llvm.org/t/rfc-all-the-math-intrinsics/78294

This change adds constrained intrinsics and some lowering cases for
`acos`, `asin`, `atan`, `cosh`, `sinh`, and `tanh`. The only x86-specific
change was for f80.

https://github.com/llvm/llvm-project/issues/70079
https://github.com/llvm/llvm-project/issues/70080
https://github.com/llvm/llvm-project/issues/70081
https://github.com/llvm/llvm-project/issues/70083
https://github.com/llvm/llvm-project/issues/70084
https://github.com/llvm/llvm-project/issues/95966

The x86 lowering will be done across three PRs, of which this is the first;
a second PR will follow for the LoopVectorizer, and then one for the
SLPVectorizer. The constrained intrinsics are likewise split, into two
parts: this part covers just the LLVM-specific changes, and part 2 will
cover the Clang-specific changes plus legalization for backends that have
special legalization requirements, such as AArch64 and Wasm.
---
 llvm/docs/LangRef.rst                         |  213 +++
 llvm/include/llvm/CodeGen/BasicTTIImpl.h      |   18 +
 llvm/include/llvm/CodeGen/ISDOpcodes.h        |   12 +
 llvm/include/llvm/IR/ConstrainedOps.def       |    6 +
 llvm/include/llvm/IR/Intrinsics.td            |   24 +
 llvm/include/llvm/IR/RuntimeLibcalls.def      |   30 +
 .../Target/GlobalISel/SelectionDAGCompat.td   |    6 +
 .../include/llvm/Target/TargetSelectionDAG.td |   38 +-
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    |   24 +
 llvm/lib/CodeGen/GlobalISel/Utils.cpp         |   12 +
 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp |   42 +
 .../SelectionDAG/LegalizeFloatTypes.cpp       |  126 ++
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |   12 +
 .../SelectionDAG/LegalizeVectorOps.cpp        |    6 +
 .../SelectionDAG/LegalizeVectorTypes.cpp      |   18 +
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |    6 +
 .../SelectionDAG/SelectionDAGBuilder.cpp      |   48 +
 .../SelectionDAG/SelectionDAGDumper.cpp       |   12 +
 llvm/lib/CodeGen/TargetLoweringBase.cpp       |   24 +-
 llvm/lib/Target/X86/X86ISelLowering.cpp       |    6 +
 llvm/test/Assembler/fp-intrinsics-attr.ll     |   48 +
 llvm/test/CodeGen/X86/fp-intrinsics.ll        |  311 ++++
 .../test/CodeGen/X86/fp128-libcalls-strict.ll |  240 +++
 llvm/test/CodeGen/X86/fp80-strict-libcalls.ll |  150 ++
 llvm/test/CodeGen/X86/llvm.acos.ll            |   70 +
 llvm/test/CodeGen/X86/llvm.asin.ll            |   70 +
 llvm/test/CodeGen/X86/llvm.atan.ll            |   70 +
 llvm/test/CodeGen/X86/llvm.cosh.ll            |   70 +
 llvm/test/CodeGen/X86/llvm.sinh.ll            |   70 +
 llvm/test/CodeGen/X86/llvm.tanh.ll            |   70 +
 .../X86/vector-constrained-fp-intrinsics.ll   | 1378 +++++++++++++++++
 llvm/test/Feature/fp-intrinsics.ll            |   66 +
 32 files changed, 3288 insertions(+), 8 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/llvm.acos.ll
 create mode 100644 llvm/test/CodeGen/X86/llvm.asin.ll
 create mode 100644 llvm/test/CodeGen/X86/llvm.atan.ll
 create mode 100644 llvm/test/CodeGen/X86/llvm.cosh.ll
 create mode 100644 llvm/test/CodeGen/X86/llvm.sinh.ll
 create mode 100644 llvm/test/CodeGen/X86/llvm.tanh.ll
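For illustration, a minimal IR sketch of the kind of call this patch teaches
the backend to lower (the function name is hypothetical; per the llvm.tanh.ll
test added below, on x86 the f32 form is expanded to a libm `tanhf` call):

    define float @use_tanh(float %x) {
      %r = call float @llvm.tanh.f32(float %x)
      ret float %r
    }

    declare float @llvm.tanh.f32(float)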
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index ae39217dc8ff8e..a04b5769f095fb 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -26535,6 +26535,219 @@ This function
 returns the tangent of the specified argument, returning the same values
 as the libm ``tan`` functions would, and handles error conditions in the
 same way.
 
+'``llvm.experimental.constrained.asin``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <type>
+      @llvm.experimental.constrained.asin(<type> <op1>,
+                                          metadata <rounding mode>,
+                                          metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.asin``' intrinsic returns the arcsine of the
+first operand.
+
+Arguments:
+""""""""""
+
+The first argument and the return type are floating-point numbers of the same
+type.
+
+The second and third arguments specify the rounding mode and exception
+behavior as described above.
+
+Semantics:
+""""""""""
+
+This function returns the arcsine of the specified operand, returning the
+same values as the libm ``asin`` functions would, and handles error
+conditions in the same way.
+
+
+'``llvm.experimental.constrained.acos``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <type>
+      @llvm.experimental.constrained.acos(<type> <op1>,
+                                          metadata <rounding mode>,
+                                          metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.acos``' intrinsic returns the arccosine of the
+first operand.
+
+Arguments:
+""""""""""
+
+The first argument and the return type are floating-point numbers of the same
+type.
+
+The second and third arguments specify the rounding mode and exception
+behavior as described above.
+
+Semantics:
+""""""""""
+
+This function returns the arccosine of the specified operand, returning the
+same values as the libm ``acos`` functions would, and handles error
+conditions in the same way.
+
+
+'``llvm.experimental.constrained.atan``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <type>
+      @llvm.experimental.constrained.atan(<type> <op1>,
+                                          metadata <rounding mode>,
+                                          metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.atan``' intrinsic returns the arctangent of the
+first operand.
+
+Arguments:
+""""""""""
+
+The first argument and the return type are floating-point numbers of the same
+type.
+
+The second and third arguments specify the rounding mode and exception
+behavior as described above.
+
+Semantics:
+""""""""""
+
+This function returns the arctangent of the specified operand, returning the
+same values as the libm ``atan`` functions would, and handles error
+conditions in the same way.
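As an illustrative sketch (modeled on the tests added later in this patch;
`@atan_strict` is a hypothetical caller, and attribute group `#0` carries
`strictfp`), a strict-FP call to one of these intrinsics looks like:

    define double @atan_strict(double %x) #0 {
    entry:
      %result = call double @llvm.experimental.constrained.atan.f64(double %x,
                    metadata !"round.dynamic",
                    metadata !"fpexcept.strict") #0
      ret double %result
    }

    declare double @llvm.experimental.constrained.atan.f64(double, metadata, metadata)

    attributes #0 = { strictfp }

The !"round.dynamic" and !"fpexcept.strict" arguments tell the optimizer the
rounding mode is unknown and FP exceptions are observable, which is why the
tests below verify such calls are not constant-folded.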
+
+'``llvm.experimental.constrained.sinh``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <type>
+      @llvm.experimental.constrained.sinh(<type> <op1>,
+                                          metadata <rounding mode>,
+                                          metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.sinh``' intrinsic returns the hyperbolic sine of the
+first operand.
+
+Arguments:
+""""""""""
+
+The first argument and the return type are floating-point numbers of the same
+type.
+
+The second and third arguments specify the rounding mode and exception
+behavior as described above.
+
+Semantics:
+""""""""""
+
+This function returns the hyperbolic sine of the specified operand, returning the
+same values as the libm ``sinh`` functions would, and handles error
+conditions in the same way.
+
+
+'``llvm.experimental.constrained.cosh``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <type>
+      @llvm.experimental.constrained.cosh(<type> <op1>,
+                                          metadata <rounding mode>,
+                                          metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.cosh``' intrinsic returns the hyperbolic cosine of the
+first operand.
+
+Arguments:
+""""""""""
+
+The first argument and the return type are floating-point numbers of the same
+type.
+
+The second and third arguments specify the rounding mode and exception
+behavior as described above.
+
+Semantics:
+""""""""""
+
+This function returns the hyperbolic cosine of the specified operand, returning the
+same values as the libm ``cosh`` functions would, and handles error
+conditions in the same way.
+
+
+'``llvm.experimental.constrained.tanh``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <type>
+      @llvm.experimental.constrained.tanh(<type> <op1>,
+                                          metadata <rounding mode>,
+                                          metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.tanh``' intrinsic returns the hyperbolic tangent of the
+first operand.
+
+Arguments:
+""""""""""
+
+The first argument and the return type are floating-point numbers of the same
+type.
+
+The second and third arguments specify the rounding mode and exception
+behavior as described above.
+
+Semantics:
+""""""""""
+
+This function returns the hyperbolic tangent of the specified operand, returning the
+same values as the libm ``tanh`` functions would, and handles error
+conditions in the same way.
 
 '``llvm.experimental.constrained.exp``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 4f1dc9f991c065..5b9cc5dfeeadb8 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1979,6 +1979,24 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase {
     case Intrinsic::tan:
       ISD = ISD::FTAN;
       break;
+    case Intrinsic::asin:
+      ISD = ISD::FASIN;
+      break;
+    case Intrinsic::acos:
+      ISD = ISD::FACOS;
+      break;
+    case Intrinsic::atan:
+      ISD = ISD::FATAN;
+      break;
+    case Intrinsic::sinh:
+      ISD = ISD::FSINH;
+      break;
+    case Intrinsic::cosh:
+      ISD = ISD::FCOSH;
+      break;
+    case Intrinsic::tanh:
+      ISD = ISD::FTANH;
+      break;
     case Intrinsic::exp:
       ISD = ISD::FEXP;
       break;
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 88e3339e2453f6..e6b10209b4767b 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -422,6 +422,12 @@ enum NodeType {
   STRICT_FSIN,
   STRICT_FCOS,
   STRICT_FTAN,
+  STRICT_FASIN,
+  STRICT_FACOS,
+  STRICT_FATAN,
+  STRICT_FSINH,
+  STRICT_FCOSH,
+  STRICT_FTANH,
   STRICT_FEXP,
   STRICT_FEXP2,
   STRICT_FLOG,
@@ -948,6 +954,12 @@ enum NodeType {
   FSIN,
   FCOS,
   FTAN,
+  FASIN,
+  FACOS,
+  FATAN,
+  FSINH,
+  FCOSH,
+  FTANH,
   FPOW,
   FPOWI,
   /// FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
diff --git a/llvm/include/llvm/IR/ConstrainedOps.def b/llvm/include/llvm/IR/ConstrainedOps.def
index a7b37c5cb204da..56304c377b8393 100644
--- a/llvm/include/llvm/IR/ConstrainedOps.def
+++ b/llvm/include/llvm/IR/ConstrainedOps.def
@@ -69,8 +69,12 @@ CMP_INSTRUCTION(FCmp, 2, 0, experimental_constrained_fcmps, FSETCCS
 // Theses are definitions for intrinsic functions, that are converted into
 // constrained intrinsics.
// +DAG_FUNCTION(acos, 1, 1, experimental_constrained_acos, FACOS) +DAG_FUNCTION(asin, 1, 1, experimental_constrained_asin, FASIN) +DAG_FUNCTION(atan, 1, 1, experimental_constrained_atan, FATAN) DAG_FUNCTION(ceil, 1, 0, experimental_constrained_ceil, FCEIL) DAG_FUNCTION(cos, 1, 1, experimental_constrained_cos, FCOS) +DAG_FUNCTION(cosh, 1, 1, experimental_constrained_cosh, FCOSH) DAG_FUNCTION(exp, 1, 1, experimental_constrained_exp, FEXP) DAG_FUNCTION(exp2, 1, 1, experimental_constrained_exp2, FEXP2) DAG_FUNCTION(floor, 1, 0, experimental_constrained_floor, FFLOOR) @@ -94,8 +98,10 @@ DAG_FUNCTION(rint, 1, 1, experimental_constrained_rint, FRINT) DAG_FUNCTION(round, 1, 0, experimental_constrained_round, FROUND) DAG_FUNCTION(roundeven, 1, 0, experimental_constrained_roundeven, FROUNDEVEN) DAG_FUNCTION(sin, 1, 1, experimental_constrained_sin, FSIN) +DAG_FUNCTION(sinh, 1, 1, experimental_constrained_sinh, FSINH) DAG_FUNCTION(sqrt, 1, 1, experimental_constrained_sqrt, FSQRT) DAG_FUNCTION(tan, 1, 1, experimental_constrained_tan, FTAN) +DAG_FUNCTION(tanh, 1, 1, experimental_constrained_tanh, FTANH) DAG_FUNCTION(trunc, 1, 0, experimental_constrained_trunc, FTRUNC) // This is definition for fmuladd intrinsic function, that is converted into diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 65a9b68b5229df..01e379dfcebcad 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1211,6 +1211,18 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn, IntrStrictFP] in llvm_anyint_ty, llvm_metadata_ty, llvm_metadata_ty ]>; + def int_experimental_constrained_asin : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_acos : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_atan : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; def int_experimental_constrained_sin : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, llvm_metadata_ty, @@ -1223,6 +1235,18 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn, IntrStrictFP] in [ LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; + def int_experimental_constrained_sinh : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_cosh : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_tanh : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; def int_experimental_constrained_pow : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def index d8eab80656c06a..89aaf6d1ad83f8 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.def +++ b/llvm/include/llvm/IR/RuntimeLibcalls.def @@ -202,6 +202,36 @@ HANDLE_LIBCALL(TAN_F64, "tan") HANDLE_LIBCALL(TAN_F80, "tanl") HANDLE_LIBCALL(TAN_F128,"tanl") HANDLE_LIBCALL(TAN_PPCF128, "tanl") +HANDLE_LIBCALL(SINH_F32, "sinhf") +HANDLE_LIBCALL(SINH_F64, "sinh") +HANDLE_LIBCALL(SINH_F80, "sinhl") +HANDLE_LIBCALL(SINH_F128, "sinhl") +HANDLE_LIBCALL(SINH_PPCF128, "sinhl") +HANDLE_LIBCALL(COSH_F32, "coshf") 
+HANDLE_LIBCALL(COSH_F64, "cosh")
+HANDLE_LIBCALL(COSH_F80, "coshl")
+HANDLE_LIBCALL(COSH_F128, "coshl")
+HANDLE_LIBCALL(COSH_PPCF128, "coshl")
+HANDLE_LIBCALL(TANH_F32, "tanhf")
+HANDLE_LIBCALL(TANH_F64, "tanh")
+HANDLE_LIBCALL(TANH_F80, "tanhl")
+HANDLE_LIBCALL(TANH_F128,"tanhl")
+HANDLE_LIBCALL(TANH_PPCF128, "tanhl")
+HANDLE_LIBCALL(ASIN_F32, "asinf")
+HANDLE_LIBCALL(ASIN_F64, "asin")
+HANDLE_LIBCALL(ASIN_F80, "asinl")
+HANDLE_LIBCALL(ASIN_F128, "asinl")
+HANDLE_LIBCALL(ASIN_PPCF128, "asinl")
+HANDLE_LIBCALL(ACOS_F32, "acosf")
+HANDLE_LIBCALL(ACOS_F64, "acos")
+HANDLE_LIBCALL(ACOS_F80, "acosl")
+HANDLE_LIBCALL(ACOS_F128, "acosl")
+HANDLE_LIBCALL(ACOS_PPCF128, "acosl")
+HANDLE_LIBCALL(ATAN_F32, "atanf")
+HANDLE_LIBCALL(ATAN_F64, "atan")
+HANDLE_LIBCALL(ATAN_F80, "atanl")
+HANDLE_LIBCALL(ATAN_F128,"atanl")
+HANDLE_LIBCALL(ATAN_PPCF128, "atanl")
 HANDLE_LIBCALL(SINCOS_F32, nullptr)
 HANDLE_LIBCALL(SINCOS_F64, nullptr)
 HANDLE_LIBCALL(SINCOS_F80, nullptr)
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 560d3b434d07d5..fbe551e1be9115 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -149,6 +149,12 @@ def : GINodeEquiv;
 def : GINodeEquiv;
 def : GINodeEquiv;
 def : GINodeEquiv;
+def : GINodeEquiv<G_FACOS, facos>;
+def : GINodeEquiv<G_FASIN, fasin>;
+def : GINodeEquiv<G_FATAN, fatan>;
+def : GINodeEquiv<G_FCOSH, fcosh>;
+def : GINodeEquiv<G_FSINH, fsinh>;
+def : GINodeEquiv<G_FTANH, ftanh>;
 def : GINodeEquiv;
 def : GINodeEquiv;
 def : GINodeEquiv;
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 8cbf98cd58ca98..133c9b113e51b2 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -515,6 +515,12 @@ def fsqrt      : SDNode<"ISD::FSQRT"      , SDTFPUnaryOp>;
 def fsin       : SDNode<"ISD::FSIN"       , SDTFPUnaryOp>;
 def fcos       : SDNode<"ISD::FCOS"       , SDTFPUnaryOp>;
 def ftan       : SDNode<"ISD::FTAN"       , SDTFPUnaryOp>;
+def fasin      : SDNode<"ISD::FASIN"      , SDTFPUnaryOp>;
+def facos      : SDNode<"ISD::FACOS"      , SDTFPUnaryOp>;
+def fatan      : SDNode<"ISD::FATAN"      , SDTFPUnaryOp>;
+def fsinh      : SDNode<"ISD::FSINH"      , SDTFPUnaryOp>;
+def fcosh      : SDNode<"ISD::FCOSH"      , SDTFPUnaryOp>;
+def ftanh      : SDNode<"ISD::FTANH"      , SDTFPUnaryOp>;
 def fexp2      : SDNode<"ISD::FEXP2"      , SDTFPUnaryOp>;
 def fexp10     : SDNode<"ISD::FEXP10"     , SDTFPUnaryOp>;
 def fpow       : SDNode<"ISD::FPOW"       , SDTFPBinOp>;
@@ -570,11 +576,23 @@ def strict_fcos       : SDNode<"ISD::STRICT_FCOS",
                                SDTFPUnaryOp, [SDNPHasChain]>;
 def strict_ftan       : SDNode<"ISD::STRICT_FTAN",
                                SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fasin      : SDNode<"ISD::STRICT_FASIN",
+                               SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_facos      : SDNode<"ISD::STRICT_FACOS",
+                               SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fatan      : SDNode<"ISD::STRICT_FATAN",
+                               SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fsinh      : SDNode<"ISD::STRICT_FSINH",
+                               SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fcosh      : SDNode<"ISD::STRICT_FCOSH",
+                               SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_ftanh      : SDNode<"ISD::STRICT_FTANH",
+                               SDTFPUnaryOp, [SDNPHasChain]>;
 def strict_fexp2      : SDNode<"ISD::STRICT_FEXP2",
                                SDTFPUnaryOp, [SDNPHasChain]>;
 def strict_fpow       : SDNode<"ISD::STRICT_FPOW",
                                SDTFPBinOp, [SDNPHasChain]>;
-def strict_fldexp     : SDNode<"ISD::STRICT_FLDEXP", 
+def strict_fldexp     : SDNode<"ISD::STRICT_FLDEXP",
                                SDTFPExpOp, [SDNPHasChain]>;
 def strict_flog2      : SDNode<"ISD::STRICT_FLOG2",
                                SDTFPUnaryOp, [SDNPHasChain]>;
@@ -1528,6 +1546,24 @@ def any_fcos : PatFrags<(ops node:$src),
 def any_ftan
: PatFrags<(ops node:$src), [(strict_ftan node:$src), (ftan node:$src)]>; +def any_fasin : PatFrags<(ops node:$src), + [(strict_fasin node:$src), + (fasin node:$src)]>; +def any_facos : PatFrags<(ops node:$src), + [(strict_facos node:$src), + (facos node:$src)]>; +def any_fatan : PatFrags<(ops node:$src), + [(strict_fatan node:$src), + (fatan node:$src)]>; +def any_fsinh : PatFrags<(ops node:$src), + [(strict_fsinh node:$src), + (fsinh node:$src)]>; +def any_fcosh : PatFrags<(ops node:$src), + [(strict_fcosh node:$src), + (fcosh node:$src)]>; +def any_ftanh : PatFrags<(ops node:$src), + [(strict_ftanh node:$src), + (ftanh node:$src)]>; def any_fexp2 : PatFrags<(ops node:$src), [(strict_fexp2 node:$src), (fexp2 node:$src)]>; diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 3f1094e0ac703d..f717849317ba72 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -451,6 +451,18 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { RTLIBCASE(COS_F); case TargetOpcode::G_FTAN: RTLIBCASE(TAN_F); + case TargetOpcode::G_FASIN: + RTLIBCASE(ASIN_F); + case TargetOpcode::G_FACOS: + RTLIBCASE(ACOS_F); + case TargetOpcode::G_FATAN: + RTLIBCASE(ATAN_F); + case TargetOpcode::G_FSINH: + RTLIBCASE(SINH_F); + case TargetOpcode::G_FCOSH: + RTLIBCASE(COSH_F); + case TargetOpcode::G_FTANH: + RTLIBCASE(TANH_F); case TargetOpcode::G_FLOG10: RTLIBCASE(LOG10_F); case TargetOpcode::G_FLOG: @@ -1040,6 +1052,12 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { case TargetOpcode::G_FCOS: case TargetOpcode::G_FSIN: case TargetOpcode::G_FTAN: + case TargetOpcode::G_FACOS: + case TargetOpcode::G_FASIN: + case TargetOpcode::G_FATAN: + case TargetOpcode::G_FCOSH: + case TargetOpcode::G_FSINH: + case TargetOpcode::G_FTANH: case TargetOpcode::G_FLOG10: case TargetOpcode::G_FLOG: case TargetOpcode::G_FLOG2: @@ -2904,6 +2922,12 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_FCOS: case TargetOpcode::G_FSIN: case TargetOpcode::G_FTAN: + case TargetOpcode::G_FACOS: + case TargetOpcode::G_FASIN: + case TargetOpcode::G_FATAN: + case TargetOpcode::G_FCOSH: + case TargetOpcode::G_FSINH: + case TargetOpcode::G_FTANH: case TargetOpcode::G_FLOG10: case TargetOpcode::G_FLOG: case TargetOpcode::G_FLOG2: diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 328a1465804523..ee289674307379 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -834,6 +834,12 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, case TargetOpcode::G_FSIN: case TargetOpcode::G_FCOS: case TargetOpcode::G_FTAN: + case TargetOpcode::G_FACOS: + case TargetOpcode::G_FASIN: + case TargetOpcode::G_FATAN: + case TargetOpcode::G_FCOSH: + case TargetOpcode::G_FSINH: + case TargetOpcode::G_FTANH: case TargetOpcode::G_FMA: case TargetOpcode::G_FMAD: if (SNaN) @@ -1715,6 +1721,12 @@ bool llvm::isPreISelGenericFloatingPointOpcode(unsigned Opc) { case TargetOpcode::G_FRINT: case TargetOpcode::G_FSIN: case TargetOpcode::G_FTAN: + case TargetOpcode::G_FACOS: + case TargetOpcode::G_FASIN: + case TargetOpcode::G_FATAN: + case TargetOpcode::G_FCOSH: + case TargetOpcode::G_FSINH: + case TargetOpcode::G_FTANH: case TargetOpcode::G_FSQRT: case TargetOpcode::G_FSUB: case TargetOpcode::G_INTRINSIC_ROUND: diff --git 
a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index d6a0dd9ae9b208..1be93276b69613 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4537,6 +4537,36 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { ExpandFPLibCall(Node, RTLIB::TAN_F32, RTLIB::TAN_F64, RTLIB::TAN_F80, RTLIB::TAN_F128, RTLIB::TAN_PPCF128, Results); break; + case ISD::FASIN: + case ISD::STRICT_FASIN: + ExpandFPLibCall(Node, RTLIB::ASIN_F32, RTLIB::ASIN_F64, RTLIB::ASIN_F80, + RTLIB::ASIN_F128, RTLIB::ASIN_PPCF128, Results); + break; + case ISD::FACOS: + case ISD::STRICT_FACOS: + ExpandFPLibCall(Node, RTLIB::ACOS_F32, RTLIB::ACOS_F64, RTLIB::ACOS_F80, + RTLIB::ACOS_F128, RTLIB::ACOS_PPCF128, Results); + break; + case ISD::FATAN: + case ISD::STRICT_FATAN: + ExpandFPLibCall(Node, RTLIB::ATAN_F32, RTLIB::ATAN_F64, RTLIB::ATAN_F80, + RTLIB::ATAN_F128, RTLIB::ATAN_PPCF128, Results); + break; + case ISD::FSINH: + case ISD::STRICT_FSINH: + ExpandFPLibCall(Node, RTLIB::SINH_F32, RTLIB::SINH_F64, RTLIB::SINH_F80, + RTLIB::SINH_F128, RTLIB::SINH_PPCF128, Results); + break; + case ISD::FCOSH: + case ISD::STRICT_FCOSH: + ExpandFPLibCall(Node, RTLIB::COSH_F32, RTLIB::COSH_F64, RTLIB::COSH_F80, + RTLIB::COSH_F128, RTLIB::COSH_PPCF128, Results); + break; + case ISD::FTANH: + case ISD::STRICT_FTANH: + ExpandFPLibCall(Node, RTLIB::TANH_F32, RTLIB::TANH_F64, RTLIB::TANH_F80, + RTLIB::TANH_F128, RTLIB::TANH_PPCF128, Results); + break; case ISD::FSINCOS: // Expand into sincos libcall. ExpandSinCosLibCall(Node, Results); @@ -5510,6 +5540,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::FSIN: case ISD::FCOS: case ISD::FTAN: + case ISD::FASIN: + case ISD::FACOS: + case ISD::FATAN: + case ISD::FSINH: + case ISD::FCOSH: + case ISD::FTANH: case ISD::FLOG: case ISD::FLOG2: case ISD::FLOG10: @@ -5535,6 +5571,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::STRICT_FSIN: case ISD::STRICT_FCOS: case ISD::STRICT_FTAN: + case ISD::STRICT_FASIN: + case ISD::STRICT_FACOS: + case ISD::STRICT_FATAN: + case ISD::STRICT_FSINH: + case ISD::STRICT_FCOSH: + case ISD::STRICT_FTANH: case ISD::STRICT_FLOG: case ISD::STRICT_FLOG2: case ISD::STRICT_FLOG10: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index aa116c9de5d8c4..41fcc9afe4e905 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -76,12 +76,20 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break; case ISD::STRICT_FADD: case ISD::FADD: R = SoftenFloatRes_FADD(N); break; + case ISD::STRICT_FACOS: + case ISD::FACOS: R = SoftenFloatRes_FACOS(N); break; + case ISD::STRICT_FASIN: + case ISD::FASIN: R = SoftenFloatRes_FASIN(N); break; + case ISD::STRICT_FATAN: + case ISD::FATAN: R = SoftenFloatRes_FATAN(N); break; case ISD::FCBRT: R = SoftenFloatRes_FCBRT(N); break; case ISD::STRICT_FCEIL: case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; case ISD::STRICT_FCOS: case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break; + case ISD::STRICT_FCOSH: + case ISD::FCOSH: R = SoftenFloatRes_FCOSH(N); break; case ISD::STRICT_FDIV: case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break; case ISD::STRICT_FEXP: @@ -127,12 +135,16 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, 
unsigned ResNo) { case ISD::FROUNDEVEN: R = SoftenFloatRes_FROUNDEVEN(N); break; case ISD::STRICT_FSIN: case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break; + case ISD::STRICT_FSINH: + case ISD::FSINH: R = SoftenFloatRes_FSINH(N); break; case ISD::STRICT_FSQRT: case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; case ISD::STRICT_FSUB: case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; case ISD::STRICT_FTAN: case ISD::FTAN: R = SoftenFloatRes_FTAN(N); break; + case ISD::STRICT_FTANH: + case ISD::FTANH: R = SoftenFloatRes_FTANH(N); break; case ISD::STRICT_FTRUNC: case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break; @@ -320,6 +332,24 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { RTLIB::ADD_PPCF128)); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FACOS(SDNode *N) { + return SoftenFloatRes_Unary( + N, GetFPLibCall(N->getValueType(0), RTLIB::ACOS_F32, RTLIB::ACOS_F64, + RTLIB::ACOS_F80, RTLIB::ACOS_F128, RTLIB::ACOS_PPCF128)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FASIN(SDNode *N) { + return SoftenFloatRes_Unary( + N, GetFPLibCall(N->getValueType(0), RTLIB::ASIN_F32, RTLIB::ASIN_F64, + RTLIB::ASIN_F80, RTLIB::ASIN_F128, RTLIB::ASIN_PPCF128)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FATAN(SDNode *N) { + return SoftenFloatRes_Unary( + N, GetFPLibCall(N->getValueType(0), RTLIB::ATAN_F32, RTLIB::ATAN_F64, + RTLIB::ATAN_F80, RTLIB::ATAN_F128, RTLIB::ATAN_PPCF128)); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FCBRT(SDNode *N) { return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::CBRT_F32, @@ -395,6 +425,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { RTLIB::COS_PPCF128)); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOSH(SDNode *N) { + return SoftenFloatRes_Unary( + N, GetFPLibCall(N->getValueType(0), RTLIB::COSH_F32, RTLIB::COSH_F64, + RTLIB::COSH_F80, RTLIB::COSH_F128, RTLIB::COSH_PPCF128)); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), RTLIB::DIV_F32, @@ -758,6 +794,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { RTLIB::SIN_PPCF128)); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FSINH(SDNode *N) { + return SoftenFloatRes_Unary( + N, GetFPLibCall(N->getValueType(0), RTLIB::SINH_F32, RTLIB::SINH_F64, + RTLIB::SINH_F80, RTLIB::SINH_F128, RTLIB::SINH_PPCF128)); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::SQRT_F32, @@ -782,6 +824,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTAN(SDNode *N) { RTLIB::TAN_F80, RTLIB::TAN_F128, RTLIB::TAN_PPCF128)); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FTANH(SDNode *N) { + return SoftenFloatRes_Unary( + N, GetFPLibCall(N->getValueType(0), RTLIB::TANH_F32, RTLIB::TANH_F64, + RTLIB::TANH_F80, RTLIB::TANH_F128, RTLIB::TANH_PPCF128)); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::TRUNC_F32, @@ -1358,12 +1406,20 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::FMAXNUM: ExpandFloatRes_FMAXNUM(N, Lo, Hi); break; case ISD::STRICT_FADD: case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break; + case ISD::STRICT_FACOS: + case ISD::FACOS: ExpandFloatRes_FACOS(N, Lo, Hi); break; + case ISD::STRICT_FASIN: + case ISD::FASIN: ExpandFloatRes_FASIN(N, Lo, Hi); break; + case ISD::STRICT_FATAN: + case ISD::FATAN: ExpandFloatRes_FATAN(N, Lo, Hi); break; case 
ISD::FCBRT: ExpandFloatRes_FCBRT(N, Lo, Hi); break; case ISD::STRICT_FCEIL: case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break; case ISD::FCOPYSIGN: ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break; case ISD::STRICT_FCOS: case ISD::FCOS: ExpandFloatRes_FCOS(N, Lo, Hi); break; + case ISD::STRICT_FCOSH: + case ISD::FCOSH: ExpandFloatRes_FCOSH(N, Lo, Hi); break; case ISD::STRICT_FDIV: case ISD::FDIV: ExpandFloatRes_FDIV(N, Lo, Hi); break; case ISD::STRICT_FEXP: @@ -1403,12 +1459,16 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::FROUNDEVEN: ExpandFloatRes_FROUNDEVEN(N, Lo, Hi); break; case ISD::STRICT_FSIN: case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break; + case ISD::STRICT_FSINH: + case ISD::FSINH: ExpandFloatRes_FSINH(N, Lo, Hi); break; case ISD::STRICT_FSQRT: case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break; case ISD::STRICT_FSUB: case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break; case ISD::STRICT_FTAN: case ISD::FTAN: ExpandFloatRes_FTAN(N, Lo, Hi); break; + case ISD::STRICT_FTANH: + case ISD::FTANH: ExpandFloatRes_FTANH(N, Lo, Hi); break; case ISD::STRICT_FTRUNC: case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break; case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break; @@ -1509,6 +1569,33 @@ void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo, RTLIB::ADD_PPCF128), Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FACOS(SDNode *N, SDValue &Lo, + SDValue &Hi) { + ExpandFloatRes_Unary(N, + GetFPLibCall(N->getValueType(0), RTLIB::ACOS_F32, + RTLIB::ACOS_F64, RTLIB::ACOS_F80, + RTLIB::ACOS_F128, RTLIB::ACOS_PPCF128), + Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FASIN(SDNode *N, SDValue &Lo, + SDValue &Hi) { + ExpandFloatRes_Unary(N, + GetFPLibCall(N->getValueType(0), RTLIB::ASIN_F32, + RTLIB::ASIN_F64, RTLIB::ASIN_F80, + RTLIB::ASIN_F128, RTLIB::ASIN_PPCF128), + Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FATAN(SDNode *N, SDValue &Lo, + SDValue &Hi) { + ExpandFloatRes_Unary(N, + GetFPLibCall(N->getValueType(0), RTLIB::ATAN_F32, + RTLIB::ATAN_F64, RTLIB::ATAN_F80, + RTLIB::ATAN_F128, RTLIB::ATAN_PPCF128), + Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FCBRT(SDNode *N, SDValue &Lo, SDValue &Hi) { ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::CBRT_F32, @@ -1543,6 +1630,15 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N, RTLIB::COS_PPCF128), Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FCOSH(SDNode *N, SDValue &Lo, + SDValue &Hi) { + ExpandFloatRes_Unary(N, + GetFPLibCall(N->getValueType(0), RTLIB::COSH_F32, + RTLIB::COSH_F64, RTLIB::COSH_F80, + RTLIB::COSH_F128, RTLIB::COSH_PPCF128), + Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), @@ -1761,6 +1857,15 @@ void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N, RTLIB::SIN_PPCF128), Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FSINH(SDNode *N, SDValue &Lo, + SDValue &Hi) { + ExpandFloatRes_Unary(N, + GetFPLibCall(N->getValueType(0), RTLIB::SINH_F32, + RTLIB::SINH_F64, RTLIB::SINH_F80, + RTLIB::SINH_F128, RTLIB::SINH_PPCF128), + Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N, SDValue &Lo, SDValue &Hi) { ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), @@ -1788,6 +1893,15 @@ void DAGTypeLegalizer::ExpandFloatRes_FTAN(SDNode *N, SDValue &Lo, Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FTANH(SDNode *N, SDValue &Lo, + SDValue &Hi) { + ExpandFloatRes_Unary(N, + 
GetFPLibCall(N->getValueType(0), RTLIB::TANH_F32, + RTLIB::TANH_F64, RTLIB::TANH_F80, + RTLIB::TANH_F128, RTLIB::TANH_PPCF128), + Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N, SDValue &Lo, SDValue &Hi) { ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), @@ -2481,9 +2595,13 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { // Unary FP Operations case ISD::FABS: + case ISD::FACOS: + case ISD::FASIN: + case ISD::FATAN: case ISD::FCBRT: case ISD::FCEIL: case ISD::FCOS: + case ISD::FCOSH: case ISD::FEXP: case ISD::FEXP2: case ISD::FEXP10: @@ -2497,9 +2615,11 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { case ISD::FROUND: case ISD::FROUNDEVEN: case ISD::FSIN: + case ISD::FSINH: case ISD::FSQRT: case ISD::FTRUNC: case ISD::FTAN: + case ISD::FTANH: case ISD::FCANONICALIZE: R = PromoteFloatRes_UnaryOp(N); break; // Binary FP Operations @@ -2916,9 +3036,13 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { // Unary FP Operations case ISD::FABS: + case ISD::FACOS: + case ISD::FASIN: + case ISD::FATAN: case ISD::FCBRT: case ISD::FCEIL: case ISD::FCOS: + case ISD::FCOSH: case ISD::FEXP: case ISD::FEXP2: case ISD::FEXP10: @@ -2933,9 +3057,11 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { case ISD::FROUND: case ISD::FROUNDEVEN: case ISD::FSIN: + case ISD::FSINH: case ISD::FSQRT: case ISD::FTRUNC: case ISD::FTAN: + case ISD::FTANH: case ISD::FCANONICALIZE: R = SoftPromoteHalfRes_UnaryOp(N); break; // Binary FP Operations diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 15075bea104d3f..7af47ed250d91b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -559,6 +559,9 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftenFloatRes_EXTRACT_ELEMENT(SDNode *N); SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_FABS(SDNode *N); + SDValue SoftenFloatRes_FACOS(SDNode *N); + SDValue SoftenFloatRes_FASIN(SDNode *N); + SDValue SoftenFloatRes_FATAN(SDNode *N); SDValue SoftenFloatRes_FMINNUM(SDNode *N); SDValue SoftenFloatRes_FMAXNUM(SDNode *N); SDValue SoftenFloatRes_FADD(SDNode *N); @@ -566,6 +569,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftenFloatRes_FCEIL(SDNode *N); SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N); SDValue SoftenFloatRes_FCOS(SDNode *N); + SDValue SoftenFloatRes_FCOSH(SDNode *N); SDValue SoftenFloatRes_FDIV(SDNode *N); SDValue SoftenFloatRes_FEXP(SDNode *N); SDValue SoftenFloatRes_FEXP2(SDNode *N); @@ -591,9 +595,11 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftenFloatRes_FROUND(SDNode *N); SDValue SoftenFloatRes_FROUNDEVEN(SDNode *N); SDValue SoftenFloatRes_FSIN(SDNode *N); + SDValue SoftenFloatRes_FSINH(SDNode *N); SDValue SoftenFloatRes_FSQRT(SDNode *N); SDValue SoftenFloatRes_FSUB(SDNode *N); SDValue SoftenFloatRes_FTAN(SDNode *N); + SDValue SoftenFloatRes_FTANH(SDNode *N); SDValue SoftenFloatRes_FTRUNC(SDNode *N); SDValue SoftenFloatRes_LOAD(SDNode *N); SDValue SoftenFloatRes_ATOMIC_LOAD(SDNode *N); @@ -645,6 +651,9 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue &Lo, SDValue &Hi); // clang-format off void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FACOS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FASIN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FATAN (SDNode *N, 
SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -652,6 +661,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCOSH (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -674,9 +684,11 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void ExpandFloatRes_FROUND (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FROUNDEVEN(SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSINH (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FTAN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FTANH (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 14b147cc5b01be..307d1fc920d488 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -402,6 +402,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FSIN: case ISD::FCOS: case ISD::FTAN: + case ISD::FASIN: + case ISD::FACOS: + case ISD::FATAN: + case ISD::FSINH: + case ISD::FCOSH: + case ISD::FTANH: case ISD::FLDEXP: case ISD::FPOWI: case ISD::FPOW: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index bbf08e862da12f..dde7046e56e9c6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -85,8 +85,12 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: case ISD::FABS: + case ISD::FACOS: + case ISD::FASIN: + case ISD::FATAN: case ISD::FCEIL: case ISD::FCOS: + case ISD::FCOSH: case ISD::FEXP: case ISD::FEXP2: case ISD::FEXP10: @@ -107,8 +111,10 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FROUND: case ISD::FROUNDEVEN: case ISD::FSIN: + case ISD::FSINH: case ISD::FSQRT: case ISD::FTAN: + case ISD::FTANH: case ISD::FTRUNC: case ISD::SIGN_EXTEND: case ISD::SINT_TO_FP: @@ -1146,9 +1152,13 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::CTPOP: case ISD::VP_CTPOP: case ISD::FABS: case ISD::VP_FABS: + case ISD::FACOS: + case ISD::FASIN: + case ISD::FATAN: case ISD::FCEIL: case ISD::VP_FCEIL: case ISD::FCOS: + case ISD::FCOSH: case ISD::FEXP: case ISD::FEXP2: case ISD::FEXP10: @@ -1181,8 +1191,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FROUNDEVEN: case ISD::VP_FROUNDEVEN: case ISD::FSIN: + case ISD::FSINH: case ISD::FSQRT: case ISD::VP_SQRT: case 
ISD::FTAN: + case ISD::FTANH: case ISD::FTRUNC: case ISD::VP_FROUNDTOZERO: case ISD::SINT_TO_FP: @@ -4479,8 +4491,12 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { break; case ISD::FABS: + case ISD::FACOS: + case ISD::FASIN: + case ISD::FATAN: case ISD::FCEIL: case ISD::FCOS: + case ISD::FCOSH: case ISD::FEXP: case ISD::FEXP2: case ISD::FEXP10: @@ -4493,8 +4509,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FROUND: case ISD::FROUNDEVEN: case ISD::FSIN: + case ISD::FSINH: case ISD::FSQRT: case ISD::FTAN: + case ISD::FTANH: case ISD::FTRUNC: if (unrollExpandedOp()) break; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 79f90bae1d8d66..b335308844fe9d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5397,6 +5397,12 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const case ISD::FSIN: case ISD::FCOS: case ISD::FTAN: + case ISD::FASIN: + case ISD::FACOS: + case ISD::FATAN: + case ISD::FSINH: + case ISD::FCOSH: + case ISD::FTANH: case ISD::FMA: case ISD::FMAD: { if (SNaN) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 276d980c1dcca9..b0746014daf5ac 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6792,6 +6792,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::sin: case Intrinsic::cos: case Intrinsic::tan: + case Intrinsic::asin: + case Intrinsic::acos: + case Intrinsic::atan: + case Intrinsic::sinh: + case Intrinsic::cosh: + case Intrinsic::tanh: case Intrinsic::exp10: case Intrinsic::floor: case Intrinsic::ceil: @@ -6810,6 +6816,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::sin: Opcode = ISD::FSIN; break; case Intrinsic::cos: Opcode = ISD::FCOS; break; case Intrinsic::tan: Opcode = ISD::FTAN; break; + case Intrinsic::asin: Opcode = ISD::FASIN; break; + case Intrinsic::acos: Opcode = ISD::FACOS; break; + case Intrinsic::atan: Opcode = ISD::FATAN; break; + case Intrinsic::sinh: Opcode = ISD::FSINH; break; + case Intrinsic::cosh: Opcode = ISD::FCOSH; break; + case Intrinsic::tanh: Opcode = ISD::FTANH; break; case Intrinsic::exp10: Opcode = ISD::FEXP10; break; case Intrinsic::floor: Opcode = ISD::FFLOOR; break; case Intrinsic::ceil: Opcode = ISD::FCEIL; break; @@ -9261,6 +9273,42 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (visitUnaryFloatCall(I, ISD::FTAN)) return; break; + case LibFunc_asin: + case LibFunc_asinf: + case LibFunc_asinl: + if (visitUnaryFloatCall(I, ISD::FASIN)) + return; + break; + case LibFunc_acos: + case LibFunc_acosf: + case LibFunc_acosl: + if (visitUnaryFloatCall(I, ISD::FACOS)) + return; + break; + case LibFunc_atan: + case LibFunc_atanf: + case LibFunc_atanl: + if (visitUnaryFloatCall(I, ISD::FATAN)) + return; + break; + case LibFunc_sinh: + case LibFunc_sinhf: + case LibFunc_sinhl: + if (visitUnaryFloatCall(I, ISD::FSINH)) + return; + break; + case LibFunc_cosh: + case LibFunc_coshf: + case LibFunc_coshl: + if (visitUnaryFloatCall(I, ISD::FCOSH)) + return; + break; + case LibFunc_tanh: + case LibFunc_tanhf: + case LibFunc_tanhl: + if (visitUnaryFloatCall(I, ISD::FTANH)) + return; + break; case LibFunc_sqrt: case LibFunc_sqrtf: case LibFunc_sqrtl: diff --git 
a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index c1d2c095b103c2..cc8de3a217f826 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -214,6 +214,18 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FSINCOS: return "fsincos"; case ISD::FTAN: return "ftan"; case ISD::STRICT_FTAN: return "strict_ftan"; + case ISD::FASIN: return "fasin"; + case ISD::STRICT_FASIN: return "strict_fasin"; + case ISD::FACOS: return "facos"; + case ISD::STRICT_FACOS: return "strict_facos"; + case ISD::FATAN: return "fatan"; + case ISD::STRICT_FATAN: return "strict_fatan"; + case ISD::FSINH: return "fsinh"; + case ISD::STRICT_FSINH: return "strict_fsinh"; + case ISD::FCOSH: return "fcosh"; + case ISD::STRICT_FCOSH: return "strict_fcosh"; + case ISD::FTANH: return "ftanh"; + case ISD::STRICT_FTANH: return "strict_ftanh"; case ISD::FTRUNC: return "ftrunc"; case ISD::STRICT_FTRUNC: return "strict_ftrunc"; case ISD::FFLOOR: return "ffloor"; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 4a2db272defd11..8ea4dbdd3227ae 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -143,6 +143,12 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) { setLibcallName(RTLIB::COS_F128, "cosf128"); setLibcallName(RTLIB::TAN_F128, "tanf128"); setLibcallName(RTLIB::SINCOS_F128, "sincosf128"); + setLibcallName(RTLIB::ASIN_F128, "asinf128"); + setLibcallName(RTLIB::ACOS_F128, "acosf128"); + setLibcallName(RTLIB::ATAN_F128, "atanf128"); + setLibcallName(RTLIB::SINH_F128, "sinhf128"); + setLibcallName(RTLIB::COSH_F128, "coshf128"); + setLibcallName(RTLIB::TANH_F128, "tanhf128"); setLibcallName(RTLIB::POW_F128, "powf128"); setLibcallName(RTLIB::POW_FINITE_F128, "__powf128_finite"); setLibcallName(RTLIB::CEIL_F128, "ceilf128"); @@ -1102,7 +1108,8 @@ void TargetLoweringBase::initActions() { setOperationAction( {ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, ISD::ANY_EXTEND_VECTOR_INREG, ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG, - ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::FTAN}, + ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::FTAN, ISD::FACOS, + ISD::FASIN, ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH}, VT, Expand); // Constrained floating-point operations default to expand. @@ -1154,14 +1161,17 @@ void TargetLoweringBase::initActions() { Expand); // These library functions default to expand. - setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP, - ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR, ISD::FNEARBYINT, - ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, ISD::LROUND, - ISD::LLROUND, ISD::LRINT, ISD::LLRINT, ISD::FROUNDEVEN, - ISD::FTAN}, + setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, + ISD::FEXP, ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR, + ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, + ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT, + ISD::FROUNDEVEN, ISD::FTAN, ISD::FACOS, ISD::FASIN, + ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH}, {MVT::f32, MVT::f64, MVT::f128}, Expand); - setOperationAction(ISD::FTAN, MVT::f16, Promote); + setOperationAction({ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN, ISD::FCOSH, + ISD::FSINH, ISD::FTANH}, + MVT::f16, Promote); // Default ISD::TRAP to expand (which turns it into abort). 
setOperationAction(ISD::TRAP, MVT::Other, Expand); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index bfe17398879e3e..bf655fc533db75 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -833,6 +833,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FCOS , MVT::f80, Expand); setOperationAction(ISD::FSINCOS, MVT::f80, Expand); setOperationAction(ISD::FTAN , MVT::f80, Expand); + setOperationAction(ISD::FASIN , MVT::f80, Expand); + setOperationAction(ISD::FACOS , MVT::f80, Expand); + setOperationAction(ISD::FATAN , MVT::f80, Expand); + setOperationAction(ISD::FSINH , MVT::f80, Expand); + setOperationAction(ISD::FCOSH , MVT::f80, Expand); + setOperationAction(ISD::FTANH , MVT::f80, Expand); // clang-format on setOperationAction(ISD::FFLOOR, MVT::f80, Expand); diff --git a/llvm/test/Assembler/fp-intrinsics-attr.ll b/llvm/test/Assembler/fp-intrinsics-attr.ll index 613630e1a2b4d2..da6507f051766c 100644 --- a/llvm/test/Assembler/fp-intrinsics-attr.ll +++ b/llvm/test/Assembler/fp-intrinsics-attr.ll @@ -90,6 +90,36 @@ define void @func(double %a, double %b, double %c, i32 %i) strictfp { metadata !"round.dynamic", metadata !"fpexcept.strict") + %acos = call double @llvm.experimental.constrained.acos.f64( + double %a, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + + %asin = call double @llvm.experimental.constrained.asin.f64( + double %a, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + + %atan = call double @llvm.experimental.constrained.atan.f64( + double %a, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + + %cosh = call double @llvm.experimental.constrained.cosh.f64( + double %a, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + + %sinh = call double @llvm.experimental.constrained.sinh.f64( + double %a, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + + %tanh = call double @llvm.experimental.constrained.tanh.f64( + double %a, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %pow = call double @llvm.experimental.constrained.pow.f64( double %a, double %b, metadata !"round.dynamic", @@ -252,6 +282,24 @@ declare double @llvm.experimental.constrained.cos.f64(double, metadata, metadata declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata) ; CHECK: @llvm.experimental.constrained.tan.f64({{.*}}) #[[ATTR1]] +declare double @llvm.experimental.constrained.asin.f64(double, metadata, metadata) +; CHECK: @llvm.experimental.constrained.asin.f64({{.*}}) #[[ATTR1]] + +declare double @llvm.experimental.constrained.acos.f64(double, metadata, metadata) +; CHECK: @llvm.experimental.constrained.acos.f64({{.*}}) #[[ATTR1]] + +declare double @llvm.experimental.constrained.atan.f64(double, metadata, metadata) +; CHECK: @llvm.experimental.constrained.atan.f64({{.*}}) #[[ATTR1]] + +declare double @llvm.experimental.constrained.sinh.f64(double, metadata, metadata) +; CHECK: @llvm.experimental.constrained.sinh.f64({{.*}}) #[[ATTR1]] + +declare double @llvm.experimental.constrained.cosh.f64(double, metadata, metadata) +; CHECK: @llvm.experimental.constrained.cosh.f64({{.*}}) #[[ATTR1]] + +declare double @llvm.experimental.constrained.tanh.f64(double, metadata, metadata) +; CHECK: @llvm.experimental.constrained.tanh.f64({{.*}}) #[[ATTR1]] + declare double @llvm.experimental.constrained.pow.f64(double, double, metadata, metadata) ; CHECK: @llvm.experimental.constrained.pow.f64({{.*}}) 
#[[ATTR1]] diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll index 8c48e6f9da80a7..bb87252e0b9b08 100644 --- a/llvm/test/CodeGen/X86/fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll @@ -2809,6 +2809,311 @@ entry: ret double %result } +; Verify that acos(42.0) isn't simplified when the rounding mode is unknown. +define double @facos() #0 { +; X87-LABEL: facos: +; X87: # %bb.0: # %entry +; X87-NEXT: subl $12, %esp +; X87-NEXT: .cfi_def_cfa_offset 16 +; X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait +; X87-NEXT: calll acos +; X87-NEXT: addl $12, %esp +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl +; +; X86-SSE-LABEL: facos: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: subl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 16 +; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; X86-SSE-NEXT: movsd %xmm0, (%esp) +; X86-SSE-NEXT: calll acos +; X86-SSE-NEXT: addl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE-NEXT: retl +; +; SSE-LABEL: facos: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; SSE-NEXT: callq acos@PLT +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq +; +; AVX-LABEL: facos: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; AVX-NEXT: callq acos@PLT +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %result = call double @llvm.experimental.constrained.acos.f64(double 42.0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + +; Verify that asin(42.0) isn't simplified when the rounding mode is unknown. +define double @fasin() #0 { +; X87-LABEL: fasin: +; X87: # %bb.0: # %entry +; X87-NEXT: subl $12, %esp +; X87-NEXT: .cfi_def_cfa_offset 16 +; X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait +; X87-NEXT: calll asin +; X87-NEXT: addl $12, %esp +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl +; +; X86-SSE-LABEL: fasin: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: subl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 16 +; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; X86-SSE-NEXT: movsd %xmm0, (%esp) +; X86-SSE-NEXT: calll asin +; X86-SSE-NEXT: addl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE-NEXT: retl +; +; SSE-LABEL: fasin: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; SSE-NEXT: callq asin@PLT +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq +; +; AVX-LABEL: fasin: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; AVX-NEXT: callq asin@PLT +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %result = call double @llvm.experimental.constrained.asin.f64(double 42.0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + +; Verify that atan(42.0) isn't simplified when the rounding mode is unknown. 
+define double @fatan() #0 { +; X87-LABEL: fatan: +; X87: # %bb.0: # %entry +; X87-NEXT: subl $12, %esp +; X87-NEXT: .cfi_def_cfa_offset 16 +; X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait +; X87-NEXT: calll atan +; X87-NEXT: addl $12, %esp +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl +; +; X86-SSE-LABEL: fatan: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: subl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 16 +; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; X86-SSE-NEXT: movsd %xmm0, (%esp) +; X86-SSE-NEXT: calll atan +; X86-SSE-NEXT: addl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE-NEXT: retl +; +; SSE-LABEL: fatan: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; SSE-NEXT: callq atan@PLT +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq +; +; AVX-LABEL: fatan: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; AVX-NEXT: callq atan@PLT +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %result = call double @llvm.experimental.constrained.atan.f64(double 42.0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + +; Verify that cosh(42.0) isn't simplified when the rounding mode is unknown. +define double @fcosh() #0 { +; X87-LABEL: fcosh: +; X87: # %bb.0: # %entry +; X87-NEXT: subl $12, %esp +; X87-NEXT: .cfi_def_cfa_offset 16 +; X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait +; X87-NEXT: calll cosh +; X87-NEXT: addl $12, %esp +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl +; +; X86-SSE-LABEL: fcosh: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: subl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 16 +; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; X86-SSE-NEXT: movsd %xmm0, (%esp) +; X86-SSE-NEXT: calll cosh +; X86-SSE-NEXT: addl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE-NEXT: retl +; +; SSE-LABEL: fcosh: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; SSE-NEXT: callq cosh@PLT +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq +; +; AVX-LABEL: fcosh: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; AVX-NEXT: callq cosh@PLT +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %result = call double @llvm.experimental.constrained.cosh.f64(double 42.0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + +; Verify that sinh(42.0) isn't simplified when the rounding mode is unknown. 
+define double @fsinh() #0 { +; X87-LABEL: fsinh: +; X87: # %bb.0: # %entry +; X87-NEXT: subl $12, %esp +; X87-NEXT: .cfi_def_cfa_offset 16 +; X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait +; X87-NEXT: calll sinh +; X87-NEXT: addl $12, %esp +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl +; +; X86-SSE-LABEL: fsinh: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: subl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 16 +; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; X86-SSE-NEXT: movsd %xmm0, (%esp) +; X86-SSE-NEXT: calll sinh +; X86-SSE-NEXT: addl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE-NEXT: retl +; +; SSE-LABEL: fsinh: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; SSE-NEXT: callq sinh@PLT +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq +; +; AVX-LABEL: fsinh: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; AVX-NEXT: callq sinh@PLT +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %result = call double @llvm.experimental.constrained.sinh.f64(double 42.0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + +; Verify that tanh(42.0) isn't simplified when the rounding mode is unknown. +define double @ftanh() #0 { +; X87-LABEL: ftanh: +; X87: # %bb.0: # %entry +; X87-NEXT: subl $12, %esp +; X87-NEXT: .cfi_def_cfa_offset 16 +; X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} +; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait +; X87-NEXT: calll tanh +; X87-NEXT: addl $12, %esp +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl +; +; X86-SSE-LABEL: ftanh: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: subl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 16 +; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; X86-SSE-NEXT: movsd %xmm0, (%esp) +; X86-SSE-NEXT: calll tanh +; X86-SSE-NEXT: addl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE-NEXT: retl +; +; SSE-LABEL: ftanh: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: .cfi_def_cfa_offset 16 +; SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; SSE-NEXT: callq tanh@PLT +; SSE-NEXT: popq %rax +; SSE-NEXT: .cfi_def_cfa_offset 8 +; SSE-NEXT: retq +; +; AVX-LABEL: ftanh: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; AVX-NEXT: callq tanh@PLT +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %result = call double @llvm.experimental.constrained.tanh.f64(double 42.0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} attributes #0 = { strictfp } @@ -2824,6 +3129,12 @@ declare double @llvm.experimental.constrained.powi.f64(double, i32, metadata, me declare double @llvm.experimental.constrained.sin.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.cos.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.asin.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.acos.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.atan.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.sinh.f64(double, metadata, metadata) 
+declare double @llvm.experimental.constrained.cosh.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.tanh.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.exp.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.exp2.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.log.f64(double, metadata, metadata) diff --git a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll index bd51f553587db7..9e84dfa5c41ae6 100644 --- a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll +++ b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll @@ -347,6 +347,46 @@ entry: ret fp128 %ceil } +define fp128 @acos(fp128 %x) nounwind strictfp { +; ANDROID-LABEL: acos: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq acosl@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: acos: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq acosf128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq +; +; X86-LABEL: acos: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll acosl +; X86-NEXT: addl $28, %esp +; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: movaps %xmm0, (%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $24, %esp +; X86-NEXT: popl %esi +; X86-NEXT: retl $4 +entry: + %acos = call fp128 @llvm.experimental.constrained.acos.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret fp128 %acos +} + define fp128 @cos(fp128 %x) nounwind strictfp { ; ANDROID-LABEL: cos: ; ANDROID: # %bb.0: # %entry @@ -387,6 +427,46 @@ entry: ret fp128 %cos } +define fp128 @cosh(fp128 %x) nounwind strictfp { +; ANDROID-LABEL: cosh: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq coshl@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: cosh: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq coshf128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq +; +; X86-LABEL: cosh: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll coshl +; X86-NEXT: addl $28, %esp +; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: movaps %xmm0, (%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $24, %esp +; X86-NEXT: popl %esi +; X86-NEXT: retl $4 +entry: + %cosh = call fp128 @llvm.experimental.constrained.cosh.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret fp128 %cosh +} + define fp128 @exp(fp128 %x) nounwind strictfp { ; ANDROID-LABEL: exp: ; ANDROID: # %bb.0: # %entry @@ -967,6 +1047,46 @@ entry: ret fp128 %roundeven } +define fp128 @asin(fp128 %x) nounwind strictfp { +; ANDROID-LABEL: asin: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq asinl@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: asin: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; 
GNU-NEXT: callq asinf128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq +; +; X86-LABEL: asin: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll asinl +; X86-NEXT: addl $28, %esp +; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: movaps %xmm0, (%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $24, %esp +; X86-NEXT: popl %esi +; X86-NEXT: retl $4 +entry: + %asin = call fp128 @llvm.experimental.constrained.asin.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret fp128 %asin +} + define fp128 @sin(fp128 %x) nounwind strictfp { ; ANDROID-LABEL: sin: ; ANDROID: # %bb.0: # %entry @@ -1007,6 +1127,46 @@ entry: ret fp128 %sin } +define fp128 @sinh(fp128 %x) nounwind strictfp { +; ANDROID-LABEL: sinh: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq sinhl@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: sinh: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq sinhf128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq +; +; X86-LABEL: sinh: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll sinhl +; X86-NEXT: addl $28, %esp +; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: movaps %xmm0, (%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $24, %esp +; X86-NEXT: popl %esi +; X86-NEXT: retl $4 +entry: + %sinh = call fp128 @llvm.experimental.constrained.sinh.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret fp128 %sinh +} + define fp128 @sqrt(fp128 %x) nounwind strictfp { ; ANDROID-LABEL: sqrt: ; ANDROID: # %bb.0: # %entry @@ -1047,6 +1207,46 @@ entry: ret fp128 %sqrt } +define fp128 @atan(fp128 %x) nounwind strictfp { +; ANDROID-LABEL: atan: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq atanl@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: atan: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq atanf128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq +; +; X86-LABEL: atan: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll atanl +; X86-NEXT: addl $28, %esp +; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: movaps %xmm0, (%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $24, %esp +; X86-NEXT: popl %esi +; X86-NEXT: retl $4 +entry: + %atan = call fp128 @llvm.experimental.constrained.atan.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret fp128 %atan +} + define fp128 @tan(fp128 %x) nounwind strictfp { ; ANDROID-LABEL: tan: ; ANDROID: # %bb.0: # %entry @@ -1087,6 +1287,46 @@ entry: ret fp128 %tan } +define fp128 @tanh(fp128 %x) 
nounwind strictfp { +; ANDROID-LABEL: tanh: +; ANDROID: # %bb.0: # %entry +; ANDROID-NEXT: pushq %rax +; ANDROID-NEXT: callq tanhl@PLT +; ANDROID-NEXT: popq %rax +; ANDROID-NEXT: retq +; +; GNU-LABEL: tanh: +; GNU: # %bb.0: # %entry +; GNU-NEXT: pushq %rax +; GNU-NEXT: callq tanhf128@PLT +; GNU-NEXT: popq %rax +; GNU-NEXT: retq +; +; X86-LABEL: tanh: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll tanhl +; X86-NEXT: addl $28, %esp +; X86-NEXT: movaps (%esp), %xmm0 +; X86-NEXT: movaps %xmm0, (%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $24, %esp +; X86-NEXT: popl %esi +; X86-NEXT: retl $4 +entry: + %tanh = call fp128 @llvm.experimental.constrained.tanh.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret fp128 %tanh +} + define fp128 @trunc(fp128 %x) nounwind strictfp { ; ANDROID-LABEL: trunc: ; ANDROID: # %bb.0: # %entry diff --git a/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll b/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll index 89729975cfd61b..c14e99f3acb34e 100644 --- a/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll +++ b/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll @@ -89,6 +89,31 @@ entry: ret x86_fp80 %ceil } +define x86_fp80 @acos(x86_fp80 %x) nounwind strictfp { +; X86-LABEL: acos: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fstpt (%esp) +; X86-NEXT: wait +; X86-NEXT: calll acosl +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl +; +; X64-LABEL: acos: +; X64: # %bb.0: # %entry +; X64-NEXT: subq $24, %rsp +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fstpt (%rsp) +; X64-NEXT: wait +; X64-NEXT: callq acosl@PLT +; X64-NEXT: addq $24, %rsp +; X64-NEXT: retq +entry: + %acos = call x86_fp80 @llvm.experimental.constrained.acos.f80(x86_fp80 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret x86_fp80 %acos +} + define x86_fp80 @cos(x86_fp80 %x) nounwind strictfp { ; X86-LABEL: cos: ; X86: # %bb.0: # %entry @@ -114,6 +139,31 @@ entry: ret x86_fp80 %cos } +define x86_fp80 @cosh(x86_fp80 %x) nounwind strictfp { +; X86-LABEL: cosh: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fstpt (%esp) +; X86-NEXT: wait +; X86-NEXT: calll coshl +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl +; +; X64-LABEL: cosh: +; X64: # %bb.0: # %entry +; X64-NEXT: subq $24, %rsp +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fstpt (%rsp) +; X64-NEXT: wait +; X64-NEXT: callq coshl@PLT +; X64-NEXT: addq $24, %rsp +; X64-NEXT: retq +entry: + %cosh = call x86_fp80 @llvm.experimental.constrained.cosh.f80(x86_fp80 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret x86_fp80 %cosh +} + define x86_fp80 @exp(x86_fp80 %x) nounwind strictfp { ; X86-LABEL: exp: ; X86: # %bb.0: # %entry @@ -479,6 +529,31 @@ entry: ret x86_fp80 %roundeven } +define x86_fp80 @asin(x86_fp80 %x) nounwind strictfp { +; X86-LABEL: asin: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fstpt (%esp) +; X86-NEXT: wait +; X86-NEXT: calll asinl +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl +; +; X64-LABEL: asin: +; X64: # %bb.0: # %entry +; X64-NEXT: subq $24, %rsp +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; 
X64-NEXT: fstpt (%rsp) +; X64-NEXT: wait +; X64-NEXT: callq asinl@PLT +; X64-NEXT: addq $24, %rsp +; X64-NEXT: retq +entry: + %asin = call x86_fp80 @llvm.experimental.constrained.asin.f80(x86_fp80 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret x86_fp80 %asin +} + define x86_fp80 @sin(x86_fp80 %x) nounwind strictfp { ; X86-LABEL: sin: ; X86: # %bb.0: # %entry @@ -504,6 +579,56 @@ entry: ret x86_fp80 %sin } +define x86_fp80 @sinh(x86_fp80 %x) nounwind strictfp { +; X86-LABEL: sinh: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fstpt (%esp) +; X86-NEXT: wait +; X86-NEXT: calll sinhl +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl +; +; X64-LABEL: sinh: +; X64: # %bb.0: # %entry +; X64-NEXT: subq $24, %rsp +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fstpt (%rsp) +; X64-NEXT: wait +; X64-NEXT: callq sinhl@PLT +; X64-NEXT: addq $24, %rsp +; X64-NEXT: retq +entry: + %sinh = call x86_fp80 @llvm.experimental.constrained.sinh.f80(x86_fp80 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret x86_fp80 %sinh +} + +define x86_fp80 @atan(x86_fp80 %x) nounwind strictfp { +; X86-LABEL: atan: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fstpt (%esp) +; X86-NEXT: wait +; X86-NEXT: calll atanl +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl +; +; X64-LABEL: atan: +; X64: # %bb.0: # %entry +; X64-NEXT: subq $24, %rsp +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fstpt (%rsp) +; X64-NEXT: wait +; X64-NEXT: callq atanl@PLT +; X64-NEXT: addq $24, %rsp +; X64-NEXT: retq +entry: + %atan = call x86_fp80 @llvm.experimental.constrained.atan.f80(x86_fp80 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret x86_fp80 %atan +} + define x86_fp80 @tan(x86_fp80 %x) nounwind strictfp { ; X86-LABEL: tan: ; X86: # %bb.0: # %entry @@ -529,6 +654,31 @@ entry: ret x86_fp80 %tan } +define x86_fp80 @tanh(x86_fp80 %x) nounwind strictfp { +; X86-LABEL: tanh: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fstpt (%esp) +; X86-NEXT: wait +; X86-NEXT: calll tanhl +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl +; +; X64-LABEL: tanh: +; X64: # %bb.0: # %entry +; X64-NEXT: subq $24, %rsp +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fstpt (%rsp) +; X64-NEXT: wait +; X64-NEXT: callq tanhl@PLT +; X64-NEXT: addq $24, %rsp +; X64-NEXT: retq +entry: + %tanh = call x86_fp80 @llvm.experimental.constrained.tanh.f80(x86_fp80 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret x86_fp80 %tanh +} + define x86_fp80 @trunc(x86_fp80 %x) nounwind strictfp { ; X86-LABEL: trunc: ; X86: # %bb.0: # %entry diff --git a/llvm/test/CodeGen/X86/llvm.acos.ll b/llvm/test/CodeGen/X86/llvm.acos.ll new file mode 100644 index 00000000000000..202fde8291930f --- /dev/null +++ b/llvm/test/CodeGen/X86/llvm.acos.ll @@ -0,0 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +define half @use_acosf16(half %a) nounwind { +; CHECK-LABEL: use_acosf16: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq __extendhfsf2@PLT +; CHECK-NEXT: callq acosf@PLT +; CHECK-NEXT: callq __truncsfhf2@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq + %x = call half @llvm.acos.f16(half %a) + ret half %x +} + +define float @use_acosf32(float %a) nounwind { +; CHECK-LABEL: use_acosf32: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp 
acosf@PLT # TAILCALL + %x = call float @llvm.acos.f32(float %a) + ret float %x +} + +define double @use_acosf64(double %a) nounwind { +; CHECK-LABEL: use_acosf64: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp acos@PLT # TAILCALL + %x = call double @llvm.acos.f64(double %a) + ret double %x +} + +define x86_fp80 @use_acosf80(x86_fp80 %a) nounwind { +; CHECK-LABEL: use_acosf80: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: fldt 32(%rsp) +; CHECK-NEXT: fstpt (%rsp) +; CHECK-NEXT: callq acosl@PLT +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: retq + %x = call x86_fp80 @llvm.acos.f80(x86_fp80 %a) + ret x86_fp80 %x +} + +define fp128 @use_acosfp128(fp128 %a) nounwind { +; CHECK-LABEL: use_acosfp128: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp acosf128@PLT # TAILCALL + %x = call fp128 @llvm.acos.f128(fp128 %a) + ret fp128 %x +} + +define ppc_fp128 @use_acosppc_fp128(ppc_fp128 %a) nounwind { +; CHECK-LABEL: use_acosppc_fp128: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq acosl@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq + %x = call ppc_fp128 @llvm.acos.ppcf128(ppc_fp128 %a) + ret ppc_fp128 %x +} + +declare half @llvm.acos.f16(half) +declare float @llvm.acos.f32(float) +declare double @llvm.acos.f64(double) +declare x86_fp80 @llvm.acos.f80(x86_fp80) +declare fp128 @llvm.acos.f128(fp128) +declare ppc_fp128 @llvm.acos.ppcf128(ppc_fp128) diff --git a/llvm/test/CodeGen/X86/llvm.asin.ll b/llvm/test/CodeGen/X86/llvm.asin.ll new file mode 100644 index 00000000000000..1e047d01c703c3 --- /dev/null +++ b/llvm/test/CodeGen/X86/llvm.asin.ll @@ -0,0 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +define half @use_asinf16(half %a) nounwind { +; CHECK-LABEL: use_asinf16: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq __extendhfsf2@PLT +; CHECK-NEXT: callq asinf@PLT +; CHECK-NEXT: callq __truncsfhf2@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq + %x = call half @llvm.asin.f16(half %a) + ret half %x +} + +define float @use_asinf32(float %a) nounwind { +; CHECK-LABEL: use_asinf32: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp asinf@PLT # TAILCALL + %x = call float @llvm.asin.f32(float %a) + ret float %x +} + +define double @use_asinf64(double %a) nounwind { +; CHECK-LABEL: use_asinf64: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp asin@PLT # TAILCALL + %x = call double @llvm.asin.f64(double %a) + ret double %x +} + +define x86_fp80 @use_asinf80(x86_fp80 %a) nounwind { +; CHECK-LABEL: use_asinf80: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: fldt 32(%rsp) +; CHECK-NEXT: fstpt (%rsp) +; CHECK-NEXT: callq asinl@PLT +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: retq + %x = call x86_fp80 @llvm.asin.f80(x86_fp80 %a) + ret x86_fp80 %x +} + +define fp128 @use_asinfp128(fp128 %a) nounwind { +; CHECK-LABEL: use_asinfp128: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp asinf128@PLT # TAILCALL + %x = call fp128 @llvm.asin.f128(fp128 %a) + ret fp128 %x +} + +define ppc_fp128 @use_asinppc_fp128(ppc_fp128 %a) nounwind { +; CHECK-LABEL: use_asinppc_fp128: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq asinl@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq + %x = call ppc_fp128 @llvm.asin.ppcf128(ppc_fp128 %a) + ret ppc_fp128 %x +} + +declare half @llvm.asin.f16(half) +declare float @llvm.asin.f32(float) +declare double @llvm.asin.f64(double) +declare x86_fp80 @llvm.asin.f80(x86_fp80) +declare fp128 @llvm.asin.f128(fp128) +declare ppc_fp128 
@llvm.asin.ppcf128(ppc_fp128) diff --git a/llvm/test/CodeGen/X86/llvm.atan.ll b/llvm/test/CodeGen/X86/llvm.atan.ll new file mode 100644 index 00000000000000..d33ef7fd3ac5f0 --- /dev/null +++ b/llvm/test/CodeGen/X86/llvm.atan.ll @@ -0,0 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +define half @use_atanf16(half %a) nounwind { +; CHECK-LABEL: use_atanf16: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq __extendhfsf2@PLT +; CHECK-NEXT: callq atanf@PLT +; CHECK-NEXT: callq __truncsfhf2@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq + %x = call half @llvm.atan.f16(half %a) + ret half %x +} + +define float @use_atanf32(float %a) nounwind { +; CHECK-LABEL: use_atanf32: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp atanf@PLT # TAILCALL + %x = call float @llvm.atan.f32(float %a) + ret float %x +} + +define double @use_atanf64(double %a) nounwind { +; CHECK-LABEL: use_atanf64: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp atan@PLT # TAILCALL + %x = call double @llvm.atan.f64(double %a) + ret double %x +} + +define x86_fp80 @use_atanf80(x86_fp80 %a) nounwind { +; CHECK-LABEL: use_atanf80: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: fldt 32(%rsp) +; CHECK-NEXT: fstpt (%rsp) +; CHECK-NEXT: callq atanl@PLT +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: retq + %x = call x86_fp80 @llvm.atan.f80(x86_fp80 %a) + ret x86_fp80 %x +} + +define fp128 @use_atanfp128(fp128 %a) nounwind { +; CHECK-LABEL: use_atanfp128: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp atanf128@PLT # TAILCALL + %x = call fp128 @llvm.atan.f128(fp128 %a) + ret fp128 %x +} + +define ppc_fp128 @use_atanppc_fp128(ppc_fp128 %a) nounwind { +; CHECK-LABEL: use_atanppc_fp128: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq atanl@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq + %x = call ppc_fp128 @llvm.atan.ppcf128(ppc_fp128 %a) + ret ppc_fp128 %x +} + +declare half @llvm.atan.f16(half) +declare float @llvm.atan.f32(float) +declare double @llvm.atan.f64(double) +declare x86_fp80 @llvm.atan.f80(x86_fp80) +declare fp128 @llvm.atan.f128(fp128) +declare ppc_fp128 @llvm.atan.ppcf128(ppc_fp128) diff --git a/llvm/test/CodeGen/X86/llvm.cosh.ll b/llvm/test/CodeGen/X86/llvm.cosh.ll new file mode 100644 index 00000000000000..5e7582c8f86a4e --- /dev/null +++ b/llvm/test/CodeGen/X86/llvm.cosh.ll @@ -0,0 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +define half @use_coshf16(half %a) nounwind { +; CHECK-LABEL: use_coshf16: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq __extendhfsf2@PLT +; CHECK-NEXT: callq coshf@PLT +; CHECK-NEXT: callq __truncsfhf2@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq + %x = call half @llvm.cosh.f16(half %a) + ret half %x +} + +define float @use_coshf32(float %a) nounwind { +; CHECK-LABEL: use_coshf32: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp coshf@PLT # TAILCALL + %x = call float @llvm.cosh.f32(float %a) + ret float %x +} + +define double @use_coshf64(double %a) nounwind { +; CHECK-LABEL: use_coshf64: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp cosh@PLT # TAILCALL + %x = call double @llvm.cosh.f64(double %a) + ret double %x +} + +define x86_fp80 @use_coshf80(x86_fp80 %a) nounwind { +; CHECK-LABEL: use_coshf80: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: fldt 32(%rsp) +; CHECK-NEXT: fstpt (%rsp) +; 
CHECK-NEXT: callq coshl@PLT +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: retq + %x = call x86_fp80 @llvm.cosh.f80(x86_fp80 %a) + ret x86_fp80 %x +} + +define fp128 @use_coshfp128(fp128 %a) nounwind { +; CHECK-LABEL: use_coshfp128: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp coshf128@PLT # TAILCALL + %x = call fp128 @llvm.cosh.f128(fp128 %a) + ret fp128 %x +} + +define ppc_fp128 @use_coshppc_fp128(ppc_fp128 %a) nounwind { +; CHECK-LABEL: use_coshppc_fp128: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq coshl@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq + %x = call ppc_fp128 @llvm.cosh.ppcf128(ppc_fp128 %a) + ret ppc_fp128 %x +} + +declare half @llvm.cosh.f16(half) +declare float @llvm.cosh.f32(float) +declare double @llvm.cosh.f64(double) +declare x86_fp80 @llvm.cosh.f80(x86_fp80) +declare fp128 @llvm.cosh.f128(fp128) +declare ppc_fp128 @llvm.cosh.ppcf128(ppc_fp128) diff --git a/llvm/test/CodeGen/X86/llvm.sinh.ll b/llvm/test/CodeGen/X86/llvm.sinh.ll new file mode 100644 index 00000000000000..ba228421117f03 --- /dev/null +++ b/llvm/test/CodeGen/X86/llvm.sinh.ll @@ -0,0 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +define half @use_sinhf16(half %a) nounwind { +; CHECK-LABEL: use_sinhf16: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq __extendhfsf2@PLT +; CHECK-NEXT: callq sinhf@PLT +; CHECK-NEXT: callq __truncsfhf2@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq + %x = call half @llvm.sinh.f16(half %a) + ret half %x +} + +define float @use_sinhf32(float %a) nounwind { +; CHECK-LABEL: use_sinhf32: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp sinhf@PLT # TAILCALL + %x = call float @llvm.sinh.f32(float %a) + ret float %x +} + +define double @use_sinhf64(double %a) nounwind { +; CHECK-LABEL: use_sinhf64: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp sinh@PLT # TAILCALL + %x = call double @llvm.sinh.f64(double %a) + ret double %x +} + +define x86_fp80 @use_sinhf80(x86_fp80 %a) nounwind { +; CHECK-LABEL: use_sinhf80: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: fldt 32(%rsp) +; CHECK-NEXT: fstpt (%rsp) +; CHECK-NEXT: callq sinhl@PLT +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: retq + %x = call x86_fp80 @llvm.sinh.f80(x86_fp80 %a) + ret x86_fp80 %x +} + +define fp128 @use_sinhfp128(fp128 %a) nounwind { +; CHECK-LABEL: use_sinhfp128: +; CHECK: # %bb.0: +; CHECK-NEXT: jmp sinhf128@PLT # TAILCALL + %x = call fp128 @llvm.sinh.f128(fp128 %a) + ret fp128 %x +} + +define ppc_fp128 @use_sinhppc_fp128(ppc_fp128 %a) nounwind { +; CHECK-LABEL: use_sinhppc_fp128: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq sinhl@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq + %x = call ppc_fp128 @llvm.sinh.ppcf128(ppc_fp128 %a) + ret ppc_fp128 %x +} + +declare half @llvm.sinh.f16(half) +declare float @llvm.sinh.f32(float) +declare double @llvm.sinh.f64(double) +declare x86_fp80 @llvm.sinh.f80(x86_fp80) +declare fp128 @llvm.sinh.f128(fp128) +declare ppc_fp128 @llvm.sinh.ppcf128(ppc_fp128) diff --git a/llvm/test/CodeGen/X86/llvm.tanh.ll b/llvm/test/CodeGen/X86/llvm.tanh.ll new file mode 100644 index 00000000000000..7119c401c80400 --- /dev/null +++ b/llvm/test/CodeGen/X86/llvm.tanh.ll @@ -0,0 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +define half @use_tanhf16(half %a) nounwind { +; CHECK-LABEL: 
use_tanhf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq __extendhfsf2@PLT
+; CHECK-NEXT: callq tanhf@PLT
+; CHECK-NEXT: callq __truncsfhf2@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+ %x = call half @llvm.tanh.f16(half %a)
+ ret half %x
+}
+
+define float @use_tanhf32(float %a) nounwind {
+; CHECK-LABEL: use_tanhf32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp tanhf@PLT # TAILCALL
+ %x = call float @llvm.tanh.f32(float %a)
+ ret float %x
+}
+
+define double @use_tanhf64(double %a) nounwind {
+; CHECK-LABEL: use_tanhf64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp tanh@PLT # TAILCALL
+ %x = call double @llvm.tanh.f64(double %a)
+ ret double %x
+}
+
+define x86_fp80 @use_tanhf80(x86_fp80 %a) nounwind {
+; CHECK-LABEL: use_tanhf80:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: fldt 32(%rsp)
+; CHECK-NEXT: fstpt (%rsp)
+; CHECK-NEXT: callq tanhl@PLT
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: retq
+ %x = call x86_fp80 @llvm.tanh.f80(x86_fp80 %a)
+ ret x86_fp80 %x
+}
+
+define fp128 @use_tanhfp128(fp128 %a) nounwind {
+; CHECK-LABEL: use_tanhfp128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp tanhf128@PLT # TAILCALL
+ %x = call fp128 @llvm.tanh.f128(fp128 %a)
+ ret fp128 %x
+}
+
+define ppc_fp128 @use_tanhppc_fp128(ppc_fp128 %a) nounwind {
+; CHECK-LABEL: use_tanhppc_fp128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq tanhl@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+ %x = call ppc_fp128 @llvm.tanh.ppcf128(ppc_fp128 %a)
+ ret ppc_fp128 %x
+}
+
+declare half @llvm.tanh.f16(half)
+declare float @llvm.tanh.f32(float)
+declare double @llvm.tanh.f64(double)
+declare x86_fp80 @llvm.tanh.f80(x86_fp80)
+declare fp128 @llvm.tanh.f128(fp128)
+declare ppc_fp128 @llvm.tanh.ppcf128(ppc_fp128)
diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
index d71fd470651cf9..b486014678466e 100644
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -7997,7 +7997,1355 @@ entry:
 ret <4 x double> %tan
 }
 
+define <1 x float> @constrained_vector_acos_v1f32() #0 {
+; CHECK-LABEL: constrained_vector_acos_v1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq acosf@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_acos_v1f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq acosf@PLT
+; AVX-NEXT: popq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %acos = call <1 x float> @llvm.experimental.constrained.acos.v1f32(
+ <1 x float> <float 42.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <1 x float> %acos
+}
+
+define <2 x double> @constrained_vector_acos_v2f64() #0 {
+; CHECK-LABEL: constrained_vector_acos_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq acos@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq acos@PLT
+; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_acos_v2f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $24, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 32
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq acos@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq acos@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: addq $24, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %acos = call <2 x double> @llvm.experimental.constrained.acos.v2f64(
+ <2 x double> <double 42.0, double 42.1>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <2 x double> %acos
+}
+
+define <3 x float> @constrained_vector_acos_v3f32() #0 {
+; CHECK-LABEL: constrained_vector_acos_v3f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq acosf@PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq acosf@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq acosf@PLT
+; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_acos_v3f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq acosf@PLT
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq acosf@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq acosf@PLT
+; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %acos = call <3 x float> @llvm.experimental.constrained.acos.v3f32(
+ <3 x float> <float 42.0, float 43.0, float 44.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x float> %acos
+}
+
+define <3 x double> @constrained_vector_acos_v3f64() #0 {
+; CHECK-LABEL: constrained_vector_acos_v3f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq acos@PLT
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq acos@PLT
+; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: callq acos@PLT
+; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
+; CHECK-NEXT: # xmm1 = mem[0],zero
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_acos_v3f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq acos@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq acos@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: callq acos@PLT
+; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
+; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %acos = call <3 x double> @llvm.experimental.constrained.acos.v3f64(
+ <3 x double> <double 42.0, double 42.1, double 42.2>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x double> %acos
+}
+
+define <4 x double> @constrained_vector_acos_v4f64() #0 {
+; CHECK-LABEL: constrained_vector_acos_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq acos@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq acos@PLT
+; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
+; CHECK-NEXT: callq acos@PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: callq acos@PLT
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_acos_v4f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
+; AVX-NEXT: callq acos@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: callq acos@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq acos@PLT
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq acos@PLT
+; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %acos = call <4 x double> @llvm.experimental.constrained.acos.v4f64(
+ <4 x double> <double 42.0, double 42.1, double 42.2, double 42.3>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x double> %acos
+}
+
+define <1 x float> @constrained_vector_asin_v1f32() #0 {
+; CHECK-LABEL: constrained_vector_asin_v1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq asinf@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_asin_v1f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq asinf@PLT
+; AVX-NEXT: popq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %asin = call <1 x float> @llvm.experimental.constrained.asin.v1f32(
+ <1 x float> <float 42.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <1 x float> %asin
+}
+
+define <2 x double> @constrained_vector_asin_v2f64() #0 {
+; CHECK-LABEL: constrained_vector_asin_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq asin@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq asin@PLT
+; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_asin_v2f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $24, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 32
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq asin@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq asin@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: addq $24, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %asin = call <2 x double> @llvm.experimental.constrained.asin.v2f64(
+ <2 x double> <double 42.0, double 42.1>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <2 x double> %asin
+}
+
+define <3 x float> @constrained_vector_asin_v3f32() #0 {
+; CHECK-LABEL: constrained_vector_asin_v3f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq asinf@PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq asinf@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq asinf@PLT
+; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_asin_v3f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq asinf@PLT
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq asinf@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq asinf@PLT
+; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %asin = call <3 x float> @llvm.experimental.constrained.asin.v3f32(
+ <3 x float> <float 42.0, float 43.0, float 44.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x float> %asin
+}
+
+define <3 x double> @constrained_vector_asin_v3f64() #0 {
+; CHECK-LABEL: constrained_vector_asin_v3f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq asin@PLT
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq asin@PLT
+; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: callq asin@PLT
+; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
+; CHECK-NEXT: # xmm1 = mem[0],zero
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_asin_v3f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq asin@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq asin@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: callq asin@PLT
+; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
+; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %asin = call <3 x double> @llvm.experimental.constrained.asin.v3f64(
+ <3 x double> <double 42.0, double 42.1, double 42.2>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x double> %asin
+}
+
+define <4 x double> @constrained_vector_asin_v4f64() #0 {
+; CHECK-LABEL: constrained_vector_asin_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq asin@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq asin@PLT
+; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
+; CHECK-NEXT: callq asin@PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: callq asin@PLT
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_asin_v4f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
+; AVX-NEXT: callq asin@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: callq asin@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq asin@PLT
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq asin@PLT
+; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %asin = call <4 x double> @llvm.experimental.constrained.asin.v4f64(
+ <4 x double> <double 42.0, double 42.1, double 42.2, double 42.3>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x double> %asin
+}
+
+define <1 x float> @constrained_vector_atan_v1f32() #0 {
+; CHECK-LABEL: constrained_vector_atan_v1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq atanf@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_atan_v1f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq atanf@PLT
+; AVX-NEXT: popq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %atan = call <1 x float> @llvm.experimental.constrained.atan.v1f32(
+ <1 x float> <float 42.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <1 x float> %atan
+}
+
+define <2 x double> @constrained_vector_atan_v2f64() #0 {
+; CHECK-LABEL: constrained_vector_atan_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq atan@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq atan@PLT
+; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_atan_v2f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $24, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 32
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq atan@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq atan@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: addq $24, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %atan = call <2 x double> @llvm.experimental.constrained.atan.v2f64(
+ <2 x double> <double 42.0, double 42.1>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <2 x double> %atan
+}
+
+define <3 x float> @constrained_vector_atan_v3f32() #0 {
+; CHECK-LABEL: constrained_vector_atan_v3f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq atanf@PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq atanf@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq atanf@PLT
+; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_atan_v3f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq atanf@PLT
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq atanf@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq atanf@PLT
+; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %atan = call <3 x float> @llvm.experimental.constrained.atan.v3f32(
+ <3 x float> <float 42.0, float 43.0, float 44.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x float> %atan
+}
+
+define <3 x double> @constrained_vector_atan_v3f64() #0 {
+; CHECK-LABEL: constrained_vector_atan_v3f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq atan@PLT
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq atan@PLT
+; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: callq atan@PLT
+; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
+; CHECK-NEXT: # xmm1 = mem[0],zero
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_atan_v3f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq atan@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq atan@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: callq atan@PLT
+; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
+; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %atan = call <3 x double> @llvm.experimental.constrained.atan.v3f64(
+ <3 x double> <double 42.0, double 42.1, double 42.2>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x double> %atan
+}
+
+define <4 x double> @constrained_vector_atan_v4f64() #0 {
+; CHECK-LABEL: constrained_vector_atan_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq atan@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq atan@PLT
+; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
+; CHECK-NEXT: callq atan@PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: callq atan@PLT
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_atan_v4f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
+; AVX-NEXT: callq atan@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: callq atan@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq atan@PLT
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq atan@PLT
+; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %atan = call <4 x double> @llvm.experimental.constrained.atan.v4f64(
+ <4 x double> <double 42.0, double 42.1, double 42.2, double 42.3>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x double> %atan
+}
+
+define <1 x float> @constrained_vector_cosh_v1f32() #0 {
+; CHECK-LABEL: constrained_vector_cosh_v1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq coshf@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_cosh_v1f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq coshf@PLT
+; AVX-NEXT: popq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %cosh = call <1 x float> @llvm.experimental.constrained.cosh.v1f32(
+ <1 x float> <float 42.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <1 x float> %cosh
+}
+
+define <2 x double> @constrained_vector_cosh_v2f64() #0 {
+; CHECK-LABEL: constrained_vector_cosh_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq cosh@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq cosh@PLT
+; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_cosh_v2f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $24, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 32
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq cosh@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq cosh@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: addq $24, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %cosh = call <2 x double> @llvm.experimental.constrained.cosh.v2f64(
+ <2 x double> <double 42.0, double 42.1>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <2 x double> %cosh
+}
+
+define <3 x float> @constrained_vector_cosh_v3f32() #0 {
+; CHECK-LABEL: constrained_vector_cosh_v3f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq coshf@PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq coshf@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq coshf@PLT
+; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_cosh_v3f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq coshf@PLT
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq coshf@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq coshf@PLT
+; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %cosh = call <3 x float> @llvm.experimental.constrained.cosh.v3f32(
+ <3 x float> <float 42.0, float 43.0, float 44.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x float> %cosh
+}
+
+define <3 x double> @constrained_vector_cosh_v3f64() #0 {
+; CHECK-LABEL: constrained_vector_cosh_v3f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq cosh@PLT
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq cosh@PLT
+; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: callq cosh@PLT
+; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
+; CHECK-NEXT: # xmm1 = mem[0],zero
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_cosh_v3f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq cosh@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq cosh@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: callq cosh@PLT
+; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
+; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %cosh = call <3 x double> @llvm.experimental.constrained.cosh.v3f64(
+ <3 x double> <double 42.0, double 42.1, double 42.2>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x double> %cosh
+}
+
+define <4 x double> @constrained_vector_cosh_v4f64() #0 {
+; CHECK-LABEL: constrained_vector_cosh_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq cosh@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq cosh@PLT
+; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
+; CHECK-NEXT: callq cosh@PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: callq cosh@PLT
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_cosh_v4f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
+; AVX-NEXT: callq cosh@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: callq cosh@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq cosh@PLT
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq cosh@PLT
+; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %cosh = call <4 x double> @llvm.experimental.constrained.cosh.v4f64(
+ <4 x double> <double 42.0, double 42.1, double 42.2, double 42.3>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x double> %cosh
+}
+
+define <1 x float> @constrained_vector_sinh_v1f32() #0 {
+; CHECK-LABEL: constrained_vector_sinh_v1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq sinhf@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_sinh_v1f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq sinhf@PLT
+; AVX-NEXT: popq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %sinh = call <1 x float> @llvm.experimental.constrained.sinh.v1f32(
+ <1 x float> <float 42.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <1 x float> %sinh
+}
+
+define <2 x double> @constrained_vector_sinh_v2f64() #0 {
+; CHECK-LABEL: constrained_vector_sinh_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq sinh@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq sinh@PLT
+; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_sinh_v2f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $24, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 32
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq sinh@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq sinh@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: addq $24, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %sinh = call <2 x double> @llvm.experimental.constrained.sinh.v2f64(
+ <2 x double> <double 42.0, double 42.1>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <2 x double> %sinh
+}
+
+define <3 x float> @constrained_vector_sinh_v3f32() #0 {
+; CHECK-LABEL: constrained_vector_sinh_v3f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq sinhf@PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq sinhf@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq sinhf@PLT
+; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_sinh_v3f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq sinhf@PLT
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq sinhf@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq sinhf@PLT
+; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %sinh = call <3 x float> @llvm.experimental.constrained.sinh.v3f32(
+ <3 x float> <float 42.0, float 43.0, float 44.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x float> %sinh
+}
+
+define <3 x double> @constrained_vector_sinh_v3f64() #0 {
+; CHECK-LABEL: constrained_vector_sinh_v3f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq sinh@PLT
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq sinh@PLT
+; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: callq sinh@PLT
+; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
+; CHECK-NEXT: # xmm1 = mem[0],zero
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_sinh_v3f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq sinh@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq sinh@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
+; AVX-NEXT: vmovsd
{{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] +; AVX-NEXT: vzeroupper +; AVX-NEXT: callq sinh@PLT +; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload +; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX-NEXT: addq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %sinh = call <3 x double> @llvm.experimental.constrained.sinh.v3f64( + <3 x double> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <3 x double> %sinh +} + +define <4 x double> @constrained_vector_sinh_v4f64() #0 { +; CHECK-LABEL: constrained_vector_sinh_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; CHECK-NEXT: callq sinh@PLT +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; CHECK-NEXT: callq sinh@PLT +; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] +; CHECK-NEXT: callq sinh@PLT +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] +; CHECK-NEXT: callq sinh@PLT +; CHECK-NEXT: movaps %xmm0, %xmm1 +; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload +; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_sinh_v4f64: +; AVX: # %bb.0: # %entry +; AVX-NEXT: subq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 48 +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] +; AVX-NEXT: callq sinh@PLT +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] +; AVX-NEXT: callq sinh@PLT +; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] +; AVX-NEXT: callq sinh@PLT +; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] +; AVX-NEXT: callq sinh@PLT +; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; AVX-NEXT: # xmm0 = xmm0[0],mem[0] +; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload +; AVX-NEXT: addq $40, %rsp +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: + %sinh = call <4 x double> @llvm.experimental.constrained.sinh.v4f64( + <4 x double> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %sinh +} + +define <1 x float> @constrained_vector_tanh_v1f32() #0 { +; CHECK-LABEL: constrained_vector_tanh_v1f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: callq tanhf@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; +; AVX-LABEL: constrained_vector_tanh_v1f32: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: .cfi_def_cfa_offset 16 +; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] +; AVX-NEXT: callq tanhf@PLT +; AVX-NEXT: popq %rax +; AVX-NEXT: .cfi_def_cfa_offset 8 +; AVX-NEXT: retq +entry: 
+ %tanh = call <1 x float> @llvm.experimental.constrained.tanh.v1f32(
+ <1 x float> <float 42.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <1 x float> %tanh
+}
+
+define <2 x double> @constrained_vector_tanh_v2f64() #0 {
+; CHECK-LABEL: constrained_vector_tanh_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq tanh@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq tanh@PLT
+; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_tanh_v2f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $24, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 32
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq tanh@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq tanh@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: addq $24, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %tanh = call <2 x double> @llvm.experimental.constrained.tanh.v2f64(
+ <2 x double> <double 42.0, double 42.1>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <2 x double> %tanh
+}
+
+define <3 x float> @constrained_vector_tanh_v3f32() #0 {
+; CHECK-LABEL: constrained_vector_tanh_v3f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq tanhf@PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq tanhf@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq tanhf@PLT
+; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_tanh_v3f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq tanhf@PLT
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq tanhf@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq tanhf@PLT
+; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %tanh = call <3 x float> @llvm.experimental.constrained.tanh.v3f32(
+ <3 x float> <float 42.0, float 43.0, float 44.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x float> %tanh
+}
+
+define <3 x double> @constrained_vector_tanh_v3f64() #0 {
+; CHECK-LABEL: constrained_vector_tanh_v3f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq tanh@PLT
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq tanh@PLT
+; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: callq tanh@PLT
+; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
+; CHECK-NEXT: # xmm1 = mem[0],zero
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_tanh_v3f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq tanh@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq tanh@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: callq tanh@PLT
+; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
+; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %tanh = call <3 x double> @llvm.experimental.constrained.tanh.v3f64(
+ <3 x double> <double 42.0, double 42.1, double 42.2>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x double> %tanh
+}
+
+define <4 x double> @constrained_vector_tanh_v4f64() #0 {
+; CHECK-LABEL: constrained_vector_tanh_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq tanh@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: callq tanh@PLT
+; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
+; CHECK-NEXT: callq tanh@PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: callq tanh@PLT
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_tanh_v4f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
+; AVX-NEXT: callq tanh@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: callq tanh@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq tanh@PLT
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: callq tanh@PLT
+; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %tanh = call <4 x double> @llvm.experimental.constrained.tanh.v4f64(
+ <4 x double> <double 42.0, double 42.1, double 42.2, double 42.3>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x double> %tanh
+}
 
 declare <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float>, <16 x float>, metadata, metadata)
@@ -8015,6 +9363,12 @@ declare <2 x double> @llvm.experimental.constrained.powi.v2f64(<2 x double>, i32
 declare <2 x double> @llvm.experimental.constrained.sin.v2f64(<2 x double>, metadata, metadata)
 declare <2 x double> @llvm.experimental.constrained.cos.v2f64(<2 x double>, metadata, metadata)
 declare <2 x double> @llvm.experimental.constrained.tan.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.asin.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.acos.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.atan.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.sinh.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.cosh.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.tanh.v2f64(<2 x double>, metadata, metadata)
 declare <2 x double> @llvm.experimental.constrained.exp.v2f64(<2 x double>, metadata, metadata)
 declare <2 x double> @llvm.experimental.constrained.exp2.v2f64(<2 x double>, metadata, metadata)
 declare <2 x double> @llvm.experimental.constrained.log.v2f64(<2 x double>, metadata, metadata)
@@ -8059,6 +9413,12 @@ declare <1 x float> @llvm.experimental.constrained.powi.v1f32(<1 x float>, i32,
 declare <1 x float> @llvm.experimental.constrained.sin.v1f32(<1 x float>, metadata, metadata)
 declare <1 x float> @llvm.experimental.constrained.cos.v1f32(<1 x float>, metadata, metadata)
 declare <1 x float> @llvm.experimental.constrained.tan.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.asin.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.acos.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.atan.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.sinh.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.cosh.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.tanh.v1f32(<1 x float>, metadata, metadata)
 declare <1 x float> @llvm.experimental.constrained.exp.v1f32(<1 x float>, metadata, metadata)
 declare <1 x float> @llvm.experimental.constrained.exp2.v1f32(<1 x float>, metadata, metadata)
 declare <1 x float> @llvm.experimental.constrained.log.v1f32(<1 x float>, metadata, metadata)
@@ -8114,6 +9474,18 @@ declare <3 x float> @llvm.experimental.constrained.cos.v3f32(<3 x float>, metada
 declare <3 x double> @llvm.experimental.constrained.cos.v3f64(<3 x double>, metadata, metadata)
 declare <3 x float> @llvm.experimental.constrained.tan.v3f32(<3 x float>, metadata, metadata)
 declare <3 x double> @llvm.experimental.constrained.tan.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.asin.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.asin.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.acos.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.acos.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.atan.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.atan.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.sinh.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.sinh.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.cosh.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.cosh.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.tanh.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.tanh.v3f64(<3 x double>, metadata, metadata)
 declare <3 x float> @llvm.experimental.constrained.exp.v3f32(<3 x float>, metadata, metadata)
 declare <3 x double> @llvm.experimental.constrained.exp.v3f64(<3 x double>, metadata, metadata)
 declare <3 x float> @llvm.experimental.constrained.exp2.v3f32(<3 x float>, metadata, metadata)
@@ -8171,6 +9543,12 @@ declare <4 x double> @llvm.experimental.constrained.powi.v4f64(<4 x double>, i32
 declare <4 x double> @llvm.experimental.constrained.sin.v4f64(<4 x double>, metadata, metadata)
 declare <4 x double> @llvm.experimental.constrained.cos.v4f64(<4 x double>, metadata, metadata)
 declare <4 x double> @llvm.experimental.constrained.tan.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.asin.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.acos.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.atan.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.sinh.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.cosh.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.tanh.v4f64(<4 x double>, metadata, metadata)
 declare <4 x double> @llvm.experimental.constrained.exp.v4f64(<4 x double>, metadata, metadata)
 declare <4 x double> @llvm.experimental.constrained.exp2.v4f64(<4 x double>, metadata, metadata)
 declare <4 x double> @llvm.experimental.constrained.log.v4f64(<4 x double>, metadata, metadata)
diff --git a/llvm/test/Feature/fp-intrinsics.ll b/llvm/test/Feature/fp-intrinsics.ll
index 78275a16d3e8f7..80f8b15abfaabe 100644
--- a/llvm/test/Feature/fp-intrinsics.ll
+++ b/llvm/test/Feature/fp-intrinsics.ll
@@ -162,6 +162,72 @@ entry:
   ret double %result
 }
 
+; Verify that acos(42.0) isn't simplified when the rounding mode is unknown.
+; CHECK-LABEL: facos +; CHECK: call double @llvm.experimental.constrained.acos +define double @facos() #0 { +entry: + %result = call double @llvm.experimental.constrained.acos.f64(double 42.0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + +; Verify that asin(42.0) isn't simplified when the rounding mode is unknown. +; CHECK-LABEL: fasin +; CHECK: call double @llvm.experimental.constrained.asin +define double @fasin() #0 { +entry: + %result = call double @llvm.experimental.constrained.asin.f64(double 42.0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + +; Verify that atan(42.0) isn't simplified when the rounding mode is unknown. +; CHECK-LABEL: fatan +; CHECK: call double @llvm.experimental.constrained.atan +define double @fatan() #0 { +entry: + %result = call double @llvm.experimental.constrained.atan.f64(double 42.0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + +; Verify that cosh(42.0) isn't simplified when the rounding mode is unknown. +; CHECK-LABEL: fcosh +; CHECK: call double @llvm.experimental.constrained.cosh +define double @fcosh() #0 { +entry: + %result = call double @llvm.experimental.constrained.cosh.f64(double 42.0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + +; Verify that sinh(42.0) isn't simplified when the rounding mode is unknown. +; CHECK-LABEL: fsinh +; CHECK: call double @llvm.experimental.constrained.sinh +define double @fsinh() #0 { +entry: + %result = call double @llvm.experimental.constrained.sinh.f64(double 42.0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + +; Verify that tanh(42.0) isn't simplified when the rounding mode is unknown. +; CHECK-LABEL: ftanh +; CHECK: call double @llvm.experimental.constrained.tanh +define double @ftanh() #0 { +entry: + %result = call double @llvm.experimental.constrained.tanh.f64(double 42.0, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + ; Verify that exp(42.0) isn't simplified when the rounding mode is unknown. ; CHECK-LABEL: f10 ; CHECK: call double @llvm.experimental.constrained.exp