Skip to content

Commit

Permalink
[X86][LoopVectorize] Add support for arc and hyperbolic trig functions (
Browse files Browse the repository at this point in the history
llvm#99383)

This change is part 2 x86 Loop Vectorization of :
llvm#96222

It also has veclib call loop vectorization hence the test cases in
`llvm/test/Transforms/LoopVectorize/X86/veclib-calls.ll`

finally the last pr missed tests for
`llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll` and
`llvm/test/CodeGen/X86/vec-libcalls.ll` so added those aswell.

No evidence was found for arc and hyperbolic trig glibc vector math
functions

https://github.com/lattera/glibc/blob/master/sysdeps/x86/fpu/bits/math-vector.h
so no  new `_ZGVbN2v_*` and  `_ZGVdN4v_*` .
So no new tests in
`llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll`

Also no new svml and no new tests to:
`llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll`
There was not enough evidence that there were svml arc and hyperbolic
trig vector implementations, Documentation was scarces so looked at test
cases in
[numpy](https://github.com/numpy/SVML/blob/32bf2a98420762a63ab418aaa0a7d6e17eb9627a/linux/avx512/svml_z0_acos_d_la.s#L8).
Someone with more experience with svml should investigate.

## Note 
amd libm doesn't have a vector hyperbolic sine api hence why youi might
notice there are no tests for `sinh`.

## History
This change is part of
llvm#87367 investigation on
supporting IEEE math operations as intrinsics.
Which was discussed in this RFC:
https://discourse.llvm.org/t/rfc-all-the-math-intrinsics/78294

This change adds loop vectorization for `acos`, `asin`, `atan`, `cosh`,
`sinh`, and `tanh`.
resolves llvm#70079
resolves llvm#70080
resolves llvm#70081
resolves llvm#70083
resolves llvm#70084
resolves llvm#95966
  • Loading branch information
farzonl authored and banach-space committed Aug 7, 2024
1 parent bea1e03 commit dc089c5
Show file tree
Hide file tree
Showing 5 changed files with 1,863 additions and 0 deletions.
27 changes: 27 additions & 0 deletions llvm/include/llvm/Analysis/VecFuncs.def
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,19 @@ TLI_DEFINE_VECFUNC("llvm.cos.f32", "vcosf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("tanf", "vtanf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.tan.f32", "vtanf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("asinf", "vasinf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.asin.f32", "vasinf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("acosf", "vacosf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.acos.f32", "vacosf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("atanf", "vatanf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.atan.f32", "vatanf", FIXED(4), "_ZGV_LLVM_N4v")

// Hyperbolic Functions
TLI_DEFINE_VECFUNC("sinhf", "vsinhf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.sinh.f32", "vsinhf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("coshf", "vcoshf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.cosh.f32", "vcoshf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("tanhf", "vtanhf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.tanh.f32", "vtanhf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("asinhf", "vasinhf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("acoshf", "vacoshf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("atanhf", "vatanhf", FIXED(4), "_ZGV_LLVM_N4v")
Expand Down Expand Up @@ -1358,22 +1364,43 @@ TLI_DEFINE_VECFUNC("asinf", "amd_vrs4_asinf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("asinf", "amd_vrs8_asinf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
TLI_DEFINE_VECFUNC("asinf", "amd_vrs16_asinf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")

TLI_DEFINE_VECFUNC("llvm.asin.f64", "amd_vrd8_asin", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
TLI_DEFINE_VECFUNC("llvm.asin.f32", "amd_vrs4_asinf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.asin.f32", "amd_vrs8_asinf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
TLI_DEFINE_VECFUNC("llvm.asin.f32", "amd_vrs16_asinf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")

TLI_DEFINE_VECFUNC("acosf", "amd_vrs4_acosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("acosf", "amd_vrs8_acosf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")

TLI_DEFINE_VECFUNC("llvm.acos.f32", "amd_vrs8_acosf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
TLI_DEFINE_VECFUNC("llvm.acos.f32", "amd_vrs4_acosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")

TLI_DEFINE_VECFUNC("atan", "amd_vrd2_atan", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("atan", "amd_vrd4_atan", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("atan", "amd_vrd8_atan", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
TLI_DEFINE_VECFUNC("atanf", "amd_vrs4_atanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("atanf", "amd_vrs8_atanf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
TLI_DEFINE_VECFUNC("atanf", "amd_vrs16_atanf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")

TLI_DEFINE_VECFUNC("llvm.atan.f64", "amd_vrd2_atan", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("llvm.atan.f64", "amd_vrd4_atan", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.atan.f64", "amd_vrd8_atan", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
TLI_DEFINE_VECFUNC("llvm.atan.f32", "amd_vrs4_atanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.atan.f32", "amd_vrs8_atanf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
TLI_DEFINE_VECFUNC("llvm.atan.f32", "amd_vrs16_atanf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")

TLI_DEFINE_VECFUNC("coshf", "amd_vrs4_coshf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("coshf", "amd_vrs8_coshf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")

TLI_DEFINE_VECFUNC("llvm.cosh.f32", "amd_vrs4_coshf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.cosh.f32", "amd_vrs8_coshf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")

TLI_DEFINE_VECFUNC("tanhf", "amd_vrs4_tanhf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("tanhf", "amd_vrs8_tanhf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")

TLI_DEFINE_VECFUNC("llvm.tanh.f32", "amd_vrs4_tanhf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.tanh.f32", "amd_vrs8_tanhf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")

TLI_DEFINE_VECFUNC("cbrt", "amd_vrd2_cbrt", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("cbrtf", "amd_vrs4_cbrtf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")

Expand Down
90 changes: 90 additions & 0 deletions llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,90 @@ define float @tan(float %x) #0 {
ret float %result
}

define float @acos(float %x) #0 {
; CHECK-LABEL: acos:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: fstps (%esp)
; CHECK-NEXT: wait
; CHECK-NEXT: calll _acosf
; CHECK-NEXT: popl %eax
; CHECK-NEXT: retl
%result = call float @llvm.experimental.constrained.acos.f32(float %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret float %result
}

define float @asin(float %x) #0 {
; CHECK-LABEL: asin:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: fstps (%esp)
; CHECK-NEXT: wait
; CHECK-NEXT: calll _asinf
; CHECK-NEXT: popl %eax
; CHECK-NEXT: retl
%result = call float @llvm.experimental.constrained.asin.f32(float %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret float %result
}

define float @atan(float %x) #0 {
; CHECK-LABEL: atan:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: fstps (%esp)
; CHECK-NEXT: wait
; CHECK-NEXT: calll _atanf
; CHECK-NEXT: popl %eax
; CHECK-NEXT: retl
%result = call float @llvm.experimental.constrained.atan.f32(float %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret float %result
}

define float @cosh(float %x) #0 {
; CHECK-LABEL: cosh:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: fstps (%esp)
; CHECK-NEXT: wait
; CHECK-NEXT: calll _coshf
; CHECK-NEXT: popl %eax
; CHECK-NEXT: retl
%result = call float @llvm.experimental.constrained.cosh.f32(float %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret float %result
}

define float @sinh(float %x) #0 {
; CHECK-LABEL: sinh:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: fstps (%esp)
; CHECK-NEXT: wait
; CHECK-NEXT: calll _sinhf
; CHECK-NEXT: popl %eax
; CHECK-NEXT: retl
%result = call float @llvm.experimental.constrained.sinh.f32(float %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret float %result
}

define float @tanh(float %x) #0 {
; CHECK-LABEL: tanh:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: fstps (%esp)
; CHECK-NEXT: wait
; CHECK-NEXT: calll _tanhf
; CHECK-NEXT: popl %eax
; CHECK-NEXT: retl
%result = call float @llvm.experimental.constrained.tanh.f32(float %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
ret float %result
}

attributes #0 = { strictfp }

declare float @llvm.experimental.constrained.ceil.f32(float, metadata)
Expand All @@ -189,3 +273,9 @@ declare float @llvm.experimental.constrained.log10.f32(float, metadata, metadata
declare float @llvm.experimental.constrained.pow.f32(float, float, metadata, metadata)
declare float @llvm.experimental.constrained.sin.f32(float, metadata, metadata)
declare float @llvm.experimental.constrained.tan.f32(float, metadata, metadata)
declare float @llvm.experimental.constrained.acos.f32(float, metadata, metadata)
declare float @llvm.experimental.constrained.asin.f32(float, metadata, metadata)
declare float @llvm.experimental.constrained.atan.f32(float, metadata, metadata)
declare float @llvm.experimental.constrained.cosh.f32(float, metadata, metadata)
declare float @llvm.experimental.constrained.sinh.f32(float, metadata, metadata)
declare float @llvm.experimental.constrained.tanh.f32(float, metadata, metadata)
Loading

0 comments on commit dc089c5

Please sign in to comment.