Skip to content

Commit

Permalink
Merge pull request OpenMathLib#4563 from XiWeiGu/loongarch_fix_lasx
Browse files Browse the repository at this point in the history
LoongArch: fixed bugs in the LASX-optimized kernels
  • Loading branch information
martin-frbg committed Mar 16, 2024
2 parents 66bde62 + 60e251a commit 79cb121
Show file tree
Hide file tree
Showing 9 changed files with 205 additions and 59 deletions.
6 changes: 3 additions & 3 deletions kernel/loongarch64/amin_lasx.S
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvinsgr2vr.d VX1, t2, 1
xvinsgr2vr.d VX1, t3, 2
xvinsgr2vr.d VX1, t4, 3
xvfmaxa.d VM1, VX0, VX1
xvfmaxa.d VM0, VM0, VM1
xvfmina.d VM1, VX0, VX1
xvfmina.d VM0, VM0, VM1
#else
ld.w t1, X, 0
add.d X, X, INCX
Expand All @@ -187,7 +187,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvinsgr2vr.w VM1, t2, 5
xvinsgr2vr.w VM1, t3, 6
xvinsgr2vr.w VM1, t4, 7
xvfmaxa.s VM0, VM0, VM1
xvfmina.s VM0, VM0, VM1
#endif
addi.d I, I, -1
blt $r0, I, .L21
Expand Down
10 changes: 9 additions & 1 deletion kernel/loongarch64/axpby_lasx.S
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvst VX1, Y, 4 * SIZE
#else
xvfmul.s VX0, VX0, VXA
addi.d I, I, -1
xvst VX0, Y, 0 * SIZE
#endif
addi.d I, I, -1
addi.d X, X, 8 * SIZE
addi.d Y, Y, 8 * SIZE
blt $r0, I, .L112
Expand Down Expand Up @@ -288,6 +288,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
addi.d X, X, 8 * SIZE
addi.d I, I, -1
blt $r0, I, .L121
move Y, YY
b .L997
.align 3

Expand Down Expand Up @@ -334,6 +335,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add.d YY, YY, INCY
addi.d X, X, 8 * SIZE
blt $r0, I, .L122
move Y, YY
b .L997
.align 3

Expand Down Expand Up @@ -425,6 +427,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add.d YY, YY, INCY
addi.d I, I, -1
blt $r0, I, .L123
move Y, YY
b .L997
.align 3

Expand Down Expand Up @@ -465,6 +468,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add.d YY, YY, INCY
addi.d I, I, -1
blt $r0, I, .L124
move Y, YY
b .L997
.align 3

Expand Down Expand Up @@ -803,6 +807,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
add.d YY, YY, INCY
blt $r0, I, .L221
move Y, YY
b .L997
.align 3

Expand Down Expand Up @@ -895,6 +900,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
add.d YY, YY, INCY
blt $r0, I, .L222
move Y, YY
b .L997
.align 3

Expand Down Expand Up @@ -987,6 +993,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
add.d YY, YY, INCY
blt $r0, I, .L223
move Y, YY
b .L997
.align 3

Expand Down Expand Up @@ -1027,6 +1034,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add.d YY, YY, INCY
addi.d I, I, -1
blt $r0, I, .L224
move Y, YY
b .L997
.align 3

Expand Down
25 changes: 20 additions & 5 deletions kernel/loongarch64/camax_lasx.S
Original file line number Diff line number Diff line change
Expand Up @@ -107,15 +107,27 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef DOUBLE
xvpickve.d x1, VM0, 0
xvpickve.d x2, VM0, 1
XVFMAX VM0, x1, x2
XVFMAX VX0, x1, x2
xvpickve.d x1, VM0, 2
xvpickve.d x2, VM0, 3
XVFMAX VX1, x1, x2
XVFMAX VM0, VX0, VX1
#else
xvpickve.w x1, VM0, 0
xvpickve.w x2, VM0, 1
xvpickve.w x3, VM0, 2
xvpickve.w x4, VM0, 3
XVFMAX VX0, x1, x2
XVFMAX VX1, x3, x4
XVFMAX VX0, VX0, VX1
xvpickve.w x1, VM0, 4
xvpickve.w x2, VM0, 5
xvpickve.w x3, VM0, 6
xvpickve.w x4, VM0, 7
XVFMAX VM0, x1, x2
XVFMAX VM1, x3, x4
XVFMAX VM0, VM0, VM1
XVFMAX VM0, VM0, VX0
#endif
b .L23
.align 3
Expand Down Expand Up @@ -150,7 +162,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FABS t4, t4
ADD t1, t1, t2
ADD t3, t3, t4
FMAX s1, t1, t3
FMAX s2, t1, t3
LD t1, X, 0 * SIZE
LD t2, X, 1 * SIZE
add.d X, X, INCX
Expand Down Expand Up @@ -178,13 +190,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ADD t1, t1, t2
ADD t3, t3, t4
FMAX s4, t1, t3

FMAX s1, s1, s2
FMAX s3, s3, s4
FMAX a0, a0, s3
FMAX a0, a0, s1
blt $r0, I, .L21
.align 3

.L22:
FMAX s1, s1, s2
FMAX s3, s3, s4
FMAX s1, s1, s3
MOV s1, a0
.align 3

.L23: //N<8
Expand Down
25 changes: 20 additions & 5 deletions kernel/loongarch64/camin_lasx.S
Original file line number Diff line number Diff line change
Expand Up @@ -116,15 +116,27 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef DOUBLE
xvpickve.d x1, VM0, 0
xvpickve.d x2, VM0, 1
XVFMIN VM0, x1, x2
XVFMIN VX0, x1, x2
xvpickve.d x1, VM0, 2
xvpickve.d x2, VM0, 3
XVFMIN VX1, x1, x2
XVFMIN VM0, VX0, VX1
#else
xvpickve.w x1, VM0, 0
xvpickve.w x2, VM0, 1
xvpickve.w x3, VM0, 2
xvpickve.w x4, VM0, 3
XVFMIN VX0, x1, x2
XVFMIN VX1, x3, x4
XVFMIN VX0, VX0, VX1
xvpickve.w x1, VM0, 4
xvpickve.w x2, VM0, 5
xvpickve.w x3, VM0, 6
xvpickve.w x4, VM0, 7
XVFMIN VM0, x1, x2
XVFMIN VM1, x3, x4
XVFMIN VM0, VM0, VM1
XVFMIN VM0, VM0, VX0
#endif
b .L23
.align 3
Expand Down Expand Up @@ -159,7 +171,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FABS t4, t4
ADD t1, t1, t2
ADD t3, t3, t4
FMIN s1, t1, t3
FMIN s2, t1, t3
LD t1, X, 0 * SIZE
LD t2, X, 1 * SIZE
add.d X, X, INCX
Expand Down Expand Up @@ -187,13 +199,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ADD t1, t1, t2
ADD t3, t3, t4
FMIN s4, t1, t3

FMIN s1, s1, s2
FMIN s3, s3, s4
FMIN a0, a0, s3
FMIN a0, a0, s1
blt $r0, I, .L21
.align 3

.L22:
FMIN s1, s1, s2
FMIN s3, s3, s4
FMIN s1, s1, s3
MOV s1, a0
.align 3

.L23: //N<8
Expand Down
10 changes: 8 additions & 2 deletions kernel/loongarch64/caxpby_lasx.S
Original file line number Diff line number Diff line change
Expand Up @@ -176,13 +176,13 @@
xvilvh.d VX3, x4, x3
xvst VX2, Y, 0 * SIZE
xvst VX3, Y, 4 * SIZE
addi.d X, Y, 8 * SIZE
addi.d Y, Y, 8 * SIZE
#else
xvilvl.w VX2, x4 ,x3
xvilvh.w VX3, x4, x3
xvst VX2, Y, 0 * SIZE
xvst VX3, Y, 8 * SIZE
addi.d X, Y, 16 * SIZE
addi.d Y, Y, 16 * SIZE
#endif
addi.d I, I, -1
blt $r0, I, .L113
Expand Down Expand Up @@ -617,6 +617,7 @@
xvstelm.d x4, YY, 1 * SIZE, 3
add.d YY, YY, INCY
blt $r0, I, .L222
move Y, YY
b .L997
.align 3
#else
Expand Down Expand Up @@ -691,6 +692,7 @@
xvstelm.w x4, YY, 1 * SIZE, 7
add.d YY, YY, INCY
blt $r0, I, .L222
move Y, YY
b .L997
.align 3
#endif
Expand Down Expand Up @@ -1011,7 +1013,11 @@
#endif

.L997:
#ifdef DOUBLE
andi I, N, 3
#else
andi I, N, 7
#endif
bge $r0, I, .L999
.align 3

Expand Down
4 changes: 2 additions & 2 deletions kernel/loongarch64/csum_lasx.S
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvfadd.s res1, VX0, res1
xvfadd.s res1, VX1, res1
xvfadd.s res1, VX2, res1
xvfadd.s res1, VX2, res1
xvfadd.s res1, VX3, res1
#endif
.align 3

Expand Down Expand Up @@ -246,7 +246,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvfadd.s res1, VX0, res1
xvfadd.s res1, VX1, res1
xvfadd.s res1, VX2, res1
xvfadd.s res1, VX2, res1
xvfadd.s res1, VX3, res1
#endif
.align 3

Expand Down
12 changes: 7 additions & 5 deletions kernel/loongarch64/iamin_lasx.S
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvfmina.d VM1, VM0, VM1
#else
addi.d I, I, -1
xvadd.w VI2, VI1, VINC8
xvadd.w VI1, VI1, VINC8
xvor.v VI2, VI1, VI1
xvfmina.s VM1, VX0, VM0
#endif
XVCMPEQ VT0, VM0, VM1
Expand Down Expand Up @@ -189,6 +190,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
XVFMINA VM0, VM0, VM1
XVCMPEQ VT0, VM0, VM1
xvbitsel.v VI0, VINC8, VINC4, VT0
// $f9: x1
fcmp.ceq.d $fcc0, $f15, $f9
bceqz $fcc0, .L26
XVCMPLT VT0, VI1, VI0
Expand Down Expand Up @@ -357,7 +359,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
xvinsgr2vr.w VX0, t2, 5
xvinsgr2vr.w VX0, t3, 6
xvinsgr2vr.w VX0, t4, 7
xvadd.w VI2, VI1, VINC8
xvadd.w VI1, VI1, VINC8
xvor.v VI2, VI1, VI1
xvfmina.s VM1, VX0, VM0
xvfcmp.ceq.s VT0, VM1, VM0
#endif
Expand Down Expand Up @@ -393,7 +396,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
movfr2gr.d i0, $f20
.align 3
#else
fmov.s $f16, $f20
fmov.s $f7, $f20
.align 3

.L252:
Expand Down Expand Up @@ -449,9 +452,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.L292:
xvfmina.s VM0, VX0, VM0
xvfcmp.ceq.s VT0, VM0, VX0
xvbitsel.v VI0, VI0, VI1, VT0
xvbitsel.v VI0, VI0, $xr7, VT0
movfr2gr.s i0, $f20

#endif

.L21: // N<8
Expand Down
Loading

0 comments on commit 79cb121

Please sign in to comment.