From 7d755912b9ae0462a92facb6f817b8803975882b Mon Sep 17 00:00:00 2001 From: gxw Date: Thu, 14 Mar 2024 20:32:02 +0800 Subject: [PATCH 1/5] loongarch: Fixed {s/d/c/z}axpby LASX opt --- kernel/loongarch64/axpby_lasx.S | 10 +++++++++- kernel/loongarch64/caxpby_lasx.S | 10 ++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/kernel/loongarch64/axpby_lasx.S b/kernel/loongarch64/axpby_lasx.S index 7a246ca5c8..b5cf77dc46 100644 --- a/kernel/loongarch64/axpby_lasx.S +++ b/kernel/loongarch64/axpby_lasx.S @@ -139,9 +139,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvst VX1, Y, 4 * SIZE #else xvfmul.s VX0, VX0, VXA - addi.d I, I, -1 xvst VX0, Y, 0 * SIZE #endif + addi.d I, I, -1 addi.d X, X, 8 * SIZE addi.d Y, Y, 8 * SIZE blt $r0, I, .L112 @@ -288,6 +288,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi.d X, X, 8 * SIZE addi.d I, I, -1 blt $r0, I, .L121 + move Y, YY b .L997 .align 3 @@ -334,6 +335,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add.d YY, YY, INCY addi.d X, X, 8 * SIZE blt $r0, I, .L122 + move Y, YY b .L997 .align 3 @@ -425,6 +427,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add.d YY, YY, INCY addi.d I, I, -1 blt $r0, I, .L123 + move Y, YY b .L997 .align 3 @@ -465,6 +468,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add.d YY, YY, INCY addi.d I, I, -1 blt $r0, I, .L124 + move Y, YY b .L997 .align 3 @@ -803,6 +807,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif add.d YY, YY, INCY blt $r0, I, .L221 + move Y, YY b .L997 .align 3 @@ -895,6 +900,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif add.d YY, YY, INCY blt $r0, I, .L222 + move Y, YY b .L997 .align 3 @@ -987,6 +993,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif add.d YY, YY, INCY blt $r0, I, .L223 + move Y, YY b .L997 .align 3 @@ -1027,6 +1034,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
add.d YY, YY, INCY addi.d I, I, -1 blt $r0, I, .L224 + move Y, YY b .L997 .align 3 diff --git a/kernel/loongarch64/caxpby_lasx.S b/kernel/loongarch64/caxpby_lasx.S index c5802092ed..5f34f13804 100644 --- a/kernel/loongarch64/caxpby_lasx.S +++ b/kernel/loongarch64/caxpby_lasx.S @@ -176,13 +176,13 @@ xvilvh.d VX3, x4, x3 xvst VX2, Y, 0 * SIZE xvst VX3, Y, 4 * SIZE - addi.d X, Y, 8 * SIZE + addi.d Y, Y, 8 * SIZE #else xvilvl.w VX2, x4 ,x3 xvilvh.w VX3, x4, x3 xvst VX2, Y, 0 * SIZE xvst VX3, Y, 8 * SIZE - addi.d X, Y, 16 * SIZE + addi.d Y, Y, 16 * SIZE #endif addi.d I, I, -1 blt $r0, I, .L113 @@ -617,6 +617,7 @@ xvstelm.d x4, YY, 1 * SIZE, 3 add.d YY, YY, INCY blt $r0, I, .L222 + move Y, YY b .L997 .align 3 #else @@ -691,6 +692,7 @@ xvstelm.w x4, YY, 1 * SIZE, 7 add.d YY, YY, INCY blt $r0, I, .L222 + move Y, YY b .L997 .align 3 #endif @@ -1011,7 +1013,11 @@ #endif .L997: +#ifdef DOUBLE + andi I, N, 3 +#else andi I, N, 7 +#endif bge $r0, I, .L999 .align 3 From 6159cffc58d233ca66651dc58af1e97fcf6ad5e8 Mon Sep 17 00:00:00 2001 From: gxw Date: Thu, 14 Mar 2024 20:32:39 +0800 Subject: [PATCH 2/5] loongarch: Fixed i{s/c/z}amin LASX opt --- kernel/loongarch64/iamin_lasx.S | 12 ++- kernel/loongarch64/icamin_lasx.S | 168 ++++++++++++++++++++++++------- 2 files changed, 141 insertions(+), 39 deletions(-) diff --git a/kernel/loongarch64/iamin_lasx.S b/kernel/loongarch64/iamin_lasx.S index 6ea117907f..eeba4f239d 100644 --- a/kernel/loongarch64/iamin_lasx.S +++ b/kernel/loongarch64/iamin_lasx.S @@ -144,7 +144,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvfmina.d VM1, VM0, VM1 #else addi.d I, I, -1 - xvadd.w VI2, VI1, VINC8 + xvadd.w VI1, VI1, VINC8 + xvor.v VI2, VI1, VI1 xvfmina.s VM1, VX0, VM0 #endif XVCMPEQ VT0, VM0, VM1 @@ -189,6 +190,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. XVFMINA VM0, VM0, VM1 XVCMPEQ VT0, VM0, VM1 xvbitsel.v VI0, VINC8, VINC4, VT0 + // $f9: x1 fcmp.ceq.d $fcc0, $f15, $f9 bceqz $fcc0, .L26 XVCMPLT VT0, VI1, VI0 @@ -357,7 +359,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvinsgr2vr.w VX0, t2, 5 xvinsgr2vr.w VX0, t3, 6 xvinsgr2vr.w VX0, t4, 7 - xvadd.w VI2, VI1, VINC8 + xvadd.w VI1, VI1, VINC8 + xvor.v VI2, VI1, VI1 xvfmina.s VM1, VX0, VM0 xvfcmp.ceq.s VT0, VM1, VM0 #endif @@ -393,7 +396,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. movfr2gr.d i0, $f20 .align 3 #else - fmov.s $f16, $f20 + fmov.s $f7, $f20 .align 3 .L252: @@ -449,9 +452,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .L292: xvfmina.s VM0, VX0, VM0 xvfcmp.ceq.s VT0, VM0, VX0 - xvbitsel.v VI0, VI0, VI1, VT0 + xvbitsel.v VI0, VI0, $xr7, VT0 movfr2gr.s i0, $f20 - #endif .L21: // N<8 diff --git a/kernel/loongarch64/icamin_lasx.S b/kernel/loongarch64/icamin_lasx.S index 01abd45b2f..d815c3f60f 100644 --- a/kernel/loongarch64/icamin_lasx.S +++ b/kernel/loongarch64/icamin_lasx.S @@ -72,12 +72,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
FABS a1, a1 ADD s1, a1, a0 #ifdef DOUBLE - xvreplve0.d VM0, VM0 xvxor.v VI3, VI3, VI3 // 0 li.d I, -1 xvreplgr2vr.d VI4, I xvffint.d.l VI4, VI4 // -1 bne INCX, TEMP, .L20 + // Init VM0 + xvreplve0.d VM0, VM0 + xvld VX0, X, 0 * SIZE + xvld VX1, X, 4 * SIZE + xvpickev.d x1, VX1, VX0 + xvpickod.d x2, VX1, VX0 + xvfmul.d x3, VI4, x1 + xvfmul.d x4, VI4, x2 + xvfcmp.clt.d VT0, x1, VI3 + xvfcmp.clt.d VINC8, x2, VI3 + xvbitsel.v x1, x1, x3, VT0 + xvbitsel.v x2, x2, x4, VINC8 + xvfadd.d VM0, x1, x2 + addi.d i0, i0, 1 srai.d I, N, 2 bge $r0, I, .L21 @@ -100,12 +113,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. addi.d i0, i0, 2 xvinsgr2vr.d VI0, i0, 3 //4 #else - xvreplve0.w VM0, VM0 xvxor.v VI3, VI3, VI3 // 0 li.w I, -1 xvreplgr2vr.w VI4, I xvffint.s.w VI4, VI4 // -1 bne INCX, TEMP, .L20 + // Init VM0 + xvld VX0, X, 0 * SIZE + xvld VX1, X, 8 * SIZE + xvpickev.w x1, VX1, VX0 + xvpickod.w x2, VX1, VX0 + xvfmul.s x3, VI4, x1 + xvfmul.s x4, VI4, x2 + xvfcmp.clt.s VT0, x1, VI3 + xvfcmp.clt.s VINC4, x2, VI3 + xvbitsel.v x1, x1, x3, VT0 + xvbitsel.v x2, x2, x4, VINC4 + xvfadd.s VM0, x1, x2 + addi.w i0, i0, 1 srai.d I, N, 3 bge $r0, I, .L21 @@ -160,6 +185,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvfcmp.clt.d VINC8, x2, VI3 xvbitsel.v x1, x1, x3, VT0 xvbitsel.v x2, x2, x4, VINC8 + addi.d X, X, 8 * SIZE #else xvadd.w VI1, VI1, VINC8 xvld VX1, X, 8 * SIZE @@ -172,11 +198,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvfcmp.clt.s VINC4, x2, VI3 xvbitsel.v x1, x1, x3, VT0 xvbitsel.v x2, x2, x4, VINC4 + addi.d X, X, 16 * SIZE #endif XVFADD x1, x1, x2 XVFMIN x3, VM0, x1 XVCMPEQ VT0, x3, VM0 - addi.d X, X, 8 * SIZE xvbitsel.v VM0, x3, VM0, VT0 xvbitsel.v VI0, VI1, VI0, VT0 blt $r0, I, .L10 @@ -214,13 +240,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvpickve.w x2, VM0, 1 xvpickve.w x3, VM0, 2 xvpickve.w x4, VM0, 3 - xvfcmp.clt.s VT0, x1, x2 + xvfcmp.clt.s VT0, x2, x1 xvbitsel.v VM1, x1, x2, VT0 xvbitsel.v VINC4, VI1, VI2, VT0 - xvfcmp.clt.s VT0, x3, x4 + xvfcmp.clt.s VT0, x4, x3 xvbitsel.v VM0, x3, x4, VT0 xvbitsel.v VINC8, VI3, VI4, VT0 - xvfcmp.clt.s VT0, VM0, VM1 + xvfcmp.clt.s VT0, VM1, VM0 xvbitsel.v VM0, VM0, VM1, VT0 xvbitsel.v VI0, VINC8, VINC4, VT0 #endif @@ -233,6 +259,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .L20: // INCX!=1 #ifdef DOUBLE + // Init VM0 + ld.d t1, X, 0 * SIZE + ld.d t2, X, 1 * SIZE + add.d i1, X, INCX + ld.d t3, i1, 0 * SIZE + ld.d t4, i1, 1 * SIZE + add.d i1, i1, INCX + xvinsgr2vr.d x1, t1, 0 + xvinsgr2vr.d x2, t2, 0 + xvinsgr2vr.d x1, t3, 1 + xvinsgr2vr.d x2, t4, 1 + ld.d t1, i1, 0 * SIZE + ld.d t2, i1, 1 * SIZE + add.d i1, i1, INCX + ld.d t3, i1, 0 * SIZE + ld.d t4, i1, 1 * SIZE + xvinsgr2vr.d x1, t1, 2 + xvinsgr2vr.d x2, t2, 2 + xvinsgr2vr.d x1, t3, 3 + xvinsgr2vr.d x2, t4, 3 + xvfmul.d x3, VI4, x1 + xvfmul.d x4, VI4, x2 + xvfcmp.clt.d VT0, x1, VI3 + xvfcmp.clt.d VINC8, x2, VI3 + xvbitsel.v x1, x1, x3, VT0 + xvbitsel.v x2, x2, x4, VINC8 + xvfadd.d VM0, x1, x2 + addi.d i0, i0, 1 srai.d I, N, 2 bge $r0, I, .L21 @@ -240,21 +294,70 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvreplgr2vr.d VINC4, i0 addi.d i0, i0, -7 xvinsgr2vr.d VI1, i0, 0 //initialize the index value for vectorization - addi.d i0, i0, 2 + addi.d i0, i0, 1 xvinsgr2vr.d VI1, i0, 1 - addi.d i0, i0, -1 + addi.d i0, i0, 1 xvinsgr2vr.d VI1, i0, 2 - addi.d i0, i0, 2 + addi.d i0, i0, 1 xvinsgr2vr.d VI1, i0, 3 addi.d i0, i0, 1 xvinsgr2vr.d VI0, i0, 0 //1 - addi.d i0, i0, 2 - xvinsgr2vr.d VI0, i0, 1 //3 - addi.d i0, i0, -1 - xvinsgr2vr.d VI0, i0, 2 //2 - addi.d i0, i0, 2 + addi.d i0, i0, 1 + xvinsgr2vr.d VI0, i0, 1 //2 + addi.d i0, i0, 1 + xvinsgr2vr.d VI0, i0, 2 //3 + addi.d i0, i0, 1 xvinsgr2vr.d VI0, i0, 3 //4 #else + // Init VM0 + ld.w t1, X, 0 * SIZE + ld.w t2, X, 1 * SIZE + add.d i1, X, INCX + ld.w t3, i1, 0 * SIZE + ld.w t4, i1, 1 * SIZE + add.d i1, i1, INCX + xvinsgr2vr.w x1, t1, 0 + xvinsgr2vr.w x2, t2, 0 + xvinsgr2vr.w x1, t3, 1 + xvinsgr2vr.w x2, t4, 1 + ld.w t1, i1, 0 * SIZE + ld.w t2, i1, 1 * SIZE + add.d i1, i1, INCX + ld.w t3, i1, 0 * SIZE + ld.w t4, i1, 1 * SIZE + add.d i1, i1, INCX + xvinsgr2vr.w x1, t1, 2 + xvinsgr2vr.w x2, t2, 2 + xvinsgr2vr.w x1, t3, 3 + xvinsgr2vr.w x2, t4, 3 + ld.w t1, i1, 0 * SIZE + ld.w t2, i1, 1 * SIZE + add.d i1, i1, INCX + ld.w t3, i1, 0 * SIZE + ld.w t4, i1, 1 * SIZE + add.d i1, i1, INCX + xvinsgr2vr.w x1, t1, 4 + xvinsgr2vr.w x2, t2, 4 + xvinsgr2vr.w x1, t3, 5 + xvinsgr2vr.w x2, t4, 5 + ld.w t1, i1, 0 * SIZE + ld.w t2, i1, 1 * SIZE + add.d i1, i1, INCX + ld.w t3, i1, 0 * SIZE + ld.w t4, i1, 1 * SIZE + add.d i1, i1, INCX + xvinsgr2vr.w x1, t1, 6 + xvinsgr2vr.w x2, t2, 6 + xvinsgr2vr.w x1, t3, 7 + xvinsgr2vr.w x2, t4, 7 + xvfmul.s x3, VI4, x1 + xvfmul.s x4, VI4, x2 + xvfcmp.clt.s VT0, x1, VI3 + xvfcmp.clt.s VINC8, x2, VI3 + xvbitsel.v x1, x1, x3, VT0 + xvbitsel.v x2, x2, x4, VINC8 + xvfadd.s VM0, x1, x2 + addi.w i0, i0, 1 srai.d I, N, 3 bge $r0, I, .L21 @@ -264,15 +367,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvinsgr2vr.w VI1, i0, 0 //initialize the index value for vectorization addi.w i0, i0, 1 xvinsgr2vr.w VI1, i0, 1 - addi.w i0, i0, 3 + addi.w i0, i0, 1 xvinsgr2vr.w VI1, i0, 2 addi.w i0, i0, 1 xvinsgr2vr.w VI1, i0, 3 - addi.w i0, i0, -3 + addi.w i0, i0, 1 xvinsgr2vr.w VI1, i0, 4 addi.w i0, i0, 1 xvinsgr2vr.w VI1, i0, 5 - addi.w i0, i0, 3 + addi.w i0, i0, 1 xvinsgr2vr.w VI1, i0, 6 addi.w i0, i0, 1 xvinsgr2vr.w VI1, i0, 7 @@ -280,15 +383,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvinsgr2vr.w VI0, i0, 0 //1 addi.w i0, i0, 1 xvinsgr2vr.w VI0, i0, 1 //2 - addi.w i0, i0, 3 - xvinsgr2vr.w VI0, i0, 2 //5 addi.w i0, i0, 1 - xvinsgr2vr.w VI0, i0, 3 //6 - addi.w i0, i0, -3 - xvinsgr2vr.w VI0, i0, 4 //3 + xvinsgr2vr.w VI0, i0, 2 //3 + addi.w i0, i0, 1 + xvinsgr2vr.w VI0, i0, 3 //4 + addi.w i0, i0, 1 + xvinsgr2vr.w VI0, i0, 4 //5 + addi.w i0, i0, 1 + xvinsgr2vr.w VI0, i0, 5 //6 addi.w i0, i0, 1 - xvinsgr2vr.w VI0, i0, 5 //4 - addi.w i0, i0, 3 xvinsgr2vr.w VI0, i0, 6 //7 addi.w i0, i0, 1 xvinsgr2vr.w VI0, i0, 7 //8 @@ -350,7 +453,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvinsgr2vr.w x2, t2, 4 xvinsgr2vr.w x1, t3, 5 xvinsgr2vr.w x2, t4, 5 - xvadd.w VI1, VI1, VINC8 ld.w t1, X, 0 * SIZE ld.w t2, X, 1 * SIZE add.d X, X, INCX @@ -361,8 +463,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvinsgr2vr.w x2, t2, 6 xvinsgr2vr.w x1, t3, 7 xvinsgr2vr.w x2, t4, 7 - xvpickev.w x1, VX1, VX0 - xvpickod.w x2, VX1, VX0 #endif addi.d I, I, -1 XVFMUL x3, VI4, x1 @@ -410,13 +510,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvpickve.w x2, VM0, 1 xvpickve.w x3, VM0, 2 xvpickve.w x4, VM0, 3 - xvfcmp.clt.s VT0, x1, x2 + xvfcmp.clt.s VT0, x2, x1 xvbitsel.v VM1, x1, x2, VT0 xvbitsel.v VINC4, VI1, VI2, VT0 - xvfcmp.clt.s VT0, x3, x4 + xvfcmp.clt.s VT0, x4, x3 xvbitsel.v VM0, x3, x4, VT0 xvbitsel.v VINC8, VI3, VI4, VT0 - xvfcmp.clt.s VT0, VM0, VM1 + xvfcmp.clt.s VT0, VM1, VM0 xvbitsel.v VM0, VM0, VM1, VT0 #endif xvbitsel.v VI0, VINC8, VINC4, VT0 @@ -475,13 +575,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvpickve.w x2, VM0, 5 xvpickve.w x3, VM0, 6 xvpickve.w x4, VM0, 7 - xvfcmp.clt.s VT0, x1, x2 + xvfcmp.clt.s VT0, x2, x1 xvbitsel.v x1, x1, x2, VT0 xvbitsel.v VINC4, VI1, VI2, VT0 - xvfcmp.clt.s VT0, x3, x4 + xvfcmp.clt.s VT0, x4, x3 xvbitsel.v VM0, x3, x4, VT0 xvbitsel.v VINC8, VI3, VI4, VT0 - xvfcmp.clt.s VT0, VM0, x1 + xvfcmp.clt.s VT0, x1, VM0 xvbitsel.v VM0, VM0, x1, VT0 xvbitsel.v VI0, VINC8, VINC4, VT0 fcmp.ceq.d $fcc0, $f15, $f9 @@ -512,7 +612,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .align 3 .L292: - fcmp.clt.s $fcc0, $f15, $f13 + fcmp.clt.s $fcc0, $f13, $f15 fsel $f15, $f15, $f13, $fcc0 fsel $f20, $f20, $f16, $fcc0 movfr2gr.s i0, $f20 From 6534d378b7454eb19864b797142a1bb2af246349 Mon Sep 17 00:00:00 2001 From: gxw Date: Sat, 16 Mar 2024 09:36:05 +0800 Subject: [PATCH 3/5] loongarch: Fixed {s/d/c/z}sum LASX opt --- kernel/loongarch64/csum_lasx.S | 4 ++-- kernel/loongarch64/sum_lasx.S | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/loongarch64/csum_lasx.S b/kernel/loongarch64/csum_lasx.S index 3e65f2c152..1466899782 100644 --- a/kernel/loongarch64/csum_lasx.S +++ b/kernel/loongarch64/csum_lasx.S @@ -104,7 +104,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvfadd.s res1, VX0, res1 xvfadd.s res1, VX1, res1 xvfadd.s res1, VX2, res1 - xvfadd.s res1, VX2, res1 + xvfadd.s res1, VX3, res1 #endif .align 3 @@ -246,7 +246,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvfadd.s res1, VX0, res1 xvfadd.s res1, VX1, res1 xvfadd.s res1, VX2, res1 - xvfadd.s res1, VX2, res1 + xvfadd.s res1, VX3, res1 #endif .align 3 diff --git a/kernel/loongarch64/sum_lasx.S b/kernel/loongarch64/sum_lasx.S index fd6d5adb34..895b49b908 100644 --- a/kernel/loongarch64/sum_lasx.S +++ b/kernel/loongarch64/sum_lasx.S @@ -96,7 +96,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvfadd.s res1, VX0, res1 xvfadd.s res1, VX1, res1 xvfadd.s res1, VX2, res1 - xvfadd.s res1, VX2, res1 + xvfadd.s res1, VX3, res1 #endif .align 3 @@ -200,7 +200,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
xvfadd.s res1, VX0, res1 xvfadd.s res1, VX1, res1 xvfadd.s res1, VX2, res1 - xvfadd.s res1, VX2, res1 + xvfadd.s res1, VX3, res1 #endif .align 3 From a10dde555407d11b82063df1472a209898489e37 Mon Sep 17 00:00:00 2001 From: gxw Date: Sat, 16 Mar 2024 09:41:38 +0800 Subject: [PATCH 4/5] loongarch: Fixed {s/d/sc/dz}amin LASX opt --- kernel/loongarch64/amin_lasx.S | 6 +++--- kernel/loongarch64/camin_lasx.S | 25 ++++++++++++++++++++----- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/kernel/loongarch64/amin_lasx.S b/kernel/loongarch64/amin_lasx.S index c91a33006a..62b1097997 100644 --- a/kernel/loongarch64/amin_lasx.S +++ b/kernel/loongarch64/amin_lasx.S @@ -160,8 +160,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvinsgr2vr.d VX1, t2, 1 xvinsgr2vr.d VX1, t3, 2 xvinsgr2vr.d VX1, t4, 3 - xvfmaxa.d VM1, VX0, VX1 - xvfmaxa.d VM0, VM0, VM1 + xvfmina.d VM1, VX0, VX1 + xvfmina.d VM0, VM0, VM1 #else ld.w t1, X, 0 add.d X, X, INCX @@ -187,7 +187,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. xvinsgr2vr.w VM1, t2, 5 xvinsgr2vr.w VM1, t3, 6 xvinsgr2vr.w VM1, t4, 7 - xvfmaxa.s VM0, VM0, VM1 + xvfmina.s VM0, VM0, VM1 #endif addi.d I, I, -1 blt $r0, I, .L21 diff --git a/kernel/loongarch64/camin_lasx.S b/kernel/loongarch64/camin_lasx.S index c1c4c98c85..c5074f79d4 100644 --- a/kernel/loongarch64/camin_lasx.S +++ b/kernel/loongarch64/camin_lasx.S @@ -116,15 +116,27 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef DOUBLE xvpickve.d x1, VM0, 0 xvpickve.d x2, VM0, 1 - XVFMIN VM0, x1, x2 + XVFMIN VX0, x1, x2 + xvpickve.d x1, VM0, 2 + xvpickve.d x2, VM0, 3 + XVFMIN VX1, x1, x2 + XVFMIN VM0, VX0, VX1 #else xvpickve.w x1, VM0, 0 xvpickve.w x2, VM0, 1 xvpickve.w x3, VM0, 2 xvpickve.w x4, VM0, 3 + XVFMIN VX0, x1, x2 + XVFMIN VX1, x3, x4 + XVFMIN VX0, VX0, VX1 + xvpickve.w x1, VM0, 4 + xvpickve.w x2, VM0, 5 + xvpickve.w x3, VM0, 6 + xvpickve.w x4, VM0, 7 XVFMIN VM0, x1, x2 XVFMIN VM1, x3, x4 XVFMIN VM0, VM0, VM1 + XVFMIN VM0, VM0, VX0 #endif b .L23 .align 3 @@ -159,7 +171,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. FABS t4, t4 ADD t1, t1, t2 ADD t3, t3, t4 - FMIN s1, t1, t3 + FMIN s2, t1, t3 LD t1, X, 0 * SIZE LD t2, X, 1 * SIZE add.d X, X, INCX @@ -187,13 +199,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ADD t1, t1, t2 ADD t3, t3, t4 FMIN s4, t1, t3 + + FMIN s1, s1, s2 + FMIN s3, s3, s4 + FMIN a0, a0, s3 + FMIN a0, a0, s1 blt $r0, I, .L21 .align 3 .L22: - FMIN s1, s1, s2 - FMIN s3, s3, s4 - FMIN s1, s1, s3 + MOV s1, a0 .align 3 .L23: //N<8 From 60e251a1f864228081fb74d13de96024a4c2071a Mon Sep 17 00:00:00 2001 From: gxw Date: Sat, 16 Mar 2024 10:42:40 +0800 Subject: [PATCH 5/5] loongarch: Fixed {sc/dz}amax LASX opt --- kernel/loongarch64/camax_lasx.S | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/kernel/loongarch64/camax_lasx.S b/kernel/loongarch64/camax_lasx.S index f9a4e9012c..b646f7412c 100644 --- a/kernel/loongarch64/camax_lasx.S +++ b/kernel/loongarch64/camax_lasx.S @@ -107,15 +107,27 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifdef DOUBLE xvpickve.d x1, VM0, 0 xvpickve.d x2, VM0, 1 - XVFMAX VM0, x1, x2 + XVFMAX VX0, x1, x2 + xvpickve.d x1, VM0, 2 + xvpickve.d x2, VM0, 3 + XVFMAX VX1, x1, x2 + XVFMAX VM0, VX0, VX1 #else xvpickve.w x1, VM0, 0 xvpickve.w x2, VM0, 1 xvpickve.w x3, VM0, 2 xvpickve.w x4, VM0, 3 + XVFMAX VX0, x1, x2 + XVFMAX VX1, x3, x4 + XVFMAX VX0, VX0, VX1 + xvpickve.w x1, VM0, 4 + xvpickve.w x2, VM0, 5 + xvpickve.w x3, VM0, 6 + xvpickve.w x4, VM0, 7 XVFMAX VM0, x1, x2 XVFMAX VM1, x3, x4 XVFMAX VM0, VM0, VM1 + XVFMAX VM0, VM0, VX0 #endif b .L23 .align 3 @@ -150,7 +162,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. FABS t4, t4 ADD t1, t1, t2 ADD t3, t3, t4 - FMAX s1, t1, t3 + FMAX s2, t1, t3 LD t1, X, 0 * SIZE LD t2, X, 1 * SIZE add.d X, X, INCX @@ -178,13 +190,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ADD t1, t1, t2 ADD t3, t3, t4 FMAX s4, t1, t3 + + FMAX s1, s1, s2 + FMAX s3, s3, s4 + FMAX a0, a0, s3 + FMAX a0, a0, s1 blt $r0, I, .L21 .align 3 .L22: - FMAX s1, s1, s2 - FMAX s3, s3, s4 - FMAX s1, s1, s3 + MOV s1, a0 .align 3 .L23: //N<8
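
Reviewer note (appended after the series, not part of any patch): a minimal scalar sketch in C of the reduction the {sc/dz}amax kernel above computes, max over i of CABS1(x_i) = |Re(x_i)| + |Im(x_i)| for a complex vector walked with stride inc_x, which can be used to cross-check the LASX lane reduction and the scalar tail. The function name, signature, and inc_x handling below are illustrative only, not the OpenBLAS kernel API; the {sc/dz}amin kernels perform the same reduction with the comparison reversed.

    /* Illustrative scalar reference (assumed interface, not the kernel ABI). */
    #include <math.h>
    #include <stddef.h>

    static float scamax_ref(size_t n, const float *x, size_t inc_x)
    {
        if (n == 0 || inc_x == 0)
            return 0.0f;

        /* First complex element: |Re| + |Im| seeds the running maximum. */
        float m = fabsf(x[0]) + fabsf(x[1]);

        for (size_t i = 1; i < n; i++) {
            /* One complex element occupies two floats, so the memory
             * stride between elements is 2 * inc_x floats. */
            const float *p = x + 2 * inc_x * i;
            float v = fabsf(p[0]) + fabsf(p[1]);
            if (v > m)          /* use '<' here for the scamin variant */
                m = v;
        }
        return m;
    }

Comparing this reference against the vector kernel for N not a multiple of the vector width, and for inc_x > 1, exercises both the upper-lane reduction and the scalar tail paths touched by the patches above.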