Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for SVE/SVE2 on ARM #26

Merged
merged 5 commits into from
Jun 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ module github.com/minio/highwayhash

go 1.15

require golang.org/x/sys v0.1.0
require golang.org/x/sys v0.21.0
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U=
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws=
golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
132 changes: 132 additions & 0 deletions highwayhashSVE_arm64.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
//
// Copyright (c) 2024 Minio Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

//+build !noasm,!appengine

#include "textflag.h"

// func getVectorLength() (vl, pl uint64)
//
// Reports the SVE vector register size (vl) and predicate register size (pl)
// of the running CPU. ADDVL/ADDPL add one vector/predicate register size in
// bytes to a zeroed register; the following shift left by 3 multiplies by 8,
// so both results are expressed in bits.
//
// The SVE instructions are emitted as raw WORD encodings; the mnemonic each
// encoding stands for is given in its trailing comment.
// NOTE(review): presumably because the Go assembler has no SVE mnemonics - confirm.
TEXT ·getVectorLength(SB), NOSPLIT, $0
// R2 = vector length in bytes, then converted to bits.
WORD $0xd2800002 // mov x2, #0
WORD $0x04225022 // addvl x2, x2, #1
WORD $0xd37df042 // lsl x2, x2, #3
// R3 = predicate length in bytes, then converted to bits.
WORD $0xd2800003 // mov x3, #0
WORD $0x04635023 // addpl x3, x3, #1
WORD $0xd37df063 // lsl x3, x3, #3
// Store the two results into the named return slots.
MOVD R2, vl+0(FP)
MOVD R3, pl+8(FP)
RET

// func updateArm64Sve(state *[16]uint64, msg []byte)
//
// SVE version of the state update. The message is consumed in 32-byte blocks:
// if len(msg) < 32 the routine returns without touching state, and bytes past
// the last full 32-byte block are never read.
//
// The state is loaded with "MUL VL" addressing into z1..z4, and the message
// pointer advances by a fixed 32 bytes per iteration, so the register layout
// only lines up with state[0:4], state[4:8], state[8:12], state[12:16] when
// the vector length is 256 bits.
//
// SVE instructions are emitted as raw WORD encodings; the mnemonic for each
// is given in its trailing comment.
TEXT ·updateArm64Sve(SB), NOSPLIT, $0
MOVD state+0(FP), R0
MOVD msg_base+8(FP), R1
MOVD msg_len+16(FP), R2 // length of message
// Bias the length by one block so the sign flag tells whether a full
// 32-byte block is still available.
SUBS $32, R2
BMI completeSve

// p1 = all-true predicate; z1..z4 = the four consecutive vectors of state.
WORD $0x2518e3e1 // ptrue p1.b
WORD $0xa5e0a401 // ld1d z1.d, p1/z, [x0]
WORD $0xa5e1a402 // ld1d z2.d, p1/z, [x0, #1, MUL VL]
WORD $0xa5e2a403 // ld1d z3.d, p1/z, [x0, #2, MUL VL]
WORD $0xa5e3a404 // ld1d z4.d, p1/z, [x0, #3, MUL VL]

// Load zipper merge constants table pointer
MOVD $·zipperMergeSve(SB), R3
// z5 = byte permutation table, z6 = zero. p2 is built for 64-bit elements;
// when used with 32-bit elements below it selects only the even (low-half)
// 32-bit lane of every 64-bit lane.
WORD $0xa5e0a465 // ld1d z5.d, p1/z, [x3]
WORD $0x25b8c006 // mov z6.s, #0
WORD $0x25d8e3e2 // ptrue p2.d /* set every other lane for "s" type */

loopSve:
// z0 = next message block; advance the message pointer by 32 bytes.
WORD $0xa5e0a420 // ld1d z0.d, p1/z, [x1]
ADD $32, R1

// z2 += block + z3
WORD $0x04e00042 // add z2.d, z2.d, z0.d
WORD $0x04e30042 // add z2.d, z2.d, z3.d
// z3 ^= (z1 >> 32) * low32(z2); z7 holds z2 with the high 32 bits of each
// 64-bit lane replaced by zeros from z6.
WORD $0x04e09420 // lsr z0.d, z1.d, #32
WORD $0x05a6c847 // sel z7.s, p2, z2.s, z6.s
WORD $0x04d004e0 // mul z0.d, p1/m, z0.d, z7.d
WORD $0x04a33003 // eor z3.d, z0.d, z3.d
// z1 += z4
WORD $0x04e10081 // add z1.d, z4.d, z1.d
// z4 ^= (z2 >> 32) * low32(z1)
WORD $0x04e09440 // lsr z0.d, z2.d, #32
WORD $0x05a6c827 // sel z7.s, p2, z1.s, z6.s
WORD $0x04d004e0 // mul z0.d, p1/m, z0.d, z7.d
WORD $0x04a43004 // eor z4.d, z0.d, z4.d
// z1 += bytes of z2 permuted through the table in z5
WORD $0x05253040 // tbl z0.b, z2.b, z5.b
WORD $0x04e00021 // add z1.d, z1.d, z0.d
// z2 += bytes of the updated z1 permuted through the table in z5
WORD $0x05253020 // tbl z0.b, z1.b, z5.b
WORD $0x04e00042 // add z2.d, z2.d, z0.d

// Keep looping while another full 32-byte block remains.
SUBS $32, R2
BPL loopSve

// Write the four vectors back to state in the same order they were loaded.
WORD $0xe5e0e401 // st1d z1.d, p1, [x0]
WORD $0xe5e1e402 // st1d z2.d, p1, [x0, #1, MUL VL]
WORD $0xe5e2e403 // st1d z3.d, p1, [x0, #2, MUL VL]
WORD $0xe5e3e404 // st1d z4.d, p1, [x0, #3, MUL VL]

completeSve:
RET

// func updateArm64Sve2(state *[16]uint64, msg []byte)
//
// SVE2 version of the state update. It follows the same sequence as
// updateArm64Sve, except that each 32x32->64 bit multiply is a single
// widening UMULLB on the even ("bottom") 32-bit elements instead of the
// sel + mul pair, so no zero vector or second predicate is needed.
//
// The message is consumed in 32-byte blocks: if len(msg) < 32 the routine
// returns without touching state, and bytes past the last full 32-byte block
// are never read. The "MUL VL" state addressing combined with the fixed
// 32-byte message stride only lines up when the vector length is 256 bits.
TEXT ·updateArm64Sve2(SB), NOSPLIT, $0
MOVD state+0(FP), R0
MOVD msg_base+8(FP), R1
MOVD msg_len+16(FP), R2 // length of message
// Bias the length by one block so the sign flag tells whether a full
// 32-byte block is still available.
SUBS $32, R2
BMI completeSve2

// p1 = all-true predicate; z1..z4 = the four consecutive vectors of state.
WORD $0x2518e3e1 // ptrue p1.b
WORD $0xa5e0a401 // ld1d z1.d, p1/z, [x0]
WORD $0xa5e1a402 // ld1d z2.d, p1/z, [x0, #1, MUL VL]
WORD $0xa5e2a403 // ld1d z3.d, p1/z, [x0, #2, MUL VL]
WORD $0xa5e3a404 // ld1d z4.d, p1/z, [x0, #3, MUL VL]

// Load zipper merge constants table pointer
MOVD $·zipperMergeSve(SB), R3
// z5 = byte permutation table used by the tbl instructions below.
WORD $0xa5e0a465 // ld1d z5.d, p1/z, [x3]

loopSve2:
// z0 = next message block; advance the message pointer by 32 bytes.
WORD $0xa5e0a420 // ld1d z0.d, p1/z, [x1]
ADD $32, R1

// z2 += block + z3
WORD $0x04e00042 // add z2.d, z2.d, z0.d
WORD $0x04e30042 // add z2.d, z2.d, z3.d
// z3 ^= (z1 >> 32) * low32(z2); after the shift the even 32-bit elements
// of z0 hold the former high halves of z1.
WORD $0x04e09420 // lsr z0.d, z1.d, #32
WORD $0x45c27800 // umullb z0.d, z0.s, z2.s
WORD $0x04a33003 // eor z3.d, z0.d, z3.d
// z1 += z4
WORD $0x04e10081 // add z1.d, z4.d, z1.d
// z4 ^= (z2 >> 32) * low32(z1)
WORD $0x04e09440 // lsr z0.d, z2.d, #32
WORD $0x45c17800 // umullb z0.d, z0.s, z1.s
WORD $0x04a43004 // eor z4.d, z0.d, z4.d
// z1 += bytes of z2 permuted through the table in z5
WORD $0x05253040 // tbl z0.b, z2.b, z5.b
WORD $0x04e00021 // add z1.d, z1.d, z0.d
// z2 += bytes of the updated z1 permuted through the table in z5
WORD $0x05253020 // tbl z0.b, z1.b, z5.b
WORD $0x04e00042 // add z2.d, z2.d, z0.d

// Keep looping while another full 32-byte block remains.
SUBS $32, R2
BPL loopSve2

// Write the four vectors back to state in the same order they were loaded.
WORD $0xe5e0e401 // st1d z1.d, p1, [x0]
WORD $0xe5e1e402 // st1d z2.d, p1, [x0, #1, MUL VL]
WORD $0xe5e2e403 // st1d z3.d, p1, [x0, #2, MUL VL]
WORD $0xe5e3e404 // st1d z4.d, p1, [x0, #3, MUL VL]

completeSve2:
RET

// zipperMergeSve is the 32-byte index table loaded into z5 by updateArm64Sve
// and updateArm64Sve2 and used as the index operand of their tbl byte
// permutations. The first 16 entries only reference source bytes 0x00-0x0f
// and the last 16 only reference bytes 0x10-0x1f, i.e. the same shuffle is
// applied independently to each 16-byte half of the vector.
DATA ·zipperMergeSve+0x00(SB)/8, $0x000f010e05020c03
DATA ·zipperMergeSve+0x08(SB)/8, $0x070806090d0a040b
DATA ·zipperMergeSve+0x10(SB)/8, $0x101f111e15121c13
DATA ·zipperMergeSve+0x18(SB)/8, $0x171816191d1a141b
// Read-only, pointer-free, 32 bytes in total.
GLOBL ·zipperMergeSve(SB), (NOPTR+RODATA), $32
2 changes: 2 additions & 0 deletions highwayhash_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ var (
useSSE4 = cpu.X86.HasSSE41
useAVX2 = cpu.X86.HasAVX2
useNEON = false
useSVE = false
useSVE2 = false
useVMX = false
)

Expand Down
39 changes: 36 additions & 3 deletions highwayhash_arm64.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2017 Minio Inc. All rights reserved.
// Copyright (c) 2017-2024 Minio Inc. All rights reserved.
// Use of this source code is governed by a license that can be
// found in the LICENSE file.

Expand All @@ -7,19 +7,48 @@

package highwayhash

import (
"golang.org/x/sys/cpu"
)

var (
useSSE4 = false
useAVX2 = false
useNEON = true
useNEON = cpu.ARM64.HasASIMD
useSVE = cpu.ARM64.HasSVE
useSVE2 = false // cpu.ARM64.HasSVE2 -- disable until tested on real hardware
useVMX = false
)

// init turns the SVE and SVE2 code paths off when the CPU's SVE vector
// length is anything other than 256 bits.
//
// HighwayHash is designed around AVX2's 256-bit vectors, and the SVE/SVE2
// assembly only produces correct results at that exact vector length.
func init() {
	if !useSVE {
		// Nothing to validate when SVE was not detected.
		return
	}

	vl, _ := getVectorLength() // the predicate length is not needed here
	if vl == 256 {
		return
	}

	useSVE, useSVE2 = false, false
}

// initializeArm64 is declared without a Go body.
// NOTE(review): presumably implemented in an arm64 assembly file and used to
// derive the initial state from key - confirm against the .s source.
//
//go:noescape
func initializeArm64(state *[16]uint64, key []byte)

// updateArm64 is declared without a Go body. update calls it when useNEON is
// set and neither the SVE2 nor the SVE path has been selected.
//
//go:noescape
func updateArm64(state *[16]uint64, msg []byte)

// getVectorLength is implemented in highwayhashSVE_arm64.s. It returns the
// SVE vector register length (vl) and predicate register length (pl); init
// compares vl against 256 to decide whether the SVE paths may be used.
//
//go:noescape
func getVectorLength() (vl, pl uint64)

// updateArm64Sve is implemented in highwayhashSVE_arm64.s. It folds msg into
// state 32 bytes at a time using SVE instructions; update calls it when
// useSVE is set and useSVE2 is not.
//
//go:noescape
func updateArm64Sve(state *[16]uint64, msg []byte)

// updateArm64Sve2 is implemented in highwayhashSVE_arm64.s. It is the SVE2
// counterpart of updateArm64Sve; update prefers it whenever useSVE2 is set.
//
//go:noescape
func updateArm64Sve2(state *[16]uint64, msg []byte)

// finalizeArm64 is declared without a Go body.
// NOTE(review): presumably implemented in an arm64 assembly file and used to
// write the digest derived from state into out - confirm against the .s source.
//
//go:noescape
func finalizeArm64(out []byte, state *[16]uint64)

Expand All @@ -32,7 +61,11 @@ func initialize(state *[16]uint64, key []byte) {
}

func update(state *[16]uint64, msg []byte) {
if useNEON {
if useSVE2 {
updateArm64Sve2(state, msg)
} else if useSVE {
updateArm64Sve(state, msg)
} else if useNEON {
updateArm64(state, msg)
} else {
updateGeneric(state, msg)
Expand Down
2 changes: 2 additions & 0 deletions highwayhash_ppc64le.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ var (
useSSE4 = false
useAVX2 = false
useNEON = false
useSVE = false
useSVE2 = false
useVMX = true
)

Expand Down
2 changes: 2 additions & 0 deletions highwayhash_ref.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ var (
useSSE4 = false
useAVX2 = false
useNEON = false
useSVE = false
useSVE2 = false
useVMX = false
)

Expand Down
70 changes: 70 additions & 0 deletions highwayhash_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,67 @@ import (
"bytes"
"encoding/binary"
"encoding/hex"
"fmt"
"hash"
"math/rand"
"runtime"
"strings"
"sync/atomic"
"testing"
)

// TestUpdateFunc runs updateGeneric over a fixed 64-byte message starting
// from a known state and compares a hex dump of the resulting sixteen state
// words (two per line) against a golden value.
func TestUpdateFunc(t *testing.T) {
	const want = `0xea006216335db212 - 0x97280efedaccd722
0x2a0ba13d83848de6 - 0x476fe817e850c8cd
0x5b8176dd6d555cf6 - 0x5740be93095fd2b5
0xda9d392c1fd04836 - 0x28398f8f4d922993
0x41a1794ac4aec11e - 0x41123afc4b6d0e35
0x761f6fbe30a051ce - 0x6a4afc2b722e4155
0x3f0887c550d61850 - 0x3368c4779ce6c2c8
0xd1f892c21e69b968 - 0x974e980803544d90`

	// Starting state: an easily recognisable ascending byte pattern.
	state := [16]uint64{
		0x0102030405060708, 0x090a0b0c0d0e0f10,
		0x1112131415161718, 0x191a1b1c1d1e1f20,
		0x2122232425262728, 0x292a2b2c2d2e2f30,
		0x3132333435363738, 0x393a3b3c3d3e3f40,
		0x4142434445464748, 0x494a4b4c4d4e4f50,
		0x5152535455565758, 0x595a5b5c5d5e5f60,
		0x6162636465666768, 0x696a6b6c6d6e6f70,
		0x7172737475767778, 0x797a7b7c7d7e7f80,
	}

	// Two 32-byte blocks of input: 0x33, 0x34, ...
	msg := make([]byte, 64)
	for i := range msg {
		msg[i] = 0x33 + byte(i)
	}

	updateGeneric(&state, msg)

	// Dump the state as eight lines of two words each.
	var dump bytes.Buffer
	for i := 0; i < len(state); i += 2 {
		fmt.Fprintf(&dump, "0x%016x - 0x%016x\n", state[i], state[i+1])
	}

	if got := strings.TrimSpace(dump.String()); got != want {
		t.Errorf("TestUpdateFunc:\ngot: \n%v\nwant: \n%v", got, want)
	}
}

func TestVectors(t *testing.T) {
defer func(sse4, avx2, neon, vmx bool) {
useSSE4, useAVX2, useNEON, useVMX = sse4, avx2, neon, vmx
Expand All @@ -36,6 +90,22 @@ func TestVectors(t *testing.T) {
useSSE4 = false
})
}
if useSVE2 {
t.Run("SVE2 version", func(t *testing.T) {
testVectors(func(key []byte) (hash.Hash, error) { return New64(key) }, testVectors64, t)
testVectors(New128, testVectors128, t)
testVectors(New, testVectors256, t)
useSVE2 = false
})
}
if useSVE {
t.Run("SVE version", func(t *testing.T) {
testVectors(func(key []byte) (hash.Hash, error) { return New64(key) }, testVectors64, t)
testVectors(New128, testVectors128, t)
testVectors(New, testVectors256, t)
useSVE = false
})
}
if useNEON {
t.Run("NEON version", func(t *testing.T) {
testVectors(func(key []byte) (hash.Hash, error) { return New64(key) }, testVectors64, t)
Expand Down
Loading