Skip to content

Commit

Permalink
blake2s: fix 386 assembly not to smash SP
Browse files Browse the repository at this point in the history
For golang/go#44269.

Change-Id: I92e168674612af390bcb80a0579df5c777c26970
Reviewed-on: https://go-review.googlesource.com/c/crypto/+/292052
Trust: Russ Cox <rsc@golang.org>
Trust: Jason A. Donenfeld <Jason@zx2c4.com>
Reviewed-by: Jason A. Donenfeld <Jason@zx2c4.com>
  • Loading branch information
rsc committed Feb 18, 2021
1 parent f5d19e4 commit 88c0e5a
Showing 1 changed file with 44 additions and 50 deletions.
94 changes: 44 additions & 50 deletions blake2s_386.s
Original file line number Diff line number Diff line change
Expand Up @@ -297,19 +297,17 @@ TEXT ·hashBlocksSSE2(SB), 0, $672-24 // frame = 656 + 16 byte alignment
 MOVL blocks_base+12(FP), SI
 MOVL blocks_len+16(FP), DX

-MOVL SP, BP
 MOVL SP, DI
 ADDL $15, DI
 ANDL $~15, DI
-MOVL DI, SP

-MOVL CX, 8(SP)
+MOVL CX, 8(DI)
 MOVL 0(BX), CX
-MOVL CX, 0(SP)
+MOVL CX, 0(DI)
 MOVL 4(BX), CX
-MOVL CX, 4(SP)
+MOVL CX, 4(DI)
 XORL CX, CX
-MOVL CX, 12(SP)
+MOVL CX, 12(DI)

 MOVOU 0(AX), X0
 MOVOU 16(AX), X1
Expand All @@ -321,22 +319,22 @@ loop:
 MOVOU iv0<>(SB), X6
 MOVOU iv1<>(SB), X7

-MOVO 0(SP), X3
+MOVO 0(DI), X3
 PADDQ X2, X3
 PXOR X3, X7
-MOVO X3, 0(SP)
-
-PRECOMPUTE(SP, 16, SI, CX)
-ROUND_SSE2(X4, X5, X6, X7, 16(SP), 32(SP), 48(SP), 64(SP), X3)
-ROUND_SSE2(X4, X5, X6, X7, 16+64(SP), 32+64(SP), 48+64(SP), 64+64(SP), X3)
-ROUND_SSE2(X4, X5, X6, X7, 16+128(SP), 32+128(SP), 48+128(SP), 64+128(SP), X3)
-ROUND_SSE2(X4, X5, X6, X7, 16+192(SP), 32+192(SP), 48+192(SP), 64+192(SP), X3)
-ROUND_SSE2(X4, X5, X6, X7, 16+256(SP), 32+256(SP), 48+256(SP), 64+256(SP), X3)
-ROUND_SSE2(X4, X5, X6, X7, 16+320(SP), 32+320(SP), 48+320(SP), 64+320(SP), X3)
-ROUND_SSE2(X4, X5, X6, X7, 16+384(SP), 32+384(SP), 48+384(SP), 64+384(SP), X3)
-ROUND_SSE2(X4, X5, X6, X7, 16+448(SP), 32+448(SP), 48+448(SP), 64+448(SP), X3)
-ROUND_SSE2(X4, X5, X6, X7, 16+512(SP), 32+512(SP), 48+512(SP), 64+512(SP), X3)
-ROUND_SSE2(X4, X5, X6, X7, 16+576(SP), 32+576(SP), 48+576(SP), 64+576(SP), X3)
+MOVO X3, 0(DI)
+
+PRECOMPUTE(DI, 16, SI, CX)
+ROUND_SSE2(X4, X5, X6, X7, 16(DI), 32(DI), 48(DI), 64(DI), X3)
+ROUND_SSE2(X4, X5, X6, X7, 16+64(DI), 32+64(DI), 48+64(DI), 64+64(DI), X3)
+ROUND_SSE2(X4, X5, X6, X7, 16+128(DI), 32+128(DI), 48+128(DI), 64+128(DI), X3)
+ROUND_SSE2(X4, X5, X6, X7, 16+192(DI), 32+192(DI), 48+192(DI), 64+192(DI), X3)
+ROUND_SSE2(X4, X5, X6, X7, 16+256(DI), 32+256(DI), 48+256(DI), 64+256(DI), X3)
+ROUND_SSE2(X4, X5, X6, X7, 16+320(DI), 32+320(DI), 48+320(DI), 64+320(DI), X3)
+ROUND_SSE2(X4, X5, X6, X7, 16+384(DI), 32+384(DI), 48+384(DI), 64+384(DI), X3)
+ROUND_SSE2(X4, X5, X6, X7, 16+448(DI), 32+448(DI), 48+448(DI), 64+448(DI), X3)
+ROUND_SSE2(X4, X5, X6, X7, 16+512(DI), 32+512(DI), 48+512(DI), 64+512(DI), X3)
+ROUND_SSE2(X4, X5, X6, X7, 16+576(DI), 32+576(DI), 48+576(DI), 64+576(DI), X3)

 PXOR X4, X0
 PXOR X5, X1
Expand All @@ -347,15 +345,14 @@ loop:
 SUBL $64, DX
 JNE loop

-MOVL 0(SP), CX
+MOVL 0(DI), CX
 MOVL CX, 0(BX)
-MOVL 4(SP), CX
+MOVL 4(DI), CX
 MOVL CX, 4(BX)

 MOVOU X0, 0(AX)
 MOVOU X1, 16(AX)

-MOVL BP, SP
 RET

 // func hashBlocksSSSE3(h *[8]uint32, c *[2]uint32, flag uint32, blocks []byte)
Expand All @@ -366,54 +363,52 @@ TEXT ·hashBlocksSSSE3(SB), 0, $704-24 // frame = 688 + 16 byte alignment
 MOVL blocks_base+12(FP), SI
 MOVL blocks_len+16(FP), DX

-MOVL SP, BP
 MOVL SP, DI
 ADDL $15, DI
 ANDL $~15, DI
-MOVL DI, SP

-MOVL CX, 8(SP)
+MOVL CX, 8(DI)
 MOVL 0(BX), CX
-MOVL CX, 0(SP)
+MOVL CX, 0(DI)
 MOVL 4(BX), CX
-MOVL CX, 4(SP)
+MOVL CX, 4(DI)
 XORL CX, CX
-MOVL CX, 12(SP)
+MOVL CX, 12(DI)

 MOVOU 0(AX), X0
 MOVOU 16(AX), X1
 MOVOU counter<>(SB), X2

 loop:
-MOVO X0, 656(SP)
-MOVO X1, 672(SP)
+MOVO X0, 656(DI)
+MOVO X1, 672(DI)
 MOVO X0, X4
 MOVO X1, X5
 MOVOU iv0<>(SB), X6
 MOVOU iv1<>(SB), X7

-MOVO 0(SP), X3
+MOVO 0(DI), X3
 PADDQ X2, X3
 PXOR X3, X7
-MOVO X3, 0(SP)
+MOVO X3, 0(DI)

 MOVOU rol16<>(SB), X0
 MOVOU rol8<>(SB), X1

-PRECOMPUTE(SP, 16, SI, CX)
-ROUND_SSSE3(X4, X5, X6, X7, 16(SP), 32(SP), 48(SP), 64(SP), X3, X0, X1)
-ROUND_SSSE3(X4, X5, X6, X7, 16+64(SP), 32+64(SP), 48+64(SP), 64+64(SP), X3, X0, X1)
-ROUND_SSSE3(X4, X5, X6, X7, 16+128(SP), 32+128(SP), 48+128(SP), 64+128(SP), X3, X0, X1)
-ROUND_SSSE3(X4, X5, X6, X7, 16+192(SP), 32+192(SP), 48+192(SP), 64+192(SP), X3, X0, X1)
-ROUND_SSSE3(X4, X5, X6, X7, 16+256(SP), 32+256(SP), 48+256(SP), 64+256(SP), X3, X0, X1)
-ROUND_SSSE3(X4, X5, X6, X7, 16+320(SP), 32+320(SP), 48+320(SP), 64+320(SP), X3, X0, X1)
-ROUND_SSSE3(X4, X5, X6, X7, 16+384(SP), 32+384(SP), 48+384(SP), 64+384(SP), X3, X0, X1)
-ROUND_SSSE3(X4, X5, X6, X7, 16+448(SP), 32+448(SP), 48+448(SP), 64+448(SP), X3, X0, X1)
-ROUND_SSSE3(X4, X5, X6, X7, 16+512(SP), 32+512(SP), 48+512(SP), 64+512(SP), X3, X0, X1)
-ROUND_SSSE3(X4, X5, X6, X7, 16+576(SP), 32+576(SP), 48+576(SP), 64+576(SP), X3, X0, X1)
-
-MOVO 656(SP), X0
-MOVO 672(SP), X1
+PRECOMPUTE(DI, 16, SI, CX)
+ROUND_SSSE3(X4, X5, X6, X7, 16(DI), 32(DI), 48(DI), 64(DI), X3, X0, X1)
+ROUND_SSSE3(X4, X5, X6, X7, 16+64(DI), 32+64(DI), 48+64(DI), 64+64(DI), X3, X0, X1)
+ROUND_SSSE3(X4, X5, X6, X7, 16+128(DI), 32+128(DI), 48+128(DI), 64+128(DI), X3, X0, X1)
+ROUND_SSSE3(X4, X5, X6, X7, 16+192(DI), 32+192(DI), 48+192(DI), 64+192(DI), X3, X0, X1)
+ROUND_SSSE3(X4, X5, X6, X7, 16+256(DI), 32+256(DI), 48+256(DI), 64+256(DI), X3, X0, X1)
+ROUND_SSSE3(X4, X5, X6, X7, 16+320(DI), 32+320(DI), 48+320(DI), 64+320(DI), X3, X0, X1)
+ROUND_SSSE3(X4, X5, X6, X7, 16+384(DI), 32+384(DI), 48+384(DI), 64+384(DI), X3, X0, X1)
+ROUND_SSSE3(X4, X5, X6, X7, 16+448(DI), 32+448(DI), 48+448(DI), 64+448(DI), X3, X0, X1)
+ROUND_SSSE3(X4, X5, X6, X7, 16+512(DI), 32+512(DI), 48+512(DI), 64+512(DI), X3, X0, X1)
+ROUND_SSSE3(X4, X5, X6, X7, 16+576(DI), 32+576(DI), 48+576(DI), 64+576(DI), X3, X0, X1)
+
+MOVO 656(DI), X0
+MOVO 672(DI), X1
 PXOR X4, X0
 PXOR X5, X1
 PXOR X6, X0
Expand All @@ -423,13 +418,12 @@ loop:
 SUBL $64, DX
 JNE loop

-MOVL 0(SP), CX
+MOVL 0(DI), CX
 MOVL CX, 0(BX)
-MOVL 4(SP), CX
+MOVL 4(DI), CX
 MOVL CX, 4(BX)

 MOVOU X0, 0(AX)
 MOVOU X1, 16(AX)

-MOVL BP, SP
 RET

0 comments on commit 88c0e5a

Please sign in to comment.