From 56001ec4f6e39e9afc3266b707e1b093a72d6fd8 Mon Sep 17 00:00:00 2001
From: Chong Yeol Nah
Date: Tue, 16 May 2023 05:21:37 -0700
Subject: [PATCH] MSVC x64 support

Requires Build Tools for Visual Studio for compilation
https://visualstudio.microsoft.com/thank-you-downloading-visual-studio/?sku=BuildTools&rel=17
See #17
---
 md5/build.rs                 |   5 +-
 md5/src/x64_masm.asm         | 160 +++++++++++++++++++++++
 sha1/build.rs                |   5 +-
 sha1/src/x64_masm.asm        | 231 ++++++++++++++++++++++++++++++++
 sha2/build.rs                |   5 +-
 sha2/src/sha256_x64_masm.asm | 247 +++++++++++++++++++++++++++++++++++
 whirlpool/build.rs           |   5 +-
 7 files changed, 654 insertions(+), 4 deletions(-)
 create mode 100644 md5/src/x64_masm.asm
 create mode 100644 sha1/src/x64_masm.asm
 create mode 100644 sha2/src/sha256_x64_masm.asm

diff --git a/md5/build.rs b/md5/build.rs
index b6376d0..e80d976 100644
--- a/md5/build.rs
+++ b/md5/build.rs
@@ -1,10 +1,13 @@
 fn main() {
     let target_arch = std::env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default();
+    let target_env = std::env::var("CARGO_CFG_TARGET_ENV").unwrap_or_default();
 
     let asm_path = if target_arch == "x86" {
         "src/x86.S"
-    } else if target_arch == "x86_64" {
+    } else if target_arch == "x86_64" && target_env != "msvc" {
         "src/x64.S"
+    } else if target_arch == "x86_64" && target_env == "msvc" {
+        "src/x64_masm.asm" // MASM syntax: needs the Visual Studio Build Tools assembler
     } else {
         panic!("Unsupported target architecture");
     };
diff --git a/md5/src/x64_masm.asm b/md5/src/x64_masm.asm
new file mode 100644
index 0000000..9d41988
--- /dev/null
+++ b/md5/src/x64_masm.asm
@@ -0,0 +1,160 @@
+;
+; MD5 hash in x64 MASM
+;
+; Copyright (c) 2023 Chong Yeol Nah (MIT License)
+;
+; Permission is hereby granted, free of charge, to any person obtaining a copy of
+; this software and associated documentation files (the "Software"), to deal in
+; the Software without restriction, including without limitation the rights to
+; use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+; the Software, and to permit persons
to whom the Software is furnished to do so,
+; subject to the following conditions:
+; - The above copyright notice and this permission notice shall be included in
+;   all copies or substantial portions of the Software.
+; - The Software is provided "as is", without warranty of any kind, express or
+;   implied, including but not limited to the warranties of merchantability,
+;   fitness for a particular purpose and noninfringement. In no event shall the
+;   authors or copyright holders be liable for any claim, damages or other
+;   liability, whether in an action of contract, tort or otherwise, arising from,
+;   out of or in connection with the Software or the use or other dealings in the
+;   Software.
+;
+;
+; Storage usage:
+;   Bytes  Location  Volatile  Description
+;       4  eax       yes       Temporary w-bit word used in the hash computation
+;       8  rcx       yes       Base address of message block array argument (read-only)
+;       8  rdx       yes       Base address of hash value array argument (register is read-only; the array itself is updated in place at the end)
+;       4  r8d       yes       MD5 working variable A
+;       4  r9d       yes       MD5 working variable B
+;       4  r10d      yes       MD5 working variable C
+;       4  r11d      yes       MD5 working variable D
+
+    option casemap:none
+
+    .const
+ROUND macro i, a, b, c, d, k, s, t ; i = round number (selects F/G/H/I), a..d = working registers, k = message word index, s = rotate count, t = additive constant
+
+if i LT 16
+
+    ; eax = F(b,c,d) = (b & c) | (!b & d) = d ^ (b & (c ^ d))
+    mov eax, c
+    xor eax, d
+    and eax, b
+    xor eax, d
+
+elseif i LT 32
+
+    ; eax = G(b,c,d) = (b & d) | (c & !d) = c ^ (d & (b ^ c))
+    mov eax, c
+    xor eax, b
+    and eax, d
+    xor eax, c
+
+elseif i LT 48
+
+    ; eax = H(b,c,d) = b ^ c ^ d
+    mov eax, c
+    xor eax, d
+    xor eax, b
+
+else
+
+    ; eax = I(b,c,d) = c ^ (b | !d)
+    mov eax, d
+    not eax
+    or eax, b
+    xor eax, c
+
+endif
+
+    lea a, [eax + a + t] ; a = a + f(b,c,d) + t (lea used as a three-operand add)
+    add a, [rcx + k*4] ; a += message word k, loaded as stored (no byte swap)
+    rol a, s ; rotate the sum left by s bits
+    add a, b ; a = b + rol(sum, s)
+    endm
+
+    .code
+    ; void md5_compress(const uint8_t block[64], uint32_t state[4])
+    public md5_compress ; NOTE(review): block is read through rcx and state through rdx -- confirm the Rust-side extern passes (block, state) in this order
+md5_compress proc
+    ; Initialize working variables with previous hash value
+    mov r8d, [rdx] ; a
+    mov r9d, [rdx + 4] ; b
+    mov r10d, [rdx + 8] ; c
+    mov r11d, [rdx + 12] ; d
+
+    ; 64 rounds
of hashing + ROUND 0, r8d, r9d, r10d, r11d, 0, 7, -28955B88h + ROUND 1, r11d, r8d, r9d, r10d, 1, 12, -173848AAh + ROUND 2, r10d, r11d, r8d, r9d, 2, 17, 242070DBh + ROUND 3, r9d, r10d, r11d, r8d, 3, 22, -3E423112h + ROUND 4, r8d, r9d, r10d, r11d, 4, 7, -0A83F051h + ROUND 5, r11d, r8d, r9d, r10d, 5, 12, 4787C62Ah + ROUND 6, r10d, r11d, r8d, r9d, 6, 17, -57CFB9EDh + ROUND 7, r9d, r10d, r11d, r8d, 7, 22, -02B96AFFh + ROUND 8, r8d, r9d, r10d, r11d, 8, 7, 698098D8h + ROUND 9, r11d, r8d, r9d, r10d, 9, 12, -74BB0851h + ROUND 10, r10d, r11d, r8d, r9d, 10, 17, -0000A44Fh + ROUND 11, r9d, r10d, r11d, r8d, 11, 22, -76A32842h + ROUND 12, r8d, r9d, r10d, r11d, 12, 7, 6B901122h + ROUND 13, r11d, r8d, r9d, r10d, 13, 12, -02678E6Dh + ROUND 14, r10d, r11d, r8d, r9d, 14, 17, -5986BC72h + ROUND 15, r9d, r10d, r11d, r8d, 15, 22, 49B40821h + ROUND 16, r8d, r9d, r10d, r11d, 1, 5, -09E1DA9Eh + ROUND 17, r11d, r8d, r9d, r10d, 6, 9, -3FBF4CC0h + ROUND 18, r10d, r11d, r8d, r9d, 11, 14, 265E5A51h + ROUND 19, r9d, r10d, r11d, r8d, 0, 20, -16493856h + ROUND 20, r8d, r9d, r10d, r11d, 5, 5, -29D0EFA3h + ROUND 21, r11d, r8d, r9d, r10d, 10, 9, 02441453h + ROUND 22, r10d, r11d, r8d, r9d, 15, 14, -275E197Fh + ROUND 23, r9d, r10d, r11d, r8d, 4, 20, -182C0438h + ROUND 24, r8d, r9d, r10d, r11d, 9, 5, 21E1CDE6h + ROUND 25, r11d, r8d, r9d, r10d, 14, 9, -3CC8F82Ah + ROUND 26, r10d, r11d, r8d, r9d, 3, 14, -0B2AF279h + ROUND 27, r9d, r10d, r11d, r8d, 8, 20, 455A14EDh + ROUND 28, r8d, r9d, r10d, r11d, 13, 5, -561C16FBh + ROUND 29, r11d, r8d, r9d, r10d, 2, 9, -03105C08h + ROUND 30, r10d, r11d, r8d, r9d, 7, 14, 676F02D9h + ROUND 31, r9d, r10d, r11d, r8d, 12, 20, -72D5B376h + ROUND 32, r8d, r9d, r10d, r11d, 5, 4, -0005C6BEh + ROUND 33, r11d, r8d, r9d, r10d, 8, 11, -788E097Fh + ROUND 34, r10d, r11d, r8d, r9d, 11, 16, 6D9D6122h + ROUND 35, r9d, r10d, r11d, r8d, 14, 23, -021AC7F4h + ROUND 36, r8d, r9d, r10d, r11d, 1, 4, -5B4115BCh + ROUND 37, r11d, r8d, r9d, r10d, 4, 11, 4BDECFA9h + ROUND 38, r10d, r11d, r8d, r9d, 
7, 16, -0944B4A0h
+    ROUND 39, r9d, r10d, r11d, r8d, 10, 23, -41404390h
+    ROUND 40, r8d, r9d, r10d, r11d, 13, 4, 289B7EC6h
+    ROUND 41, r11d, r8d, r9d, r10d, 0, 11, -155ED806h
+    ROUND 42, r10d, r11d, r8d, r9d, 3, 16, -2B10CF7Bh
+    ROUND 43, r9d, r10d, r11d, r8d, 6, 23, 04881D05h
+    ROUND 44, r8d, r9d, r10d, r11d, 9, 4, -262B2FC7h
+    ROUND 45, r11d, r8d, r9d, r10d, 12, 11, -1924661Bh
+    ROUND 46, r10d, r11d, r8d, r9d, 15, 16, 1FA27CF8h
+    ROUND 47, r9d, r10d, r11d, r8d, 2, 23, -3B53A99Bh
+    ROUND 48, r8d, r9d, r10d, r11d, 0, 6, -0BD6DDBCh
+    ROUND 49, r11d, r8d, r9d, r10d, 7, 10, 432AFF97h
+    ROUND 50, r10d, r11d, r8d, r9d, 14, 15, -546BDC59h
+    ROUND 51, r9d, r10d, r11d, r8d, 5, 21, -036C5FC7h
+    ROUND 52, r8d, r9d, r10d, r11d, 12, 6, 655B59C3h
+    ROUND 53, r11d, r8d, r9d, r10d, 3, 10, -70F3336Eh
+    ROUND 54, r10d, r11d, r8d, r9d, 10, 15, -00100B83h
+    ROUND 55, r9d, r10d, r11d, r8d, 1, 21, -7A7BA22Fh
+    ROUND 56, r8d, r9d, r10d, r11d, 8, 6, 6FA87E4Fh
+    ROUND 57, r11d, r8d, r9d, r10d, 15, 10, -01D31920h
+    ROUND 58, r10d, r11d, r8d, r9d, 6, 15, -5CFEBCECh
+    ROUND 59, r9d, r10d, r11d, r8d, 13, 21, 4E0811A1h
+    ROUND 60, r8d, r9d, r10d, r11d, 4, 6, -08AC817Eh
+    ROUND 61, r11d, r8d, r9d, r10d, 11, 10, -42C50DCBh
+    ROUND 62, r10d, r11d, r8d, r9d, 2, 15, 2AD7D2BBh
+    ROUND 63, r9d, r10d, r11d, r8d, 9, 21, -14792C6Fh
+
+    ; Compute intermediate hash value
+    add [rdx] , r8d
+    add [rdx + 4], r9d
+    add [rdx + 8], r10d
+    add [rdx + 12], r11d
+    ret
+md5_compress endp
+    end
diff --git a/sha1/build.rs b/sha1/build.rs
index afed737..b97e0bd 100644
--- a/sha1/build.rs
+++ b/sha1/build.rs
@@ -1,11 +1,14 @@
 fn main() {
     let target_arch = std::env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default();
     let target_vendor = std::env::var("CARGO_CFG_TARGET_VENDOR").unwrap_or_default();
+    let target_env = std::env::var("CARGO_CFG_TARGET_ENV").unwrap_or_default();
 
     let asm_path = if target_arch == "x86" {
         "src/x86.S"
-    } else if target_arch == "x86_64" {
+    } else if target_arch == "x86_64" && target_env != "msvc" {
         "src/x64.S"
+    } else if target_arch == "x86_64" && target_env == "msvc" {
+        "src/x64_masm.asm" // MASM syntax: needs the Visual Studio Build Tools assembler
     } else if target_arch == "aarch64" && target_vendor == "apple" {
         "src/aarch64_apple.S"
     } else if target_arch == "aarch64" {
diff --git a/sha1/src/x64_masm.asm b/sha1/src/x64_masm.asm
new file mode 100644
index 0000000..92032da
--- /dev/null
+++ b/sha1/src/x64_masm.asm
@@ -0,0 +1,231 @@
+;
+; SHA1 hash in x64 MASM
+;
+; Copyright (c) 2023 Chong Yeol Nah (MIT License)
+;
+; Permission is hereby granted, free of charge, to any person obtaining a copy of
+; this software and associated documentation files (the "Software"), to deal in
+; the Software without restriction, including without limitation the rights to
+; use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+; the Software, and to permit persons to whom the Software is furnished to do so,
+; subject to the following conditions:
+; - The above copyright notice and this permission notice shall be included in
+;   all copies or substantial portions of the Software.
+; - The Software is provided "as is", without warranty of any kind, express or
+;   implied, including but not limited to the warranties of merchantability,
+;   fitness for a particular purpose and noninfringement. In no event shall the
+;   authors or copyright holders be liable for any claim, damages or other
+;   liability, whether in an action of contract, tort or otherwise, arising from,
+;   out of or in connection with the Software or the use or other dealings in the
+;   Software.
+;
+;
+; Storage usage:
+;   Bytes  Location  Volatile  Description
+;       4  eax       yes       Temporary w-bit word used in the hash computation
+;       4  ebx       no        Temporary w-bit word used in the hash computation
+;       8  rcx       yes       Base address of message block array argument (read-only)
+;       8  rdx       yes       Base address of hash value array argument (register is read-only; the array itself is updated in place at the end)
+;       8  rsp       no        x64 stack pointer
+;       4  r8d       yes       SHA1 working variable A
+;       4  r9d       yes       SHA1 working variable B
+;       4  r10d      yes       SHA1 working variable C
+;       4  r11d      yes       SHA1 working variable D
+;       4  r12d      no        SHA1 working variable E
+;      64  [rsp+0]   no        Circular buffer of most recent 16 message schedule items, 4 bytes each
+
+    option casemap:none
+
+    .const
+SCHED macro i ; expands to the memory operand of schedule slot (i mod 16) in the stack buffer
+    index textequ %i AND 0fh ; i mod 16
+    exitm <[rsp + index*4]>
+    endm
+
+ROUNDTAIL macro a, b, e, k ; eax = f[i], e -> e + w[i]
+    ; (obj1) e -> a rol 5 + f[i] + e + w[i] + k[i]
+    ; (obj2) b -> b rol 30
+    mov ebx, a
+    rol ebx, 5
+    lea e, [ebx + e + k] ; e -> a rol 5 + e + w[i] + k[i]
+    add e, eax ; e -> a rol 5 + f[i] + e + w[i] + k[i] (obj1)
+    rol b, 30 ; b -> b rol 30 (obj2)
+    endm
+
+ROUND macro i, a, b, c, d, e ; i = round number (selects schedule source, f and k), a..e = registers holding the working variables for this round
+
+if i LT 16
+
+    mov eax, [rcx + i*4] ; w[i] = message word i as stored in the block
+    bswap eax ; reverse the byte order of the loaded word
+
+else
+
+    mov eax, SCHED(i - 3)
+    xor eax, SCHED(i - 8)
+    xor eax, SCHED(i - 14)
+    xor eax, SCHED(i - 16)
+    rol eax, 1 ; w[i] = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) rol 1
+
+endif
+
+    mov SCHED(i), eax ; store w[i], overwriting slot (i mod 16)
+    add e, eax ; e -> e + w[i]
+
+if i LT 20
+
+    ; eax = f[i] = (b & c) ^ (~b & d) = d ^ b & (c ^ d)
+    ; & and ^ form the Z/2Z ring (& is *, ^ is +)
+    ; ~b is (1 + b)
+    ; bc + (1 + b)d = bc + d + bd = d + b(c + d)
+    mov eax, c
+    xor eax, d
+    and eax, b
+    xor eax, d
+    ROUNDTAIL a, b, e, 5A827999h
+
+elseif i GE 40 AND i LT 60
+
+    ; eax = f[i] = (b & c) ^ (b & d) ^ (c & d) = (b & (c | d)) | (c & d)
+    ; https://www.wolframalpha.com/input?i=simplify+%28b+%26%26+c%29+xor+%28b+%26%26+d%29+xor+%28c+%26%26+d%29
+    mov eax, c
+    mov ebx, c
+    or eax, d
+    and eax, b
+    and ebx, d
+    or eax, ebx
+    ROUNDTAIL a, b, e, -70E44324h
+
+else
+
+    ; eax = f[i] = b ^ c ^ d
+    mov eax, b
+    xor eax, c
+    xor eax, d
+
if i LT 40
+        ROUNDTAIL a, b, e, 6ED9EBA1h
+    else
+        ROUNDTAIL a, b, e, -359D3E2Ah
+    endif
+
+endif
+
+    endm
+
+    .code
+    ; void sha1_compress(const uint8_t block[64], uint32_t state[5])
+    public sha1_compress ; block is read through rcx, state through rdx
+sha1_compress proc frame
+    ; Save nonvolatile registers, allocate scratch space; every prolog step is followed by its unwind directive (required by the Windows x64 ABI)
+    push rbx
+    .pushreg rbx
+    push r12
+    .pushreg r12
+    sub rsp, 64
+    .allocstack 64
+    .endprolog
+
+    ; Initialize working variables with previous hash value
+    mov r8d, [rdx] ; a
+    mov r9d, [rdx + 4] ; b
+    mov r10d, [rdx + 8] ; c
+    mov r11d, [rdx + 12] ; d
+    mov r12d, [rdx + 16] ; e
+
+    ; 80 rounds of hashing
+    ROUND 0, r8d, r9d, r10d, r11d, r12d
+    ROUND 1, r12d, r8d, r9d, r10d, r11d
+    ROUND 2, r11d, r12d, r8d, r9d, r10d
+    ROUND 3, r10d, r11d, r12d, r8d, r9d
+    ROUND 4, r9d, r10d, r11d, r12d, r8d
+    ROUND 5, r8d, r9d, r10d, r11d, r12d
+    ROUND 6, r12d, r8d, r9d, r10d, r11d
+    ROUND 7, r11d, r12d, r8d, r9d, r10d
+    ROUND 8, r10d, r11d, r12d, r8d, r9d
+    ROUND 9, r9d, r10d, r11d, r12d, r8d
+    ROUND 10, r8d, r9d, r10d, r11d, r12d
+    ROUND 11, r12d, r8d, r9d, r10d, r11d
+    ROUND 12, r11d, r12d, r8d, r9d, r10d
+    ROUND 13, r10d, r11d, r12d, r8d, r9d
+    ROUND 14, r9d, r10d, r11d, r12d, r8d
+    ROUND 15, r8d, r9d, r10d, r11d, r12d
+    ROUND 16, r12d, r8d, r9d, r10d, r11d
+    ROUND 17, r11d, r12d, r8d, r9d, r10d
+    ROUND 18, r10d, r11d, r12d, r8d, r9d
+    ROUND 19, r9d, r10d, r11d, r12d, r8d
+    ROUND 20, r8d, r9d, r10d, r11d, r12d
+    ROUND 21, r12d, r8d, r9d, r10d, r11d
+    ROUND 22, r11d, r12d, r8d, r9d, r10d
+    ROUND 23, r10d, r11d, r12d, r8d, r9d
+    ROUND 24, r9d, r10d, r11d, r12d, r8d
+    ROUND 25, r8d, r9d, r10d, r11d, r12d
+    ROUND 26, r12d, r8d, r9d, r10d, r11d
+    ROUND 27, r11d, r12d, r8d, r9d, r10d
+    ROUND 28, r10d, r11d, r12d, r8d, r9d
+    ROUND 29, r9d, r10d, r11d, r12d, r8d
+    ROUND 30, r8d, r9d, r10d, r11d, r12d
+    ROUND 31, r12d, r8d, r9d, r10d, r11d
+    ROUND 32, r11d, r12d, r8d, r9d, r10d
+    ROUND 33, r10d, r11d, r12d, r8d, r9d
+    ROUND 34, r9d, r10d, r11d, r12d, r8d
+    ROUND 35, r8d, r9d, r10d, r11d, r12d
+    ROUND 36, r12d, r8d, r9d, r10d, r11d
+    ROUND 37, r11d, r12d,
r8d, r9d, r10d + ROUND 38, r10d, r11d, r12d, r8d, r9d + ROUND 39, r9d, r10d, r11d, r12d, r8d + ROUND 40, r8d, r9d, r10d, r11d, r12d + ROUND 41, r12d, r8d, r9d, r10d, r11d + ROUND 42, r11d, r12d, r8d, r9d, r10d + ROUND 43, r10d, r11d, r12d, r8d, r9d + ROUND 44, r9d, r10d, r11d, r12d, r8d + ROUND 45, r8d, r9d, r10d, r11d, r12d + ROUND 46, r12d, r8d, r9d, r10d, r11d + ROUND 47, r11d, r12d, r8d, r9d, r10d + ROUND 48, r10d, r11d, r12d, r8d, r9d + ROUND 49, r9d, r10d, r11d, r12d, r8d + ROUND 50, r8d, r9d, r10d, r11d, r12d + ROUND 51, r12d, r8d, r9d, r10d, r11d + ROUND 52, r11d, r12d, r8d, r9d, r10d + ROUND 53, r10d, r11d, r12d, r8d, r9d + ROUND 54, r9d, r10d, r11d, r12d, r8d + ROUND 55, r8d, r9d, r10d, r11d, r12d + ROUND 56, r12d, r8d, r9d, r10d, r11d + ROUND 57, r11d, r12d, r8d, r9d, r10d + ROUND 58, r10d, r11d, r12d, r8d, r9d + ROUND 59, r9d, r10d, r11d, r12d, r8d + ROUND 60, r8d, r9d, r10d, r11d, r12d + ROUND 61, r12d, r8d, r9d, r10d, r11d + ROUND 62, r11d, r12d, r8d, r9d, r10d + ROUND 63, r10d, r11d, r12d, r8d, r9d + ROUND 64, r9d, r10d, r11d, r12d, r8d + ROUND 65, r8d, r9d, r10d, r11d, r12d + ROUND 66, r12d, r8d, r9d, r10d, r11d + ROUND 67, r11d, r12d, r8d, r9d, r10d + ROUND 68, r10d, r11d, r12d, r8d, r9d + ROUND 69, r9d, r10d, r11d, r12d, r8d + ROUND 70, r8d, r9d, r10d, r11d, r12d + ROUND 71, r12d, r8d, r9d, r10d, r11d + ROUND 72, r11d, r12d, r8d, r9d, r10d + ROUND 73, r10d, r11d, r12d, r8d, r9d + ROUND 74, r9d, r10d, r11d, r12d, r8d + ROUND 75, r8d, r9d, r10d, r11d, r12d + ROUND 76, r12d, r8d, r9d, r10d, r11d + ROUND 77, r11d, r12d, r8d, r9d, r10d + ROUND 78, r10d, r11d, r12d, r8d, r9d + ROUND 79, r9d, r10d, r11d, r12d, r8d + + ; Compute intermediate hash value + add [rdx] , r8d + add [rdx + 4], r9d + add [rdx + 8], r10d + add [rdx + 12], r11d + add [rdx + 16], r12d + + ; Restore nonvolatile registers + add rsp, 64 + pop r12 + pop rbx + ret +sha1_compress endp + end diff --git a/sha2/build.rs b/sha2/build.rs index 4fd331f..66689d3 100644 --- a/sha2/build.rs +++ 
b/sha2/build.rs
@@ -3,12 +3,15 @@
 fn main() {
     let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default();
     let target_vendor = env::var("CARGO_CFG_TARGET_VENDOR").unwrap_or_default();
+    let target_env = env::var("CARGO_CFG_TARGET_ENV").unwrap_or_default();
 
     let mut build256 = cc::Build::new();
     let (sha256_path, sha512_path) = if target_arch == "x86" {
         ("src/sha256_x86.S", "src/sha512_x86.S")
-    } else if target_arch == "x86_64" {
+    } else if target_arch == "x86_64" && target_env != "msvc" {
         ("src/sha256_x64.S", "src/sha512_x64.S")
+    } else if target_arch == "x86_64" && target_env == "msvc" {
+        ("src/sha256_x64_masm.asm", "src/sha512_x64_masm.asm") // NOTE(review): sha512_x64_masm.asm is not created by this patch -- confirm it exists
     } else if target_arch == "aarch64" && target_vendor == "apple" {
         build256.flag("-march=armv8-a+crypto");
         ("src/sha256_aarch64_apple.S", "")
diff --git a/sha2/src/sha256_x64_masm.asm b/sha2/src/sha256_x64_masm.asm
new file mode 100644
index 0000000..c886331
--- /dev/null
+++ b/sha2/src/sha256_x64_masm.asm
@@ -0,0 +1,247 @@
+;
+; SHA256 hash in x64 MASM
+;
+; Copyright (c) 2023 Chong Yeol Nah (MIT License)
+;
+; Permission is hereby granted, free of charge, to any person obtaining a copy of
+; this software and associated documentation files (the "Software"), to deal in
+; the Software without restriction, including without limitation the rights to
+; use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+; the Software, and to permit persons to whom the Software is furnished to do so,
+; subject to the following conditions:
+; - The above copyright notice and this permission notice shall be included in
+;   all copies or substantial portions of the Software.
+; - The Software is provided "as is", without warranty of any kind, express or
+;   implied, including but not limited to the warranties of merchantability,
+;   fitness for a particular purpose and noninfringement.
In no event shall the
+;   authors or copyright holders be liable for any claim, damages or other
+;   liability, whether in an action of contract, tort or otherwise, arising from,
+;   out of or in connection with the Software or the use or other dealings in the
+;   Software.
+;
+;
+; Storage usage:
+;   Bytes  Location  Volatile  Description
+;       4  eax       yes       Temporary w-bit word used in the hash computation
+;       4  ebx       no        Temporary w-bit word used in the hash computation
+;       8  rcx       yes       Base address of message block array argument (read-only)
+;       8  rdx       yes       Base address of hash value array argument (register is read-only; the array itself is updated in place at the end)
+;       4  edi       no        Temporary w-bit word used in the hash computation
+;       4  esi       no        Temporary w-bit word used in the hash computation
+;       8  rsp       no        x64 stack pointer
+;       4  r8d       yes       SHA256 working variable A
+;       4  r9d       yes       SHA256 working variable B
+;       4  r10d      yes       SHA256 working variable C
+;       4  r11d      yes       SHA256 working variable D
+;       4  r12d      no        SHA256 working variable E
+;       4  r13d      no        SHA256 working variable F
+;       4  r14d      no        SHA256 working variable G
+;       4  r15d      no        SHA256 working variable H
+;      64  [rsp+0]   no        Circular buffer of most recent 16 message schedule items, 4 bytes each
+
+    option casemap:none
+
+    .const
+SCHED macro i ; expands to the memory operand of schedule slot (i mod 16) in the stack buffer
+    index textequ %i AND 0fh ; i mod 16
+    exitm <[rsp + index*4]>
+    endm
+
+ROUNDTAIL macro a, b, c, d, e, f, g, h, k ; ebx = w[i]
+    ; temp1 = h + S1 + ch + k[i] + w[i]
+    ; temp2 = S0 + maj
+    ; (obj1) h -> temp1 + temp2 = h + S1 + ch + k[i] + w[i] + S0 + maj
+    ; (obj2) d -> d + temp1
+    ; Part 0
+    mov eax, e
+    mov edi, e
+    mov esi, e
+    ror eax, 6
+    ror edi, 11
+    ror esi, 25
+    xor edi, esi
+    xor eax, edi ; eax = S1 = (e ror 6) ^ (e ror 11) ^ (e ror 25)
+    ; ch = (e & f) ^ (~e & g) = (g ^ (e & (f ^ g)))
+    ; & and ^ form the Z/2Z ring (& is *, ^ is +)
+    ; ~e is (1 + e)
+    ; ef + (1 + e)g = ef + g + eg = g + ef + eg = g + e(f + g)
+    mov edi, g
+    xor edi, f
+    and edi, e
+    xor edi, g ; edi = ch
+    lea eax, [eax + edi + k] ; eax = S1 + ch + k[i]
+    add h, eax ; h -> h + S1 + ch + k[i]
+    add h, ebx ; h -> h + S1 + ch + k[i] + w[i] = temp1
+    ; Part 1
+    add d, h ; d -> d + temp1 (obj2)
+    ; Part 2
+    mov eax, a
+    mov edi, a
+    mov esi, a
+    ror eax, 2
+    ror edi, 13
+    ror esi, 22
+    xor edi, esi
+    xor eax, edi ; eax = S0
+    add h, eax ; h -> temp1 + S0
+    ; maj = (a and b) xor (a and c) xor (b and c) = (a and (b or c)) or (b and c)
+    ; https://www.wolframalpha.com/input?i=simplify+%28A+%26%26+B%29+xor+%28A+%26%26+C%29+xor+%28B+%26%26+C%29
+    mov edi, c
+    mov eax, c
+    or eax, b
+    and edi, b
+    and eax, a
+    or eax, edi ; eax = maj
+    add h, eax ; h -> temp1 + S0 + maj = temp1 + temp2 (obj1)
+    endm
+
+ROUND macro i, a, b, c, d, e, f, g, h, k
+if i LT 16
+    mov ebx, [rcx + i*4] ; ebx = w[i]
+    bswap ebx
+    mov SCHED(i), ebx
+else
+    ; (obj) w[i] -> w[i-16] + s0 + w[i-7] + s1
+    mov ebx, SCHED(i - 16) ; ebx = w[i-16]
+    mov eax, SCHED(i - 15)
+    mov edi, eax
+    mov esi, eax
+    ror edi, 18
+    shr esi, 3
+    ror eax, 7
+    xor edi, esi
+    xor eax, edi ; s0 = eax
+    add ebx, eax ; ebx = w[i-16] + s0
+    add ebx, SCHED(i - 7) ; ebx = w[i-16] + s0 + w[i-7]
+    mov eax, SCHED(i - 2)
+    mov edi, eax
+    mov esi, eax
+    ror edi, 19
+    shr esi, 10
+    ror eax, 17
+    xor edi, esi
+    xor eax, edi ; eax = s1
+    add ebx, eax ; ebx = w[i-16] + s0 + w[i-7] + s1
+    mov SCHED(i), ebx ; w[i] -> w[i-16] + s0 + w[i-7] + s1 (obj)
+endif
+    ROUNDTAIL a, b, c, d, e, f, g, h, k ; ebx = w[i]
+    endm
+
+    .code
+    public sha256_compress ; void sha256_compress(const uint8_t block[64], uint32_t state[8])
+sha256_compress proc frame ; Save nonvolatile registers, allocate scratch space; every prolog step is followed by its unwind directive (required by the Windows x64 ABI)
+    push rbx
+    .pushreg rbx
+    push rdi
+    .pushreg rdi
+    push rsi
+    .pushreg rsi
+    push r12
+    .pushreg r12
+    push r13
+    .pushreg r13
+    push r14
+    .pushreg r14
+    push r15
+    .pushreg r15
+    sub rsp, 64
+    .allocstack 64
+    .endprolog
+    ; Initialize working variables with previous hash value
+    mov r8d, [rdx] ; a
+    mov r9d, [rdx + 4] ; b
+    mov r10d, [rdx + 8] ; c
+    mov r11d, [rdx + 12] ; d
+    mov r12d, [rdx + 16] ; e
+    mov r13d, [rdx + 20] ; f
+    mov r14d, [rdx + 24] ; g
+    mov r15d, [rdx + 28] ; h
+
+    ; 64 rounds of hashing
+    ROUND 0, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, 428A2F98h
+    ROUND 1, r15d, r8d , r9d , r10d, r11d,
r12d, r13d, r14d, 71374491h + ROUND 2, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, -4A3F0431h + ROUND 3, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, -164A245Bh + ROUND 4, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 3956C25Bh + ROUND 5, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, 59F111F1h + ROUND 6, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , -6DC07D5Ch + ROUND 7, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , -54E3A12Bh + ROUND 8, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, -27F85568h + ROUND 9, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, 12835B01h + ROUND 10, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, 243185BEh + ROUND 11, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, 550C7DC3h + ROUND 12, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 72BE5D74h + ROUND 13, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, -7F214E02h + ROUND 14, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , -6423F959h + ROUND 15, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , -3E640E8Ch + ROUND 16, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, -1B64963Fh + ROUND 17, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, -1041B87Ah + ROUND 18, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, 0FC19DC6h + ROUND 19, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, 240CA1CCh + ROUND 20, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 2DE92C6Fh + ROUND 21, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, 4A7484AAh + ROUND 22, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , 5CB0A9DCh + ROUND 23, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , 76F988DAh + ROUND 24, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, -67C1AEAEh + ROUND 25, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, -57CE3993h + ROUND 26, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, -4FFCD838h + ROUND 27, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, -40A68039h + ROUND 28, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, -391FF40Dh + ROUND 29, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, -2A586EB9h + 
ROUND 30, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , 06CA6351h + ROUND 31, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , 14292967h + ROUND 32, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, 27B70A85h + ROUND 33, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, 2E1B2138h + ROUND 34, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, 4D2C6DFCh + ROUND 35, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, 53380D13h + ROUND 36, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 650A7354h + ROUND 37, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, 766A0ABBh + ROUND 38, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , -7E3D36D2h + ROUND 39, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , -6D8DD37Bh + ROUND 40, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, -5D40175Fh + ROUND 41, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, -57E599B5h + ROUND 42, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, -3DB47490h + ROUND 43, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, -3893AE5Dh + ROUND 44, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, -2E6D17E7h + ROUND 45, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, -2966F9DCh + ROUND 46, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , -0BF1CA7Bh + ROUND 47, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , 106AA070h + ROUND 48, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, 19A4C116h + ROUND 49, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, 1E376C08h + ROUND 50, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, 2748774Ch + ROUND 51, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, 34B0BCB5h + ROUND 52, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 391C0CB3h + ROUND 53, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, 4ED8AA4Ah + ROUND 54, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , 5B9CCA4Fh + ROUND 55, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , 682E6FF3h + ROUND 56, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, 748F82EEh + ROUND 57, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, 78A5636Fh + ROUND 58, r14d, r15d, r8d , 
r9d , r10d, r11d, r12d, r13d, -7B3787ECh
+    ROUND 59, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, -7338FDF8h
+    ROUND 60, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, -6F410006h
+    ROUND 61, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, -5BAF9315h
+    ROUND 62, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , -41065C09h
+    ROUND 63, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , -398E870Eh
+
+    ; Compute intermediate hash value
+    add [rdx] , r8d
+    add [rdx + 4], r9d
+    add [rdx + 8], r10d
+    add [rdx + 12], r11d
+    add [rdx + 16], r12d
+    add [rdx + 20], r13d
+    add [rdx + 24], r14d
+    add [rdx + 28], r15d
+
+    ; Restore nonvolatile registers
+    add rsp, 64
+    pop r15
+    pop r14
+    pop r13
+    pop r12
+    pop rsi
+    pop rdi
+    pop rbx
+    ret
+sha256_compress endp
+    end
diff --git a/whirlpool/build.rs b/whirlpool/build.rs
index 88d3542..1d5d80c 100644
--- a/whirlpool/build.rs
+++ b/whirlpool/build.rs
@@ -1,10 +1,13 @@
 fn main() {
     let target_arch = std::env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default();
+    let target_env = std::env::var("CARGO_CFG_TARGET_ENV").unwrap_or_default();
 
     let asm_path = if target_arch == "x86" {
         "src/x86.S"
-    } else if target_arch == "x86_64" {
+    } else if target_arch == "x86_64" && target_env != "msvc" {
         "src/x64.S"
+    } else if target_arch == "x86_64" && target_env == "msvc" {
+        "src/x64_masm.asm" // NOTE(review): whirlpool/src/x64_masm.asm is not created by this patch -- confirm it exists
     } else {
         panic!("Unsupported target architecture");
     };