Merge pull request #487 from savi-lang/change/string-bytes-hash
Change hash algorithm for `String` and `Bytes`.
jemc committed Aug 3, 2024
2 parents 618efe9 + 8f573b8 commit f8f1944
Showing 10 changed files with 332 additions and 69 deletions.
53 changes: 10 additions & 43 deletions core/Bytes.savi
@@ -9,7 +9,7 @@
   :new ref from_cpointer(@_ptr, @_size, @_space)
   :new iso iso_from_cpointer(@_ptr, @_size, @_space) // TODO: remove this and use recover instead?
   :new val val_from_cpointer(@_ptr, @_size, @_space) // TODO: remove this and use recover instead?
-  :fun hash: @_ptr._hash(@_size)
+  :fun hash: _Unsafe.RapidHash._run(@_ptr, @_size)
   :fun size: @_size
   :fun space: @_space
   :fun cpointer(offset = 0) CPointer(U8)'tag: @_ptr._offset(offset)
@@ -653,98 +653,65 @@
   :: Raises an error if there aren't enough bytes at that offset to fill a U16.
   :fun read_native_u16!(offset USize) U16
     if (offset + U16.byte_width.usize) > @_size error!
-    result U16 = 0
-    result_ptr = _FFI.Cast(CPointer(U16), CPointer(U8)'ref)
-      .pointer(stack_address_of_variable result)
-    // (we use memcpy here because it gracefully handles unaligned addresses)
-    @_ptr._offset(offset)._copy_to(result_ptr, U16.byte_width.usize)
-    result
+    _Unsafe.MultiByteAccess._read_native_u16(@_ptr._offset(offset))

   :: Write a U16 as bytes starting at the given offset, in native byte order.
   :: Raises an error if there aren't enough bytes at that offset to fit a U16.
   :: Use push_native_u16 instead if writing past the end is needed.
   :fun ref write_native_u16!(offset USize, value U16)
     if (offset + U16.byte_width.usize) > @_size error!
-    // (we use memcpy here because it gracefully handles unaligned addresses)
-    _FFI.Cast(CPointer(U16), CPointer(U8)'box)
-      .pointer(stack_address_of_variable value)
-      ._copy_to(@_ptr._offset(offset), U16.byte_width.usize)
+    _Unsafe.MultiByteAccess._write_native_u16(@_ptr._offset(offset), value)
     @

   :: Add a U16 as bytes onto the end of the buffer, in native byte order.
   :: Use write_native_u16 instead if overwriting existing data is needed.
   :fun ref push_native_u16(value U16)
     @reserve(@_size + U16.byte_width.usize)
-    // (we use memcpy here because it gracefully handles unaligned addresses)
-    _FFI.Cast(CPointer(U16), CPointer(U8)'box)
-      .pointer(stack_address_of_variable value)
-      ._copy_to(@_ptr._offset(@_size), U16.byte_width.usize)
+    _Unsafe.MultiByteAccess._write_native_u16(@_ptr._offset(@_size), value)
     @_size += U16.byte_width.usize
     @

   :: Read a U32 from the bytes at the given offset, with native byte order.
   :: Raises an error if there aren't enough bytes at that offset to fill a U32.
   :fun read_native_u32!(offset USize) U32
     if (offset + U32.byte_width.usize) > @_size error!
-    result U32 = 0
-    result_ptr = _FFI.Cast(CPointer(U32), CPointer(U8)'ref)
-      .pointer(stack_address_of_variable result)
-    // (we use memcpy here because it gracefully handles unaligned addresses)
-    @_ptr._offset(offset)._copy_to(result_ptr, U32.byte_width.usize)
-    result
+    _Unsafe.MultiByteAccess._read_native_u32(@_ptr._offset(offset))

   :: Write a U32 as bytes starting at the given offset, in native byte order.
   :: Raises an error if there aren't enough bytes at that offset to fit a U32.
   :: Use push_native_u32 instead if writing past the end is needed.
   :fun ref write_native_u32!(offset USize, value U32)
     if (offset + U32.byte_width.usize) > @_size error!
-    // (we use memcpy here because it gracefully handles unaligned addresses)
-    _FFI.Cast(CPointer(U32), CPointer(U8)'box)
-      .pointer(stack_address_of_variable value)
-      ._copy_to(@_ptr._offset(offset), U32.byte_width.usize)
+    _Unsafe.MultiByteAccess._write_native_u32(@_ptr._offset(offset), value)
     @

   :: Add a U32 as bytes onto the end of the buffer, in native byte order.
   :: Use write_native_u32 instead if overwriting existing data is needed.
   :fun ref push_native_u32(value U32)
     @reserve(@_size + U32.byte_width.usize)
-    // (we use memcpy here because it gracefully handles unaligned addresses)
-    _FFI.Cast(CPointer(U32), CPointer(U8)'box)
-      .pointer(stack_address_of_variable value)
-      ._copy_to(@_ptr._offset(@_size), U32.byte_width.usize)
+    _Unsafe.MultiByteAccess._write_native_u32(@_ptr._offset(@_size), value)
     @_size += U32.byte_width.usize
     @

   :: Read a U64 from the bytes at the given offset, with native byte order.
   :: Raises an error if there aren't enough bytes at that offset to fill a U64.
   :fun read_native_u64!(offset USize) U64
     if (offset + U64.byte_width.usize) > @_size error!
-    result U64 = 0
-    result_ptr = _FFI.Cast(CPointer(U64), CPointer(U8)'ref)
-      .pointer(stack_address_of_variable result)
-    // (we use memcpy here because it gracefully handles unaligned addresses)
-    @_ptr._offset(offset)._copy_to(result_ptr, U64.byte_width.usize)
-    result
+    _Unsafe.MultiByteAccess._read_native_u64(@_ptr._offset(offset))

   :: Write a U64 as bytes starting at the given offset, in native byte order.
   :: Raises an error if there aren't enough bytes at that offset to fit a U64.
   :: Use push_native_u64 instead if writing past the end is needed.
   :fun ref write_native_u64!(offset USize, value U64)
     if (offset + U64.byte_width.usize) > @_size error!
-    // (we use memcpy here because it gracefully handles unaligned addresses)
-    _FFI.Cast(CPointer(U64), CPointer(U8)'box)
-      .pointer(stack_address_of_variable value)
-      ._copy_to(@_ptr._offset(offset), U64.byte_width.usize)
+    _Unsafe.MultiByteAccess._write_native_u64(@_ptr._offset(offset), value)
     @

   :: Add a U64 as bytes onto the end of the buffer, in native byte order.
   :: Use write_native_u64 instead if overwriting existing data is needed.
   :fun ref push_native_u64(value U64)
     @reserve(@_size + U64.byte_width.usize)
-    // (we use memcpy here because it gracefully handles unaligned addresses)
-    _FFI.Cast(CPointer(U64), CPointer(U8)'box)
-      .pointer(stack_address_of_variable value)
-      ._copy_to(@_ptr._offset(@_size), U64.byte_width.usize)
+    _Unsafe.MultiByteAccess._write_native_u64(@_ptr._offset(@_size), value)
     @_size += U64.byte_width.usize
     @

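For context, a minimal usage sketch of the public API that this refactor leaves unchanged (an editorial note, not part of the commit; it assumes a default `Bytes.new` constructor and uses the `U32[...]` typed-literal syntax seen elsewhere in this diff):

  buf = Bytes.new
  buf.push_native_u32(U32[0xDEADBEEF]) // appends 4 bytes in native byte order
  value = buf.read_native_u32!(0)      // reads them back; raises if out of bounds
  // value now equals U32[0xDEADBEEF], regardless of the buffer's alignment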
13 changes: 0 additions & 13 deletions core/CPointer.savi
@@ -121,19 +121,6 @@
   :: The caller is expected to only do this for in-bounds element counts.
   :fun box _compare(other @'box, count USize) I32: compiler intrinsic

-  :: Calculate the hash of the block of memory starting at this pointer's head,
-  :: continuing through the given number of elements in that memory.
-  ::
-  :: On the Pony runtime, this uses the `ponyint_hash_block` function.
-  ::
-  :: Only the direct memory referenced by the pointer is hashed, so similar
-  :: caveats to those documented for the `_compare` method also apply here.
-  :: As such, callers should avoid using this without statically knowing the
-  :: ramifications of what kind of representation the element has in its memory.
-  ::
-  :: The caller is expected to only do this for in-bounds element counts.
-  :fun box _hash(count USize) USize: compiler intrinsic
-
   :: Return True if this is a null pointer (i.e. a zero address).
   :fun tag is_null Bool: compiler intrinsic

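With the `_hash` intrinsic removed, block hashing no longer routes through the runtime's `ponyint_hash_block`; both of its call sites in this diff switch to the pure-Savi path instead:

  // Before: :fun hash: @_ptr._hash(@_size)                    // runtime intrinsic
  // After:  :fun hash: _Unsafe.RapidHash._run(@_ptr, @_size)  // pure Savi, defined below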
2 changes: 1 addition & 1 deletion core/String.savi
@@ -9,7 +9,7 @@
   :new iso iso_from_cpointer(@_ptr, @_size, @_space) // TODO: remove this and use recover instead?
   :new val val_from_cpointer(@_ptr, @_size, @_space) // TODO: remove this and use recover instead?
   :fun cpointer CPointer(U8): @_ptr
-  :fun hash: @_ptr._hash(@_size)
+  :fun hash: _Unsafe.RapidHash._run(@_ptr, @_size)
   :fun size: @_size
   :fun space: @_space

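Because `String.hash` and `Bytes.hash` now run the same algorithm over the same underlying bytes, equal byte content hashes identically across the two types. A sketch (assuming Savi's `b"..."` bytes-literal syntax):

  "abc".hash == b"abc".hash // both hash the bytes 0x61 0x62 0x63 with RapidHash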
51 changes: 51 additions & 0 deletions core/_Unsafe.MultiByteAccess.savi
@@ -0,0 +1,51 @@
:module _Unsafe.MultiByteAccess
  :fun _read_native_u16(ptr CPointer(U8)) U16
    value U16 = 0
    value_ptr = _FFI.Cast(CPointer(U16), CPointer(U8)'ref)
      .pointer(stack_address_of_variable value)
    // (we use memcpy here because it gracefully handles unaligned addresses)
    _FFI.Cast(CPointer(U8), CPointer(U8)'box).pointer(ptr)
      ._copy_to(value_ptr, U16.byte_width.usize)
    value

  :fun _read_native_u32(ptr CPointer(U8)) U32
    value U32 = 0
    value_ptr = _FFI.Cast(CPointer(U32), CPointer(U8)'ref)
      .pointer(stack_address_of_variable value)
    // (we use memcpy here because it gracefully handles unaligned addresses)
    _FFI.Cast(CPointer(U8), CPointer(U8)'box).pointer(ptr)
      ._copy_to(value_ptr, U32.byte_width.usize)
    value

  :fun _read_native_u64(ptr CPointer(U8)) U64
    value U64 = 0
    value_ptr = _FFI.Cast(CPointer(U64), CPointer(U8)'ref)
      .pointer(stack_address_of_variable value)
    // (we use memcpy here because it gracefully handles unaligned addresses)
    _FFI.Cast(CPointer(U8), CPointer(U8)'box).pointer(ptr)
      ._copy_to(value_ptr, U64.byte_width.usize)
    value

  :fun _write_native_u16(ptr CPointer(U8), value U16)
    dest_ptr = _FFI.Cast(CPointer(U8), CPointer(U8)'ref).pointer(ptr)
    // (we use memcpy here because it gracefully handles unaligned addresses)
    _FFI.Cast(CPointer(U16), CPointer(U8)'box)
      .pointer(stack_address_of_variable value)
      ._copy_to(dest_ptr, U16.byte_width.usize)
    value

  :fun _write_native_u32(ptr CPointer(U8), value U32)
    dest_ptr = _FFI.Cast(CPointer(U8), CPointer(U8)'ref).pointer(ptr)
    // (we use memcpy here because it gracefully handles unaligned addresses)
    _FFI.Cast(CPointer(U32), CPointer(U8)'box)
      .pointer(stack_address_of_variable value)
      ._copy_to(dest_ptr, U32.byte_width.usize)
    value

  :fun _write_native_u64(ptr CPointer(U8), value U64)
    dest_ptr = _FFI.Cast(CPointer(U8), CPointer(U8)'ref).pointer(ptr)
    // (we use memcpy here because it gracefully handles unaligned addresses)
    _FFI.Cast(CPointer(U64), CPointer(U8)'box)
      .pointer(stack_address_of_variable value)
      ._copy_to(dest_ptr, U64.byte_width.usize)
    value
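Note the resulting division of labor, visible in the Bytes.savi hunks above: bounds checking stays with the buffer, which knows its own size, while the memcpy-based unaligned access is centralized in this helper module. For example (quoted from this diff, with annotations added):

  :fun read_native_u16!(offset USize) U16
    if (offset + U16.byte_width.usize) > @_size error!              // caller keeps the bounds check
    _Unsafe.MultiByteAccess._read_native_u16(@_ptr._offset(offset)) // helper owns the unaligned read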
153 changes: 153 additions & 0 deletions core/_Unsafe.RapidHash.savi
@@ -0,0 +1,153 @@
:: An implementation of a leading hash function in terms of maximizing speed
:: without sacrificing too much quality. Note that this is not a cryptographic
:: hash function, and should not be used as such.
::
:: Note that rapidhash is an official successor to wyhash, which is/was a
:: widely used hash function in many languages/platforms.
::
:: To follow the latest research on hash functions, see:
:: https://github.com/rurban/smhasher
:module _Unsafe.RapidHash
  // Default seed.
  :const _seed USize: U64[0xbdd89aa982704029].usize

  // Default secret.
  :const _secret_0 USize: U64[0x2d358dccaa6c78a5].usize
  :const _secret_1 USize: U64[0x8bb84b93962eacc9].usize
  :const _secret_2 USize: U64[0x4b33a62ed433d4a3].usize

  :const _width USize: U64.byte_width.usize

  :: Read a little-endian USize integer from the given byte pointer.
  :fun _read_word(ptr CPointer(U8)) USize
    if USize.byte_width == 8 (
      _Unsafe.MultiByteAccess._read_native_u64(ptr).native_to_le.usize
    |
      _Unsafe.MultiByteAccess._read_native_u32(ptr).native_to_le.usize
    )

  :: Read a half-USize from the given byte pointer (but return as a USize)
  :fun _read_half(ptr CPointer(U8)) USize
    if USize.byte_width == 8 (
      _Unsafe.MultiByteAccess._read_native_u32(ptr).native_to_le.usize
    |
      _Unsafe.MultiByteAccess._read_native_u16(ptr).native_to_le.usize
    )

  :: Read one, two, or three bytes (without branching) into a USize.
  :: This is only safe if the pointer is known to point to at least one byte.
  :: Branching is avoided by the potential of reading some bytes more than once.
  :fun _read_small(ptr_tag CPointer(U8), count USize) USize
    ptr = _FFI.Cast(CPointer(U8), CPointer(U8)'box).pointer(ptr_tag)
    ptr._get_at(0).usize.bit_shl(USize.bit_width - 8)
      .bit_or(ptr._get_at(count.bit_shr(1)).usize.bit_shl(USize.byte_width * 4))
      .bit_or(ptr._get_at(count - 1).usize)

  :fun _mix(a USize, b USize) USize
    pair = a.wide_multiply(b)
    pair.low.bit_xor(pair.high)

  :fun _run(ptr CPointer(U8), count USize) USize
    a USize = 0
    b USize = 0
    count_word = count.usize
    twelve_width = @_width * 12
    six_width = @_width * 6
    two_width = @_width * 2
    half_width = @_width / 2
    seed = @_seed.bit_xor(
      @_mix(
        @_seed.bit_xor(@_secret_0)
        @_secret_1
      ).bit_xor(count_word)
    )

    if count <= two_width ( // TODO: "likely" annotation
      case (
      | count >= half_width | // TODO: "likely" annotation
        ptr_last = ptr.offset(count - 4)
        a = @_read_half(ptr)
          .bit_shl(USize.bit_width / 2)
          .bit_or(@_read_half(ptr_last))
        delta = count.bit_and(@_width * 3)
          .bit_shr(count.bit_shr((@_width * 3).trailing_zero_bits).u8)
        b = @_read_half(ptr.offset(delta))
          .bit_shl(USize.bit_width / 2)
          .bit_or(@_read_half(ptr_last.offset(0.usize - delta)))
      | count > 0 | // TODO: "likely" annotation
        a = @_read_small(ptr, count)
      )
    |
      i = count
      if i > six_width ( // TODO: "unlikely" annotation
        see1 = seed
        see2 = seed
        while i >= twelve_width ( // TODO: "likely" annotation
          seed = @_mix(
            @_read_word(ptr).bit_xor(@_secret_0)
            @_read_word(ptr.offset(@_width)).bit_xor(seed)
          )
          see1 = @_mix(
            @_read_word(ptr.offset(@_width * 2)).bit_xor(@_secret_1)
            @_read_word(ptr.offset(@_width * 3)).bit_xor(see1)
          )
          see2 = @_mix(
            @_read_word(ptr.offset(@_width * 4)).bit_xor(@_secret_2)
            @_read_word(ptr.offset(@_width * 5)).bit_xor(see2)
          )
          seed = @_mix(
            @_read_word(ptr.offset(@_width * 6)).bit_xor(@_secret_0)
            @_read_word(ptr.offset(@_width * 7)).bit_xor(seed)
          )
          see1 = @_mix(
            @_read_word(ptr.offset(@_width * 8)).bit_xor(@_secret_1)
            @_read_word(ptr.offset(@_width * 9)).bit_xor(see1)
          )
          see2 = @_mix(
            @_read_word(ptr.offset(@_width * 10)).bit_xor(@_secret_2)
            @_read_word(ptr.offset(@_width * 11)).bit_xor(see2)
          )
          ptr = ptr.offset(twelve_width), i -= twelve_width
        )
        if i >= six_width ( // TODO: "unlikely" annotation
          seed = @_mix(
            @_read_word(ptr).bit_xor(@_secret_0)
            @_read_word(ptr.offset(@_width)).bit_xor(seed)
          )
          see1 = @_mix(
            @_read_word(ptr.offset(@_width * 2)).bit_xor(@_secret_1)
            @_read_word(ptr.offset(@_width * 3)).bit_xor(see1)
          )
          see2 = @_mix(
            @_read_word(ptr.offset(@_width * 4)).bit_xor(@_secret_2)
            @_read_word(ptr.offset(@_width * 5)).bit_xor(see2)
          )
          ptr = ptr.offset(six_width), i -= six_width
        )
        seed = seed.bit_xor(see1.bit_xor(see2))
      )
      if i > @_width * 2 (
        seed = @_mix(
          @_read_word(ptr).bit_xor(@_secret_2)
          @_read_word(ptr.offset(@_width)).bit_xor(seed).bit_xor(@_secret_1)
        )
        if i > @_width * 4 (
          seed = @_mix(
            @_read_word(ptr.offset(@_width * 2)).bit_xor(@_secret_2)
            @_read_word(ptr.offset(@_width * 3)).bit_xor(seed)
          )
        )
      )
      a = @_read_word(ptr.offset(i - @_width * 2))
      b = @_read_word(ptr.offset(i - @_width))
    )
    a = a.bit_xor(@_secret_1)
    b = b.bit_xor(seed)
    pair = a.wide_multiply(b)
    a = pair.low
    b = pair.high

    @_mix(
      a.bit_xor(@_secret_0).bit_xor(count_word)
      b.bit_xor(@_secret_1)
    )
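Two of the building blocks above are easier to follow with worked examples (editorial notes, not part of the commit):

`_mix(a, b)` folds the double-width product of its arguments: it returns `pair.low.bit_xor(pair.high)` where `pair = a.wide_multiply(b)`. For instance, on a 64-bit target, multiplying 2^32 by 2^32 + 1 gives 2^64 + 2^32, whose low half is 2^32 and high half is 1, so the mix is 2^32 + 1.

`_read_small` touches exactly three byte positions for any count in 1..3: `ptr[0]`, `ptr[count.bit_shr(1)]`, and `ptr[count - 1]`. With count = 3 those are bytes 0, 1, and 2; with count = 2 they are bytes 0, 1, and 1; with count = 1 all three reads land on byte 0. The function stays branch-free at the cost of possibly reading the same byte more than once, exactly as its doc comment says.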