Skip to content

Commit

Permalink
Add low-level HashTable API
Browse files Browse the repository at this point in the history
The primary use case for this type over [`HashMap`] or [`HashSet`] is to
support types that do not implement the [`Hash`] and [`Eq`] traits, but
instead require additional data not contained in the key itself to compute a
hash and compare two elements for equality.

`HashTable` has some similarities with `RawTable`, but has a completely
safe API. It is intended as a replacement for the existing raw entry
API, with the intent of deprecating the latter and eventually removing
it.

Examples of when this can be useful include:
- An `IndexMap` implementation where indices into a `Vec` are stored as
  elements in a `HashTable<usize>`. Hashing and comparing the elements
  requires indexing the associated `Vec` to get the actual value referred to
  by the index.
- Avoiding re-computing a hash when it is already known.
- Mutating the key of an element in a way that doesn't affect its hash.

To achieve this, `HashTable` methods that search for an element in the table
require a hash value and equality function to be explicitly passed in as
arguments. The method will then iterate over the elements with the given
hash and call the equality function on each of them, until a match is found.
  • Loading branch information
Amanieu committed Aug 31, 2023
1 parent f9e4900 commit 763ccf4
Show file tree
Hide file tree
Showing 7 changed files with 2,096 additions and 35 deletions.
1 change: 1 addition & 0 deletions src/external_trait_impls/rayon/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ mod helpers;
pub(crate) mod map;
pub(crate) mod raw;
pub(crate) mod set;
pub(crate) mod table;
252 changes: 252 additions & 0 deletions src/external_trait_impls/rayon/table.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,252 @@
//! Rayon extensions for `HashTable`.

use super::raw::{RawIntoParIter, RawParDrain, RawParIter};
use crate::hash_table::HashTable;
use crate::raw::{Allocator, Global};
use core::fmt;
use core::marker::PhantomData;
use rayon::iter::plumbing::UnindexedConsumer;
use rayon::iter::{IntoParallelIterator, ParallelIterator};

/// Parallel iterator over shared references to entries in a table.
///
/// This iterator is created by the [`par_iter`] method on [`HashTable`]
/// (provided by the [`IntoParallelRefIterator`] trait).
/// See its documentation for more.
///
/// [`par_iter`]: /hashbrown/struct.HashTable.html#method.par_iter
/// [`HashTable`]: /hashbrown/struct.HashTable.html
/// [`IntoParallelRefIterator`]: https://docs.rs/rayon/1.0/rayon/iter/trait.IntoParallelRefIterator.html
pub struct ParIter<'a, T> {
// Raw parallel iterator over the table's buckets; it has no lifetime
// parameter of its own.
inner: RawParIter<T>,
// Carries the `'a` lifetime of the shared borrow without storing a reference.
marker: PhantomData<&'a T>,
}

impl<'a, T: Sync> ParallelIterator for ParIter<'a, T> {
    type Item = &'a T;

    /// Feeds a shared reference to every element into `consumer`.
    ///
    /// Each raw bucket produced by the inner iterator is turned into a
    /// `&'a T` before being handed to rayon.
    #[cfg_attr(feature = "inline-more", inline)]
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        // NOTE(review): soundness relies on the buckets staying valid for
        // `'a`, which the `PhantomData<&'a T>` marker is meant to express —
        // confirm against `RawParIter`.
        let shared_refs = self.inner.map(|bucket| unsafe { bucket.as_ref() });
        shared_refs.drive_unindexed(consumer)
    }
}

impl<T> Clone for ParIter<'_, T> {
    /// Produces another iterator by cloning the inner raw iterator.
    ///
    /// No `T: Clone` bound is needed: only the raw iterator is cloned.
    #[cfg_attr(feature = "inline-more", inline)]
    fn clone(&self) -> Self {
        let inner = self.inner.clone();
        Self {
            inner,
            marker: PhantomData,
        }
    }
}

impl<T: fmt::Debug> fmt::Debug for ParIter<'_, T> {
    /// Formats the remaining elements as a debug list.
    ///
    /// The elements are walked sequentially through the raw iterator; the
    /// parallel iterator itself is not consumed.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut list = f.debug_list();
        // NOTE(review): assumes the buckets yielded by the raw iterator are
        // live for the duration of this call — confirm against `RawParIter::iter`.
        for bucket in unsafe { self.inner.iter() } {
            list.entry(unsafe { bucket.as_ref() });
        }
        list.finish()
    }
}

/// Parallel iterator over mutable references to entries in a table.
///
/// This iterator is created by the [`par_iter_mut`] method on [`HashTable`]
/// (provided by the [`IntoParallelRefMutIterator`] trait).
/// See its documentation for more.
///
/// [`par_iter_mut`]: /hashbrown/struct.HashTable.html#method.par_iter_mut
/// [`HashTable`]: /hashbrown/struct.HashTable.html
/// [`IntoParallelRefMutIterator`]: https://docs.rs/rayon/1.0/rayon/iter/trait.IntoParallelRefMutIterator.html
pub struct ParIterMut<'a, T> {
// Raw parallel iterator over the table's buckets; it has no lifetime
// parameter of its own.
inner: RawParIter<T>,
// Carries the `'a` lifetime of the exclusive borrow without storing a reference.
marker: PhantomData<&'a mut T>,
}

impl<'a, T: Send> ParallelIterator for ParIterMut<'a, T> {
    type Item = &'a mut T;

    /// Feeds a mutable reference to every element into `consumer`.
    ///
    /// Each raw bucket produced by the inner iterator is turned into a
    /// `&'a mut T` before being handed to rayon.
    #[cfg_attr(feature = "inline-more", inline)]
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        // NOTE(review): soundness relies on every bucket being yielded at
        // most once (so the `&mut T`s never alias) and on the buckets staying
        // valid for `'a` — confirm against `RawParIter`.
        let exclusive_refs = self.inner.map(|bucket| unsafe { bucket.as_mut() });
        exclusive_refs.drive_unindexed(consumer)
    }
}

impl<T: fmt::Debug> fmt::Debug for ParIterMut<'_, T> {
    /// Formats the remaining elements as a debug list.
    ///
    /// Delegates to `ParIter`'s `Debug` impl through a temporary shared view
    /// built from a clone of the inner raw iterator.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let shared_view = ParIter {
            inner: self.inner.clone(),
            marker: PhantomData,
        };
        fmt::Debug::fmt(&shared_view, f)
    }
}

/// Parallel iterator over entries of a consumed table.
///
/// This iterator is created by the [`into_par_iter`] method on [`HashTable`]
/// (provided by the [`IntoParallelIterator`] trait).
/// See its documentation for more.
///
/// [`into_par_iter`]: /hashbrown/struct.HashTable.html#method.into_par_iter
/// [`HashTable`]: /hashbrown/struct.HashTable.html
/// [`IntoParallelIterator`]: https://docs.rs/rayon/1.0/rayon/iter/trait.IntoParallelIterator.html
pub struct IntoParIter<T, A: Allocator + Clone = Global> {
// Raw by-value parallel iterator obtained from the underlying raw table.
inner: RawIntoParIter<T, A>,
}

impl<T: Send, A: Allocator + Clone + Send> ParallelIterator for IntoParIter<T, A> {
    type Item = T;

    /// Forwards `consumer` to the inner raw iterator, which yields the
    /// elements by value.
    #[cfg_attr(feature = "inline-more", inline)]
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        let Self { inner } = self;
        inner.drive_unindexed(consumer)
    }
}

impl<T: fmt::Debug, A: Allocator + Clone> fmt::Debug for IntoParIter<T, A> {
    /// Formats the remaining elements as a debug list.
    ///
    /// Delegates to `ParIter`'s `Debug` impl through a temporary shared view
    /// over the inner raw iterator.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // NOTE(review): assumes the borrowed raw iterator is only used while
        // `self` is alive, which holds within this call — confirm the exact
        // contract of `RawIntoParIter::par_iter`.
        let inner = unsafe { self.inner.par_iter() };
        let shared_view = ParIter {
            inner,
            marker: PhantomData,
        };
        fmt::Debug::fmt(&shared_view, f)
    }
}

/// Parallel draining iterator over entries of a table.
///
/// This iterator is created by the [`par_drain`] method on [`HashTable`].
/// See its documentation for more.
///
/// [`par_drain`]: /hashbrown/struct.HashTable.html#method.par_drain
/// [`HashTable`]: /hashbrown/struct.HashTable.html
pub struct ParDrain<'a, T, A: Allocator + Clone = Global> {
// Raw draining parallel iterator obtained from the underlying raw table.
inner: RawParDrain<'a, T, A>,
}

impl<T: Send, A: Allocator + Clone + Sync> ParallelIterator for ParDrain<'_, T, A> {
    type Item = T;

    /// Forwards `consumer` to the inner raw draining iterator, which yields
    /// the elements by value.
    #[cfg_attr(feature = "inline-more", inline)]
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        let Self { inner } = self;
        inner.drive_unindexed(consumer)
    }
}

impl<T: fmt::Debug, A: Allocator + Clone> fmt::Debug for ParDrain<'_, T, A> {
    /// Formats the elements still to be drained as a debug list.
    ///
    /// Delegates to `ParIter`'s `Debug` impl through a temporary shared view
    /// over the inner raw iterator.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // NOTE(review): assumes the borrowed raw iterator is only used while
        // `self` is alive, which holds within this call — confirm the exact
        // contract of `RawParDrain::par_iter`.
        let inner = unsafe { self.inner.par_iter() };
        let shared_view = ParIter {
            inner,
            marker: PhantomData,
        };
        fmt::Debug::fmt(&shared_view, f)
    }
}

impl<T: Send, A: Allocator + Clone> HashTable<T, A> {
    /// Consumes (potentially in parallel) all values in an arbitrary order,
    /// while preserving the table's allocated memory for reuse.
    #[cfg_attr(feature = "inline-more", inline)]
    pub fn par_drain(&mut self) -> ParDrain<'_, T, A> {
        let inner = self.table.par_drain();
        ParDrain { inner }
    }
}

impl<T: Send, A: Allocator + Clone + Send> IntoParallelIterator for HashTable<T, A> {
    type Item = T;
    type Iter = IntoParIter<T, A>;

    /// Consumes the table and returns a parallel iterator that yields its
    /// elements by value.
    #[cfg_attr(feature = "inline-more", inline)]
    fn into_par_iter(self) -> Self::Iter {
        let inner = self.table.into_par_iter();
        IntoParIter { inner }
    }
}

impl<'a, T: Sync, A: Allocator + Clone> IntoParallelIterator for &'a HashTable<T, A> {
    type Item = &'a T;
    type Iter = ParIter<'a, T>;

    /// Returns a parallel iterator yielding shared references to the
    /// table's elements for the duration of the `'a` borrow.
    #[cfg_attr(feature = "inline-more", inline)]
    fn into_par_iter(self) -> Self::Iter {
        // NOTE(review): the raw iterator has no lifetime of its own; the
        // returned `ParIter<'a, T>` is what keeps it tied to the `&'a` borrow
        // of the table — confirm this matches `RawTable::par_iter`'s contract.
        let inner = unsafe { self.table.par_iter() };
        ParIter {
            inner,
            marker: PhantomData,
        }
    }
}

impl<'a, T: Send, A: Allocator + Clone> IntoParallelIterator for &'a mut HashTable<T, A> {
    type Item = &'a mut T;
    type Iter = ParIterMut<'a, T>;

    /// Returns a parallel iterator yielding mutable references to the
    /// table's elements for the duration of the `'a` exclusive borrow.
    #[cfg_attr(feature = "inline-more", inline)]
    fn into_par_iter(self) -> Self::Iter {
        // NOTE(review): the raw iterator has no lifetime of its own; the
        // returned `ParIterMut<'a, T>` is what keeps it tied to the `&'a mut`
        // borrow of the table — confirm this matches `RawTable::par_iter`'s
        // contract.
        let inner = unsafe { self.table.par_iter() };
        ParIterMut {
            inner,
            marker: PhantomData,
        }
    }
}

// Tests for the rayon parallel iterators over `HashTable`.
#[cfg(test)]
mod test_par_table {
use alloc::vec::Vec;
use core::sync::atomic::{AtomicUsize, Ordering};

use rayon::prelude::*;

use crate::{
hash_map::{make_hash, DefaultHashBuilder},
hash_table::HashTable,
};

// Inserts the values 0..32 and checks that `par_iter` visits all of them.
#[test]
fn test_iterate() {
let hasher = DefaultHashBuilder::default();
let mut a = HashTable::new();
for i in 0..32 {
a.insert_unchecked(make_hash(&hasher, &i), i, |x| make_hash(&hasher, x));
}
// Each visited value `k` sets bit `k`, so the low 32 bits are all set
// only if every value in 0..32 was seen.
let observed = AtomicUsize::new(0);
a.par_iter().for_each(|k| {
observed.fetch_or(1 << *k, Ordering::Relaxed);
});
assert_eq!(observed.into_inner(), 0xFFFF_FFFF);
}

// Checks that consuming the table with `into_par_iter` yields every element.
#[test]
fn test_move_iter() {
let hasher = DefaultHashBuilder::default();
let hs = {
let mut hs = HashTable::new();

hs.insert_unchecked(make_hash(&hasher, &'a'), 'a', |x| make_hash(&hasher, x));
hs.insert_unchecked(make_hash(&hasher, &'b'), 'b', |x| make_hash(&hasher, x));

hs
};

// The test does not depend on element order: either ordering is accepted.
let v = hs.into_par_iter().collect::<Vec<char>>();
assert!(v == ['a', 'b'] || v == ['b', 'a']);
}
}
16 changes: 16 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ mod map;
mod rustc_entry;
mod scopeguard;
mod set;
mod table;

pub mod hash_map {
//! A hash map implemented with quadratic probing and SIMD lookup.
Expand Down Expand Up @@ -113,9 +114,24 @@ pub mod hash_set {
pub use crate::external_trait_impls::rayon::set::*;
}
}
pub mod hash_table {
//! A hash table implemented with quadratic probing and SIMD lookup.
pub use crate::table::*;

#[cfg(feature = "rayon")]
/// [rayon]-based parallel iterator types for hash tables.
/// You will rarely need to interact with it directly unless you have need
/// to name one of the iterator types.
///
/// [rayon]: https://docs.rs/rayon/1.0/rayon
pub mod rayon {
pub use crate::external_trait_impls::rayon::table::*;
}
}

pub use crate::map::HashMap;
pub use crate::set::HashSet;
pub use crate::table::HashTable;

#[cfg(feature = "equivalent")]
pub use equivalent::Equivalent;
Expand Down
34 changes: 6 additions & 28 deletions src/map.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use crate::raw::{Allocator, Bucket, Global, RawDrain, RawIntoIter, RawIter, RawTable};
use crate::raw::{
Allocator, Bucket, Global, RawDrain, RawExtractIf, RawIntoIter, RawIter, RawTable,
};
use crate::{Equivalent, TryReserveError};
use core::borrow::Borrow;
use core::fmt::{self, Debug};
Expand Down Expand Up @@ -977,7 +979,7 @@ impl<K, V, S, A: Allocator + Clone> HashMap<K, V, S, A> {
{
ExtractIf {
f,
inner: ExtractIfInner {
inner: RawExtractIf {
iter: unsafe { self.table.iter() },
table: &mut self.table,
},
Expand Down Expand Up @@ -2722,7 +2724,7 @@ where
F: FnMut(&K, &mut V) -> bool,
{
f: F,
inner: ExtractIfInner<'a, K, V, A>,
inner: RawExtractIf<'a, (K, V), A>,
}

impl<K, V, F, A> Iterator for ExtractIf<'_, K, V, F, A>
Expand All @@ -2734,7 +2736,7 @@ where

#[cfg_attr(feature = "inline-more", inline)]
fn next(&mut self) -> Option<Self::Item> {
self.inner.next(&mut self.f)
self.inner.next(|&mut (ref k, ref mut v)| (self.f)(k, v))
}

#[inline]
Expand All @@ -2745,30 +2747,6 @@ where

impl<K, V, F> FusedIterator for ExtractIf<'_, K, V, F> where F: FnMut(&K, &mut V) -> bool {}

/// Portions of `ExtractIf` shared with `set::ExtractIf`
pub(super) struct ExtractIfInner<'a, K, V, A: Allocator + Clone> {
pub iter: RawIter<(K, V)>,
pub table: &'a mut RawTable<(K, V), A>,
}

impl<K, V, A: Allocator + Clone> ExtractIfInner<'_, K, V, A> {
#[cfg_attr(feature = "inline-more", inline)]
pub(super) fn next<F>(&mut self, f: &mut F) -> Option<(K, V)>
where
F: FnMut(&K, &mut V) -> bool,
{
unsafe {
for item in &mut self.iter {
let &mut (ref key, ref mut value) = item.as_mut();
if f(key, value) {
return Some(self.table.remove(item).0);
}
}
}
None
}
}

/// A mutable iterator over the values of a `HashMap` in arbitrary order.
/// The iterator element type is `&'a mut V`.
///
Expand Down
22 changes: 22 additions & 0 deletions src/raw/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3710,6 +3710,28 @@ impl Iterator for RawIterHashInner {
}
}

/// State shared by the `extract_if` style iterators: a raw iterator over a
/// table's buckets together with exclusive access to that table, so that
/// matching elements can be removed while iterating.
pub(crate) struct RawExtractIf<'a, T, A: Allocator + Clone> {
// Raw iterator over the buckets still to be examined.
pub iter: RawIter<T>,
// The table that matching elements are removed from.
pub table: &'a mut RawTable<T, A>,
}

impl<T, A: Allocator + Clone> RawExtractIf<'_, T, A> {
    /// Advances the raw iterator until `f` returns `true` for an element,
    /// then removes that element from the table and returns it.
    ///
    /// `f` receives a mutable reference to each element it inspects.
    /// Returns `None` once the iterator is exhausted without a match.
    #[cfg_attr(feature = "inline-more", inline)]
    pub(crate) fn next<F>(&mut self, mut f: F) -> Option<T>
    where
        F: FnMut(&mut T) -> bool,
    {
        // NOTE(review): assumes `iter` was created from `table` and that
        // removing a bucket the iterator has already yielded does not
        // invalidate the rest of the iteration — confirm against `RawIter`
        // and `RawTable::remove`.
        unsafe {
            let matched = self.iter.find(|bucket| f(bucket.as_mut()))?;
            let (value, _) = self.table.remove(matched);
            Some(value)
        }
    }
}

#[cfg(test)]
mod test_map {
use super::*;
Expand Down
Loading

0 comments on commit 763ccf4

Please sign in to comment.