From dbbc00ef571fbcea01a330349c94d95bd849cdaa Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Wed, 6 May 2026 14:43:40 +0200 Subject: [PATCH 01/23] add normal iterator --- .../hash-sorted-map/benchmarks/performance.rs | 105 ++++- crates/hash-sorted-map/src/hash_sorted_map.rs | 22 +- crates/hash-sorted-map/src/iter.rs | 401 ++++++++++++++++++ crates/hash-sorted-map/src/lib.rs | 2 + 4 files changed, 519 insertions(+), 11 deletions(-) create mode 100644 crates/hash-sorted-map/src/iter.rs diff --git a/crates/hash-sorted-map/benchmarks/performance.rs b/crates/hash-sorted-map/benchmarks/performance.rs index 5a04801..ebd204d 100644 --- a/crates/hash-sorted-map/benchmarks/performance.rs +++ b/crates/hash-sorted-map/benchmarks/performance.rs @@ -291,11 +291,114 @@ fn bench_count(c: &mut Criterion) { group.finish(); } +fn bench_iter(c: &mut Criterion) { + let trigrams = trigrams(); + + let mut group = c.benchmark_group("iter_1000_trigrams"); + + group.bench_function("hashbrown+Identity iter()", |b| { + b.iter_batched( + || { + let mut map = + hashbrown::HashMap::::with_capacity_and_hasher( + trigrams.len(), + Default::default(), + ); + for (i, &key) in trigrams.iter().enumerate() { + map.insert(key, i); + } + map + }, + |map| { + let mut sum = 0usize; + for (&k, &v) in &map { + sum = sum.wrapping_add(v).wrapping_add(k as usize); + } + sum + }, + BatchSize::SmallInput, + ); + }); + + group.bench_function("HashSortedMap iter()", |b| { + b.iter_batched( + || { + let mut map = HashSortedMap::with_capacity_and_hasher( + trigrams.len(), + IdentityBuildHasher::default(), + ); + for (i, &key) in trigrams.iter().enumerate() { + map.insert(key, i); + } + map + }, + |map| { + let mut sum = 0usize; + for (&k, &v) in &map { + sum = sum.wrapping_add(v).wrapping_add(k as usize); + } + sum + }, + BatchSize::SmallInput, + ); + }); + + group.bench_function("hashbrown+Identity into_iter()", |b| { + b.iter_batched( + || { + let mut map = + hashbrown::HashMap::::with_capacity_and_hasher( 
+ trigrams.len(), + Default::default(), + ); + for (i, &key) in trigrams.iter().enumerate() { + map.insert(key, i); + } + map + }, + |map| { + let mut sum = 0usize; + for (k, v) in map { + sum = sum.wrapping_add(v).wrapping_add(k as usize); + } + sum + }, + BatchSize::SmallInput, + ); + }); + + group.bench_function("HashSortedMap into_iter()", |b| { + b.iter_batched( + || { + let mut map = HashSortedMap::with_capacity_and_hasher( + trigrams.len(), + IdentityBuildHasher::default(), + ); + for (i, &key) in trigrams.iter().enumerate() { + map.insert(key, i); + } + map + }, + |map| { + let mut sum = 0usize; + for (k, v) in map { + sum = sum.wrapping_add(v).wrapping_add(k as usize); + } + sum + }, + BatchSize::SmallInput, + ); + }); + + group.finish(); +} + criterion_group!( benches, bench_insert, bench_reinsert, bench_grow, - bench_count + bench_count, + bench_iter ); criterion_main!(benches); diff --git a/crates/hash-sorted-map/src/hash_sorted_map.rs b/crates/hash-sorted-map/src/hash_sorted_map.rs index 26a4ecd..abc268f 100644 --- a/crates/hash-sorted-map/src/hash_sorted_map.rs +++ b/crates/hash-sorted-map/src/hash_sorted_map.rs @@ -6,7 +6,7 @@ use std::marker::PhantomData; use super::group_ops::{self, CTRL_EMPTY, GROUP_SIZE}; -const NO_OVERFLOW: u32 = u32::MAX; +pub(crate) const NO_OVERFLOW: u32 = u32::MAX; // ── Helpers ───────────────────────────────────────────────────────────────── @@ -20,11 +20,11 @@ fn slot_hint(hash: u64) -> usize { ((hash >> 7) & (GROUP_SIZE as u64 - 1)) as usize } -struct Group { - ctrl: [u8; GROUP_SIZE], - keys: [MaybeUninit; GROUP_SIZE], - values: [MaybeUninit; GROUP_SIZE], - overflow: u32, +pub(crate) struct Group { + pub(crate) ctrl: [u8; GROUP_SIZE], + pub(crate) keys: [MaybeUninit; GROUP_SIZE], + pub(crate) values: [MaybeUninit; GROUP_SIZE], + pub(crate) overflow: u32, } impl Group { @@ -43,10 +43,10 @@ impl Group { /// Uses NEON on aarch64, SSE2 on x86_64, scalar fallback elsewhere. 
/// Generic over key type `K`, value type `V`, and hash builder `S`. pub struct HashSortedMap { - groups: Box<[Group]>, - num_groups: u32, - n_bits: u32, - len: usize, + pub(crate) groups: Box<[Group]>, + pub(crate) num_groups: u32, + pub(crate) n_bits: u32, + pub(crate) len: usize, hash_builder: S, } @@ -592,6 +592,8 @@ impl Drop for HashSortedMap { } } +// Re-export `CTRL_EMPTY` for the `iter` module. + #[cfg(test)] mod tests { use std::hash::{BuildHasher, Hasher}; diff --git a/crates/hash-sorted-map/src/iter.rs b/crates/hash-sorted-map/src/iter.rs new file mode 100644 index 0000000..e155c82 --- /dev/null +++ b/crates/hash-sorted-map/src/iter.rs @@ -0,0 +1,401 @@ +use std::marker::PhantomData; +use std::mem::ManuallyDrop; + +use super::group_ops::{self}; +use super::hash_sorted_map::{Group, HashSortedMap, NO_OVERFLOW}; + +/// State shared by `Iter`, `IterMut`, and `IntoIter`: tracks which primary +/// group we're visiting and where we are within that group's overflow chain. +struct IterCursor { + /// Index of the next primary group to visit (0..num_primary). + primary: u32, + /// Number of primary groups (1 << n_bits). + num_primary: u32, + /// Current position within the group we're scanning: group index in the + /// groups array, and a SIMD bitmask of remaining occupied slots. + current_group: u32, + current_mask: group_ops::Mask, +} + +impl IterCursor { + fn new(map: &HashSortedMap) -> Self { + let num_primary = 1u32 << map.n_bits; + Self { + primary: 0, + num_primary, + // Start past all allocated groups so the first call falls through to + // "move to next primary group" rather than checking overflow on an + // un-scanned group 0. + current_group: map.groups.len() as u32, + current_mask: 0, + } + } + + /// Advance to the next occupied slot, returning `(group_index, slot)`. + /// Visits primary groups 0..num_primary in order; for each, follows the + /// overflow chain. Within each group, yields occupied slots via bitmask. 
+ fn next_slot(&mut self, groups: &[Group]) -> Option<(usize, usize)> { + loop { + if let Some(slot) = group_ops::next_match(&mut self.current_mask) { + return Some((self.current_group as usize, slot)); + } + // Current group exhausted — try overflow chain. + let gi = self.current_group as usize; + if gi < groups.len() && groups[gi].overflow != NO_OVERFLOW { + let next = groups[gi].overflow; + self.current_group = next; + self.current_mask = group_ops::match_full(&groups[next as usize].ctrl); + continue; + } + // No more overflow — move to next primary group. + if self.primary >= self.num_primary { + return None; + } + let gi = self.primary as usize; + self.primary += 1; + self.current_group = gi as u32; + self.current_mask = group_ops::match_full(&groups[gi].ctrl); + } + } +} + +/// Immutable iterator over `(&K, &V)` pairs. +pub struct Iter<'a, K, V> { + groups: &'a [Group], + cursor: IterCursor, +} + +impl<'a, K, V> Iterator for Iter<'a, K, V> { + type Item = (&'a K, &'a V); + fn next(&mut self) -> Option { + let (gi, slot) = self.cursor.next_slot(self.groups)?; + let group = &self.groups[gi]; + // SAFETY: slot is occupied (bitmask guarantees ctrl byte has high bit set). + unsafe { + Some(( + group.keys[slot].assume_init_ref(), + group.values[slot].assume_init_ref(), + )) + } + } +} + +/// Mutable iterator over `(&K, &mut V)` pairs. +pub struct IterMut<'a, K, V> { + groups: *mut [Group], + cursor: IterCursor, + _marker: PhantomData<&'a mut HashSortedMap>, +} + +impl<'a, K, V> Iterator for IterMut<'a, K, V> { + type Item = (&'a K, &'a mut V); + fn next(&mut self) -> Option { + // SAFETY: we use raw pointer to avoid holding multiple &mut borrows. + // The cursor guarantees each slot is yielded at most once. 
+ let groups = unsafe { &mut *self.groups }; + let (gi, slot) = self.cursor.next_slot(groups)?; + let group = &mut groups[gi]; + unsafe { + Some(( + group.keys[slot].assume_init_ref(), + group.values[slot].assume_init_mut(), + )) + } + } +} + +/// Owning iterator that yields `(K, V)` pairs and consumes the map. +pub struct IntoIter { + inner: ManuallyDrop>, + cursor: IterCursor, +} + +impl Iterator for IntoIter { + type Item = (K, V); + fn next(&mut self) -> Option { + let (gi, slot) = self.cursor.next_slot(&self.inner.groups)?; + let group = &self.inner.groups[gi]; + // SAFETY: slot is occupied (bitmask guarantees ctrl byte has high bit set). + unsafe { + Some(( + group.keys[slot].assume_init_read(), + group.values[slot].assume_init_read(), + )) + } + } + + fn size_hint(&self) -> (usize, Option) { + (0, Some(self.inner.len)) + } +} + +impl Drop for IntoIter { + fn drop(&mut self) { + // Continue iterating to drop remaining entries one by one. + while let Some((gi, slot)) = self.cursor.next_slot(&self.inner.groups) { + unsafe { + self.inner.groups[gi].keys[slot].assume_init_drop(); + self.inner.groups[gi].values[slot].assume_init_drop(); + } + } + // All entries consumed or dropped above. Set num_groups to 0 so the + // map's Drop won't try to drop them again, then let it run to free + // the groups allocation and drop hash_builder. + self.inner.num_groups = 0; + unsafe { ManuallyDrop::drop(&mut self.inner) }; + } +} + +impl HashSortedMap { + /// Returns an iterator over `(&K, &V)` pairs. + /// + /// Entries are visited in group-index order (primary groups in order of + /// hash prefix, each followed by its overflow chain). Within each group, + /// occupied slots are visited in slot order. + pub fn iter(&self) -> Iter<'_, K, V> { + Iter { + groups: &self.groups, + cursor: IterCursor::new(self), + } + } + + /// Returns a mutable iterator over `(&K, &mut V)` pairs. 
+ pub fn iter_mut(&mut self) -> IterMut<'_, K, V> { + let cursor = IterCursor::new(self); + IterMut { + groups: &mut *self.groups as *mut [Group], + cursor, + _marker: PhantomData, + } + } + + /// Consumes the map and returns an iterator over `(K, V)` pairs. + pub fn into_iter(self) -> IntoIter { + let cursor = IterCursor::new(&self); + IntoIter { + inner: ManuallyDrop::new(self), + cursor, + } + } +} + +impl IntoIterator for HashSortedMap { + type Item = (K, V); + type IntoIter = IntoIter; + fn into_iter(self) -> Self::IntoIter { + self.into_iter() + } +} + +impl<'a, K, V, S> IntoIterator for &'a HashSortedMap { + type Item = (&'a K, &'a V); + type IntoIter = Iter<'a, K, V>; + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl<'a, K, V, S> IntoIterator for &'a mut HashSortedMap { + type Item = (&'a K, &'a mut V); + type IntoIter = IterMut<'a, K, V>; + fn into_iter(self) -> Self::IntoIter { + self.iter_mut() + } +} + +#[cfg(test)] +mod tests { + use std::hash::{BuildHasher, Hasher}; + + use super::*; + + /// Degenerate hasher that returns a fixed hash code, for forcing collisions. 
+ struct FixedHasher(u64); + + impl Hasher for FixedHasher { + fn finish(&self) -> u64 { + self.0 + } + fn write(&mut self, _bytes: &[u8]) {} + } + + #[derive(Clone)] + struct FixedState(u64); + + impl BuildHasher for FixedState { + type Hasher = FixedHasher; + fn build_hasher(&self) -> FixedHasher { + FixedHasher(self.0) + } + } + + #[test] + fn iter_empty() { + let map: HashSortedMap = HashSortedMap::new(); + assert_eq!(map.iter().count(), 0); + } + + #[test] + fn iter_yields_all_entries() { + let mut map = HashSortedMap::new(); + for i in 0..100u32 { + map.insert(i, i * 10); + } + let mut collected: Vec<(u32, u32)> = map.iter().map(|(&k, &v)| (k, v)).collect(); + collected.sort(); + assert_eq!(collected.len(), 100); + for i in 0..100u32 { + assert_eq!(collected[i as usize], (i, i * 10)); + } + } + + #[test] + fn iter_with_overflow_chains() { + let mut map = HashSortedMap::with_capacity_and_hasher(1, FixedState(0xABCD)); + for i in 0..50u32 { + map.insert(i, i); + } + let collected: Vec = map.iter().map(|(&k, _)| k).collect(); + assert_eq!(collected.len(), 50); + let mut sorted = collected.clone(); + sorted.sort(); + sorted.dedup(); + assert_eq!(sorted.len(), 50); + } + + #[test] + fn iter_mut_mutates_values() { + let mut map = HashSortedMap::new(); + for i in 0..20u32 { + map.insert(i, i); + } + for (_, v) in map.iter_mut() { + *v *= 2; + } + for i in 0..20u32 { + assert_eq!(map.get(&i), Some(&(i * 2))); + } + } + + #[test] + fn into_iter_yields_all() { + let mut map = HashSortedMap::new(); + for i in 0..100u32 { + map.insert(i, i * 3); + } + let mut collected: Vec<(u32, u32)> = map.into_iter().collect(); + collected.sort(); + assert_eq!(collected.len(), 100); + for i in 0..100u32 { + assert_eq!(collected[i as usize], (i, i * 3)); + } + } + + #[test] + fn into_iter_partial_consume_then_drop() { + let mut map: HashSortedMap = HashSortedMap::new(); + for i in 0..50u32 { + map.insert(format!("key-{i}"), format!("val-{i}")); + } + let mut iter = map.into_iter(); + 
for _ in 0..10 { + let _ = iter.next(); + } + drop(iter); + } + + #[test] + fn into_iter_empty() { + let map: HashSortedMap = HashSortedMap::new(); + assert_eq!(map.into_iter().count(), 0); + } + + #[test] + fn into_iter_with_overflow() { + let mut map = HashSortedMap::with_capacity_and_hasher(1, FixedState(0)); + for i in 0..80u32 { + map.insert(i, i); + } + let collected: Vec<(u32, u32)> = map.into_iter().collect(); + assert_eq!(collected.len(), 80); + let mut keys: Vec = collected.into_iter().map(|(k, _)| k).collect(); + keys.sort(); + keys.dedup(); + assert_eq!(keys.len(), 80); + } + + #[test] + fn into_iter_after_grow() { + let mut map = HashSortedMap::with_capacity(1); + for i in 0..500u32 { + map.insert(i, i); + } + let collected: Vec<(u32, u32)> = map.into_iter().collect(); + assert_eq!(collected.len(), 500); + } + + /// Track drops to verify no leaks or double-drops. + #[test] + fn into_iter_drop_count() { + use std::cell::Cell; + use std::rc::Rc; + + #[derive(Clone)] + struct Tracked(Rc>); + impl Drop for Tracked { + fn drop(&mut self) { + self.0.set(self.0.get() + 1); + } + } + + let counter = Rc::new(Cell::new(0usize)); + let n = 100; + { + let mut map = HashSortedMap::new(); + for i in 0..n { + map.insert(i, Tracked(counter.clone())); + } + let mut iter = map.into_iter(); + for _ in 0..n / 2 { + let _ = iter.next(); + } + } + assert_eq!(counter.get(), n); + } + + #[test] + fn for_loop_ref() { + let mut map = HashSortedMap::new(); + map.insert(1, "a"); + map.insert(2, "b"); + let mut count = 0; + for (_k, _v) in &map { + count += 1; + } + assert_eq!(count, 2); + } + + #[test] + fn for_loop_mut() { + let mut map = HashSortedMap::new(); + map.insert(1u32, 10u32); + map.insert(2, 20); + for (_, v) in &mut map { + *v += 1; + } + assert_eq!(map.get(&1), Some(&11)); + assert_eq!(map.get(&2), Some(&21)); + } + + #[test] + fn for_loop_owned() { + let mut map = HashSortedMap::new(); + map.insert(1, 10); + map.insert(2, 20); + let mut sum = 0; + for (_k, v) in 
map { + sum += v; + } + assert_eq!(sum, 30); + } +} diff --git a/crates/hash-sorted-map/src/lib.rs b/crates/hash-sorted-map/src/lib.rs index 79dac69..085147e 100644 --- a/crates/hash-sorted-map/src/lib.rs +++ b/crates/hash-sorted-map/src/lib.rs @@ -1,4 +1,6 @@ mod group_ops; mod hash_sorted_map; +mod iter; pub use hash_sorted_map::{Entry, HashSortedMap, OccupiedEntry, VacantEntry}; +pub use iter::{IntoIter, Iter, IterMut}; From f6f4c213cc4b46fd96a0e546abe583baa758308c Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Thu, 7 May 2026 08:36:59 +0200 Subject: [PATCH 02/23] sorting + cleanup --- crates/hash-sorted-map/src/container.rs | 58 ++++ crates/hash-sorted-map/src/group.rs | 23 ++ crates/hash-sorted-map/src/hash_sorted_map.rs | 310 ++++++++++++------ crates/hash-sorted-map/src/iter.rs | 77 ++++- crates/hash-sorted-map/src/lib.rs | 3 + 5 files changed, 352 insertions(+), 119 deletions(-) create mode 100644 crates/hash-sorted-map/src/container.rs create mode 100644 crates/hash-sorted-map/src/group.rs diff --git a/crates/hash-sorted-map/src/container.rs b/crates/hash-sorted-map/src/container.rs new file mode 100644 index 0000000..0be6709 --- /dev/null +++ b/crates/hash-sorted-map/src/container.rs @@ -0,0 +1,58 @@ +use super::group::Group; +use super::group_ops::{CTRL_EMPTY, GROUP_SIZE}; + +/// Core storage for a hash-sorted map. Owns the group array and supports +/// iteration and drop. Does not contain a hasher — use [`HashSortedMap`] +/// for insertion and lookup. 
+pub struct HashSortedContainer { + pub(crate) groups: Box<[Group]>, + pub(crate) num_groups: u32, + pub(crate) n_bits: u32, + pub(crate) len: usize, +} + +impl HashSortedContainer { + pub(crate) fn alloc_groups(n_bits: u32) -> (Box<[Group]>, u32) { + let num_primary = 1usize << n_bits; + let total = num_primary + num_primary / 8 + 1; + let mut groups: Vec> = Vec::with_capacity(total); + groups.resize_with(total, Group::new); + (groups.into_boxed_slice(), num_primary as u32) + } + + pub(crate) fn new(n_bits: u32) -> Self { + let (groups, num_primary) = Self::alloc_groups(n_bits); + Self { + groups, + num_groups: num_primary, + n_bits, + len: 0, + } + } + + #[inline] + pub(crate) fn group_index(&self, hash: u64) -> usize { + (hash >> (64 - self.n_bits)) as usize + } + + pub fn len(&self) -> usize { + self.len + } + + pub fn is_empty(&self) -> bool { + self.len == 0 + } +} + +impl Drop for HashSortedContainer { + fn drop(&mut self) { + for group in &mut self.groups[..self.num_groups as usize] { + for i in 0..GROUP_SIZE { + if group.ctrl[i] != CTRL_EMPTY { + unsafe { group.keys[i].assume_init_drop() }; + unsafe { group.values[i].assume_init_drop() }; + } + } + } + } +} diff --git a/crates/hash-sorted-map/src/group.rs b/crates/hash-sorted-map/src/group.rs new file mode 100644 index 0000000..c1ba315 --- /dev/null +++ b/crates/hash-sorted-map/src/group.rs @@ -0,0 +1,23 @@ +use core::mem::MaybeUninit; + +use super::group_ops::{CTRL_EMPTY, GROUP_SIZE}; + +pub(crate) const NO_OVERFLOW: u32 = u32::MAX; + +pub(crate) struct Group { + pub(crate) ctrl: [u8; GROUP_SIZE], + pub(crate) keys: [MaybeUninit; GROUP_SIZE], + pub(crate) values: [MaybeUninit; GROUP_SIZE], + pub(crate) overflow: u32, +} + +impl Group { + pub(crate) fn new() -> Self { + Self { + ctrl: [CTRL_EMPTY; GROUP_SIZE], + keys: [const { MaybeUninit::uninit() }; GROUP_SIZE], + values: [const { MaybeUninit::uninit() }; GROUP_SIZE], + overflow: NO_OVERFLOW, + } + } +} diff --git 
a/crates/hash-sorted-map/src/hash_sorted_map.rs b/crates/hash-sorted-map/src/hash_sorted_map.rs index abc268f..63cfe16 100644 --- a/crates/hash-sorted-map/src/hash_sorted_map.rs +++ b/crates/hash-sorted-map/src/hash_sorted_map.rs @@ -4,9 +4,11 @@ use std::collections::hash_map::RandomState; use std::hash::{BuildHasher, Hash}; use std::marker::PhantomData; +use super::container::HashSortedContainer; +use super::group::Group; use super::group_ops::{self, CTRL_EMPTY, GROUP_SIZE}; -pub(crate) const NO_OVERFLOW: u32 = u32::MAX; +pub(crate) use super::group::NO_OVERFLOW; // ── Helpers ───────────────────────────────────────────────────────────────── @@ -20,33 +22,16 @@ fn slot_hint(hash: u64) -> usize { ((hash >> 7) & (GROUP_SIZE as u64 - 1)) as usize } -pub(crate) struct Group { - pub(crate) ctrl: [u8; GROUP_SIZE], - pub(crate) keys: [MaybeUninit; GROUP_SIZE], - pub(crate) values: [MaybeUninit; GROUP_SIZE], - pub(crate) overflow: u32, -} - -impl Group { - fn new() -> Self { - Self { - ctrl: [CTRL_EMPTY; GROUP_SIZE], - keys: [const { MaybeUninit::uninit() }; GROUP_SIZE], - values: [const { MaybeUninit::uninit() }; GROUP_SIZE], - overflow: NO_OVERFLOW, - } - } -} +// ──────────────────────────────────────────────────────────────────────── +// HashSortedMap — wraps a container with a hash builder +// ──────────────────────────────────────────────────────────────────────── /// Insertion-only hash map with SIMD group scanning. /// /// Uses NEON on aarch64, SSE2 on x86_64, scalar fallback elsewhere. /// Generic over key type `K`, value type `V`, and hash builder `S`. 
pub struct HashSortedMap { - pub(crate) groups: Box<[Group]>, - pub(crate) num_groups: u32, - pub(crate) n_bits: u32, - pub(crate) len: usize, + pub(crate) container: HashSortedContainer, hash_builder: S, } @@ -75,42 +60,93 @@ impl HashSortedMap { let adjusted = (capacity as f64 / group_ops::MAX_FILL).ceil() as usize; let min_groups = (adjusted.div_ceil(GROUP_SIZE)).max(1).next_power_of_two(); let n_bits = min_groups.trailing_zeros().max(1); - let (groups, num_primary) = Self::alloc_groups(n_bits); Self { - groups, - num_groups: num_primary, - n_bits, - len: 0, + container: HashSortedContainer::new(n_bits), hash_builder, } } - /// Allocate a fully default-initialized boxed slice sized for `n_bits` primary groups - /// plus the standard 12.5% overflow reserve. Returns the slice and the number of - /// primary groups (which is also the initial in-use count). - fn alloc_groups(n_bits: u32) -> (Box<[Group]>, u32) { - let num_primary = 1usize << n_bits; - let total = num_primary + num_primary / 8 + 1; - let mut groups: Vec> = Vec::with_capacity(total); - groups.resize_with(total, Group::new); - (groups.into_boxed_slice(), num_primary as u32) - } - - #[inline] - fn group_index(&self, hash: u64) -> usize { - (hash >> (64 - self.n_bits)) as usize - } - pub fn len(&self) -> usize { - self.len + self.container.len } pub fn is_empty(&self) -> bool { - self.len == 0 + self.container.len == 0 + } + + /// Consume the map, returning the underlying container and hash builder. + pub fn into_parts(self) -> (HashSortedContainer, S) { + // Prevent Drop from running on self — we're moving fields out. + let this = std::mem::ManuallyDrop::new(self); + unsafe { + let container = std::ptr::read(&this.container); + let hash_builder = std::ptr::read(&this.hash_builder); + (container, hash_builder) + } } } impl HashSortedMap { + /// Sort all entries within each primary group chain by their hash value. 
+ /// + /// After sorting, iteration visits entries in hash order within each + /// primary group (and since primary groups are visited in group-index + /// order, the overall iteration is in full hash order). + /// + /// This is a one-time operation intended to be called before iteration + /// or serialization. After sorting, lookups via `get()` won't work + /// correctly because the preferred `slot_hint` position might now be empty + /// breaking an invariant. + pub fn sort_by_hash(&mut self) { + let num_primary = 1usize << self.container.n_bits; + let mut buf: Vec<(u64, K, V)> = Vec::new(); + for primary_gi in 0..num_primary { + buf.clear(); + // Extract all entries from this primary group's chain. + let mut gi = primary_gi; + loop { + let group = &mut self.container.groups[gi]; + let mut full_mask = group_ops::match_full(&group.ctrl); + while let Some(slot) = group_ops::next_match(&mut full_mask) { + let key = unsafe { group.keys[slot].assume_init_read() }; + let value = unsafe { group.values[slot].assume_init_read() }; + let hash = self.hash_builder.hash_one(&key); + buf.push((hash, key, value)); + group.ctrl[slot] = CTRL_EMPTY; + } + if group.overflow == NO_OVERFLOW { + break; + } + gi = group.overflow as usize; + } + if buf.len() <= 1 { + // 0 or 1 entry — write back to slot 0 if present (already extracted). + if let Some((hash, key, value)) = buf.pop() { + let group = &mut self.container.groups[primary_gi]; + group.ctrl[0] = tag(hash); + group.keys[0] = MaybeUninit::new(key); + group.values[0] = MaybeUninit::new(value); + } + continue; + } + buf.sort_unstable_by_key(|&(hash, _, _)| hash); + // Write back in sorted order, filling slots linearly. + let mut gi = primary_gi; + let mut slot = 0; + for (hash, key, value) in buf.drain(..) 
{ + if slot == GROUP_SIZE { + slot = 0; + gi = self.container.groups[gi].overflow as usize; + } + let group = &mut self.container.groups[gi]; + group.ctrl[slot] = tag(hash); + group.keys[slot] = MaybeUninit::new(key); + group.values[slot] = MaybeUninit::new(value); + slot += 1; + } + } + } + pub fn insert(&mut self, key: K, value: V) -> Option { let hash = self.hash_builder.hash_one(&key); self.insert_hashed(hash, key, value) @@ -163,16 +199,16 @@ impl HashSortedMap { fn insert_hashed(&mut self, hash: u64, key: K, value: V) -> Option { let tag = tag(hash); let hint = slot_hint(hash); - let mut gi = self.group_index(hash); + let mut gi = self.container.group_index(hash); loop { - let group = &mut self.groups[gi]; + let group = &mut self.container.groups[gi]; // Fast path: check preferred slot. let c = group.ctrl[hint]; if c == CTRL_EMPTY { group.ctrl[hint] = tag; group.keys[hint] = MaybeUninit::new(key); group.values[hint] = MaybeUninit::new(value); - self.len += 1; + self.container.len += 1; return None; } if c == tag && unsafe { group.keys[hint].assume_init_ref() } == &key { @@ -196,7 +232,7 @@ impl HashSortedMap { group.ctrl[i] = tag; group.keys[i] = MaybeUninit::new(key); group.values[i] = MaybeUninit::new(value); - self.len += 1; + self.container.len += 1; return None; } // Group full — follow or create overflow chain. @@ -204,20 +240,20 @@ impl HashSortedMap { if overflow != NO_OVERFLOW { gi = overflow as usize; } else { - if self.num_groups as usize == self.groups.len() { + if self.container.num_groups as usize == self.container.groups.len() { self.grow(); // n_bits changed; recompute the primary group and retry. 
- gi = self.group_index(hash); + gi = self.container.group_index(hash); continue; } - let new_gi = self.num_groups as usize; - self.num_groups += 1; - self.groups[gi].overflow = new_gi as u32; - let group = &mut self.groups[new_gi]; + let new_gi = self.container.num_groups as usize; + self.container.num_groups += 1; + self.container.groups[gi].overflow = new_gi as u32; + let group = &mut self.container.groups[new_gi]; group.ctrl[hint] = tag; group.keys[hint] = MaybeUninit::new(key); group.values[hint] = MaybeUninit::new(value); - self.len += 1; + self.container.len += 1; return None; } } @@ -230,10 +266,10 @@ impl HashSortedMap { { let tag = tag(hash); let hint = slot_hint(hash); - let mut gi = self.group_index(hash); + let mut gi = self.container.group_index(hash); loop { - let group = &self.groups[gi]; + let group = &self.container.groups[gi]; // Fast path: preferred slot. let c = group.ctrl[hint]; @@ -270,10 +306,10 @@ impl HashSortedMap { fn find_or_insertion_slot(&mut self, hash: u64, key: &K) -> FindResult { let tag = tag(hash); let hint = slot_hint(hash); - let mut gi = self.group_index(hash); + let mut gi = self.container.group_index(hash); loop { - let group = &mut self.groups[gi]; + let group = &mut self.container.groups[gi]; // Fast path: preferred slot. 
let c = group.ctrl[hint]; @@ -318,17 +354,17 @@ impl HashSortedMap { fn grow(&mut self) { let old_groups = std::mem::replace( - &mut self.groups, + &mut self.container.groups, Vec::>::new().into_boxed_slice(), ); - let old_num_groups = self.num_groups as usize; - let old_len = self.len; + let old_num_groups = self.container.num_groups as usize; + let old_len = self.container.len; - self.n_bits += 1; - let (new_groups, num_primary) = Self::alloc_groups(self.n_bits); - self.groups = new_groups; - self.num_groups = num_primary; - self.len = 0; + self.container.n_bits += 1; + let (new_groups, num_primary) = HashSortedContainer::alloc_groups(self.container.n_bits); + self.container.groups = new_groups; + self.container.num_groups = num_primary; + self.container.len = 0; for group in &old_groups[..old_num_groups] { let mut full_mask = group_ops::match_full(&group.ctrl); @@ -343,14 +379,14 @@ impl HashSortedMap { // old_groups runs no destructors but does free the backing buffer. drop(old_groups); - debug_assert_eq!(self.len, old_len); + debug_assert_eq!(self.container.len, old_len); } fn insert_for_grow(&mut self, hash: u64, key_src: *const K, value_src: *const V) { let tag = tag(hash); let mut hint = slot_hint(hash); - let gi = self.group_index(hash); - let mut group = &mut self.groups[gi]; + let gi = self.container.group_index(hash); + let mut group = &mut self.container.groups[gi]; loop { if group.ctrl[hint] == CTRL_EMPTY { @@ -363,12 +399,12 @@ impl HashSortedMap { } let overflow = group.overflow; if overflow != NO_OVERFLOW { - group = &mut self.groups[overflow as usize]; + group = &mut self.container.groups[overflow as usize]; } else { - let new_gi = self.num_groups as usize; + let new_gi = self.container.num_groups as usize; group.overflow = new_gi as u32; - self.num_groups += 1; - group = &mut self.groups[new_gi]; + self.container.num_groups += 1; + group = &mut self.container.groups[new_gi]; break; } } @@ -381,7 +417,7 @@ impl HashSortedMap { .as_mut_ptr() 
.copy_from_nonoverlapping(value_src, 1); } - self.len += 1; + self.container.len += 1; } } @@ -509,16 +545,16 @@ impl<'a, K: Hash + Eq, V, S: BuildHasher> VacantEntry<'a, K, V, S> { Insertion::NeedsOverflow { tail } => { let (new_gi, new_group) = unsafe { let map = &mut *map; - if map.num_groups as usize == map.groups.len() { + if map.container.num_groups as usize == map.container.groups.len() { return insert_after_grow(map, hash, key, value); } - let new_gi = map.num_groups as usize; - map.num_groups += 1; - let new_group: *mut Group = &mut map.groups[new_gi]; + let new_gi = map.container.num_groups as usize; + map.container.num_groups += 1; + let new_group: *mut Group = &mut map.container.groups[new_gi]; (new_gi, new_group) }; unsafe { - // SAFETY: `tail` was obtained from `&mut self.groups[..]` and + // SAFETY: `tail` was obtained from `&mut self.container.groups[..]` and // remains valid because no reallocation occurred between // `entry()` and now (we hold the only `&mut self`). (*tail).overflow = new_gi as u32; @@ -529,8 +565,8 @@ impl<'a, K: Hash + Eq, V, S: BuildHasher> VacantEntry<'a, K, V, S> { let tag = tag(hash); unsafe { - (*map).len += 1; - // SAFETY: `group_ptr` points into `map.groups` and is valid for `'a`. + (*map).container.len += 1; + // SAFETY: `group_ptr` points into `map.container.groups` and is valid for `'a`. let group = &mut *group_ptr; group.ctrl[slot] = tag; group.keys[slot] = MaybeUninit::new(key); @@ -561,13 +597,13 @@ fn insert_after_grow( match map.find_or_insertion_slot(hash, &key) { FindResult::Vacant(Insertion::Empty { group, slot }) => { let tag = tag(hash); - // SAFETY: `group` points into `map.groups` and is valid for `'a`. + // SAFETY: `group` points into `map.container.groups` and is valid for `'a`. 
unsafe { let g = &mut *group; g.ctrl[slot] = tag; g.keys[slot] = MaybeUninit::new(key); g.values[slot] = MaybeUninit::new(value); - map.len += 1; + map.container.len += 1; g.values[slot].assume_init_mut() } } @@ -579,20 +615,7 @@ fn insert_after_grow( } } -impl Drop for HashSortedMap { - fn drop(&mut self) { - for group in &mut self.groups[..self.num_groups as usize] { - for i in 0..GROUP_SIZE { - if group.ctrl[i] != CTRL_EMPTY { - unsafe { group.keys[i].assume_init_drop() }; - unsafe { group.values[i].assume_init_drop() }; - } - } - } - } -} - -// Re-export `CTRL_EMPTY` for the `iter` module. +// No custom Drop needed for HashSortedMap — dropping `container` handles entries. #[cfg(test)] mod tests { @@ -661,12 +684,12 @@ mod tests { #[test] fn grow_on_overflow_exhaustion() { let mut map = HashSortedMap::with_capacity(1); - let old_n_bits = map.n_bits; + let old_n_bits = map.container.n_bits; for i in 0..100u32 { let key = i | 0xFF000000; map.insert(key, i); } - assert!(map.n_bits > old_n_bits, "should have grown"); + assert!(map.container.n_bits > old_n_bits, "should have grown"); assert_eq!(map.len(), 100); for i in 0..100u32 { let key = i | 0xFF000000; @@ -814,4 +837,89 @@ mod tests { assert_eq!(m.get(&i), Some(&i)); } } + + // ── sort_by_hash tests ────────────────────────────────────────────── + + #[test] + fn sort_by_hash_empty() { + let mut map: HashSortedMap = HashSortedMap::new(); + map.sort_by_hash(); // should not panic + assert_eq!(map.len(), 0); + } + + #[test] + fn sort_by_hash_single() { + let mut map = HashSortedMap::new(); + map.insert(42u32, "hello"); + map.sort_by_hash(); + assert_eq!(map.get(&42), Some(&"hello")); + assert_eq!(map.len(), 1); + } + + #[test] + fn sort_by_hash_preserves_entries() { + let mut map = HashSortedMap::new(); + for i in 0..200u32 { + map.insert(i, i * 10); + } + map.sort_by_hash(); + assert_eq!(map.len(), 200); + for i in 0..200u32 { + assert_eq!(map.get(&i), Some(&(i * 10)), "missing key {i}"); + } + } + + #[test] + fn 
sort_by_hash_produces_hash_order() { + use std::collections::hash_map::RandomState; + + let hasher = RandomState::new(); + let mut map = HashSortedMap::with_hasher(hasher.clone()); + for i in 0..500u32 { + map.insert(i, i); + } + map.sort_by_hash(); + // Iteration should now yield entries in hash order. + let mut prev_hash = 0u64; + let mut first = true; + for (&k, _) in &map { + let h = hasher.hash_one(&k); + if !first { + assert!(h >= prev_hash, "hash order violated: {prev_hash:#x} > {h:#x}"); + } + prev_hash = h; + first = false; + } + } + + #[test] + fn sort_by_hash_with_overflow() { + // Force overflow chains via fixed hash, then sort. + let mut map = HashSortedMap::with_capacity_and_hasher(1, FixedState(0)); + for i in 0..50u32 { + map.insert(i, i); + } + map.sort_by_hash(); + assert_eq!(map.len(), 50); + for i in 0..50u32 { + assert_eq!(map.get(&i), Some(&i), "missing key {i}"); + } + } + + #[test] + fn sort_by_hash_with_strings() { + let mut map = HashSortedMap::new(); + for i in 0..100u32 { + map.insert(format!("key-{i}"), format!("val-{i}")); + } + map.sort_by_hash(); + assert_eq!(map.len(), 100); + for i in 0..100u32 { + assert_eq!( + map.get(&format!("key-{i}")), + Some(&format!("val-{i}")), + "missing key-{i}" + ); + } + } } diff --git a/crates/hash-sorted-map/src/iter.rs b/crates/hash-sorted-map/src/iter.rs index e155c82..2ebb1c6 100644 --- a/crates/hash-sorted-map/src/iter.rs +++ b/crates/hash-sorted-map/src/iter.rs @@ -1,8 +1,10 @@ use std::marker::PhantomData; use std::mem::ManuallyDrop; +use super::container::HashSortedContainer; +use super::group::Group; use super::group_ops::{self}; -use super::hash_sorted_map::{Group, HashSortedMap, NO_OVERFLOW}; +use super::hash_sorted_map::{HashSortedMap, NO_OVERFLOW}; /// State shared by `Iter`, `IterMut`, and `IntoIter`: tracks which primary /// group we're visiting and where we are within that group's overflow chain. 
@@ -18,15 +20,12 @@ struct IterCursor { } impl IterCursor { - fn new(map: &HashSortedMap) -> Self { - let num_primary = 1u32 << map.n_bits; + fn new(container: &HashSortedContainer) -> Self { + let num_primary = 1u32 << container.n_bits; Self { primary: 0, num_primary, - // Start past all allocated groups so the first call falls through to - // "move to next primary group" rather than checking overflow on an - // un-scanned group 0. - current_group: map.groups.len() as u32, + current_group: container.groups.len() as u32, current_mask: 0, } } @@ -84,7 +83,7 @@ impl<'a, K, V> Iterator for Iter<'a, K, V> { pub struct IterMut<'a, K, V> { groups: *mut [Group], cursor: IterCursor, - _marker: PhantomData<&'a mut HashSortedMap>, + _marker: PhantomData<&'a mut HashSortedContainer>, } impl<'a, K, V> Iterator for IterMut<'a, K, V> { @@ -105,12 +104,12 @@ impl<'a, K, V> Iterator for IterMut<'a, K, V> { } /// Owning iterator that yields `(K, V)` pairs and consumes the map. -pub struct IntoIter { - inner: ManuallyDrop>, +pub struct IntoIter { + inner: ManuallyDrop>, cursor: IterCursor, } -impl Iterator for IntoIter { +impl Iterator for IntoIter { type Item = (K, V); fn next(&mut self) -> Option { let (gi, slot) = self.cursor.next_slot(&self.inner.groups)?; @@ -129,7 +128,7 @@ impl Iterator for IntoIter { } } -impl Drop for IntoIter { +impl Drop for IntoIter { fn drop(&mut self) { // Continue iterating to drop remaining entries one by one. while let Some((gi, slot)) = self.cursor.next_slot(&self.inner.groups) { @@ -139,14 +138,14 @@ impl Drop for IntoIter { } } // All entries consumed or dropped above. Set num_groups to 0 so the - // map's Drop won't try to drop them again, then let it run to free - // the groups allocation and drop hash_builder. + // container's Drop won't try to drop them again, then let it run to + // free the groups allocation. 
self.inner.num_groups = 0; unsafe { ManuallyDrop::drop(&mut self.inner) }; } } -impl HashSortedMap { +impl HashSortedContainer { /// Returns an iterator over `(&K, &V)` pairs. /// /// Entries are visited in group-index order (primary groups in order of @@ -169,8 +168,8 @@ impl HashSortedMap { } } - /// Consumes the map and returns an iterator over `(K, V)` pairs. - pub fn into_iter(self) -> IntoIter { + /// Consumes the container and returns an iterator over `(K, V)` pairs. + pub fn into_iter(self) -> IntoIter { let cursor = IterCursor::new(&self); IntoIter { inner: ManuallyDrop::new(self), @@ -179,9 +178,51 @@ impl HashSortedMap { } } +impl IntoIterator for HashSortedContainer { + type Item = (K, V); + type IntoIter = IntoIter; + fn into_iter(self) -> Self::IntoIter { + self.into_iter() + } +} + +impl<'a, K, V> IntoIterator for &'a HashSortedContainer { + type Item = (&'a K, &'a V); + type IntoIter = Iter<'a, K, V>; + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl<'a, K, V> IntoIterator for &'a mut HashSortedContainer { + type Item = (&'a K, &'a mut V); + type IntoIter = IterMut<'a, K, V>; + fn into_iter(self) -> Self::IntoIter { + self.iter_mut() + } +} + +// Delegate HashSortedMap iteration to its container. + +impl HashSortedMap { + pub fn iter(&self) -> Iter<'_, K, V> { + self.container.iter() + } + + pub fn iter_mut(&mut self) -> IterMut<'_, K, V> { + self.container.iter_mut() + } + + /// Consumes the map and returns an iterator over `(K, V)` pairs. 
+ pub fn into_iter(self) -> IntoIter { + let (container, _hash_builder) = self.into_parts(); + container.into_iter() + } +} + impl IntoIterator for HashSortedMap { type Item = (K, V); - type IntoIter = IntoIter; + type IntoIter = IntoIter; fn into_iter(self) -> Self::IntoIter { self.into_iter() } diff --git a/crates/hash-sorted-map/src/lib.rs b/crates/hash-sorted-map/src/lib.rs index 085147e..82e9102 100644 --- a/crates/hash-sorted-map/src/lib.rs +++ b/crates/hash-sorted-map/src/lib.rs @@ -1,6 +1,9 @@ +mod container; +mod group; mod group_ops; mod hash_sorted_map; mod iter; +pub use container::HashSortedContainer; pub use hash_sorted_map::{Entry, HashSortedMap, OccupiedEntry, VacantEntry}; pub use iter::{IntoIter, Iter, IterMut}; From 540b30892e76b54abebf1683ccb292518237f1b2 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Thu, 7 May 2026 15:40:03 +0200 Subject: [PATCH 03/23] sorting benchmark --- crates/hash-sorted-map/Cargo.toml | 3 + .../hash-sorted-map/benchmarks/performance.rs | 38 ++- crates/hash-sorted-map/src/hash_sorted_map.rs | 222 +++++++++++++----- 3 files changed, 197 insertions(+), 66 deletions(-) diff --git a/crates/hash-sorted-map/Cargo.toml b/crates/hash-sorted-map/Cargo.toml index 84ffa02..7f17b58 100644 --- a/crates/hash-sorted-map/Cargo.toml +++ b/crates/hash-sorted-map/Cargo.toml @@ -8,3 +8,6 @@ repository = "https://github.com/github/rust-gems" license = "MIT" keywords = ["hashmap", "sorted", "merge", "simd"] categories = ["algorithms", "data-structures"] + +[dependencies] +smallvec = "1" diff --git a/crates/hash-sorted-map/benchmarks/performance.rs b/crates/hash-sorted-map/benchmarks/performance.rs index ebd204d..fff9cbe 100644 --- a/crates/hash-sorted-map/benchmarks/performance.rs +++ b/crates/hash-sorted-map/benchmarks/performance.rs @@ -1,3 +1,5 @@ +use std::hash::BuildHasher; + use criterion::{criterion_group, criterion_main, BatchSize, Criterion}; use hash_sorted_map::HashSortedMap; use 
hash_sorted_map_benchmarks::{random_trigram_hashes, IdentityBuildHasher}; @@ -393,12 +395,46 @@ fn bench_iter(c: &mut Criterion) { group.finish(); } +fn bench_sort(c: &mut Criterion) { + let keys = random_trigram_hashes(100_000); + let hasher = IdentityBuildHasher::default(); + let mut group = c.benchmark_group("sort_100000_trigrams"); + + group.bench_function("Vec::sort_unstable", |b| { + b.iter(|| { + let mut vec: Vec<_> = keys + .iter() + .enumerate() + .map(|(i, &key)| (key, i)) + .collect(); + vec.sort_unstable_by_key(|&(key, _)| hasher.hash_one(key)); + vec + }); + }); + + group.bench_function("HashSortedMap sort_by_hash", |b| { + b.iter(|| { + let mut map = HashSortedMap::with_capacity_and_hasher( + keys.len(), + IdentityBuildHasher::default(), + ); + for (i, &key) in keys.iter().enumerate() { + map.insert(key, i); + } + map.sort_by_hash() + }); + }); + + group.finish(); +} + criterion_group!( benches, bench_insert, bench_reinsert, bench_grow, bench_count, - bench_iter + bench_iter, + bench_sort ); criterion_main!(benches); diff --git a/crates/hash-sorted-map/src/hash_sorted_map.rs b/crates/hash-sorted-map/src/hash_sorted_map.rs index 63cfe16..427194d 100644 --- a/crates/hash-sorted-map/src/hash_sorted_map.rs +++ b/crates/hash-sorted-map/src/hash_sorted_map.rs @@ -4,6 +4,8 @@ use std::collections::hash_map::RandomState; use std::hash::{BuildHasher, Hash}; use std::marker::PhantomData; +use smallvec::SmallVec; + use super::container::HashSortedContainer; use super::group::Group; use super::group_ops::{self, CTRL_EMPTY, GROUP_SIZE}; @@ -87,64 +89,79 @@ impl HashSortedMap { } impl HashSortedMap { - /// Sort all entries within each primary group chain by their hash value. + /// Sort all entries within each primary group chain by their hash value + /// and return the underlying container. 
/// /// After sorting, iteration visits entries in hash order within each /// primary group (and since primary groups are visited in group-index /// order, the overall iteration is in full hash order). /// - /// This is a one-time operation intended to be called before iteration - /// or serialization. After sorting, lookups via `get()` won't work - /// correctly because the preferred `slot_hint` position might now be empty - /// breaking an invariant. - pub fn sort_by_hash(&mut self) { + /// Consumes the map because lookups via `get()` won't work correctly + /// after sorting (the preferred `slot_hint` position might now be empty, + /// breaking an invariant). + /// + /// # Complexity + /// + /// Each of `n` elements hashes uniformly into one of `m` primary groups, + /// so chain lengths follow `X_i ~ Binomial(n, 1/m)` with `E[X_i] = n/m`. + /// With a quadratic sort per chain the total expected cost is: + /// + /// ```text + /// Σ E[X_i²] = m · (Var[X_i] + E[X_i]²) + /// = m · (n/m · (1 − 1/m) + n²/m²) + /// = n · (1 − 1/m) + n²/m + /// ``` + /// + /// Dividing by `n` gives the expected cost per element: `1 + n/m` (for + /// `m ≫ 1`). Since `n/m` is the average chain length, bounded by + /// `GROUP_SIZE / MAX_FILL ≈ 16`, the per-element cost stays constant. + pub fn sort_by_hash(mut self) -> HashSortedContainer { let num_primary = 1usize << self.container.n_bits; - let mut buf: Vec<(u64, K, V)> = Vec::new(); + let mut chain: SmallVec<[u32; 4]> = SmallVec::new(); + let mut hashes: SmallVec<[u64; 16]> = SmallVec::new(); + for primary_gi in 0..num_primary { - buf.clear(); - // Extract all entries from this primary group's chain. + chain.clear(); + hashes.clear(); + + // Collect group indices in this chain. 
let mut gi = primary_gi; loop { - let group = &mut self.container.groups[gi]; - let mut full_mask = group_ops::match_full(&group.ctrl); - while let Some(slot) = group_ops::next_match(&mut full_mask) { - let key = unsafe { group.keys[slot].assume_init_read() }; - let value = unsafe { group.values[slot].assume_init_read() }; - let hash = self.hash_builder.hash_one(&key); - buf.push((hash, key, value)); - group.ctrl[slot] = CTRL_EMPTY; - } - if group.overflow == NO_OVERFLOW { + chain.push(gi as u32); + let overflow = self.container.groups[gi].overflow; + if overflow == NO_OVERFLOW { break; } - gi = group.overflow as usize; + gi = overflow as usize; } - if buf.len() <= 1 { - // 0 or 1 entry — write back to slot 0 if present (already extracted). - if let Some((hash, key, value)) = buf.pop() { - let group = &mut self.container.groups[primary_gi]; - group.ctrl[0] = tag(hash); - group.keys[0] = MaybeUninit::new(key); - group.values[0] = MaybeUninit::new(value); + // All groups before the last are fully packed (overflow is only + // allocated when the previous group is full). Compute hashes for + // those directly. + for &cgi in &chain[..chain.len() - 1] { + let g = &self.container.groups[cgi as usize]; + for slot in 0..GROUP_SIZE { + let hash = self + .hash_builder + .hash_one(unsafe { g.keys[slot].assume_init_ref() }); + hashes.push(hash); } - continue; } - buf.sort_unstable_by_key(|&(hash, _, _)| hash); - // Write back in sorted order, filling slots linearly. - let mut gi = primary_gi; - let mut slot = 0; - for (hash, key, value) in buf.drain(..) { - if slot == GROUP_SIZE { - slot = 0; - gi = self.container.groups[gi].overflow as usize; + // The last group may have gaps — compact it to the front. + let last_gi = *chain.last().unwrap() as usize; + compact_last_group(&mut self.container.groups[last_gi], &self.hash_builder, &mut hashes); + let n = hashes.len(); + // Insertion sort by hash. 
+ for i in 1..n { + let mut j = i; + while j > 0 && hashes[j - 1] > hashes[j] { + hashes.swap(j - 1, j); + swap_chain_slots(&mut self.container.groups, &chain, j - 1, j); + j -= 1; } - let group = &mut self.container.groups[gi]; - group.ctrl[slot] = tag(hash); - group.keys[slot] = MaybeUninit::new(key); - group.values[slot] = MaybeUninit::new(value); - slot += 1; } + } + self.container } pub fn insert(&mut self, key: K, value: V) -> Option { @@ -421,6 +438,70 @@ impl HashSortedMap { } } +// ── Chain-slot helpers for sort_by_hash ───────────────────────────────── + +/// Map a flat position (0..chain.len()*GROUP_SIZE) to a (group_index, slot). +#[inline] +fn chain_slot(chain: &[u32], pos: usize) -> (usize, usize) { + (chain[pos / GROUP_SIZE] as usize, pos % GROUP_SIZE) +} + +/// Compact the last group in a chain: move all occupied entries to slots +/// 0..n and clear the rest. Computes hashes for each occupied entry and +/// appends them to `hashes`. +fn compact_last_group( + group: &mut Group, + hash_builder: &S, + hashes: &mut SmallVec<[u64; 16]>, +) { + let mut write = 0usize; + let mut full_mask = group_ops::match_full(&group.ctrl); + while let Some(read) = group_ops::next_match(&mut full_mask) { + let hash = hash_builder.hash_one(unsafe { group.keys[read].assume_init_ref() }); + hashes.push(hash); + if read != write { + unsafe { + group.keys[write] = std::ptr::read(&group.keys[read]); + group.values[write] = std::ptr::read(&group.values[read]); + } + } + write += 1; + } + // Fix ctrl bytes: only the top bit matters (full vs empty). + for slot in 0..write { + group.ctrl[slot] = 0x80; + } + for slot in write..GROUP_SIZE { + group.ctrl[slot] = CTRL_EMPTY; + } +} + +/// Swap the key and value between two flat positions in a chain (ctrl bytes are not swapped).
+fn swap_chain_slots( + groups: &mut [Group], + chain: &[u32], + a: usize, + b: usize, +) { + let (gi_a, slot_a) = chain_slot(chain, a); + let (gi_b, slot_b) = chain_slot(chain, b); + if gi_a == gi_b { + let g = &mut groups[gi_a]; + g.keys.swap(slot_a, slot_b); + g.values.swap(slot_a, slot_b); + } else { + let (ga, gb) = if gi_a < gi_b { + let (left, right) = groups.split_at_mut(gi_b); + (&mut left[gi_a], &mut right[0]) + } else { + let (left, right) = groups.split_at_mut(gi_a); + (&mut right[0], &mut left[gi_b]) + }; + std::mem::swap(&mut ga.keys[slot_a], &mut gb.keys[slot_b]); + std::mem::swap(&mut ga.values[slot_a], &mut gb.values[slot_b]); + } +} + // ──────────────────────────────────────────────────────────────────────── // Entry API // ──────────────────────────────────────────────────────────────────────── @@ -842,18 +923,19 @@ mod tests { #[test] fn sort_by_hash_empty() { - let mut map: HashSortedMap = HashSortedMap::new(); - map.sort_by_hash(); // should not panic - assert_eq!(map.len(), 0); + let map: HashSortedMap = HashSortedMap::new(); + let container = map.sort_by_hash(); + assert_eq!(container.len, 0); } #[test] fn sort_by_hash_single() { let mut map = HashSortedMap::new(); map.insert(42u32, "hello"); - map.sort_by_hash(); - assert_eq!(map.get(&42), Some(&"hello")); - assert_eq!(map.len(), 1); + let container = map.sort_by_hash(); + assert_eq!(container.len, 1); + let entries: Vec<_> = container.into_iter().collect(); + assert_eq!(entries, vec![(42, "hello")]); } #[test] @@ -862,10 +944,12 @@ mod tests { for i in 0..200u32 { map.insert(i, i * 10); } - map.sort_by_hash(); - assert_eq!(map.len(), 200); + let container = map.sort_by_hash(); + assert_eq!(container.len, 200); + let mut entries: Vec<_> = container.into_iter().collect(); + entries.sort_by_key(|&(k, _)| k); for i in 0..200u32 { - assert_eq!(map.get(&i), Some(&(i * 10)), "missing key {i}"); + assert_eq!(entries[i as usize], (i, i * 10), "missing key {i}"); } } @@ -878,14 +962,14 @@ mod tests 
{ for i in 0..500u32 { map.insert(i, i); } - map.sort_by_hash(); + let container = map.sort_by_hash(); // Iteration should now yield entries in hash order. let mut prev_hash = 0u64; let mut first = true; - for (&k, _) in &map { + for (&k, _) in &container { let h = hasher.hash_one(&k); if !first { - assert!(h >= prev_hash, "hash order violated: {prev_hash:#x} > {h:#x}"); + assert!(h > prev_hash, "hash order violated: {prev_hash:#x} > {h:#x}"); } prev_hash = h; first = false; @@ -899,27 +983,35 @@ mod tests { for i in 0..50u32 { map.insert(i, i); } - map.sort_by_hash(); - assert_eq!(map.len(), 50); + let container = map.sort_by_hash(); + assert_eq!(container.len, 50); + let mut entries: Vec<_> = container.into_iter().collect(); + entries.sort_by_key(|&(k, _)| k); for i in 0..50u32 { - assert_eq!(map.get(&i), Some(&i), "missing key {i}"); + assert_eq!(entries[i as usize], (i, i), "missing key {i}"); } } #[test] fn sort_by_hash_with_strings() { - let mut map = HashSortedMap::new(); + use std::collections::hash_map::RandomState; + + let hasher = RandomState::new(); + let mut map = HashSortedMap::with_hasher(hasher.clone()); for i in 0..100u32 { map.insert(format!("key-{i}"), format!("val-{i}")); } - map.sort_by_hash(); - assert_eq!(map.len(), 100); - for i in 0..100u32 { - assert_eq!( - map.get(&format!("key-{i}")), - Some(&format!("val-{i}")), - "missing key-{i}" - ); + let container = map.sort_by_hash(); + assert_eq!(container.len, 100); + let mut prev_hash = 0u64; + let mut first = true; + for (k, _) in &container { + let h = hasher.hash_one(k); + if !first { + assert!(h > prev_hash, "hash order violated: {prev_hash:#x} > {h:#x}"); + } + prev_hash = h; + first = false; } } } From 0efe8e0e769cedb9418445da07d6f479bb3b520a Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Thu, 7 May 2026 17:17:17 +0200 Subject: [PATCH 04/23] more benches --- crates/hash-sorted-map/benchmarks/Cargo.toml | 1 + .../hash-sorted-map/benchmarks/performance.rs | 154 +++++++++++++++++- 
crates/hash-sorted-map/src/hash_sorted_map.rs | 37 +++-- 3 files changed, 179 insertions(+), 13 deletions(-) diff --git a/crates/hash-sorted-map/benchmarks/Cargo.toml b/crates/hash-sorted-map/benchmarks/Cargo.toml index 9ee37dc..91019a4 100644 --- a/crates/hash-sorted-map/benchmarks/Cargo.toml +++ b/crates/hash-sorted-map/benchmarks/Cargo.toml @@ -21,3 +21,4 @@ ahash = "0.8" hashbrown = "0.15" foldhash = "0.1" fnv = "1" +itertools = "0.14" diff --git a/crates/hash-sorted-map/benchmarks/performance.rs b/crates/hash-sorted-map/benchmarks/performance.rs index fff9cbe..b6e24fd 100644 --- a/crates/hash-sorted-map/benchmarks/performance.rs +++ b/crates/hash-sorted-map/benchmarks/performance.rs @@ -2,7 +2,8 @@ use std::hash::BuildHasher; use criterion::{criterion_group, criterion_main, BatchSize, Criterion}; use hash_sorted_map::HashSortedMap; -use hash_sorted_map_benchmarks::{random_trigram_hashes, IdentityBuildHasher}; +use hash_sorted_map_benchmarks::{folded_multiply, random_trigram_hashes, IdentityBuildHasher}; +use rand::RngExt; fn trigrams() -> Vec { random_trigram_hashes(1000) @@ -428,6 +429,154 @@ fn bench_sort(c: &mut Criterion) { group.finish(); } +fn bench_merge_sort(c: &mut Criterion) { + const NUM_MAPS: usize = 100; + const KEYS_PER_MAP: usize = 100_000; + + // Pre-generate 100 key vectors with random u32 values scrambled via folded_multiply. + let maps_data: Vec> = (0..NUM_MAPS) + .map(|_| { + let mut rng = rand::rng(); + (0..KEYS_PER_MAP) + .map(|_| folded_multiply(rng.random_range(0..1_000_000u32) as u64, 0x243f6a8885a308d3) as u32) + .collect() + }) + .collect(); + + let hasher = IdentityBuildHasher::default(); + let mut group = c.benchmark_group("merge_100_maps_sorted"); + group.sample_size(10); + + // ── 1. 
HashSortedMap: merge all, then sort_by_hash ────────────── + group.bench_function("HashSortedMap merge + sort_by_hash", |b| { + b.iter(|| { + let mut map: HashSortedMap = + HashSortedMap::with_hasher(IdentityBuildHasher::default()); + for keys in &maps_data { + for &key in keys { + *map.entry(key).or_default() += 1u32; + } + } + map.sort_by_hash() + }); + }); + + // ── 2. K-way merge over pre-sorted vectors ────────────────────── + group.bench_function("k-way merge sorted vecs", |b| { + use itertools::Itertools; + + b.iter(|| { + // Phase 1: build per-map sorted (hash, key, count) vectors. + let sorted_vecs: Vec> = maps_data + .iter() + .map(|keys| { + let mut counts = std::collections::HashMap::::with_hasher(IdentityBuildHasher::default()); + for &key in keys { + *counts.entry(key).or_default() += 1; + } + let mut vec: Vec<(u64, u32, u32)> = counts + .into_iter() + .map(|(k, v)| (hasher.hash_one(k), k, v)) + .collect(); + vec.sort_unstable_by_key(|&(h, _, _)| h); + vec + }) + .collect(); + + // Phase 2: k-merge + group_by to aggregate counts. + let result: Vec<(u32, u32)> = sorted_vecs + .into_iter() + .map(|v| v.into_iter()) + .kmerge_by(|a, b| a.0 <= b.0) + .chunk_by(|&(_, key, _)| key) + .into_iter() + .map(|(key, group)| (key, group.map(|(_, _, c)| c).sum())) + .collect(); + result + }); + }); + + // ── 3. hashbrown HashMap merge, then sort into Vec ────────────── + group.bench_function("hashbrown merge + Vec sort", |b| { + b.iter(|| { + let mut map = + hashbrown::HashMap::::with_hasher(IdentityBuildHasher::default()); + for keys in &maps_data { + for &key in keys { + *map.entry(key).or_default() += 1; + } + } + let mut vec: Vec<(u32, u32)> = map.into_iter().collect(); + vec.sort_unstable_by_key(|&(key, _)| hasher.hash_one(key)); + vec + }); + }); + + // ── 4. 
hashbrown HashMap merge only (no sort) ─────────────────── + group.bench_function("hashbrown merge", |b| { + b.iter(|| { + let mut map = + hashbrown::HashMap::::with_hasher(IdentityBuildHasher::default()); + for keys in &maps_data { + for &key in keys { + *map.entry(key).or_default() += 1; + } + } + map + }); + }); + + // ── 5. HashSortedMap merge only (no sort) ─────────────────────── + group.bench_function("HashSortedMap merge", |b| { + b.iter(|| { + let mut map: HashSortedMap = + HashSortedMap::with_hasher(IdentityBuildHasher::default()); + for keys in &maps_data { + for &key in keys { + *map.entry(key).or_default() += 1u32; + } + } + map + }); + }); + + // ── 6. hashbrown presized merge only ──────────────────────────── + group.bench_function("hashbrown merge presized", |b| { + b.iter(|| { + let mut map = + hashbrown::HashMap::::with_capacity_and_hasher( + 1_000_000, + IdentityBuildHasher::default(), + ); + for keys in &maps_data { + for &key in keys { + *map.entry(key).or_default() += 1; + } + } + map + }); + }); + + // ── 7. 
HashSortedMap presized merge only ───────────────────────── + group.bench_function("HashSortedMap merge presized", |b| { + b.iter(|| { + let mut map: HashSortedMap = + HashSortedMap::with_capacity_and_hasher( + 1_000_000, + IdentityBuildHasher::default(), + ); + for keys in &maps_data { + for &key in keys { + *map.entry(key).or_default() += 1u32; + } + } + map + }); + }); + + group.finish(); +} + criterion_group!( benches, bench_insert, @@ -435,6 +584,7 @@ criterion_group!( bench_grow, bench_count, bench_iter, - bench_sort + bench_sort, + bench_merge_sort ); criterion_main!(benches); diff --git a/crates/hash-sorted-map/src/hash_sorted_map.rs b/crates/hash-sorted-map/src/hash_sorted_map.rs index 427194d..1175f49 100644 --- a/crates/hash-sorted-map/src/hash_sorted_map.rs +++ b/crates/hash-sorted-map/src/hash_sorted_map.rs @@ -660,12 +660,9 @@ impl<'a, K: Hash + Eq, V, S: BuildHasher> VacantEntry<'a, K, V, S> { /// Cold path: the chain was full, the table is at capacity, and we need to /// grow before inserting. Re-walks via the slow path after grow. /// -/// After `grow()` doubles `num_primary` (`n_bits += 1`), our key's new -/// primary group can have at most ~half the old chain's keys, so hitting -/// `NeedsOverflow` again would require `GROUP_SIZE` keys to all collide on -/// one extra bit of hash — essentially impossible for any reasonable hash. -/// (`insert_for_grow` relies on the same assumption to skip its own -/// capacity check.) +/// With clustered hash functions (e.g. identity hashing), the new primary +/// group may still be full after grow, so we handle `NeedsOverflow` by +/// allocating an overflow group. #[cold] #[inline(never)] fn insert_after_grow( @@ -675,9 +672,9 @@ fn insert_after_grow( value: V, ) -> &mut V { map.grow(); + let tag = tag(hash); match map.find_or_insertion_slot(hash, &key) { FindResult::Vacant(Insertion::Empty { group, slot }) => { - let tag = tag(hash); // SAFETY: `group` points into `map.container.groups` and is valid for `'a`. 
unsafe { let g = &mut *group; @@ -688,10 +685,28 @@ fn insert_after_grow( g.values[slot].assume_init_mut() } } - // After grow, the new primary group for `key` cannot be full (see - // function docs), and the key wasn't in the table before grow. - FindResult::Vacant(Insertion::NeedsOverflow { .. }) | FindResult::Found(_) => { - unreachable!("post-grow walk must hit an empty slot") + FindResult::Vacant(Insertion::NeedsOverflow { tail }) => { + // Primary group chain is full even after grow (possible with + // clustered identity hashes). Allocate an overflow group. + debug_assert!( + (map.container.num_groups as usize) < map.container.groups.len(), + "overflow pool exhausted right after grow" + ); + let new_gi = map.container.num_groups as usize; + map.container.num_groups += 1; + unsafe { + (*tail).overflow = new_gi as u32; + } + let slot = slot_hint(hash); + let group = &mut map.container.groups[new_gi]; + group.ctrl[slot] = tag; + group.keys[slot] = MaybeUninit::new(key); + group.values[slot] = MaybeUninit::new(value); + map.container.len += 1; + unsafe { group.values[slot].assume_init_mut() } + } + FindResult::Found(_) => { + unreachable!("key was not in the table before grow") } } } From 34dad1a0171de71b9a4a66904ad13c239c6988bf Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Thu, 7 May 2026 17:24:22 +0200 Subject: [PATCH 05/23] Update hash_sorted_map.rs --- crates/hash-sorted-map/src/hash_sorted_map.rs | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/crates/hash-sorted-map/src/hash_sorted_map.rs b/crates/hash-sorted-map/src/hash_sorted_map.rs index 1175f49..dcbad38 100644 --- a/crates/hash-sorted-map/src/hash_sorted_map.rs +++ b/crates/hash-sorted-map/src/hash_sorted_map.rs @@ -322,27 +322,13 @@ impl HashSortedMap { /// of `&mut self` until any reallocation (`grow`). 
fn find_or_insertion_slot(&mut self, hash: u64, key: &K) -> FindResult { let tag = tag(hash); - let hint = slot_hint(hash); let mut gi = self.container.group_index(hash); loop { let group = &mut self.container.groups[gi]; - // Fast path: preferred slot. - let c = group.ctrl[hint]; - if c == CTRL_EMPTY { - return FindResult::Vacant(Insertion::Empty { - group: group as *mut _, - slot: hint, - }); - } - if c == tag && unsafe { group.keys[hint].assume_init_ref() } == key { - return FindResult::Found(group.values[hint].as_mut_ptr()); - } - - // Slow path: SIMD scan group for tag match. + // SIMD scan group for tag match. let mut tag_mask = group_ops::match_tag(&group.ctrl, tag); - tag_mask = group_ops::clear_slot(tag_mask, hint); while let Some(i) = group_ops::next_match(&mut tag_mask) { if unsafe { group.keys[i].assume_init_ref() } == key { return FindResult::Found(group.values[i].as_mut_ptr()); From 170870e0e8bc493273906e5d77c9a4acc3c455c3 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Thu, 7 May 2026 19:05:24 +0200 Subject: [PATCH 06/23] remove slots mostly and update optimizations file --- crates/hash-sorted-map/OPTIMIZATIONS.md | 118 +++++++++++++++--- crates/hash-sorted-map/src/hash_sorted_map.rs | 87 ++----------- 2 files changed, 112 insertions(+), 93 deletions(-) diff --git a/crates/hash-sorted-map/OPTIMIZATIONS.md b/crates/hash-sorted-map/OPTIMIZATIONS.md index 0b04520..bc1eb52 100644 --- a/crates/hash-sorted-map/OPTIMIZATIONS.md +++ b/crates/hash-sorted-map/OPTIMIZATIONS.md @@ -4,8 +4,8 @@ `HashSortedMap` is a Swiss-table-inspired hash map that uses **overflow chaining** (instead of open addressing), **SIMD group scanning** (NEON/SSE2), -a **slot-hint fast path**, and an **optimized growth strategy**. It is generic -over key type, value type, and hash builder. +and an **optimized growth strategy**. It is generic over key type, value type, +and hash builder. 
This document analyzes the design trade-offs versus [hashbrown](https://github.com/rust-lang/hashbrown) and records the @@ -38,7 +38,6 @@ experimental results that guided the current design. │ • Overflow chaining (linked groups) │ │ • 8-byte groups with NEON/SSE2/scalar SIMD scan │ │ • EMPTY / FULL tag states only (insertion-only, no deletion) │ -│ • Slot-hint fast path │ └──────────────────────────────────────────────────────────────────┘ ``` @@ -106,17 +105,32 @@ the overflow path. SIMD version** by pessimizing NEON code generation. Removed from the SIMD implementation, kept in the scalar version. -### 7. Slot Hint Fast Path (Unique to HashSortedMap) +### 7. Slot Hint Fast Path ⚠️ Removed from Lookup Paths -HashSortedMap checks a preferred slot before scanning the group: +Originally, HashSortedMap checked a preferred slot before scanning the group: ```rust let hint = slot_hint(hash); // 3 bits from hash → slot index if ctrl[hint] == EMPTY { /* direct insert */ } if ctrl[hint] == tag && keys[hint] == key { /* direct hit */ } ``` -hashbrown does **not** have this optimization — it always does a full SIMD -group scan. The reason why the performance is different is probably due to the different overflow strategies and the different load factors. +**Experimental finding**: This scalar check **hurts performance** on random +workloads. The branch predictor cannot help because random keys map to random +slots, making the hint check a 50/50 branch that pollutes the branch +predictor. SIMD-only scanning (match_tag + match_empty) is uniformly fast +regardless of key distribution. 
+ +**Results of removing slot_hint from different paths:** +- `find_or_insertion_slot` (entry API): **−25% latency** on merge benchmark +- `get_hashed`: **−4.4%** improvement (SIMD scan is faster than branch+scalar) +- `insert_hashed`: **+7%** regression on presized insert (the hint genuinely + helps when inserting into a mostly-empty group), but accepted for code + simplicity since the merge workload matters more + +**Current state**: slot_hint is **only** used in `insert_for_grow()`, where +the map is guaranteed sparse after a resize (groups are mostly empty, so the +hint slot is very likely free). For all other paths, SIMD-only scanning is +used. ### 8. Overflow Reserve Sizing ✅ Validated @@ -159,13 +173,85 @@ entropy in both halves. Also changed trigram generation to use ## Summary of Impact -| Change | Effect on insert time | -|----------------------------|------------------------------| -| Capacity sizing fix | **−50%** (biggest win) | -| Optimized growth path | **−10%** on growth scenarios | -| SIMD group scanning | **−5%** | -| Branch hints (scalar only) | **−2–6%** | -| IdentityHasher fix | Enabled fair comparison | +| Change | Effect | +|---------------------------------|-------------------------------------| +| Capacity sizing fix | **−50%** insert time (biggest win) | +| Optimized growth path | **2× faster** growth than hashbrown | +| SIMD group scanning | **−5%** insert time | +| Slot hint removal (entry/get) | **−25%** merge latency | +| Branch hints (scalar only) | **−2–6%** | +| IdentityHasher fix | Enabled fair comparison | + +--- -The current HashSortedMap **matches hashbrown+FxHash** on pre-sized inserts, -**beats all hashbrown variants** on overwrites, and has **2× faster growth**. 
+## Benchmark Results (Apple M-series, aarch64 NEON) + +### Insert (1000 trigrams, pre-sized) + +| Implementation | Time (µs) | vs hashbrown | +|----------------------|-----------|--------------| +| FoldHashMap | 2.44 | −7% | +| FxHashMap | 2.61 | −1% | +| hashbrown+Identity | 2.63 | baseline | +| hashbrown::HashMap | 2.74 | +4% | +| std::HashMap+FNV | 3.18 | +21% | +| AHashMap | 3.38 | +29% | +| **HashSortedMap** | **3.46** | **+32%** | +| std::HashMap | 8.65 | +229% | + +### Reinsert (1000 trigrams, all keys exist) + +| Implementation | Time (µs) | +|----------------------|-----------| +| hashbrown+Identity | 2.50 | +| **HashSortedMap** | **2.70** | + +### Growth (128 → 1000 trigrams, 3 resize rounds) + +| Implementation | Time (µs) | +|----------------------|-----------| +| **HashSortedMap** | **5.35** | +| hashbrown+Identity | 10.12 | + +### Count (4000 trigrams, mixed insert/update) + +| Implementation | Time (µs) | +|----------------------------------|-----------| +| hashbrown+Identity entry() | 4.89 | +| **HashSortedMap entry().or_default()** | **5.44** | +| **HashSortedMap get_or_default** | **5.48** | + +### Iteration (1000 trigrams) + +| Implementation | Time (ns) | +|-------------------------------|-----------| +| **HashSortedMap iter()** | **794** | +| **HashSortedMap into_iter()** | **998** | +| hashbrown+Identity iter() | 1,067 | +| hashbrown+Identity into_iter()| 1,060 | + +### Sort (100K trigrams) + +| Implementation | Time (µs) | +|-----------------------------|-----------| +| **HashSortedMap sort_by_hash** | **706** | +| Vec::sort_unstable | 984 | + +### Merge (100 maps × 100K keys each → sorted output) + +| Implementation | Time (ms) | vs HSM merge+sort | +|-----------------------------------|-----------|--------------------| +| hashbrown merge presized | 30.4 | −46% | +| **HashSortedMap merge presized** | **37.3** | **−33%** | +| **HashSortedMap merge (no sort)** | **44.0** | **−21%** | +| hashbrown merge | 45.4 | −19% | +| **HashSortedMap
merge + sort** | **55.9** | **baseline** | +| hashbrown merge + Vec sort | 58.7 | +5% | +| k-way merge sorted vecs | 445 | +696% | + +**Key takeaways:** +- HashSortedMap has **2× faster growth** than hashbrown +- **25% faster iteration** than hashbrown (dense group layout) +- **sort_by_hash is 28% faster** than Vec::sort_unstable (data is partially sorted by group) +- **merge + sort is 5% faster** than hashbrown merge + Vec sort (the primary use case) +- Pre-sized insert is 32% slower than hashbrown (trade-off for sort/merge efficiency) diff --git a/crates/hash-sorted-map/src/hash_sorted_map.rs b/crates/hash-sorted-map/src/hash_sorted_map.rs index dcbad38..535ae6f 100644 --- a/crates/hash-sorted-map/src/hash_sorted_map.rs +++ b/crates/hash-sorted-map/src/hash_sorted_map.rs @@ -215,26 +215,11 @@ impl HashSortedMap { fn insert_hashed(&mut self, hash: u64, key: K, value: V) -> Option { let tag = tag(hash); - let hint = slot_hint(hash); let mut gi = self.container.group_index(hash); loop { let group = &mut self.container.groups[gi]; - // Fast path: check preferred slot. - let c = group.ctrl[hint]; - if c == CTRL_EMPTY { - group.ctrl[hint] = tag; - group.keys[hint] = MaybeUninit::new(key); - group.values[hint] = MaybeUninit::new(value); - self.container.len += 1; - return None; - } - if c == tag && unsafe { group.keys[hint].assume_init_ref() } == &key { - let old = std::mem::replace(unsafe { group.values[hint].assume_init_mut() }, value); - return Some(old); - } - // Slow path: SIMD scan group for tag match. + // SIMD scan group for tag match. 
let mut tag_mask = group_ops::match_tag(&group.ctrl, tag); - tag_mask = group_ops::clear_slot(tag_mask, hint); while let Some(i) = group_ops::next_match(&mut tag_mask) { if unsafe { group.keys[i].assume_init_ref() } == &key { let old = @@ -267,9 +252,9 @@ impl HashSortedMap { self.container.num_groups += 1; self.container.groups[gi].overflow = new_gi as u32; let group = &mut self.container.groups[new_gi]; - group.ctrl[hint] = tag; - group.keys[hint] = MaybeUninit::new(key); - group.values[hint] = MaybeUninit::new(value); + group.ctrl[0] = tag; + group.keys[0] = MaybeUninit::new(key); + group.values[0] = MaybeUninit::new(value); self.container.len += 1; return None; } @@ -282,31 +267,20 @@ impl HashSortedMap { Q: Eq + ?Sized, { let tag = tag(hash); - let hint = slot_hint(hash); let mut gi = self.container.group_index(hash); loop { let group = &self.container.groups[gi]; - - // Fast path: preferred slot. - let c = group.ctrl[hint]; - if c == tag && unsafe { group.keys[hint].assume_init_ref() }.borrow() == key { - return Some(unsafe { group.values[hint].assume_init_ref() }); - } - - // Slow path: SIMD scan group. + // SIMD scan group for tag match. let mut tag_mask = group_ops::match_tag(&group.ctrl, tag); - tag_mask = group_ops::clear_slot(tag_mask, hint); while let Some(i) = group_ops::next_match(&mut tag_mask) { if unsafe { group.keys[i].assume_init_ref() }.borrow() == key { return Some(unsafe { group.values[i].assume_init_ref() }); } } - if group_ops::match_empty(&group.ctrl) != 0 { return None; } - if group.overflow == NO_OVERFLOW { return None; } @@ -334,7 +308,6 @@ impl HashSortedMap { return FindResult::Found(group.values[i].as_mut_ptr()); } } - // Check for empty slot in this group. let empty_mask = group_ops::match_empty(&group.ctrl); if empty_mask != 0 { @@ -344,7 +317,6 @@ impl HashSortedMap { slot: i, }); } - // Group full — follow or report end of chain. 
if group.overflow == NO_OVERFLOW { return FindResult::Vacant(Insertion::NeedsOverflow { @@ -626,7 +598,7 @@ impl<'a, K: Hash + Eq, V, S: BuildHasher> VacantEntry<'a, K, V, S> { // `entry()` and now (we hold the only `&mut self`). (*tail).overflow = new_gi as u32; } - (new_group, slot_hint(hash)) + (new_group, 0) } }; @@ -644,57 +616,18 @@ impl<'a, K: Hash + Eq, V, S: BuildHasher> VacantEntry<'a, K, V, S> { } /// Cold path: the chain was full, the table is at capacity, and we need to -/// grow before inserting. Re-walks via the slow path after grow. -/// -/// With clustered hash functions (e.g. identity hashing), the new primary -/// group may still be full after grow, so we handle `NeedsOverflow` by -/// allocating an overflow group. +/// grow before inserting. Grows the map, then re-walks via `entry()` to find +/// the new insertion slot. #[cold] #[inline(never)] fn insert_after_grow( map: &mut HashSortedMap, - hash: u64, + _hash: u64, key: K, value: V, ) -> &mut V { map.grow(); - let tag = tag(hash); - match map.find_or_insertion_slot(hash, &key) { - FindResult::Vacant(Insertion::Empty { group, slot }) => { - // SAFETY: `group` points into `map.container.groups` and is valid for `'a`. - unsafe { - let g = &mut *group; - g.ctrl[slot] = tag; - g.keys[slot] = MaybeUninit::new(key); - g.values[slot] = MaybeUninit::new(value); - map.container.len += 1; - g.values[slot].assume_init_mut() - } - } - FindResult::Vacant(Insertion::NeedsOverflow { tail }) => { - // Primary group chain is full even after grow (possible with - // clustered identity hashes). Allocate an overflow group. 
- debug_assert!( - (map.container.num_groups as usize) < map.container.groups.len(), - "overflow pool exhausted right after grow" - ); - let new_gi = map.container.num_groups as usize; - map.container.num_groups += 1; - unsafe { - (*tail).overflow = new_gi as u32; - } - let slot = slot_hint(hash); - let group = &mut map.container.groups[new_gi]; - group.ctrl[slot] = tag; - group.keys[slot] = MaybeUninit::new(key); - group.values[slot] = MaybeUninit::new(value); - map.container.len += 1; - unsafe { group.values[slot].assume_init_mut() } - } - FindResult::Found(_) => { - unreachable!("key was not in the table before grow") - } - } + map.entry(key).or_insert(value) } // No custom Drop needed for HashSortedMap — dropping `container` handles entries. From e73f6420e4825f0aefd0b496cf8f708b9a04e58b Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Thu, 7 May 2026 19:10:06 +0200 Subject: [PATCH 07/23] Update performance.rs --- .../hash-sorted-map/benchmarks/performance.rs | 73 +++++++++++-------- 1 file changed, 42 insertions(+), 31 deletions(-) diff --git a/crates/hash-sorted-map/benchmarks/performance.rs b/crates/hash-sorted-map/benchmarks/performance.rs index b6e24fd..bd837d1 100644 --- a/crates/hash-sorted-map/benchmarks/performance.rs +++ b/crates/hash-sorted-map/benchmarks/performance.rs @@ -430,6 +430,8 @@ fn bench_sort(c: &mut Criterion) { } fn bench_merge_sort(c: &mut Criterion) { + use hash_sorted_map::HashSortedContainer; + const NUM_MAPS: usize = 100; const KEYS_PER_MAP: usize = 100_000; @@ -443,40 +445,49 @@ fn bench_merge_sort(c: &mut Criterion) { }) .collect(); + // Pre-build sorted containers from the input data. 
+ let sorted_containers: Vec> = maps_data + .iter() + .map(|keys| { + let mut map: HashSortedMap = + HashSortedMap::with_hasher(IdentityBuildHasher::default()); + for &key in keys { + *map.entry(key).or_default() += 1u32; + } + map.sort_by_hash() + }) + .collect(); + let hasher = IdentityBuildHasher::default(); let mut group = c.benchmark_group("merge_100_maps_sorted"); group.sample_size(10); - // ── 1. HashSortedMap: merge all, then sort_by_hash ────────────── + // ── 1. HashSortedMap: merge sorted containers, then sort_by_hash ─ group.bench_function("HashSortedMap merge + sort_by_hash", |b| { b.iter(|| { let mut map: HashSortedMap = HashSortedMap::with_hasher(IdentityBuildHasher::default()); - for keys in &maps_data { - for &key in keys { - *map.entry(key).or_default() += 1u32; + for container in &sorted_containers { + for (&key, &value) in container { + *map.entry(key).or_default() += value; } } map.sort_by_hash() }); }); - // ── 2. K-way merge over pre-sorted vectors ────────────────────── - group.bench_function("k-way merge sorted vecs", |b| { + // ── 2. K-way merge over pre-sorted containers ──────────────────── + group.bench_function("k-way merge sorted containers", |b| { use itertools::Itertools; b.iter(|| { - // Phase 1: build per-map sorted (hash, key, count) vectors. - let sorted_vecs: Vec> = maps_data + // Phase 1: build per-container sorted (hash, key, count) vectors. 
+ let sorted_vecs: Vec> = sorted_containers .iter() - .map(|keys| { - let mut counts = std::collections::HashMap::::with_hasher(IdentityBuildHasher::default()); - for &key in keys { - *counts.entry(key).or_default() += 1; - } - let mut vec: Vec<(u64, u32, u32)> = counts - .into_iter() - .map(|(k, v)| (hasher.hash_one(k), k, v)) + .map(|container| { + let mut vec: Vec<(u64, u32, u32)> = container + .iter() + .map(|(&k, &v)| (hasher.hash_one(k), k, v)) .collect(); vec.sort_unstable_by_key(|&(h, _, _)| h); vec @@ -501,9 +512,9 @@ fn bench_merge_sort(c: &mut Criterion) { b.iter(|| { let mut map = hashbrown::HashMap::::with_hasher(IdentityBuildHasher::default()); - for keys in &maps_data { - for &key in keys { - *map.entry(key).or_default() += 1; + for container in &sorted_containers { + for (&key, &value) in container { + *map.entry(key).or_default() += value; } } let mut vec: Vec<(u32, u32)> = map.into_iter().collect(); @@ -517,9 +528,9 @@ fn bench_merge_sort(c: &mut Criterion) { b.iter(|| { let mut map = hashbrown::HashMap::::with_hasher(IdentityBuildHasher::default()); - for keys in &maps_data { - for &key in keys { - *map.entry(key).or_default() += 1; + for container in &sorted_containers { + for (&key, &value) in container { + *map.entry(key).or_default() += value; } } map @@ -531,9 +542,9 @@ fn bench_merge_sort(c: &mut Criterion) { b.iter(|| { let mut map: HashSortedMap = HashSortedMap::with_hasher(IdentityBuildHasher::default()); - for keys in &maps_data { - for &key in keys { - *map.entry(key).or_default() += 1u32; + for container in &sorted_containers { + for (&key, &value) in container { + *map.entry(key).or_default() += value; } } map @@ -548,9 +559,9 @@ fn bench_merge_sort(c: &mut Criterion) { 1_000_000, IdentityBuildHasher::default(), ); - for keys in &maps_data { - for &key in keys { - *map.entry(key).or_default() += 1; + for container in &sorted_containers { + for (&key, &value) in container { + *map.entry(key).or_default() += value; } } map @@ -565,9 
+576,9 @@ fn bench_merge_sort(c: &mut Criterion) { 1_000_000, IdentityBuildHasher::default(), ); - for keys in &maps_data { - for &key in keys { - *map.entry(key).or_default() += 1u32; + for container in &sorted_containers { + for (&key, &value) in container { + *map.entry(key).or_default() += value; } } map From b02de1688042c4604ad8c2d66399495ebb6c2f7d Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Thu, 7 May 2026 19:15:01 +0200 Subject: [PATCH 08/23] unsorted --- crates/hash-sorted-map/benchmarks/performance.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/crates/hash-sorted-map/benchmarks/performance.rs b/crates/hash-sorted-map/benchmarks/performance.rs index bd837d1..cb930e5 100644 --- a/crates/hash-sorted-map/benchmarks/performance.rs +++ b/crates/hash-sorted-map/benchmarks/performance.rs @@ -430,8 +430,6 @@ fn bench_sort(c: &mut Criterion) { } fn bench_merge_sort(c: &mut Criterion) { - use hash_sorted_map::HashSortedContainer; - const NUM_MAPS: usize = 100; const KEYS_PER_MAP: usize = 100_000; @@ -446,7 +444,7 @@ fn bench_merge_sort(c: &mut Criterion) { .collect(); // Pre-build sorted containers from the input data. 
- let sorted_containers: Vec> = maps_data + let sorted_containers: Vec<_> = maps_data .iter() .map(|keys| { let mut map: HashSortedMap = @@ -454,7 +452,7 @@ fn bench_merge_sort(c: &mut Criterion) { for &key in keys { *map.entry(key).or_default() += 1u32; } - map.sort_by_hash() + map }) .collect(); From 2f2c7a3542dda8f7b1a3275273538a49de98fe97 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Thu, 7 May 2026 19:20:20 +0200 Subject: [PATCH 09/23] cleanup code a bit --- .../hash-sorted-map/benchmarks/performance.rs | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/crates/hash-sorted-map/benchmarks/performance.rs b/crates/hash-sorted-map/benchmarks/performance.rs index cb930e5..c773daf 100644 --- a/crates/hash-sorted-map/benchmarks/performance.rs +++ b/crates/hash-sorted-map/benchmarks/performance.rs @@ -444,12 +444,12 @@ fn bench_merge_sort(c: &mut Criterion) { .collect(); // Pre-build sorted containers from the input data. - let sorted_containers: Vec<_> = maps_data - .iter() + let hash_maps: Vec<_> = maps_data + .into_iter() .map(|keys| { - let mut map: HashSortedMap = + let mut map = HashSortedMap::with_hasher(IdentityBuildHasher::default()); - for &key in keys { + for key in keys { *map.entry(key).or_default() += 1u32; } map @@ -465,7 +465,7 @@ fn bench_merge_sort(c: &mut Criterion) { b.iter(|| { let mut map: HashSortedMap = HashSortedMap::with_hasher(IdentityBuildHasher::default()); - for container in &sorted_containers { + for container in &hash_maps { for (&key, &value) in container { *map.entry(key).or_default() += value; } @@ -480,7 +480,7 @@ fn bench_merge_sort(c: &mut Criterion) { b.iter(|| { // Phase 1: build per-container sorted (hash, key, count) vectors. 
- let sorted_vecs: Vec> = sorted_containers + let sorted_vecs: Vec> = hash_maps .iter() .map(|container| { let mut vec: Vec<(u64, u32, u32)> = container @@ -510,7 +510,7 @@ fn bench_merge_sort(c: &mut Criterion) { b.iter(|| { let mut map = hashbrown::HashMap::::with_hasher(IdentityBuildHasher::default()); - for container in &sorted_containers { + for container in &hash_maps { for (&key, &value) in container { *map.entry(key).or_default() += value; } @@ -526,7 +526,7 @@ fn bench_merge_sort(c: &mut Criterion) { b.iter(|| { let mut map = hashbrown::HashMap::::with_hasher(IdentityBuildHasher::default()); - for container in &sorted_containers { + for container in &hash_maps { for (&key, &value) in container { *map.entry(key).or_default() += value; } @@ -540,7 +540,7 @@ fn bench_merge_sort(c: &mut Criterion) { b.iter(|| { let mut map: HashSortedMap = HashSortedMap::with_hasher(IdentityBuildHasher::default()); - for container in &sorted_containers { + for container in &hash_maps { for (&key, &value) in container { *map.entry(key).or_default() += value; } @@ -557,7 +557,7 @@ fn bench_merge_sort(c: &mut Criterion) { 1_000_000, IdentityBuildHasher::default(), ); - for container in &sorted_containers { + for container in &hash_maps { for (&key, &value) in container { *map.entry(key).or_default() += value; } @@ -574,7 +574,7 @@ fn bench_merge_sort(c: &mut Criterion) { 1_000_000, IdentityBuildHasher::default(), ); - for container in &sorted_containers { + for container in &hash_maps { for (&key, &value) in container { *map.entry(key).or_default() += value; } From c12cac892ba76f82067a0f06cb8f3de6287305d2 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Fri, 8 May 2026 10:14:50 +0200 Subject: [PATCH 10/23] without slot_hint, sort keeps the structure intact --- .../hash-sorted-map/benchmarks/performance.rs | 6 +- crates/hash-sorted-map/src/container.rs | 58 ------ crates/hash-sorted-map/src/hash_sorted_map.rs | 183 ++++++++++-------- crates/hash-sorted-map/src/iter.rs | 
97 +++------- crates/hash-sorted-map/src/lib.rs | 2 - 5 files changed, 133 insertions(+), 213 deletions(-) delete mode 100644 crates/hash-sorted-map/src/container.rs diff --git a/crates/hash-sorted-map/benchmarks/performance.rs b/crates/hash-sorted-map/benchmarks/performance.rs index c773daf..6695ee3 100644 --- a/crates/hash-sorted-map/benchmarks/performance.rs +++ b/crates/hash-sorted-map/benchmarks/performance.rs @@ -422,7 +422,8 @@ fn bench_sort(c: &mut Criterion) { for (i, &key) in keys.iter().enumerate() { map.insert(key, i); } - map.sort_by_hash() + map.sort_by_hash(); + map }); }); @@ -470,7 +471,8 @@ fn bench_merge_sort(c: &mut Criterion) { *map.entry(key).or_default() += value; } } - map.sort_by_hash() + map.sort_by_hash(); + map }); }); diff --git a/crates/hash-sorted-map/src/container.rs b/crates/hash-sorted-map/src/container.rs deleted file mode 100644 index 0be6709..0000000 --- a/crates/hash-sorted-map/src/container.rs +++ /dev/null @@ -1,58 +0,0 @@ -use super::group::Group; -use super::group_ops::{CTRL_EMPTY, GROUP_SIZE}; - -/// Core storage for a hash-sorted map. Owns the group array and supports -/// iteration and drop. Does not contain a hasher — use [`HashSortedMap`] -/// for insertion and lookup. 
-pub struct HashSortedContainer { - pub(crate) groups: Box<[Group]>, - pub(crate) num_groups: u32, - pub(crate) n_bits: u32, - pub(crate) len: usize, -} - -impl HashSortedContainer { - pub(crate) fn alloc_groups(n_bits: u32) -> (Box<[Group]>, u32) { - let num_primary = 1usize << n_bits; - let total = num_primary + num_primary / 8 + 1; - let mut groups: Vec> = Vec::with_capacity(total); - groups.resize_with(total, Group::new); - (groups.into_boxed_slice(), num_primary as u32) - } - - pub(crate) fn new(n_bits: u32) -> Self { - let (groups, num_primary) = Self::alloc_groups(n_bits); - Self { - groups, - num_groups: num_primary, - n_bits, - len: 0, - } - } - - #[inline] - pub(crate) fn group_index(&self, hash: u64) -> usize { - (hash >> (64 - self.n_bits)) as usize - } - - pub fn len(&self) -> usize { - self.len - } - - pub fn is_empty(&self) -> bool { - self.len == 0 - } -} - -impl Drop for HashSortedContainer { - fn drop(&mut self) { - for group in &mut self.groups[..self.num_groups as usize] { - for i in 0..GROUP_SIZE { - if group.ctrl[i] != CTRL_EMPTY { - unsafe { group.keys[i].assume_init_drop() }; - unsafe { group.values[i].assume_init_drop() }; - } - } - } - } -} diff --git a/crates/hash-sorted-map/src/hash_sorted_map.rs b/crates/hash-sorted-map/src/hash_sorted_map.rs index 535ae6f..d4b9ead 100644 --- a/crates/hash-sorted-map/src/hash_sorted_map.rs +++ b/crates/hash-sorted-map/src/hash_sorted_map.rs @@ -6,7 +6,6 @@ use std::marker::PhantomData; use smallvec::SmallVec; -use super::container::HashSortedContainer; use super::group::Group; use super::group_ops::{self, CTRL_EMPTY, GROUP_SIZE}; @@ -25,7 +24,7 @@ fn slot_hint(hash: u64) -> usize { } // ──────────────────────────────────────────────────────────────────────── -// HashSortedMap — wraps a container with a hash builder +// HashSortedMap // ──────────────────────────────────────────────────────────────────────── /// Insertion-only hash map with SIMD group scanning. 
@@ -33,7 +32,10 @@ fn slot_hint(hash: u64) -> usize { /// Uses NEON on aarch64, SSE2 on x86_64, scalar fallback elsewhere. /// Generic over key type `K`, value type `V`, and hash builder `S`. pub struct HashSortedMap { - pub(crate) container: HashSortedContainer, + pub(crate) groups: Box<[Group]>, + pub(crate) num_groups: u32, + pub(crate) n_bits: u32, + pub(crate) len: usize, hash_builder: S, } @@ -62,44 +64,45 @@ impl HashSortedMap { let adjusted = (capacity as f64 / group_ops::MAX_FILL).ceil() as usize; let min_groups = (adjusted.div_ceil(GROUP_SIZE)).max(1).next_power_of_two(); let n_bits = min_groups.trailing_zeros().max(1); + let (groups, num_groups) = Self::alloc_groups(n_bits); Self { - container: HashSortedContainer::new(n_bits), + groups, + num_groups, + n_bits, + len: 0, hash_builder, } } pub fn len(&self) -> usize { - self.container.len + self.len } pub fn is_empty(&self) -> bool { - self.container.len == 0 + self.len == 0 } - /// Consume the map, returning the underlying container and hash builder. - pub fn into_parts(self) -> (HashSortedContainer, S) { - // Prevent Drop from running on self — we're moving fields out. - let this = std::mem::ManuallyDrop::new(self); - unsafe { - let container = std::ptr::read(&this.container); - let hash_builder = std::ptr::read(&this.hash_builder); - (container, hash_builder) - } + fn alloc_groups(n_bits: u32) -> (Box<[Group]>, u32) { + let num_primary = 1usize << n_bits; + let total = num_primary + num_primary / 8 + 1; + let mut groups: Vec> = Vec::with_capacity(total); + groups.resize_with(total, Group::new); + (groups.into_boxed_slice(), num_primary as u32) + } + + #[inline] + pub(crate) fn group_index(&self, hash: u64) -> usize { + (hash >> (64 - self.n_bits)) as usize } } impl HashSortedMap { - /// Sort all entries within each primary group chain by their hash value - /// and return the underlying container. + /// Sort all entries within each primary group chain by their hash value. 
/// /// After sorting, iteration visits entries in hash order within each /// primary group (and since primary groups are visited in group-index /// order, the overall iteration is in full hash order). /// - /// Consumes the map because lookups via `get()` won't work correctly - /// after sorting (the preferred `slot_hint` position might now be empty, - /// breaking an invariant). - /// /// # Complexity /// /// Each of `n` elements hashes uniformly into one of `m` primary groups, @@ -115,8 +118,8 @@ impl HashSortedMap { /// Dividing by `n` gives the expected cost per element: `1 + n/m` (for /// `m ≫ 1`). Since `n/m` is the average chain length, bounded by /// `GROUP_SIZE / MAX_FILL ≈ 16`, the per-element cost stays constant. - pub fn sort_by_hash(mut self) -> HashSortedContainer { - let num_primary = 1usize << self.container.n_bits; + pub fn sort_by_hash(&mut self) { + let num_primary = 1usize << self.n_bits; let mut chain: SmallVec<[u32; 4]> = SmallVec::new(); let mut hashes: SmallVec<[u64; 16]> = SmallVec::new(); @@ -128,7 +131,7 @@ impl HashSortedMap { let mut gi = primary_gi; loop { chain.push(gi as u32); - let overflow = self.container.groups[gi].overflow; + let overflow = self.groups[gi].overflow; if overflow == NO_OVERFLOW { break; } @@ -138,7 +141,7 @@ impl HashSortedMap { // allocated when the previous group is full). Compute hashes for // those directly. for &cgi in &chain[..chain.len() - 1] { - let g = &self.container.groups[cgi as usize]; + let g = &self.groups[cgi as usize]; for slot in 0..GROUP_SIZE { let hash = self .hash_builder @@ -148,20 +151,19 @@ impl HashSortedMap { } // The last group may have gaps — compact it to the front. let last_gi = *chain.last().unwrap() as usize; - compact_last_group(&mut self.container.groups[last_gi], &self.hash_builder, &mut hashes); + compact_last_group(&mut self.groups[last_gi], &self.hash_builder, &mut hashes); let n = hashes.len(); // Insertion sort by hash. 
for i in 1..n { let mut j = i; while j > 0 && hashes[j - 1] > hashes[j] { hashes.swap(j - 1, j); - swap_chain_slots(&mut self.container.groups, &chain, j - 1, j); + swap_chain_slots(&mut self.groups, &chain, j - 1, j); j -= 1; } } } - self.container } pub fn insert(&mut self, key: K, value: V) -> Option { @@ -215,9 +217,9 @@ impl HashSortedMap { fn insert_hashed(&mut self, hash: u64, key: K, value: V) -> Option { let tag = tag(hash); - let mut gi = self.container.group_index(hash); + let mut gi = self.group_index(hash); loop { - let group = &mut self.container.groups[gi]; + let group = &mut self.groups[gi]; // SIMD scan group for tag match. let mut tag_mask = group_ops::match_tag(&group.ctrl, tag); while let Some(i) = group_ops::next_match(&mut tag_mask) { @@ -234,7 +236,7 @@ impl HashSortedMap { group.ctrl[i] = tag; group.keys[i] = MaybeUninit::new(key); group.values[i] = MaybeUninit::new(value); - self.container.len += 1; + self.len += 1; return None; } // Group full — follow or create overflow chain. @@ -242,20 +244,20 @@ impl HashSortedMap { if overflow != NO_OVERFLOW { gi = overflow as usize; } else { - if self.container.num_groups as usize == self.container.groups.len() { + if self.num_groups as usize == self.groups.len() { self.grow(); // n_bits changed; recompute the primary group and retry. 
- gi = self.container.group_index(hash); + gi = self.group_index(hash); continue; } - let new_gi = self.container.num_groups as usize; - self.container.num_groups += 1; - self.container.groups[gi].overflow = new_gi as u32; - let group = &mut self.container.groups[new_gi]; + let new_gi = self.num_groups as usize; + self.num_groups += 1; + self.groups[gi].overflow = new_gi as u32; + let group = &mut self.groups[new_gi]; group.ctrl[0] = tag; group.keys[0] = MaybeUninit::new(key); group.values[0] = MaybeUninit::new(value); - self.container.len += 1; + self.len += 1; return None; } } @@ -267,10 +269,10 @@ impl HashSortedMap { Q: Eq + ?Sized, { let tag = tag(hash); - let mut gi = self.container.group_index(hash); + let mut gi = self.group_index(hash); loop { - let group = &self.container.groups[gi]; + let group = &self.groups[gi]; // SIMD scan group for tag match. let mut tag_mask = group_ops::match_tag(&group.ctrl, tag); while let Some(i) = group_ops::next_match(&mut tag_mask) { @@ -296,10 +298,10 @@ impl HashSortedMap { /// of `&mut self` until any reallocation (`grow`). fn find_or_insertion_slot(&mut self, hash: u64, key: &K) -> FindResult { let tag = tag(hash); - let mut gi = self.container.group_index(hash); + let mut gi = self.group_index(hash); loop { - let group = &mut self.container.groups[gi]; + let group = &mut self.groups[gi]; // SIMD scan group for tag match. 
let mut tag_mask = group_ops::match_tag(&group.ctrl, tag); @@ -329,17 +331,17 @@ impl HashSortedMap { fn grow(&mut self) { let old_groups = std::mem::replace( - &mut self.container.groups, + &mut self.groups, Vec::>::new().into_boxed_slice(), ); - let old_num_groups = self.container.num_groups as usize; - let old_len = self.container.len; + let old_num_groups = self.num_groups as usize; + let old_len = self.len; - self.container.n_bits += 1; - let (new_groups, num_primary) = HashSortedContainer::alloc_groups(self.container.n_bits); - self.container.groups = new_groups; - self.container.num_groups = num_primary; - self.container.len = 0; + self.n_bits += 1; + let (new_groups, num_primary) = Self::alloc_groups(self.n_bits); + self.groups = new_groups; + self.num_groups = num_primary; + self.len = 0; for group in &old_groups[..old_num_groups] { let mut full_mask = group_ops::match_full(&group.ctrl); @@ -354,14 +356,14 @@ impl HashSortedMap { // old_groups runs no destructors but does free the backing buffer. 
drop(old_groups); - debug_assert_eq!(self.container.len, old_len); + debug_assert_eq!(self.len, old_len); } fn insert_for_grow(&mut self, hash: u64, key_src: *const K, value_src: *const V) { let tag = tag(hash); let mut hint = slot_hint(hash); - let gi = self.container.group_index(hash); - let mut group = &mut self.container.groups[gi]; + let gi = self.group_index(hash); + let mut group = &mut self.groups[gi]; loop { if group.ctrl[hint] == CTRL_EMPTY { @@ -374,12 +376,12 @@ impl HashSortedMap { } let overflow = group.overflow; if overflow != NO_OVERFLOW { - group = &mut self.container.groups[overflow as usize]; + group = &mut self.groups[overflow as usize]; } else { - let new_gi = self.container.num_groups as usize; + let new_gi = self.num_groups as usize; group.overflow = new_gi as u32; - self.container.num_groups += 1; - group = &mut self.container.groups[new_gi]; + self.num_groups += 1; + group = &mut self.groups[new_gi]; break; } } @@ -392,7 +394,7 @@ impl HashSortedMap { .as_mut_ptr() .copy_from_nonoverlapping(value_src, 1); } - self.container.len += 1; + self.len += 1; } } @@ -584,16 +586,16 @@ impl<'a, K: Hash + Eq, V, S: BuildHasher> VacantEntry<'a, K, V, S> { Insertion::NeedsOverflow { tail } => { let (new_gi, new_group) = unsafe { let map = &mut *map; - if map.container.num_groups as usize == map.container.groups.len() { + if map.num_groups as usize == map.groups.len() { return insert_after_grow(map, hash, key, value); } - let new_gi = map.container.num_groups as usize; - map.container.num_groups += 1; - let new_group: *mut Group = &mut map.container.groups[new_gi]; + let new_gi = map.num_groups as usize; + map.num_groups += 1; + let new_group: *mut Group = &mut map.groups[new_gi]; (new_gi, new_group) }; unsafe { - // SAFETY: `tail` was obtained from `&mut self.container.groups[..]` and + // SAFETY: `tail` was obtained from `&mut groups[..]` and // remains valid because no reallocation occurred between // `entry()` and now (we hold the only `&mut self`). 
(*tail).overflow = new_gi as u32; @@ -604,8 +606,8 @@ impl<'a, K: Hash + Eq, V, S: BuildHasher> VacantEntry<'a, K, V, S> { let tag = tag(hash); unsafe { - (*map).container.len += 1; - // SAFETY: `group_ptr` points into `map.container.groups` and is valid for `'a`. + (*map).len += 1; + // SAFETY: `group_ptr` points into `map.groups` and is valid for `'a`. let group = &mut *group_ptr; group.ctrl[slot] = tag; group.keys[slot] = MaybeUninit::new(key); @@ -630,7 +632,18 @@ fn insert_after_grow( map.entry(key).or_insert(value) } -// No custom Drop needed for HashSortedMap — dropping `container` handles entries. +impl Drop for HashSortedMap { + fn drop(&mut self) { + for group in &mut self.groups[..self.num_groups as usize] { + for i in 0..GROUP_SIZE { + if group.ctrl[i] != CTRL_EMPTY { + unsafe { group.keys[i].assume_init_drop() }; + unsafe { group.values[i].assume_init_drop() }; + } + } + } + } +} #[cfg(test)] mod tests { @@ -699,12 +712,12 @@ mod tests { #[test] fn grow_on_overflow_exhaustion() { let mut map = HashSortedMap::with_capacity(1); - let old_n_bits = map.container.n_bits; + let old_n_bits = map.n_bits; for i in 0..100u32 { let key = i | 0xFF000000; map.insert(key, i); } - assert!(map.container.n_bits > old_n_bits, "should have grown"); + assert!(map.n_bits > old_n_bits, "should have grown"); assert_eq!(map.len(), 100); for i in 0..100u32 { let key = i | 0xFF000000; @@ -857,18 +870,18 @@ mod tests { #[test] fn sort_by_hash_empty() { - let map: HashSortedMap = HashSortedMap::new(); - let container = map.sort_by_hash(); - assert_eq!(container.len, 0); + let mut map: HashSortedMap = HashSortedMap::new(); + map.sort_by_hash(); + assert_eq!(map.len(), 0); } #[test] fn sort_by_hash_single() { let mut map = HashSortedMap::new(); map.insert(42u32, "hello"); - let container = map.sort_by_hash(); - assert_eq!(container.len, 1); - let entries: Vec<_> = container.into_iter().collect(); + map.sort_by_hash(); + assert_eq!(map.len(), 1); + let entries: Vec<_> = 
map.into_iter().collect(); assert_eq!(entries, vec![(42, "hello")]); } @@ -878,9 +891,9 @@ mod tests { for i in 0..200u32 { map.insert(i, i * 10); } - let container = map.sort_by_hash(); - assert_eq!(container.len, 200); - let mut entries: Vec<_> = container.into_iter().collect(); + map.sort_by_hash(); + assert_eq!(map.len(), 200); + let mut entries: Vec<_> = map.into_iter().collect(); entries.sort_by_key(|&(k, _)| k); for i in 0..200u32 { assert_eq!(entries[i as usize], (i, i * 10), "missing key {i}"); @@ -896,11 +909,11 @@ mod tests { for i in 0..500u32 { map.insert(i, i); } - let container = map.sort_by_hash(); + map.sort_by_hash(); // Iteration should now yield entries in hash order. let mut prev_hash = 0u64; let mut first = true; - for (&k, _) in &container { + for (&k, _) in &map { let h = hasher.hash_one(&k); if !first { assert!(h > prev_hash, "hash order violated: {prev_hash:#x} > {h:#x}"); @@ -917,9 +930,9 @@ mod tests { for i in 0..50u32 { map.insert(i, i); } - let container = map.sort_by_hash(); - assert_eq!(container.len, 50); - let mut entries: Vec<_> = container.into_iter().collect(); + map.sort_by_hash(); + assert_eq!(map.len(), 50); + let mut entries: Vec<_> = map.into_iter().collect(); entries.sort_by_key(|&(k, _)| k); for i in 0..50u32 { assert_eq!(entries[i as usize], (i, i), "missing key {i}"); @@ -935,11 +948,11 @@ mod tests { for i in 0..100u32 { map.insert(format!("key-{i}"), format!("val-{i}")); } - let container = map.sort_by_hash(); - assert_eq!(container.len, 100); + map.sort_by_hash(); + assert_eq!(map.len(), 100); let mut prev_hash = 0u64; let mut first = true; - for (k, _) in &container { + for (k, _) in &map { let h = hasher.hash_one(k); if !first { assert!(h > prev_hash, "hash order violated: {prev_hash:#x} > {h:#x}"); diff --git a/crates/hash-sorted-map/src/iter.rs b/crates/hash-sorted-map/src/iter.rs index 2ebb1c6..6a5c6fe 100644 --- a/crates/hash-sorted-map/src/iter.rs +++ b/crates/hash-sorted-map/src/iter.rs @@ -1,9 +1,8 @@ use 
std::marker::PhantomData; use std::mem::ManuallyDrop; -use super::container::HashSortedContainer; use super::group::Group; -use super::group_ops::{self}; +use super::group_ops::{self, CTRL_EMPTY}; use super::hash_sorted_map::{HashSortedMap, NO_OVERFLOW}; /// State shared by `Iter`, `IterMut`, and `IntoIter`: tracks which primary @@ -20,12 +19,12 @@ struct IterCursor { } impl IterCursor { - fn new(container: &HashSortedContainer) -> Self { - let num_primary = 1u32 << container.n_bits; + fn new(n_bits: u32, groups_len: usize) -> Self { + let num_primary = 1u32 << n_bits; Self { primary: 0, num_primary, - current_group: container.groups.len() as u32, + current_group: groups_len as u32, current_mask: 0, } } @@ -83,7 +82,7 @@ impl<'a, K, V> Iterator for Iter<'a, K, V> { pub struct IterMut<'a, K, V> { groups: *mut [Group], cursor: IterCursor, - _marker: PhantomData<&'a mut HashSortedContainer>, + _marker: PhantomData<&'a mut [Group]>, } impl<'a, K, V> Iterator for IterMut<'a, K, V> { @@ -105,15 +104,17 @@ impl<'a, K, V> Iterator for IterMut<'a, K, V> { /// Owning iterator that yields `(K, V)` pairs and consumes the map. pub struct IntoIter { - inner: ManuallyDrop>, + groups: Box<[Group]>, + num_groups: u32, + len: usize, cursor: IterCursor, } impl Iterator for IntoIter { type Item = (K, V); fn next(&mut self) -> Option { - let (gi, slot) = self.cursor.next_slot(&self.inner.groups)?; - let group = &self.inner.groups[gi]; + let (gi, slot) = self.cursor.next_slot(&self.groups)?; + let group = &self.groups[gi]; // SAFETY: slot is occupied (bitmask guarantees ctrl byte has high bit set). unsafe { Some(( @@ -124,28 +125,25 @@ impl Iterator for IntoIter { } fn size_hint(&self) -> (usize, Option) { - (0, Some(self.inner.len)) + (0, Some(self.len)) } } impl Drop for IntoIter { fn drop(&mut self) { // Continue iterating to drop remaining entries one by one. 
- while let Some((gi, slot)) = self.cursor.next_slot(&self.inner.groups) { + while let Some((gi, slot)) = self.cursor.next_slot(&self.groups) { unsafe { - self.inner.groups[gi].keys[slot].assume_init_drop(); - self.inner.groups[gi].values[slot].assume_init_drop(); + self.groups[gi].keys[slot].assume_init_drop(); + self.groups[gi].values[slot].assume_init_drop(); } } - // All entries consumed or dropped above. Set num_groups to 0 so the - // container's Drop won't try to drop them again, then let it run to - // free the groups allocation. - self.inner.num_groups = 0; - unsafe { ManuallyDrop::drop(&mut self.inner) }; } } -impl HashSortedContainer { +// ── HashSortedMap iteration ───────────────────────────────────────────── + +impl HashSortedMap { /// Returns an iterator over `(&K, &V)` pairs. /// /// Entries are visited in group-index order (primary groups in order of @@ -154,13 +152,13 @@ impl HashSortedContainer { pub fn iter(&self) -> Iter<'_, K, V> { Iter { groups: &self.groups, - cursor: IterCursor::new(self), + cursor: IterCursor::new(self.n_bits, self.groups.len()), } } /// Returns a mutable iterator over `(&K, &mut V)` pairs. pub fn iter_mut(&mut self) -> IterMut<'_, K, V> { - let cursor = IterCursor::new(self); + let cursor = IterCursor::new(self.n_bits, self.groups.len()); IterMut { groups: &mut *self.groups as *mut [Group], cursor, @@ -168,58 +166,25 @@ impl HashSortedContainer { } } - /// Consumes the container and returns an iterator over `(K, V)` pairs. + /// Consumes the map and returns an iterator over `(K, V)` pairs. pub fn into_iter(self) -> IntoIter { - let cursor = IterCursor::new(&self); + let cursor = IterCursor::new(self.n_bits, self.groups.len()); + // Prevent Drop from running on self — we're moving groups out. + let mut this = ManuallyDrop::new(self); + let groups = unsafe { std::ptr::read(&this.groups) }; + let num_groups = this.num_groups; + let len = this.len; + // Zero out num_groups so if Drop somehow runs it won't double-free. 
+ this.num_groups = 0; IntoIter { - inner: ManuallyDrop::new(self), + groups, + num_groups, + len, cursor, } } } -impl IntoIterator for HashSortedContainer { - type Item = (K, V); - type IntoIter = IntoIter; - fn into_iter(self) -> Self::IntoIter { - self.into_iter() - } -} - -impl<'a, K, V> IntoIterator for &'a HashSortedContainer { - type Item = (&'a K, &'a V); - type IntoIter = Iter<'a, K, V>; - fn into_iter(self) -> Self::IntoIter { - self.iter() - } -} - -impl<'a, K, V> IntoIterator for &'a mut HashSortedContainer { - type Item = (&'a K, &'a mut V); - type IntoIter = IterMut<'a, K, V>; - fn into_iter(self) -> Self::IntoIter { - self.iter_mut() - } -} - -// Delegate HashSortedMap iteration to its container. - -impl HashSortedMap { - pub fn iter(&self) -> Iter<'_, K, V> { - self.container.iter() - } - - pub fn iter_mut(&mut self) -> IterMut<'_, K, V> { - self.container.iter_mut() - } - - /// Consumes the map and returns an iterator over `(K, V)` pairs. - pub fn into_iter(self) -> IntoIter { - let (container, _hash_builder) = self.into_parts(); - container.into_iter() - } -} - impl IntoIterator for HashSortedMap { type Item = (K, V); type IntoIter = IntoIter; diff --git a/crates/hash-sorted-map/src/lib.rs b/crates/hash-sorted-map/src/lib.rs index 82e9102..3ff5461 100644 --- a/crates/hash-sorted-map/src/lib.rs +++ b/crates/hash-sorted-map/src/lib.rs @@ -1,9 +1,7 @@ -mod container; mod group; mod group_ops; mod hash_sorted_map; mod iter; -pub use container::HashSortedContainer; pub use hash_sorted_map::{Entry, HashSortedMap, OccupiedEntry, VacantEntry}; pub use iter::{IntoIter, Iter, IterMut}; From 0a9dd244dc631847bd603d0fe6d939e4fe6c8f31 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Fri, 8 May 2026 10:23:15 +0200 Subject: [PATCH 11/23] fix build warnings --- crates/hash-sorted-map/src/group_ops.rs | 15 --------------- crates/hash-sorted-map/src/iter.rs | 9 +++------ 2 files changed, 3 insertions(+), 21 deletions(-) diff --git 
a/crates/hash-sorted-map/src/group_ops.rs b/crates/hash-sorted-map/src/group_ops.rs index a1b92ec..27958ce 100644 --- a/crates/hash-sorted-map/src/group_ops.rs +++ b/crates/hash-sorted-map/src/group_ops.rs @@ -58,11 +58,6 @@ mod arch { mask.trailing_zeros() as usize } - #[inline(always)] - pub fn clear_slot(mask: Mask, slot: usize) -> Mask { - mask & !(1u32 << slot) - } - #[inline(always)] pub fn next_match(mask: &mut Mask) -> Option { if *mask == 0 { @@ -112,11 +107,6 @@ mod arch { (mask.trailing_zeros() >> 3) as usize } - #[inline(always)] - pub fn clear_slot(mask: Mask, slot: usize) -> Mask { - mask & !(0x80u64 << (slot * 8)) - } - #[inline(always)] pub fn next_match(mask: &mut Mask) -> Option { if *mask == 0 { @@ -158,11 +148,6 @@ mod arch { (mask.trailing_zeros() >> 3) as usize } - #[inline(always)] - pub fn clear_slot(mask: Mask, slot: usize) -> Mask { - mask & !(0x80u64 << (slot * 8)) - } - #[inline(always)] pub fn next_match(mask: &mut Mask) -> Option { if *mask == 0 { diff --git a/crates/hash-sorted-map/src/iter.rs b/crates/hash-sorted-map/src/iter.rs index 6a5c6fe..c5bed6a 100644 --- a/crates/hash-sorted-map/src/iter.rs +++ b/crates/hash-sorted-map/src/iter.rs @@ -2,7 +2,7 @@ use std::marker::PhantomData; use std::mem::ManuallyDrop; use super::group::Group; -use super::group_ops::{self, CTRL_EMPTY}; +use super::group_ops; use super::hash_sorted_map::{HashSortedMap, NO_OVERFLOW}; /// State shared by `Iter`, `IterMut`, and `IntoIter`: tracks which primary @@ -105,7 +105,6 @@ impl<'a, K, V> Iterator for IterMut<'a, K, V> { /// Owning iterator that yields `(K, V)` pairs and consumes the map. pub struct IntoIter { groups: Box<[Group]>, - num_groups: u32, len: usize, cursor: IterCursor, } @@ -172,13 +171,11 @@ impl HashSortedMap { // Prevent Drop from running on self — we're moving groups out. 
let mut this = ManuallyDrop::new(self); let groups = unsafe { std::ptr::read(&this.groups) }; - let num_groups = this.num_groups; let len = this.len; - // Zero out num_groups so if Drop somehow runs it won't double-free. - this.num_groups = 0; + // Zero out len so if Drop somehow runs it sees an empty map. + this.len = 0; IntoIter { groups, - num_groups, len, cursor, } From 21359c1a713b567e430df8b63a87beeec9dc5916 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Fri, 8 May 2026 08:51:10 +0000 Subject: [PATCH 12/23] Update table with intel results --- crates/hash-sorted-map/README.md | 57 ++++++++++---------------------- 1 file changed, 18 insertions(+), 39 deletions(-) diff --git a/crates/hash-sorted-map/README.md b/crates/hash-sorted-map/README.md index ebd5ef6..bd9bb3e 100644 --- a/crates/hash-sorted-map/README.md +++ b/crates/hash-sorted-map/README.md @@ -42,45 +42,24 @@ keys, which means: ## Benchmark results -All benchmarks insert 1000 random trigram hashes (scrambled with -`folded_multiply`) into maps with various configurations. Measured on Apple -M-series (aarch64). 
- -### Insert 1000 trigrams — pre-sized, no growth - -| Rank | Map | Time (µs) | vs best | -|------|-----|-----------|---------| -| 🥇 | FoldHashMap | 2.44 | — | -| 🥈 | FxHashMap | 2.61 | +7% | -| 🥉 | hashbrown::HashMap | 2.67 | +9% | -| 4 | **HashSortedMap** | **2.71** | +11% | -| 5 | hashbrown+Identity | 2.74 | +12% | -| 6 | std::HashMap+FNV | 3.27 | +34% | -| 7 | AHashMap | 3.22 | +32% | -| 8 | std::HashMap | 8.49 | +248% | - -### Re-insert same keys (all overwrites) - -| Map | Time (µs) | -|-----|-----------| -| **HashSortedMap** | **2.36** ✅ | -| hashbrown+Identity | 2.58 | - -### Growth from small (`with_capacity(128)`, 3 resize rounds) - -| Map | Time (µs) | Growth penalty | -|-----|-----------|----------------| -| **HashSortedMap** | **4.85** | +2.14 | -| hashbrown+Identity | 9.77 | +7.03 | - -### Key takeaways - -- **HashSortedMap matches the fastest hashbrown configurations** on pre-sized - first-time inserts and is **the fastest for overwrites**. -- **Growth is ~2× faster** than hashbrown thanks to the optimized - `insert_for_grow` path that skips duplicate checking and uses raw copies. -- The remaining gap to FoldHashMap (~11%) comes from foldhash's extremely - efficient hash function that pipelines well with hashbrown's SIMD scan. 
+Latest local Criterion snapshot from this repository's +`target/criterion` outputs (lower is better): + +| Scenario | HashSortedMap | Comparison | Result | +| :------------------------------------------- | ------------: | :------------------------------------- | :---------- | +| Insert 1000 trigrams (pre-sized) | 7.34 µs | hashbrown::HashMap: 12.88 µs | ~43% faster | +| Grow from capacity 128 | 20.54 µs | hashbrown+Identity: 23.17 µs | ~11% faster | +| Count 4000 trigrams (`entry().or_default()`) | 12.70 µs | hashbrown+Identity `entry()`: 13.53 µs | ~6% faster | +| Iterate 1000 trigrams (`iter()`) | 3.93 µs | hashbrown+Identity `iter()`: 2.87 µs | ~37% slower | +| Sort 100000 trigrams by hash | 1.83 ms | `Vec::sort_unstable`: 2.09 ms | ~12% faster | +| Merge 100 sorted maps + final sort | 161.93 ms | hashbrown merge + vec sort: 234.70 ms | ~31% faster | + +Key takeaways: + +- `HashSortedMap` is strongest on insert-heavy and merge/sort-heavy paths. +- Iteration throughput is currently behind `hashbrown+Identity`. +- In workloads that need deterministic hash-order serialization, the merge and + sort advantages can outweigh the iteration gap. 
## Running From 9e613741e4d38d437b2b6478996791100567e92c Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Fri, 8 May 2026 11:03:55 +0200 Subject: [PATCH 13/23] solve tiebreaker --- .../hash-sorted-map/benchmarks/performance.rs | 14 ++++- crates/hash-sorted-map/src/hash_sorted_map.rs | 60 +++++++++++++++---- 2 files changed, 59 insertions(+), 15 deletions(-) diff --git a/crates/hash-sorted-map/benchmarks/performance.rs b/crates/hash-sorted-map/benchmarks/performance.rs index 6695ee3..9a54dcd 100644 --- a/crates/hash-sorted-map/benchmarks/performance.rs +++ b/crates/hash-sorted-map/benchmarks/performance.rs @@ -408,7 +408,11 @@ fn bench_sort(c: &mut Criterion) { .enumerate() .map(|(i, &key)| (key, i)) .collect(); - vec.sort_unstable_by_key(|&(key, _)| hasher.hash_one(key)); + vec.sort_unstable_by(|a, b| { + let ha = hasher.hash_one(a.0); + let hb = hasher.hash_one(b.0); + (ha, a.0).cmp(&(hb, b.0)) + }); vec }); }); @@ -498,7 +502,7 @@ fn bench_merge_sort(c: &mut Criterion) { let result: Vec<(u32, u32)> = sorted_vecs .into_iter() .map(|v| v.into_iter()) - .kmerge_by(|a, b| a.0 <= b.0) + .kmerge_by(|a, b| (a.0, a.1) <= (b.0, b.1)) .chunk_by(|&(_, key, _)| key) .into_iter() .map(|(key, group)| (key, group.map(|(_, _, c)| c).sum())) @@ -518,7 +522,11 @@ fn bench_merge_sort(c: &mut Criterion) { } } let mut vec: Vec<(u32, u32)> = map.into_iter().collect(); - vec.sort_unstable_by_key(|&(key, _)| hasher.hash_one(key)); + vec.sort_unstable_by(|a, b| { + let ha = hasher.hash_one(a.0); + let hb = hasher.hash_one(b.0); + (ha, a.0).cmp(&(hb, b.0)) + }); vec }); }); diff --git a/crates/hash-sorted-map/src/hash_sorted_map.rs b/crates/hash-sorted-map/src/hash_sorted_map.rs index d4b9ead..d93cdf2 100644 --- a/crates/hash-sorted-map/src/hash_sorted_map.rs +++ b/crates/hash-sorted-map/src/hash_sorted_map.rs @@ -96,8 +96,9 @@ impl HashSortedMap { } } -impl HashSortedMap { - /// Sort all entries within each primary group chain by their hash value. 
+impl HashSortedMap { + /// Sort all entries within each primary group chain by their hash value, + /// breaking ties by key. /// /// After sorting, iteration visits entries in hash order within each /// primary group (and since primary groups are visited in group-index @@ -153,19 +154,22 @@ impl HashSortedMap { let last_gi = *chain.last().unwrap() as usize; compact_last_group(&mut self.groups[last_gi], &self.hash_builder, &mut hashes); let n = hashes.len(); - // Insertion sort by hash. + // Insertion sort by (hash, key). for i in 1..n { let mut j = i; - while j > 0 && hashes[j - 1] > hashes[j] { + while j > 0 + && should_swap(hashes[j - 1], hashes[j], &self.groups, &chain, j - 1, j) + { hashes.swap(j - 1, j); swap_chain_slots(&mut self.groups, &chain, j - 1, j); j -= 1; } } - } } +} +impl HashSortedMap { pub fn insert(&mut self, key: K, value: V) -> Option { let hash = self.hash_builder.hash_one(&key); self.insert_hashed(hash, key, value) @@ -406,6 +410,27 @@ fn chain_slot(chain: &[u32], pos: usize) -> (usize, usize) { (chain[pos / GROUP_SIZE] as usize, pos % GROUP_SIZE) } +/// Compare two positions: returns true if position `a` should come after position `b` +/// (i.e., the pair is out of order). Comparison is (hash, key). +#[inline] +fn should_swap( + hash_a: u64, + hash_b: u64, + groups: &[Group], + chain: &[u32], + a: usize, + b: usize, +) -> bool { + if hash_a != hash_b { + return hash_a > hash_b; + } + let (gi_a, slot_a) = chain_slot(chain, a); + let (gi_b, slot_b) = chain_slot(chain, b); + let key_a = unsafe { groups[gi_a].keys[slot_a].assume_init_ref() }; + let key_b = unsafe { groups[gi_b].keys[slot_b].assume_init_ref() }; + key_a > key_b +} + /// Compact the last group in a chain: move all occupied entries to slots /// 0..n and clear the rest. Computes hashes for each occupied entry and /// appends them to `hashes`. @@ -910,32 +935,38 @@ mod tests { map.insert(i, i); } map.sort_by_hash(); - // Iteration should now yield entries in hash order. 
+ // Iteration should now yield entries in (hash, key) order. let mut prev_hash = 0u64; + let mut prev_key = 0u32; let mut first = true; for (&k, _) in &map { let h = hasher.hash_one(&k); if !first { - assert!(h > prev_hash, "hash order violated: {prev_hash:#x} > {h:#x}"); + assert!( + (h, k) >= (prev_hash, prev_key), + "(hash, key) order violated: ({prev_hash:#x}, {prev_key}) > ({h:#x}, {k})" + ); } prev_hash = h; + prev_key = k; first = false; } } #[test] fn sort_by_hash_with_overflow() { - // Force overflow chains via fixed hash, then sort. + // Force overflow chains via fixed hash — all keys collide, so sort + // should produce key order as tie-breaker. let mut map = HashSortedMap::with_capacity_and_hasher(1, FixedState(0)); for i in 0..50u32 { map.insert(i, i); } map.sort_by_hash(); assert_eq!(map.len(), 50); - let mut entries: Vec<_> = map.into_iter().collect(); - entries.sort_by_key(|&(k, _)| k); + // All hashes are equal, so entries should be in key order. + let entries: Vec<_> = map.into_iter().collect(); for i in 0..50u32 { - assert_eq!(entries[i as usize], (i, i), "missing key {i}"); + assert_eq!(entries[i as usize], (i, i), "key order violated at {i}"); } } @@ -951,13 +982,18 @@ mod tests { map.sort_by_hash(); assert_eq!(map.len(), 100); let mut prev_hash = 0u64; + let mut prev_key = String::new(); let mut first = true; for (k, _) in &map { let h = hasher.hash_one(k); if !first { - assert!(h > prev_hash, "hash order violated: {prev_hash:#x} > {h:#x}"); + assert!( + (h, k) >= (prev_hash, &prev_key), + "(hash, key) order violated" + ); } prev_hash = h; + prev_key = k.clone(); first = false; } } From c11ba848c517b6488e71b1a3d99134891270d0e0 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Fri, 8 May 2026 11:09:18 +0200 Subject: [PATCH 14/23] remove smallvec --- crates/hash-sorted-map/Cargo.toml | 1 - crates/hash-sorted-map/src/hash_sorted_map.rs | 7 +++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git 
a/crates/hash-sorted-map/Cargo.toml b/crates/hash-sorted-map/Cargo.toml index 7f17b58..6eac82f 100644 --- a/crates/hash-sorted-map/Cargo.toml +++ b/crates/hash-sorted-map/Cargo.toml @@ -10,4 +10,3 @@ keywords = ["hashmap", "sorted", "merge", "simd"] categories = ["algorithms", "data-structures"] [dependencies] -smallvec = "1" diff --git a/crates/hash-sorted-map/src/hash_sorted_map.rs b/crates/hash-sorted-map/src/hash_sorted_map.rs index d93cdf2..898e91a 100644 --- a/crates/hash-sorted-map/src/hash_sorted_map.rs +++ b/crates/hash-sorted-map/src/hash_sorted_map.rs @@ -4,7 +4,6 @@ use std::collections::hash_map::RandomState; use std::hash::{BuildHasher, Hash}; use std::marker::PhantomData; -use smallvec::SmallVec; use super::group::Group; use super::group_ops::{self, CTRL_EMPTY, GROUP_SIZE}; @@ -121,8 +120,8 @@ impl HashSortedMap { /// `GROUP_SIZE / MAX_FILL ≈ 16`, the per-element cost stays constant. pub fn sort_by_hash(&mut self) { let num_primary = 1usize << self.n_bits; - let mut chain: SmallVec<[u32; 4]> = SmallVec::new(); - let mut hashes: SmallVec<[u64; 16]> = SmallVec::new(); + let mut chain: Vec = Vec::new(); + let mut hashes: Vec = Vec::new(); for primary_gi in 0..num_primary { chain.clear(); @@ -437,7 +436,7 @@ fn should_swap( fn compact_last_group( group: &mut Group, hash_builder: &S, - hashes: &mut SmallVec<[u64; 16]>, + hashes: &mut Vec, ) { let mut write = 0usize; let mut full_mask = group_ops::match_full(&group.ctrl); From 7c9c89b4eae1e4fedb1e3ada9f851f1bcb83ae46 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Fri, 8 May 2026 11:15:15 +0200 Subject: [PATCH 15/23] fix clippy --- crates/hash-sorted-map/src/hash_sorted_map.rs | 11 ++--------- crates/hash-sorted-map/src/iter.rs | 1 + 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/crates/hash-sorted-map/src/hash_sorted_map.rs b/crates/hash-sorted-map/src/hash_sorted_map.rs index 898e91a..09264d1 100644 --- a/crates/hash-sorted-map/src/hash_sorted_map.rs +++ 
b/crates/hash-sorted-map/src/hash_sorted_map.rs @@ -4,7 +4,6 @@ use std::collections::hash_map::RandomState; use std::hash::{BuildHasher, Hash}; use std::marker::PhantomData; - use super::group::Group; use super::group_ops::{self, CTRL_EMPTY, GROUP_SIZE}; @@ -156,8 +155,7 @@ impl HashSortedMap { // Insertion sort by (hash, key). for i in 1..n { let mut j = i; - while j > 0 - && should_swap(hashes[j - 1], hashes[j], &self.groups, &chain, j - 1, j) + while j > 0 && should_swap(hashes[j - 1], hashes[j], &self.groups, &chain, j - 1, j) { hashes.swap(j - 1, j); swap_chain_slots(&mut self.groups, &chain, j - 1, j); @@ -461,12 +459,7 @@ fn compact_last_group( } /// Swap the ctrl byte, key, and value between two flat positions in a chain. -fn swap_chain_slots( - groups: &mut [Group], - chain: &[u32], - a: usize, - b: usize, -) { +fn swap_chain_slots(groups: &mut [Group], chain: &[u32], a: usize, b: usize) { let (gi_a, slot_a) = chain_slot(chain, a); let (gi_b, slot_b) = chain_slot(chain, b); if gi_a == gi_b { diff --git a/crates/hash-sorted-map/src/iter.rs b/crates/hash-sorted-map/src/iter.rs index c5bed6a..48623fa 100644 --- a/crates/hash-sorted-map/src/iter.rs +++ b/crates/hash-sorted-map/src/iter.rs @@ -166,6 +166,7 @@ impl HashSortedMap { } /// Consumes the map and returns an iterator over `(K, V)` pairs. + #[allow(clippy::should_implement_trait)] pub fn into_iter(self) -> IntoIter { let cursor = IterCursor::new(self.n_bits, self.groups.len()); // Prevent Drop from running on self — we're moving groups out. 
From 7e3931d57d3e5299082187413995cf002aee27a7 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Fri, 8 May 2026 11:22:57 +0200 Subject: [PATCH 16/23] Update hash_sorted_map.rs --- crates/hash-sorted-map/src/hash_sorted_map.rs | 77 ++++++++----------- 1 file changed, 31 insertions(+), 46 deletions(-) diff --git a/crates/hash-sorted-map/src/hash_sorted_map.rs b/crates/hash-sorted-map/src/hash_sorted_map.rs index 09264d1..a5e7577 100644 --- a/crates/hash-sorted-map/src/hash_sorted_map.rs +++ b/crates/hash-sorted-map/src/hash_sorted_map.rs @@ -154,13 +154,40 @@ impl HashSortedMap { let n = hashes.len(); // Insertion sort by (hash, key). for i in 1..n { + // Extract element at position i. + let cur_hash = hashes[i]; + let (gi, si) = chain_slot(&chain, i); + let cur_key = unsafe { self.groups[gi].keys[si].assume_init_read() }; + let cur_val = unsafe { self.groups[gi].values[si].assume_init_read() }; + // Find insertion point via linear scan backward. let mut j = i; - while j > 0 && should_swap(hashes[j - 1], hashes[j], &self.groups, &chain, j - 1, j) - { - hashes.swap(j - 1, j); - swap_chain_slots(&mut self.groups, &chain, j - 1, j); + while j > 0 { + let (gj, sj) = chain_slot(&chain, j - 1); + let prev_key = unsafe { self.groups[gj].keys[sj].assume_init_ref() }; + if (hashes[j - 1], prev_key) <= (cur_hash, &cur_key) { + break; + } j -= 1; } + if j < i { + // Shift positions j..i up by one. + hashes.copy_within(j..i, j + 1); + for pos in (j..i).rev() { + let (src_g, src_s) = chain_slot(&chain, pos); + let (dst_g, dst_s) = chain_slot(&chain, pos + 1); + unsafe { + let k = std::ptr::read(&self.groups[src_g].keys[src_s]); + let v = std::ptr::read(&self.groups[src_g].values[src_s]); + self.groups[dst_g].keys[dst_s] = k; + self.groups[dst_g].values[dst_s] = v; + } + } + } + // Insert at position j (or write back to i if already in place). 
+ hashes[j] = cur_hash; + let (gj, sj) = chain_slot(&chain, j); + self.groups[gj].keys[sj] = MaybeUninit::new(cur_key); + self.groups[gj].values[sj] = MaybeUninit::new(cur_val); } } } @@ -407,27 +434,6 @@ fn chain_slot(chain: &[u32], pos: usize) -> (usize, usize) { (chain[pos / GROUP_SIZE] as usize, pos % GROUP_SIZE) } -/// Compare two positions: returns true if position `a` should come after position `b` -/// (i.e., the pair is out of order). Comparison is (hash, key). -#[inline] -fn should_swap( - hash_a: u64, - hash_b: u64, - groups: &[Group], - chain: &[u32], - a: usize, - b: usize, -) -> bool { - if hash_a != hash_b { - return hash_a > hash_b; - } - let (gi_a, slot_a) = chain_slot(chain, a); - let (gi_b, slot_b) = chain_slot(chain, b); - let key_a = unsafe { groups[gi_a].keys[slot_a].assume_init_ref() }; - let key_b = unsafe { groups[gi_b].keys[slot_b].assume_init_ref() }; - key_a > key_b -} - /// Compact the last group in a chain: move all occupied entries to slots /// 0..n and clear the rest. Computes hashes for each occupied entry and /// appends them to `hashes`. @@ -458,27 +464,6 @@ fn compact_last_group( } } -/// Swap the ctrl byte, key, and value between two flat positions in a chain. 
-fn swap_chain_slots(groups: &mut [Group], chain: &[u32], a: usize, b: usize) { - let (gi_a, slot_a) = chain_slot(chain, a); - let (gi_b, slot_b) = chain_slot(chain, b); - if gi_a == gi_b { - let g = &mut groups[gi_a]; - g.keys.swap(slot_a, slot_b); - g.values.swap(slot_a, slot_b); - } else { - let (ga, gb) = if gi_a < gi_b { - let (left, right) = groups.split_at_mut(gi_b); - (&mut left[gi_a], &mut right[0]) - } else { - let (left, right) = groups.split_at_mut(gi_a); - (&mut right[0], &mut left[gi_b]) - }; - std::mem::swap(&mut ga.keys[slot_a], &mut gb.keys[slot_b]); - std::mem::swap(&mut ga.values[slot_a], &mut gb.values[slot_b]); - } -} - // ──────────────────────────────────────────────────────────────────────── // Entry API // ──────────────────────────────────────────────────────────────────────── From 280aa74d5bb505bca6f4b81ec7b0fb8f31718d6c Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Fri, 8 May 2026 11:23:47 +0200 Subject: [PATCH 17/23] Update performance.rs --- .../hash-sorted-map/benchmarks/performance.rs | 35 ++++++++----------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/crates/hash-sorted-map/benchmarks/performance.rs b/crates/hash-sorted-map/benchmarks/performance.rs index 9a54dcd..07cfc2f 100644 --- a/crates/hash-sorted-map/benchmarks/performance.rs +++ b/crates/hash-sorted-map/benchmarks/performance.rs @@ -403,11 +403,7 @@ fn bench_sort(c: &mut Criterion) { group.bench_function("Vec::sort_unstable", |b| { b.iter(|| { - let mut vec: Vec<_> = keys - .iter() - .enumerate() - .map(|(i, &key)| (key, i)) - .collect(); + let mut vec: Vec<_> = keys.iter().enumerate().map(|(i, &key)| (key, i)).collect(); vec.sort_unstable_by(|a, b| { let ha = hasher.hash_one(a.0); let hb = hasher.hash_one(b.0); @@ -419,10 +415,8 @@ fn bench_sort(c: &mut Criterion) { group.bench_function("HashSortedMap sort_by_hash", |b| { b.iter(|| { - let mut map = HashSortedMap::with_capacity_and_hasher( - keys.len(), - IdentityBuildHasher::default(), - ); + 
let mut map = + HashSortedMap::with_capacity_and_hasher(keys.len(), IdentityBuildHasher::default()); for (i, &key) in keys.iter().enumerate() { map.insert(key, i); } @@ -443,7 +437,10 @@ fn bench_merge_sort(c: &mut Criterion) { .map(|_| { let mut rng = rand::rng(); (0..KEYS_PER_MAP) - .map(|_| folded_multiply(rng.random_range(0..1_000_000u32) as u64, 0x243f6a8885a308d3) as u32) + .map(|_| { + folded_multiply(rng.random_range(0..1_000_000u32) as u64, 0x243f6a8885a308d3) + as u32 + }) .collect() }) .collect(); @@ -452,8 +449,7 @@ fn bench_merge_sort(c: &mut Criterion) { let hash_maps: Vec<_> = maps_data .into_iter() .map(|keys| { - let mut map = - HashSortedMap::with_hasher(IdentityBuildHasher::default()); + let mut map = HashSortedMap::with_hasher(IdentityBuildHasher::default()); for key in keys { *map.entry(key).or_default() += 1u32; } @@ -514,8 +510,9 @@ fn bench_merge_sort(c: &mut Criterion) { // ── 3. hashbrown HashMap merge, then sort into Vec ────────────── group.bench_function("hashbrown merge + Vec sort", |b| { b.iter(|| { - let mut map = - hashbrown::HashMap::::with_hasher(IdentityBuildHasher::default()); + let mut map = hashbrown::HashMap::::with_hasher( + IdentityBuildHasher::default(), + ); for container in &hash_maps { for (&key, &value) in container { *map.entry(key).or_default() += value; @@ -534,8 +531,9 @@ fn bench_merge_sort(c: &mut Criterion) { // ── 4. 
hashbrown HashMap merge only (no sort) ─────────────────── group.bench_function("hashbrown merge", |b| { b.iter(|| { - let mut map = - hashbrown::HashMap::::with_hasher(IdentityBuildHasher::default()); + let mut map = hashbrown::HashMap::::with_hasher( + IdentityBuildHasher::default(), + ); for container in &hash_maps { for (&key, &value) in container { *map.entry(key).or_default() += value; @@ -580,10 +578,7 @@ fn bench_merge_sort(c: &mut Criterion) { group.bench_function("HashSortedMap merge presized", |b| { b.iter(|| { let mut map: HashSortedMap = - HashSortedMap::with_capacity_and_hasher( - 1_000_000, - IdentityBuildHasher::default(), - ); + HashSortedMap::with_capacity_and_hasher(1_000_000, IdentityBuildHasher::default()); for container in &hash_maps { for (&key, &value) in container { *map.entry(key).or_default() += value; From 1a35afa5421e891b7631fbd9898384e71ef0f322 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Fri, 8 May 2026 11:38:20 +0200 Subject: [PATCH 18/23] fix bug --- crates/hash-sorted-map/src/hash_sorted_map.rs | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/crates/hash-sorted-map/src/hash_sorted_map.rs b/crates/hash-sorted-map/src/hash_sorted_map.rs index a5e7577..c4c98a6 100644 --- a/crates/hash-sorted-map/src/hash_sorted_map.rs +++ b/crates/hash-sorted-map/src/hash_sorted_map.rs @@ -116,7 +116,7 @@ impl HashSortedMap { /// /// Dividing by `n` gives the expected cost per element: `1 + n/m` (for /// `m ≫ 1`). Since `n/m` is the average chain length, bounded by - /// `GROUP_SIZE / MAX_FILL ≈ 16`, the per-element cost stays constant. + /// `GROUP_SIZE / MAX_FILL`, the per-element cost stays constant. 
pub fn sort_by_hash(&mut self) { let num_primary = 1usize << self.n_bits; let mut chain: Vec = Vec::new(); @@ -189,6 +189,13 @@ impl HashSortedMap { self.groups[gj].keys[sj] = MaybeUninit::new(cur_key); self.groups[gj].values[sj] = MaybeUninit::new(cur_val); } + // Rebuild ctrl/tag bytes from the sorted hashes so that + // get/insert/entry still work after sorting. + // This adds a small performance penalty of maybe 6%. + for (pos, &h) in hashes.iter().enumerate() { + let (gi, si) = chain_slot(&chain, pos); + self.groups[gi].ctrl[si] = tag(h); + } } } } @@ -455,10 +462,7 @@ fn compact_last_group( } write += 1; } - // Fix ctrl bytes: only the top bit matters (full vs empty). - for slot in 0..write { - group.ctrl[slot] = 0x80; - } + // Mark tail slots as empty; real tags are filled in after sorting. for slot in write..GROUP_SIZE { group.ctrl[slot] = CTRL_EMPTY; } @@ -895,6 +899,10 @@ mod tests { } map.sort_by_hash(); assert_eq!(map.len(), 200); + // Lookups must still work after sorting. + for i in 0..200u32 { + assert_eq!(map.get(&i), Some(&(i * 10)), "get failed for key {i}"); + } let mut entries: Vec<_> = map.into_iter().collect(); entries.sort_by_key(|&(k, _)| k); for i in 0..200u32 { From ca37af7b80fde4f1d5228b0c24ce525638737910 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Fri, 8 May 2026 12:28:31 +0200 Subject: [PATCH 19/23] Update hash_sorted_map.rs --- crates/hash-sorted-map/src/hash_sorted_map.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/hash-sorted-map/src/hash_sorted_map.rs b/crates/hash-sorted-map/src/hash_sorted_map.rs index c4c98a6..560afd8 100644 --- a/crates/hash-sorted-map/src/hash_sorted_map.rs +++ b/crates/hash-sorted-map/src/hash_sorted_map.rs @@ -149,7 +149,7 @@ impl HashSortedMap { } } // The last group may have gaps — compact it to the front. 
- let last_gi = *chain.last().unwrap() as usize; + let last_gi = *chain.last().expect("chain not be empty") as usize; compact_last_group(&mut self.groups[last_gi], &self.hash_builder, &mut hashes); let n = hashes.len(); // Insertion sort by (hash, key). From f9699904c037e4a63c66a3c8941fce3a34c92e31 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Fri, 8 May 2026 14:29:06 +0200 Subject: [PATCH 20/23] Update hash_sorted_map.rs --- crates/hash-sorted-map/src/hash_sorted_map.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/hash-sorted-map/src/hash_sorted_map.rs b/crates/hash-sorted-map/src/hash_sorted_map.rs index 560afd8..a096cde 100644 --- a/crates/hash-sorted-map/src/hash_sorted_map.rs +++ b/crates/hash-sorted-map/src/hash_sorted_map.rs @@ -925,7 +925,7 @@ mod tests { let mut prev_key = 0u32; let mut first = true; for (&k, _) in &map { - let h = hasher.hash_one(&k); + let h = hasher.hash_one(k); if !first { assert!( (h, k) >= (prev_hash, prev_key), From 41c4e9fd8f9676e804857166b444395cdabac4d0 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Tue, 19 May 2026 12:37:40 +0200 Subject: [PATCH 21/23] remove slot hint altogether --- Cargo.toml | 1 + crates/hash-sorted-map/OPTIMIZATIONS.md | 25 ++--- crates/hash-sorted-map/README.md | 4 +- crates/hash-sorted-map/src/group_ops.rs | 67 ++++--------- crates/hash-sorted-map/src/hash_sorted_map.rs | 95 ++++++------------- crates/hash-sorted-map/src/iter.rs | 30 +++--- 6 files changed, 85 insertions(+), 137 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 312f46d..7547f1b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,7 @@ members = [ "crates/*", "crates/bpe/benchmarks", "crates/bpe/tests", + "crates/hash-sorted-map/benchmarks", ] resolver = "2" diff --git a/crates/hash-sorted-map/OPTIMIZATIONS.md b/crates/hash-sorted-map/OPTIMIZATIONS.md index bc1eb52..a6d130b 100644 --- a/crates/hash-sorted-map/OPTIMIZATIONS.md +++ b/crates/hash-sorted-map/OPTIMIZATIONS.md @@ -105,7 +105,7 @@ 
the overflow path. SIMD version** by pessimizing NEON code generation. Removed from the SIMD implementation, kept in the scalar version. -### 7. Slot Hint Fast Path ⚠️ Removed from Lookup Paths +### 7. Slot Hint Fast Path ❌ Removed Originally, HashSortedMap checked a preferred slot before scanning the group: ```rust @@ -120,17 +120,18 @@ slots, making the hint check a 50/50 branch that pollutes the branch predictor. SIMD-only scanning (match_tag + match_empty) is uniformly fast regardless of key distribution. -**Results of removing slot_hint from different paths:** -- `find_or_insertion_slot` (entry API): **−25% latency** on merge benchmark -- `get_hashed`: **−4.4%** improvement (SIMD scan is faster than branch+scalar) -- `insert_hashed`: **+7%** regression on presized insert (the hint genuinely - helps when inserting into a mostly-empty group), but accepted for code - simplicity since the merge workload matters more +**Structural benefit of removal**: Without the slot hint, inserts always +append to the first empty slot. This guarantees that occupied slots are +**packed contiguously from the beginning** of each group (no gaps). This +invariant enables: +- `count_occupied()`: a single `leading_zeros()` on the ctrl word replaces + bitmask scanning to find the next free slot or count entries +- Simpler `insert_for_grow()`: just write at position `count_occupied()` +- Simpler iteration: occupied slots are always `0..count_occupied()` +- Simpler `sort_by_hash()`: no need to compact gaps before sorting -**Current state**: slot_hint is **only** used in `insert_for_grow()`, where -the map is guaranteed sparse after a resize (groups are mostly empty, so the -hint slot is very likely free). For all other paths, SIMD-only scanning is -used. +**Current state**: Slot hint is fully removed. All paths use SIMD group +scanning for lookups and `count_occupied()` for finding the insertion point. ### 8. Overflow Reserve Sizing ✅ Validated @@ -178,7 +179,7 @@ entropy in both halves. 
Also changed trigram generation to use | Capacity sizing fix | **−50%** insert time (biggest win) | | Optimized growth path | **2× faster** growth than hashbrown | | SIMD group scanning | **−5%** insert time | -| Slot hint removal (entry/get) | **−25%** merge latency | +| Slot hint removal | **−25%** merge latency, contiguous packing | | Branch hints (scalar only) | **−2–6%** | | IdentityHasher fix | Enabled fair comparison | diff --git a/crates/hash-sorted-map/README.md b/crates/hash-sorted-map/README.md index bd9bb3e..92fa107 100644 --- a/crates/hash-sorted-map/README.md +++ b/crates/hash-sorted-map/README.md @@ -29,8 +29,8 @@ keys, which means: - **Overflow chaining** instead of open addressing — groups that fill up link to overflow groups rather than probing into neighbours. -- **Slot hint** — a preferred slot index derived from the hash, checked before - scanning the group. Gives a direct hit on most inserts at low load. +- **Contiguous packing** — occupied slots are always packed from position 0 + with no gaps, enabling a single `leading_zeros()` to find the next free slot. - **SIMD group scanning** — uses NEON on aarch64, SSE2 on x86\_64, and a scalar fallback elsewhere to scan 8–16 control bytes in parallel. - **AoS group layout** — each group stores its control bytes, keys, and values diff --git a/crates/hash-sorted-map/src/group_ops.rs b/crates/hash-sorted-map/src/group_ops.rs index 27958ce..cc14813 100644 --- a/crates/hash-sorted-map/src/group_ops.rs +++ b/crates/hash-sorted-map/src/group_ops.rs @@ -38,21 +38,6 @@ mod arch { } } - #[inline(always)] - pub fn match_empty(ctrl: &[u8; GROUP_SIZE]) -> Mask { - match_tag(ctrl, super::CTRL_EMPTY) - } - - /// Mask of slots whose ctrl byte has the high bit set (occupied). - /// Uses SSE2 `_mm_movemask_epi8` which extracts the top bit of each byte. 
- #[inline(always)] - pub fn match_full(ctrl: &[u8; GROUP_SIZE]) -> Mask { - unsafe { - let group = x86::_mm_loadu_si128(ctrl.as_ptr() as *const x86::__m128i); - x86::_mm_movemask_epi8(group) as u32 - } - } - #[inline(always)] pub fn lowest(mask: Mask) -> usize { mask.trailing_zeros() as usize @@ -67,6 +52,13 @@ mod arch { *mask &= *mask - 1; Some(i) } + + /// Number of occupied slots, computed as `GROUP_SIZE` minus the run of zero (empty) bytes at the high-index end of `ctrl`; relies on gap-free packing from slot 0 and little-endian byte order. + #[inline(always)] + pub fn count_occupied(ctrl: &[u8; GROUP_SIZE]) -> usize { + let word = u128::from_ne_bytes(*ctrl); + GROUP_SIZE - (word.leading_zeros() / 8) as usize + } } #[cfg(target_arch = "aarch64")] @@ -84,24 +76,6 @@ mod arch { } } - #[inline(always)] - pub fn match_empty(ctrl: &[u8; GROUP_SIZE]) -> Mask { - unsafe { - let group = neon::vld1_u8(ctrl.as_ptr()); - let cmp = neon::vceq_u8(group, neon::vdup_n_u8(0)); - neon::vget_lane_u64(neon::vreinterpret_u64_u8(cmp), 0) & 0x8080808080808080 - } - } - - /// Mask of slots whose ctrl byte has the high bit set (occupied). - #[inline(always)] - pub fn match_full(ctrl: &[u8; GROUP_SIZE]) -> Mask { - unsafe { - let group = neon::vld1_u8(ctrl.as_ptr()); - neon::vget_lane_u64(neon::vreinterpret_u64_u8(group), 0) & 0x8080808080808080 - } - } - #[inline(always)] pub fn lowest(mask: Mask) -> usize { (mask.trailing_zeros() >> 3) as usize @@ -116,6 +90,13 @@ mod arch { *mask &= *mask - 1; Some(i) } + + /// Number of occupied slots, computed as `GROUP_SIZE` minus the run of zero (empty) bytes at the high-index end of `ctrl`; relies on gap-free packing from slot 0 and little-endian byte order. 
+ #[inline(always)] + pub fn count_occupied(ctrl: &[u8; GROUP_SIZE]) -> usize { + let word = u64::from_ne_bytes(*ctrl); + GROUP_SIZE - (word.leading_zeros() / 8) as usize + } } #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] @@ -130,19 +111,6 @@ mod arch { (xor.wrapping_sub(0x0101010101010101)) & !xor & 0x8080808080808080 } - #[inline(always)] - pub fn match_empty(ctrl: &[u8; GROUP_SIZE]) -> Mask { - let word = u64::from_ne_bytes(*ctrl); - !word & 0x8080808080808080 - } - - /// Mask of slots whose ctrl byte has the high bit set (occupied). - #[inline(always)] - pub fn match_full(ctrl: &[u8; GROUP_SIZE]) -> Mask { - let word = u64::from_ne_bytes(*ctrl); - word & 0x8080808080808080 - } - #[inline(always)] pub fn lowest(mask: Mask) -> usize { (mask.trailing_zeros() >> 3) as usize @@ -157,6 +125,13 @@ mod arch { *mask &= *mask - 1; Some(i) } + + /// Number of occupied slots, computed as `GROUP_SIZE` minus the run of zero (empty) bytes at the high-index end of `ctrl`; relies on gap-free packing from slot 0 and little-endian byte order. + #[inline(always)] + pub fn count_occupied(ctrl: &[u8; GROUP_SIZE]) -> usize { + let word = u64::from_ne_bytes(*ctrl); + GROUP_SIZE - (word.leading_zeros() / 8) as usize + } } pub use arch::*; diff --git a/crates/hash-sorted-map/src/hash_sorted_map.rs b/crates/hash-sorted-map/src/hash_sorted_map.rs index a096cde..8566cf1 100644 --- a/crates/hash-sorted-map/src/hash_sorted_map.rs +++ b/crates/hash-sorted-map/src/hash_sorted_map.rs @@ -16,11 +16,6 @@ fn tag(hash: u64) -> u8 { (hash as u8) | 0x80 } -#[inline] -fn slot_hint(hash: u64) -> usize { - ((hash >> 7) & (GROUP_SIZE as u64 - 1)) as usize -} - // ──────────────────────────────────────────────────────────────────────── // HashSortedMap // ──────────────────────────────────────────────────────────────────────── @@ -148,9 +143,17 @@ impl HashSortedMap { hashes.push(hash); } } - // The last group may have gaps — compact it to the front. 
- let last_gi = *chain.last().expect("chain not be empty") as usize; - compact_last_group(&mut self.groups[last_gi], &self.hash_builder, &mut hashes); + let g = &self.groups[*chain.last().expect("chain should have at least one group") as usize]; + for slot in 0..GROUP_SIZE { + if g.ctrl[slot] == CTRL_EMPTY { + break; + } + let hash = self + .hash_builder + .hash_one(unsafe { g.keys[slot].assume_init_ref() }); + hashes.push(hash); + } + let n = hashes.len(); // Insertion sort by (hash, key). for i in 1..n { @@ -265,12 +268,11 @@ impl HashSortedMap { } } // Check for empty slot in this group. - let empty_mask = group_ops::match_empty(&group.ctrl); - if empty_mask != 0 { - let i = group_ops::lowest(empty_mask); - group.ctrl[i] = tag; - group.keys[i] = MaybeUninit::new(key); - group.values[i] = MaybeUninit::new(value); + let occupied_slots = group_ops::count_occupied(&group.ctrl); + if occupied_slots != GROUP_SIZE { + group.ctrl[occupied_slots] = tag; + group.keys[occupied_slots] = MaybeUninit::new(key); + group.values[occupied_slots] = MaybeUninit::new(value); self.len += 1; return None; } @@ -315,7 +317,7 @@ impl HashSortedMap { return Some(unsafe { group.values[i].assume_init_ref() }); } } - if group_ops::match_empty(&group.ctrl) != 0 { + if group.ctrl[GROUP_SIZE - 1] == CTRL_EMPTY { return None; } if group.overflow == NO_OVERFLOW { @@ -346,12 +348,11 @@ impl HashSortedMap { } } // Check for empty slot in this group. - let empty_mask = group_ops::match_empty(&group.ctrl); - if empty_mask != 0 { - let i = group_ops::lowest(empty_mask); + let occupied_slots = group_ops::count_occupied(&group.ctrl); + if occupied_slots != GROUP_SIZE { return FindResult::Vacant(Insertion::Empty { group: group as *mut _, - slot: i, + slot: occupied_slots, }); } // Group full — follow or report end of chain. 
@@ -379,8 +380,7 @@ impl HashSortedMap { self.len = 0; for group in &old_groups[..old_num_groups] { - let mut full_mask = group_ops::match_full(&group.ctrl); - while let Some(i) = group_ops::next_match(&mut full_mask) { + for i in 0..group_ops::count_occupied(&group.ctrl) { let hash = self .hash_builder .hash_one(unsafe { group.keys[i].assume_init_ref() }); @@ -396,18 +396,13 @@ impl HashSortedMap { fn insert_for_grow(&mut self, hash: u64, key_src: *const K, value_src: *const V) { let tag = tag(hash); - let mut hint = slot_hint(hash); let gi = self.group_index(hash); let mut group = &mut self.groups[gi]; - loop { - if group.ctrl[hint] == CTRL_EMPTY { - break; - } - let empty_mask = group_ops::match_empty(&group.ctrl); - if empty_mask != 0 { - hint = group_ops::lowest(empty_mask); - break; + let slot = loop { + let occupied = group_ops::count_occupied(&group.ctrl); + if occupied != GROUP_SIZE { + break occupied; } let overflow = group.overflow; if overflow != NO_OVERFLOW { @@ -417,15 +412,15 @@ impl HashSortedMap { group.overflow = new_gi as u32; self.num_groups += 1; group = &mut self.groups[new_gi]; - break; + break 0; } - } - group.ctrl[hint] = tag; + }; + group.ctrl[slot] = tag; unsafe { - group.keys[hint] + group.keys[slot] .as_mut_ptr() .copy_from_nonoverlapping(key_src, 1); - group.values[hint] + group.values[slot] .as_mut_ptr() .copy_from_nonoverlapping(value_src, 1); } @@ -441,33 +436,6 @@ fn chain_slot(chain: &[u32], pos: usize) -> (usize, usize) { (chain[pos / GROUP_SIZE] as usize, pos % GROUP_SIZE) } -/// Compact the last group in a chain: move all occupied entries to slots -/// 0..n and clear the rest. Computes hashes for each occupied entry and -/// appends them to `hashes`. 
-fn compact_last_group( - group: &mut Group, - hash_builder: &S, - hashes: &mut Vec, -) { - let mut write = 0usize; - let mut full_mask = group_ops::match_full(&group.ctrl); - while let Some(read) = group_ops::next_match(&mut full_mask) { - let hash = hash_builder.hash_one(unsafe { group.keys[read].assume_init_ref() }); - hashes.push(hash); - if read != write { - unsafe { - group.keys[write] = std::ptr::read(&group.keys[read]); - group.values[write] = std::ptr::read(&group.values[read]); - } - } - write += 1; - } - // Mark tail slots as empty; real tags are filled in after sorting. - for slot in write..GROUP_SIZE { - group.ctrl[slot] = CTRL_EMPTY; - } -} - // ──────────────────────────────────────────────────────────────────────── // Entry API // ──────────────────────────────────────────────────────────────────────── @@ -593,7 +561,7 @@ impl<'a, K: Hash + Eq, V, S: BuildHasher> VacantEntry<'a, K, V, S> { let (new_gi, new_group) = unsafe { let map = &mut *map; if map.num_groups as usize == map.groups.len() { - return insert_after_grow(map, hash, key, value); + return insert_after_grow(map, key, value); } let new_gi = map.num_groups as usize; map.num_groups += 1; @@ -630,7 +598,6 @@ impl<'a, K: Hash + Eq, V, S: BuildHasher> VacantEntry<'a, K, V, S> { #[inline(never)] fn insert_after_grow( map: &mut HashSortedMap, - _hash: u64, key: K, value: V, ) -> &mut V { diff --git a/crates/hash-sorted-map/src/iter.rs b/crates/hash-sorted-map/src/iter.rs index 48623fa..aafd36c 100644 --- a/crates/hash-sorted-map/src/iter.rs +++ b/crates/hash-sorted-map/src/iter.rs @@ -1,6 +1,8 @@ use std::marker::PhantomData; use std::mem::ManuallyDrop; +use crate::group_ops::{CTRL_EMPTY, GROUP_SIZE}; + use super::group::Group; use super::group_ops; use super::hash_sorted_map::{HashSortedMap, NO_OVERFLOW}; @@ -15,7 +17,7 @@ struct IterCursor { /// Current position within the group we're scanning: group index in the /// groups array, and a SIMD bitmask of remaining occupied slots. 
current_group: u32, - current_mask: group_ops::Mask, + current_slot: u32, } impl IterCursor { @@ -24,8 +26,8 @@ impl IterCursor { Self { primary: 0, num_primary, - current_group: groups_len as u32, - current_mask: 0, + current_group: 0, + current_slot: 0, } } @@ -34,25 +36,27 @@ impl IterCursor { /// overflow chain. Within each group, yields occupied slots via bitmask. fn next_slot(&mut self, groups: &[Group]) -> Option<(usize, usize)> { loop { - if let Some(slot) = group_ops::next_match(&mut self.current_mask) { - return Some((self.current_group as usize, slot)); + let gi = self.current_group as usize; + if self.current_slot < GROUP_SIZE as u32 { + let slot = self.current_slot; + if groups[gi].ctrl[slot as usize] != CTRL_EMPTY { + self.current_slot += 1; + return Some((gi as usize, slot as usize)); + } } // Current group exhausted — try overflow chain. - let gi = self.current_group as usize; if gi < groups.len() && groups[gi].overflow != NO_OVERFLOW { - let next = groups[gi].overflow; - self.current_group = next; - self.current_mask = group_ops::match_full(&groups[next as usize].ctrl); + self.current_group = groups[gi].overflow; + self.current_slot = 0; continue; } + self.primary += 1; // No more overflow — move to next primary group. 
if self.primary >= self.num_primary { return None; } - let gi = self.primary as usize; - self.primary += 1; - self.current_group = gi as u32; - self.current_mask = group_ops::match_full(&groups[gi].ctrl); + self.current_group = self.primary; + self.current_slot = 0; } } } From 775e1edb6e69cbd5954812a7191a34b667e3ca56 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Tue, 19 May 2026 12:44:28 +0200 Subject: [PATCH 22/23] linter --- crates/hash-sorted-map/src/hash_sorted_map.rs | 5 +++-- crates/hash-sorted-map/src/iter.rs | 11 +++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/crates/hash-sorted-map/src/hash_sorted_map.rs b/crates/hash-sorted-map/src/hash_sorted_map.rs index 8566cf1..0cc37b4 100644 --- a/crates/hash-sorted-map/src/hash_sorted_map.rs +++ b/crates/hash-sorted-map/src/hash_sorted_map.rs @@ -143,7 +143,8 @@ impl HashSortedMap { hashes.push(hash); } } - let g = &self.groups[*chain.last().expect("chain should have at least one group") as usize]; + let g = + &self.groups[*chain.last().expect("chain should have at least one group") as usize]; for slot in 0..GROUP_SIZE { if g.ctrl[slot] == CTRL_EMPTY { break; @@ -153,7 +154,7 @@ impl HashSortedMap { .hash_one(unsafe { g.keys[slot].assume_init_ref() }); hashes.push(hash); } - + let n = hashes.len(); // Insertion sort by (hash, key). 
for i in 1..n { diff --git a/crates/hash-sorted-map/src/iter.rs b/crates/hash-sorted-map/src/iter.rs index aafd36c..e981bad 100644 --- a/crates/hash-sorted-map/src/iter.rs +++ b/crates/hash-sorted-map/src/iter.rs @@ -4,7 +4,6 @@ use std::mem::ManuallyDrop; use crate::group_ops::{CTRL_EMPTY, GROUP_SIZE}; use super::group::Group; -use super::group_ops; use super::hash_sorted_map::{HashSortedMap, NO_OVERFLOW}; /// State shared by `Iter`, `IterMut`, and `IntoIter`: tracks which primary @@ -21,7 +20,7 @@ struct IterCursor { } impl IterCursor { - fn new(n_bits: u32, groups_len: usize) -> Self { + fn new(n_bits: u32) -> Self { let num_primary = 1u32 << n_bits; Self { primary: 0, @@ -41,7 +40,7 @@ impl IterCursor { let slot = self.current_slot; if groups[gi].ctrl[slot as usize] != CTRL_EMPTY { self.current_slot += 1; - return Some((gi as usize, slot as usize)); + return Some((gi, slot as usize)); } } // Current group exhausted — try overflow chain. @@ -155,13 +154,13 @@ impl HashSortedMap { pub fn iter(&self) -> Iter<'_, K, V> { Iter { groups: &self.groups, - cursor: IterCursor::new(self.n_bits, self.groups.len()), + cursor: IterCursor::new(self.n_bits), } } /// Returns a mutable iterator over `(&K, &mut V)` pairs. pub fn iter_mut(&mut self) -> IterMut<'_, K, V> { - let cursor = IterCursor::new(self.n_bits, self.groups.len()); + let cursor = IterCursor::new(self.n_bits); IterMut { groups: &mut *self.groups as *mut [Group], cursor, @@ -172,7 +171,7 @@ impl HashSortedMap { /// Consumes the map and returns an iterator over `(K, V)` pairs. #[allow(clippy::should_implement_trait)] pub fn into_iter(self) -> IntoIter { - let cursor = IterCursor::new(self.n_bits, self.groups.len()); + let cursor = IterCursor::new(self.n_bits); // Prevent Drop from running on self — we're moving groups out. 
let mut this = ManuallyDrop::new(self); let groups = unsafe { std::ptr::read(&this.groups) }; From 6af2de32275a1d6882798090fd71750e556a4179 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Tue, 19 May 2026 13:14:20 +0000 Subject: [PATCH 23/23] update benchmark results --- crates/hash-sorted-map/OPTIMIZATIONS.md | 80 ++++++++++++++----------- crates/hash-sorted-map/README.md | 28 +++++---- 2 files changed, 62 insertions(+), 46 deletions(-) diff --git a/crates/hash-sorted-map/OPTIMIZATIONS.md b/crates/hash-sorted-map/OPTIMIZATIONS.md index a6d130b..9019582 100644 --- a/crates/hash-sorted-map/OPTIMIZATIONS.md +++ b/crates/hash-sorted-map/OPTIMIZATIONS.md @@ -185,74 +185,82 @@ entropy in both halves. Also changed trigram generation to use --- -## Benchmark Results (Apple M-series, aarch64 NEON) +## Benchmark Results (local x86_64 snapshot) + +Hardware used for the current local snapshot: + +- CPU: Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +- Architecture: x86_64 +- Topology: 1 socket, 1 core, 2 threads +- CPU frequency range: 800 MHz to 2800 MHz +- Memory: 7.8 GiB RAM ### Insert (1000 trigrams, pre-sized) | Implementation | Time (µs) | vs hashbrown | |----------------------|-----------|--------------| -| FoldHashMap | 2.44 | −11% | -| FxHashMap | 2.61 | −5% | -| hashbrown+Identity | 2.63 | baseline | -| hashbrown::HashMap | 2.74 | +4% | -| std::HashMap+FNV | 3.18 | +21% | -| AHashMap | 3.38 | +29% | -| **HashSortedMap** | **3.46** | **+32%** | -| std::HashMap | 8.65 | +229% | +| FoldHashMap | 13.88 | −5% | +| FxHashMap | 14.60 | ~0% | +| hashbrown+Identity | 14.44 | baseline | +| hashbrown::HashMap | 14.55 | +1% | +| std::HashMap+FNV | 15.55 | +8% | +| AHashMap | 15.59 | +8% | +| **HashSortedMap** | **9.40** | **−35%** | +| std::HashMap | 25.26 | +75% | ### Reinsert (1000 trigrams, all keys exist) | Implementation | Time (µs) | |----------------------|-----------| -| hashbrown+Identity | 2.50 | -| **HashSortedMap** | **2.70** | +| **HashSortedMap** | **6.59** 
| +| hashbrown+Identity | 6.95 | ### Growth (128 → 1000 trigrams, 3 resize rounds) | Implementation | Time (µs) | |----------------------|-----------| -| **HashSortedMap** | **5.35** | -| hashbrown+Identity | 10.12 | +| hashbrown+Identity | 26.66 | +| **HashSortedMap** | **27.50** | ### Count (4000 trigrams, mixed insert/update) | Implementation | Time (µs) | |----------------------------------|-----------| -| hashbrown+Identity entry() | 4.89 | -| **HashSortedMap entry().or_default()** | **5.44** | -| **HashSortedMap get_or_default** | **5.48** | +| hashbrown+Identity entry() | 15.49 | +| **HashSortedMap get_or_default** | **15.88** | +| **HashSortedMap entry().or_default()** | **16.15** | ### Iteration (1000 trigrams) -| Implementation | Time (ns) | +| Implementation | Time (µs) | |-------------------------------|-----------| -| **HashSortedMap iter()** | **794** | -| **HashSortedMap into_iter()** | **998** | -| hashbrown+Identity iter() | 1,067 | -| hashbrown+Identity into_iter()| 1,060 | +| **HashSortedMap iter()** | **3.02** | +| hashbrown+Identity iter() | 3.04 | +| **HashSortedMap into_iter()** | **3.03** | +| hashbrown+Identity into_iter()| 3.56 | ### Sort (100K trigrams) -| Implementation | Time (µs) | +| Implementation | Time (ms) | |-----------------------------|-----------| -| **HashSortedMap sort_by_hash** | **706** | -| Vec::sort_unstable | 984 | +| **HashSortedMap sort_by_hash** | **1.66** | +| Vec::sort_unstable | 2.20 | ### Merge (100 maps × 100K keys each → sorted output) | Implementation | Time (ms) | vs HSM merge+sort | |-----------------------------------|-----------|--------------------| -| hashbrown merge presized | 30.4 | −46% | -| **HashSortedMap merge presized** | **37.3** | **−33%** | -| **HashSortedMap merge (no sort)** | **44.0** | **−21%** | -| hashbrown merge | 45.4 | −19% | -| **HashSortedMap merge + sort** | **55.9** | **baseline** | -| hashbrown merge + Vec sort | 58.7 | +5% | -| k-way merge sorted vecs | 445 | +696% | +| hashbrown 
merge presized | 160.79 | +6% | +| **HashSortedMap merge presized** | **117.01**| **−23%** | +| **HashSortedMap merge (no sort)** | **141.57**| **−7%** | +| hashbrown merge | 163.59 | +7% | +| **HashSortedMap merge + sort** | **152.34**| **baseline** | +| hashbrown merge + Vec sort | 193.37 | +27% | +| k-way merge sorted vecs | 445 | +192% | **Key takeaways:** -- HashSortedMap has **2× faster growth** than hashbrown -- **25% faster iteration** than hashbrown (dense group layout) -- **sort_by_hash is 28% faster** than Vec::sort_unstable (data is partially sorted by group) -- **merge + sort is 5% faster** than hashbrown merge + Vec sort (the primary use case) -- Pre-sized insert is 32% slower than hashbrown (trade-off for sort/merge efficiency) +- Pre-sized insert is **~35% faster** than hashbrown+Identity +- Reinsert and iter paths are now close to parity with hashbrown+Identity +- Growth path is currently **~3% slower** than hashbrown+Identity +- sort_by_hash is **~24% faster** than Vec::sort_unstable +- merge + sort is **~21% faster** than hashbrown merge + Vec sort diff --git a/crates/hash-sorted-map/README.md b/crates/hash-sorted-map/README.md index 92fa107..bbf6e3b 100644 --- a/crates/hash-sorted-map/README.md +++ b/crates/hash-sorted-map/README.md @@ -45,21 +45,29 @@ keys, which means: Latest local Criterion snapshot from this repository's `target/criterion` outputs (lower is better): +Hardware used for this snapshot: + +- CPU: Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +- Architecture: x86_64 +- Topology: 1 socket, 1 core, 2 threads +- CPU frequency range: 800 MHz to 2800 MHz +- Memory: 7.8 GiB RAM + | Scenario | HashSortedMap | Comparison | Result | | :------------------------------------------- | ------------: | :------------------------------------- | :---------- | -| Insert 1000 trigrams (pre-sized) | 7.34 µs | hashbrown::HashMap: 12.88 µs | ~43% faster | -| Grow from capacity 128 | 20.54 µs | hashbrown+Identity: 23.17 µs | ~11% faster | -| Count 4000 
trigrams (`entry().or_default()`) | 12.70 µs | hashbrown+Identity `entry()`: 13.53 µs | ~6% faster | -| Iterate 1000 trigrams (`iter()`) | 3.93 µs | hashbrown+Identity `iter()`: 2.87 µs | ~37% slower | -| Sort 100000 trigrams by hash | 1.83 ms | `Vec::sort_unstable`: 2.09 ms | ~12% faster | -| Merge 100 sorted maps + final sort | 161.93 ms | hashbrown merge + vec sort: 234.70 ms | ~31% faster | +| Insert 1000 trigrams (pre-sized) | 9.40 µs | hashbrown::HashMap: 14.55 µs | ~35% faster | +| Grow from capacity 128 | 27.50 µs | hashbrown+Identity: 26.66 µs | ~3% slower | +| Count 4000 trigrams (`entry().or_default()`) | 16.15 µs | hashbrown+Identity `entry()`: 15.49 µs | ~4% slower | +| Iterate 1000 trigrams (`iter()`) | 3.02 µs | hashbrown+Identity `iter()`: 3.04 µs | ~1% faster | +| Sort 100000 trigrams by hash | 1.66 ms | `Vec::sort_unstable`: 2.20 ms | ~24% faster | +| Merge 100 sorted maps + final sort | 152.34 ms | hashbrown merge + vec sort: 193.37 ms | ~21% faster | Key takeaways: -- `HashSortedMap` is strongest on insert-heavy and merge/sort-heavy paths. -- Iteration throughput is currently behind `hashbrown+Identity`. -- In workloads that need deterministic hash-order serialization, the merge and - sort advantages can outweigh the iteration gap. +- Pre-sized inserts, sorting, and merge+sort remain the strongest paths. +- Iteration is now roughly on par with `hashbrown+Identity`. +- Growth and count/update workloads are currently slightly slower than + `hashbrown+Identity` in this run. ## Running