diff --git a/Cargo.toml b/Cargo.toml index e3debc5..09167d2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,8 +6,10 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -smallvec = "*" +smallvec = "1.13.1" [features] default = [] std = [] +# Optimize memory layout for slab allocators, reducing memory usage +slab-friendly = [] diff --git a/benches/xarray_bench.rs b/benches/xarray_bench.rs new file mode 100644 index 0000000..90012cc --- /dev/null +++ b/benches/xarray_bench.rs @@ -0,0 +1,111 @@ +#![feature(test)] + +extern crate test; + +use std::sync::Arc; + +use test::{black_box, Bencher}; +use xarray::XArray; + +const DENSE_LEN: u64 = 100_000; +const SPARSE_LEN: u64 = 200_000; +const RANDOM_QUERIES: usize = 50_000; +const COW_WRITE_COUNT: u64 = 20_000; + +fn build_dense(len: u64) -> XArray> { + let mut xa: XArray> = XArray::new(); + for i in 0..len { + xa.store(i, Arc::new(i)); + } + xa +} + +fn random_indices(len: u64, count: usize) -> Vec { + let mut seed = 0x1234_5678_9abc_def0_u64; + let mut out = Vec::with_capacity(count); + for _ in 0..count { + seed ^= seed << 7; + seed ^= seed >> 9; + seed ^= seed << 8; + out.push(seed % len); + } + out +} + +#[bench] +fn bench_store_dense(b: &mut Bencher) { + b.iter(|| { + let mut xa: XArray> = XArray::new(); + for i in 0..DENSE_LEN { + xa.store(i, Arc::new(i)); + } + black_box(xa); + }); +} + +#[bench] +fn bench_cursor_load_dense(b: &mut Bencher) { + let xa = build_dense(DENSE_LEN); + + b.iter(|| { + let mut cursor = xa.cursor(0); + let mut sum = 0_u64; + for _ in 0..DENSE_LEN { + if let Some(v) = cursor.load() { + sum = sum.wrapping_add(*v.as_ref()); + } + cursor.next(); + } + black_box(sum); + }); +} + +#[bench] +fn bench_load_random_dense(b: &mut Bencher) { + let xa = build_dense(DENSE_LEN); + let indices = random_indices(DENSE_LEN, RANDOM_QUERIES); + + b.iter(|| { + let mut sum = 0_u64; + for &idx in &indices { + if let Some(v) = xa.load(idx) { + sum = sum.wrapping_add(*v.as_ref()); + } + } + black_box(sum); + }); +} + +#[bench] +fn bench_range_sparse_even(b: &mut Bencher) { + let mut xa: XArray> = XArray::new(); + for i in 0..SPARSE_LEN { + if i % 2 == 0 { + xa.store(i, Arc::new(i)); + } + } + + b.iter(|| { + let mut sum = 0_u64; + let mut cnt = 0_u64; + for (index, item) in xa.range(0..SPARSE_LEN) { + sum = sum.wrapping_add(index).wrapping_add(*item.as_ref()); + cnt += 1; + } + black_box((sum, cnt)); + }); +} + +#[bench] +fn bench_cow_clone_then_overwrite(b: &mut Bencher) { + let xa = build_dense(DENSE_LEN); + + b.iter(|| { + let mut cloned = xa.clone(); + for i in 0..COW_WRITE_COUNT { + let idx = (i * 7) % DENSE_LEN; + cloned.store(idx, Arc::new(i)); + } + black_box(cloned.load(0)); + }); +} diff --git a/src/entry.rs b/src/entry.rs index 4aa7700..42e574b 100644 --- a/src/entry.rs +++ b/src/entry.rs @@ -2,7 +2,7 @@ use alloc::boxed::Box; use alloc::sync::Arc; use core::marker::PhantomData; use core::mem::ManuallyDrop; -use core::ops::{Deref, Not}; +use core::ops::Deref; use crate::node::{TryClone, XNode}; @@ -67,7 +67,10 @@ impl<'a, T> Deref for ArcRef<'a, T> { // SAFETY: `Arc` meets the safety requirements of `ItemEntry`. unsafe impl ItemEntry for Arc { - type Ref<'a> = ArcRef<'a, T> where Self: 'a; + type Ref<'a> + = ArcRef<'a, T> + where + Self: 'a; fn into_raw(self) -> *const () { // A contant expression, so compilers should be smart enough to optimize it away. @@ -118,7 +121,10 @@ impl<'a, T> Deref for BoxRef<'a, T> { // SAFETY: `Box` meets the safety requirements of `ItemEntry`. unsafe impl ItemEntry for Box { - type Ref<'a> = BoxRef<'a, T> where Self: 'a; + type Ref<'a> + = BoxRef<'a, T> + where + Self: 'a; fn into_raw(self) -> *const () { // A contant expression, so compilers should be smart enough to optimize it away. @@ -214,9 +220,16 @@ impl XEntry { } fn ty(&self) -> Option { - self.is_null() - .not() - .then(|| (self.raw.addr() & Self::TYPE_MASK).try_into().unwrap()) + if self.is_null() { + return None; + } + + let raw = self.raw.addr(); + let tag = raw & Self::TYPE_MASK; + match tag.try_into() { + Ok(ty) => Some(ty), + Err(()) => panic!("xarray: invalid XEntry tag={tag:#x}, raw={raw:#x}"), + } } pub fn is_null(&self) -> bool { diff --git a/src/node.rs b/src/node.rs index d572ee0..699cc16 100644 --- a/src/node.rs +++ b/src/node.rs @@ -1,4 +1,6 @@ use core::cmp::Ordering; +#[cfg(feature = "slab-friendly")] +use core::convert::TryInto; use core::ops::{Deref, DerefMut}; use crate::entry::{ItemEntry, XEntry}; @@ -103,7 +105,10 @@ where offset_in_parent: u8, /// The slots storing `XEntry`s, which point to user-given items for leaf nodes and other /// `XNode`s for interior nodes. + #[cfg(not(feature = "slab-friendly"))] slots: [XEntry; SLOT_SIZE], + #[cfg(feature = "slab-friendly")] + slots: alloc::boxed::Box<[XEntry; SLOT_SIZE]>, /// The marks representing whether each slot is marked or not. /// /// Users can set mark or unset mark on user-given items, and a leaf node or an interior node @@ -112,6 +117,16 @@ where } impl XNode { + #[cfg(feature = "slab-friendly")] + fn empty_slots() -> alloc::boxed::Box<[XEntry; SLOT_SIZE]> { + let mut slots = alloc::vec::Vec::with_capacity(SLOT_SIZE); + slots.resize_with(SLOT_SIZE, || XEntry::EMPTY); + match slots.into_boxed_slice().try_into() { + Ok(slots) => slots, + Err(_) => panic!("xarray: invalid slab-friendly slot length"), + } + } + pub fn new_root(height: Height) -> Self { Self::new(height, 0) } @@ -120,7 +135,10 @@ impl XNode { Self { height, offset_in_parent: offset, + #[cfg(not(feature = "slab-friendly"))] slots: [XEntry::EMPTY; SLOT_SIZE], + #[cfg(feature = "slab-friendly")] + slots: Self::empty_slots(), marks: [Mark::EMPTY; NUM_MARKS], } } @@ -147,7 +165,10 @@ impl XNode { } pub fn entries_mut(&mut self) -> &mut [XEntry] { - &mut self.slots + #[cfg(not(feature = "slab-friendly"))] + return &mut self.slots; + #[cfg(feature = "slab-friendly")] + return self.slots.as_mut(); } pub fn is_marked(&self, offset: u8, mark: usize) -> bool { diff --git a/src/test.rs b/src/test.rs index 44f86bb..63954c1 100644 --- a/src/test.rs +++ b/src/test.rs @@ -74,6 +74,54 @@ fn test_store_overwrite() { assert_eq!(*v.as_ref(), 40); } +#[cfg(feature = "slab-friendly")] +#[test] +fn test_slab_friendly_store_overwrite_remove() { + #[derive(Debug, Clone, PartialEq, Eq)] + struct Token(u32); + + let mut xa: XArray> = XArray::new(); + let shared = Arc::new(Token(7)); + + for i in 1..n!(256) { + xa.store(i as u64, shared.clone()); + } + for i in 1..n!(256) { + assert_eq!(xa.load(i as u64).as_deref(), Some(&shared)); + } + + for i in 1..n!(256) { + xa.store(i as u64, Arc::new(Token((i % 251) as u32))); + } + for i in 1..n!(256) { + assert_eq!(xa.load(i as u64).unwrap().0, (i % 251) as u32); + } + + for i in 1..n!(256) { + assert!(xa.remove(i as u64).is_some()); + assert!(xa.load(i as u64).is_none()); + } +} + +#[cfg(feature = "slab-friendly")] +#[test] +fn test_slab_friendly_cow_overwrite() { + let mut xa: XArray> = XArray::new(); + for i in 1..n!(128) { + xa.store(i as u64, Arc::new(i)); + } + + let mut cloned = xa.clone(); + for i in 1..n!(128) { + cloned.store(i as u64, Arc::new(i * 3)); + } + + for i in 1..n!(128) { + assert_eq!(*xa.load(i as u64).unwrap().as_ref(), i); + assert_eq!(*cloned.load(i as u64).unwrap().as_ref(), i * 3); + } +} + #[test] fn test_remove() { let mut xarray_arc: XArray> = XArray::new();