refactor: split obicompactvec storage into primary and overflow files

Refactors the storage format to separate primary and overflow data into distinct files. Introduces a cache-friendly sparse index with dynamically computed step and entry counts. Consolidates dual memory-mapped regions into a single file with explicit header parsing and validation, replacing unsafe slice casting with direct byte-offset indexing. Updates the test suite to accommodate the new file structure.
This commit is contained in:
Eric Coissac
2026-05-13 10:20:15 +08:00
parent f2de79acde
commit 4d5fcd4340
4 changed files with 145 additions and 165 deletions
+7 -15
View File
@@ -1,9 +1,8 @@
use std::collections::HashMap; use std::collections::HashMap;
use std::fs::File; use std::io;
use std::io::{self, BufWriter, Write as _};
use std::path::Path; use std::path::Path;
use crate::format::write_overflow; use crate::format::write_pciv;
pub struct PersistentCompactIntVecBuilder { pub struct PersistentCompactIntVecBuilder {
primary: Vec<u8>, primary: Vec<u8>,
@@ -39,19 +38,12 @@ impl PersistentCompactIntVecBuilder {
self.primary.len() self.primary.len()
} }
/// Write `counts_primary.bin` and (if needed) `counts_overflow.bin`, then drop all state. pub fn is_empty(&self) -> bool {
pub fn close(self, primary_path: &Path, overflow_path: &Path) -> io::Result<()> { self.primary.is_empty()
// Write primary array.
let mut w = BufWriter::new(File::create(primary_path)?);
w.write_all(&self.primary)?;
w.flush()?;
drop(w);
if self.overflow.is_empty() {
return Ok(());
} }
// Sort overflow entries by slot. /// Finalise and write a single PCIV file, then drop all in-memory state.
pub fn close(self, path: &Path) -> io::Result<()> {
let mut entries: Vec<(u32, u32)> = self let mut entries: Vec<(u32, u32)> = self
.overflow .overflow
.into_iter() .into_iter()
@@ -59,6 +51,6 @@ impl PersistentCompactIntVecBuilder {
.collect(); .collect();
entries.sort_unstable_by_key(|&(slot, _)| slot); entries.sort_unstable_by_key(|&(slot, _)| slot);
write_overflow(overflow_path, &entries) write_pciv(path, &self.primary, &entries)
} }
} }
+45 -60
View File
@@ -1,81 +1,66 @@
use std::fs::File; use std::fs::File;
use std::io::{self, BufWriter, Write as _}; use std::io::{self, BufWriter, Seek, SeekFrom, Write as _};
use std::path::Path; use std::path::Path;
pub const MAGIC: [u8; 4] = *b"PCIV"; pub const MAGIC: [u8; 4] = *b"PCIV";
pub const HEADER_SIZE: usize = 24; // magic(4) + n(8) + n_overflow(4) + step(4) + n_index(4)
// L1 cache target: 32 KB / 8 bytes per index entry // Sparse index target: fits in 32 KB L1 cache / 8 bytes per entry.
pub const L1_INDEX_ENTRIES: usize = 4096; pub const L1_INDEX_ENTRIES: usize = 4096;
/// Write the overflow file from a sorted list of (slot, value) pairs. /// Write a single PCIV file.
pub fn write_overflow( ///
path: &Path, /// Layout (write order = computation order):
entries: &[(u32, u32)], /// header 24 B placeholder, overwritten at the end
) -> io::Result<()> { /// primary n B u8 per slot (255 = overflow sentinel)
let n_overflow = entries.len() as u32; /// data n_overflow × 8 B (slot: u32, value: u32) sorted by slot
/// index n_index × 8 B (slot: u32, pos: u32) sparse index
let step: u32 = if entries.len() <= L1_INDEX_ENTRIES { pub fn write_pciv(path: &Path, primary: &[u8], entries: &[(u32, u32)]) -> io::Result<()> {
0 let n = primary.len();
} else { let n_overflow = entries.len();
entries.len().div_ceil(L1_INDEX_ENTRIES) as u32
};
let mut w = BufWriter::new(File::create(path)?); let mut w = BufWriter::new(File::create(path)?);
w.write_all(&MAGIC)?; // Placeholder header — will be overwritten at the end.
w.write_all(&n_overflow.to_le_bytes())?; w.write_all(&[0u8; HEADER_SIZE])?;
w.write_all(&step.to_le_bytes())?;
if step > 0 { // Primary array.
let n_index = entries.len().div_ceil(step as usize) as u32; w.write_all(primary)?;
w.write_all(&n_index.to_le_bytes())?;
for (pos, chunk) in entries.chunks(step as usize).enumerate() {
let slot = chunk[0].0;
w.write_all(&slot.to_le_bytes())?;
w.write_all(&((pos * step as usize) as u32).to_le_bytes())?;
}
}
// Overflow data (sorted by slot).
for &(slot, value) in entries { for &(slot, value) in entries {
w.write_all(&slot.to_le_bytes())?; w.write_all(&slot.to_le_bytes())?;
w.write_all(&value.to_le_bytes())?; w.write_all(&value.to_le_bytes())?;
} }
w.flush() // Sparse index (computed now that n_overflow is known).
} let step: u32 = if n_overflow <= L1_INDEX_ENTRIES {
0
} else {
n_overflow.div_ceil(L1_INDEX_ENTRIES) as u32
};
/// Parse the header and sparse index from the overflow file bytes. let n_index: u32 = if step > 0 {
/// Returns (n_overflow, step, index_entries, data_byte_offset). let count = n_overflow.div_ceil(step as usize) as u32;
pub fn parse_overflow_header( for (block, chunk) in entries.chunks(step as usize).enumerate() {
data: &[u8], let slot = chunk[0].0;
) -> io::Result<(u32, u32, Vec<(u32, u32)>, usize)> { let pos = (block * step as usize) as u32;
let mut pos = 0; w.write_all(&slot.to_le_bytes())?;
w.write_all(&pos.to_le_bytes())?;
let magic = read4(data, &mut pos)?;
if magic != MAGIC {
return Err(io::Error::new(io::ErrorKind::InvalidData, "bad PCIV magic"));
} }
count
} else {
0
};
let n_overflow = u32::from_le_bytes(read4(data, &mut pos)?); // Flush, then seek back to write the real header.
let step = u32::from_le_bytes(read4(data, &mut pos)?); w.flush()?;
let mut file = w.into_inner().map_err(|e| e.into_error())?;
let mut index = Vec::new(); file.seek(SeekFrom::Start(0))?;
if step > 0 { file.write_all(&MAGIC)?;
let n_index = u32::from_le_bytes(read4(data, &mut pos)?) as usize; file.write_all(&(n as u64).to_le_bytes())?;
index.reserve(n_index); file.write_all(&(n_overflow as u32).to_le_bytes())?;
for _ in 0..n_index { file.write_all(&step.to_le_bytes())?;
let slot = u32::from_le_bytes(read4(data, &mut pos)?); file.write_all(&n_index.to_le_bytes())?;
let ipos = u32::from_le_bytes(read4(data, &mut pos)?); file.flush()
index.push((slot, ipos));
}
}
Ok((n_overflow, step, index, pos))
}
fn read4(data: &[u8], pos: &mut usize) -> io::Result<[u8; 4]> {
data.get(*pos..*pos + 4)
.and_then(|s| s.try_into().ok())
.map(|arr| { *pos += 4; arr })
.ok_or_else(|| io::Error::new(io::ErrorKind::UnexpectedEof, "truncated PCIV file"))
} }
+58 -44
View File
@@ -4,90 +4,104 @@ use std::path::Path;
use memmap2::Mmap; use memmap2::Mmap;
use crate::format::parse_overflow_header; use crate::format::{HEADER_SIZE, MAGIC};
pub struct PersistentCompactIntVec { pub struct PersistentCompactIntVec {
primary: Mmap, mmap: Mmap,
index: Vec<(u32, u32)>, // (slot, pos) — L1-resident sparse index n: usize,
data: Option<Mmap>, // mmap of overflow data region n_overflow: usize,
n_overflow: u32,
pub step: u32, pub step: u32,
data_offset: usize, // byte offset of data region within overflow mmap index: Vec<(u32, u32)>, // (slot, pos) — L1-resident sparse index
primary_offset: usize, // = HEADER_SIZE
data_offset: usize, // = HEADER_SIZE + n
} }
impl PersistentCompactIntVec { impl PersistentCompactIntVec {
/// Open a previously written `PersistentCompactIntVec`. pub fn open(path: &Path) -> io::Result<Self> {
/// let mmap = unsafe { Mmap::map(&File::open(path)?)? };
/// `overflow_path` is optional: pass `None` if no overflow file was written
/// (i.e. all values were < 255).
pub fn open(primary_path: &Path, overflow_path: Option<&Path>) -> io::Result<Self> {
let primary = unsafe { Mmap::map(&File::open(primary_path)?)? };
let (data, n_overflow, step, index, data_offset) = match overflow_path { if mmap.len() < HEADER_SIZE {
None => (None, 0, 0, Vec::new(), 0), return Err(io::Error::new(io::ErrorKind::InvalidData, "PCIV file too short"));
Some(p) => { }
let mmap = unsafe { Mmap::map(&File::open(p)?)? }; if &mmap[0..4] != &MAGIC {
let (n_overflow, step, index, data_offset) = return Err(io::Error::new(io::ErrorKind::InvalidData, "bad PCIV magic"));
parse_overflow_header(&mmap)?;
(Some(mmap), n_overflow, step, index, data_offset)
} }
};
Ok(Self { primary, index, data, n_overflow, step, data_offset }) let n = u64::from_le_bytes(mmap[4..12].try_into().unwrap()) as usize;
let n_overflow = u32::from_le_bytes(mmap[12..16].try_into().unwrap()) as usize;
let step = u32::from_le_bytes(mmap[16..20].try_into().unwrap());
let n_index = u32::from_le_bytes(mmap[20..24].try_into().unwrap()) as usize;
let primary_offset = HEADER_SIZE;
let data_offset = primary_offset + n;
let index_offset = data_offset + n_overflow * 8;
// Load the sparse index into RAM (L1-resident at query time).
let mut index = Vec::with_capacity(n_index);
for i in 0..n_index {
let off = index_offset + i * 8;
let slot = u32::from_le_bytes(mmap[off..off + 4].try_into().unwrap());
let pos = u32::from_le_bytes(mmap[off + 4..off + 8].try_into().unwrap());
index.push((slot, pos));
}
Ok(Self { mmap, n, n_overflow, step, index, primary_offset, data_offset })
} }
pub fn len(&self) -> usize { pub fn len(&self) -> usize {
self.primary.len() self.n
} }
pub fn is_empty(&self) -> bool { pub fn is_empty(&self) -> bool {
self.primary.is_empty() self.n == 0
} }
pub fn get(&self, slot: u64) -> u32 { pub fn get(&self, slot: u64) -> u32 {
match self.primary[slot as usize] { match self.mmap[self.primary_offset + slot as usize] {
255 => self.overflow_get(slot as u32), 255 => self.overflow_get(slot as u32),
v => v as u32, v => v as u32,
} }
} }
fn overflow_get(&self, slot: u32) -> u32 { fn overflow_get(&self, slot: u32) -> u32 {
let data = self.data.as_ref().expect("sentinel without overflow file");
let raw = &data[self.data_offset..];
let pos_start; let pos_start;
let pos_end; let pos_end;
if self.step == 0 { if self.step == 0 {
pos_start = 0; pos_start = 0;
pos_end = self.n_overflow as usize; pos_end = self.n_overflow;
} else { } else {
// Binary search in the L1-resident sparse index.
let i = self.index.partition_point(|&(s, _)| s <= slot).saturating_sub(1); let i = self.index.partition_point(|&(s, _)| s <= slot).saturating_sub(1);
pos_start = self.index[i].1 as usize; pos_start = self.index[i].1 as usize;
pos_end = if i + 1 < self.index.len() { pos_end = if i + 1 < self.index.len() {
self.index[i + 1].1 as usize self.index[i + 1].1 as usize
} else { } else {
self.n_overflow as usize self.n_overflow
}; };
} }
// Binary search within the block. let mut lo = pos_start;
let block = Self::data_slice(raw, pos_start, pos_end); let mut hi = pos_end;
match block.binary_search_by_key(&slot, |&(s, _)| s) { while lo < hi {
Ok(i) => block[i].1, let mid = lo + (hi - lo) / 2;
Err(_) => panic!("slot {slot} marked as overflow but not found in data"), match self.data_slot(mid).cmp(&slot) {
std::cmp::Ordering::Equal => return self.data_value(mid),
std::cmp::Ordering::Less => lo = mid + 1,
std::cmp::Ordering::Greater => hi = mid,
} }
} }
panic!("slot {slot} marked as overflow but not found")
}
/// Interpret a byte slice as a slice of (u32, u32) pairs without copying. #[inline]
fn data_slice(raw: &[u8], from: usize, to: usize) -> &[(u32, u32)] { fn data_slot(&self, i: usize) -> u32 {
let byte_start = from * 8; let off = self.data_offset + i * 8;
let byte_end = to * 8; u32::from_le_bytes(self.mmap[off..off + 4].try_into().unwrap())
let bytes = &raw[byte_start..byte_end]; }
// Safety: the file was written as LE u32 pairs; alignment is guaranteed
// because we read from a byte slice and cast explicitly. #[inline]
let ptr = bytes.as_ptr() as *const (u32, u32); fn data_value(&self, i: usize) -> u32 {
unsafe { std::slice::from_raw_parts(ptr, to - from) } let off = self.data_offset + i * 8 + 4;
u32::from_le_bytes(self.mmap[off..off + 4].try_into().unwrap())
} }
} }
+23 -34
View File
@@ -2,20 +2,15 @@ use tempfile::tempdir;
use crate::{PersistentCompactIntVec, PersistentCompactIntVecBuilder}; use crate::{PersistentCompactIntVec, PersistentCompactIntVecBuilder};
fn primary(dir: &std::path::Path) -> std::path::PathBuf { dir.join("primary.bin") }
fn overflow(dir: &std::path::Path) -> std::path::PathBuf { dir.join("overflow.bin") }
fn roundtrip(values: &[(u64, u32)], n: usize) -> Vec<u32> { fn roundtrip(values: &[(u64, u32)], n: usize) -> Vec<u32> {
let dir = tempdir().unwrap(); let dir = tempdir().unwrap();
let path = dir.path().join("test.pciv");
let mut b = PersistentCompactIntVecBuilder::new(n); let mut b = PersistentCompactIntVecBuilder::new(n);
for &(slot, v) in values { for &(slot, v) in values {
b.set(slot, v); b.set(slot, v);
} }
let ov = overflow(dir.path()); b.close(&path).unwrap();
b.close(&primary(dir.path()), &ov).unwrap(); let r = PersistentCompactIntVec::open(&path).unwrap();
let ov_path = ov.exists().then_some(ov.as_path());
let r = PersistentCompactIntVec::open(&primary(dir.path()), ov_path).unwrap();
(0..n as u64).map(|s| r.get(s)).collect() (0..n as u64).map(|s| r.get(s)).collect()
} }
@@ -26,17 +21,16 @@ fn all_zero_by_default() {
} }
#[test] #[test]
fn small_values_no_overflow_file() { fn small_values_no_overflow() {
let dir = tempdir().unwrap(); let dir = tempdir().unwrap();
let path = dir.path().join("test.pciv");
let mut b = PersistentCompactIntVecBuilder::new(4); let mut b = PersistentCompactIntVecBuilder::new(4);
b.set(0, 1); b.set(0, 1);
b.set(1, 254); b.set(1, 254);
b.set(2, 0); b.set(2, 0);
b.set(3, 100); b.set(3, 100);
let ov = overflow(dir.path()); b.close(&path).unwrap();
b.close(&primary(dir.path()), &ov).unwrap(); let r = PersistentCompactIntVec::open(&path).unwrap();
assert!(!ov.exists(), "no overflow file expected");
let r = PersistentCompactIntVec::open(&primary(dir.path()), None).unwrap();
assert_eq!(r.get(0), 1); assert_eq!(r.get(0), 1);
assert_eq!(r.get(1), 254); assert_eq!(r.get(1), 254);
assert_eq!(r.get(2), 0); assert_eq!(r.get(2), 0);
@@ -56,42 +50,39 @@ fn overflow_values_roundtrip() {
#[test] #[test]
fn mutation_downward_removes_from_overflow() { fn mutation_downward_removes_from_overflow() {
let dir = tempdir().unwrap(); let dir = tempdir().unwrap();
let path = dir.path().join("test.pciv");
let mut b = PersistentCompactIntVecBuilder::new(2); let mut b = PersistentCompactIntVecBuilder::new(2);
b.set(0, 1000); // goes to overflow b.set(0, 1000);
b.set(0, 42); // comes back below threshold b.set(0, 42);
let ov = overflow(dir.path()); b.close(&path).unwrap();
b.close(&primary(dir.path()), &ov).unwrap(); let r = PersistentCompactIntVec::open(&path).unwrap();
assert!(!ov.exists(), "no overflow file expected after downward mutation");
let r = PersistentCompactIntVec::open(&primary(dir.path()), None).unwrap();
assert_eq!(r.get(0), 42); assert_eq!(r.get(0), 42);
assert_eq!(r.step, 0, "no overflow entries expected");
} }
#[test] #[test]
fn mutation_upward_updates_overflow() { fn mutation_upward_updates_overflow() {
let dir = tempdir().unwrap(); let dir = tempdir().unwrap();
let path = dir.path().join("test.pciv");
let mut b = PersistentCompactIntVecBuilder::new(1); let mut b = PersistentCompactIntVecBuilder::new(1);
b.set(0, 300); b.set(0, 300);
b.set(0, 500); b.set(0, 500);
let ov = overflow(dir.path()); b.close(&path).unwrap();
b.close(&primary(dir.path()), &ov).unwrap(); let r = PersistentCompactIntVec::open(&path).unwrap();
let r = PersistentCompactIntVec::open(&primary(dir.path()), Some(&ov)).unwrap();
assert_eq!(r.get(0), 500); assert_eq!(r.get(0), 500);
} }
#[test] #[test]
fn sparse_index_built_for_many_overflows() { fn sparse_index_built_for_many_overflows() {
// Generate more than L1_INDEX_ENTRIES (4096) overflow entries to trigger the sparse index.
let n = 5000usize; let n = 5000usize;
let dir = tempdir().unwrap(); let dir = tempdir().unwrap();
let path = dir.path().join("test.pciv");
let mut b = PersistentCompactIntVecBuilder::new(n); let mut b = PersistentCompactIntVecBuilder::new(n);
for i in 0..n { for i in 0..n {
b.set(i as u64, 1000 + i as u32); // all ≥ 255 b.set(i as u64, 1000 + i as u32);
} }
let ov = overflow(dir.path()); b.close(&path).unwrap();
b.close(&primary(dir.path()), &ov).unwrap(); let r = PersistentCompactIntVec::open(&path).unwrap();
assert!(ov.exists());
let r = PersistentCompactIntVec::open(&primary(dir.path()), Some(&ov)).unwrap();
assert!(r.step > 0, "sparse index should have been built"); assert!(r.step > 0, "sparse index should have been built");
for i in 0..n { for i in 0..n {
assert_eq!(r.get(i as u64), 1000 + i as u32, "mismatch at slot {i}"); assert_eq!(r.get(i as u64), 1000 + i as u32, "mismatch at slot {i}");
@@ -100,18 +91,16 @@ fn sparse_index_built_for_many_overflows() {
#[test] #[test]
fn mixed_large_dataset() { fn mixed_large_dataset() {
// Mirrors realistic distribution: most values small, sparse overflows.
let n = 1000usize; let n = 1000usize;
let dir = tempdir().unwrap(); let dir = tempdir().unwrap();
let path = dir.path().join("test.pciv");
let mut b = PersistentCompactIntVecBuilder::new(n); let mut b = PersistentCompactIntVecBuilder::new(n);
for i in 0..n { for i in 0..n {
let v = if i % 100 == 0 { 100_000 + i as u32 } else { i as u32 % 200 }; let v = if i % 100 == 0 { 100_000 + i as u32 } else { i as u32 % 200 };
b.set(i as u64, v); b.set(i as u64, v);
} }
let ov = overflow(dir.path()); b.close(&path).unwrap();
b.close(&primary(dir.path()), &ov).unwrap(); let r = PersistentCompactIntVec::open(&path).unwrap();
let r = PersistentCompactIntVec::open(&primary(dir.path()), ov.exists().then_some(ov.as_path())).unwrap();
for i in 0..n { for i in 0..n {
let expected = if i % 100 == 0 { 100_000 + i as u32 } else { i as u32 % 200 }; let expected = if i % 100 == 0 { 100_000 + i as u32 } else { i as u32 % 200 };
assert_eq!(r.get(i as u64), expected, "slot {i}"); assert_eq!(r.get(i as u64), expected, "slot {i}");