feat: add optimized new_ones constructor for all-ones bit vectors

Introduces `new_ones` and `add_col_ones` methods to directly initialize all-ones bit vectors and matrix columns. This replaces redundant initialization sequences that created zero-filled structures and applied bitwise NOT, with a single pass that writes contiguous 0xFF bytes to disk. The change eliminates inversion overhead, streamlines test setup, and improves performance for filter mask intersection logic while preserving identical semantics.
This commit is contained in:
Eric Coissac
2026-06-19 09:23:44 +02:00
parent 9abb2db92f
commit 280ca1f5a3
6 changed files with 44 additions and 10 deletions
+7 -3
View File
@@ -299,9 +299,7 @@ impl PersistentBitMatrix {
Self::Columnar(m) => PersistentBitVecBuilder::build_from(m.col(c), path),
Self::Packed(m) => m.col_persist(c, path),
Self::Implicit { n_rows, .. } => {
let mut b = PersistentBitVecBuilder::new(*n_rows, path)?;
b.not();
Ok(b)
PersistentBitVecBuilder::new_ones(*n_rows, path)
}
}
}
@@ -402,6 +400,12 @@ impl PersistentBitMatrixBuilder {
PersistentBitVecBuilder::new(self.n, &path)
}
/// Append a new column whose bits are all set to 1 and return its builder.
///
/// The column file is created with `PersistentBitVecBuilder::new_ones` at the
/// path derived from the current column count. The column counter is advanced
/// only after the file has been created successfully, so a failed creation
/// does not leave the matrix counting a column that was never written
/// (same ordering as `add_col_from`).
///
/// # Errors
/// Propagates any I/O error reported while creating the column file.
pub fn add_col_ones(&mut self) -> io::Result<PersistentBitVecBuilder> {
    let path = col_path(&self.dir, self.n_cols);
    let col = PersistentBitVecBuilder::new_ones(self.n, &path)?;
    // Count the column only once it actually exists on disk.
    self.n_cols += 1;
    Ok(col)
}
pub fn add_col_from(&mut self, src: &TempBitVec) -> io::Result<()> {
src.make_persistent(&col_path(&self.dir, self.n_cols))?;
self.n_cols += 1;
+27
View File
@@ -145,6 +145,33 @@ impl PersistentBitVecBuilder {
Ok(Self { mmap, n, path: path.to_path_buf() })
}
/// Create an all-ones bit vector of length `n` at `path`.
///
/// The file at `path` is created (or truncated), filled with the header
/// followed by `n_words(n)` 64-bit words of 0xFF bytes, and then memory-mapped
/// read/write. When `n` is not a multiple of 64, the bits of the last word at
/// positions `>= n % 64` are cleared so that padding bits are always 0.
///
/// More efficient than `new(n, path)` + `not()`: the data is written as
/// 0xFF bytes in a single sequential pass, with no intermediate all-zeros
/// state. The payload is streamed through a bounded scratch buffer, so the
/// memory used while writing does not grow with `n`.
///
/// # Errors
/// Returns any I/O error raised while creating, writing, resizing or mapping
/// the file.
pub fn new_ones(n: usize, path: &Path) -> io::Result<Self> {
    // Upper bound (in bytes) on the scratch buffer used to stream the payload.
    const FILL_CHUNK: usize = 1 << 20;

    let nw = n_words(n);
    let data_len = nw * 8;
    let file_size = HEADER_SIZE + data_len;
    let mut file = OpenOptions::new()
        .read(true).write(true).create(true).truncate(true)
        .open(path)?;

    // Header: magic bytes, four zero bytes, then the bit length as little-endian u64.
    file.write_all(&MAGIC)?;
    file.write_all(&[0u8; 4])?;
    file.write_all(&(n as u64).to_le_bytes())?;

    // Payload: stream 0xFF bytes in bounded chunks instead of materialising
    // the whole `data_len`-byte buffer in memory at once.
    let ones = vec![0xFFu8; data_len.min(FILL_CHUNK)];
    let mut remaining = data_len;
    while remaining > 0 {
        let len = remaining.min(ones.len());
        file.write_all(&ones[..len])?;
        remaining -= len;
    }

    // Pin the final length to header + payload before mapping.
    file.set_len(file_size as u64)?;

    // SAFETY: the file was created/truncated and fully written by this function
    // and is mapped with its final length. NOTE(review): soundness also relies on
    // no other process modifying or truncating the file while it is mapped,
    // which cannot be enforced here.
    let mut mmap = unsafe { MmapMut::map_mut(&file)? };

    // Clear padding bits in the last word so trailing bits are always 0.
    let rem = n % 64;
    if rem != 0 {
        let last = HEADER_SIZE + (nw - 1) * 8;
        let mask = (1u64 << rem) - 1;
        // The last word is currently all ones, so `word & mask == mask`. It is
        // stored in native byte order, i.e. the same bytes an in-place `u64`
        // AND on the mapped word would produce, without any pointer cast.
        // NOTE(review): the header length is little-endian; confirm the
        // intended word byte order on big-endian hosts.
        mmap[last..last + 8].copy_from_slice(&mask.to_ne_bytes());
    }
    Ok(Self { mmap, n, path: path.to_path_buf() })
}
pub fn build_from(source: &PersistentBitVec, path: &Path) -> io::Result<Self> {
fs::copy(source.path(), path)?;
let file = OpenOptions::new().read(true).write(true).open(path)?;
+1 -2
View File
@@ -126,8 +126,7 @@ pub fn eval_filter_mask(expr: &FilterMask, mat: &dyn MatrixGroupOps, n: usize) -
b.freeze()
}
FilterMask::And(parts) => {
let mut b = TempBitVecBuilder::new(n)?;
b.not(); // initialise à tout-1 (tout passe)
let mut b = TempBitVecBuilder::new_ones(n)?;
for part in parts {
let m = eval_filter_mask(part, mat, n)?;
b.and(m.view());
+7
View File
@@ -56,6 +56,13 @@ impl TempBitVecBuilder {
Ok(Self { builder, temp })
}
/// Build an all-ones bit vector of length `n` stored in a fresh temporary directory.
///
/// The data file is named `data.pbiv` inside the temporary directory, and the
/// directory handle is kept in the returned builder alongside the vector builder.
///
/// # Errors
/// Propagates any I/O error from creating the temporary directory or the
/// backing bit-vector file.
pub(crate) fn new_ones(n: usize) -> io::Result<Self> {
    let temp = TempDir::new()?;
    let data_file = temp.path().join("data.pbiv");
    PersistentBitVecBuilder::new_ones(n, &data_file)
        .map(|builder| Self { builder, temp })
}
pub(crate) fn freeze(self) -> io::Result<TempBitVec> {
let Self { builder, temp } = self;
let vec = builder.finish()?;
+1 -2
View File
@@ -150,8 +150,7 @@ fn mask_with_all_ones_is_noop() {
let dir = tempdir().unwrap();
let mut v = PersistentCompactIntVecBuilder::new(4, &dir.path().join("v.pciv")).unwrap();
v.set(0, 300); v.set(1, 1); v.set(2, 0); v.set(3, 42);
let mut mask = PersistentBitVecBuilder::new(4, &dir.path().join("m.pbiv")).unwrap();
mask.not(); // all bits → 1
let mask = PersistentBitVecBuilder::new_ones(4, &dir.path().join("m.pbiv")).unwrap();
v.mask_with(mask.view());
v.close().unwrap();
let r = PersistentCompactIntVec::open(&dir.path().join("v.pciv")).unwrap();
+1 -3
View File
@@ -106,9 +106,7 @@ impl Layer<()> {
let presence_dir = layer_dir.join(PRESENCE_DIR);
fs::create_dir_all(&presence_dir).map_err(OLMError::Io)?;
let mut mb = PersistentBitMatrixBuilder::new(n_kmers, &presence_dir).map_err(OLMError::Io)?;
let mut col = mb.add_col().map_err(OLMError::Io)?;
col.not();
col.close().map_err(OLMError::Io)?;
mb.add_col_ones().map_err(OLMError::Io)?.close().map_err(OLMError::Io)?;
mb.close().map_err(OLMError::Io)
}
}