feat: add optimized new_ones constructor for all-ones bit vectors
Introduces `new_ones` and `add_col_ones` methods to directly initialize all-ones bit vectors and matrix columns. This replaces redundant initialization sequences that created zero-filled structures and applied bitwise NOT, with a single pass that writes contiguous 0xFF bytes to disk. The change eliminates inversion overhead, streamlines test setup, and improves performance for filter mask intersection logic while preserving identical semantics.
This commit is contained in:
@@ -299,9 +299,7 @@ impl PersistentBitMatrix {
|
||||
Self::Columnar(m) => PersistentBitVecBuilder::build_from(m.col(c), path),
|
||||
Self::Packed(m) => m.col_persist(c, path),
|
||||
Self::Implicit { n_rows, .. } => {
|
||||
let mut b = PersistentBitVecBuilder::new(*n_rows, path)?;
|
||||
b.not();
|
||||
Ok(b)
|
||||
PersistentBitVecBuilder::new_ones(*n_rows, path)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -402,6 +400,12 @@ impl PersistentBitMatrixBuilder {
|
||||
PersistentBitVecBuilder::new(self.n, &path)
|
||||
}
|
||||
|
||||
pub fn add_col_ones(&mut self) -> io::Result<PersistentBitVecBuilder> {
|
||||
let path = col_path(&self.dir, self.n_cols);
|
||||
self.n_cols += 1;
|
||||
PersistentBitVecBuilder::new_ones(self.n, &path)
|
||||
}
|
||||
|
||||
pub fn add_col_from(&mut self, src: &TempBitVec) -> io::Result<()> {
|
||||
src.make_persistent(&col_path(&self.dir, self.n_cols))?;
|
||||
self.n_cols += 1;
|
||||
|
||||
@@ -145,6 +145,33 @@ impl PersistentBitVecBuilder {
|
||||
Ok(Self { mmap, n, path: path.to_path_buf() })
|
||||
}
|
||||
|
||||
/// Create an all-ones bit vector of length `n` at `path`.
|
||||
///
|
||||
/// More efficient than `new(n, path)` + `not()`: the data is written as
|
||||
/// 0xFF bytes in a single sequential pass, with no intermediate all-zeros state.
|
||||
pub fn new_ones(n: usize, path: &Path) -> io::Result<Self> {
|
||||
let nw = n_words(n);
|
||||
let file_size = HEADER_SIZE + nw * 8;
|
||||
let mut file = OpenOptions::new()
|
||||
.read(true).write(true).create(true).truncate(true)
|
||||
.open(path)?;
|
||||
file.write_all(&MAGIC)?;
|
||||
file.write_all(&[0u8; 4])?;
|
||||
file.write_all(&(n as u64).to_le_bytes())?;
|
||||
file.write_all(&vec![0xFFu8; nw * 8])?;
|
||||
file.seek(SeekFrom::Start(0))?;
|
||||
file.set_len(file_size as u64)?;
|
||||
let mut mmap = unsafe { MmapMut::map_mut(&file)? };
|
||||
// Clear padding bits in the last word so trailing bits are always 0.
|
||||
let rem = n % 64;
|
||||
if rem != 0 {
|
||||
let ptr = mmap[HEADER_SIZE..].as_mut_ptr() as *mut u64;
|
||||
let words = unsafe { std::slice::from_raw_parts_mut(ptr, nw) };
|
||||
words[nw - 1] &= (1u64 << rem) - 1;
|
||||
}
|
||||
Ok(Self { mmap, n, path: path.to_path_buf() })
|
||||
}
|
||||
|
||||
pub fn build_from(source: &PersistentBitVec, path: &Path) -> io::Result<Self> {
|
||||
fs::copy(source.path(), path)?;
|
||||
let file = OpenOptions::new().read(true).write(true).open(path)?;
|
||||
|
||||
@@ -126,8 +126,7 @@ pub fn eval_filter_mask(expr: &FilterMask, mat: &dyn MatrixGroupOps, n: usize) -
|
||||
b.freeze()
|
||||
}
|
||||
FilterMask::And(parts) => {
|
||||
let mut b = TempBitVecBuilder::new(n)?;
|
||||
b.not(); // initialise à tout-1 (tout passe)
|
||||
let mut b = TempBitVecBuilder::new_ones(n)?;
|
||||
for part in parts {
|
||||
let m = eval_filter_mask(part, mat, n)?;
|
||||
b.and(m.view());
|
||||
|
||||
@@ -56,6 +56,13 @@ impl TempBitVecBuilder {
|
||||
Ok(Self { builder, temp })
|
||||
}
|
||||
|
||||
pub(crate) fn new_ones(n: usize) -> io::Result<Self> {
|
||||
let temp = TempDir::new()?;
|
||||
let path = temp.path().join("data.pbiv");
|
||||
let builder = PersistentBitVecBuilder::new_ones(n, &path)?;
|
||||
Ok(Self { builder, temp })
|
||||
}
|
||||
|
||||
pub(crate) fn freeze(self) -> io::Result<TempBitVec> {
|
||||
let Self { builder, temp } = self;
|
||||
let vec = builder.finish()?;
|
||||
|
||||
@@ -150,8 +150,7 @@ fn mask_with_all_ones_is_noop() {
|
||||
let dir = tempdir().unwrap();
|
||||
let mut v = PersistentCompactIntVecBuilder::new(4, &dir.path().join("v.pciv")).unwrap();
|
||||
v.set(0, 300); v.set(1, 1); v.set(2, 0); v.set(3, 42);
|
||||
let mut mask = PersistentBitVecBuilder::new(4, &dir.path().join("m.pbiv")).unwrap();
|
||||
mask.not(); // all bits → 1
|
||||
let mask = PersistentBitVecBuilder::new_ones(4, &dir.path().join("m.pbiv")).unwrap();
|
||||
v.mask_with(mask.view());
|
||||
v.close().unwrap();
|
||||
let r = PersistentCompactIntVec::open(&dir.path().join("v.pciv")).unwrap();
|
||||
|
||||
@@ -106,9 +106,7 @@ impl Layer<()> {
|
||||
let presence_dir = layer_dir.join(PRESENCE_DIR);
|
||||
fs::create_dir_all(&presence_dir).map_err(OLMError::Io)?;
|
||||
let mut mb = PersistentBitMatrixBuilder::new(n_kmers, &presence_dir).map_err(OLMError::Io)?;
|
||||
let mut col = mb.add_col().map_err(OLMError::Io)?;
|
||||
col.not();
|
||||
col.close().map_err(OLMError::Io)?;
|
||||
mb.add_col_ones().map_err(OLMError::Io)?.close().map_err(OLMError::Io)?;
|
||||
mb.close().map_err(OLMError::Io)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user