diff --git a/src/obicompactvec/src/bitmatrix.rs b/src/obicompactvec/src/bitmatrix.rs
index c054ae0..72f8b05 100644
--- a/src/obicompactvec/src/bitmatrix.rs
+++ b/src/obicompactvec/src/bitmatrix.rs
@@ -299,9 +299,7 @@ impl PersistentBitMatrix {
             Self::Columnar(m) => PersistentBitVecBuilder::build_from(m.col(c), path),
             Self::Packed(m) => m.col_persist(c, path),
             Self::Implicit { n_rows, .. } => {
-                let mut b = PersistentBitVecBuilder::new(*n_rows, path)?;
-                b.not();
-                Ok(b)
+                PersistentBitVecBuilder::new_ones(*n_rows, path)
             }
         }
     }
@@ -402,6 +400,17 @@ impl PersistentBitMatrixBuilder {
         PersistentBitVecBuilder::new(self.n, &path)
     }
 
+    /// Register a new column and create it with all `self.n` bits set to 1.
+    ///
+    /// The column file path comes from `col_path(&self.dir, self.n_cols)`.
+    /// `n_cols` is incremented before the file is created, so it stays
+    /// incremented even if creation fails.
+    pub fn add_col_ones(&mut self) -> io::Result<PersistentBitVecBuilder> {
+        let path = col_path(&self.dir, self.n_cols);
+        self.n_cols += 1;
+        PersistentBitVecBuilder::new_ones(self.n, &path)
+    }
+
     pub fn add_col_from(&mut self, src: &TempBitVec) -> io::Result<()> {
         src.make_persistent(&col_path(&self.dir, self.n_cols))?;
         self.n_cols += 1;
diff --git a/src/obicompactvec/src/bitvec.rs b/src/obicompactvec/src/bitvec.rs
index 966d57f..145bd63 100644
--- a/src/obicompactvec/src/bitvec.rs
+++ b/src/obicompactvec/src/bitvec.rs
@@ -145,6 +145,46 @@ impl PersistentBitVecBuilder {
         Ok(Self { mmap, n, path: path.to_path_buf() })
     }
 
+    /// Create an all-ones bit vector of length `n` at `path`.
+    ///
+    /// More efficient than `new(n, path)` + `not()`: the data is written as
+    /// 0xFF bytes in a single sequential pass, with no intermediate all-zeros state.
+    /// Padding bits past `n` in the last word are then cleared.
+    ///
+    /// # Errors
+    ///
+    /// Returns any I/O error raised while creating, writing, resizing or
+    /// memory-mapping the file at `path`.
+    pub fn new_ones(n: usize, path: &Path) -> io::Result<Self> {
+        let nw = n_words(n);
+        let file_size = HEADER_SIZE + nw * 8;
+        let mut file = OpenOptions::new()
+            .read(true).write(true).create(true).truncate(true)
+            .open(path)?;
+        file.write_all(&MAGIC)?;
+        file.write_all(&[0u8; 4])?;
+        file.write_all(&(n as u64).to_le_bytes())?;
+        file.write_all(&vec![0xFFu8; nw * 8])?;
+        file.seek(SeekFrom::Start(0))?;
+        file.set_len(file_size as u64)?;
+        // SAFETY: a file mapping is only sound while nothing else modifies or
+        // truncates the file. This file was created/truncated just above.
+        // NOTE(review): exclusive ownership of `path` is assumed, not enforced.
+        let mut mmap = unsafe { MmapMut::map_mut(&file)? };
+        // Clear padding bits in the last word so trailing bits are always 0.
+        let rem = n % 64;
+        if rem != 0 {
+            // Mask the last word through its native-endian bytes: same result as
+            // an in-place `u64` AND, without an alignment-dependent pointer cast.
+            let last = HEADER_SIZE + (nw - 1) * 8;
+            let mut bytes = [0u8; 8];
+            bytes.copy_from_slice(&mmap[last..last + 8]);
+            let word = u64::from_ne_bytes(bytes) & ((1u64 << rem) - 1);
+            mmap[last..last + 8].copy_from_slice(&word.to_ne_bytes());
+        }
+        Ok(Self { mmap, n, path: path.to_path_buf() })
+    }
+
     pub fn build_from(source: &PersistentBitVec, path: &Path) -> io::Result<Self> {
         fs::copy(source.path(), path)?;
         let file = OpenOptions::new().read(true).write(true).open(path)?;
diff --git a/src/obicompactvec/src/colgroup.rs b/src/obicompactvec/src/colgroup.rs
index b1545e6..49ca477 100644
--- a/src/obicompactvec/src/colgroup.rs
+++ b/src/obicompactvec/src/colgroup.rs
@@ -126,8 +126,7 @@ pub fn eval_filter_mask(expr: &FilterMask, mat: &dyn MatrixGroupOps, n: usize) -
             b.freeze()
         }
         FilterMask::And(parts) => {
-            let mut b = TempBitVecBuilder::new(n)?;
-            b.not(); // initialise à tout-1 (tout passe)
+            let mut b = TempBitVecBuilder::new_ones(n)?;
             for part in parts {
                 let m = eval_filter_mask(part, mat, n)?;
                 b.and(m.view());
diff --git a/src/obicompactvec/src/tempbitvec.rs b/src/obicompactvec/src/tempbitvec.rs
index b8991df..8bbec16 100644
--- a/src/obicompactvec/src/tempbitvec.rs
+++ b/src/obicompactvec/src/tempbitvec.rs
@@ -56,6 +56,15 @@ impl TempBitVecBuilder {
         Ok(Self { builder, temp })
     }
 
+    /// Create an all-ones bit vector of length `n`, stored as `data.pbiv`
+    /// inside a newly created temporary directory.
+    pub(crate) fn new_ones(n: usize) -> io::Result<Self> {
+        let temp = TempDir::new()?;
+        let path = temp.path().join("data.pbiv");
+        let builder = PersistentBitVecBuilder::new_ones(n, &path)?;
+        Ok(Self { builder, temp })
+    }
+
     pub(crate) fn freeze(self) -> io::Result<TempBitVec> {
         let Self { builder, temp } = self;
         let vec = builder.finish()?;
diff --git a/src/obicompactvec/src/tests/colgroup.rs b/src/obicompactvec/src/tests/colgroup.rs
index 884450f..d1c7cf1 100644
--- a/src/obicompactvec/src/tests/colgroup.rs
+++ b/src/obicompactvec/src/tests/colgroup.rs
@@ -150,8 +150,7 @@ fn mask_with_all_ones_is_noop() {
     let dir = tempdir().unwrap();
     let mut v = PersistentCompactIntVecBuilder::new(4, &dir.path().join("v.pciv")).unwrap();
     v.set(0, 300); v.set(1, 1); v.set(2, 0); v.set(3, 42);
-    let mut mask = PersistentBitVecBuilder::new(4, &dir.path().join("m.pbiv")).unwrap();
-    mask.not(); // all bits → 1
+    let mask = PersistentBitVecBuilder::new_ones(4, &dir.path().join("m.pbiv")).unwrap();
     v.mask_with(mask.view());
     v.close().unwrap();
     let r = PersistentCompactIntVec::open(&dir.path().join("v.pciv")).unwrap();
diff --git a/src/obilayeredmap/src/layer.rs b/src/obilayeredmap/src/layer.rs
index 241feea..475bca7 100644
--- a/src/obilayeredmap/src/layer.rs
+++ b/src/obilayeredmap/src/layer.rs
@@ -106,9 +106,7 @@ impl Layer<()> {
         let presence_dir = layer_dir.join(PRESENCE_DIR);
         fs::create_dir_all(&presence_dir).map_err(OLMError::Io)?;
         let mut mb = PersistentBitMatrixBuilder::new(n_kmers, &presence_dir).map_err(OLMError::Io)?;
-        let mut col = mb.add_col().map_err(OLMError::Io)?;
-        col.not();
-        col.close().map_err(OLMError::Io)?;
+        mb.add_col_ones().map_err(OLMError::Io)?.close().map_err(OLMError::Io)?;
         mb.close().map_err(OLMError::Io)
     }
 }