refactor: rename compute_degrees and mark start nodes
Renames `compute_degrees` to `compute_degrees_and_mark_starts` across the De Bruijn graph and partitioner layers to consolidate degree calculation and start-node flagging. Introduces safe neighbor iteration methods and a debug validation block to verify graph consistency. Refactors unitig extraction to use sequential execution with a `Mutex` for safe error propagation. Fixes malformed and duplicated method calls, adds auto-generation of missing `meta.json` files, and ensures persistent matrix builders are explicitly closed to finalize metadata.
This commit is contained in:
@@ -5,8 +5,8 @@ use cacheline_ef::{CachelineEf, CachelineEfVec};
|
||||
use epserde::prelude::*;
|
||||
use obicompactvec::{PersistentCompactIntMatrix, PersistentCompactIntVec};
|
||||
use obidebruinj::GraphDeBruijn;
|
||||
use obilayeredmap::{IndexMode, OLMError, layer::Layer};
|
||||
use obilayeredmap::meta::PartitionMeta;
|
||||
use obilayeredmap::{IndexMode, OLMError, layer::Layer};
|
||||
use obiskio::{SKError, SKFileMeta, SKFileReader};
|
||||
use ptr_hash::{PtrHash, bucket_fn::CubicEps, hash::Xx64};
|
||||
|
||||
@@ -17,7 +17,10 @@ type Mphf = PtrHash<u64, CubicEps, CachelineEfVec<Vec<CachelineEf>>, Xx64, Vec<u
|
||||
fn olm_to_sk(e: OLMError) -> SKError {
|
||||
match e {
|
||||
OLMError::Io(io_err) => SKError::Io(io_err),
|
||||
other => SKError::InvalidData { context: "layer build", detail: other.to_string() },
|
||||
other => SKError::InvalidData {
|
||||
context: "layer build",
|
||||
detail: other.to_string(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -99,7 +102,7 @@ impl KmerPartition {
|
||||
}
|
||||
|
||||
let n_kmers = g.len();
|
||||
g.compute_degrees();
|
||||
g.compute_degrees_and_mark_starts();
|
||||
|
||||
fs::create_dir_all(&layer_dir)?;
|
||||
|
||||
@@ -111,19 +114,27 @@ impl KmerPartition {
|
||||
uw.close()?;
|
||||
|
||||
if with_counts {
|
||||
Layer::<PersistentCompactIntMatrix>::build(&layer_dir, block_bits, mode, |kmer| {
|
||||
match (&mphf1_opt, &counts1_opt) {
|
||||
Layer::<PersistentCompactIntMatrix>::build(
|
||||
&layer_dir,
|
||||
block_bits,
|
||||
mode,
|
||||
|kmer| match (&mphf1_opt, &counts1_opt) {
|
||||
(Some(mphf), Some(counts)) => counts.get(mphf.index(&kmer.raw())),
|
||||
_ => 1,
|
||||
}
|
||||
})
|
||||
},
|
||||
)
|
||||
.map_err(olm_to_sk)?;
|
||||
} else {
|
||||
Layer::<()>::build(&layer_dir, block_bits, mode).map_err(olm_to_sk)?;
|
||||
}
|
||||
|
||||
let index_dir = layer_dir.parent().expect("layer_dir has a parent");
|
||||
PartitionMeta { n_layers: 1, mode: mode.clone() }.save(index_dir).map_err(olm_to_sk)?;
|
||||
PartitionMeta {
|
||||
n_layers: 1,
|
||||
mode: mode.clone(),
|
||||
}
|
||||
.save(index_dir)
|
||||
.map_err(olm_to_sk)?;
|
||||
|
||||
Ok(n_kmers)
|
||||
}
|
||||
|
||||
@@ -2,22 +2,25 @@ use std::fs;
|
||||
use std::io;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use obicompactvec::{
|
||||
PersistentBitMatrix, PersistentBitMatrixBuilder, PersistentBitVecBuilder,
|
||||
PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder, PersistentCompactIntVecBuilder,
|
||||
};
|
||||
use obidebruinj::GraphDeBruijn;
|
||||
use obicompactvec::{PersistentBitMatrix, PersistentBitMatrixBuilder,
|
||||
PersistentBitVecBuilder,
|
||||
PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder,
|
||||
PersistentCompactIntVecBuilder};
|
||||
use obikseq::CanonicalKmer;
|
||||
use obiskio::{SKError, SKResult, UnitigFileReader};
|
||||
use obilayeredmap::{IndexMode, Layer, LayeredMap, MphfOnly, OLMError};
|
||||
use obilayeredmap::meta::PartitionMeta;
|
||||
use obilayeredmap::{IndexMode, Layer, LayeredMap, MphfOnly, OLMError};
|
||||
use obiskio::{SKError, SKResult, UnitigFileReader};
|
||||
|
||||
use crate::partition::KmerPartition;
|
||||
|
||||
// ── MergeMode ─────────────────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum MergeMode { Presence, Count }
|
||||
pub enum MergeMode {
|
||||
Presence,
|
||||
Count,
|
||||
}
|
||||
|
||||
// ── ColBuilder — enum dispatch to avoid trait-object boxing issues ─────────────
|
||||
|
||||
@@ -36,8 +39,8 @@ impl ColBuilder {
|
||||
|
||||
fn close(self) -> SKResult<()> {
|
||||
match self {
|
||||
ColBuilder::Bit(b) => b.close().map_err(SKError::Io),
|
||||
ColBuilder::Int(b) => b.close().map_err(SKError::Io),
|
||||
ColBuilder::Bit(b) => b.close().map_err(SKError::Io),
|
||||
ColBuilder::Int(b) => b.close().map_err(SKError::Io),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -56,12 +59,12 @@ impl SrcLayerData {
|
||||
MergeMode::Presence => {
|
||||
if counts_dir.exists() && !layer_dir.join("presence").exists() {
|
||||
let mphf = MphfOnly::open(layer_dir).map_err(olm_to_sk)?;
|
||||
let mat = PersistentCompactIntMatrix::open(layer_dir).map_err(SKError::Io)?;
|
||||
let mat = PersistentCompactIntMatrix::open(layer_dir).map_err(SKError::Io)?;
|
||||
Ok(SrcLayerData::Count(mphf, mat))
|
||||
} else {
|
||||
// presence dir exists, or neither exists → Implicit handled by open()
|
||||
let mphf = MphfOnly::open(layer_dir).map_err(olm_to_sk)?;
|
||||
let mat = PersistentBitMatrix::open(layer_dir).map_err(SKError::Io)?;
|
||||
let mat = PersistentBitMatrix::open(layer_dir).map_err(SKError::Io)?;
|
||||
Ok(SrcLayerData::Presence(mphf, mat))
|
||||
}
|
||||
}
|
||||
@@ -86,7 +89,7 @@ impl SrcLayerData {
|
||||
let mut buf = vec![0u32; n_genomes];
|
||||
match self {
|
||||
SrcLayerData::Presence(mphf, mat) => mat.fill_row(mphf.index(kmer), &mut buf),
|
||||
SrcLayerData::Count(mphf, mat) => mat.fill_row(mphf.index(kmer), &mut buf),
|
||||
SrcLayerData::Count(mphf, mat) => mat.fill_row(mphf.index(kmer), &mut buf),
|
||||
}
|
||||
buf
|
||||
}
|
||||
@@ -101,10 +104,16 @@ const INDEX_SUBDIR: &str = "index";
|
||||
fn load_meta(dir: &Path) -> SKResult<PartitionMeta> {
|
||||
match PartitionMeta::load(dir) {
|
||||
Ok(m) => Ok(m),
|
||||
Err(e) if matches!(e, OLMError::Io(ref io_e) if io_e.kind() == std::io::ErrorKind::NotFound) => {
|
||||
Err(e) if matches!(e, OLMError::Io(ref io_e) if io_e.kind() == std::io::ErrorKind::NotFound) =>
|
||||
{
|
||||
let mut n = 0usize;
|
||||
while dir.join(format!("layer_{n}")).exists() { n += 1; }
|
||||
let m = PartitionMeta { n_layers: n, mode: IndexMode::default() };
|
||||
while dir.join(format!("layer_{n}")).exists() {
|
||||
n += 1;
|
||||
}
|
||||
let m = PartitionMeta {
|
||||
n_layers: n,
|
||||
mode: IndexMode::default(),
|
||||
};
|
||||
m.save(dir).map_err(olm_to_sk)?;
|
||||
Ok(m)
|
||||
}
|
||||
@@ -115,7 +124,10 @@ fn load_meta(dir: &Path) -> SKResult<PartitionMeta> {
|
||||
fn olm_to_sk(e: OLMError) -> SKError {
|
||||
match e {
|
||||
OLMError::Io(e) => SKError::Io(e),
|
||||
other => SKError::InvalidData { context: "merge", detail: other.to_string() },
|
||||
other => SKError::InvalidData {
|
||||
context: "merge",
|
||||
detail: other.to_string(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -128,7 +140,10 @@ fn col_path_int(dir: &Path, col: usize) -> PathBuf {
|
||||
}
|
||||
|
||||
fn write_matrix_meta(dir: &Path, n: usize, n_cols: usize) -> io::Result<()> {
|
||||
fs::write(dir.join("meta.json"), format!("{{\"n\":{n},\"n_cols\":{n_cols}}}\n"))
|
||||
fs::write(
|
||||
dir.join("meta.json"),
|
||||
format!("{{\"n\":{n},\"n_cols\":{n_cols}}}\n"),
|
||||
)
|
||||
}
|
||||
|
||||
// ── KmerPartition::merge_partition ────────────────────────────────────────────
|
||||
@@ -157,7 +172,7 @@ impl KmerPartition {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
load_meta(&dst_index_dir)?; // ensure meta.json exists before LayeredMap::open
|
||||
load_meta(&dst_index_dir)?; // ensure meta.json exists before LayeredMap::open
|
||||
let dst_map = LayeredMap::<()>::open(&dst_index_dir).map_err(olm_to_sk)?;
|
||||
let n_dst_layers = dst_map.n_layers();
|
||||
let n_src_total: usize = sources.iter().map(|(_, n)| *n).sum();
|
||||
@@ -178,12 +193,13 @@ impl KmerPartition {
|
||||
|
||||
for (src, _) in sources.iter() {
|
||||
let src_index_dir = src.part_dir(i).join(INDEX_SUBDIR);
|
||||
if !src_index_dir.exists() { continue; }
|
||||
if !src_index_dir.exists() {
|
||||
continue;
|
||||
}
|
||||
let src_meta = load_meta(&src_index_dir)?;
|
||||
|
||||
for l in 0..src_meta.n_layers {
|
||||
let unitigs_path = src_index_dir
|
||||
.join(format!("layer_{l}")).join("unitigs.bin");
|
||||
let unitigs_path = src_index_dir.join(format!("layer_{l}")).join("unitigs.bin");
|
||||
let reader = UnitigFileReader::open_sequential(&unitigs_path)?;
|
||||
for (kmer, _, _) in reader.iter_indexed_canonical_kmers() {
|
||||
if dst_map.query(kmer).is_none() {
|
||||
@@ -199,7 +215,7 @@ impl KmerPartition {
|
||||
let new_layer_dir = dst_index_dir.join(format!("layer_{new_layer_idx}"));
|
||||
|
||||
let n_new = if any_new {
|
||||
g.compute_degrees();
|
||||
g.compute_degrees_and_mark_starts();
|
||||
fs::create_dir_all(&new_layer_dir)?;
|
||||
let mut uw = Layer::<()>::unitig_writer(&new_layer_dir).map_err(olm_to_sk)?;
|
||||
g.try_for_each_unitig(|nuc_iter| {
|
||||
@@ -226,35 +242,47 @@ impl KmerPartition {
|
||||
let mut new_src_builders: Vec<ColBuilder> = if any_new {
|
||||
let data_dir = match mode {
|
||||
MergeMode::Presence => new_layer_dir.join("presence"),
|
||||
MergeMode::Count => new_layer_dir.join("counts"),
|
||||
MergeMode::Count => new_layer_dir.join("counts"),
|
||||
};
|
||||
fs::create_dir_all(&data_dir)?;
|
||||
match mode {
|
||||
MergeMode::Presence => {
|
||||
PersistentBitMatrixBuilder::new(n_new, &data_dir)
|
||||
.map_err(SKError::Io)?.close().map_err(SKError::Io)?;
|
||||
.map_err(SKError::Io)?
|
||||
.close()
|
||||
.map_err(SKError::Io)?;
|
||||
for _ in 0..n_dst_genomes {
|
||||
PersistentBitMatrix::append_column(&data_dir, |_| false)
|
||||
.map_err(SKError::Io)?;
|
||||
}
|
||||
(0..n_src_total).map(|g| -> SKResult<ColBuilder> {
|
||||
let b = PersistentBitVecBuilder::new(
|
||||
n_new, &col_path_bit(&data_dir, n_dst_genomes + g))?;
|
||||
Ok(ColBuilder::Bit(b))
|
||||
}).collect::<SKResult<_>>()?
|
||||
(0..n_src_total)
|
||||
.map(|g| -> SKResult<ColBuilder> {
|
||||
let b = PersistentBitVecBuilder::new(
|
||||
n_new,
|
||||
&col_path_bit(&data_dir, n_dst_genomes + g),
|
||||
)?;
|
||||
Ok(ColBuilder::Bit(b))
|
||||
})
|
||||
.collect::<SKResult<_>>()?
|
||||
}
|
||||
MergeMode::Count => {
|
||||
PersistentCompactIntMatrixBuilder::new(n_new, &data_dir)
|
||||
.map_err(SKError::Io)?.close().map_err(SKError::Io)?;
|
||||
.map_err(SKError::Io)?
|
||||
.close()
|
||||
.map_err(SKError::Io)?;
|
||||
for _ in 0..n_dst_genomes {
|
||||
PersistentCompactIntMatrix::append_column(&data_dir, |_| 0)
|
||||
.map_err(SKError::Io)?;
|
||||
}
|
||||
(0..n_src_total).map(|g| -> SKResult<ColBuilder> {
|
||||
let b = PersistentCompactIntVecBuilder::new(
|
||||
n_new, &col_path_int(&data_dir, n_dst_genomes + g))?;
|
||||
Ok(ColBuilder::Int(b))
|
||||
}).collect::<SKResult<_>>()?
|
||||
(0..n_src_total)
|
||||
.map(|g| -> SKResult<ColBuilder> {
|
||||
let b = PersistentCompactIntVecBuilder::new(
|
||||
n_new,
|
||||
&col_path_int(&data_dir, n_dst_genomes + g),
|
||||
)?;
|
||||
Ok(ColBuilder::Int(b))
|
||||
})
|
||||
.collect::<SKResult<_>>()?
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@@ -267,22 +295,28 @@ impl KmerPartition {
|
||||
.map(|l| {
|
||||
let layer_dir = dst_index_dir.join(format!("layer_{l}"));
|
||||
let n = dst_map.layer(l).n();
|
||||
(0..n_src_total).map(|src_g| -> SKResult<ColBuilder> {
|
||||
match mode {
|
||||
MergeMode::Presence => {
|
||||
let data_dir = layer_dir.join("presence");
|
||||
let b = PersistentBitVecBuilder::new(
|
||||
n, &col_path_bit(&data_dir, n_dst_genomes + src_g))?;
|
||||
Ok(ColBuilder::Bit(b))
|
||||
(0..n_src_total)
|
||||
.map(|src_g| -> SKResult<ColBuilder> {
|
||||
match mode {
|
||||
MergeMode::Presence => {
|
||||
let data_dir = layer_dir.join("presence");
|
||||
let b = PersistentBitVecBuilder::new(
|
||||
n,
|
||||
&col_path_bit(&data_dir, n_dst_genomes + src_g),
|
||||
)?;
|
||||
Ok(ColBuilder::Bit(b))
|
||||
}
|
||||
MergeMode::Count => {
|
||||
let data_dir = layer_dir.join("counts");
|
||||
let b = PersistentCompactIntVecBuilder::new(
|
||||
n,
|
||||
&col_path_int(&data_dir, n_dst_genomes + src_g),
|
||||
)?;
|
||||
Ok(ColBuilder::Int(b))
|
||||
}
|
||||
}
|
||||
MergeMode::Count => {
|
||||
let data_dir = layer_dir.join("counts");
|
||||
let b = PersistentCompactIntVecBuilder::new(
|
||||
n, &col_path_int(&data_dir, n_dst_genomes + src_g))?;
|
||||
Ok(ColBuilder::Int(b))
|
||||
}
|
||||
}
|
||||
}).collect::<SKResult<_>>()
|
||||
})
|
||||
.collect::<SKResult<_>>()
|
||||
})
|
||||
.collect::<SKResult<_>>()?;
|
||||
|
||||
@@ -290,7 +324,10 @@ impl KmerPartition {
|
||||
let mut col_offset = 0usize;
|
||||
for (src, src_n) in sources.iter() {
|
||||
let src_index_dir = src.part_dir(i).join(INDEX_SUBDIR);
|
||||
if !src_index_dir.exists() { col_offset += src_n; continue; }
|
||||
if !src_index_dir.exists() {
|
||||
col_offset += src_n;
|
||||
continue;
|
||||
}
|
||||
let src_meta = load_meta(&src_index_dir)?;
|
||||
|
||||
for l in 0..src_meta.n_layers {
|
||||
@@ -317,22 +354,27 @@ impl KmerPartition {
|
||||
// ── Close builders and update metadata ────────────────────────────────
|
||||
for (l, builders) in exist_builders.into_iter().enumerate() {
|
||||
let layer_dir = dst_index_dir.join(format!("layer_{l}"));
|
||||
for b in builders { b.close()?; }
|
||||
for b in builders {
|
||||
b.close()?;
|
||||
}
|
||||
let n = dst_map.layer(l).n();
|
||||
let data_dir = match mode {
|
||||
MergeMode::Presence => layer_dir.join("presence"),
|
||||
MergeMode::Count => layer_dir.join("counts"),
|
||||
MergeMode::Count => layer_dir.join("counts"),
|
||||
};
|
||||
write_matrix_meta(&data_dir, n, n_dst_genomes + n_src_total).map_err(SKError::Io)?;
|
||||
}
|
||||
|
||||
for b in new_src_builders { b.close()?; }
|
||||
for b in new_src_builders {
|
||||
b.close()?;
|
||||
}
|
||||
if any_new {
|
||||
let data_dir = match mode {
|
||||
MergeMode::Presence => new_layer_dir.join("presence"),
|
||||
MergeMode::Count => new_layer_dir.join("counts"),
|
||||
MergeMode::Count => new_layer_dir.join("counts"),
|
||||
};
|
||||
write_matrix_meta(&data_dir, n_new, n_dst_genomes + n_src_total).map_err(SKError::Io)?;
|
||||
write_matrix_meta(&data_dir, n_new, n_dst_genomes + n_src_total)
|
||||
.map_err(SKError::Io)?;
|
||||
|
||||
let mut part_meta = PartitionMeta::load(&dst_index_dir).map_err(olm_to_sk)?;
|
||||
part_meta.n_layers = new_layer_idx + 1;
|
||||
|
||||
@@ -2,15 +2,15 @@ use std::fs;
|
||||
use std::io;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use obicompactvec::{PersistentBitMatrixBuilder,
|
||||
PersistentBitVecBuilder,
|
||||
PersistentCompactIntMatrixBuilder,
|
||||
PersistentCompactIntVecBuilder};
|
||||
use obicompactvec::{
|
||||
PersistentBitMatrixBuilder, PersistentBitVecBuilder, PersistentCompactIntMatrixBuilder,
|
||||
PersistentCompactIntVecBuilder,
|
||||
};
|
||||
use obidebruinj::GraphDeBruijn;
|
||||
use obikseq::CanonicalKmer;
|
||||
use obiskio::{SKError, SKResult, UnitigFileReader};
|
||||
use obilayeredmap::{IndexMode, Layer, MphfLayer, OLMError};
|
||||
use obilayeredmap::meta::PartitionMeta;
|
||||
use obilayeredmap::{IndexMode, Layer, MphfLayer, OLMError};
|
||||
use obiskio::{SKError, SKResult, UnitigFileReader};
|
||||
|
||||
use crate::filter::{KmerFilter, passes_all};
|
||||
use crate::merge_layer::{MergeMode, SrcLayerData};
|
||||
@@ -21,7 +21,10 @@ const INDEX_SUBDIR: &str = "index";
|
||||
fn olm_to_sk(e: OLMError) -> SKError {
|
||||
match e {
|
||||
OLMError::Io(e) => SKError::Io(e),
|
||||
other => SKError::InvalidData { context: "rebuild", detail: other.to_string() },
|
||||
other => SKError::InvalidData {
|
||||
context: "rebuild",
|
||||
detail: other.to_string(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -34,7 +37,10 @@ fn col_path_int(dir: &Path, col: usize) -> PathBuf {
|
||||
}
|
||||
|
||||
fn write_matrix_meta(dir: &Path, n: usize, n_cols: usize) -> io::Result<()> {
|
||||
fs::write(dir.join("meta.json"), format!("{{\"n\":{n},\"n_cols\":{n_cols}}}\n"))
|
||||
fs::write(
|
||||
dir.join("meta.json"),
|
||||
format!("{{\"n\":{n},\"n_cols\":{n_cols}}}\n"),
|
||||
)
|
||||
}
|
||||
|
||||
// ── ColBuilder ────────────────────────────────────────────────────────────────
|
||||
@@ -54,8 +60,8 @@ impl ColBuilder {
|
||||
|
||||
fn close(self) -> SKResult<()> {
|
||||
match self {
|
||||
ColBuilder::Bit(b) => b.close().map_err(SKError::Io),
|
||||
ColBuilder::Int(b) => b.close().map_err(SKError::Io),
|
||||
ColBuilder::Bit(b) => b.close().map_err(SKError::Io),
|
||||
ColBuilder::Int(b) => b.close().map_err(SKError::Io),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -65,10 +71,16 @@ impl ColBuilder {
|
||||
fn load_meta(dir: &Path) -> SKResult<PartitionMeta> {
|
||||
match PartitionMeta::load(dir) {
|
||||
Ok(m) => Ok(m),
|
||||
Err(e) if matches!(e, OLMError::Io(ref io_e) if io_e.kind() == std::io::ErrorKind::NotFound) => {
|
||||
Err(e) if matches!(e, OLMError::Io(ref io_e) if io_e.kind() == std::io::ErrorKind::NotFound) =>
|
||||
{
|
||||
let mut n = 0usize;
|
||||
while dir.join(format!("layer_{n}")).exists() { n += 1; }
|
||||
let m = PartitionMeta { n_layers: n, mode: IndexMode::default() };
|
||||
while dir.join(format!("layer_{n}")).exists() {
|
||||
n += 1;
|
||||
}
|
||||
let m = PartitionMeta {
|
||||
n_layers: n,
|
||||
mode: IndexMode::default(),
|
||||
};
|
||||
m.save(dir).map_err(olm_to_sk)?;
|
||||
Ok(m)
|
||||
}
|
||||
@@ -90,10 +102,12 @@ fn iter_src_layers(
|
||||
let src_meta = load_meta(src_index_dir)?;
|
||||
for l in 0..src_meta.n_layers {
|
||||
let src_layer_dir = src_index_dir.join(format!("layer_{l}"));
|
||||
let unitigs_path = src_layer_dir.join("unitigs.bin");
|
||||
if !unitigs_path.exists() { continue; }
|
||||
let unitigs_path = src_layer_dir.join("unitigs.bin");
|
||||
if !unitigs_path.exists() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let reader = UnitigFileReader::open_sequential(&unitigs_path)?;
|
||||
let reader = UnitigFileReader::open_sequential(&unitigs_path)?;
|
||||
let src_data = SrcLayerData::open(&src_layer_dir, mode)?;
|
||||
|
||||
for (kmer, _, _) in reader.iter_indexed_canonical_kmers() {
|
||||
@@ -146,7 +160,7 @@ impl KmerPartition {
|
||||
}
|
||||
|
||||
let n_new = g.len();
|
||||
g.compute_degrees();
|
||||
g.compute_degrees_and_mark_starts();
|
||||
|
||||
// ── Build MPHF in dst layer_0 ─────────────────────────────────────────
|
||||
let dst_index_dir = self.part_dir(i).join(INDEX_SUBDIR);
|
||||
@@ -167,26 +181,37 @@ impl KmerPartition {
|
||||
// ── Prepare matrix builders (one column per genome) ───────────────────
|
||||
let data_dir = match mode {
|
||||
MergeMode::Presence => dst_layer_dir.join("presence"),
|
||||
MergeMode::Count => dst_layer_dir.join("counts"),
|
||||
MergeMode::Count => dst_layer_dir.join("counts"),
|
||||
};
|
||||
fs::create_dir_all(&data_dir)?;
|
||||
|
||||
let mut builders: Vec<ColBuilder> = match mode {
|
||||
MergeMode::Presence => {
|
||||
PersistentBitMatrixBuilder::new(n_new, &data_dir)
|
||||
.map_err(SKError::Io)?.close().map_err(SKError::Io)?;
|
||||
(0..n_genomes).map(|g| -> SKResult<ColBuilder> {
|
||||
let b = PersistentBitVecBuilder::new(n_new, &col_path_bit(&data_dir, g))?;
|
||||
Ok(ColBuilder::Bit(b))
|
||||
}).collect::<SKResult<_>>()?
|
||||
.map_err(SKError::Io)?
|
||||
.close()
|
||||
.map_err(SKError::Io)?;
|
||||
(0..n_genomes)
|
||||
.map(|g| -> SKResult<ColBuilder> {
|
||||
let b = PersistentBitVecBuilder::new(n_new, &col_path_bit(&data_dir, g))?;
|
||||
Ok(ColBuilder::Bit(b))
|
||||
})
|
||||
.collect::<SKResult<_>>()?
|
||||
}
|
||||
MergeMode::Count => {
|
||||
PersistentCompactIntMatrixBuilder::new(n_new, &data_dir)
|
||||
.map_err(SKError::Io)?.close().map_err(SKError::Io)?;
|
||||
(0..n_genomes).map(|g| -> SKResult<ColBuilder> {
|
||||
let b = PersistentCompactIntVecBuilder::new(n_new, &col_path_int(&data_dir, g))?;
|
||||
Ok(ColBuilder::Int(b))
|
||||
}).collect::<SKResult<_>>()?
|
||||
.map_err(SKError::Io)?
|
||||
.close()
|
||||
.map_err(SKError::Io)?;
|
||||
(0..n_genomes)
|
||||
.map(|g| -> SKResult<ColBuilder> {
|
||||
let b = PersistentCompactIntVecBuilder::new(
|
||||
n_new,
|
||||
&col_path_int(&data_dir, g),
|
||||
)?;
|
||||
Ok(ColBuilder::Int(b))
|
||||
})
|
||||
.collect::<SKResult<_>>()?
|
||||
}
|
||||
};
|
||||
|
||||
@@ -200,10 +225,17 @@ impl KmerPartition {
|
||||
})?;
|
||||
|
||||
// ── Close builders, write metadata ────────────────────────────────────
|
||||
for b in builders { b.close()?; }
|
||||
for b in builders {
|
||||
b.close()?;
|
||||
}
|
||||
write_matrix_meta(&data_dir, n_new, n_genomes).map_err(SKError::Io)?;
|
||||
|
||||
PartitionMeta { n_layers: 1, mode: IndexMode::Exact }.save(&dst_index_dir).map_err(olm_to_sk)?;
|
||||
PartitionMeta {
|
||||
n_layers: 1,
|
||||
mode: IndexMode::Exact,
|
||||
}
|
||||
.save(&dst_index_dir)
|
||||
.map_err(olm_to_sk)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user