.gitignore: ignore zstandard-compressed files

- Add *.zst pattern to .gitignore
- Prevents tracking of zstandard-compressed archives
This commit is contained in:
Eric Coissac
2026-04-27 16:53:42 +02:00
parent 1f466bf113
commit 7efec54b27
12 changed files with 281 additions and 94 deletions
+4
View File
@@ -4,4 +4,8 @@ version = "0.1.0"
edition = "2024"
[dependencies]
niffler = "3.0.0"
obikseq = { path = "../obikseq" }
obiskio = { path = "../obiskio" }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
+2 -2
View File
@@ -1,3 +1,3 @@
mod manager;
mod partition;
pub use manager::PartitionManager;
pub use partition::KmerPartition;
-18
View File
@@ -1,18 +0,0 @@
use obiskio::SKFileWriter;
use std::path::Path;
/// Holds the state needed to spread super-k-mers over a power-of-two number
/// of partition files.
pub struct PartitionManager {
    /// Directory under which the partitions live, stored exactly as given to `new`.
    root_path: Box<Path>,
    /// Bit mask with the low `n_partition_bits` bits set.
    partitions_mask: u64,
    /// Partition writers; empty right after construction.
    writers: Vec<SKFileWriter>,
}

impl PartitionManager {
    /// Builds a manager rooted at `root_path` whose mask keeps the low
    /// `n_partition_bits` bits of a `u64`.
    ///
    /// `n_partition_bits` values of 64 or more select every bit of the mask.
    /// Shifting a `u64` by 64 or more is an overflow (a panic in debug builds,
    /// a wrapped shift yielding a mask of 0 in release builds), so that case
    /// is handled explicitly instead of being fed to `<<`.
    pub fn new(root_path: Box<Path>, n_partition_bits: usize) -> Self {
        let partitions_mask = if n_partition_bits >= u64::BITS as usize {
            u64::MAX
        } else {
            (1u64 << n_partition_bits) - 1
        };
        Self {
            root_path,
            partitions_mask,
            writers: Vec::new(),
        }
    }
}
+196
View File
@@ -0,0 +1,196 @@
use std::fs;
use std::io;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
use niffler::Level;
use niffler::send::compression::Format;
use obikseq::superkmer::SuperKmer;
use serde::{Deserialize, Serialize};
use obiskio::{SKFilePool, SKFileWriter, SKResult, SharedPool, create_token_with};
const META_FILENAME: &str = "partition.meta";
/// Description of a partition directory, written as pretty-printed JSON to
/// the `partition.meta` file by `KmerPartition::write_meta`.
///
/// Field order matters: serde emits the keys in declaration order.
#[derive(Serialize, Deserialize)]
struct PartitionMeta {
/// Number of bits of the partition index; the partition count is `1 << n_bits`.
n_bits: usize,
/// k-mer size the partition was created with.
kmer_size: usize,
/// Minimizer size the partition was created with.
minimizer_size: usize,
/// Compression format name: one of "gzip", "bzip2", "lzma", "zstd" or "none".
format: String,
/// Compression level converted to its `u32` value.
level: u32,
}
/// A directory of super-k-mer files split into `2^n_bits` partitions, each
/// super-k-mer being routed by the hash of its canonical minimizer.
///
/// Partition files are opened on first write; `close` (also run on drop)
/// closes every writer that was opened.
pub struct KmerPartition {
/// Directory holding the metadata file and the `part_NNNNN` sub-directories.
root_path: PathBuf,
/// Number of partitions, `1 << n_bits`.
n_partitions: usize,
/// `(1 << n_bits) - 1`; ANDed with the minimizer hash to pick a partition.
partitions_mask: u64,
/// k-mer size, recorded in the metadata file.
kmer_size: usize,
/// Minimizer length used to extract, canonicalize and hash the minimizer.
minimizer_size: usize,
/// Shared file pool handed to `create_token_with` whenever a writer is opened.
pool: SharedPool,
/// One slot per partition; `None` until the partition receives its first write.
writers: Vec<Option<SKFileWriter>>,
/// Compression format of the partition files (also selects their extension).
format: Format,
/// Compression level of the partition files.
level: Level,
/// Set by `close`; once true, `write`, `write_batch` and `flush` return an error.
closed: bool,
}
impl KmerPartition {
/// Creates a partition directory at `path` with the default compression
/// settings: `Format::Zstd` at `Level::Three`.
///
/// Every other argument is forwarded unchanged to `create_with`, whose
/// errors are returned as-is.
pub fn create<P: AsRef<Path>>(
    path: P,
    n_bits: usize,
    kmer_size: usize,
    minimizer_size: usize,
    force: bool,
) -> SKResult<Self> {
    let default_format = Format::Zstd;
    let default_level = Level::Three;
    Self::create_with(
        path,
        n_bits,
        kmer_size,
        minimizer_size,
        default_format,
        default_level,
        force,
    )
}
/// Creates a partition directory at `path` with an explicit compression
/// `format` and `level`.
///
/// The directory is created (replacing an existing one when `force` is
/// true), the metadata file is written, and one empty writer slot is
/// reserved per partition. Partition files themselves are only opened on
/// first write.
///
/// # Errors
///
/// - `InvalidInput` when `n_bits` is not smaller than the bit width of
///   `usize`, since `1 << n_bits` partitions cannot be represented.
/// - `AlreadyExists` when `path` exists and `force` is false.
/// - Any I/O error raised while removing or creating the directory, or
///   while writing the metadata file.
pub fn create_with<P: AsRef<Path>>(
    path: P,
    n_bits: usize,
    kmer_size: usize,
    minimizer_size: usize,
    format: Format,
    level: Level,
    force: bool,
) -> SKResult<Self> {
    // Validate before touching the filesystem: an oversized `n_bits` would
    // otherwise overflow the shifts below *after* `force` had already
    // deleted the existing directory.
    if n_bits >= usize::BITS as usize {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            format!(
                "n_bits = {n_bits} is too large: must be smaller than {}",
                usize::BITS
            ),
        )
        .into());
    }

    let root_path = path.as_ref().to_owned();
    if root_path.exists() {
        if force {
            fs::remove_dir_all(&root_path)?;
        } else {
            return Err(io::Error::new(
                io::ErrorKind::AlreadyExists,
                format!("{}: partition directory already exists", root_path.display()),
            )
            .into());
        }
    }
    fs::create_dir_all(&root_path)?;

    let n_partitions = 1usize << n_bits;
    let pool = Arc::new(Mutex::new(SKFilePool::from_system_limits()));
    // Writers are opened lazily, so every slot starts out empty.
    let writers = (0..n_partitions).map(|_| None).collect();
    let partition = Self {
        root_path,
        n_partitions,
        partitions_mask: (1u64 << n_bits) - 1,
        kmer_size,
        minimizer_size,
        pool,
        writers,
        format,
        level,
        closed: false,
    };
    partition.write_meta(n_bits)?;
    Ok(partition)
}
/// Appends one super-k-mer to the file of the partition selected by its
/// minimizer, opening that file first if this is its first write.
///
/// # Errors
///
/// Fails when the partition is closed, when the target partition cannot be
/// computed, when its writer cannot be opened, or when the write itself fails.
pub fn write(&mut self, sk: &SuperKmer) -> SKResult<()> {
    self.check_not_closed()?;
    let index = self.partition_of(sk)?;
    let writer = self.ensure_writer(index)?;
    writer.write(sk)
}
/// Writes every super-k-mer of `sks`, in order, to its partition file.
///
/// The closed check is performed once up front (even for an empty slice).
/// Processing stops at the first failure, so earlier items of the batch may
/// already have been written when an error is returned.
pub fn write_batch(&mut self, sks: &[SuperKmer]) -> SKResult<()> {
    self.check_not_closed()?;
    sks.iter().try_for_each(|sk| -> SKResult<()> {
        let index = self.partition_of(sk)?;
        self.ensure_writer(index)?.write(sk)
    })
}
/// Flushes every partition writer that has been opened so far.
///
/// Returns an error if the partition is closed; otherwise stops at, and
/// returns, the first flush failure.
pub fn flush(&mut self) -> SKResult<()> {
    self.check_not_closed()?;
    self.writers
        .iter_mut()
        // Skip the slots of partitions that never received a write.
        .flatten()
        .try_for_each(|open_writer| -> SKResult<()> {
            open_writer.flush()?;
            Ok(())
        })
}
/// Closes every opened partition writer and marks the partition as closed.
///
/// Calling `close` again is a no-op that returns `Ok(())`.
///
/// # Errors
///
/// Every writer is closed even when an earlier one fails; the first error
/// encountered is returned. Because the partition is marked closed before
/// the writers are touched, a retry would not reach the remaining writers,
/// so stopping at the first failure would leave them unclosed.
pub fn close(&mut self) -> SKResult<()> {
    if self.closed {
        return Ok(());
    }
    self.closed = true;

    let mut result: SKResult<()> = Ok(());
    for writer in self.writers.iter_mut().flatten() {
        let outcome: SKResult<()> = writer.close().map_err(Into::into);
        // Keep the first failure, but keep closing the remaining writers.
        if result.is_ok() {
            result = outcome;
        }
    }
    result
}
/// Returns `true` until `close` has been called on this partition.
pub fn is_open(&self) -> bool {
!self.closed
}
/// Returns the root directory of this partition.
pub fn path(&self) -> &Path {
    self.root_path.as_path()
}
// ── private ───────────────────────────────────────────────────────────────
/// Guard used by the writing and flushing entry points: returns a
/// `BrokenPipe` error once the partition has been closed, `Ok(())` otherwise.
fn check_not_closed(&self) -> SKResult<()> {
    if !self.closed {
        return Ok(());
    }
    let closed_error = io::Error::new(
        io::ErrorKind::BrokenPipe,
        "write to closed KmerPartition",
    );
    Err(closed_error.into())
}
/// Computes the index of the partition that `sk` belongs to.
///
/// The minimizer is extracted from `sk` at its minimizer position, put in
/// canonical form and hashed; the hash is then reduced with
/// `partitions_mask`. A failure to extract the minimizer is reported as an
/// I/O error.
fn partition_of(&self, sk: &SuperKmer) -> SKResult<usize> {
    let width = self.minimizer_size;
    let start = sk.minimizer_pos() as usize;
    let raw_minimizer = sk.kmer(start, width).map_err(io::Error::other)?;
    let minimizer = raw_minimizer.canonical(width);
    let bucket = minimizer.hash(width) & self.partitions_mask;
    Ok(bucket as usize)
}
fn write_meta(&self, n_bits: usize) -> SKResult<()> {
let meta = PartitionMeta {
n_bits,
kmer_size: self.kmer_size,
minimizer_size: self.minimizer_size,
format: match self.format {
Format::Gzip => "gzip",
Format::Bzip => "bzip2",
Format::Lzma => "lzma",
Format::Zstd => "zstd",
Format::No => "none",
}
.to_owned(),
level: u32::from(self.level),
};
let f = fs::File::create(self.root_path.join(META_FILENAME))?;
serde_json::to_writer_pretty(f, &meta)
.map_err(|e| io::Error::other(e))?;
Ok(())
}
/// Returns the writer of `partition`, opening it on first use.
///
/// A partition's file lives at `part_NNNNN/raw.<ext>` under the root
/// directory, where `<ext>` depends on the compression format. The
/// sub-directory is created on demand.
///
/// Indexes `writers` directly, so `partition` must be a valid slot index.
fn ensure_writer(&mut self, partition: usize) -> SKResult<&mut SKFileWriter> {
    let already_open = self.writers[partition].is_some();
    if !already_open {
        let ext = match self.format {
            Format::Gzip => "skmer.gz",
            Format::Bzip => "skmer.bz2",
            Format::Lzma => "skmer.xz",
            Format::Zstd => "skmer.zst",
            Format::No => "skmer",
        };
        let part_dir = self.root_path.join(format!("part_{:05}", partition));
        fs::create_dir_all(&part_dir)?;
        let target = part_dir.join(format!("raw.{ext}"));
        let opened = create_token_with(&self.pool, target, self.format, self.level)?;
        self.writers[partition] = Some(opened);
    }
    // The slot is occupied at this point: it either already was, or was
    // filled just above.
    Ok(self.writers[partition].as_mut().unwrap())
}
}
impl Drop for KmerPartition {
    /// Closes the partition when it goes out of scope.
    ///
    /// `drop` has no way to report a failure, so the result of `close` is
    /// deliberately discarded; call `close` explicitly to observe errors.
    fn drop(&mut self) {
        drop(self.close());
    }
}