feat: implement persistent layered index and chunked binary format
Introduce the `obilayeredmap` specification and persistent MPHF-based index architecture for incremental multi-dataset indexing. Implement chunked binary serialization with a fixed `u8` k-mer count limit (256) and overlapping super-kmer segments. Add memory-mapped I/O and a companion `.idx` index file for allocation-free, O(1) unitig access. Update MkDocs navigation, enhance the k-mer comparison script, and add comprehensive tests for serialization, partitioning, and file I/O pipelines.
This commit is contained in:
@@ -75,7 +75,12 @@ def main():
|
||||
parser.add_argument("file_a", help="First FASTA file (reference)")
|
||||
parser.add_argument("file_b", help="Second FASTA file (to compare)")
|
||||
parser.add_argument(
|
||||
"-k", "--kmer-size", type=int, default=31, metavar="K", help="k-mer size (default: 31)"
|
||||
"-k",
|
||||
"--kmer-size",
|
||||
type=int,
|
||||
default=31,
|
||||
metavar="K",
|
||||
help="k-mer size (default: 31)",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -104,6 +109,10 @@ def main():
|
||||
|
||||
if only_a or only_b:
|
||||
print("\nSets differ.", file=sys.stderr)
|
||||
if len(only_a) > 0 and len(only_a) <= 10:
|
||||
print(f"\nOnly in A: {only_a}")
|
||||
if len(only_b) > 0 and len(only_b) <= 10:
|
||||
print(f"\nOnly in B: {only_b}")
|
||||
sys.exit(1)
|
||||
else:
|
||||
print("\nSets are identical.")
|
||||
|
||||
Reference in New Issue
Block a user