1402 lines
33 KiB
HTML
1402 lines
33 KiB
HTML
|
|
|
|||
|
|
<!doctype html>
|
|||
|
|
<html lang="en" class="no-js">
|
|||
|
|
<head>
|
|||
|
|
|
|||
|
|
<meta charset="utf-8">
|
|||
|
|
<meta name="viewport" content="width=device-width,initial-scale=1">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<link rel="icon" href="../../assets/images/favicon.png">
|
|||
|
|
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.7.6">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<title>Query system - obikmer</title>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<link rel="stylesheet" href="../../assets/stylesheets/main.484c7ddc.min.css">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
|||
|
|
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
|
|||
|
|
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</head>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<body dir="ltr">
|
|||
|
|
|
|||
|
|
|
|||
|
|
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
|
|||
|
|
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
|
|||
|
|
<label class="md-overlay" for="__drawer"></label>
|
|||
|
|
<div data-md-component="skip">
|
|||
|
|
|
|||
|
|
|
|||
|
|
<a href="#query-system" class="md-skip">
|
|||
|
|
Skip to content
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
<div data-md-component="announce">
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<header class="md-header md-header--shadow" data-md-component="header">
|
|||
|
|
<nav class="md-header__inner md-grid" aria-label="Header">
|
|||
|
|
<a href="../.." title="obikmer" class="md-header__button md-logo" aria-label="obikmer" data-md-component="logo">
|
|||
|
|
|
|||
|
|
|
|||
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
<label class="md-header__button md-icon" for="__drawer">
|
|||
|
|
|
|||
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
|
|||
|
|
</label>
|
|||
|
|
<div class="md-header__title" data-md-component="header-title">
|
|||
|
|
<div class="md-header__ellipsis">
|
|||
|
|
<div class="md-header__topic">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
obikmer
|
|||
|
|
</span>
|
|||
|
|
</div>
|
|||
|
|
<div class="md-header__topic" data-md-component="header-topic">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Query system
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<script>var palette=__md_get("__palette");if(palette&&palette.color){if("(prefers-color-scheme)"===palette.color.media){var media=matchMedia("(prefers-color-scheme: light)"),input=document.querySelector(media.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");palette.color.media=input.getAttribute("data-md-color-media"),palette.color.scheme=input.getAttribute("data-md-color-scheme"),palette.color.primary=input.getAttribute("data-md-color-primary"),palette.color.accent=input.getAttribute("data-md-color-accent")}for(var[key,value]of Object.entries(palette.color))document.body.setAttribute("data-md-color-"+key,value)}</script>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</header>
|
|||
|
|
|
|||
|
|
<div class="md-container" data-md-component="container">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<main class="md-main" data-md-component="main">
|
|||
|
|
<div class="md-main__inner md-grid">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
|
|||
|
|
<div class="md-sidebar__scrollwrap">
|
|||
|
|
<div class="md-sidebar__inner">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
|
|||
|
|
<label class="md-nav__title" for="__drawer">
|
|||
|
|
<a href="../.." title="obikmer" class="md-nav__button md-logo" aria-label="obikmer" data-md-component="logo">
|
|||
|
|
|
|||
|
|
|
|||
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
obikmer
|
|||
|
|
</label>
|
|||
|
|
|
|||
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../.." class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Home
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item md-nav__item--nested">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" >
|
|||
|
|
|
|||
|
|
|
|||
|
|
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Theory
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
</label>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
|
|||
|
|
<label class="md-nav__title" for="__nav_2">
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
Theory
|
|||
|
|
|
|||
|
|
|
|||
|
|
</label>
|
|||
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../kmers/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Kmers and super-kmers
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../theory/encoding/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
DNA encoding
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../theory/entropy/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Entropy filter
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../theory/minimizer/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Minimizer selection
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../theory/indexing/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Partitioning architecture
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item md-nav__item--nested">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" >
|
|||
|
|
|
|||
|
|
|
|||
|
|
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Implementation
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
</label>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
|
|||
|
|
<label class="md-nav__title" for="__nav_3">
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
Implementation
|
|||
|
|
|
|||
|
|
|
|||
|
|
</label>
|
|||
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../implementation/superkmer/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
SuperKmer
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../implementation/kmer/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Kmer
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../implementation/chunkreader/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Chunk reader
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../implementation/pipeline/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Construction pipeline
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../implementation/obipipeline/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
obipipeline library
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../implementation/storage/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
On-disk storage
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../implementation/mphf/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
MPHF selection
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../implementation/unitig_evidence/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Unitig evidence encoding
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../implementation/evidence_elimination/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Evidence elimination (discussion)
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../implementation/obilayeredmap/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
obilayeredmap crate
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../implementation/persistent_compact_int_vec/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
PersistentCompactIntVec
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../implementation/persistent_bit_vec/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
PersistentBitVec
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../implementation/merge/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Merge command
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../implementation/rebuild_filter/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Kmer filtering (rebuild/dump/unitig)
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item md-nav__item--nested">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" >
|
|||
|
|
|
|||
|
|
|
|||
|
|
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Architecture
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
</label>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
|
|||
|
|
<label class="md-nav__title" for="__nav_4">
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
Architecture
|
|||
|
|
|
|||
|
|
|
|||
|
|
</label>
|
|||
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../sequences/invariant/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Sequences
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../index_architecture/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Kmer index
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
|
|||
|
|
<div class="md-sidebar__scrollwrap">
|
|||
|
|
<div class="md-sidebar__inner">
|
|||
|
|
|
|||
|
|
|
|||
|
|
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<label class="md-nav__title" for="__toc">
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
Table of contents
|
|||
|
|
</label>
|
|||
|
|
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#goal" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Goal
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#input" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Input
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#algorithm" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Algorithm
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#findere-z-window-filter" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Findere z-window filter
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" aria-label="Findere z-window filter">
|
|||
|
|
<ul class="md-nav__list">
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#effective-z-at-query-time" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Effective z at query time
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#layer-lookup-mphflayerfind" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Layer lookup: MphfLayer::find
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" aria-label="Layer lookup: MphfLayer::find">
|
|||
|
|
<ul class="md-nav__list">
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#querylayer-variant-selection" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
QueryLayer variant selection
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#presence-count-mode-at-query-time" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Presence / count mode at query time
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#coverage-vectors-detail" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Coverage vectors (--detail)
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#kmer_missing-semantics" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
kmer_missing semantics
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#output-format" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Output format
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#annotation-schema" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Annotation schema
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#cli" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
CLI
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#future-work" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Future work
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
|
|||
|
|
</nav>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<div class="md-content" data-md-component="content">
|
|||
|
|
|
|||
|
|
<article class="md-content__inner md-typeset">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<h1 id="query-system">Query system</h1>
|
|||
|
|
<h2 id="goal">Goal</h2>
|
|||
|
|
<p>Given a set of query sequences, determine for each sequence how many of its k-mers are found in the index and, for each indexed genome, how many k-mers match. The query system is the foundation for read classification and sequence-to-genome mapping.</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="input">Input</h2>
|
|||
|
|
<ul>
|
|||
|
|
<li>Query sequences in FASTA or FASTQ format (gzip supported, streaming stdin supported). GenBank flat files are not supported at query time (only at index time).</li>
|
|||
|
|
<li>Sequences shorter than k bases are silently skipped.</li>
|
|||
|
|
<li>Non-ACGT characters are handled by the superkmer decomposition layer: they act as hard breaks, producing shorter superkmers (identical to the behaviour at indexing time).</li>
|
|||
|
|
</ul>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="algorithm">Algorithm</h2>
|
|||
|
|
<p>The query follows the same superkmer-based partitioning strategy used at indexing time.</p>
|
|||
|
|
<div class="highlight"><pre><span></span><code>for each chunk of sequences (parallel workers via obipipeline):
|
|||
|
|
build QueryBatch: decompose all sequences into s-mers via superkmers, deduplicate
|
|||
|
|
allocate seq_results[seq_idx][smer_pos] = None ← per-sequence s-mer result vectors
|
|||
|
|
split superkmers by partition via minimiser hash
|
|||
|
|
for each partition p:
|
|||
|
|
query_partition(p, superkmers_routed_to_p)
|
|||
|
|
→ load QueryLayer(s) for p
|
|||
|
|
→ for each s-mer in each superkmer: MphfLayer::find(smer)
|
|||
|
|
fill seq_results[seq_idx][kmer_offset + j] from partition results
|
|||
|
|
for each sequence:
|
|||
|
|
apply_findere(seq_results[seq_idx], effective_z) ← per full sequence
|
|||
|
|
accumulate confirmed k-mer results into acc and cov
|
|||
|
|
emit annotated sequences
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p>Superkmers that appear more than once in the batch (same sequence or across sequences) are deduplicated: each unique <code>RoutableSuperKmer</code> is queried once per partition, and the result is broadcast to every <code>SKDesc</code> entry that references it.</p>
|
|||
|
|
<p><strong>Findere requires full-sequence aggregation.</strong> <code>apply_findere</code> is applied once per sequence on the complete s-mer result vector, after all partitions have contributed. Applying it per superkmer would produce false negatives at superkmer boundaries, where the z-window spans two superkmers.</p>
|
|||
|
|
<p>Batches are processed in parallel via <code>obipipeline</code> workers; the <code>--threads</code> flag controls the number of worker threads.</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="findere-z-window-filter">Findere z-window filter</h2>
|
|||
|
|
<p>For approximate index modes, the index physically stores s-mers of size <code>s = k_user − z + 1</code>. At query time, <code>set_k(s)</code> is in effect, so queries naturally produce s-mer results. <code>apply_findere</code> then aggregates z consecutive s-mer results into one k_user-mer answer:</p>
|
|||
|
|
<div class="highlight"><pre><span></span><code><span class="k">fn</span><span class="w"> </span><span class="nf">apply_findere</span><span class="p">(</span>
|
|||
|
|
<span class="w"> </span><span class="n">results</span><span class="p">:</span><span class="w"> </span><span class="kp">&amp;</span><span class="p">[</span><span class="nb">Option</span><span class="o">&lt;</span><span class="nb">Box</span><span class="o">&lt;</span><span class="p">[</span><span class="kt">u32</span><span class="p">]</span><span class="o">&gt;&gt;</span><span class="p">],</span><span class="w"> </span><span class="c1">// N s-mer results</span>
|
|||
|
|
<span class="w"> </span><span class="n">z</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">,</span>
|
|||
|
|
<span class="w"> </span><span class="n">n_genomes</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">,</span>
|
|||
|
|
<span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nb">Vec</span><span class="o">&lt;</span><span class="nb">Option</span><span class="o">&lt;</span><span class="nb">Box</span><span class="o">&lt;</span><span class="p">[</span><span class="kt">u32</span><span class="p">]</span><span class="o">&gt;&gt;&gt;</span><span class="w"> </span><span class="c1">// N − z + 1 k_user-mer results</span>
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p>Input length N (s-mers), output length N − z + 1 (k_user-mers).</p>
|
|||
|
|
<p>For each genome g independently, a sliding window of size z scans the input. Output position i is confirmed for genome g iff all z values <code>results[i..i+z][g]</code> are nonzero (<code>None</code> counts as zero for all genomes). The scan is O(N) per genome.</p>
|
|||
|
|
<p>Output values come from <code>results[i]</code> (leftmost s-mer of each window); genomes not confirmed are zeroed. If all genomes are zero, the position is returned as <code>None</code>.</p>
|
|||
|
|
<p><strong>Short sequences</strong>: when the s-mer count is less than z, no complete window can form — <code>apply_findere</code> returns an empty vector. K-mers from sequences shorter than k_user are not emitted.</p>
|
|||
|
|
<p><strong>Exact indexes</strong>: <code>z = 1</code>, <code>apply_findere</code> is a passthrough (output length = input length).</p>
|
|||
|
|
<h3 id="effective-z-at-query-time">Effective z at query time</h3>
|
|||
|
|
<p><code>effective_z</code> is resolved at the start of <code>run()</code>:</p>
|
|||
|
|
<div class="highlight"><pre><span></span><code><span class="kd">let</span><span class="w"> </span><span class="n">effective_z</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">args</span><span class="p">.</span><span class="n">findere_z</span><span class="p">.</span><span class="n">unwrap_or_else</span><span class="p">(</span><span class="o">||</span><span class="w"> </span><span class="k">match</span><span class="w"> </span><span class="n">idx</span><span class="p">.</span><span class="n">meta</span><span class="p">().</span><span class="n">config</span><span class="p">.</span><span class="n">evidence</span><span class="w"> </span><span class="p">{</span>
|
|||
|
|
<span class="w"> </span><span class="n">IndexMode</span><span class="p">::</span><span class="n">Approx</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="n">z</span><span class="p">,</span><span class="w"> </span><span class="o">..</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="n">IndexMode</span><span class="p">::</span><span class="n">Hybrid</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="n">z</span><span class="p">,</span><span class="w"> </span><span class="o">..</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="o">=></span><span class="w"> </span><span class="n">z</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="kt">usize</span><span class="p">,</span>
|
|||
|
|
<span class="w"> </span><span class="n">IndexMode</span><span class="p">::</span><span class="n">Exact</span><span class="w"> </span><span class="o">=></span><span class="w"> </span><span class="mi">1</span><span class="p">,</span>
|
|||
|
|
<span class="p">});</span>
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p>The <code>-z</code> CLI option overrides the index metadata value. A higher z increases stringency (fewer false positives, though some true positives may be discarded at sequence ends); a lower z increases sensitivity.</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="layer-lookup-mphflayerfind">Layer lookup: <code>MphfLayer::find</code></h2>
|
|||
|
|
<p><code>MphfLayer::open(dir, mode: &amp;IndexMode)</code> receives the mode from <code>PartitionMeta</code> — no per-layer file is read. The caller (<code>QueryLayer</code>) never chooses the dispatch path: it is fixed at open time by <code>LayerEvidence</code>. See <a href="../../implementation/obilayeredmap/">obilayeredmap</a> for the full <code>find</code> / <code>find_strict</code> API.</p>
|
|||
|
|
<h3 id="querylayer-variant-selection"><code>QueryLayer</code> variant selection</h3>
|
|||
|
|
<p><code>QueryLayer::open</code> in <code>query_layer.rs</code> selects the data matrix to pair with <code>MphfLayer</code>:</p>
|
|||
|
|
<table>
|
|||
|
|
<thead>
|
|||
|
|
<tr>
|
|||
|
|
<th>Condition</th>
|
|||
|
|
<th>Variant</th>
|
|||
|
|
<th>Data returned per k-mer</th>
|
|||
|
|
</tr>
|
|||
|
|
</thead>
|
|||
|
|
<tbody>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>with_counts=true</code> and <code>counts/</code> exists</td>
|
|||
|
|
<td><code>Count</code></td>
|
|||
|
|
<td>raw count per genome</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>presence/</code> exists</td>
|
|||
|
|
<td><code>Presence</code></td>
|
|||
|
|
<td>0/1 per genome (bit matrix)</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td>only <code>counts/</code> exists</td>
|
|||
|
|
<td><code>Count</code></td>
|
|||
|
|
<td>counts used as-is</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td>neither exists</td>
|
|||
|
|
<td><code>SetOnly</code></td>
|
|||
|
|
<td>1 for every genome</td>
|
|||
|
|
</tr>
|
|||
|
|
</tbody>
|
|||
|
|
</table>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="presence-count-mode-at-query-time">Presence / count mode at query time</h2>
|
|||
|
|
<p>The <code>--force-presence</code> flag and <code>--presence-threshold</code> control how per-genome values are accumulated, independently of what the index stores:</p>
|
|||
|
|
<div class="highlight"><pre><span></span><code>genome_totals[g] += if presence { u32::from(v >= threshold) } else { v }
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p><code>presence</code> is true when <code>--force-presence</code> is set or when the index has no counts (<code>!with_counts</code>). The default <code>presence_threshold</code> is 1, so any nonzero count counts as a match.</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="coverage-vectors-detail">Coverage vectors (<code>--detail</code>)</h2>
|
|||
|
|
<p>When <code>--detail</code> is requested, a 3-D accumulator <code>cov[seq_idx][genome][kmer_pos]</code> is allocated after all partitions are queried, with dimensions derived from <code>n_kmers_out = n_smers − z + 1</code> (k_user-mer positions, not s-mer positions):</p>
|
|||
|
|
<div class="highlight"><pre><span></span><code>cov[seq_idx][g][pos] += contribution
|
|||
|
|
where pos is the k_user-mer index in the filtered (post-Findere) vector
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p>Coverage reflects confirmed k_user-mers only. The vectors are emitted in the JSON annotation under the key <code>"coverage"</code>.</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="kmer_missing-semantics"><code>kmer_missing</code> semantics</h2>
|
|||
|
|
<p><code>kmer_missing</code> counts k_user-mer positions where the first s-mer (<code>seq_results[seq_idx][pos]</code>) is <code>None</code> — i.e. absent from the index entirely. K-mers where the z-window fails because a later s-mer is absent or zero are not counted as missing (the first s-mer being present is used as a proxy for index membership).</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="output-format">Output format</h2>
|
|||
|
|
<p>Output sequences are written in <strong>OBITools4 format</strong>: the original sequence with a JSON annotation map in the title line.</p>
|
|||
|
|
<div class="highlight"><pre><span></span><code>>read_id {"kmer_count":59,"kmer_strict_matches":{"genome_a":42,"genome_b":7}}
|
|||
|
|
ATCGATCG...
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p>With <code>--detail</code>:</p>
|
|||
|
|
<div class="highlight"><pre><span></span><code>>read_id {"kmer_count":59,"kmer_strict_matches":{...},"coverage":{"genome_a":[0,1,2,...],...}}
|
|||
|
|
ATCGATCG...
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p>Genome keys follow the iteration order of <code>meta.genomes</code>.</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="annotation-schema">Annotation schema</h2>
|
|||
|
|
<table>
|
|||
|
|
<thead>
|
|||
|
|
<tr>
|
|||
|
|
<th>Key</th>
|
|||
|
|
<th>Type</th>
|
|||
|
|
<th>Condition</th>
|
|||
|
|
<th>Semantics</th>
|
|||
|
|
</tr>
|
|||
|
|
</thead>
|
|||
|
|
<tbody>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>kmer_count</code></td>
|
|||
|
|
<td>int</td>
|
|||
|
|
<td>always</td>
|
|||
|
|
<td>k-mers confirmed (post-Findere) with at least one genome match</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>kmer_missing</code></td>
|
|||
|
|
<td>int</td>
|
|||
|
|
<td><code>--count-missing</code></td>
|
|||
|
|
<td>k-mers absent from the index entirely (pre-Findere None)</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>kmer_strict_matches</code></td>
|
|||
|
|
<td>object</td>
|
|||
|
|
<td>always</td>
|
|||
|
|
<td>per-genome accumulated value (label → summed counts, or, in presence mode, the number of k-mers whose value reaches the presence threshold)</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>coverage</code></td>
|
|||
|
|
<td>object</td>
|
|||
|
|
<td><code>--detail</code></td>
|
|||
|
|
<td>per-genome array of per-position contributions (label → [u32])</td>
|
|||
|
|
</tr>
|
|||
|
|
</tbody>
|
|||
|
|
</table>
|
|||
|
|
<p><code>kmer_count + kmer_missing</code> ≤ total k_user-mers in the sequence. The gap corresponds to k_user-mers whose z-window was not fully confirmed (at least one s-mer absent or zero for all genomes) but whose first s-mer was present in the index.</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="cli">CLI</h2>
|
|||
|
|
<div class="highlight"><pre><span></span><code>obikmer query <index> [--detail] [--mismatch] [--count-missing]
|
|||
|
|
[--force-presence] [--presence-threshold <n>]
|
|||
|
|
[-z <z>] [-T <threads>]
|
|||
|
|
<query.fa> [<query2.fa> ...]
|
|||
|
|
</code></pre></div>
|
|||
|
|
<table>
|
|||
|
|
<thead>
|
|||
|
|
<tr>
|
|||
|
|
<th>Option</th>
|
|||
|
|
<th>Default</th>
|
|||
|
|
<th>Semantics</th>
|
|||
|
|
</tr>
|
|||
|
|
</thead>
|
|||
|
|
<tbody>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>-z</code> / <code>--findere-z</code></td>
|
|||
|
|
<td>from index metadata</td>
|
|||
|
|
<td>Override Findere z parameter</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>--detail</code></td>
|
|||
|
|
<td>off</td>
|
|||
|
|
<td>Emit per-position coverage vectors in JSON</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>--count-missing</code></td>
|
|||
|
|
<td>off</td>
|
|||
|
|
<td>Add <code>kmer_missing</code> field to JSON</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>--force-presence</code></td>
|
|||
|
|
<td>off</td>
|
|||
|
|
<td>Accumulate a 0/1 contribution per k-mer for each genome, regardless of index counts</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>--presence-threshold</code></td>
|
|||
|
|
<td>1</td>
|
|||
|
|
<td>Minimum count to declare a genome present</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>-T</code> / <code>--threads</code></td>
|
|||
|
|
<td>all CPUs</td>
|
|||
|
|
<td>Worker threads</td>
|
|||
|
|
</tr>
|
|||
|
|
</tbody>
|
|||
|
|
</table>
|
|||
|
|
<p><code>--mismatch</code> is accepted but currently ignored; a warning is printed to stderr.</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="future-work">Future work</h2>
|
|||
|
|
<ul>
|
|||
|
|
<li><strong><code>--mismatch</code></strong>: 1-mismatch approximate matching — generate <code>3·k</code> single-substitution variants per k-mer and look each one up independently.</li>
|
|||
|
|
<li><strong>Read classification</strong> (<code>--classify</code>): assign each read to the genome with the highest match score.</li>
|
|||
|
|
<li><strong>Whitelist / blacklist filtering</strong>: threshold-based accept/reject on per-genome match scores.</li>
|
|||
|
|
</ul>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</article>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
</main>
|
|||
|
|
|
|||
|
|
<footer class="md-footer">
|
|||
|
|
|
|||
|
|
<div class="md-footer-meta md-typeset">
|
|||
|
|
<div class="md-footer-meta__inner md-grid">
|
|||
|
|
<div class="md-copyright">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Made with
|
|||
|
|
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
|
|||
|
|
Material for MkDocs
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</footer>
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
<div class="md-dialog" data-md-component="dialog">
|
|||
|
|
<div class="md-dialog__inner md-typeset"></div>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<script id="__config" type="application/json">{"annotate": null, "base": "../..", "features": [], "search": "../../assets/javascripts/workers/search.2c215733.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<script src="../../assets/javascripts/bundle.79ae519e.min.js"></script>
|
|||
|
|
|
|||
|
|
<script src="https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js"></script>
|
|||
|
|
|
|||
|
|
|
|||
|
|
</body>
|
|||
|
|
</html>
|