1816 lines
65 KiB
HTML
1816 lines
65 KiB
HTML
|
|
|
|||
|
|
<!doctype html>
|
|||
|
|
<html lang="en" class="no-js">
|
|||
|
|
<head>
|
|||
|
|
|
|||
|
|
<meta charset="utf-8">
|
|||
|
|
<meta name="viewport" content="width=device-width,initial-scale=1">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<link rel="prev" href="../sequences/invariant/">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<link rel="icon" href="../../assets/images/favicon.png">
|
|||
|
|
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.7.6">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<title>Kmer index - obikmer</title>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<link rel="stylesheet" href="../../assets/stylesheets/main.484c7ddc.min.css">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
|||
|
|
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
|
|||
|
|
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<script>
  // Storage helpers shared by the theme scripts. All four names are assigned
  // without a declaration, so they become globals visible to later scripts.

  // Base URL two directory levels above this page; its pathname prefixes
  // every storage key built below.
  __md_scope = new URL("../..", location);

  // Fold an iterable of characters into a number, starting from 0 and applying
  // the shift-and-subtract step `(hash << 5) - hash + code` for each one.
  __md_hash = (text) => {
    let hash = 0;
    for (const ch of text) {
      hash = (hash << 5) - hash + ch.charCodeAt(0);
    }
    return hash;
  };

  // Read the entry stored under "<scope pathname>.<key>" and JSON-decode it.
  // The storage defaults to localStorage and the scope to __md_scope.
  __md_get = (key, storage = localStorage, scope = __md_scope) => {
    const raw = storage.getItem(scope.pathname + "." + key);
    return JSON.parse(raw);
  };

  // JSON-encode a value and store it under "<scope pathname>.<key>".
  // Anything thrown while encoding or writing is deliberately ignored.
  __md_set = (key, value, storage = localStorage, scope = __md_scope) => {
    try {
      storage.setItem(scope.pathname + "." + key, JSON.stringify(value));
    } catch (err) {}
  };
</script>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</head>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<body dir="ltr">
|
|||
|
|
|
|||
|
|
|
|||
|
|
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
|
|||
|
|
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
|
|||
|
|
<label class="md-overlay" for="__drawer"></label>
|
|||
|
|
<div data-md-component="skip">
|
|||
|
|
|
|||
|
|
|
|||
|
|
<a href="#kmer-index-architecture" class="md-skip">
|
|||
|
|
Skip to content
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
<div data-md-component="announce">
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<header class="md-header md-header--shadow" data-md-component="header">
|
|||
|
|
<nav class="md-header__inner md-grid" aria-label="Header">
|
|||
|
|
<a href="../.." title="obikmer" class="md-header__button md-logo" aria-label="obikmer" data-md-component="logo">
|
|||
|
|
|
|||
|
|
|
|||
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
<label class="md-header__button md-icon" for="__drawer">
|
|||
|
|
|
|||
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
|
|||
|
|
</label>
|
|||
|
|
<div class="md-header__title" data-md-component="header-title">
|
|||
|
|
<div class="md-header__ellipsis">
|
|||
|
|
<div class="md-header__topic">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
obikmer
|
|||
|
|
</span>
|
|||
|
|
</div>
|
|||
|
|
<div class="md-header__topic" data-md-component="header-topic">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Kmer index
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<script>
  // Re-apply the colour palette saved under the "__palette" storage key.
  // Variables are declared with `var` at script top level, exactly as before,
  // so their names stay unchanged.
  var palette = __md_get("__palette");

  if (palette && palette.color) {
    // A stored media value of "(prefers-color-scheme)" is resolved here: pick
    // the element tagged for the light or dark media query, whichever
    // currently matches, and copy its palette attributes into the record.
    if ("(prefers-color-scheme)" === palette.color.media) {
      var media = matchMedia("(prefers-color-scheme: light)"),
          input = document.querySelector(
            media.matches
              ? "[data-md-color-media='(prefers-color-scheme: light)']"
              : "[data-md-color-media='(prefers-color-scheme: dark)']"
          );

      palette.color.media = input.getAttribute("data-md-color-media");
      palette.color.scheme = input.getAttribute("data-md-color-scheme");
      palette.color.primary = input.getAttribute("data-md-color-primary");
      palette.color.accent = input.getAttribute("data-md-color-accent");
    }

    // Mirror every palette entry onto the body element as data-md-color-<key>.
    for (var [key, value] of Object.entries(palette.color)) {
      document.body.setAttribute("data-md-color-" + key, value);
    }
  }
</script>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</header>
|
|||
|
|
|
|||
|
|
<div class="md-container" data-md-component="container">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<main class="md-main" data-md-component="main">
|
|||
|
|
<div class="md-main__inner md-grid">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
|
|||
|
|
<div class="md-sidebar__scrollwrap">
|
|||
|
|
<div class="md-sidebar__inner">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
|
|||
|
|
<label class="md-nav__title" for="__drawer">
|
|||
|
|
<a href="../.." title="obikmer" class="md-nav__button md-logo" aria-label="obikmer" data-md-component="logo">
|
|||
|
|
|
|||
|
|
|
|||
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
obikmer
|
|||
|
|
</label>
|
|||
|
|
|
|||
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../.." class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Home
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item md-nav__item--nested">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" >
|
|||
|
|
|
|||
|
|
|
|||
|
|
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Theory
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
</label>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
|
|||
|
|
<label class="md-nav__title" for="__nav_2">
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
Theory
|
|||
|
|
|
|||
|
|
|
|||
|
|
</label>
|
|||
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../kmers/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Kmers and super-kmers
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../theory/encoding/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
DNA encoding
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../theory/entropy/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Entropy filter
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../theory/minimizer/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Minimizer selection
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../theory/indexing/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Partitioning architecture
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item md-nav__item--nested">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" >
|
|||
|
|
|
|||
|
|
|
|||
|
|
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Implementation
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
</label>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
|
|||
|
|
<label class="md-nav__title" for="__nav_3">
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
Implementation
|
|||
|
|
|
|||
|
|
|
|||
|
|
</label>
|
|||
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../implementation/superkmer/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
SuperKmer
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../implementation/kmer/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Kmer
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../implementation/chunkreader/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Chunk reader
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../implementation/pipeline/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Construction pipeline
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../implementation/obipipeline/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
obipipeline library
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../implementation/storage/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
On-disk storage
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../implementation/mphf/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
MPHF selection
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../implementation/unitig_evidence/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Unitig evidence encoding
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../implementation/obilayeredmap/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
obilayeredmap crate
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../implementation/persistent_compact_int_vec/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
PersistentCompactIntVec
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../../implementation/persistent_bit_vec/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
PersistentBitVec
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" checked>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Architecture
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
</label>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="true">
|
|||
|
|
<label class="md-nav__title" for="__nav_4">
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
Architecture
|
|||
|
|
|
|||
|
|
|
|||
|
|
</label>
|
|||
|
|
<ul class="md-nav__list" data-md-scrollfix>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="../sequences/invariant/" class="md-nav__link">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Sequences
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<li class="md-nav__item md-nav__item--active">
|
|||
|
|
|
|||
|
|
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<label class="md-nav__link md-nav__link--active" for="__toc">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Kmer index
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
</label>
|
|||
|
|
|
|||
|
|
<a href="./" class="md-nav__link md-nav__link--active">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Kmer index
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<label class="md-nav__title" for="__toc">
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
Table of contents
|
|||
|
|
</label>
|
|||
|
|
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#fundamental-invariant" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Fundamental invariant
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#three-level-hierarchy" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Three-level hierarchy
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#mphflayer-autonomous-mapping-layer" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
MphfLayer — autonomous mapping layer
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#datastore-slot-indexed-data" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
DataStore — slot-indexed data
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#distance-matrix-api-on-datastore-types" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Distance matrix API on DataStore types
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" aria-label="Distance matrix API on DataStore types">
|
|||
|
|
<ul class="md-nav__list">
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#full-distance-matrices" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Full distance matrices
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#partial-distance-matrices" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Partial distance matrices
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#progressive-aggregation-principle" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Progressive aggregation principle
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#layereddatastore-aggregation-within-one-partition" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
LayeredDataStore — aggregation within one partition
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" aria-label="LayeredDataStore — aggregation within one partition">
|
|||
|
|
<ul class="md-nav__list">
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#column-statistics" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Column statistics
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#self-contained-partials" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Self-contained partials
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#normalised-partials-require-global-sums-from-above" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Normalised partials (require global sums from above)
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#partitioneddatastore-aggregation-across-all-partitions" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
PartitionedDataStore — aggregation across all partitions
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" aria-label="PartitionedDataStore — aggregation across all partitions">
|
|||
|
|
<ul class="md-nav__list">
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#column-statistics_1" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Column statistics
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#self-contained-metrics-single-pass" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Self-contained metrics — single pass
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#normalised-metrics-two-passes" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Normalised metrics — two passes
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#parallelism-model" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Parallelism model
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#query-model" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Query model
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" aria-label="Query model">
|
|||
|
|
<ul class="md-nav__list">
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#point-query-kmer-optionitem" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Point query — kmer → Option&lt;Item&gt;
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#aggregation-result" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Aggregation — → Result
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#datastore-derivation" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
DataStore derivation
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#relationship-to-current-implementation" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Relationship to current implementation
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
|
|||
|
|
<div class="md-sidebar__scrollwrap">
|
|||
|
|
<div class="md-sidebar__inner">
|
|||
|
|
|
|||
|
|
|
|||
|
|
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<label class="md-nav__title" for="__toc">
|
|||
|
|
<span class="md-nav__icon md-icon"></span>
|
|||
|
|
Table of contents
|
|||
|
|
</label>
|
|||
|
|
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#fundamental-invariant" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Fundamental invariant
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#three-level-hierarchy" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Three-level hierarchy
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#mphflayer-autonomous-mapping-layer" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
MphfLayer — autonomous mapping layer
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#datastore-slot-indexed-data" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
DataStore — slot-indexed data
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#distance-matrix-api-on-datastore-types" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Distance matrix API on DataStore types
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" aria-label="Distance matrix API on DataStore types">
|
|||
|
|
<ul class="md-nav__list">
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#full-distance-matrices" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Full distance matrices
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#partial-distance-matrices" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Partial distance matrices
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#progressive-aggregation-principle" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Progressive aggregation principle
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#layereddatastore-aggregation-within-one-partition" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
LayeredDataStore — aggregation within one partition
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" aria-label="LayeredDataStore — aggregation within one partition">
|
|||
|
|
<ul class="md-nav__list">
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#column-statistics" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Column statistics
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#self-contained-partials" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Self-contained partials
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#normalised-partials-require-global-sums-from-above" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Normalised partials (require global sums from above)
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#partitioneddatastore-aggregation-across-all-partitions" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
PartitionedDataStore — aggregation across all partitions
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" aria-label="PartitionedDataStore — aggregation across all partitions">
|
|||
|
|
<ul class="md-nav__list">
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#column-statistics_1" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Column statistics
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#self-contained-metrics-single-pass" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Self-contained metrics — single pass
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#normalised-metrics-two-passes" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Normalised metrics — two passes
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#parallelism-model" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Parallelism model
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#query-model" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Query model
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
<nav class="md-nav" aria-label="Query model">
|
|||
|
|
<ul class="md-nav__list">
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#point-query-kmer-optionitem" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Point query — kmer → Option&lt;Item&gt;
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#aggregation-result" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Aggregation — → Result
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
</nav>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#datastore-derivation" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
DataStore derivation
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
<li class="md-nav__item">
|
|||
|
|
<a href="#relationship-to-current-implementation" class="md-nav__link">
|
|||
|
|
<span class="md-ellipsis">
|
|||
|
|
|
|||
|
|
Relationship to current implementation
|
|||
|
|
|
|||
|
|
</span>
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</li>
|
|||
|
|
|
|||
|
|
</ul>
|
|||
|
|
|
|||
|
|
</nav>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<div class="md-content" data-md-component="content">
|
|||
|
|
|
|||
|
|
<article class="md-content__inner md-typeset">
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<h1 id="kmer-index-architecture">Kmer index architecture</h1>
|
|||
|
|
<h2 id="fundamental-invariant">Fundamental invariant</h2>
|
|||
|
|
<p>A given canonical kmer belongs to <strong>exactly one partition</strong> and <strong>exactly one layer</strong> within that partition. This is the property that makes all aggregation operations decomposable and parallelisable without coordination.</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="three-level-hierarchy">Three-level hierarchy</h2>
|
|||
|
|
<div class="highlight"><pre><span></span><code>PartitionedIndex
|
|||
|
|
├── LayeredPartition (one per minimiser bucket)
|
|||
|
|
│ ├── MphfLayer 0 kmer → slot (immutable bijection)
|
|||
|
|
│ │ ├── DataStore A slot → T (e.g. counts)
|
|||
|
|
│ │ └── DataStore B slot → T (e.g. presence/absence, derived)
|
|||
|
|
│ ├── MphfLayer 1
|
|||
|
|
│ │ └── DataStore A
|
|||
|
|
│ └── ...
|
|||
|
|
├── LayeredPartition
|
|||
|
|
│ └── ...
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p><strong>PartitionedIndex</strong>: routes queries to partitions via canonical minimiser hash. Owns the partition count and routing scheme (fixed at creation). Dispatches aggregations across partitions in parallel.</p>
|
|||
|
|
<p><strong>LayeredPartition</strong>: one directory per minimiser bucket. Holds a <code>Vec&lt;MphfLayer&gt;</code>. Each layer covers a disjoint kmer set — layer 0 is built from dataset A; layer 1 covers kmers in B absent from layer 0; and so on. Layers within a partition are always disjoint.</p>
|
|||
|
|
<p><strong>MphfLayer</strong>: the MPHF + evidence + unitig spine. Maps <code>kmer → slot</code> for its disjoint kmer set. Immutable once built. Independent of any data attached to it.</p>
|
|||
|
|
<p><strong>DataStore</strong>: a slot-indexed data array (e.g. <code>PersistentCompactIntMatrix</code>, <code>PersistentBitMatrix</code>). Attached to a <code>MphfLayer</code> externally. Multiple stores of different types can coexist on the same <code>MphfLayer</code>.</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="mphflayer-autonomous-mapping-layer">MphfLayer — autonomous mapping layer</h2>
|
|||
|
|
<div class="highlight"><pre><span></span><code><span class="n">MphfLayer</span><span class="p">::</span><span class="n">find</span><span class="p">(</span><span class="n">kmer</span><span class="p">:</span><span class="w"> </span><span class="nc">CanonicalKmer</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nb">Option</span><span class="o"><</span><span class="kt">usize</span><span class="o">></span><span class="w"> </span><span class="c1">// slot, or None if absent</span>
|
|||
|
|
<span class="n">MphfLayer</span><span class="p">::</span><span class="n">n</span><span class="p">()</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="kt">usize</span><span class="w"> </span><span class="c1">// number of slots</span>
|
|||
|
|
<span class="n">MphfLayer</span><span class="p">::</span><span class="n">build</span><span class="p">(</span><span class="n">dir</span><span class="p">:</span><span class="w"> </span><span class="kp">&</span><span class="nc">Path</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">OLMResult</span><span class="o"><</span><span class="p">(</span><span class="bp">Self</span><span class="p">,</span><span class="w"> </span><span class="kt">usize</span><span class="p">)</span><span class="o">></span><span class="w"> </span><span class="c1">// from unitigs.bin</span>
|
|||
|
|
<span class="n">MphfLayer</span><span class="p">::</span><span class="n">open</span><span class="p">(</span><span class="n">dir</span><span class="p">:</span><span class="w"> </span><span class="kp">&</span><span class="nc">Path</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">OLMResult</span><span class="o"><</span><span class="bp">Self</span><span class="o">></span>
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p><code>find</code> returns <code>Some(slot)</code> only if the kmer is actually in this layer (evidence check included). Returns <code>None</code> for kmers present in other layers or absent from the index.</p>
|
|||
|
|
<p>The MPHF (<code>mphf.bin</code>, <code>evidence.bin</code>, <code>unitigs.bin</code>) is built once and never rebuilt. All data derivation operations (count → presence, thresholding, merging) reuse the same <code>MphfLayer</code>.</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="datastore-slot-indexed-data">DataStore — slot-indexed data</h2>
|
|||
|
|
<div class="highlight"><pre><span></span><code><span class="k">trait</span><span class="w"> </span><span class="n">DataStore</span><span class="w"> </span><span class="p">{</span>
|
|||
|
|
<span class="w"> </span><span class="k">type</span><span class="w"> </span><span class="nc">Item</span><span class="p">;</span>
|
|||
|
|
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">get</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">slot</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Self</span><span class="p">::</span><span class="n">Item</span><span class="p">;</span>
|
|||
|
|
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">n</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="kt">usize</span><span class="p">;</span>
|
|||
|
|
<span class="p">}</span>
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p>Concrete types from <code>obicompactvec</code>:</p>
|
|||
|
|
<table>
|
|||
|
|
<thead>
|
|||
|
|
<tr>
|
|||
|
|
<th>Type</th>
|
|||
|
|
<th><code>Item</code></th>
|
|||
|
|
<th>Column stats</th>
|
|||
|
|
<th>Use</th>
|
|||
|
|
</tr>
|
|||
|
|
</thead>
|
|||
|
|
<tbody>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>PersistentCompactIntMatrix</code></td>
|
|||
|
|
<td><code>Box&lt;[u32]&gt;</code></td>
|
|||
|
|
<td><code>sum() -&gt; Array1&lt;u64&gt;</code></td>
|
|||
|
|
<td>count per sample per slot</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td><code>PersistentBitMatrix</code></td>
|
|||
|
|
<td><code>Box&lt;[bool]&gt;</code></td>
|
|||
|
|
<td><code>count_ones() -&gt; Array1&lt;u64&gt;</code></td>
|
|||
|
|
<td>presence per sample per slot</td>
|
|||
|
|
</tr>
|
|||
|
|
</tbody>
|
|||
|
|
</table>
|
|||
|
|
<p><code>sum()</code> and <code>count_ones()</code> are the bridge between the per-matrix level and cross-layer aggregation: they give the total weight of each column within one (partition, layer) pair, which can be summed to get global column weights.</p>
|
|||
|
|
<p>A <code>DataStore</code> knows nothing about kmers or MPHFs. It is indexed by <code>usize</code> slot only.</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="distance-matrix-api-on-datastore-types">Distance matrix API on DataStore types</h2>
|
|||
|
|
<p>Both <code>PersistentCompactIntMatrix</code> and <code>PersistentBitMatrix</code> expose two families of distance matrix methods.</p>
|
|||
|
|
<h3 id="full-distance-matrices">Full distance matrices</h3>
|
|||
|
|
<p>Compute the final <code>n_cols × n_cols</code> distance matrix from data within a single matrix. Internally parallelised over the upper triangle via rayon.</p>
|
|||
|
|
<div class="highlight"><pre><span></span><code><span class="c1">// PersistentCompactIntMatrix</span>
|
|||
|
|
<span class="k">fn</span><span class="w"> </span><span class="nf">bray_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span>
|
|||
|
|
<span class="k">fn</span><span class="w"> </span><span class="nf">relfreq_bray_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span>
|
|||
|
|
<span class="k">fn</span><span class="w"> </span><span class="nf">euclidean_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span>
|
|||
|
|
<span class="k">fn</span><span class="w"> </span><span class="nf">relfreq_euclidean_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span>
|
|||
|
|
<span class="k">fn</span><span class="w"> </span><span class="nf">hellinger_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span>
|
|||
|
|
<span class="k">fn</span><span class="w"> </span><span class="nf">jaccard_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span>
|
|||
|
|
<span class="k">fn</span><span class="w"> </span><span class="nf">threshold_jaccard_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">threshold</span><span class="p">:</span><span class="w"> </span><span class="kt">u32</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span>
|
|||
|
|
|
|||
|
|
<span class="c1">// PersistentBitMatrix</span>
|
|||
|
|
<span class="k">fn</span><span class="w"> </span><span class="nf">jaccard_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span>
|
|||
|
|
<span class="k">fn</span><span class="w"> </span><span class="nf">hamming_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span>
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p>These are convenience methods. For a <code>LayeredDataStore</code> or <code>PartitionedDataStore</code> they cannot be used directly — the partial API is required.</p>
|
|||
|
|
<h3 id="partial-distance-matrices">Partial distance matrices</h3>
|
|||
|
|
<p>Return additive components that can be summed element-wise across (partition, layer) pairs before computing the final distance. This is what makes cross-layer and cross-partition aggregation possible.</p>
|
|||
|
|
<p><strong>Category 1 — self-contained partials</strong>: additive without any external parameter.</p>
|
|||
|
|
<div class="highlight"><pre><span></span><code><span class="c1">// PersistentCompactIntMatrix</span>
|
|||
|
|
<span class="k">fn</span><span class="w"> </span><span class="nf">partial_bray_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span>
|
|||
|
|
<span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="p">(</span><span class="n">Array2</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">,</span><span class="w"> </span><span class="c1">// sum_min[i,j]</span>
|
|||
|
|
<span class="w"> </span><span class="n">Array1</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">)</span><span class="w"> </span><span class="c1">// col_sums[k]</span>
|
|||
|
|
|
|||
|
|
<span class="k">fn</span><span class="w"> </span><span class="nf">partial_euclidean_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span><span class="w"> </span><span class="c1">// sum of squared diffs</span>
|
|||
|
|
<span class="k">fn</span><span class="w"> </span><span class="nf">partial_threshold_jaccard_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">threshold</span><span class="p">:</span><span class="w"> </span><span class="kt">u32</span><span class="p">)</span>
|
|||
|
|
<span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="p">(</span><span class="n">Array2</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">,</span><span class="w"> </span><span class="c1">// inter[i,j]</span>
|
|||
|
|
<span class="w"> </span><span class="n">Array2</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">)</span><span class="w"> </span><span class="c1">// union[i,j]</span>
|
|||
|
|
|
|||
|
|
<span class="c1">// PersistentBitMatrix</span>
|
|||
|
|
<span class="k">fn</span><span class="w"> </span><span class="nf">partial_jaccard_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span>
|
|||
|
|
<span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="p">(</span><span class="n">Array2</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">,</span><span class="w"> </span><span class="c1">// inter[i,j]</span>
|
|||
|
|
<span class="w"> </span><span class="n">Array2</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">)</span><span class="w"> </span><span class="c1">// union[i,j]</span>
|
|||
|
|
<span class="k">fn</span><span class="w"> </span><span class="nf">partial_hamming_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="w"> </span><span class="c1">// differing bits</span>
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p><strong>Category 2 — normalised partials</strong>: require global column sums as input, computed beforehand across all (partition, layer) pairs.</p>
|
|||
|
|
<div class="highlight"><pre><span></span><code><span class="c1">// PersistentCompactIntMatrix only</span>
|
|||
|
|
<span class="k">fn</span><span class="w"> </span><span class="nf">partial_relfreq_bray_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">col_sums</span><span class="p">:</span><span class="w"> </span><span class="kp">&</span><span class="nc">Array1</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">)</span>
|
|||
|
|
<span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span><span class="w"> </span><span class="c1">// Σ_slot min(a_slot/sum_i, b_slot/sum_j)</span>
|
|||
|
|
|
|||
|
|
<span class="k">fn</span><span class="w"> </span><span class="nf">partial_relfreq_euclidean_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">col_sums</span><span class="p">:</span><span class="w"> </span><span class="kp">&</span><span class="nc">Array1</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">)</span>
|
|||
|
|
<span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span><span class="w"> </span><span class="c1">// Σ_slot (a_slot/sum_i - b_slot/sum_j)²</span>
|
|||
|
|
|
|||
|
|
<span class="k">fn</span><span class="w"> </span><span class="nf">partial_hellinger_euclidean_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">col_sums</span><span class="p">:</span><span class="w"> </span><span class="kp">&</span><span class="nc">Array1</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">)</span>
|
|||
|
|
<span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span><span class="w"> </span><span class="c1">// Σ_slot (√(a/sum_i) - √(b/sum_j))²</span>
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p>The <code>col_sums</code> parameter must hold the <strong>global</strong> column sums, accumulated across all layers and all partitions — passing a per-layer sum would give a wrong result. This constraint drives the two-pass algorithm described below.</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="progressive-aggregation-principle">Progressive aggregation principle</h2>
|
|||
|
|
<p>Aggregation is <strong>hierarchical</strong>: each level computes its contribution by aggregating from the level immediately below it. No level skips a level or collects raw data from two levels down.</p>
|
|||
|
|
<div class="highlight"><pre><span></span><code>PersistentCompactIntMatrix::sum() — column sums for one (partition, layer) matrix
|
|||
|
|
↓ Σ across layers
|
|||
|
|
LayeredCompactIntMatrix::sum() — column sums for one partition
|
|||
|
|
↓ Σ across partitions
|
|||
|
|
PartitionedCompactIntMatrix::sum() — global column sums
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p>The same cascade applies to every partial computation:</p>
|
|||
|
|
<div class="highlight"><pre><span></span><code>PersistentCompactIntMatrix::partial_bray_dist_matrix() — one (partition, layer)
|
|||
|
|
↓ element-wise Σ across layers
|
|||
|
|
LayeredCompactIntMatrix::partial_bray() — one partition
|
|||
|
|
↓ element-wise Σ across partitions
|
|||
|
|
PartitionedCompactIntMatrix::partial_bray() — global partial → final dist
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p>This means <code>LayeredCompactIntMatrix</code> never inspects individual <code>PersistentCompactIntVec</code> columns directly, and <code>PartitionedCompactIntMatrix</code> never inspects individual layers. Each level presents a stable API surface to the level above.</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="layereddatastore-aggregation-within-one-partition">LayeredDataStore — aggregation within one partition</h2>
|
|||
|
|
<p>A <code>LayeredDataStore</code> holds one <code>DataStore</code> per layer within a single partition:</p>
|
|||
|
|
<div class="highlight"><pre><span></span><code><span class="k">struct</span><span class="w"> </span><span class="nc">LayeredCompactIntMatrix</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="n">layers</span><span class="p">:</span><span class="w"> </span><span class="nb">Vec</span><span class="o"><</span><span class="n">PersistentCompactIntMatrix</span><span class="o">></span><span class="w"> </span><span class="p">}</span>
|
|||
|
|
<span class="k">struct</span><span class="w"> </span><span class="nc">LayeredBitMatrix</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="n">layers</span><span class="p">:</span><span class="w"> </span><span class="nb">Vec</span><span class="o"><</span><span class="n">PersistentBitMatrix</span><span class="o">></span><span class="w"> </span><span class="p">}</span>
|
|||
|
|
</code></pre></div>
|
|||
|
|
<h3 id="column-statistics">Column statistics</h3>
|
|||
|
|
<div class="highlight"><pre><span></span><code><span class="c1">// LayeredCompactIntMatrix</span>
|
|||
|
|
<span class="k">fn</span><span class="w"> </span><span class="nf">sum</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array1</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span>
|
|||
|
|
<span class="w"> </span><span class="c1">// = layers.par_iter().map(|m| m.sum()).reduce(element-wise +)</span>
|
|||
|
|
|
|||
|
|
<span class="c1">// LayeredBitMatrix</span>
|
|||
|
|
<span class="k">fn</span><span class="w"> </span><span class="nf">count_ones</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array1</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span>
|
|||
|
|
<span class="w"> </span><span class="c1">// = layers.par_iter().map(|m| m.count_ones()).reduce(element-wise +)</span>
|
|||
|
|
</code></pre></div>
|
|||
|
|
<h3 id="self-contained-partials">Self-contained partials</h3>
|
|||
|
|
<p>Each method reduces across layers by element-wise addition of per-layer matrices:</p>
|
|||
|
|
<div class="highlight"><pre><span></span><code><span class="k">fn</span><span class="w"> </span><span class="nf">partial_bray</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="p">(</span><span class="n">Array2</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">,</span><span class="w"> </span><span class="n">Array1</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">)</span>
|
|||
|
|
<span class="w"> </span><span class="c1">// Σ_l layer_l.partial_bray_dist_matrix()</span>
|
|||
|
|
|
|||
|
|
<span class="k">fn</span><span class="w"> </span><span class="nf">partial_euclidean</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span>
|
|||
|
|
<span class="w"> </span><span class="c1">// Σ_l layer_l.partial_euclidean_dist_matrix()</span>
|
|||
|
|
|
|||
|
|
<span class="k">fn</span><span class="w"> </span><span class="nf">partial_jaccard</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="p">(</span><span class="n">Array2</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">,</span><span class="w"> </span><span class="n">Array2</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">)</span>
|
|||
|
|
<span class="w"> </span><span class="c1">// Σ_l layer_l.partial_jaccard_dist_matrix() [bit matrix]</span>
|
|||
|
|
<span class="w"> </span><span class="c1">// Σ_l layer_l.partial_threshold_jaccard_dist_matrix() [int matrix]</span>
|
|||
|
|
|
|||
|
|
<span class="k">fn</span><span class="w"> </span><span class="nf">partial_hamming</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span>
|
|||
|
|
<span class="w"> </span><span class="c1">// Σ_l layer_l.partial_hamming_dist_matrix() [bit matrix]</span>
|
|||
|
|
</code></pre></div>
|
|||
|
|
<h3 id="normalised-partials-require-global-sums-from-above">Normalised partials (require global sums from above)</h3>
|
|||
|
|
<div class="highlight"><pre><span></span><code><span class="k">fn</span><span class="w"> </span><span class="nf">partial_relfreq_bray</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">global_sums</span><span class="p">:</span><span class="w"> </span><span class="kp">&</span><span class="nc">Array1</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span>
|
|||
|
|
<span class="w"> </span><span class="c1">// Σ_l layer_l.partial_relfreq_bray_dist_matrix(global_sums)</span>
|
|||
|
|
|
|||
|
|
<span class="k">fn</span><span class="w"> </span><span class="nf">partial_relfreq_euclidean</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">global_sums</span><span class="p">:</span><span class="w"> </span><span class="kp">&</span><span class="nc">Array1</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span>
|
|||
|
|
<span class="w"> </span><span class="c1">// Σ_l layer_l.partial_relfreq_euclidean_dist_matrix(global_sums)</span>
|
|||
|
|
|
|||
|
|
<span class="k">fn</span><span class="w"> </span><span class="nf">partial_hellinger</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">global_sums</span><span class="p">:</span><span class="w"> </span><span class="kp">&</span><span class="nc">Array1</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span>
|
|||
|
|
<span class="w"> </span><span class="c1">// Σ_l layer_l.partial_hellinger_euclidean_dist_matrix(global_sums)</span>
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p><code>global_sums</code> is provided by the <code>PartitionedDataStore</code>; this level does not compute it.</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="partitioneddatastore-aggregation-across-all-partitions">PartitionedDataStore — aggregation across all partitions</h2>
|
|||
|
|
<p>A <code>PartitionedDataStore</code> holds one <code>LayeredDataStore</code> per partition:</p>
|
|||
|
|
<div class="highlight"><pre><span></span><code><span class="k">struct</span><span class="w"> </span><span class="nc">PartitionedCompactIntMatrix</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="n">partitions</span><span class="p">:</span><span class="w"> </span><span class="nb">Vec</span><span class="o"><</span><span class="n">LayeredCompactIntMatrix</span><span class="o">></span><span class="w"> </span><span class="p">}</span>
|
|||
|
|
<span class="k">struct</span><span class="w"> </span><span class="nc">PartitionedBitMatrix</span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="n">partitions</span><span class="p">:</span><span class="w"> </span><span class="nb">Vec</span><span class="o"><</span><span class="n">LayeredBitMatrix</span><span class="o">></span><span class="w"> </span><span class="p">}</span>
|
|||
|
|
</code></pre></div>
|
|||
|
|
<h3 id="column-statistics_1">Column statistics</h3>
|
|||
|
|
<div class="highlight"><pre><span></span><code><span class="k">fn</span><span class="w"> </span><span class="nf">sum</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array1</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span>
|
|||
|
|
<span class="w"> </span><span class="c1">// = partitions.par_iter().map(|p| p.sum()).reduce(element-wise +)</span>
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p><code>p.sum()</code> is itself a reduction across layers (see above) — the cascade is preserved.</p>
|
|||
|
|
<h3 id="self-contained-metrics-single-pass">Self-contained metrics — single pass</h3>
|
|||
|
|
<div class="highlight"><pre><span></span><code><span class="k">fn</span><span class="w"> </span><span class="nf">bray_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span><span class="w"> </span><span class="p">{</span>
|
|||
|
|
<span class="w"> </span><span class="kd">let</span><span class="w"> </span><span class="p">(</span><span class="n">sum_min</span><span class="p">,</span><span class="w"> </span><span class="n">col_sums</span><span class="p">)</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">partitions</span>
|
|||
|
|
<span class="w"> </span><span class="p">.</span><span class="n">par_iter</span><span class="p">()</span>
|
|||
|
|
<span class="w"> </span><span class="p">.</span><span class="n">map</span><span class="p">(</span><span class="o">|</span><span class="n">p</span><span class="o">|</span><span class="w"> </span><span class="n">p</span><span class="p">.</span><span class="n">partial_bray</span><span class="p">())</span>
|
|||
|
|
<span class="w"> </span><span class="p">.</span><span class="n">reduce</span><span class="p">(</span><span class="n">element</span><span class="o">-</span><span class="n">wise</span><span class="w"> </span><span class="o">+</span><span class="p">);</span>
|
|||
|
|
<span class="w"> </span><span class="c1">// finalise</span>
|
|||
|
|
<span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="n">i</span><span class="p">,</span><span class="n">j</span><span class="p">):</span><span class="w"> </span><span class="nc">dist</span><span class="p">[</span><span class="n">i</span><span class="p">,</span><span class="n">j</span><span class="p">]</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">1</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="mi">2</span><span class="err">·</span><span class="n">sum_min</span><span class="p">[</span><span class="n">i</span><span class="p">,</span><span class="n">j</span><span class="p">]</span><span class="w"> </span><span class="o">/</span><span class="w"> </span><span class="p">(</span><span class="n">col_sums</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">col_sums</span><span class="p">[</span><span class="n">j</span><span class="p">])</span>
|
|||
|
|
<span class="p">}</span>
|
|||
|
|
</code></pre></div>
|
|||
|
|
<h3 id="normalised-metrics-two-passes">Normalised metrics — two passes</h3>
|
|||
|
|
<div class="highlight"><pre><span></span><code><span class="k">fn</span><span class="w"> </span><span class="nf">relfreq_bray_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span><span class="w"> </span><span class="p">{</span>
|
|||
|
|
<span class="w"> </span><span class="c1">// pass 1 — progressive: PartitionedDataStore::sum()</span>
|
|||
|
|
<span class="w"> </span><span class="c1">// calls LayeredDataStore::sum() per partition (parallel)</span>
|
|||
|
|
<span class="w"> </span><span class="c1">// calls PersistentCompactIntMatrix::sum() per layer (parallel)</span>
|
|||
|
|
<span class="w"> </span><span class="kd">let</span><span class="w"> </span><span class="n">global_sums</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="bp">self</span><span class="p">.</span><span class="n">sum</span><span class="p">();</span>
|
|||
|
|
|
|||
|
|
<span class="w"> </span><span class="c1">// pass 2 — per-partition partial using global_sums (parallel)</span>
|
|||
|
|
<span class="w"> </span><span class="kd">let</span><span class="w"> </span><span class="n">matrix</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">partitions</span>
|
|||
|
|
<span class="w"> </span><span class="p">.</span><span class="n">par_iter</span><span class="p">()</span>
|
|||
|
|
<span class="w"> </span><span class="p">.</span><span class="n">map</span><span class="p">(</span><span class="o">|</span><span class="n">p</span><span class="o">|</span><span class="w"> </span><span class="n">p</span><span class="p">.</span><span class="n">partial_relfreq_bray</span><span class="p">(</span><span class="o">&</span><span class="n">global_sums</span><span class="p">))</span>
|
|||
|
|
<span class="w"> </span><span class="p">.</span><span class="n">reduce</span><span class="p">(</span><span class="n">element</span><span class="o">-</span><span class="n">wise</span><span class="w"> </span><span class="o">+</span><span class="p">);</span>
|
|||
|
|
<span class="w"> </span><span class="c1">// finalise</span>
|
|||
|
|
<span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="n">i</span><span class="p">,</span><span class="n">j</span><span class="p">):</span><span class="w"> </span><span class="nc">dist</span><span class="p">[</span><span class="n">i</span><span class="p">,</span><span class="n">j</span><span class="p">]</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">1</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="n">matrix</span><span class="p">[</span><span class="n">i</span><span class="p">,</span><span class="n">j</span><span class="p">]</span>
|
|||
|
|
<span class="p">}</span>
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p><code>global_sums</code> is exact because each kmer belongs to exactly one (partition, layer) pair — no double-counting. Pass 1 is itself fully parallel at every level of the hierarchy.</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="parallelism-model">Parallelism model</h2>
|
|||
|
|
<table>
|
|||
|
|
<thead>
|
|||
|
|
<tr>
|
|||
|
|
<th>Level</th>
|
|||
|
|
<th>Unit</th>
|
|||
|
|
<th>Coordination</th>
|
|||
|
|
</tr>
|
|||
|
|
</thead>
|
|||
|
|
<tbody>
|
|||
|
|
<tr>
|
|||
|
|
<td>Across partitions</td>
|
|||
|
|
<td><code>LayeredDataStore</code></td>
|
|||
|
|
<td>none — fully independent</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td>Across layers (self-contained)</td>
|
|||
|
|
<td><code>(partition, layer)</code> pair</td>
|
|||
|
|
<td>none — disjoint kmer sets</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td>Across layers (normalised, pass 1)</td>
|
|||
|
|
<td><code>(partition, layer)</code> pair</td>
|
|||
|
|
<td>none — sums are additive</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td>Across layers (normalised, pass 2)</td>
|
|||
|
|
<td><code>(partition, layer)</code> pair</td>
|
|||
|
|
<td><code>global_sums</code> broadcast read-only</td>
|
|||
|
|
</tr>
|
|||
|
|
<tr>
|
|||
|
|
<td>Within a DataStore (distance matrix)</td>
|
|||
|
|
<td>upper-triangle pair <code>(i,j)</code></td>
|
|||
|
|
<td>none — rayon par_iter</td>
|
|||
|
|
</tr>
|
|||
|
|
</tbody>
|
|||
|
|
</table>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="query-model">Query model</h2>
|
|||
|
|
<h3 id="point-query-kmer-optionitem">Point query — <code>kmer → Option<Item></code></h3>
|
|||
|
|
<div class="highlight"><pre><span></span><code>minimiser(kmer) → partition p
|
|||
|
|
for each layer l in p:
|
|||
|
|
slot = MphfLayer_l.find(kmer)
|
|||
|
|
if slot is Some:
|
|||
|
|
return DataStore_l.get(slot)
|
|||
|
|
return None
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p>O(n_layers) MPHF probes in the worst case; O(1) expected. No cross-layer fusion — the result comes from exactly one (partition, layer) pair.</p>
|
|||
|
|
<h3 id="aggregation-result">Aggregation — <code>→ Result</code></h3>
|
|||
|
|
<div class="highlight"><pre><span></span><code>result = reduce(
|
|||
|
|
for p in partitions: // parallel
|
|||
|
|
for l in layers(p): // parallel
|
|||
|
|
partial(DataStore_p_l)
|
|||
|
|
)
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p>For normalised metrics, replace this single reduction with the two-pass scheme described above.</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="datastore-derivation">DataStore derivation</h2>
|
|||
|
|
<p>Because the <code>MphfLayer</code> is independent of its data stores, new stores can be derived from existing ones without rebuilding the MPHF:</p>
|
|||
|
|
<div class="highlight"><pre><span></span><code>// count → presence/absence, parallel across (partition, layer)
|
|||
|
|
for (p, l) in all_partition_layer_pairs().par_iter():
|
|||
|
|
count_store = open PersistentCompactIntMatrix at (p, l)
|
|||
|
|
presence_store = PersistentBitMatrix::from_count_matrix(count_store, threshold, dir)
|
|||
|
|
</code></pre></div>
|
|||
|
|
<p>Other derivations: threshold a count matrix → binary presence matrix; union two presence matrices; merge two count matrices (saturating add, column-wise). All are local to one <code>(partition, layer)</code> pair.</p>
|
|||
|
|
<hr />
|
|||
|
|
<h2 id="relationship-to-current-implementation">Relationship to current implementation</h2>
|
|||
|
|
<p>The current <code>obilayeredmap</code> crate implements a subset of this architecture. Key divergences:</p>
|
|||
|
|
<ul>
|
|||
|
|
<li><code>Layer<D: LayerData></code> fuses <code>MphfLayer</code> and one <code>DataStore</code> into a single generic type. Multiple data stores on the same MPHF are not supported.</li>
|
|||
|
|
<li><code>LayerData::open(dir)</code> embeds the path convention (<code>counts/</code>, <code>presence/</code>) inside the store type, preventing the <code>PartitionedIndex</code> from managing paths externally.</li>
|
|||
|
|
<li><code>LayeredDataStore</code> and <code>PartitionedDataStore</code> do not yet exist; <code>LayeredMap</code> is a single-partition structure without a distance matrix API.</li>
|
|||
|
|
<li>The partial distance methods exist on <code>PersistentCompactIntMatrix</code> and <code>PersistentBitMatrix</code> and are tested; they are not yet composed across layers and partitions.</li>
|
|||
|
|
</ul>
|
|||
|
|
<p>Planned refactoring:</p>
|
|||
|
|
<ol><li>Extract <code>MphfLayer</code> from <code>Layer&lt;D&gt;</code> as an autonomous type.</li>
|
|||
|
|
<li>Replace <code>LayerData</code> trait with <code>DataStore</code> trait (no path knowledge).</li>
|
|||
|
|
<li>Implement <code>LayeredCompactIntMatrix</code> / <code>LayeredBitMatrix</code> with the partial + full distance APIs described above.</li>
|
|||
|
|
<li>Implement <code>PartitionedCompactIntMatrix</code> / <code>PartitionedBitMatrix</code> with two-pass support for normalised metrics.</li>
|
|||
|
|
<li>Implement <code>PartitionedIndex</code> for point queries with parallel dispatch.</li></ol>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
</article>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
</main>
|
|||
|
|
|
|||
|
|
<footer class="md-footer">
|
|||
|
|
|
|||
|
|
<div class="md-footer-meta md-typeset">
|
|||
|
|
<div class="md-footer-meta__inner md-grid">
|
|||
|
|
<div class="md-copyright">
|
|||
|
|
|
|||
|
|
|
|||
|
|
Made with
|
|||
|
|
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
|
|||
|
|
Material for MkDocs
|
|||
|
|
</a>
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
</div>
|
|||
|
|
</footer>
|
|||
|
|
|
|||
|
|
</div>
|
|||
|
|
<div class="md-dialog" data-md-component="dialog">
|
|||
|
|
<div class="md-dialog__inner md-typeset"></div>
|
|||
|
|
</div>
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
<script id="__config" type="application/json">{"annotate": null, "base": "../..", "features": [], "search": "../../assets/javascripts/workers/search.2c215733.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
|||
|
|
|
|||
|
|
|
|||
|
|
<script src="../../assets/javascripts/bundle.79ae519e.min.js"></script>
|
|||
|
|
|
|||
|
|
<script src="https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js"></script>
|
|||
|
|
|
|||
|
|
|
|||
|
|
</body>
|
|||
|
|
</html>
|