13e69e23c9
Introduces ColumnWeights, CountPartials, and BitPartials traits to compute and finalize partial distance matrices. Implements these traits for PersistentBitMatrix, PersistentCompactIntMatrix, and a new LayeredStore<S> wrapper that aggregates metrics across layers via parallel reduction. Adds ndarray for numerical aggregation and updates architecture documentation to reflect the trait-driven design and pending refactoring roadmap.
1777 lines
66 KiB
HTML
1777 lines
66 KiB
HTML
|
||
<!doctype html>
|
||
<html lang="en" class="no-js">
|
||
<head>
|
||
|
||
<meta charset="utf-8">
|
||
<meta name="viewport" content="width=device-width,initial-scale=1">
|
||
|
||
|
||
|
||
|
||
<link rel="prev" href="../sequences/invariant/">
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<link rel="icon" href="../../assets/images/favicon.png">
|
||
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.7.6">
|
||
|
||
|
||
|
||
<title>Kmer index - obikmer</title>
|
||
|
||
|
||
|
||
<link rel="stylesheet" href="../../assets/stylesheets/main.484c7ddc.min.css">
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
||
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&amp;display=fallback">
|
||
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
|
||
|
||
|
||
|
||
<!--
  Inline storage helpers defined as globals by the script below:
  * __md_scope: URL object for "../.." resolved against the current location.
  * __md_hash(str): folds the characters of str into an integer, starting at 0
    and applying (acc shifted left by 5) minus acc plus the character code.
  * __md_get(key, storage, scope): JSON-parses the item stored under
    scope.pathname + "." + key; storage defaults to localStorage and scope
    defaults to __md_scope.
  * __md_set(key, value, storage, scope): stores JSON.stringify(value) under
    the same key scheme; any exception thrown by setItem is swallowed.
-->
<script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
|
||
|
||
|
||
|
||
|
||
|
||
</head>
|
||
|
||
|
||
<body dir="ltr">
|
||
|
||
|
||
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
|
||
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
|
||
<label class="md-overlay" for="__drawer"></label>
|
||
<div data-md-component="skip">
|
||
|
||
|
||
<a href="#kmer-index-architecture" class="md-skip">
|
||
Skip to content
|
||
</a>
|
||
|
||
</div>
|
||
<div data-md-component="announce">
|
||
|
||
</div>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<header class="md-header md-header--shadow" data-md-component="header">
|
||
<nav class="md-header__inner md-grid" aria-label="Header">
|
||
<a href="../.." title="obikmer" class="md-header__button md-logo" aria-label="obikmer" data-md-component="logo">
|
||
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
|
||
|
||
</a>
|
||
<label class="md-header__button md-icon" for="__drawer">
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
|
||
</label>
|
||
<div class="md-header__title" data-md-component="header-title">
|
||
<div class="md-header__ellipsis">
|
||
<div class="md-header__topic">
|
||
<span class="md-ellipsis">
|
||
obikmer
|
||
</span>
|
||
</div>
|
||
<div class="md-header__topic" data-md-component="header-topic">
|
||
<span class="md-ellipsis">
|
||
|
||
Kmer index
|
||
|
||
</span>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
<!--
  Restores the stored colour palette: reads "__palette" through __md_get and,
  when it has a color entry, copies every key/value pair of palette.color onto
  the body element as a data-md-color-(key) attribute.
  If palette.color.media is exactly "(prefers-color-scheme)", the script first
  looks up the element whose data-md-color-media matches the light or dark
  variant (chosen with matchMedia) and takes media, scheme, primary and accent
  from that element's data-md-color attributes.
  NOTE(review): no such element is visible in this part of the page, and
  querySelector would return null in that case; confirm the palette toggle
  inputs exist whenever that stored value can occur.
-->
<script>var palette=__md_get("__palette");if(palette&&palette.color){if("(prefers-color-scheme)"===palette.color.media){var media=matchMedia("(prefers-color-scheme: light)"),input=document.querySelector(media.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");palette.color.media=input.getAttribute("data-md-color-media"),palette.color.scheme=input.getAttribute("data-md-color-scheme"),palette.color.primary=input.getAttribute("data-md-color-primary"),palette.color.accent=input.getAttribute("data-md-color-accent")}for(var[key,value]of Object.entries(palette.color))document.body.setAttribute("data-md-color-"+key,value)}</script>
|
||
|
||
|
||
|
||
|
||
</nav>
|
||
|
||
</header>
|
||
|
||
<div class="md-container" data-md-component="container">
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<main class="md-main" data-md-component="main">
|
||
<div class="md-main__inner md-grid">
|
||
|
||
|
||
|
||
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
|
||
<div class="md-sidebar__scrollwrap">
|
||
<div class="md-sidebar__inner">
|
||
|
||
|
||
|
||
|
||
<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
|
||
<label class="md-nav__title" for="__drawer">
|
||
<a href="../.." title="obikmer" class="md-nav__button md-logo" aria-label="obikmer" data-md-component="logo">
|
||
|
||
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
|
||
|
||
</a>
|
||
obikmer
|
||
</label>
|
||
|
||
<ul class="md-nav__list" data-md-scrollfix>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../.." class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Home
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item md-nav__item--nested">
|
||
|
||
|
||
|
||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" >
|
||
|
||
|
||
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Theory
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
<span class="md-nav__icon md-icon"></span>
|
||
</label>
|
||
|
||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
|
||
<label class="md-nav__title" for="__nav_2">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
|
||
|
||
Theory
|
||
|
||
|
||
</label>
|
||
<ul class="md-nav__list" data-md-scrollfix>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../kmers/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Kmers and super-kmers
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../theory/encoding/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
DNA encoding
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../theory/entropy/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Entropy filter
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../theory/minimizer/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Minimizer selection
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../theory/indexing/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Partitioning architecture
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item md-nav__item--nested">
|
||
|
||
|
||
|
||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" >
|
||
|
||
|
||
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Implementation
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
<span class="md-nav__icon md-icon"></span>
|
||
</label>
|
||
|
||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
|
||
<label class="md-nav__title" for="__nav_3">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
|
||
|
||
Implementation
|
||
|
||
|
||
</label>
|
||
<ul class="md-nav__list" data-md-scrollfix>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../implementation/superkmer/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
SuperKmer
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../implementation/kmer/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Kmer
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../implementation/chunkreader/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Chunk reader
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../implementation/pipeline/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Construction pipeline
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../implementation/obipipeline/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
obipipeline library
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../implementation/storage/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
On-disk storage
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../implementation/mphf/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
MPHF selection
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../implementation/unitig_evidence/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Unitig evidence encoding
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../implementation/obilayeredmap/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
obilayeredmap crate
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../implementation/persistent_compact_int_vec/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
PersistentCompactIntVec
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../../implementation/persistent_bit_vec/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
PersistentBitVec
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
|
||
|
||
|
||
|
||
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" checked>
|
||
|
||
|
||
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Architecture
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
<span class="md-nav__icon md-icon"></span>
|
||
</label>
|
||
|
||
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="true">
|
||
<label class="md-nav__title" for="__nav_4">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
|
||
|
||
Architecture
|
||
|
||
|
||
</label>
|
||
<ul class="md-nav__list" data-md-scrollfix>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item">
|
||
<a href="../sequences/invariant/" class="md-nav__link">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Sequences
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
</li>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<li class="md-nav__item md-nav__item--active">
|
||
|
||
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
|
||
|
||
|
||
|
||
|
||
|
||
<label class="md-nav__link md-nav__link--active" for="__toc">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Kmer index
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
<span class="md-nav__icon md-icon"></span>
|
||
</label>
|
||
|
||
<a href="./" class="md-nav__link md-nav__link--active">
|
||
|
||
|
||
|
||
<span class="md-ellipsis">
|
||
|
||
|
||
Kmer index
|
||
|
||
|
||
|
||
</span>
|
||
|
||
|
||
|
||
</a>
|
||
|
||
|
||
|
||
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<label class="md-nav__title" for="__toc">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
Table of contents
|
||
</label>
|
||
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#fundamental-invariant" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Fundamental invariant
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#three-level-hierarchy" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Three-level hierarchy
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#mphflayer-autonomous-mapping-layer" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
MphfLayer — autonomous mapping layer
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#datastore-slot-indexed-data" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
DataStore — slot-indexed data
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#distance-matrix-api-on-datastore-types" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Distance matrix API on DataStore types
|
||
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="Distance matrix API on DataStore types">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#full-distance-matrices" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Full distance matrices
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#partial-distance-matrices" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Partial distance matrices
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#progressive-aggregation-principle" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Progressive aggregation principle
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#traits-obicompactvectraits" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Traits — obicompactvec::traits
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#layeredstores-obilayeredmap" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
LayeredStore&lt;S&gt; — obilayeredmap
|
||
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="LayeredStore<S> — obilayeredmap">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#normalised-metrics-two-pass-cascade" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Normalised metrics — two-pass cascade
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#parallelism-model" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Parallelism model
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#query-model" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Query model
|
||
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="Query model">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#point-query-kmer-optionitem" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Point query — kmer → Option&lt;Item&gt;
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#aggregation-result" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Aggregation — → Result
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#datastore-derivation" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
DataStore derivation
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#relationship-to-current-implementation" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Relationship to current implementation
|
||
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="Relationship to current implementation">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#what-is-implemented" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
What is implemented
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#what-is-not-yet-implemented" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
What is not yet implemented
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#planned-refactoring" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Planned refactoring
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
|
||
|
||
</ul>
|
||
</nav>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
|
||
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
|
||
<div class="md-sidebar__scrollwrap">
|
||
<div class="md-sidebar__inner">
|
||
|
||
|
||
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
|
||
|
||
|
||
|
||
|
||
|
||
|
||
<label class="md-nav__title" for="__toc">
|
||
<span class="md-nav__icon md-icon"></span>
|
||
Table of contents
|
||
</label>
|
||
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#fundamental-invariant" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Fundamental invariant
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#three-level-hierarchy" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Three-level hierarchy
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#mphflayer-autonomous-mapping-layer" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
MphfLayer — autonomous mapping layer
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#datastore-slot-indexed-data" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
DataStore — slot-indexed data
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#distance-matrix-api-on-datastore-types" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Distance matrix API on DataStore types
|
||
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="Distance matrix API on DataStore types">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#full-distance-matrices" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Full distance matrices
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#partial-distance-matrices" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Partial distance matrices
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#progressive-aggregation-principle" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Progressive aggregation principle
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#traits-obicompactvectraits" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Traits — obicompactvec::traits
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#layeredstores-obilayeredmap" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
LayeredStore&lt;S&gt; — obilayeredmap
|
||
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="LayeredStore<S> — obilayeredmap">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#normalised-metrics-two-pass-cascade" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Normalised metrics — two-pass cascade
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#parallelism-model" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Parallelism model
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#query-model" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Query model
|
||
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="Query model">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#point-query-kmer-optionitem" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Point query — kmer → Option&lt;Item&gt;
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#aggregation-result" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Aggregation — → Result
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#datastore-derivation" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
DataStore derivation
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#relationship-to-current-implementation" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Relationship to current implementation
|
||
|
||
</span>
|
||
</a>
|
||
|
||
<nav class="md-nav" aria-label="Relationship to current implementation">
|
||
<ul class="md-nav__list">
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#what-is-implemented" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
What is implemented
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#what-is-not-yet-implemented" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
What is not yet implemented
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
<li class="md-nav__item">
|
||
<a href="#planned-refactoring" class="md-nav__link">
|
||
<span class="md-ellipsis">
|
||
|
||
Planned refactoring
|
||
|
||
</span>
|
||
</a>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
</nav>
|
||
|
||
</li>
|
||
|
||
</ul>
|
||
|
||
</nav>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
|
||
|
||
<div class="md-content" data-md-component="content">
|
||
|
||
<article class="md-content__inner md-typeset">
|
||
|
||
|
||
|
||
|
||
|
||
<h1 id="kmer-index-architecture">Kmer index architecture</h1>
|
||
<h2 id="fundamental-invariant">Fundamental invariant</h2>
|
||
<p>A given canonical kmer belongs to <strong>exactly one partition</strong> and <strong>exactly one layer</strong> within that partition. This is the property that makes all aggregation operations decomposable and parallelisable without coordination.</p>
|
||
<hr />
|
||
<h2 id="three-level-hierarchy">Three-level hierarchy</h2>
|
||
<div class="highlight"><pre><span></span><code>PartitionedIndex
|
||
├── LayeredPartition (one per minimiser bucket)
|
||
│ ├── MphfLayer 0 kmer → slot (immutable bijection)
|
||
│ │ ├── DataStore A slot → T (e.g. counts)
|
||
│ │ └── DataStore B slot → T (e.g. presence/absence, derived)
|
||
│ ├── MphfLayer 1
|
||
│ │ └── DataStore A
|
||
│ └── ...
|
||
├── LayeredPartition
|
||
│ └── ...
|
||
</code></pre></div>
|
||
<p><strong>PartitionedIndex</strong>: routes queries to partitions via canonical minimiser hash. Owns the partition count and routing scheme (fixed at creation). Dispatches aggregations across partitions in parallel.</p>
|
||
<p><strong>LayeredPartition</strong>: one directory per minimiser bucket. Holds a <code>Vec&lt;MphfLayer&gt;</code>. Each layer covers a disjoint kmer set — layer 0 is built from dataset A; layer 1 covers kmers in B absent from layer 0; and so on. Layers within a partition are always disjoint.</p>
|
||
<p><strong>MphfLayer</strong>: the MPHF + evidence + unitig spine. Maps <code>kmer → slot</code> for its disjoint kmer set. Immutable once built. Independent of any data attached to it.</p>
|
||
<p><strong>DataStore</strong>: a slot-indexed data array (e.g. <code>PersistentCompactIntMatrix</code>, <code>PersistentBitMatrix</code>). Attached to a <code>MphfLayer</code> externally. Multiple stores of different types can coexist on the same <code>MphfLayer</code>.</p>
|
||
<hr />
|
||
<h2 id="mphflayer-autonomous-mapping-layer">MphfLayer — autonomous mapping layer</h2>
|
||
<div class="highlight"><pre><span></span><code><span class="n">MphfLayer</span><span class="p">::</span><span class="n">find</span><span class="p">(</span><span class="n">kmer</span><span class="p">:</span><span class="w"> </span><span class="nc">CanonicalKmer</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nb">Option</span><span class="o">&lt;</span><span class="kt">usize</span><span class="o">&gt;</span><span class="w"> </span><span class="c1">// slot, or None if absent</span>
|
||
<span class="n">MphfLayer</span><span class="p">::</span><span class="n">n</span><span class="p">()</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="kt">usize</span><span class="w"> </span><span class="c1">// number of slots</span>
|
||
<span class="n">MphfLayer</span><span class="p">::</span><span class="n">build</span><span class="p">(</span><span class="n">dir</span><span class="p">:</span><span class="w"> </span><span class="kp">&amp;</span><span class="nc">Path</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">OLMResult</span><span class="o">&lt;</span><span class="p">(</span><span class="bp">Self</span><span class="p">,</span><span class="w"> </span><span class="kt">usize</span><span class="p">)</span><span class="o">&gt;</span><span class="w"> </span><span class="c1">// from unitigs.bin</span>
|
||
<span class="n">MphfLayer</span><span class="p">::</span><span class="n">open</span><span class="p">(</span><span class="n">dir</span><span class="p">:</span><span class="w"> </span><span class="kp">&amp;</span><span class="nc">Path</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">OLMResult</span><span class="o">&lt;</span><span class="bp">Self</span><span class="o">&gt;</span>
|
||
</code></pre></div>
|
||
<p><code>find</code> returns <code>Some(slot)</code> only if the kmer is actually in this layer (evidence check included). Returns <code>None</code> for kmers present in other layers or absent from the index.</p>
|
||
<p>The MPHF (<code>mphf.bin</code>, <code>evidence.bin</code>, <code>unitigs.bin</code>) is built once and never rebuilt. All data derivation operations (count → presence, thresholding, merging) reuse the same <code>MphfLayer</code>.</p>
|
||
<hr />
|
||
<h2 id="datastore-slot-indexed-data">DataStore — slot-indexed data</h2>
|
||
<div class="highlight"><pre><span></span><code><span class="k">trait</span><span class="w"> </span><span class="n">DataStore</span><span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="k">type</span><span class="w"> </span><span class="nc">Item</span><span class="p">;</span>
|
||
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">get</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">slot</span><span class="p">:</span><span class="w"> </span><span class="kt">usize</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">Self</span><span class="p">::</span><span class="n">Item</span><span class="p">;</span>
|
||
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">n</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="kt">usize</span><span class="p">;</span>
|
||
<span class="p">}</span>
|
||
</code></pre></div>
|
||
<p>Concrete types from <code>obicompactvec</code>:</p>
|
||
<table>
|
||
<thead>
|
||
<tr>
|
||
<th>Type</th>
|
||
<th><code>Item</code></th>
|
||
<th>Column stats</th>
|
||
<th>Use</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr>
|
||
<td><code>PersistentCompactIntMatrix</code></td>
|
||
<td><code>Box&lt;[u32]&gt;</code></td>
|
||
<td><code>sum() -&gt; Array1&lt;u64&gt;</code></td>
|
||
<td>count per sample per slot</td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>PersistentBitMatrix</code></td>
|
||
<td><code>Box&lt;[bool]&gt;</code></td>
|
||
<td><code>count_ones() -&gt; Array1&lt;u64&gt;</code></td>
|
||
<td>presence per sample per slot</td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<p><code>sum()</code> and <code>count_ones()</code> are the bridge between the per-matrix level and cross-layer aggregation: they give the total weight of each column within one (partition, layer) pair, which can be summed to get global column weights.</p>
|
||
<p>A <code>DataStore</code> knows nothing about kmers or MPHFs. It is indexed by <code>usize</code> slot only.</p>
|
||
<hr />
|
||
<h2 id="distance-matrix-api-on-datastore-types">Distance matrix API on DataStore types</h2>
|
||
<p>Both <code>PersistentCompactIntMatrix</code> and <code>PersistentBitMatrix</code> expose two families of distance matrix methods.</p>
|
||
<h3 id="full-distance-matrices">Full distance matrices</h3>
|
||
<p>Compute the final <code>n_cols × n_cols</code> distance matrix from data within a single matrix. Internally parallelised over the upper triangle via rayon.</p>
|
||
<div class="highlight"><pre><span></span><code><span class="c1">// PersistentCompactIntMatrix</span>
|
||
<span class="k">fn</span><span class="w"> </span><span class="nf">bray_dist_matrix</span><span class="p">(</span><span class="o">&amp;</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-&gt;</span><span class="w"> </span><span class="nc">Array2</span><span class="o">&lt;</span><span class="kt">f64</span><span class="o">&gt;</span>
|
||
<span class="k">fn</span><span class="w"> </span><span class="nf">relfreq_bray_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span>
|
||
<span class="k">fn</span><span class="w"> </span><span class="nf">euclidean_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span>
|
||
<span class="k">fn</span><span class="w"> </span><span class="nf">relfreq_euclidean_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span>
|
||
<span class="k">fn</span><span class="w"> </span><span class="nf">hellinger_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span>
|
||
<span class="k">fn</span><span class="w"> </span><span class="nf">jaccard_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span>
|
||
<span class="k">fn</span><span class="w"> </span><span class="nf">threshold_jaccard_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">threshold</span><span class="p">:</span><span class="w"> </span><span class="kt">u32</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span>
|
||
|
||
<span class="c1">// PersistentBitMatrix</span>
|
||
<span class="k">fn</span><span class="w"> </span><span class="nf">jaccard_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span>
|
||
<span class="k">fn</span><span class="w"> </span><span class="nf">hamming_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span>
|
||
</code></pre></div>
|
||
<p>These are convenience methods for a single matrix. They cannot be used directly on a <code>LayeredDataStore</code> or <code>PartitionedDataStore</code>: a final distance is not additive across matrices, so the partial API described below is required.</p>
|
||
<h3 id="partial-distance-matrices">Partial distance matrices</h3>
|
||
<p>Return additive components that can be summed element-wise across (partition, layer) pairs before computing the final distance. This is what makes cross-layer and cross-partition aggregation possible.</p>
|
||
<p><strong>Category 1 — self-contained partials</strong>: additive without any external parameter.</p>
|
||
<div class="highlight"><pre><span></span><code><span class="c1">// PersistentCompactIntMatrix</span>
|
||
<span class="k">fn</span><span class="w"> </span><span class="nf">partial_bray_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span>
|
||
<span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="p">(</span><span class="n">Array2</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">,</span><span class="w"> </span><span class="c1">// sum_min[i,j]</span>
|
||
<span class="w"> </span><span class="n">Array1</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">)</span><span class="w"> </span><span class="c1">// col_sums[k]</span>
|
||
|
||
<span class="k">fn</span><span class="w"> </span><span class="nf">partial_euclidean_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span><span class="w"> </span><span class="c1">// sum of squared diffs</span>
|
||
<span class="k">fn</span><span class="w"> </span><span class="nf">partial_threshold_jaccard_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">threshold</span><span class="p">:</span><span class="w"> </span><span class="kt">u32</span><span class="p">)</span>
|
||
<span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="p">(</span><span class="n">Array2</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">,</span><span class="w"> </span><span class="c1">// inter[i,j]</span>
|
||
<span class="w"> </span><span class="n">Array2</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">)</span><span class="w"> </span><span class="c1">// union[i,j]</span>
|
||
|
||
<span class="c1">// PersistentBitMatrix</span>
|
||
<span class="k">fn</span><span class="w"> </span><span class="nf">partial_jaccard_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span>
|
||
<span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="p">(</span><span class="n">Array2</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">,</span><span class="w"> </span><span class="c1">// inter[i,j]</span>
|
||
<span class="w"> </span><span class="n">Array2</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">)</span><span class="w"> </span><span class="c1">// union[i,j]</span>
|
||
<span class="k">fn</span><span class="w"> </span><span class="nf">partial_hamming_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="w"> </span><span class="c1">// differing bits</span>
|
||
</code></pre></div>
|
||
<p><strong>Category 2 — normalised partials</strong>: require global column sums as input, computed beforehand across all (partition, layer) pairs.</p>
|
||
<div class="highlight"><pre><span></span><code><span class="c1">// PersistentCompactIntMatrix only</span>
|
||
<span class="k">fn</span><span class="w"> </span><span class="nf">partial_relfreq_bray_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">col_sums</span><span class="p">:</span><span class="w"> </span><span class="kp">&</span><span class="nc">Array1</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">)</span>
|
||
<span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span><span class="w"> </span><span class="c1">// Σ_slot min(a_slot/sum_i, b_slot/sum_j)</span>
|
||
|
||
<span class="k">fn</span><span class="w"> </span><span class="nf">partial_relfreq_euclidean_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">col_sums</span><span class="p">:</span><span class="w"> </span><span class="kp">&</span><span class="nc">Array1</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">)</span>
|
||
<span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span><span class="w"> </span><span class="c1">// Σ_slot (a_slot/sum_i - b_slot/sum_j)²</span>
|
||
|
||
<span class="k">fn</span><span class="w"> </span><span class="nf">partial_hellinger_euclidean_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">col_sums</span><span class="p">:</span><span class="w"> </span><span class="kp">&</span><span class="nc">Array1</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">)</span>
|
||
<span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span><span class="w"> </span><span class="c1">// Σ_slot (√(a/sum_i) - √(b/sum_j))²</span>
|
||
</code></pre></div>
|
||
<p>The <code>col_sums</code> parameter must reflect the GLOBAL count across all layers and all partitions — passing a per-layer sum would give a wrong result. This constraint drives the two-pass algorithm described below.</p>
|
||
<hr />
|
||
<h2 id="progressive-aggregation-principle">Progressive aggregation principle</h2>
|
||
<p>Aggregation is <strong>hierarchical</strong>: each level computes its contribution by aggregating from the level immediately below it. No level bypasses the one below it to collect raw data from two levels down.</p>
|
||
<div class="highlight"><pre><span></span><code>PersistentCompactIntMatrix::col_weights() — column sums for one (partition, layer) matrix
|
||
↓ Σ across layers
|
||
LayeredStore&lt;PersistentCompactIntMatrix&gt;::col_weights() — column sums for one partition
|
||
↓ Σ across partitions
|
||
LayeredStore&lt;LayeredStore&lt;…&gt;&gt;::col_weights() — global column sums
|
||
</code></pre></div>
|
||
<p>The same cascade applies to every partial:</p>
|
||
<div class="highlight"><pre><span></span><code>PersistentCompactIntMatrix::partial_bray() — one (partition, layer)
|
||
↓ element-wise Σ across layers
|
||
LayeredStore&lt;PersistentCompactIntMatrix&gt;::partial_bray() — one partition
|
||
↓ element-wise Σ across partitions
|
||
LayeredStore&lt;LayeredStore&lt;…&gt;&gt;::partial_bray() — global partial → final dist
|
||
</code></pre></div>
|
||
<p>Each level presents a stable trait surface to the level above; no level reaches two levels down.</p>
|
||
<hr />
|
||
<h2 id="traits-obicompactvectraits">Traits — <code>obicompactvec::traits</code></h2>
|
||
<p>Three traits unify the aggregation API across all levels of the hierarchy.</p>
|
||
<div class="highlight"><pre><span></span><code><span class="k">trait</span><span class="w"> </span><span class="n">ColumnWeights</span><span class="p">:</span><span class="w"> </span><span class="nb">Send</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="nb">Sync</span><span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">col_weights</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array1</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">;</span>
|
||
<span class="p">}</span>
|
||
|
||
<span class="k">trait</span><span class="w"> </span><span class="n">CountPartials</span><span class="p">:</span><span class="w"> </span><span class="nc">ColumnWeights</span><span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="c1">// self-contained partials (additive, no parameter)</span>
|
||
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">partial_bray</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">;</span>
|
||
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">partial_euclidean</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span><span class="p">;</span>
|
||
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">partial_threshold_jaccard</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">threshold</span><span class="p">:</span><span class="w"> </span><span class="kt">u32</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="p">(</span><span class="n">Array2</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">,</span><span class="w"> </span><span class="n">Array2</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">);</span>
|
||
<span class="w"> </span><span class="c1">// normalised partials (global col_weights passed in cascade)</span>
|
||
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">partial_relfreq_bray</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">global</span><span class="p">:</span><span class="w"> </span><span class="kp">&</span><span class="nc">Array1</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span><span class="p">;</span>
|
||
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">partial_relfreq_euclidean</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">global</span><span class="p">:</span><span class="w"> </span><span class="kp">&</span><span class="nc">Array1</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span><span class="p">;</span>
|
||
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">partial_hellinger</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">global</span><span class="p">:</span><span class="w"> </span><span class="kp">&</span><span class="nc">Array1</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span><span class="p">;</span>
|
||
<span class="w"> </span><span class="c1">// provided finalisation methods (default implementations)</span>
|
||
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">bray_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="err">…</span><span class="w"> </span><span class="p">}</span>
|
||
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">euclidean_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="err">…</span><span class="w"> </span><span class="p">}</span>
|
||
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">threshold_jaccard_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">threshold</span><span class="p">:</span><span class="w"> </span><span class="kt">u32</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="err">…</span><span class="w"> </span><span class="p">}</span>
|
||
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">relfreq_bray_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="err">…</span><span class="w"> </span><span class="p">}</span>
|
||
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">relfreq_euclidean_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="err">…</span><span class="w"> </span><span class="p">}</span>
|
||
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">hellinger_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="err">…</span><span class="w"> </span><span class="p">}</span>
|
||
<span class="p">}</span>
|
||
|
||
<span class="k">trait</span><span class="w"> </span><span class="n">BitPartials</span><span class="p">:</span><span class="w"> </span><span class="nc">ColumnWeights</span><span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">partial_jaccard</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="p">(</span><span class="n">Array2</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">,</span><span class="w"> </span><span class="n">Array2</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">);</span>
|
||
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">partial_hamming</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="p">;</span>
|
||
<span class="w"> </span><span class="c1">// provided</span>
|
||
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">jaccard_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="err">…</span><span class="w"> </span><span class="p">}</span>
|
||
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="nf">hamming_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">u64</span><span class="o">></span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="err">…</span><span class="w"> </span><span class="p">}</span>
|
||
<span class="p">}</span>
|
||
</code></pre></div>
|
||
<p><strong>Leaf implementors</strong> (in <code>obicompactvec</code>):</p>
|
||
<table>
|
||
<thead>
|
||
<tr>
|
||
<th>Type</th>
|
||
<th>Traits</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr>
|
||
<td><code>PersistentCompactIntMatrix</code></td>
|
||
<td><code>ColumnWeights</code> (via <code>sum()</code>), <code>CountPartials</code></td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>PersistentBitMatrix</code></td>
|
||
<td><code>ColumnWeights</code> (via <code>count_ones()</code>), <code>BitPartials</code></td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<p><code>PersistentCompactIntVec</code> and <code>PersistentBitVec</code> do <strong>not</strong> implement these traits — they are single-column primitives, not matrix-level aggregators.</p>
|
||
<hr />
|
||
<h2 id="layeredstores-obilayeredmap"><code>LayeredStore&lt;S&gt;</code> — <code>obilayeredmap</code></h2>
|
||
<p>A single generic wrapper removes the need for named <code>LayeredDataStore</code> and <code>PartitionedDataStore</code> types:</p>
|
||
<div class="highlight"><pre><span></span><code><span class="k">pub</span><span class="w"> </span><span class="k">struct</span><span class="w"> </span><span class="nc">LayeredStore</span><span class="o"><</span><span class="n">S</span><span class="o">></span><span class="p">(</span><span class="nb">Vec</span><span class="o"><</span><span class="n">S</span><span class="o">></span><span class="p">);</span>
|
||
</code></pre></div>
|
||
<p>Three blanket impls propagate the traits up the hierarchy:</p>
|
||
<div class="highlight"><pre><span></span><code><span class="k">impl</span><span class="o"><</span><span class="n">S</span><span class="p">:</span><span class="w"> </span><span class="nc">ColumnWeights</span><span class="o">></span><span class="w"> </span><span class="n">ColumnWeights</span><span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="n">LayeredStore</span><span class="o"><</span><span class="n">S</span><span class="o">></span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="err">…</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="c1">// Σ across inner stores</span>
|
||
<span class="k">impl</span><span class="o"><</span><span class="n">S</span><span class="p">:</span><span class="w"> </span><span class="nc">CountPartials</span><span class="o">></span><span class="w"> </span><span class="n">CountPartials</span><span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="n">LayeredStore</span><span class="o"><</span><span class="n">S</span><span class="o">></span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="err">…</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="c1">// same pattern</span>
|
||
<span class="k">impl</span><span class="o"><</span><span class="n">S</span><span class="p">:</span><span class="w"> </span><span class="nc">BitPartials</span><span class="o">></span><span class="w"> </span><span class="n">BitPartials</span><span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="n">LayeredStore</span><span class="o"><</span><span class="n">S</span><span class="o">></span><span class="w"> </span><span class="p">{</span><span class="w"> </span><span class="err">…</span><span class="w"> </span><span class="p">}</span><span class="w"> </span><span class="c1">// same pattern</span>
|
||
</code></pre></div>
|
||
<p>Because the blanket impls apply recursively, <strong><code>LayeredStore&lt;LayeredStore&lt;S&gt;&gt;</code></strong> automatically inherits all three traits when <code>S</code> does — no separate <code>PartitionedStore</code> type is needed:</p>
|
||
<div class="highlight"><pre><span></span><code>PersistentCompactIntMatrix implements CountPartials
|
||
LayeredStore&lt;PersistentCompactIntMatrix&gt; via blanket impl (= one partition)
|
||
LayeredStore&lt;LayeredStore&lt;…&gt;&gt; via blanket impl (= partitioned index)
|
||
</code></pre></div>
|
||
<h3 id="normalised-metrics-two-pass-cascade">Normalised metrics — two-pass cascade</h3>
|
||
<p>The normalised finalisation methods call <code>col_weights()</code> first (pass 1), then the normalised partial (pass 2). Both calls go through the blanket impls above, so the cascade is automatic:</p>
|
||
<div class="highlight"><pre><span></span><code><span class="c1">// called on LayeredStore<LayeredStore<PersistentCompactIntMatrix>></span>
|
||
<span class="k">fn</span><span class="w"> </span><span class="nf">relfreq_bray_dist_matrix</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">)</span><span class="w"> </span><span class="p">-></span><span class="w"> </span><span class="nc">Array2</span><span class="o"><</span><span class="kt">f64</span><span class="o">></span><span class="w"> </span><span class="p">{</span>
|
||
<span class="w"> </span><span class="kd">let</span><span class="w"> </span><span class="n">global</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="bp">self</span><span class="p">.</span><span class="n">col_weights</span><span class="p">();</span><span class="w"> </span><span class="c1">// pass 1 — progressive sum at every level</span>
|
||
<span class="w"> </span><span class="kd">let</span><span class="w"> </span><span class="n">p</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="bp">self</span><span class="p">.</span><span class="n">partial_relfreq_bray</span><span class="p">(</span><span class="o">&</span><span class="n">global</span><span class="p">);</span><span class="w"> </span><span class="c1">// pass 2 — global passed in cascade</span>
|
||
<span class="w"> </span><span class="n">p</span><span class="p">.</span><span class="n">mapv</span><span class="p">(</span><span class="o">|</span><span class="n">v</span><span class="o">|</span><span class="w"> </span><span class="mf">1.0</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="n">v</span><span class="p">)</span><span class="w"> </span><span class="c1">// finalise (diagonal zeroed separately)</span>
|
||
<span class="p">}</span>
|
||
</code></pre></div>
|
||
<p><code>global</code> is exact: each kmer belongs to exactly one <code>(partition, layer)</code> pair, so there is no double-counting across the hierarchy.</p>
|
||
<hr />
|
||
<h2 id="parallelism-model">Parallelism model</h2>
|
||
<table>
|
||
<thead>
|
||
<tr>
|
||
<th>Level</th>
|
||
<th>Unit</th>
|
||
<th>Coordination</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr>
|
||
<td>Across partitions</td>
|
||
<td><code>LayeredStore&lt;LayeredStore&lt;S&gt;&gt;</code> inner stores</td>
|
||
<td>none — fully independent</td>
|
||
</tr>
|
||
<tr>
|
||
<td>Across layers within a partition</td>
|
||
<td><code>LayeredStore&lt;S&gt;</code> inner stores</td>
|
||
<td>none — disjoint kmer sets</td>
|
||
</tr>
|
||
<tr>
|
||
<td>Normalised pass 1 (<code>col_weights</code>)</td>
|
||
<td>per inner store</td>
|
||
<td>none — additive</td>
|
||
</tr>
|
||
<tr>
|
||
<td>Normalised pass 2 (partial)</td>
|
||
<td>per inner store</td>
|
||
<td><code>global</code> broadcast read-only</td>
|
||
</tr>
|
||
<tr>
|
||
<td>Within a matrix (distance)</td>
|
||
<td>upper-triangle pair <code>(i,j)</code></td>
|
||
<td>none — rayon <code>par_iter</code></td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<p>All levels use rayon <code>par_iter</code> internally; <code>reduce_with</code> performs a parallel tree reduction.</p>
|
||
<hr />
|
||
<h2 id="query-model">Query model</h2>
|
||
<h3 id="point-query-kmer-optionitem">Point query — <code>kmer → Option&lt;Item&gt;</code></h3>
|
||
<div class="highlight"><pre><span></span><code>minimiser(kmer) → partition p
|
||
for each layer l in p:
|
||
slot = MphfLayer_l.find(kmer)
|
||
if slot is Some:
|
||
return DataStore_l.get(slot)
|
||
return None
|
||
</code></pre></div>
|
||
<p>At most O(n_layers) MPHF probes in the worst case; O(1) expected. There is no cross-layer fusion — the result comes from exactly one (partition, layer) pair.</p>
|
||
<h3 id="aggregation-result">Aggregation — <code>→ Result</code></h3>
|
||
<div class="highlight"><pre><span></span><code>result = reduce(
|
||
for p in partitions: // parallel
|
||
for l in layers(p): // parallel
|
||
partial(DataStore_p_l)
|
||
)
|
||
</code></pre></div>
|
||
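<p>For normalised metrics, replace this single reduction with the two-pass scheme described above: compute the global <code>col_weights()</code> first, then reduce the normalised partials.</p>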
|
||
<hr />
|
||
<h2 id="datastore-derivation">DataStore derivation</h2>
|
||
<p>Because the <code>MphfLayer</code> is independent of its data stores, new stores can be derived from existing ones without rebuilding the MPHF:</p>
|
||
<div class="highlight"><pre><span></span><code>// count → presence/absence, parallel across (partition, layer)
|
||
for (p, l) in all_partition_layer_pairs().par_iter():
|
||
count_store = open PersistentCompactIntMatrix at (p, l)
|
||
presence_store = PersistentBitMatrix::from_count_matrix(count_store, threshold, dir)
|
||
</code></pre></div>
|
||
<p>Derivations of this kind include: thresholding a count matrix into a binary presence matrix (as shown above); taking the union of two presence matrices; and merging two count matrices (saturating add, column-wise). All are local to one <code>(partition, layer)</code> pair.</p>
|
||
<hr />
|
||
<h2 id="relationship-to-current-implementation">Relationship to current implementation</h2>
|
||
<h3 id="what-is-implemented">What is implemented</h3>
|
||
<ul>
|
||
<li><strong><code>obicompactvec::traits</code></strong>: <code>ColumnWeights</code>, <code>CountPartials</code>, <code>BitPartials</code> are defined and implemented on <code>PersistentCompactIntMatrix</code> and <code>PersistentBitMatrix</code>.</li>
|
||
<li><strong><code>obilayeredmap::LayeredStore&lt;S&gt;</code></strong>: generic wrapper with blanket impls for all three traits. <code>LayeredStore&lt;LayeredStore&lt;S&gt;&gt;</code> is the partitioned level — no separate type needed. Tests confirm that splitting data across layers and across partitions gives the same distance matrices as computing on flat combined data.</li>
|
||
</ul>
|
||
<h3 id="what-is-not-yet-implemented">What is not yet implemented</h3>
|
||
<ul>
|
||
<li><code>Layer&lt;D: LayerData&gt;</code> still fuses <code>MphfLayer</code> and one <code>DataStore</code>. Multiple data stores on the same MPHF are not supported.</li>
|
||
<li><code>LayeredMap</code> is a single-partition structure without a distance-matrix API; it does not yet use <code>LayeredStore</code>.</li>
|
||
<li>No <code>PartitionedIndex</code> type for point queries with parallel partition dispatch.</li>
|
||
</ul>
|
||
<h3 id="planned-refactoring">Planned refactoring</h3>
|
||
<ol>
|
||
<li>Extract <code>MphfLayer</code> from <code>Layer&lt;D&gt;</code> as an autonomous type.</li>
|
||
<li>Replace the <code>LayerData</code> trait with the <code>DataStore</code> / <code>ColumnWeights</code> / <code>CountPartials</code> / <code>BitPartials</code> system.</li>
|
||
<li>Rewire <code>LayeredMap</code> to hold <code>LayeredStore&lt;PersistentCompactIntMatrix&gt;</code> (or bit variant) alongside the MPHF layers.</li>
|
||
<li>Implement <code>PartitionedIndex</code> using <code>LayeredStore&lt;LayeredStore&lt;S&gt;&gt;</code> for data and parallel dispatch for queries.</li>
|
||
</ol>
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
</article>
|
||
</div>
|
||
|
||
|
||
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
|
||
</div>
|
||
|
||
</main>
|
||
|
||
<footer class="md-footer">
|
||
|
||
<div class="md-footer-meta md-typeset">
|
||
<div class="md-footer-meta__inner md-grid">
|
||
<div class="md-copyright">
|
||
|
||
|
||
Made with
|
||
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
|
||
Material for MkDocs
|
||
</a>
|
||
|
||
</div>
|
||
|
||
</div>
|
||
</div>
|
||
</footer>
|
||
|
||
</div>
|
||
<div class="md-dialog" data-md-component="dialog">
|
||
<div class="md-dialog__inner md-typeset"></div>
|
||
</div>
|
||
|
||
|
||
|
||
|
||
|
||
<script id="__config" type="application/json">{"annotate": null, "base": "../..", "features": [], "search": "../../assets/javascripts/workers/search.2c215733.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
|
||
|
||
|
||
<script src="../../assets/javascripts/bundle.79ae519e.min.js"></script>
|
||
|
||
<script src="https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js"></script>
|
||
|
||
|
||
</body>
|
||
</html> |