Reorganization of the documentation book directory

Former-commit-id: 095acaf9c8649b0e527c6253dc79330feedac865
This commit is contained in:
2023-02-23 23:41:24 +01:00
parent 072b85e155
commit c94b2974fb
154 changed files with 1201200 additions and 5348 deletions

10
doc/Makefile Normal file
View File

@ -0,0 +1,10 @@
book:
make -C book all
man:
make -C man all
all: book man

Binary file not shown.

View File

@ -1,712 +0,0 @@
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
<meta charset="utf-8">
<meta name="generator" content="quarto-1.2.256">
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
<title>OBITools V4 - 4&nbsp; The OBITools V4 commands</title>
<style>
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
div.columns{display: flex; gap: min(4vw, 1.5em);}
div.column{flex: auto; overflow-x: auto;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
ul.task-list li input[type="checkbox"] {
width: 0.8em;
margin: 0 0.8em 0.2em -1.6em;
vertical-align: middle;
}
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code
{ counter-reset: source-line 0; }
pre.numberSource code > span
{ position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
{ content: counter(source-line);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
color: #aaaaaa;
}
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
div.sourceCode
{ }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code span.at { color: #7d9029; } /* Attribute */
code span.bn { color: #40a070; } /* BaseN */
code span.bu { color: #008000; } /* BuiltIn */
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code span.ch { color: #4070a0; } /* Char */
code span.cn { color: #880000; } /* Constant */
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
code span.dt { color: #902000; } /* DataType */
code span.dv { color: #40a070; } /* DecVal */
code span.er { color: #ff0000; font-weight: bold; } /* Error */
code span.ex { } /* Extension */
code span.fl { color: #40a070; } /* Float */
code span.fu { color: #06287e; } /* Function */
code span.im { color: #008000; font-weight: bold; } /* Import */
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
code span.op { color: #666666; } /* Operator */
code span.ot { color: #007020; } /* Other */
code span.pp { color: #bc7a00; } /* Preprocessor */
code span.sc { color: #4070a0; } /* SpecialChar */
code span.ss { color: #bb6688; } /* SpecialString */
code span.st { color: #4070a0; } /* String */
code span.va { color: #19177c; } /* Variable */
code span.vs { color: #4070a0; } /* VerbatimString */
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
div.csl-bib-body { }
div.csl-entry {
clear: both;
}
.hanging div.csl-entry {
margin-left:2em;
text-indent:-2em;
}
div.csl-left-margin {
min-width:2em;
float:left;
}
div.csl-right-inline {
margin-left:2em;
padding-left:1em;
}
div.csl-indent {
margin-left: 2em;
}
</style>
<script src="site_libs/quarto-nav/quarto-nav.js"></script>
<script src="site_libs/quarto-nav/headroom.min.js"></script>
<script src="site_libs/clipboard/clipboard.min.js"></script>
<script src="site_libs/quarto-search/autocomplete.umd.js"></script>
<script src="site_libs/quarto-search/fuse.min.js"></script>
<script src="site_libs/quarto-search/quarto-search.js"></script>
<meta name="quarto:offset" content="./">
<link href="./library.html" rel="next">
<link href="./tutorial.html" rel="prev">
<script src="site_libs/quarto-html/quarto.js"></script>
<script src="site_libs/quarto-html/popper.min.js"></script>
<script src="site_libs/quarto-html/tippy.umd.min.js"></script>
<script src="site_libs/quarto-html/anchor.min.js"></script>
<link href="site_libs/quarto-html/tippy.css" rel="stylesheet">
<link href="site_libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="site_libs/bootstrap/bootstrap.min.js"></script>
<link href="site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="site_libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">
<script id="quarto-search-options" type="application/json">{
"location": "sidebar",
"copy-button": false,
"collapse-after": 3,
"panel-placement": "start",
"type": "textbox",
"limit": 20,
"language": {
"search-no-results-text": "No results",
"search-matching-documents-text": "matching documents",
"search-copy-link-title": "Copy link to search",
"search-hide-matches-text": "Hide additional matches",
"search-more-match-text": "more match in this document",
"search-more-matches-text": "more matches in this document",
"search-clear-button-title": "Clear",
"search-detached-cancel-button-title": "Cancel",
"search-submit-button-title": "Submit"
}
}</script>
<script src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml-full.js" type="text/javascript"></script>
</head>
<body class="nav-sidebar floating">
<div id="quarto-search-results"></div>
<header id="quarto-header" class="headroom fixed-top">
<nav class="quarto-secondary-nav" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
<div class="container-fluid d-flex justify-content-between">
<h1 class="quarto-secondary-nav-title"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">The <em>OBITools V4</em> commands</span></h1>
<button type="button" class="quarto-btn-toggle btn" aria-label="Show secondary navigation">
<i class="bi bi-chevron-right"></i>
</button>
</div>
</nav>
</header>
<!-- content -->
<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article">
<!-- sidebar -->
<nav id="quarto-sidebar" class="sidebar collapse sidebar-navigation floating overflow-auto">
<div class="pt-lg-2 mt-2 text-left sidebar-header">
<div class="sidebar-title mb-0 py-0">
<a href="./">OBITools V4</a>
</div>
</div>
<div class="mt-2 flex-shrink-0 align-items-center">
<div class="sidebar-search">
<div id="quarto-search" class="" title="Search"></div>
</div>
</div>
<div class="sidebar-menu-container">
<ul class="list-unstyled mt-1">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./index.html" class="sidebar-item-text sidebar-link">Preface</a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./intro.html" class="sidebar-item-text sidebar-link"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">The OBITools</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./formats.html" class="sidebar-item-text sidebar-link"><span class="chapter-number">2</span>&nbsp; <span class="chapter-title">File formats usable with <em>OBITools</em></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./tutorial.html" class="sidebar-item-text sidebar-link"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">OBITools V4 Tutorial</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./commands.html" class="sidebar-item-text sidebar-link active"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">The <em>OBITools V4</em> commands</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./library.html" class="sidebar-item-text sidebar-link"><span class="chapter-number">5</span>&nbsp; <span class="chapter-title">The GO <em>OBITools</em> library</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./annexes.html" class="sidebar-item-text sidebar-link"><span class="chapter-number">6</span>&nbsp; <span class="chapter-title">Annexes</span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="./references.html" class="sidebar-item-text sidebar-link">References</a>
</div>
</li>
</ul>
</div>
</nav>
<!-- margin-sidebar -->
<div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
<nav id="TOC" role="doc-toc" class="toc-active">
<h2 id="toc-title">Table of contents</h2>
<ul>
<li><a href="#specifying-the-input-files-to-obitools-commands" id="toc-specifying-the-input-files-to-obitools-commands" class="nav-link active" data-scroll-target="#specifying-the-input-files-to-obitools-commands"><span class="toc-section-number">4.1</span> Specifying the input files to <em>OBITools</em> commands</a></li>
<li><a href="#options-common-to-most-of-the-obitools-commands" id="toc-options-common-to-most-of-the-obitools-commands" class="nav-link" data-scroll-target="#options-common-to-most-of-the-obitools-commands"><span class="toc-section-number">4.2</span> Options common to most of the <em>OBITools</em> commands</a>
<ul class="collapse">
<li><a href="#specifying-input-format" id="toc-specifying-input-format" class="nav-link" data-scroll-target="#specifying-input-format"><span class="toc-section-number">4.2.1</span> Specifying input format</a></li>
<li><a href="#specifying-output-format" id="toc-specifying-output-format" class="nav-link" data-scroll-target="#specifying-output-format"><span class="toc-section-number">4.2.2</span> Specifying output format</a></li>
<li><a href="#the-fasta-and-fastq-annotations-format" id="toc-the-fasta-and-fastq-annotations-format" class="nav-link" data-scroll-target="#the-fasta-and-fastq-annotations-format"><span class="toc-section-number">4.2.3</span> The Fasta and Fastq annotations format</a></li>
</ul></li>
<li><a href="#obitools-expression-language" id="toc-obitools-expression-language" class="nav-link" data-scroll-target="#obitools-expression-language"><span class="toc-section-number">4.3</span> OBITools expression language</a>
<ul class="collapse">
<li><a href="#variables-usable-in-the-expression" id="toc-variables-usable-in-the-expression" class="nav-link" data-scroll-target="#variables-usable-in-the-expression"><span class="toc-section-number">4.3.1</span> Variables usable in the expression</a></li>
<li><a href="#function-defined-in-the-language" id="toc-function-defined-in-the-language" class="nav-link" data-scroll-target="#function-defined-in-the-language"><span class="toc-section-number">4.3.2</span> Function defined in the language</a></li>
<li><a href="#accessing-to-the-sequence-annotations" id="toc-accessing-to-the-sequence-annotations" class="nav-link" data-scroll-target="#accessing-to-the-sequence-annotations"><span class="toc-section-number">4.3.3</span> Accessing to the sequence annotations</a></li>
</ul></li>
<li><a href="#metabarcode-design-and-quality-assessment" id="toc-metabarcode-design-and-quality-assessment" class="nav-link" data-scroll-target="#metabarcode-design-and-quality-assessment"><span class="toc-section-number">4.4</span> Metabarcode design and quality assessment</a>
<ul class="collapse">
<li><a href="#obipcr" id="toc-obipcr" class="nav-link" data-scroll-target="#obipcr"><span class="toc-section-number">4.4.1</span> <code>obipcr</code></a></li>
</ul></li>
<li><a href="#file-format-conversions" id="toc-file-format-conversions" class="nav-link" data-scroll-target="#file-format-conversions"><span class="toc-section-number">4.5</span> File format conversions</a>
<ul class="collapse">
<li><a href="#obiconvert" id="toc-obiconvert" class="nav-link" data-scroll-target="#obiconvert"><span class="toc-section-number">4.5.1</span> <code>obiconvert</code></a></li>
</ul></li>
<li><a href="#sequence-annotations" id="toc-sequence-annotations" class="nav-link" data-scroll-target="#sequence-annotations"><span class="toc-section-number">4.6</span> Sequence annotations</a>
<ul class="collapse">
<li><a href="#obiannotate" id="toc-obiannotate" class="nav-link" data-scroll-target="#obiannotate"><span class="toc-section-number">4.6.1</span> <code>obiannotate</code></a></li>
<li><a href="#obitag" id="toc-obitag" class="nav-link" data-scroll-target="#obitag"><span class="toc-section-number">4.6.2</span> <code>obitag</code></a></li>
</ul></li>
<li><a href="#computations-on-sequences" id="toc-computations-on-sequences" class="nav-link" data-scroll-target="#computations-on-sequences"><span class="toc-section-number">4.7</span> Computations on sequences</a>
<ul class="collapse">
<li><a href="#obipairing" id="toc-obipairing" class="nav-link" data-scroll-target="#obipairing"><span class="toc-section-number">4.7.1</span> <code>obipairing</code></a></li>
<li><a href="#obimultiplex" id="toc-obimultiplex" class="nav-link" data-scroll-target="#obimultiplex"><span class="toc-section-number">4.7.2</span> <code>obimultiplex</code></a></li>
<li><a href="#obicomplement" id="toc-obicomplement" class="nav-link" data-scroll-target="#obicomplement"><span class="toc-section-number">4.7.3</span> <code>obicomplement</code></a></li>
<li><a href="#obiclean" id="toc-obiclean" class="nav-link" data-scroll-target="#obiclean"><span class="toc-section-number">4.7.4</span> <code>obiclean</code></a></li>
<li><a href="#obiuniq" id="toc-obiuniq" class="nav-link" data-scroll-target="#obiuniq"><span class="toc-section-number">4.7.5</span> <code>obiuniq</code></a></li>
</ul></li>
<li><a href="#sequence-sampling-and-filtering" id="toc-sequence-sampling-and-filtering" class="nav-link" data-scroll-target="#sequence-sampling-and-filtering"><span class="toc-section-number">4.8</span> Sequence sampling and filtering</a>
<ul class="collapse">
<li><a href="#obigrep" id="toc-obigrep" class="nav-link" data-scroll-target="#obigrep"><span class="toc-section-number">4.8.1</span> <code>obigrep</code></a></li>
</ul></li>
<li><a href="#utilities" id="toc-utilities" class="nav-link" data-scroll-target="#utilities"><span class="toc-section-number">4.9</span> Utilities</a>
<ul class="collapse">
<li><a href="#obicount" id="toc-obicount" class="nav-link" data-scroll-target="#obicount"><span class="toc-section-number">4.9.1</span> <code>obicount</code></a></li>
<li><a href="#obidistribute" id="toc-obidistribute" class="nav-link" data-scroll-target="#obidistribute"><span class="toc-section-number">4.9.2</span> <code>obidistribute</code></a></li>
<li><a href="#obifind" id="toc-obifind" class="nav-link" data-scroll-target="#obifind"><span class="toc-section-number">4.9.3</span> <code>obifind</code></a></li>
</ul></li>
</ul>
</nav>
</div>
<!-- main -->
<main class="content" id="quarto-document-content">
<header id="title-block-header" class="quarto-title-block default">
<div class="quarto-title">
<h1 class="title d-none d-lg-block"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">The <em>OBITools V4</em> commands</span></h1>
</div>
<div class="quarto-title-meta">
</div>
</header>
<section id="specifying-the-input-files-to-obitools-commands" class="level2" data-number="4.1">
<h2 data-number="4.1" class="anchored" data-anchor-id="specifying-the-input-files-to-obitools-commands"><span class="header-section-number">4.1</span> Specifying the input files to <em>OBITools</em> commands</h2>
</section>
<section id="options-common-to-most-of-the-obitools-commands" class="level2" data-number="4.2">
<h2 data-number="4.2" class="anchored" data-anchor-id="options-common-to-most-of-the-obitools-commands"><span class="header-section-number">4.2</span> Options common to most of the <em>OBITools</em> commands</h2>
<section id="specifying-input-format" class="level3" data-number="4.2.1">
<h3 data-number="4.2.1" class="anchored" data-anchor-id="specifying-input-format"><span class="header-section-number">4.2.1</span> Specifying input format</h3>
<p>Five sequence formats are accepted for input files. <em>Fasta</em> (<a href="formats.html#sec-fasta"><span>Section&nbsp;2.1.2</span></a>) and <em>Fastq</em> (<a href="formats.html#sec-fastq"><span>Section&nbsp;2.1.3</span></a>) are the main ones, EMBL and Genbank allow the use of flat files produced by these two international databases. The last one, ecoPCR, is maintained for compatibility with previous <em>OBITools</em> and allows to read <em>ecoPCR</em> outputs as sequence files.</p>
<ul>
<li><code>--ecopcr</code> : Read data following the <em>ecoPCR</em> output format.</li>
<li><code>--embl</code> Read data following the <em>EMBL</em> flatfile format.</li>
<li><code>--genbank</code> Read data following the <em>Genbank</em> flatfile format.</li>
</ul>
<p>Several encoding schemes have been proposed for quality scores in <em>Fastq</em> format. Currently, <em>OBITools</em> considers Sanger encoding as the standard. For reasons of compatibility with older datasets produced with <em>Solexa</em> sequencers, it is possible, by using the following option, to force the use of the corresponding quality encoding scheme when reading these older files.</p>
<ul>
<li><code>--solexa</code> Decodes quality string according to the Solexa specification. (default: false)</li>
</ul>
</section>
<section id="specifying-output-format" class="level3" data-number="4.2.2">
<h3 data-number="4.2.2" class="anchored" data-anchor-id="specifying-output-format"><span class="header-section-number">4.2.2</span> Specifying output format</h3>
<p>Only two output sequence formats are supported by OBITools, Fasta and Fastq. Fastq is used when output sequences are associated with quality information. Otherwise, Fasta is the default format. However, it is possible to force the output format by using one of the following two options. Forcing the use of Fasta results in the loss of quality information. Conversely, when the Fastq format is forced with sequences that have no quality data, dummy qualities set to 40 for each nucleotide are added.</p>
<ul>
<li><code>--fasta-output</code> Read data following the ecoPCR output format.</li>
<li><code>--fastq-output</code> Read data following the EMBL flatfile format.</li>
</ul>
<p>OBITools allows multiple input files to be specified for a single command.</p>
<ul>
<li><code>--no-order</code> When several input files are provided, indicates that there is no order among them. (default: false). Using such option can increase a lot the processing of the data.</li>
</ul>
</section>
<section id="the-fasta-and-fastq-annotations-format" class="level3" data-number="4.2.3">
<h3 data-number="4.2.3" class="anchored" data-anchor-id="the-fasta-and-fastq-annotations-format"><span class="header-section-number">4.2.3</span> The Fasta and Fastq annotations format</h3>
<p>OBITools extend the <a href="#the-fasta-sequence-format">Fasta</a> and <a href="#the-fastq-sequence-format">Fastq</a> formats by introducing a format for the title lines of these formats allowing to annotate every sequence. While the previous version of OBITools used an <em>ad-hoc</em> format for these annotation, this new version introduce the usage of the standard JSON format to store them.</p>
<p>On input, OBITools automatically recognize the format of the annotations, but two options allows to force the parsing following one of them. You should normally not need to use these options.</p>
<ul>
<li><p><code>--input-OBI-header</code> FASTA/FASTQ title line annotations follow OBI format. (default: false)</p></li>
<li><p><code>--input-json-header</code> FASTA/FASTQ title line annotations follow json format. (default: false)</p></li>
</ul>
<p>On output, by default annotation are formatted using the new JSON format. For compatibility with previous version of OBITools and with external scripts and software, it is possible to force the usage of the previous OBITools format.</p>
<ul>
<li><p><code>--output-OBI-header|-O</code> output FASTA/FASTQ title line annotations follow OBI format. (default: false)</p></li>
<li><p><code>--output-json-header</code> output FASTA/FASTQ title line annotations follow json format. (default: false)</p></li>
</ul>
<section id="system-related-options" class="level4" data-number="4.2.3.1">
<h4 data-number="4.2.3.1" class="anchored" data-anchor-id="system-related-options"><span class="header-section-number">4.2.3.1</span> System related options</h4>
<ul>
<li><code>--debug</code> (default: false)</li>
<li><code>--help\|-h\|-?</code> (default: false)</li>
<li><code>--max-cpu &lt;int&gt;</code> Number of parallele threads computing the result (default: 10)</li>
<li><code>--workers\|-w &lt;int&gt;</code> Number of parallele threads computing the result (default: 9)</li>
</ul>
</section>
</section>
</section>
<section id="obitools-expression-language" class="level2" data-number="4.3">
<h2 data-number="4.3" class="anchored" data-anchor-id="obitools-expression-language"><span class="header-section-number">4.3</span> OBITools expression language</h2>
<p>Several OBITools (<em>e.g.</em> obigrep, obiannotate) allow the user to specify some simple expressions to compute values or define predicates. This expressions are parsed and evaluated using the <a href="https://pkg.go.dev/github.com/PaesslerAG/gval" title="Gval (Go eVALuate) for evaluating arbitrary expressions Go-like expressions.">gval</a> go package, which allows for evaluating go-Like expression.</p>
<section id="variables-usable-in-the-expression" class="level3" data-number="4.3.1">
<h3 data-number="4.3.1" class="anchored" data-anchor-id="variables-usable-in-the-expression"><span class="header-section-number">4.3.1</span> Variables usable in the expression</h3>
<ul>
<li><code>sequence</code> is the sequence object on which the expression is evaluated.</li>
<li><code>annotations</code>is a map object containing every annotations associated to the currently processed sequence.</li>
</ul>
</section>
<section id="function-defined-in-the-language" class="level3" data-number="4.3.2">
<h3 data-number="4.3.2" class="anchored" data-anchor-id="function-defined-in-the-language"><span class="header-section-number">4.3.2</span> Function defined in the language</h3>
<section id="instrospection-functions" class="level4 unnumbered">
<h4 class="unnumbered anchored" data-anchor-id="instrospection-functions">Instrospection functions</h4>
<ul>
<li><code>len(x)</code>is a generic function allowing to retreive the size of a object. It returns the length of a sequences, the number of element in a map like <code>annotations</code>, the number of elements in an array. The reurned value is an <code>int</code>.</li>
</ul>
</section>
<section id="cast-functions" class="level4 unnumbered">
<h4 class="unnumbered anchored" data-anchor-id="cast-functions">Cast functions</h4>
<ul>
<li><code>int(x)</code> converts if possible the <code>x</code> value to an integer value. The function returns an <code>int</code>.</li>
<li><code>numeric(x)</code> converts if possible the <code>x</code> value to a float value. The function returns a <code>float</code>.</li>
<li><code>bool(x)</code> converts if possible the <code>x</code> value to a boolean value. The function returns a <code>bool</code>.</li>
</ul>
</section>
<section id="string-related-functions" class="level4 unnumbered">
<h4 class="unnumbered anchored" data-anchor-id="string-related-functions">String related functions</h4>
<ul>
<li><code>printf(format,...)</code> allows to combine several values to build a string. <code>format</code> follows the classical C <code>printf</code> syntax. The function returns a <code>string</code>.</li>
<li><code>subspc(x)</code> substitutes every space in the <code>x</code> string by the underscore (<code>_</code>) character. The function returns a <code>string</code>.</li>
</ul>
</section>
</section>
<section id="accessing-to-the-sequence-annotations" class="level3" data-number="4.3.3">
<h3 data-number="4.3.3" class="anchored" data-anchor-id="accessing-to-the-sequence-annotations"><span class="header-section-number">4.3.3</span> Accessing to the sequence annotations</h3>
<p>The <code>annotations</code> variable is a map object containing all the annotations associated to the currently processed sequence. Index of the map are the attribute names. It exists to possibillities to retreive an annotation. It is possible to use the classical <code>[]</code> indexing operator, putting the attribute name quoted by double quotes between them.</p>
<div class="sourceCode" id="cb1"><pre class="sourceCode go code-with-copy"><code class="sourceCode go"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a>annotations<span class="op">[</span><span class="st">"direction"</span><span class="op">]</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>The above code retreives the <code>direction</code> annotation. A second notation using the dot (<code>.</code>) is often more convenient.</p>
<div class="sourceCode" id="cb2"><pre class="sourceCode go code-with-copy"><code class="sourceCode go"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a>annotations<span class="op">.</span>direction</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>Special attributes of the sequence are accessible only by dedicated methods of the <code>sequence</code> object.</p>
<ul>
<li>The sequence identifier : <code>Id()</code></li>
<li>THe sequence definition : <code>Definition()</code></li>
</ul>
</section>
</section>
<section id="metabarcode-design-and-quality-assessment" class="level2" data-number="4.4">
<h2 data-number="4.4" class="anchored" data-anchor-id="metabarcode-design-and-quality-assessment"><span class="header-section-number">4.4</span> Metabarcode design and quality assessment</h2>
<section id="obipcr" class="level3" data-number="4.4.1">
<h3 data-number="4.4.1" class="anchored" data-anchor-id="obipcr"><span class="header-section-number">4.4.1</span> <code>obipcr</code></h3>
<blockquote class="blockquote">
<p>Replace the <code>ecoPCR</code> original <em>OBITools</em></p>
</blockquote>
</section>
</section>
<section id="file-format-conversions" class="level2" data-number="4.5">
<h2 data-number="4.5" class="anchored" data-anchor-id="file-format-conversions"><span class="header-section-number">4.5</span> File format conversions</h2>
<section id="obiconvert" class="level3" data-number="4.5.1">
<h3 data-number="4.5.1" class="anchored" data-anchor-id="obiconvert"><span class="header-section-number">4.5.1</span> <code>obiconvert</code></h3>
</section>
</section>
<section id="sequence-annotations" class="level2" data-number="4.6">
<h2 data-number="4.6" class="anchored" data-anchor-id="sequence-annotations"><span class="header-section-number">4.6</span> Sequence annotations</h2>
<section id="obiannotate" class="level3" data-number="4.6.1">
<h3 data-number="4.6.1" class="anchored" data-anchor-id="obiannotate"><span class="header-section-number">4.6.1</span> <code>obiannotate</code></h3>
</section>
<section id="obitag" class="level3" data-number="4.6.2">
<h3 data-number="4.6.2" class="anchored" data-anchor-id="obitag"><span class="header-section-number">4.6.2</span> <code>obitag</code></h3>
</section>
</section>
<section id="computations-on-sequences" class="level2" data-number="4.7">
<h2 data-number="4.7" class="anchored" data-anchor-id="computations-on-sequences"><span class="header-section-number">4.7</span> Computations on sequences</h2>
<section id="obipairing" class="level3" data-number="4.7.1">
<h3 data-number="4.7.1" class="anchored" data-anchor-id="obipairing"><span class="header-section-number">4.7.1</span> <code>obipairing</code></h3>
<blockquote class="blockquote">
<p>Replace the <code>illuminapairedends</code> original <em>OBITools</em></p>
</blockquote>
<section id="alignment-procedure" class="level4 unnumbered">
<h4 class="unnumbered anchored" data-anchor-id="alignment-procedure">Alignment procedure</h4>
<p><code>obipairing</code> is introducing a new alignment algorithm compared to the <code>illuminapairedend</code> command of the <code>OBITools V2</code>. Nethertheless this new algorithm has been design to produce the same results than the previous, except in very few cases.</p>
<p>The new algorithm is a two-step procedure. First, a FASTN-type algorithm <span class="citation" data-cites="Lipman1985-hw">(<a href="references.html#ref-Lipman1985-hw" role="doc-biblioref">Lipman and Pearson 1985</a>)</span> identifies the best offset between the two matched readings. This identifies the region of overlap.</p>
<p>In the second step, the matching regions of the two reads are extracted along with a flanking sequence of <span class="math inline">\(\Delta\)</span> base pairs. The two subsequences are then aligned using a “one side free end-gap” dynamic programming algorithm. This latter step is only called if at least one mismatch is detected by the FASTP step.</p>
<p>Unless the similarity between the two reads at their overlap region is very low, the addition of the flanking regions in the second step of the alignment ensures the same alignment as if the dynamic programming alignment was performed on the full reads.</p>
</section>
<section id="the-scoring-system" class="level4 unnumbered">
<h4 class="unnumbered anchored" data-anchor-id="the-scoring-system">The scoring system</h4>
<p>In the dynamic programming step, the match and mismatch scores take into account the quality scores of the two aligned nucleotides. By taking these into account, the probability of a true match can be calculated for each aligned base pair.</p>
<p>If we consider a nucleotide read with a quality score <span class="math inline">\(Q\)</span>, the probability of misreading this base (<span class="math inline">\(P_E\)</span>) is : <span class="math display">\[
P_E = 10^{-\frac{Q}{10}}
\]</span></p>
<p>Thus, when a given nucleotide <span class="math inline">\(X\)</span> is observed with the quality score <span class="math inline">\(Q\)</span>. The probability that <span class="math inline">\(X\)</span> is really an <span class="math inline">\(X\)</span> is :</p>
<p><span class="math display">\[
P(X=X) = 1 - P_E
\]</span></p>
<p>Otherwise, <span class="math inline">\(X\)</span> is actually one of the three other possible nucleotides (<span class="math inline">\(X_{E1}\)</span>, <span class="math inline">\(X_{E2}\)</span> or <span class="math inline">\(X_{E3}\)</span>). If we suppose that the three reading error have the same probability :</p>
<p><span class="math display">\[
P(X=X_{E1}) = P(X=X_{E3}) = P(X=X_{E3}) = \frac{P_E}{3}
\]</span></p>
<p>At each position in an alignment where the two nucleotides <span class="math inline">\(X_1\)</span> and <span class="math inline">\(X_2\)</span> face each other (not a gapped position), the probability of a true match varies depending on whether <span class="math inline">\(X_1=X_2\)</span>, an observed match, or <span class="math inline">\(X_1 \neq X_2\)</span>, an observed mismatch.</p>
<p><strong>Probability of a true match when <span class="math inline">\(X_1=X_2\)</span></strong></p>
<p>That probability can be divided in two parts. First <span class="math inline">\(X_1\)</span> and <span class="math inline">\(X_2\)</span> have been correctly read. The corresponding probability is :</p>
<p><span class="math display">\[
\begin{aligned}
P_{TM} &amp;= (1- PE_1)(1-PE_2)\\
&amp;=(1 - 10^{-\frac{Q_1}{10} } )(1 - 10^{-\frac{Q_2}{10}} )
\end{aligned}
\]</span></p>
<p>Secondly, a match can occure if the true nucleotides read as <span class="math inline">\(X_1\)</span> and <span class="math inline">\(X_2\)</span> are not <span class="math inline">\(X_1\)</span> and <span class="math inline">\(X_2\)</span> but identical.</p>
<p><span class="math display">\[
\begin{aligned}
P(X_1==X_{E1}) \cap P(X_2==X_{E1}) &amp;= \frac{P_{E1} P_{E2}}{9} \\
P(X_1==X_{Ex}) \cap P(X_2==X_{Ex}) &amp; = \frac{P_{E1} P_{E2}}{3}
\end{aligned}
\]</span></p>
<p>The probability of a true match between <span class="math inline">\(X_1\)</span> and <span class="math inline">\(X_2\)</span> when <span class="math inline">\(X_1 = X_2\)</span> an observed match :</p>
<p><span class="math display">\[
\begin{aligned}
P(MATCH | X_1 = X_2) = (1- PE_1)(1-PE_2) + \frac{P_{E1} P_{E2}}{3}
\end{aligned}
\]</span></p>
<p><strong>Probability of a true match when <span class="math inline">\(X_1 \neq X_2\)</span></strong></p>
<p>That probability can be divided in three parts.</p>
<ol type="a">
<li><span class="math inline">\(X_1\)</span> has been correctly read and <span class="math inline">\(X_2\)</span> is a sequencing error and is actually equal to <span class="math inline">\(X_1\)</span>. <span class="math display">\[
P_a = (1-P_{E1})\frac{P_{E2}}{3}
\]</span></li>
<li><span class="math inline">\(X_2\)</span> has been correctly read and <span class="math inline">\(X_1\)</span> is a sequencing error and is actually equal to <span class="math inline">\(X_2\)</span>. <span class="math display">\[
P_b = (1-P_{E2})\frac{P_{E1}}{3}
\]</span></li>
<li><span class="math inline">\(X_1\)</span> and <span class="math inline">\(X_2\)</span> corresponds to sequencing error but are actually the same base <span class="math inline">\(X_{Ex}\)</span> <span class="math display">\[
P_c = 2\frac{P_{E1} P_{E2}}{9}
\]</span></li>
</ol>
<p>Consequently : <span class="math display">\[
\begin{aligned}
P(MATCH | X_1 \neq X_2) = (1-P_{E1})\frac{P_{E2}}{3} + (1-P_{E2})\frac{P_{E1}}{3} + 2\frac{P_{E1} P_{E2}}{9}
\end{aligned}
\]</span></p>
<p><strong>Probability of a match under the random model</strong></p>
<p>The second considered model is a pure random model where every base is equiprobable, hence having a probability of occurrence of a nucleotide equals <span class="math inline">\(0.25\)</span>. Under that hypothesis</p>
<p><span class="math display">\[
P(MATCH | \text{Random model}) = 0.25
\]</span></p>
<p><strong>The score is a log ration of likelyhood</strong></p>
<p>Score is define as the logarithm of the ratio between the likelyhood of the observations considering the sequencer error model over tha likelyhood u</p>
<div class="cell">
<div class="cell-output-display">
<div class="quarto-figure quarto-figure-center">
<figure class="figure">
<p><img src="commands_files/figure-html/unnamed-chunk-1-1.png" class="img-fluid figure-img" width="672"></p>
<p></p><figcaption class="figure-caption">Evolution of the match and mismatch scores when the quality of base is 20 while the second range from 10 to 40.</figcaption><p></p>
</figure>
</div>
</div>
</div>
</section>
</section>
<section id="obimultiplex" class="level3" data-number="4.7.2">
<h3 data-number="4.7.2" class="anchored" data-anchor-id="obimultiplex"><span class="header-section-number">4.7.2</span> <code>obimultiplex</code></h3>
<blockquote class="blockquote">
<p>Replace the <code>ngsfilter</code> original <em>OBITools</em></p>
</blockquote>
</section>
<section id="obicomplement" class="level3" data-number="4.7.3">
<h3 data-number="4.7.3" class="anchored" data-anchor-id="obicomplement"><span class="header-section-number">4.7.3</span> <code>obicomplement</code></h3>
</section>
<section id="obiclean" class="level3" data-number="4.7.4">
<h3 data-number="4.7.4" class="anchored" data-anchor-id="obiclean"><span class="header-section-number">4.7.4</span> <code>obiclean</code></h3>
</section>
<section id="obiuniq" class="level3" data-number="4.7.5">
<h3 data-number="4.7.5" class="anchored" data-anchor-id="obiuniq"><span class="header-section-number">4.7.5</span> <code>obiuniq</code></h3>
</section>
</section>
<section id="sequence-sampling-and-filtering" class="level2" data-number="4.8">
<h2 data-number="4.8" class="anchored" data-anchor-id="sequence-sampling-and-filtering"><span class="header-section-number">4.8</span> Sequence sampling and filtering</h2>
<section id="obigrep" class="level3" data-number="4.8.1">
<h3 data-number="4.8.1" class="anchored" data-anchor-id="obigrep"><span class="header-section-number">4.8.1</span> <code>obigrep</code></h3>
</section>
</section>
<section id="utilities" class="level2" data-number="4.9">
<h2 data-number="4.9" class="anchored" data-anchor-id="utilities"><span class="header-section-number">4.9</span> Utilities</h2>
<section id="obicount" class="level3" data-number="4.9.1">
<h3 data-number="4.9.1" class="anchored" data-anchor-id="obicount"><span class="header-section-number">4.9.1</span> <code>obicount</code></h3>
<p><code>obicount</code> counts the number of sequence records, the sum of the <code>count</code> attributes, and the sum of the length of all the sequences.</p>
<p><em>Example:</em></p>
<div class="sourceCode" id="cb3"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="ex">obicount</span> seq.fasta </span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>Prints the number of sequence records contained in the <code>seq.fasta</code> file and the sum of their <code>count</code> attributes.</p>
<p><em>Options specific to the command</em></p>
<ul>
<li><code>--reads|-r</code> Prints read counts.</li>
<li><code>--symbols|-s</code> Prints symbol counts.</li>
<li><code>--variants|-v</code> Prints variant counts.</li>
</ul>
</section>
<section id="obidistribute" class="level3" data-number="4.9.2">
<h3 data-number="4.9.2" class="anchored" data-anchor-id="obidistribute"><span class="header-section-number">4.9.2</span> <code>obidistribute</code></h3>
</section>
<section id="obifind" class="level3" data-number="4.9.3">
<h3 data-number="4.9.3" class="anchored" data-anchor-id="obifind"><span class="header-section-number">4.9.3</span> <code>obifind</code></h3>
<blockquote class="blockquote">
<p>Replace the <code>ecofind</code> original <em>OBITools.</em></p>
</blockquote>
<div id="refs" class="references csl-bib-body hanging-indent" role="doc-bibliography" style="display: none">
<div id="ref-Lipman1985-hw" class="csl-entry" role="doc-biblioentry">
Lipman, D J, and W R Pearson. 1985. <span><span class="nocase">Rapid and sensitive protein similarity searches</span>.”</span> <em>Science</em> 227 (4693): 143541. <a href="http://www.ncbi.nlm.nih.gov/pubmed/2983426">http://www.ncbi.nlm.nih.gov/pubmed/2983426</a>.
</div>
</div>
</section>
</section>
</main> <!-- /main -->
<script id="quarto-html-after-body" type="application/javascript">
window.document.addEventListener("DOMContentLoaded", function (event) {
const toggleBodyColorMode = (bsSheetEl) => {
const mode = bsSheetEl.getAttribute("data-mode");
const bodyEl = window.document.querySelector("body");
if (mode === "dark") {
bodyEl.classList.add("quarto-dark");
bodyEl.classList.remove("quarto-light");
} else {
bodyEl.classList.add("quarto-light");
bodyEl.classList.remove("quarto-dark");
}
}
const toggleBodyColorPrimary = () => {
const bsSheetEl = window.document.querySelector("link#quarto-bootstrap");
if (bsSheetEl) {
toggleBodyColorMode(bsSheetEl);
}
}
toggleBodyColorPrimary();
const icon = "";
const anchorJS = new window.AnchorJS();
anchorJS.options = {
placement: 'right',
icon: icon
};
anchorJS.add('.anchored');
const clipboard = new window.ClipboardJS('.code-copy-button', {
target: function(trigger) {
return trigger.previousElementSibling;
}
});
clipboard.on('success', function(e) {
// button target
const button = e.trigger;
// don't keep focus
button.blur();
// flash "checked"
button.classList.add('code-copy-button-checked');
var currentTitle = button.getAttribute("title");
button.setAttribute("title", "Copied!");
let tooltip;
if (window.bootstrap) {
button.setAttribute("data-bs-toggle", "tooltip");
button.setAttribute("data-bs-placement", "left");
button.setAttribute("data-bs-title", "Copied!");
tooltip = new bootstrap.Tooltip(button,
{ trigger: "manual",
customClass: "code-copy-button-tooltip",
offset: [0, -8]});
tooltip.show();
}
setTimeout(function() {
if (tooltip) {
tooltip.hide();
button.removeAttribute("data-bs-title");
button.removeAttribute("data-bs-toggle");
button.removeAttribute("data-bs-placement");
}
button.setAttribute("title", currentTitle);
button.classList.remove('code-copy-button-checked');
}, 1000);
// clear code selection
e.clearSelection();
});
function tippyHover(el, contentFn) {
const config = {
allowHTML: true,
content: contentFn,
maxWidth: 500,
delay: 100,
arrow: false,
appendTo: function(el) {
return el.parentElement;
},
interactive: true,
interactiveBorder: 10,
theme: 'quarto',
placement: 'bottom-start'
};
window.tippy(el, config);
}
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
for (var i=0; i<noterefs.length; i++) {
const ref = noterefs[i];
tippyHover(ref, function() {
// use id or data attribute instead here
let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
try { href = new URL(href).hash; } catch {}
const id = href.replace(/^#\/?/, "");
const note = window.document.getElementById(id);
return note.innerHTML;
});
}
const findCites = (el) => {
const parentEl = el.parentElement;
if (parentEl) {
const cites = parentEl.dataset.cites;
if (cites) {
return {
el,
cites: cites.split(' ')
};
} else {
return findCites(el.parentElement)
}
} else {
return undefined;
}
};
var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
for (var i=0; i<bibliorefs.length; i++) {
const ref = bibliorefs[i];
const citeInfo = findCites(ref);
if (citeInfo) {
tippyHover(citeInfo.el, function() {
var popup = window.document.createElement('div');
citeInfo.cites.forEach(function(cite) {
var citeDiv = window.document.createElement('div');
citeDiv.classList.add('hanging-indent');
citeDiv.classList.add('csl-entry');
var biblioDiv = window.document.getElementById('ref-' + cite);
if (biblioDiv) {
citeDiv.innerHTML = biblioDiv.innerHTML;
}
popup.appendChild(citeDiv);
});
return popup.innerHTML;
});
}
}
});
</script>
<nav class="page-navigation">
<div class="nav-page nav-page-previous">
<a href="./tutorial.html" class="pagination-link">
<i class="bi bi-arrow-left-short"></i> <span class="nav-page-text"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">OBITools V4 Tutorial</span></span>
</a>
</div>
<div class="nav-page nav-page-next">
<a href="./library.html" class="pagination-link">
<span class="nav-page-text"><span class="chapter-number">5</span>&nbsp; <span class="chapter-title">The GO <em>OBITools</em> library</span></span> <i class="bi bi-arrow-right-short"></i>
</a>
</div>
</nav>
</div> <!-- /content -->
</body></html>

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1,171 +0,0 @@
/* quarto syntax highlight colors */
:root {
--quarto-hl-ot-color: #003B4F;
--quarto-hl-at-color: #657422;
--quarto-hl-ss-color: #20794D;
--quarto-hl-an-color: #5E5E5E;
--quarto-hl-fu-color: #4758AB;
--quarto-hl-st-color: #20794D;
--quarto-hl-cf-color: #003B4F;
--quarto-hl-op-color: #5E5E5E;
--quarto-hl-er-color: #AD0000;
--quarto-hl-bn-color: #AD0000;
--quarto-hl-al-color: #AD0000;
--quarto-hl-va-color: #111111;
--quarto-hl-bu-color: inherit;
--quarto-hl-ex-color: inherit;
--quarto-hl-pp-color: #AD0000;
--quarto-hl-in-color: #5E5E5E;
--quarto-hl-vs-color: #20794D;
--quarto-hl-wa-color: #5E5E5E;
--quarto-hl-do-color: #5E5E5E;
--quarto-hl-im-color: #00769E;
--quarto-hl-ch-color: #20794D;
--quarto-hl-dt-color: #AD0000;
--quarto-hl-fl-color: #AD0000;
--quarto-hl-co-color: #5E5E5E;
--quarto-hl-cv-color: #5E5E5E;
--quarto-hl-cn-color: #8f5902;
--quarto-hl-sc-color: #5E5E5E;
--quarto-hl-dv-color: #AD0000;
--quarto-hl-kw-color: #003B4F;
}
/* other quarto variables */
:root {
--quarto-font-monospace: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
}
pre > code.sourceCode > span {
color: #003B4F;
}
code span {
color: #003B4F;
}
code.sourceCode > span {
color: #003B4F;
}
div.sourceCode,
div.sourceCode pre.sourceCode {
color: #003B4F;
}
code span.ot {
color: #003B4F;
}
code span.at {
color: #657422;
}
code span.ss {
color: #20794D;
}
code span.an {
color: #5E5E5E;
}
code span.fu {
color: #4758AB;
}
code span.st {
color: #20794D;
}
code span.cf {
color: #003B4F;
}
code span.op {
color: #5E5E5E;
}
code span.er {
color: #AD0000;
}
code span.bn {
color: #AD0000;
}
code span.al {
color: #AD0000;
}
code span.va {
color: #111111;
}
code span.pp {
color: #AD0000;
}
code span.in {
color: #5E5E5E;
}
code span.vs {
color: #20794D;
}
code span.wa {
color: #5E5E5E;
font-style: italic;
}
code span.do {
color: #5E5E5E;
font-style: italic;
}
code span.im {
color: #00769E;
}
code span.ch {
color: #20794D;
}
code span.dt {
color: #AD0000;
}
code span.fl {
color: #AD0000;
}
code span.co {
color: #5E5E5E;
}
code span.cv {
color: #5E5E5E;
font-style: italic;
}
code span.cn {
color: #8f5902;
}
code span.sc {
color: #5E5E5E;
}
code span.dv {
color: #AD0000;
}
code span.kw {
color: #003B4F;
}
.prevent-inlining {
content: "</";
}
/*# sourceMappingURL=debc5d5d77c3f9108843748ff7464032.css.map */

View File

@ -1,770 +0,0 @@
const sectionChanged = new CustomEvent("quarto-sectionChanged", {
detail: {},
bubbles: true,
cancelable: false,
composed: false,
});
window.document.addEventListener("DOMContentLoaded", function (_event) {
const tocEl = window.document.querySelector('nav.toc-active[role="doc-toc"]');
const sidebarEl = window.document.getElementById("quarto-sidebar");
const leftTocEl = window.document.getElementById("quarto-sidebar-toc-left");
const marginSidebarEl = window.document.getElementById(
"quarto-margin-sidebar"
);
// function to determine whether the element has a previous sibling that is active
const prevSiblingIsActiveLink = (el) => {
const sibling = el.previousElementSibling;
if (sibling && sibling.tagName === "A") {
return sibling.classList.contains("active");
} else {
return false;
}
};
// fire slideEnter for bootstrap tab activations (for htmlwidget resize behavior)
function fireSlideEnter(e) {
const event = window.document.createEvent("Event");
event.initEvent("slideenter", true, true);
window.document.dispatchEvent(event);
}
const tabs = window.document.querySelectorAll('a[data-bs-toggle="tab"]');
tabs.forEach((tab) => {
tab.addEventListener("shown.bs.tab", fireSlideEnter);
});
// fire slideEnter for tabby tab activations (for htmlwidget resize behavior)
document.addEventListener("tabby", fireSlideEnter, false);
// Track scrolling and mark TOC links as active
// get table of contents and sidebar (bail if we don't have at least one)
const tocLinks = tocEl
? [...tocEl.querySelectorAll("a[data-scroll-target]")]
: [];
const makeActive = (link) => tocLinks[link].classList.add("active");
const removeActive = (link) => tocLinks[link].classList.remove("active");
const removeAllActive = () =>
[...Array(tocLinks.length).keys()].forEach((link) => removeActive(link));
// activate the anchor for a section associated with this TOC entry
tocLinks.forEach((link) => {
link.addEventListener("click", () => {
if (link.href.indexOf("#") !== -1) {
const anchor = link.href.split("#")[1];
const heading = window.document.querySelector(
`[data-anchor-id=${anchor}]`
);
if (heading) {
// Add the class
heading.classList.add("reveal-anchorjs-link");
// function to show the anchor
const handleMouseout = () => {
heading.classList.remove("reveal-anchorjs-link");
heading.removeEventListener("mouseout", handleMouseout);
};
// add a function to clear the anchor when the user mouses out of it
heading.addEventListener("mouseout", handleMouseout);
}
}
});
});
const sections = tocLinks.map((link) => {
const target = link.getAttribute("data-scroll-target");
if (target.startsWith("#")) {
return window.document.getElementById(decodeURI(`${target.slice(1)}`));
} else {
return window.document.querySelector(decodeURI(`${target}`));
}
});
const sectionMargin = 200;
let currentActive = 0;
// track whether we've initialized state the first time
let init = false;
const updateActiveLink = () => {
// The index from bottom to top (e.g. reversed list)
let sectionIndex = -1;
if (
window.innerHeight + window.pageYOffset >=
window.document.body.offsetHeight
) {
sectionIndex = 0;
} else {
sectionIndex = [...sections].reverse().findIndex((section) => {
if (section) {
return window.pageYOffset >= section.offsetTop - sectionMargin;
} else {
return false;
}
});
}
if (sectionIndex > -1) {
const current = sections.length - sectionIndex - 1;
if (current !== currentActive) {
removeAllActive();
currentActive = current;
makeActive(current);
if (init) {
window.dispatchEvent(sectionChanged);
}
init = true;
}
}
};
const inHiddenRegion = (top, bottom, hiddenRegions) => {
for (const region of hiddenRegions) {
if (top <= region.bottom && bottom >= region.top) {
return true;
}
}
return false;
};
const categorySelector = "header.quarto-title-block .quarto-category";
const activateCategories = (href) => {
// Find any categories
// Surround them with a link pointing back to:
// #category=Authoring
try {
const categoryEls = window.document.querySelectorAll(categorySelector);
for (const categoryEl of categoryEls) {
const categoryText = categoryEl.textContent;
if (categoryText) {
const link = `${href}#category=${encodeURIComponent(categoryText)}`;
const linkEl = window.document.createElement("a");
linkEl.setAttribute("href", link);
for (const child of categoryEl.childNodes) {
linkEl.append(child);
}
categoryEl.appendChild(linkEl);
}
}
} catch {
// Ignore errors
}
};
function hasTitleCategories() {
return window.document.querySelector(categorySelector) !== null;
}
function offsetRelativeUrl(url) {
const offset = getMeta("quarto:offset");
return offset ? offset + url : url;
}
function offsetAbsoluteUrl(url) {
const offset = getMeta("quarto:offset");
const baseUrl = new URL(offset, window.location);
const projRelativeUrl = url.replace(baseUrl, "");
if (projRelativeUrl.startsWith("/")) {
return projRelativeUrl;
} else {
return "/" + projRelativeUrl;
}
}
// read a meta tag value
function getMeta(metaName) {
const metas = window.document.getElementsByTagName("meta");
for (let i = 0; i < metas.length; i++) {
if (metas[i].getAttribute("name") === metaName) {
return metas[i].getAttribute("content");
}
}
return "";
}
async function findAndActivateCategories() {
const currentPagePath = offsetAbsoluteUrl(window.location.href);
const response = await fetch(offsetRelativeUrl("listings.json"));
if (response.status == 200) {
return response.json().then(function (listingPaths) {
const listingHrefs = [];
for (const listingPath of listingPaths) {
const pathWithoutLeadingSlash = listingPath.listing.substring(1);
for (const item of listingPath.items) {
if (
item === currentPagePath ||
item === currentPagePath + "index.html"
) {
// Resolve this path against the offset to be sure
// we already are using the correct path to the listing
// (this adjusts the listing urls to be rooted against
// whatever root the page is actually running against)
const relative = offsetRelativeUrl(pathWithoutLeadingSlash);
const baseUrl = window.location;
const resolvedPath = new URL(relative, baseUrl);
listingHrefs.push(resolvedPath.pathname);
break;
}
}
}
// Look up the tree for a nearby linting and use that if we find one
const nearestListing = findNearestParentListing(
offsetAbsoluteUrl(window.location.pathname),
listingHrefs
);
if (nearestListing) {
activateCategories(nearestListing);
} else {
// See if the referrer is a listing page for this item
const referredRelativePath = offsetAbsoluteUrl(document.referrer);
const referrerListing = listingHrefs.find((listingHref) => {
const isListingReferrer =
listingHref === referredRelativePath ||
listingHref === referredRelativePath + "index.html";
return isListingReferrer;
});
if (referrerListing) {
// Try to use the referrer if possible
activateCategories(referrerListing);
} else if (listingHrefs.length > 0) {
// Otherwise, just fall back to the first listing
activateCategories(listingHrefs[0]);
}
}
});
}
}
if (hasTitleCategories()) {
findAndActivateCategories();
}
const findNearestParentListing = (href, listingHrefs) => {
if (!href || !listingHrefs) {
return undefined;
}
// Look up the tree for a nearby linting and use that if we find one
const relativeParts = href.substring(1).split("/");
while (relativeParts.length > 0) {
const path = relativeParts.join("/");
for (const listingHref of listingHrefs) {
if (listingHref.startsWith(path)) {
return listingHref;
}
}
relativeParts.pop();
}
return undefined;
};
const manageSidebarVisiblity = (el, placeholderDescriptor) => {
let isVisible = true;
return (hiddenRegions) => {
if (el === null) {
return;
}
// Find the last element of the TOC
const lastChildEl = el.lastElementChild;
if (lastChildEl) {
// Find the top and bottom o the element that is being managed
const elTop = el.offsetTop;
const elBottom =
elTop + lastChildEl.offsetTop + lastChildEl.offsetHeight;
// Converts the sidebar to a menu
const convertToMenu = () => {
for (const child of el.children) {
child.style.opacity = 0;
child.style.overflow = "hidden";
}
const toggleContainer = window.document.createElement("div");
toggleContainer.style.width = "100%";
toggleContainer.classList.add("zindex-over-content");
toggleContainer.classList.add("quarto-sidebar-toggle");
toggleContainer.classList.add("headroom-target"); // Marks this to be managed by headeroom
toggleContainer.id = placeholderDescriptor.id;
toggleContainer.style.position = "fixed";
const toggleIcon = window.document.createElement("i");
toggleIcon.classList.add("quarto-sidebar-toggle-icon");
toggleIcon.classList.add("bi");
toggleIcon.classList.add("bi-caret-down-fill");
const toggleTitle = window.document.createElement("div");
const titleEl = window.document.body.querySelector(
placeholderDescriptor.titleSelector
);
if (titleEl) {
toggleTitle.append(titleEl.innerText, toggleIcon);
}
toggleTitle.classList.add("zindex-over-content");
toggleTitle.classList.add("quarto-sidebar-toggle-title");
toggleContainer.append(toggleTitle);
const toggleContents = window.document.createElement("div");
toggleContents.classList = el.classList;
toggleContents.classList.add("zindex-over-content");
toggleContents.classList.add("quarto-sidebar-toggle-contents");
for (const child of el.children) {
if (child.id === "toc-title") {
continue;
}
const clone = child.cloneNode(true);
clone.style.opacity = 1;
clone.style.display = null;
toggleContents.append(clone);
}
toggleContents.style.height = "0px";
toggleContainer.append(toggleContents);
el.parentElement.prepend(toggleContainer);
// Process clicks
let tocShowing = false;
// Allow the caller to control whether this is dismissed
// when it is clicked (e.g. sidebar navigation supports
// opening and closing the nav tree, so don't dismiss on click)
const clickEl = placeholderDescriptor.dismissOnClick
? toggleContainer
: toggleTitle;
const closeToggle = () => {
if (tocShowing) {
toggleContainer.classList.remove("expanded");
toggleContents.style.height = "0px";
tocShowing = false;
}
};
const positionToggle = () => {
// position the element (top left of parent, same width as parent)
const elRect = el.getBoundingClientRect();
toggleContainer.style.left = `${elRect.left}px`;
toggleContainer.style.top = `${elRect.top}px`;
toggleContainer.style.width = `${elRect.width}px`;
};
// Get rid of any expanded toggle if the user scrolls
window.document.addEventListener(
"scroll",
throttle(() => {
closeToggle();
}, 50)
);
// Handle positioning of the toggle
window.addEventListener(
"resize",
throttle(() => {
positionToggle();
}, 50)
);
positionToggle();
// Process the click
clickEl.onclick = () => {
if (!tocShowing) {
toggleContainer.classList.add("expanded");
toggleContents.style.height = null;
tocShowing = true;
} else {
closeToggle();
}
};
};
// Converts a sidebar from a menu back to a sidebar
const convertToSidebar = () => {
for (const child of el.children) {
child.style.opacity = 1;
child.style.overflow = null;
}
const placeholderEl = window.document.getElementById(
placeholderDescriptor.id
);
if (placeholderEl) {
placeholderEl.remove();
}
el.classList.remove("rollup");
};
if (isReaderMode()) {
convertToMenu();
isVisible = false;
} else {
if (!isVisible) {
// If the element is current not visible reveal if there are
// no conflicts with overlay regions
if (!inHiddenRegion(elTop, elBottom, hiddenRegions)) {
convertToSidebar();
isVisible = true;
}
} else {
// If the element is visible, hide it if it conflicts with overlay regions
// and insert a placeholder toggle (or if we're in reader mode)
if (inHiddenRegion(elTop, elBottom, hiddenRegions)) {
convertToMenu();
isVisible = false;
}
}
}
}
};
};
// Find any conflicting margin elements and add margins to the
// top to prevent overlap
const marginChildren = window.document.querySelectorAll(
".column-margin.column-container > * "
);
nexttick(() => {
let lastBottom = 0;
for (const marginChild of marginChildren) {
const top = marginChild.getBoundingClientRect().top + window.scrollY;
if (top < lastBottom) {
const margin = lastBottom - top;
marginChild.style.marginTop = `${margin}px`;
}
const styles = window.getComputedStyle(marginChild);
const marginTop = parseFloat(styles["marginTop"]);
lastBottom = top + marginChild.getBoundingClientRect().height + marginTop;
}
});
// Manage the visibility of the toc and the sidebar
const marginScrollVisibility = manageSidebarVisiblity(marginSidebarEl, {
id: "quarto-toc-toggle",
titleSelector: "#toc-title",
dismissOnClick: true,
});
const sidebarScrollVisiblity = manageSidebarVisiblity(sidebarEl, {
id: "quarto-sidebarnav-toggle",
titleSelector: ".title",
dismissOnClick: false,
});
let tocLeftScrollVisibility;
if (leftTocEl) {
tocLeftScrollVisibility = manageSidebarVisiblity(leftTocEl, {
id: "quarto-lefttoc-toggle",
titleSelector: "#toc-title",
dismissOnClick: true,
});
}
// Find the first element that uses formatting in special columns
const conflictingEls = window.document.body.querySelectorAll(
'[class^="column-"], [class*=" column-"], aside, [class*="margin-caption"], [class*=" margin-caption"], [class*="margin-ref"], [class*=" margin-ref"]'
);
// Filter all the possibly conflicting elements into ones
// the do conflict on the left or ride side
const arrConflictingEls = Array.from(conflictingEls);
const leftSideConflictEls = arrConflictingEls.filter((el) => {
if (el.tagName === "ASIDE") {
return false;
}
return Array.from(el.classList).find((className) => {
return (
className !== "column-body" &&
className.startsWith("column-") &&
!className.endsWith("right") &&
!className.endsWith("container") &&
className !== "column-margin"
);
});
});
const rightSideConflictEls = arrConflictingEls.filter((el) => {
if (el.tagName === "ASIDE") {
return true;
}
const hasMarginCaption = Array.from(el.classList).find((className) => {
return className == "margin-caption";
});
if (hasMarginCaption) {
return true;
}
return Array.from(el.classList).find((className) => {
return (
className !== "column-body" &&
!className.endsWith("container") &&
className.startsWith("column-") &&
!className.endsWith("left")
);
});
});
const kOverlapPaddingSize = 10;
function toRegions(els) {
return els.map((el) => {
const top =
el.getBoundingClientRect().top +
document.documentElement.scrollTop -
kOverlapPaddingSize;
return {
top,
bottom: top + el.scrollHeight + 2 * kOverlapPaddingSize,
};
});
}
const hideOverlappedSidebars = () => {
marginScrollVisibility(toRegions(rightSideConflictEls));
sidebarScrollVisiblity(toRegions(leftSideConflictEls));
if (tocLeftScrollVisibility) {
tocLeftScrollVisibility(toRegions(leftSideConflictEls));
}
};
window.quartoToggleReader = () => {
// Applies a slow class (or removes it)
// to update the transition speed
const slowTransition = (slow) => {
const manageTransition = (id, slow) => {
const el = document.getElementById(id);
if (el) {
if (slow) {
el.classList.add("slow");
} else {
el.classList.remove("slow");
}
}
};
manageTransition("TOC", slow);
manageTransition("quarto-sidebar", slow);
};
const readerMode = !isReaderMode();
setReaderModeValue(readerMode);
// If we're entering reader mode, slow the transition
if (readerMode) {
slowTransition(readerMode);
}
highlightReaderToggle(readerMode);
hideOverlappedSidebars();
// If we're exiting reader mode, restore the non-slow transition
if (!readerMode) {
slowTransition(!readerMode);
}
};
const highlightReaderToggle = (readerMode) => {
const els = document.querySelectorAll(".quarto-reader-toggle");
if (els) {
els.forEach((el) => {
if (readerMode) {
el.classList.add("reader");
} else {
el.classList.remove("reader");
}
});
}
};
const setReaderModeValue = (val) => {
if (window.location.protocol !== "file:") {
window.localStorage.setItem("quarto-reader-mode", val);
} else {
localReaderMode = val;
}
};
const isReaderMode = () => {
if (window.location.protocol !== "file:") {
return window.localStorage.getItem("quarto-reader-mode") === "true";
} else {
return localReaderMode;
}
};
let localReaderMode = null;
// Walk the TOC and collapse/expand nodes
// Nodes are expanded if:
// - they are top level
// - they have children that are 'active' links
// - they are directly below an link that is 'active'
const walk = (el, depth) => {
// Tick depth when we enter a UL
if (el.tagName === "UL") {
depth = depth + 1;
}
// It this is active link
let isActiveNode = false;
if (el.tagName === "A" && el.classList.contains("active")) {
isActiveNode = true;
}
// See if there is an active child to this element
let hasActiveChild = false;
for (child of el.children) {
hasActiveChild = walk(child, depth) || hasActiveChild;
}
// Process the collapse state if this is an UL
if (el.tagName === "UL") {
if (depth === 1 || hasActiveChild || prevSiblingIsActiveLink(el)) {
el.classList.remove("collapse");
} else {
el.classList.add("collapse");
}
// untick depth when we leave a UL
depth = depth - 1;
}
return hasActiveChild || isActiveNode;
};
// walk the TOC and expand / collapse any items that should be shown
if (tocEl) {
walk(tocEl, 0);
updateActiveLink();
}
// Throttle the scroll event and walk peridiocally
window.document.addEventListener(
"scroll",
throttle(() => {
if (tocEl) {
updateActiveLink();
walk(tocEl, 0);
}
if (!isReaderMode()) {
hideOverlappedSidebars();
}
}, 5)
);
window.addEventListener(
"resize",
throttle(() => {
if (!isReaderMode()) {
hideOverlappedSidebars();
}
}, 10)
);
hideOverlappedSidebars();
highlightReaderToggle(isReaderMode());
});
// grouped tabsets
window.addEventListener("pageshow", (_event) => {
function getTabSettings() {
const data = localStorage.getItem("quarto-persistent-tabsets-data");
if (!data) {
localStorage.setItem("quarto-persistent-tabsets-data", "{}");
return {};
}
if (data) {
return JSON.parse(data);
}
}
function setTabSettings(data) {
localStorage.setItem(
"quarto-persistent-tabsets-data",
JSON.stringify(data)
);
}
function setTabState(groupName, groupValue) {
const data = getTabSettings();
data[groupName] = groupValue;
setTabSettings(data);
}
function toggleTab(tab, active) {
const tabPanelId = tab.getAttribute("aria-controls");
const tabPanel = document.getElementById(tabPanelId);
if (active) {
tab.classList.add("active");
tabPanel.classList.add("active");
} else {
tab.classList.remove("active");
tabPanel.classList.remove("active");
}
}
function toggleAll(selectedGroup, selectorsToSync) {
for (const [thisGroup, tabs] of Object.entries(selectorsToSync)) {
const active = selectedGroup === thisGroup;
for (const tab of tabs) {
toggleTab(tab, active);
}
}
}
function findSelectorsToSyncByLanguage() {
const result = {};
const tabs = Array.from(
document.querySelectorAll(`div[data-group] a[id^='tabset-']`)
);
for (const item of tabs) {
const div = item.parentElement.parentElement.parentElement;
const group = div.getAttribute("data-group");
if (!result[group]) {
result[group] = {};
}
const selectorsToSync = result[group];
const value = item.innerHTML;
if (!selectorsToSync[value]) {
selectorsToSync[value] = [];
}
selectorsToSync[value].push(item);
}
return result;
}
function setupSelectorSync() {
const selectorsToSync = findSelectorsToSyncByLanguage();
Object.entries(selectorsToSync).forEach(([group, tabSetsByValue]) => {
Object.entries(tabSetsByValue).forEach(([value, items]) => {
items.forEach((item) => {
item.addEventListener("click", (_event) => {
setTabState(group, value);
toggleAll(value, selectorsToSync[group]);
});
});
});
});
return selectorsToSync;
}
const selectorsToSync = setupSelectorSync();
for (const [group, selectedName] of Object.entries(getTabSettings())) {
const selectors = selectorsToSync[group];
// it's possible that stale state gives us empty selections, so we explicitly check here.
if (selectors) {
toggleAll(selectedName, selectors);
}
}
});
function throttle(func, wait) {
let waiting = false;
return function () {
if (!waiting) {
func.apply(this, arguments);
waiting = true;
setTimeout(function () {
waiting = false;
}, wait);
}
};
}
function nexttick(func) {
return setTimeout(func, 0);
}

View File

@ -1 +0,0 @@
.tippy-box[data-animation=fade][data-state=hidden]{opacity:0}[data-tippy-root]{max-width:calc(100vw - 10px)}.tippy-box{position:relative;background-color:#333;color:#fff;border-radius:4px;font-size:14px;line-height:1.4;white-space:normal;outline:0;transition-property:transform,visibility,opacity}.tippy-box[data-placement^=top]>.tippy-arrow{bottom:0}.tippy-box[data-placement^=top]>.tippy-arrow:before{bottom:-7px;left:0;border-width:8px 8px 0;border-top-color:initial;transform-origin:center top}.tippy-box[data-placement^=bottom]>.tippy-arrow{top:0}.tippy-box[data-placement^=bottom]>.tippy-arrow:before{top:-7px;left:0;border-width:0 8px 8px;border-bottom-color:initial;transform-origin:center bottom}.tippy-box[data-placement^=left]>.tippy-arrow{right:0}.tippy-box[data-placement^=left]>.tippy-arrow:before{border-width:8px 0 8px 8px;border-left-color:initial;right:-7px;transform-origin:center left}.tippy-box[data-placement^=right]>.tippy-arrow{left:0}.tippy-box[data-placement^=right]>.tippy-arrow:before{left:-7px;border-width:8px 8px 8px 0;border-right-color:initial;transform-origin:center right}.tippy-box[data-inertia][data-state=visible]{transition-timing-function:cubic-bezier(.54,1.5,.38,1.11)}.tippy-arrow{width:16px;height:16px;color:#333}.tippy-arrow:before{content:"";position:absolute;border-color:transparent;border-style:solid}.tippy-content{position:relative;padding:5px 9px;z-index:1}

File diff suppressed because one or more lines are too long

View File

@ -1,7 +0,0 @@
/*!
* headroom.js v0.12.0 - Give your page some headroom. Hide your header until you need it
* Copyright (c) 2020 Nick Williams - http://wicky.nillia.ms/headroom.js
* License: MIT
*/
!function(t,n){"object"==typeof exports&&"undefined"!=typeof module?module.exports=n():"function"==typeof define&&define.amd?define(n):(t=t||self).Headroom=n()}(this,function(){"use strict";function t(){return"undefined"!=typeof window}function d(t){return function(t){return t&&t.document&&function(t){return 9===t.nodeType}(t.document)}(t)?function(t){var n=t.document,o=n.body,s=n.documentElement;return{scrollHeight:function(){return Math.max(o.scrollHeight,s.scrollHeight,o.offsetHeight,s.offsetHeight,o.clientHeight,s.clientHeight)},height:function(){return t.innerHeight||s.clientHeight||o.clientHeight},scrollY:function(){return void 0!==t.pageYOffset?t.pageYOffset:(s||o.parentNode||o).scrollTop}}}(t):function(t){return{scrollHeight:function(){return Math.max(t.scrollHeight,t.offsetHeight,t.clientHeight)},height:function(){return Math.max(t.offsetHeight,t.clientHeight)},scrollY:function(){return t.scrollTop}}}(t)}function n(t,s,e){var n,o=function(){var n=!1;try{var t={get passive(){n=!0}};window.addEventListener("test",t,t),window.removeEventListener("test",t,t)}catch(t){n=!1}return n}(),i=!1,r=d(t),l=r.scrollY(),a={};function c(){var t=Math.round(r.scrollY()),n=r.height(),o=r.scrollHeight();a.scrollY=t,a.lastScrollY=l,a.direction=l<t?"down":"up",a.distance=Math.abs(t-l),a.isOutOfBounds=t<0||o<t+n,a.top=t<=s.offset[a.direction],a.bottom=o<=t+n,a.toleranceExceeded=a.distance>s.tolerance[a.direction],e(a),l=t,i=!1}function h(){i||(i=!0,n=requestAnimationFrame(c))}var u=!!o&&{passive:!0,capture:!1};return t.addEventListener("scroll",h,u),c(),{destroy:function(){cancelAnimationFrame(n),t.removeEventListener("scroll",h,u)}}}function o(t){return t===Object(t)?t:{down:t,up:t}}function s(t,n){n=n||{},Object.assign(this,s.options,n),this.classes=Object.assign({},s.options.classes,n.classes),this.elem=t,this.tolerance=o(this.tolerance),this.offset=o(this.offset),this.initialised=!1,this.frozen=!1}return s.prototype={constructor:s,init:function(){return s.cutsTheMustard&&!this.initialised&&(this.addClass("initial"),this.initialised=!0,setTimeout(function(t){t.scrollTracker=n(t.scroller,{offset:t.offset,tolerance:t.tolerance},t.update.bind(t))},100,this)),this},destroy:function(){this.initialised=!1,Object.keys(this.classes).forEach(this.removeClass,this),this.scrollTracker.destroy()},unpin:function(){!this.hasClass("pinned")&&this.hasClass("unpinned")||(this.addClass("unpinned"),this.removeClass("pinned"),this.onUnpin&&this.onUnpin.call(this))},pin:function(){this.hasClass("unpinned")&&(this.addClass("pinned"),this.removeClass("unpinned"),this.onPin&&this.onPin.call(this))},freeze:function(){this.frozen=!0,this.addClass("frozen")},unfreeze:function(){this.frozen=!1,this.removeClass("frozen")},top:function(){this.hasClass("top")||(this.addClass("top"),this.removeClass("notTop"),this.onTop&&this.onTop.call(this))},notTop:function(){this.hasClass("notTop")||(this.addClass("notTop"),this.removeClass("top"),this.onNotTop&&this.onNotTop.call(this))},bottom:function(){this.hasClass("bottom")||(this.addClass("bottom"),this.removeClass("notBottom"),this.onBottom&&this.onBottom.call(this))},notBottom:function(){this.hasClass("notBottom")||(this.addClass("notBottom"),this.removeClass("bottom"),this.onNotBottom&&this.onNotBottom.call(this))},shouldUnpin:function(t){return"down"===t.direction&&!t.top&&t.toleranceExceeded},shouldPin:function(t){return"up"===t.direction&&t.toleranceExceeded||t.top},addClass:function(t){this.elem.classList.add.apply(this.elem.classList,this.classes[t].split(" "))},removeClass:function(t){this.elem.classList.remove.apply(this.elem.classList,this.classes[t].split(" "))},hasClass:function(t){return this.classes[t].split(" ").every(function(t){return this.classList.contains(t)},this.elem)},update:function(t){t.isOutOfBounds||!0!==this.frozen&&(t.top?this.top():this.notTop(),t.bottom?this.bottom():this.notBottom(),this.shouldUnpin(t)?this.unpin():this.shouldPin(t)&&this.pin())}},s.options={tolerance:{up:0,down:0},offset:0,scroller:t()?window:null,classes:{frozen:"headroom--frozen",pinned:"headroom--pinned",unpinned:"headroom--unpinned",top:"headroom--top",notTop:"headroom--not-top",bottom:"headroom--bottom",notBottom:"headroom--not-bottom",initial:"headroom"}},s.cutsTheMustard=!!(t()&&function(){}.bind&&"classList"in document.documentElement&&Object.assign&&Object.keys&&requestAnimationFrame),s});

View File

@ -1,221 +0,0 @@
const headroomChanged = new CustomEvent("quarto-hrChanged", {
detail: {},
bubbles: true,
cancelable: false,
composed: false,
});
window.document.addEventListener("DOMContentLoaded", function () {
let init = false;
function throttle(func, wait) {
var timeout;
return function () {
const context = this;
const args = arguments;
const later = function () {
clearTimeout(timeout);
timeout = null;
func.apply(context, args);
};
if (!timeout) {
timeout = setTimeout(later, wait);
}
};
}
function headerOffset() {
// Set an offset if there is are fixed top navbar
const headerEl = window.document.querySelector("header.fixed-top");
if (headerEl) {
return headerEl.clientHeight;
} else {
return 0;
}
}
function footerOffset() {
const footerEl = window.document.querySelector("footer.footer");
if (footerEl) {
return footerEl.clientHeight;
} else {
return 0;
}
}
function updateDocumentOffsetWithoutAnimation() {
updateDocumentOffset(false);
}
function updateDocumentOffset(animated) {
// set body offset
const topOffset = headerOffset();
const bodyOffset = topOffset + footerOffset();
const bodyEl = window.document.body;
bodyEl.setAttribute("data-bs-offset", topOffset);
bodyEl.style.paddingTop = topOffset + "px";
// deal with sidebar offsets
const sidebars = window.document.querySelectorAll(
".sidebar, .headroom-target"
);
sidebars.forEach((sidebar) => {
if (!animated) {
sidebar.classList.add("notransition");
// Remove the no transition class after the animation has time to complete
setTimeout(function () {
sidebar.classList.remove("notransition");
}, 201);
}
if (window.Headroom && sidebar.classList.contains("sidebar-unpinned")) {
sidebar.style.top = "0";
sidebar.style.maxHeight = "100vh";
} else {
sidebar.style.top = topOffset + "px";
sidebar.style.maxHeight = "calc(100vh - " + topOffset + "px)";
}
});
// allow space for footer
const mainContainer = window.document.querySelector(".quarto-container");
if (mainContainer) {
mainContainer.style.minHeight = "calc(100vh - " + bodyOffset + "px)";
}
// link offset
let linkStyle = window.document.querySelector("#quarto-target-style");
if (!linkStyle) {
linkStyle = window.document.createElement("style");
window.document.head.appendChild(linkStyle);
}
while (linkStyle.firstChild) {
linkStyle.removeChild(linkStyle.firstChild);
}
if (topOffset > 0) {
linkStyle.appendChild(
window.document.createTextNode(`
section:target::before {
content: "";
display: block;
height: ${topOffset}px;
margin: -${topOffset}px 0 0;
}`)
);
}
if (init) {
window.dispatchEvent(headroomChanged);
}
init = true;
}
// initialize headroom
var header = window.document.querySelector("#quarto-header");
if (header && window.Headroom) {
const headroom = new window.Headroom(header, {
tolerance: 5,
onPin: function () {
const sidebars = window.document.querySelectorAll(
".sidebar, .headroom-target"
);
sidebars.forEach((sidebar) => {
sidebar.classList.remove("sidebar-unpinned");
});
updateDocumentOffset();
},
onUnpin: function () {
const sidebars = window.document.querySelectorAll(
".sidebar, .headroom-target"
);
sidebars.forEach((sidebar) => {
sidebar.classList.add("sidebar-unpinned");
});
updateDocumentOffset();
},
});
headroom.init();
let frozen = false;
window.quartoToggleHeadroom = function () {
if (frozen) {
headroom.unfreeze();
frozen = false;
} else {
headroom.freeze();
frozen = true;
}
};
}
// Observe size changed for the header
const headerEl = window.document.querySelector("header.fixed-top");
if (headerEl && window.ResizeObserver) {
const observer = new window.ResizeObserver(
updateDocumentOffsetWithoutAnimation
);
observer.observe(headerEl, {
attributes: true,
childList: true,
characterData: true,
});
} else {
window.addEventListener(
"resize",
throttle(updateDocumentOffsetWithoutAnimation, 50)
);
}
setTimeout(updateDocumentOffsetWithoutAnimation, 250);
// fixup index.html links if we aren't on the filesystem
if (window.location.protocol !== "file:") {
const links = window.document.querySelectorAll("a");
for (let i = 0; i < links.length; i++) {
links[i].href = links[i].href.replace(/\/index\.html/, "/");
}
// Fixup any sharing links that require urls
// Append url to any sharing urls
const sharingLinks = window.document.querySelectorAll(
"a.sidebar-tools-main-item"
);
for (let i = 0; i < sharingLinks.length; i++) {
const sharingLink = sharingLinks[i];
const href = sharingLink.getAttribute("href");
if (href) {
sharingLink.setAttribute(
"href",
href.replace("|url|", window.location.href)
);
}
}
// Scroll the active navigation item into view, if necessary
const navSidebar = window.document.querySelector("nav#quarto-sidebar");
if (navSidebar) {
// Find the active item
const activeItem = navSidebar.querySelector("li.sidebar-item a.active");
if (activeItem) {
// Wait for the scroll height and height to resolve by observing size changes on the
// nav element that is scrollable
const resizeObserver = new ResizeObserver((_entries) => {
// The bottom of the element
const elBottom = activeItem.offsetTop;
const viewBottom = navSidebar.scrollTop + navSidebar.clientHeight;
// The element height and scroll height are the same, then we are still loading
if (viewBottom !== navSidebar.scrollHeight) {
// Determine if the item isn't visible and scroll to it
if (elBottom >= viewBottom) {
navSidebar.scrollTop = elBottom;
}
// stop observing now since we've completed the scroll
resizeObserver.unobserve(navSidebar);
}
});
resizeObserver.observe(navSidebar);
}
}
}
});

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@ -1,35 +0,0 @@
project:
type: book
engine: jupyter
book:
title: "OBITools V4"
author: "Eric Coissac"
date: "1/17/2023"
chapters:
- index.qmd
- intro.qmd
- formats.qmd
- tutorial.qmd
- commands.qmd
- library.qmd
- annexes.qmd
- references.qmd
bibliography: book.bib
execute:
freeze: auto
format:
html:
theme: zephyr
pdf:
documentclass: scrreprt
epub: default

1
doc/book/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/.quarto/

13
doc/book/Makefile Normal file
View File

@ -0,0 +1,13 @@
all: html pdf epub
.PHONY: html pdf epub all
html:
quarto render --to html
pdf:
quarto render --to pdf
epub:
quarto render --to epub

2765
doc/book/OBITools-V4.tex Normal file

File diff suppressed because it is too large Load Diff

View File

Before

Width:  |  Height:  |  Size: 7.6 KiB

After

Width:  |  Height:  |  Size: 7.6 KiB

56975
doc/book/TAXO/citations.dmp Normal file

File diff suppressed because one or more lines are too long

468117
doc/book/TAXO/delnodes.dmp Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,12 @@
0 | BCT | Bacteria | |
1 | INV | Invertebrates | |
2 | MAM | Mammals | |
3 | PHG | Phages | |
4 | PLN | Plants and Fungi | |
5 | PRI | Primates | |
6 | ROD | Rodents | |
7 | SYN | Synthetic and Chimeric | |
8 | UNA | Unassigned | No species nodes should inherit this division assignment |
9 | VRL | Viruses | |
10 | VRT | Vertebrates | |
11 | ENV | Environmental samples | Anonymous sequences cloned directly from the environment |

358
doc/book/TAXO/gc.prt Normal file
View File

@ -0,0 +1,358 @@
--**************************************************************************
-- This is the NCBI genetic code table
-- Initial base data set from Andrzej Elzanowski while at PIR International
-- Addition of Eubacterial and Alternative Yeast by J.Ostell at NCBI
-- Base 1-3 of each codon have been added as comments to facilitate
-- readability at the suggestion of Peter Rice, EMBL
-- Later additions by Taxonomy Group staff at NCBI
--
-- Version 4.6
-- Renamed genetic code 24 to Rhabdopleuridae Mitochondrial
--
-- Version 4.5
-- Added Cephalodiscidae mitochondrial genetic code 33
--
-- Version 4.4
-- Added GTG as start codon for genetic code 3
-- Added Balanophoraceae plastid genetic code 32
--
-- Version 4.3
-- Change to CTG -> Leu in genetic codes 27, 28, 29, 30
--
-- Version 4.2
-- Added Karyorelict nuclear genetic code 27
-- Added Condylostoma nuclear genetic code 28
-- Added Mesodinium nuclear genetic code 29
-- Added Peritrich nuclear genetic code 30
-- Added Blastocrithidia nuclear genetic code 31
--
-- Version 4.1
-- Added Pachysolen tannophilus nuclear genetic code 26
--
-- Version 4.0
-- Updated version to reflect numerous undocumented changes:
-- Corrected start codons for genetic code 25
-- Name of new genetic code is Candidate Division SR1 and Gracilibacteria
-- Added candidate division SR1 nuclear genetic code 25
-- Added GTG as start codon for genetic code 24
-- Corrected Pterobranchia Mitochondrial genetic code (24)
-- Added genetic code 24, Pterobranchia Mitochondrial
-- Genetic code 11 is now Bacterial, Archaeal and Plant Plastid
-- Fixed capitalization of mitochondrial in codes 22 and 23
-- Added GTG, ATA, and TTG as alternative start codons to code 13
--
-- Version 3.9
-- Code 14 differs from code 9 only by translating UAA to Tyr rather than
-- STOP. A recent study (Telford et al, 2000) has found no evidence that
-- the codon UAA codes for Tyr in the flatworms, but other opinions exist.
-- There are very few GenBank records that are translated with code 14,
-- but a test translation shows that retranslating these records with code
-- 9 can cause premature terminations. Therefore, GenBank will maintain
-- code 14 until further information becomes available.
--
-- Version 3.8
-- Added GTG start to Echinoderm mitochondrial code, code 9
--
-- Version 3.7
-- Added code 23 Thraustochytrium mitochondrial code
-- formerly OGMP code 93
-- submitted by Gertraude Berger, Ph.D.
--
-- Version 3.6
-- Added code 22 TAG-Leu, TCA-stop
-- found in mitochondrial DNA of Scenedesmus obliquus
-- submitted by Gertraude Berger, Ph.D.
-- Organelle Genome Megasequencing Program, Univ Montreal
--
-- Version 3.5
-- Added code 21, Trematode Mitochondrial
-- (as deduced from: Garey & Wolstenholme,1989; Ohama et al, 1990)
-- Added code 16, Chlorophycean Mitochondrial
-- (TAG can translated to Leucine instaed to STOP in chlorophyceans
-- and fungi)
--
-- Version 3.4
-- Added CTG,TTG as allowed alternate start codons in Standard code.
-- Prats et al. 1989, Hann et al. 1992
--
-- Version 3.3 - 10/13/95
-- Added alternate intiation codon ATC to code 5
-- based on complete mitochondrial genome of honeybee
-- Crozier and Crozier (1993)
--
-- Version 3.2 - 6/24/95
-- Code Comments
-- 10 Alternative Ciliate Macronuclear renamed to Euplotid Macro...
-- 15 Blepharisma Macro.. code added
-- 5 Invertebrate Mito.. GTG allowed as alternate initiator
-- 11 Eubacterial renamed to Bacterial as most alternate starts
-- have been found in Archea
--
--
-- Version 3.1 - 1995
-- Updated as per Andrzej Elzanowski at NCBI
-- Complete documentation in NCBI toolkit documentation
-- Note: 2 genetic codes have been deleted
--
-- Old id Use id - Notes
--
-- id 7 id 4 - Kinetoplast code now merged in code id 4
-- id 8 id 1 - all plant chloroplast differences due to RNA edit
--
--
--*************************************************************************
Genetic-code-table ::= {
{
name "Standard" ,
name "SGC0" ,
id 1 ,
ncbieaa "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "---M------**--*----M---------------M----------------------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
},
{
name "Vertebrate Mitochondrial" ,
name "SGC1" ,
id 2 ,
ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG",
sncbieaa "----------**--------------------MMMM----------**---M------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
},
{
name "Yeast Mitochondrial" ,
name "SGC2" ,
id 3 ,
ncbieaa "FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "----------**----------------------MM---------------M------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
},
{
name "Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate
Mitochondrial; Mycoplasma; Spiroplasma" ,
name "SGC3" ,
id 4 ,
ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "--MM------**-------M------------MMMM---------------M------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
},
{
name "Invertebrate Mitochondrial" ,
name "SGC4" ,
id 5 ,
ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG",
sncbieaa "---M------**--------------------MMMM---------------M------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
},
{
name "Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear" ,
name "SGC5" ,
id 6 ,
ncbieaa "FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "--------------*--------------------M----------------------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
},
{
name "Echinoderm Mitochondrial; Flatworm Mitochondrial" ,
name "SGC8" ,
id 9 ,
ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
sncbieaa "----------**-----------------------M---------------M------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
},
{
name "Euplotid Nuclear" ,
name "SGC9" ,
id 10 ,
ncbieaa "FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "----------**-----------------------M----------------------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
},
{
name "Bacterial, Archaeal and Plant Plastid" ,
id 11 ,
ncbieaa "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "---M------**--*----M------------MMMM---------------M------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
},
{
name "Alternative Yeast Nuclear" ,
id 12 ,
ncbieaa "FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "----------**--*----M---------------M----------------------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
},
{
name "Ascidian Mitochondrial" ,
id 13 ,
ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG",
sncbieaa "---M------**----------------------MM---------------M------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
},
{
name "Alternative Flatworm Mitochondrial" ,
id 14 ,
ncbieaa "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
sncbieaa "-----------*-----------------------M----------------------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Blepharisma Macronuclear" ,
id 15 ,
ncbieaa "FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "----------*---*--------------------M----------------------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Chlorophycean Mitochondrial" ,
id 16 ,
ncbieaa "FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "----------*---*--------------------M----------------------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Trematode Mitochondrial" ,
id 21 ,
ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
sncbieaa "----------**-----------------------M---------------M------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Scenedesmus obliquus Mitochondrial" ,
id 22 ,
ncbieaa "FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "------*---*---*--------------------M----------------------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Thraustochytrium Mitochondrial" ,
id 23 ,
ncbieaa "FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "--*-------**--*-----------------M--M---------------M------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Rhabdopleuridae Mitochondrial" ,
id 24 ,
ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG",
sncbieaa "---M------**-------M---------------M---------------M------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Candidate Division SR1 and Gracilibacteria" ,
id 25 ,
ncbieaa "FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "---M------**-----------------------M---------------M------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Pachysolen tannophilus Nuclear" ,
id 26 ,
ncbieaa "FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "----------**--*----M---------------M----------------------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Karyorelict Nuclear" ,
id 27 ,
ncbieaa "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "--------------*--------------------M----------------------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Condylostoma Nuclear" ,
id 28 ,
ncbieaa "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "----------**--*--------------------M----------------------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Mesodinium Nuclear" ,
id 29 ,
ncbieaa "FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "--------------*--------------------M----------------------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Peritrich Nuclear" ,
id 30 ,
ncbieaa "FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "--------------*--------------------M----------------------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Blastocrithidia Nuclear" ,
id 31 ,
ncbieaa "FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "----------**-----------------------M----------------------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Balanophoraceae Plastid" ,
id 32 ,
ncbieaa "FFLLSSSSYY*WCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "---M------*---*----M------------MMMM---------------M------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Cephalodiscidae Mitochondrial" ,
id 33 ,
ncbieaa "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG",
sncbieaa "---M-------*-------M---------------M---------------M------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
}
}

28
doc/book/TAXO/gencode.dmp Normal file
View File

@ -0,0 +1,28 @@
0 | | Unspecified | | |
1 | | Standard | FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ---M------**--*----M---------------M---------------------------- |
2 | | Vertebrate Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG | ----------**--------------------MMMM----------**---M------------ |
3 | | Yeast Mitochondrial | FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ----------**----------------------MM---------------M------------ |
4 | | Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate Mitochondrial; Mycoplasma; Spiroplasma | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | --MM------**-------M------------MMMM---------------M------------ |
5 | | Invertebrate Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG | ---M------**--------------------MMMM---------------M------------ |
6 | | Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear | FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | --------------*--------------------M---------------------------- |
9 | | Echinoderm Mitochondrial; Flatworm Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG | ----------**-----------------------M---------------M------------ |
10 | | Euplotid Nuclear | FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ----------**-----------------------M---------------------------- |
11 | | Bacterial, Archaeal and Plant Plastid | FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ---M------**--*----M------------MMMM---------------M------------ |
12 | | Alternative Yeast Nuclear | FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ----------**--*----M---------------M---------------------------- |
13 | | Ascidian Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG | ---M------**----------------------MM---------------M------------ |
14 | | Alternative Flatworm Mitochondrial | FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG | -----------*-----------------------M---------------------------- |
15 | | Blepharisma Macronuclear | FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ----------*---*--------------------M---------------------------- |
16 | | Chlorophycean Mitochondrial | FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ----------*---*--------------------M---------------------------- |
21 | | Trematode Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG | ----------**-----------------------M---------------M------------ |
22 | | Scenedesmus obliquus mitochondrial | FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ------*---*---*--------------------M---------------------------- |
23 | | Thraustochytrium mitochondrial code | FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | --*-------**--*-----------------M--M---------------M------------ |
24 | | Rhabdopleuridae Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG | ---M------**-------M---------------M---------------M------------ |
25 | | Candidate Division SR1 and Gracilibacteria | FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ---M------**-----------------------M---------------M------------ |
26 | | Pachysolen tannophilus Nuclear | FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ----------**--*----M---------------M---------------------------- |
27 | | Karyorelict Nuclear | FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | --------------*--------------------M---------------------------- |
28 | | Condylostoma Nuclear | FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ----------**--*--------------------M---------------------------- |
29 | | Mesodinium Nuclear | FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | --------------*--------------------M---------------------------- |
30 | | Peritrich Nuclear | FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | --------------*--------------------M---------------------------- |
31 | | Blastocrithidia Nuclear | FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ----------**-----------------------M---------------------------- |
32 | | Balanophoraceae Plastid | FFLLSSSSYY*WCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ---M------*---*----M------------MMMM---------------M------------ |
33 | | Cephalodiscidae Mitochondrial | FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG | ---M-------*-------M---------------M---------------M------------ |

70199
doc/book/TAXO/merged.dmp Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1 @@
b3c80c27c81098c8127a9136dd6fda8f65ef8c19

View File

@ -0,0 +1 @@
12d48656aa2d4d0b558355ad48b8856463944b0b

61
doc/book/TAXO/readme.txt Normal file
View File

@ -0,0 +1,61 @@
*.dmp files are bcp-like dump from GenBank taxonomy database.
General information.
Field terminator is "\t|\t"
Row terminator is "\t|\n"
nodes.dmp file consists of taxonomy nodes. The description for each node includes the following
fields:
tax_id -- node id in GenBank taxonomy database
parent tax_id -- parent node id in GenBank taxonomy database
rank -- rank of this node (superkingdom, kingdom, ...)
embl code -- locus-name prefix; not unique
division id -- see division.dmp file
inherited div flag (1 or 0) -- 1 if node inherits division from parent
genetic code id -- see gencode.dmp file
inherited GC flag (1 or 0) -- 1 if node inherits genetic code from parent
mitochondrial genetic code id -- see gencode.dmp file
inherited MGC flag (1 or 0) -- 1 if node inherits mitochondrial gencode from parent
GenBank hidden flag (1 or 0) -- 1 if name is suppressed in GenBank entry lineage
hidden subtree root flag (1 or 0) -- 1 if this subtree has no sequence data yet
comments -- free-text comments and citations
Taxonomy names file (names.dmp):
tax_id -- the id of node associated with this name
name_txt -- name itself
unique name -- the unique variant of this name if name not unique
name class -- (synonym, common name, ...)
Divisions file (division.dmp):
division id -- taxonomy database division id
division cde -- GenBank division code (three characters)
division name -- e.g. BCT, PLN, VRT, MAM, PRI...
comments
Genetic codes file (gencode.dmp):
genetic code id -- GenBank genetic code id
abbreviation -- genetic code name abbreviation
name -- genetic code name
cde -- translation table for this genetic code
starts -- start codons for this genetic code
Deleted nodes file (delnodes.dmp):
tax_id -- deleted node id
Merged nodes file (merged.dmp):
old_tax_id -- id of nodes which has been merged
new_tax_id -- id of nodes which is result of merging
Citations file (citations.dmp):
cit_id -- the unique id of citation
cit_key -- citation key
pubmed_id -- unique id in PubMed database (0 if not in PubMed)
medline_id -- unique id in MedLine database (0 if not in MedLine)
url -- URL associated with citation
text -- any text (usually article name and authors).
-- The following characters are escaped in this text by a backslash:
-- newline (appear as "\n"),
-- tab character ("\t"),
-- double quotes ('\"'),
-- backslash character ("\\").
taxid_list -- list of node ids separated by a single space

View File

@ -0,0 +1,20 @@
{
"hash": "e0f95c0479d4275bf95ac924807680f6",
"result": {
"markdown": "# Computations on sequences\n\n## `obipairing`\n\n> Replace the `illuminapairedends` original *OBITools*\n\n### Alignment procedure {.unnumbered}\n\n`obipairing` is introducing a new alignment algorithm compared to the `illuminapairedend` command of the `OBITools V2`.\nNethertheless this new algorithm has been design to produce the same results than the previous, except in very few cases.\n\nThe new algorithm is a two-step procedure. First, a FASTN-type algorithm [@Lipman1985-hw] identifies the best offset between the two matched readings. This identifies the region of overlap. \n\nIn the second step, the matching regions of the two reads are extracted along with a flanking sequence of $\\Delta$ base pairs. The two subsequences are then aligned using a \"one side free end-gap\" dynamic programming algorithm. This latter step is only called if at least one mismatch is detected by the FASTP step. \n\nUnless the similarity between the two reads at their overlap region is very low, the addition of the flanking regions in the second step of the alignment ensures the same alignment as if the dynamic programming alignment was performed on the full reads. \n\n### The scoring system {.unnumbered}\n\nIn the dynamic programming step, the match and mismatch scores take into account the quality scores of the two aligned nucleotides. By taking these into account, the probability of a true match can be calculated for each aligned base pair. \n\nIf we consider a nucleotide read with a quality score $Q$, the probability of misreading this base ($P_E$) is :\n$$\nP_E = 10^{-\\frac{Q}{10}}\n$$\n\nThus, when a given nucleotide $X$ is observed with the quality score $Q$. The probability that $X$ is really an $X$ is :\n\n$$\nP(X=X) = 1 - P_E\n$$\n\nOtherwise, $X$ is actually one of the three other possible nucleotides ($X_{E1}$, $X_{E2}$ or $X_{E3}$). If we suppose that the three reading error have the same probability :\n\n$$\nP(X=X_{E1}) = P(X=X_{E3}) = P(X=X_{E3}) = \\frac{P_E}{3}\n$$\n\nAt each position in an alignment where the two nucleotides $X_1$ and $X_2$ face each other (not a gapped position), the probability of a true match varies depending on whether $X_1=X_2$, an observed match, or $X_1 \\neq X_2$, an observed mismatch. \n\n**Probability of a true match when $X_1=X_2$**\n\nThat probability can be divided in two parts. First $X_1$ and $X_2$ have been correctly read. The corresponding probability is :\n\n$$\n\\begin{aligned}\nP_{TM} &= (1- PE_1)(1-PE_2)\\\\ \n &=(1 - 10^{-\\frac{Q_1}{10} } )(1 - 10^{-\\frac{Q_2}{10}} )\n\\end{aligned}\n$$\n\nSecondly, a match can occure if the true nucleotides read as $X_1$ and $X_2$ are not $X_1$ and $X_2$ but identical.\n\n$$\n\\begin{aligned}\nP(X_1==X_{E1}) \\cap P(X_2==X_{E1}) &= \\frac{P_{E1} P_{E2}}{9} \\\\\nP(X_1==X_{Ex}) \\cap P(X_2==X_{Ex}) & = \\frac{P_{E1} P_{E2}}{3}\n\\end{aligned}\n$$\n\nThe probability of a true match between $X_1$ and $X_2$ when $X_1 = X_2$ an observed match :\n\n$$\n\\begin{aligned}\nP(MATCH | X_1 = X_2) = (1- PE_1)(1-PE_2) + \\frac{P_{E1} P_{E2}}{3}\n\\end{aligned}\n$$\n\n**Probability of a true match when $X_1 \\neq X_2$**\n\nThat probability can be divided in three parts. \n\na. $X_1$ has been correctly read and $X_2$ is a sequencing error and is actually equal to $X_1$. \n$$\nP_a = (1-P_{E1})\\frac{P_{E2}}{3}\n$$\na. $X_2$ has been correctly read and $X_1$ is a sequencing error and is actually equal to $X_2$. \n$$\nP_b = (1-P_{E2})\\frac{P_{E1}}{3}\n$$\na. $X_1$ and $X_2$ corresponds to sequencing error but are actually the same base $X_{Ex}$\n$$\nP_c = 2\\frac{P_{E1} P_{E2}}{9}\n$$\n\nConsequently : \n$$\n\\begin{aligned}\nP(MATCH | X_1 \\neq X_2) = (1-P_{E1})\\frac{P_{E2}}{3} + (1-P_{E2})\\frac{P_{E1}}{3} + 2\\frac{P_{E1} P_{E2}}{9}\n\\end{aligned}\n$$\n\n**Probability of a match under the random model**\n\nThe second considered model is a pure random model where every base is equiprobable, hence having a probability of occurrence of a nucleotide equals $0.25$. Under that hypothesis \n\n$$\nP(MATCH | \\text{Random model}) = 0.25\n$$\n\n**The score is a log ration of likelyhood**\n\nScore is define as the logarithm of the ratio between the likelyhood of the observations considering the sequencer error model over tha likelyhood u\n\n\n\n::: {.cell}\n::: {.cell-output-display}\n![Evolution of the match and mismatch scores when the quality of base is 20 while the second range from 10 to 40.](comm_computation_files/figure-epub/unnamed-chunk-1-1.png)\n:::\n:::\n\n\n\n## `obimultiplex`\n\n> Replace the `ngsfilter` original *OBITools*\n\n## `obicomplement`\n\n## `obiclean`\n\n## `obiuniq`\n\n",
"supporting": [
"comm_computation_files/figure-epub"
],
"filters": [
"rmarkdown/pagebreak.lua"
],
"includes": {},
"engineDependencies": {
"knitr": [
"{\"type\":\"list\",\"attributes\":{},\"value\":[]}"
]
},
"preserve": null,
"postProcess": false
}
}

View File

@ -0,0 +1,16 @@
{
"hash": "e0f95c0479d4275bf95ac924807680f6",
"result": {
"markdown": "# Computations on sequences\n\n## `obipairing`\n\n> Replace the `illuminapairedends` original *OBITools*\n\n### Alignment procedure {.unnumbered}\n\n`obipairing` is introducing a new alignment algorithm compared to the `illuminapairedend` command of the `OBITools V2`.\nNethertheless this new algorithm has been design to produce the same results than the previous, except in very few cases.\n\nThe new algorithm is a two-step procedure. First, a FASTN-type algorithm [@Lipman1985-hw] identifies the best offset between the two matched readings. This identifies the region of overlap. \n\nIn the second step, the matching regions of the two reads are extracted along with a flanking sequence of $\\Delta$ base pairs. The two subsequences are then aligned using a \"one side free end-gap\" dynamic programming algorithm. This latter step is only called if at least one mismatch is detected by the FASTP step. \n\nUnless the similarity between the two reads at their overlap region is very low, the addition of the flanking regions in the second step of the alignment ensures the same alignment as if the dynamic programming alignment was performed on the full reads. \n\n### The scoring system {.unnumbered}\n\nIn the dynamic programming step, the match and mismatch scores take into account the quality scores of the two aligned nucleotides. By taking these into account, the probability of a true match can be calculated for each aligned base pair. \n\nIf we consider a nucleotide read with a quality score $Q$, the probability of misreading this base ($P_E$) is :\n$$\nP_E = 10^{-\\frac{Q}{10}}\n$$\n\nThus, when a given nucleotide $X$ is observed with the quality score $Q$. The probability that $X$ is really an $X$ is :\n\n$$\nP(X=X) = 1 - P_E\n$$\n\nOtherwise, $X$ is actually one of the three other possible nucleotides ($X_{E1}$, $X_{E2}$ or $X_{E3}$). If we suppose that the three reading error have the same probability :\n\n$$\nP(X=X_{E1}) = P(X=X_{E3}) = P(X=X_{E3}) = \\frac{P_E}{3}\n$$\n\nAt each position in an alignment where the two nucleotides $X_1$ and $X_2$ face each other (not a gapped position), the probability of a true match varies depending on whether $X_1=X_2$, an observed match, or $X_1 \\neq X_2$, an observed mismatch. \n\n**Probability of a true match when $X_1=X_2$**\n\nThat probability can be divided in two parts. First $X_1$ and $X_2$ have been correctly read. The corresponding probability is :\n\n$$\n\\begin{aligned}\nP_{TM} &= (1- PE_1)(1-PE_2)\\\\ \n &=(1 - 10^{-\\frac{Q_1}{10} } )(1 - 10^{-\\frac{Q_2}{10}} )\n\\end{aligned}\n$$\n\nSecondly, a match can occure if the true nucleotides read as $X_1$ and $X_2$ are not $X_1$ and $X_2$ but identical.\n\n$$\n\\begin{aligned}\nP(X_1==X_{E1}) \\cap P(X_2==X_{E1}) &= \\frac{P_{E1} P_{E2}}{9} \\\\\nP(X_1==X_{Ex}) \\cap P(X_2==X_{Ex}) & = \\frac{P_{E1} P_{E2}}{3}\n\\end{aligned}\n$$\n\nThe probability of a true match between $X_1$ and $X_2$ when $X_1 = X_2$ an observed match :\n\n$$\n\\begin{aligned}\nP(MATCH | X_1 = X_2) = (1- PE_1)(1-PE_2) + \\frac{P_{E1} P_{E2}}{3}\n\\end{aligned}\n$$\n\n**Probability of a true match when $X_1 \\neq X_2$**\n\nThat probability can be divided in three parts. \n\na. $X_1$ has been correctly read and $X_2$ is a sequencing error and is actually equal to $X_1$. \n$$\nP_a = (1-P_{E1})\\frac{P_{E2}}{3}\n$$\na. $X_2$ has been correctly read and $X_1$ is a sequencing error and is actually equal to $X_2$. \n$$\nP_b = (1-P_{E2})\\frac{P_{E1}}{3}\n$$\na. $X_1$ and $X_2$ corresponds to sequencing error but are actually the same base $X_{Ex}$\n$$\nP_c = 2\\frac{P_{E1} P_{E2}}{9}\n$$\n\nConsequently : \n$$\n\\begin{aligned}\nP(MATCH | X_1 \\neq X_2) = (1-P_{E1})\\frac{P_{E2}}{3} + (1-P_{E2})\\frac{P_{E1}}{3} + 2\\frac{P_{E1} P_{E2}}{9}\n\\end{aligned}\n$$\n\n**Probability of a match under the random model**\n\nThe second considered model is a pure random model where every base is equiprobable, hence having a probability of occurrence of a nucleotide equals $0.25$. Under that hypothesis \n\n$$\nP(MATCH | \\text{Random model}) = 0.25\n$$\n\n**The score is a log ration of likelyhood**\n\nScore is define as the logarithm of the ratio between the likelyhood of the observations considering the sequencer error model over tha likelyhood u\n\n\n\n::: {.cell}\n::: {.cell-output-display}\n![Evolution of the match and mismatch scores when the quality of base is 20 while the second range from 10 to 40.](comm_computation_files/figure-html/unnamed-chunk-1-1.png){width=672}\n:::\n:::\n\n\n\n## `obimultiplex`\n\n> Replace the `ngsfilter` original *OBITools*\n\n## `obicomplement`\n\n## `obiclean`\n\n## `obiuniq`\n\n",
"supporting": [
"comm_computation_files/figure-html"
],
"filters": [
"rmarkdown/pagebreak.lua"
],
"includes": {},
"engineDependencies": {},
"preserve": {},
"postProcess": true
}
}

View File

@ -0,0 +1,20 @@
{
"hash": "e0f95c0479d4275bf95ac924807680f6",
"result": {
"markdown": "# Computations on sequences\n\n## `obipairing`\n\n> Replace the `illuminapairedends` original *OBITools*\n\n### Alignment procedure {.unnumbered}\n\n`obipairing` is introducing a new alignment algorithm compared to the `illuminapairedend` command of the `OBITools V2`.\nNethertheless this new algorithm has been design to produce the same results than the previous, except in very few cases.\n\nThe new algorithm is a two-step procedure. First, a FASTN-type algorithm [@Lipman1985-hw] identifies the best offset between the two matched readings. This identifies the region of overlap. \n\nIn the second step, the matching regions of the two reads are extracted along with a flanking sequence of $\\Delta$ base pairs. The two subsequences are then aligned using a \"one side free end-gap\" dynamic programming algorithm. This latter step is only called if at least one mismatch is detected by the FASTP step. \n\nUnless the similarity between the two reads at their overlap region is very low, the addition of the flanking regions in the second step of the alignment ensures the same alignment as if the dynamic programming alignment was performed on the full reads. \n\n### The scoring system {.unnumbered}\n\nIn the dynamic programming step, the match and mismatch scores take into account the quality scores of the two aligned nucleotides. By taking these into account, the probability of a true match can be calculated for each aligned base pair. \n\nIf we consider a nucleotide read with a quality score $Q$, the probability of misreading this base ($P_E$) is :\n$$\nP_E = 10^{-\\frac{Q}{10}}\n$$\n\nThus, when a given nucleotide $X$ is observed with the quality score $Q$. The probability that $X$ is really an $X$ is :\n\n$$\nP(X=X) = 1 - P_E\n$$\n\nOtherwise, $X$ is actually one of the three other possible nucleotides ($X_{E1}$, $X_{E2}$ or $X_{E3}$). If we suppose that the three reading error have the same probability :\n\n$$\nP(X=X_{E1}) = P(X=X_{E3}) = P(X=X_{E3}) = \\frac{P_E}{3}\n$$\n\nAt each position in an alignment where the two nucleotides $X_1$ and $X_2$ face each other (not a gapped position), the probability of a true match varies depending on whether $X_1=X_2$, an observed match, or $X_1 \\neq X_2$, an observed mismatch. \n\n**Probability of a true match when $X_1=X_2$**\n\nThat probability can be divided in two parts. First $X_1$ and $X_2$ have been correctly read. The corresponding probability is :\n\n$$\n\\begin{aligned}\nP_{TM} &= (1- PE_1)(1-PE_2)\\\\ \n &=(1 - 10^{-\\frac{Q_1}{10} } )(1 - 10^{-\\frac{Q_2}{10}} )\n\\end{aligned}\n$$\n\nSecondly, a match can occure if the true nucleotides read as $X_1$ and $X_2$ are not $X_1$ and $X_2$ but identical.\n\n$$\n\\begin{aligned}\nP(X_1==X_{E1}) \\cap P(X_2==X_{E1}) &= \\frac{P_{E1} P_{E2}}{9} \\\\\nP(X_1==X_{Ex}) \\cap P(X_2==X_{Ex}) & = \\frac{P_{E1} P_{E2}}{3}\n\\end{aligned}\n$$\n\nThe probability of a true match between $X_1$ and $X_2$ when $X_1 = X_2$ an observed match :\n\n$$\n\\begin{aligned}\nP(MATCH | X_1 = X_2) = (1- PE_1)(1-PE_2) + \\frac{P_{E1} P_{E2}}{3}\n\\end{aligned}\n$$\n\n**Probability of a true match when $X_1 \\neq X_2$**\n\nThat probability can be divided in three parts. \n\na. $X_1$ has been correctly read and $X_2$ is a sequencing error and is actually equal to $X_1$. \n$$\nP_a = (1-P_{E1})\\frac{P_{E2}}{3}\n$$\na. $X_2$ has been correctly read and $X_1$ is a sequencing error and is actually equal to $X_2$. \n$$\nP_b = (1-P_{E2})\\frac{P_{E1}}{3}\n$$\na. $X_1$ and $X_2$ corresponds to sequencing error but are actually the same base $X_{Ex}$\n$$\nP_c = 2\\frac{P_{E1} P_{E2}}{9}\n$$\n\nConsequently : \n$$\n\\begin{aligned}\nP(MATCH | X_1 \\neq X_2) = (1-P_{E1})\\frac{P_{E2}}{3} + (1-P_{E2})\\frac{P_{E1}}{3} + 2\\frac{P_{E1} P_{E2}}{9}\n\\end{aligned}\n$$\n\n**Probability of a match under the random model**\n\nThe second considered model is a pure random model where every base is equiprobable, hence having a probability of occurrence of a nucleotide equals $0.25$. Under that hypothesis \n\n$$\nP(MATCH | \\text{Random model}) = 0.25\n$$\n\n**The score is a log ration of likelyhood**\n\nScore is define as the logarithm of the ratio between the likelyhood of the observations considering the sequencer error model over tha likelyhood u\n\n\n\n::: {.cell}\n::: {.cell-output-display}\n![Evolution of the match and mismatch scores when the quality of base is 20 while the second range from 10 to 40.](comm_computation_files/figure-pdf/unnamed-chunk-1-1.pdf)\n:::\n:::\n\n\n\n## `obimultiplex`\n\n> Replace the `ngsfilter` original *OBITools*\n\n## `obicomplement`\n\n## `obiclean`\n\n## `obiuniq`\n\n",
"supporting": [
"comm_computation_files"
],
"filters": [
"rmarkdown/pagebreak.lua"
],
"includes": {},
"engineDependencies": {
"knitr": [
"{\"type\":\"list\",\"attributes\":{},\"value\":[]}"
]
},
"preserve": null,
"postProcess": false
}
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

View File

Before

Width:  |  Height:  |  Size: 61 KiB

After

Width:  |  Height:  |  Size: 61 KiB

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 61 KiB

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

Before

Width:  |  Height:  |  Size: 16 KiB

After

Width:  |  Height:  |  Size: 16 KiB

View File

Before

Width:  |  Height:  |  Size: 15 KiB

After

Width:  |  Height:  |  Size: 15 KiB

View File

Before

Width:  |  Height:  |  Size: 46 KiB

After

Width:  |  Height:  |  Size: 46 KiB

View File

Before

Width:  |  Height:  |  Size: 45 KiB

After

Width:  |  Height:  |  Size: 45 KiB

53
doc/book/_quarto.yml Normal file
View File

@ -0,0 +1,53 @@
project:
type: book
output-dir: ../build/_book
engine: jupyter
book:
title: "OBITools V4"
author: "Eric Coissac"
date: "1/17/2023"
page-navigation: true
chapters:
- index.qmd
- part: intro.qmd
chapters:
- installation.qmd
- formats.qmd
- tutorial.qmd
- part: commands.qmd
chapters:
- inupt.qmd
- output.qmd
- common_options.qmd
- expressions.qmd
- comm_metabarcode_design.qmd
- comm_reformat.qmd
- comm_annotation.qmd
- comm_computation.qmd
- comm_sampling.qmd
- comm_utilities.qmd
- part: library.qmd
appendices:
- annexes.qmd
- references.qmd
bibliography: ../lib/book.bib
execute:
freeze: auto
format:
html:
theme: zephyr
pdf:
documentclass: scrreprt
keep-tex: true
epub:
html-math-method: mathml

View File

@ -0,0 +1,6 @@
# Sequence annotations
## `obiannotate`
## `obitag`

View File

@ -1,8 +1,10 @@
### `obipairing`
# Computations on sequences
## `obipairing`
> Replace the `illuminapairedends` original *OBITools*
#### Alignment procedure {.unnumbered}
### Alignment procedure {.unnumbered}
`obipairing` is introducing a new alignment algorithm compared to the `illuminapairedend` command of the `OBITools V2`.
Nethertheless this new algorithm has been design to produce the same results than the previous, except in very few cases.
@ -13,7 +15,7 @@ In the second step, the matching regions of the two reads are extracted along wi
Unless the similarity between the two reads at their overlap region is very low, the addition of the flanking regions in the second step of the alignment ensures the same alignment as if the dynamic programming alignment was performed on the full reads.
#### The scoring system {.unnumbered}
### The scoring system {.unnumbered}
In the dynamic programming step, the match and mismatch scores take into account the quality scores of the two aligned nucleotides. By taking these into account, the probability of a true match can be calculated for each aligned base pair.
@ -137,3 +139,15 @@ tibble(Q = 10:40) %>%
geom_line() +
xlab("Q1 (Q2=20)")
```
## `obimultiplex`
> Replace the `ngsfilter` original *OBITools*
## `obicomplement`
## `obiclean`
## `obiuniq`

Binary file not shown.

After

Width:  |  Height:  |  Size: 61 KiB

View File

@ -0,0 +1,6 @@
# Metabarcode design and quality assessment
## `obipcr`
> Replace the `ecoPCR` original *OBITools*

View File

@ -0,0 +1,4 @@
# File format conversions
## `obiconvert`

View File

@ -0,0 +1,25 @@
# Sequence sampling and filtering
## `obigrep` -- filters sequence files according to numerous conditions
{{< include ../lib/descriptions/_obigrep.qmd >}}
### The options usable with `obigrep`
#### Selecting sequences based on their caracteristics
Sequences can be selected on several of their caracteristics, their length, their id, their sequence. Options allow for specifying the condition if selection.
{{< include ../lib/options/selection/_min-count.qmd >}}
{{< include ../lib/options/selection/_max-count.qmd >}}
Example
: Selecting sequence records representing at least five reads in the dataset.
```bash
obigrep -c 5 data_SPER01.fasta > data_norare_SPER01.fasta
```

View File

@ -1,6 +1,6 @@
## Utilities
# Utilities
### `obicount`
## `obicount`
`obicount` counts the number of sequence records, the sum of the ``count`` attributes, and the sum
of the length of all the sequences.
@ -19,8 +19,8 @@ file and the sum of their ``count`` attributes.
- `--symbols|-s` Prints symbol counts.
- `--variants|-v` Prints variant counts.
### `obidistribute`
## `obidistribute`
### `obifind`
## `obifind`
> Replace the `ecofind` original *OBITools.*

2
doc/book/commands.qmd Normal file
View File

@ -0,0 +1,2 @@
# The *OBITools V4* commands

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 61 KiB

View File

@ -0,0 +1,46 @@
# Options common to most of the *OBITools* commands
## Helpful options
{{< include ../lib/options/system/_help.qmd >}}
{{< include ../lib/options/system/_no-progressbar.qmd >}}
## System related options
**Managing parallel execution of tasks**
A new feature of *OBITools* V4 is the ability to run multiple tasks in parallel, reading files, calculating on the data, formatting and writing the results. Each of these tasks can itself be parallelized by dividing the data into batches and running the calculation on several batches in parallel. This allows the overall calculation time of an *OBITools* command to be reduced considerably. The parameters organizing the parallel calculation are determined automatically to use the maximum capacity of your computer. But in some circumstances, it is necessary to override these default settings either to try to optimize the computation on a given machine, or to limit the OBITools to using only a part of the computational capacity. There are two options for doing this.
{{< include ../lib/options/system/_max-cpu.qmd >}}
{{< include ../lib/options/system/_workers.qmd >}}
If your computer has 8 cores, but you want to limit *OBITools* to use only two of them you have several solution:
- If you want to set the limit for a single execution you can use the **--max-cpu** option
```bash
obiconvert --max-cpu 2 --fasta-output data.fastq > data.fasta
```
or you can precede the command by setting the environment variable `OBIMAXCPU`
```bash
OBIMAXCPU=2 obiconvert --fasta-output data.fastq > data.fasta
```
- If you want to set the limit to your complete session, you have to export `OBIMAXCPU`
```bash
export OBIMAXCPU=2
```
all the following OBITools commands will be limited to use at max 2 CPU cores.
- If all the time you want to impose this limit, you must include the above `export`
command in your `.bashrc` file.
**OBITools debuging related options**
{{< include ../lib/options/system/_debug.qmd >}}

56
doc/book/expressions.qmd Normal file
View File

@ -0,0 +1,56 @@
# OBITools expression language
Several OBITools (*e.g.* obigrep, obiannotate) allow the user to specify some simple expressions to compute values or define predicates. This expressions are parsed and evaluated using the [gval](https://pkg.go.dev/github.com/PaesslerAG/gval "Gval (Go eVALuate) for evaluating arbitrary expressions Go-like expressions.") go package, which allows for evaluating go-Like expression.
## Variables usable in the expression
- `sequence` is the sequence object on which the expression is evaluated.
- `annotations`is a map object containing every annotations associated to the currently processed sequence.
## Function defined in the language
### Instrospection functions {.unnumbered}
- `len(x)`is a generic function allowing to retreive the size of a object. It returns
the length of a sequences, the number of element in a map like `annotations`, the number
of elements in an array. The reurned value is an `int`.
### Cast functions {.unnumbered}
- `int(x)` converts if possible the `x` value to an integer value. The function
returns an `int`.
- `numeric(x)` converts if possible the `x` value to a float value. The function
returns a `float`.
- `bool(x)` converts if possible the `x` value to a boolean value. The function
returns a `bool`.
### String related functions {.unnumbered}
- `printf(format,...)` allows to combine several values to build a string. `format` follows the
classical C `printf` syntax. The function returns a `string`.
- `subspc(x)` substitutes every space in the `x` string by the underscore (`_`) character. The function
returns a `string`.
## Accessing to the sequence annotations
The `annotations` variable is a map object containing all the annotations associated to the currently processed sequence. Index of the map are the attribute names. It exists to possibillities to retreive
an annotation. It is possible to use the classical `[]` indexing operator, putting the attribute name
quoted by double quotes between them.
```go
annotations["direction"]
```
The above code retreives the `direction` annotation. A second notation using the dot (`.`) is often
more convenient.
```go
annotations.direction
```
Special attributes of the sequence are accessible only by dedicated methods of the `sequence` object.
- The sequence identifier : `Id()`
- THe sequence definition : `Definition()`

View File

@ -1,11 +1,11 @@
## File formats usable with *OBITools*
# File formats usable with *OBITools*
OBITools manipulate have to manipulate DNA sequence data and taxonomical data.
They can use some supplentary metadata describing the experiment and produce
some stats about the processed DNA data. All the manipulated data are stored in
text files, following standard data format.
## The DNA sequence data
# The DNA sequence data
Sequences can be stored following various format. OBITools knows some of them. The central formats for sequence files manipulated by OBITools scripts are the [`fasta`](#the-fasta-sequence-format) and [`fastq`](#the-fastq-sequence-format) format. OBITools extends the both these formats by specifying a syntax to include in the definition line data qualifying the sequence. All file formats use the `IUPAC` code for encoding nucleotides.
@ -15,7 +15,7 @@ Moreover these two formats that can be used as input and output formats, **OBITo
- [Genbank flat file format](https://www.ncbi.nlm.nih.gov/Sitemap/samplerecord.html)
- [ecoPCR output files](https://pythonhosted.org/OBITools/scripts/ecoPCR.html)
### The IUPAC Code
## The IUPAC Code
The International Union of Pure and Applied Chemistry (IUPAC\_) defined the standard code for representing protein or DNA sequences.
@ -38,7 +38,7 @@ The International Union of Pure and Applied Chemistry (IUPAC\_) defined the stan
| V | A, C, or G (not T, not U) |
| N | Any base (A, C, G, T, or U) |
### The *fasta* sequence format {#sec-fasta}
## The *fasta* sequence format {#sec-fasta}
The **fasta format** is certainly the most widely used sequence file format. This is certainly due to its great simplicity. It was originally created for the Lipman and Pearson [FASTA program](http://www.ncbi.nlm.nih.gov/pubmed/3162770?dopt=Citation). OBITools use in more of the classical `fasta` format an `extended version` of this format where structured data are included in the title line.
@ -64,7 +64,7 @@ This is no special format for the title line excepting that this line should be
GTGCTGACGTTGCAGTACGTTGCAGTACGTTGCAGTACGTTGCAGTACGTTGCAGTGTTT
AACGACGTTGCAGTACGTTGCAGT
### The *fastq* sequence format[^01_obitools_doc-1]{#sec-fastq}
## The *fastq* sequence format[^01_obitools_doc-1]{#sec-fastq}
The **FASTQ** format is a text file format for storing both biological sequences (only nucleic acid sequences) and the associated quality scores. The sequence and score are each encoded by a single ASCII character. This format was originally developed by the Wellcome Trust Sanger Institute to link a [FASTA](#the-fasta-sequence-format) sequence file to the corresponding quality data, but has recently become the de facto standard for storing results from high-throughput sequencers [@cock2010sanger].
@ -88,11 +88,14 @@ GATTTGGGGTTCAAAGCAGTATCGATCAAATAGTAAATCCATTTGTTCAACTCACAGTTT
The character '!' represents the lowest quality while '\~' is the highest. Here are the quality value characters in left-to-right increasing order of quality (`ASCII`):
!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~
```
!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]
^_`abcdefghijklmnopqrstuvwxyz{|}~
```
The original Sanger FASTQ files also allowed the sequence and quality strings to be wrapped (split over multiple lines), but this is generally discouraged as it can make parsing complicated due to the unfortunate choice of "\@" and "+" as markers (these characters can also occur in the quality string).
#### Sequence quality scores{.unnumbered}
### Sequence quality scores{.unnumbered}
The Phred quality value *Q* is an integer mapping of *p* (i.e., the probability that the corresponding base call is incorrect). Two different equations have been in use. The first is the standard Sanger variant to assess reliability of a base call, otherwise known as Phred quality score:
@ -111,7 +114,7 @@ Although both mappings are asymptotically identical at higher quality values, th
![Relationship between *Q* and *p* using the Sanger (red) and Solexa (black) equations (described above). The vertical dotted line indicates $\mathbf{p}= 0.05$, or equivalently, $Q = 13$.](Probabilitymetrics.png){#fig-Probabilitymetrics}
##### Encoding{.unnumbered}
#### Encoding{.unnumbered}
The *fastq* format had differente way of encoding the Phred quality score along the time. Here a breif history of these changes is presented.
@ -129,17 +132,19 @@ The *fastq* format had differente way of encoding the Phred quality score along
```
@HWI-EAS209_0006_FC706VJ:5:58:5894:21141#ATCACG/1
TTAATTGGTAAATAAATCTCCTAATAGCTTAGATNTTACCTTNNNNNNNNNNTAGTTTCTTGAGATTTGTTGGGGGAGACATTTTTGTGATTGCCTTGAT
TTAATTGGTAAATAAATCTCCTAATAGCTTAGATNTTACCTTNNNNNNNNNNTAGTTTCTTGAGA
TTTGTTGGGGGAGACATTTTTGTGATTGCCTTGAT
+HWI-EAS209_0006_FC706VJ:5:58:5894:21141#ATCACG/1
efcfffffcfeefffcffffffddf`feed]`]_Ba_^__[YBBBBBBBBBBRTT\]][]dddd`ddd^dddadd^BBBBBBBBBBBBBBBBBBBBBBBB
efcfffffcfeefffcffffffddf`feed]`]_Ba_^__[YBBBBBBBBBBRTT\]][ dddd`
ddd^dddadd^BBBBBBBBBBBBBBBBBBBBBBBB
```
An alternative interpretation of this ASCII encoding has been proposed. Also, in Illumina runs using PhiX controls, the character 'B' was observed to represent an "unknown quality score". The error rate of 'B' reads was roughly 3 phred scores lower the mean observed score of a given run.
- Starting in Illumina 1.8, the quality scores have basically returned to the use of the Sanger format (Phred+33).
OBItools support the Sanger format. It is nevertheless to read files encoded following the Solexa/Illumina format, that are still possible to find in old files, by applying a shift of 62.
OBItools follows the Sanger format. Nevertheless, It is possible to read files encoded following the Solexa/Illumina format by applying a shift of 62 (see the option **\--solexa** of the OBITools commands).
### File extension
## File extension
There is no standard file extension for a FASTQ file, but .fq and .fastq, are commonly used.

18
doc/book/installation.qmd Normal file
View File

@ -0,0 +1,18 @@
# Installation of the obitools
## Availability of the OBITools
The *OBITools* are open source and protected by the [CeCILL 2.1 license](http://www.cecill.info/licences/Licence_CeCILL_V2.1-en.html).
All the sources of the [*OBITools4*](http://metabarcoding.org/obitools4) can be downloaded from the metabarcoding git server (https://git.metabarcoding.org).
## Prerequisites
The *OBITools4* are developped using the [GO programming language](https://go.dev/), we stick to the latest version of the language, today the $1.19.5$. If you want to download and compile the sources yourself, you first need to install the corresponding compiler on your system. Some parts of the soft are also written in C, therefore a recent C compiler is also requested, GCC on Linux or Windows, the Developer Tools on Mac.
Whatever the installation you decide for, you will have to ensure that a C compiler is available on your system.
## Installation with the install script
## Compilation from sources

View File

@ -42,16 +42,3 @@ The innovation of the *OBITools* is their ability to take into account the
taxonomic annotations, ultimately allowing sorting and filtering of sequence
records based on the taxonomy.
## Installation of the obitools
### Availability of the OBITools
The *OBITools* are open source and protected by the [CeCILL 2.1 license](http://www.cecill.info/licences/Licence_CeCILL_V2.1-en.html).
All the sources of the [*OBITools4*](http://metabarcoding.org/obitools4) can be downloaded from the metabarcoding git server (https://git.metabarcoding.org).
### Prerequisites
The *OBITools4* are developped using the [GO programming language](https://go.dev/), we stick to the latest version of the language, today the $1.19.5$. If you want to download and compile the sources yourself, you first need to install the corresponding compiler on your system. Some parts of the soft are also written in C, therefore a recent C compiler is also requested, GCC on Linux or Windows, the Developer Tools on Mac.
Whatever the installation you decide for, you will have to ensure that a C compiler is available on your system.

15
doc/book/inupt.qmd Normal file
View File

@ -0,0 +1,15 @@
# Specifying the data input to *OBITools* commands
## Specifying input format
Five sequence formats are accepted for input files. *Fasta* (@sec-fasta) and *Fastq* (@sec-fastq) are the main ones, EMBL and Genbank allow the use of flat files produced by these two international databases. The last one, ecoPCR, is maintained for compatibility with previous *OBITools* and allows to read *ecoPCR* outputs as sequence files.
- `--ecopcr` : Read data following the *ecoPCR* output format.
- `--embl` Read data following the *EMBL* flatfile format.
- `--genbank` Read data following the *Genbank* flatfile format.
Several encoding schemes have been proposed for quality scores in *Fastq* format. Currently, *OBITools* considers Sanger encoding as the standard. For reasons of compatibility with older datasets produced with *Solexa* sequencers, it is possible, by using the following option, to force the use of the corresponding quality encoding scheme when reading these older files.
- `--solexa` Decodes quality string according to the Solexa specification. (default: false)

31
doc/book/output.qmd Normal file
View File

@ -0,0 +1,31 @@
# Controling OBITools outputs
## Specifying output format
Only two output sequence formats are supported by OBITools, Fasta and Fastq. Fastq is used when output sequences are associated with quality information. Otherwise, Fasta is the default format. However, it is possible to force the output format by using one of the following two options. Forcing the use of Fasta results in the loss of quality information. Conversely, when the Fastq format is forced with sequences that have no quality data, dummy qualities set to 40 for each nucleotide are added.
- `--fasta-output` Read data following the ecoPCR output format.
- `--fastq-output` Read data following the EMBL flatfile format.
OBITools allows multiple input files to be specified for a single command.
- `--no-order` When several input files are provided, indicates that there is no order among them. (default: false).
Using such option can increase a lot the processing of the data.
## The Fasta and Fastq annotations format
OBITools extend the [Fasta](#the-fasta-sequence-format) and [Fastq](#the-fastq-sequence-format) formats by introducing a format for the title lines of these formats allowing to annotate every sequence. While the previous version of OBITools used an *ad-hoc* format for these annotation, this new version introduce the usage of the standard JSON format to store them.
On input, OBITools automatically recognize the format of the annotations, but two options allows to force the parsing following one of them. You should normally not need to use these options.
- `--input-OBI-header` FASTA/FASTQ title line annotations follow OBI format. (default: false)
- `--input-json-header` FASTA/FASTQ title line annotations follow json format. (default: false)
On output, by default annotation are formatted using the new JSON format. For compatibility with previous version of OBITools and with external scripts and software, it is possible to force the usage of the previous OBITools format.
- `--output-OBI-header|-O` output FASTA/FASTQ title line annotations follow OBI format. (default: false)
- `--output-json-header` output FASTA/FASTQ title line annotations follow json format. (default: false)

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 45 KiB

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,12 @@
0 | BCT | Bacteria | |
1 | INV | Invertebrates | |
2 | MAM | Mammals | |
3 | PHG | Phages | |
4 | PLN | Plants and Fungi | |
5 | PRI | Primates | |
6 | ROD | Rodents | |
7 | SYN | Synthetic and Chimeric | |
8 | UNA | Unassigned | No species nodes should inherit this division assignment |
9 | VRL | Viruses | |
10 | VRT | Vertebrates | |
11 | ENV | Environmental samples | Anonymous sequences cloned directly from the environment |

View File

@ -0,0 +1,358 @@
--**************************************************************************
-- This is the NCBI genetic code table
-- Initial base data set from Andrzej Elzanowski while at PIR International
-- Addition of Eubacterial and Alternative Yeast by J.Ostell at NCBI
-- Base 1-3 of each codon have been added as comments to facilitate
-- readability at the suggestion of Peter Rice, EMBL
-- Later additions by Taxonomy Group staff at NCBI
--
-- Version 4.6
-- Renamed genetic code 24 to Rhabdopleuridae Mitochondrial
--
-- Version 4.5
-- Added Cephalodiscidae mitochondrial genetic code 33
--
-- Version 4.4
-- Added GTG as start codon for genetic code 3
-- Added Balanophoraceae plastid genetic code 32
--
-- Version 4.3
-- Change to CTG -> Leu in genetic codes 27, 28, 29, 30
--
-- Version 4.2
-- Added Karyorelict nuclear genetic code 27
-- Added Condylostoma nuclear genetic code 28
-- Added Mesodinium nuclear genetic code 29
-- Added Peritrich nuclear genetic code 30
-- Added Blastocrithidia nuclear genetic code 31
--
-- Version 4.1
-- Added Pachysolen tannophilus nuclear genetic code 26
--
-- Version 4.0
-- Updated version to reflect numerous undocumented changes:
-- Corrected start codons for genetic code 25
-- Name of new genetic code is Candidate Division SR1 and Gracilibacteria
-- Added candidate division SR1 nuclear genetic code 25
-- Added GTG as start codon for genetic code 24
-- Corrected Pterobranchia Mitochondrial genetic code (24)
-- Added genetic code 24, Pterobranchia Mitochondrial
-- Genetic code 11 is now Bacterial, Archaeal and Plant Plastid
-- Fixed capitalization of mitochondrial in codes 22 and 23
-- Added GTG, ATA, and TTG as alternative start codons to code 13
--
-- Version 3.9
-- Code 14 differs from code 9 only by translating UAA to Tyr rather than
-- STOP. A recent study (Telford et al, 2000) has found no evidence that
-- the codon UAA codes for Tyr in the flatworms, but other opinions exist.
-- There are very few GenBank records that are translated with code 14,
-- but a test translation shows that retranslating these records with code
-- 9 can cause premature terminations. Therefore, GenBank will maintain
-- code 14 until further information becomes available.
--
-- Version 3.8
-- Added GTG start to Echinoderm mitochondrial code, code 9
--
-- Version 3.7
-- Added code 23 Thraustochytrium mitochondrial code
-- formerly OGMP code 93
-- submitted by Gertraude Berger, Ph.D.
--
-- Version 3.6
-- Added code 22 TAG-Leu, TCA-stop
-- found in mitochondrial DNA of Scenedesmus obliquus
-- submitted by Gertraude Berger, Ph.D.
-- Organelle Genome Megasequencing Program, Univ Montreal
--
-- Version 3.5
-- Added code 21, Trematode Mitochondrial
-- (as deduced from: Garey & Wolstenholme,1989; Ohama et al, 1990)
-- Added code 16, Chlorophycean Mitochondrial
-- (TAG can translated to Leucine instaed to STOP in chlorophyceans
-- and fungi)
--
-- Version 3.4
-- Added CTG,TTG as allowed alternate start codons in Standard code.
-- Prats et al. 1989, Hann et al. 1992
--
-- Version 3.3 - 10/13/95
-- Added alternate intiation codon ATC to code 5
-- based on complete mitochondrial genome of honeybee
-- Crozier and Crozier (1993)
--
-- Version 3.2 - 6/24/95
-- Code Comments
-- 10 Alternative Ciliate Macronuclear renamed to Euplotid Macro...
-- 15 Blepharisma Macro.. code added
-- 5 Invertebrate Mito.. GTG allowed as alternate initiator
-- 11 Eubacterial renamed to Bacterial as most alternate starts
-- have been found in Archea
--
--
-- Version 3.1 - 1995
-- Updated as per Andrzej Elzanowski at NCBI
-- Complete documentation in NCBI toolkit documentation
-- Note: 2 genetic codes have been deleted
--
-- Old id Use id - Notes
--
-- id 7 id 4 - Kinetoplast code now merged in code id 4
-- id 8 id 1 - all plant chloroplast differences due to RNA edit
--
--
--*************************************************************************
Genetic-code-table ::= {
{
name "Standard" ,
name "SGC0" ,
id 1 ,
ncbieaa "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "---M------**--*----M---------------M----------------------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
},
{
name "Vertebrate Mitochondrial" ,
name "SGC1" ,
id 2 ,
ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG",
sncbieaa "----------**--------------------MMMM----------**---M------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
},
{
name "Yeast Mitochondrial" ,
name "SGC2" ,
id 3 ,
ncbieaa "FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "----------**----------------------MM---------------M------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
},
{
name "Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate
Mitochondrial; Mycoplasma; Spiroplasma" ,
name "SGC3" ,
id 4 ,
ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "--MM------**-------M------------MMMM---------------M------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
},
{
name "Invertebrate Mitochondrial" ,
name "SGC4" ,
id 5 ,
ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG",
sncbieaa "---M------**--------------------MMMM---------------M------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
},
{
name "Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear" ,
name "SGC5" ,
id 6 ,
ncbieaa "FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "--------------*--------------------M----------------------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
},
{
name "Echinoderm Mitochondrial; Flatworm Mitochondrial" ,
name "SGC8" ,
id 9 ,
ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
sncbieaa "----------**-----------------------M---------------M------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
},
{
name "Euplotid Nuclear" ,
name "SGC9" ,
id 10 ,
ncbieaa "FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "----------**-----------------------M----------------------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
},
{
name "Bacterial, Archaeal and Plant Plastid" ,
id 11 ,
ncbieaa "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "---M------**--*----M------------MMMM---------------M------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
},
{
name "Alternative Yeast Nuclear" ,
id 12 ,
ncbieaa "FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "----------**--*----M---------------M----------------------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
},
{
name "Ascidian Mitochondrial" ,
id 13 ,
ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG",
sncbieaa "---M------**----------------------MM---------------M------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
},
{
name "Alternative Flatworm Mitochondrial" ,
id 14 ,
ncbieaa "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
sncbieaa "-----------*-----------------------M----------------------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Blepharisma Macronuclear" ,
id 15 ,
ncbieaa "FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "----------*---*--------------------M----------------------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Chlorophycean Mitochondrial" ,
id 16 ,
ncbieaa "FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "----------*---*--------------------M----------------------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Trematode Mitochondrial" ,
id 21 ,
ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
sncbieaa "----------**-----------------------M---------------M------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Scenedesmus obliquus Mitochondrial" ,
id 22 ,
ncbieaa "FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "------*---*---*--------------------M----------------------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Thraustochytrium Mitochondrial" ,
id 23 ,
ncbieaa "FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "--*-------**--*-----------------M--M---------------M------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Rhabdopleuridae Mitochondrial" ,
id 24 ,
ncbieaa "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG",
sncbieaa "---M------**-------M---------------M---------------M------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Candidate Division SR1 and Gracilibacteria" ,
id 25 ,
ncbieaa "FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "---M------**-----------------------M---------------M------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Pachysolen tannophilus Nuclear" ,
id 26 ,
ncbieaa "FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "----------**--*----M---------------M----------------------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Karyorelict Nuclear" ,
id 27 ,
ncbieaa "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "--------------*--------------------M----------------------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Condylostoma Nuclear" ,
id 28 ,
ncbieaa "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "----------**--*--------------------M----------------------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Mesodinium Nuclear" ,
id 29 ,
ncbieaa "FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "--------------*--------------------M----------------------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Peritrich Nuclear" ,
id 30 ,
ncbieaa "FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "--------------*--------------------M----------------------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Blastocrithidia Nuclear" ,
id 31 ,
ncbieaa "FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "----------**-----------------------M----------------------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Balanophoraceae Plastid" ,
id 32 ,
ncbieaa "FFLLSSSSYY*WCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
sncbieaa "---M------*---*----M------------MMMM---------------M------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
} ,
{
name "Cephalodiscidae Mitochondrial" ,
id 33 ,
ncbieaa "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG",
sncbieaa "---M-------*-------M---------------M---------------M------------"
-- Base1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
-- Base2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
-- Base3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
}
}

View File

@ -0,0 +1,28 @@
0 | | Unspecified | | |
1 | | Standard | FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ---M------**--*----M---------------M---------------------------- |
2 | | Vertebrate Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG | ----------**--------------------MMMM----------**---M------------ |
3 | | Yeast Mitochondrial | FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ----------**----------------------MM---------------M------------ |
4 | | Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate Mitochondrial; Mycoplasma; Spiroplasma | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | --MM------**-------M------------MMMM---------------M------------ |
5 | | Invertebrate Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG | ---M------**--------------------MMMM---------------M------------ |
6 | | Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear | FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | --------------*--------------------M---------------------------- |
9 | | Echinoderm Mitochondrial; Flatworm Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG | ----------**-----------------------M---------------M------------ |
10 | | Euplotid Nuclear | FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ----------**-----------------------M---------------------------- |
11 | | Bacterial, Archaeal and Plant Plastid | FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ---M------**--*----M------------MMMM---------------M------------ |
12 | | Alternative Yeast Nuclear | FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ----------**--*----M---------------M---------------------------- |
13 | | Ascidian Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG | ---M------**----------------------MM---------------M------------ |
14 | | Alternative Flatworm Mitochondrial | FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG | -----------*-----------------------M---------------------------- |
15 | | Blepharisma Macronuclear | FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ----------*---*--------------------M---------------------------- |
16 | | Chlorophycean Mitochondrial | FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ----------*---*--------------------M---------------------------- |
21 | | Trematode Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG | ----------**-----------------------M---------------M------------ |
22 | | Scenedesmus obliquus mitochondrial | FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ------*---*---*--------------------M---------------------------- |
23 | | Thraustochytrium mitochondrial code | FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | --*-------**--*-----------------M--M---------------M------------ |
24 | | Rhabdopleuridae Mitochondrial | FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG | ---M------**-------M---------------M---------------M------------ |
25 | | Candidate Division SR1 and Gracilibacteria | FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ---M------**-----------------------M---------------M------------ |
26 | | Pachysolen tannophilus Nuclear | FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ----------**--*----M---------------M---------------------------- |
27 | | Karyorelict Nuclear | FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | --------------*--------------------M---------------------------- |
28 | | Condylostoma Nuclear | FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ----------**--*--------------------M---------------------------- |
29 | | Mesodinium Nuclear | FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | --------------*--------------------M---------------------------- |
30 | | Peritrich Nuclear | FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | --------------*--------------------M---------------------------- |
31 | | Blastocrithidia Nuclear | FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ----------**-----------------------M---------------------------- |
32 | | Balanophoraceae Plastid | FFLLSSSSYY*WCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG | ---M------*---*----M------------MMMM---------------M------------ |
33 | | Cephalodiscidae Mitochondrial | FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG | ---M-------*-------M---------------M---------------M------------ |

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1 @@
53f7047c1752a90d7d6976d31dd8bac859fe2cb9

View File

@ -0,0 +1 @@
b6c19d7e4a5e085a1d27d1a893606880592ea1b5

View File

@ -0,0 +1,61 @@
*.dmp files are bcp-like dump from GenBank taxonomy database.
General information.
Field terminator is "\t|\t"
Row terminator is "\t|\n"
nodes.dmp file consists of taxonomy nodes. The description for each node includes the following
fields:
tax_id -- node id in GenBank taxonomy database
parent tax_id -- parent node id in GenBank taxonomy database
rank -- rank of this node (superkingdom, kingdom, ...)
embl code -- locus-name prefix; not unique
division id -- see division.dmp file
inherited div flag (1 or 0) -- 1 if node inherits division from parent
genetic code id -- see gencode.dmp file
inherited GC flag (1 or 0) -- 1 if node inherits genetic code from parent
mitochondrial genetic code id -- see gencode.dmp file
inherited MGC flag (1 or 0) -- 1 if node inherits mitochondrial gencode from parent
GenBank hidden flag (1 or 0) -- 1 if name is suppressed in GenBank entry lineage
hidden subtree root flag (1 or 0) -- 1 if this subtree has no sequence data yet
comments -- free-text comments and citations
Taxonomy names file (names.dmp):
tax_id -- the id of node associated with this name
name_txt -- name itself
unique name -- the unique variant of this name if name not unique
name class -- (synonym, common name, ...)
Divisions file (division.dmp):
division id -- taxonomy database division id
division cde -- GenBank division code (three characters)
division name -- e.g. BCT, PLN, VRT, MAM, PRI...
comments
Genetic codes file (gencode.dmp):
genetic code id -- GenBank genetic code id
abbreviation -- genetic code name abbreviation
name -- genetic code name
cde -- translation table for this genetic code
starts -- start codons for this genetic code
Deleted nodes file (delnodes.dmp):
tax_id -- deleted node id
Merged nodes file (merged.dmp):
old_tax_id -- id of nodes which has been merged
new_tax_id -- id of nodes which is result of merging
Citations file (citations.dmp):
cit_id -- the unique id of citation
cit_key -- citation key
pubmed_id -- unique id in PubMed database (0 if not in PubMed)
medline_id -- unique id in MedLine database (0 if not in MedLine)
url -- URL associated with citation
text -- any text (usually article name and authors).
-- The following characters are escaped in this text by a backslash:
-- newline (appear as "\n"),
-- tab character ("\t"),
-- double quotes ('\"'),
-- backslash character ("\\").
taxid_list -- list of node ids separated by a single space

View File

@ -0,0 +1,29 @@
#!/bin/bash
URL=https://ftp.ncbi.nlm.nih.gov/genbank/
DIV="bct|inv|mam|phg|pln|pri|rod|vrl|vrt"
GB_Release_Number=$(curl "${URL}GB_Release_Number")
mkdir -p "Release-${GB_Release_Number}"
cd "Release-${GB_Release_Number}"
curl $URL > index.html
for f in $(egrep "gb(${DIV})[0-9]+\.seq\.gz" index.html \
| sed -E 's@^.*<a href="([^"]+)">.*</a>.*$@\1@' ) ; do
echo -n "File : $f"
if [[ -f $f ]] ; then
gzip -t $f && echo " ok" || rm -f $f
fi
while [[ ! -f $f ]] ; do
echo downloading
wget2 --progress bar -v -o - $URL$f
if [[ -f $f ]] ; then
gzip -t $f && echo " ok" || rm -f $f
fi
done
done

View File

@ -0,0 +1,126 @@
#!/bin/bash
#
# Used resources URLs
#
NCBIURL="https://ftp.ncbi.nlm.nih.gov/" # NCBI Web site URL
GBURL="${NCBIURL}genbank/" # Directory of Genbank flat files
TAXOURL="${NCBIURL}pub/taxonomy/taxdump.tar.gz" # NCBI Taxdump
LOGFILE="download.log"
#
# List of downloaded Genbank divisions
#
DIV="bct|inv|mam|phg|pln|pri|rod|vrl|vrt"
############################
#
# Functions
#
############################
pattern_at_rank() {
local taxo="$1"
local rank="$2"
echo "^($(awk -F "|" -v rank="$rank" 'BEGIN {
ORS="|";
rank="\t" rank "\t"
}
($3 ~ rank) {sub(/^[ \t]+/,"",$1);
sub(/[ \t]+$/,"",$1);
print $1}
' "${taxo}/nodes.dmp" \
| sed 's/|$//'))$"
}
#
# Extrate from the web site the current Genbank release number
# end create the corresponding directory
#
echo "Looking at current Genbank release number"
GB_Release_Number=$(curl "${GBURL}GB_Release_Number")
echo "identified release number is : ${GB_Release_Number}"
mkdir -p "Release-${GB_Release_Number}"
cd "Release-${GB_Release_Number}" || exit
#
# Download the current NCBI taxonomy
#
mkdir -p "ncbitaxo"
if [[ ! -f ncbitaxo/nodes.dmp ]] || [[ ! -f ncbitaxo/names.dmp ]] ; then
curl "${TAXOURL}" \
| tar -C "ncbitaxo" -zxf -
fi
curl $GBURL > index.html
for f in $(grep -E "gb(${DIV})[0-9]+\.seq\.gz" index.html \
| sed -E 's@^.*<a href="([^"]+)">.*</a>.*$@\1@' ) ; do
fasta=${f/seq.gz/fasta}
stamp=${f/seq.gz/stamp}
echo "File : $f saved into $fasta"
rm -f ${fasta}.downloading
while [[ ! -f "stamp/${stamp}" ]] ; do
status=""
wget "${GBURL}${f}" && \
status=$( ( (gzip -dc "$f" 2>> "$LOGFILE" || echo "Unzipping error" 1>&2) \
| (obiannotate --genbank -t ncbitaxo \
--with-taxon-at-rank kingdom \
--with-taxon-at-rank superkingdom \
--with-taxon-at-rank phylum\
--with-taxon-at-rank order \
--with-taxon-at-rank family \
--with-taxon-at-rank genus \
-S division='"misc-@-0"' \
-S section='"misc-@-0"' \
2>> "$LOGFILE" || echo "Fasta conversion error" 1>&2) \
| (obigrep -A genus_taxid -A family_taxid 2>> "$LOGFILE" \
| obigrep -p 'annotations.genus_taxid > 0 && annotations.family_taxid > 0' \
-p 'annotations.phylum_taxid > 0 || annotations.order_taxid > 0' \
2>> "$LOGFILE" || echo "Fasta filtering error" 1>&2) \
| (obiannotate -p 'annotations.superkingdom_taxid > 0' \
-S division='printf("%s-S-%d",subspc(annotations.superkingdom_name),annotations.superkingdom_taxid)' \
2>> "$LOGFILE" || echo "Fasta annotation error" 1>&2) \
| (obiannotate -p 'annotations.kingdom_taxid > 0' \
-S division='printf("%s-K-%d",subspc(annotations.kingdom_name),annotations.kingdom_taxid)' \
2>> "$LOGFILE" || echo "Fasta annotation error" 1>&2) \
| (obiannotate -p 'annotations.phylum_taxid > 0' \
-S section='printf("%s-P-%d",subspc(annotations.phylum_name),annotations.phylum_taxid)' \
2>> "$LOGFILE" || echo "Fasta annotation error" 1>&2) \
| (obiannotate -p 'annotations.order_taxid > 0' \
-S section='printf("%s-O-%d",subspc(annotations.order_name),annotations.order_taxid)' \
2>> "$LOGFILE" || echo "Fasta annotation error" 1>&2) > "${fasta}.downloading") 2>&1 )
echo
rm -f "${f}"
if [[ -z "$status" ]] ; then
echo "Downloading of $f succeded ($(obicount -v "${fasta}.downloading" 2>/dev/null) sequences)"
mv "${fasta}.downloading" "${fasta}"
mkdir -p stamp
touch "stamp/${stamp}"
else
echo "Downloading of $f failed"
echo "$status"
rm -f "${fasta}"
rm -f "${fasta}.downloading"
fi
done
if [[ -f "$fasta" ]] ; then
obidistribute -Z -A -p "%s.fasta" -c section -d division "$fasta"
rm -f "$fasta"
fi
done

Binary file not shown.

View File

@ -0,0 +1,4 @@
wolf_diet 13a_F730603 aattaac TTAGATACCCCACTATGC TAGAACAGGCTCCTCTAG F @
wolf_diet 15a_F730814 gaagtag TTAGATACCCCACTATGC TAGAACAGGCTCCTCTAG F @
wolf_diet 26a_F040644 gaatatc TTAGATACCCCACTATGC TAGAACAGGCTCCTCTAG F @
wolf_diet 29a_F260619 gcctcct TTAGATACCCCACTATGC TAGAACAGGCTCCTCTAG F @

BIN
doc/build/_book/OBITools-V4.epub vendored Normal file

Binary file not shown.

BIN
doc/build/_book/OBITools-V4.pdf vendored Normal file

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More