mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 08:10:45 +00:00
Complete the documentation and add a Release note file
This commit is contained in:
29
Release-notes.md
Normal file
29
Release-notes.md
Normal file
@ -0,0 +1,29 @@
|
||||
# OBITools release notes
|
||||
|
||||
## February $18^{th}$, 2023. Release 4.0.0
|
||||
|
||||
This is the first version of the *OBITools* version 4. I decided to tag it following two weeks
|
||||
of intensive data analysis with them, which allowed me to discover many small bugs present in the previous
|
||||
non-official version. Obviously other bugs are certainly present in the code, and you are welcome
|
||||
to use the git ticket system to report them. But the tools now seem to produce reliable results.
|
||||
|
||||
### Corrected bugs
|
||||
|
||||
- On some computers the end of the output file was lost, leading to the loss of sequences and
|
||||
to the production of incorrect files because of the last sequence record, sometimes truncated in
|
||||
its middle. This was only occurring when more than a single CPU was used. It was affecting every obitools.
|
||||
- The `obipairing` software had a bug in the right alignment procedure. This led to the non alignment
|
||||
of very short barcodes during the pairing of the forward and reverse reads.
|
||||
- The `obipairing` tool had a non-deterministic behaviour when aligning a pair of very low quality reads.
|
||||
As a consequence, the result for the same low quality read pair was not the same from run to run.
|
||||
|
||||
### New functionality
|
||||
|
||||
- Adding of a `--compress|-Z` option to every obitools allowing to produce `gz` compressed output. OBITools
|
||||
were already able to deal with gzipped input files transparently. They can now produce their results in the same format.
|
||||
- Adding of a `--append|-A` option to the `obidistribute` tool. It allows to append the result of an
|
||||
`obidistribute` execution to preexisting files.
|
||||
- Adding of a `--directory|-d` option to the `obidistribute` tool. It allows to declare a secondary
|
||||
classification key over the one defined by the `--category|-c` option. This extra key leads to produce
|
||||
directories in which files produced according to the primary criterion are stored.
|
||||
- Adding of the functions `subspc`, `printf`, `int`, `numeric`, and `bool` to the expression language.
|
@ -16,6 +16,7 @@ func main() {
|
||||
_, args, _ := optionParser(os.Args)
|
||||
|
||||
fs, _ := obiconvert.ReadBioSequences(args...)
|
||||
|
||||
obidistribute.DistributeSequence(fs)
|
||||
|
||||
obiiter.WaitForLastPipe()
|
||||
|
@ -20,6 +20,69 @@ ul.task-list li input[type="checkbox"] {
|
||||
margin: 0 0.8em 0.2em -1.6em;
|
||||
vertical-align: middle;
|
||||
}
|
||||
pre > code.sourceCode { white-space: pre; position: relative; }
|
||||
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
|
||||
pre > code.sourceCode > span:empty { height: 1.2em; }
|
||||
.sourceCode { overflow: visible; }
|
||||
code.sourceCode > span { color: inherit; text-decoration: inherit; }
|
||||
div.sourceCode { margin: 1em 0; }
|
||||
pre.sourceCode { margin: 0; }
|
||||
@media screen {
|
||||
div.sourceCode { overflow: auto; }
|
||||
}
|
||||
@media print {
|
||||
pre > code.sourceCode { white-space: pre-wrap; }
|
||||
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
|
||||
}
|
||||
pre.numberSource code
|
||||
{ counter-reset: source-line 0; }
|
||||
pre.numberSource code > span
|
||||
{ position: relative; left: -4em; counter-increment: source-line; }
|
||||
pre.numberSource code > span > a:first-child::before
|
||||
{ content: counter(source-line);
|
||||
position: relative; left: -1em; text-align: right; vertical-align: baseline;
|
||||
border: none; display: inline-block;
|
||||
-webkit-touch-callout: none; -webkit-user-select: none;
|
||||
-khtml-user-select: none; -moz-user-select: none;
|
||||
-ms-user-select: none; user-select: none;
|
||||
padding: 0 4px; width: 4em;
|
||||
color: #aaaaaa;
|
||||
}
|
||||
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
|
||||
div.sourceCode
|
||||
{ }
|
||||
@media screen {
|
||||
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
|
||||
}
|
||||
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
|
||||
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
|
||||
code span.at { color: #7d9029; } /* Attribute */
|
||||
code span.bn { color: #40a070; } /* BaseN */
|
||||
code span.bu { color: #008000; } /* BuiltIn */
|
||||
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
|
||||
code span.ch { color: #4070a0; } /* Char */
|
||||
code span.cn { color: #880000; } /* Constant */
|
||||
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
|
||||
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
|
||||
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
|
||||
code span.dt { color: #902000; } /* DataType */
|
||||
code span.dv { color: #40a070; } /* DecVal */
|
||||
code span.er { color: #ff0000; font-weight: bold; } /* Error */
|
||||
code span.ex { } /* Extension */
|
||||
code span.fl { color: #40a070; } /* Float */
|
||||
code span.fu { color: #06287e; } /* Function */
|
||||
code span.im { color: #008000; font-weight: bold; } /* Import */
|
||||
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
|
||||
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
|
||||
code span.op { color: #666666; } /* Operator */
|
||||
code span.ot { color: #007020; } /* Other */
|
||||
code span.pp { color: #bc7a00; } /* Preprocessor */
|
||||
code span.sc { color: #4070a0; } /* SpecialChar */
|
||||
code span.ss { color: #bb6688; } /* SpecialString */
|
||||
code span.st { color: #4070a0; } /* String */
|
||||
code span.va { color: #19177c; } /* Variable */
|
||||
code span.vs { color: #4070a0; } /* VerbatimString */
|
||||
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
|
||||
div.csl-bib-body { }
|
||||
div.csl-entry {
|
||||
clear: both;
|
||||
@ -175,16 +238,36 @@ div.csl-indent {
|
||||
<li><a href="#function-defined-in-the-language" id="toc-function-defined-in-the-language" class="nav-link" data-scroll-target="#function-defined-in-the-language"><span class="toc-section-number">4.3.2</span> Function defined in the language</a></li>
|
||||
<li><a href="#accessing-to-the-sequence-annotations" id="toc-accessing-to-the-sequence-annotations" class="nav-link" data-scroll-target="#accessing-to-the-sequence-annotations"><span class="toc-section-number">4.3.3</span> Accessing to the sequence annotations</a></li>
|
||||
</ul></li>
|
||||
<li><a href="#metabarcode-design-and-quality-assessment" id="toc-metabarcode-design-and-quality-assessment" class="nav-link" data-scroll-target="#metabarcode-design-and-quality-assessment"><span class="toc-section-number">4.4</span> Metabarcode design and quality assessment</a></li>
|
||||
<li><a href="#file-format-conversions" id="toc-file-format-conversions" class="nav-link" data-scroll-target="#file-format-conversions"><span class="toc-section-number">4.5</span> File format conversions</a></li>
|
||||
<li><a href="#sequence-annotations" id="toc-sequence-annotations" class="nav-link" data-scroll-target="#sequence-annotations"><span class="toc-section-number">4.6</span> Sequence annotations</a></li>
|
||||
<li><a href="#metabarcode-design-and-quality-assessment" id="toc-metabarcode-design-and-quality-assessment" class="nav-link" data-scroll-target="#metabarcode-design-and-quality-assessment"><span class="toc-section-number">4.4</span> Metabarcode design and quality assessment</a>
|
||||
<ul class="collapse">
|
||||
<li><a href="#obipcr" id="toc-obipcr" class="nav-link" data-scroll-target="#obipcr"><span class="toc-section-number">4.4.1</span> <code>obipcr</code></a></li>
|
||||
</ul></li>
|
||||
<li><a href="#file-format-conversions" id="toc-file-format-conversions" class="nav-link" data-scroll-target="#file-format-conversions"><span class="toc-section-number">4.5</span> File format conversions</a>
|
||||
<ul class="collapse">
|
||||
<li><a href="#obiconvert" id="toc-obiconvert" class="nav-link" data-scroll-target="#obiconvert"><span class="toc-section-number">4.5.1</span> <code>obiconvert</code></a></li>
|
||||
</ul></li>
|
||||
<li><a href="#sequence-annotations" id="toc-sequence-annotations" class="nav-link" data-scroll-target="#sequence-annotations"><span class="toc-section-number">4.6</span> Sequence annotations</a>
|
||||
<ul class="collapse">
|
||||
<li><a href="#obiannotate" id="toc-obiannotate" class="nav-link" data-scroll-target="#obiannotate"><span class="toc-section-number">4.6.1</span> <code>obiannotate</code></a></li>
|
||||
<li><a href="#obitag" id="toc-obitag" class="nav-link" data-scroll-target="#obitag"><span class="toc-section-number">4.6.2</span> <code>obitag</code></a></li>
|
||||
</ul></li>
|
||||
<li><a href="#computations-on-sequences" id="toc-computations-on-sequences" class="nav-link" data-scroll-target="#computations-on-sequences"><span class="toc-section-number">4.7</span> Computations on sequences</a>
|
||||
<ul class="collapse">
|
||||
<li><a href="#obipairing" id="toc-obipairing" class="nav-link" data-scroll-target="#obipairing"><span class="toc-section-number">4.7.1</span> <code>obipairing</code></a></li>
|
||||
<li><a href="#obimultiplex" id="toc-obimultiplex" class="nav-link" data-scroll-target="#obimultiplex"><span class="toc-section-number">4.7.2</span> <code>obimultiplex</code></a></li>
|
||||
<li><a href="#obicomplement" id="toc-obicomplement" class="nav-link" data-scroll-target="#obicomplement"><span class="toc-section-number">4.7.3</span> <code>obicomplement</code></a></li>
|
||||
<li><a href="#obiclean" id="toc-obiclean" class="nav-link" data-scroll-target="#obiclean"><span class="toc-section-number">4.7.4</span> <code>obiclean</code></a></li>
|
||||
<li><a href="#obiuniq" id="toc-obiuniq" class="nav-link" data-scroll-target="#obiuniq"><span class="toc-section-number">4.7.5</span> <code>obiuniq</code></a></li>
|
||||
</ul></li>
|
||||
<li><a href="#sequence-sampling-and-filtering" id="toc-sequence-sampling-and-filtering" class="nav-link" data-scroll-target="#sequence-sampling-and-filtering"><span class="toc-section-number">4.8</span> Sequence sampling and filtering</a>
|
||||
<ul class="collapse">
|
||||
<li><a href="#utilities" id="toc-utilities" class="nav-link" data-scroll-target="#utilities"><span class="toc-section-number">4.8.1</span> Utilities</a></li>
|
||||
<li><a href="#obigrep" id="toc-obigrep" class="nav-link" data-scroll-target="#obigrep"><span class="toc-section-number">4.8.1</span> <code>obigrep</code></a></li>
|
||||
</ul></li>
|
||||
<li><a href="#utilities" id="toc-utilities" class="nav-link" data-scroll-target="#utilities"><span class="toc-section-number">4.9</span> Utilities</a>
|
||||
<ul class="collapse">
|
||||
<li><a href="#obicount" id="toc-obicount" class="nav-link" data-scroll-target="#obicount"><span class="toc-section-number">4.9.1</span> <code>obicount</code></a></li>
|
||||
<li><a href="#obidistribute" id="toc-obidistribute" class="nav-link" data-scroll-target="#obidistribute"><span class="toc-section-number">4.9.2</span> <code>obidistribute</code></a></li>
|
||||
<li><a href="#obifind" id="toc-obifind" class="nav-link" data-scroll-target="#obifind"><span class="toc-section-number">4.9.3</span> <code>obifind</code></a></li>
|
||||
</ul></li>
|
||||
</ul>
|
||||
</nav>
|
||||
@ -268,40 +351,52 @@ div.csl-indent {
|
||||
<p>Several OBITools (<em>e.g.</em> obigrep, obiannotate) allow the user to specify some simple expressions to compute values or define predicates. These expressions are parsed and evaluated using the <a href="https://pkg.go.dev/github.com/PaesslerAG/gval" title="Gval (Go eVALuate) for evaluating arbitrary expressions Go-like expressions.">gval</a> Go package, which allows for evaluating Go-like expressions.</p>
|
||||
<section id="variables-usable-in-the-expression" class="level3" data-number="4.3.1">
|
||||
<h3 data-number="4.3.1" class="anchored" data-anchor-id="variables-usable-in-the-expression"><span class="header-section-number">4.3.1</span> Variables usable in the expression</h3>
|
||||
<section id="sequence" class="level4" data-number="4.3.1.1">
|
||||
<h4 data-number="4.3.1.1" class="anchored" data-anchor-id="sequence"><span class="header-section-number">4.3.1.1</span> sequence</h4>
|
||||
<p>sequence is the sequence object on which the expression is evaluated</p>
|
||||
</section>
|
||||
<section id="annotation" class="level4" data-number="4.3.1.2">
|
||||
<h4 data-number="4.3.1.2" class="anchored" data-anchor-id="annotation"><span class="header-section-number">4.3.1.2</span> annotation</h4>
|
||||
</section>
|
||||
<ul>
|
||||
<li><code>sequence</code> is the sequence object on which the expression is evaluated.</li>
|
||||
<li><code>annotations</code> is a map object containing every annotation associated with the currently processed sequence.</li>
|
||||
</ul>
|
||||
</section>
|
||||
<section id="function-defined-in-the-language" class="level3" data-number="4.3.2">
|
||||
<h3 data-number="4.3.2" class="anchored" data-anchor-id="function-defined-in-the-language"><span class="header-section-number">4.3.2</span> Function defined in the language</h3>
|
||||
<section id="len" class="level4" data-number="4.3.2.1">
|
||||
<h4 data-number="4.3.2.1" class="anchored" data-anchor-id="len"><span class="header-section-number">4.3.2.1</span> len</h4>
|
||||
<section id="instrospection-functions" class="level4 unnumbered">
|
||||
<h4 class="unnumbered anchored" data-anchor-id="instrospection-functions">Introspection functions</h4>
|
||||
<ul>
|
||||
<li><code>len(x)</code> is a generic function allowing to retrieve the size of an object. It returns the length of a sequence, the number of elements in a map like <code>annotations</code>, or the number of elements in an array. The returned value is an <code>int</code>.</li>
|
||||
</ul>
|
||||
</section>
|
||||
<section id="ismap" class="level4" data-number="4.3.2.2">
|
||||
<h4 data-number="4.3.2.2" class="anchored" data-anchor-id="ismap"><span class="header-section-number">4.3.2.2</span> ismap</h4>
|
||||
<section id="cast-functions" class="level4 unnumbered">
|
||||
<h4 class="unnumbered anchored" data-anchor-id="cast-functions">Cast functions</h4>
|
||||
<ul>
|
||||
<li><code>int(x)</code> converts if possible the <code>x</code> value to an integer value. The function returns an <code>int</code>.</li>
|
||||
<li><code>numeric(x)</code> converts if possible the <code>x</code> value to a float value. The function returns a <code>float</code>.</li>
|
||||
<li><code>bool(x)</code> converts if possible the <code>x</code> value to a boolean value. The function returns a <code>bool</code>.</li>
|
||||
</ul>
|
||||
</section>
|
||||
<section id="hasattribute" class="level4" data-number="4.3.2.3">
|
||||
<h4 data-number="4.3.2.3" class="anchored" data-anchor-id="hasattribute"><span class="header-section-number">4.3.2.3</span> hasattribute</h4>
|
||||
</section>
|
||||
<section id="min" class="level4" data-number="4.3.2.4">
|
||||
<h4 data-number="4.3.2.4" class="anchored" data-anchor-id="min"><span class="header-section-number">4.3.2.4</span> min</h4>
|
||||
</section>
|
||||
<section id="max" class="level4" data-number="4.3.2.5">
|
||||
<h4 data-number="4.3.2.5" class="anchored" data-anchor-id="max"><span class="header-section-number">4.3.2.5</span> max</h4>
|
||||
<section id="string-related-functions" class="level4 unnumbered">
|
||||
<h4 class="unnumbered anchored" data-anchor-id="string-related-functions">String related functions</h4>
|
||||
<ul>
|
||||
<li><code>printf(format,...)</code> allows to combine several values to build a string. <code>format</code> follows the classical C <code>printf</code> syntax. The function returns a <code>string</code>.</li>
|
||||
<li><code>subspc(x)</code> substitutes every space in the <code>x</code> string by the underscore (<code>_</code>) character. The function returns a <code>string</code>.</li>
|
||||
</ul>
|
||||
</section>
|
||||
</section>
|
||||
<section id="accessing-to-the-sequence-annotations" class="level3" data-number="4.3.3">
|
||||
<h3 data-number="4.3.3" class="anchored" data-anchor-id="accessing-to-the-sequence-annotations"><span class="header-section-number">4.3.3</span> Accessing to the sequence annotations</h3>
|
||||
<p>The <code>annotations</code> variable is a map object containing all the annotations associated with the currently processed sequence. The indices of the map are the attribute names. There are two possibilities to retrieve an annotation. It is possible to use the classical <code>[]</code> indexing operator, putting the attribute name quoted by double quotes between them.</p>
|
||||
<div class="sourceCode" id="cb1"><pre class="sourceCode go code-with-copy"><code class="sourceCode go"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a>annotations<span class="op">[</span><span class="st">"direction"</span><span class="op">]</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<p>The above code retrieves the <code>direction</code> annotation. A second notation using the dot (<code>.</code>) is often more convenient.</p>
|
||||
<div class="sourceCode" id="cb2"><pre class="sourceCode go code-with-copy"><code class="sourceCode go"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a>annotations<span class="op">.</span>direction</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<p>Special attributes of the sequence are accessible only by dedicated methods of the <code>sequence</code> object.</p>
|
||||
<ul>
|
||||
<li>The sequence identifier : <code>Id()</code></li>
|
||||
<li>The sequence definition : <code>Definition()</code></li>
|
||||
</ul>
|
||||
</section>
|
||||
</section>
|
||||
<section id="metabarcode-design-and-quality-assessment" class="level2" data-number="4.4">
|
||||
<h2 data-number="4.4" class="anchored" data-anchor-id="metabarcode-design-and-quality-assessment"><span class="header-section-number">4.4</span> Metabarcode design and quality assessment</h2>
|
||||
<section id="obipcr" class="level4" data-number="4.4.0.1">
|
||||
<h4 data-number="4.4.0.1" class="anchored" data-anchor-id="obipcr"><span class="header-section-number">4.4.0.1</span> <code>obipcr</code></h4>
|
||||
<section id="obipcr" class="level3" data-number="4.4.1">
|
||||
<h3 data-number="4.4.1" class="anchored" data-anchor-id="obipcr"><span class="header-section-number">4.4.1</span> <code>obipcr</code></h3>
|
||||
<blockquote class="blockquote">
|
||||
<p>Replace the <code>ecoPCR</code> original <em>OBITools</em></p>
|
||||
</blockquote>
|
||||
@ -309,14 +404,17 @@ div.csl-indent {
|
||||
</section>
|
||||
<section id="file-format-conversions" class="level2" data-number="4.5">
|
||||
<h2 data-number="4.5" class="anchored" data-anchor-id="file-format-conversions"><span class="header-section-number">4.5</span> File format conversions</h2>
|
||||
<section id="obiconvert" class="level4" data-number="4.5.0.1">
|
||||
<h4 data-number="4.5.0.1" class="anchored" data-anchor-id="obiconvert"><span class="header-section-number">4.5.0.1</span> <code>obiconvert</code></h4>
|
||||
<section id="obiconvert" class="level3" data-number="4.5.1">
|
||||
<h3 data-number="4.5.1" class="anchored" data-anchor-id="obiconvert"><span class="header-section-number">4.5.1</span> <code>obiconvert</code></h3>
|
||||
</section>
|
||||
</section>
|
||||
<section id="sequence-annotations" class="level2" data-number="4.6">
|
||||
<h2 data-number="4.6" class="anchored" data-anchor-id="sequence-annotations"><span class="header-section-number">4.6</span> Sequence annotations</h2>
|
||||
<section id="obitag" class="level4" data-number="4.6.0.1">
|
||||
<h4 data-number="4.6.0.1" class="anchored" data-anchor-id="obitag"><span class="header-section-number">4.6.0.1</span> <code>obitag</code></h4>
|
||||
<section id="obiannotate" class="level3" data-number="4.6.1">
|
||||
<h3 data-number="4.6.1" class="anchored" data-anchor-id="obiannotate"><span class="header-section-number">4.6.1</span> <code>obiannotate</code></h3>
|
||||
</section>
|
||||
<section id="obitag" class="level3" data-number="4.6.2">
|
||||
<h3 data-number="4.6.2" class="anchored" data-anchor-id="obitag"><span class="header-section-number">4.6.2</span> <code>obitag</code></h3>
|
||||
</section>
|
||||
</section>
|
||||
<section id="computations-on-sequences" class="level2" data-number="4.7">
|
||||
@ -326,15 +424,15 @@ div.csl-indent {
|
||||
<blockquote class="blockquote">
|
||||
<p>Replace the <code>illuminapairedends</code> original <em>OBITools</em></p>
|
||||
</blockquote>
|
||||
<section id="alignment-procedure" class="level4" data-number="4.7.1.1">
|
||||
<h4 data-number="4.7.1.1" class="anchored" data-anchor-id="alignment-procedure"><span class="header-section-number">4.7.1.1</span> Alignment procedure</h4>
|
||||
<section id="alignment-procedure" class="level4 unnumbered">
|
||||
<h4 class="unnumbered anchored" data-anchor-id="alignment-procedure">Alignment procedure</h4>
|
||||
<p><code>obipairing</code> is introducing a new alignment algorithm compared to the <code>illuminapairedend</code> command of the <code>OBITools V2</code>. Nevertheless, this new algorithm has been designed to produce the same results as the previous one, except in very few cases.</p>
|
||||
<p>The new algorithm is a two-step procedure. First, a FASTN-type algorithm <span class="citation" data-cites="Lipman1985-hw">(<a href="references.html#ref-Lipman1985-hw" role="doc-biblioref">Lipman and Pearson 1985</a>)</span> identifies the best offset between the two matched readings. This identifies the region of overlap.</p>
|
||||
<p>In the second step, the matching regions of the two reads are extracted along with a flanking sequence of <span class="math inline">\(\Delta\)</span> base pairs. The two subsequences are then aligned using a “one side free end-gap” dynamic programming algorithm. This latter step is only called if at least one mismatch is detected by the FASTP step.</p>
|
||||
<p>Unless the similarity between the two reads at their overlap region is very low, the addition of the flanking regions in the second step of the alignment ensures the same alignment as if the dynamic programming alignment was performed on the full reads.</p>
|
||||
</section>
|
||||
<section id="the-scoring-system" class="level4" data-number="4.7.1.2">
|
||||
<h4 data-number="4.7.1.2" class="anchored" data-anchor-id="the-scoring-system"><span class="header-section-number">4.7.1.2</span> The scoring system</h4>
|
||||
<section id="the-scoring-system" class="level4 unnumbered">
|
||||
<h4 class="unnumbered anchored" data-anchor-id="the-scoring-system">The scoring system</h4>
|
||||
<p>In the dynamic programming step, the match and mismatch scores take into account the quality scores of the two aligned nucleotides. By taking these into account, the probability of a true match can be calculated for each aligned base pair.</p>
|
||||
<p>If we consider a nucleotide read with a quality score <span class="math inline">\(Q\)</span>, the probability of misreading this base (<span class="math inline">\(P_E\)</span>) is : <span class="math display">\[
|
||||
P_E = 10^{-\frac{Q}{10}}
|
||||
@ -388,6 +486,12 @@ P(MATCH | X_1 \neq X_2) = (1-P_{E1})\frac{P_{E2}}{3} + (1-P_{E2})\frac{P_{E1}}
|
||||
\end{aligned}
|
||||
\]</span></p>
|
||||
<p><strong>Probability of a match under the random model</strong></p>
|
||||
<p>The second considered model is a pure random model where every base is equiprobable, hence having a probability of occurrence of a nucleotide equals <span class="math inline">\(0.25\)</span>. Under that hypothesis</p>
|
||||
<p><span class="math display">\[
|
||||
P(MATCH | \text{Random model}) = 0.25
|
||||
\]</span></p>
|
||||
<p><strong>The score is a log ratio of likelihoods</strong></p>
|
||||
<p>The score is defined as the logarithm of the ratio between the likelihood of the observations considering the sequencer error model over the likelihood under the random model.</p>
|
||||
<div class="cell">
|
||||
<div class="cell-output-display">
|
||||
<div class="quarto-figure quarto-figure-center">
|
||||
@ -399,38 +503,49 @@ P(MATCH | X_1 \neq X_2) = (1-P_{E1})\frac{P_{E2}}{3} + (1-P_{E2})\frac{P_{E1}}
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="obimultiplex" class="level4" data-number="4.7.1.3">
|
||||
<h4 data-number="4.7.1.3" class="anchored" data-anchor-id="obimultiplex"><span class="header-section-number">4.7.1.3</span> <code>obimultiplex</code></h4>
|
||||
</section>
|
||||
<section id="obimultiplex" class="level3" data-number="4.7.2">
|
||||
<h3 data-number="4.7.2" class="anchored" data-anchor-id="obimultiplex"><span class="header-section-number">4.7.2</span> <code>obimultiplex</code></h3>
|
||||
<blockquote class="blockquote">
|
||||
<p>Replace the <code>ngsfilter</code> original <em>OBITools</em></p>
|
||||
</blockquote>
|
||||
</section>
|
||||
<section id="obicomplement" class="level4" data-number="4.7.1.4">
|
||||
<h4 data-number="4.7.1.4" class="anchored" data-anchor-id="obicomplement"><span class="header-section-number">4.7.1.4</span> <code>obicomplement</code></h4>
|
||||
<section id="obicomplement" class="level3" data-number="4.7.3">
|
||||
<h3 data-number="4.7.3" class="anchored" data-anchor-id="obicomplement"><span class="header-section-number">4.7.3</span> <code>obicomplement</code></h3>
|
||||
</section>
|
||||
<section id="obiclean" class="level4" data-number="4.7.1.5">
|
||||
<h4 data-number="4.7.1.5" class="anchored" data-anchor-id="obiclean"><span class="header-section-number">4.7.1.5</span> <code>obiclean</code></h4>
|
||||
</section>
|
||||
<section id="obiuniq" class="level4" data-number="4.7.1.6">
|
||||
<h4 data-number="4.7.1.6" class="anchored" data-anchor-id="obiuniq"><span class="header-section-number">4.7.1.6</span> <code>obiuniq</code></h4>
|
||||
<section id="obiclean" class="level3" data-number="4.7.4">
|
||||
<h3 data-number="4.7.4" class="anchored" data-anchor-id="obiclean"><span class="header-section-number">4.7.4</span> <code>obiclean</code></h3>
|
||||
</section>
|
||||
<section id="obiuniq" class="level3" data-number="4.7.5">
|
||||
<h3 data-number="4.7.5" class="anchored" data-anchor-id="obiuniq"><span class="header-section-number">4.7.5</span> <code>obiuniq</code></h3>
|
||||
</section>
|
||||
</section>
|
||||
<section id="sequence-sampling-and-filtering" class="level2" data-number="4.8">
|
||||
<h2 data-number="4.8" class="anchored" data-anchor-id="sequence-sampling-and-filtering"><span class="header-section-number">4.8</span> Sequence sampling and filtering</h2>
|
||||
<section id="obigrep" class="level4" data-number="4.8.0.1">
|
||||
<h4 data-number="4.8.0.1" class="anchored" data-anchor-id="obigrep"><span class="header-section-number">4.8.0.1</span> <code>obigrep</code></h4>
|
||||
<section id="obigrep" class="level3" data-number="4.8.1">
|
||||
<h3 data-number="4.8.1" class="anchored" data-anchor-id="obigrep"><span class="header-section-number">4.8.1</span> <code>obigrep</code></h3>
|
||||
</section>
|
||||
<section id="utilities" class="level3" data-number="4.8.1">
|
||||
<h3 data-number="4.8.1" class="anchored" data-anchor-id="utilities"><span class="header-section-number">4.8.1</span> Utilities</h3>
|
||||
<section id="obicount" class="level4" data-number="4.8.1.1">
|
||||
<h4 data-number="4.8.1.1" class="anchored" data-anchor-id="obicount"><span class="header-section-number">4.8.1.1</span> <code>obicount</code></h4>
|
||||
</section>
|
||||
<section id="obidistribute" class="level4" data-number="4.8.1.2">
|
||||
<h4 data-number="4.8.1.2" class="anchored" data-anchor-id="obidistribute"><span class="header-section-number">4.8.1.2</span> <code>obidistribute</code></h4>
|
||||
<section id="utilities" class="level2" data-number="4.9">
|
||||
<h2 data-number="4.9" class="anchored" data-anchor-id="utilities"><span class="header-section-number">4.9</span> Utilities</h2>
|
||||
<section id="obicount" class="level3" data-number="4.9.1">
|
||||
<h3 data-number="4.9.1" class="anchored" data-anchor-id="obicount"><span class="header-section-number">4.9.1</span> <code>obicount</code></h3>
|
||||
<p><code>obicount</code> counts the number of sequence records, the sum of the <code>count</code> attributes, and the sum of the length of all the sequences.</p>
|
||||
<p><em>Example:</em></p>
|
||||
<div class="sourceCode" id="cb3"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="ex">obicount</span> seq.fasta </span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||
<p>Prints the number of sequence records contained in the <code>seq.fasta</code> file and the sum of their <code>count</code> attributes.</p>
|
||||
<p><em>Options specific to the command</em></p>
|
||||
<ul>
|
||||
<li><code>--reads|-r</code> Prints read counts.</li>
|
||||
<li><code>--symbols|-s</code> Prints symbol counts.</li>
|
||||
<li><code>--variants|-v</code> Prints variant counts.</li>
|
||||
</ul>
|
||||
</section>
|
||||
<section id="obifind" class="level4" data-number="4.8.1.3">
|
||||
<h4 data-number="4.8.1.3" class="anchored" data-anchor-id="obifind"><span class="header-section-number">4.8.1.3</span> <code>obifind</code></h4>
|
||||
<section id="obidistribute" class="level3" data-number="4.9.2">
|
||||
<h3 data-number="4.9.2" class="anchored" data-anchor-id="obidistribute"><span class="header-section-number">4.9.2</span> <code>obidistribute</code></h3>
|
||||
</section>
|
||||
<section id="obifind" class="level3" data-number="4.9.3">
|
||||
<h3 data-number="4.9.3" class="anchored" data-anchor-id="obifind"><span class="header-section-number">4.9.3</span> <code>obifind</code></h3>
|
||||
<blockquote class="blockquote">
|
||||
<p>Replace the <code>ecofind</code> original <em>OBITools.</em></p>
|
||||
</blockquote>
|
||||
@ -443,7 +558,6 @@ Lipman, D J, and W R Pearson. 1985. <span>“<span class="nocase">Rapid and sens
|
||||
</div>
|
||||
</section>
|
||||
</section>
|
||||
</section>
|
||||
|
||||
</main> <!-- /main -->
|
||||
<script id="quarto-html-after-body" type="application/javascript">
|
||||
|
File diff suppressed because one or more lines are too long
BIN
doc/_book/tutorial_files/figure-epub/unnamed-chunk-10-1.png
Normal file
BIN
doc/_book/tutorial_files/figure-epub/unnamed-chunk-10-1.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 16 KiB |
BIN
doc/_book/tutorial_files/figure-epub/unnamed-chunk-9-1.png
Normal file
BIN
doc/_book/tutorial_files/figure-epub/unnamed-chunk-9-1.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 15 KiB |
BIN
doc/_book/tutorial_files/figure-pdf/unnamed-chunk-10-1.pdf
Normal file
BIN
doc/_book/tutorial_files/figure-pdf/unnamed-chunk-10-1.pdf
Normal file
Binary file not shown.
BIN
doc/_book/tutorial_files/figure-pdf/unnamed-chunk-9-1.pdf
Normal file
BIN
doc/_book/tutorial_files/figure-pdf/unnamed-chunk-9-1.pdf
Normal file
Binary file not shown.
333
doc/_book/utilities.html
Normal file
333
doc/_book/utilities.html
Normal file
@ -0,0 +1,333 @@
|
||||
<!DOCTYPE html>
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
|
||||
|
||||
<meta charset="utf-8">
|
||||
<meta name="generator" content="quarto-1.2.256">
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
|
||||
|
||||
|
||||
<title>OBITools V4 - 5 Utilities</title>
|
||||
<style>
|
||||
code{white-space: pre-wrap;}
|
||||
span.smallcaps{font-variant: small-caps;}
|
||||
div.columns{display: flex; gap: min(4vw, 1.5em);}
|
||||
div.column{flex: auto; overflow-x: auto;}
|
||||
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
|
||||
ul.task-list{list-style: none;}
|
||||
ul.task-list li input[type="checkbox"] {
|
||||
width: 0.8em;
|
||||
margin: 0 0.8em 0.2em -1.6em;
|
||||
vertical-align: middle;
|
||||
}
|
||||
</style>
|
||||
|
||||
|
||||
<script src="site_libs/quarto-nav/quarto-nav.js"></script>
|
||||
<script src="site_libs/quarto-nav/headroom.min.js"></script>
|
||||
<script src="site_libs/clipboard/clipboard.min.js"></script>
|
||||
<script src="site_libs/quarto-search/autocomplete.umd.js"></script>
|
||||
<script src="site_libs/quarto-search/fuse.min.js"></script>
|
||||
<script src="site_libs/quarto-search/quarto-search.js"></script>
|
||||
<meta name="quarto:offset" content="./">
|
||||
<link href="./library.html" rel="next">
|
||||
<link href="./commands.html" rel="prev">
|
||||
<script src="site_libs/quarto-html/quarto.js"></script>
|
||||
<script src="site_libs/quarto-html/popper.min.js"></script>
|
||||
<script src="site_libs/quarto-html/tippy.umd.min.js"></script>
|
||||
<script src="site_libs/quarto-html/anchor.min.js"></script>
|
||||
<link href="site_libs/quarto-html/tippy.css" rel="stylesheet">
|
||||
<link href="site_libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
|
||||
<script src="site_libs/bootstrap/bootstrap.min.js"></script>
|
||||
<link href="site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
|
||||
<link href="site_libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">
|
||||
<script id="quarto-search-options" type="application/json">{
|
||||
"location": "sidebar",
|
||||
"copy-button": false,
|
||||
"collapse-after": 3,
|
||||
"panel-placement": "start",
|
||||
"type": "textbox",
|
||||
"limit": 20,
|
||||
"language": {
|
||||
"search-no-results-text": "No results",
|
||||
"search-matching-documents-text": "matching documents",
|
||||
"search-copy-link-title": "Copy link to search",
|
||||
"search-hide-matches-text": "Hide additional matches",
|
||||
"search-more-match-text": "more match in this document",
|
||||
"search-more-matches-text": "more matches in this document",
|
||||
"search-clear-button-title": "Clear",
|
||||
"search-detached-cancel-button-title": "Cancel",
|
||||
"search-submit-button-title": "Submit"
|
||||
}
|
||||
}</script>
|
||||
|
||||
|
||||
</head>
|
||||
|
||||
<body class="nav-sidebar floating">
|
||||
|
||||
<div id="quarto-search-results"></div>
|
||||
<header id="quarto-header" class="headroom fixed-top">
|
||||
<nav class="quarto-secondary-nav" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
|
||||
<div class="container-fluid d-flex justify-content-between">
|
||||
<h1 class="quarto-secondary-nav-title"><span class="chapter-number">5</span> <span class="chapter-title">Utilities</span></h1>
|
||||
<button type="button" class="quarto-btn-toggle btn" aria-label="Show secondary navigation">
|
||||
<i class="bi bi-chevron-right"></i>
|
||||
</button>
|
||||
</div>
|
||||
</nav>
|
||||
</header>
|
||||
<!-- content -->
|
||||
<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article">
|
||||
<!-- sidebar -->
|
||||
<nav id="quarto-sidebar" class="sidebar collapse sidebar-navigation floating overflow-auto">
|
||||
<div class="pt-lg-2 mt-2 text-left sidebar-header">
|
||||
<div class="sidebar-title mb-0 py-0">
|
||||
<a href="./">OBITools V4</a>
|
||||
</div>
|
||||
</div>
|
||||
<div class="mt-2 flex-shrink-0 align-items-center">
|
||||
<div class="sidebar-search">
|
||||
<div id="quarto-search" class="" title="Search"></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sidebar-menu-container">
|
||||
<ul class="list-unstyled mt-1">
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="./index.html" class="sidebar-item-text sidebar-link">Preface</a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="./intro.html" class="sidebar-item-text sidebar-link"><span class="chapter-number">1</span> <span class="chapter-title">The OBITools</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="./formats.html" class="sidebar-item-text sidebar-link"><span class="chapter-number">2</span> <span class="chapter-title">File formats usable with <em>OBITools</em></span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="./tutorial.html" class="sidebar-item-text sidebar-link"><span class="chapter-number">3</span> <span class="chapter-title">OBITools V4 Tutorial</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="./commands.html" class="sidebar-item-text sidebar-link"><span class="chapter-number">4</span> <span class="chapter-title">The <em>OBITools V4</em> commands</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="./utilities.html" class="sidebar-item-text sidebar-link active"><span class="chapter-number">5</span> <span class="chapter-title">Utilities</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="./library.html" class="sidebar-item-text sidebar-link"><span class="chapter-number">6</span> <span class="chapter-title">The GO <em>OBITools</em> library</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="./annexes.html" class="sidebar-item-text sidebar-link"><span class="chapter-number">7</span> <span class="chapter-title">Annexes</span></a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="sidebar-item">
|
||||
<div class="sidebar-item-container">
|
||||
<a href="./references.html" class="sidebar-item-text sidebar-link">References</a>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
<!-- margin-sidebar -->
|
||||
<div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
|
||||
|
||||
</div>
|
||||
<!-- main -->
|
||||
<main class="content" id="quarto-document-content">
|
||||
|
||||
<header id="title-block-header" class="quarto-title-block default">
|
||||
<div class="quarto-title">
|
||||
<h1 class="title d-none d-lg-block"><span class="chapter-number">5</span> <span class="chapter-title">Utilities</span></h1>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div class="quarto-title-meta">
|
||||
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
</header>
|
||||
|
||||
<section id="obicount" class="level4" data-number="5.0.0.1">
|
||||
<h4 data-number="5.0.0.1" class="anchored" data-anchor-id="obicount"><span class="header-section-number">5.0.0.1</span> <code>obicount</code></h4>
|
||||
</section>
|
||||
<section id="obidistribute" class="level4" data-number="5.0.0.2">
|
||||
<h4 data-number="5.0.0.2" class="anchored" data-anchor-id="obidistribute"><span class="header-section-number">5.0.0.2</span> <code>obidistribute</code></h4>
|
||||
</section>
|
||||
<section id="obifind" class="level4" data-number="5.0.0.3">
|
||||
<h4 data-number="5.0.0.3" class="anchored" data-anchor-id="obifind"><span class="header-section-number">5.0.0.3</span> <code>obifind</code></h4>
|
||||
<blockquote class="blockquote">
|
||||
<p>Replace the <code>ecofind</code> original <em>OBITools.</em></p>
|
||||
</blockquote>
|
||||
|
||||
|
||||
</section>
|
||||
|
||||
</main> <!-- /main -->
|
||||
<script id="quarto-html-after-body" type="application/javascript">
|
||||
window.document.addEventListener("DOMContentLoaded", function (event) {
|
||||
const toggleBodyColorMode = (bsSheetEl) => {
|
||||
const mode = bsSheetEl.getAttribute("data-mode");
|
||||
const bodyEl = window.document.querySelector("body");
|
||||
if (mode === "dark") {
|
||||
bodyEl.classList.add("quarto-dark");
|
||||
bodyEl.classList.remove("quarto-light");
|
||||
} else {
|
||||
bodyEl.classList.add("quarto-light");
|
||||
bodyEl.classList.remove("quarto-dark");
|
||||
}
|
||||
}
|
||||
const toggleBodyColorPrimary = () => {
|
||||
const bsSheetEl = window.document.querySelector("link#quarto-bootstrap");
|
||||
if (bsSheetEl) {
|
||||
toggleBodyColorMode(bsSheetEl);
|
||||
}
|
||||
}
|
||||
toggleBodyColorPrimary();
|
||||
const icon = "";
|
||||
const anchorJS = new window.AnchorJS();
|
||||
anchorJS.options = {
|
||||
placement: 'right',
|
||||
icon: icon
|
||||
};
|
||||
anchorJS.add('.anchored');
|
||||
const clipboard = new window.ClipboardJS('.code-copy-button', {
|
||||
target: function(trigger) {
|
||||
return trigger.previousElementSibling;
|
||||
}
|
||||
});
|
||||
clipboard.on('success', function(e) {
|
||||
// button target
|
||||
const button = e.trigger;
|
||||
// don't keep focus
|
||||
button.blur();
|
||||
// flash "checked"
|
||||
button.classList.add('code-copy-button-checked');
|
||||
var currentTitle = button.getAttribute("title");
|
||||
button.setAttribute("title", "Copied!");
|
||||
let tooltip;
|
||||
if (window.bootstrap) {
|
||||
button.setAttribute("data-bs-toggle", "tooltip");
|
||||
button.setAttribute("data-bs-placement", "left");
|
||||
button.setAttribute("data-bs-title", "Copied!");
|
||||
tooltip = new bootstrap.Tooltip(button,
|
||||
{ trigger: "manual",
|
||||
customClass: "code-copy-button-tooltip",
|
||||
offset: [0, -8]});
|
||||
tooltip.show();
|
||||
}
|
||||
setTimeout(function() {
|
||||
if (tooltip) {
|
||||
tooltip.hide();
|
||||
button.removeAttribute("data-bs-title");
|
||||
button.removeAttribute("data-bs-toggle");
|
||||
button.removeAttribute("data-bs-placement");
|
||||
}
|
||||
button.setAttribute("title", currentTitle);
|
||||
button.classList.remove('code-copy-button-checked');
|
||||
}, 1000);
|
||||
// clear code selection
|
||||
e.clearSelection();
|
||||
});
|
||||
function tippyHover(el, contentFn) {
|
||||
const config = {
|
||||
allowHTML: true,
|
||||
content: contentFn,
|
||||
maxWidth: 500,
|
||||
delay: 100,
|
||||
arrow: false,
|
||||
appendTo: function(el) {
|
||||
return el.parentElement;
|
||||
},
|
||||
interactive: true,
|
||||
interactiveBorder: 10,
|
||||
theme: 'quarto',
|
||||
placement: 'bottom-start'
|
||||
};
|
||||
window.tippy(el, config);
|
||||
}
|
||||
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
|
||||
for (var i=0; i<noterefs.length; i++) {
|
||||
const ref = noterefs[i];
|
||||
tippyHover(ref, function() {
|
||||
// use id or data attribute instead here
|
||||
let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
|
||||
try { href = new URL(href).hash; } catch {}
|
||||
const id = href.replace(/^#\/?/, "");
|
||||
const note = window.document.getElementById(id);
|
||||
return note.innerHTML;
|
||||
});
|
||||
}
|
||||
const findCites = (el) => {
|
||||
const parentEl = el.parentElement;
|
||||
if (parentEl) {
|
||||
const cites = parentEl.dataset.cites;
|
||||
if (cites) {
|
||||
return {
|
||||
el,
|
||||
cites: cites.split(' ')
|
||||
};
|
||||
} else {
|
||||
return findCites(el.parentElement)
|
||||
}
|
||||
} else {
|
||||
return undefined;
|
||||
}
|
||||
};
|
||||
var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
|
||||
for (var i=0; i<bibliorefs.length; i++) {
|
||||
const ref = bibliorefs[i];
|
||||
const citeInfo = findCites(ref);
|
||||
if (citeInfo) {
|
||||
tippyHover(citeInfo.el, function() {
|
||||
var popup = window.document.createElement('div');
|
||||
citeInfo.cites.forEach(function(cite) {
|
||||
var citeDiv = window.document.createElement('div');
|
||||
citeDiv.classList.add('hanging-indent');
|
||||
citeDiv.classList.add('csl-entry');
|
||||
var biblioDiv = window.document.getElementById('ref-' + cite);
|
||||
if (biblioDiv) {
|
||||
citeDiv.innerHTML = biblioDiv.innerHTML;
|
||||
}
|
||||
popup.appendChild(citeDiv);
|
||||
});
|
||||
return popup.innerHTML;
|
||||
});
|
||||
}
|
||||
}
|
||||
});
|
||||
</script>
|
||||
<nav class="page-navigation">
|
||||
<div class="nav-page nav-page-previous">
|
||||
<a href="./commands.html" class="pagination-link">
|
||||
<i class="bi bi-arrow-left-short"></i> <span class="nav-page-text"><span class="chapter-number">4</span> <span class="chapter-title">The <em>OBITools V4</em> commands</span></span>
|
||||
</a>
|
||||
</div>
|
||||
<div class="nav-page nav-page-next">
|
||||
<a href="./library.html" class="pagination-link">
|
||||
<span class="nav-page-text"><span class="chapter-number">6</span> <span class="chapter-title">The GO <em>OBITools</em> library</span></span> <i class="bi bi-arrow-right-short"></i>
|
||||
</a>
|
||||
</div>
|
||||
</nav>
|
||||
</div> <!-- /content -->
|
||||
|
||||
|
||||
|
||||
</body></html>
|
139
doc/_obipairing.qmd
Normal file
139
doc/_obipairing.qmd
Normal file
@ -0,0 +1,139 @@
|
||||
### `obipairing`
|
||||
|
||||
> Replaces the `illuminapairedend` command of the original *OBITools*
|
||||
|
||||
#### Alignment procedure {.unnumbered}
|
||||
|
||||
`obipairing` introduces a new alignment algorithm compared to the `illuminapairedend` command of the `OBITools V2`.
|
||||
Nevertheless, this new algorithm has been designed to produce the same results as the previous one, except in very few cases.
|
||||
|
||||
The new algorithm is a two-step procedure. First, a FASTN-type algorithm [@Lipman1985-hw] identifies the best offset between the two matched readings. This identifies the region of overlap.
|
||||
|
||||
In the second step, the matching regions of the two reads are extracted along with a flanking sequence of $\Delta$ base pairs. The two subsequences are then aligned using a "one side free end-gap" dynamic programming algorithm. This latter step is only called if at least one mismatch is detected by the FASTN step.
|
||||
|
||||
Unless the similarity between the two reads at their overlap region is very low, the addition of the flanking regions in the second step of the alignment ensures the same alignment as if the dynamic programming alignment was performed on the full reads.
|
||||
|
||||
#### The scoring system {.unnumbered}
|
||||
|
||||
In the dynamic programming step, the match and mismatch scores take into account the quality scores of the two aligned nucleotides. By taking these into account, the probability of a true match can be calculated for each aligned base pair.
|
||||
|
||||
If we consider a nucleotide read with a quality score $Q$, the probability of misreading this base ($P_E$) is :
|
||||
$$
|
||||
P_E = 10^{-\frac{Q}{10}}
|
||||
$$
|
||||
|
||||
Thus, when a given nucleotide $X$ is observed with the quality score $Q$, the probability that $X$ is really an $X$ is :
|
||||
|
||||
$$
|
||||
P(X=X) = 1 - P_E
|
||||
$$
|
||||
|
||||
Otherwise, $X$ is actually one of the three other possible nucleotides ($X_{E1}$, $X_{E2}$ or $X_{E3}$). If we suppose that the three reading errors have the same probability :
|
||||
|
||||
$$
|
||||
P(X=X_{E1}) = P(X=X_{E2}) = P(X=X_{E3}) = \frac{P_E}{3}
|
||||
$$
|
||||
|
||||
At each position in an alignment where the two nucleotides $X_1$ and $X_2$ face each other (not a gapped position), the probability of a true match varies depending on whether $X_1=X_2$, an observed match, or $X_1 \neq X_2$, an observed mismatch.
|
||||
|
||||
**Probability of a true match when $X_1=X_2$**
|
||||
|
||||
That probability can be divided into two parts. First, $X_1$ and $X_2$ have both been correctly read. The corresponding probability is :
|
||||
|
||||
$$
|
||||
\begin{aligned}
|
||||
P_{TM} &= (1- P_{E1})(1-P_{E2})\\
|
||||
&=(1 - 10^{-\frac{Q_1}{10} } )(1 - 10^{-\frac{Q_2}{10}} )
|
||||
\end{aligned}
|
||||
$$
|
||||
|
||||
Secondly, a match can occur if the true nucleotides read as $X_1$ and $X_2$ are not $X_1$ and $X_2$ but are identical to each other.
|
||||
|
||||
$$
|
||||
\begin{aligned}
|
||||
P(X_1==X_{E1}) \cap P(X_2==X_{E1}) &= \frac{P_{E1} P_{E2}}{9} \\
|
||||
P(X_1==X_{Ex}) \cap P(X_2==X_{Ex}) & = \frac{P_{E1} P_{E2}}{3}
|
||||
\end{aligned}
|
||||
$$
|
||||
|
||||
The probability of a true match between $X_1$ and $X_2$ when $X_1 = X_2$ (an observed match) is :
|
||||
|
||||
$$
|
||||
\begin{aligned}
|
||||
P(MATCH | X_1 = X_2) = (1- P_{E1})(1-P_{E2}) + \frac{P_{E1} P_{E2}}{3}
|
||||
\end{aligned}
|
||||
$$
|
||||
|
||||
**Probability of a true match when $X_1 \neq X_2$**
|
||||
|
||||
That probability can be divided into three parts.
|
||||
|
||||
a. $X_1$ has been correctly read and $X_2$ is a sequencing error and is actually equal to $X_1$.
|
||||
$$
|
||||
P_a = (1-P_{E1})\frac{P_{E2}}{3}
|
||||
$$
|
||||
a. $X_2$ has been correctly read and $X_1$ is a sequencing error and is actually equal to $X_2$.
|
||||
$$
|
||||
P_b = (1-P_{E2})\frac{P_{E1}}{3}
|
||||
$$
|
||||
a. $X_1$ and $X_2$ both correspond to sequencing errors but are actually the same base $X_{Ex}$
|
||||
$$
|
||||
P_c = 2\frac{P_{E1} P_{E2}}{9}
|
||||
$$
|
||||
|
||||
Consequently :
|
||||
$$
|
||||
\begin{aligned}
|
||||
P(MATCH | X_1 \neq X_2) = (1-P_{E1})\frac{P_{E2}}{3} + (1-P_{E2})\frac{P_{E1}}{3} + 2\frac{P_{E1} P_{E2}}{9}
|
||||
\end{aligned}
|
||||
$$
|
||||
|
||||
**Probability of a match under the random model**
|
||||
|
||||
The second model considered is a pure random model where every base is equiprobable, so each nucleotide has a probability of occurrence of $0.25$. Under that hypothesis :
|
||||
|
||||
$$
|
||||
P(MATCH | \text{Random model}) = 0.25
|
||||
$$
|
||||
|
||||
**The score is a log-likelihood ratio**
|
||||
|
||||
The score is defined as the logarithm of the ratio between the likelihood of the observations under the sequencing error model and their likelihood under the random model.
|
||||
|
||||
|
||||
```{r}
|
||||
#| echo: false
|
||||
#| warning: false
|
||||
#| fig-cap: "Evolution of the match and mismatch scores when the quality of one base is 20 while the quality of the second ranges from 10 to 40."
|
||||
require(ggplot2)
|
||||
require(tidyverse)
|
||||
|
||||
# Score of an observed match between two aligned bases.
#
# Q1, Q2: Phred quality scores of the two bases (vectorized).
# Returns ten times the natural log of the ratio between the probability of
# a true match given an observed match and the 0.25 probability of a match
# under the random model, rounded to the nearest integer.
Smatch <- function(Q1,Q2) {
  # Misreading probabilities derived from the quality scores.
  err1 <- 10^(-Q1/10)
  err2 <- 10^(-Q2/10)

  # Either both bases were read correctly, or both are sequencing errors
  # whose true bases are nevertheless identical.
  p_true_match <- (1 - err1) * (1 - err2) + err1 * err2 / 3

  # log(p) + log(4) is log(p / 0.25), the log ratio against the random model.
  round((log(p_true_match) + log(4)) * 10)
}
|
||||
|
||||
# Score of an observed mismatch between two aligned bases.
#
# Q1, Q2: Phred quality scores of the two bases (vectorized).
# Returns ten times the natural log of the ratio between the probability of
# a true match given an observed mismatch and the 0.25 probability of a
# match under the random model, rounded to the nearest integer.
Smismatch <- function(Q1,Q2) {
  # Misreading probabilities and their complements.
  PE1 <- 10^(-Q1/10)
  PE2 <- 10^(-Q2/10)
  PT1 <- 1 - PE1
  PT2 <- 1 - PE2

  # Three ways an observed mismatch can hide a true match:
  #  - base 1 is correct and base 2 is an error that is really base 1,
  #  - base 2 is correct and base 1 is an error that is really base 2,
  #  - both are errors sharing one of the two remaining true bases; each
  #    such case has probability (PE1 / 3) * (PE2 / 3), hence 2 / 9.
  PM <- PT1 * PE2 / 3 + PT2 * PE1 / 3 + 2 * PE1 * PE2 / 9

  # log(PM) + log(4) is log(PM / 0.25), the log ratio against the random model.
  round((log(PM) + log(4)) * 10)
}
|
||||
|
||||
# Plot the match and mismatch scores for Q1 from 10 to 40 with Q2 fixed at 20.
# Both scoring functions are vectorized, so they are applied directly to the
# Q column; the table is then reshaped to one row per (Q, Class) pair so that
# each score class is drawn as its own line.
tibble(Q = 10:40) %>%
  mutate(
    Match = Smatch(Q, 20),
    Mismatch = Smismatch(Q, 20)
  ) %>%
  pivot_longer(cols = -Q, names_to = "Class", values_to = "Score") %>%
  ggplot(aes(x = Q, y = Score, col = Class)) +
  geom_line() +
  labs(x = "Q1 (Q2=20)")
|
||||
```
|
26
doc/_utilities.qmd
Normal file
26
doc/_utilities.qmd
Normal file
@ -0,0 +1,26 @@
|
||||
## Utilities
|
||||
|
||||
### `obicount`
|
||||
|
||||
`obicount` counts the number of sequence records, the sum of the ``count`` attributes, and the sum
|
||||
of the length of all the sequences.
|
||||
|
||||
*Example:*
|
||||
|
||||
``` bash
|
||||
obicount seq.fasta
|
||||
```
|
||||
Prints the number of sequence records contained in the ``seq.fasta``
|
||||
file and the sum of their ``count`` attributes.
|
||||
|
||||
*Options specific to the command*
|
||||
|
||||
- `--reads|-r` Prints read counts.
|
||||
- `--symbols|-s` Prints symbol counts.
|
||||
- `--variants|-v` Prints variant counts.
|
||||
|
||||
### `obidistribute`
|
||||
|
||||
### `obifind`
|
||||
|
||||
> Replaces the `ecofind` command of the original *OBITools*.
|
208
doc/commands.qmd
208
doc/commands.qmd
@ -57,191 +57,89 @@ Several OBITools (*e.g.* obigrep, obiannotate) allow the user to specify some si
|
||||
|
||||
### Variables usable in the expression
|
||||
|
||||
#### sequence
|
||||
- `sequence` is the sequence object on which the expression is evaluated.
|
||||
- `annotations` is a map object containing every annotation associated with the currently processed sequence.
|
||||
|
||||
sequence is the sequence object on which the expression is evaluated
|
||||
### Function defined in the language
|
||||
|
||||
#### annotation
|
||||
#### Introspection functions {.unnumbered}
|
||||
|
||||
### Function defined in the language
|
||||
- `len(x)` is a generic function that retrieves the size of an object. It returns
|
||||
the length of a sequence, the number of elements in a map like `annotations`, or the number
|
||||
of elements in an array. The returned value is an `int`.
|
||||
|
||||
#### len
|
||||
#### Cast functions {.unnumbered}
|
||||
|
||||
#### ismap
|
||||
- `int(x)` converts if possible the `x` value to an integer value. The function
|
||||
returns an `int`.
|
||||
- `numeric(x)` converts if possible the `x` value to a float value. The function
|
||||
returns a `float`.
|
||||
- `bool(x)` converts if possible the `x` value to a boolean value. The function
|
||||
returns a `bool`.
|
||||
|
||||
#### hasattribute
|
||||
#### String related functions {.unnumbered}
|
||||
|
||||
#### min
|
||||
|
||||
#### max
|
||||
- `printf(format,...)` allows to combine several values to build a string. `format` follows the
|
||||
classical C `printf` syntax. The function returns a `string`.
|
||||
- `subspc(x)` substitutes every space in the `x` string by the underscore (`_`) character. The function
|
||||
returns a `string`.
|
||||
|
||||
### Accessing to the sequence annotations
|
||||
|
||||
The `annotations` variable is a map object containing all the annotations associated with the currently processed sequence. The keys of the map are the attribute names. There are two ways to retrieve
|
||||
an annotation. It is possible to use the classical `[]` indexing operator, putting the attribute name
|
||||
quoted by double quotes between them.
|
||||
|
||||
```go
|
||||
annotations["direction"]
|
||||
```
|
||||
|
||||
The above code retrieves the `direction` annotation. A second notation using the dot (`.`) is often
|
||||
more convenient.
|
||||
|
||||
```go
|
||||
annotations.direction
|
||||
```
|
||||
|
||||
Special attributes of the sequence are accessible only by dedicated methods of the `sequence` object.
|
||||
|
||||
- The sequence identifier : `Id()`
|
||||
- The sequence definition : `Definition()`
|
||||
|
||||
|
||||
## Metabarcode design and quality assessment
|
||||
|
||||
#### `obipcr`
|
||||
### `obipcr`
|
||||
|
||||
> Replace the `ecoPCR` original *OBITools*
|
||||
|
||||
## File format conversions
|
||||
|
||||
#### `obiconvert`
|
||||
### `obiconvert`
|
||||
|
||||
## Sequence annotations
|
||||
|
||||
#### `obitag`
|
||||
### `obiannotate`
|
||||
|
||||
### `obitag`
|
||||
|
||||
## Computations on sequences
|
||||
|
||||
### `obipairing`
|
||||
{{< include _obipairing.qmd >}}
|
||||
|
||||
> Replace the `illuminapairedends` original *OBITools*
|
||||
|
||||
#### Alignment procedure
|
||||
|
||||
`obipairing` is introducing a new alignment algorithm compared to the `illuminapairedend` command of the `OBITools V2`.
|
||||
Nethertheless this new algorithm has been design to produce the same results than the previous, except in very few cases.
|
||||
|
||||
The new algorithm is a two-step procedure. First, a FASTN-type algorithm [@Lipman1985-hw] identifies the best offset between the two matched readings. This identifies the region of overlap.
|
||||
|
||||
In the second step, the matching regions of the two reads are extracted along with a flanking sequence of $\Delta$ base pairs. The two subsequences are then aligned using a "one side free end-gap" dynamic programming algorithm. This latter step is only called if at least one mismatch is detected by the FASTP step.
|
||||
|
||||
Unless the similarity between the two reads at their overlap region is very low, the addition of the flanking regions in the second step of the alignment ensures the same alignment as if the dynamic programming alignment was performed on the full reads.
|
||||
|
||||
#### The scoring system
|
||||
|
||||
In the dynamic programming step, the match and mismatch scores take into account the quality scores of the two aligned nucleotides. By taking these into account, the probability of a true match can be calculated for each aligned base pair.
|
||||
|
||||
If we consider a nucleotide read with a quality score $Q$, the probability of misreading this base ($P_E$) is :
|
||||
$$
|
||||
P_E = 10^{-\frac{Q}{10}}
|
||||
$$
|
||||
|
||||
Thus, when a given nucleotide $X$ is observed with the quality score $Q$. The probability that $X$ is really an $X$ is :
|
||||
|
||||
$$
|
||||
P(X=X) = 1 - P_E
|
||||
$$
|
||||
|
||||
Otherwise, $X$ is actually one of the three other possible nucleotides ($X_{E1}$, $X_{E2}$ or $X_{E3}$). If we suppose that the three reading error have the same probability :
|
||||
|
||||
$$
|
||||
P(X=X_{E1}) = P(X=X_{E3}) = P(X=X_{E3}) = \frac{P_E}{3}
|
||||
$$
|
||||
|
||||
At each position in an alignment where the two nucleotides $X_1$ and $X_2$ face each other (not a gapped position), the probability of a true match varies depending on whether $X_1=X_2$, an observed match, or $X_1 \neq X_2$, an observed mismatch.
|
||||
|
||||
**Probability of a true match when $X_1=X_2$**
|
||||
|
||||
That probability can be divided in two parts. First $X_1$ and $X_2$ have been correctly read. The corresponding probability is :
|
||||
|
||||
$$
|
||||
\begin{aligned}
|
||||
P_{TM} &= (1- PE_1)(1-PE_2)\\
|
||||
&=(1 - 10^{-\frac{Q_1}{10} } )(1 - 10^{-\frac{Q_2}{10}} )
|
||||
\end{aligned}
|
||||
$$
|
||||
|
||||
Secondly, a match can occure if the true nucleotides read as $X_1$ and $X_2$ are not $X_1$ and $X_2$ but identical.
|
||||
|
||||
$$
|
||||
\begin{aligned}
|
||||
P(X_1==X_{E1}) \cap P(X_2==X_{E1}) &= \frac{P_{E1} P_{E2}}{9} \\
|
||||
P(X_1==X_{Ex}) \cap P(X_2==X_{Ex}) & = \frac{P_{E1} P_{E2}}{3}
|
||||
\end{aligned}
|
||||
$$
|
||||
|
||||
The probability of a true match between $X_1$ and $X_2$ when $X_1 = X_2$ an observed match :
|
||||
|
||||
$$
|
||||
\begin{aligned}
|
||||
P(MATCH | X_1 = X_2) = (1- PE_1)(1-PE_2) + \frac{P_{E1} P_{E2}}{3}
|
||||
\end{aligned}
|
||||
$$
|
||||
|
||||
**Probability of a true match when $X_1 \neq X_2$**
|
||||
|
||||
That probability can be divided in three parts.
|
||||
|
||||
a. $X_1$ has been correctly read and $X_2$ is a sequencing error and is actually equal to $X_1$.
|
||||
$$
|
||||
P_a = (1-P_{E1})\frac{P_{E2}}{3}
|
||||
$$
|
||||
a. $X_2$ has been correctly read and $X_1$ is a sequencing error and is actually equal to $X_2$.
|
||||
$$
|
||||
P_b = (1-P_{E2})\frac{P_{E1}}{3}
|
||||
$$
|
||||
a. $X_1$ and $X_2$ corresponds to sequencing error but are actually the same base $X_{Ex}$
|
||||
$$
|
||||
P_c = 2\frac{P_{E1} P_{E2}}{9}
|
||||
$$
|
||||
|
||||
Consequently :
|
||||
$$
|
||||
\begin{aligned}
|
||||
P(MATCH | X_1 \neq X_2) = (1-P_{E1})\frac{P_{E2}}{3} + (1-P_{E2})\frac{P_{E1}}{3} + 2\frac{P_{E1} P_{E2}}{9}
|
||||
\end{aligned}
|
||||
$$
|
||||
|
||||
**Probability of a match under the random model**
|
||||
|
||||
|
||||
```{r}
|
||||
#| echo: false
|
||||
#| warning: false
|
||||
#| fig-cap: "Evolution of the match and mismatch scores when the quality of base is 20 while the second range from 10 to 40."
|
||||
require(ggplot2)
|
||||
require(tidyverse)
|
||||
|
||||
Smatch <- function(Q1,Q2) {
|
||||
PE1 <- 10^(-Q1/10)
|
||||
PE2 <- 10^(-Q2/10)
|
||||
PT1 <- 1 - PE1
|
||||
PT2 <- 1 - PE2
|
||||
|
||||
PM <- PT1*PT2 + PE1 * PE2 / 3
|
||||
round((log(PM)+log(4))*10)
|
||||
}
|
||||
|
||||
Smismatch <- function(Q1,Q2) {
|
||||
|
||||
PE1 <- 10^(-Q1/10)
|
||||
PE2 <- 10^(-Q2/10)
|
||||
PT1 <- 1 - PE1
|
||||
PT2 <- 1 - PE2
|
||||
|
||||
PM <- PE1*PT2/3 + PT1 * PE2 / 3 + 2/3 * PE1 * PE2
|
||||
round((log(PM)+log(4))*10)
|
||||
}
|
||||
|
||||
tibble(Q = 10:40) %>%
|
||||
mutate(Match = mapply(Smatch,Q,20),
|
||||
Mismatch = mapply(Smismatch,Q,20),
|
||||
) %>% pivot_longer(cols = -Q, names_to = "Class", values_to = "Score") %>%
|
||||
ggplot(aes(x=Q,y=Score,col=Class)) +
|
||||
geom_line() +
|
||||
xlab("Q1 (Q2=20)")
|
||||
```
|
||||
#### `obimultiplex`
|
||||
### `obimultiplex`
|
||||
|
||||
> Replace the `ngsfilter` original *OBITools*
|
||||
|
||||
#### `obicomplement`
|
||||
### `obicomplement`
|
||||
|
||||
#### `obiclean`
|
||||
### `obiclean`
|
||||
|
||||
#### `obiuniq`
|
||||
### `obiuniq`
|
||||
|
||||
## Sequence sampling and filtering
|
||||
## Sequence sampling and filtering
|
||||
|
||||
#### `obigrep`
|
||||
### `obigrep`
|
||||
|
||||
### Utilities
|
||||
{{< include _utilities.qmd >}}
|
||||
|
||||
#### `obicount`
|
||||
|
||||
#### `obidistribute`
|
||||
|
||||
#### `obifind`
|
||||
|
||||
> Replace the `ecofind` original *OBITools.*
|
||||
|
BIN
doc/commands_files/figure-epub/unnamed-chunk-1-1.png
Normal file
BIN
doc/commands_files/figure-epub/unnamed-chunk-1-1.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 19 KiB |
BIN
doc/commands_files/figure-pdf/unnamed-chunk-1-1.pdf
Normal file
BIN
doc/commands_files/figure-pdf/unnamed-chunk-1-1.pdf
Normal file
Binary file not shown.
Reference in New Issue
Block a user