mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Refactoring codes for removing buffer size options. And some other changes...
Former-commit-id: 10b57cc1a27446ade3c444217341e9651e89cdce
This commit is contained in:
@ -1,82 +1,107 @@
|
|||||||
# Annexes
|
# Annexes
|
||||||
|
|
||||||
### Sequence attributes
|
## Sequence attributes
|
||||||
|
|
||||||
#### Reserved sequence attributes
|
**ali_dir (`string`)**
|
||||||
|
|
||||||
##### `ali_dir`
|
- Set by the *obipairing* tool
|
||||||
|
- The attribute can contain 2 string values `left` or `right`.
|
||||||
|
|
||||||
###### Type : `string`
|
The alignment generated by *obipairing* is a 3'-end gap free algorithm.
|
||||||
|
Two cases can occur when aligning the forward and reverse reads. If the
|
||||||
|
barcode is long enough, both the reads overlap only on their 3' ends. In
|
||||||
|
such case, the alignment direction `ali_dir` is set to *left*. If the
|
||||||
|
barcode is shorter than the read length, the paired reads overlap by
|
||||||
|
their 5' ends, and the complete barcode is sequenced by both the reads.
|
||||||
|
In that latter case, `ali_dir` is set to *right*.
|
||||||
|
|
||||||
The attribute can contain 2 string values `"left"` or `"right".`
|
**ali_length (`int`)**
|
||||||
|
|
||||||
###### Set by the *obipairing* tool
|
- Set by the *obipairing* tool
|
||||||
|
|
||||||
The alignment generated by *obipairing* is a 3'-end gap free algorithm.
|
Length of the aligned parts when merging forward and reverse reads
|
||||||
Two cases can occur when aligning the forward and reverse reads. If the
|
|
||||||
barcode is long enough, both the reads overlap only on their 3' ends. In
|
|
||||||
such case, the alignment direction `ali_dir` is set to *left*. If the
|
|
||||||
barcode is shorter than the read length, the paired reads overlap by
|
|
||||||
their 5' ends, and the complete barcode is sequenced by both the reads.
|
|
||||||
In that later case, `ali_dir` is set to *right*.
|
|
||||||
|
|
||||||
##### `ali_length`
|
|
||||||
|
|
||||||
###### Set by the *obipairing* tool
|
**count (`int`)**
|
||||||
|
|
||||||
Length of the aligned parts when merging forward and reverse reads
|
- Set by the *obiuniq* tool
|
||||||
|
- Getter : method `Count()`
|
||||||
|
- Setter : method `SetCount(int)`
|
||||||
|
|
||||||
##### `count` : the number of sequence occurrences
|
The `count` attribute indicates how many strictly identical reads
|
||||||
|
have been merged in a single record. It contains an integer value. If it
|
||||||
|
is absent this means that the sequence record represents a single
|
||||||
|
occurrence of the sequence.
|
||||||
|
|
||||||
###### Set by the *obiuniq* tool
|
The `Count()` method gives access to the `count` attribute as an
|
||||||
|
integer value. If the `count` attribute is not defined for the given
|
||||||
|
sequence, the value *1* is returned.
|
||||||
|
|
||||||
The `count` attribute indicates how-many strictly identical sequences
|
**merged_* (`map[string]int`)**
|
||||||
have been merged in a single record. It contains an integer value. If it
|
|
||||||
is absent this means that the sequence record represents a single
|
|
||||||
occurrence of the sequence.
|
|
||||||
|
|
||||||
###### Getter : method `Count()`
|
- Set by the *obiuniq* tool
|
||||||
|
|
||||||
The `Count()` method allows to access to the count attribute as an
|
The `-m` option of the *obiuniq* tool allows for keeping track of the
|
||||||
integer value. If the `count` attribute is not defined for the given
|
distribution of the values stored in a given attribute of interest. Often
|
||||||
sequence, the value *1* is returned
|
this option is used to summarise distribution of a sequence variant
|
||||||
|
across samples when *obiuniq* is run after running *obimultiplex*. The
|
||||||
|
actual name of the attribute depends on the name of the monitored
|
||||||
|
attribute. If `-m` option is used with the attribute *sample*, then this
|
||||||
|
attribute is named *merged_sample*.
|
||||||
|
|
||||||
##### `merged_*`
|
**mode (`string`)**
|
||||||
|
|
||||||
###### Type : `map[string]int`
|
- Set by the *obipairing* tool
|
||||||
|
- The attribute can contain 2 string values `join` or `alignment`.
|
||||||
|
|
||||||
###### Set by the *obiuniq* tool
|
|
||||||
|
|
||||||
The `-m` option of the *obiuniq* tools allows for keeping track of the
|
**obitag_ref_index (`map[string]string`)**
|
||||||
distribution of the values stored in given attribute of interest. Often
|
|
||||||
this option is used to summarise distribution of a sequence variant
|
|
||||||
accross samples when *obiuniq* is run after running *obimultiplex*. The
|
|
||||||
actual name of the attribute depends on the name of the monitored
|
|
||||||
attribute. If `-m` option is used with the attribute *sample*, then this
|
|
||||||
attribute names *merged_sample*.
|
|
||||||
|
|
||||||
##### `mode`
|
- Set by the *obirefidx* tool.
|
||||||
|
|
||||||
###### Set by the *obipairing* tool
|
It summarises the taxonomic annotation to which a match to that sequence must
|
||||||
|
lead according to the number of differences existing between the query
|
||||||
|
sequence and the reference sequence having that tag.
|
||||||
|
|
||||||
**`obitag_ref_index`**
|
```json
|
||||||
|
{"0":"9606@Homo sapiens@species",
|
||||||
|
"2":"207598@Homininae@subfamily",
|
||||||
|
"3":"9604@Hominidae@family",
|
||||||
|
"8":"314295@Hominoidea@superfamily",
|
||||||
|
"10":"9526@Catarrhini@parvorder",
|
||||||
|
"12":"1437010@Boreoeutheria@clade",
|
||||||
|
"16":"9347@Eutheria@clade",
|
||||||
|
"17":"40674@Mammalia@class",
|
||||||
|
"22":"117571@Euteleostomi@clade",
|
||||||
|
"25":"7776@Gnathostomata@clade",
|
||||||
|
"29":"33213@Bilateria@clade",
|
||||||
|
"30":"6072@Eumetazoa@clade"}
|
||||||
|
```
|
||||||
|
|
||||||
###### Set by the *obirefidx* tool.
|
**pairing_mismatches (`map[string]string`)**
|
||||||
|
|
||||||
It resumes to which taxonomic annotation a match to that sequence must
|
- Set by the *obipairing* tool
|
||||||
lead according to the number of differences existing between the query
|
|
||||||
sequence and the reference sequence having that tag.
|
|
||||||
|
|
||||||
###### Getter : method `Count()`
|
**seq_a_single (`int`)**
|
||||||
|
|
||||||
##### `pairing_mismatches`
|
- Set by the *obipairing* tool
|
||||||
|
|
||||||
###### Set by the *obipairing* tool
|
**seq_ab_match (`int`)**
|
||||||
|
|
||||||
##### `score`
|
- Set by the *obipairing* tool
|
||||||
|
|
||||||
###### Set by the *obipairing* tool
|
**seq_b_single (`int`)**
|
||||||
|
|
||||||
##### `score_norm`
|
- Set by the *obipairing* tool
|
||||||
|
|
||||||
###### Set by the *obipairing* tool
|
**score (`int`)**
|
||||||
|
|
||||||
|
- Set by the *obipairing* tool
|
||||||
|
|
||||||
|
**score_norm (`float`)**
|
||||||
|
|
||||||
|
- Set by the *obipairing* tool
|
||||||
|
- The value ranges between 0 and 1.
|
||||||
|
|
||||||
|
Score of the alignment between forward and reverse reads expressed as a fraction of identity.
|
||||||
|
|
||||||
|
@ -10,13 +10,39 @@
|
|||||||
|
|
||||||
Sequences can be selected on several of their characteristics: their length, their id, their sequence. Options allow for specifying the conditions of selection.
|
Sequences can be selected on several of their characteristics: their length, their id, their sequence. Options allow for specifying the conditions of selection.
|
||||||
|
|
||||||
|
**Selection based on the sequence**
|
||||||
|
|
||||||
|
|
||||||
|
Sequence records can be selected according to whether or not they match a pattern. The simplest pattern is a short sequence (*e.g.* `AACCTT`). But the use of regular expressions allows for looking for more complex patterns. As an example, `A[TG]C+G` matches an `A`, followed by a `T` or a `G`, then one or several `C` and finally a `G`.
|
||||||
|
|
||||||
|
{{< include ../lib/options/selection/_sequence.qmd >}}
|
||||||
|
|
||||||
|
*Examples:*
|
||||||
|
|
||||||
|
: Selects only the sequence records that contain an *EcoRI* restriction site.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
obigrep -s 'GAATTC' seq1.fasta > seq2.fasta
|
||||||
|
```
|
||||||
|
|
||||||
|
: Selects only the sequence records that contain a stretch of at least 10 ``A``.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
obigrep -s 'A{10,}' seq1.fasta > seq2.fasta
|
||||||
|
```
|
||||||
|
|
||||||
|
: Selects only the sequence records that do not contain ambiguous nucleotides.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
obigrep -s '^[ACGT]+$' seq1.fasta > seq2.fasta
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
{{< include ../lib/options/selection/_min-count.qmd >}}
|
{{< include ../lib/options/selection/_min-count.qmd >}}
|
||||||
|
|
||||||
{{< include ../lib/options/selection/_max-count.qmd >}}
|
{{< include ../lib/options/selection/_max-count.qmd >}}
|
||||||
|
|
||||||
Example
|
*Examples*
|
||||||
|
|
||||||
: Selecting sequence records representing at least five reads in the dataset.
|
: Selecting sequence records representing at least five reads in the dataset.
|
||||||
|
|
||||||
|
@ -11,26 +11,64 @@ Several OBITools (*e.g.* obigrep, obiannotate) allow the user to specify some si
|
|||||||
|
|
||||||
### Introspection functions {.unnumbered}
|
### Introspection functions {.unnumbered}
|
||||||
|
|
||||||
- `len(x)`is a generic function allowing to retreive the size of a object. It returns
|
**`len(x)`**
|
||||||
|
|
||||||
|
: It is a generic function allowing to retrieve the size of an object. It returns
|
||||||
the length of a sequence, the number of elements in a map like `annotations`, the number
|
the length of a sequence, the number of elements in a map like `annotations`, the number
|
||||||
of elements in an array. The returned value is an `int`.
|
of elements in an array. The returned value is an `int`.
|
||||||
|
|
||||||
### Cast functions {.unnumbered}
|
### Cast functions {.unnumbered}
|
||||||
|
|
||||||
- `int(x)` converts if possible the `x` value to an integer value. The function
|
**`int(x)`**
|
||||||
|
|
||||||
|
: Converts if possible the `x` value to an integer value. The function
|
||||||
returns an `int`.
|
returns an `int`.
|
||||||
- `numeric(x)` converts if possible the `x` value to a float value. The function
|
|
||||||
|
**`numeric(x)`**
|
||||||
|
|
||||||
|
: Converts if possible the `x` value to a float value. The function
|
||||||
returns a `float`.
|
returns a `float`.
|
||||||
- `bool(x)` converts if possible the `x` value to a boolean value. The function
|
|
||||||
|
**`bool(x)`**
|
||||||
|
|
||||||
|
: Converts if possible the `x` value to a boolean value. The function
|
||||||
returns a `bool`.
|
returns a `bool`.
|
||||||
|
|
||||||
### String related functions {.unnumbered}
|
### String related functions {.unnumbered}
|
||||||
|
|
||||||
- `printf(format,...)` allows to combine several values to build a string. `format` follows the
|
**`printf(format,...)`**
|
||||||
|
|
||||||
|
: Allows to combine several values to build a string. `format` follows the
|
||||||
classical C `printf` syntax. The function returns a `string`.
|
classical C `printf` syntax. The function returns a `string`.
|
||||||
- `subspc(x)` substitutes every space in the `x` string by the underscore (`_`) character. The function
|
|
||||||
|
**`subspc(x)`**
|
||||||
|
|
||||||
|
: Substitutes every space in the `x` string by the underscore (`_`) character. The function
|
||||||
returns a `string`.
|
returns a `string`.
|
||||||
|
|
||||||
|
### Condition function {.unnumbered}
|
||||||
|
|
||||||
|
**`ifelse(condition,val1,val2)`**
|
||||||
|
|
||||||
|
: The `condition` value has to be a `bool` value. If it is `true` the function returns `val1`,
|
||||||
|
otherwise, it returns `val2`.
|
||||||
|
|
||||||
|
### Sequence analysis related function
|
||||||
|
|
||||||
|
**`composition(sequence)`**
|
||||||
|
|
||||||
|
: The nucleotide composition of the sequence is returned as a map indexed by `a`, `c`, `g`, or `t` and
|
||||||
|
each value is the number of occurrences of that nucleotide. A fifth key `others` accounts for
|
||||||
|
all other symbols.
|
||||||
|
|
||||||
|
**`gcskew(sequence)`**
|
||||||
|
|
||||||
|
: Computes the excess of G compared to C in the sequence, known as the GC skew.
|
||||||
|
|
||||||
|
$$
|
||||||
|
Skew_{GC}=\frac{G-C}{G+C}
|
||||||
|
$$
|
||||||
|
|
||||||
## Accessing to the sequence annotations
|
## Accessing to the sequence annotations
|
||||||
|
|
||||||
The `annotations` variable is a map object containing all the annotations associated to the currently processed sequence. The indices of the map are the attribute names. There are two possibilities to retrieve
|
The `annotations` variable is a map object containing all the annotations associated to the currently processed sequence. The indices of the map are the attribute names. There are two possibilities to retrieve
|
||||||
@ -53,4 +91,7 @@ Special attributes of the sequence are accessible only by dedicated methods of t
|
|||||||
- The sequence identifier : `Id()`
|
- The sequence identifier : `Id()`
|
||||||
- The sequence definition : `Definition()`
|
- The sequence definition : `Definition()`
|
||||||
|
|
||||||
|
```go
|
||||||
|
sequence.Id()
|
||||||
|
```
|
||||||
|
|
||||||
|
BIN
doc/build/_book/OBITools-V4.epub
vendored
BIN
doc/build/_book/OBITools-V4.epub
vendored
Binary file not shown.
205
doc/build/_book/annexes.html
vendored
205
doc/build/_book/annexes.html
vendored
@ -20,6 +20,69 @@ ul.task-list li input[type="checkbox"] {
|
|||||||
margin: 0 0.8em 0.2em -1.6em;
|
margin: 0 0.8em 0.2em -1.6em;
|
||||||
vertical-align: middle;
|
vertical-align: middle;
|
||||||
}
|
}
|
||||||
|
pre > code.sourceCode { white-space: pre; position: relative; }
|
||||||
|
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
|
||||||
|
pre > code.sourceCode > span:empty { height: 1.2em; }
|
||||||
|
.sourceCode { overflow: visible; }
|
||||||
|
code.sourceCode > span { color: inherit; text-decoration: inherit; }
|
||||||
|
div.sourceCode { margin: 1em 0; }
|
||||||
|
pre.sourceCode { margin: 0; }
|
||||||
|
@media screen {
|
||||||
|
div.sourceCode { overflow: auto; }
|
||||||
|
}
|
||||||
|
@media print {
|
||||||
|
pre > code.sourceCode { white-space: pre-wrap; }
|
||||||
|
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
|
||||||
|
}
|
||||||
|
pre.numberSource code
|
||||||
|
{ counter-reset: source-line 0; }
|
||||||
|
pre.numberSource code > span
|
||||||
|
{ position: relative; left: -4em; counter-increment: source-line; }
|
||||||
|
pre.numberSource code > span > a:first-child::before
|
||||||
|
{ content: counter(source-line);
|
||||||
|
position: relative; left: -1em; text-align: right; vertical-align: baseline;
|
||||||
|
border: none; display: inline-block;
|
||||||
|
-webkit-touch-callout: none; -webkit-user-select: none;
|
||||||
|
-khtml-user-select: none; -moz-user-select: none;
|
||||||
|
-ms-user-select: none; user-select: none;
|
||||||
|
padding: 0 4px; width: 4em;
|
||||||
|
color: #aaaaaa;
|
||||||
|
}
|
||||||
|
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
|
||||||
|
div.sourceCode
|
||||||
|
{ }
|
||||||
|
@media screen {
|
||||||
|
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
|
||||||
|
}
|
||||||
|
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
|
||||||
|
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
|
||||||
|
code span.at { color: #7d9029; } /* Attribute */
|
||||||
|
code span.bn { color: #40a070; } /* BaseN */
|
||||||
|
code span.bu { color: #008000; } /* BuiltIn */
|
||||||
|
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
|
||||||
|
code span.ch { color: #4070a0; } /* Char */
|
||||||
|
code span.cn { color: #880000; } /* Constant */
|
||||||
|
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
|
||||||
|
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
|
||||||
|
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
|
||||||
|
code span.dt { color: #902000; } /* DataType */
|
||||||
|
code span.dv { color: #40a070; } /* DecVal */
|
||||||
|
code span.er { color: #ff0000; font-weight: bold; } /* Error */
|
||||||
|
code span.ex { } /* Extension */
|
||||||
|
code span.fl { color: #40a070; } /* Float */
|
||||||
|
code span.fu { color: #06287e; } /* Function */
|
||||||
|
code span.im { color: #008000; font-weight: bold; } /* Import */
|
||||||
|
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
|
||||||
|
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
|
||||||
|
code span.op { color: #666666; } /* Operator */
|
||||||
|
code span.ot { color: #007020; } /* Other */
|
||||||
|
code span.pp { color: #bc7a00; } /* Preprocessor */
|
||||||
|
code span.sc { color: #4070a0; } /* SpecialChar */
|
||||||
|
code span.ss { color: #bb6688; } /* SpecialString */
|
||||||
|
code span.st { color: #4070a0; } /* String */
|
||||||
|
code span.va { color: #19177c; } /* Variable */
|
||||||
|
code span.vs { color: #4070a0; } /* VerbatimString */
|
||||||
|
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
|
||||||
</style>
|
</style>
|
||||||
|
|
||||||
|
|
||||||
@ -215,7 +278,7 @@ ul.task-list li input[type="checkbox"] {
|
|||||||
<h2 id="toc-title">Table of contents</h2>
|
<h2 id="toc-title">Table of contents</h2>
|
||||||
|
|
||||||
<ul>
|
<ul>
|
||||||
<li><a href="#sequence-attributes" id="toc-sequence-attributes" class="nav-link active" data-scroll-target="#sequence-attributes"><span class="toc-section-number">A.0.1</span> Sequence attributes</a></li>
|
<li><a href="#sequence-attributes" id="toc-sequence-attributes" class="nav-link active" data-scroll-target="#sequence-attributes"><span class="toc-section-number">A.1</span> Sequence attributes</a></li>
|
||||||
</ul>
|
</ul>
|
||||||
</nav>
|
</nav>
|
||||||
</div>
|
</div>
|
||||||
@ -239,84 +302,82 @@ ul.task-list li input[type="checkbox"] {
|
|||||||
|
|
||||||
</header>
|
</header>
|
||||||
|
|
||||||
<section id="sequence-attributes" class="level3" data-number="A.0.1">
|
<section id="sequence-attributes" class="level2" data-number="A.1">
|
||||||
<h3 data-number="A.0.1" class="anchored" data-anchor-id="sequence-attributes"><span class="header-section-number">A.0.1</span> Sequence attributes</h3>
|
<h2 data-number="A.1" class="anchored" data-anchor-id="sequence-attributes"><span class="header-section-number">A.1</span> Sequence attributes</h2>
|
||||||
<section id="reserved-sequence-attributes" class="level4" data-number="A.0.1.1">
|
<p><strong>ali_dir (<code>string</code>)</strong></p>
|
||||||
<h4 data-number="A.0.1.1" class="anchored" data-anchor-id="reserved-sequence-attributes"><span class="header-section-number">A.0.1.1</span> Reserved sequence attributes</h4>
|
<ul>
|
||||||
<section id="ali_dir" class="level5" data-number="A.0.1.1.1">
|
<li>Set by the <em>obipairing</em> tool</li>
|
||||||
<h5 data-number="A.0.1.1.1" class="anchored" data-anchor-id="ali_dir"><span class="header-section-number">A.0.1.1.1</span> <code>ali_dir</code></h5>
|
<li>The attribute can contain 2 string values <code>left</code> or <code>right</code>.</li>
|
||||||
<section id="type-string" class="level6" data-number="A.0.1.1.1.1">
|
</ul>
|
||||||
<h6 data-number="A.0.1.1.1.1" class="anchored" data-anchor-id="type-string"><span class="header-section-number">A.0.1.1.1.1</span> Type : <code>string</code></h6>
|
|
||||||
<p>The attribute can contain 2 string values <code>"left"</code> or <code>"right".</code></p>
|
|
||||||
</section>
|
|
||||||
<section id="set-by-the-obipairing-tool" class="level6" data-number="A.0.1.1.1.2">
|
|
||||||
<h6 data-number="A.0.1.1.1.2" class="anchored" data-anchor-id="set-by-the-obipairing-tool"><span class="header-section-number">A.0.1.1.1.2</span> Set by the <em>obipairing</em> tool</h6>
|
|
||||||
<p>The alignment generated by <em>obipairing</em> is a 3’-end gap free algorithm. Two cases can occur when aligning the forward and reverse reads. If the barcode is long enough, both the reads overlap only on their 3’ ends. In such case, the alignment direction <code>ali_dir</code> is set to <em>left</em>. If the barcode is shorter than the read length, the paired reads overlap by their 5’ ends, and the complete barcode is sequenced by both the reads. In that later case, <code>ali_dir</code> is set to <em>right</em>.</p>
|
<p>The alignment generated by <em>obipairing</em> is a 3’-end gap free algorithm. Two cases can occur when aligning the forward and reverse reads. If the barcode is long enough, both the reads overlap only on their 3’ ends. In such case, the alignment direction <code>ali_dir</code> is set to <em>left</em>. If the barcode is shorter than the read length, the paired reads overlap by their 5’ ends, and the complete barcode is sequenced by both the reads. In that later case, <code>ali_dir</code> is set to <em>right</em>.</p>
|
||||||
</section>
|
<p><strong>ali_length (<code>int</code>)</strong></p>
|
||||||
</section>
|
<ul>
|
||||||
<section id="ali_length" class="level5" data-number="A.0.1.1.2">
|
<li>Set by the <em>obipairing</em> tool</li>
|
||||||
<h5 data-number="A.0.1.1.2" class="anchored" data-anchor-id="ali_length"><span class="header-section-number">A.0.1.1.2</span> <code>ali_length</code></h5>
|
</ul>
|
||||||
<section id="set-by-the-obipairing-tool-1" class="level6" data-number="A.0.1.1.2.1">
|
|
||||||
<h6 data-number="A.0.1.1.2.1" class="anchored" data-anchor-id="set-by-the-obipairing-tool-1"><span class="header-section-number">A.0.1.1.2.1</span> Set by the <em>obipairing</em> tool</h6>
|
|
||||||
<p>Length of the aligned parts when merging forward and reverse reads</p>
|
<p>Length of the aligned parts when merging forward and reverse reads</p>
|
||||||
</section>
|
<p><strong>count (<code>int</code>)</strong></p>
|
||||||
</section>
|
<ul>
|
||||||
<section id="count-the-number-of-sequence-occurrences" class="level5" data-number="A.0.1.1.3">
|
<li>Set by the <em>obiuniq</em> tool</li>
|
||||||
<h5 data-number="A.0.1.1.3" class="anchored" data-anchor-id="count-the-number-of-sequence-occurrences"><span class="header-section-number">A.0.1.1.3</span> <code>count</code> : the number of sequence occurrences</h5>
|
<li>Getter : method <code>Count()</code></li>
|
||||||
<section id="set-by-the-obiuniq-tool" class="level6" data-number="A.0.1.1.3.1">
|
<li>Setter : method <code>SetCount(int)</code></li>
|
||||||
<h6 data-number="A.0.1.1.3.1" class="anchored" data-anchor-id="set-by-the-obiuniq-tool"><span class="header-section-number">A.0.1.1.3.1</span> Set by the <em>obiuniq</em> tool</h6>
|
</ul>
|
||||||
<p>The <code>count</code> attribute indicates how-many strictly identical sequences have been merged in a single record. It contains an integer value. If it is absent this means that the sequence record represents a single occurrence of the sequence.</p>
|
<p>The <code>count</code> attribute indicates how-many strictly identical reads have been merged in a single record. It contains an integer value. If it is absent this means that the sequence record represents a single occurrence of the sequence.</p>
|
||||||
</section>
|
|
||||||
<section id="getter-method-count" class="level6" data-number="A.0.1.1.3.2">
|
|
||||||
<h6 data-number="A.0.1.1.3.2" class="anchored" data-anchor-id="getter-method-count"><span class="header-section-number">A.0.1.1.3.2</span> Getter : method <code>Count()</code></h6>
|
|
||||||
<p>The <code>Count()</code> method allows to access to the count attribute as an integer value. If the <code>count</code> attribute is not defined for the given sequence, the value <em>1</em> is returned</p>
|
<p>The <code>Count()</code> method allows to access to the count attribute as an integer value. If the <code>count</code> attribute is not defined for the given sequence, the value <em>1</em> is returned</p>
|
||||||
</section>
|
<p><strong>merged_* (<code>map[string]int</code>)</strong></p>
|
||||||
</section>
|
<ul>
|
||||||
<section id="merged_" class="level5" data-number="A.0.1.1.4">
|
<li>Set by the <em>obiuniq</em> tool</li>
|
||||||
<h5 data-number="A.0.1.1.4" class="anchored" data-anchor-id="merged_"><span class="header-section-number">A.0.1.1.4</span> <code>merged_*</code></h5>
|
</ul>
|
||||||
<section id="type-mapstringint" class="level6" data-number="A.0.1.1.4.1">
|
|
||||||
<h6 data-number="A.0.1.1.4.1" class="anchored" data-anchor-id="type-mapstringint"><span class="header-section-number">A.0.1.1.4.1</span> Type : <code>map[string]int</code></h6>
|
|
||||||
</section>
|
|
||||||
<section id="set-by-the-obiuniq-tool-1" class="level6" data-number="A.0.1.1.4.2">
|
|
||||||
<h6 data-number="A.0.1.1.4.2" class="anchored" data-anchor-id="set-by-the-obiuniq-tool-1"><span class="header-section-number">A.0.1.1.4.2</span> Set by the <em>obiuniq</em> tool</h6>
|
|
||||||
<p>The <code>-m</code> option of the <em>obiuniq</em> tools allows for keeping track of the distribution of the values stored in given attribute of interest. Often this option is used to summarise distribution of a sequence variant accross samples when <em>obiuniq</em> is run after running <em>obimultiplex</em>. The actual name of the attribute depends on the name of the monitored attribute. If <code>-m</code> option is used with the attribute <em>sample</em>, then this attribute names <em>merged_sample</em>.</p>
|
<p>The <code>-m</code> option of the <em>obiuniq</em> tools allows for keeping track of the distribution of the values stored in given attribute of interest. Often this option is used to summarise distribution of a sequence variant accross samples when <em>obiuniq</em> is run after running <em>obimultiplex</em>. The actual name of the attribute depends on the name of the monitored attribute. If <code>-m</code> option is used with the attribute <em>sample</em>, then this attribute names <em>merged_sample</em>.</p>
|
||||||
</section>
|
<p><strong>mode (<code>string</code>)</strong></p>
|
||||||
</section>
|
<ul>
|
||||||
<section id="mode" class="level5" data-number="A.0.1.1.5">
|
<li>Set by the <em>obipairing</em> tool</li>
|
||||||
<h5 data-number="A.0.1.1.5" class="anchored" data-anchor-id="mode"><span class="header-section-number">A.0.1.1.5</span> <code>mode</code></h5>
|
<li>The attribute can contain 2 string values <code>join</code> or <code>alignment</code>.</li>
|
||||||
<section id="set-by-the-obipairing-tool-2" class="level6" data-number="A.0.1.1.5.1">
|
</ul>
|
||||||
<h6 data-number="A.0.1.1.5.1" class="anchored" data-anchor-id="set-by-the-obipairing-tool-2"><span class="header-section-number">A.0.1.1.5.1</span> Set by the <em>obipairing</em> tool</h6>
|
<p><strong>obitag_ref_index (<code>map[string]string</code>)</strong></p>
|
||||||
<p><strong><code>obitag_ref_index</code></strong></p>
|
<ul>
|
||||||
</section>
|
<li>Set by the <em>obirefidx</em> tool.</li>
|
||||||
<section id="set-by-the-obirefidx-tool." class="level6" data-number="A.0.1.1.5.2">
|
</ul>
|
||||||
<h6 data-number="A.0.1.1.5.2" class="anchored" data-anchor-id="set-by-the-obirefidx-tool."><span class="header-section-number">A.0.1.1.5.2</span> Set by the <em>obirefidx</em> tool.</h6>
|
|
||||||
<p>It resumes to which taxonomic annotation a match to that sequence must lead according to the number of differences existing between the query sequence and the reference sequence having that tag.</p>
|
<p>It resumes to which taxonomic annotation a match to that sequence must lead according to the number of differences existing between the query sequence and the reference sequence having that tag.</p>
|
||||||
</section>
|
<div class="sourceCode" id="cb1"><pre class="sourceCode json code-with-copy"><code class="sourceCode json"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a> <span class="fu">{</span><span class="dt">"0"</span><span class="fu">:</span><span class="st">"9606@Homo sapiens@species"</span><span class="fu">,</span></span>
|
||||||
<section id="getter-method-count-1" class="level6" data-number="A.0.1.1.5.3">
|
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a> <span class="dt">"2"</span><span class="fu">:</span><span class="st">"207598@Homininae@subfamily"</span><span class="fu">,</span></span>
|
||||||
<h6 data-number="A.0.1.1.5.3" class="anchored" data-anchor-id="getter-method-count-1"><span class="header-section-number">A.0.1.1.5.3</span> Getter : method <code>Count()</code></h6>
|
<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a> <span class="dt">"3"</span><span class="fu">:</span><span class="st">"9604@Hominidae@family"</span><span class="fu">,</span></span>
|
||||||
</section>
|
<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a> <span class="dt">"8"</span><span class="fu">:</span><span class="st">"314295@Hominoidea@superfamily"</span><span class="fu">,</span></span>
|
||||||
</section>
|
<span id="cb1-5"><a href="#cb1-5" aria-hidden="true" tabindex="-1"></a> <span class="dt">"10"</span><span class="fu">:</span><span class="st">"9526@Catarrhini@parvorder"</span><span class="fu">,</span></span>
|
||||||
<section id="pairing_mismatches" class="level5" data-number="A.0.1.1.6">
|
<span id="cb1-6"><a href="#cb1-6" aria-hidden="true" tabindex="-1"></a> <span class="dt">"12"</span><span class="fu">:</span><span class="st">"1437010@Boreoeutheria@clade"</span><span class="fu">,</span></span>
|
||||||
<h5 data-number="A.0.1.1.6" class="anchored" data-anchor-id="pairing_mismatches"><span class="header-section-number">A.0.1.1.6</span> <code>pairing_mismatches</code></h5>
|
<span id="cb1-7"><a href="#cb1-7" aria-hidden="true" tabindex="-1"></a> <span class="dt">"16"</span><span class="fu">:</span><span class="st">"9347@Eutheria@clade"</span><span class="fu">,</span></span>
|
||||||
<section id="set-by-the-obipairing-tool-3" class="level6" data-number="A.0.1.1.6.1">
|
<span id="cb1-8"><a href="#cb1-8" aria-hidden="true" tabindex="-1"></a> <span class="dt">"17"</span><span class="fu">:</span><span class="st">"40674@Mammalia@class"</span><span class="fu">,</span></span>
|
||||||
<h6 data-number="A.0.1.1.6.1" class="anchored" data-anchor-id="set-by-the-obipairing-tool-3"><span class="header-section-number">A.0.1.1.6.1</span> Set by the <em>obipairing</em> tool</h6>
|
<span id="cb1-9"><a href="#cb1-9" aria-hidden="true" tabindex="-1"></a> <span class="dt">"22"</span><span class="fu">:</span><span class="st">"117571@Euteleostomi@clade"</span><span class="fu">,</span></span>
|
||||||
</section>
|
<span id="cb1-10"><a href="#cb1-10" aria-hidden="true" tabindex="-1"></a> <span class="dt">"25"</span><span class="fu">:</span><span class="st">"7776@Gnathostomata@clade"</span><span class="fu">,</span></span>
|
||||||
</section>
|
<span id="cb1-11"><a href="#cb1-11" aria-hidden="true" tabindex="-1"></a> <span class="dt">"29"</span><span class="fu">:</span><span class="st">"33213@Bilateria@clade"</span><span class="fu">,</span></span>
|
||||||
<section id="score" class="level5" data-number="A.0.1.1.7">
|
<span id="cb1-12"><a href="#cb1-12" aria-hidden="true" tabindex="-1"></a> <span class="dt">"30"</span><span class="fu">:</span><span class="st">"6072@Eumetazoa@clade"</span><span class="fu">}</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||||
<h5 data-number="A.0.1.1.7" class="anchored" data-anchor-id="score"><span class="header-section-number">A.0.1.1.7</span> <code>score</code></h5>
|
<p><strong>pairing_mismatches (<code>map[string]string</code>)</strong></p>
|
||||||
<section id="set-by-the-obipairing-tool-4" class="level6" data-number="A.0.1.1.7.1">
|
<ul>
|
||||||
<h6 data-number="A.0.1.1.7.1" class="anchored" data-anchor-id="set-by-the-obipairing-tool-4"><span class="header-section-number">A.0.1.1.7.1</span> Set by the <em>obipairing</em> tool</h6>
|
<li>Set by the <em>obipairing</em> tool</li>
|
||||||
</section>
|
</ul>
|
||||||
</section>
|
<p><strong>seq_a_single (<code>int</code>)</strong></p>
|
||||||
<section id="score_norm" class="level5" data-number="A.0.1.1.8">
|
<ul>
|
||||||
<h5 data-number="A.0.1.1.8" class="anchored" data-anchor-id="score_norm"><span class="header-section-number">A.0.1.1.8</span> <code>score_norm</code></h5>
|
<li>Set by the <em>obipairing</em> tool</li>
|
||||||
<section id="set-by-the-obipairing-tool-5" class="level6" data-number="A.0.1.1.8.1">
|
</ul>
|
||||||
<h6 data-number="A.0.1.1.8.1" class="anchored" data-anchor-id="set-by-the-obipairing-tool-5"><span class="header-section-number">A.0.1.1.8.1</span> Set by the <em>obipairing</em> tool</h6>
|
<p><strong>seq_ab_match (<code>int</code>)</strong></p>
|
||||||
|
<ul>
|
||||||
|
<li>Set by the <em>obipairing</em> tool</li>
|
||||||
|
</ul>
|
||||||
|
<p><strong>seq_b_single (<code>int</code>)</strong></p>
|
||||||
|
<ul>
|
||||||
|
<li>Set by the <em>obipairing</em> tool</li>
|
||||||
|
</ul>
|
||||||
|
<p><strong>score (<code>int</code>)</strong></p>
|
||||||
|
<ul>
|
||||||
|
<li>Set by the <em>obipairing</em> tool</li>
|
||||||
|
</ul>
|
||||||
|
<p><strong>score_norm (<code>float</code>)</strong></p>
|
||||||
|
<ul>
|
||||||
|
<li>Set by the <em>obipairing</em> tool</li>
|
||||||
|
<li>The value ranges between 0 and 1.</li>
|
||||||
|
</ul>
|
||||||
|
<p>Score of the alignment between forward and reverse reads expressed as a fraction of identity.</p>
|
||||||
|
|
||||||
|
|
||||||
</section>
|
|
||||||
</section>
|
|
||||||
</section>
|
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
</main> <!-- /main -->
|
</main> <!-- /main -->
|
||||||
|
21
doc/build/_book/comm_sampling.html
vendored
21
doc/build/_book/comm_sampling.html
vendored
@ -314,6 +314,23 @@ code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warni
|
|||||||
<section id="selecting-sequences-based-on-their-caracteristics" class="level4" data-number="12.1.1.1">
|
<section id="selecting-sequences-based-on-their-caracteristics" class="level4" data-number="12.1.1.1">
|
||||||
<h4 data-number="12.1.1.1" class="anchored" data-anchor-id="selecting-sequences-based-on-their-caracteristics"><span class="header-section-number">12.1.1.1</span> Selecting sequences based on their caracteristics</h4>
|
<h4 data-number="12.1.1.1" class="anchored" data-anchor-id="selecting-sequences-based-on-their-caracteristics"><span class="header-section-number">12.1.1.1</span> Selecting sequences based on their caracteristics</h4>
|
||||||
<p>Sequences can be selected on several of their characteristics: their length, their id, their sequence. Options allow for specifying the selection condition.</p>
|
<p>Sequences can be selected on several of their characteristics: their length, their id, their sequence. Options allow for specifying the selection condition.</p>
|
||||||
|
<p><strong>Selection based on the sequence</strong></p>
|
||||||
|
<p>Sequence records can be selected according to whether or not they match a pattern. The simplest pattern is a short sequence (<em>e.g.</em> <code>AACCTT</code>). But the usage of regular patterns allows for looking for more complex patterns. For example, <code>A[TG]C+G</code> matches an <code>A</code>, followed by a <code>T</code> or a <code>G</code>, then one or several <code>C</code> and finally a <code>G</code>.</p>
|
||||||
|
<dl>
|
||||||
|
<dt><strong>--sequence</strong>|<strong>-s</strong> <em>PATTERN</em></dt>
|
||||||
|
<dd>
|
||||||
|
<p>Regular expression pattern to be tested against the sequence itself. The pattern is case insensitive. A complete description of the regular pattern grammar is available <a href="https://yourbasic.org/golang/regexp-cheat-sheet/#cheat-sheet">here</a>.</p>
|
||||||
|
</dd>
|
||||||
|
<dt><em>Examples:</em></dt>
|
||||||
|
<dd>
|
||||||
|
<p>Selects only the sequence records that contain an <em>EcoRI</em> restriction site.</p>
|
||||||
|
</dd>
|
||||||
|
</dl>
|
||||||
|
<div class="sourceCode" id="cb1"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="ex">obigrep</span> <span class="at">-s</span> <span class="st">'GAATTC'</span> seq1.fasta <span class="op">></span> seq2.fasta</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||||
|
<p>Selects only the sequence records that contain a stretch of at least 10 <code>A</code>.</p>
|
||||||
|
<div class="sourceCode" id="cb2"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="ex">obigrep</span> <span class="at">-s</span> <span class="st">'A{10,}'</span> seq1.fasta <span class="op">></span> seq2.fasta</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||||
|
<p>Selects only the sequence records that do not contain ambiguous nucleotides.</p>
|
||||||
|
<div class="sourceCode" id="cb3"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="ex">obigrep</span> <span class="at">-s</span> <span class="st">'^[ACGT]+$'</span> seq1.fasta <span class="op">></span> seq2.fasta</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||||
<dl>
|
<dl>
|
||||||
<dt><strong>--min-count</strong> | <strong>-c</strong> <em>COUNT</em></dt>
|
<dt><strong>--min-count</strong> | <strong>-c</strong> <em>COUNT</em></dt>
|
||||||
<dd>
|
<dd>
|
||||||
@ -323,12 +340,12 @@ code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warni
|
|||||||
<dd>
|
<dd>
|
||||||
<p>Only sequences representing no more than <em>COUNT</em> reads will be selected. That option relies on the <code>count</code> attribute. If the <code>count</code> attribute is not defined for a sequence record, it is assumed equal to <span class="math inline">\(1\)</span>.</p>
|
<p>Only sequences representing no more than <em>COUNT</em> reads will be selected. That option relies on the <code>count</code> attribute. If the <code>count</code> attribute is not defined for a sequence record, it is assumed equal to <span class="math inline">\(1\)</span>.</p>
|
||||||
</dd>
|
</dd>
|
||||||
<dt>Example</dt>
|
<dt><em>Examples</em></dt>
|
||||||
<dd>
|
<dd>
|
||||||
<p>Selecting sequence records representing at least five reads in the dataset.</p>
|
<p>Selecting sequence records representing at least five reads in the dataset.</p>
|
||||||
</dd>
|
</dd>
|
||||||
</dl>
|
</dl>
|
||||||
<div class="sourceCode" id="cb1"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="ex">obigrep</span> <span class="at">-c</span> 5 data_SPER01.fasta <span class="op">></span> data_norare_SPER01.fasta</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
<div class="sourceCode" id="cb4"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="ex">obigrep</span> <span class="at">-c</span> 5 data_SPER01.fasta <span class="op">></span> data_norare_SPER01.fasta</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||||
|
|
||||||
|
|
||||||
</section>
|
</section>
|
||||||
|
71
doc/build/_book/expressions.html
vendored
71
doc/build/_book/expressions.html
vendored
@ -124,6 +124,7 @@ code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warni
|
|||||||
}
|
}
|
||||||
}</script>
|
}</script>
|
||||||
|
|
||||||
|
<script src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml-full.js" type="text/javascript"></script>
|
||||||
|
|
||||||
</head>
|
</head>
|
||||||
|
|
||||||
@ -284,6 +285,8 @@ code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warni
|
|||||||
<li><a href="#instrospection-functions" id="toc-instrospection-functions" class="nav-link" data-scroll-target="#instrospection-functions">Introspection functions</a></li>
|
<li><a href="#instrospection-functions" id="toc-instrospection-functions" class="nav-link" data-scroll-target="#instrospection-functions">Introspection functions</a></li>
|
||||||
<li><a href="#cast-functions" id="toc-cast-functions" class="nav-link" data-scroll-target="#cast-functions">Cast functions</a></li>
|
<li><a href="#cast-functions" id="toc-cast-functions" class="nav-link" data-scroll-target="#cast-functions">Cast functions</a></li>
|
||||||
<li><a href="#string-related-functions" id="toc-string-related-functions" class="nav-link" data-scroll-target="#string-related-functions">String related functions</a></li>
|
<li><a href="#string-related-functions" id="toc-string-related-functions" class="nav-link" data-scroll-target="#string-related-functions">String related functions</a></li>
|
||||||
|
<li><a href="#condition-function" id="toc-condition-function" class="nav-link" data-scroll-target="#condition-function">Condition function</a></li>
|
||||||
|
<li><a href="#sequence-analysis-related-function" id="toc-sequence-analysis-related-function" class="nav-link" data-scroll-target="#sequence-analysis-related-function"><span class="toc-section-number">7.2.1</span> Sequence analysis related function</a></li>
|
||||||
</ul></li>
|
</ul></li>
|
||||||
<li><a href="#accessing-to-the-sequence-annotations" id="toc-accessing-to-the-sequence-annotations" class="nav-link" data-scroll-target="#accessing-to-the-sequence-annotations"><span class="toc-section-number">7.3</span> Accessing to the sequence annotations</a></li>
|
<li><a href="#accessing-to-the-sequence-annotations" id="toc-accessing-to-the-sequence-annotations" class="nav-link" data-scroll-target="#accessing-to-the-sequence-annotations"><span class="toc-section-number">7.3</span> Accessing to the sequence annotations</a></li>
|
||||||
</ul>
|
</ul>
|
||||||
@ -321,24 +324,67 @@ code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warni
|
|||||||
<h2 data-number="7.2" class="anchored" data-anchor-id="function-defined-in-the-language"><span class="header-section-number">7.2</span> Function defined in the language</h2>
|
<h2 data-number="7.2" class="anchored" data-anchor-id="function-defined-in-the-language"><span class="header-section-number">7.2</span> Function defined in the language</h2>
|
||||||
<section id="instrospection-functions" class="level3 unnumbered">
|
<section id="instrospection-functions" class="level3 unnumbered">
|
||||||
<h3 class="unnumbered anchored" data-anchor-id="instrospection-functions">Introspection functions</h3>
|
<h3 class="unnumbered anchored" data-anchor-id="instrospection-functions">Introspection functions</h3>
|
||||||
<ul>
|
<dl>
|
||||||
<li><code>len(x)</code> is a generic function allowing to retrieve the size of an object. It returns the length of a sequence, the number of elements in a map like <code>annotations</code>, the number of elements in an array. The returned value is an <code>int</code>.</li>
|
<dt><strong><code>len(x)</code></strong></dt>
|
||||||
</ul>
|
<dd>
|
||||||
|
<p>It is a generic function allowing to retrieve the size of an object. It returns the length of a sequence, the number of elements in a map like <code>annotations</code>, the number of elements in an array. The returned value is an <code>int</code>.</p>
|
||||||
|
</dd>
|
||||||
|
</dl>
|
||||||
</section>
|
</section>
|
||||||
<section id="cast-functions" class="level3 unnumbered">
|
<section id="cast-functions" class="level3 unnumbered">
|
||||||
<h3 class="unnumbered anchored" data-anchor-id="cast-functions">Cast functions</h3>
|
<h3 class="unnumbered anchored" data-anchor-id="cast-functions">Cast functions</h3>
|
||||||
<ul>
|
<dl>
|
||||||
<li><code>int(x)</code> converts if possible the <code>x</code> value to an integer value. The function returns an <code>int</code>.</li>
|
<dt><strong><code>int(x)</code></strong></dt>
|
||||||
<li><code>numeric(x)</code> converts if possible the <code>x</code> value to a float value. The function returns a <code>float</code>.</li>
|
<dd>
|
||||||
<li><code>bool(x)</code> converts if possible the <code>x</code> value to a boolean value. The function returns a <code>bool</code>.</li>
|
<p>Converts if possible the <code>x</code> value to an integer value. The function returns an <code>int</code>.</p>
|
||||||
</ul>
|
</dd>
|
||||||
|
<dt><strong><code>numeric(x)</code></strong></dt>
|
||||||
|
<dd>
|
||||||
|
<p>Converts if possible the <code>x</code> value to a float value. The function returns a <code>float</code>.</p>
|
||||||
|
</dd>
|
||||||
|
<dt><strong><code>bool(x)</code></strong></dt>
|
||||||
|
<dd>
|
||||||
|
<p>Converts if possible the <code>x</code> value to a boolean value. The function returns a <code>bool</code>.</p>
|
||||||
|
</dd>
|
||||||
|
</dl>
|
||||||
</section>
|
</section>
|
||||||
<section id="string-related-functions" class="level3 unnumbered">
|
<section id="string-related-functions" class="level3 unnumbered">
|
||||||
<h3 class="unnumbered anchored" data-anchor-id="string-related-functions">String related functions</h3>
|
<h3 class="unnumbered anchored" data-anchor-id="string-related-functions">String related functions</h3>
|
||||||
<ul>
|
<dl>
|
||||||
<li><code>printf(format,...)</code> allows to combine several values to build a string. <code>format</code> follows the classical C <code>printf</code> syntax. The function returns a <code>string</code>.</li>
|
<dt><strong><code>printf(format,...)</code></strong></dt>
|
||||||
<li><code>subspc(x)</code> substitutes every space in the <code>x</code> string by the underscore (<code>_</code>) character. The function returns a <code>string</code>.</li>
|
<dd>
|
||||||
</ul>
|
<p>Allows to combine several values to build a string. <code>format</code> follows the classical C <code>printf</code> syntax. The function returns a <code>string</code>.</p>
|
||||||
|
</dd>
|
||||||
|
<dt><strong><code>subspc(x)</code></strong></dt>
|
||||||
|
<dd>
|
||||||
|
<p>Substitutes every space in the <code>x</code> string by the underscore (<code>_</code>) character. The function returns a <code>string</code>.</p>
|
||||||
|
</dd>
|
||||||
|
</dl>
|
||||||
|
</section>
|
||||||
|
<section id="condition-function" class="level3 unnumbered">
|
||||||
|
<h3 class="unnumbered anchored" data-anchor-id="condition-function">Condition function</h3>
|
||||||
|
<dl>
|
||||||
|
<dt><strong><code>ifelse(condition,val1,val2)</code></strong></dt>
|
||||||
|
<dd>
|
||||||
|
<p>The <code>condition</code> value has to be a <code>bool</code> value. If it is <code>true</code> the function returns <code>val1</code>, otherwise it returns <code>val2</code>.</p>
|
||||||
|
</dd>
|
||||||
|
</dl>
|
||||||
|
</section>
|
||||||
|
<section id="sequence-analysis-related-function" class="level3" data-number="7.2.1">
|
||||||
|
<h3 data-number="7.2.1" class="anchored" data-anchor-id="sequence-analysis-related-function"><span class="header-section-number">7.2.1</span> Sequence analysis related function</h3>
|
||||||
|
<dl>
|
||||||
|
<dt><strong><code>composition(sequence)</code></strong></dt>
|
||||||
|
<dd>
|
||||||
|
<p>The nucleotide composition of the sequence is returned as a map indexed by <code>a</code>, <code>c</code>, <code>g</code>, or <code>t</code> and each value is the number of occurrences of that nucleotide. A fifth key <code>others</code> accounts for all other symbols.</p>
|
||||||
|
</dd>
|
||||||
|
<dt><strong><code>gcskew(sequence)</code></strong></dt>
|
||||||
|
<dd>
|
||||||
|
<p>Computes the excess of g compared to c in the sequence, known as the GC skew.</p>
|
||||||
|
<p><span class="math display">\[
|
||||||
|
Skew_{GC}=\frac{G-C}{G+C}
|
||||||
|
\]</span></p>
|
||||||
|
</dd>
|
||||||
|
</dl>
|
||||||
</section>
|
</section>
|
||||||
</section>
|
</section>
|
||||||
<section id="accessing-to-the-sequence-annotations" class="level2" data-number="7.3">
|
<section id="accessing-to-the-sequence-annotations" class="level2" data-number="7.3">
|
||||||
@ -352,6 +398,7 @@ code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warni
|
|||||||
<li>The sequence identifier : <code>Id()</code></li>
|
<li>The sequence identifier : <code>Id()</code></li>
|
||||||
<li>The sequence definition : <code>Definition()</code></li>
|
<li>The sequence definition : <code>Definition()</code></li>
|
||||||
</ul>
|
</ul>
|
||||||
|
<div class="sourceCode" id="cb3"><pre class="sourceCode go code-with-copy"><code class="sourceCode go"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a>sequence<span class="op">.</span>Id<span class="op">()</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
|
||||||
|
|
||||||
|
|
||||||
</section>
|
</section>
|
||||||
|
14
doc/build/_man/man1/obigrep.man
vendored
14
doc/build/_man/man1/obigrep.man
vendored
@ -174,14 +174,22 @@ selected.
|
|||||||
That option relies on the \f[V]count\f[R] attribute.
|
That option relies on the \f[V]count\f[R] attribute.
|
||||||
If the \f[V]count\f[R] attribute is not defined for a sequence record,
|
If the \f[V]count\f[R] attribute is not defined for a sequence record,
|
||||||
it is assumed equal to 1.
|
it is assumed equal to 1.
|
||||||
.PP
|
.TP
|
||||||
\f[B]--max-length\f[R] | \f[B]-L\f[R] \f[I]LENGTH\f[R]
|
\f[B]--max-length\f[R] | \f[B]-L\f[R] \f[I]LENGTH\f[R]
|
||||||
.PP
|
Keeps sequence records whose sequence length is equal to or shorter than
|
||||||
|
\f[I]LENGTH\f[R].
|
||||||
|
.TP
|
||||||
\f[B]--min-length\f[R] | \f[B]-l\f[R] \f[I]LENGTH\f[R]
|
\f[B]--min-length\f[R] | \f[B]-l\f[R] \f[I]LENGTH\f[R]
|
||||||
|
Keeps sequence records whose sequence length is equal to or longer than
|
||||||
|
\f[I]LENGTH\f[R].
|
||||||
.PP
|
.PP
|
||||||
\f[B]--predicate\f[R]|\f[B]-p\f[R] \f[I]EXPRESSION\f[R]
|
\f[B]--predicate\f[R]|\f[B]-p\f[R] \f[I]EXPRESSION\f[R]
|
||||||
.PP
|
.TP
|
||||||
\f[B]--sequence\f[R]|\f[B]-s\f[R] \f[I]PATTERN\f[R]
|
\f[B]--sequence\f[R]|\f[B]-s\f[R] \f[I]PATTERN\f[R]
|
||||||
|
Regular expression pattern to be tested against the sequence itself.
|
||||||
|
The pattern is case insensitive.
|
||||||
|
A complete description of the regular pattern grammar is available
|
||||||
|
here (https://yourbasic.org/golang/regexp-cheat-sheet/#cheat-sheet).
|
||||||
.PP
|
.PP
|
||||||
\f[B]--inverse-match\f[R] | \f[B]-v\f[R]
|
\f[B]--inverse-match\f[R] | \f[B]-v\f[R]
|
||||||
.PP
|
.PP
|
||||||
|
3
doc/lib/options/selection/_max-length.qmd
Normal file
3
doc/lib/options/selection/_max-length.qmd
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
**\--max-length** | **-L** _LENGTH_
|
||||||
|
|
||||||
|
: Keeps sequence records whose sequence length is equal to or shorter than _LENGTH_.
|
3
doc/lib/options/selection/_min-length.qmd
Normal file
3
doc/lib/options/selection/_min-length.qmd
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
**\--min-length** | **-l** _LENGTH_
|
||||||
|
|
||||||
|
: Keeps sequence records whose sequence length is equal to or longer than _LENGTH_.
|
7
doc/lib/options/selection/_sequence.qmd
Normal file
7
doc/lib/options/selection/_sequence.qmd
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
**\--sequence**|**-s** _PATTERN_
|
||||||
|
|
||||||
|
: Regular expression pattern to be tested against the
|
||||||
|
sequence itself. The pattern is case insensitive. A
|
||||||
|
complete description of the regular pattern grammar
|
||||||
|
is available [here](https://yourbasic.org/golang/regexp-cheat-sheet/#cheat-sheet).
|
||||||
|
|
@ -99,13 +99,13 @@ The OBITools are centered around the [FASTA] (https://en.wikipedia.org/wiki/FAST
|
|||||||
|
|
||||||
{{< include ../lib/options/selection/_min-count.qmd >}}
|
{{< include ../lib/options/selection/_min-count.qmd >}}
|
||||||
|
|
||||||
**\--max-length** | **-L** _LENGTH_
|
{{< include ../lib/options/selection/_max-length.qmd >}}
|
||||||
|
|
||||||
**\--min-length** | **-l** _LENGTH_
|
{{< include ../lib/options/selection/_min-length.qmd >}}
|
||||||
|
|
||||||
**\--predicate**|**-p** _EXPRESSION_
|
**\--predicate**|**-p** _EXPRESSION_
|
||||||
|
|
||||||
**\--sequence**|**-s** _PATTERN_
|
{{< include ../lib/options/selection/_sequence.qmd >}}
|
||||||
|
|
||||||
**\--inverse-match** | **-v**
|
**\--inverse-match** | **-v**
|
||||||
|
|
||||||
|
@ -13,6 +13,7 @@ import (
|
|||||||
"github.com/barkimedes/go-deepcopy"
|
"github.com/barkimedes/go-deepcopy"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
// InterfaceToInt converts an interface{} to an integer value if possible.
|
// InterfaceToInt converts an interface{} to an integer value if possible.
|
||||||
// If not a "NotAnInteger" error is returned via the err
|
// If not a "NotAnInteger" error is returned via the err
|
||||||
// return value and val is set to 0.
|
// return value and val is set to 0.
|
||||||
@ -302,15 +303,6 @@ func ReadLines(path string) (lines []string, err error) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func Contains[T comparable](arr []T, x T) bool {
|
|
||||||
for _, v := range arr {
|
|
||||||
if v == x {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
func AtomicCounter(initial ...int) func() int {
|
func AtomicCounter(initial ...int) func() int {
|
||||||
counterMutex := sync.Mutex{}
|
counterMutex := sync.Mutex{}
|
||||||
counter := 0
|
counter := 0
|
||||||
|
24
pkg/goutils/slices.go
Normal file
24
pkg/goutils/slices.go
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
package goutils
|
||||||
|
|
||||||
|
|
||||||
|
// Contains reports whether x occurs in arr.
//
// Elements are compared with ==, and the scan stops at the first match.
// A nil or empty slice yields false.
func Contains[T comparable](arr []T, x T) bool {
	for _, v := range arr {
		if v == x {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
|
// LookFor returns the index of the first element of arr equal to x,
// or -1 when no element matches.
//
// Elements are compared with ==. A nil or empty slice yields -1.
func LookFor[T comparable](arr []T, x T) int {
	for i, v := range arr {
		if v == x {
			return i
		}
	}
	return -1
}
|
||||||
|
|
||||||
|
// RemoveIndex returns s without the element at position index.
//
// The removal is done in place: the elements located after index are
// shifted one slot to the left inside the backing array of s, so the slice
// passed by the caller is modified and only the returned slice should be
// used afterwards.
//
// index must satisfy 0 <= index < len(s); any other value makes the slice
// expressions below panic. The element type does not need to be comparable
// since no comparison is performed.
func RemoveIndex[T any](s []T, index int) []T {
	return append(s[:index], s[index+1:]...)
}
|
@ -13,7 +13,6 @@ type _Options struct {
|
|||||||
circular bool
|
circular bool
|
||||||
forwardError int
|
forwardError int
|
||||||
reverseError int
|
reverseError int
|
||||||
bufferSize int
|
|
||||||
batchSize int
|
batchSize int
|
||||||
parallelWorkers int
|
parallelWorkers int
|
||||||
forward ApatPattern
|
forward ApatPattern
|
||||||
@ -66,12 +65,6 @@ func (options Options) Circular() bool {
|
|||||||
return options.pointer.circular
|
return options.pointer.circular
|
||||||
}
|
}
|
||||||
|
|
||||||
// BufferSize returns the size of the channel
|
|
||||||
// buffer specified by the options
|
|
||||||
func (options Options) BufferSize() int {
|
|
||||||
return options.pointer.bufferSize
|
|
||||||
}
|
|
||||||
|
|
||||||
// BatchSize returns the size of the
|
// BatchSize returns the size of the
|
||||||
// sequence batch used by the PCR algorithm
|
// sequence batch used by the PCR algorithm
|
||||||
func (options Options) BatchSize() int {
|
func (options Options) BatchSize() int {
|
||||||
@ -95,7 +88,6 @@ func MakeOptions(setters []WithOption) Options {
|
|||||||
circular: false,
|
circular: false,
|
||||||
parallelWorkers: 4,
|
parallelWorkers: 4,
|
||||||
batchSize: 100,
|
batchSize: 100,
|
||||||
bufferSize: 100,
|
|
||||||
forward: NilApatPattern,
|
forward: NilApatPattern,
|
||||||
cfwd: NilApatPattern,
|
cfwd: NilApatPattern,
|
||||||
reverse: NilApatPattern,
|
reverse: NilApatPattern,
|
||||||
@ -188,16 +180,6 @@ func OptionCircular(circular bool) WithOption {
|
|||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
// OptionBufferSize sets the requested channel
|
|
||||||
// buffer size.
|
|
||||||
func OptionBufferSize(size int) WithOption {
|
|
||||||
f := WithOption(func(opt Options) {
|
|
||||||
opt.pointer.bufferSize = size
|
|
||||||
})
|
|
||||||
|
|
||||||
return f
|
|
||||||
}
|
|
||||||
|
|
||||||
// OptionParallelWorkers sets how many search
|
// OptionParallelWorkers sets how many search
|
||||||
// jobs will be run in parallel.
|
// jobs will be run in parallel.
|
||||||
func OptionParallelWorkers(nworkers int) WithOption {
|
func OptionParallelWorkers(nworkers int) WithOption {
|
||||||
|
@ -36,20 +36,14 @@ func find(root, ext string) []string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func ISequenceChunkOnDisk(iterator obiiter.IBioSequence,
|
func ISequenceChunkOnDisk(iterator obiiter.IBioSequence,
|
||||||
classifier *obiseq.BioSequenceClassifier,
|
classifier *obiseq.BioSequenceClassifier) (obiiter.IBioSequence, error) {
|
||||||
sizes ...int) (obiiter.IBioSequence, error) {
|
|
||||||
dir, err := tempDir()
|
dir, err := tempDir()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return obiiter.NilIBioSequence, err
|
return obiiter.NilIBioSequence, err
|
||||||
}
|
}
|
||||||
|
|
||||||
bufferSize := iterator.BufferSize()
|
|
||||||
|
|
||||||
if len(sizes) > 0 {
|
newIter := obiiter.MakeIBioSequence()
|
||||||
bufferSize = sizes[0]
|
|
||||||
}
|
|
||||||
|
|
||||||
newIter := obiiter.MakeIBioSequence(bufferSize)
|
|
||||||
|
|
||||||
newIter.Add(1)
|
newIter.Add(1)
|
||||||
|
|
||||||
|
@ -10,16 +10,9 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func ISequenceChunk(iterator obiiter.IBioSequence,
|
func ISequenceChunk(iterator obiiter.IBioSequence,
|
||||||
classifier *obiseq.BioSequenceClassifier,
|
classifier *obiseq.BioSequenceClassifier) (obiiter.IBioSequence, error) {
|
||||||
sizes ...int) (obiiter.IBioSequence, error) {
|
|
||||||
|
|
||||||
bufferSize := iterator.BufferSize()
|
newIter := obiiter.MakeIBioSequence()
|
||||||
|
|
||||||
if len(sizes) > 0 {
|
|
||||||
bufferSize = sizes[0]
|
|
||||||
}
|
|
||||||
|
|
||||||
newIter := obiiter.MakeIBioSequence(bufferSize)
|
|
||||||
|
|
||||||
newIter.Add(1)
|
newIter.Add(1)
|
||||||
|
|
||||||
|
@ -6,7 +6,6 @@ type __options__ struct {
|
|||||||
navalue string
|
navalue string
|
||||||
cacheOnDisk bool
|
cacheOnDisk bool
|
||||||
batchCount int
|
batchCount int
|
||||||
bufferSize int
|
|
||||||
batchSize int
|
batchSize int
|
||||||
parallelWorkers int
|
parallelWorkers int
|
||||||
noSingleton bool
|
noSingleton bool
|
||||||
@ -25,7 +24,6 @@ func MakeOptions(setters []WithOption) Options {
|
|||||||
navalue: "NA",
|
navalue: "NA",
|
||||||
cacheOnDisk: false,
|
cacheOnDisk: false,
|
||||||
batchCount: 100,
|
batchCount: 100,
|
||||||
bufferSize: 2,
|
|
||||||
batchSize: 5000,
|
batchSize: 5000,
|
||||||
parallelWorkers: 4,
|
parallelWorkers: 4,
|
||||||
noSingleton: false,
|
noSingleton: false,
|
||||||
@ -65,10 +63,6 @@ func (opt Options) BatchCount() int {
|
|||||||
return opt.pointer.batchCount
|
return opt.pointer.batchCount
|
||||||
}
|
}
|
||||||
|
|
||||||
func (opt Options) BufferSize() int {
|
|
||||||
return opt.pointer.bufferSize
|
|
||||||
}
|
|
||||||
|
|
||||||
func (opt Options) BatchSize() int {
|
func (opt Options) BatchSize() int {
|
||||||
return opt.pointer.batchSize
|
return opt.pointer.batchSize
|
||||||
}
|
}
|
||||||
@ -148,14 +142,6 @@ func OptionsBatchSize(size int) WithOption {
|
|||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
func OptionsBufferSize(size int) WithOption {
|
|
||||||
f := WithOption(func(opt Options) {
|
|
||||||
opt.pointer.bufferSize = size
|
|
||||||
})
|
|
||||||
|
|
||||||
return f
|
|
||||||
}
|
|
||||||
|
|
||||||
func OptionsNoSingleton() WithOption {
|
func OptionsNoSingleton() WithOption {
|
||||||
f := WithOption(func(opt Options) {
|
f := WithOption(func(opt Options) {
|
||||||
opt.pointer.noSingleton = true
|
opt.pointer.noSingleton = true
|
||||||
|
@ -58,20 +58,13 @@ func (by _By) Sort(seqs []sSS) {
|
|||||||
|
|
||||||
func ISequenceSubChunk(iterator obiiter.IBioSequence,
|
func ISequenceSubChunk(iterator obiiter.IBioSequence,
|
||||||
classifier *obiseq.BioSequenceClassifier,
|
classifier *obiseq.BioSequenceClassifier,
|
||||||
sizes ...int) (obiiter.IBioSequence, error) {
|
nworkers int) (obiiter.IBioSequence, error) {
|
||||||
|
|
||||||
bufferSize := iterator.BufferSize()
|
if nworkers <=0 {
|
||||||
nworkers := 4
|
nworkers = 4
|
||||||
|
|
||||||
if len(sizes) > 0 {
|
|
||||||
nworkers = sizes[0]
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(sizes) > 1 {
|
newIter := obiiter.MakeIBioSequence()
|
||||||
bufferSize = sizes[1]
|
|
||||||
}
|
|
||||||
|
|
||||||
newIter := obiiter.MakeIBioSequence(bufferSize)
|
|
||||||
|
|
||||||
newIter.Add(nworkers)
|
newIter.Add(nworkers)
|
||||||
|
|
||||||
|
@ -19,7 +19,7 @@ func IUniqueSequence(iterator obiiter.IBioSequence,
|
|||||||
opts := MakeOptions(options)
|
opts := MakeOptions(options)
|
||||||
nworkers := opts.ParallelWorkers()
|
nworkers := opts.ParallelWorkers()
|
||||||
|
|
||||||
iUnique := obiiter.MakeIBioSequence(opts.BufferSize())
|
iUnique := obiiter.MakeIBioSequence()
|
||||||
|
|
||||||
iterator = iterator.Speed("Splitting data set")
|
iterator = iterator.Speed("Splitting data set")
|
||||||
|
|
||||||
@ -28,8 +28,7 @@ func IUniqueSequence(iterator obiiter.IBioSequence,
|
|||||||
if opts.SortOnDisk() {
|
if opts.SortOnDisk() {
|
||||||
nworkers = 1
|
nworkers = 1
|
||||||
iterator, err = ISequenceChunkOnDisk(iterator,
|
iterator, err = ISequenceChunkOnDisk(iterator,
|
||||||
obiseq.HashClassifier(opts.BatchCount()),
|
obiseq.HashClassifier(opts.BatchCount()))
|
||||||
0)
|
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return obiiter.NilIBioSequence, err
|
return obiiter.NilIBioSequence, err
|
||||||
@ -37,8 +36,7 @@ func IUniqueSequence(iterator obiiter.IBioSequence,
|
|||||||
|
|
||||||
} else {
|
} else {
|
||||||
iterator, err = ISequenceChunk(iterator,
|
iterator, err = ISequenceChunk(iterator,
|
||||||
obiseq.HashClassifier(opts.BatchCount()),
|
obiseq.HashClassifier(opts.BatchCount()))
|
||||||
opts.BufferSize())
|
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return obiiter.NilIBioSequence, err
|
return obiiter.NilIBioSequence, err
|
||||||
@ -78,12 +76,11 @@ func IUniqueSequence(iterator obiiter.IBioSequence,
|
|||||||
icat--
|
icat--
|
||||||
input, err = ISequenceSubChunk(input,
|
input, err = ISequenceSubChunk(input,
|
||||||
classifier,
|
classifier,
|
||||||
1,
|
1)
|
||||||
opts.BufferSize())
|
|
||||||
|
|
||||||
var next obiiter.IBioSequence
|
var next obiiter.IBioSequence
|
||||||
if icat >= 0 {
|
if icat >= 0 {
|
||||||
next = obiiter.MakeIBioSequence(opts.BufferSize())
|
next = obiiter.MakeIBioSequence()
|
||||||
|
|
||||||
iUnique.Add(1)
|
iUnique.Add(1)
|
||||||
|
|
||||||
@ -130,7 +127,6 @@ func IUniqueSequence(iterator obiiter.IBioSequence,
|
|||||||
|
|
||||||
iMerged := iUnique.IMergeSequenceBatch(opts.NAValue(),
|
iMerged := iUnique.IMergeSequenceBatch(opts.NAValue(),
|
||||||
opts.StatsOn(),
|
opts.StatsOn(),
|
||||||
opts.BufferSize(),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return iMerged, nil
|
return iMerged, nil
|
||||||
|
248
pkg/obiformats/csv_writer.go
Normal file
248
pkg/obiformats/csv_writer.go
Normal file
@ -0,0 +1,248 @@
|
|||||||
|
package obiformats
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/csv"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
)
|
||||||
|
|
||||||
|
func CSVRecord(sequence *obiseq.BioSequence, opt Options) []string {
|
||||||
|
keys := opt.CSVKeys()
|
||||||
|
record := make([]string, 0, len(keys)+4)
|
||||||
|
|
||||||
|
if opt.CSVId() {
|
||||||
|
record = append(record, sequence.Id())
|
||||||
|
}
|
||||||
|
|
||||||
|
if opt.CSVCount() {
|
||||||
|
record = append(record, fmt.Sprint(sequence.Count()))
|
||||||
|
}
|
||||||
|
|
||||||
|
if opt.CSVTaxon() {
|
||||||
|
taxid := sequence.Taxid()
|
||||||
|
sn, ok := sequence.GetAttribute("scientific_name")
|
||||||
|
|
||||||
|
if !ok {
|
||||||
|
if taxid == 1 {
|
||||||
|
sn = "root"
|
||||||
|
} else {
|
||||||
|
sn = opt.CSVNAValue()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
record = append(record, fmt.Sprint(taxid), fmt.Sprint(sn))
|
||||||
|
}
|
||||||
|
|
||||||
|
if opt.CSVDefinition() {
|
||||||
|
record = append(record, sequence.Definition())
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, key := range opt.CSVKeys() {
|
||||||
|
value, ok := sequence.GetAttribute(key)
|
||||||
|
if !ok {
|
||||||
|
value = opt.CSVNAValue()
|
||||||
|
}
|
||||||
|
|
||||||
|
svalue, _ := goutils.InterfaceToString(value)
|
||||||
|
record = append(record, svalue)
|
||||||
|
}
|
||||||
|
|
||||||
|
if opt.CSVSequence() {
|
||||||
|
record = append(record, string(sequence.Sequence()))
|
||||||
|
}
|
||||||
|
|
||||||
|
if opt.CSVQuality() {
|
||||||
|
if sequence.HasQualities() {
|
||||||
|
l := sequence.Len()
|
||||||
|
q := sequence.Qualities()
|
||||||
|
ascii := make([]byte, l)
|
||||||
|
quality_shift := opt.QualityShift()
|
||||||
|
for j := 0; j < l; j++ {
|
||||||
|
ascii[j] = uint8(q[j]) + uint8(quality_shift)
|
||||||
|
}
|
||||||
|
record = append(record, string(ascii))
|
||||||
|
} else {
|
||||||
|
record = append(record, opt.CSVNAValue())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return record
|
||||||
|
}
|
||||||
|
|
||||||
|
func CSVHeader(opt Options) []string {
|
||||||
|
keys := opt.CSVKeys()
|
||||||
|
record := make([]string, 0, len(keys)+4)
|
||||||
|
|
||||||
|
if opt.CSVId() {
|
||||||
|
record = append(record, "id")
|
||||||
|
}
|
||||||
|
|
||||||
|
if opt.CSVCount() {
|
||||||
|
record = append(record, "count")
|
||||||
|
}
|
||||||
|
|
||||||
|
if opt.CSVTaxon() {
|
||||||
|
record = append(record, "taxid", "scientific_name")
|
||||||
|
}
|
||||||
|
|
||||||
|
if opt.CSVDefinition() {
|
||||||
|
record = append(record, "definition")
|
||||||
|
}
|
||||||
|
|
||||||
|
record = append(record, opt.CSVKeys()...)
|
||||||
|
|
||||||
|
if opt.CSVSequence() {
|
||||||
|
record = append(record, "sequence")
|
||||||
|
}
|
||||||
|
|
||||||
|
if opt.CSVQuality() {
|
||||||
|
record = append(record, "quality")
|
||||||
|
}
|
||||||
|
|
||||||
|
return record
|
||||||
|
}
|
||||||
|
|
||||||
|
func FormatCVSBatch(batch obiiter.BioSequenceBatch, opt Options) []byte {
|
||||||
|
buff := new(bytes.Buffer)
|
||||||
|
csv := csv.NewWriter(buff)
|
||||||
|
|
||||||
|
if batch.Order() == 0 {
|
||||||
|
csv.Write(CSVHeader(opt))
|
||||||
|
}
|
||||||
|
for _, s := range batch.Slice() {
|
||||||
|
csv.Write(CSVRecord(s, opt))
|
||||||
|
}
|
||||||
|
|
||||||
|
csv.Flush()
|
||||||
|
|
||||||
|
return buff.Bytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
func WriteCSV(iterator obiiter.IBioSequence,
|
||||||
|
file io.WriteCloser,
|
||||||
|
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
|
opt := MakeOptions(options)
|
||||||
|
|
||||||
|
file, _ = goutils.CompressStream(file, opt.CompressedFile(), opt.CloseFile())
|
||||||
|
|
||||||
|
newIter := obiiter.MakeIBioSequence()
|
||||||
|
|
||||||
|
nwriters := opt.ParallelWorkers()
|
||||||
|
|
||||||
|
obiiter.RegisterAPipe()
|
||||||
|
chunkchan := make(chan FileChunck)
|
||||||
|
|
||||||
|
newIter.Add(nwriters)
|
||||||
|
var waitWriter sync.WaitGroup
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
newIter.WaitAndClose()
|
||||||
|
for len(chunkchan) > 0 {
|
||||||
|
time.Sleep(time.Millisecond)
|
||||||
|
}
|
||||||
|
close(chunkchan)
|
||||||
|
waitWriter.Wait()
|
||||||
|
}()
|
||||||
|
|
||||||
|
ff := func(iterator obiiter.IBioSequence) {
|
||||||
|
for iterator.Next() {
|
||||||
|
|
||||||
|
batch := iterator.Get()
|
||||||
|
|
||||||
|
chunkchan <- FileChunck{
|
||||||
|
FormatCVSBatch(batch, opt),
|
||||||
|
batch.Order(),
|
||||||
|
}
|
||||||
|
newIter.Push(batch)
|
||||||
|
}
|
||||||
|
newIter.Done()
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debugln("Start of the CSV file writing")
|
||||||
|
go ff(iterator)
|
||||||
|
for i := 0; i < nwriters-1; i++ {
|
||||||
|
go ff(iterator.Split())
|
||||||
|
}
|
||||||
|
|
||||||
|
next_to_send := 0
|
||||||
|
received := make(map[int]FileChunck, 100)
|
||||||
|
|
||||||
|
waitWriter.Add(1)
|
||||||
|
go func() {
|
||||||
|
for chunk := range chunkchan {
|
||||||
|
if chunk.order == next_to_send {
|
||||||
|
file.Write(chunk.text)
|
||||||
|
next_to_send++
|
||||||
|
chunk, ok := received[next_to_send]
|
||||||
|
for ok {
|
||||||
|
file.Write(chunk.text)
|
||||||
|
delete(received, next_to_send)
|
||||||
|
next_to_send++
|
||||||
|
chunk, ok = received[next_to_send]
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
received[chunk.order] = chunk
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
file.Close()
|
||||||
|
|
||||||
|
log.Debugln("End of the CSV file writing")
|
||||||
|
obiiter.UnregisterPipe()
|
||||||
|
waitWriter.Done()
|
||||||
|
|
||||||
|
}()
|
||||||
|
|
||||||
|
return newIter, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func WriteCSVToStdout(iterator obiiter.IBioSequence,
|
||||||
|
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
|
options = append(options, OptionDontCloseFile())
|
||||||
|
return WriteCSV(iterator, os.Stdout, options...)
|
||||||
|
}
|
||||||
|
|
||||||
|
func WriteCSVToFile(iterator obiiter.IBioSequence,
|
||||||
|
filename string,
|
||||||
|
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
|
|
||||||
|
opt := MakeOptions(options)
|
||||||
|
flags := os.O_WRONLY | os.O_CREATE
|
||||||
|
|
||||||
|
if opt.AppendFile() {
|
||||||
|
flags |= os.O_APPEND
|
||||||
|
}
|
||||||
|
file, err := os.OpenFile(filename, flags, 0660)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("open file error: %v", err)
|
||||||
|
return obiiter.NilIBioSequence, err
|
||||||
|
}
|
||||||
|
|
||||||
|
options = append(options, OptionCloseFile())
|
||||||
|
|
||||||
|
iterator, err = WriteCSV(iterator, file, options...)
|
||||||
|
|
||||||
|
if opt.HaveToSavePaired() {
|
||||||
|
var revfile *os.File
|
||||||
|
|
||||||
|
revfile, err = os.OpenFile(opt.PairedFileName(), flags, 0660)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("open file error: %v", err)
|
||||||
|
return obiiter.NilIBioSequence, err
|
||||||
|
}
|
||||||
|
iterator, err = WriteCSV(iterator.PairedWith(), revfile, options...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return iterator, err
|
||||||
|
}
|
@ -166,7 +166,7 @@ func ReadEcoPCR(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
|
|||||||
|
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
|
|
||||||
newIter := obiiter.MakeIBioSequence(opt.BufferSize())
|
newIter := obiiter.MakeIBioSequence()
|
||||||
newIter.Add(1)
|
newIter.Add(1)
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
|
@ -244,9 +244,9 @@ func _ReadFlatFileChunk(reader io.Reader, readers chan _FileChunk) {
|
|||||||
// <CR>?<LF>//<CR>?<LF>
|
// <CR>?<LF>//<CR>?<LF>
|
||||||
func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
|
func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
entry_channel := make(chan _FileChunk, opt.BufferSize())
|
entry_channel := make(chan _FileChunk)
|
||||||
|
|
||||||
newIter := obiiter.MakeIBioSequence(opt.BufferSize())
|
newIter := obiiter.MakeIBioSequence()
|
||||||
|
|
||||||
nworkers := opt.ParallelWorkers()
|
nworkers := opt.ParallelWorkers()
|
||||||
newIter.Add(nworkers)
|
newIter.Add(nworkers)
|
||||||
|
@ -19,6 +19,5 @@ func IParseFastSeqHeaderBatch(iterator obiiter.IBioSequence,
|
|||||||
options ...WithOption) obiiter.IBioSequence {
|
options ...WithOption) obiiter.IBioSequence {
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
return iterator.MakeIWorker(obiseq.AnnotatorToSeqWorker(opt.ParseFastSeqHeader()),
|
return iterator.MakeIWorker(obiseq.AnnotatorToSeqWorker(opt.ParseFastSeqHeader()),
|
||||||
opt.ParallelWorkers(),
|
opt.ParallelWorkers())
|
||||||
opt.BufferSize())
|
|
||||||
}
|
}
|
||||||
|
@ -105,7 +105,7 @@ func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSe
|
|||||||
size = -1
|
size = -1
|
||||||
}
|
}
|
||||||
|
|
||||||
newIter := obiiter.MakeIBioSequence(opt.BufferSize())
|
newIter := obiiter.MakeIBioSequence()
|
||||||
newIter.Add(1)
|
newIter.Add(1)
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
@ -127,7 +127,7 @@ func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSe
|
|||||||
|
|
||||||
func ReadFastSeqFromStdin(options ...WithOption) obiiter.IBioSequence {
|
func ReadFastSeqFromStdin(options ...WithOption) obiiter.IBioSequence {
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
newIter := obiiter.MakeIBioSequence(opt.BufferSize())
|
newIter := obiiter.MakeIBioSequence()
|
||||||
|
|
||||||
newIter.Add(1)
|
newIter.Add(1)
|
||||||
|
|
||||||
|
@ -71,8 +71,7 @@ func WriteFasta(iterator obiiter.IBioSequence,
|
|||||||
|
|
||||||
file, _ = goutils.CompressStream(file, opt.CompressedFile(), opt.CloseFile())
|
file, _ = goutils.CompressStream(file, opt.CompressedFile(), opt.CloseFile())
|
||||||
|
|
||||||
buffsize := iterator.BufferSize()
|
newIter := obiiter.MakeIBioSequence()
|
||||||
newIter := obiiter.MakeIBioSequence(buffsize)
|
|
||||||
|
|
||||||
nwriters := opt.ParallelWorkers()
|
nwriters := opt.ParallelWorkers()
|
||||||
|
|
||||||
|
@ -60,8 +60,7 @@ func WriteFastq(iterator obiiter.IBioSequence,
|
|||||||
|
|
||||||
file, _ = goutils.CompressStream(file, opt.CompressedFile(), opt.CloseFile())
|
file, _ = goutils.CompressStream(file, opt.CompressedFile(), opt.CloseFile())
|
||||||
|
|
||||||
buffsize := iterator.BufferSize()
|
newIter := obiiter.MakeIBioSequence()
|
||||||
newIter := obiiter.MakeIBioSequence(buffsize)
|
|
||||||
|
|
||||||
nwriters := opt.ParallelWorkers()
|
nwriters := opt.ParallelWorkers()
|
||||||
|
|
||||||
|
@ -113,9 +113,9 @@ func _ParseGenbankFile(input <-chan _FileChunk, out obiiter.IBioSequence) {
|
|||||||
|
|
||||||
func ReadGenbank(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
|
func ReadGenbank(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
entry_channel := make(chan _FileChunk, opt.BufferSize())
|
entry_channel := make(chan _FileChunk)
|
||||||
|
|
||||||
newIter := obiiter.MakeIBioSequence(opt.BufferSize())
|
newIter := obiiter.MakeIBioSequence()
|
||||||
|
|
||||||
nworkers := opt.ParallelWorkers()
|
nworkers := opt.ParallelWorkers()
|
||||||
newIter.Add(nworkers)
|
newIter.Add(nworkers)
|
||||||
|
@ -15,10 +15,15 @@ type __options__ struct {
|
|||||||
closefile bool
|
closefile bool
|
||||||
appendfile bool
|
appendfile bool
|
||||||
compressed bool
|
compressed bool
|
||||||
csv_ids bool
|
csv_id bool
|
||||||
cvs_sequence bool
|
csv_sequence bool
|
||||||
|
csv_quality bool
|
||||||
csv_definition bool
|
csv_definition bool
|
||||||
|
csv_count bool
|
||||||
|
csv_taxon bool
|
||||||
|
csv_keys []string
|
||||||
csv_separator string
|
csv_separator string
|
||||||
|
csv_navalue string
|
||||||
paired_filename string
|
paired_filename string
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -40,11 +45,16 @@ func MakeOptions(setters []WithOption) Options {
|
|||||||
closefile: false,
|
closefile: false,
|
||||||
appendfile: false,
|
appendfile: false,
|
||||||
compressed: false,
|
compressed: false,
|
||||||
csv_ids: true,
|
csv_id: true,
|
||||||
csv_definition: false,
|
csv_definition: false,
|
||||||
cvs_sequence: true,
|
csv_count: false,
|
||||||
|
csv_taxon: false,
|
||||||
|
csv_sequence: true,
|
||||||
|
csv_quality: false,
|
||||||
csv_separator: ",",
|
csv_separator: ",",
|
||||||
paired_filename: "",
|
csv_navalue: "NA",
|
||||||
|
csv_keys: make([]string, 0),
|
||||||
|
paired_filename: "",
|
||||||
}
|
}
|
||||||
|
|
||||||
opt := Options{&o}
|
opt := Options{&o}
|
||||||
@ -60,10 +70,6 @@ func (opt Options) QualityShift() int {
|
|||||||
return opt.pointer.quality_shift
|
return opt.pointer.quality_shift
|
||||||
}
|
}
|
||||||
|
|
||||||
func (opt Options) BufferSize() int {
|
|
||||||
return opt.pointer.buffer_size
|
|
||||||
}
|
|
||||||
|
|
||||||
func (opt Options) BatchSize() int {
|
func (opt Options) BatchSize() int {
|
||||||
return opt.pointer.batch_size
|
return opt.pointer.batch_size
|
||||||
}
|
}
|
||||||
@ -96,8 +102,40 @@ func (opt Options) CompressedFile() bool {
|
|||||||
return opt.pointer.compressed
|
return opt.pointer.compressed
|
||||||
}
|
}
|
||||||
|
|
||||||
func (opt Options) CSVIds() bool {
|
func (opt Options) CSVId() bool {
|
||||||
return opt.pointer.csv_ids
|
return opt.pointer.csv_id
|
||||||
|
}
|
||||||
|
|
||||||
|
func (opt Options) CSVDefinition() bool {
|
||||||
|
return opt.pointer.csv_definition
|
||||||
|
}
|
||||||
|
|
||||||
|
func (opt Options) CSVCount() bool {
|
||||||
|
return opt.pointer.csv_count
|
||||||
|
}
|
||||||
|
|
||||||
|
func (opt Options) CSVTaxon() bool {
|
||||||
|
return opt.pointer.csv_taxon
|
||||||
|
}
|
||||||
|
|
||||||
|
func (opt Options) CSVSequence() bool {
|
||||||
|
return opt.pointer.csv_sequence
|
||||||
|
}
|
||||||
|
|
||||||
|
func (opt Options) CSVQuality() bool {
|
||||||
|
return opt.pointer.csv_quality
|
||||||
|
}
|
||||||
|
|
||||||
|
func (opt Options) CSVKeys() []string {
|
||||||
|
return opt.pointer.csv_keys
|
||||||
|
}
|
||||||
|
|
||||||
|
func (opt Options) CSVSeparator() string {
|
||||||
|
return opt.pointer.csv_separator
|
||||||
|
}
|
||||||
|
|
||||||
|
func (opt Options) CSVNAValue() string {
|
||||||
|
return opt.pointer.csv_navalue
|
||||||
}
|
}
|
||||||
|
|
||||||
func (opt Options) HaveToSavePaired() bool {
|
func (opt Options) HaveToSavePaired() bool {
|
||||||
@ -108,14 +146,6 @@ func (opt Options) PairedFileName() string {
|
|||||||
return opt.pointer.paired_filename
|
return opt.pointer.paired_filename
|
||||||
}
|
}
|
||||||
|
|
||||||
func OptionsBufferSize(size int) WithOption {
|
|
||||||
f := WithOption(func(opt Options) {
|
|
||||||
opt.pointer.buffer_size = size
|
|
||||||
})
|
|
||||||
|
|
||||||
return f
|
|
||||||
}
|
|
||||||
|
|
||||||
func OptionCloseFile() WithOption {
|
func OptionCloseFile() WithOption {
|
||||||
f := WithOption(func(opt Options) {
|
f := WithOption(func(opt Options) {
|
||||||
opt.pointer.closefile = true
|
opt.pointer.closefile = true
|
||||||
@ -247,3 +277,82 @@ func WritePairedReadsTo(filename string) WithOption {
|
|||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func CSVId(include bool) WithOption {
|
||||||
|
f := WithOption(func(opt Options) {
|
||||||
|
opt.pointer.csv_id = include
|
||||||
|
})
|
||||||
|
|
||||||
|
return f
|
||||||
|
}
|
||||||
|
|
||||||
|
func CSVSequence(include bool) WithOption {
|
||||||
|
f := WithOption(func(opt Options) {
|
||||||
|
opt.pointer.csv_sequence = include
|
||||||
|
})
|
||||||
|
|
||||||
|
return f
|
||||||
|
}
|
||||||
|
|
||||||
|
func CSVQuality(include bool) WithOption {
|
||||||
|
f := WithOption(func(opt Options) {
|
||||||
|
opt.pointer.csv_quality = include
|
||||||
|
})
|
||||||
|
|
||||||
|
return f
|
||||||
|
}
|
||||||
|
|
||||||
|
func CSVDefinition(include bool) WithOption {
|
||||||
|
f := WithOption(func(opt Options) {
|
||||||
|
opt.pointer.csv_definition = include
|
||||||
|
})
|
||||||
|
|
||||||
|
return f
|
||||||
|
}
|
||||||
|
|
||||||
|
func CSVCount(include bool) WithOption {
|
||||||
|
f := WithOption(func(opt Options) {
|
||||||
|
opt.pointer.csv_count = include
|
||||||
|
})
|
||||||
|
|
||||||
|
return f
|
||||||
|
}
|
||||||
|
|
||||||
|
func CSVTaxon(include bool) WithOption {
|
||||||
|
f := WithOption(func(opt Options) {
|
||||||
|
opt.pointer.csv_taxon = include
|
||||||
|
})
|
||||||
|
|
||||||
|
return f
|
||||||
|
}
|
||||||
|
|
||||||
|
func CSVKey(key string) WithOption {
|
||||||
|
f := WithOption(func(opt Options) {
|
||||||
|
opt.pointer.csv_keys = append(opt.pointer.csv_keys, key)
|
||||||
|
})
|
||||||
|
|
||||||
|
return f
|
||||||
|
}
|
||||||
|
|
||||||
|
func CSVKeys(keys []string) WithOption {
|
||||||
|
f := WithOption(func(opt Options) {
|
||||||
|
opt.pointer.csv_keys = append(opt.pointer.csv_keys, keys...)
|
||||||
|
})
|
||||||
|
|
||||||
|
return f
|
||||||
|
}
|
||||||
|
|
||||||
|
func CSVSeparator(separator string) WithOption {
|
||||||
|
f := WithOption(func(opt Options) {
|
||||||
|
opt.pointer.csv_separator = separator
|
||||||
|
})
|
||||||
|
|
||||||
|
return f
|
||||||
|
}
|
||||||
|
|
||||||
|
func CSVNAValue(navalue string) WithOption {
|
||||||
|
f := WithOption(func(opt Options) {
|
||||||
|
opt.pointer.csv_navalue = navalue
|
||||||
|
})
|
||||||
|
|
||||||
|
return f
|
||||||
|
}
|
||||||
|
@ -60,17 +60,11 @@ type IBioSequence struct {
|
|||||||
var NilIBioSequence = IBioSequence{pointer: nil}
|
var NilIBioSequence = IBioSequence{pointer: nil}
|
||||||
|
|
||||||
func MakeIBioSequence(sizes ...int) IBioSequence {
|
func MakeIBioSequence(sizes ...int) IBioSequence {
|
||||||
buffsize := int32(0)
|
|
||||||
|
|
||||||
if len(sizes) > 0 {
|
|
||||||
buffsize = int32(sizes[0])
|
|
||||||
}
|
|
||||||
|
|
||||||
i := _IBioSequence{
|
i := _IBioSequence{
|
||||||
channel: make(chan BioSequenceBatch, buffsize),
|
channel: make(chan BioSequenceBatch),
|
||||||
current: NilBioSequenceBatch,
|
current: NilBioSequenceBatch,
|
||||||
pushBack: abool.New(),
|
pushBack: abool.New(),
|
||||||
buffer_size: buffsize,
|
|
||||||
batch_size: -1,
|
batch_size: -1,
|
||||||
sequence_format: "",
|
sequence_format: "",
|
||||||
finished: abool.New(),
|
finished: abool.New(),
|
||||||
@ -160,14 +154,6 @@ func (iterator IBioSequence) IsNil() bool {
|
|||||||
return iterator.pointer == nil
|
return iterator.pointer == nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (iterator IBioSequence) BufferSize() int {
|
|
||||||
if iterator.pointer == nil {
|
|
||||||
log.Panic("call of IBioSequenceBatch.BufferSize method on NilIBioSequenceBatch")
|
|
||||||
}
|
|
||||||
|
|
||||||
return int(atomic.LoadInt32(&iterator.pointer.buffer_size))
|
|
||||||
}
|
|
||||||
|
|
||||||
func (iterator IBioSequence) BatchSize() int {
|
func (iterator IBioSequence) BatchSize() int {
|
||||||
if iterator.pointer == nil {
|
if iterator.pointer == nil {
|
||||||
log.Panic("call of IBioSequenceBatch.BatchSize method on NilIBioSequenceBatch")
|
log.Panic("call of IBioSequenceBatch.BatchSize method on NilIBioSequenceBatch")
|
||||||
@ -279,13 +265,8 @@ func (iterator IBioSequence) Finished() bool {
|
|||||||
|
|
||||||
// Sorting the batches of sequences.
|
// Sorting the batches of sequences.
|
||||||
func (iterator IBioSequence) SortBatches(sizes ...int) IBioSequence {
|
func (iterator IBioSequence) SortBatches(sizes ...int) IBioSequence {
|
||||||
buffsize := iterator.BufferSize()
|
|
||||||
|
|
||||||
if len(sizes) > 0 {
|
newIter := MakeIBioSequence()
|
||||||
buffsize = sizes[0]
|
|
||||||
}
|
|
||||||
|
|
||||||
newIter := MakeIBioSequence(buffsize)
|
|
||||||
|
|
||||||
newIter.Add(1)
|
newIter.Add(1)
|
||||||
|
|
||||||
@ -338,8 +319,7 @@ func (iterator IBioSequence) Concat(iterators ...IBioSequence) IBioSequence {
|
|||||||
allPaired = allPaired && i.IsPaired()
|
allPaired = allPaired && i.IsPaired()
|
||||||
}
|
}
|
||||||
|
|
||||||
buffsize := iterator.BufferSize()
|
newIter := MakeIBioSequence()
|
||||||
newIter := MakeIBioSequence(buffsize)
|
|
||||||
|
|
||||||
newIter.Add(1)
|
newIter.Add(1)
|
||||||
|
|
||||||
@ -396,8 +376,7 @@ func (iterator IBioSequence) Pool(iterators ...IBioSequence) IBioSequence {
|
|||||||
}
|
}
|
||||||
|
|
||||||
nextCounter := goutils.AtomicCounter()
|
nextCounter := goutils.AtomicCounter()
|
||||||
buffsize := iterator.BufferSize()
|
newIter := MakeIBioSequence()
|
||||||
newIter := MakeIBioSequence(buffsize)
|
|
||||||
|
|
||||||
newIter.Add(niterator)
|
newIter.Add(niterator)
|
||||||
|
|
||||||
@ -431,13 +410,8 @@ func (iterator IBioSequence) Pool(iterators ...IBioSequence) IBioSequence {
|
|||||||
// indicated in parameter. Rebatching implies to sort the
|
// indicated in parameter. Rebatching implies to sort the
|
||||||
// source IBioSequenceBatch.
|
// source IBioSequenceBatch.
|
||||||
func (iterator IBioSequence) Rebatch(size int, sizes ...int) IBioSequence {
|
func (iterator IBioSequence) Rebatch(size int, sizes ...int) IBioSequence {
|
||||||
buffsize := iterator.BufferSize()
|
|
||||||
|
|
||||||
if len(sizes) > 0 {
|
newIter := MakeIBioSequence()
|
||||||
buffsize = sizes[0]
|
|
||||||
}
|
|
||||||
|
|
||||||
newIter := MakeIBioSequence(buffsize)
|
|
||||||
|
|
||||||
newIter.Add(1)
|
newIter.Add(1)
|
||||||
|
|
||||||
@ -532,14 +506,9 @@ func (iterator IBioSequence) Count(recycle bool) (int, int, int) {
|
|||||||
// iterator following the predicate value.
|
// iterator following the predicate value.
|
||||||
func (iterator IBioSequence) DivideOn(predicate obiseq.SequencePredicate,
|
func (iterator IBioSequence) DivideOn(predicate obiseq.SequencePredicate,
|
||||||
size int, sizes ...int) (IBioSequence, IBioSequence) {
|
size int, sizes ...int) (IBioSequence, IBioSequence) {
|
||||||
buffsize := iterator.BufferSize()
|
|
||||||
|
|
||||||
if len(sizes) > 0 {
|
trueIter := MakeIBioSequence()
|
||||||
buffsize = sizes[0]
|
falseIter := MakeIBioSequence()
|
||||||
}
|
|
||||||
|
|
||||||
trueIter := MakeIBioSequence(buffsize)
|
|
||||||
falseIter := MakeIBioSequence(buffsize)
|
|
||||||
|
|
||||||
trueIter.Add(1)
|
trueIter.Add(1)
|
||||||
falseIter.Add(1)
|
falseIter.Add(1)
|
||||||
@ -604,18 +573,13 @@ func (iterator IBioSequence) DivideOn(predicate obiseq.SequencePredicate,
|
|||||||
// A function that takes a predicate and a batch of sequences and returns a filtered batch of sequences.
|
// A function that takes a predicate and a batch of sequences and returns a filtered batch of sequences.
|
||||||
func (iterator IBioSequence) FilterOn(predicate obiseq.SequencePredicate,
|
func (iterator IBioSequence) FilterOn(predicate obiseq.SequencePredicate,
|
||||||
size int, sizes ...int) IBioSequence {
|
size int, sizes ...int) IBioSequence {
|
||||||
buffsize := iterator.BufferSize()
|
|
||||||
nworkers := 4
|
nworkers := 4
|
||||||
|
|
||||||
if len(sizes) > 0 {
|
if len(sizes) > 0 {
|
||||||
nworkers = sizes[0]
|
nworkers = sizes[0]
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(sizes) > 1 {
|
trueIter := MakeIBioSequence()
|
||||||
buffsize = sizes[1]
|
|
||||||
}
|
|
||||||
|
|
||||||
trueIter := MakeIBioSequence(buffsize)
|
|
||||||
|
|
||||||
trueIter.Add(nworkers)
|
trueIter.Add(nworkers)
|
||||||
|
|
||||||
@ -661,18 +625,13 @@ func (iterator IBioSequence) FilterOn(predicate obiseq.SequencePredicate,
|
|||||||
|
|
||||||
func (iterator IBioSequence) FilterAnd(predicate obiseq.SequencePredicate,
|
func (iterator IBioSequence) FilterAnd(predicate obiseq.SequencePredicate,
|
||||||
size int, sizes ...int) IBioSequence {
|
size int, sizes ...int) IBioSequence {
|
||||||
buffsize := iterator.BufferSize()
|
|
||||||
nworkers := 4
|
nworkers := 4
|
||||||
|
|
||||||
if len(sizes) > 0 {
|
if len(sizes) > 0 {
|
||||||
nworkers = sizes[0]
|
nworkers = sizes[0]
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(sizes) > 1 {
|
trueIter := MakeIBioSequence()
|
||||||
buffsize = sizes[1]
|
|
||||||
}
|
|
||||||
|
|
||||||
trueIter := MakeIBioSequence(buffsize)
|
|
||||||
|
|
||||||
trueIter.Add(nworkers)
|
trueIter.Add(nworkers)
|
||||||
|
|
||||||
@ -740,13 +699,7 @@ func (iterator IBioSequence) Load() obiseq.BioSequenceSlice {
|
|||||||
func IBatchOver(data obiseq.BioSequenceSlice,
|
func IBatchOver(data obiseq.BioSequenceSlice,
|
||||||
size int, sizes ...int) IBioSequence {
|
size int, sizes ...int) IBioSequence {
|
||||||
|
|
||||||
buffsize := 0
|
newIter := MakeIBioSequence()
|
||||||
|
|
||||||
if len(sizes) > 0 {
|
|
||||||
buffsize = sizes[0]
|
|
||||||
}
|
|
||||||
|
|
||||||
newIter := MakeIBioSequence(buffsize)
|
|
||||||
|
|
||||||
newIter.Add(1)
|
newIter.Add(1)
|
||||||
|
|
||||||
|
@ -36,7 +36,6 @@ func (dist *IDistribute) Classifier() *obiseq.BioSequenceClassifier {
|
|||||||
|
|
||||||
func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier, sizes ...int) IDistribute {
|
func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier, sizes ...int) IDistribute {
|
||||||
batchsize := 5000
|
batchsize := 5000
|
||||||
buffsize := 2
|
|
||||||
|
|
||||||
outputs := make(map[int]IBioSequence, 100)
|
outputs := make(map[int]IBioSequence, 100)
|
||||||
slices := make(map[int]*obiseq.BioSequenceSlice, 100)
|
slices := make(map[int]*obiseq.BioSequenceSlice, 100)
|
||||||
@ -47,9 +46,7 @@ func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier, siz
|
|||||||
batchsize = sizes[0]
|
batchsize = sizes[0]
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(sizes) > 1 {
|
|
||||||
buffsize = sizes[1]
|
|
||||||
}
|
|
||||||
|
|
||||||
jobDone := sync.WaitGroup{}
|
jobDone := sync.WaitGroup{}
|
||||||
lock := sync.Mutex{}
|
lock := sync.Mutex{}
|
||||||
@ -80,7 +77,7 @@ func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier, siz
|
|||||||
orders[key] = 0
|
orders[key] = 0
|
||||||
|
|
||||||
lock.Lock()
|
lock.Lock()
|
||||||
outputs[key] = MakeIBioSequence(buffsize)
|
outputs[key] = MakeIBioSequence()
|
||||||
lock.Unlock()
|
lock.Unlock()
|
||||||
|
|
||||||
news <- key
|
news <- key
|
||||||
|
@ -4,16 +4,12 @@ import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
|||||||
|
|
||||||
func (iterator IBioSequence) IMergeSequenceBatch(na string, statsOn []string, sizes ...int) IBioSequence {
|
func (iterator IBioSequence) IMergeSequenceBatch(na string, statsOn []string, sizes ...int) IBioSequence {
|
||||||
batchsize := 100
|
batchsize := 100
|
||||||
buffsize := iterator.BufferSize()
|
|
||||||
|
|
||||||
if len(sizes) > 0 {
|
if len(sizes) > 0 {
|
||||||
batchsize = sizes[0]
|
batchsize = sizes[0]
|
||||||
}
|
}
|
||||||
if len(sizes) > 1 {
|
|
||||||
buffsize = sizes[1]
|
|
||||||
}
|
|
||||||
|
|
||||||
newIter := MakeIBioSequence(buffsize)
|
newIter := MakeIBioSequence()
|
||||||
|
|
||||||
newIter.Add(1)
|
newIter.Add(1)
|
||||||
|
|
||||||
|
@ -6,7 +6,6 @@ import (
|
|||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
// That method allows for applying a SeqWorker function on every sequences.
|
// That method allows for applying a SeqWorker function on every sequences.
|
||||||
//
|
//
|
||||||
// Sequences are provided by the iterator and modified sequences are pushed
|
// Sequences are provided by the iterator and modified sequences are pushed
|
||||||
@ -17,17 +16,12 @@ import (
|
|||||||
// - The second the size of the chanel buffer. By default set to the same value than the input buffer.
|
// - The second the size of the chanel buffer. By default set to the same value than the input buffer.
|
||||||
func (iterator IBioSequence) MakeIWorker(worker obiseq.SeqWorker, sizes ...int) IBioSequence {
|
func (iterator IBioSequence) MakeIWorker(worker obiseq.SeqWorker, sizes ...int) IBioSequence {
|
||||||
nworkers := 4
|
nworkers := 4
|
||||||
buffsize := iterator.BufferSize()
|
|
||||||
|
|
||||||
if len(sizes) > 0 {
|
if len(sizes) > 0 {
|
||||||
nworkers = sizes[0]
|
nworkers = sizes[0]
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(sizes) > 1 {
|
newIter := MakeIBioSequence()
|
||||||
buffsize = sizes[1]
|
|
||||||
}
|
|
||||||
|
|
||||||
newIter := MakeIBioSequence(buffsize)
|
|
||||||
|
|
||||||
newIter.Add(nworkers)
|
newIter.Add(nworkers)
|
||||||
|
|
||||||
@ -64,17 +58,12 @@ func (iterator IBioSequence) MakeIWorker(worker obiseq.SeqWorker, sizes ...int)
|
|||||||
func (iterator IBioSequence) MakeIConditionalWorker(predicate obiseq.SequencePredicate,
|
func (iterator IBioSequence) MakeIConditionalWorker(predicate obiseq.SequencePredicate,
|
||||||
worker obiseq.SeqWorker, sizes ...int) IBioSequence {
|
worker obiseq.SeqWorker, sizes ...int) IBioSequence {
|
||||||
nworkers := 4
|
nworkers := 4
|
||||||
buffsize := iterator.BufferSize()
|
|
||||||
|
|
||||||
if len(sizes) > 0 {
|
if len(sizes) > 0 {
|
||||||
nworkers = sizes[0]
|
nworkers = sizes[0]
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(sizes) > 1 {
|
newIter := MakeIBioSequence()
|
||||||
buffsize = sizes[1]
|
|
||||||
}
|
|
||||||
|
|
||||||
newIter := MakeIBioSequence(buffsize)
|
|
||||||
|
|
||||||
newIter.Add(nworkers)
|
newIter.Add(nworkers)
|
||||||
|
|
||||||
@ -112,17 +101,12 @@ func (iterator IBioSequence) MakeIConditionalWorker(predicate obiseq.SequencePre
|
|||||||
|
|
||||||
func (iterator IBioSequence) MakeISliceWorker(worker obiseq.SeqSliceWorker, sizes ...int) IBioSequence {
|
func (iterator IBioSequence) MakeISliceWorker(worker obiseq.SeqSliceWorker, sizes ...int) IBioSequence {
|
||||||
nworkers := 4
|
nworkers := 4
|
||||||
buffsize := iterator.BufferSize()
|
|
||||||
|
|
||||||
if len(sizes) > 0 {
|
if len(sizes) > 0 {
|
||||||
nworkers = sizes[0]
|
nworkers = sizes[0]
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(sizes) > 1 {
|
newIter := MakeIBioSequence()
|
||||||
buffsize = sizes[1]
|
|
||||||
}
|
|
||||||
|
|
||||||
newIter := MakeIBioSequence(buffsize)
|
|
||||||
|
|
||||||
newIter.Add(nworkers)
|
newIter.Add(nworkers)
|
||||||
|
|
||||||
@ -140,7 +124,7 @@ func (iterator IBioSequence) MakeISliceWorker(worker obiseq.SeqSliceWorker, size
|
|||||||
newIter.Done()
|
newIter.Done()
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("Start of the batch slice workers on %d workers (buffer : %d)\n", nworkers, buffsize)
|
log.Printf("Start of the batch slice workers on %d workers\n", nworkers)
|
||||||
for i := 0; i < nworkers-1; i++ {
|
for i := 0; i < nworkers-1; i++ {
|
||||||
go f(iterator.Split())
|
go f(iterator.Split())
|
||||||
}
|
}
|
||||||
@ -168,4 +152,3 @@ func SliceWorkerPipe(worker obiseq.SeqSliceWorker, sizes ...int) Pipeable {
|
|||||||
|
|
||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -11,7 +11,6 @@ type _Options struct {
|
|||||||
withProgressBar bool
|
withProgressBar bool
|
||||||
parallelWorkers int
|
parallelWorkers int
|
||||||
batchSize int
|
batchSize int
|
||||||
bufferSize int
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Options stores a set of option usable by the
|
// Options stores a set of option usable by the
|
||||||
@ -56,16 +55,6 @@ func OptionAllowedMismatches(count int) WithOption {
|
|||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
// OptionBufferSize sets the requested channel
|
|
||||||
// buffer size.
|
|
||||||
func OptionBufferSize(size int) WithOption {
|
|
||||||
f := WithOption(func(opt Options) {
|
|
||||||
opt.pointer.bufferSize = size
|
|
||||||
})
|
|
||||||
|
|
||||||
return f
|
|
||||||
}
|
|
||||||
|
|
||||||
// OptionParallelWorkers sets how many search
|
// OptionParallelWorkers sets how many search
|
||||||
// jobs will be run in parallel.
|
// jobs will be run in parallel.
|
||||||
func OptionParallelWorkers(nworkers int) WithOption {
|
func OptionParallelWorkers(nworkers int) WithOption {
|
||||||
@ -102,12 +91,6 @@ func (options Options) WithProgressBar() bool {
|
|||||||
return options.pointer.withProgressBar
|
return options.pointer.withProgressBar
|
||||||
}
|
}
|
||||||
|
|
||||||
// BufferSize returns the size of the channel
|
|
||||||
// buffer specified by the options
|
|
||||||
func (options Options) BufferSize() int {
|
|
||||||
return options.pointer.bufferSize
|
|
||||||
}
|
|
||||||
|
|
||||||
// BatchSize returns the size of the
|
// BatchSize returns the size of the
|
||||||
// sequence batch used by the PCR algorithm
|
// sequence batch used by the PCR algorithm
|
||||||
func (options Options) BatchSize() int {
|
func (options Options) BatchSize() int {
|
||||||
@ -130,7 +113,6 @@ func MakeOptions(setters []WithOption) Options {
|
|||||||
withProgressBar: false,
|
withProgressBar: false,
|
||||||
parallelWorkers: 4,
|
parallelWorkers: 4,
|
||||||
batchSize: 1000,
|
batchSize: 1000,
|
||||||
bufferSize: 100,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
opt := Options{&o}
|
opt := Options{&o}
|
||||||
|
@ -11,12 +11,11 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var _Debug = false
|
var _Debug = false
|
||||||
var _ParallelWorkers = runtime.NumCPU() * 2 - 1
|
var _ParallelWorkers = runtime.NumCPU()*2 - 1
|
||||||
var _MaxAllowedCPU = runtime.NumCPU()
|
var _MaxAllowedCPU = runtime.NumCPU()
|
||||||
var _BufferSize = 1
|
|
||||||
var _BatchSize = 5000
|
var _BatchSize = 5000
|
||||||
|
|
||||||
type ArgumentParser func([]string) (*getoptions.GetOpt, []string, error)
|
type ArgumentParser func([]string) (*getoptions.GetOpt, []string)
|
||||||
|
|
||||||
func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser {
|
func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser {
|
||||||
|
|
||||||
@ -38,16 +37,20 @@ func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser
|
|||||||
o(options)
|
o(options)
|
||||||
}
|
}
|
||||||
|
|
||||||
return func(args []string) (*getoptions.GetOpt, []string, error) {
|
return func(args []string) (*getoptions.GetOpt, []string) {
|
||||||
|
|
||||||
remaining, err := options.Parse(args[1:])
|
remaining, err := options.Parse(args[1:])
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Error on the commande line : %v",err)
|
||||||
|
}
|
||||||
|
|
||||||
// Setup the maximum number of CPU usable by the program
|
// Setup the maximum number of CPU usable by the program
|
||||||
runtime.GOMAXPROCS(_MaxAllowedCPU)
|
runtime.GOMAXPROCS(_MaxAllowedCPU)
|
||||||
if options.Called("max-cpu") {
|
if options.Called("max-cpu") {
|
||||||
log.Printf("CPU number limited to %d", _MaxAllowedCPU)
|
log.Printf("CPU number limited to %d", _MaxAllowedCPU)
|
||||||
if ! options.Called("workers") {
|
if !options.Called("workers") {
|
||||||
_ParallelWorkers=_MaxAllowedCPU * 2 - 1
|
_ParallelWorkers = _MaxAllowedCPU*2 - 1
|
||||||
log.Printf("Number of workers set %d", _ParallelWorkers)
|
log.Printf("Number of workers set %d", _ParallelWorkers)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -67,7 +70,7 @@ func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser
|
|||||||
log.Debugln("Switch to debug level logging")
|
log.Debugln("Switch to debug level logging")
|
||||||
}
|
}
|
||||||
|
|
||||||
return options, remaining, err
|
return options, remaining
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -88,11 +91,6 @@ func CLIMaxCPU() int {
|
|||||||
return _MaxAllowedCPU
|
return _MaxAllowedCPU
|
||||||
}
|
}
|
||||||
|
|
||||||
// CLIBufferSize returns the expeted channel buffer size for obitools
|
|
||||||
func CLIBufferSize() int {
|
|
||||||
return _BufferSize
|
|
||||||
}
|
|
||||||
|
|
||||||
// CLIBatchSize returns the expeted size of the sequence batches
|
// CLIBatchSize returns the expeted size of the sequence batches
|
||||||
func CLIBatchSize() int {
|
func CLIBatchSize() int {
|
||||||
return _BatchSize
|
return _BatchSize
|
||||||
|
@ -8,6 +8,15 @@ import (
|
|||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func (s *BioSequence) HasAttribute(key string) bool {
|
||||||
|
ok := s.annotations != nil
|
||||||
|
|
||||||
|
if ok {
|
||||||
|
_, ok = s.annotations[key]
|
||||||
|
}
|
||||||
|
|
||||||
|
return ok
|
||||||
|
}
|
||||||
// A method that returns the value of the key in the annotation map.
|
// A method that returns the value of the key in the annotation map.
|
||||||
func (s *BioSequence) GetAttribute(key string) (interface{}, bool) {
|
func (s *BioSequence) GetAttribute(key string) (interface{}, bool) {
|
||||||
var val interface{}
|
var val interface{}
|
||||||
|
@ -278,3 +278,28 @@ func (s *BioSequence) Clear() {
|
|||||||
s.sequence = s.sequence[0:0]
|
s.sequence = s.sequence[0:0]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *BioSequence) Composition() map[byte]int {
|
||||||
|
|
||||||
|
a := 0
|
||||||
|
c := 0
|
||||||
|
g := 0
|
||||||
|
t := 0
|
||||||
|
other := 0
|
||||||
|
for _, char := range s.sequence {
|
||||||
|
switch char {
|
||||||
|
case 'a':
|
||||||
|
a++
|
||||||
|
case 'c':
|
||||||
|
c++
|
||||||
|
case 'g':
|
||||||
|
g++
|
||||||
|
case 't':
|
||||||
|
t++
|
||||||
|
default:
|
||||||
|
other++
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return map[byte]int{'a': a, 'c': c, 'g': g, 't': t, 'o': other}
|
||||||
|
}
|
||||||
|
@ -316,3 +316,4 @@ func RotateClassifier(size int) *BioSequenceClassifier {
|
|||||||
c := BioSequenceClassifier{code, value, reset, clone,"RotateClassifier"}
|
c := BioSequenceClassifier{code, value, reset, clone,"RotateClassifier"}
|
||||||
return &c
|
return &c
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4,22 +4,21 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obieval"
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
func Expression(expression string) func(*BioSequence) (interface{},error) {
|
func Expression(expression string) func(*BioSequence) (interface{}, error) {
|
||||||
|
|
||||||
exp, err := obieval.OBILang.NewEvaluable(expression)
|
exp, err := OBILang.NewEvaluable(expression)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("Error in the expression : %s", expression)
|
log.Fatalf("Error in the expression : %s", expression)
|
||||||
}
|
}
|
||||||
|
|
||||||
f := func(sequence *BioSequence) (interface{},error) {
|
f := func(sequence *BioSequence) (interface{}, error) {
|
||||||
return exp(context.Background(),
|
return exp(context.Background(),
|
||||||
map[string]interface{}{
|
map[string]interface{}{
|
||||||
"annotations": sequence.Annotations(),
|
"annotations": sequence.Annotations(),
|
||||||
"sequence": sequence,
|
"sequence": sequence,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
@ -30,14 +29,14 @@ func Expression(expression string) func(*BioSequence) (interface{},error) {
|
|||||||
func EditIdWorker(expression string) SeqWorker {
|
func EditIdWorker(expression string) SeqWorker {
|
||||||
e := Expression(expression)
|
e := Expression(expression)
|
||||||
f := func(sequence *BioSequence) *BioSequence {
|
f := func(sequence *BioSequence) *BioSequence {
|
||||||
v,err := e(sequence)
|
v, err := e(sequence)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("Expression '%s' cannot be evaluated on sequence %s",
|
log.Fatalf("Expression '%s' cannot be evaluated on sequence %s",
|
||||||
expression,
|
expression,
|
||||||
sequence.Id())
|
sequence.Id())
|
||||||
}
|
}
|
||||||
sequence.SetId(fmt.Sprintf("%v",v))
|
sequence.SetId(fmt.Sprintf("%v", v))
|
||||||
return sequence
|
return sequence
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -47,16 +46,16 @@ func EditIdWorker(expression string) SeqWorker {
|
|||||||
func EditAttributeWorker(key string, expression string) SeqWorker {
|
func EditAttributeWorker(key string, expression string) SeqWorker {
|
||||||
e := Expression(expression)
|
e := Expression(expression)
|
||||||
f := func(sequence *BioSequence) *BioSequence {
|
f := func(sequence *BioSequence) *BioSequence {
|
||||||
v,err := e(sequence)
|
v, err := e(sequence)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("Expression '%s' cannot be evaluated on sequence %s",
|
log.Fatalf("Expression '%s' cannot be evaluated on sequence %s",
|
||||||
expression,
|
expression,
|
||||||
sequence.Id())
|
sequence.Id())
|
||||||
}
|
}
|
||||||
sequence.SetAttribute(key,v)
|
sequence.SetAttribute(key, v)
|
||||||
return sequence
|
return sequence
|
||||||
}
|
}
|
||||||
|
|
||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
package obieval
|
package obiseq
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
@ -174,8 +174,19 @@ var OBILang = gval.NewLanguage(
|
|||||||
log.Fatalf("%v cannot be converted to a boolan value", args[0])
|
log.Fatalf("%v cannot be converted to a boolan value", args[0])
|
||||||
}
|
}
|
||||||
return val, nil
|
return val, nil
|
||||||
|
}),
|
||||||
|
gval.Function("ifelse", func(args ...interface{}) (interface{}, error) {
|
||||||
|
if args[0].(bool) {
|
||||||
|
return args[1], nil
|
||||||
|
} else {
|
||||||
|
return args[2], nil
|
||||||
|
}
|
||||||
|
}),
|
||||||
|
gval.Function("gcskew", func(args ...interface{}) (interface{}, error) {
|
||||||
|
composition := (args[0].(*BioSequence)).Composition()
|
||||||
|
return float64(composition['g']-composition['c']) / float64(composition['g']+composition['c']), nil
|
||||||
|
}),
|
||||||
|
gval.Function("composition", func(args ...interface{}) (interface{}, error) {
|
||||||
|
return (args[0].(*BioSequence)).Composition(), nil
|
||||||
}))
|
}))
|
||||||
|
|
||||||
func Expression(expression string) (gval.Evaluable, error) {
|
|
||||||
return OBILang.NewEvaluable(expression)
|
|
||||||
}
|
|
@ -5,7 +5,6 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obieval"
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -256,7 +255,7 @@ func IsIdIn(ids ...string) SequencePredicate {
|
|||||||
|
|
||||||
func ExpressionPredicat(expression string) SequencePredicate {
|
func ExpressionPredicat(expression string) SequencePredicate {
|
||||||
|
|
||||||
exp, err := obieval.OBILang.NewEvaluable(expression)
|
exp, err := OBILang.NewEvaluable(expression)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("Error in the expression : %s", expression)
|
log.Fatalf("Error in the expression : %s", expression)
|
||||||
}
|
}
|
||||||
|
63
pkg/obitools/obicleandb/obicleandb.go
Normal file
63
pkg/obitools/obicleandb/obicleandb.go
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
package obicleandb
|
||||||
|
|
||||||
|
import (
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obichunk"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obigrep"
|
||||||
|
)
|
||||||
|
|
||||||
|
func ICleanDB(itertator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||||
|
var rankPredicate obiseq.SequencePredicate
|
||||||
|
|
||||||
|
options := make([]obichunk.WithOption, 0, 30)
|
||||||
|
|
||||||
|
// Make sequence dereplication with a constraint on the taxid.
|
||||||
|
// To be merged, both sequences must have the same taxid.
|
||||||
|
|
||||||
|
options = append(options,
|
||||||
|
obichunk.OptionBatchCount(100),
|
||||||
|
obichunk.OptionSortOnMemory(),
|
||||||
|
obichunk.OptionSubCategory("taxid"),
|
||||||
|
obichunk.OptionsParallelWorkers(
|
||||||
|
obioptions.CLIParallelWorkers()),
|
||||||
|
obichunk.OptionsBatchSize(
|
||||||
|
obioptions.CLIBatchSize()),
|
||||||
|
obichunk.OptionNAValue("NA"),
|
||||||
|
)
|
||||||
|
|
||||||
|
unique, err := obichunk.IUniqueSequence(itertator, options...)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
taxonomy := obigrep.CLILoadSelectedTaxonomy()
|
||||||
|
|
||||||
|
if len(obigrep.CLIRequiredRanks()) > 0 {
|
||||||
|
rankPredicate = obigrep.CLIHasRankDefinedPredicate()
|
||||||
|
} else {
|
||||||
|
rankPredicate = taxonomy.HasRequiredRank("species").And(taxonomy.HasRequiredRank("genus")).And(taxonomy.HasRequiredRank("family"))
|
||||||
|
}
|
||||||
|
|
||||||
|
goodTaxa := taxonomy.IsAValidTaxon(CLIUpdateTaxids()).And(rankPredicate)
|
||||||
|
|
||||||
|
usable := unique.FilterOn(goodTaxa,
|
||||||
|
obioptions.CLIBatchSize(),
|
||||||
|
obioptions.CLIParallelWorkers())
|
||||||
|
|
||||||
|
annotated := usable.MakeIWorker(taxonomy.MakeSetSpeciesWorker(),
|
||||||
|
obioptions.CLIParallelWorkers(),
|
||||||
|
).MakeIWorker(taxonomy.MakeSetGenusWorker(),
|
||||||
|
obioptions.CLIParallelWorkers(),
|
||||||
|
).MakeIWorker(taxonomy.MakeSetFamilyWorker(),
|
||||||
|
obioptions.CLIParallelWorkers(),
|
||||||
|
)
|
||||||
|
|
||||||
|
// annotated.MakeIConditionalWorker(obiseq.IsMoreAbundantOrEqualTo(3),1000)
|
||||||
|
|
||||||
|
return annotated
|
||||||
|
}
|
@ -60,6 +60,21 @@ func InputOptionSet(options *getoptions.GetOpt) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func OutputModeOptionSet(options *getoptions.GetOpt) {
|
||||||
|
options.BoolVar(&__no_progress_bar__, "no-progressbar", false,
|
||||||
|
options.Description("Disable the progress bar printing"))
|
||||||
|
|
||||||
|
options.BoolVar(&__compressed__, "compress", false,
|
||||||
|
options.Alias("Z"),
|
||||||
|
options.Description("Output is compressed"))
|
||||||
|
|
||||||
|
options.StringVar(&__output_file_name__, "out", __output_file_name__,
|
||||||
|
options.Alias("o"),
|
||||||
|
options.ArgName("FILENAME"),
|
||||||
|
options.Description("Filename used for saving the output"),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
func OutputOptionSet(options *getoptions.GetOpt) {
|
func OutputOptionSet(options *getoptions.GetOpt) {
|
||||||
options.BoolVar(&__output_in_fasta__, "fasta-output", false,
|
options.BoolVar(&__output_in_fasta__, "fasta-output", false,
|
||||||
options.Description("Read data following the ecoPCR output format."))
|
options.Description("Read data following the ecoPCR output format."))
|
||||||
@ -73,19 +88,7 @@ func OutputOptionSet(options *getoptions.GetOpt) {
|
|||||||
options.Alias("O"),
|
options.Alias("O"),
|
||||||
options.Description("output FASTA/FASTQ title line annotations follow OBI format."))
|
options.Description("output FASTA/FASTQ title line annotations follow OBI format."))
|
||||||
|
|
||||||
options.BoolVar(&__no_progress_bar__, "no-progressbar", false,
|
OutputModeOptionSet(options)
|
||||||
options.Description("Disable the progress bar printing"))
|
|
||||||
|
|
||||||
options.BoolVar(&__compressed__, "compress", false,
|
|
||||||
options.Alias("Z"),
|
|
||||||
options.Description("Output is compressed"))
|
|
||||||
|
|
||||||
options.StringVar(&__output_file_name__, "out", __output_file_name__,
|
|
||||||
options.Alias("o"),
|
|
||||||
options.ArgName("FILENAME"),
|
|
||||||
options.Description("Filename used for saving the output"),
|
|
||||||
)
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func PairedFilesOptionSet(options *getoptions.GetOpt) {
|
func PairedFilesOptionSet(options *getoptions.GetOpt) {
|
||||||
@ -197,4 +200,4 @@ func CLIHasPairedFile() bool {
|
|||||||
}
|
}
|
||||||
func CLIPairedFileName() string {
|
func CLIPairedFileName() string {
|
||||||
return __paired_file_name__
|
return __paired_file_name__
|
||||||
}
|
}
|
||||||
|
@ -48,6 +48,10 @@ func _ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
|
|||||||
strings.HasSuffix(path, "fasta.gz") ||
|
strings.HasSuffix(path, "fasta.gz") ||
|
||||||
strings.HasSuffix(path, "fastq") ||
|
strings.HasSuffix(path, "fastq") ||
|
||||||
strings.HasSuffix(path, "fastq.gz") ||
|
strings.HasSuffix(path, "fastq.gz") ||
|
||||||
|
strings.HasSuffix(path, "seq") ||
|
||||||
|
strings.HasSuffix(path, "seq.gz") ||
|
||||||
|
strings.HasSuffix(path, "gb") ||
|
||||||
|
strings.HasSuffix(path, "gb.gz") ||
|
||||||
strings.HasSuffix(path, "dat") ||
|
strings.HasSuffix(path, "dat") ||
|
||||||
strings.HasSuffix(path, "dat.gz") ||
|
strings.HasSuffix(path, "dat.gz") ||
|
||||||
strings.HasSuffix(path, "ecopcr") ||
|
strings.HasSuffix(path, "ecopcr") ||
|
||||||
@ -82,13 +86,12 @@ func CLIReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) {
|
|||||||
opts = append(opts, obiformats.OptionsFastSeqHeaderParser(obiformats.ParseGuessedFastSeqHeader))
|
opts = append(opts, obiformats.OptionsFastSeqHeaderParser(obiformats.ParseGuessedFastSeqHeader))
|
||||||
}
|
}
|
||||||
|
|
||||||
nworkers := obioptions.CLIParallelWorkers() // / 4
|
nworkers := obioptions.CLIParallelWorkers()
|
||||||
if nworkers < 2 {
|
if nworkers < 2 {
|
||||||
nworkers = 2
|
nworkers = 2
|
||||||
}
|
}
|
||||||
|
|
||||||
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
||||||
opts = append(opts, obiformats.OptionsBufferSize(obioptions.CLIBufferSize()))
|
|
||||||
opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
|
opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
|
||||||
|
|
||||||
opts = append(opts, obiformats.OptionsQualityShift(CLIInputQualityShift()))
|
opts = append(opts, obiformats.OptionsQualityShift(CLIInputQualityShift()))
|
||||||
|
@ -60,7 +60,6 @@ func CLIWriteBioSequences(iterator obiiter.IBioSequence,
|
|||||||
}
|
}
|
||||||
|
|
||||||
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
||||||
opts = append(opts, obiformats.OptionsBufferSize(obioptions.CLIBufferSize()))
|
|
||||||
opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
|
opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
|
||||||
|
|
||||||
opts = append(opts, obiformats.OptionsQualityShift(CLIOutputQualityShift()))
|
opts = append(opts, obiformats.OptionsQualityShift(CLIOutputQualityShift()))
|
||||||
|
61
pkg/obitools/obicsv/obicsv.go
Normal file
61
pkg/obitools/obicsv/obicsv.go
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
package obicsv
|
||||||
|
|
||||||
|
import (
|
||||||
|
"log"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func CLIWriteCSV(iterator obiiter.IBioSequence,
|
||||||
|
terminalAction bool, filenames ...string) (obiiter.IBioSequence, error) {
|
||||||
|
|
||||||
|
if obiconvert.CLIProgressBar() {
|
||||||
|
iterator = iterator.Speed()
|
||||||
|
}
|
||||||
|
|
||||||
|
var newIter obiiter.IBioSequence
|
||||||
|
|
||||||
|
opts := make([]obiformats.WithOption, 0, 10)
|
||||||
|
|
||||||
|
nworkers := obioptions.CLIParallelWorkers() / 4
|
||||||
|
if nworkers < 2 {
|
||||||
|
nworkers = 2
|
||||||
|
}
|
||||||
|
|
||||||
|
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
||||||
|
opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
|
||||||
|
|
||||||
|
opts = append(opts, obiformats.OptionsQualityShift(obiconvert.CLIOutputQualityShift()))
|
||||||
|
opts = append(opts, obiformats.OptionsCompressed(obiconvert.CLICompressed()))
|
||||||
|
|
||||||
|
opts = append(opts, obiformats.CSVId(CLIPrintId()),
|
||||||
|
obiformats.CSVCount(CLIPrintCount()),
|
||||||
|
obiformats.CSVTaxon(CLIPrintTaxon()),
|
||||||
|
obiformats.CSVDefinition(CLIPrintDefinition()),
|
||||||
|
obiformats.CSVKeys(CLIToBeKeptAttributes()),
|
||||||
|
)
|
||||||
|
|
||||||
|
var err error
|
||||||
|
|
||||||
|
if len(filenames) == 0 {
|
||||||
|
newIter, err = obiformats.WriteCSVToStdout(iterator, opts...)
|
||||||
|
} else {
|
||||||
|
newIter, err = obiformats.WriteCSVToFile(iterator, filenames[0], opts...)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Write file error: %v", err)
|
||||||
|
return obiiter.NilIBioSequence, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if terminalAction {
|
||||||
|
newIter.Recycle()
|
||||||
|
return obiiter.NilIBioSequence, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return newIter, nil
|
||||||
|
|
||||||
|
}
|
126
pkg/obitools/obicsv/options.go
Normal file
126
pkg/obitools/obicsv/options.go
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
package obicsv
|
||||||
|
|
||||||
|
import (
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||||
|
"github.com/DavidGamba/go-getoptions"
|
||||||
|
)
|
||||||
|
|
||||||
|
var _outputIds = true
|
||||||
|
var _outputCount = false
|
||||||
|
var _outputTaxon = false
|
||||||
|
var _outputSequence = true
|
||||||
|
var _outputQuality = true
|
||||||
|
var _outputDefinition = false
|
||||||
|
var _obipairing = false
|
||||||
|
var _autoColumns = false
|
||||||
|
var _keepOnly = make([]string, 0)
|
||||||
|
var _naValue = "NA"
|
||||||
|
|
||||||
|
var _softAttributes = map[string][]string{
|
||||||
|
"obipairing": {"mode", "seq_a_single", "seq_b_single",
|
||||||
|
"ali_dir", "score", "score_norm",
|
||||||
|
"seq_ab_match", "pairing_mismatches",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
func CSVOptionSet(options *getoptions.GetOpt) {
|
||||||
|
options.BoolVar(&_outputIds, "ids", _outputIds,
|
||||||
|
options.Alias("i"),
|
||||||
|
options.Description("Prints sequence ids in the ouput."))
|
||||||
|
|
||||||
|
options.BoolVar(&_outputSequence, "sequence", _outputSequence,
|
||||||
|
options.Alias("s"),
|
||||||
|
options.Description("Prints sequence itself in the output."))
|
||||||
|
|
||||||
|
options.BoolVar(&_outputQuality, "quality", _outputQuality,
|
||||||
|
options.Alias("q"),
|
||||||
|
options.Description("Prints sequence quality in the output."))
|
||||||
|
|
||||||
|
options.BoolVar(&_outputDefinition, "definition", _outputDefinition,
|
||||||
|
options.Alias("d"),
|
||||||
|
options.Description("Prints sequence definition in the output."))
|
||||||
|
|
||||||
|
options.BoolVar(&_autoColumns, "auto", _autoColumns,
|
||||||
|
options.Description("Based on the first sequences, propose a list of attibutes to print"))
|
||||||
|
|
||||||
|
options.BoolVar(&_outputCount, "count", _outputCount,
|
||||||
|
options.Description("Prints the count attribute in the output"))
|
||||||
|
|
||||||
|
options.BoolVar(&_outputTaxon, "taxon", _outputTaxon,
|
||||||
|
options.Description("Prints the NCBI taxid and its related scientific name"))
|
||||||
|
|
||||||
|
options.BoolVar(&_obipairing, "obipairing", _obipairing,
|
||||||
|
options.Description("Prints the attributes added by obipairing"))
|
||||||
|
|
||||||
|
options.StringSliceVar(&_keepOnly, "keep", 1, 1,
|
||||||
|
options.Alias("k"),
|
||||||
|
options.ArgName("KEY"),
|
||||||
|
options.Description("Keeps only attribute with key <KEY>. Several -k options can be combined."))
|
||||||
|
|
||||||
|
options.StringVar(&_naValue, "na-value", _naValue,
|
||||||
|
options.ArgName("NAVALUE"),
|
||||||
|
options.Description("A string representing non available values in the CSV file."))
|
||||||
|
}
|
||||||
|
|
||||||
|
func OptionSet(options *getoptions.GetOpt) {
|
||||||
|
obiconvert.OutputModeOptionSet(options)
|
||||||
|
CSVOptionSet(options)
|
||||||
|
}
|
||||||
|
|
||||||
|
func CLIPrintId() bool {
|
||||||
|
return _outputIds
|
||||||
|
}
|
||||||
|
|
||||||
|
func CLIPrintSequence() bool {
|
||||||
|
return _outputSequence
|
||||||
|
}
|
||||||
|
|
||||||
|
func CLIPrintCount() bool {
|
||||||
|
return _outputCount
|
||||||
|
}
|
||||||
|
func CLIPrintTaxon() bool {
|
||||||
|
return _outputTaxon
|
||||||
|
}
|
||||||
|
func CLIPrintQuality() bool {
|
||||||
|
return _outputQuality
|
||||||
|
}
|
||||||
|
|
||||||
|
func CLIPrintDefinition() bool {
|
||||||
|
return _outputDefinition
|
||||||
|
}
|
||||||
|
|
||||||
|
func CLIAutoColumns() bool {
|
||||||
|
return _autoColumns
|
||||||
|
}
|
||||||
|
|
||||||
|
func CLIHasToBeKeptAttributes() bool {
|
||||||
|
return len(_keepOnly) > 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func CLIToBeKeptAttributes() []string {
|
||||||
|
if _obipairing {
|
||||||
|
_keepOnly = append(_keepOnly, _softAttributes["obipairing"]...)
|
||||||
|
}
|
||||||
|
|
||||||
|
if i := goutils.LookFor(_keepOnly, "count"); i >= 0 {
|
||||||
|
_keepOnly = goutils.RemoveIndex(_keepOnly, i)
|
||||||
|
_outputCount = true
|
||||||
|
}
|
||||||
|
|
||||||
|
if i := goutils.LookFor(_keepOnly, "taxid"); i >= 0 {
|
||||||
|
_keepOnly = goutils.RemoveIndex(_keepOnly, i)
|
||||||
|
_outputTaxon = true
|
||||||
|
}
|
||||||
|
|
||||||
|
if i := goutils.LookFor(_keepOnly, "scientific_name"); i >= 0 {
|
||||||
|
_keepOnly = goutils.RemoveIndex(_keepOnly, i)
|
||||||
|
_outputTaxon = true
|
||||||
|
}
|
||||||
|
|
||||||
|
return _keepOnly
|
||||||
|
}
|
||||||
|
|
||||||
|
func CLINAValue() string {
|
||||||
|
return _naValue
|
||||||
|
}
|
@ -31,7 +31,6 @@ func DistributeSequence(sequences obiiter.IBioSequence) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers),
|
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers),
|
||||||
obiformats.OptionsBufferSize(obioptions.CLIBufferSize()),
|
|
||||||
obiformats.OptionsBatchSize(obioptions.CLIBatchSize()),
|
obiformats.OptionsBatchSize(obioptions.CLIBatchSize()),
|
||||||
obiformats.OptionsQualityShift(obiconvert.CLIOutputQualityShift()),
|
obiformats.OptionsQualityShift(obiconvert.CLIOutputQualityShift()),
|
||||||
obiformats.OptionsAppendFile(CLIAppendSequences()),
|
obiformats.OptionsAppendFile(CLIAppendSequences()),
|
||||||
|
@ -39,7 +39,6 @@ func CLIFilterSequence(iterator obiiter.IBioSequence) obiiter.IBioSequence {
|
|||||||
newIter = iterator.FilterOn(predicate,
|
newIter = iterator.FilterOn(predicate,
|
||||||
obioptions.CLIBatchSize(),
|
obioptions.CLIBatchSize(),
|
||||||
obioptions.CLIParallelWorkers(),
|
obioptions.CLIParallelWorkers(),
|
||||||
obioptions.CLIBufferSize(),
|
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -20,7 +20,6 @@ func IExtractBarcode(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error
|
|||||||
obingslibrary.OptionDiscardErrors(!CLIConservedErrors()),
|
obingslibrary.OptionDiscardErrors(!CLIConservedErrors()),
|
||||||
obingslibrary.OptionParallelWorkers(obioptions.CLIParallelWorkers()),
|
obingslibrary.OptionParallelWorkers(obioptions.CLIParallelWorkers()),
|
||||||
obingslibrary.OptionBatchSize(obioptions.CLIBatchSize()),
|
obingslibrary.OptionBatchSize(obioptions.CLIBatchSize()),
|
||||||
obingslibrary.OptionBufferSize(obioptions.CLIBufferSize()),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
ngsfilter, err := CLINGSFIlter()
|
ngsfilter, err := CLINGSFIlter()
|
||||||
|
@ -211,17 +211,13 @@ func IAssemblePESequencesBatch(iterator obiiter.IBioSequence,
|
|||||||
}
|
}
|
||||||
|
|
||||||
nworkers := obioptions.CLIMaxCPU() * 3 / 2
|
nworkers := obioptions.CLIMaxCPU() * 3 / 2
|
||||||
buffsize := iterator.BufferSize()
|
|
||||||
|
|
||||||
if len(sizes) > 0 {
|
if len(sizes) > 0 {
|
||||||
nworkers = sizes[0]
|
nworkers = sizes[0]
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(sizes) > 1 {
|
|
||||||
buffsize = sizes[1]
|
|
||||||
}
|
|
||||||
|
|
||||||
newIter := obiiter.MakeIBioSequence(buffsize)
|
newIter := obiiter.MakeIBioSequence()
|
||||||
|
|
||||||
newIter.Add(nworkers)
|
newIter.Add(nworkers)
|
||||||
|
|
||||||
|
@ -51,8 +51,6 @@ func Unique(sequences obiiter.IBioSequence) obiiter.IBioSequence {
|
|||||||
options = append(options,
|
options = append(options,
|
||||||
obichunk.OptionsParallelWorkers(
|
obichunk.OptionsParallelWorkers(
|
||||||
obioptions.CLIParallelWorkers()),
|
obioptions.CLIParallelWorkers()),
|
||||||
obichunk.OptionsBufferSize(
|
|
||||||
obioptions.CLIBufferSize()),
|
|
||||||
obichunk.OptionsBatchSize(
|
obichunk.OptionsBatchSize(
|
||||||
obioptions.CLIBatchSize()),
|
obioptions.CLIBatchSize()),
|
||||||
obichunk.OptionNAValue(CLINAValue()),
|
obichunk.OptionNAValue(CLINAValue()),
|
||||||
|
Reference in New Issue
Block a user