mirror of
https://github.com/metabarcoding/obitools4.git
synced 2026-03-26 22:00:52 +00:00
Compare commits
6 Commits
V4.3
...
blackboard
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6a2f867ae1 | ||
|
|
776b8f75b7 | ||
|
|
882cc82f23 | ||
|
|
f36b39bfa3 | ||
|
|
dfe2fc3d43 | ||
|
|
cba355cdde |
19
.github/workflows/obitest.yml
vendored
19
.github/workflows/obitest.yml
vendored
@@ -1,19 +0,0 @@
|
|||||||
name: "Run the obitools command test suite"
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches:
|
|
||||||
- master
|
|
||||||
- V*
|
|
||||||
jobs:
|
|
||||||
build:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Setup Go
|
|
||||||
uses: actions/setup-go@v2
|
|
||||||
with:
|
|
||||||
go-version: '1.23'
|
|
||||||
- name: Checkout obitools4 project
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
- name: Run tests
|
|
||||||
run: make githubtests
|
|
||||||
139
.gitignore
vendored
139
.gitignore
vendored
@@ -1,27 +1,120 @@
|
|||||||
**/cpu.pprof
|
cpu.pprof
|
||||||
**/cpu.trace
|
cpu.trace
|
||||||
**/test
|
test
|
||||||
**/bin
|
bin
|
||||||
**/vendor
|
vendor
|
||||||
**/*.fastq
|
*.fastq
|
||||||
**/*.fasta
|
*.fasta
|
||||||
**/*.fastq.gz
|
*.fastq.gz
|
||||||
**/*.fasta.gz
|
*.fasta.gz
|
||||||
**/.DS_Store
|
.DS_Store
|
||||||
**/*.gml
|
*.gml
|
||||||
**/*.log
|
*.log
|
||||||
**/xxx*
|
/argaly
|
||||||
**/*.sav
|
|
||||||
**/*.old
|
|
||||||
**/*.tgz
|
|
||||||
**/*.yaml
|
|
||||||
**/*.csv
|
|
||||||
|
|
||||||
.rhistory
|
/obiconvert
|
||||||
/.vscode
|
/obicount
|
||||||
|
/obimultiplex
|
||||||
|
/obipairing
|
||||||
|
/obipcr
|
||||||
|
/obifind
|
||||||
|
/obidistribute
|
||||||
|
/obiuniq
|
||||||
/build
|
/build
|
||||||
|
/Makefile.old
|
||||||
|
.Rproj.user
|
||||||
|
obitools.Rproj
|
||||||
|
Stat_error.knit.md
|
||||||
|
.Rhistory
|
||||||
|
Stat_error.nb.html
|
||||||
|
Stat_error.Rmd
|
||||||
|
|
||||||
/ncbitaxo
|
/.luarc.json
|
||||||
|
/doc/TAXO/
|
||||||
|
/doc/results/
|
||||||
|
/doc/_main.log
|
||||||
|
/doc/_book/_main.tex
|
||||||
|
/doc/_freeze/
|
||||||
|
/doc/tutorial_files/
|
||||||
|
/doc/wolf_data/
|
||||||
|
/taxdump/
|
||||||
|
/.vscode/
|
||||||
|
|
||||||
!/obitests/**
|
/Algo-Alignement.numbers
|
||||||
!/sample/**
|
/Estimate_proba_true_seq.html
|
||||||
|
/Estimate_proba_true_seq.nb.html
|
||||||
|
/Estimate_proba_true_seq.Rmd
|
||||||
|
/modele_error_euka.qmd
|
||||||
|
/obitools.code-workspace
|
||||||
|
.DS_Store
|
||||||
|
.RData
|
||||||
|
x
|
||||||
|
xxx
|
||||||
|
y
|
||||||
|
/doc/wolf_diet.tgz
|
||||||
|
/doc/man/depends
|
||||||
|
/sample/wolf_R1.fasta.gz
|
||||||
|
/sample/wolf_R2.fasta.gz
|
||||||
|
/sample/euka03.ecotag.fasta.gz
|
||||||
|
/sample/ratio.csv
|
||||||
|
/sample/STD_PLN_1.dat
|
||||||
|
/sample/STD_PLN_2.dat
|
||||||
|
/sample/subset_Pasvik_R1.fastq.gz
|
||||||
|
/sample/subset_Pasvik_R2.fastq.gz
|
||||||
|
/sample/test_gobitools.fasta.bz2
|
||||||
|
euka03.csv*
|
||||||
|
gbbct793.seq.gz
|
||||||
|
gbinv1003.seq.gz
|
||||||
|
gbpln210.seq
|
||||||
|
/doc/book/OBITools-V4.aux
|
||||||
|
/doc/book/OBITools-V4.fdb_latexmk
|
||||||
|
/doc/book/OBITools-V4.fls
|
||||||
|
/doc/book/OBITools-V4.log
|
||||||
|
/doc/book/OBITools-V4.pdf
|
||||||
|
/doc/book/OBITools-V4.synctex.gz
|
||||||
|
/doc/book/OBITools-V4.tex
|
||||||
|
/doc/book/OBITools-V4.toc
|
||||||
|
getoptions.adoc
|
||||||
|
Archive.zip
|
||||||
|
.DS_Store
|
||||||
|
sample/.DS_Store
|
||||||
|
sample/consensus_graphs/specimen_hac_plants_Vern_disicolor_.gml
|
||||||
|
93954
|
||||||
|
Bact03.e5.gb_R254.obipcr.idx.fasta.save
|
||||||
|
sample/test.obipcr.log
|
||||||
|
Bact02.e3.gb_R254.obipcr.fasta.gz
|
||||||
|
Example_Arth03.ngsfilter
|
||||||
|
SPER01.csv
|
||||||
|
SPER03.csv
|
||||||
|
wolf_diet_ngsfilter.txt
|
||||||
|
xx
|
||||||
|
xxx.gb
|
||||||
|
yyy_geom.csv
|
||||||
|
yyy_LCS.csv
|
||||||
|
yyy.json
|
||||||
|
bug_obimultiplex/toto
|
||||||
|
bug_obimultiplex/toto_mapping
|
||||||
|
bug_obimultiplex/tutu
|
||||||
|
bug_obimultiplex/tutu_mapping
|
||||||
|
bug_obipairing/GIT1_GH_ngsfilter.txt
|
||||||
|
doc/book/TAXO/citations.dmp
|
||||||
|
doc/book/TAXO/delnodes.dmp
|
||||||
|
doc/book/TAXO/division.dmp
|
||||||
|
doc/book/TAXO/gc.prt
|
||||||
|
doc/book/TAXO/gencode.dmp
|
||||||
|
doc/book/TAXO/merged.dmp
|
||||||
|
doc/book/TAXO/names.dmp
|
||||||
|
doc/book/TAXO/nodes.dmp
|
||||||
|
doc/book/TAXO/readme.txt
|
||||||
|
doc/book/wolf_data/Release-253/ncbitaxo/citations.dmp
|
||||||
|
doc/book/wolf_data/Release-253/ncbitaxo/delnodes.dmp
|
||||||
|
doc/book/wolf_data/Release-253/ncbitaxo/division.dmp
|
||||||
|
doc/book/wolf_data/Release-253/ncbitaxo/gc.prt
|
||||||
|
doc/book/wolf_data/Release-253/ncbitaxo/gencode.dmp
|
||||||
|
doc/book/wolf_data/Release-253/ncbitaxo/merged.dmp
|
||||||
|
doc/book/wolf_data/Release-253/ncbitaxo/names.dmp
|
||||||
|
doc/book/wolf_data/Release-253/ncbitaxo/nodes.dmp
|
||||||
|
doc/book/wolf_data/Release-253/ncbitaxo/readme.txt
|
||||||
|
doc/book/results/toto.tasta
|
||||||
|
sample/.DS_Store
|
||||||
|
GO
|
||||||
|
|||||||
9
Makefile
9
Makefile
@@ -63,13 +63,6 @@ update-deps:
|
|||||||
|
|
||||||
test:
|
test:
|
||||||
$(GOTEST) ./...
|
$(GOTEST) ./...
|
||||||
|
|
||||||
obitests:
|
|
||||||
@for t in $$(find obitests -name test.sh -print) ; do \
|
|
||||||
bash $${t} ;\
|
|
||||||
done
|
|
||||||
|
|
||||||
githubtests: obitools obitests
|
|
||||||
|
|
||||||
man:
|
man:
|
||||||
make -C doc man
|
make -C doc man
|
||||||
@@ -104,5 +97,5 @@ ifneq ($(strip $(COMMIT_ID)),)
|
|||||||
@rm -f $(OUTPUT)
|
@rm -f $(OUTPUT)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
.PHONY: all packages obitools man obibook doc update-deps obitests githubtests .FORCE
|
.PHONY: all packages obitools man obibook doc update-deps .FORCE
|
||||||
.FORCE:
|
.FORCE:
|
||||||
@@ -5,7 +5,7 @@ They are implemented in *GO* and are tens of times faster than OBITools2.
|
|||||||
|
|
||||||
The git for *OBITools4* is available at :
|
The git for *OBITools4* is available at :
|
||||||
|
|
||||||
> https://github.com/metabarcoding/obitools4
|
> https://metabarcoding.org/obitools4
|
||||||
|
|
||||||
## Installing *OBITools V4*
|
## Installing *OBITools V4*
|
||||||
|
|
||||||
@@ -13,7 +13,7 @@ An installation script that compiles the new *OBITools* on your Unix-like system
|
|||||||
The easiest way to run it is to copy and paste the following command into your terminal
|
The easiest way to run it is to copy and paste the following command into your terminal
|
||||||
|
|
||||||
```{bash}
|
```{bash}
|
||||||
curl -L https://raw.githubusercontent.com/metabarcoding/obitools4/master/install_obitools.sh | bash
|
curl -L https://metabarcoding.org/obitools4/install.sh | bash
|
||||||
```
|
```
|
||||||
|
|
||||||
By default, the script installs the *OBITools* commands and other associated files into the `/usr/local` directory.
|
By default, the script installs the *OBITools* commands and other associated files into the `/usr/local` directory.
|
||||||
@@ -33,11 +33,11 @@ available on your system, the installation script offers two options:
|
|||||||
You can use these options by following the installation command:
|
You can use these options by following the installation command:
|
||||||
|
|
||||||
```{bash}
|
```{bash}
|
||||||
curl -L https://raw.githubusercontent.com/metabarcoding/obitools4/master/install_obitools.sh | \
|
curl -L https://metabarcoding.org/obitools4/install.sh | \
|
||||||
bash -s -- --install-dir test_install --obitools-prefix k
|
bash -s -- --install-dir test_install --obitools-prefix k
|
||||||
```
|
```
|
||||||
|
|
||||||
In this case, the binaries will be installed in the `test_install` directory and all command names will be prefixed with the letter `k`. Thus, `obigrep` will be named `kobigrep`.
|
In this case, the binaries will be installed in the `test_install` directory and all command names will be prefixed with the letter `k`. Thus `obigrep` will be named `kobigrep`.
|
||||||
|
|
||||||
## Continuing the analysis...
|
## Continuing the analysis...
|
||||||
|
|
||||||
|
|||||||
272
Release-notes.md
272
Release-notes.md
@@ -1,141 +1,25 @@
|
|||||||
# OBITools release notes
|
# OBITools release notes
|
||||||
|
|
||||||
## March 2nd, 2025. Release 4.3.0
|
## Latest changes
|
||||||
|
|
||||||
A new documentation website is available at https://obitools4.metabarcoding.org.
|
### CPU limitation
|
||||||
Its development is still in progress.
|
|
||||||
|
|
||||||
### Breaking changes
|
|
||||||
|
|
||||||
- In `obimultiplex`, the short version of the **--tag-list** option used to
|
|
||||||
specify the list of tags and primers to be used for the demultiplexing has
|
|
||||||
been changed from `-t` to `-s`.
|
|
||||||
|
|
||||||
- The command `obifind` is now renamed `obitaxonomy`.
|
|
||||||
|
|
||||||
- The **--taxdump** option used to specify the path to the taxdump containing
|
|
||||||
the NCBI taxonomy has been renamed to **--taxonomy**.
|
|
||||||
|
|
||||||
### Bug fixes
|
|
||||||
|
|
||||||
- Correction of a bug when using paired sequence file with the **--out** option.
|
|
||||||
|
|
||||||
- Correction of a bug in `obitag` when trying to annotate very short sequences of
|
|
||||||
4 bases or less.
|
|
||||||
|
|
||||||
|
|
||||||
- In `obipairing`, correct the stats `seq_a_single` and `seq_b_single` when
|
|
||||||
on right alignment mode
|
|
||||||
|
|
||||||
- Not really a bug but the memory impact of `obiuniq` has been reduced by reducing
|
|
||||||
the batch size and not reading the qualities from the fastq files as `obiuniq`
|
|
||||||
is producing only fasta output without qualities.
|
|
||||||
|
|
||||||
- In `obitag`, correct the wrong assignment of the **obitag_bestmatch**
|
|
||||||
attribute.
|
|
||||||
|
|
||||||
- In `obiclean`, the **--no-progress-bar** option disables all progress bars,
|
|
||||||
not just the data.
|
|
||||||
|
|
||||||
- Several fixes in reading FASTA and FASTQ files, including some code
|
|
||||||
simplification and factorization.
|
|
||||||
|
|
||||||
- Fixed a bug in all obitools that caused the same file to be processed
|
|
||||||
multiple times, when specifying a directory name as input.
|
|
||||||
|
|
||||||
|
- By default, *OBITools4* tries to use all the computing power available on
|
||||||
|
your computer. In some circumstances this can be problematic (e.g. if you
|
||||||
|
are running on a computer cluster managed by your university). You can limit
|
||||||
|
the number of CPU cores used by *OBITools4* or by using the **--max-cpu**
|
||||||
|
option or by setting the **OBIMAXCPU** environment variable. Some strange
|
||||||
|
behaviour of *OBITools4* has been observed when users try to limit the
|
||||||
|
maximum number of usable CPU cores to one. This seems to be caused by the Go
|
||||||
|
language, and it is not obvious to get *OBITools4* to run correctly on a
|
||||||
|
single core in all circumstances. Therefore, if you ask to use a single
|
||||||
|
core, **OBITools4** will print a warning message and actually set this
|
||||||
|
parameter to two cores. If you really want a single core, you can use the
|
||||||
|
**--force-one-core** option. But be aware that this can lead to incorrect
|
||||||
|
calculations.
|
||||||
|
|
||||||
### New features
|
### New features
|
||||||
|
|
||||||
- `obigrep` add a new **--valid-taxid** option to keep only sequence with a
|
|
||||||
valid taxid
|
|
||||||
|
|
||||||
- `obiclean` add a new **--min-sample-count** option with a default value of 1,
|
|
||||||
asking to filter out sequences which are not occurring in at least the
|
|
||||||
specified number of samples.
|
|
||||||
|
|
||||||
- `obitoaxonomy` a new **--dump|D** option allows for dumping a sub-taxonomy.
|
|
||||||
|
|
||||||
- Taxonomy dump can now be provided as a four-columns CSV file to the
|
|
||||||
**--taxonomy** option.
|
|
||||||
|
|
||||||
- NCBI Taxonomy dump does not need to be uncompressed and unarchived anymore. The
|
|
||||||
path of the tar and gziped dump file can be directly specified using the
|
|
||||||
**--taxonomy** option.
|
|
||||||
|
|
||||||
- Most of the time obitools identify automatically sequence file format. But
|
|
||||||
it fails sometimes. Two new option **--fasta** and **--fastq** are added to
|
|
||||||
allow the processing of the rare fasta and fastq files not recognized.
|
|
||||||
|
|
||||||
- In `obiscript`, adds new methods to the Lua sequence object:
|
|
||||||
- `md5_string()`: returning the MD5 check sum as a hexadecimal string,
|
|
||||||
- `subsequence(from,to)`: allows extracting a subsequence on a 0 based
|
|
||||||
coordinate system, upper bound excluded like in go.
|
|
||||||
- `reverse_complement`: returning a sequence object corresponding to the
|
|
||||||
reverse complement of the current sequence.
|
|
||||||
|
|
||||||
### Enhancement
|
|
||||||
|
|
||||||
- In every *OBITools* command, the progress bar is automatically deactivated
|
|
||||||
when the standard error output is redirected.
|
|
||||||
- Because Genbank and ENA:EMBL contain very large sequences, while OBITools4
|
|
||||||
are optimized As Genbank and ENA:EMBL contain very large sequences, while
|
|
||||||
OBITools4 is optimized for short sequences, `obipcr` faces some problems
|
|
||||||
with excessive consumption of computer resources, especially memory. Several
|
|
||||||
improvements in the tuning of the default `obipcr` parameters and some new
|
|
||||||
features, currently only available for FASTA and FASTQ file readers, have
|
|
||||||
been implemented to limit the memory impact of `obipcr` without changing the
|
|
||||||
computational efficiency too much.
|
|
||||||
- Logging system and therefore format, have been homogenized.
|
|
||||||
|
|
||||||
|
|
||||||
### Change of git repository
|
|
||||||
|
|
||||||
- The OBITools4 git repository has been moved to the GitHub repository.
|
|
||||||
The new address is: https://github.com/metabarcoding/obitools4.
|
|
||||||
Take care for using the new install script for retrieving the new version.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl -L https://metabarcoding.org/obitools4/install.sh \
|
|
||||||
| bash
|
|
||||||
```
|
|
||||||
|
|
||||||
or with options:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl -L https://metabarcoding.org/obitools4/install.sh \
|
|
||||||
| bash -s -- --install-dir test_install --obitools-prefix k
|
|
||||||
```
|
|
||||||
- The output of the obitools will evolve to produce results only in standard
|
|
||||||
formats such as fasta and fastq. For non-sequential data, the output will be
|
|
||||||
in CSV format, with the separator `,`, the decimal separator `.`, and a
|
|
||||||
header line with the column names. It is more convenient to use the output
|
|
||||||
in other programs. For example, you can use the `csvtomd` command to
|
|
||||||
reformat the CSV output into a Markdown table. The first command to initiate
|
|
||||||
this change is `obicount`, which now produces a 3-line CSV output.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
obicount data.csv | csvtomd
|
|
||||||
```
|
|
||||||
|
|
||||||
- Adds the new experimental `obicleandb` utility to clean up reference
|
|
||||||
database files created with `obipcr`. An easy way to create a reference
|
|
||||||
database for `obitag` is to use `obipcr` on a local copy of Genbank or EMBL.
|
|
||||||
However, these sequence databases are known to contain many taxonomic
|
|
||||||
errors, such as bacterial sequences annotated with the taxid of their host
|
|
||||||
species. `obicleandb` tries to detect these errors. To do this, it first keeps
|
|
||||||
only sequences annotated with the taxid to which a species, genus, and
|
|
||||||
family taxid can be assigned. Then, for each sequence, it compares the
|
|
||||||
distance of the sequence to the other sequences belonging to the same genus
|
|
||||||
to the same number of distances between the considered sequence and a
|
|
||||||
randomly selected set of sequences belonging to another family using a
|
|
||||||
Mann-Whitney U test. The alternative hypothesis is that out-of-family
|
|
||||||
distances are greater than intrageneric distances. Sequences are annotated
|
|
||||||
with the p-value of the Mann-Whitney U test in the **obicleandb_trusted**
|
|
||||||
slot. Later, the distribution of this p-value can be analyzed to determine a
|
|
||||||
threshold. Empirically, a threshold of 0.05 is a good compromise and allows
|
|
||||||
filtering out less than 1‰ of the sequences. These sequences can then be
|
|
||||||
removed using `obigrep`.
|
|
||||||
|
|
||||||
- Adds a new `obijoin` utility to join information contained in a sequence
|
- Adds a new `obijoin` utility to join information contained in a sequence
|
||||||
file with that contained in another sequence or CSV file. The command allows
|
file with that contained in another sequence or CSV file. The command allows
|
||||||
you to specify the names of the keys in the main sequence file and in the
|
you to specify the names of the keys in the main sequence file and in the
|
||||||
@@ -143,16 +27,16 @@ Its development is still in progress.
|
|||||||
|
|
||||||
- Adds a new tool `obidemerge` to demerge a `merge_xxx` slot by recreating the
|
- Adds a new tool `obidemerge` to demerge a `merge_xxx` slot by recreating the
|
||||||
multiple identical sequences having the slot `xxx` recreated with its initial
|
multiple identical sequences having the slot `xxx` recreated with its initial
|
||||||
value and the sequence count set to the number of occurrences referred in the
|
value and the sequence count set to the number of occurences refered in the
|
||||||
`merge_xxx` slot. During the operation, the `merge_xxx` slot is removed.
|
`merge_xxx` slot. During the operation, the `merge_xxx` slot is removed.
|
||||||
|
|
||||||
- Adds CSV as one of the input format for every obitools command. To encode
|
- Adds CSV as one of the input format for every obitools command. To encode
|
||||||
sequence the CSV file must include a column named `sequence` and another
|
sequence the CSV file must includes a column named `sequence` and another
|
||||||
column named `id`. An extra column named `qualities` can be added to specify
|
column named `id`. An extra column named `qualities` can be added to specify
|
||||||
the quality scores of the sequence following the same ASCII encoding than the
|
the quality scores of the sequence following the same ascii encoding than the
|
||||||
fastq format. All the other columns will be considered as annotations and will
|
fastq format. All the other columns will be considered as annotations and will
|
||||||
be interpreted as JSON objects encoding potentially for atomic values. If a
|
be interpreted as JSON objects encoding potentially for atomic values. If a
|
||||||
column value can not be decoded as JSON it will be considered as a string.
|
calumn value can not be decoded as JSON it will be considered as a string.
|
||||||
|
|
||||||
- A new option **--version** has been added to every obitools command. It will
|
- A new option **--version** has been added to every obitools command. It will
|
||||||
print the version of the command.
|
print the version of the command.
|
||||||
@@ -161,8 +45,8 @@ Its development is still in progress.
|
|||||||
quality scores from a BioSequence object.\
|
quality scores from a BioSequence object.\
|
||||||
|
|
||||||
- In `obimultuplex` the ngsfilter file describing the samples can be no provided
|
- In `obimultuplex` the ngsfilter file describing the samples can be no provided
|
||||||
not only using the classical ngsfilter format but also using the CSV format.
|
not only using the classical nfsfilter format but also using the csv format.
|
||||||
When using CSV, the first line must contain the column names. 5 columns are
|
When using csv, the first line must contain the column names. 5 columns are
|
||||||
expected:
|
expected:
|
||||||
|
|
||||||
- `experiment` the name of the experiment
|
- `experiment` the name of the experiment
|
||||||
@@ -178,34 +62,43 @@ Its development is still in progress.
|
|||||||
|
|
||||||
Supplementary columns are allowed. Their names and content will be used to
|
Supplementary columns are allowed. Their names and content will be used to
|
||||||
annotate the sequence corresponding to the sample, as the `key=value;` did
|
annotate the sequence corresponding to the sample, as the `key=value;` did
|
||||||
in the ngsfilter format.
|
in the nfsfilter format.
|
||||||
|
|
||||||
The CSV format used allows for comment lines starting with `#` character.
|
The CSV format used allows for comment lines starting with `#` character.
|
||||||
Special data lines starting with `@param` in the first column allow configuring the algorithm. The options **--template** provided an over
|
Special data lines starting with `@param` in the first column allow to
|
||||||
commented example of the CSV format, including all the possible options.
|
configure the algorithm. The options **--template** provided an over
|
||||||
|
commented example of the csv format, including all the possible options.
|
||||||
### CPU limitation
|
|
||||||
|
|
||||||
- By default, *OBITools4* tries to use all the computing power available on
|
### Enhancement
|
||||||
your computer. In some circumstances this can be problematic (e.g. if you
|
|
||||||
are running on a computer cluster managed by your university). You can limit
|
|
||||||
the number of CPU cores used by *OBITools4* or by using the **--max-cpu**
|
|
||||||
option or by setting the **OBIMAXCPU** environment variable. Some strange
|
|
||||||
behavior of *OBITools4* has been observed when users try to limit the
|
|
||||||
maximum number of usable CPU cores to one. This seems to be caused by the Go
|
|
||||||
language, and it is not obvious to get *OBITools4* to run correctly on a
|
|
||||||
single core in all circumstances. Therefore, if you ask to use a single
|
|
||||||
core, **OBITools4** will print a warning message and actually set this
|
|
||||||
parameter to two cores. If you really want a single core, you can use the
|
|
||||||
**--force-one-core** option. But be aware that this can lead to incorrect
|
|
||||||
calculations.
|
|
||||||
|
|
||||||
|
- In every *OBITools* command, the progress bar are automatically deactivated
|
||||||
|
when the standard error output is redirected.
|
||||||
|
- Because Genbank and ENA:EMBL contain very large sequences, while OBITools4
|
||||||
|
are optimized As Genbank and ENA:EMBL contain very large sequences, while
|
||||||
|
OBITools4 is optimised for short sequences, `obipcr` faces some problems
|
||||||
|
with excessive consumption of computer resources, especially memory. Several
|
||||||
|
improvements in the tuning of the default `obipcr` parameters and some new
|
||||||
|
features, currently only available for FASTA and FASTQ file readers, have
|
||||||
|
been implemented to limit the memory impact of `obipcr` without changing the
|
||||||
|
computational efficiency too much.
|
||||||
|
- Logging system and therefore format, have been homogenized.
|
||||||
|
|
||||||
|
### Bug
|
||||||
|
|
||||||
|
- In `obitag`, correct the wrong assignment of the **obitag_bestmatch**
|
||||||
|
attribute.
|
||||||
|
- In `obiclean`, the **--no-progress-bar** option disables all progress bars,
|
||||||
|
not just the data.
|
||||||
|
- Several fixes in reading FASTA and FASTQ files, including some code
|
||||||
|
simplification and and factorization.
|
||||||
|
- Fixed a bug in all obitools that caused the same file to be processed
|
||||||
|
multiple times. when specifying a directory name as input.
|
||||||
|
|
||||||
## April 2nd, 2024. Release 4.2.0
|
## April 2nd, 2024. Release 4.2.0
|
||||||
|
|
||||||
### New features
|
### New features
|
||||||
|
|
||||||
- A new OBITools named `obiscript` allows processing each sequence according
|
- A new OBITools named `obiscript` allows to process each sequence according
|
||||||
to a Lua script. This is an experimental tool. The **--template** option
|
to a Lua script. This is an experimental tool. The **--template** option
|
||||||
allows for generating an example script on the `stdout`.
|
allows for generating an example script on the `stdout`.
|
||||||
|
|
||||||
@@ -213,7 +106,7 @@ Its development is still in progress.
|
|||||||
|
|
||||||
- Two of the main class `obiseq.SeqWorker` and `obiseq.SeqWorker` have their
|
- Two of the main class `obiseq.SeqWorker` and `obiseq.SeqWorker` have their
|
||||||
declaration changed. Both now return two values a `obiseq.BioSequenceSlice`
|
declaration changed. Both now return two values a `obiseq.BioSequenceSlice`
|
||||||
and an `error`. This allows a worker to return potentially several sequences
|
and an `error`. This allow a worker to return potentially several sequences
|
||||||
as the result of the processing of a single sequence, or zero, which is
|
as the result of the processing of a single sequence, or zero, which is
|
||||||
equivalent to filter out the input sequence.
|
equivalent to filter out the input sequence.
|
||||||
|
|
||||||
@@ -221,12 +114,12 @@ Its development is still in progress.
|
|||||||
|
|
||||||
- In `obitag` if the reference database contains sequences annotated by taxid
|
- In `obitag` if the reference database contains sequences annotated by taxid
|
||||||
not referenced in the taxonomy, the corresponding sequences are discarded
|
not referenced in the taxonomy, the corresponding sequences are discarded
|
||||||
from the reference database and a warning indicating the sequence *id* and the
|
from the reference database and a warning indicating the sequence id and the
|
||||||
wrong taxid is emitted.
|
wrong taxid is emitted.
|
||||||
- The bug corrected in the parsing of EMBL and Genbank files as implemented in
|
- The bug corrected in the parsing of EMBL and Genbank files as implemented in
|
||||||
version 4.1.2 of OBITools4, potentially induced some reduction in the
|
version 4.1.2 of OBITools4, potentially induced some reduction in the
|
||||||
performance of the parsing. This should have been now fixed.
|
performance of the parsing. This should have been now fixed.
|
||||||
- In the same idea, parsing of Genbank and EMBL files were reading and storing
|
- In the same idea, parsing of genbank and EMBL files were reading and storing
|
||||||
in memory not only the sequence but also the annotations (features table).
|
in memory not only the sequence but also the annotations (features table).
|
||||||
Up to now none of the OBITools are using this information, but with large
|
Up to now none of the OBITools are using this information, but with large
|
||||||
complete genomes, it is occupying a lot of memory. To reduce this impact,
|
complete genomes, it is occupying a lot of memory. To reduce this impact,
|
||||||
@@ -265,7 +158,7 @@ Its development is still in progress.
|
|||||||
|
|
||||||
### New feature
|
### New feature
|
||||||
|
|
||||||
- In `obimatrix` a **--transpose** option allows transposing the produced
|
- In `obimatrix` a **--transpose** option allows to transpose the produced
|
||||||
matrix table in CSV format.
|
matrix table in CSV format.
|
||||||
- In `obitpairing` and `obipcrtag` two new options **--exact-mode** and
|
- In `obitpairing` and `obipcrtag` two new options **--exact-mode** and
|
||||||
**--fast-absolute** to control the heuristic used in the alignment
|
**--fast-absolute** to control the heuristic used in the alignment
|
||||||
@@ -273,7 +166,7 @@ Its development is still in progress.
|
|||||||
the exact algorithm at the cost of a speed. **--fast-absolute** change the
|
the exact algorithm at the cost of a speed. **--fast-absolute** change the
|
||||||
scoring schema of the heuristic.
|
scoring schema of the heuristic.
|
||||||
- In `obiannotate` adds the possibility to annotate the first match of a
|
- In `obiannotate` adds the possibility to annotate the first match of a
|
||||||
pattern using the same algorithm as the one used in `obipcr` and
|
pattern using the same algorithm than the one used in `obipcr` and
|
||||||
`obimultiplex`. For that four option were added :
|
`obimultiplex`. For that four option were added :
|
||||||
- **--pattern** : to specify the pattern. It can use IUPAC codes and
|
- **--pattern** : to specify the pattern. It can use IUPAC codes and
|
||||||
position with no error tolerated has to be followed by a `#` character.
|
position with no error tolerated has to be followed by a `#` character.
|
||||||
@@ -354,7 +247,7 @@ Its development is still in progress.
|
|||||||
|
|
||||||
### Bugs
|
### Bugs
|
||||||
|
|
||||||
- In the obitools language, the `composition` function now returns a map
|
- in the obitools language, the `composition` function now returns a map
|
||||||
indexed by lowercase string "a", "c", "g", "t" and "o" for other instead of
|
indexed by lowercase string "a", "c", "g", "t" and "o" for other instead of
|
||||||
being indexed by the ASCII codes of the corresponding letters.
|
being indexed by the ASCII codes of the corresponding letters.
|
||||||
- Correction of the reverse-complement operation. Every reverse complement of
|
- Correction of the reverse-complement operation. Every reverse complement of
|
||||||
@@ -367,18 +260,18 @@ Its development is still in progress.
|
|||||||
duplicating the quality values. This made `obimultiplex` to produce fastq
|
duplicating the quality values. This made `obimultiplex` to produce fastq
|
||||||
files with sequences having quality values duplicated.
|
files with sequences having quality values duplicated.
|
||||||
|
|
||||||
### Be careful
|
### Becareful
|
||||||
|
|
||||||
GO 1.21.0 is out, and it includes new functionalities which are used in the
|
GO 1.21.0 is out, and it includes new functionalities which are used in the
|
||||||
OBITools4 code. If you use the recommended method for compiling OBITools on your
|
OBITools4 code. If you use the recommanded method for compiling OBITools on your
|
||||||
computer, there is no problem, as the script always load the latest GO version.
|
computer, their is no problem, as the script always load the latest GO version.
|
||||||
If you rely on your personal GO install, please think to update.
|
If you rely on you personnal GO install, please think to update.
|
||||||
|
|
||||||
## August 29th, 2023. Release 4.0.5
|
## August 29th, 2023. Release 4.0.5
|
||||||
|
|
||||||
### Bugs
|
### Bugs
|
||||||
|
|
||||||
- Patch a bug in the `obiseq.BioSequence` constructor leading to an error on
|
- Patch a bug in the `obiseq.BioSequence` constructor leading to a error on
|
||||||
almost every obitools. The error message indicates : `fatal error: sync:
|
almost every obitools. The error message indicates : `fatal error: sync:
|
||||||
unlock of unlocked mutex` This bug was introduced in the release 4.0.4
|
unlock of unlocked mutex` This bug was introduced in the release 4.0.4
|
||||||
|
|
||||||
@@ -397,7 +290,7 @@ If you rely on your personal GO install, please think to update.
|
|||||||
data structure to limit the number of alignments actually computed. This
|
data structure to limit the number of alignments actually computed. This
|
||||||
increase a bit the speed of both the software. `obirefidx` is nevertheless
|
increase a bit the speed of both the software. `obirefidx` is nevertheless
|
||||||
still too slow compared to my expectation.
|
still too slow compared to my expectation.
|
||||||
- Switch to a parallel version of the GZIP library, allowing for high speed
|
- Switch to a parallel version of the gzip library, allowing for high speed
|
||||||
compress and decompress operation on files.
|
compress and decompress operation on files.
|
||||||
|
|
||||||
### New feature
|
### New feature
|
||||||
@@ -441,12 +334,12 @@ If you rely on your personal GO install, please think to update.
|
|||||||
--unidentified not_assigned.fastq
|
--unidentified not_assigned.fastq
|
||||||
```
|
```
|
||||||
|
|
||||||
The command produced four files : `tagged_library_R1.fastq` and
|
the command produced four files : `tagged_library_R1.fastq` and
|
||||||
`tagged_library_R2.fastq` containing the assigned reads and
|
`tagged_library_R2.fastq` containing the assigned reads and
|
||||||
`not_assigned_R1.fastq` and `not_assigned_R2.fastq` containing the
|
`not_assigned_R1.fastq` and `not_assigned_R2.fastq` containing the
|
||||||
unassignable reads.
|
unassignable reads.
|
||||||
|
|
||||||
The tagged library files can then be split using `obidistribute`:
|
the tagged library files can then be split using `obidistribute`:
|
||||||
|
|
||||||
```{bash}
|
```{bash}
|
||||||
mkdir pcr_reads
|
mkdir pcr_reads
|
||||||
@@ -456,9 +349,9 @@ If you rely on your personal GO install, please think to update.
|
|||||||
|
|
||||||
- Adding of two options **--add-lca-in** and **--lca-error** to `obiannotate`.
|
- Adding of two options **--add-lca-in** and **--lca-error** to `obiannotate`.
|
||||||
These options aim to help during construction of reference database using
|
These options aim to help during construction of reference database using
|
||||||
`obipcr`. On `obipcr` output, it is commonly run `obiuniq`. To merge identical
|
`obipcr`. On obipcr output, it is commonly run obiuniq. To merge identical
|
||||||
sequences annotated with different taxids, it is now possible to use the
|
sequences annotated with different taxids, it is now possible to use the
|
||||||
following strategies :
|
following strategie :
|
||||||
|
|
||||||
```{bash}
|
```{bash}
|
||||||
obiuniq -m taxid myrefdb.obipcr.fasta \
|
obiuniq -m taxid myrefdb.obipcr.fasta \
|
||||||
@@ -489,7 +382,7 @@ If you rely on your personal GO install, please think to update.
|
|||||||
- Correction of a bug in `obiconsensus` leading into the deletion of a base
|
- Correction of a bug in `obiconsensus` leading into the deletion of a base
|
||||||
close to the beginning of the consensus sequence.
|
close to the beginning of the consensus sequence.
|
||||||
|
|
||||||
## March 31st, 2023. Release 4.0.2
|
## March 31th, 2023. Release 4.0.2
|
||||||
|
|
||||||
### Compiler change
|
### Compiler change
|
||||||
|
|
||||||
@@ -500,15 +393,15 @@ If you rely on your personal GO install, please think to update.
|
|||||||
- Add the possibility for looking pattern with indels. This has been added to
|
- Add the possibility for looking pattern with indels. This has been added to
|
||||||
`obimultiplex` through the **--with-indels** option.
|
`obimultiplex` through the **--with-indels** option.
|
||||||
- Every obitools command has a **--pprof** option making the command
|
- Every obitools command has a **--pprof** option making the command
|
||||||
publishing a profiling website available at the address :
|
publishing a profiling web site available at the address :
|
||||||
<http://localhost:8080/debug/pprof/>
|
<http://localhost:8080/debug/pprof/>
|
||||||
- A new `obiconsensus` command has been added. It is a prototype. It aims to
|
- A new `obiconsensus` command has been added. It is a prototype. It aims to
|
||||||
build a consensus sequence from a set of reads. The consensus is estimated
|
build a consensus sequence from a set of reads. The consensus is estimated
|
||||||
for all the sequences contained in the input file. If several input files,
|
for all the sequences contained in the input file. If several input files,
|
||||||
or a directory name are provided the result contains a consensus per file.
|
or a directory name are provided the result contains a consensus per file.
|
||||||
The *id* of the sequence is the name of the input file depleted of its
|
The id of the sequence is the name of the input file depleted of its
|
||||||
directory name and of all its extensions.
|
directory name and of all its extensions.
|
||||||
- In `obipcr` an experimental option **--fragmented** allows for splitting very
|
- In `obipcr` an experimental option **--fragmented** allows for spliting very
|
||||||
long query sequences into shorter fragments with an overlap between the two
|
long query sequences into shorter fragments with an overlap between the two
|
||||||
contiguous fragments ensuring that no amplicons are missed despite the split.
|
contiguous fragments ensuring that no amplicons are missed despite the split.
|
||||||
As a side effect some amplicons can be identified twice.
|
As a side effect some amplicons can be identified twice.
|
||||||
@@ -551,7 +444,7 @@ If you rely on your personal GO install, please think to update.
|
|||||||
### Enhancement
|
### Enhancement
|
||||||
|
|
||||||
- *OBITools* are automatically processing all the sequences files contained in
|
- *OBITools* are automatically processing all the sequences files contained in
|
||||||
a directory and its subdirectory\
|
a directory and its sub-directory\
|
||||||
recursively if its name is provided as input. To process easily Genbank
|
recursively if its name is provided as input. To process easily Genbank
|
||||||
files, the corresponding filename extensions have been added. Today the
|
files, the corresponding filename extensions have been added. Today the
|
||||||
following extensions are recognized as sequence files : `.fasta`, `.fastq`,
|
following extensions are recognized as sequence files : `.fasta`, `.fastq`,
|
||||||
@@ -568,7 +461,7 @@ If you rely on your personal GO install, please think to update.
|
|||||||
export OBICPUMAX=4
|
export OBICPUMAX=4
|
||||||
```
|
```
|
||||||
|
|
||||||
- Adds a new option --out\|-o allowing to specify the name of an output file.
|
- Adds a new option --out\|-o allowing to specify the name of an outpout file.
|
||||||
|
|
||||||
``` bash
|
``` bash
|
||||||
obiconvert -o xyz.fasta xxx.fastq
|
obiconvert -o xyz.fasta xxx.fastq
|
||||||
@@ -590,10 +483,10 @@ If you rely on your personal GO install, please think to update.
|
|||||||
matched files remain consistent when processed.
|
matched files remain consistent when processed.
|
||||||
|
|
||||||
- Adding of the function `ifelse` to the expression language for computing
|
- Adding of the function `ifelse` to the expression language for computing
|
||||||
conditional values.
|
conditionnal values.
|
||||||
|
|
||||||
- Adding two function to the expression language related to sequence
|
- Adding two function to the expression language related to sequence
|
||||||
composition : `composition` and `gcskew`. Both are taking a sequence as
|
conposition : `composition` and `gcskew`. Both are taking a sequence as
|
||||||
single argument.
|
single argument.
|
||||||
|
|
||||||
## February 18th, 2023. Release 4.0.0
|
## February 18th, 2023. Release 4.0.0
|
||||||
@@ -601,8 +494,8 @@ If you rely on your personal GO install, please think to update.
|
|||||||
It is the first version of the *OBITools* version 4. I decided to tag then
|
It is the first version of the *OBITools* version 4. I decided to tag then
|
||||||
following two weeks of intensive data analysis with them allowing to discover
|
following two weeks of intensive data analysis with them allowing to discover
|
||||||
many small bugs present in the previous non-official version. Obviously other
|
many small bugs present in the previous non-official version. Obviously other
|
||||||
bugs are certainly present in the code, and you are welcome to use the git
|
bugs are certainly persent in the code, and you are welcome to use the git
|
||||||
ticket system to mention them. But they seem to produce now reliable results.
|
ticket system to mention them. But they seems to produce now reliable results.
|
||||||
|
|
||||||
### Corrected bugs
|
### Corrected bugs
|
||||||
|
|
||||||
@@ -610,11 +503,11 @@ ticket system to mention them. But they seem to produce now reliable results.
|
|||||||
of sequences and to the production of incorrect file because of the last
|
of sequences and to the production of incorrect file because of the last
|
||||||
sequence record, sometimes truncated in its middle. This was only occurring
|
sequence record, sometimes truncated in its middle. This was only occurring
|
||||||
when more than a single CPU was used. It was affecting every obitools.
|
when more than a single CPU was used. It was affecting every obitools.
|
||||||
- The `obiparing` software had a bug in the right alignment procedure. This led
|
- The `obiparing` software had a bug in the right aligment procedure. This led
|
||||||
to the non-alignment of very short barcodes during the pairing of the forward
|
to the non alignment of very short barcodes during the pairing of the forward
|
||||||
and reverse reads.
|
and reverse reads.
|
||||||
- The `obipairing` tools had a non-deterministic comportment when aligning a
|
- The `obipairing` tools had a non deterministic comportment when aligning a
|
||||||
pair very low quality reads. This induced that the result of the same low
|
paor very low quality reads. This induced that the result of the same low
|
||||||
quality read pair was not the same from run to run.
|
quality read pair was not the same from run to run.
|
||||||
|
|
||||||
### New features
|
### New features
|
||||||
@@ -622,10 +515,11 @@ ticket system to mention them. But they seem to produce now reliable results.
|
|||||||
- Adding of a `--compress|-Z` option to every obitools allowing to produce
|
- Adding of a `--compress|-Z` option to every obitools allowing to produce
|
||||||
`gz` compressed output. OBITools were already able to deal with gziped input
|
`gz` compressed output. OBITools were already able to deal with gziped input
|
||||||
files transparently. They can now produce their results in the same format.
|
files transparently. They can now produce their results in the same format.
|
||||||
- Adding of a `--append|-A` option to the `obidistribute` tool. It allows appending the result of an `obidistribute` execution to preexisting files. -
|
- Adding of a `--append|-A` option to the `obidistribute` tool. It allows to
|
||||||
|
append the result of an `obidistribute` execution to preexisting files. -
|
||||||
Adding of a `--directory|-d` option to the `obidistribute` tool. It allows
|
Adding of a `--directory|-d` option to the `obidistribute` tool. It allows
|
||||||
declaring a secondary classification key over the one defined by the
|
to declare a secondary classification key over the one defined by the
|
||||||
`--category\|-c\` option. This extra key leads to produce directories in
|
'--category\|-c\` option. This extra key leads to produce directories in
|
||||||
which files produced according to the primary criterion are stored.
|
which files produced according to the primary criterion are stored.
|
||||||
- Adding of the functions `subspc`, `printf`, `int`, `numeric`, and `bool` to
|
- Adding of the functions `subspc`, `printf`, `int`, `numeric`, and `bool` to
|
||||||
the expression language.
|
the expression language.
|
||||||
@@ -3,11 +3,13 @@ package main
|
|||||||
import (
|
import (
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiannotate"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiannotate"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
@@ -35,11 +37,15 @@ func main() {
|
|||||||
_, args := optionParser(os.Args)
|
_, args := optionParser(os.Args)
|
||||||
|
|
||||||
sequences, err := obiconvert.CLIReadBioSequences(args...)
|
sequences, err := obiconvert.CLIReadBioSequences(args...)
|
||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Cannot open file (%v)", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
annotator := obiannotate.CLIAnnotationPipeline()
|
annotator := obiannotate.CLIAnnotationPipeline()
|
||||||
obiconvert.CLIWriteBioSequences(sequences.Pipe(annotator), true)
|
obiconvert.CLIWriteBioSequences(sequences.Pipe(annotator), true)
|
||||||
|
|
||||||
obiutils.WaitForLastPipe()
|
obiiter.WaitForLastPipe()
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,9 +3,11 @@ package main
|
|||||||
import (
|
import (
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiclean"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiclean"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
)
|
)
|
||||||
@@ -16,12 +18,16 @@ func main() {
|
|||||||
_, args := optionParser(os.Args)
|
_, args := optionParser(os.Args)
|
||||||
|
|
||||||
fs, err := obiconvert.CLIReadBioSequences(args...)
|
fs, err := obiconvert.CLIReadBioSequences(args...)
|
||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Cannot open file (%v)", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
cleaned := obiclean.CLIOBIClean(fs)
|
cleaned := obiclean.CLIOBIClean(fs)
|
||||||
|
|
||||||
obiconvert.CLIWriteBioSequences(cleaned, true)
|
obiconvert.CLIWriteBioSequences(cleaned, true)
|
||||||
|
|
||||||
obiutils.WaitForLastPipe()
|
obiiter.WaitForLastPipe()
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,28 +3,33 @@ package main
|
|||||||
import (
|
import (
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obicleandb"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obicleandb"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
obidefault.SetBatchSize(10)
|
obioptions.SetBatchSize(10)
|
||||||
|
|
||||||
optionParser := obioptions.GenerateOptionParser(obicleandb.OptionSet)
|
optionParser := obioptions.GenerateOptionParser(obicleandb.OptionSet)
|
||||||
|
|
||||||
_, args := optionParser(os.Args)
|
_, args := optionParser(os.Args)
|
||||||
|
|
||||||
fs, err := obiconvert.CLIReadBioSequences(args...)
|
fs, err := obiconvert.CLIReadBioSequences(args...)
|
||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Cannot open file (%v)", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
cleaned := obicleandb.ICleanDB(fs)
|
cleaned := obicleandb.ICleanDB(fs)
|
||||||
|
|
||||||
toconsume, _ := obiconvert.CLIWriteBioSequences(cleaned, false)
|
toconsume, _ := obiconvert.CLIWriteBioSequences(cleaned, false)
|
||||||
toconsume.Consume()
|
toconsume.Consume()
|
||||||
|
|
||||||
obiutils.WaitForLastPipe()
|
obiiter.WaitForLastPipe()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,9 +3,11 @@ package main
|
|||||||
import (
|
import (
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
)
|
)
|
||||||
@@ -16,11 +18,15 @@ func main() {
|
|||||||
_, args := optionParser(os.Args)
|
_, args := optionParser(os.Args)
|
||||||
|
|
||||||
fs, err := obiconvert.CLIReadBioSequences(args...)
|
fs, err := obiconvert.CLIReadBioSequences(args...)
|
||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Cannot open file (%v)", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
comp := fs.MakeIWorker(obiseq.ReverseComplementWorker(true), true)
|
comp := fs.MakeIWorker(obiseq.ReverseComplementWorker(true), true)
|
||||||
obiconvert.CLIWriteBioSequences(comp, true)
|
obiconvert.CLIWriteBioSequences(comp, true)
|
||||||
|
|
||||||
obiutils.WaitForLastPipe()
|
obiiter.WaitForLastPipe()
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,9 +3,11 @@ package main
|
|||||||
import (
|
import (
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconsensus"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconsensus"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
)
|
)
|
||||||
@@ -16,12 +18,16 @@ func main() {
|
|||||||
_, args := optionParser(os.Args)
|
_, args := optionParser(os.Args)
|
||||||
|
|
||||||
fs, err := obiconvert.CLIReadBioSequences(args...)
|
fs, err := obiconvert.CLIReadBioSequences(args...)
|
||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Cannot open file (%v)", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
cleaned := obiconsensus.CLIOBIMinion(fs)
|
cleaned := obiconsensus.CLIOBIMinion(fs)
|
||||||
|
|
||||||
obiconvert.CLIWriteBioSequences(cleaned, true)
|
obiconvert.CLIWriteBioSequences(cleaned, true)
|
||||||
|
|
||||||
obiutils.WaitForLastPipe()
|
obiiter.WaitForLastPipe()
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,26 +3,31 @@ package main
|
|||||||
import (
|
import (
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
obidefault.SetStrictReadWorker(2)
|
obioptions.SetStrictReadWorker(2)
|
||||||
obidefault.SetStrictWriteWorker(2)
|
obioptions.SetStrictWriteWorker(2)
|
||||||
|
|
||||||
optionParser := obioptions.GenerateOptionParser(obiconvert.OptionSet)
|
optionParser := obioptions.GenerateOptionParser(obiconvert.OptionSet)
|
||||||
|
|
||||||
_, args := optionParser(os.Args)
|
_, args := optionParser(os.Args)
|
||||||
|
|
||||||
fs, err := obiconvert.CLIReadBioSequences(args...)
|
fs, err := obiconvert.CLIReadBioSequences(args...)
|
||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Cannot open file (%v)", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
obiconvert.CLIWriteBioSequences(fs, true)
|
obiconvert.CLIWriteBioSequences(fs, true)
|
||||||
|
|
||||||
obiutils.WaitForLastPipe()
|
obiiter.WaitForLastPipe()
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiblackboard"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obicount"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obicount"
|
||||||
|
|
||||||
@@ -34,24 +34,28 @@ func main() {
|
|||||||
|
|
||||||
_, args := optionParser(os.Args)
|
_, args := optionParser(os.Args)
|
||||||
|
|
||||||
obidefault.SetStrictReadWorker(min(4, obidefault.ParallelWorkers()))
|
black := obiblackboard.NewBlackBoard(obioptions.CLIParallelWorkers())
|
||||||
fs, err := obiconvert.CLIReadBioSequences(args...)
|
|
||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
|
||||||
|
|
||||||
nvariant, nread, nsymbol := fs.Count(true)
|
black.ReadSequences(args)
|
||||||
|
|
||||||
fmt.Print("entities,n\n")
|
counter := obiblackboard.CountSequenceAggregator("to_delete")
|
||||||
|
|
||||||
|
black.RegisterRunner("sequences", counter.Runner)
|
||||||
|
black.RegisterRunner("to_delete", obiblackboard.RecycleSequences(true, "final"))
|
||||||
|
|
||||||
|
black.Run()
|
||||||
|
|
||||||
|
fmt.Print("entity,n\n")
|
||||||
|
|
||||||
if obicount.CLIIsPrintingVariantCount() {
|
if obicount.CLIIsPrintingVariantCount() {
|
||||||
fmt.Printf("variants,%d\n", nvariant)
|
fmt.Printf("variants,%d\n", counter.Variants)
|
||||||
}
|
}
|
||||||
|
|
||||||
if obicount.CLIIsPrintingReadCount() {
|
if obicount.CLIIsPrintingReadCount() {
|
||||||
fmt.Printf("reads,%d\n", nread)
|
fmt.Printf("reads,%d\n", counter.Reads)
|
||||||
}
|
}
|
||||||
|
|
||||||
if obicount.CLIIsPrintingSymbolCount() {
|
if obicount.CLIIsPrintingSymbolCount() {
|
||||||
fmt.Printf("symbols,%d\n", nsymbol)
|
fmt.Printf("nucleotides,%d\n", counter.Nucleotides)
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,10 +3,12 @@ package main
|
|||||||
import (
|
import (
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obicsv"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obicsv"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
@@ -15,10 +17,14 @@ func main() {
|
|||||||
_, args := optionParser(os.Args)
|
_, args := optionParser(os.Args)
|
||||||
|
|
||||||
fs, err := obiconvert.CLIReadBioSequences(args...)
|
fs, err := obiconvert.CLIReadBioSequences(args...)
|
||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
|
||||||
|
|
||||||
obicsv.CLIWriteSequenceCSV(fs, true)
|
if err != nil {
|
||||||
|
log.Errorf("Cannot open file (%v)", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
obiutils.WaitForLastPipe()
|
obicsv.CLIWriteCSV(fs, true)
|
||||||
|
|
||||||
|
obiiter.WaitForLastPipe()
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,29 +3,34 @@ package main
|
|||||||
import (
|
import (
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obidemerge"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obidemerge"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
obidefault.SetStrictReadWorker(2)
|
obioptions.SetStrictReadWorker(2)
|
||||||
obidefault.SetStrictWriteWorker(2)
|
obioptions.SetStrictWriteWorker(2)
|
||||||
|
|
||||||
optionParser := obioptions.GenerateOptionParser(obidemerge.OptionSet)
|
optionParser := obioptions.GenerateOptionParser(obidemerge.OptionSet)
|
||||||
|
|
||||||
_, args := optionParser(os.Args)
|
_, args := optionParser(os.Args)
|
||||||
|
|
||||||
fs, err := obiconvert.CLIReadBioSequences(args...)
|
fs, err := obiconvert.CLIReadBioSequences(args...)
|
||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Cannot open file (%v)", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
demerged := obidemerge.CLIDemergeSequences(fs)
|
demerged := obidemerge.CLIDemergeSequences(fs)
|
||||||
|
|
||||||
obiconvert.CLIWriteBioSequences(demerged, true)
|
obiconvert.CLIWriteBioSequences(demerged, true)
|
||||||
|
|
||||||
obiutils.WaitForLastPipe()
|
obiiter.WaitForLastPipe()
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,9 +3,11 @@ package main
|
|||||||
import (
|
import (
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obidistribute"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obidistribute"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
)
|
)
|
||||||
@@ -16,10 +18,14 @@ func main() {
|
|||||||
_, args := optionParser(os.Args)
|
_, args := optionParser(os.Args)
|
||||||
|
|
||||||
fs, err := obiconvert.CLIReadBioSequences(args...)
|
fs, err := obiconvert.CLIReadBioSequences(args...)
|
||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Cannot open file (%v)", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
obidistribute.CLIDistributeSequence(fs)
|
obidistribute.CLIDistributeSequence(fs)
|
||||||
|
|
||||||
obiutils.WaitForLastPipe()
|
obiiter.WaitForLastPipe()
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
68
cmd/obitools/obifind/main.go
Normal file
68
cmd/obitools/obifind/main.go
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obifind"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
optionParser := obioptions.GenerateOptionParser(obifind.OptionSet)
|
||||||
|
|
||||||
|
_, args := optionParser(os.Args)
|
||||||
|
|
||||||
|
//prof, _ := os.Create("obifind.prof")
|
||||||
|
//pprof.StartCPUProfile(prof)
|
||||||
|
|
||||||
|
restrictions, err := obifind.ITaxonRestrictions()
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("%+v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
switch {
|
||||||
|
case obifind.CLIRequestsPathForTaxid() >= 0:
|
||||||
|
taxonomy, err := obifind.CLILoadSelectedTaxonomy()
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("%+v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
taxon, err := taxonomy.Taxon(obifind.CLIRequestsPathForTaxid())
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("%+v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
s, err := taxon.Path()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("%+v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
obifind.TaxonWriter(s.Iterator(),
|
||||||
|
fmt.Sprintf("path:%d", taxon.Taxid()))
|
||||||
|
|
||||||
|
case len(args) == 0:
|
||||||
|
taxonomy, err := obifind.CLILoadSelectedTaxonomy()
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("%+v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
obifind.TaxonWriter(restrictions(taxonomy.Iterator()), "")
|
||||||
|
|
||||||
|
default:
|
||||||
|
matcher, err := obifind.ITaxonNameMatcher()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("%+v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, pattern := range args {
|
||||||
|
s := restrictions(matcher(pattern))
|
||||||
|
obifind.TaxonWriter(s, pattern)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//pprof.StopCPUProfile()
|
||||||
|
}
|
||||||
@@ -3,11 +3,13 @@ package main
|
|||||||
import (
|
import (
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obigrep"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obigrep"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
@@ -35,10 +37,13 @@ func main() {
|
|||||||
_, args := optionParser(os.Args)
|
_, args := optionParser(os.Args)
|
||||||
|
|
||||||
sequences, err := obiconvert.CLIReadBioSequences(args...)
|
sequences, err := obiconvert.CLIReadBioSequences(args...)
|
||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Cannot open file (%v)", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
selected := obigrep.CLIFilterSequence(sequences)
|
selected := obigrep.CLIFilterSequence(sequences)
|
||||||
obiconvert.CLIWriteBioSequences(selected, true)
|
obiconvert.CLIWriteBioSequences(selected, true)
|
||||||
obiutils.WaitForLastPipe()
|
obiiter.WaitForLastPipe()
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,29 +3,34 @@ package main
|
|||||||
import (
|
import (
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obijoin"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obijoin"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
obidefault.SetStrictReadWorker(2)
|
obioptions.SetStrictReadWorker(2)
|
||||||
obidefault.SetStrictWriteWorker(2)
|
obioptions.SetStrictWriteWorker(2)
|
||||||
|
|
||||||
optionParser := obioptions.GenerateOptionParser(obijoin.OptionSet)
|
optionParser := obioptions.GenerateOptionParser(obijoin.OptionSet)
|
||||||
|
|
||||||
_, args := optionParser(os.Args)
|
_, args := optionParser(os.Args)
|
||||||
|
|
||||||
fs, err := obiconvert.CLIReadBioSequences(args...)
|
fs, err := obiconvert.CLIReadBioSequences(args...)
|
||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Cannot open file (%v)", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
joined := obijoin.CLIJoinSequences(fs)
|
joined := obijoin.CLIJoinSequences(fs)
|
||||||
|
|
||||||
obiconvert.CLIWriteBioSequences(joined, true)
|
obiconvert.CLIWriteBioSequences(joined, true)
|
||||||
|
|
||||||
obiutils.WaitForLastPipe()
|
obiiter.WaitForLastPipe()
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,51 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"os"
|
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obikmersim"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
||||||
)
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
|
|
||||||
defer obiseq.LogBioSeqStatus()
|
|
||||||
|
|
||||||
// go tool pprof -http=":8000" ./obipairing ./cpu.pprof
|
|
||||||
// f, err := os.Create("cpu.pprof")
|
|
||||||
// if err != nil {
|
|
||||||
// log.Fatal(err)
|
|
||||||
// }
|
|
||||||
// pprof.StartCPUProfile(f)
|
|
||||||
// defer pprof.StopCPUProfile()
|
|
||||||
|
|
||||||
// go tool trace cpu.trace
|
|
||||||
// ftrace, err := os.Create("cpu.trace")
|
|
||||||
// if err != nil {
|
|
||||||
// log.Fatal(err)
|
|
||||||
// }
|
|
||||||
// trace.Start(ftrace)
|
|
||||||
// defer trace.Stop()
|
|
||||||
|
|
||||||
optionParser := obioptions.GenerateOptionParser(obikmersim.MatchOptionSet)
|
|
||||||
|
|
||||||
_, args := optionParser(os.Args)
|
|
||||||
|
|
||||||
var err error
|
|
||||||
sequences := obiiter.NilIBioSequence
|
|
||||||
|
|
||||||
if !obikmersim.CLISelf() {
|
|
||||||
sequences, err = obiconvert.CLIReadBioSequences(args...)
|
|
||||||
}
|
|
||||||
|
|
||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
|
||||||
|
|
||||||
selected := obikmersim.CLIAlignSequences(sequences)
|
|
||||||
obiconvert.CLIWriteBioSequences(selected, true)
|
|
||||||
obiutils.WaitForLastPipe()
|
|
||||||
|
|
||||||
}
|
|
||||||
@@ -1,59 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"log"
|
|
||||||
"os"
|
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obikmersim"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
||||||
)
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
|
|
||||||
defer obiseq.LogBioSeqStatus()
|
|
||||||
|
|
||||||
// go tool pprof -http=":8000" ./obipairing ./cpu.pprof
|
|
||||||
// f, err := os.Create("cpu.pprof")
|
|
||||||
// if err != nil {
|
|
||||||
// log.Fatal(err)
|
|
||||||
// }
|
|
||||||
// pprof.StartCPUProfile(f)
|
|
||||||
// defer pprof.StopCPUProfile()
|
|
||||||
|
|
||||||
// go tool trace cpu.trace
|
|
||||||
// ftrace, err := os.Create("cpu.trace")
|
|
||||||
// if err != nil {
|
|
||||||
// log.Fatal(err)
|
|
||||||
// }
|
|
||||||
// trace.Start(ftrace)
|
|
||||||
// defer trace.Stop()
|
|
||||||
|
|
||||||
optionParser := obioptions.GenerateOptionParser(obikmersim.CountOptionSet)
|
|
||||||
|
|
||||||
_, args := optionParser(os.Args)
|
|
||||||
|
|
||||||
var err error
|
|
||||||
sequences := obiiter.NilIBioSequence
|
|
||||||
|
|
||||||
if !obikmersim.CLISelf() {
|
|
||||||
sequences, err = obiconvert.CLIReadBioSequences(args...)
|
|
||||||
}
|
|
||||||
|
|
||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
|
||||||
|
|
||||||
counted := obikmersim.CLILookForSharedKmers(sequences)
|
|
||||||
topull, err := obiconvert.CLIWriteBioSequences(counted, false)
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
log.Panic(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
topull.Consume()
|
|
||||||
|
|
||||||
obiutils.WaitForLastPipe()
|
|
||||||
|
|
||||||
}
|
|
||||||
@@ -3,9 +3,11 @@ package main
|
|||||||
import (
|
import (
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obilandmark"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obilandmark"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
)
|
)
|
||||||
@@ -16,11 +18,15 @@ func main() {
|
|||||||
_, args := optionParser(os.Args)
|
_, args := optionParser(os.Args)
|
||||||
|
|
||||||
fs, err := obiconvert.CLIReadBioSequences(args...)
|
fs, err := obiconvert.CLIReadBioSequences(args...)
|
||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Cannot open file (%v)", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
indexed := obilandmark.CLISelectLandmarkSequences(fs)
|
indexed := obilandmark.CLISelectLandmarkSequences(fs)
|
||||||
|
|
||||||
obiconvert.CLIWriteBioSequences(indexed, true)
|
obiconvert.CLIWriteBioSequences(indexed, true)
|
||||||
obiutils.WaitForLastPipe()
|
obiiter.WaitForLastPipe()
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,6 +4,8 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
@@ -37,7 +39,11 @@ func main() {
|
|||||||
_, args := optionParser(os.Args)
|
_, args := optionParser(os.Args)
|
||||||
|
|
||||||
fs, err := obiconvert.CLIReadBioSequences(args...)
|
fs, err := obiconvert.CLIReadBioSequences(args...)
|
||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Cannot open file (%v)", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
matrix := obimatrix.IMatrix(fs)
|
matrix := obimatrix.IMatrix(fs)
|
||||||
|
|
||||||
|
|||||||
@@ -1,44 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"os"
|
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obimicrosat"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
||||||
)
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
|
|
||||||
defer obiseq.LogBioSeqStatus()
|
|
||||||
|
|
||||||
// go tool pprof -http=":8000" ./obipairing ./cpu.pprof
|
|
||||||
// f, err := os.Create("cpu.pprof")
|
|
||||||
// if err != nil {
|
|
||||||
// log.Fatal(err)
|
|
||||||
// }
|
|
||||||
// pprof.StartCPUProfile(f)
|
|
||||||
// defer pprof.StopCPUProfile()
|
|
||||||
|
|
||||||
// go tool trace cpu.trace
|
|
||||||
// ftrace, err := os.Create("cpu.trace")
|
|
||||||
// if err != nil {
|
|
||||||
// log.Fatal(err)
|
|
||||||
// }
|
|
||||||
// trace.Start(ftrace)
|
|
||||||
// defer trace.Stop()
|
|
||||||
|
|
||||||
optionParser := obioptions.GenerateOptionParser(obimicrosat.OptionSet)
|
|
||||||
|
|
||||||
_, args := optionParser(os.Args)
|
|
||||||
|
|
||||||
sequences, err := obiconvert.CLIReadBioSequences(args...)
|
|
||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
|
||||||
|
|
||||||
selected := obimicrosat.CLIAnnotateMicrosat(sequences)
|
|
||||||
obiconvert.CLIWriteBioSequences(selected, true)
|
|
||||||
obiutils.WaitForLastPipe()
|
|
||||||
|
|
||||||
}
|
|
||||||
@@ -6,10 +6,10 @@ import (
|
|||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obimultiplex"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obimultiplex"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
@@ -43,11 +43,14 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
sequences, err := obiconvert.CLIReadBioSequences(args...)
|
sequences, err := obiconvert.CLIReadBioSequences(args...)
|
||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Cannot open file (%v)", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
amplicons, _ := obimultiplex.IExtractBarcode(sequences)
|
amplicons, _ := obimultiplex.IExtractBarcode(sequences)
|
||||||
obiconvert.CLIWriteBioSequences(amplicons, true)
|
obiconvert.CLIWriteBioSequences(amplicons, true)
|
||||||
amplicons.Wait()
|
amplicons.Wait()
|
||||||
obiutils.WaitForLastPipe()
|
obiiter.WaitForLastPipe()
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,11 +5,10 @@ import (
|
|||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obipairing"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obipairing"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
@@ -34,8 +33,8 @@ func main() {
|
|||||||
|
|
||||||
optionParser(os.Args)
|
optionParser(os.Args)
|
||||||
|
|
||||||
obidefault.SetStrictReadWorker(2)
|
obioptions.SetStrictReadWorker(2)
|
||||||
obidefault.SetStrictWriteWorker(2)
|
obioptions.SetStrictWriteWorker(2)
|
||||||
pairs, err := obipairing.CLIPairedSequence()
|
pairs, err := obipairing.CLIPairedSequence()
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -52,10 +51,10 @@ func main() {
|
|||||||
obipairing.CLIFastMode(),
|
obipairing.CLIFastMode(),
|
||||||
obipairing.CLIFastRelativeScore(),
|
obipairing.CLIFastRelativeScore(),
|
||||||
obipairing.CLIWithStats(),
|
obipairing.CLIWithStats(),
|
||||||
obidefault.ParallelWorkers(),
|
obioptions.CLIParallelWorkers(),
|
||||||
)
|
)
|
||||||
|
|
||||||
obiconvert.CLIWriteBioSequences(paired, true)
|
obiconvert.CLIWriteBioSequences(paired, true)
|
||||||
|
|
||||||
obiutils.WaitForLastPipe()
|
obiiter.WaitForLastPipe()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,11 +3,12 @@ package main
|
|||||||
import (
|
import (
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obipcr"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obipcr"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
@@ -24,20 +25,24 @@ func main() {
|
|||||||
// trace.Start(ftrace)
|
// trace.Start(ftrace)
|
||||||
// defer trace.Stop()
|
// defer trace.Stop()
|
||||||
|
|
||||||
obidefault.SetWorkerPerCore(2)
|
obioptions.SetWorkerPerCore(2)
|
||||||
obidefault.SetReadWorkerPerCore(0.5)
|
obioptions.SetReadWorkerPerCore(0.5)
|
||||||
obidefault.SetParallelFilesRead(obidefault.ParallelWorkers() / 4)
|
obioptions.SetParallelFilesRead(obioptions.CLIParallelWorkers() / 4)
|
||||||
obidefault.SetBatchSize(10)
|
obioptions.SetBatchSize(10)
|
||||||
|
|
||||||
optionParser := obioptions.GenerateOptionParser(obipcr.OptionSet)
|
optionParser := obioptions.GenerateOptionParser(obipcr.OptionSet)
|
||||||
|
|
||||||
_, args := optionParser(os.Args)
|
_, args := optionParser(os.Args)
|
||||||
|
|
||||||
sequences, err := obiconvert.CLIReadBioSequences(args...)
|
sequences, err := obiconvert.CLIReadBioSequences(args...)
|
||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Cannot open file (%v)", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
amplicons, _ := obipcr.CLIPCR(sequences)
|
amplicons, _ := obipcr.CLIPCR(sequences)
|
||||||
obiconvert.CLIWriteBioSequences(amplicons, true)
|
obiconvert.CLIWriteBioSequences(amplicons, true)
|
||||||
obiutils.WaitForLastPipe()
|
obiiter.WaitForLastPipe()
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,9 +3,11 @@ package main
|
|||||||
import (
|
import (
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obirefidx"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obirefidx"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
)
|
)
|
||||||
@@ -16,11 +18,15 @@ func main() {
|
|||||||
_, args := optionParser(os.Args)
|
_, args := optionParser(os.Args)
|
||||||
|
|
||||||
fs, err := obiconvert.CLIReadBioSequences(args...)
|
fs, err := obiconvert.CLIReadBioSequences(args...)
|
||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Cannot open file (%v)", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
indexed := obirefidx.IndexFamilyDB(fs)
|
indexed := obirefidx.IndexFamilyDB(fs)
|
||||||
|
|
||||||
obiconvert.CLIWriteBioSequences(indexed, true)
|
obiconvert.CLIWriteBioSequences(indexed, true)
|
||||||
obiutils.WaitForLastPipe()
|
obiiter.WaitForLastPipe()
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,9 +3,11 @@ package main
|
|||||||
import (
|
import (
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obirefidx"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obirefidx"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
)
|
)
|
||||||
@@ -16,11 +18,14 @@ func main() {
|
|||||||
_, args := optionParser(os.Args)
|
_, args := optionParser(os.Args)
|
||||||
|
|
||||||
fs, err := obiconvert.CLIReadBioSequences(args...)
|
fs, err := obiconvert.CLIReadBioSequences(args...)
|
||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Cannot open file (%v)", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
indexed := obirefidx.IndexReferenceDB(fs)
|
indexed := obirefidx.IndexReferenceDB(fs)
|
||||||
|
|
||||||
obiconvert.CLIWriteBioSequences(indexed, true)
|
obiconvert.CLIWriteBioSequences(indexed, true)
|
||||||
obiutils.WaitForLastPipe()
|
obiiter.WaitForLastPipe()
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,11 +4,13 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiscript"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiscript"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
@@ -41,11 +43,15 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
sequences, err := obiconvert.CLIReadBioSequences(args...)
|
sequences, err := obiconvert.CLIReadBioSequences(args...)
|
||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Cannot open file (%v)", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
annotator := obiscript.CLIScriptPipeline()
|
annotator := obiscript.CLIScriptPipeline()
|
||||||
obiconvert.CLIWriteBioSequences(sequences.Pipe(annotator), true)
|
obiconvert.CLIWriteBioSequences(sequences.Pipe(annotator), true)
|
||||||
|
|
||||||
obiutils.WaitForLastPipe()
|
obiiter.WaitForLastPipe()
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,11 +4,13 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obisplit"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obisplit"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
@@ -41,11 +43,15 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
sequences, err := obiconvert.CLIReadBioSequences(args...)
|
sequences, err := obiconvert.CLIReadBioSequences(args...)
|
||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Cannot open file (%v)", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
annotator := obisplit.CLISlitPipeline()
|
annotator := obisplit.CLISlitPipeline()
|
||||||
obiconvert.CLIWriteBioSequences(sequences.Pipe(annotator), true)
|
obiconvert.CLIWriteBioSequences(sequences.Pipe(annotator), true)
|
||||||
|
|
||||||
obiutils.WaitForLastPipe()
|
obiiter.WaitForLastPipe()
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
"gopkg.in/yaml.v3"
|
"gopkg.in/yaml.v3"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
@@ -38,7 +39,11 @@ func main() {
|
|||||||
_, args := optionParser(os.Args)
|
_, args := optionParser(os.Args)
|
||||||
|
|
||||||
fs, err := obiconvert.CLIReadBioSequences(args...)
|
fs, err := obiconvert.CLIReadBioSequences(args...)
|
||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Cannot open file (%v)", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
summary := obisummary.ISummary(fs, obisummary.CLIMapSummary())
|
summary := obisummary.ISummary(fs, obisummary.CLIMapSummary())
|
||||||
|
|
||||||
|
|||||||
@@ -6,12 +6,10 @@ import (
|
|||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obifind"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitag"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitag"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
)
|
)
|
||||||
@@ -34,40 +32,29 @@ func main() {
|
|||||||
// trace.Start(ftrace)
|
// trace.Start(ftrace)
|
||||||
// defer trace.Stop()
|
// defer trace.Stop()
|
||||||
|
|
||||||
obidefault.SetWorkerPerCore(2)
|
obioptions.SetWorkerPerCore(2)
|
||||||
obidefault.SetStrictReadWorker(1)
|
obioptions.SetStrictReadWorker(1)
|
||||||
obidefault.SetStrictWriteWorker(1)
|
obioptions.SetStrictWriteWorker(1)
|
||||||
obidefault.SetBatchSize(10)
|
obioptions.SetBatchSize(10)
|
||||||
|
|
||||||
optionParser := obioptions.GenerateOptionParser(obitag.OptionSet)
|
optionParser := obioptions.GenerateOptionParser(obitag.OptionSet)
|
||||||
|
|
||||||
_, args := optionParser(os.Args)
|
_, args := optionParser(os.Args)
|
||||||
|
|
||||||
fs, err := obiconvert.CLIReadBioSequences(args...)
|
fs, err := obiconvert.CLIReadBioSequences(args...)
|
||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
|
||||||
|
|
||||||
taxo := obitax.DefaultTaxonomy()
|
if err != nil {
|
||||||
|
log.Errorf("Cannot open file (%v)", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
taxo, error := obifind.CLILoadSelectedTaxonomy()
|
||||||
|
if error != nil {
|
||||||
|
log.Panicln(error)
|
||||||
|
}
|
||||||
|
|
||||||
references := obitag.CLIRefDB()
|
references := obitag.CLIRefDB()
|
||||||
|
|
||||||
if references == nil {
|
|
||||||
log.Panicln("No loaded reference database")
|
|
||||||
}
|
|
||||||
|
|
||||||
if taxo == nil {
|
|
||||||
taxo, err = references.ExtractTaxonomy(nil)
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("No taxonomy specified or extractable from reference database: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
taxo.SetAsDefault()
|
|
||||||
}
|
|
||||||
|
|
||||||
if taxo == nil {
|
|
||||||
log.Panicln("No loaded taxonomy")
|
|
||||||
}
|
|
||||||
|
|
||||||
var identified obiiter.IBioSequence
|
var identified obiiter.IBioSequence
|
||||||
|
|
||||||
if obitag.CLIGeometricMode() {
|
if obitag.CLIGeometricMode() {
|
||||||
@@ -77,7 +64,7 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
obiconvert.CLIWriteBioSequences(identified, true)
|
obiconvert.CLIWriteBioSequences(identified, true)
|
||||||
obiutils.WaitForLastPipe()
|
obiiter.WaitForLastPipe()
|
||||||
|
|
||||||
obitag.CLISaveRefetenceDB(references)
|
obitag.CLISaveRefetenceDB(references)
|
||||||
|
|
||||||
|
|||||||
@@ -5,12 +5,11 @@ import (
|
|||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obipairing"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obipairing"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitagpcr"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitagpcr"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
@@ -31,7 +30,7 @@ func main() {
|
|||||||
// trace.Start(ftrace)
|
// trace.Start(ftrace)
|
||||||
// defer trace.Stop()
|
// defer trace.Stop()
|
||||||
|
|
||||||
obidefault.SetWorkerPerCore(1)
|
obioptions.SetWorkerPerCore(1)
|
||||||
|
|
||||||
optionParser := obioptions.GenerateOptionParser(obitagpcr.OptionSet)
|
optionParser := obioptions.GenerateOptionParser(obitagpcr.OptionSet)
|
||||||
|
|
||||||
@@ -55,5 +54,5 @@ func main() {
|
|||||||
|
|
||||||
obiconvert.CLIWriteBioSequences(paired, true)
|
obiconvert.CLIWriteBioSequences(paired, true)
|
||||||
|
|
||||||
obiutils.WaitForLastPipe()
|
obiiter.WaitForLastPipe()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,106 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"os"
|
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitaxonomy"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
|
||||||
)
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
optionParser := obioptions.GenerateOptionParser(obitaxonomy.OptionSet)
|
|
||||||
|
|
||||||
_, args := optionParser(os.Args)
|
|
||||||
|
|
||||||
var iterator *obitax.ITaxon
|
|
||||||
|
|
||||||
switch {
|
|
||||||
case obitaxonomy.CLIDownloadNCBI():
|
|
||||||
err := obitaxonomy.CLIDownloadNCBITaxdump()
|
|
||||||
if err != nil {
|
|
||||||
log.Errorf("Cannot download NCBI taxonomy: %s", err.Error())
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
|
|
||||||
os.Exit(0)
|
|
||||||
|
|
||||||
case obitaxonomy.CLIExtractTaxonomy():
|
|
||||||
iter, err := obiconvert.CLIReadBioSequences(args...)
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("Cannot extract taxonomy: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
taxonomy, err := iter.ExtractTaxonomy()
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("Cannot extract taxonomy: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
taxonomy.SetAsDefault()
|
|
||||||
|
|
||||||
log.Infof("Number of extracted taxa: %d", taxonomy.Len())
|
|
||||||
iterator = taxonomy.AsTaxonSet().Sort().Iterator()
|
|
||||||
|
|
||||||
case obitaxonomy.CLIDumpSubtaxonomy():
|
|
||||||
iterator = obitaxonomy.CLISubTaxonomyIterator()
|
|
||||||
|
|
||||||
case obitaxonomy.CLIRequestsPathForTaxid() != "NA":
|
|
||||||
|
|
||||||
taxon, isAlias, err := obitax.DefaultTaxonomy().Taxon(obitaxonomy.CLIRequestsPathForTaxid())
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("Cannot identify the requested taxon: %s (%v)",
|
|
||||||
obitaxonomy.CLIRequestsPathForTaxid(), err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if isAlias {
|
|
||||||
if obidefault.FailOnTaxonomy() {
|
|
||||||
log.Fatalf("Taxon %s is an alias for %s", taxon.String(), taxon.Parent().String())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
s := taxon.Path()
|
|
||||||
|
|
||||||
if s == nil {
|
|
||||||
log.Fatalf("Cannot extract taxonomic path describing %s", taxon.String())
|
|
||||||
}
|
|
||||||
|
|
||||||
iterator = s.Iterator()
|
|
||||||
|
|
||||||
if obitaxonomy.CLIWithQuery() {
|
|
||||||
iterator = iterator.AddMetadata("query", taxon.String())
|
|
||||||
}
|
|
||||||
|
|
||||||
case len(args) == 0:
|
|
||||||
iterator = obitax.DefaultTaxonomy().Iterator()
|
|
||||||
default:
|
|
||||||
iters := make([]*obitax.ITaxon, len(args))
|
|
||||||
|
|
||||||
for i, pat := range args {
|
|
||||||
ii := obitax.DefaultTaxonomy().IFilterOnName(pat, obitaxonomy.CLIFixedPattern(), true)
|
|
||||||
if obitaxonomy.CLIWithQuery() {
|
|
||||||
ii = ii.AddMetadata("query", pat)
|
|
||||||
}
|
|
||||||
iters[i] = ii
|
|
||||||
}
|
|
||||||
|
|
||||||
iterator = iters[0]
|
|
||||||
|
|
||||||
if len(iters) > 1 {
|
|
||||||
iterator = iterator.Concat(iters[1:]...)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
iterator = obitaxonomy.CLITaxonRestrictions(iterator)
|
|
||||||
obitaxonomy.CLICSVTaxaWriter(iterator, true)
|
|
||||||
|
|
||||||
obiutils.WaitForLastPipe()
|
|
||||||
|
|
||||||
}
|
|
||||||
@@ -3,12 +3,13 @@ package main
|
|||||||
import (
|
import (
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiuniq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiuniq"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
@@ -31,18 +32,20 @@ func main() {
|
|||||||
// trace.Start(ftrace)
|
// trace.Start(ftrace)
|
||||||
// defer trace.Stop()
|
// defer trace.Stop()
|
||||||
|
|
||||||
obidefault.SetBatchSize(10)
|
|
||||||
obidefault.SetReadQualities(false)
|
|
||||||
optionParser := obioptions.GenerateOptionParser(obiuniq.OptionSet)
|
optionParser := obioptions.GenerateOptionParser(obiuniq.OptionSet)
|
||||||
|
|
||||||
_, args := optionParser(os.Args)
|
_, args := optionParser(os.Args)
|
||||||
|
|
||||||
sequences, err := obiconvert.CLIReadBioSequences(args...)
|
sequences, err := obiconvert.CLIReadBioSequences(args...)
|
||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Cannot open file (%v)", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
unique := obiuniq.CLIUnique(sequences)
|
unique := obiuniq.CLIUnique(sequences)
|
||||||
obiconvert.CLIWriteBioSequences(unique, true)
|
obiconvert.CLIWriteBioSequences(unique, true)
|
||||||
|
|
||||||
obiutils.WaitForLastPipe()
|
obiiter.WaitForLastPipe()
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,16 +1,29 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"os"
|
"fmt"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiblackboard"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func r2(bb *obiblackboard.Blackboard, task *obiblackboard.Task) *obiblackboard.Task {
|
||||||
|
fmt.Printf("value : %v\n", task.Body)
|
||||||
|
return obiblackboard.NewInitialTask()
|
||||||
|
}
|
||||||
|
|
||||||
|
func rmul(bb *obiblackboard.Blackboard, task *obiblackboard.Task) *obiblackboard.Task {
|
||||||
|
nt := task.GetNext()
|
||||||
|
nt.Body = task.Body.(int) * 2
|
||||||
|
return nt
|
||||||
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
|
|
||||||
obitax.DetectTaxonomyFormat(os.Args[1])
|
black := obiblackboard.NewBlackBoard(20)
|
||||||
println(obiutils.RemoveAllExt("toto/tutu/test.txt"))
|
|
||||||
println(obiutils.Basename("toto/tutu/test.txt"))
|
|
||||||
|
|
||||||
|
black.RegisterRunner("todisplay", "final", r2)
|
||||||
|
black.RegisterRunner("multiply", "todisplay", rmul)
|
||||||
|
black.RegisterRunner("initial", "multiply", obiblackboard.DoCount(1000).RepeatTask(4))
|
||||||
|
|
||||||
|
black.Run()
|
||||||
}
|
}
|
||||||
|
|||||||
13
go.mod
13
go.mod
@@ -1,15 +1,12 @@
|
|||||||
module git.metabarcoding.org/obitools/obitools4/obitools4
|
module git.metabarcoding.org/obitools/obitools4/obitools4
|
||||||
|
|
||||||
go 1.23.1
|
go 1.22.1
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/DavidGamba/go-getoptions v0.28.0
|
github.com/DavidGamba/go-getoptions v0.28.0
|
||||||
github.com/PaesslerAG/gval v1.2.2
|
github.com/PaesslerAG/gval v1.2.2
|
||||||
github.com/TuftsBCB/io v0.0.0-20140121014543-22b94e9b23f9
|
|
||||||
github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df
|
github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df
|
||||||
github.com/buger/jsonparser v1.1.1
|
|
||||||
github.com/chen3feng/stl4go v0.1.1
|
github.com/chen3feng/stl4go v0.1.1
|
||||||
github.com/dlclark/regexp2 v1.11.4
|
|
||||||
github.com/goccy/go-json v0.10.3
|
github.com/goccy/go-json v0.10.3
|
||||||
github.com/klauspost/pgzip v1.2.6
|
github.com/klauspost/pgzip v1.2.6
|
||||||
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58
|
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58
|
||||||
@@ -26,12 +23,20 @@ require (
|
|||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
|
github.com/bytedance/sonic v1.11.9 // indirect
|
||||||
|
github.com/bytedance/sonic/loader v0.1.1 // indirect
|
||||||
|
github.com/cloudwego/base64x v0.1.4 // indirect
|
||||||
|
github.com/cloudwego/iasm v0.2.0 // indirect
|
||||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||||
github.com/goombaio/orderedmap v0.0.0-20180924084748-ba921b7e2419 // indirect
|
github.com/goombaio/orderedmap v0.0.0-20180924084748-ba921b7e2419 // indirect
|
||||||
|
github.com/klauspost/cpuid/v2 v2.0.9 // indirect
|
||||||
github.com/kr/pretty v0.3.0 // indirect
|
github.com/kr/pretty v0.3.0 // indirect
|
||||||
github.com/kr/text v0.2.0 // indirect
|
github.com/kr/text v0.2.0 // indirect
|
||||||
|
github.com/montanaflynn/stats v0.7.1 // indirect
|
||||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||||
github.com/rogpeppe/go-internal v1.6.1 // indirect
|
github.com/rogpeppe/go-internal v1.6.1 // indirect
|
||||||
|
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
|
||||||
|
golang.org/x/arch v0.0.0-20210923205945-b76863e36670 // indirect
|
||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
|
|||||||
33
go.sum
33
go.sum
@@ -4,25 +4,29 @@ github.com/PaesslerAG/gval v1.2.2 h1:Y7iBzhgE09IGTt5QgGQ2IdaYYYOU134YGHBThD+wm9E
|
|||||||
github.com/PaesslerAG/gval v1.2.2/go.mod h1:XRFLwvmkTEdYziLdaCeCa5ImcGVrfQbeNUbVR+C6xac=
|
github.com/PaesslerAG/gval v1.2.2/go.mod h1:XRFLwvmkTEdYziLdaCeCa5ImcGVrfQbeNUbVR+C6xac=
|
||||||
github.com/PaesslerAG/jsonpath v0.1.0 h1:gADYeifvlqK3R3i2cR5B4DGgxLXIPb3TRTH1mGi0jPI=
|
github.com/PaesslerAG/jsonpath v0.1.0 h1:gADYeifvlqK3R3i2cR5B4DGgxLXIPb3TRTH1mGi0jPI=
|
||||||
github.com/PaesslerAG/jsonpath v0.1.0/go.mod h1:4BzmtoM/PI8fPO4aQGIusjGxGir2BzcV0grWtFzq1Y8=
|
github.com/PaesslerAG/jsonpath v0.1.0/go.mod h1:4BzmtoM/PI8fPO4aQGIusjGxGir2BzcV0grWtFzq1Y8=
|
||||||
github.com/TuftsBCB/io v0.0.0-20140121014543-22b94e9b23f9 h1:Zc1/GNsUpgZR9qm1EmRSKrnOHA7CCd0bIzGdq0cREN0=
|
|
||||||
github.com/TuftsBCB/io v0.0.0-20140121014543-22b94e9b23f9/go.mod h1:PZyV4WA3NpqtezSY0h6E6NARAmdDm0qwrydveOyR5Gc=
|
|
||||||
github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df h1:GSoSVRLoBaFpOOds6QyY1L8AX7uoY+Ln3BHc22W40X0=
|
github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df h1:GSoSVRLoBaFpOOds6QyY1L8AX7uoY+Ln3BHc22W40X0=
|
||||||
github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df/go.mod h1:hiVxq5OP2bUGBRNS3Z/bt/reCLFNbdcST6gISi1fiOM=
|
github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df/go.mod h1:hiVxq5OP2bUGBRNS3Z/bt/reCLFNbdcST6gISi1fiOM=
|
||||||
github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
|
github.com/bytedance/sonic v1.11.9 h1:LFHENlIY/SLzDWverzdOvgMztTxcfcF+cqNsz9pK5zg=
|
||||||
github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
|
github.com/bytedance/sonic v1.11.9/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
|
||||||
|
github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
|
||||||
|
github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
|
||||||
github.com/chen3feng/stl4go v0.1.1 h1:0L1+mDw7pomftKDruM23f1mA7miavOj6C6MZeadzN2Q=
|
github.com/chen3feng/stl4go v0.1.1 h1:0L1+mDw7pomftKDruM23f1mA7miavOj6C6MZeadzN2Q=
|
||||||
github.com/chen3feng/stl4go v0.1.1/go.mod h1:5ml3psLgETJjRJnMbPE+JiHLrCpt+Ajc2weeTECXzWU=
|
github.com/chen3feng/stl4go v0.1.1/go.mod h1:5ml3psLgETJjRJnMbPE+JiHLrCpt+Ajc2weeTECXzWU=
|
||||||
|
github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
|
||||||
|
github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
|
||||||
|
github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
|
||||||
|
github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
|
||||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/dlclark/regexp2 v1.11.4 h1:rPYF9/LECdNymJufQKmri9gV604RvvABwgOA8un7yAo=
|
|
||||||
github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
|
|
||||||
github.com/dsnet/compress v0.0.1 h1:PlZu0n3Tuv04TzpfPbrnI0HW/YwodEXDS+oPKahKF0Q=
|
github.com/dsnet/compress v0.0.1 h1:PlZu0n3Tuv04TzpfPbrnI0HW/YwodEXDS+oPKahKF0Q=
|
||||||
github.com/dsnet/compress v0.0.1/go.mod h1:Aw8dCMJ7RioblQeTqt88akK31OvO8Dhf5JflhBbQEHo=
|
github.com/dsnet/compress v0.0.1/go.mod h1:Aw8dCMJ7RioblQeTqt88akK31OvO8Dhf5JflhBbQEHo=
|
||||||
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
|
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
|
||||||
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
|
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
|
||||||
github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
|
github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
|
||||||
|
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
|
||||||
|
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
|
||||||
github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA=
|
github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA=
|
||||||
github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M=
|
github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M=
|
||||||
github.com/goombaio/orderedmap v0.0.0-20180924084748-ba921b7e2419 h1:SajEQ6tktpF9SRIuzbiPOX9AEZZ53Bvw0k9Mzrts8Lg=
|
github.com/goombaio/orderedmap v0.0.0-20180924084748-ba921b7e2419 h1:SajEQ6tktpF9SRIuzbiPOX9AEZZ53Bvw0k9Mzrts8Lg=
|
||||||
@@ -33,9 +37,13 @@ github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1
|
|||||||
github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
|
github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
|
||||||
github.com/klauspost/compress v1.17.2 h1:RlWWUY/Dr4fL8qk9YG7DTZ7PDgME2V4csBXA8L/ixi4=
|
github.com/klauspost/compress v1.17.2 h1:RlWWUY/Dr4fL8qk9YG7DTZ7PDgME2V4csBXA8L/ixi4=
|
||||||
github.com/klauspost/compress v1.17.2/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
|
github.com/klauspost/compress v1.17.2/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
|
||||||
|
github.com/klauspost/cpuid v1.2.0 h1:NMpwD2G9JSFOE1/TJjGSo5zG7Yb2bTe7eq1jH+irmeE=
|
||||||
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
|
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
|
||||||
|
github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4=
|
||||||
|
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
|
||||||
github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU=
|
github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU=
|
||||||
github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
|
github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
|
||||||
|
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
|
||||||
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|
||||||
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
|
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
|
||||||
github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
|
github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
|
||||||
@@ -50,6 +58,8 @@ github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZ
|
|||||||
github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
|
github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
|
||||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
|
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
|
||||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
|
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
|
||||||
|
github.com/montanaflynn/stats v0.7.1 h1:etflOAAHORrCC44V+aR6Ftzort912ZU+YLiSTuV8eaE=
|
||||||
|
github.com/montanaflynn/stats v0.7.1/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt6R8Bnaayow=
|
||||||
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0=
|
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0=
|
||||||
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y=
|
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y=
|
||||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
@@ -68,17 +78,26 @@ github.com/shopspring/decimal v1.3.1/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFR
|
|||||||
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
|
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
|
||||||
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
||||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||||
|
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||||
|
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
||||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||||
|
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||||
|
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||||
|
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
||||||
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
|
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
|
||||||
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
||||||
github.com/tevino/abool/v2 v2.1.0 h1:7w+Vf9f/5gmKT4m4qkayb33/92M+Um45F2BkHOR+L/c=
|
github.com/tevino/abool/v2 v2.1.0 h1:7w+Vf9f/5gmKT4m4qkayb33/92M+Um45F2BkHOR+L/c=
|
||||||
github.com/tevino/abool/v2 v2.1.0/go.mod h1:+Lmlqk6bHDWHqN1cbxqhwEAwMPXgc8I1SDEamtseuXY=
|
github.com/tevino/abool/v2 v2.1.0/go.mod h1:+Lmlqk6bHDWHqN1cbxqhwEAwMPXgc8I1SDEamtseuXY=
|
||||||
|
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
|
||||||
|
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
|
||||||
github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8=
|
github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8=
|
||||||
github.com/ulikunitz/xz v0.5.11 h1:kpFauv27b6ynzBNT/Xy+1k+fK4WswhN/6PN5WhFAGw8=
|
github.com/ulikunitz/xz v0.5.11 h1:kpFauv27b6ynzBNT/Xy+1k+fK4WswhN/6PN5WhFAGw8=
|
||||||
github.com/ulikunitz/xz v0.5.11/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
|
github.com/ulikunitz/xz v0.5.11/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
|
||||||
github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M=
|
github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M=
|
||||||
github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw=
|
github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw=
|
||||||
|
golang.org/x/arch v0.0.0-20210923205945-b76863e36670 h1:18EFjUmQOcUvxNYSkA6jO9VAiXCnxFY6NyDX0bHDmkU=
|
||||||
|
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
|
||||||
golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI=
|
golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI=
|
||||||
golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo=
|
golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo=
|
||||||
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
|
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
|
||||||
@@ -101,6 +120,8 @@ gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
|
|||||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
|
nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
|
||||||
|
rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
|
||||||
scientificgo.org/special v0.0.0 h1:P6WJkECo6tgtvZAEfNXl+KEB9ReAatjKAeX8U07mjSc=
|
scientificgo.org/special v0.0.0 h1:P6WJkECo6tgtvZAEfNXl+KEB9ReAatjKAeX8U07mjSc=
|
||||||
scientificgo.org/special v0.0.0/go.mod h1:LoGVh9tS431RLTJo7gFlYDKFWq44cEb7QqL+M0EKtZU=
|
scientificgo.org/special v0.0.0/go.mod h1:LoGVh9tS431RLTJo7gFlYDKFWq44cEb7QqL+M0EKtZU=
|
||||||
scientificgo.org/testutil v0.0.0 h1:y356DHRo0tAz9zIFmxlhZoKDlHPHaWW/DCm9k3PhIMA=
|
scientificgo.org/testutil v0.0.0 h1:y356DHRo0tAz9zIFmxlhZoKDlHPHaWW/DCm9k3PhIMA=
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ INSTALL_DIR="/usr/local"
|
|||||||
OBITOOLS_PREFIX=""
|
OBITOOLS_PREFIX=""
|
||||||
# default values
|
# default values
|
||||||
URL="https://go.dev/dl/"
|
URL="https://go.dev/dl/"
|
||||||
OBIURL4="https://github.com/metabarcoding/obitools4/archive/refs/heads/master.zip"
|
OBIURL4="https://git.metabarcoding.org/obitools/obitools4/obitools4/-/archive/master/obitools4-master.tar.gz"
|
||||||
INSTALL_DIR="/usr/local"
|
INSTALL_DIR="/usr/local"
|
||||||
OBITOOLS_PREFIX=""
|
OBITOOLS_PREFIX=""
|
||||||
|
|
||||||
@@ -106,10 +106,8 @@ curl "$GOURL" \
|
|||||||
PATH="$(pwd)/go/bin:$PATH"
|
PATH="$(pwd)/go/bin:$PATH"
|
||||||
export PATH
|
export PATH
|
||||||
|
|
||||||
curl -L "$OBIURL4" > master.zip
|
curl -L "$OBIURL4" \
|
||||||
unzip master.zip
|
| tar zxf -
|
||||||
|
|
||||||
echo "Install OBITOOLS from : $OBIURL4"
|
|
||||||
|
|
||||||
cd obitools4-master || exit
|
cd obitools4-master || exit
|
||||||
|
|
||||||
|
|||||||
@@ -1,144 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
#
|
|
||||||
# Here give the name of the test serie
|
|
||||||
#
|
|
||||||
|
|
||||||
TEST_NAME=obicount
|
|
||||||
|
|
||||||
######
|
|
||||||
#
|
|
||||||
# Some variable and function definitions: please don't change them
|
|
||||||
#
|
|
||||||
######
|
|
||||||
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
|
||||||
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
|
||||||
export PATH="${OBITOOLS_DIR}:${PATH}"
|
|
||||||
|
|
||||||
|
|
||||||
TMPDIR="$(mktemp -d)"
|
|
||||||
ntest=0
|
|
||||||
success=0
|
|
||||||
failed=0
|
|
||||||
|
|
||||||
cleanup() {
|
|
||||||
echo "========================================" 1>&2
|
|
||||||
echo "## Results of the $TEST_NAME tests:" 1>&2
|
|
||||||
|
|
||||||
echo 1>&2
|
|
||||||
echo "- $ntest tests run" 1>&2
|
|
||||||
echo "- $success successfully completed" 1>&2
|
|
||||||
echo "- $failed failed tests" 1>&2
|
|
||||||
echo 1>&2
|
|
||||||
echo "Cleaning up the temporary directory..." 1>&2
|
|
||||||
echo 1>&2
|
|
||||||
echo "========================================" 1>&2
|
|
||||||
|
|
||||||
rm -rf "$TMPDIR" # Suppress the temporary directory
|
|
||||||
|
|
||||||
if [ $failed -gt 0 ]; then
|
|
||||||
log "$TEST_NAME tests failed"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
exit 0
|
|
||||||
}
|
|
||||||
|
|
||||||
log() {
|
|
||||||
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
|
||||||
}
|
|
||||||
|
|
||||||
log "Testing $TEST_NAME..."
|
|
||||||
log "Test directory is $TEST_DIR"
|
|
||||||
log "obitools directory is $OBITOOLS_DIR"
|
|
||||||
log "Temporary directory is $TMPDIR"
|
|
||||||
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
|
||||||
|
|
||||||
######################################################################
|
|
||||||
####
|
|
||||||
#### Below are the tests
|
|
||||||
####
|
|
||||||
#### Before each test :
|
|
||||||
#### - increment the variable ntest
|
|
||||||
####
|
|
||||||
#### Run the command as the condition of an if / then /else
|
|
||||||
#### - The command must return 0 on success
|
|
||||||
#### - The command must return an exit code different from 0 on failure
|
|
||||||
#### - The datafiles are stored in the same directory than the test script
|
|
||||||
#### - The test script directory is stored in the TEST_DIR variable
|
|
||||||
#### - If result files have to be produced they must be stored
|
|
||||||
#### in the temporary directory (TMPDIR variable)
|
|
||||||
####
|
|
||||||
#### then clause is executed on success of the command
|
|
||||||
#### - Write a success message using the log function
|
|
||||||
#### - increment the variable success
|
|
||||||
####
|
|
||||||
#### else clause is executed on failure of the command
|
|
||||||
#### - Write a failure message using the log function
|
|
||||||
#### - increment the variable failed
|
|
||||||
####
|
|
||||||
######################################################################
|
|
||||||
|
|
||||||
((ntest++))
|
|
||||||
if obicount "${TEST_DIR}/wolf_F.fasta.gz" \
|
|
||||||
> "${TMPDIR}/wolf_F.fasta_count.csv"
|
|
||||||
then
|
|
||||||
log "OBICount: fasta reading OK"
|
|
||||||
((success++))
|
|
||||||
else
|
|
||||||
log "OBICount: fasta reading failed"
|
|
||||||
((failed++))
|
|
||||||
fi
|
|
||||||
|
|
||||||
((ntest++))
|
|
||||||
if obicount "${TEST_DIR}/wolf_F.fastq.gz" \
|
|
||||||
> "${TMPDIR}/wolf_F.fastq_count.csv"
|
|
||||||
then
|
|
||||||
log "OBICount: fastq reading OK"
|
|
||||||
((success++))
|
|
||||||
else
|
|
||||||
log "OBICount: fastq reading failed"
|
|
||||||
((failed++))
|
|
||||||
fi
|
|
||||||
|
|
||||||
((ntest++))
|
|
||||||
if obicount "${TEST_DIR}/wolf_F.csv.gz" \
|
|
||||||
> "${TMPDIR}/wolf_F.csv_count.csv"
|
|
||||||
then
|
|
||||||
log "OBICount: csv reading OK"
|
|
||||||
((success++))
|
|
||||||
else
|
|
||||||
log "OBICount: csv reading failed"
|
|
||||||
((failed++))
|
|
||||||
fi
|
|
||||||
|
|
||||||
((ntest++))
|
|
||||||
if diff "${TMPDIR}/wolf_F.fasta_count.csv" \
|
|
||||||
"${TMPDIR}/wolf_F.fastq_count.csv" > /dev/null
|
|
||||||
then
|
|
||||||
log "OBICount: counting on fasta and fastq are identical OK"
|
|
||||||
((success++))
|
|
||||||
else
|
|
||||||
log "OBICount: counting on fasta and fastq are different failed"
|
|
||||||
((failed++))
|
|
||||||
fi
|
|
||||||
|
|
||||||
((ntest++))
|
|
||||||
if diff "${TMPDIR}/wolf_F.fasta_count.csv" \
|
|
||||||
"${TMPDIR}/wolf_F.csv_count.csv" > /dev/null
|
|
||||||
then
|
|
||||||
log "OBICount: counting on fasta and csv are identical OK"
|
|
||||||
((success++))
|
|
||||||
else
|
|
||||||
log "OBICount: counting on fasta and csv are different failed"
|
|
||||||
((failed++))
|
|
||||||
fi
|
|
||||||
|
|
||||||
#########################################
|
|
||||||
#
|
|
||||||
# At the end of the tests
|
|
||||||
# the cleanup function is called
|
|
||||||
#
|
|
||||||
#########################################
|
|
||||||
|
|
||||||
cleanup
|
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,134 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
#
|
|
||||||
# Here give the name of the test serie
|
|
||||||
#
|
|
||||||
|
|
||||||
TEST_NAME=obiparing
|
|
||||||
|
|
||||||
######
|
|
||||||
#
|
|
||||||
# Some variable and function definitions: please don't change them
|
|
||||||
#
|
|
||||||
######
|
|
||||||
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
|
||||||
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
|
||||||
export PATH="${OBITOOLS_DIR}:${PATH}"
|
|
||||||
|
|
||||||
|
|
||||||
TMPDIR="$(mktemp -d)"
|
|
||||||
ntest=0
|
|
||||||
success=0
|
|
||||||
failed=0
|
|
||||||
|
|
||||||
cleanup() {
|
|
||||||
echo "========================================" 1>&2
|
|
||||||
echo "## Results of the $TEST_NAME tests:" 1>&2
|
|
||||||
|
|
||||||
echo 1>&2
|
|
||||||
echo "- $ntest tests run" 1>&2
|
|
||||||
echo "- $success successfully completed" 1>&2
|
|
||||||
echo "- $failed failed tests" 1>&2
|
|
||||||
echo 1>&2
|
|
||||||
echo "Cleaning up the temporary directory..." 1>&2
|
|
||||||
echo 1>&2
|
|
||||||
echo "========================================" 1>&2
|
|
||||||
|
|
||||||
rm -rf "$TMPDIR" # Suppress the temporary directory
|
|
||||||
|
|
||||||
if [ $failed -gt 0 ]; then
|
|
||||||
log "$TEST_NAME tests failed"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
exit 0
|
|
||||||
}
|
|
||||||
|
|
||||||
log() {
|
|
||||||
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
|
||||||
}
|
|
||||||
|
|
||||||
log "Testing $TEST_NAME..."
|
|
||||||
log "Test directory is $TEST_DIR"
|
|
||||||
log "obitools directory is $OBITOOLS_DIR"
|
|
||||||
log "Temporary directory is $TMPDIR"
|
|
||||||
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
|
||||||
|
|
||||||
######################################################################
|
|
||||||
####
|
|
||||||
#### Below are the tests
|
|
||||||
####
|
|
||||||
#### Before each test :
|
|
||||||
#### - increment the variable ntest
|
|
||||||
####
|
|
||||||
#### Run the command as the condition of an if / then /else
|
|
||||||
#### - The command must return 0 on success
|
|
||||||
#### - The command must return an exit code different from 0 on failure
|
|
||||||
#### - The datafiles are stored in the same directory than the test script
|
|
||||||
#### - The test script directory is stored in the TEST_DIR variable
|
|
||||||
#### - If result files have to be produced they must be stored
|
|
||||||
#### in the temporary directory (TMPDIR variable)
|
|
||||||
####
|
|
||||||
#### then clause is executed on success of the command
|
|
||||||
#### - Write a success message using the log function
|
|
||||||
#### - increment the variable success
|
|
||||||
####
|
|
||||||
#### else clause is executed on failure of the command
|
|
||||||
#### - Write a failure message using the log function
|
|
||||||
#### - increment the variable failed
|
|
||||||
####
|
|
||||||
######################################################################
|
|
||||||
|
|
||||||
((ntest++))
|
|
||||||
if obipairing -F "${TEST_DIR}/wolf_F.fastq.gz" \
|
|
||||||
-R "${TEST_DIR}/wolf_R.fastq.gz" \
|
|
||||||
| obidistribute -Z -c mode \
|
|
||||||
-p "${TMPDIR}/wolf_paired_%s.fastq.gz"
|
|
||||||
then
|
|
||||||
log "OBIPairing: sequence pairing OK"
|
|
||||||
((success++))
|
|
||||||
else
|
|
||||||
log "OBIPairing: sequence pairing failed"
|
|
||||||
((failed++))
|
|
||||||
fi
|
|
||||||
|
|
||||||
((ntest++))
|
|
||||||
if obicsv -Z -s -i \
|
|
||||||
-k ali_dir -k ali_length -k paring_fast_count \
|
|
||||||
-k paring_fast_overlap -k paring_fast_score \
|
|
||||||
-k score -k score_norm -k seq_a_single \
|
|
||||||
-k seq_b_single -k seq_ab_match \
|
|
||||||
"${TMPDIR}/wolf_paired_alignment.fastq.gz" \
|
|
||||||
> "${TMPDIR}/wolf_paired_alignment.csv.gz" \
|
|
||||||
&& zdiff -c "${TEST_DIR}/wolf_paired_alignment.csv.gz" \
|
|
||||||
"${TMPDIR}/wolf_paired_alignment.csv.gz"
|
|
||||||
then
|
|
||||||
log "OBIPairing: check aligned sequences OK"
|
|
||||||
((success++))
|
|
||||||
else
|
|
||||||
log "OBIPairing: check aligned sequences failed"
|
|
||||||
((failed++))
|
|
||||||
fi
|
|
||||||
|
|
||||||
((ntest++))
|
|
||||||
if obicsv -Z -s -i \
|
|
||||||
"${TMPDIR}/wolf_paired_join.fastq.gz" \
|
|
||||||
> "${TMPDIR}/wolf_paired_join.csv.gz" \
|
|
||||||
&& zdiff -c "${TEST_DIR}/wolf_paired_join.csv.gz" \
|
|
||||||
"${TMPDIR}/wolf_paired_join.csv.gz"
|
|
||||||
then
|
|
||||||
log "OBIPairing: check joined sequences OK"
|
|
||||||
((success++))
|
|
||||||
else
|
|
||||||
log "OBIPairing: check joined sequences failed"
|
|
||||||
((failed++))
|
|
||||||
fi
|
|
||||||
|
|
||||||
#########################################
|
|
||||||
#
|
|
||||||
# At the end of the tests
|
|
||||||
# the cleanup function is called
|
|
||||||
#
|
|
||||||
#########################################
|
|
||||||
|
|
||||||
cleanup
|
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -10,7 +10,6 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// // A pool of byte slices.
|
// // A pool of byte slices.
|
||||||
@@ -159,30 +158,12 @@ func BuildQualityConsensus(seqA, seqB *obiseq.BioSequence, path []int, statOnMis
|
|||||||
|
|
||||||
match := 0
|
match := 0
|
||||||
|
|
||||||
left := obiutils.Abs(path[0])
|
|
||||||
right := 0
|
|
||||||
if path[len(path)-1] == 0 {
|
|
||||||
right = path[len(path)-2]
|
|
||||||
}
|
|
||||||
|
|
||||||
right = obiutils.Abs(right)
|
|
||||||
|
|
||||||
right = len(*bufferQA) - right
|
|
||||||
|
|
||||||
// log.Warnf("BuildQualityConsensus: left = %d right = %d\n", left, right)
|
|
||||||
|
|
||||||
for i, qA = range *bufferQA {
|
for i, qA = range *bufferQA {
|
||||||
nA := (*bufferSA)[i]
|
nA := (*bufferSA)[i]
|
||||||
nB := (*bufferSB)[i]
|
nB := (*bufferSB)[i]
|
||||||
qB = (*bufferQB)[i]
|
qB = (*bufferQB)[i]
|
||||||
|
|
||||||
if statOnMismatch && i >= left && i < right && nA != nB {
|
if statOnMismatch && nA != nB && nA != ' ' && nB != ' ' {
|
||||||
if nA == ' ' {
|
|
||||||
nA = '-'
|
|
||||||
}
|
|
||||||
if nB == ' ' {
|
|
||||||
nB = '-'
|
|
||||||
}
|
|
||||||
mismatches[strings.ToUpper(fmt.Sprintf("(%c:%02d)->(%c:%02d)", nA, qA, nB, qB))] = i + 1
|
mismatches[strings.ToUpper(fmt.Sprintf("(%c:%02d)->(%c:%02d)", nA, qA, nB, qB))] = i + 1
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -202,12 +183,13 @@ func BuildQualityConsensus(seqA, seqB *obiseq.BioSequence, path []int, statOnMis
|
|||||||
|
|
||||||
q := qA + qB
|
q := qA + qB
|
||||||
|
|
||||||
if nA != nB {
|
if qA > 0 && qB > 0 {
|
||||||
q = qM - byte(math.Log10(1-math.Pow(10, -float64(qm)/40))*10+0.5)
|
if nA != nB {
|
||||||
}
|
q = qM - byte(math.Log10(1-math.Pow(10, -float64(qm)/30))*10+0.5)
|
||||||
|
}
|
||||||
if nA == nB {
|
if nA == nB {
|
||||||
match++
|
match++
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if q > 90 {
|
if q > 90 {
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ func _Backtracking(pathMatrix []int, lseqA, lseqB int, path *[]int) []int {
|
|||||||
cp := cap(*path)
|
cp := cap(*path)
|
||||||
(*path) = slices.Grow((*path), needed)
|
(*path) = slices.Grow((*path), needed)
|
||||||
if cp < cap(*path) {
|
if cp < cap(*path) {
|
||||||
log.Debugf("Resized path from %d to %d\n", cp, cap(*path))
|
log.Infof("Resized path from %d to %d\n", cp, cap(*path))
|
||||||
}
|
}
|
||||||
p := cap(*path)
|
p := cap(*path)
|
||||||
*path = (*path)[:p]
|
*path = (*path)[:p]
|
||||||
|
|||||||
@@ -74,30 +74,6 @@ func _Logaddexp(a, b float64) float64 {
|
|||||||
return b + math.Log1p(math.Exp(a-b))
|
return b + math.Log1p(math.Exp(a-b))
|
||||||
}
|
}
|
||||||
|
|
||||||
func _Log1mexp(a float64) float64 {
|
|
||||||
if a > 0 {
|
|
||||||
log.Panic("Log1mexp: a > 0")
|
|
||||||
}
|
|
||||||
|
|
||||||
if a == 0 {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
|
|
||||||
return (math.Log(-math.Expm1(a)))
|
|
||||||
}
|
|
||||||
|
|
||||||
func _Logdiffexp(a, b float64) float64 {
|
|
||||||
if a < b {
|
|
||||||
log.Panic("Log1mexp: a < b")
|
|
||||||
}
|
|
||||||
|
|
||||||
if a == b {
|
|
||||||
return math.Inf(-1)
|
|
||||||
}
|
|
||||||
|
|
||||||
return a + _Log1mexp(b-a)
|
|
||||||
}
|
|
||||||
|
|
||||||
// _MatchScoreRatio calculates the match score ratio between two bytes.
|
// _MatchScoreRatio calculates the match score ratio between two bytes.
|
||||||
//
|
//
|
||||||
// Parameters:
|
// Parameters:
|
||||||
@@ -107,25 +83,25 @@ func _Logdiffexp(a, b float64) float64 {
|
|||||||
// Returns:
|
// Returns:
|
||||||
// - float64: the match score ratio when a match is observed
|
// - float64: the match score ratio when a match is observed
|
||||||
// - float64: the match score ratio when a mismatch is observed
|
// - float64: the match score ratio when a mismatch is observed
|
||||||
func _MatchScoreRatio(QF, QR byte) (float64, float64) {
|
func _MatchScoreRatio(a, b byte) (float64, float64) {
|
||||||
|
|
||||||
|
l2 := math.Log(2)
|
||||||
l3 := math.Log(3)
|
l3 := math.Log(3)
|
||||||
l4 := math.Log(4)
|
|
||||||
l10 := math.Log(10)
|
l10 := math.Log(10)
|
||||||
qF := -float64(QF) / 10 * l10
|
lalea := math.Log(4) // 1 /(change of the random model)
|
||||||
qR := -float64(QR) / 10 * l10
|
lE1 := -float64(a)/10*l10 - l3 // log proba of sequencing error on A/3
|
||||||
term1 := _Logaddexp(qF, qR)
|
lE2 := -float64(b)/10*l10 - l3 // log proba of sequencing error on B/3
|
||||||
term2 := _Logdiffexp(term1, qF+qR)
|
lO1 := math.Log1p(-math.Exp(lE1 + l3)) // log proba no being an error on A
|
||||||
|
lO2 := math.Log1p(-math.Exp(lE2 + l3)) // log proba no being an error on B
|
||||||
|
lO1O2 := lO1 + lO2
|
||||||
|
lE1E2 := lE1 + lE2
|
||||||
|
lO1E2 := lO1 + lE2
|
||||||
|
lO2E1 := lO2 + lE1
|
||||||
|
|
||||||
// log.Warnf("MatchScoreRatio: %v, %v , %v, %v", QF, QR, term1, term2)
|
MM := _Logaddexp(lO1O2, lE1E2+l3) // Proba match when match observed
|
||||||
|
Mm := _Logaddexp(_Logaddexp(lO1E2, lO2E1), lE1E2+l2) // Proba match when mismatch observed
|
||||||
|
|
||||||
match_logp := _Log1mexp(term2 + l3 - l4)
|
return MM + lalea, Mm + lalea
|
||||||
match_score := match_logp - _Log1mexp(match_logp)
|
|
||||||
|
|
||||||
mismatch_logp := term2 - l4
|
|
||||||
mismatch_score := mismatch_logp - _Log1mexp(mismatch_logp)
|
|
||||||
|
|
||||||
return match_score, mismatch_score
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func _InitNucPartMatch() {
|
func _InitNucPartMatch() {
|
||||||
|
|||||||
@@ -21,15 +21,15 @@ func encodeValues(score, length int, out bool) uint64 {
|
|||||||
return fo
|
return fo
|
||||||
}
|
}
|
||||||
|
|
||||||
// func _isout(value uint64) bool {
|
func _isout(value uint64) bool {
|
||||||
// const outmask = uint64(1) << dwsize
|
const outmask = uint64(1) << dwsize
|
||||||
// return (value & outmask) == 0
|
return (value & outmask) == 0
|
||||||
// }
|
}
|
||||||
|
|
||||||
// func _lpath(value uint64) int {
|
func _lpath(value uint64) int {
|
||||||
// const mask = uint64(1<<wsize) - 1
|
const mask = uint64(1<<wsize) - 1
|
||||||
// return int(((value + 1) ^ mask) & mask)
|
return int(((value + 1) ^ mask) & mask)
|
||||||
// }
|
}
|
||||||
|
|
||||||
func decodeValues(value uint64) (int, int, bool) {
|
func decodeValues(value uint64) (int, int, bool) {
|
||||||
const mask = uint64(1<<wsize) - 1
|
const mask = uint64(1<<wsize) - 1
|
||||||
@@ -57,3 +57,4 @@ func _setout(value uint64) uint64 {
|
|||||||
var _empty = encodeValues(0, 0, false)
|
var _empty = encodeValues(0, 0, false)
|
||||||
var _out = encodeValues(0, 30000, true)
|
var _out = encodeValues(0, 30000, true)
|
||||||
var _notavail = encodeValues(0, 30000, false)
|
var _notavail = encodeValues(0, 30000, false)
|
||||||
|
|
||||||
|
|||||||
@@ -1,73 +1,30 @@
|
|||||||
package obialign
|
package obialign
|
||||||
|
|
||||||
import log "github.com/sirupsen/logrus"
|
|
||||||
|
|
||||||
// buffIndex converts a pair of coordinates (i, j) into a linear index in a matrix
|
|
||||||
// of size width x width. The coordinates are (-1)-indexed, and the linear index
|
|
||||||
// is 0-indexed as well. The function first adds 1 to both coordinates to make
|
|
||||||
// sure the (-1,-1) coordinate is at position 0 in the matrix, and then computes
|
|
||||||
// the linear index by multiplying the first coordinate by the width and adding
|
|
||||||
// the second coordinate.
|
|
||||||
func buffIndex(i, j, width int) int {
|
func buffIndex(i, j, width int) int {
|
||||||
return (i+1)*width + (j + 1)
|
return (i+1)*width + (j + 1)
|
||||||
}
|
}
|
||||||
|
func LocatePattern(pattern, sequence []byte) (int, int, int) {
|
||||||
// LocatePattern is a function to locate a pattern in a sequence.
|
|
||||||
//
|
|
||||||
// It uses a dynamic programming approach to build a matrix of scores.
|
|
||||||
// The score at each cell is the maximum of the score of the cell
|
|
||||||
// above it (representing a deletion), the score of the cell to its
|
|
||||||
// left (representing an insertion), and the score of the cell
|
|
||||||
// diagonally above it (representing a match).
|
|
||||||
//
|
|
||||||
// The score of a match is 0 if the two characters are the same,
|
|
||||||
// and -1 if they are different.
|
|
||||||
//
|
|
||||||
// The function returns the start and end positions of the best
|
|
||||||
// match, as well as the number of errors in the best match.
|
|
||||||
func LocatePattern(id string, pattern, sequence []byte) (int, int, int) {
|
|
||||||
|
|
||||||
if len(pattern) >= len(sequence) {
|
|
||||||
log.Panicf("Sequence %s:Pattern %s must be shorter than sequence %s", id, pattern, sequence)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Pattern spreads over the columns
|
|
||||||
// Sequence spreads over the rows
|
|
||||||
width := len(pattern) + 1
|
width := len(pattern) + 1
|
||||||
buffsize := (len(pattern) + 1) * (len(sequence) + 1)
|
buffsize := (len(pattern) + 1) * (len(sequence) + 1)
|
||||||
buffer := make([]int, buffsize)
|
buffer := make([]int, buffsize)
|
||||||
|
|
||||||
// The path matrix keeps track of the best path through the matrix
|
|
||||||
// 0 : indicate the diagonal path
|
|
||||||
// 1 : indicate the up path
|
|
||||||
// -1 : indicate the left path
|
|
||||||
path := make([]int, buffsize)
|
path := make([]int, buffsize)
|
||||||
|
|
||||||
// Initialize the first row of the matrix
|
|
||||||
for j := 0; j < len(pattern); j++ {
|
for j := 0; j < len(pattern); j++ {
|
||||||
idx := buffIndex(-1, j, width)
|
idx := buffIndex(-1, j, width)
|
||||||
buffer[idx] = -j - 1
|
buffer[idx] = -j - 1
|
||||||
path[idx] = -1
|
path[idx] = -1
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize the first column of the matrix
|
|
||||||
// Alignment is endgap free so first column = 0
|
|
||||||
// to allow primer to shift freely along the sequence
|
|
||||||
for i := -1; i < len(sequence); i++ {
|
for i := -1; i < len(sequence); i++ {
|
||||||
idx := buffIndex(i, -1, width)
|
idx := buffIndex(i, -1, width)
|
||||||
buffer[idx] = 0
|
buffer[idx] = 0
|
||||||
path[idx] = +1
|
path[idx] = +1
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fills the matrix except the last column
|
|
||||||
// where gaps must be free too.
|
|
||||||
path[0] = 0
|
path[0] = 0
|
||||||
jmax := len(pattern) - 1
|
jmax := len(pattern) - 1
|
||||||
for i := 0; i < len(sequence); i++ {
|
for i := 0; i < len(sequence); i++ {
|
||||||
for j := 0; j < jmax; j++ {
|
for j := 0; j < jmax; j++ {
|
||||||
|
|
||||||
// Mismatch score = -1
|
|
||||||
// Match score = 0
|
|
||||||
match := -1
|
match := -1
|
||||||
if _samenuc(pattern[j], sequence[i]) {
|
if _samenuc(pattern[j], sequence[i]) {
|
||||||
match = 0
|
match = 0
|
||||||
@@ -76,8 +33,6 @@ func LocatePattern(id string, pattern, sequence []byte) (int, int, int) {
|
|||||||
idx := buffIndex(i, j, width)
|
idx := buffIndex(i, j, width)
|
||||||
|
|
||||||
diag := buffer[buffIndex(i-1, j-1, width)] + match
|
diag := buffer[buffIndex(i-1, j-1, width)] + match
|
||||||
|
|
||||||
// Each gap cost -1
|
|
||||||
left := buffer[buffIndex(i, j-1, width)] - 1
|
left := buffer[buffIndex(i, j-1, width)] - 1
|
||||||
up := buffer[buffIndex(i-1, j, width)] - 1
|
up := buffer[buffIndex(i-1, j, width)] - 1
|
||||||
|
|
||||||
@@ -96,12 +51,9 @@ func LocatePattern(id string, pattern, sequence []byte) (int, int, int) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fills the last column considering the free up gap
|
|
||||||
for i := 0; i < len(sequence); i++ {
|
for i := 0; i < len(sequence); i++ {
|
||||||
idx := buffIndex(i, jmax, width)
|
idx := buffIndex(i, jmax, width)
|
||||||
|
|
||||||
// Mismatch score = -1
|
|
||||||
// Match score = 0
|
|
||||||
match := -1
|
match := -1
|
||||||
if _samenuc(pattern[jmax], sequence[i]) {
|
if _samenuc(pattern[jmax], sequence[i]) {
|
||||||
match = 0
|
match = 0
|
||||||
@@ -113,7 +65,6 @@ func LocatePattern(id string, pattern, sequence []byte) (int, int, int) {
|
|||||||
|
|
||||||
score := max(diag, up, left)
|
score := max(diag, up, left)
|
||||||
buffer[idx] = score
|
buffer[idx] = score
|
||||||
|
|
||||||
switch {
|
switch {
|
||||||
case score == left:
|
case score == left:
|
||||||
path[idx] = -1
|
path[idx] = -1
|
||||||
@@ -124,13 +75,11 @@ func LocatePattern(id string, pattern, sequence []byte) (int, int, int) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Bactracking of the aligment
|
|
||||||
|
|
||||||
i := len(sequence) - 1
|
i := len(sequence) - 1
|
||||||
j := jmax
|
j := jmax
|
||||||
end := -1
|
end := -1
|
||||||
lali := 0
|
lali := 0
|
||||||
for j > 0 { // C'était i > -1 && j > 0
|
for i > -1 && j > 0 {
|
||||||
lali++
|
lali++
|
||||||
switch path[buffIndex(i, j, width)] {
|
switch path[buffIndex(i, j, width)] {
|
||||||
case 0:
|
case 0:
|
||||||
@@ -151,9 +100,5 @@ func LocatePattern(id string, pattern, sequence []byte) (int, int, int) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// log.Warnf("from : %d to: %d error: %d match: %v",
|
|
||||||
// i, end+1, -buffer[buffIndex(len(sequence)-1, len(pattern)-1, width)],
|
|
||||||
// string(sequence[i:(end+1)]))
|
|
||||||
return i, end + 1, -buffer[buffIndex(len(sequence)-1, len(pattern)-1, width)]
|
return i, end + 1, -buffer[buffIndex(len(sequence)-1, len(pattern)-1, width)]
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,8 +1,6 @@
|
|||||||
package obialign
|
package obialign
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"log"
|
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obikmer"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obikmer"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
)
|
)
|
||||||
@@ -315,105 +313,6 @@ func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap, scale float64
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Gaps at the beginning and at the end of seqA are free
|
|
||||||
// With seqA spanning over lines and seqB over columns
|
|
||||||
//
|
|
||||||
// SeqA must be the longer sequence. If that constraint is not
|
|
||||||
// respected, the function will panic.
|
|
||||||
//
|
|
||||||
// TO BE FINISHED
|
|
||||||
// - First column gap = 0
|
|
||||||
// - Last column gaps = 0
|
|
||||||
//
|
|
||||||
// Paths are encoded :
|
|
||||||
// - 0 : for diagonal
|
|
||||||
// - -1 : for top
|
|
||||||
// - +1 : for left
|
|
||||||
func _FillMatrixPeCenterAlign(seqA, qualA, seqB, qualB []byte, gap, scale float64,
|
|
||||||
scoreMatrix, pathMatrix *[]int) int {
|
|
||||||
|
|
||||||
la := len(seqA)
|
|
||||||
lb := len(seqB)
|
|
||||||
|
|
||||||
if len(seqA) < len(seqB) {
|
|
||||||
log.Panicf("len(seqA) < len(seqB) : %d < %d", len(seqA), len(seqB))
|
|
||||||
}
|
|
||||||
|
|
||||||
// The actual gap score is the gap score times the mismatch between
|
|
||||||
// two bases with a score of 40
|
|
||||||
gapPenalty := int(scale*gap*float64(_NucScorePartMatchMismatch[40][40]) + 0.5)
|
|
||||||
|
|
||||||
needed := (la + 1) * (lb + 1)
|
|
||||||
|
|
||||||
if needed > cap(*scoreMatrix) {
|
|
||||||
*scoreMatrix = make([]int, needed)
|
|
||||||
}
|
|
||||||
|
|
||||||
if needed > cap(*pathMatrix) {
|
|
||||||
*pathMatrix = make([]int, needed)
|
|
||||||
}
|
|
||||||
|
|
||||||
*scoreMatrix = (*scoreMatrix)[:needed]
|
|
||||||
*pathMatrix = (*pathMatrix)[:needed]
|
|
||||||
|
|
||||||
// Sets the first position of the matrix with 0 score
|
|
||||||
_SetMatrices(scoreMatrix, pathMatrix, la, -1, -1, 0, 0)
|
|
||||||
|
|
||||||
// Fills the first column with score 0
|
|
||||||
for i := 0; i < la; i++ {
|
|
||||||
_SetMatrices(scoreMatrix, pathMatrix, la, i, -1, 0, -1)
|
|
||||||
}
|
|
||||||
|
|
||||||
// la1 := la - 1 // Except the last line (gaps are free on it)
|
|
||||||
lb1 := lb - 1 // Except the last column (gaps are free on it)
|
|
||||||
|
|
||||||
for j := 0; j < lb1; j++ {
|
|
||||||
|
|
||||||
// Fill the first line with scores corresponding to a set of gaps
|
|
||||||
_SetMatrices(scoreMatrix, pathMatrix, la, -1, j, (j+1)*gapPenalty, 1)
|
|
||||||
|
|
||||||
for i := 0; i < la; i++ {
|
|
||||||
left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, j)
|
|
||||||
// log.Infof("LA: i : %d j : %d left : %d diag : %d top : %d\n", i, j, left, diag, top)
|
|
||||||
|
|
||||||
diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[j], qualB[j], scale)
|
|
||||||
left += gapPenalty
|
|
||||||
top += gapPenalty
|
|
||||||
|
|
||||||
switch {
|
|
||||||
case diag >= left && diag >= top:
|
|
||||||
_SetMatrices(scoreMatrix, pathMatrix, la, i, j, diag, 0)
|
|
||||||
case left >= diag && left >= top:
|
|
||||||
_SetMatrices(scoreMatrix, pathMatrix, la, i, j, left, +1)
|
|
||||||
default:
|
|
||||||
_SetMatrices(scoreMatrix, pathMatrix, la, i, j, top, -1)
|
|
||||||
}
|
|
||||||
// log.Infof("LA: i : %d j : %d left : %d diag : %d top : %d [%d]\n", i, j, left, diag, top, _GetMatrix(scoreMatrix, la, i, j))
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
for i := 0; i < la; i++ {
|
|
||||||
left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, lb1)
|
|
||||||
// log.Infof("LA: i : %d j : %d left : %d diag : %d top : %d\n", i, j, left, diag, top)
|
|
||||||
|
|
||||||
diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[lb1], qualB[lb1], scale)
|
|
||||||
left += gapPenalty
|
|
||||||
|
|
||||||
switch {
|
|
||||||
case diag >= left && diag >= top:
|
|
||||||
_SetMatrices(scoreMatrix, pathMatrix, la, i, lb1, diag, 0)
|
|
||||||
case left >= diag && left >= top:
|
|
||||||
_SetMatrices(scoreMatrix, pathMatrix, la, i, lb1, left, +1)
|
|
||||||
default:
|
|
||||||
_SetMatrices(scoreMatrix, pathMatrix, la, i, lb1, top, -1)
|
|
||||||
}
|
|
||||||
// log.Infof("LA: i : %d j : %d left : %d diag : %d top : %d [%d]\n", i, j, left, diag, top, _GetMatrix(scoreMatrix, la, i, j))
|
|
||||||
}
|
|
||||||
|
|
||||||
return _GetMatrix(scoreMatrix, la, la-1, lb1)
|
|
||||||
}
|
|
||||||
|
|
||||||
func PELeftAlign(seqA, seqB *obiseq.BioSequence, gap, scale float64,
|
func PELeftAlign(seqA, seqB *obiseq.BioSequence, gap, scale float64,
|
||||||
arena PEAlignArena) (int, []int) {
|
arena PEAlignArena) (int, []int) {
|
||||||
|
|
||||||
@@ -460,33 +359,9 @@ func PERightAlign(seqA, seqB *obiseq.BioSequence, gap, scale float64,
|
|||||||
return score, path
|
return score, path
|
||||||
}
|
}
|
||||||
|
|
||||||
func PECenterAlign(seqA, seqB *obiseq.BioSequence, gap, scale float64,
|
|
||||||
arena PEAlignArena) (int, []int) {
|
|
||||||
|
|
||||||
if !_InitializedDnaScore {
|
|
||||||
_InitDNAScoreMatrix()
|
|
||||||
}
|
|
||||||
|
|
||||||
if arena.pointer == nil {
|
|
||||||
arena = MakePEAlignArena(seqA.Len(), seqB.Len())
|
|
||||||
}
|
|
||||||
|
|
||||||
score := _FillMatrixPeCenterAlign(seqA.Sequence(), seqA.Qualities(),
|
|
||||||
seqB.Sequence(), seqB.Qualities(), gap, scale,
|
|
||||||
&arena.pointer.scoreMatrix,
|
|
||||||
&arena.pointer.pathMatrix)
|
|
||||||
|
|
||||||
path := _Backtracking(arena.pointer.pathMatrix,
|
|
||||||
seqA.Len(), seqB.Len(),
|
|
||||||
&arena.pointer.path)
|
|
||||||
|
|
||||||
return score, path
|
|
||||||
}
|
|
||||||
|
|
||||||
func PEAlign(seqA, seqB *obiseq.BioSequence,
|
func PEAlign(seqA, seqB *obiseq.BioSequence,
|
||||||
gap, scale float64, fastAlign bool, delta int, fastScoreRel bool,
|
gap, scale float64, fastAlign bool, delta int, fastScoreRel bool,
|
||||||
arena PEAlignArena, shift_buff *map[int]int) (bool, int, []int, int, int, float64) {
|
arena PEAlignArena, shift_buff *map[int]int) (int, []int, int, int, float64) {
|
||||||
var isLeftAlign bool
|
|
||||||
var score, shift int
|
var score, shift int
|
||||||
var startA, startB int
|
var startA, startB int
|
||||||
var partLen, over int
|
var partLen, over int
|
||||||
@@ -537,7 +412,6 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
|||||||
rawSeqB = seqB.Sequence()[0:partLen]
|
rawSeqB = seqB.Sequence()[0:partLen]
|
||||||
qualSeqB = seqB.Qualities()[0:partLen]
|
qualSeqB = seqB.Qualities()[0:partLen]
|
||||||
extra3 = seqB.Len() - partLen
|
extra3 = seqB.Len() - partLen
|
||||||
isLeftAlign = true
|
|
||||||
score = _FillMatrixPeLeftAlign(
|
score = _FillMatrixPeLeftAlign(
|
||||||
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale,
|
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale,
|
||||||
&arena.pointer.scoreMatrix,
|
&arena.pointer.scoreMatrix,
|
||||||
@@ -559,7 +433,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
|||||||
rawSeqA = seqA.Sequence()[:partLen]
|
rawSeqA = seqA.Sequence()[:partLen]
|
||||||
qualSeqA = seqA.Qualities()[:partLen]
|
qualSeqA = seqA.Qualities()[:partLen]
|
||||||
extra3 = partLen - seqA.Len()
|
extra3 = partLen - seqA.Len()
|
||||||
isLeftAlign = false
|
|
||||||
score = _FillMatrixPeRightAlign(
|
score = _FillMatrixPeRightAlign(
|
||||||
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale,
|
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale,
|
||||||
&arena.pointer.scoreMatrix,
|
&arena.pointer.scoreMatrix,
|
||||||
@@ -583,7 +457,6 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
|||||||
qualSeqB = seqB.Qualities()[0:partLen]
|
qualSeqB = seqB.Qualities()[0:partLen]
|
||||||
extra3 = seqB.Len() - partLen
|
extra3 = seqB.Len() - partLen
|
||||||
score = 0
|
score = 0
|
||||||
isLeftAlign = true
|
|
||||||
} else {
|
} else {
|
||||||
startA = 0
|
startA = 0
|
||||||
startB = -shift
|
startB = -shift
|
||||||
@@ -592,7 +465,6 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
|||||||
partLen = len(qualSeqB)
|
partLen = len(qualSeqB)
|
||||||
extra3 = partLen - seqA.Len()
|
extra3 = partLen - seqA.Len()
|
||||||
qualSeqA = seqA.Qualities()[:partLen]
|
qualSeqA = seqA.Qualities()[:partLen]
|
||||||
isLeftAlign = false
|
|
||||||
}
|
}
|
||||||
score = 0
|
score = 0
|
||||||
for i, qualA := range qualSeqA {
|
for i, qualA := range qualSeqA {
|
||||||
@@ -625,14 +497,10 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
|||||||
&arena.pointer.scoreMatrix,
|
&arena.pointer.scoreMatrix,
|
||||||
&arena.pointer.pathMatrix)
|
&arena.pointer.pathMatrix)
|
||||||
|
|
||||||
score = scoreR
|
|
||||||
|
|
||||||
path = _Backtracking(arena.pointer.pathMatrix,
|
path = _Backtracking(arena.pointer.pathMatrix,
|
||||||
len(rawSeqA), len(rawSeqB),
|
len(rawSeqA), len(rawSeqB),
|
||||||
&(arena.pointer.path))
|
&(arena.pointer.path))
|
||||||
|
|
||||||
isLeftAlign = false
|
|
||||||
|
|
||||||
scoreL := _FillMatrixPeLeftAlign(
|
scoreL := _FillMatrixPeLeftAlign(
|
||||||
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale,
|
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale,
|
||||||
&arena.pointer.scoreMatrix,
|
&arena.pointer.scoreMatrix,
|
||||||
@@ -642,11 +510,9 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
|||||||
path = _Backtracking(arena.pointer.pathMatrix,
|
path = _Backtracking(arena.pointer.pathMatrix,
|
||||||
len(rawSeqA), len(rawSeqB),
|
len(rawSeqA), len(rawSeqB),
|
||||||
&(arena.pointer.path))
|
&(arena.pointer.path))
|
||||||
isLeftAlign = true
|
|
||||||
score = scoreL
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return isLeftAlign, score, path, fastCount, over, fastScore
|
return score, path, fastCount, over, fastScore
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,154 +0,0 @@
|
|||||||
package obialign
|
|
||||||
|
|
||||||
import (
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obikmer"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
|
||||||
)
|
|
||||||
|
|
||||||
func ReadAlign(seqA, seqB *obiseq.BioSequence,
|
|
||||||
gap, scale float64, delta int, fastScoreRel bool,
|
|
||||||
arena PEAlignArena, shift_buff *map[int]int) (int, []int, int, int, float64, bool) {
|
|
||||||
var score, shift int
|
|
||||||
var startA, startB int
|
|
||||||
var partLen, over int
|
|
||||||
var rawSeqA, qualSeqA []byte
|
|
||||||
var rawSeqB, qualSeqB []byte
|
|
||||||
var extra5, extra3 int
|
|
||||||
|
|
||||||
var path []int
|
|
||||||
|
|
||||||
if !_InitializedDnaScore {
|
|
||||||
_InitDNAScoreMatrix()
|
|
||||||
}
|
|
||||||
|
|
||||||
fastCount := -1
|
|
||||||
fastScore := -1.0
|
|
||||||
|
|
||||||
directAlignment := true
|
|
||||||
|
|
||||||
index := obikmer.Index4mer(seqA,
|
|
||||||
&arena.pointer.fastIndex,
|
|
||||||
&arena.pointer.fastBuffer)
|
|
||||||
|
|
||||||
shift, fastCount, fastScore = obikmer.FastShiftFourMer(index, shift_buff, seqA.Len(), seqB, fastScoreRel, nil)
|
|
||||||
|
|
||||||
seqBR := seqB.ReverseComplement(false)
|
|
||||||
shiftR, fastCountR, fastScoreR := obikmer.FastShiftFourMer(index, shift_buff, seqA.Len(), seqBR, fastScoreRel, nil)
|
|
||||||
|
|
||||||
if fastCount < fastCountR {
|
|
||||||
shift = shiftR
|
|
||||||
fastCount = fastCountR
|
|
||||||
fastScore = fastScoreR
|
|
||||||
seqB = seqBR
|
|
||||||
directAlignment = false
|
|
||||||
}
|
|
||||||
|
|
||||||
// Compute the overlapping region length
|
|
||||||
switch {
|
|
||||||
case shift > 0:
|
|
||||||
over = seqA.Len() - shift
|
|
||||||
case shift < 0:
|
|
||||||
over = seqB.Len() + shift
|
|
||||||
default:
|
|
||||||
over = min(seqA.Len(), seqB.Len())
|
|
||||||
}
|
|
||||||
|
|
||||||
// log.Warnf("fw/fw: %v shift=%d fastCount=%d/over=%d fastScore=%f",
|
|
||||||
// directAlignment, shift, fastCount, over, fastScore)
|
|
||||||
|
|
||||||
// log.Warnf(("seqA: %s\nseqB: %s\n"), seqA.String(), seqB.String())
|
|
||||||
|
|
||||||
// At least one mismatch exists in the overlaping region
|
|
||||||
if fastCount+3 < over {
|
|
||||||
|
|
||||||
if shift > 0 || (shift == 0 && seqB.Len() >= seqA.Len()) {
|
|
||||||
startA = shift - delta
|
|
||||||
if startA < 0 {
|
|
||||||
startA = 0
|
|
||||||
}
|
|
||||||
extra5 = -startA
|
|
||||||
startB = 0
|
|
||||||
|
|
||||||
rawSeqA = seqA.Sequence()[startA:]
|
|
||||||
qualSeqA = seqA.Qualities()[startA:]
|
|
||||||
partLen = len(rawSeqA)
|
|
||||||
if partLen > seqB.Len() {
|
|
||||||
partLen = seqB.Len()
|
|
||||||
}
|
|
||||||
rawSeqB = seqB.Sequence()[0:partLen]
|
|
||||||
qualSeqB = seqB.Qualities()[0:partLen]
|
|
||||||
extra3 = seqB.Len() - partLen
|
|
||||||
score = _FillMatrixPeLeftAlign(
|
|
||||||
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale,
|
|
||||||
&arena.pointer.scoreMatrix,
|
|
||||||
&arena.pointer.pathMatrix)
|
|
||||||
} else {
|
|
||||||
|
|
||||||
startA = 0
|
|
||||||
startB = -shift - delta
|
|
||||||
if startB < 0 {
|
|
||||||
startB = 0
|
|
||||||
}
|
|
||||||
extra5 = startB
|
|
||||||
rawSeqB = seqB.Sequence()[startB:]
|
|
||||||
qualSeqB = seqB.Qualities()[startB:]
|
|
||||||
partLen = len(rawSeqB)
|
|
||||||
if partLen > seqA.Len() {
|
|
||||||
partLen = seqA.Len()
|
|
||||||
}
|
|
||||||
rawSeqA = seqA.Sequence()[:partLen]
|
|
||||||
qualSeqA = seqA.Qualities()[:partLen]
|
|
||||||
extra3 = partLen - seqA.Len()
|
|
||||||
|
|
||||||
score = _FillMatrixPeRightAlign(
|
|
||||||
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale,
|
|
||||||
&arena.pointer.scoreMatrix,
|
|
||||||
&arena.pointer.pathMatrix)
|
|
||||||
}
|
|
||||||
|
|
||||||
path = _Backtracking(arena.pointer.pathMatrix,
|
|
||||||
len(rawSeqA), len(rawSeqB),
|
|
||||||
&arena.pointer.path)
|
|
||||||
|
|
||||||
} else {
|
|
||||||
|
|
||||||
// Both overlaping regions are identicals
|
|
||||||
|
|
||||||
if shift > 0 || (shift == 0 && seqB.Len() >= seqA.Len()) {
|
|
||||||
startA = shift
|
|
||||||
startB = 0
|
|
||||||
extra5 = -startA
|
|
||||||
qualSeqA = seqA.Qualities()[startA:]
|
|
||||||
partLen = len(qualSeqA)
|
|
||||||
qualSeqB = seqB.Qualities()[0:partLen]
|
|
||||||
extra3 = seqB.Len() - partLen
|
|
||||||
score = 0
|
|
||||||
} else {
|
|
||||||
startA = 0
|
|
||||||
startB = -shift
|
|
||||||
extra5 = startB
|
|
||||||
qualSeqB = seqB.Qualities()[startB:]
|
|
||||||
partLen = len(qualSeqB)
|
|
||||||
extra3 = partLen - seqA.Len()
|
|
||||||
qualSeqA = seqA.Qualities()[:partLen]
|
|
||||||
}
|
|
||||||
|
|
||||||
score = 0
|
|
||||||
for i, qualA := range qualSeqA {
|
|
||||||
qualB := qualSeqB[i]
|
|
||||||
score += _NucScorePartMatchMatch[qualA][qualB]
|
|
||||||
}
|
|
||||||
|
|
||||||
path = arena.pointer.path[:0]
|
|
||||||
path = append(path, 0, partLen)
|
|
||||||
}
|
|
||||||
|
|
||||||
path[0] += extra5
|
|
||||||
if path[len(path)-1] == 0 {
|
|
||||||
path[len(path)-2] += extra3
|
|
||||||
} else {
|
|
||||||
path = append(path, extra3, 0)
|
|
||||||
}
|
|
||||||
|
|
||||||
return score, path, fastCount, over, fastScore, directAlignment
|
|
||||||
}
|
|
||||||
@@ -137,28 +137,6 @@ char *reverseSequence(char *str,char isPattern)
|
|||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* -------------------------------------------- */
|
|
||||||
/* lowercase sequence */
|
|
||||||
/* -------------------------------------------- */
|
|
||||||
|
|
||||||
#define IS_UPPER(c) (((c) >= 'A') && ((c) <= 'A'))
|
|
||||||
#define TO_LOWER(c) ((c) - 'A' + 'a')
|
|
||||||
|
|
||||||
char *LowerSequence(char *seq)
|
|
||||||
{
|
|
||||||
char *cseq;
|
|
||||||
|
|
||||||
for (cseq = seq ; *cseq ; cseq++)
|
|
||||||
if (IS_UPPER(*cseq))
|
|
||||||
*cseq = TO_LOWER(*cseq);
|
|
||||||
|
|
||||||
return seq;
|
|
||||||
}
|
|
||||||
|
|
||||||
#undef IS_UPPER
|
|
||||||
#undef TO_LOWER
|
|
||||||
|
|
||||||
|
|
||||||
char *ecoComplementPattern(char *nucAcSeq)
|
char *ecoComplementPattern(char *nucAcSeq)
|
||||||
{
|
{
|
||||||
return reverseSequence(LXBioSeqComplement(nucAcSeq),1);
|
return reverseSequence(LXBioSeqComplement(nucAcSeq),1);
|
||||||
@@ -187,7 +165,6 @@ void UpperSequence(char *seq)
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* -------------------------------------------- */
|
/* -------------------------------------------- */
|
||||||
/* encode sequence */
|
/* encode sequence */
|
||||||
/* IS_UPPER is slightly faster than isupper */
|
/* IS_UPPER is slightly faster than isupper */
|
||||||
|
|||||||
@@ -9,7 +9,6 @@ import "C"
|
|||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"runtime"
|
"runtime"
|
||||||
"strings"
|
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
@@ -115,7 +114,7 @@ func (pattern ApatPattern) ReverseComplement() (ApatPattern, error) {
|
|||||||
C.free(unsafe.Pointer(errmsg))
|
C.free(unsafe.Pointer(errmsg))
|
||||||
return ApatPattern{nil}, errors.New(message)
|
return ApatPattern{nil}, errors.New(message)
|
||||||
}
|
}
|
||||||
spat := strings.ToLower(C.GoString(apc.cpat))
|
spat := C.GoString(apc.cpat)
|
||||||
ap := _ApatPattern{apc, spat}
|
ap := _ApatPattern{apc, spat}
|
||||||
|
|
||||||
runtime.SetFinalizer(&ap, func(p *_ApatPattern) {
|
runtime.SetFinalizer(&ap, func(p *_ApatPattern) {
|
||||||
@@ -297,24 +296,6 @@ func (pattern ApatPattern) FindAllIndex(sequence ApatSequence, begin, length int
|
|||||||
return loc
|
return loc
|
||||||
}
|
}
|
||||||
|
|
||||||
func (pattern ApatPattern) IsMatching(sequence ApatSequence, begin, length int) bool {
|
|
||||||
if begin < 0 {
|
|
||||||
begin = 0
|
|
||||||
}
|
|
||||||
|
|
||||||
if length < 0 {
|
|
||||||
length = sequence.Len()
|
|
||||||
}
|
|
||||||
|
|
||||||
nhits := int(C.ManberAll(sequence.pointer.pointer,
|
|
||||||
pattern.pointer.pointer,
|
|
||||||
0,
|
|
||||||
C.int32_t(begin),
|
|
||||||
C.int32_t(length+C.MAX_PAT_LEN)))
|
|
||||||
|
|
||||||
return nhits > 0
|
|
||||||
}
|
|
||||||
|
|
||||||
// BestMatch finds the best match of a given pattern in a sequence.
|
// BestMatch finds the best match of a given pattern in a sequence.
|
||||||
//
|
//
|
||||||
// THe function identify the first occurrence of the pattern in the sequence.
|
// THe function identify the first occurrence of the pattern in the sequence.
|
||||||
@@ -354,11 +335,6 @@ func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) (
|
|||||||
nerr = best[2]
|
nerr = best[2]
|
||||||
end = best[1]
|
end = best[1]
|
||||||
|
|
||||||
if best[0] < 0 || best[1] > sequence.Len() {
|
|
||||||
matched = false
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if nerr == 0 || !pattern.pointer.pointer.hasIndel {
|
if nerr == 0 || !pattern.pointer.pointer.hasIndel {
|
||||||
start = best[0]
|
start = best[0]
|
||||||
log.Debugln("No nws ", start, nerr)
|
log.Debugln("No nws ", start, nerr)
|
||||||
@@ -379,31 +355,17 @@ func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) (
|
|||||||
best[0], nerr, int(pattern.pointer.pointer.patlen),
|
best[0], nerr, int(pattern.pointer.pointer.patlen),
|
||||||
sequence.Len(), start, end)
|
sequence.Len(), start, end)
|
||||||
|
|
||||||
from, to, score := obialign.LocatePattern(sequence.pointer.reference.Id(),
|
from, to, score := obialign.LocatePattern((*cpattern)[0:int(pattern.pointer.pointer.patlen)], frg)
|
||||||
(*cpattern)[0:int(pattern.pointer.pointer.patlen)],
|
|
||||||
frg)
|
|
||||||
|
|
||||||
// olderr := m[2]
|
// olderr := m[2]
|
||||||
|
|
||||||
nerr = score
|
nerr = score
|
||||||
start = start + from
|
start = start + from
|
||||||
end = start + to
|
end = start + to
|
||||||
log.Debugf("BestMatch on %s : score=%d [%d..%d]", sequence.pointer.reference.Id(), score, start, nerr)
|
log.Debugln("results", score, start, nerr)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// FilterBestMatch filters the best non overlapping matches of a given pattern in a sequence.
|
|
||||||
//
|
|
||||||
// It takes the following parameters:
|
|
||||||
// - pattern: the pattern to search for (ApatPattern).
|
|
||||||
// - sequence: the sequence to search in (ApatSequence).
|
|
||||||
// - begin: the starting index of the search (int).
|
|
||||||
// - length: the length of the search (int).
|
|
||||||
//
|
|
||||||
// It returns a slice of [3]int representing the locations of all non-overlapping matches in the sequence.
|
|
||||||
// The two firsts values of the [3]int indicate respectively the start and the end position of
|
|
||||||
// the match. Following the GO convention the end position is not included in the
|
|
||||||
// match. The third value indicates the number of error detected for this occurrence.
|
|
||||||
func (pattern ApatPattern) FilterBestMatch(sequence ApatSequence, begin, length int) (loc [][3]int) {
|
func (pattern ApatPattern) FilterBestMatch(sequence ApatSequence, begin, length int) (loc [][3]int) {
|
||||||
res := pattern.FindAllIndex(sequence, begin, length)
|
res := pattern.FindAllIndex(sequence, begin, length)
|
||||||
filtered := make([][3]int, 0, len(res))
|
filtered := make([][3]int, 0, len(res))
|
||||||
@@ -462,15 +424,13 @@ func (pattern ApatPattern) FilterBestMatch(sequence ApatSequence, begin, length
|
|||||||
func (pattern ApatPattern) AllMatches(sequence ApatSequence, begin, length int) (loc [][3]int) {
|
func (pattern ApatPattern) AllMatches(sequence ApatSequence, begin, length int) (loc [][3]int) {
|
||||||
res := pattern.FilterBestMatch(sequence, begin, length)
|
res := pattern.FilterBestMatch(sequence, begin, length)
|
||||||
|
|
||||||
j := 0
|
|
||||||
for _, m := range res {
|
for _, m := range res {
|
||||||
// Recompute the start and end position of the match
|
// Recompute the start and end position of the match
|
||||||
// when the pattern allows for indels
|
// when the pattern allows for indels
|
||||||
if m[2] > 0 && pattern.pointer.pointer.hasIndel {
|
if m[2] > 0 && pattern.pointer.pointer.hasIndel {
|
||||||
// log.Warnf("Locating indel on sequence %s[%s]", sequence.pointer.reference.Id(), pattern.String())
|
start := m[0] - m[2]
|
||||||
start := m[0] - m[2]*2
|
|
||||||
start = max(start, 0)
|
start = max(start, 0)
|
||||||
end := start + int(pattern.pointer.pointer.patlen) + 4*m[2]
|
end := start + int(pattern.pointer.pointer.patlen) + 2*m[2]
|
||||||
end = min(end, sequence.Len())
|
end = min(end, sequence.Len())
|
||||||
// 1 << 30 = 1,073,741,824 = 1Gb
|
// 1 << 30 = 1,073,741,824 = 1Gb
|
||||||
// It's a virtual array mapping the sequence to the pattern
|
// It's a virtual array mapping the sequence to the pattern
|
||||||
@@ -479,24 +439,18 @@ func (pattern ApatPattern) AllMatches(sequence ApatSequence, begin, length int)
|
|||||||
cpattern := (*[1 << 30]byte)(unsafe.Pointer(pattern.pointer.pointer.cpat))
|
cpattern := (*[1 << 30]byte)(unsafe.Pointer(pattern.pointer.pointer.cpat))
|
||||||
frg := sequence.pointer.reference.Sequence()[start:end]
|
frg := sequence.pointer.reference.Sequence()[start:end]
|
||||||
|
|
||||||
pb, pe, score := obialign.LocatePattern(
|
begin, end, score := obialign.LocatePattern((*cpattern)[0:int(pattern.pointer.pointer.patlen)], frg)
|
||||||
sequence.pointer.reference.Id(),
|
|
||||||
(*cpattern)[0:int(pattern.pointer.pointer.patlen)],
|
|
||||||
frg)
|
|
||||||
|
|
||||||
// olderr := m[2]
|
// olderr := m[2]
|
||||||
m[2] = score
|
m[2] = score
|
||||||
m[0] = start + pb
|
m[0] = start + begin
|
||||||
m[1] = start + pe
|
m[1] = start + end
|
||||||
|
|
||||||
// log.Warnf("seq[%d@%d:%d] %d: %s %d - %s:%s:%s", i, m[0], m[1], olderr, sequence.pointer.reference.Id(), score,
|
// log.Warnf("seq[%d@%d:%d] %d: %s %d - %s:%s:%s", i, m[0], m[1], olderr, sequence.pointer.reference.Id(), score,
|
||||||
// frg, (*cpattern)[0:int(pattern.pointer.pointer.patlen)], sequence.pointer.reference.Sequence()[m[0]:m[1]])
|
// frg, (*cpattern)[0:int(pattern.pointer.pointer.patlen)], sequence.pointer.reference.Sequence()[m[0]:m[1]])
|
||||||
}
|
}
|
||||||
|
|
||||||
if int(pattern.pointer.pointer.maxerr) >= m[2] {
|
|
||||||
res[j] = m
|
|
||||||
j++
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return res[0:j]
|
|
||||||
|
// log.Debugf("All matches : %v", res)
|
||||||
|
|
||||||
|
return res
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ package obiapat
|
|||||||
import (
|
import (
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
)
|
)
|
||||||
@@ -104,7 +104,7 @@ func MakeOptions(setters []WithOption) Options {
|
|||||||
extension: -1,
|
extension: -1,
|
||||||
fullExtension: false,
|
fullExtension: false,
|
||||||
circular: false,
|
circular: false,
|
||||||
parallelWorkers: obidefault.ParallelWorkers(),
|
parallelWorkers: obioptions.CLIParallelWorkers(),
|
||||||
batchSize: 100,
|
batchSize: 100,
|
||||||
forward: NilApatPattern,
|
forward: NilApatPattern,
|
||||||
cfwd: NilApatPattern,
|
cfwd: NilApatPattern,
|
||||||
@@ -529,6 +529,7 @@ func PCRSliceWorker(options ...WithOption) obiseq.SeqSliceWorker {
|
|||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
worker := func(sequences obiseq.BioSequenceSlice) (obiseq.BioSequenceSlice, error) {
|
worker := func(sequences obiseq.BioSequenceSlice) (obiseq.BioSequenceSlice, error) {
|
||||||
result := _PCRSlice(sequences, opt)
|
result := _PCRSlice(sequences, opt)
|
||||||
|
sequences.Recycle(true)
|
||||||
return result, nil
|
return result, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,40 +0,0 @@
|
|||||||
package obiapat
|
|
||||||
|
|
||||||
import (
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
|
||||||
log "github.com/sirupsen/logrus"
|
|
||||||
)
|
|
||||||
|
|
||||||
func IsPatternMatchSequence(pattern string, errormax int, bothStrand, allowIndels bool) obiseq.SequencePredicate {
|
|
||||||
|
|
||||||
pat, err := MakeApatPattern(pattern, errormax, allowIndels)
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("error in sequence regular pattern syntax : %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
cpat, err := pat.ReverseComplement()
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("cannot reverse complement the pattern : %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
f := func(sequence *obiseq.BioSequence) bool {
|
|
||||||
aseq, err := MakeApatSequence(sequence, false)
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
log.Panicf("Cannot convert sequence %s to apat format", sequence.Id())
|
|
||||||
}
|
|
||||||
|
|
||||||
match := pat.IsMatching(aseq, 0, aseq.Len())
|
|
||||||
|
|
||||||
if !match && bothStrand {
|
|
||||||
|
|
||||||
match = cpat.IsMatching(aseq, 0, aseq.Len())
|
|
||||||
}
|
|
||||||
|
|
||||||
return match
|
|
||||||
}
|
|
||||||
|
|
||||||
return f
|
|
||||||
}
|
|
||||||
204
pkg/obiblackboard/blackboard.go
Normal file
204
pkg/obiblackboard/blackboard.go
Normal file
@@ -0,0 +1,204 @@
|
|||||||
|
package obiblackboard
|
||||||
|
|
||||||
|
import (
|
||||||
|
"slices"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
|
)
|
||||||
|
|
||||||
|
type DoTask func(*Blackboard, *Task) *Task
|
||||||
|
|
||||||
|
type Blackboard struct {
|
||||||
|
Board map[int]Queue
|
||||||
|
BoardLock *sync.Mutex
|
||||||
|
Runners map[string]DoTask
|
||||||
|
Running *obiutils.Counter
|
||||||
|
TargetSize int
|
||||||
|
Size int
|
||||||
|
}
|
||||||
|
|
||||||
|
func doFinal(bb *Blackboard, task *Task) *Task {
|
||||||
|
if task.SavedTask != nil {
|
||||||
|
return task.SavedTask
|
||||||
|
}
|
||||||
|
|
||||||
|
return NewInitialTask()
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewBlackBoard(size int) *Blackboard {
|
||||||
|
board := make(map[int]Queue, 0)
|
||||||
|
runners := make(map[string]DoTask, 0)
|
||||||
|
|
||||||
|
if size < 2 {
|
||||||
|
size = 2
|
||||||
|
}
|
||||||
|
|
||||||
|
bb := &Blackboard{
|
||||||
|
Board: board,
|
||||||
|
BoardLock: &sync.Mutex{},
|
||||||
|
Runners: runners,
|
||||||
|
Running: obiutils.NewCounter(),
|
||||||
|
TargetSize: size,
|
||||||
|
Size: 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := 0; i < size; i++ {
|
||||||
|
bb.PushTask(NewInitialTask())
|
||||||
|
}
|
||||||
|
|
||||||
|
bb.RegisterRunner("final", doFinal)
|
||||||
|
|
||||||
|
return bb
|
||||||
|
}
|
||||||
|
|
||||||
|
func (bb *Blackboard) RegisterRunner(target string, runner DoTask) {
|
||||||
|
bb.Runners[target] = runner
|
||||||
|
}
|
||||||
|
|
||||||
|
func (bb *Blackboard) MaxQueue() Queue {
|
||||||
|
max_priority := -1
|
||||||
|
max_queue := Queue(nil)
|
||||||
|
for priority, queue := range bb.Board {
|
||||||
|
if priority > max_priority {
|
||||||
|
max_queue = queue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return max_queue
|
||||||
|
}
|
||||||
|
|
||||||
|
func (bb *Blackboard) PopTask() *Task {
|
||||||
|
bb.BoardLock.Lock()
|
||||||
|
defer bb.BoardLock.Unlock()
|
||||||
|
|
||||||
|
q := bb.MaxQueue()
|
||||||
|
|
||||||
|
if q != nil {
|
||||||
|
next_task := (*q)[0]
|
||||||
|
(*q) = (*q)[1:]
|
||||||
|
if len(*q) == 0 {
|
||||||
|
delete(bb.Board, next_task.Priority)
|
||||||
|
}
|
||||||
|
bb.Size--
|
||||||
|
return next_task
|
||||||
|
}
|
||||||
|
|
||||||
|
return (*Task)(nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (bb *Blackboard) PushTask(task *Task) {
|
||||||
|
bb.BoardLock.Lock()
|
||||||
|
defer bb.BoardLock.Unlock()
|
||||||
|
|
||||||
|
if task != nil {
|
||||||
|
priority := task.Priority
|
||||||
|
queue, ok := bb.Board[priority]
|
||||||
|
|
||||||
|
if !ok {
|
||||||
|
queue = NewQueue()
|
||||||
|
bb.Board[priority] = queue
|
||||||
|
}
|
||||||
|
|
||||||
|
*queue = slices.Grow(*queue, 1)
|
||||||
|
*queue = append((*queue), task)
|
||||||
|
|
||||||
|
bb.Size++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (bb *Blackboard) Run() {
|
||||||
|
|
||||||
|
ctask := make(chan *Task)
|
||||||
|
lock := &sync.WaitGroup{}
|
||||||
|
|
||||||
|
launcher := func() {
|
||||||
|
for task := range ctask {
|
||||||
|
runner, ok := bb.Runners[task.Role]
|
||||||
|
|
||||||
|
if ok {
|
||||||
|
task = runner(bb, task)
|
||||||
|
}
|
||||||
|
|
||||||
|
bb.PushTask(task)
|
||||||
|
bb.Running.Dec()
|
||||||
|
}
|
||||||
|
|
||||||
|
lock.Done()
|
||||||
|
}
|
||||||
|
|
||||||
|
parallel := bb.TargetSize - 1
|
||||||
|
lock.Add(parallel)
|
||||||
|
|
||||||
|
for i := 0; i < parallel; i++ {
|
||||||
|
go launcher()
|
||||||
|
}
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
|
||||||
|
for {
|
||||||
|
bb.Running.Inc()
|
||||||
|
task := bb.PopTask()
|
||||||
|
|
||||||
|
if task != nil {
|
||||||
|
ctask <- task
|
||||||
|
} else {
|
||||||
|
bb.Running.Dec()
|
||||||
|
if bb.Running.Value()+bb.Len() <= 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
time.Sleep(time.Millisecond)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
close(ctask)
|
||||||
|
}()
|
||||||
|
|
||||||
|
lock.Wait()
|
||||||
|
}
|
||||||
|
|
||||||
|
// func (bb *Blackboard) Run() {
|
||||||
|
// lock := &sync.WaitGroup{}
|
||||||
|
|
||||||
|
// launcher := func(runner DoTask, task *Task) {
|
||||||
|
// task = runner(bb, task)
|
||||||
|
|
||||||
|
// if task != nil {
|
||||||
|
// for bb.Len() > bb.TargetSize {
|
||||||
|
// time.Sleep(time.Millisecond)
|
||||||
|
// }
|
||||||
|
// bb.PushTask(task)
|
||||||
|
// }
|
||||||
|
|
||||||
|
// bb.Running.Dec()
|
||||||
|
// lock.Done()
|
||||||
|
// }
|
||||||
|
|
||||||
|
// lock.Add(1)
|
||||||
|
|
||||||
|
// func() {
|
||||||
|
// for bb.Len()+bb.Running.Value() > 0 {
|
||||||
|
// bb.Running.Inc()
|
||||||
|
// task := bb.PopTask()
|
||||||
|
|
||||||
|
// if task != nil {
|
||||||
|
// lock.Add(1)
|
||||||
|
// go launcher(bb.Runners[task.Role], task)
|
||||||
|
// } else {
|
||||||
|
// bb.Running.Dec()
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
// lock.Done()
|
||||||
|
// }()
|
||||||
|
|
||||||
|
// lock.Wait()
|
||||||
|
// }
|
||||||
|
|
||||||
|
func (bb *Blackboard) Len() int {
|
||||||
|
return bb.Size
|
||||||
|
}
|
||||||
|
|
||||||
|
// 151431044 151431044 15083822152
|
||||||
50
pkg/obiblackboard/count_sequences.go
Normal file
50
pkg/obiblackboard/count_sequences.go
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
package obiblackboard
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
|
)
|
||||||
|
|
||||||
|
type SequenceCounter struct {
|
||||||
|
Variants int
|
||||||
|
Reads int
|
||||||
|
Nucleotides int
|
||||||
|
Runner DoTask
|
||||||
|
}
|
||||||
|
|
||||||
|
func CountSequenceAggregator(target string) *SequenceCounter {
|
||||||
|
cc := &SequenceCounter{
|
||||||
|
Variants: 0,
|
||||||
|
Reads: 0,
|
||||||
|
Nucleotides: 0,
|
||||||
|
Runner: nil,
|
||||||
|
}
|
||||||
|
|
||||||
|
mutex := sync.Mutex{}
|
||||||
|
|
||||||
|
runner := func(bb *Blackboard, task *Task) *Task {
|
||||||
|
body := task.Body.(obiiter.BioSequenceBatch)
|
||||||
|
|
||||||
|
mutex.Lock()
|
||||||
|
cc.Variants += body.Len()
|
||||||
|
cc.Reads += body.Slice().Count()
|
||||||
|
cc.Nucleotides += body.Slice().Size()
|
||||||
|
mutex.Unlock()
|
||||||
|
|
||||||
|
nt := task.GetNext(target, true, false)
|
||||||
|
return nt
|
||||||
|
}
|
||||||
|
|
||||||
|
cc.Runner = runner
|
||||||
|
return cc
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecycleSequences(rescycleSequence bool, target string) DoTask {
|
||||||
|
return func(bb *Blackboard, task *Task) *Task {
|
||||||
|
body := task.Body.(obiiter.BioSequenceBatch)
|
||||||
|
// log.Warningf("With priority %d, Recycling %s[%d]", task.Priority, body.Source(), body.Order())
|
||||||
|
body.Recycle(rescycleSequence)
|
||||||
|
return task.GetNext(target, false, false)
|
||||||
|
}
|
||||||
|
}
|
||||||
17
pkg/obiblackboard/display_task.go
Normal file
17
pkg/obiblackboard/display_task.go
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
package obiblackboard
|
||||||
|
|
||||||
|
import "fmt"
|
||||||
|
|
||||||
|
func DisplayTask(bb *Blackboard, task *Task) *Task {
|
||||||
|
if task == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("Task: %s:\n%v\n\n", task.Role, task.Body)
|
||||||
|
|
||||||
|
return task
|
||||||
|
}
|
||||||
|
|
||||||
|
func (runner DoTask) Display() DoTask {
|
||||||
|
return runner.CombineWith(DisplayTask)
|
||||||
|
}
|
||||||
70
pkg/obiblackboard/doIterate.go
Normal file
70
pkg/obiblackboard/doIterate.go
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
package obiblackboard
|
||||||
|
|
||||||
|
import "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
|
|
||||||
|
type Iteration[T any] struct {
|
||||||
|
Index int
|
||||||
|
Value T
|
||||||
|
}
|
||||||
|
|
||||||
|
// DoIterateSlice generates a DoTask function that iterates over a given slice and
|
||||||
|
// creates a new InitialTask for each element. The function takes in a slice of type
|
||||||
|
// T and a target string. It returns a DoTask function that can be used to execute
|
||||||
|
// the iteration. The DoTask function takes a Blackboard and a Task as input and
|
||||||
|
// returns a new Task. The Task's Role is set to the target string and its Body is
|
||||||
|
// set to an Iteration struct containing the index i and the element s[i] from the
|
||||||
|
// input slice. The iteration stops when the index i is equal to or greater than
|
||||||
|
// the length of the input slice.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - s: The slice of type T to iterate over.
|
||||||
|
// - target: The target string to set as the Task's Role.
|
||||||
|
//
|
||||||
|
// Return type:
|
||||||
|
// - DoTask: The DoTask function that can be used to execute the iteration.
|
||||||
|
func DoIterateSlice[T any](s []T, target string) DoTask {
|
||||||
|
n := len(s)
|
||||||
|
idx := obiutils.AtomicCounter()
|
||||||
|
|
||||||
|
dt := func(bb *Blackboard, t *Task) *Task {
|
||||||
|
i := idx()
|
||||||
|
if i < n {
|
||||||
|
nt := t.GetNext(target, false, false)
|
||||||
|
nt.Body = Iteration[T]{i, s[i]}
|
||||||
|
return nt
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return dt
|
||||||
|
}
|
||||||
|
|
||||||
|
// DoCount generates a DoTask function that iterates over a given integer n and
|
||||||
|
// creates a new InitialTask for each iteration. The function takes in an integer n
|
||||||
|
// and a target string. It returns a DoTask function that can be used to execute
|
||||||
|
// the iteration. The DoTask function takes a Blackboard and a Task as input and
|
||||||
|
// returns a new Task. The Task's Role is set to the target string and its Body is
|
||||||
|
// set to the current iteration index i. The iteration stops when the index i is
|
||||||
|
// equal to or greater than the input integer n.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - n: The integer to iterate over.
|
||||||
|
// - target: The target string to set as the Task's Role.
|
||||||
|
//
|
||||||
|
// Return type:
|
||||||
|
// - DoTask: The DoTask function that can be used to execute the iteration.
|
||||||
|
func DoCount(n int, target string) DoTask {
|
||||||
|
idx := obiutils.AtomicCounter()
|
||||||
|
|
||||||
|
dt := func(bb *Blackboard, t *Task) *Task {
|
||||||
|
i := idx()
|
||||||
|
if i < n {
|
||||||
|
nt := t.GetNext(target, false, false)
|
||||||
|
nt.Body = i
|
||||||
|
return nt
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return dt
|
||||||
|
}
|
||||||
8
pkg/obiblackboard/queue.go
Normal file
8
pkg/obiblackboard/queue.go
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
package obiblackboard
|
||||||
|
|
||||||
|
type Queue *[]*Task
|
||||||
|
|
||||||
|
func NewQueue() Queue {
|
||||||
|
q := make([]*Task, 0)
|
||||||
|
return &q
|
||||||
|
}
|
||||||
534
pkg/obiblackboard/read_sequences.go
Normal file
534
pkg/obiblackboard/read_sequences.go
Normal file
@@ -0,0 +1,534 @@
|
|||||||
|
package obiblackboard
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"path"
|
||||||
|
"path/filepath"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
|
|
||||||
|
"github.com/gabriel-vasile/mimetype"
|
||||||
|
"github.com/goombaio/orderedset"
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
)
|
||||||
|
|
||||||
|
func ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
|
||||||
|
res, err := _ExpandListOfFiles(check_ext, filenames...)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Infof("Found %d files to process", len(res))
|
||||||
|
}
|
||||||
|
|
||||||
|
return res, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func _ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
|
||||||
|
var err error
|
||||||
|
list_of_files := orderedset.NewOrderedSet()
|
||||||
|
for _, fn := range filenames {
|
||||||
|
// Special case for stdin
|
||||||
|
if fn == "-" {
|
||||||
|
list_of_files.Add(fn)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
err = filepath.Walk(fn,
|
||||||
|
func(path string, info os.FileInfo, err error) error {
|
||||||
|
var e error
|
||||||
|
if info == nil {
|
||||||
|
return fmt.Errorf("cannot open path")
|
||||||
|
}
|
||||||
|
for info.Mode()&os.ModeSymlink == os.ModeSymlink {
|
||||||
|
path, e = filepath.EvalSymlinks(path)
|
||||||
|
if e != nil {
|
||||||
|
return e
|
||||||
|
}
|
||||||
|
|
||||||
|
info, e = os.Stat(path)
|
||||||
|
if e != nil {
|
||||||
|
return e
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if info.IsDir() {
|
||||||
|
if path != fn {
|
||||||
|
subdir, e := ExpandListOfFiles(true, path)
|
||||||
|
if e != nil {
|
||||||
|
return e
|
||||||
|
}
|
||||||
|
for _, f := range subdir {
|
||||||
|
list_of_files.Add(f)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
check_ext = true
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if !check_ext ||
|
||||||
|
strings.HasSuffix(path, "csv") ||
|
||||||
|
strings.HasSuffix(path, "csv.gz") ||
|
||||||
|
strings.HasSuffix(path, "fasta") ||
|
||||||
|
strings.HasSuffix(path, "fasta.gz") ||
|
||||||
|
strings.HasSuffix(path, "fastq") ||
|
||||||
|
strings.HasSuffix(path, "fastq.gz") ||
|
||||||
|
strings.HasSuffix(path, "seq") ||
|
||||||
|
strings.HasSuffix(path, "seq.gz") ||
|
||||||
|
strings.HasSuffix(path, "gb") ||
|
||||||
|
strings.HasSuffix(path, "gb.gz") ||
|
||||||
|
strings.HasSuffix(path, "dat") ||
|
||||||
|
strings.HasSuffix(path, "dat.gz") ||
|
||||||
|
strings.HasSuffix(path, "ecopcr") ||
|
||||||
|
strings.HasSuffix(path, "ecopcr.gz") {
|
||||||
|
log.Debugf("Appending %s file\n", path)
|
||||||
|
list_of_files.Add(path)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
res := make([]string, 0, list_of_files.Size())
|
||||||
|
for _, v := range list_of_files.Values() {
|
||||||
|
res = append(res, v.(string))
|
||||||
|
}
|
||||||
|
|
||||||
|
return res, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// OBIMimeTypeGuesser is a function that takes an io.Reader as input and guesses the MIME type of the data.
|
||||||
|
// It uses several detectors to identify specific file formats, such as FASTA, FASTQ, ecoPCR2, GenBank, and EMBL.
|
||||||
|
// The function reads data from the input stream and analyzes it using the mimetype library.
|
||||||
|
// It then returns the detected MIME type, a modified reader with the read data, and any error encountered during the process.
|
||||||
|
//
|
||||||
|
// The following file types are recognized:
|
||||||
|
// - "text/ecopcr": if the first line starts with "#@ecopcr-v2".
|
||||||
|
// - "text/fasta": if the first line starts with ">".
|
||||||
|
// - "text/fastq": if the first line starts with "@".
|
||||||
|
// - "text/embl": if the first line starts with "ID ".
|
||||||
|
// - "text/genbank": if the first line starts with "LOCUS ".
|
||||||
|
// - "text/genbank" (special case): if the first line "Genetic Sequence Data Bank" (for genbank release files).
|
||||||
|
// - "text/csv"
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - stream: An io.Reader representing the input stream to read data from.
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
// - *mimetype.MIME: The detected MIME type of the data.
|
||||||
|
// - io.Reader: A modified reader with the read data.
|
||||||
|
// - error: Any error encountered during the process.
|
||||||
|
func OBIMimeTypeGuesser(stream io.Reader) (*mimetype.MIME, io.Reader, error) {
|
||||||
|
fastaDetector := func(raw []byte, limit uint32) bool {
|
||||||
|
ok, err := regexp.Match("^>[^ ]", raw)
|
||||||
|
return ok && err == nil
|
||||||
|
}
|
||||||
|
|
||||||
|
fastqDetector := func(raw []byte, limit uint32) bool {
|
||||||
|
ok, err := regexp.Match("^@[^ ].*\n[^ ]+\n\\+", raw)
|
||||||
|
return ok && err == nil
|
||||||
|
}
|
||||||
|
|
||||||
|
ecoPCR2Detector := func(raw []byte, limit uint32) bool {
|
||||||
|
ok := bytes.HasPrefix(raw, []byte("#@ecopcr-v2"))
|
||||||
|
return ok
|
||||||
|
}
|
||||||
|
|
||||||
|
genbankDetector := func(raw []byte, limit uint32) bool {
|
||||||
|
ok2 := bytes.HasPrefix(raw, []byte("LOCUS "))
|
||||||
|
ok1, err := regexp.Match("^[^ ]* +Genetic Sequence Data Bank *\n", raw)
|
||||||
|
return ok2 || (ok1 && err == nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
emblDetector := func(raw []byte, limit uint32) bool {
|
||||||
|
ok := bytes.HasPrefix(raw, []byte("ID "))
|
||||||
|
return ok
|
||||||
|
}
|
||||||
|
|
||||||
|
mimetype.Lookup("text/plain").Extend(fastaDetector, "text/fasta", ".fasta")
|
||||||
|
mimetype.Lookup("text/plain").Extend(fastqDetector, "text/fastq", ".fastq")
|
||||||
|
mimetype.Lookup("text/plain").Extend(ecoPCR2Detector, "text/ecopcr2", ".ecopcr")
|
||||||
|
mimetype.Lookup("text/plain").Extend(genbankDetector, "text/genbank", ".seq")
|
||||||
|
mimetype.Lookup("text/plain").Extend(emblDetector, "text/embl", ".dat")
|
||||||
|
|
||||||
|
mimetype.Lookup("application/octet-stream").Extend(fastaDetector, "text/fasta", ".fasta")
|
||||||
|
mimetype.Lookup("application/octet-stream").Extend(fastqDetector, "text/fastq", ".fastq")
|
||||||
|
mimetype.Lookup("application/octet-stream").Extend(ecoPCR2Detector, "text/ecopcr2", ".ecopcr")
|
||||||
|
mimetype.Lookup("application/octet-stream").Extend(genbankDetector, "text/genbank", ".seq")
|
||||||
|
mimetype.Lookup("application/octet-stream").Extend(emblDetector, "text/embl", ".dat")
|
||||||
|
|
||||||
|
// Create a buffer to store the read data
|
||||||
|
buf := make([]byte, 1024*128)
|
||||||
|
n, err := io.ReadFull(stream, buf)
|
||||||
|
|
||||||
|
if err != nil && err != io.ErrUnexpectedEOF {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Detect the MIME type using the mimetype library
|
||||||
|
mimeType := mimetype.Detect(buf)
|
||||||
|
if mimeType == nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a new reader based on the read data
|
||||||
|
newReader := io.Reader(bytes.NewReader(buf[:n]))
|
||||||
|
|
||||||
|
if err == nil {
|
||||||
|
newReader = io.MultiReader(newReader, stream)
|
||||||
|
}
|
||||||
|
|
||||||
|
return mimeType, newReader, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func TextChunkParser(parser obiformats.SeqFileChunkParser, target string) DoTask {
|
||||||
|
|
||||||
|
return func(bb *Blackboard, task *Task) *Task {
|
||||||
|
chunk := task.Body.(obiformats.SeqFileChunk)
|
||||||
|
sequences, err := parser(chunk.Source, chunk.Raw)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
nt := task.GetNext(target, false, false)
|
||||||
|
nt.Body = obiiter.MakeBioSequenceBatch(
|
||||||
|
chunk.Source,
|
||||||
|
chunk.Order,
|
||||||
|
sequences)
|
||||||
|
|
||||||
|
return nt
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func SeqAnnotParser(parser obiseq.SeqAnnotator, target string) DoTask {
|
||||||
|
worker := obiseq.SeqToSliceWorker(obiseq.AnnotatorToSeqWorker(parser), false)
|
||||||
|
|
||||||
|
return func(bb *Blackboard, task *Task) *Task {
|
||||||
|
batch := task.Body.(obiiter.BioSequenceBatch)
|
||||||
|
sequences, err := worker(batch.Slice())
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("SeqAnnotParser on %s[%d]: %v", batch.Source(), batch.Order(), err)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
nt := task.GetNext(target, false, false)
|
||||||
|
nt.Body = obiiter.MakeBioSequenceBatch(
|
||||||
|
batch.Source(),
|
||||||
|
batch.Order(),
|
||||||
|
sequences,
|
||||||
|
)
|
||||||
|
return nt
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// OpenStream opens a file specified by the given filename and returns a reader for the file,
|
||||||
|
// the detected MIME type of the file, and any error encountered during the process.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - filename: A string representing the path to the file to be opened. If the filename is "-",
|
||||||
|
// the function opens the standard input stream.
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
// - io.Reader: A reader for the file.
|
||||||
|
// - *mimetype.MIME: The detected MIME type of the file.
|
||||||
|
// - error: Any error encountered during the process.
|
||||||
|
func OpenStream(filename string) (io.Reader, *mimetype.MIME, error) {
|
||||||
|
var stream io.Reader
|
||||||
|
var err error
|
||||||
|
if filename == "-" {
|
||||||
|
stream, err = obiformats.Buf(os.Stdin)
|
||||||
|
} else {
|
||||||
|
stream, err = obiformats.Ropen(filename)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Detect the MIME type using the mimetype library
|
||||||
|
mimeType, newReader, err := OBIMimeTypeGuesser(stream)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Infof("%s mime type: %s", filename, mimeType.String())
|
||||||
|
|
||||||
|
return bufio.NewReader(newReader), mimeType, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type OpenedStreamBody struct {
|
||||||
|
Stream io.Reader
|
||||||
|
Filename string
|
||||||
|
Source string
|
||||||
|
Mime *mimetype.MIME
|
||||||
|
ToBeClosed bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func FilenameToStream(target string) DoTask {
|
||||||
|
|
||||||
|
return func(bb *Blackboard, task *Task) *Task {
|
||||||
|
filename := task.Body.(Iteration[string]).Value
|
||||||
|
stream, mimetype, err := OpenStream(filename)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Error opening %s: %v", filename, err)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
tobeclosed := filename != "-"
|
||||||
|
|
||||||
|
switch mimetype.String() {
|
||||||
|
case "text/fasta", "text/fastq", "text/ecopcr2", "text/genbank", "text/embl", "text/csv":
|
||||||
|
nt := task.GetNext(target+":"+mimetype.String(), false, false)
|
||||||
|
nt.Body = OpenedStreamBody{
|
||||||
|
Stream: stream,
|
||||||
|
Mime: mimetype,
|
||||||
|
Filename: filename,
|
||||||
|
Source: obiutils.RemoveAllExt((path.Base(filename))),
|
||||||
|
ToBeClosed: tobeclosed,
|
||||||
|
}
|
||||||
|
|
||||||
|
return nt
|
||||||
|
|
||||||
|
default:
|
||||||
|
log.Errorf("File %s (mime type %s) is an unsupported format", filename, mimetype.String())
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type TextChunkIteratorBody struct {
|
||||||
|
Chunks obiformats.ChannelSeqFileChunk
|
||||||
|
Stream io.Reader
|
||||||
|
Source string
|
||||||
|
ToBeClosed bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func StreamToTextChunkReader(lastEntry obiformats.LastSeqRecord, target string) DoTask {
|
||||||
|
return func(bb *Blackboard, task *Task) *Task {
|
||||||
|
|
||||||
|
body := task.Body.(OpenedStreamBody)
|
||||||
|
iterator := obiformats.ReadSeqFileChunk(
|
||||||
|
body.Source,
|
||||||
|
body.Stream,
|
||||||
|
make([]byte, 64*1024*1024),
|
||||||
|
lastEntry,
|
||||||
|
)
|
||||||
|
|
||||||
|
nt := task.GetNext(target, false, false)
|
||||||
|
nt.Body = TextChunkIteratorBody{
|
||||||
|
Chunks: iterator,
|
||||||
|
Stream: body.Stream,
|
||||||
|
Source: body.Source,
|
||||||
|
ToBeClosed: body.ToBeClosed,
|
||||||
|
}
|
||||||
|
|
||||||
|
return nt
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TextChuckIterator(endTask *Task, target string) DoTask {
|
||||||
|
return func(bb *Blackboard, task *Task) *Task {
|
||||||
|
body := task.Body.(TextChunkIteratorBody)
|
||||||
|
|
||||||
|
chunk, ok := <-body.Chunks
|
||||||
|
|
||||||
|
if !ok {
|
||||||
|
return endTask
|
||||||
|
}
|
||||||
|
|
||||||
|
var nt *Task
|
||||||
|
|
||||||
|
if bb.Len() > bb.TargetSize {
|
||||||
|
nt = task.GetNext(target, false, true)
|
||||||
|
} else {
|
||||||
|
nt = task.GetNext(target, false, false)
|
||||||
|
bb.PushTask(task)
|
||||||
|
}
|
||||||
|
|
||||||
|
nt.Body = chunk
|
||||||
|
return nt
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type SequenceIteratorBody struct {
|
||||||
|
Iterator obiiter.IBioSequence
|
||||||
|
Stream io.Reader
|
||||||
|
Source string
|
||||||
|
ToBeClosed bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func StreamToSequenceReader(
|
||||||
|
reader obiformats.SequenceReader,
|
||||||
|
options []obiformats.WithOption,
|
||||||
|
target string) DoTask {
|
||||||
|
return func(bb *Blackboard, task *Task) *Task {
|
||||||
|
body := task.Body.(OpenedStreamBody)
|
||||||
|
iterator, err := reader(body.Stream, options...)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Error opening %s: %v", body.Filename, err)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
nt := task.GetNext(target, false, false)
|
||||||
|
nt.Body = SequenceIteratorBody{
|
||||||
|
Iterator: iterator,
|
||||||
|
Stream: body.Stream,
|
||||||
|
Source: body.Source,
|
||||||
|
ToBeClosed: body.ToBeClosed,
|
||||||
|
}
|
||||||
|
|
||||||
|
return nt
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func SequenceIterator(endTask *Task, target string) DoTask {
|
||||||
|
return func(bb *Blackboard, task *Task) *Task {
|
||||||
|
body := task.Body.(SequenceIteratorBody)
|
||||||
|
|
||||||
|
if body.Iterator.Next() {
|
||||||
|
batch := body.Iterator.Get()
|
||||||
|
|
||||||
|
var nt *Task
|
||||||
|
if bb.Len() > bb.TargetSize {
|
||||||
|
nt = task.GetNext(target, false, true)
|
||||||
|
} else {
|
||||||
|
nt = task.GetNext(target, false, false)
|
||||||
|
bb.PushTask(task)
|
||||||
|
}
|
||||||
|
|
||||||
|
nt.Body = batch
|
||||||
|
|
||||||
|
return nt
|
||||||
|
} else {
|
||||||
|
return endTask
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (bb *Blackboard) ReadSequences(filepath []string, options ...obiformats.WithOption) {
|
||||||
|
|
||||||
|
var err error
|
||||||
|
|
||||||
|
opts := obiformats.MakeOptions(options)
|
||||||
|
|
||||||
|
if len(filepath) == 0 {
|
||||||
|
filepath = []string{"-"}
|
||||||
|
}
|
||||||
|
|
||||||
|
filepath, err = ExpandListOfFiles(false, filepath...)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Cannot expand list of files : %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
bb.RegisterRunner(
|
||||||
|
"initial",
|
||||||
|
DoIterateSlice(filepath, "filename"),
|
||||||
|
)
|
||||||
|
|
||||||
|
bb.RegisterRunner(
|
||||||
|
"filename",
|
||||||
|
FilenameToStream("stream"),
|
||||||
|
)
|
||||||
|
|
||||||
|
bb.RegisterRunner("stream:text/fasta",
|
||||||
|
StreamToTextChunkReader(
|
||||||
|
obiformats.EndOfLastFastaEntry,
|
||||||
|
"fasta_text_reader",
|
||||||
|
))
|
||||||
|
|
||||||
|
bb.RegisterRunner("fasta_text_reader",
|
||||||
|
TextChuckIterator(NewInitialTask(), "fasta_text_chunk"),
|
||||||
|
)
|
||||||
|
|
||||||
|
bb.RegisterRunner(
|
||||||
|
"fasta_text_chunk",
|
||||||
|
TextChunkParser(
|
||||||
|
obiformats.FastaChunkParser(),
|
||||||
|
"unannotated_sequences",
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
bb.RegisterRunner("stream:text/fastq",
|
||||||
|
StreamToTextChunkReader(obiformats.EndOfLastFastqEntry,
|
||||||
|
"fastq_text_reader"))
|
||||||
|
|
||||||
|
bb.RegisterRunner("fastq_text_reader",
|
||||||
|
TextChuckIterator(NewInitialTask(), "fastq_text_chunk"),
|
||||||
|
)
|
||||||
|
|
||||||
|
bb.RegisterRunner(
|
||||||
|
"fastq_text_chunk",
|
||||||
|
TextChunkParser(
|
||||||
|
obiformats.FastqChunkParser(obioptions.InputQualityShift()),
|
||||||
|
"unannotated_sequences",
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
bb.RegisterRunner("stream:text/embl",
|
||||||
|
StreamToTextChunkReader(obiformats.EndOfLastFlatFileEntry,
|
||||||
|
"embl_text_reader"))
|
||||||
|
|
||||||
|
bb.RegisterRunner("embl_text_reader",
|
||||||
|
TextChuckIterator(NewInitialTask(), "embl_text_chunk"),
|
||||||
|
)
|
||||||
|
|
||||||
|
bb.RegisterRunner(
|
||||||
|
"embl_text_chunk",
|
||||||
|
TextChunkParser(
|
||||||
|
obiformats.EmblChunkParser(opts.WithFeatureTable()),
|
||||||
|
"sequences",
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
bb.RegisterRunner("stream:text/genbank",
|
||||||
|
StreamToTextChunkReader(obiformats.EndOfLastFlatFileEntry,
|
||||||
|
"genbank_text_reader"))
|
||||||
|
|
||||||
|
bb.RegisterRunner("genbank_text_reader",
|
||||||
|
TextChuckIterator(NewInitialTask(), "genbank_text_chunk"),
|
||||||
|
)
|
||||||
|
|
||||||
|
bb.RegisterRunner(
|
||||||
|
"genbank_text_chunk",
|
||||||
|
TextChunkParser(
|
||||||
|
obiformats.GenbankChunkParser(opts.WithFeatureTable()),
|
||||||
|
"sequences",
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
bb.RegisterRunner(
|
||||||
|
"unannotated_sequences",
|
||||||
|
SeqAnnotParser(
|
||||||
|
opts.ParseFastSeqHeader(),
|
||||||
|
"sequences",
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
bb.RegisterRunner("stream:text/csv",
|
||||||
|
StreamToSequenceReader(obiformats.ReadCSV, options, "sequence_reader"))
|
||||||
|
|
||||||
|
bb.RegisterRunner("stream:text/ecopcr2",
|
||||||
|
StreamToSequenceReader(obiformats.ReadEcoPCR, options, "sequence_reader"))
|
||||||
|
|
||||||
|
bb.RegisterRunner("sequence_reader",
|
||||||
|
SequenceIterator(NewInitialTask(), "sequences"),
|
||||||
|
)
|
||||||
|
|
||||||
|
}
|
||||||
108
pkg/obiblackboard/subtask.go
Normal file
108
pkg/obiblackboard/subtask.go
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
package obiblackboard
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
)
|
||||||
|
|
||||||
|
// RepeatTask creates a new DoTask function that repeats the given task n times.
|
||||||
|
//
|
||||||
|
// It takes an integer n as input, which specifies the number of times the task should be repeated.
|
||||||
|
// It returns a new DoTask function that can be used to execute the repeated task.
|
||||||
|
//
|
||||||
|
// The returned DoTask function maintains a map of tasks to their counts and tasks.
|
||||||
|
// When a task is executed, it checks if the task has been executed before.
|
||||||
|
// If it has, it increments the count and returns the previously executed task.
|
||||||
|
// If it has not been executed before, it executes the task using the provided runner function.
|
||||||
|
// If the runner function returns nil, the task is not added to the task memory and nil is returned.
|
||||||
|
// If the runner function returns a non-nil task, it is added to the task memory with a count of 0.
|
||||||
|
// After executing the task, the function checks if the count is less than (n-1).
|
||||||
|
// If it is, the task is added back to the blackboard to be executed again.
|
||||||
|
// If the count is equal to (n-1), the task is removed from the task memory.
|
||||||
|
// Finally, the function returns the executed task.
|
||||||
|
func (runner DoTask) RepeatTask(n int) DoTask {
|
||||||
|
type memtask struct {
|
||||||
|
count int
|
||||||
|
task *Task
|
||||||
|
}
|
||||||
|
taskMemory := make(map[*Task]*memtask)
|
||||||
|
taskMemoryLock := sync.Mutex{}
|
||||||
|
|
||||||
|
if n < 1 {
|
||||||
|
log.Fatalf("Cannot repeat a task less than once (n=%d)", n)
|
||||||
|
}
|
||||||
|
|
||||||
|
st := func(bb *Blackboard, task *Task) *Task {
|
||||||
|
taskMemoryLock.Lock()
|
||||||
|
|
||||||
|
mem, ok := taskMemory[task]
|
||||||
|
|
||||||
|
if !ok {
|
||||||
|
nt := runner(bb, task)
|
||||||
|
|
||||||
|
if nt == nil {
|
||||||
|
taskMemoryLock.Unlock()
|
||||||
|
return nt
|
||||||
|
}
|
||||||
|
|
||||||
|
mem = &memtask{
|
||||||
|
count: 0,
|
||||||
|
task: nt,
|
||||||
|
}
|
||||||
|
|
||||||
|
taskMemory[task] = mem
|
||||||
|
} else {
|
||||||
|
mem.count++
|
||||||
|
}
|
||||||
|
|
||||||
|
taskMemoryLock.Unlock()
|
||||||
|
|
||||||
|
if mem.count < (n - 1) {
|
||||||
|
bb.PushTask(task)
|
||||||
|
}
|
||||||
|
|
||||||
|
if mem.count == (n - 1) {
|
||||||
|
taskMemoryLock.Lock()
|
||||||
|
delete(taskMemory, task)
|
||||||
|
taskMemoryLock.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
return mem.task
|
||||||
|
}
|
||||||
|
|
||||||
|
return st
|
||||||
|
}
|
||||||
|
|
||||||
|
// CombineWith returns a new DoTask function that combines the given DoTask
|
||||||
|
// functions. The returned function applies the `other` function to the result
|
||||||
|
// of the `runner` function. The `bb` parameter is the Blackboard instance,
|
||||||
|
// and the `task` parameter is the Task instance.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - bb: The Blackboard instance.
|
||||||
|
// - task: The Task instance.
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
// - *Task: The result of applying the `other` function to the result of the
|
||||||
|
// `runner` function.
|
||||||
|
func (runner DoTask) CombineWith(other DoTask) DoTask {
|
||||||
|
return func(bb *Blackboard, task *Task) *Task {
|
||||||
|
return other(bb, runner(bb, task))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetTarget sets the target role for the task.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - target: The target role to set.
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
// - DoTask: The modified DoTask function.
|
||||||
|
func (runner DoTask) SetTarget(target string) DoTask {
|
||||||
|
return func(bb *Blackboard, task *Task) *Task {
|
||||||
|
nt := runner(bb, task)
|
||||||
|
nt.Role = target
|
||||||
|
return nt
|
||||||
|
}
|
||||||
|
}
|
||||||
34
pkg/obiblackboard/task.go
Normal file
34
pkg/obiblackboard/task.go
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
package obiblackboard
|
||||||
|
|
||||||
|
type Task struct {
|
||||||
|
Role string
|
||||||
|
SavedTask *Task
|
||||||
|
Priority int
|
||||||
|
Body interface{}
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewInitialTask() *Task {
|
||||||
|
return &Task{
|
||||||
|
Role: "initial",
|
||||||
|
SavedTask: nil,
|
||||||
|
Priority: 0,
|
||||||
|
Body: nil,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (task *Task) GetNext(target string, copy bool, save bool) *Task {
|
||||||
|
t := NewInitialTask()
|
||||||
|
t.Priority = task.Priority + 1
|
||||||
|
t.Role = target
|
||||||
|
if copy {
|
||||||
|
t.Body = task.Body
|
||||||
|
}
|
||||||
|
|
||||||
|
if save {
|
||||||
|
t.SavedTask = task
|
||||||
|
} else {
|
||||||
|
t.SavedTask = task.SavedTask
|
||||||
|
}
|
||||||
|
|
||||||
|
return t
|
||||||
|
}
|
||||||
@@ -12,13 +12,6 @@ import (
|
|||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
)
|
)
|
||||||
|
|
||||||
// tempDir creates a temporary directory with a prefix "obiseq_chunks_"
|
|
||||||
// in the system's temporary directory. It returns the path of the
|
|
||||||
// created directory and any error encountered during the creation process.
|
|
||||||
//
|
|
||||||
// If the directory creation is successful, the path to the new
|
|
||||||
// temporary directory is returned. If there is an error, it returns
|
|
||||||
// an empty string and the error encountered.
|
|
||||||
func tempDir() (string, error) {
|
func tempDir() (string, error) {
|
||||||
dir, err := os.MkdirTemp(os.TempDir(), "obiseq_chunks_")
|
dir, err := os.MkdirTemp(os.TempDir(), "obiseq_chunks_")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -27,19 +20,6 @@ func tempDir() (string, error) {
|
|||||||
return dir, nil
|
return dir, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// find searches for files with a specific extension in the given root directory
|
|
||||||
// and its subdirectories. It returns a slice of strings containing the paths
|
|
||||||
// of the found files.
|
|
||||||
//
|
|
||||||
// Parameters:
|
|
||||||
// - root: The root directory to start the search from.
|
|
||||||
// - ext: The file extension to look for (including the leading dot, e.g., ".txt").
|
|
||||||
//
|
|
||||||
// Returns:
|
|
||||||
// A slice of strings containing the paths of files that match the specified
|
|
||||||
// extension. If no files are found, an empty slice is returned. Any errors
|
|
||||||
// encountered during the directory traversal will be returned as part of the
|
|
||||||
// WalkDir function's error handling.
|
|
||||||
func find(root, ext string) []string {
|
func find(root, ext string) []string {
|
||||||
var a []string
|
var a []string
|
||||||
filepath.WalkDir(root, func(s string, d fs.DirEntry, e error) error {
|
filepath.WalkDir(root, func(s string, d fs.DirEntry, e error) error {
|
||||||
@@ -54,24 +34,6 @@ func find(root, ext string) []string {
|
|||||||
return a
|
return a
|
||||||
}
|
}
|
||||||
|
|
||||||
// ISequenceChunkOnDisk processes a sequence iterator by distributing the sequences
|
|
||||||
// into chunks stored on disk. It uses a classifier to determine how to distribute
|
|
||||||
// the sequences and returns a new iterator for the processed sequences.
|
|
||||||
//
|
|
||||||
// Parameters:
|
|
||||||
// - iterator: An iterator of biosequences to be processed.
|
|
||||||
// - classifier: A pointer to a BioSequenceClassifier used to classify the sequences
|
|
||||||
// during distribution.
|
|
||||||
//
|
|
||||||
// Returns:
|
|
||||||
// An iterator of biosequences representing the processed chunks. If an error occurs
|
|
||||||
// during the creation of the temporary directory or any other operation, it returns
|
|
||||||
// an error along with a nil iterator.
|
|
||||||
//
|
|
||||||
// The function operates asynchronously, creating a temporary directory to store
|
|
||||||
// the sequence chunks. Once the processing is complete, the temporary directory
|
|
||||||
// is removed. The function logs the number of batches created and the processing
|
|
||||||
// status of each batch.
|
|
||||||
func ISequenceChunkOnDisk(iterator obiiter.IBioSequence,
|
func ISequenceChunkOnDisk(iterator obiiter.IBioSequence,
|
||||||
classifier *obiseq.BioSequenceClassifier) (obiiter.IBioSequence, error) {
|
classifier *obiseq.BioSequenceClassifier) (obiiter.IBioSequence, error) {
|
||||||
dir, err := tempDir()
|
dir, err := tempDir()
|
||||||
|
|||||||
@@ -49,6 +49,7 @@ func ISequenceChunk(iterator obiiter.IBioSequence,
|
|||||||
b := data.Get()
|
b := data.Get()
|
||||||
source = b.Source()
|
source = b.Source()
|
||||||
*chunk = append(*chunk, b.Slice()...)
|
*chunk = append(*chunk, b.Slice()...)
|
||||||
|
b.Recycle(false)
|
||||||
}
|
}
|
||||||
|
|
||||||
lock.Lock()
|
lock.Lock()
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
package obichunk
|
package obichunk
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -29,8 +29,8 @@ func MakeOptions(setters []WithOption) Options {
|
|||||||
navalue: "NA",
|
navalue: "NA",
|
||||||
cacheOnDisk: false,
|
cacheOnDisk: false,
|
||||||
batchCount: 100,
|
batchCount: 100,
|
||||||
batchSize: obidefault.BatchSize(),
|
batchSize: obioptions.CLIBatchSize(),
|
||||||
parallelWorkers: obidefault.ParallelWorkers(),
|
parallelWorkers: obioptions.CLIParallelWorkers(),
|
||||||
noSingleton: false,
|
noSingleton: false,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -6,8 +6,8 @@ import (
|
|||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -62,7 +62,7 @@ func ISequenceSubChunk(iterator obiiter.IBioSequence,
|
|||||||
nworkers int) (obiiter.IBioSequence, error) {
|
nworkers int) (obiiter.IBioSequence, error) {
|
||||||
|
|
||||||
if nworkers <= 0 {
|
if nworkers <= 0 {
|
||||||
nworkers = obidefault.ParallelWorkers()
|
nworkers = obioptions.CLIParallelWorkers()
|
||||||
}
|
}
|
||||||
|
|
||||||
newIter := obiiter.MakeIBioSequence()
|
newIter := obiiter.MakeIBioSequence()
|
||||||
@@ -107,6 +107,8 @@ func ISequenceSubChunk(iterator obiiter.IBioSequence,
|
|||||||
batch.Slice()[i] = nil
|
batch.Slice()[i] = nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
batch.Recycle(false)
|
||||||
|
|
||||||
_By(func(p1, p2 *sSS) bool {
|
_By(func(p1, p2 *sSS) bool {
|
||||||
return p1.code < p2.code
|
return p1.code < p2.code
|
||||||
}).Sort(ordered)
|
}).Sort(ordered)
|
||||||
|
|||||||
@@ -95,7 +95,10 @@ func IUniqueSequence(iterator obiiter.IBioSequence,
|
|||||||
|
|
||||||
if icat < 0 || len(batch.Slice()) == 1 {
|
if icat < 0 || len(batch.Slice()) == 1 {
|
||||||
// No more sub classification of sequence or only a single sequence
|
// No more sub classification of sequence or only a single sequence
|
||||||
if !(opts.NoSingleton() && len(batch.Slice()) == 1 && batch.Slice()[0].Count() == 1) {
|
if opts.NoSingleton() && len(batch.Slice()) == 1 && batch.Slice()[0].Count() == 1 {
|
||||||
|
// We remove singleton from output
|
||||||
|
batch.Recycle(true)
|
||||||
|
} else {
|
||||||
iUnique.Push(batch.Reorder(nextOrder()))
|
iUnique.Push(batch.Reorder(nextOrder()))
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -1,26 +0,0 @@
|
|||||||
package obidefault
|
|
||||||
|
|
||||||
var _BatchSize = 2000
|
|
||||||
|
|
||||||
// SetBatchSize sets the size of the sequence batches.
|
|
||||||
//
|
|
||||||
// n - an integer representing the size of the sequence batches.
|
|
||||||
func SetBatchSize(n int) {
|
|
||||||
_BatchSize = n
|
|
||||||
}
|
|
||||||
|
|
||||||
// CLIBatchSize returns the expected size of the sequence batches.
|
|
||||||
//
|
|
||||||
// In Obitools, the sequences are processed in parallel by batches.
|
|
||||||
// The number of sequence in each batch is determined by the command line option
|
|
||||||
// --batch-size and the environment variable OBIBATCHSIZE.
|
|
||||||
//
|
|
||||||
// No parameters.
|
|
||||||
// Returns an integer value.
|
|
||||||
func BatchSize() int {
|
|
||||||
return _BatchSize
|
|
||||||
}
|
|
||||||
|
|
||||||
func BatchSizePtr() *int {
|
|
||||||
return &_BatchSize
|
|
||||||
}
|
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
package obidefault
|
|
||||||
|
|
||||||
var __compressed__ = false
|
|
||||||
|
|
||||||
func CompressOutput() bool {
|
|
||||||
return __compressed__
|
|
||||||
}
|
|
||||||
|
|
||||||
func SetCompressOutput(b bool) {
|
|
||||||
__compressed__ = b
|
|
||||||
}
|
|
||||||
|
|
||||||
func CompressedPtr() *bool {
|
|
||||||
return &__compressed__
|
|
||||||
}
|
|
||||||
@@ -1,29 +0,0 @@
|
|||||||
package obidefault
|
|
||||||
|
|
||||||
var _Quality_Shift_Input = byte(33)
|
|
||||||
var _Quality_Shift_Output = byte(33)
|
|
||||||
var _Read_Qualities = true
|
|
||||||
|
|
||||||
func SetReadQualitiesShift(shift byte) {
|
|
||||||
_Quality_Shift_Input = shift
|
|
||||||
}
|
|
||||||
|
|
||||||
func ReadQualitiesShift() byte {
|
|
||||||
return _Quality_Shift_Input
|
|
||||||
}
|
|
||||||
|
|
||||||
func SetWriteQualitiesShift(shift byte) {
|
|
||||||
_Quality_Shift_Output = shift
|
|
||||||
}
|
|
||||||
|
|
||||||
func WriteQualitiesShift() byte {
|
|
||||||
return _Quality_Shift_Output
|
|
||||||
}
|
|
||||||
|
|
||||||
func SetReadQualities(read bool) {
|
|
||||||
_Read_Qualities = read
|
|
||||||
}
|
|
||||||
|
|
||||||
func ReadQualities() bool {
|
|
||||||
return _Read_Qualities
|
|
||||||
}
|
|
||||||
@@ -1,58 +0,0 @@
|
|||||||
package obidefault
|
|
||||||
|
|
||||||
var __taxonomy__ = ""
|
|
||||||
var __alternative_name__ = false
|
|
||||||
var __fail_on_taxonomy__ = false
|
|
||||||
var __update_taxid__ = false
|
|
||||||
|
|
||||||
func SelectedTaxonomy() string {
|
|
||||||
return __taxonomy__
|
|
||||||
}
|
|
||||||
|
|
||||||
func HasSelectedTaxonomy() bool {
|
|
||||||
return __taxonomy__ != ""
|
|
||||||
}
|
|
||||||
|
|
||||||
func AreAlternativeNamesSelected() bool {
|
|
||||||
return __alternative_name__
|
|
||||||
}
|
|
||||||
|
|
||||||
func SelectedTaxonomyPtr() *string {
|
|
||||||
return &__taxonomy__
|
|
||||||
}
|
|
||||||
|
|
||||||
func AlternativeNamesSelectedPtr() *bool {
|
|
||||||
return &__alternative_name__
|
|
||||||
}
|
|
||||||
|
|
||||||
func SetSelectedTaxonomy(taxonomy string) {
|
|
||||||
__taxonomy__ = taxonomy
|
|
||||||
}
|
|
||||||
|
|
||||||
func SetAlternativeNamesSelected(alt bool) {
|
|
||||||
__alternative_name__ = alt
|
|
||||||
}
|
|
||||||
|
|
||||||
func SetFailOnTaxonomy(fail bool) {
|
|
||||||
__fail_on_taxonomy__ = fail
|
|
||||||
}
|
|
||||||
|
|
||||||
func SetUpdateTaxid(update bool) {
|
|
||||||
__update_taxid__ = update
|
|
||||||
}
|
|
||||||
|
|
||||||
func FailOnTaxonomyPtr() *bool {
|
|
||||||
return &__fail_on_taxonomy__
|
|
||||||
}
|
|
||||||
|
|
||||||
func UpdateTaxidPtr() *bool {
|
|
||||||
return &__update_taxid__
|
|
||||||
}
|
|
||||||
|
|
||||||
func FailOnTaxonomy() bool {
|
|
||||||
return __fail_on_taxonomy__
|
|
||||||
}
|
|
||||||
|
|
||||||
func UpdateTaxid() bool {
|
|
||||||
return __update_taxid__
|
|
||||||
}
|
|
||||||
@@ -1,170 +0,0 @@
|
|||||||
package obidefault
|
|
||||||
|
|
||||||
import "runtime"
|
|
||||||
|
|
||||||
var _MaxAllowedCPU = runtime.NumCPU()
|
|
||||||
var _WorkerPerCore = 1.0
|
|
||||||
|
|
||||||
var _ReadWorkerPerCore = 0.25
|
|
||||||
var _WriteWorkerPerCore = 0.25
|
|
||||||
|
|
||||||
var _StrictReadWorker = 0
|
|
||||||
var _StrictWriteWorker = 0
|
|
||||||
|
|
||||||
var _ParallelFilesRead = 0
|
|
||||||
|
|
||||||
// CLIParallelWorkers returns the number of parallel workers used for
|
|
||||||
// computing the result.
|
|
||||||
//
|
|
||||||
// The number of parallel workers is determined by the command line option
|
|
||||||
// --max-cpu|-m and the environment variable OBIMAXCPU. This number is
|
|
||||||
// multiplied by the variable _WorkerPerCore.
|
|
||||||
//
|
|
||||||
// No parameters.
|
|
||||||
// Returns an integer representing the number of parallel workers.
|
|
||||||
func ParallelWorkers() int {
|
|
||||||
return int(float64(MaxCPU()) * float64(WorkerPerCore()))
|
|
||||||
}
|
|
||||||
|
|
||||||
// CLIMaxCPU returns the maximum number of CPU cores allowed.
|
|
||||||
//
|
|
||||||
// The maximum number of CPU cores is determined by the command line option
|
|
||||||
// --max-cpu|-m and the environment variable OBIMAXCPU.
|
|
||||||
//
|
|
||||||
// No parameters.
|
|
||||||
// Returns an integer representing the maximum number of CPU cores allowed.
|
|
||||||
func MaxCPU() int {
|
|
||||||
return _MaxAllowedCPU
|
|
||||||
}
|
|
||||||
|
|
||||||
func MaxCPUPtr() *int {
|
|
||||||
return &_MaxAllowedCPU
|
|
||||||
}
|
|
||||||
|
|
||||||
// WorkerPerCore returns the number of workers per CPU core.
|
|
||||||
//
|
|
||||||
// No parameters.
|
|
||||||
// Returns a float64 representing the number of workers per CPU core.
|
|
||||||
func WorkerPerCore() float64 {
|
|
||||||
return _WorkerPerCore
|
|
||||||
}
|
|
||||||
|
|
||||||
// SetWorkerPerCore sets the number of workers per CPU core.
|
|
||||||
//
|
|
||||||
// It takes a float64 parameter representing the number of workers
|
|
||||||
// per CPU core and does not return any value.
|
|
||||||
func SetWorkerPerCore(n float64) {
|
|
||||||
_WorkerPerCore = n
|
|
||||||
}
|
|
||||||
|
|
||||||
// SetMaxCPU sets the maximum number of CPU cores allowed.
|
|
||||||
//
|
|
||||||
// n - an integer representing the new maximum number of CPU cores.
|
|
||||||
func SetMaxCPU(n int) {
|
|
||||||
_MaxAllowedCPU = n
|
|
||||||
}
|
|
||||||
|
|
||||||
// SetReadWorker sets the number of workers for reading files.
|
|
||||||
//
|
|
||||||
// The number of worker dedicated to reading files is determined
|
|
||||||
// as the number of allowed CPU cores multiplied by number of read workers per core.
|
|
||||||
// Setting the number of read workers using this function allows to decouple the number
|
|
||||||
// of read workers from the number of CPU cores.
|
|
||||||
//
|
|
||||||
// n - an integer representing the number of workers to be set.
|
|
||||||
func SetStrictReadWorker(n int) {
|
|
||||||
_StrictReadWorker = n
|
|
||||||
}
|
|
||||||
|
|
||||||
func SetStrictWriteWorker(n int) {
|
|
||||||
_StrictWriteWorker = n
|
|
||||||
}
|
|
||||||
|
|
||||||
// SetReadWorkerPerCore sets the number of worker per CPU
|
|
||||||
// core for reading files.
|
|
||||||
//
|
|
||||||
// n float64
|
|
||||||
func SetReadWorkerPerCore(n float64) {
|
|
||||||
_ReadWorkerPerCore = n
|
|
||||||
}
|
|
||||||
|
|
||||||
func SetWriteWorkerPerCore(n float64) {
|
|
||||||
_WriteWorkerPerCore = n
|
|
||||||
}
|
|
||||||
|
|
||||||
// ReadWorker returns the number of workers for reading files.
|
|
||||||
//
|
|
||||||
// No parameters.
|
|
||||||
// Returns an integer representing the number of workers.
|
|
||||||
func StrictReadWorker() int {
|
|
||||||
return _StrictReadWorker
|
|
||||||
}
|
|
||||||
|
|
||||||
func StrictWriteWorker() int {
|
|
||||||
return _StrictWriteWorker
|
|
||||||
}
|
|
||||||
|
|
||||||
// CLIReadParallelWorkers returns the number of parallel workers used for
|
|
||||||
// reading files.
|
|
||||||
//
|
|
||||||
// The number of parallel workers is determined by the command line option
|
|
||||||
// --max-cpu|-m and the environment variable OBIMAXCPU. This number is
|
|
||||||
// multiplied by the variable _ReadWorkerPerCore.
|
|
||||||
//
|
|
||||||
// No parameters.
|
|
||||||
// Returns an integer representing the number of parallel workers.
|
|
||||||
func ReadParallelWorkers() int {
|
|
||||||
if StrictReadWorker() == 0 {
|
|
||||||
n := int(float64(MaxCPU()) * ReadWorkerPerCore())
|
|
||||||
if n == 0 {
|
|
||||||
n = 1
|
|
||||||
}
|
|
||||||
return n
|
|
||||||
} else {
|
|
||||||
return StrictReadWorker()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func WriteParallelWorkers() int {
|
|
||||||
if StrictWriteWorker() == 0 {
|
|
||||||
n := int(float64(MaxCPU()) * WriteWorkerPerCore())
|
|
||||||
if n == 0 {
|
|
||||||
n = 1
|
|
||||||
}
|
|
||||||
return n
|
|
||||||
} else {
|
|
||||||
return StrictReadWorker()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ReadWorkerPerCore returns the number of worker per CPU core for
|
|
||||||
// computing the result.
|
|
||||||
//
|
|
||||||
// No parameters.
|
|
||||||
// Returns a float64 representing the number of worker per CPU core.
|
|
||||||
func ReadWorkerPerCore() float64 {
|
|
||||||
return _ReadWorkerPerCore
|
|
||||||
}
|
|
||||||
|
|
||||||
func WriteWorkerPerCore() float64 {
|
|
||||||
return _ReadWorkerPerCore
|
|
||||||
}
|
|
||||||
|
|
||||||
// ParallelFilesRead returns the number of files to be read in parallel.
|
|
||||||
//
|
|
||||||
// No parameters.
|
|
||||||
// Returns an integer representing the number of files to be read.
|
|
||||||
func ParallelFilesRead() int {
|
|
||||||
if _ParallelFilesRead == 0 {
|
|
||||||
return ReadParallelWorkers()
|
|
||||||
} else {
|
|
||||||
return _ParallelFilesRead
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// SetParallelFilesRead sets the number of files to be read in parallel.
|
|
||||||
//
|
|
||||||
// n - an integer representing the number of files to be set.
|
|
||||||
func SetParallelFilesRead(n int) {
|
|
||||||
_ParallelFilesRead = n
|
|
||||||
}
|
|
||||||
@@ -5,10 +5,10 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"path"
|
"path"
|
||||||
"strings"
|
"unsafe"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
"github.com/goccy/go-json"
|
"github.com/goccy/go-json"
|
||||||
@@ -94,28 +94,19 @@ func _ParseCsvFile(source string,
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
ft := header[i]
|
err := json.Unmarshal(unsafe.Slice(unsafe.StringData(field), len(field)), &val)
|
||||||
|
|
||||||
switch {
|
if err != nil {
|
||||||
case ft == "taxid":
|
val = field
|
||||||
sequence.SetTaxid(field)
|
} else {
|
||||||
case strings.HasSuffix(ft, "_taxid"):
|
if _, ok := val.(float64); ok {
|
||||||
sequence.SetTaxid(field, strings.TrimSuffix(ft, "_taxid"))
|
if obiutils.IsIntegral(val.(float64)) {
|
||||||
default:
|
val = int(val.(float64))
|
||||||
err := json.Unmarshal(obiutils.UnsafeBytes(field), &val)
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
val = field
|
|
||||||
} else {
|
|
||||||
if _, ok := val.(float64); ok {
|
|
||||||
if obiutils.IsIntegral(val.(float64)) {
|
|
||||||
val = int(val.(float64))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sequence.SetAttribute(ft, val)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sequence.SetAttribute(header[i], val)
|
||||||
}
|
}
|
||||||
|
|
||||||
slice = append(slice, sequence)
|
slice = append(slice, sequence)
|
||||||
@@ -143,7 +134,7 @@ func ReadCSV(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, err
|
|||||||
go _ParseCsvFile(opt.Source(),
|
go _ParseCsvFile(opt.Source(),
|
||||||
reader,
|
reader,
|
||||||
out,
|
out,
|
||||||
obidefault.ReadQualitiesShift(),
|
byte(obioptions.InputQualityShift()),
|
||||||
opt.BatchSize())
|
opt.BatchSize())
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
@@ -157,9 +148,9 @@ func ReadCSV(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, err
|
|||||||
func ReadCSVFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
|
func ReadCSVFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
|
|
||||||
options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))
|
options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))
|
||||||
file, err := obiutils.Ropen(filename)
|
file, err := Ropen(filename)
|
||||||
|
|
||||||
if err == obiutils.ErrNoContent {
|
if err == ErrNoContent {
|
||||||
log.Infof("file %s is empty", filename)
|
log.Infof("file %s is empty", filename)
|
||||||
return ReadEmptyFile(options...)
|
return ReadEmptyFile(options...)
|
||||||
}
|
}
|
||||||
@@ -173,9 +164,9 @@ func ReadCSVFromFile(filename string, options ...WithOption) (obiiter.IBioSequen
|
|||||||
|
|
||||||
func ReadCSVFromStdin(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, error) {
|
func ReadCSVFromStdin(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
options = append(options, OptionsSource(obiutils.RemoveAllExt("stdin")))
|
options = append(options, OptionsSource(obiutils.RemoveAllExt("stdin")))
|
||||||
input, err := obiutils.Buf(os.Stdin)
|
input, err := Buf(os.Stdin)
|
||||||
|
|
||||||
if err == obiutils.ErrNoContent {
|
if err == ErrNoContent {
|
||||||
log.Infof("stdin is empty")
|
log.Infof("stdin is empty")
|
||||||
return ReadEmptyFile(options...)
|
return ReadEmptyFile(options...)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,14 +1,22 @@
|
|||||||
package obiformats
|
package obiformats
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/csv"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
func CSVSequenceRecord(sequence *obiseq.BioSequence, opt Options) []string {
|
func CSVRecord(sequence *obiseq.BioSequence, opt Options) []string {
|
||||||
keys := opt.CSVKeys()
|
keys := opt.CSVKeys()
|
||||||
record := make([]string, 0, len(keys)+4)
|
record := make([]string, 0, len(keys)+4)
|
||||||
|
|
||||||
@@ -22,10 +30,14 @@ func CSVSequenceRecord(sequence *obiseq.BioSequence, opt Options) []string {
|
|||||||
|
|
||||||
if opt.CSVTaxon() {
|
if opt.CSVTaxon() {
|
||||||
taxid := sequence.Taxid()
|
taxid := sequence.Taxid()
|
||||||
sn, ok := sequence.GetStringAttribute("scientific_name")
|
sn, ok := sequence.GetAttribute("scientific_name")
|
||||||
|
|
||||||
if !ok {
|
if !ok {
|
||||||
sn = opt.CSVNAValue()
|
if taxid == 1 {
|
||||||
|
sn = "root"
|
||||||
|
} else {
|
||||||
|
sn = opt.CSVNAValue()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
record = append(record, fmt.Sprint(taxid), fmt.Sprint(sn))
|
record = append(record, fmt.Sprint(taxid), fmt.Sprint(sn))
|
||||||
@@ -54,7 +66,7 @@ func CSVSequenceRecord(sequence *obiseq.BioSequence, opt Options) []string {
|
|||||||
l := sequence.Len()
|
l := sequence.Len()
|
||||||
q := sequence.Qualities()
|
q := sequence.Qualities()
|
||||||
ascii := make([]byte, l)
|
ascii := make([]byte, l)
|
||||||
quality_shift := obidefault.WriteQualitiesShift()
|
quality_shift := obioptions.OutputQualityShift()
|
||||||
for j := 0; j < l; j++ {
|
for j := 0; j < l; j++ {
|
||||||
ascii[j] = uint8(q[j]) + uint8(quality_shift)
|
ascii[j] = uint8(q[j]) + uint8(quality_shift)
|
||||||
}
|
}
|
||||||
@@ -66,3 +78,182 @@ func CSVSequenceRecord(sequence *obiseq.BioSequence, opt Options) []string {
|
|||||||
|
|
||||||
return record
|
return record
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func CSVHeader(opt Options) []string {
|
||||||
|
keys := opt.CSVKeys()
|
||||||
|
record := make([]string, 0, len(keys)+4)
|
||||||
|
|
||||||
|
if opt.CSVId() {
|
||||||
|
record = append(record, "id")
|
||||||
|
}
|
||||||
|
|
||||||
|
if opt.CSVCount() {
|
||||||
|
record = append(record, "count")
|
||||||
|
}
|
||||||
|
|
||||||
|
if opt.CSVTaxon() {
|
||||||
|
record = append(record, "taxid", "scientific_name")
|
||||||
|
}
|
||||||
|
|
||||||
|
if opt.CSVDefinition() {
|
||||||
|
record = append(record, "definition")
|
||||||
|
}
|
||||||
|
|
||||||
|
record = append(record, opt.CSVKeys()...)
|
||||||
|
|
||||||
|
if opt.CSVSequence() {
|
||||||
|
record = append(record, "sequence")
|
||||||
|
}
|
||||||
|
|
||||||
|
if opt.CSVQuality() {
|
||||||
|
record = append(record, "quality")
|
||||||
|
}
|
||||||
|
|
||||||
|
return record
|
||||||
|
}
|
||||||
|
|
||||||
|
func FormatCVSBatch(batch obiiter.BioSequenceBatch, opt Options) []byte {
|
||||||
|
buff := new(bytes.Buffer)
|
||||||
|
csv := csv.NewWriter(buff)
|
||||||
|
|
||||||
|
if batch.Order() == 0 {
|
||||||
|
csv.Write(CSVHeader(opt))
|
||||||
|
}
|
||||||
|
for _, s := range batch.Slice() {
|
||||||
|
csv.Write(CSVRecord(s, opt))
|
||||||
|
}
|
||||||
|
|
||||||
|
csv.Flush()
|
||||||
|
|
||||||
|
return buff.Bytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
func WriteCSV(iterator obiiter.IBioSequence,
|
||||||
|
file io.WriteCloser,
|
||||||
|
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
|
|
||||||
|
var auto_slot obiutils.Set[string]
|
||||||
|
opt := MakeOptions(options)
|
||||||
|
|
||||||
|
file, _ = obiutils.CompressStream(file, opt.CompressedFile(), opt.CloseFile())
|
||||||
|
|
||||||
|
newIter := obiiter.MakeIBioSequence()
|
||||||
|
|
||||||
|
nwriters := opt.ParallelWorkers()
|
||||||
|
|
||||||
|
obiiter.RegisterAPipe()
|
||||||
|
chunkchan := make(chan FileChunk)
|
||||||
|
|
||||||
|
newIter.Add(nwriters)
|
||||||
|
var waitWriter sync.WaitGroup
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
newIter.WaitAndClose()
|
||||||
|
for len(chunkchan) > 0 {
|
||||||
|
time.Sleep(time.Millisecond)
|
||||||
|
}
|
||||||
|
close(chunkchan)
|
||||||
|
waitWriter.Wait()
|
||||||
|
}()
|
||||||
|
|
||||||
|
ff := func(iterator obiiter.IBioSequence) {
|
||||||
|
for iterator.Next() {
|
||||||
|
|
||||||
|
batch := iterator.Get()
|
||||||
|
|
||||||
|
chunkchan <- FileChunk{
|
||||||
|
FormatCVSBatch(batch, opt),
|
||||||
|
batch.Order(),
|
||||||
|
}
|
||||||
|
newIter.Push(batch)
|
||||||
|
}
|
||||||
|
newIter.Done()
|
||||||
|
}
|
||||||
|
|
||||||
|
next_to_send := 0
|
||||||
|
received := make(map[int]FileChunk, 100)
|
||||||
|
|
||||||
|
waitWriter.Add(1)
|
||||||
|
go func() {
|
||||||
|
for chunk := range chunkchan {
|
||||||
|
if chunk.order == next_to_send {
|
||||||
|
file.Write(chunk.text)
|
||||||
|
next_to_send++
|
||||||
|
chunk, ok := received[next_to_send]
|
||||||
|
for ok {
|
||||||
|
file.Write(chunk.text)
|
||||||
|
delete(received, next_to_send)
|
||||||
|
next_to_send++
|
||||||
|
chunk, ok = received[next_to_send]
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
received[chunk.order] = chunk
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
file.Close()
|
||||||
|
|
||||||
|
log.Debugln("End of the CSV file writing")
|
||||||
|
obiiter.UnregisterPipe()
|
||||||
|
waitWriter.Done()
|
||||||
|
}()
|
||||||
|
|
||||||
|
if opt.pointer.csv_auto {
|
||||||
|
if iterator.Next() {
|
||||||
|
batch := iterator.Get()
|
||||||
|
auto_slot = batch.Slice().AttributeKeys(true)
|
||||||
|
CSVKeys(auto_slot.Members())(opt)
|
||||||
|
iterator.PushBack()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debugln("Start of the CSV file writing")
|
||||||
|
go ff(iterator)
|
||||||
|
for i := 0; i < nwriters-1; i++ {
|
||||||
|
go ff(iterator.Split())
|
||||||
|
}
|
||||||
|
|
||||||
|
return newIter, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func WriteCSVToStdout(iterator obiiter.IBioSequence,
|
||||||
|
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
|
options = append(options, OptionDontCloseFile())
|
||||||
|
return WriteCSV(iterator, os.Stdout, options...)
|
||||||
|
}
|
||||||
|
|
||||||
|
func WriteCSVToFile(iterator obiiter.IBioSequence,
|
||||||
|
filename string,
|
||||||
|
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
|
|
||||||
|
opt := MakeOptions(options)
|
||||||
|
flags := os.O_WRONLY | os.O_CREATE
|
||||||
|
|
||||||
|
if opt.AppendFile() {
|
||||||
|
flags |= os.O_APPEND
|
||||||
|
}
|
||||||
|
file, err := os.OpenFile(filename, flags, 0660)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("open file error: %v", err)
|
||||||
|
return obiiter.NilIBioSequence, err
|
||||||
|
}
|
||||||
|
|
||||||
|
options = append(options, OptionCloseFile())
|
||||||
|
|
||||||
|
iterator, err = WriteCSV(iterator, file, options...)
|
||||||
|
|
||||||
|
if opt.HaveToSavePaired() {
|
||||||
|
var revfile *os.File
|
||||||
|
|
||||||
|
revfile, err = os.OpenFile(opt.PairedFileName(), flags, 0660)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("open file error: %v", err)
|
||||||
|
return obiiter.NilIBioSequence, err
|
||||||
|
}
|
||||||
|
iterator, err = WriteCSV(iterator.PairedWith(), revfile, options...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return iterator, err
|
||||||
|
}
|
||||||
|
|||||||
@@ -14,40 +14,10 @@ import (
|
|||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
)
|
)
|
||||||
|
|
||||||
// SequenceBatchWriterToFile is a function type that defines a method for writing
|
|
||||||
// a batch of biosequences to a specified file. It takes an iterator of biosequences,
|
|
||||||
// a filename, and optional configuration options, and returns an iterator of biosequences
|
|
||||||
// along with any error encountered during the writing process.
|
|
||||||
//
|
|
||||||
// Parameters:
|
|
||||||
// - iterator: An iterator of biosequences to be written to the file.
|
|
||||||
// - filename: The name of the file where the sequences will be written.
|
|
||||||
// - options: Optional configuration options for the writing process.
|
|
||||||
//
|
|
||||||
// Returns:
|
|
||||||
// An iterator of biosequences that may have been modified during the writing process
|
|
||||||
// and an error if the writing operation fails.
|
|
||||||
type SequenceBatchWriterToFile func(iterator obiiter.IBioSequence,
|
type SequenceBatchWriterToFile func(iterator obiiter.IBioSequence,
|
||||||
filename string,
|
filename string,
|
||||||
options ...WithOption) (obiiter.IBioSequence, error)
|
options ...WithOption) (obiiter.IBioSequence, error)
|
||||||
|
|
||||||
// WriterDispatcher manages the writing of data to files based on a given
|
|
||||||
// prototype name and a dispatcher for distributing the sequences. It
|
|
||||||
// processes incoming data from the dispatcher in separate goroutines,
|
|
||||||
// formatting and writing the data to files as specified.
|
|
||||||
//
|
|
||||||
// Parameters:
|
|
||||||
// - prototypename: A string that serves as a template for naming the output files.
|
|
||||||
// - dispatcher: An instance of IDistribute that provides the data to be written
|
|
||||||
// and manages the distribution of sequences.
|
|
||||||
// - formater: A function of type SequenceBatchWriterToFile that formats and writes
|
|
||||||
// the sequences to the specified file.
|
|
||||||
// - options: Optional configuration options for the writing process.
|
|
||||||
//
|
|
||||||
// The function operates asynchronously, launching goroutines for each new data
|
|
||||||
// channel received from the dispatcher. It ensures that directories are created
|
|
||||||
// as needed and handles errors during the writing process. The function blocks
|
|
||||||
// until all writing jobs are completed.
|
|
||||||
func WriterDispatcher(prototypename string,
|
func WriterDispatcher(prototypename string,
|
||||||
dispatcher obiiter.IDistribute,
|
dispatcher obiiter.IDistribute,
|
||||||
formater SequenceBatchWriterToFile,
|
formater SequenceBatchWriterToFile,
|
||||||
@@ -64,7 +34,7 @@ func WriterDispatcher(prototypename string,
|
|||||||
data, err := dispatcher.Outputs(newflux)
|
data, err := dispatcher.Outputs(newflux)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("Cannot retrieve the new channel: %v", err)
|
log.Fatalf("Cannot retreive the new chanel : %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
key := dispatcher.Classifier().Value(newflux)
|
key := dispatcher.Classifier().Value(newflux)
|
||||||
@@ -88,7 +58,7 @@ func WriterDispatcher(prototypename string,
|
|||||||
info, err := os.Stat(directory)
|
info, err := os.Stat(directory)
|
||||||
switch {
|
switch {
|
||||||
case !os.IsNotExist(err) && !info.IsDir():
|
case !os.IsNotExist(err) && !info.IsDir():
|
||||||
log.Fatalf("Cannot create the directory %s", directory)
|
log.Fatalf("Cannot Create the directory %s", directory)
|
||||||
case os.IsNotExist(err):
|
case os.IsNotExist(err):
|
||||||
os.Mkdir(directory, 0755)
|
os.Mkdir(directory, 0755)
|
||||||
}
|
}
|
||||||
@@ -101,7 +71,7 @@ func WriterDispatcher(prototypename string,
|
|||||||
options...)
|
options...)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("Cannot open the output file for key %s",
|
log.Fatalf("cannot open the output file for key %s",
|
||||||
dispatcher.Classifier().Value(newflux))
|
dispatcher.Classifier().Value(newflux))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -159,7 +159,7 @@ func EmblChunkParser(withFeatureTable bool) func(string, io.Reader) (obiseq.BioS
|
|||||||
}
|
}
|
||||||
|
|
||||||
func _ParseEmblFile(
|
func _ParseEmblFile(
|
||||||
input ChannelFileChunk,
|
input ChannelSeqFileChunk,
|
||||||
out obiiter.IBioSequence,
|
out obiiter.IBioSequence,
|
||||||
withFeatureTable bool,
|
withFeatureTable bool,
|
||||||
) {
|
) {
|
||||||
@@ -187,9 +187,9 @@ func _ParseEmblFile(
|
|||||||
func ReadEMBL(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, error) {
|
func ReadEMBL(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
|
|
||||||
buff := make([]byte, 1024*1024*128) // 128 MB
|
buff := make([]byte, 1024*1024*1024*256)
|
||||||
|
|
||||||
entry_channel := ReadFileChunk(
|
entry_channel := ReadSeqFileChunk(
|
||||||
opt.Source(),
|
opt.Source(),
|
||||||
reader,
|
reader,
|
||||||
buff,
|
buff,
|
||||||
@@ -227,9 +227,9 @@ func ReadEMBLFromFile(filename string, options ...WithOption) (obiiter.IBioSeque
|
|||||||
|
|
||||||
options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))
|
options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))
|
||||||
|
|
||||||
reader, err = obiutils.Ropen(filename)
|
reader, err = Ropen(filename)
|
||||||
|
|
||||||
if err == obiutils.ErrNoContent {
|
if err == ErrNoContent {
|
||||||
log.Infof("file %s is empty", filename)
|
log.Infof("file %s is empty", filename)
|
||||||
return ReadEmptyFile(options...)
|
return ReadEmptyFile(options...)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -205,7 +205,7 @@ func FastaChunkParser() func(string, io.Reader) (obiseq.BioSequenceSlice, error)
|
|||||||
}
|
}
|
||||||
|
|
||||||
func _ParseFastaFile(
|
func _ParseFastaFile(
|
||||||
input ChannelFileChunk,
|
input ChannelSeqFileChunk,
|
||||||
out obiiter.IBioSequence,
|
out obiiter.IBioSequence,
|
||||||
) {
|
) {
|
||||||
|
|
||||||
@@ -213,7 +213,6 @@ func _ParseFastaFile(
|
|||||||
|
|
||||||
for chunks := range input {
|
for chunks := range input {
|
||||||
sequences, err := parser(chunks.Source, chunks.Raw)
|
sequences, err := parser(chunks.Source, chunks.Raw)
|
||||||
// log.Warnf("Chunck(%d:%d) -%d- ", chunks.Order, l, sequences.Len())
|
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("File %s : Cannot parse the fasta file : %v", chunks.Source, err)
|
log.Fatalf("File %s : Cannot parse the fasta file : %v", chunks.Source, err)
|
||||||
@@ -233,9 +232,9 @@ func ReadFasta(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, e
|
|||||||
|
|
||||||
nworker := opt.ParallelWorkers()
|
nworker := opt.ParallelWorkers()
|
||||||
|
|
||||||
buff := make([]byte, 1024*1024)
|
buff := make([]byte, 1024*1024*1024)
|
||||||
|
|
||||||
chkchan := ReadFileChunk(
|
chkchan := ReadSeqFileChunk(
|
||||||
opt.Source(),
|
opt.Source(),
|
||||||
reader,
|
reader,
|
||||||
buff,
|
buff,
|
||||||
@@ -251,7 +250,7 @@ func ReadFasta(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, e
|
|||||||
out.WaitAndClose()
|
out.WaitAndClose()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
newIter := out.SortBatches()
|
newIter := out.SortBatches().Rebatch(opt.BatchSize())
|
||||||
|
|
||||||
log.Debugln("Full file batch mode : ", opt.FullFileBatch())
|
log.Debugln("Full file batch mode : ", opt.FullFileBatch())
|
||||||
|
|
||||||
@@ -271,9 +270,9 @@ func ReadFasta(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, e
|
|||||||
func ReadFastaFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
|
func ReadFastaFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))
|
options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))
|
||||||
|
|
||||||
file, err := obiutils.Ropen(filename)
|
file, err := Ropen(filename)
|
||||||
|
|
||||||
if err == obiutils.ErrNoContent {
|
if err == ErrNoContent {
|
||||||
log.Infof("file %s is empty", filename)
|
log.Infof("file %s is empty", filename)
|
||||||
return ReadEmptyFile(options...)
|
return ReadEmptyFile(options...)
|
||||||
}
|
}
|
||||||
@@ -287,9 +286,9 @@ func ReadFastaFromFile(filename string, options ...WithOption) (obiiter.IBioSequ
|
|||||||
|
|
||||||
func ReadFastaFromStdin(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, error) {
|
func ReadFastaFromStdin(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
options = append(options, OptionsSource("stdin"))
|
options = append(options, OptionsSource("stdin"))
|
||||||
input, err := obiutils.Buf(os.Stdin)
|
input, err := Buf(os.Stdin)
|
||||||
|
|
||||||
if err == obiutils.ErrNoContent {
|
if err == ErrNoContent {
|
||||||
log.Infof("stdin is empty")
|
log.Infof("stdin is empty")
|
||||||
return ReadEmptyFile(options...)
|
return ReadEmptyFile(options...)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,8 +7,8 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"path"
|
"path"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
@@ -17,7 +17,6 @@ import (
|
|||||||
func EndOfLastFastqEntry(buffer []byte) int {
|
func EndOfLastFastqEntry(buffer []byte) int {
|
||||||
var i int
|
var i int
|
||||||
|
|
||||||
// log.Warnf("EndOfLastFastqEntry(%d): %s", len(buffer), string(buffer[0:20]))
|
|
||||||
imax := len(buffer)
|
imax := len(buffer)
|
||||||
state := 0
|
state := 0
|
||||||
restart := imax - 1
|
restart := imax - 1
|
||||||
@@ -33,48 +32,39 @@ func EndOfLastFastqEntry(buffer []byte) int {
|
|||||||
case 0:
|
case 0:
|
||||||
if C == '+' {
|
if C == '+' {
|
||||||
// Potential start of quality part step 1
|
// Potential start of quality part step 1
|
||||||
// log.Warn("Potential start of quality part step 1 - +")
|
|
||||||
state = 1
|
state = 1
|
||||||
restart = i
|
restart = i
|
||||||
}
|
}
|
||||||
case 1:
|
case 1:
|
||||||
if is_end_of_line {
|
if is_end_of_line {
|
||||||
// Potential start of quality part step 2
|
// Potential start of quality part step 2
|
||||||
// log.Warn("Potential start of quality part step 2 - +/end of line")
|
|
||||||
state = 2
|
state = 2
|
||||||
} else {
|
} else {
|
||||||
// it was not the start of quality part
|
// it was not the start of quality part
|
||||||
// log.Warn("it was not the start of quality part")
|
|
||||||
state = 0
|
state = 0
|
||||||
i = restart
|
i = restart
|
||||||
}
|
}
|
||||||
case 2:
|
case 2:
|
||||||
if is_sep {
|
if is_sep {
|
||||||
// Potential start of quality part step 2 (stay in the same state)
|
// Potential start of quality part step 2 (stay in the same state)
|
||||||
// log.Warn("Potential start of quality part step 2 - skipping separator")
|
|
||||||
state = 2
|
state = 2
|
||||||
} else if (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') || C == '-' || C == '.' || C == '[' || C == ']' {
|
} else if (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') || C == '-' || C == '.' || C == '[' || C == ']' {
|
||||||
// progressing along of the sequence
|
// End of the sequence
|
||||||
// log.Warn("Detected the end of the sequence switching to state 3")
|
|
||||||
state = 3
|
state = 3
|
||||||
} else {
|
} else {
|
||||||
// it was not the start of quality part
|
// it was not the start of quality part
|
||||||
// log.Warn("it was not the start of quality part because is not preceded by sequence")
|
|
||||||
state = 0
|
state = 0
|
||||||
i = restart
|
i = restart
|
||||||
}
|
}
|
||||||
case 3:
|
case 3:
|
||||||
if is_end_of_line {
|
if is_end_of_line {
|
||||||
// Entrering in the header line
|
// Entrering in the header line
|
||||||
// log.Warn("Potentially entrering in the header line")
|
|
||||||
state = 4
|
state = 4
|
||||||
} else if (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') || C == '-' || C == '.' || C == '[' || C == ']' {
|
} else if (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') || C == '-' || C == '.' || C == '[' || C == ']' {
|
||||||
// progressing along of the sequence
|
// progressing along of the sequence
|
||||||
// log.Warn("Progressing along of the sequence")
|
|
||||||
state = 3
|
state = 3
|
||||||
} else {
|
} else {
|
||||||
// it was not the sequence part
|
// it was not the sequence part
|
||||||
// log.Warnf("it was not the sequence part : %c", C)
|
|
||||||
state = 0
|
state = 0
|
||||||
i = restart
|
i = restart
|
||||||
}
|
}
|
||||||
@@ -82,7 +72,6 @@ func EndOfLastFastqEntry(buffer []byte) int {
|
|||||||
if is_end_of_line {
|
if is_end_of_line {
|
||||||
state = 4
|
state = 4
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
state = 5
|
state = 5
|
||||||
}
|
}
|
||||||
case 5:
|
case 5:
|
||||||
@@ -91,18 +80,15 @@ func EndOfLastFastqEntry(buffer []byte) int {
|
|||||||
state = 0
|
state = 0
|
||||||
i = restart
|
i = restart
|
||||||
} else if C == '@' {
|
} else if C == '@' {
|
||||||
// It was the header line
|
|
||||||
// log.Warn("It was the header line")
|
|
||||||
state = 6
|
state = 6
|
||||||
cut = i
|
cut = i
|
||||||
}
|
}
|
||||||
case 6:
|
case 6:
|
||||||
if is_end_of_line {
|
if is_end_of_line {
|
||||||
// log.Warn("====> End of the last sequence")
|
|
||||||
state = 7
|
state = 7
|
||||||
} else {
|
} else {
|
||||||
// log.Warnf("%s: Strange it was not the end of the last sequence : %c : %s", string(buffer[0:40]), C, string(buffer[i-20:i+5]))
|
state = 0
|
||||||
state = 5
|
i = restart
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -131,7 +117,7 @@ func _storeSequenceQuality(bytes *bytes.Buffer, out *obiseq.BioSequence, quality
|
|||||||
out.SetQualities(q)
|
out.SetQualities(q)
|
||||||
}
|
}
|
||||||
|
|
||||||
func FastqChunkParser(quality_shift byte, with_quality bool) func(string, io.Reader) (obiseq.BioSequenceSlice, error) {
|
func FastqChunkParser(quality_shift byte) func(string, io.Reader) (obiseq.BioSequenceSlice, error) {
|
||||||
parser := func(source string, input io.Reader) (obiseq.BioSequenceSlice, error) {
|
parser := func(source string, input io.Reader) (obiseq.BioSequenceSlice, error) {
|
||||||
|
|
||||||
var identifier string
|
var identifier string
|
||||||
@@ -263,9 +249,7 @@ func FastqChunkParser(quality_shift byte, with_quality bool) func(string, io.Rea
|
|||||||
}
|
}
|
||||||
case 10:
|
case 10:
|
||||||
if is_end_of_line {
|
if is_end_of_line {
|
||||||
if with_quality {
|
_storeSequenceQuality(&qualBytes, sequences[len(sequences)-1], quality_shift)
|
||||||
_storeSequenceQuality(&qualBytes, sequences[len(sequences)-1], quality_shift)
|
|
||||||
}
|
|
||||||
state = 11
|
state = 11
|
||||||
} else {
|
} else {
|
||||||
qualBytes.WriteByte(C)
|
qualBytes.WriteByte(C)
|
||||||
@@ -298,13 +282,12 @@ func FastqChunkParser(quality_shift byte, with_quality bool) func(string, io.Rea
|
|||||||
}
|
}
|
||||||
|
|
||||||
func _ParseFastqFile(
|
func _ParseFastqFile(
|
||||||
input ChannelFileChunk,
|
input ChannelSeqFileChunk,
|
||||||
out obiiter.IBioSequence,
|
out obiiter.IBioSequence,
|
||||||
quality_shift byte,
|
quality_shift byte,
|
||||||
with_quality bool,
|
|
||||||
) {
|
) {
|
||||||
|
|
||||||
parser := FastqChunkParser(quality_shift, with_quality)
|
parser := FastqChunkParser(quality_shift)
|
||||||
|
|
||||||
for chunks := range input {
|
for chunks := range input {
|
||||||
sequences, err := parser(chunks.Source, chunks.Raw)
|
sequences, err := parser(chunks.Source, chunks.Raw)
|
||||||
@@ -327,9 +310,9 @@ func ReadFastq(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, e
|
|||||||
|
|
||||||
nworker := opt.ParallelWorkers()
|
nworker := opt.ParallelWorkers()
|
||||||
|
|
||||||
buff := make([]byte, 1024*1024)
|
buff := make([]byte, 1024*1024*1024)
|
||||||
|
|
||||||
chkchan := ReadFileChunk(
|
chkchan := ReadSeqFileChunk(
|
||||||
opt.Source(),
|
opt.Source(),
|
||||||
reader,
|
reader,
|
||||||
buff,
|
buff,
|
||||||
@@ -341,8 +324,7 @@ func ReadFastq(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, e
|
|||||||
go _ParseFastqFile(
|
go _ParseFastqFile(
|
||||||
chkchan,
|
chkchan,
|
||||||
out,
|
out,
|
||||||
obidefault.ReadQualitiesShift(),
|
byte(obioptions.InputQualityShift()),
|
||||||
opt.ReadQualities(),
|
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -350,7 +332,7 @@ func ReadFastq(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, e
|
|||||||
out.WaitAndClose()
|
out.WaitAndClose()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
newIter := out.SortBatches()
|
newIter := out.SortBatches().Rebatch(opt.BatchSize())
|
||||||
|
|
||||||
log.Debugln("Full file batch mode : ", opt.FullFileBatch())
|
log.Debugln("Full file batch mode : ", opt.FullFileBatch())
|
||||||
|
|
||||||
@@ -370,9 +352,9 @@ func ReadFastq(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, e
|
|||||||
func ReadFastqFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
|
func ReadFastqFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))
|
options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))
|
||||||
|
|
||||||
file, err := obiutils.Ropen(filename)
|
file, err := Ropen(filename)
|
||||||
|
|
||||||
if err == obiutils.ErrNoContent {
|
if err == ErrNoContent {
|
||||||
log.Infof("file %s is empty", filename)
|
log.Infof("file %s is empty", filename)
|
||||||
return ReadEmptyFile(options...)
|
return ReadEmptyFile(options...)
|
||||||
}
|
}
|
||||||
@@ -386,9 +368,9 @@ func ReadFastqFromFile(filename string, options ...WithOption) (obiiter.IBioSequ
|
|||||||
|
|
||||||
func ReadFastqFromStdin(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, error) {
|
func ReadFastqFromStdin(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
options = append(options, OptionsSource(obiutils.RemoveAllExt("stdin")))
|
options = append(options, OptionsSource(obiutils.RemoveAllExt("stdin")))
|
||||||
input, err := obiutils.Buf(os.Stdin)
|
input, err := Buf(os.Stdin)
|
||||||
|
|
||||||
if err == obiutils.ErrNoContent {
|
if err == ErrNoContent {
|
||||||
log.Infof("stdin is empty")
|
log.Infof("stdin is empty")
|
||||||
return ReadEmptyFile(options...)
|
return ReadEmptyFile(options...)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ package obiformats
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"strconv"
|
"math"
|
||||||
"strings"
|
"strings"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
@@ -10,197 +10,10 @@ import (
|
|||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
"github.com/buger/jsonparser"
|
"github.com/goccy/go-json"
|
||||||
)
|
)
|
||||||
|
|
||||||
func _parse_json_map_string(str []byte) (map[string]string, error) {
|
func _parse_json_header_(header string, annotations obiseq.Annotation) string {
|
||||||
values := make(map[string]string)
|
|
||||||
jsonparser.ObjectEach(str,
|
|
||||||
func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
|
|
||||||
skey := string(key)
|
|
||||||
values[skey] = string(value)
|
|
||||||
return
|
|
||||||
},
|
|
||||||
)
|
|
||||||
return values, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func _parse_json_map_int(str []byte) (map[string]int, error) {
|
|
||||||
values := make(map[string]int)
|
|
||||||
jsonparser.ObjectEach(str,
|
|
||||||
func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
|
|
||||||
skey := string(key)
|
|
||||||
intval, err := jsonparser.ParseInt(value)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
values[skey] = int(intval)
|
|
||||||
return nil
|
|
||||||
},
|
|
||||||
)
|
|
||||||
return values, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func _parse_json_map_float(str []byte) (map[string]float64, error) {
|
|
||||||
values := make(map[string]float64)
|
|
||||||
jsonparser.ObjectEach(str,
|
|
||||||
func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
|
|
||||||
skey := string(key)
|
|
||||||
floatval, err := strconv.ParseFloat(obiutils.UnsafeString(value), 64)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
values[skey] = float64(floatval)
|
|
||||||
return nil
|
|
||||||
},
|
|
||||||
)
|
|
||||||
return values, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func _parse_json_map_bool(str []byte) (map[string]bool, error) {
|
|
||||||
values := make(map[string]bool)
|
|
||||||
jsonparser.ObjectEach(str,
|
|
||||||
func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
|
|
||||||
skey := string(key)
|
|
||||||
boolval, err := jsonparser.ParseBoolean(value)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
values[skey] = boolval
|
|
||||||
return nil
|
|
||||||
},
|
|
||||||
)
|
|
||||||
return values, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func _parse_json_map_interface(str []byte) (map[string]interface{}, error) {
|
|
||||||
values := make(map[string]interface{})
|
|
||||||
jsonparser.ObjectEach(str,
|
|
||||||
func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
|
|
||||||
skey := string(key)
|
|
||||||
switch dataType {
|
|
||||||
case jsonparser.String:
|
|
||||||
values[skey] = string(value)
|
|
||||||
case jsonparser.Number:
|
|
||||||
// Try to parse the number as an int at first then as float if that fails.
|
|
||||||
values[skey], err = jsonparser.ParseInt(value)
|
|
||||||
if err != nil {
|
|
||||||
values[skey], err = strconv.ParseFloat(obiutils.UnsafeString(value), 64)
|
|
||||||
}
|
|
||||||
if err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
case jsonparser.Boolean:
|
|
||||||
default:
|
|
||||||
values[skey] = string(value)
|
|
||||||
}
|
|
||||||
return
|
|
||||||
},
|
|
||||||
)
|
|
||||||
return values, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func _parse_json_array_string(str []byte) ([]string, error) {
|
|
||||||
values := make([]string, 0)
|
|
||||||
jsonparser.ArrayEach(str,
|
|
||||||
func(value []byte, dataType jsonparser.ValueType, offset int, err error) {
|
|
||||||
if dataType == jsonparser.String {
|
|
||||||
skey := string(value)
|
|
||||||
values = append(values, skey)
|
|
||||||
}
|
|
||||||
},
|
|
||||||
)
|
|
||||||
return values, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func _parse_json_array_int(str []byte, sequence *obiseq.BioSequence) ([]int, error) {
|
|
||||||
values := make([]int, 0)
|
|
||||||
jsonparser.ArrayEach(str,
|
|
||||||
func(value []byte, dataType jsonparser.ValueType, offset int, err error) {
|
|
||||||
if dataType == jsonparser.Number {
|
|
||||||
intval, err := jsonparser.ParseInt(value)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("%s: Parsing int failed on value %s: %s", sequence.Id(), value, err)
|
|
||||||
}
|
|
||||||
values = append(values, int(intval))
|
|
||||||
}
|
|
||||||
},
|
|
||||||
)
|
|
||||||
return values, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func _parse_json_array_float(str []byte, sequence *obiseq.BioSequence) ([]float64, error) {
|
|
||||||
values := make([]float64, 0)
|
|
||||||
jsonparser.ArrayEach(str,
|
|
||||||
func(value []byte, dataType jsonparser.ValueType, offset int, err error) {
|
|
||||||
if dataType == jsonparser.Number {
|
|
||||||
floatval, err := strconv.ParseFloat(obiutils.UnsafeString(value), 64)
|
|
||||||
if err == nil {
|
|
||||||
values = append(values, float64(floatval))
|
|
||||||
} else {
|
|
||||||
log.Fatalf("%s: Parsing float failed on value %s: %s", sequence.Id(), value, err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
)
|
|
||||||
return values, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func _parse_json_array_bool(str []byte, sequence *obiseq.BioSequence) ([]bool, error) {
|
|
||||||
values := make([]bool, 0)
|
|
||||||
jsonparser.ArrayEach(str,
|
|
||||||
func(value []byte, dataType jsonparser.ValueType, offset int, err error) {
|
|
||||||
if dataType == jsonparser.Boolean {
|
|
||||||
boolval, err := jsonparser.ParseBoolean(value)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("%s: Parsing bool failed on value %s: %s", sequence.Id(), value, err)
|
|
||||||
}
|
|
||||||
values = append(values, boolval)
|
|
||||||
}
|
|
||||||
},
|
|
||||||
)
|
|
||||||
return values, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func _parse_json_array_interface(str []byte) ([]interface{}, error) {
|
|
||||||
values := make([]interface{}, 0)
|
|
||||||
jsonparser.ArrayEach(str,
|
|
||||||
func(value []byte, dataType jsonparser.ValueType, offset int, err error) {
|
|
||||||
switch dataType {
|
|
||||||
case jsonparser.String:
|
|
||||||
values = append(values, string(value))
|
|
||||||
case jsonparser.Number:
|
|
||||||
// Try to parse the number as an int at first then as float if that fails.
|
|
||||||
intval, err := jsonparser.ParseInt(value)
|
|
||||||
if err != nil {
|
|
||||||
floatval, err := strconv.ParseFloat(obiutils.UnsafeString(value), 64)
|
|
||||||
if err != nil {
|
|
||||||
values = append(values, string(value))
|
|
||||||
} else {
|
|
||||||
values = append(values, floatval)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
values = append(values, intval)
|
|
||||||
}
|
|
||||||
case jsonparser.Boolean:
|
|
||||||
boolval, err := jsonparser.ParseBoolean(value)
|
|
||||||
if err != nil {
|
|
||||||
values = append(values, string(value))
|
|
||||||
} else {
|
|
||||||
values = append(values, boolval)
|
|
||||||
}
|
|
||||||
|
|
||||||
default:
|
|
||||||
values = append(values, string(value))
|
|
||||||
}
|
|
||||||
|
|
||||||
},
|
|
||||||
)
|
|
||||||
return values, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func _parse_json_header_(header string, sequence *obiseq.BioSequence) string {
|
|
||||||
annotations := sequence.Annotations()
|
|
||||||
start := -1
|
start := -1
|
||||||
stop := -1
|
stop := -1
|
||||||
level := 0
|
level := 0
|
||||||
@@ -238,123 +51,23 @@ func _parse_json_header_(header string, sequence *obiseq.BioSequence) string {
|
|||||||
|
|
||||||
stop++
|
stop++
|
||||||
|
|
||||||
jsonparser.ObjectEach(obiutils.UnsafeBytes(header[start:stop]),
|
err := json.Unmarshal([]byte(header)[start:stop], &annotations)
|
||||||
func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) error {
|
|
||||||
var err error
|
|
||||||
|
|
||||||
skey := obiutils.UnsafeString(key)
|
for k, v := range annotations {
|
||||||
|
switch vt := v.(type) {
|
||||||
switch {
|
case float64:
|
||||||
case skey == "id":
|
if vt == math.Floor(vt) {
|
||||||
sequence.SetId(string(value))
|
annotations[k] = int(vt)
|
||||||
case skey == "definition":
|
|
||||||
sequence.SetDefinition(string(value))
|
|
||||||
|
|
||||||
case skey == "count":
|
|
||||||
if dataType != jsonparser.Number {
|
|
||||||
log.Fatalf("%s: Count attribut must be numeric: %s", sequence.Id(), string(value))
|
|
||||||
}
|
|
||||||
count, err := jsonparser.ParseInt(value)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("%s: Cannot parse count %s", sequence.Id(), string(value))
|
|
||||||
}
|
|
||||||
sequence.SetCount(int(count))
|
|
||||||
|
|
||||||
case skey == "obiclean_weight":
|
|
||||||
weight, err := _parse_json_map_int(value)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("%s: Cannot parse obiclean weight %s", sequence.Id(), string(value))
|
|
||||||
}
|
|
||||||
annotations[skey] = weight
|
|
||||||
|
|
||||||
case skey == "obiclean_status":
|
|
||||||
status, err := _parse_json_map_string(value)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("%s: Cannot parse obiclean status %s", sequence.Id(), string(value))
|
|
||||||
}
|
|
||||||
annotations[skey] = status
|
|
||||||
|
|
||||||
case strings.HasPrefix(skey, "merged_"):
|
|
||||||
if dataType == jsonparser.Object {
|
|
||||||
data, err := _parse_json_map_int(value)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("%s: Cannot parse merged slot %s: %v", sequence.Id(), skey, err)
|
|
||||||
} else {
|
|
||||||
annotations[skey] = data
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
log.Fatalf("%s: Cannot parse merged slot %s", sequence.Id(), skey)
|
|
||||||
}
|
|
||||||
|
|
||||||
case skey == "taxid":
|
|
||||||
if dataType == jsonparser.Number || dataType == jsonparser.String {
|
|
||||||
taxid := string(value)
|
|
||||||
sequence.SetTaxid(taxid)
|
|
||||||
} else {
|
|
||||||
log.Fatalf("%s: Cannot parse taxid %s", sequence.Id(), string(value))
|
|
||||||
}
|
|
||||||
|
|
||||||
case strings.HasSuffix(skey, "_taxid"):
|
|
||||||
if dataType == jsonparser.Number || dataType == jsonparser.String {
|
|
||||||
rank, _ := obiutils.SplitInTwo(skey, '_')
|
|
||||||
|
|
||||||
taxid := string(value)
|
|
||||||
sequence.SetTaxid(taxid, rank)
|
|
||||||
} else {
|
|
||||||
log.Fatalf("%s: Cannot parse taxid %s", sequence.Id(), string(value))
|
|
||||||
}
|
|
||||||
|
|
||||||
default:
|
|
||||||
skey = strings.Clone(skey)
|
|
||||||
switch dataType {
|
|
||||||
case jsonparser.String:
|
|
||||||
annotations[skey] = string(value)
|
|
||||||
case jsonparser.Number:
|
|
||||||
// Try to parse the number as an int at first then as float if that fails.
|
|
||||||
annotations[skey], err = jsonparser.ParseInt(value)
|
|
||||||
if err != nil {
|
|
||||||
annotations[skey], err = strconv.ParseFloat(obiutils.UnsafeString(value), 64)
|
|
||||||
}
|
|
||||||
case jsonparser.Array:
|
|
||||||
annotations[skey], err = _parse_json_array_interface(value)
|
|
||||||
case jsonparser.Object:
|
|
||||||
annotations[skey], err = _parse_json_map_interface(value)
|
|
||||||
case jsonparser.Boolean:
|
|
||||||
annotations[skey], err = jsonparser.ParseBoolean(value)
|
|
||||||
case jsonparser.Null:
|
|
||||||
annotations[skey] = nil
|
|
||||||
default:
|
|
||||||
log.Fatalf("Unknown data type %v", dataType)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
{
|
||||||
if err != nil {
|
annotations[k] = vt
|
||||||
annotations[skey] = "NaN"
|
|
||||||
log.Fatalf("%s: Cannot parse value %s assicated to key %s into a %s value",
|
|
||||||
sequence.Id(), string(value), skey, dataType.String())
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return err
|
if err != nil {
|
||||||
},
|
log.Fatalf("annotation parsing error on %s : %v\n", header, err)
|
||||||
)
|
}
|
||||||
|
|
||||||
// err := json.Unmarshal([]byte(header)[start:stop], &annotations)
|
|
||||||
|
|
||||||
// for k, v := range annotations {
|
|
||||||
// switch vt := v.(type) {
|
|
||||||
// case float64:
|
|
||||||
// if vt == math.Floor(vt) {
|
|
||||||
// annotations[k] = int(vt)
|
|
||||||
// }
|
|
||||||
// {
|
|
||||||
// annotations[k] = vt
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// if err != nil {
|
|
||||||
// log.Fatalf("annotation parsing error on %s : %v\n", header, err)
|
|
||||||
// }
|
|
||||||
|
|
||||||
return strings.TrimSpace(header[stop:])
|
return strings.TrimSpace(header[stop:])
|
||||||
}
|
}
|
||||||
@@ -365,9 +78,7 @@ func ParseFastSeqJsonHeader(sequence *obiseq.BioSequence) {
|
|||||||
|
|
||||||
definition_part := _parse_json_header_(
|
definition_part := _parse_json_header_(
|
||||||
definition,
|
definition,
|
||||||
sequence,
|
sequence.Annotations())
|
||||||
)
|
|
||||||
|
|
||||||
if len(definition_part) > 0 {
|
if len(definition_part) > 0 {
|
||||||
if sequence.HasDefinition() {
|
if sequence.HasDefinition() {
|
||||||
definition_part = sequence.Definition() + " " + definition_part
|
definition_part = sequence.Definition() + " " + definition_part
|
||||||
|
|||||||
@@ -14,8 +14,8 @@ import (
|
|||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
)
|
)
|
||||||
@@ -92,7 +92,7 @@ func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSe
|
|||||||
name := C.CString(filename)
|
name := C.CString(filename)
|
||||||
defer C.free(unsafe.Pointer(name))
|
defer C.free(unsafe.Pointer(name))
|
||||||
|
|
||||||
pointer := C.open_fast_sek_file(name, C.int32_t(obidefault.ReadQualitiesShift()))
|
pointer := C.open_fast_sek_file(name, C.int32_t(obioptions.InputQualityShift()))
|
||||||
|
|
||||||
var err error
|
var err error
|
||||||
err = nil
|
err = nil
|
||||||
@@ -151,7 +151,7 @@ func ReadFastSeqFromStdin(options ...WithOption) obiiter.IBioSequence {
|
|||||||
}(newIter)
|
}(newIter)
|
||||||
|
|
||||||
go _FastseqReader(opt.Source(),
|
go _FastseqReader(opt.Source(),
|
||||||
C.open_fast_sek_stdin(C.int32_t(obidefault.ReadQualitiesShift())),
|
C.open_fast_sek_stdin(C.int32_t(obioptions.InputQualityShift())),
|
||||||
newIter, opt.BatchSize())
|
newIter, opt.BatchSize())
|
||||||
|
|
||||||
log.Debugln("Full file batch mode : ", opt.FullFileBatch())
|
log.Debugln("Full file batch mode : ", opt.FullFileBatch())
|
||||||
|
|||||||
@@ -7,7 +7,6 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
|
||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
@@ -75,7 +74,7 @@ func FormatFasta(seq *obiseq.BioSequence, formater FormatHeader) string {
|
|||||||
// - skipEmpty: a boolean indicating whether empty sequences should be skipped or not.
|
// - skipEmpty: a boolean indicating whether empty sequences should be skipped or not.
|
||||||
//
|
//
|
||||||
// It returns a byte array containing the formatted sequences.
|
// It returns a byte array containing the formatted sequences.
|
||||||
func FormatFastaBatch(batch obiiter.BioSequenceBatch, formater FormatHeader, skipEmpty bool) *bytes.Buffer {
|
func FormatFastaBatch(batch obiiter.BioSequenceBatch, formater FormatHeader, skipEmpty bool) []byte {
|
||||||
// Create a buffer to store the formatted sequences
|
// Create a buffer to store the formatted sequences
|
||||||
var bs bytes.Buffer
|
var bs bytes.Buffer
|
||||||
|
|
||||||
@@ -115,7 +114,7 @@ func FormatFastaBatch(batch obiiter.BioSequenceBatch, formater FormatHeader, ski
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Return the byte array representation of the buffer
|
// Return the byte array representation of the buffer
|
||||||
return &bs
|
return bs.Bytes()
|
||||||
}
|
}
|
||||||
|
|
||||||
// WriteFasta writes a given iterator of bio sequences to a file in FASTA format.
|
// WriteFasta writes a given iterator of bio sequences to a file in FASTA format.
|
||||||
@@ -127,13 +126,14 @@ func WriteFasta(iterator obiiter.IBioSequence,
|
|||||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
|
|
||||||
|
iterator = iterator.Rebatch(opt.BatchSize())
|
||||||
file, _ = obiutils.CompressStream(file, opt.CompressedFile(), opt.CloseFile())
|
file, _ = obiutils.CompressStream(file, opt.CompressedFile(), opt.CloseFile())
|
||||||
|
|
||||||
newIter := obiiter.MakeIBioSequence()
|
newIter := obiiter.MakeIBioSequence()
|
||||||
|
|
||||||
nwriters := opt.ParallelWorkers()
|
nwriters := opt.ParallelWorkers()
|
||||||
|
|
||||||
chunkchan := WriteFileChunk(file, opt.CloseFile())
|
chunkchan := WriteSeqFileChunk(file, opt.CloseFile())
|
||||||
|
|
||||||
header_format := opt.FormatFastSeqHeader()
|
header_format := opt.FormatFastSeqHeader()
|
||||||
|
|
||||||
@@ -141,11 +141,8 @@ func WriteFasta(iterator obiiter.IBioSequence,
|
|||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
newIter.WaitAndClose()
|
newIter.WaitAndClose()
|
||||||
for len(chunkchan) > 0 {
|
|
||||||
time.Sleep(time.Millisecond)
|
|
||||||
}
|
|
||||||
close(chunkchan)
|
close(chunkchan)
|
||||||
log.Debugf("Writing fasta file done")
|
log.Warnf("Writing fasta file done")
|
||||||
}()
|
}()
|
||||||
|
|
||||||
ff := func(iterator obiiter.IBioSequence) {
|
ff := func(iterator obiiter.IBioSequence) {
|
||||||
@@ -155,9 +152,9 @@ func WriteFasta(iterator obiiter.IBioSequence,
|
|||||||
|
|
||||||
log.Debugf("Formating fasta chunk %d", batch.Order())
|
log.Debugf("Formating fasta chunk %d", batch.Order())
|
||||||
|
|
||||||
chunkchan <- FileChunk{
|
chunkchan <- SeqFileChunk{
|
||||||
Source: batch.Source(),
|
Source: batch.Source(),
|
||||||
Raw: FormatFastaBatch(batch, header_format, opt.SkipEmptySequence()),
|
Raw: bytes.NewBuffer(FormatFastaBatch(batch, header_format, opt.SkipEmptySequence())),
|
||||||
Order: batch.Order(),
|
Order: batch.Order(),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -170,7 +167,7 @@ func WriteFasta(iterator obiiter.IBioSequence,
|
|||||||
|
|
||||||
log.Debugln("Start of the fasta file writing")
|
log.Debugln("Start of the fasta file writing")
|
||||||
go ff(iterator)
|
go ff(iterator)
|
||||||
for i := 1; i < nwriters; i++ {
|
for i := 0; i < nwriters-1; i++ {
|
||||||
go ff(iterator.Split())
|
go ff(iterator.Split())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -187,8 +184,7 @@ func WriteFasta(iterator obiiter.IBioSequence,
|
|||||||
// The function returns the same bio sequence iterator and an error if any occurred.
|
// The function returns the same bio sequence iterator and an error if any occurred.
|
||||||
func WriteFastaToStdout(iterator obiiter.IBioSequence,
|
func WriteFastaToStdout(iterator obiiter.IBioSequence,
|
||||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
// options = append(options, OptionDontCloseFile())
|
options = append(options, OptionDontCloseFile())
|
||||||
options = append(options, OptionCloseFile())
|
|
||||||
return WriteFasta(iterator, os.Stdout, options...)
|
return WriteFasta(iterator, os.Stdout, options...)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"bytes"
|
"bytes"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
@@ -13,8 +14,6 @@ import (
|
|||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
)
|
)
|
||||||
|
|
||||||
type FormatSeqBatch func(batch obiiter.BioSequenceBatch, formater FormatHeader, skipEmpty bool) *bytes.Buffer
|
|
||||||
|
|
||||||
func _formatFastq(buff *bytes.Buffer, seq *obiseq.BioSequence, formater FormatHeader) {
|
func _formatFastq(buff *bytes.Buffer, seq *obiseq.BioSequence, formater FormatHeader) {
|
||||||
|
|
||||||
info := ""
|
info := ""
|
||||||
@@ -50,7 +49,7 @@ func FormatFastq(seq *obiseq.BioSequence, formater FormatHeader) string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func FormatFastqBatch(batch obiiter.BioSequenceBatch,
|
func FormatFastqBatch(batch obiiter.BioSequenceBatch,
|
||||||
formater FormatHeader, skipEmpty bool) *bytes.Buffer {
|
formater FormatHeader, skipEmpty bool) []byte {
|
||||||
var bs bytes.Buffer
|
var bs bytes.Buffer
|
||||||
|
|
||||||
lt := 0
|
lt := 0
|
||||||
@@ -83,7 +82,14 @@ func FormatFastqBatch(batch obiiter.BioSequenceBatch,
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return &bs
|
chunk := bs.Bytes()
|
||||||
|
|
||||||
|
return chunk
|
||||||
|
}
|
||||||
|
|
||||||
|
type FileChunk struct {
|
||||||
|
text []byte
|
||||||
|
order int
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteFastq(iterator obiiter.IBioSequence,
|
func WriteFastq(iterator obiiter.IBioSequence,
|
||||||
@@ -91,6 +97,7 @@ func WriteFastq(iterator obiiter.IBioSequence,
|
|||||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
|
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
|
iterator = iterator.Rebatch(opt.BatchSize())
|
||||||
|
|
||||||
file, _ = obiutils.CompressStream(file, opt.CompressedFile(), opt.CloseFile())
|
file, _ = obiutils.CompressStream(file, opt.CompressedFile(), opt.CloseFile())
|
||||||
|
|
||||||
@@ -98,27 +105,29 @@ func WriteFastq(iterator obiiter.IBioSequence,
|
|||||||
|
|
||||||
nwriters := opt.ParallelWorkers()
|
nwriters := opt.ParallelWorkers()
|
||||||
|
|
||||||
chunkchan := WriteFileChunk(file, opt.CloseFile())
|
chunkchan := WriteSeqFileChunk(file, opt.CloseFile())
|
||||||
|
|
||||||
header_format := opt.FormatFastSeqHeader()
|
header_format := opt.FormatFastSeqHeader()
|
||||||
|
|
||||||
newIter.Add(nwriters)
|
newIter.Add(nwriters)
|
||||||
|
|
||||||
|
var waitWriter sync.WaitGroup
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
newIter.WaitAndClose()
|
newIter.WaitAndClose()
|
||||||
for len(chunkchan) > 0 {
|
for len(chunkchan) > 0 {
|
||||||
time.Sleep(time.Millisecond)
|
time.Sleep(time.Millisecond)
|
||||||
}
|
}
|
||||||
close(chunkchan)
|
close(chunkchan)
|
||||||
log.Debugf("Writing fastq file done")
|
waitWriter.Wait()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
ff := func(iterator obiiter.IBioSequence) {
|
ff := func(iterator obiiter.IBioSequence) {
|
||||||
for iterator.Next() {
|
for iterator.Next() {
|
||||||
batch := iterator.Get()
|
batch := iterator.Get()
|
||||||
chunk := FileChunk{
|
chunk := SeqFileChunk{
|
||||||
Source: batch.Source(),
|
Source: batch.Source(),
|
||||||
Raw: FormatFastqBatch(batch, header_format, opt.SkipEmptySequence()),
|
Raw: bytes.NewBuffer(FormatFastqBatch(batch, header_format, opt.SkipEmptySequence())),
|
||||||
Order: batch.Order(),
|
Order: batch.Order(),
|
||||||
}
|
}
|
||||||
chunkchan <- chunk
|
chunkchan <- chunk
|
||||||
@@ -129,7 +138,7 @@ func WriteFastq(iterator obiiter.IBioSequence,
|
|||||||
|
|
||||||
log.Debugln("Start of the fastq file writing")
|
log.Debugln("Start of the fastq file writing")
|
||||||
go ff(iterator)
|
go ff(iterator)
|
||||||
for i := 1; i < nwriters; i++ {
|
for i := 0; i < nwriters-1; i++ {
|
||||||
go ff(iterator.Split())
|
go ff(iterator.Split())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -138,9 +147,7 @@ func WriteFastq(iterator obiiter.IBioSequence,
|
|||||||
|
|
||||||
func WriteFastqToStdout(iterator obiiter.IBioSequence,
|
func WriteFastqToStdout(iterator obiiter.IBioSequence,
|
||||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
// options = append(options, OptionDontCloseFile())
|
options = append(options, OptionDontCloseFile())
|
||||||
options = append(options, OptionCloseFile())
|
|
||||||
|
|
||||||
return WriteFastq(iterator, os.Stdout, options...)
|
return WriteFastq(iterator, os.Stdout, options...)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,61 +0,0 @@
|
|||||||
package obiformats
|
|
||||||
|
|
||||||
import (
|
|
||||||
"io"
|
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
|
||||||
)
|
|
||||||
|
|
||||||
func WriteFileChunk(
|
|
||||||
writer io.WriteCloser,
|
|
||||||
toBeClosed bool) ChannelFileChunk {
|
|
||||||
|
|
||||||
obiutils.RegisterAPipe()
|
|
||||||
chunk_channel := make(ChannelFileChunk)
|
|
||||||
|
|
||||||
go func() {
|
|
||||||
nextToPrint := 0
|
|
||||||
toBePrinted := make(map[int]FileChunk)
|
|
||||||
for chunk := range chunk_channel {
|
|
||||||
if chunk.Order == nextToPrint {
|
|
||||||
log.Debugf("Writing chunk: %d of length %d bytes",
|
|
||||||
chunk.Order,
|
|
||||||
len(chunk.Raw.Bytes()))
|
|
||||||
|
|
||||||
n, err := writer.Write(chunk.Raw.Bytes())
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("Cannot write chunk %d only %d bytes written on %d sended : %v",
|
|
||||||
chunk.Order, n, len(chunk.Raw.Bytes()), err)
|
|
||||||
}
|
|
||||||
nextToPrint++
|
|
||||||
|
|
||||||
chunk, ok := toBePrinted[nextToPrint]
|
|
||||||
for ok {
|
|
||||||
log.Debug("Writing buffered chunk : ", chunk.Order)
|
|
||||||
_, _ = writer.Write(chunk.Raw.Bytes())
|
|
||||||
delete(toBePrinted, nextToPrint)
|
|
||||||
nextToPrint++
|
|
||||||
chunk, ok = toBePrinted[nextToPrint]
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
toBePrinted[chunk.Order] = chunk
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Debugf("FIle have to be closed : %v", toBeClosed)
|
|
||||||
if toBeClosed {
|
|
||||||
err := writer.Close()
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("Cannot close the writer : %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
obiutils.UnregisterPipe()
|
|
||||||
log.Debugf("The writer has been closed")
|
|
||||||
}()
|
|
||||||
|
|
||||||
return chunk_channel
|
|
||||||
}
|
|
||||||
@@ -198,7 +198,7 @@ func GenbankChunkParser(withFeatureTable bool) func(string, io.Reader) (obiseq.B
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func _ParseGenbankFile(input ChannelFileChunk,
|
func _ParseGenbankFile(input ChannelSeqFileChunk,
|
||||||
out obiiter.IBioSequence,
|
out obiiter.IBioSequence,
|
||||||
withFeatureTable bool) {
|
withFeatureTable bool) {
|
||||||
|
|
||||||
@@ -223,9 +223,9 @@ func ReadGenbank(reader io.Reader, options ...WithOption) (obiiter.IBioSequence,
|
|||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
// entry_channel := make(chan _FileChunk)
|
// entry_channel := make(chan _FileChunk)
|
||||||
|
|
||||||
buff := make([]byte, 1024*1024*128) // 128 MB
|
buff := make([]byte, 1024*1024*1024*256)
|
||||||
|
|
||||||
entry_channel := ReadFileChunk(
|
entry_channel := ReadSeqFileChunk(
|
||||||
opt.Source(),
|
opt.Source(),
|
||||||
reader,
|
reader,
|
||||||
buff,
|
buff,
|
||||||
@@ -266,9 +266,9 @@ func ReadGenbankFromFile(filename string, options ...WithOption) (obiiter.IBioSe
|
|||||||
|
|
||||||
options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))
|
options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))
|
||||||
|
|
||||||
reader, err = obiutils.Ropen(filename)
|
reader, err = Ropen(filename)
|
||||||
|
|
||||||
if err == obiutils.ErrNoContent {
|
if err == ErrNoContent {
|
||||||
log.Infof("file %s is empty", filename)
|
log.Infof("file %s is empty", filename)
|
||||||
return ReadEmptyFile(options...)
|
return ReadEmptyFile(options...)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync/atomic"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/goccy/go-json"
|
"github.com/goccy/go-json"
|
||||||
@@ -58,17 +58,9 @@ func JSONRecord(sequence *obiseq.BioSequence) []byte {
|
|||||||
return text
|
return text
|
||||||
}
|
}
|
||||||
|
|
||||||
func FormatJSONBatch(batch obiiter.BioSequenceBatch) *bytes.Buffer {
|
func FormatJSONBatch(batch obiiter.BioSequenceBatch) []byte {
|
||||||
buff := new(bytes.Buffer)
|
buff := new(bytes.Buffer)
|
||||||
|
|
||||||
json := bufio.NewWriter(buff)
|
json := bufio.NewWriter(buff)
|
||||||
|
|
||||||
if batch.Order() == 0 {
|
|
||||||
json.WriteString("[\n")
|
|
||||||
} else {
|
|
||||||
json.WriteString(",\n")
|
|
||||||
}
|
|
||||||
|
|
||||||
n := batch.Slice().Len() - 1
|
n := batch.Slice().Len() - 1
|
||||||
for i, s := range batch.Slice() {
|
for i, s := range batch.Slice() {
|
||||||
json.WriteString(" ")
|
json.WriteString(" ")
|
||||||
@@ -79,36 +71,35 @@ func FormatJSONBatch(batch obiiter.BioSequenceBatch) *bytes.Buffer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
json.Flush()
|
json.Flush()
|
||||||
return buff
|
|
||||||
|
return buff.Bytes()
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteJSON(iterator obiiter.IBioSequence,
|
func WriteJSON(iterator obiiter.IBioSequence,
|
||||||
file io.WriteCloser,
|
file io.WriteCloser,
|
||||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
var latestChunk atomic.Int64
|
|
||||||
|
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
|
|
||||||
file, _ = obiutils.CompressStream(file, opt.CompressedFile(), opt.CloseFile())
|
file, _ = obiutils.CompressStream(file, opt.CompressedFile(), opt.CloseFile())
|
||||||
|
|
||||||
newIter := obiiter.MakeIBioSequence()
|
newIter := obiiter.MakeIBioSequence()
|
||||||
|
|
||||||
nwriters := opt.ParallelWorkers()
|
nwriters := opt.ParallelWorkers()
|
||||||
|
|
||||||
chunkchan := WriteFileChunk(file, opt.CloseFile())
|
obiiter.RegisterAPipe()
|
||||||
|
chunkchan := make(chan FileChunk)
|
||||||
|
|
||||||
newIter.Add(nwriters)
|
newIter.Add(nwriters)
|
||||||
|
var waitWriter sync.WaitGroup
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
newIter.WaitAndClose()
|
newIter.WaitAndClose()
|
||||||
|
|
||||||
chunkchan <- FileChunk{
|
|
||||||
Source: "end",
|
|
||||||
Raw: bytes.NewBuffer([]byte("\n]\n")),
|
|
||||||
Order: int(latestChunk.Load()) + 1,
|
|
||||||
}
|
|
||||||
for len(chunkchan) > 0 {
|
for len(chunkchan) > 0 {
|
||||||
time.Sleep(time.Millisecond)
|
time.Sleep(time.Millisecond)
|
||||||
}
|
}
|
||||||
close(chunkchan)
|
close(chunkchan)
|
||||||
|
waitWriter.Wait()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
ff := func(iterator obiiter.IBioSequence) {
|
ff := func(iterator obiiter.IBioSequence) {
|
||||||
@@ -116,32 +107,62 @@ func WriteJSON(iterator obiiter.IBioSequence,
|
|||||||
|
|
||||||
batch := iterator.Get()
|
batch := iterator.Get()
|
||||||
|
|
||||||
ss := FileChunk{
|
chunkchan <- FileChunk{
|
||||||
Source: batch.Source(),
|
FormatJSONBatch(batch),
|
||||||
Raw: FormatJSONBatch(batch),
|
batch.Order(),
|
||||||
Order: batch.Order(),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
chunkchan <- ss
|
|
||||||
latestChunk.Store(int64(batch.Order()))
|
|
||||||
newIter.Push(batch)
|
newIter.Push(batch)
|
||||||
}
|
}
|
||||||
newIter.Done()
|
newIter.Done()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
next_to_send := 0
|
||||||
|
received := make(map[int]FileChunk, 100)
|
||||||
|
|
||||||
|
waitWriter.Add(1)
|
||||||
|
go func() {
|
||||||
|
for chunk := range chunkchan {
|
||||||
|
if chunk.order == next_to_send {
|
||||||
|
if next_to_send > 0 {
|
||||||
|
file.Write([]byte(",\n"))
|
||||||
|
}
|
||||||
|
file.Write(chunk.text)
|
||||||
|
next_to_send++
|
||||||
|
chunk, ok := received[next_to_send]
|
||||||
|
for ok {
|
||||||
|
file.Write(chunk.text)
|
||||||
|
delete(received, next_to_send)
|
||||||
|
next_to_send++
|
||||||
|
chunk, ok = received[next_to_send]
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
received[chunk.order] = chunk
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
file.Write([]byte("\n]\n"))
|
||||||
|
file.Close()
|
||||||
|
|
||||||
|
log.Debugln("End of the JSON file writing")
|
||||||
|
obiiter.UnregisterPipe()
|
||||||
|
waitWriter.Done()
|
||||||
|
|
||||||
|
}()
|
||||||
|
|
||||||
log.Debugln("Start of the JSON file writing")
|
log.Debugln("Start of the JSON file writing")
|
||||||
for i := 1; i < nwriters; i++ {
|
file.Write([]byte("[\n"))
|
||||||
|
go ff(iterator)
|
||||||
|
for i := 0; i < nwriters-1; i++ {
|
||||||
go ff(iterator.Split())
|
go ff(iterator.Split())
|
||||||
}
|
}
|
||||||
go ff(iterator)
|
|
||||||
|
|
||||||
return newIter, nil
|
return newIter, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteJSONToStdout(iterator obiiter.IBioSequence,
|
func WriteJSONToStdout(iterator obiiter.IBioSequence,
|
||||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
options = append(options, OptionCloseFile())
|
options = append(options, OptionDontCloseFile())
|
||||||
|
|
||||||
return WriteJSON(iterator, os.Stdout, options...)
|
return WriteJSON(iterator, os.Stdout, options...)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
169
pkg/obiformats/ncbitaxdump/read.go
Normal file
169
pkg/obiformats/ncbitaxdump/read.go
Normal file
@@ -0,0 +1,169 @@
|
|||||||
|
package ncbitaxdump
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"encoding/csv"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"path"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
|
)
|
||||||
|
|
||||||
|
func loadNodeTable(reader io.Reader, taxonomy *obitax.Taxonomy) {
|
||||||
|
file := csv.NewReader(reader)
|
||||||
|
file.Comma = '|'
|
||||||
|
file.Comment = '#'
|
||||||
|
file.TrimLeadingSpace = true
|
||||||
|
file.ReuseRecord = true
|
||||||
|
|
||||||
|
n := 0
|
||||||
|
|
||||||
|
for record, err := file.Read(); err == nil; record, err = file.Read() {
|
||||||
|
n++
|
||||||
|
taxid, err := strconv.Atoi(strings.TrimSpace(record[0]))
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Panicf("Cannot read taxon taxid at line %d: %v", n, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
parent, err := strconv.Atoi(strings.TrimSpace(record[1]))
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Panicf("Cannot read taxon parent taxid at line %d: %v", n, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
rank := strings.TrimSpace(record[2])
|
||||||
|
|
||||||
|
taxonomy.AddNewTaxa(taxid, parent, rank, true, true)
|
||||||
|
}
|
||||||
|
|
||||||
|
taxonomy.ReindexParent()
|
||||||
|
}
|
||||||
|
|
||||||
|
func loadNameTable(reader io.Reader, taxonomy *obitax.Taxonomy, onlysn bool) int {
|
||||||
|
// file := csv.NewReader(reader)
|
||||||
|
// file.Comma = '|'
|
||||||
|
// file.Comment = '#'
|
||||||
|
// file.TrimLeadingSpace = true
|
||||||
|
// file.ReuseRecord = true
|
||||||
|
// file.LazyQuotes = true
|
||||||
|
file := bufio.NewReader(reader)
|
||||||
|
|
||||||
|
n := 0
|
||||||
|
l := 0
|
||||||
|
|
||||||
|
for line, prefix, err := file.ReadLine(); err == nil; line, prefix, err = file.ReadLine() {
|
||||||
|
l++
|
||||||
|
if prefix {
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
|
||||||
|
record := strings.Split(string(line), "|")
|
||||||
|
taxid, err := strconv.Atoi(strings.TrimSpace(record[0]))
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Panicf("Cannot read taxon name taxid at line %d: %v", l, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
name := strings.TrimSpace(record[1])
|
||||||
|
classname := strings.TrimSpace(record[3])
|
||||||
|
|
||||||
|
if !onlysn || classname == "scientific name" {
|
||||||
|
n++
|
||||||
|
taxonomy.AddNewName(taxid, &name, &classname)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return n
|
||||||
|
}
|
||||||
|
|
||||||
|
func loadMergedTable(reader io.Reader, taxonomy *obitax.Taxonomy) int {
|
||||||
|
file := csv.NewReader(reader)
|
||||||
|
file.Comma = '|'
|
||||||
|
file.Comment = '#'
|
||||||
|
file.TrimLeadingSpace = true
|
||||||
|
file.ReuseRecord = true
|
||||||
|
|
||||||
|
n := 0
|
||||||
|
|
||||||
|
for record, err := file.Read(); err == nil; record, err = file.Read() {
|
||||||
|
n++
|
||||||
|
oldtaxid, err := strconv.Atoi(strings.TrimSpace(record[0]))
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Panicf("Cannot read alias taxid at line %d: %v", n, err)
|
||||||
|
}
|
||||||
|
newtaxid, err := strconv.Atoi(strings.TrimSpace(record[1]))
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Panicf("Cannot read alias new taxid at line %d: %v", n, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
taxonomy.AddNewAlias(newtaxid, oldtaxid)
|
||||||
|
}
|
||||||
|
|
||||||
|
return n
|
||||||
|
}
|
||||||
|
|
||||||
|
func LoadNCBITaxDump(directory string, onlysn bool) (*obitax.Taxonomy, error) {
|
||||||
|
|
||||||
|
taxonomy := obitax.NewTaxonomy()
|
||||||
|
|
||||||
|
//
|
||||||
|
// Load the Taxonomy nodes
|
||||||
|
//
|
||||||
|
|
||||||
|
log.Printf("Loading Taxonomy nodes\n")
|
||||||
|
|
||||||
|
nodefile, err := os.Open(path.Join(directory, "nodes.dmp"))
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("cannot open nodes file from '%s'",
|
||||||
|
directory)
|
||||||
|
}
|
||||||
|
defer nodefile.Close()
|
||||||
|
|
||||||
|
buffered := bufio.NewReader(nodefile)
|
||||||
|
loadNodeTable(buffered, taxonomy)
|
||||||
|
log.Printf("%d Taxonomy nodes read\n", taxonomy.Len())
|
||||||
|
|
||||||
|
//
|
||||||
|
// Load the Taxonomy nodes
|
||||||
|
//
|
||||||
|
|
||||||
|
log.Printf("Loading Taxon names\n")
|
||||||
|
|
||||||
|
namefile, nerr := os.Open(path.Join(directory, "names.dmp"))
|
||||||
|
if nerr != nil {
|
||||||
|
return nil, fmt.Errorf("cannot open names file from '%s'",
|
||||||
|
directory)
|
||||||
|
}
|
||||||
|
defer namefile.Close()
|
||||||
|
|
||||||
|
n := loadNameTable(namefile, taxonomy, onlysn)
|
||||||
|
log.Printf("%d taxon names read\n", n)
|
||||||
|
|
||||||
|
//
|
||||||
|
// Load the merged taxa
|
||||||
|
//
|
||||||
|
|
||||||
|
log.Printf("Loading Merged taxa\n")
|
||||||
|
|
||||||
|
aliasfile, aerr := os.Open(path.Join(directory, "merged.dmp"))
|
||||||
|
if aerr != nil {
|
||||||
|
return nil, fmt.Errorf("cannot open merged file from '%s'",
|
||||||
|
directory)
|
||||||
|
}
|
||||||
|
defer aliasfile.Close()
|
||||||
|
|
||||||
|
buffered = bufio.NewReader(aliasfile)
|
||||||
|
n = loadMergedTable(buffered, taxonomy)
|
||||||
|
log.Printf("%d merged taxa read\n", n)
|
||||||
|
|
||||||
|
return taxonomy, nil
|
||||||
|
}
|
||||||
@@ -536,24 +536,6 @@ var library_parameter = map[string]func(library *obingslibrary.NGSLibrary, value
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
// ReadCSVNGSFilter reads an NGS filter configuration from a CSV file and returns
|
|
||||||
// an NGSLibrary. The CSV file must include columns for 'experiment', 'sample',
|
|
||||||
// 'sample_tag', 'forward_primer', and 'reverse_primer'. Additional columns are
|
|
||||||
// used to annotate PCR samples.
|
|
||||||
//
|
|
||||||
// Parameters:
|
|
||||||
// - reader: an io.Reader providing the CSV input.
|
|
||||||
//
|
|
||||||
// Returns:
|
|
||||||
// - A pointer to an NGSLibrary populated with the data from the CSV file.
|
|
||||||
// - An error if the CSV is malformed or required columns are missing.
|
|
||||||
//
|
|
||||||
// The function processes both data records and parameter lines starting with
|
|
||||||
// '@param'. Parameter lines configure various aspects of the library.
|
|
||||||
//
|
|
||||||
// Each row in the CSV is validated to ensure it has the correct number of columns.
|
|
||||||
// Duplicate tag pairs for the same marker result in an error. Primer unicity is
|
|
||||||
// checked, and any unknown parameters are logged as warnings.
|
|
||||||
func ReadCSVNGSFilter(reader io.Reader) (*obingslibrary.NGSLibrary, error) {
|
func ReadCSVNGSFilter(reader io.Reader) (*obingslibrary.NGSLibrary, error) {
|
||||||
ngsfilter := obingslibrary.MakeNGSLibrary()
|
ngsfilter := obingslibrary.MakeNGSLibrary()
|
||||||
file := csv.NewReader(reader)
|
file := csv.NewReader(reader)
|
||||||
@@ -594,7 +576,6 @@ func ReadCSVNGSFilter(reader io.Reader) (*obingslibrary.NGSLibrary, error) {
|
|||||||
extraColumns := make([]int, 0)
|
extraColumns := make([]int, 0)
|
||||||
|
|
||||||
for i, colName := range header {
|
for i, colName := range header {
|
||||||
|
|
||||||
switch colName {
|
switch colName {
|
||||||
case "experiment":
|
case "experiment":
|
||||||
experimentColIndex = i
|
experimentColIndex = i
|
||||||
@@ -661,8 +642,6 @@ func ReadCSVNGSFilter(reader io.Reader) (*obingslibrary.NGSLibrary, error) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ngsfilter.CheckPrimerUnicity()
|
|
||||||
|
|
||||||
for i := 0; i < len(params); i++ {
|
for i := 0; i < len(params); i++ {
|
||||||
param := params[i][1]
|
param := params[i][1]
|
||||||
if len(params[i]) < 3 {
|
if len(params[i]) < 3 {
|
||||||
|
|||||||
@@ -1,14 +1,13 @@
|
|||||||
package obiformats
|
package obiformats
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
)
|
)
|
||||||
|
|
||||||
type __options__ struct {
|
type __options__ struct {
|
||||||
fastseq_header_parser obiseq.SeqAnnotator
|
fastseq_header_parser obiseq.SeqAnnotator
|
||||||
fastseq_header_writer BioSequenceFormater
|
fastseq_header_writer func(*obiseq.BioSequence) string
|
||||||
seqBatchFormater FormatSeqBatch
|
|
||||||
with_progress_bar bool
|
with_progress_bar bool
|
||||||
buffer_size int
|
buffer_size int
|
||||||
batch_size int
|
batch_size int
|
||||||
@@ -20,7 +19,6 @@ type __options__ struct {
|
|||||||
appendfile bool
|
appendfile bool
|
||||||
compressed bool
|
compressed bool
|
||||||
skip_empty bool
|
skip_empty bool
|
||||||
with_quality bool
|
|
||||||
csv_id bool
|
csv_id bool
|
||||||
csv_sequence bool
|
csv_sequence bool
|
||||||
csv_quality bool
|
csv_quality bool
|
||||||
@@ -46,11 +44,10 @@ func MakeOptions(setters []WithOption) Options {
|
|||||||
o := __options__{
|
o := __options__{
|
||||||
fastseq_header_parser: ParseGuessedFastSeqHeader,
|
fastseq_header_parser: ParseGuessedFastSeqHeader,
|
||||||
fastseq_header_writer: FormatFastSeqJsonHeader,
|
fastseq_header_writer: FormatFastSeqJsonHeader,
|
||||||
seqBatchFormater: nil,
|
|
||||||
with_progress_bar: false,
|
with_progress_bar: false,
|
||||||
buffer_size: 2,
|
buffer_size: 2,
|
||||||
parallel_workers: obidefault.ReadParallelWorkers(),
|
parallel_workers: obioptions.CLIReadParallelWorkers(),
|
||||||
batch_size: obidefault.BatchSize(),
|
batch_size: obioptions.CLIBatchSize(),
|
||||||
total_seq_size: 1024 * 1024 * 100, // 100 MB by default
|
total_seq_size: 1024 * 1024 * 100, // 100 MB by default
|
||||||
no_order: false,
|
no_order: false,
|
||||||
full_file_batch: false,
|
full_file_batch: false,
|
||||||
@@ -58,7 +55,6 @@ func MakeOptions(setters []WithOption) Options {
|
|||||||
appendfile: false,
|
appendfile: false,
|
||||||
compressed: false,
|
compressed: false,
|
||||||
skip_empty: false,
|
skip_empty: false,
|
||||||
with_quality: true,
|
|
||||||
csv_id: true,
|
csv_id: true,
|
||||||
csv_definition: false,
|
csv_definition: false,
|
||||||
csv_count: false,
|
csv_count: false,
|
||||||
@@ -107,10 +103,6 @@ func (opt Options) FormatFastSeqHeader() func(*obiseq.BioSequence) string {
|
|||||||
return opt.pointer.fastseq_header_writer
|
return opt.pointer.fastseq_header_writer
|
||||||
}
|
}
|
||||||
|
|
||||||
func (opt Options) SequenceFormater() FormatSeqBatch {
|
|
||||||
return opt.pointer.seqBatchFormater
|
|
||||||
}
|
|
||||||
|
|
||||||
func (opt Options) NoOrder() bool {
|
func (opt Options) NoOrder() bool {
|
||||||
return opt.pointer.no_order
|
return opt.pointer.no_order
|
||||||
}
|
}
|
||||||
@@ -135,10 +127,6 @@ func (opt Options) SkipEmptySequence() bool {
|
|||||||
return opt.pointer.skip_empty
|
return opt.pointer.skip_empty
|
||||||
}
|
}
|
||||||
|
|
||||||
func (opt Options) ReadQualities() bool {
|
|
||||||
return opt.pointer.with_quality
|
|
||||||
}
|
|
||||||
|
|
||||||
func (opt Options) CSVId() bool {
|
func (opt Options) CSVId() bool {
|
||||||
return opt.pointer.csv_id
|
return opt.pointer.csv_id
|
||||||
}
|
}
|
||||||
@@ -231,6 +219,8 @@ func OptionNoOrder(no_order bool) WithOption {
|
|||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
func OptionsCompressed(compressed bool) WithOption {
|
func OptionsCompressed(compressed bool) WithOption {
|
||||||
f := WithOption(func(opt Options) {
|
f := WithOption(func(opt Options) {
|
||||||
opt.pointer.compressed = compressed
|
opt.pointer.compressed = compressed
|
||||||
@@ -247,14 +237,6 @@ func OptionsSkipEmptySequence(skip bool) WithOption {
|
|||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
func OptionsReadQualities(read bool) WithOption {
|
|
||||||
f := WithOption(func(opt Options) {
|
|
||||||
opt.pointer.with_quality = read
|
|
||||||
})
|
|
||||||
|
|
||||||
return f
|
|
||||||
}
|
|
||||||
|
|
||||||
func OptionsNewFile() WithOption {
|
func OptionsNewFile() WithOption {
|
||||||
f := WithOption(func(opt Options) {
|
f := WithOption(func(opt Options) {
|
||||||
opt.pointer.appendfile = false
|
opt.pointer.appendfile = false
|
||||||
@@ -289,14 +271,6 @@ func OptionsFastSeqHeaderFormat(format func(*obiseq.BioSequence) string) WithOpt
|
|||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
func OptionsSequenceFormater(formater FormatSeqBatch) WithOption {
|
|
||||||
f := WithOption(func(opt Options) {
|
|
||||||
opt.pointer.seqBatchFormater = formater
|
|
||||||
})
|
|
||||||
|
|
||||||
return f
|
|
||||||
}
|
|
||||||
|
|
||||||
func OptionsParallelWorkers(nworkers int) WithOption {
|
func OptionsParallelWorkers(nworkers int) WithOption {
|
||||||
f := WithOption(func(opt Options) {
|
f := WithOption(func(opt Options) {
|
||||||
opt.pointer.parallel_workers = nworkers
|
opt.pointer.parallel_workers = nworkers
|
||||||
|
|||||||
@@ -9,15 +9,17 @@ import (
|
|||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var _FileChunkSize = 1024 * 1024 * 10
|
||||||
|
|
||||||
type SeqFileChunkParser func(string, io.Reader) (obiseq.BioSequenceSlice, error)
|
type SeqFileChunkParser func(string, io.Reader) (obiseq.BioSequenceSlice, error)
|
||||||
|
|
||||||
type FileChunk struct {
|
type SeqFileChunk struct {
|
||||||
Source string
|
Source string
|
||||||
Raw *bytes.Buffer
|
Raw *bytes.Buffer
|
||||||
Order int
|
Order int
|
||||||
}
|
}
|
||||||
|
|
||||||
type ChannelFileChunk chan FileChunk
|
type ChannelSeqFileChunk chan SeqFileChunk
|
||||||
|
|
||||||
type LastSeqRecord func([]byte) int
|
type LastSeqRecord func([]byte) int
|
||||||
|
|
||||||
@@ -34,17 +36,15 @@ type LastSeqRecord func([]byte) int
|
|||||||
//
|
//
|
||||||
// Returns:
|
// Returns:
|
||||||
// None
|
// None
|
||||||
func ReadFileChunk(
|
func ReadSeqFileChunk(
|
||||||
source string,
|
source string,
|
||||||
reader io.Reader,
|
reader io.Reader,
|
||||||
buff []byte,
|
buff []byte,
|
||||||
splitter LastSeqRecord) ChannelFileChunk {
|
splitter LastSeqRecord) ChannelSeqFileChunk {
|
||||||
var err error
|
var err error
|
||||||
var fullbuff []byte
|
var fullbuff []byte
|
||||||
|
|
||||||
chunk_channel := make(ChannelFileChunk)
|
chunk_channel := make(ChannelSeqFileChunk)
|
||||||
|
|
||||||
fileChunkSize := len(buff)
|
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
size := 0
|
size := 0
|
||||||
@@ -71,13 +71,11 @@ func ReadFileChunk(
|
|||||||
// Read from the reader in 1 MB increments until the end of the last entry is found
|
// Read from the reader in 1 MB increments until the end of the last entry is found
|
||||||
for end = splitter(buff); err == nil && end < 0; end = splitter(buff) {
|
for end = splitter(buff); err == nil && end < 0; end = splitter(buff) {
|
||||||
ic++
|
ic++
|
||||||
buff = slices.Grow(buff, fileChunkSize)
|
buff = slices.Grow(buff, _FileChunkSize)
|
||||||
l := len(buff)
|
l := len(buff)
|
||||||
extbuff := buff[l:(l + fileChunkSize - 1)]
|
extbuff := buff[l:(l + _FileChunkSize - 1)]
|
||||||
size, err = io.ReadFull(reader, extbuff)
|
size, err = io.ReadFull(reader, extbuff)
|
||||||
buff = buff[0:(l + size)]
|
buff = buff[0:(l + size)]
|
||||||
// log.Warnf("Splitter not found, attempting %d to read in %d B increments : len(buff) = %d/%d", ic, fileChunkSize, len(extbuff), len(buff))
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fullbuff = buff
|
fullbuff = buff
|
||||||
@@ -95,10 +93,8 @@ func ReadFileChunk(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if len(buff) > 0 {
|
if len(buff) > 0 {
|
||||||
cbuff := slices.Clone(buff)
|
io := bytes.NewBuffer(slices.Clone(buff))
|
||||||
io := bytes.NewBuffer(cbuff)
|
chunk_channel <- SeqFileChunk{source, io, i}
|
||||||
// log.Warnf("chuck %d :Read %d bytes from file %s", i, io.Len(), source)
|
|
||||||
chunk_channel <- FileChunk{source, io, i}
|
|
||||||
i++
|
i++
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -122,7 +118,7 @@ func ReadFileChunk(
|
|||||||
// Send the last chunk to the channel
|
// Send the last chunk to the channel
|
||||||
if len(buff) > 0 {
|
if len(buff) > 0 {
|
||||||
io := bytes.NewBuffer(slices.Clone(buff))
|
io := bytes.NewBuffer(slices.Clone(buff))
|
||||||
chunk_channel <- FileChunk{source, io, i}
|
chunk_channel <- SeqFileChunk{source, io, i}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Close the readers channel when the end of the file is reached
|
// Close the readers channel when the end of the file is reached
|
||||||
51
pkg/obiformats/seqfile_chunk_write.go
Normal file
51
pkg/obiformats/seqfile_chunk_write.go
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
package obiformats
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
)
|
||||||
|
|
||||||
|
func WriteSeqFileChunk(
|
||||||
|
writer io.WriteCloser,
|
||||||
|
toBeClosed bool) ChannelSeqFileChunk {
|
||||||
|
|
||||||
|
obiiter.RegisterAPipe()
|
||||||
|
|
||||||
|
chunk_channel := make(ChannelSeqFileChunk)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
nextToPrint := 0
|
||||||
|
toBePrinted := make(map[int]SeqFileChunk)
|
||||||
|
for chunk := range chunk_channel {
|
||||||
|
if chunk.Order == nextToPrint {
|
||||||
|
_, _ = writer.Write(chunk.Raw.Bytes())
|
||||||
|
nextToPrint++
|
||||||
|
|
||||||
|
chunk, ok := toBePrinted[nextToPrint]
|
||||||
|
for ok {
|
||||||
|
_, _ = writer.Write(chunk.Raw.Bytes())
|
||||||
|
delete(toBePrinted, nextToPrint)
|
||||||
|
nextToPrint++
|
||||||
|
chunk, ok = toBePrinted[nextToPrint]
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
toBePrinted[chunk.Order] = chunk
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if toBeClosed {
|
||||||
|
err := writer.Close()
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Cannot close the writer : %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
obiiter.UnregisterPipe()
|
||||||
|
log.Warnf("The writer has been closed")
|
||||||
|
}()
|
||||||
|
|
||||||
|
return chunk_channel
|
||||||
|
}
|
||||||
@@ -3,8 +3,6 @@ package obiformats
|
|||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
"bytes"
|
"bytes"
|
||||||
"encoding/csv"
|
|
||||||
"errors"
|
|
||||||
"io"
|
"io"
|
||||||
"path"
|
"path"
|
||||||
"regexp"
|
"regexp"
|
||||||
@@ -41,38 +39,13 @@ type SequenceReader func(reader io.Reader, options ...WithOption) (obiiter.IBioS
|
|||||||
// - io.Reader: A modified reader with the read data.
|
// - io.Reader: A modified reader with the read data.
|
||||||
// - error: Any error encountered during the process.
|
// - error: Any error encountered during the process.
|
||||||
func OBIMimeTypeGuesser(stream io.Reader) (*mimetype.MIME, io.Reader, error) {
|
func OBIMimeTypeGuesser(stream io.Reader) (*mimetype.MIME, io.Reader, error) {
|
||||||
csv := func(in []byte, limit uint32) bool {
|
|
||||||
in = dropLastLine(in, limit)
|
|
||||||
|
|
||||||
br := bytes.NewReader(in)
|
|
||||||
r := csv.NewReader(br)
|
|
||||||
r.Comma = ','
|
|
||||||
r.ReuseRecord = true
|
|
||||||
r.LazyQuotes = true
|
|
||||||
r.Comment = '#'
|
|
||||||
|
|
||||||
lines := 0
|
|
||||||
for {
|
|
||||||
_, err := r.Read()
|
|
||||||
if errors.Is(err, io.EOF) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
if err != nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
lines++
|
|
||||||
}
|
|
||||||
|
|
||||||
return r.FieldsPerRecord > 1 && lines > 1
|
|
||||||
}
|
|
||||||
|
|
||||||
fastaDetector := func(raw []byte, limit uint32) bool {
|
fastaDetector := func(raw []byte, limit uint32) bool {
|
||||||
ok, err := regexp.Match("^>[^ ]", raw)
|
ok, err := regexp.Match("^>[^ ]", raw)
|
||||||
return ok && err == nil
|
return ok && err == nil
|
||||||
}
|
}
|
||||||
|
|
||||||
fastqDetector := func(raw []byte, limit uint32) bool {
|
fastqDetector := func(raw []byte, limit uint32) bool {
|
||||||
ok, err := regexp.Match("^@[^ ].*\n[A-Za-z.-]+", raw)
|
ok, err := regexp.Match("^@[^ ].*\n[^ ]+\n\\+", raw)
|
||||||
return ok && err == nil
|
return ok && err == nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -97,17 +70,15 @@ func OBIMimeTypeGuesser(stream io.Reader) (*mimetype.MIME, io.Reader, error) {
|
|||||||
mimetype.Lookup("text/plain").Extend(ecoPCR2Detector, "text/ecopcr2", ".ecopcr")
|
mimetype.Lookup("text/plain").Extend(ecoPCR2Detector, "text/ecopcr2", ".ecopcr")
|
||||||
mimetype.Lookup("text/plain").Extend(genbankDetector, "text/genbank", ".seq")
|
mimetype.Lookup("text/plain").Extend(genbankDetector, "text/genbank", ".seq")
|
||||||
mimetype.Lookup("text/plain").Extend(emblDetector, "text/embl", ".dat")
|
mimetype.Lookup("text/plain").Extend(emblDetector, "text/embl", ".dat")
|
||||||
mimetype.Lookup("text/plain").Extend(csv, "text/csv", ".csv")
|
|
||||||
|
|
||||||
mimetype.Lookup("application/octet-stream").Extend(fastaDetector, "text/fasta", ".fasta")
|
mimetype.Lookup("application/octet-stream").Extend(fastaDetector, "text/fasta", ".fasta")
|
||||||
mimetype.Lookup("application/octet-stream").Extend(fastqDetector, "text/fastq", ".fastq")
|
mimetype.Lookup("application/octet-stream").Extend(fastqDetector, "text/fastq", ".fastq")
|
||||||
mimetype.Lookup("application/octet-stream").Extend(ecoPCR2Detector, "text/ecopcr2", ".ecopcr")
|
mimetype.Lookup("application/octet-stream").Extend(ecoPCR2Detector, "text/ecopcr2", ".ecopcr")
|
||||||
mimetype.Lookup("application/octet-stream").Extend(genbankDetector, "text/genbank", ".seq")
|
mimetype.Lookup("application/octet-stream").Extend(genbankDetector, "text/genbank", ".seq")
|
||||||
mimetype.Lookup("application/octet-stream").Extend(emblDetector, "text/embl", ".dat")
|
mimetype.Lookup("application/octet-stream").Extend(emblDetector, "text/embl", ".dat")
|
||||||
mimetype.Lookup("application/octet-stream").Extend(csv, "text/csv", ".csv")
|
|
||||||
|
|
||||||
// Create a buffer to store the read data
|
// Create a buffer to store the read data
|
||||||
buf := make([]byte, 1024*1024)
|
buf := make([]byte, 1024*128)
|
||||||
n, err := io.ReadFull(stream, buf)
|
n, err := io.ReadFull(stream, buf)
|
||||||
|
|
||||||
if err != nil && err != io.ErrUnexpectedEOF {
|
if err != nil && err != io.ErrUnexpectedEOF {
|
||||||
@@ -116,7 +87,6 @@ func OBIMimeTypeGuesser(stream io.Reader) (*mimetype.MIME, io.Reader, error) {
|
|||||||
|
|
||||||
// Detect the MIME type using the mimetype library
|
// Detect the MIME type using the mimetype library
|
||||||
mimeType := mimetype.Detect(buf)
|
mimeType := mimetype.Detect(buf)
|
||||||
|
|
||||||
if mimeType == nil {
|
if mimeType == nil {
|
||||||
return nil, nil, err
|
return nil, nil, err
|
||||||
}
|
}
|
||||||
@@ -172,15 +142,15 @@ func OBIMimeTypeGuesser(stream io.Reader) (*mimetype.MIME, io.Reader, error) {
|
|||||||
// - error: An error if any occurred during the reading process.
|
// - error: An error if any occurred during the reading process.
|
||||||
func ReadSequencesFromFile(filename string,
|
func ReadSequencesFromFile(filename string,
|
||||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
var file *obiutils.Reader
|
var file *Reader
|
||||||
var reader io.Reader
|
var reader io.Reader
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))
|
options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))
|
||||||
|
|
||||||
file, err = obiutils.Ropen(filename)
|
file, err = Ropen(filename)
|
||||||
|
|
||||||
if err == obiutils.ErrNoContent {
|
if err == ErrNoContent {
|
||||||
log.Infof("file %s is empty", filename)
|
log.Infof("file %s is empty", filename)
|
||||||
return ReadEmptyFile(options...)
|
return ReadEmptyFile(options...)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -45,8 +45,7 @@ func WriteSequence(iterator obiiter.IBioSequence,
|
|||||||
|
|
||||||
func WriteSequencesToStdout(iterator obiiter.IBioSequence,
|
func WriteSequencesToStdout(iterator obiiter.IBioSequence,
|
||||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
// options = append(options, OptionDontCloseFile())
|
options = append(options, OptionDontCloseFile())
|
||||||
options = append(options, OptionCloseFile())
|
|
||||||
return WriteSequence(iterator, os.Stdout, options...)
|
return WriteSequence(iterator, os.Stdout, options...)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
// Ropen opens a (possibly gzipped) file/process/http site for buffered reading.
|
// Ropen opens a (possibly gzipped) file/process/http site for buffered reading.
|
||||||
// Wopen opens a (possibly gzipped) file for buffered writing.
|
// Wopen opens a (possibly gzipped) file for buffered writing.
|
||||||
// Both will use gzip when appropriate and will user buffered IO.
|
// Both will use gzip when appropriate and will user buffered IO.
|
||||||
package obiutils
|
package obiformats
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package obiutils
|
package obiformats
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user