mirror of
https://github.com/metabarcoding/obitools4.git
synced 2026-03-25 21:40:52 +00:00
Compare commits
43 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7b23314651 | ||
|
|
1e541eac4c | ||
|
|
13cd4c86ac | ||
|
|
75dd535201 | ||
|
|
573acafafc | ||
|
|
0067152c2b | ||
|
|
791d253edc | ||
|
|
6245d7f684 | ||
|
|
13d610aff7 | ||
|
|
db284f1d44 | ||
|
|
51b3e83d32 | ||
|
|
8671285d02 | ||
|
|
51d11aa36d | ||
|
|
fb6f857d8c | ||
|
|
d4209b4549 | ||
|
|
ef05d4975f | ||
|
|
4588bf8b5d | ||
|
|
090633850d | ||
|
|
15a058cf63 | ||
|
|
2f5f7634d6 | ||
|
|
48138b605c | ||
|
|
aed22c12a6 | ||
|
|
443a9b3ce3 | ||
|
|
7e90537379 | ||
|
|
d3d15acc6c | ||
|
|
bd4a0b5ca5 | ||
|
|
952f85f312 | ||
|
|
4774438644 | ||
|
|
6a8061cc4f | ||
|
|
e2563cd8df | ||
|
|
f2e81adf95 | ||
|
|
f27e9bc91e | ||
|
|
773e54965d | ||
|
|
ceca33998b | ||
|
|
b9bee5f426 | ||
|
|
c10df073a7 | ||
|
|
d3dac1b21f | ||
|
|
0df082da06 | ||
|
|
2452aef7a9 | ||
|
|
337954592d | ||
|
|
8a28c9ae7c | ||
|
|
b6b18c0fa1 | ||
|
|
67e2758d63 |
19
.github/workflows/obitest.yml
vendored
Normal file
19
.github/workflows/obitest.yml
vendored
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
name: "Run the obitools command test suite"
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
- V*
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Setup Go
|
||||||
|
uses: actions/setup-go@v2
|
||||||
|
with:
|
||||||
|
go-version: '1.23'
|
||||||
|
- name: Checkout obitools4 project
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
- name: Run tests
|
||||||
|
run: make githubtests
|
||||||
153
.gitignore
vendored
153
.gitignore
vendored
@@ -1,134 +1,27 @@
|
|||||||
cpu.pprof
|
**/cpu.pprof
|
||||||
cpu.trace
|
**/cpu.trace
|
||||||
test
|
**/test
|
||||||
bin
|
**/bin
|
||||||
vendor
|
**/vendor
|
||||||
*.fastq
|
**/*.fastq
|
||||||
*.fasta
|
**/*.fasta
|
||||||
*.fastq.gz
|
**/*.fastq.gz
|
||||||
*.fasta.gz
|
**/*.fasta.gz
|
||||||
.DS_Store
|
**/.DS_Store
|
||||||
*.gml
|
**/*.gml
|
||||||
*.log
|
**/*.log
|
||||||
/argaly
|
**/xxx*
|
||||||
|
**/*.sav
|
||||||
|
**/*.old
|
||||||
|
**/*.tgz
|
||||||
|
**/*.yaml
|
||||||
|
**/*.csv
|
||||||
|
|
||||||
/obiconvert
|
.rhistory
|
||||||
/obicount
|
/.vscode
|
||||||
/obimultiplex
|
|
||||||
/obipairing
|
|
||||||
/obipcr
|
|
||||||
/obifind
|
|
||||||
/obidistribute
|
|
||||||
/obiuniq
|
|
||||||
/build
|
/build
|
||||||
/Makefile.old
|
|
||||||
.Rproj.user
|
|
||||||
obitools.Rproj
|
|
||||||
Stat_error.knit.md
|
|
||||||
.Rhistory
|
|
||||||
Stat_error.nb.html
|
|
||||||
Stat_error.Rmd
|
|
||||||
|
|
||||||
/.luarc.json
|
/ncbitaxo
|
||||||
/doc/TAXO/
|
|
||||||
/doc/results/
|
|
||||||
/doc/_main.log
|
|
||||||
/doc/_book/_main.tex
|
|
||||||
/doc/_freeze/
|
|
||||||
/doc/tutorial_files/
|
|
||||||
/doc/wolf_data/
|
|
||||||
/taxdump/
|
|
||||||
/.vscode/
|
|
||||||
|
|
||||||
/Algo-Alignement.numbers
|
!/obitests/**
|
||||||
/Estimate_proba_true_seq.html
|
!/sample/**
|
||||||
/Estimate_proba_true_seq.nb.html
|
|
||||||
/Estimate_proba_true_seq.Rmd
|
|
||||||
/modele_error_euka.qmd
|
|
||||||
/obitools.code-workspace
|
|
||||||
.DS_Store
|
|
||||||
.RData
|
|
||||||
x
|
|
||||||
xxx
|
|
||||||
y
|
|
||||||
/doc/wolf_diet.tgz
|
|
||||||
/doc/man/depends
|
|
||||||
/sample/wolf_R1.fasta.gz
|
|
||||||
/sample/wolf_R2.fasta.gz
|
|
||||||
/sample/euka03.ecotag.fasta.gz
|
|
||||||
/sample/ratio.csv
|
|
||||||
/sample/STD_PLN_1.dat
|
|
||||||
/sample/STD_PLN_2.dat
|
|
||||||
/sample/subset_Pasvik_R1.fastq.gz
|
|
||||||
/sample/subset_Pasvik_R2.fastq.gz
|
|
||||||
/sample/test_gobitools.fasta.bz2
|
|
||||||
euka03.csv*
|
|
||||||
gbbct793.seq.gz
|
|
||||||
gbinv1003.seq.gz
|
|
||||||
gbpln210.seq
|
|
||||||
/doc/book/OBITools-V4.aux
|
|
||||||
/doc/book/OBITools-V4.fdb_latexmk
|
|
||||||
/doc/book/OBITools-V4.fls
|
|
||||||
/doc/book/OBITools-V4.log
|
|
||||||
/doc/book/OBITools-V4.pdf
|
|
||||||
/doc/book/OBITools-V4.synctex.gz
|
|
||||||
/doc/book/OBITools-V4.tex
|
|
||||||
/doc/book/OBITools-V4.toc
|
|
||||||
getoptions.adoc
|
|
||||||
Archive.zip
|
|
||||||
.DS_Store
|
|
||||||
sample/.DS_Store
|
|
||||||
sample/consensus_graphs/specimen_hac_plants_Vern_disicolor_.gml
|
|
||||||
93954
|
|
||||||
Bact03.e5.gb_R254.obipcr.idx.fasta.save
|
|
||||||
sample/test.obipcr.log
|
|
||||||
Bact02.e3.gb_R254.obipcr.fasta.gz
|
|
||||||
Example_Arth03.ngsfilter
|
|
||||||
SPER01.csv
|
|
||||||
SPER03.csv
|
|
||||||
wolf_diet_ngsfilter.txt
|
|
||||||
xx
|
|
||||||
xxx.gb
|
|
||||||
yyy_geom.csv
|
|
||||||
yyy_LCS.csv
|
|
||||||
yyy.json
|
|
||||||
bug_obimultiplex/toto
|
|
||||||
bug_obimultiplex/toto_mapping
|
|
||||||
bug_obimultiplex/tutu
|
|
||||||
bug_obimultiplex/tutu_mapping
|
|
||||||
bug_obipairing/GIT1_GH_ngsfilter.txt
|
|
||||||
doc/book/TAXO/citations.dmp
|
|
||||||
doc/book/TAXO/delnodes.dmp
|
|
||||||
doc/book/TAXO/division.dmp
|
|
||||||
doc/book/TAXO/gc.prt
|
|
||||||
doc/book/TAXO/gencode.dmp
|
|
||||||
doc/book/TAXO/merged.dmp
|
|
||||||
doc/book/TAXO/names.dmp
|
|
||||||
doc/book/TAXO/nodes.dmp
|
|
||||||
doc/book/TAXO/readme.txt
|
|
||||||
doc/book/wolf_data/Release-253/ncbitaxo/citations.dmp
|
|
||||||
doc/book/wolf_data/Release-253/ncbitaxo/delnodes.dmp
|
|
||||||
doc/book/wolf_data/Release-253/ncbitaxo/division.dmp
|
|
||||||
doc/book/wolf_data/Release-253/ncbitaxo/gc.prt
|
|
||||||
doc/book/wolf_data/Release-253/ncbitaxo/gencode.dmp
|
|
||||||
doc/book/wolf_data/Release-253/ncbitaxo/merged.dmp
|
|
||||||
doc/book/wolf_data/Release-253/ncbitaxo/names.dmp
|
|
||||||
doc/book/wolf_data/Release-253/ncbitaxo/nodes.dmp
|
|
||||||
doc/book/wolf_data/Release-253/ncbitaxo/readme.txt
|
|
||||||
doc/book/results/toto.tasta
|
|
||||||
sample/.DS_Store
|
|
||||||
GO
|
|
||||||
ncbitaxo/citations.dmp
|
|
||||||
ncbitaxo/delnodes.dmp
|
|
||||||
ncbitaxo/division.dmp
|
|
||||||
ncbitaxo/gc.prt
|
|
||||||
ncbitaxo/gencode.dmp
|
|
||||||
ncbitaxo/merged.dmp
|
|
||||||
ncbitaxo/names.dmp
|
|
||||||
ncbitaxo/nodes.dmp
|
|
||||||
ncbitaxo/readme.txt
|
|
||||||
template.16S
|
|
||||||
xxx.gz
|
|
||||||
*.sav
|
|
||||||
*.old
|
|
||||||
ncbitaxo.tgz
|
|
||||||
|
|||||||
9
Makefile
9
Makefile
@@ -63,6 +63,13 @@ update-deps:
|
|||||||
|
|
||||||
test:
|
test:
|
||||||
$(GOTEST) ./...
|
$(GOTEST) ./...
|
||||||
|
|
||||||
|
obitests:
|
||||||
|
@for t in $$(find obitests -name test.sh -print) ; do \
|
||||||
|
bash $${t} ;\
|
||||||
|
done
|
||||||
|
|
||||||
|
githubtests: obitools obitests
|
||||||
|
|
||||||
man:
|
man:
|
||||||
make -C doc man
|
make -C doc man
|
||||||
@@ -97,5 +104,5 @@ ifneq ($(strip $(COMMIT_ID)),)
|
|||||||
@rm -f $(OUTPUT)
|
@rm -f $(OUTPUT)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
.PHONY: all packages obitools man obibook doc update-deps .FORCE
|
.PHONY: all packages obitools man obibook doc update-deps obitests githubtests .FORCE
|
||||||
.FORCE:
|
.FORCE:
|
||||||
@@ -37,7 +37,7 @@ curl -L https://raw.githubusercontent.com/metabarcoding/obitools4/master/install
|
|||||||
bash -s -- --install-dir test_install --obitools-prefix k
|
bash -s -- --install-dir test_install --obitools-prefix k
|
||||||
```
|
```
|
||||||
|
|
||||||
In this case, the binaries will be installed in the `test_install` directory and all command names will be prefixed with the letter `k`. Thus `obigrep` will be named `kobigrep`.
|
In this case, the binaries will be installed in the `test_install` directory and all command names will be prefixed with the letter `k`. Thus, `obigrep` will be named `kobigrep`.
|
||||||
|
|
||||||
## Continuing the analysis...
|
## Continuing the analysis...
|
||||||
|
|
||||||
|
|||||||
228
Release-notes.md
228
Release-notes.md
@@ -1,19 +1,29 @@
|
|||||||
# OBITools release notes
|
# OBITools release notes
|
||||||
|
|
||||||
## Latest changes
|
## March 2nd, 2025. Release 4.3.0
|
||||||
|
|
||||||
|
A new documentation website is available at https://obitools4.metabarcoding.org.
|
||||||
|
Its development is still in progress.
|
||||||
|
|
||||||
### Breaking changes
|
### Breaking changes
|
||||||
|
|
||||||
- In `obimultiplex`, the short version of the **--tag-list** option used to specify the list
|
- In `obimultiplex`, the short version of the **--tag-list** option used to
|
||||||
of tags and primers to be used for the demultiplexing has been changed from `-t` to `-s`.
|
specify the list of tags and primers to be used for the demultiplexing has
|
||||||
|
been changed from `-t` to `-s`.
|
||||||
|
|
||||||
- The command `obifind` is now renamed `obitaxonomy`.
|
- The command `obifind` is now renamed `obitaxonomy`.
|
||||||
|
|
||||||
- The **--taxdump** option used to specify the path to the taxdump containing the NCBI taxonomy
|
- The **--taxdump** option used to specify the path to the taxdump containing
|
||||||
has been renamed to **--taxonomy**.
|
the NCBI taxonomy has been renamed to **--taxonomy**.
|
||||||
|
|
||||||
### Bug fixes
|
### Bug fixes
|
||||||
|
|
||||||
|
- Correction of a bug when using paired sequence file with the **--out** option.
|
||||||
|
|
||||||
|
- Correction of a bug in `obitag` when trying to annotate very short sequences of
|
||||||
|
4 bases or less.
|
||||||
|
|
||||||
|
|
||||||
- In `obipairing`, correct the stats `seq_a_single` and `seq_b_single` when
|
- In `obipairing`, correct the stats `seq_a_single` and `seq_b_single` when
|
||||||
on right alignment mode
|
on right alignment mode
|
||||||
|
|
||||||
@@ -21,12 +31,32 @@
|
|||||||
the batch size and not reading the qualities from the fastq files as `obiuniq`
|
the batch size and not reading the qualities from the fastq files as `obiuniq`
|
||||||
is producing only fasta output without qualities.
|
is producing only fasta output without qualities.
|
||||||
|
|
||||||
|
- In `obitag`, correct the wrong assignment of the **obitag_bestmatch**
|
||||||
|
attribute.
|
||||||
|
|
||||||
|
- In `obiclean`, the **--no-progress-bar** option disables all progress bars,
|
||||||
|
not just the data.
|
||||||
|
|
||||||
|
- Several fixes in reading FASTA and FASTQ files, including some code
|
||||||
|
simplification and factorization.
|
||||||
|
|
||||||
|
- Fixed a bug in all obitools that caused the same file to be processed
|
||||||
|
multiple times, when specifying a directory name as input.
|
||||||
|
|
||||||
|
|
||||||
### New features
|
### New features
|
||||||
|
|
||||||
|
- `obigrep` adds a new **--valid-taxid** option to keep only sequences with a
|
||||||
|
valid taxid
|
||||||
|
|
||||||
|
- `obiclean` adds a new **--min-sample-count** option with a default value of 1,
|
||||||
|
asking to filter out sequences which are not occurring in at least the
|
||||||
|
specified number of samples.
|
||||||
|
|
||||||
- `obitaxonomy` a new **--dump|D** option allows for dumping a sub-taxonomy.
|
- `obitaxonomy` a new **--dump|D** option allows for dumping a sub-taxonomy.
|
||||||
|
|
||||||
- Taxonomy dump can now be provided as a four-columns CSV file to the **--taxonomy**
|
- Taxonomy dump can now be provided as a four-columns CSV file to the
|
||||||
option.
|
**--taxonomy** option.
|
||||||
|
|
||||||
- NCBI Taxonomy dump does not need to be uncompressed and unarchived anymore. The
|
- NCBI Taxonomy dump does not need to be uncompressed and unarchived anymore. The
|
||||||
path of the tar and gzipped dump file can be directly specified using the
|
path of the tar and gzipped dump file can be directly specified using the
|
||||||
@@ -37,54 +67,50 @@
|
|||||||
allow the processing of the rare fasta and fastq files not recognized.
|
allow the processing of the rare fasta and fastq files not recognized.
|
||||||
|
|
||||||
- In `obiscript`, adds new methods to the Lua sequence object:
|
- In `obiscript`, adds new methods to the Lua sequence object:
|
||||||
- `md5_string()`: returning the MD5 check sum as an hexadecimal string,
|
- `md5_string()`: returning the MD5 check sum as a hexadecimal string,
|
||||||
- `subsequence(from,to)`: allows to extract a subsequence on a 0 based
|
- `subsequence(from,to)`: allows extracting a subsequence on a 0 based
|
||||||
coordinate system, upper bound expluded like in go.
|
coordinate system, upper bound excluded like in go.
|
||||||
- `reverse_complement`: returning a sequence object corresponding to the reverse complement
|
- `reverse_complement`: returning a sequence object corresponding to the
|
||||||
of the current sequence.
|
reverse complement of the current sequence.
|
||||||
|
|
||||||
### Change of git repositiory
|
### Enhancement
|
||||||
|
|
||||||
- The OBITools4 git repository has been moved to the github repository.
|
- In every *OBITools* command, the progress bar is automatically deactivated
|
||||||
|
when the standard error output is redirected.
|
||||||
|
- As Genbank and ENA:EMBL contain very large sequences, while
|
||||||
|
OBITools4 is optimized for short sequences, `obipcr` faces some problems
|
||||||
|
with excessive consumption of computer resources, especially memory. Several
|
||||||
|
improvements in the tuning of the default `obipcr` parameters and some new
|
||||||
|
features, currently only available for FASTA and FASTQ file readers, have
|
||||||
|
been implemented to limit the memory impact of `obipcr` without changing the
|
||||||
|
computational efficiency too much.
|
||||||
|
- The logging system, and therefore its format, has been homogenized.
|
||||||
|
|
||||||
|
|
||||||
|
### Change of git repository
|
||||||
|
|
||||||
|
- The OBITools4 git repository has been moved to the GitHub repository.
|
||||||
The new address is: https://github.com/metabarcoding/obitools4.
|
The new address is: https://github.com/metabarcoding/obitools4.
|
||||||
Take care for using the new install script for retrieving the new version.
|
Take care for using the new install script for retrieving the new version.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl -L https://raw.githubusercontent.com/metabarcoding/obitools4/master/install_obitools.sh \
|
curl -L https://metabarcoding.org/obitools4/install.sh \
|
||||||
| bash
|
| bash
|
||||||
```
|
```
|
||||||
|
|
||||||
or with options:
|
or with options:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl -L https://raw.githubusercontent.com/metabarcoding/obitools4/master/install_obitools.sh \
|
curl -L https://metabarcoding.org/obitools4/install.sh \
|
||||||
| bash -s -- --install-dir test_install --obitools-prefix k
|
| bash -s -- --install-dir test_install --obitools-prefix k
|
||||||
```
|
```
|
||||||
|
|
||||||
### CPU limitation
|
|
||||||
|
|
||||||
- By default, *OBITools4* tries to use all the computing power available on
|
|
||||||
your computer. In some circumstances this can be problematic (e.g. if you
|
|
||||||
are running on a computer cluster managed by your university). You can limit
|
|
||||||
the number of CPU cores used by *OBITools4* or by using the **--max-cpu**
|
|
||||||
option or by setting the **OBIMAXCPU** environment variable. Some strange
|
|
||||||
behaviour of *OBITools4* has been observed when users try to limit the
|
|
||||||
maximum number of usable CPU cores to one. This seems to be caused by the Go
|
|
||||||
language, and it is not obvious to get *OBITools4* to run correctly on a
|
|
||||||
single core in all circumstances. Therefore, if you ask to use a single
|
|
||||||
core, **OBITools4** will print a warning message and actually set this
|
|
||||||
parameter to two cores. If you really want a single core, you can use the
|
|
||||||
**--force-one-core** option. But be aware that this can lead to incorrect
|
|
||||||
calculations.
|
|
||||||
|
|
||||||
### New features
|
|
||||||
|
|
||||||
- The output of the obitools will evolve to produce results only in standard
|
- The output of the obitools will evolve to produce results only in standard
|
||||||
formats such as fasta and fastq. For non-sequential data, the output will be
|
formats such as fasta and fastq. For non-sequential data, the output will be
|
||||||
in CSV format, with the separator `,`, the decimal separator `.`, and a
|
in CSV format, with the separator `,`, the decimal separator `.`, and a
|
||||||
header line with the column names. It is more convenient to use the output
|
header line with the column names. It is more convenient to use the output
|
||||||
in other programs. For example, you can use the `csvtomd` command to
|
in other programs. For example, you can use the `csvtomd` command to
|
||||||
reformat the csv output into a markdown table. The first command to initiate
|
reformat the CSV output into a Markdown table. The first command to initiate
|
||||||
this change is `obicount`, which now produces a 3-line CSV output.
|
this change is `obicount`, which now produces a 3-line CSV output.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@@ -96,7 +122,7 @@
|
|||||||
database for `obitag` is to use `obipcr` on a local copy of Genbank or EMBL.
|
database for `obitag` is to use `obipcr` on a local copy of Genbank or EMBL.
|
||||||
However, these sequence databases are known to contain many taxonomic
|
However, these sequence databases are known to contain many taxonomic
|
||||||
errors, such as bacterial sequences annotated with the taxid of their host
|
errors, such as bacterial sequences annotated with the taxid of their host
|
||||||
species. obicleandb tries to detect these errors. To do this, it first keeps
|
species. `obicleandb` tries to detect these errors. To do this, it first keeps
|
||||||
only sequences annotated with the taxid to which a species, genus, and
|
only sequences annotated with the taxid to which a species, genus, and
|
||||||
family taxid can be assigned. Then, for each sequence, it compares the
|
family taxid can be assigned. Then, for each sequence, it compares the
|
||||||
distance of the sequence to the other sequences belonging to the same genus
|
distance of the sequence to the other sequences belonging to the same genus
|
||||||
@@ -107,7 +133,7 @@
|
|||||||
with the p-value of the Mann-Whitney U test in the **obicleandb_trusted**
|
with the p-value of the Mann-Whitney U test in the **obicleandb_trusted**
|
||||||
slot. Later, the distribution of this p-value can be analyzed to determine a
|
slot. Later, the distribution of this p-value can be analyzed to determine a
|
||||||
threshold. Empirically, a threshold of 0.05 is a good compromise and allows
|
threshold. Empirically, a threshold of 0.05 is a good compromise and allows
|
||||||
to filter out less than 1‰ of the sequences. These sequences can then be
|
filtering out less than 1‰ of the sequences. These sequences can then be
|
||||||
removed using `obigrep`.
|
removed using `obigrep`.
|
||||||
|
|
||||||
- Adds a new `obijoin` utility to join information contained in a sequence
|
- Adds a new `obijoin` utility to join information contained in a sequence
|
||||||
@@ -117,16 +143,16 @@
|
|||||||
|
|
||||||
- Adds a new tool `obidemerge` to demerge a `merge_xxx` slot by recreating the
|
- Adds a new tool `obidemerge` to demerge a `merge_xxx` slot by recreating the
|
||||||
multiple identical sequences having the slot `xxx` recreated with its initial
|
multiple identical sequences having the slot `xxx` recreated with its initial
|
||||||
value and the sequence count set to the number of occurences refered in the
|
value and the sequence count set to the number of occurrences referred in the
|
||||||
`merge_xxx` slot. During the operation, the `merge_xxx` slot is removed.
|
`merge_xxx` slot. During the operation, the `merge_xxx` slot is removed.
|
||||||
|
|
||||||
- Adds CSV as one of the input format for every obitools command. To encode
|
- Adds CSV as one of the input format for every obitools command. To encode
|
||||||
sequence the CSV file must includes a column named `sequence` and another
|
sequence the CSV file must include a column named `sequence` and another
|
||||||
column named `id`. An extra column named `qualities` can be added to specify
|
column named `id`. An extra column named `qualities` can be added to specify
|
||||||
the quality scores of the sequence following the same ascii encoding than the
|
the quality scores of the sequence following the same ASCII encoding as the
|
||||||
fastq format. All the other columns will be considered as annotations and will
|
fastq format. All the other columns will be considered as annotations and will
|
||||||
be interpreted as JSON objects encoding potentially for atomic values. If a
|
be interpreted as JSON objects encoding potentially for atomic values. If a
|
||||||
calumn value can not be decoded as JSON it will be considered as a string.
|
column value can not be decoded as JSON it will be considered as a string.
|
||||||
|
|
||||||
- A new option **--version** has been added to every obitools command. It will
|
- A new option **--version** has been added to every obitools command. It will
|
||||||
print the version of the command.
|
print the version of the command.
|
||||||
@@ -135,8 +161,8 @@
|
|||||||
quality scores from a BioSequence object.\
|
quality scores from a BioSequence object.\
|
||||||
|
|
||||||
- In `obimultiplex` the ngsfilter file describing the samples can be provided
|
- In `obimultiplex` the ngsfilter file describing the samples can be provided
|
||||||
not only using the classical nfsfilter format but also using the csv format.
|
not only using the classical ngsfilter format but also using the CSV format.
|
||||||
When using csv, the first line must contain the column names. 5 columns are
|
When using CSV, the first line must contain the column names. 5 columns are
|
||||||
expected:
|
expected:
|
||||||
|
|
||||||
- `experiment` the name of the experiment
|
- `experiment` the name of the experiment
|
||||||
@@ -152,43 +178,34 @@
|
|||||||
|
|
||||||
Supplementary columns are allowed. Their names and content will be used to
|
Supplementary columns are allowed. Their names and content will be used to
|
||||||
annotate the sequence corresponding to the sample, as the `key=value;` did
|
annotate the sequence corresponding to the sample, as the `key=value;` did
|
||||||
in the nfsfilter format.
|
in the ngsfilter format.
|
||||||
|
|
||||||
The CSV format used allows for comment lines starting with `#` character.
|
The CSV format used allows for comment lines starting with `#` character.
|
||||||
Special data lines starting with `@param` in the first column allow to
|
Special data lines starting with `@param` in the first column allow configuring the algorithm. The **--template** option provides an over
|
||||||
configure the algorithm. The options **--template** provided an over
|
commented example of the CSV format, including all the possible options.
|
||||||
commented example of the csv format, including all the possible options.
|
|
||||||
|
### CPU limitation
|
||||||
|
|
||||||
### Enhancement
|
- By default, *OBITools4* tries to use all the computing power available on
|
||||||
|
your computer. In some circumstances this can be problematic (e.g. if you
|
||||||
|
are running on a computer cluster managed by your university). You can limit
|
||||||
|
the number of CPU cores used by *OBITools4* either by using the **--max-cpu**
|
||||||
|
option or by setting the **OBIMAXCPU** environment variable. Some strange
|
||||||
|
behavior of *OBITools4* has been observed when users try to limit the
|
||||||
|
maximum number of usable CPU cores to one. This seems to be caused by the Go
|
||||||
|
language, and it is not obvious to get *OBITools4* to run correctly on a
|
||||||
|
single core in all circumstances. Therefore, if you ask to use a single
|
||||||
|
core, **OBITools4** will print a warning message and actually set this
|
||||||
|
parameter to two cores. If you really want a single core, you can use the
|
||||||
|
**--force-one-core** option. But be aware that this can lead to incorrect
|
||||||
|
calculations.
|
||||||
|
|
||||||
- In every *OBITools* command, the progress bar are automatically deactivated
|
|
||||||
when the standard error output is redirected.
|
|
||||||
- Because Genbank and ENA:EMBL contain very large sequences, while OBITools4
|
|
||||||
are optimized As Genbank and ENA:EMBL contain very large sequences, while
|
|
||||||
OBITools4 is optimised for short sequences, `obipcr` faces some problems
|
|
||||||
with excessive consumption of computer resources, especially memory. Several
|
|
||||||
improvements in the tuning of the default `obipcr` parameters and some new
|
|
||||||
features, currently only available for FASTA and FASTQ file readers, have
|
|
||||||
been implemented to limit the memory impact of `obipcr` without changing the
|
|
||||||
computational efficiency too much.
|
|
||||||
- Logging system and therefore format, have been homogenized.
|
|
||||||
|
|
||||||
### Bug
|
|
||||||
|
|
||||||
- In `obitag`, correct the wrong assignment of the **obitag_bestmatch**
|
|
||||||
attribute.
|
|
||||||
- In `obiclean`, the **--no-progress-bar** option disables all progress bars,
|
|
||||||
not just the data.
|
|
||||||
- Several fixes in reading FASTA and FASTQ files, including some code
|
|
||||||
simplification and and factorization.
|
|
||||||
- Fixed a bug in all obitools that caused the same file to be processed
|
|
||||||
multiple times. when specifying a directory name as input.
|
|
||||||
|
|
||||||
## April 2nd, 2024. Release 4.2.0
|
## April 2nd, 2024. Release 4.2.0
|
||||||
|
|
||||||
### New features
|
### New features
|
||||||
|
|
||||||
- A new OBITools named `obiscript` allows to process each sequence according
|
- A new OBITools named `obiscript` allows processing each sequence according
|
||||||
to a Lua script. This is an experimental tool. The **--template** option
|
to a Lua script. This is an experimental tool. The **--template** option
|
||||||
allows for generating an example script on the `stdout`.
|
allows for generating an example script on the `stdout`.
|
||||||
|
|
||||||
@@ -196,7 +213,7 @@
|
|||||||
|
|
||||||
- Two of the main class `obiseq.SeqWorker` and `obiseq.SeqWorker` have their
|
- Two of the main class `obiseq.SeqWorker` and `obiseq.SeqWorker` have their
|
||||||
declaration changed. Both now return two values a `obiseq.BioSequenceSlice`
|
declaration changed. Both now return two values a `obiseq.BioSequenceSlice`
|
||||||
and an `error`. This allow a worker to return potentially several sequences
|
and an `error`. This allows a worker to return potentially several sequences
|
||||||
as the result of the processing of a single sequence, or zero, which is
|
as the result of the processing of a single sequence, or zero, which is
|
||||||
equivalent to filter out the input sequence.
|
equivalent to filter out the input sequence.
|
||||||
|
|
||||||
@@ -204,12 +221,12 @@
|
|||||||
|
|
||||||
- In `obitag` if the reference database contains sequences annotated by taxid
|
- In `obitag` if the reference database contains sequences annotated by taxid
|
||||||
not referenced in the taxonomy, the corresponding sequences are discarded
|
not referenced in the taxonomy, the corresponding sequences are discarded
|
||||||
from the reference database and a warning indicating the sequence id and the
|
from the reference database and a warning indicating the sequence *id* and the
|
||||||
wrong taxid is emitted.
|
wrong taxid is emitted.
|
||||||
- The bug corrected in the parsing of EMBL and Genbank files as implemented in
|
- The bug corrected in the parsing of EMBL and Genbank files as implemented in
|
||||||
version 4.1.2 of OBITools4, potentially induced some reduction in the
|
version 4.1.2 of OBITools4, potentially induced some reduction in the
|
||||||
performance of the parsing. This should have been now fixed.
|
performance of the parsing. This should have been now fixed.
|
||||||
- In the same idea, parsing of genbank and EMBL files were reading and storing
|
- In the same idea, parsing of Genbank and EMBL files were reading and storing
|
||||||
in memory not only the sequence but also the annotations (features table).
|
in memory not only the sequence but also the annotations (features table).
|
||||||
Up to now none of the OBITools are using this information, but with large
|
Up to now none of the OBITools are using this information, but with large
|
||||||
complete genomes, it is occupying a lot of memory. To reduce this impact,
|
complete genomes, it is occupying a lot of memory. To reduce this impact,
|
||||||
@@ -248,7 +265,7 @@
|
|||||||
|
|
||||||
### New feature
|
### New feature
|
||||||
|
|
||||||
- In `obimatrix` a **--transpose** option allows to transpose the produced
|
- In `obimatrix` a **--transpose** option allows transposing the produced
|
||||||
matrix table in CSV format.
|
matrix table in CSV format.
|
||||||
- In `obipairing` and `obipcrtag` two new options **--exact-mode** and
|
- In `obipairing` and `obipcrtag` two new options **--exact-mode** and
|
||||||
**--fast-absolute** to control the heuristic used in the alignment
|
**--fast-absolute** to control the heuristic used in the alignment
|
||||||
@@ -256,7 +273,7 @@
|
|||||||
the exact algorithm at the cost of a speed. **--fast-absolute** change the
|
the exact algorithm at the cost of a speed. **--fast-absolute** change the
|
||||||
scoring schema of the heuristic.
|
scoring schema of the heuristic.
|
||||||
- In `obiannotate` adds the possibility to annotate the first match of a
|
- In `obiannotate` adds the possibility to annotate the first match of a
|
||||||
pattern using the same algorithm than the one used in `obipcr` and
|
pattern using the same algorithm as the one used in `obipcr` and
|
||||||
`obimultiplex`. For that, four options were added:
|
`obimultiplex`. For that, four options were added:
|
||||||
- **--pattern** : to specify the pattern. It can use IUPAC codes and
|
- **--pattern** : to specify the pattern. It can use IUPAC codes and
|
||||||
position with no error tolerated has to be followed by a `#` character.
|
position with no error tolerated has to be followed by a `#` character.
|
||||||
@@ -337,7 +354,7 @@
|
|||||||
|
|
||||||
### Bugs
|
### Bugs
|
||||||
|
|
||||||
- in the obitools language, the `composition` function now returns a map
|
- In the obitools language, the `composition` function now returns a map
|
||||||
indexed by lowercase string "a", "c", "g", "t" and "o" for other instead of
|
indexed by lowercase string "a", "c", "g", "t" and "o" for other instead of
|
||||||
being indexed by the ASCII codes of the corresponding letters.
|
being indexed by the ASCII codes of the corresponding letters.
|
||||||
- Correction of the reverse-complement operation. Every reverse complement of
|
- Correction of the reverse-complement operation. Every reverse complement of
|
||||||
@@ -350,18 +367,18 @@
|
|||||||
duplicating the quality values. This made `obimultiplex` to produce fastq
|
duplicating the quality values. This made `obimultiplex` to produce fastq
|
||||||
files with sequences having quality values duplicated.
|
files with sequences having quality values duplicated.
|
||||||
|
|
||||||
### Becareful
|
### Be careful
|
||||||
|
|
||||||
GO 1.21.0 is out, and it includes new functionalities which are used in the
|
GO 1.21.0 is out, and it includes new functionalities which are used in the
|
||||||
OBITools4 code. If you use the recommanded method for compiling OBITools on your
|
OBITools4 code. If you use the recommended method for compiling OBITools on your
|
||||||
computer, their is no problem, as the script always load the latest GO version.
|
computer, there is no problem, as the script always loads the latest GO version.
|
||||||
If you rely on you personnal GO install, please think to update.
|
If you rely on your personal GO install, please remember to update it.
|
||||||
|
|
||||||
## August 29th, 2023. Release 4.0.5
|
## August 29th, 2023. Release 4.0.5
|
||||||
|
|
||||||
### Bugs
|
### Bugs
|
||||||
|
|
||||||
- Patch a bug in the `obiseq.BioSequence` constructor leading to a error on
|
- Patch a bug in the `obiseq.BioSequence` constructor leading to an error on
|
||||||
almost every obitools. The error message indicates : `fatal error: sync:
|
almost every obitools. The error message indicates : `fatal error: sync:
|
||||||
unlock of unlocked mutex` This bug was introduced in the release 4.0.4
|
unlock of unlocked mutex` This bug was introduced in the release 4.0.4
|
||||||
|
|
||||||
@@ -380,7 +397,7 @@ If you rely on you personnal GO install, please think to update.
|
|||||||
data structure to limit the number of alignments actually computed. This
|
data structure to limit the number of alignments actually computed. This
|
||||||
increase a bit the speed of both the software. `obirefidx` is nevertheless
|
increase a bit the speed of both the software. `obirefidx` is nevertheless
|
||||||
still too slow compared to my expectation.
|
still too slow compared to my expectation.
|
||||||
- Switch to a parallel version of the gzip library, allowing for high speed
|
- Switch to a parallel version of the GZIP library, allowing for high speed
|
||||||
compress and decompress operation on files.
|
compress and decompress operation on files.
|
||||||
|
|
||||||
### New feature
|
### New feature
|
||||||
@@ -424,12 +441,12 @@ If you rely on you personnal GO install, please think to update.
|
|||||||
--unidentified not_assigned.fastq
|
--unidentified not_assigned.fastq
|
||||||
```
|
```
|
||||||
|
|
||||||
the command produced four files : `tagged_library_R1.fastq` and
|
The command produced four files : `tagged_library_R1.fastq` and
|
||||||
`tagged_library_R2.fastq` containing the assigned reads and
|
`tagged_library_R2.fastq` containing the assigned reads and
|
||||||
`not_assigned_R1.fastq` and `not_assigned_R2.fastq` containing the
|
`not_assigned_R1.fastq` and `not_assigned_R2.fastq` containing the
|
||||||
unassignable reads.
|
unassignable reads.
|
||||||
|
|
||||||
the tagged library files can then be split using `obidistribute`:
|
The tagged library files can then be split using `obidistribute`:
|
||||||
|
|
||||||
```{bash}
|
```{bash}
|
||||||
mkdir pcr_reads
|
mkdir pcr_reads
|
||||||
@@ -439,9 +456,9 @@ If you rely on you personnal GO install, please think to update.
|
|||||||
|
|
||||||
- Adding of two options **--add-lca-in** and **--lca-error** to `obiannotate`.
|
- Adding of two options **--add-lca-in** and **--lca-error** to `obiannotate`.
|
||||||
These options aim to help during construction of reference database using
|
These options aim to help during construction of reference database using
|
||||||
`obipcr`. On obipcr output, it is commonly run obiuniq. To merge identical
|
`obipcr`. `obiuniq` is commonly run on `obipcr` output. To merge identical
|
||||||
sequences annotated with different taxids, it is now possible to use the
|
sequences annotated with different taxids, it is now possible to use the
|
||||||
following strategie :
|
following strategy :
|
||||||
|
|
||||||
```{bash}
|
```{bash}
|
||||||
obiuniq -m taxid myrefdb.obipcr.fasta \
|
obiuniq -m taxid myrefdb.obipcr.fasta \
|
||||||
@@ -472,7 +489,7 @@ If you rely on you personnal GO install, please think to update.
|
|||||||
- Correction of a bug in `obiconsensus` leading into the deletion of a base
|
- Correction of a bug in `obiconsensus` leading into the deletion of a base
|
||||||
close to the beginning of the consensus sequence.
|
close to the beginning of the consensus sequence.
|
||||||
|
|
||||||
## March 31th, 2023. Release 4.0.2
|
## March 31st, 2023. Release 4.0.2
|
||||||
|
|
||||||
### Compiler change
|
### Compiler change
|
||||||
|
|
||||||
@@ -483,15 +500,15 @@ If you rely on you personnal GO install, please think to update.
|
|||||||
- Add the possibility for looking pattern with indels. This has been added to
|
- Add the possibility for looking pattern with indels. This has been added to
|
||||||
`obimultiplex` through the **--with-indels** option.
|
`obimultiplex` through the **--with-indels** option.
|
||||||
- Every obitools command has a **--pprof** option making the command
|
- Every obitools command has a **--pprof** option making the command
|
||||||
publishing a profiling web site available at the address :
|
publishing a profiling website available at the address :
|
||||||
<http://localhost:8080/debug/pprof/>
|
<http://localhost:8080/debug/pprof/>
|
||||||
- A new `obiconsensus` command has been added. It is a prototype. It aims to
|
- A new `obiconsensus` command has been added. It is a prototype. It aims to
|
||||||
build a consensus sequence from a set of reads. The consensus is estimated
|
build a consensus sequence from a set of reads. The consensus is estimated
|
||||||
for all the sequences contained in the input file. If several input files,
|
for all the sequences contained in the input file. If several input files,
|
||||||
or a directory name are provided the result contains a consensus per file.
|
or a directory name are provided the result contains a consensus per file.
|
||||||
The id of the sequence is the name of the input file depleted of its
|
The *id* of the sequence is the name of the input file depleted of its
|
||||||
directory name and of all its extensions.
|
directory name and of all its extensions.
|
||||||
- In `obipcr` an experimental option **--fragmented** allows for spliting very
|
- In `obipcr` an experimental option **--fragmented** allows for splitting very
|
||||||
long query sequences into shorter fragments with an overlap between the two
|
long query sequences into shorter fragments with an overlap between the two
|
||||||
contiguous fragments ensuring that no amplicons are missed despite the split.
|
contiguous fragments ensuring that no amplicons are missed despite the split.
|
||||||
As a side effect, some amplicons can be identified twice.
|
As a side effect, some amplicons can be identified twice.
|
||||||
@@ -534,7 +551,7 @@ If you rely on you personnal GO install, please think to update.
|
|||||||
### Enhancement
|
### Enhancement
|
||||||
|
|
||||||
- *OBITools* are automatically processing all the sequences files contained in
|
- *OBITools* are automatically processing all the sequences files contained in
|
||||||
a directory and its sub-directory\
|
a directory and its subdirectory\
|
||||||
recursively if its name is provided as input. To process easily Genbank
|
recursively if its name is provided as input. To process easily Genbank
|
||||||
files, the corresponding filename extensions have been added. Today the
|
files, the corresponding filename extensions have been added. Today the
|
||||||
following extensions are recognized as sequence files : `.fasta`, `.fastq`,
|
following extensions are recognized as sequence files : `.fasta`, `.fastq`,
|
||||||
@@ -551,7 +568,7 @@ If you rely on you personnal GO install, please think to update.
|
|||||||
export OBICPUMAX=4
|
export OBICPUMAX=4
|
||||||
```
|
```
|
||||||
|
|
||||||
- Adds a new option --out\|-o allowing to specify the name of an outpout file.
|
- Adds a new option `--out|-o` allowing the name of an output file to be specified.
|
||||||
|
|
||||||
``` bash
|
``` bash
|
||||||
obiconvert -o xyz.fasta xxx.fastq
|
obiconvert -o xyz.fasta xxx.fastq
|
||||||
@@ -573,10 +590,10 @@ If you rely on you personnal GO install, please think to update.
|
|||||||
matched files remain consistent when processed.
|
matched files remain consistent when processed.
|
||||||
|
|
||||||
- Adding of the function `ifelse` to the expression language for computing
|
- Adding of the function `ifelse` to the expression language for computing
|
||||||
conditionnal values.
|
conditional values.
|
||||||
|
|
||||||
- Adding two functions to the expression language related to sequence
|
- Adding two functions to the expression language related to sequence
|
||||||
conposition : `composition` and `gcskew`. Both are taking a sequence as
|
composition : `composition` and `gcskew`. Both are taking a sequence as
|
||||||
single argument.
|
single argument.
|
||||||
|
|
||||||
## February 18th, 2023. Release 4.0.0
|
## February 18th, 2023. Release 4.0.0
|
||||||
@@ -584,8 +601,8 @@ If you rely on you personnal GO install, please think to update.
|
|||||||
It is the first version of the *OBITools* version 4. I decided to tag then
|
It is the first version of the *OBITools* version 4. I decided to tag then
|
||||||
following two weeks of intensive data analysis with them allowing to discover
|
following two weeks of intensive data analysis with them allowing to discover
|
||||||
many small bugs present in the previous non-official version. Obviously other
|
many small bugs present in the previous non-official version. Obviously other
|
||||||
bugs are certainly persent in the code, and you are welcome to use the git
|
bugs are certainly present in the code, and you are welcome to use the git
|
||||||
ticket system to mention them. But they seems to produce now reliable results.
|
ticket system to mention them. But they now seem to produce reliable results.
|
||||||
|
|
||||||
### Corrected bugs
|
### Corrected bugs
|
||||||
|
|
||||||
@@ -593,11 +610,11 @@ ticket system to mention them. But they seems to produce now reliable results.
|
|||||||
of sequences and to the production of incorrect file because of the last
|
of sequences and to the production of incorrect file because of the last
|
||||||
sequence record, sometime truncated in its middle. This was only occurring
|
sequence record, sometime truncated in its middle. This was only occurring
|
||||||
when more than a single CPU was used. It was affecting every obitools.
|
when more than a single CPU was used. It was affecting every obitools.
|
||||||
- The `obiparing` software had a bug in the right aligment procedure. This led
|
- The `obipairing` software had a bug in the right alignment procedure. This led
|
||||||
to the non alignment of very sort barcode during the paring of the forward
|
to the non-alignment of very short barcodes during the pairing of the forward
|
||||||
and reverse reads.
|
and reverse reads.
|
||||||
- The `obipairing` tools had a non deterministic comportment when aligning a
|
- The `obipairing` tool had a non-deterministic behavior when aligning a
|
||||||
paor very low quality reads. This induced that the result of the same low
|
pair of very low quality reads. As a result, the output for the same low
|
||||||
quality read pair was not the same from run to run.
|
quality read pair was not the same from run to run.
|
||||||
|
|
||||||
### New features
|
### New features
|
||||||
@@ -605,11 +622,10 @@ ticket system to mention them. But they seems to produce now reliable results.
|
|||||||
- Adding of a `--compress|-Z` option to every obitools allowing to produce
|
- Adding of a `--compress|-Z` option to every obitools allowing to produce
|
||||||
`gz` compressed output. OBITools were already able to deal with gzipped input
|
`gz` compressed output. OBITools were already able to deal with gzipped input
|
||||||
files transparently. They can now produce their results in the same format.
|
files transparently. They can now produce their results in the same format.
|
||||||
- Adding of a `--append|-A` option to the `obidistribute` tool. It allows to
|
- Adding of a `--append|-A` option to the `obidistribute` tool. It allows appending the result of an `obidistribute` execution to preexisting files.
|
||||||
append the result of an `obidistribute` execution to preexisting files. -
|
|
||||||
Adding of a `--directory|-d` option to the `obidistribute` tool. It allows
|
- Adding of a `--directory|-d` option to the `obidistribute` tool. It allows
|
||||||
to declare a secondary classification key over the one defined by the
|
declaring a secondary classification key over the one defined by the
|
||||||
'--category\|-c\` option. This extra key leads to produce directories in
|
`--category|-c` option. This extra key leads to the creation of directories in
|
||||||
which files produced according to the primary criterion are stored.
|
which files produced according to the primary criterion are stored.
|
||||||
- Adding of the functions `subspc`, `printf`, `int`, `numeric`, and `bool` to
|
- Adding of the functions `subspc`, `printf`, `int`, `numeric`, and `bool` to
|
||||||
the expression language.
|
the expression language.
|
||||||
@@ -47,12 +47,27 @@ func main() {
|
|||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
||||||
|
|
||||||
taxo := obitax.DefaultTaxonomy()
|
taxo := obitax.DefaultTaxonomy()
|
||||||
|
|
||||||
|
references := obitag.CLIRefDB()
|
||||||
|
|
||||||
|
if references == nil {
|
||||||
|
log.Panicln("No loaded reference database")
|
||||||
|
}
|
||||||
|
|
||||||
|
if taxo == nil {
|
||||||
|
taxo, err = references.ExtractTaxonomy(nil)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("No taxonomy specified or extractable from reference database: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
taxo.SetAsDefault()
|
||||||
|
}
|
||||||
|
|
||||||
if taxo == nil {
|
if taxo == nil {
|
||||||
log.Panicln("No loaded taxonomy")
|
log.Panicln("No loaded taxonomy")
|
||||||
}
|
}
|
||||||
|
|
||||||
references := obitag.CLIRefDB()
|
|
||||||
|
|
||||||
var identified obiiter.IBioSequence
|
var identified obiiter.IBioSequence
|
||||||
|
|
||||||
if obitag.CLIGeometricMode() {
|
if obitag.CLIGeometricMode() {
|
||||||
|
|||||||
@@ -1,13 +1,16 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"log"
|
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitaxonomy"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitaxonomy"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
@@ -18,17 +21,49 @@ func main() {
|
|||||||
var iterator *obitax.ITaxon
|
var iterator *obitax.ITaxon
|
||||||
|
|
||||||
switch {
|
switch {
|
||||||
|
case obitaxonomy.CLIDownloadNCBI():
|
||||||
|
err := obitaxonomy.CLIDownloadNCBITaxdump()
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Cannot download NCBI taxonomy: %s", err.Error())
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
os.Exit(0)
|
||||||
|
|
||||||
|
case obitaxonomy.CLIExtractTaxonomy():
|
||||||
|
iter, err := obiconvert.CLIReadBioSequences(args...)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Cannot extract taxonomy: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
taxonomy, err := iter.ExtractTaxonomy()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Cannot extract taxonomy: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
taxonomy.SetAsDefault()
|
||||||
|
|
||||||
|
log.Infof("Number of extracted taxa: %d", taxonomy.Len())
|
||||||
|
iterator = taxonomy.AsTaxonSet().Sort().Iterator()
|
||||||
|
|
||||||
case obitaxonomy.CLIDumpSubtaxonomy():
|
case obitaxonomy.CLIDumpSubtaxonomy():
|
||||||
iterator = obitaxonomy.CLISubTaxonomyIterator()
|
iterator = obitaxonomy.CLISubTaxonomyIterator()
|
||||||
|
|
||||||
case obitaxonomy.CLIRequestsPathForTaxid() != "NA":
|
case obitaxonomy.CLIRequestsPathForTaxid() != "NA":
|
||||||
|
|
||||||
taxon := obitax.DefaultTaxonomy().Taxon(obitaxonomy.CLIRequestsPathForTaxid())
|
taxon, isAlias, err := obitax.DefaultTaxonomy().Taxon(obitaxonomy.CLIRequestsPathForTaxid())
|
||||||
|
|
||||||
if taxon == nil {
|
if err != nil {
|
||||||
log.Fatalf("Cannot identify the requested taxon: %s",
|
log.Fatalf("Cannot identify the requested taxon: %s (%v)",
|
||||||
obitaxonomy.CLIRequestsPathForTaxid())
|
obitaxonomy.CLIRequestsPathForTaxid(), err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if isAlias {
|
||||||
|
if obidefault.FailOnTaxonomy() {
|
||||||
|
log.Fatalf("Taxon %s is an alias for %s", taxon.String(), taxon.Parent().String())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
s := taxon.Path()
|
s := taxon.Path()
|
||||||
|
|||||||
4
go.mod
4
go.mod
@@ -5,7 +5,9 @@ go 1.23.1
|
|||||||
require (
|
require (
|
||||||
github.com/DavidGamba/go-getoptions v0.28.0
|
github.com/DavidGamba/go-getoptions v0.28.0
|
||||||
github.com/PaesslerAG/gval v1.2.2
|
github.com/PaesslerAG/gval v1.2.2
|
||||||
|
github.com/TuftsBCB/io v0.0.0-20140121014543-22b94e9b23f9
|
||||||
github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df
|
github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df
|
||||||
|
github.com/buger/jsonparser v1.1.1
|
||||||
github.com/chen3feng/stl4go v0.1.1
|
github.com/chen3feng/stl4go v0.1.1
|
||||||
github.com/dlclark/regexp2 v1.11.4
|
github.com/dlclark/regexp2 v1.11.4
|
||||||
github.com/goccy/go-json v0.10.3
|
github.com/goccy/go-json v0.10.3
|
||||||
@@ -24,8 +26,6 @@ require (
|
|||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/Clever/csvlint v0.3.0 // indirect
|
|
||||||
github.com/buger/jsonparser v1.1.1 // indirect
|
|
||||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||||
github.com/goombaio/orderedmap v0.0.0-20180924084748-ba921b7e2419 // indirect
|
github.com/goombaio/orderedmap v0.0.0-20180924084748-ba921b7e2419 // indirect
|
||||||
github.com/kr/pretty v0.3.0 // indirect
|
github.com/kr/pretty v0.3.0 // indirect
|
||||||
|
|||||||
5
go.sum
5
go.sum
@@ -1,11 +1,11 @@
|
|||||||
github.com/Clever/csvlint v0.3.0 h1:58WEFXWy+i0fCbxTXscR2QwYESRuAUFjEGLgZs6j2iU=
|
|
||||||
github.com/Clever/csvlint v0.3.0/go.mod h1:+wLRuW/bI8NhpRoeyUBxqKsK35OhvgJhXHSWdKp5XJU=
|
|
||||||
github.com/DavidGamba/go-getoptions v0.28.0 h1:18wgEvfZdrlfIhVDGEBO3Dl0fkOyXqXLa0tLMCKxM1c=
|
github.com/DavidGamba/go-getoptions v0.28.0 h1:18wgEvfZdrlfIhVDGEBO3Dl0fkOyXqXLa0tLMCKxM1c=
|
||||||
github.com/DavidGamba/go-getoptions v0.28.0/go.mod h1:zE97E3PR9P3BI/HKyNYgdMlYxodcuiC6W68KIgeYT84=
|
github.com/DavidGamba/go-getoptions v0.28.0/go.mod h1:zE97E3PR9P3BI/HKyNYgdMlYxodcuiC6W68KIgeYT84=
|
||||||
github.com/PaesslerAG/gval v1.2.2 h1:Y7iBzhgE09IGTt5QgGQ2IdaYYYOU134YGHBThD+wm9E=
|
github.com/PaesslerAG/gval v1.2.2 h1:Y7iBzhgE09IGTt5QgGQ2IdaYYYOU134YGHBThD+wm9E=
|
||||||
github.com/PaesslerAG/gval v1.2.2/go.mod h1:XRFLwvmkTEdYziLdaCeCa5ImcGVrfQbeNUbVR+C6xac=
|
github.com/PaesslerAG/gval v1.2.2/go.mod h1:XRFLwvmkTEdYziLdaCeCa5ImcGVrfQbeNUbVR+C6xac=
|
||||||
github.com/PaesslerAG/jsonpath v0.1.0 h1:gADYeifvlqK3R3i2cR5B4DGgxLXIPb3TRTH1mGi0jPI=
|
github.com/PaesslerAG/jsonpath v0.1.0 h1:gADYeifvlqK3R3i2cR5B4DGgxLXIPb3TRTH1mGi0jPI=
|
||||||
github.com/PaesslerAG/jsonpath v0.1.0/go.mod h1:4BzmtoM/PI8fPO4aQGIusjGxGir2BzcV0grWtFzq1Y8=
|
github.com/PaesslerAG/jsonpath v0.1.0/go.mod h1:4BzmtoM/PI8fPO4aQGIusjGxGir2BzcV0grWtFzq1Y8=
|
||||||
|
github.com/TuftsBCB/io v0.0.0-20140121014543-22b94e9b23f9 h1:Zc1/GNsUpgZR9qm1EmRSKrnOHA7CCd0bIzGdq0cREN0=
|
||||||
|
github.com/TuftsBCB/io v0.0.0-20140121014543-22b94e9b23f9/go.mod h1:PZyV4WA3NpqtezSY0h6E6NARAmdDm0qwrydveOyR5Gc=
|
||||||
github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df h1:GSoSVRLoBaFpOOds6QyY1L8AX7uoY+Ln3BHc22W40X0=
|
github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df h1:GSoSVRLoBaFpOOds6QyY1L8AX7uoY+Ln3BHc22W40X0=
|
||||||
github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df/go.mod h1:hiVxq5OP2bUGBRNS3Z/bt/reCLFNbdcST6gISi1fiOM=
|
github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df/go.mod h1:hiVxq5OP2bUGBRNS3Z/bt/reCLFNbdcST6gISi1fiOM=
|
||||||
github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
|
github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
|
||||||
@@ -69,7 +69,6 @@ github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ
|
|||||||
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
||||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||||
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
|
||||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||||
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
|
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
|
||||||
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
||||||
|
|||||||
144
obitests/obitools/obicount/test.sh
Executable file
144
obitests/obitools/obicount/test.sh
Executable file
@@ -0,0 +1,144 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
#
|
||||||
|
# Here give the name of the test series
|
||||||
|
#
|
||||||
|
|
||||||
|
TEST_NAME=obicount
|
||||||
|
|
||||||
|
######
|
||||||
|
#
|
||||||
|
# Some variable and function definitions: please don't change them
|
||||||
|
#
|
||||||
|
######
|
||||||
|
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||||
|
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||||
|
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||||
|
|
||||||
|
|
||||||
|
TMPDIR="$(mktemp -d)"
|
||||||
|
ntest=0
|
||||||
|
success=0
|
||||||
|
failed=0
|
||||||
|
|
||||||
|
cleanup() {
|
||||||
|
echo "========================================" 1>&2
|
||||||
|
echo "## Results of the $TEST_NAME tests:" 1>&2
|
||||||
|
|
||||||
|
echo 1>&2
|
||||||
|
echo "- $ntest tests run" 1>&2
|
||||||
|
echo "- $success successfully completed" 1>&2
|
||||||
|
echo "- $failed failed tests" 1>&2
|
||||||
|
echo 1>&2
|
||||||
|
echo "Cleaning up the temporary directory..." 1>&2
|
||||||
|
echo 1>&2
|
||||||
|
echo "========================================" 1>&2
|
||||||
|
|
||||||
|
rm -rf "$TMPDIR" # Suppress the temporary directory
|
||||||
|
|
||||||
|
if [ $failed -gt 0 ]; then
|
||||||
|
log "$TEST_NAME tests failed"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
exit 0
|
||||||
|
}
|
||||||
|
|
||||||
|
log() {
|
||||||
|
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
||||||
|
}
|
||||||
|
|
||||||
|
log "Testing $TEST_NAME..."
|
||||||
|
log "Test directory is $TEST_DIR"
|
||||||
|
log "obitools directory is $OBITOOLS_DIR"
|
||||||
|
log "Temporary directory is $TMPDIR"
|
||||||
|
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||||
|
|
||||||
|
######################################################################
|
||||||
|
####
|
||||||
|
#### Below are the tests
|
||||||
|
####
|
||||||
|
#### Before each test :
|
||||||
|
#### - increment the variable ntest
|
||||||
|
####
|
||||||
|
#### Run the command as the condition of an if / then /else
|
||||||
|
#### - The command must return 0 on success
|
||||||
|
#### - The command must return an exit code different from 0 on failure
|
||||||
|
#### - The datafiles are stored in the same directory as the test script
|
||||||
|
#### - The test script directory is stored in the TEST_DIR variable
|
||||||
|
#### - If result files have to be produced they must be stored
|
||||||
|
#### in the temporary directory (TMPDIR variable)
|
||||||
|
####
|
||||||
|
#### then clause is executed on success of the command
|
||||||
|
#### - Write a success message using the log function
|
||||||
|
#### - increment the variable success
|
||||||
|
####
|
||||||
|
#### else clause is executed on failure of the command
|
||||||
|
#### - Write a failure message using the log function
|
||||||
|
#### - increment the variable failed
|
||||||
|
####
|
||||||
|
######################################################################
|
||||||
|
|
||||||
|
((ntest++))
|
||||||
|
if obicount "${TEST_DIR}/wolf_F.fasta.gz" \
|
||||||
|
> "${TMPDIR}/wolf_F.fasta_count.csv"
|
||||||
|
then
|
||||||
|
log "OBICount: fasta reading OK"
|
||||||
|
((success++))
|
||||||
|
else
|
||||||
|
log "OBICount: fasta reading failed"
|
||||||
|
((failed++))
|
||||||
|
fi
|
||||||
|
|
||||||
|
((ntest++))
|
||||||
|
if obicount "${TEST_DIR}/wolf_F.fastq.gz" \
|
||||||
|
> "${TMPDIR}/wolf_F.fastq_count.csv"
|
||||||
|
then
|
||||||
|
log "OBICount: fastq reading OK"
|
||||||
|
((success++))
|
||||||
|
else
|
||||||
|
log "OBICount: fastq reading failed"
|
||||||
|
((failed++))
|
||||||
|
fi
|
||||||
|
|
||||||
|
((ntest++))
|
||||||
|
if obicount "${TEST_DIR}/wolf_F.csv.gz" \
|
||||||
|
> "${TMPDIR}/wolf_F.csv_count.csv"
|
||||||
|
then
|
||||||
|
log "OBICount: csv reading OK"
|
||||||
|
((success++))
|
||||||
|
else
|
||||||
|
log "OBICount: csv reading failed"
|
||||||
|
((failed++))
|
||||||
|
fi
|
||||||
|
|
||||||
|
((ntest++))
|
||||||
|
if diff "${TMPDIR}/wolf_F.fasta_count.csv" \
|
||||||
|
"${TMPDIR}/wolf_F.fastq_count.csv" > /dev/null
|
||||||
|
then
|
||||||
|
log "OBICount: counting on fasta and fastq are identical OK"
|
||||||
|
((success++))
|
||||||
|
else
|
||||||
|
log "OBICount: counting on fasta and fastq are different failed"
|
||||||
|
((failed++))
|
||||||
|
fi
|
||||||
|
|
||||||
|
((ntest++))
|
||||||
|
if diff "${TMPDIR}/wolf_F.fasta_count.csv" \
|
||||||
|
"${TMPDIR}/wolf_F.csv_count.csv" > /dev/null
|
||||||
|
then
|
||||||
|
log "OBICount: counting on fasta and csv are identical OK"
|
||||||
|
((success++))
|
||||||
|
else
|
||||||
|
log "OBICount: counting on fasta and csv are different failed"
|
||||||
|
((failed++))
|
||||||
|
fi
|
||||||
|
|
||||||
|
#########################################
|
||||||
|
#
|
||||||
|
# At the end of the tests
|
||||||
|
# the cleanup function is called
|
||||||
|
#
|
||||||
|
#########################################
|
||||||
|
|
||||||
|
cleanup
|
||||||
BIN
obitests/obitools/obicount/wolf_F.csv.gz
Normal file
BIN
obitests/obitools/obicount/wolf_F.csv.gz
Normal file
Binary file not shown.
BIN
obitests/obitools/obicount/wolf_F.fasta.gz
Normal file
BIN
obitests/obitools/obicount/wolf_F.fasta.gz
Normal file
Binary file not shown.
BIN
obitests/obitools/obicount/wolf_F.fastq.gz
Normal file
BIN
obitests/obitools/obicount/wolf_F.fastq.gz
Normal file
Binary file not shown.
134
obitests/obitools/obiparing/test.sh
Executable file
134
obitests/obitools/obiparing/test.sh
Executable file
@@ -0,0 +1,134 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
#
|
||||||
|
# Here give the name of the test series
|
||||||
|
#
|
||||||
|
|
||||||
|
TEST_NAME=obiparing
|
||||||
|
|
||||||
|
######
|
||||||
|
#
|
||||||
|
# Some variable and function definitions: please don't change them
|
||||||
|
#
|
||||||
|
######
|
||||||
|
TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
|
||||||
|
OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
|
||||||
|
export PATH="${OBITOOLS_DIR}:${PATH}"
|
||||||
|
|
||||||
|
|
||||||
|
TMPDIR="$(mktemp -d)"
|
||||||
|
ntest=0
|
||||||
|
success=0
|
||||||
|
failed=0
|
||||||
|
|
||||||
|
cleanup() {
|
||||||
|
echo "========================================" 1>&2
|
||||||
|
echo "## Results of the $TEST_NAME tests:" 1>&2
|
||||||
|
|
||||||
|
echo 1>&2
|
||||||
|
echo "- $ntest tests run" 1>&2
|
||||||
|
echo "- $success successfully completed" 1>&2
|
||||||
|
echo "- $failed failed tests" 1>&2
|
||||||
|
echo 1>&2
|
||||||
|
echo "Cleaning up the temporary directory..." 1>&2
|
||||||
|
echo 1>&2
|
||||||
|
echo "========================================" 1>&2
|
||||||
|
|
||||||
|
rm -rf "$TMPDIR" # Suppress the temporary directory
|
||||||
|
|
||||||
|
if [ $failed -gt 0 ]; then
|
||||||
|
log "$TEST_NAME tests failed"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
exit 0
|
||||||
|
}
|
||||||
|
|
||||||
|
log() {
|
||||||
|
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
||||||
|
}
|
||||||
|
|
||||||
|
log "Testing $TEST_NAME..."
|
||||||
|
log "Test directory is $TEST_DIR"
|
||||||
|
log "obitools directory is $OBITOOLS_DIR"
|
||||||
|
log "Temporary directory is $TMPDIR"
|
||||||
|
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||||
|
|
||||||
|
######################################################################
|
||||||
|
####
|
||||||
|
#### Below are the tests
|
||||||
|
####
|
||||||
|
#### Before each test :
|
||||||
|
#### - increment the variable ntest
|
||||||
|
####
|
||||||
|
#### Run the command as the condition of an if / then /else
|
||||||
|
#### - The command must return 0 on success
|
||||||
|
#### - The command must return an exit code different from 0 on failure
|
||||||
|
#### - The datafiles are stored in the same directory as the test script
|
||||||
|
#### - The test script directory is stored in the TEST_DIR variable
|
||||||
|
#### - If result files have to be produced they must be stored
|
||||||
|
#### in the temporary directory (TMPDIR variable)
|
||||||
|
####
|
||||||
|
#### then clause is executed on success of the command
|
||||||
|
#### - Write a success message using the log function
|
||||||
|
#### - increment the variable success
|
||||||
|
####
|
||||||
|
#### else clause is executed on failure of the command
|
||||||
|
#### - Write a failure message using the log function
|
||||||
|
#### - increment the variable failed
|
||||||
|
####
|
||||||
|
######################################################################
|
||||||
|
|
||||||
|
((ntest++))
|
||||||
|
if obipairing -F "${TEST_DIR}/wolf_F.fastq.gz" \
|
||||||
|
-R "${TEST_DIR}/wolf_R.fastq.gz" \
|
||||||
|
| obidistribute -Z -c mode \
|
||||||
|
-p "${TMPDIR}/wolf_paired_%s.fastq.gz"
|
||||||
|
then
|
||||||
|
log "OBIPairing: sequence pairing OK"
|
||||||
|
((success++))
|
||||||
|
else
|
||||||
|
log "OBIPairing: sequence pairing failed"
|
||||||
|
((failed++))
|
||||||
|
fi
|
||||||
|
|
||||||
|
((ntest++))
|
||||||
|
if obicsv -Z -s -i \
|
||||||
|
-k ali_dir -k ali_length -k paring_fast_count \
|
||||||
|
-k paring_fast_overlap -k paring_fast_score \
|
||||||
|
-k score -k score_norm -k seq_a_single \
|
||||||
|
-k seq_b_single -k seq_ab_match \
|
||||||
|
"${TMPDIR}/wolf_paired_alignment.fastq.gz" \
|
||||||
|
> "${TMPDIR}/wolf_paired_alignment.csv.gz" \
|
||||||
|
&& zdiff -c "${TEST_DIR}/wolf_paired_alignment.csv.gz" \
|
||||||
|
"${TMPDIR}/wolf_paired_alignment.csv.gz"
|
||||||
|
then
|
||||||
|
log "OBIPairing: check aligned sequences OK"
|
||||||
|
((success++))
|
||||||
|
else
|
||||||
|
log "OBIPairing: check aligned sequences failed"
|
||||||
|
((failed++))
|
||||||
|
fi
|
||||||
|
|
||||||
|
((ntest++))
|
||||||
|
if obicsv -Z -s -i \
|
||||||
|
"${TMPDIR}/wolf_paired_join.fastq.gz" \
|
||||||
|
> "${TMPDIR}/wolf_paired_join.csv.gz" \
|
||||||
|
&& zdiff -c "${TEST_DIR}/wolf_paired_join.csv.gz" \
|
||||||
|
"${TMPDIR}/wolf_paired_join.csv.gz"
|
||||||
|
then
|
||||||
|
log "OBIPairing: check joined sequences OK"
|
||||||
|
((success++))
|
||||||
|
else
|
||||||
|
log "OBIPairing: check joined sequences failed"
|
||||||
|
((failed++))
|
||||||
|
fi
|
||||||
|
|
||||||
|
#########################################
|
||||||
|
#
|
||||||
|
# At the end of the tests
|
||||||
|
# the cleanup function is called
|
||||||
|
#
|
||||||
|
#########################################
|
||||||
|
|
||||||
|
cleanup
|
||||||
BIN
obitests/obitools/obiparing/wolf_F.fastq.gz
Normal file
BIN
obitests/obitools/obiparing/wolf_F.fastq.gz
Normal file
Binary file not shown.
BIN
obitests/obitools/obiparing/wolf_R.fastq.gz
Normal file
BIN
obitests/obitools/obiparing/wolf_R.fastq.gz
Normal file
Binary file not shown.
BIN
obitests/obitools/obiparing/wolf_paired_alignment.csv.gz
Normal file
BIN
obitests/obitools/obiparing/wolf_paired_alignment.csv.gz
Normal file
Binary file not shown.
BIN
obitests/obitools/obiparing/wolf_paired_join.csv.gz
Normal file
BIN
obitests/obitools/obiparing/wolf_paired_join.csv.gz
Normal file
Binary file not shown.
@@ -10,6 +10,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
)
|
)
|
||||||
|
|
||||||
// // A pool of byte slices.
|
// // A pool of byte slices.
|
||||||
@@ -158,12 +159,30 @@ func BuildQualityConsensus(seqA, seqB *obiseq.BioSequence, path []int, statOnMis
|
|||||||
|
|
||||||
match := 0
|
match := 0
|
||||||
|
|
||||||
|
left := obiutils.Abs(path[0])
|
||||||
|
right := 0
|
||||||
|
if path[len(path)-1] == 0 {
|
||||||
|
right = path[len(path)-2]
|
||||||
|
}
|
||||||
|
|
||||||
|
right = obiutils.Abs(right)
|
||||||
|
|
||||||
|
right = len(*bufferQA) - right
|
||||||
|
|
||||||
|
// log.Warnf("BuildQualityConsensus: left = %d right = %d\n", left, right)
|
||||||
|
|
||||||
for i, qA = range *bufferQA {
|
for i, qA = range *bufferQA {
|
||||||
nA := (*bufferSA)[i]
|
nA := (*bufferSA)[i]
|
||||||
nB := (*bufferSB)[i]
|
nB := (*bufferSB)[i]
|
||||||
qB = (*bufferQB)[i]
|
qB = (*bufferQB)[i]
|
||||||
|
|
||||||
if statOnMismatch && nA != nB && nA != ' ' && nB != ' ' {
|
if statOnMismatch && i >= left && i < right && nA != nB {
|
||||||
|
if nA == ' ' {
|
||||||
|
nA = '-'
|
||||||
|
}
|
||||||
|
if nB == ' ' {
|
||||||
|
nB = '-'
|
||||||
|
}
|
||||||
mismatches[strings.ToUpper(fmt.Sprintf("(%c:%02d)->(%c:%02d)", nA, qA, nB, qB))] = i + 1
|
mismatches[strings.ToUpper(fmt.Sprintf("(%c:%02d)->(%c:%02d)", nA, qA, nB, qB))] = i + 1
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -183,13 +202,12 @@ func BuildQualityConsensus(seqA, seqB *obiseq.BioSequence, path []int, statOnMis
|
|||||||
|
|
||||||
q := qA + qB
|
q := qA + qB
|
||||||
|
|
||||||
if qA > 0 && qB > 0 {
|
if nA != nB {
|
||||||
if nA != nB {
|
q = qM - byte(math.Log10(1-math.Pow(10, -float64(qm)/40))*10+0.5)
|
||||||
q = qM - byte(math.Log10(1-math.Pow(10, -float64(qm)/30))*10+0.5)
|
}
|
||||||
}
|
|
||||||
if nA == nB {
|
if nA == nB {
|
||||||
match++
|
match++
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if q > 90 {
|
if q > 90 {
|
||||||
|
|||||||
@@ -74,6 +74,30 @@ func _Logaddexp(a, b float64) float64 {
|
|||||||
return b + math.Log1p(math.Exp(a-b))
|
return b + math.Log1p(math.Exp(a-b))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func _Log1mexp(a float64) float64 {
|
||||||
|
if a > 0 {
|
||||||
|
log.Panic("Log1mexp: a > 0")
|
||||||
|
}
|
||||||
|
|
||||||
|
if a == 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
return (math.Log(-math.Expm1(a)))
|
||||||
|
}
|
||||||
|
|
||||||
|
func _Logdiffexp(a, b float64) float64 {
|
||||||
|
if a < b {
|
||||||
|
log.Panic("Log1mexp: a < b")
|
||||||
|
}
|
||||||
|
|
||||||
|
if a == b {
|
||||||
|
return math.Inf(-1)
|
||||||
|
}
|
||||||
|
|
||||||
|
return a + _Log1mexp(b-a)
|
||||||
|
}
|
||||||
|
|
||||||
// _MatchScoreRatio calculates the match score ratio between two bytes.
|
// _MatchScoreRatio calculates the match score ratio between two bytes.
|
||||||
//
|
//
|
||||||
// Parameters:
|
// Parameters:
|
||||||
@@ -83,25 +107,25 @@ func _Logaddexp(a, b float64) float64 {
|
|||||||
// Returns:
|
// Returns:
|
||||||
// - float64: the match score ratio when a match is observed
|
// - float64: the match score ratio when a match is observed
|
||||||
// - float64: the match score ratio when a mismatch is observed
|
// - float64: the match score ratio when a mismatch is observed
|
||||||
func _MatchScoreRatio(a, b byte) (float64, float64) {
|
func _MatchScoreRatio(QF, QR byte) (float64, float64) {
|
||||||
|
|
||||||
l2 := math.Log(2)
|
|
||||||
l3 := math.Log(3)
|
l3 := math.Log(3)
|
||||||
|
l4 := math.Log(4)
|
||||||
l10 := math.Log(10)
|
l10 := math.Log(10)
|
||||||
lalea := math.Log(4) // 1 /(change of the random model)
|
qF := -float64(QF) / 10 * l10
|
||||||
lE1 := -float64(a)/10*l10 - l3 // log proba of sequencing error on A/3
|
qR := -float64(QR) / 10 * l10
|
||||||
lE2 := -float64(b)/10*l10 - l3 // log proba of sequencing error on B/3
|
term1 := _Logaddexp(qF, qR)
|
||||||
lO1 := math.Log1p(-math.Exp(lE1 + l3)) // log proba no being an error on A
|
term2 := _Logdiffexp(term1, qF+qR)
|
||||||
lO2 := math.Log1p(-math.Exp(lE2 + l3)) // log proba no being an error on B
|
|
||||||
lO1O2 := lO1 + lO2
|
|
||||||
lE1E2 := lE1 + lE2
|
|
||||||
lO1E2 := lO1 + lE2
|
|
||||||
lO2E1 := lO2 + lE1
|
|
||||||
|
|
||||||
MM := _Logaddexp(lO1O2, lE1E2+l3) // Proba match when match observed
|
// log.Warnf("MatchScoreRatio: %v, %v , %v, %v", QF, QR, term1, term2)
|
||||||
Mm := _Logaddexp(_Logaddexp(lO1E2, lO2E1), lE1E2+l2) // Proba match when mismatch observed
|
|
||||||
|
|
||||||
return MM + lalea, Mm + lalea
|
match_logp := _Log1mexp(term2 + l3 - l4)
|
||||||
|
match_score := match_logp - _Log1mexp(match_logp)
|
||||||
|
|
||||||
|
mismatch_logp := term2 - l4
|
||||||
|
mismatch_score := mismatch_logp - _Log1mexp(mismatch_logp)
|
||||||
|
|
||||||
|
return match_score, mismatch_score
|
||||||
}
|
}
|
||||||
|
|
||||||
func _InitNucPartMatch() {
|
func _InitNucPartMatch() {
|
||||||
|
|||||||
@@ -21,15 +21,15 @@ func encodeValues(score, length int, out bool) uint64 {
|
|||||||
return fo
|
return fo
|
||||||
}
|
}
|
||||||
|
|
||||||
func _isout(value uint64) bool {
|
// func _isout(value uint64) bool {
|
||||||
const outmask = uint64(1) << dwsize
|
// const outmask = uint64(1) << dwsize
|
||||||
return (value & outmask) == 0
|
// return (value & outmask) == 0
|
||||||
}
|
// }
|
||||||
|
|
||||||
func _lpath(value uint64) int {
|
// func _lpath(value uint64) int {
|
||||||
const mask = uint64(1<<wsize) - 1
|
// const mask = uint64(1<<wsize) - 1
|
||||||
return int(((value + 1) ^ mask) & mask)
|
// return int(((value + 1) ^ mask) & mask)
|
||||||
}
|
// }
|
||||||
|
|
||||||
func decodeValues(value uint64) (int, int, bool) {
|
func decodeValues(value uint64) (int, int, bool) {
|
||||||
const mask = uint64(1<<wsize) - 1
|
const mask = uint64(1<<wsize) - 1
|
||||||
@@ -57,4 +57,3 @@ func _setout(value uint64) uint64 {
|
|||||||
var _empty = encodeValues(0, 0, false)
|
var _empty = encodeValues(0, 0, false)
|
||||||
var _out = encodeValues(0, 30000, true)
|
var _out = encodeValues(0, 30000, true)
|
||||||
var _notavail = encodeValues(0, 30000, false)
|
var _notavail = encodeValues(0, 30000, false)
|
||||||
|
|
||||||
|
|||||||
@@ -625,6 +625,8 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
|||||||
&arena.pointer.scoreMatrix,
|
&arena.pointer.scoreMatrix,
|
||||||
&arena.pointer.pathMatrix)
|
&arena.pointer.pathMatrix)
|
||||||
|
|
||||||
|
score = scoreR
|
||||||
|
|
||||||
path = _Backtracking(arena.pointer.pathMatrix,
|
path = _Backtracking(arena.pointer.pathMatrix,
|
||||||
len(rawSeqA), len(rawSeqB),
|
len(rawSeqA), len(rawSeqB),
|
||||||
&(arena.pointer.path))
|
&(arena.pointer.path))
|
||||||
@@ -641,6 +643,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
|||||||
len(rawSeqA), len(rawSeqB),
|
len(rawSeqA), len(rawSeqB),
|
||||||
&(arena.pointer.path))
|
&(arena.pointer.path))
|
||||||
isLeftAlign = true
|
isLeftAlign = true
|
||||||
|
score = scoreL
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,6 +2,8 @@ package obidefault
|
|||||||
|
|
||||||
var __taxonomy__ = ""
|
var __taxonomy__ = ""
|
||||||
var __alternative_name__ = false
|
var __alternative_name__ = false
|
||||||
|
var __fail_on_taxonomy__ = false
|
||||||
|
var __update_taxid__ = false
|
||||||
|
|
||||||
func SelectedTaxonomy() string {
|
func SelectedTaxonomy() string {
|
||||||
return __taxonomy__
|
return __taxonomy__
|
||||||
@@ -30,3 +32,27 @@ func SetSelectedTaxonomy(taxonomy string) {
|
|||||||
func SetAlternativeNamesSelected(alt bool) {
|
func SetAlternativeNamesSelected(alt bool) {
|
||||||
__alternative_name__ = alt
|
__alternative_name__ = alt
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func SetFailOnTaxonomy(fail bool) {
|
||||||
|
__fail_on_taxonomy__ = fail
|
||||||
|
}
|
||||||
|
|
||||||
|
func SetUpdateTaxid(update bool) {
|
||||||
|
__update_taxid__ = update
|
||||||
|
}
|
||||||
|
|
||||||
|
func FailOnTaxonomyPtr() *bool {
|
||||||
|
return &__fail_on_taxonomy__
|
||||||
|
}
|
||||||
|
|
||||||
|
func UpdateTaxidPtr() *bool {
|
||||||
|
return &__update_taxid__
|
||||||
|
}
|
||||||
|
|
||||||
|
func FailOnTaxonomy() bool {
|
||||||
|
return __fail_on_taxonomy__
|
||||||
|
}
|
||||||
|
|
||||||
|
func UpdateTaxid() bool {
|
||||||
|
return __update_taxid__
|
||||||
|
}
|
||||||
|
|||||||
@@ -9,12 +9,11 @@ import (
|
|||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
"github.com/buger/jsonparser"
|
"github.com/buger/jsonparser"
|
||||||
)
|
)
|
||||||
|
|
||||||
func _parse_json_map_string(str []byte, sequence *obiseq.BioSequence) (map[string]string, error) {
|
func _parse_json_map_string(str []byte) (map[string]string, error) {
|
||||||
values := make(map[string]string)
|
values := make(map[string]string)
|
||||||
jsonparser.ObjectEach(str,
|
jsonparser.ObjectEach(str,
|
||||||
func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
|
func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
|
||||||
@@ -26,7 +25,7 @@ func _parse_json_map_string(str []byte, sequence *obiseq.BioSequence) (map[strin
|
|||||||
return values, nil
|
return values, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func _parse_json_map_int(str []byte, sequence *obiseq.BioSequence) (map[string]int, error) {
|
func _parse_json_map_int(str []byte) (map[string]int, error) {
|
||||||
values := make(map[string]int)
|
values := make(map[string]int)
|
||||||
jsonparser.ObjectEach(str,
|
jsonparser.ObjectEach(str,
|
||||||
func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
|
func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
|
||||||
@@ -42,7 +41,7 @@ func _parse_json_map_int(str []byte, sequence *obiseq.BioSequence) (map[string]i
|
|||||||
return values, nil
|
return values, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func _parse_json_map_float(str []byte, sequence *obiseq.BioSequence) (map[string]float64, error) {
|
func _parse_json_map_float(str []byte) (map[string]float64, error) {
|
||||||
values := make(map[string]float64)
|
values := make(map[string]float64)
|
||||||
jsonparser.ObjectEach(str,
|
jsonparser.ObjectEach(str,
|
||||||
func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
|
func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
|
||||||
@@ -58,7 +57,7 @@ func _parse_json_map_float(str []byte, sequence *obiseq.BioSequence) (map[string
|
|||||||
return values, nil
|
return values, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func _parse_json_map_bool(str []byte, sequence *obiseq.BioSequence) (map[string]bool, error) {
|
func _parse_json_map_bool(str []byte) (map[string]bool, error) {
|
||||||
values := make(map[string]bool)
|
values := make(map[string]bool)
|
||||||
jsonparser.ObjectEach(str,
|
jsonparser.ObjectEach(str,
|
||||||
func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
|
func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
|
||||||
@@ -74,7 +73,7 @@ func _parse_json_map_bool(str []byte, sequence *obiseq.BioSequence) (map[string]
|
|||||||
return values, nil
|
return values, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func _parse_json_map_interface(str []byte, sequence *obiseq.BioSequence) (map[string]interface{}, error) {
|
func _parse_json_map_interface(str []byte) (map[string]interface{}, error) {
|
||||||
values := make(map[string]interface{})
|
values := make(map[string]interface{})
|
||||||
jsonparser.ObjectEach(str,
|
jsonparser.ObjectEach(str,
|
||||||
func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
|
func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
|
||||||
@@ -101,7 +100,7 @@ func _parse_json_map_interface(str []byte, sequence *obiseq.BioSequence) (map[st
|
|||||||
return values, nil
|
return values, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func _parse_json_array_string(str []byte, sequence *obiseq.BioSequence) ([]string, error) {
|
func _parse_json_array_string(str []byte) ([]string, error) {
|
||||||
values := make([]string, 0)
|
values := make([]string, 0)
|
||||||
jsonparser.ArrayEach(str,
|
jsonparser.ArrayEach(str,
|
||||||
func(value []byte, dataType jsonparser.ValueType, offset int, err error) {
|
func(value []byte, dataType jsonparser.ValueType, offset int, err error) {
|
||||||
@@ -163,7 +162,7 @@ func _parse_json_array_bool(str []byte, sequence *obiseq.BioSequence) ([]bool, e
|
|||||||
return values, nil
|
return values, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func _parse_json_array_interface(str []byte, sequence *obiseq.BioSequence) ([]interface{}, error) {
|
func _parse_json_array_interface(str []byte) ([]interface{}, error) {
|
||||||
values := make([]interface{}, 0)
|
values := make([]interface{}, 0)
|
||||||
jsonparser.ArrayEach(str,
|
jsonparser.ArrayEach(str,
|
||||||
func(value []byte, dataType jsonparser.ValueType, offset int, err error) {
|
func(value []byte, dataType jsonparser.ValueType, offset int, err error) {
|
||||||
@@ -201,8 +200,6 @@ func _parse_json_array_interface(str []byte, sequence *obiseq.BioSequence) ([]in
|
|||||||
}
|
}
|
||||||
|
|
||||||
func _parse_json_header_(header string, sequence *obiseq.BioSequence) string {
|
func _parse_json_header_(header string, sequence *obiseq.BioSequence) string {
|
||||||
taxonomy := obitax.DefaultTaxonomy()
|
|
||||||
|
|
||||||
annotations := sequence.Annotations()
|
annotations := sequence.Annotations()
|
||||||
start := -1
|
start := -1
|
||||||
stop := -1
|
stop := -1
|
||||||
@@ -264,14 +261,14 @@ func _parse_json_header_(header string, sequence *obiseq.BioSequence) string {
|
|||||||
sequence.SetCount(int(count))
|
sequence.SetCount(int(count))
|
||||||
|
|
||||||
case skey == "obiclean_weight":
|
case skey == "obiclean_weight":
|
||||||
weight, err := _parse_json_map_int(value, sequence)
|
weight, err := _parse_json_map_int(value)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("%s: Cannot parse obiclean weight %s", sequence.Id(), string(value))
|
log.Fatalf("%s: Cannot parse obiclean weight %s", sequence.Id(), string(value))
|
||||||
}
|
}
|
||||||
annotations[skey] = weight
|
annotations[skey] = weight
|
||||||
|
|
||||||
case skey == "obiclean_status":
|
case skey == "obiclean_status":
|
||||||
status, err := _parse_json_map_string(value, sequence)
|
status, err := _parse_json_map_string(value)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("%s: Cannot parse obiclean status %s", sequence.Id(), string(value))
|
log.Fatalf("%s: Cannot parse obiclean status %s", sequence.Id(), string(value))
|
||||||
}
|
}
|
||||||
@@ -279,7 +276,7 @@ func _parse_json_header_(header string, sequence *obiseq.BioSequence) string {
|
|||||||
|
|
||||||
case strings.HasPrefix(skey, "merged_"):
|
case strings.HasPrefix(skey, "merged_"):
|
||||||
if dataType == jsonparser.Object {
|
if dataType == jsonparser.Object {
|
||||||
data, err := _parse_json_map_int(value, sequence)
|
data, err := _parse_json_map_int(value)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("%s: Cannot parse merged slot %s: %v", sequence.Id(), skey, err)
|
log.Fatalf("%s: Cannot parse merged slot %s: %v", sequence.Id(), skey, err)
|
||||||
} else {
|
} else {
|
||||||
@@ -291,13 +288,8 @@ func _parse_json_header_(header string, sequence *obiseq.BioSequence) string {
|
|||||||
|
|
||||||
case skey == "taxid":
|
case skey == "taxid":
|
||||||
if dataType == jsonparser.Number || dataType == jsonparser.String {
|
if dataType == jsonparser.Number || dataType == jsonparser.String {
|
||||||
taxid := obiutils.UnsafeString(value)
|
taxid := string(value)
|
||||||
taxon := taxonomy.Taxon(taxid)
|
sequence.SetTaxid(taxid)
|
||||||
if taxon != nil {
|
|
||||||
sequence.SetTaxon(taxon)
|
|
||||||
} else {
|
|
||||||
sequence.SetTaxid(string(value))
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
log.Fatalf("%s: Cannot parse taxid %s", sequence.Id(), string(value))
|
log.Fatalf("%s: Cannot parse taxid %s", sequence.Id(), string(value))
|
||||||
}
|
}
|
||||||
@@ -306,15 +298,7 @@ func _parse_json_header_(header string, sequence *obiseq.BioSequence) string {
|
|||||||
if dataType == jsonparser.Number || dataType == jsonparser.String {
|
if dataType == jsonparser.Number || dataType == jsonparser.String {
|
||||||
rank, _ := obiutils.SplitInTwo(skey, '_')
|
rank, _ := obiutils.SplitInTwo(skey, '_')
|
||||||
|
|
||||||
taxid := obiutils.UnsafeString(value)
|
taxid := string(value)
|
||||||
taxon := taxonomy.Taxon(taxid)
|
|
||||||
|
|
||||||
if taxon != nil {
|
|
||||||
taxid = taxon.String()
|
|
||||||
} else {
|
|
||||||
taxid = string(value)
|
|
||||||
}
|
|
||||||
|
|
||||||
sequence.SetTaxid(taxid, rank)
|
sequence.SetTaxid(taxid, rank)
|
||||||
} else {
|
} else {
|
||||||
log.Fatalf("%s: Cannot parse taxid %s", sequence.Id(), string(value))
|
log.Fatalf("%s: Cannot parse taxid %s", sequence.Id(), string(value))
|
||||||
@@ -332,9 +316,9 @@ func _parse_json_header_(header string, sequence *obiseq.BioSequence) string {
|
|||||||
annotations[skey], err = strconv.ParseFloat(obiutils.UnsafeString(value), 64)
|
annotations[skey], err = strconv.ParseFloat(obiutils.UnsafeString(value), 64)
|
||||||
}
|
}
|
||||||
case jsonparser.Array:
|
case jsonparser.Array:
|
||||||
annotations[skey], err = _parse_json_array_interface(value, sequence)
|
annotations[skey], err = _parse_json_array_interface(value)
|
||||||
case jsonparser.Object:
|
case jsonparser.Object:
|
||||||
annotations[skey], err = _parse_json_map_interface(value, sequence)
|
annotations[skey], err = _parse_json_map_interface(value)
|
||||||
case jsonparser.Boolean:
|
case jsonparser.Boolean:
|
||||||
annotations[skey], err = jsonparser.ParseBoolean(value)
|
annotations[skey], err = jsonparser.ParseBoolean(value)
|
||||||
case jsonparser.Null:
|
case jsonparser.Null:
|
||||||
|
|||||||
@@ -72,7 +72,7 @@ func OBIMimeTypeGuesser(stream io.Reader) (*mimetype.MIME, io.Reader, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fastqDetector := func(raw []byte, limit uint32) bool {
|
fastqDetector := func(raw []byte, limit uint32) bool {
|
||||||
ok, err := regexp.Match("^@[^ ].*\n[^ ]+\n\\+", raw)
|
ok, err := regexp.Match("^@[^ ].*\n[A-Za-z.-]+", raw)
|
||||||
return ok && err == nil
|
return ok && err == nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
18
pkg/obiiter/extract_taxonomy.go
Normal file
18
pkg/obiiter/extract_taxonomy.go
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
package obiiter
|
||||||
|
|
||||||
|
import "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
|
|
||||||
|
func (iterator *IBioSequence) ExtractTaxonomy() (taxonomy *obitax.Taxonomy, err error) {
|
||||||
|
|
||||||
|
for iterator.Next() {
|
||||||
|
slice := iterator.Get().Slice()
|
||||||
|
|
||||||
|
taxonomy, err = slice.ExtractTaxonomy(taxonomy)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
@@ -19,7 +19,7 @@ func IFragments(minsize, length, overlap, size, nworkers int) Pipeable {
|
|||||||
newiter.WaitAndClose()
|
newiter.WaitAndClose()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
f := func(iterator IBioSequence, id int) {
|
f := func(iterator IBioSequence) {
|
||||||
source := ""
|
source := ""
|
||||||
for iterator.Next() {
|
for iterator.Next() {
|
||||||
news := obiseq.MakeBioSequenceSlice()
|
news := obiseq.MakeBioSequenceSlice()
|
||||||
@@ -66,9 +66,9 @@ func IFragments(minsize, length, overlap, size, nworkers int) Pipeable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for i := 1; i < nworkers; i++ {
|
for i := 1; i < nworkers; i++ {
|
||||||
go f(iterator.Split(), i)
|
go f(iterator.Split())
|
||||||
}
|
}
|
||||||
go f(iterator, 0)
|
go f(iterator)
|
||||||
|
|
||||||
return newiter.SortBatches().Rebatch(size)
|
return newiter.SortBatches().Rebatch(size)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ package obikmer
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
)
|
)
|
||||||
|
|
||||||
var __single_base_code__ = []byte{0,
|
var __single_base_code__ = []byte{0,
|
||||||
@@ -131,33 +132,39 @@ func FastShiftFourMer(index [][]int, shifts *map[int]int, lindex int, seq *obise
|
|||||||
maxshift := 0
|
maxshift := 0
|
||||||
maxcount := 0
|
maxcount := 0
|
||||||
maxscore := -1.0
|
maxscore := -1.0
|
||||||
|
maxrelscore := -1.0
|
||||||
|
|
||||||
for shift, count := range *shifts {
|
for shift, count := range *shifts {
|
||||||
delete((*shifts), shift)
|
delete((*shifts), shift)
|
||||||
score := float64(count)
|
selectscore := float64(count)
|
||||||
if relscore {
|
relativescore := float64(count)
|
||||||
over := -shift
|
over := -shift
|
||||||
switch {
|
switch {
|
||||||
case shift > 0:
|
case shift > 0:
|
||||||
over += lindex
|
over += lindex
|
||||||
case shift < 0:
|
case shift < 0:
|
||||||
over = seq.Len() - over
|
over = seq.Len() - over
|
||||||
default:
|
default:
|
||||||
over = min(lindex, seq.Len())
|
over = min(lindex, seq.Len())
|
||||||
}
|
|
||||||
score = score / float64(over-3)
|
|
||||||
}
|
}
|
||||||
if score > maxscore {
|
relativescore = relativescore / float64(over-3)
|
||||||
|
if relscore {
|
||||||
|
selectscore = relativescore
|
||||||
|
}
|
||||||
|
|
||||||
|
if selectscore > maxscore {
|
||||||
maxshift = shift
|
maxshift = shift
|
||||||
maxcount = count
|
maxcount = count
|
||||||
maxscore = score
|
maxscore = selectscore
|
||||||
|
maxrelscore = relativescore
|
||||||
} else {
|
} else {
|
||||||
if score == maxscore && shift < maxshift {
|
if selectscore == maxscore && obiutils.Abs(shift) < obiutils.Abs(maxshift) {
|
||||||
maxshift = shift
|
maxshift = shift
|
||||||
maxcount = count
|
maxcount = count
|
||||||
|
maxrelscore = relativescore
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return maxshift, maxcount, maxscore
|
return maxshift, maxcount, maxrelscore
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,4 +4,5 @@ import lua "github.com/yuin/gopher-lua"
|
|||||||
|
|
||||||
func RegisterObilib(luaState *lua.LState) {
|
func RegisterObilib(luaState *lua.LState) {
|
||||||
RegisterObiSeq(luaState)
|
RegisterObiSeq(luaState)
|
||||||
|
RegisterObiTaxonomy(luaState)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,9 @@
|
|||||||
package obilua
|
package obilua
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
lua "github.com/yuin/gopher-lua"
|
lua "github.com/yuin/gopher-lua"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -16,6 +18,7 @@ func registerBioSequenceType(luaState *lua.LState) {
|
|||||||
bioSequenceType := luaState.NewTypeMetatable(luaBioSequenceTypeName)
|
bioSequenceType := luaState.NewTypeMetatable(luaBioSequenceTypeName)
|
||||||
luaState.SetGlobal(luaBioSequenceTypeName, bioSequenceType)
|
luaState.SetGlobal(luaBioSequenceTypeName, bioSequenceType)
|
||||||
luaState.SetField(bioSequenceType, "new", luaState.NewFunction(newObiSeq))
|
luaState.SetField(bioSequenceType, "new", luaState.NewFunction(newObiSeq))
|
||||||
|
luaState.SetField(bioSequenceType, "nil", obiseq2Lua(luaState, nil))
|
||||||
|
|
||||||
luaState.SetField(bioSequenceType, "__index",
|
luaState.SetField(bioSequenceType, "__index",
|
||||||
luaState.SetFuncs(luaState.NewTable(),
|
luaState.SetFuncs(luaState.NewTable(),
|
||||||
@@ -53,6 +56,7 @@ var bioSequenceMethods = map[string]lua.LGFunction{
|
|||||||
"definition": bioSequenceGetSetDefinition,
|
"definition": bioSequenceGetSetDefinition,
|
||||||
"count": bioSequenceGetSetCount,
|
"count": bioSequenceGetSetCount,
|
||||||
"taxid": bioSequenceGetSetTaxid,
|
"taxid": bioSequenceGetSetTaxid,
|
||||||
|
"taxon": bioSequenceGetSetTaxon,
|
||||||
"attribute": bioSequenceGetSetAttribute,
|
"attribute": bioSequenceGetSetAttribute,
|
||||||
"len": bioSequenceGetLength,
|
"len": bioSequenceGetLength,
|
||||||
"has_sequence": bioSequenceHasSequence,
|
"has_sequence": bioSequenceHasSequence,
|
||||||
@@ -62,6 +66,9 @@ var bioSequenceMethods = map[string]lua.LGFunction{
|
|||||||
"md5_string": bioSequenceGetMD5String,
|
"md5_string": bioSequenceGetMD5String,
|
||||||
"subsequence": bioSequenceGetSubsequence,
|
"subsequence": bioSequenceGetSubsequence,
|
||||||
"reverse_complement": bioSequenceGetRevcomp,
|
"reverse_complement": bioSequenceGetRevcomp,
|
||||||
|
"fasta": bioSequenceGetFasta,
|
||||||
|
"fastq": bioSequenceGetFastq,
|
||||||
|
"string": bioSequenceAsString,
|
||||||
}
|
}
|
||||||
|
|
||||||
// checkBioSequence checks if the first argument in the Lua stack is a *obiseq.BioSequence.
|
// checkBioSequence checks if the first argument in the Lua stack is a *obiseq.BioSequence.
|
||||||
@@ -254,3 +261,88 @@ func bioSequenceGetRevcomp(luaState *lua.LState) int {
|
|||||||
luaState.Push(obiseq2Lua(luaState, revcomp))
|
luaState.Push(obiseq2Lua(luaState, revcomp))
|
||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func bioSequenceGetSetTaxon(luaState *lua.LState) int {
|
||||||
|
s := checkBioSequence(luaState)
|
||||||
|
|
||||||
|
if luaState.GetTop() > 1 {
|
||||||
|
taxon := checkTaxon(luaState, 2)
|
||||||
|
|
||||||
|
s.SetTaxon(taxon)
|
||||||
|
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
taxon := s.Taxon(obitax.DefaultTaxonomy())
|
||||||
|
luaState.Push(taxon2Lua(luaState, taxon))
|
||||||
|
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func bioSequenceGetFasta(luaState *lua.LState) int {
|
||||||
|
s := checkBioSequence(luaState)
|
||||||
|
|
||||||
|
formater := obiformats.FormatFastSeqJsonHeader
|
||||||
|
|
||||||
|
if luaState.GetTop() > 1 {
|
||||||
|
format := luaState.CheckString(2)
|
||||||
|
switch format {
|
||||||
|
case "json":
|
||||||
|
formater = obiformats.FormatFastSeqJsonHeader
|
||||||
|
case "obi":
|
||||||
|
formater = obiformats.FormatFastSeqOBIHeader
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
txt := obiformats.FormatFasta(s, formater)
|
||||||
|
|
||||||
|
luaState.Push(lua.LString(txt))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func bioSequenceGetFastq(luaState *lua.LState) int {
|
||||||
|
s := checkBioSequence(luaState)
|
||||||
|
|
||||||
|
formater := obiformats.FormatFastSeqJsonHeader
|
||||||
|
|
||||||
|
if luaState.GetTop() > 1 {
|
||||||
|
format := luaState.CheckString(2)
|
||||||
|
switch format {
|
||||||
|
case "json":
|
||||||
|
formater = obiformats.FormatFastSeqJsonHeader
|
||||||
|
case "obi":
|
||||||
|
formater = obiformats.FormatFastSeqOBIHeader
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
txt := obiformats.FormatFastq(s, formater)
|
||||||
|
|
||||||
|
luaState.Push(lua.LString(txt))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func bioSequenceAsString(luaState *lua.LState) int {
|
||||||
|
s := checkBioSequence(luaState)
|
||||||
|
|
||||||
|
formater := obiformats.FormatFastSeqJsonHeader
|
||||||
|
format := obiformats.FormatFasta
|
||||||
|
|
||||||
|
if s.HasQualities() {
|
||||||
|
format = obiformats.FormatFastq
|
||||||
|
}
|
||||||
|
|
||||||
|
if luaState.GetTop() > 1 {
|
||||||
|
format := luaState.CheckString(2)
|
||||||
|
switch format {
|
||||||
|
case "json":
|
||||||
|
formater = obiformats.FormatFastSeqJsonHeader
|
||||||
|
case "obi":
|
||||||
|
formater = obiformats.FormatFastSeqOBIHeader
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
txt := format(s, formater)
|
||||||
|
|
||||||
|
luaState.Push(lua.LString(txt))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,6 +1,9 @@
|
|||||||
package obilua
|
package obilua
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
lua "github.com/yuin/gopher-lua"
|
lua "github.com/yuin/gopher-lua"
|
||||||
)
|
)
|
||||||
@@ -11,6 +14,7 @@ func registerBioSequenceSliceType(luaState *lua.LState) {
|
|||||||
bioSequenceSliceType := luaState.NewTypeMetatable(luaBioSequenceSliceTypeName)
|
bioSequenceSliceType := luaState.NewTypeMetatable(luaBioSequenceSliceTypeName)
|
||||||
luaState.SetGlobal(luaBioSequenceSliceTypeName, bioSequenceSliceType)
|
luaState.SetGlobal(luaBioSequenceSliceTypeName, bioSequenceSliceType)
|
||||||
luaState.SetField(bioSequenceSliceType, "new", luaState.NewFunction(newObiSeqSlice))
|
luaState.SetField(bioSequenceSliceType, "new", luaState.NewFunction(newObiSeqSlice))
|
||||||
|
luaState.SetField(bioSequenceSliceType, "nil", obiseqslice2Lua(luaState, nil))
|
||||||
|
|
||||||
luaState.SetField(bioSequenceSliceType, "__index",
|
luaState.SetField(bioSequenceSliceType, "__index",
|
||||||
luaState.SetFuncs(luaState.NewTable(),
|
luaState.SetFuncs(luaState.NewTable(),
|
||||||
@@ -37,6 +41,9 @@ var bioSequenceSliceMethods = map[string]lua.LGFunction{
|
|||||||
"pop": bioSequenceSlicePop,
|
"pop": bioSequenceSlicePop,
|
||||||
"sequence": bioSequenceSliceGetSetSequence,
|
"sequence": bioSequenceSliceGetSetSequence,
|
||||||
"len": bioSequenceSliceGetLength,
|
"len": bioSequenceSliceGetLength,
|
||||||
|
"fasta": bioSequenceSliceGetFasta,
|
||||||
|
"fastq": bioSequenceSliceGetFastq,
|
||||||
|
"string": bioSequenceSliceAsString,
|
||||||
}
|
}
|
||||||
|
|
||||||
func checkBioSequenceSlice(L *lua.LState) *obiseq.BioSequenceSlice {
|
func checkBioSequenceSlice(L *lua.LState) *obiseq.BioSequenceSlice {
|
||||||
@@ -105,3 +112,96 @@ func bioSequenceSlicePop(luaState *lua.LState) int {
|
|||||||
return 1
|
return 1
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func bioSequenceSliceGetFasta(luaState *lua.LState) int {
|
||||||
|
s := checkBioSequenceSlice(luaState)
|
||||||
|
|
||||||
|
formater := obiformats.FormatFastSeqJsonHeader
|
||||||
|
|
||||||
|
if luaState.GetTop() > 1 {
|
||||||
|
format := luaState.CheckString(2)
|
||||||
|
switch format {
|
||||||
|
case "json":
|
||||||
|
formater = obiformats.FormatFastSeqJsonHeader
|
||||||
|
case "obi":
|
||||||
|
formater = obiformats.FormatFastSeqOBIHeader
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
txts := make([]string, len(*s))
|
||||||
|
|
||||||
|
for i, seq := range *s {
|
||||||
|
txts[i] = obiformats.FormatFasta(seq, formater)
|
||||||
|
}
|
||||||
|
|
||||||
|
txt := strings.Join(txts, "\n")
|
||||||
|
|
||||||
|
luaState.Push(lua.LString(txt))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func bioSequenceSliceGetFastq(luaState *lua.LState) int {
|
||||||
|
s := checkBioSequenceSlice(luaState)
|
||||||
|
|
||||||
|
formater := obiformats.FormatFastSeqJsonHeader
|
||||||
|
|
||||||
|
if luaState.GetTop() > 1 {
|
||||||
|
format := luaState.CheckString(2)
|
||||||
|
switch format {
|
||||||
|
case "json":
|
||||||
|
formater = obiformats.FormatFastSeqJsonHeader
|
||||||
|
case "obi":
|
||||||
|
formater = obiformats.FormatFastSeqOBIHeader
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
txts := make([]string, len(*s))
|
||||||
|
|
||||||
|
for i, seq := range *s {
|
||||||
|
txts[i] = obiformats.FormatFastq(seq, formater)
|
||||||
|
}
|
||||||
|
|
||||||
|
txt := strings.Join(txts, "\n")
|
||||||
|
|
||||||
|
luaState.Push(lua.LString(txt))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func bioSequenceSliceAsString(luaState *lua.LState) int {
|
||||||
|
s := checkBioSequenceSlice(luaState)
|
||||||
|
|
||||||
|
formater := obiformats.FormatFastSeqJsonHeader
|
||||||
|
|
||||||
|
if luaState.GetTop() > 1 {
|
||||||
|
format := luaState.CheckString(2)
|
||||||
|
switch format {
|
||||||
|
case "json":
|
||||||
|
formater = obiformats.FormatFastSeqJsonHeader
|
||||||
|
case "obi":
|
||||||
|
formater = obiformats.FormatFastSeqOBIHeader
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
txts := make([]string, len(*s))
|
||||||
|
|
||||||
|
format := obiformats.FormatFasta
|
||||||
|
|
||||||
|
allQual := true
|
||||||
|
|
||||||
|
for _, s := range *s {
|
||||||
|
allQual = allQual && s.HasQualities()
|
||||||
|
}
|
||||||
|
|
||||||
|
if allQual {
|
||||||
|
format = obiformats.FormatFastq
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, seq := range *s {
|
||||||
|
txts[i] = format(seq, formater)
|
||||||
|
}
|
||||||
|
|
||||||
|
txt := strings.Join(txts, "\n")
|
||||||
|
|
||||||
|
luaState.Push(lua.LString(txt))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|||||||
139
pkg/obilua/obitaxon.go
Normal file
139
pkg/obilua/obitaxon.go
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
package obilua
|
||||||
|
|
||||||
|
import (
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
|
lua "github.com/yuin/gopher-lua"
|
||||||
|
)
|
||||||
|
|
||||||
|
const luaTaxonTypeName = "Taxon"
|
||||||
|
|
||||||
|
func registerTaxonType(luaState *lua.LState) {
|
||||||
|
taxonType := luaState.NewTypeMetatable(luaTaxonTypeName)
|
||||||
|
luaState.SetGlobal(luaTaxonTypeName, taxonType)
|
||||||
|
luaState.SetField(taxonType, "new", luaState.NewFunction(newTaxon))
|
||||||
|
luaState.SetField(taxonType, "nil", taxonomy2Lua(luaState, nil))
|
||||||
|
|
||||||
|
luaState.SetField(taxonType, "__index",
|
||||||
|
luaState.SetFuncs(luaState.NewTable(),
|
||||||
|
taxonMethods))
|
||||||
|
}
|
||||||
|
|
||||||
|
func taxon2Lua(interpreter *lua.LState,
|
||||||
|
taxon *obitax.Taxon) lua.LValue {
|
||||||
|
ud := interpreter.NewUserData()
|
||||||
|
ud.Value = taxon
|
||||||
|
interpreter.SetMetatable(ud, interpreter.GetTypeMetatable(luaTaxonTypeName))
|
||||||
|
|
||||||
|
return ud
|
||||||
|
}
|
||||||
|
|
||||||
|
func newTaxon(luaState *lua.LState) int {
|
||||||
|
taxonomy := checkTaxonomy(luaState)
|
||||||
|
taxid := luaState.CheckString(2)
|
||||||
|
parent := luaState.CheckString(3)
|
||||||
|
sname := luaState.CheckString(4)
|
||||||
|
rank := luaState.CheckString(5)
|
||||||
|
|
||||||
|
isroot := false
|
||||||
|
|
||||||
|
if luaState.GetTop() > 5 {
|
||||||
|
isroot = luaState.CheckBool(6)
|
||||||
|
}
|
||||||
|
|
||||||
|
taxon, err := taxonomy.AddTaxon(taxid, parent, rank, isroot, false)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
luaState.RaiseError("(%v,%v,%v) : Error on taxon creation: %v", taxid, parent, sname, err)
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
taxon.SetName(sname, "scientific name")
|
||||||
|
|
||||||
|
luaState.Push(taxon2Lua(luaState, taxon))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
var taxonMethods = map[string]lua.LGFunction{
|
||||||
|
"string": taxonAsString,
|
||||||
|
"scientific_name": taxonGetSetScientificName,
|
||||||
|
"parent": taxonGetParent,
|
||||||
|
"taxon_at_rank": taxGetTaxonAtRank,
|
||||||
|
"species": taxonGetSpecies,
|
||||||
|
"genus": taxonGetGenus,
|
||||||
|
"family": taxonGetFamily,
|
||||||
|
}
|
||||||
|
|
||||||
|
func checkTaxon(L *lua.LState, i int) *obitax.Taxon {
|
||||||
|
ud := L.CheckUserData(i)
|
||||||
|
if v, ok := ud.Value.(*obitax.Taxon); ok {
|
||||||
|
return v
|
||||||
|
}
|
||||||
|
L.ArgError(i, "obitax.Taxon expected")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func taxonAsString(luaState *lua.LState) int {
|
||||||
|
taxon := checkTaxon(luaState, 1)
|
||||||
|
luaState.Push(lua.LString(taxon.String()))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func taxonGetSetScientificName(luaState *lua.LState) int {
|
||||||
|
taxon := checkTaxon(luaState, 1)
|
||||||
|
|
||||||
|
if luaState.GetTop() > 1 {
|
||||||
|
sname := luaState.CheckString(2)
|
||||||
|
taxon.SetName(sname, "scientific name")
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
luaState.Push(lua.LString(taxon.ScientificName()))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func taxonGetParent(luaState *lua.LState) int {
|
||||||
|
taxon := checkTaxon(luaState, 1)
|
||||||
|
|
||||||
|
parent := taxon.Parent()
|
||||||
|
luaState.Push(taxon2Lua(luaState, parent))
|
||||||
|
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func taxonGetSpecies(luaState *lua.LState) int {
|
||||||
|
taxon := checkTaxon(luaState, 1)
|
||||||
|
|
||||||
|
species := taxon.Species()
|
||||||
|
luaState.Push(taxon2Lua(luaState, species))
|
||||||
|
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func taxonGetGenus(luaState *lua.LState) int {
|
||||||
|
taxon := checkTaxon(luaState, 1)
|
||||||
|
|
||||||
|
genus := taxon.Genus()
|
||||||
|
luaState.Push(taxon2Lua(luaState, genus))
|
||||||
|
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func taxonGetFamily(luaState *lua.LState) int {
|
||||||
|
taxon := checkTaxon(luaState, 1)
|
||||||
|
|
||||||
|
family := taxon.Family()
|
||||||
|
luaState.Push(taxon2Lua(luaState, family))
|
||||||
|
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func taxGetTaxonAtRank(luaState *lua.LState) int {
|
||||||
|
taxon := checkTaxon(luaState, 1)
|
||||||
|
rank := luaState.CheckString(2)
|
||||||
|
|
||||||
|
taxonAt := taxon.TaxonAtRank(rank)
|
||||||
|
|
||||||
|
luaState.Push(taxon2Lua(luaState, taxonAt))
|
||||||
|
|
||||||
|
return 1
|
||||||
|
}
|
||||||
116
pkg/obilua/obitaxonomy.go
Normal file
116
pkg/obilua/obitaxonomy.go
Normal file
@@ -0,0 +1,116 @@
|
|||||||
|
package obilua
|
||||||
|
|
||||||
|
import (
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
|
lua "github.com/yuin/gopher-lua"
|
||||||
|
)
|
||||||
|
|
||||||
|
func RegisterObiTaxonomy(luaState *lua.LState) {
|
||||||
|
registerTaxonomyType(luaState)
|
||||||
|
registerTaxonType(luaState)
|
||||||
|
}
|
||||||
|
|
||||||
|
const luaTaxonomyTypeName = "Taxonomy"
|
||||||
|
|
||||||
|
func registerTaxonomyType(luaState *lua.LState) {
|
||||||
|
taxonomyType := luaState.NewTypeMetatable(luaTaxonomyTypeName)
|
||||||
|
luaState.SetGlobal(luaTaxonomyTypeName, taxonomyType)
|
||||||
|
luaState.SetField(taxonomyType, "new", luaState.NewFunction(newTaxonomy))
|
||||||
|
luaState.SetField(taxonomyType, "default", luaState.NewFunction(defaultTaxonomy))
|
||||||
|
luaState.SetField(taxonomyType, "has_default", luaState.NewFunction(hasDefaultTaxonomy))
|
||||||
|
luaState.SetField(taxonomyType, "nil", taxon2Lua(luaState, nil))
|
||||||
|
luaState.SetField(taxonomyType, "__index",
|
||||||
|
luaState.SetFuncs(luaState.NewTable(),
|
||||||
|
taxonomyMethods))
|
||||||
|
}
|
||||||
|
|
||||||
|
func taxonomy2Lua(interpreter *lua.LState,
|
||||||
|
taxonomy *obitax.Taxonomy) lua.LValue {
|
||||||
|
ud := interpreter.NewUserData()
|
||||||
|
ud.Value = taxonomy
|
||||||
|
interpreter.SetMetatable(ud, interpreter.GetTypeMetatable(luaTaxonomyTypeName))
|
||||||
|
|
||||||
|
return ud
|
||||||
|
}
|
||||||
|
|
||||||
|
func newTaxonomy(luaState *lua.LState) int {
|
||||||
|
name := luaState.CheckString(1)
|
||||||
|
code := luaState.CheckString(2)
|
||||||
|
|
||||||
|
charset := obiutils.AsciiAlphaNumSet
|
||||||
|
if luaState.GetTop() > 2 {
|
||||||
|
charset = obiutils.AsciiSetFromString(luaState.CheckString(3))
|
||||||
|
}
|
||||||
|
|
||||||
|
taxonomy := obitax.NewTaxonomy(name, code, charset)
|
||||||
|
|
||||||
|
luaState.Push(taxonomy2Lua(luaState, taxonomy))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func defaultTaxonomy(luaState *lua.LState) int {
|
||||||
|
taxonomy := obitax.DefaultTaxonomy()
|
||||||
|
|
||||||
|
if taxonomy == nil {
|
||||||
|
luaState.RaiseError("No default taxonomy")
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
luaState.Push(taxonomy2Lua(luaState, taxonomy))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func hasDefaultTaxonomy(luaState *lua.LState) int {
|
||||||
|
taxonomy := obitax.DefaultTaxonomy()
|
||||||
|
|
||||||
|
luaState.Push(lua.LBool(taxonomy != nil))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
var taxonomyMethods = map[string]lua.LGFunction{
|
||||||
|
"name": taxonomyGetName,
|
||||||
|
"code": taxonomyGetCode,
|
||||||
|
"taxon": taxonomyGetTaxon,
|
||||||
|
}
|
||||||
|
|
||||||
|
func checkTaxonomy(L *lua.LState) *obitax.Taxonomy {
|
||||||
|
ud := L.CheckUserData(1)
|
||||||
|
if v, ok := ud.Value.(*obitax.Taxonomy); ok {
|
||||||
|
return v
|
||||||
|
}
|
||||||
|
L.ArgError(1, "obitax.Taxonomy expected")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func taxonomyGetName(luaState *lua.LState) int {
|
||||||
|
taxo := checkTaxonomy(luaState)
|
||||||
|
luaState.Push(lua.LString(taxo.Name()))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func taxonomyGetCode(luaState *lua.LState) int {
|
||||||
|
taxo := checkTaxonomy(luaState)
|
||||||
|
luaState.Push(lua.LString(taxo.Code()))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func taxonomyGetTaxon(luaState *lua.LState) int {
|
||||||
|
taxo := checkTaxonomy(luaState)
|
||||||
|
taxid := luaState.CheckString(2)
|
||||||
|
taxon, isAlias, err := taxo.Taxon(taxid)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
luaState.RaiseError("%s : Error on taxon taxon: %v", taxid, err)
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
if isAlias && obidefault.FailOnTaxonomy() {
|
||||||
|
luaState.RaiseError("%s : Taxon is an alias of %s", taxid, taxon.String())
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
luaState.Push(taxon2Lua(luaState, taxon))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
@@ -66,10 +66,6 @@ func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser
|
|||||||
options.GetEnv("OBISOLEXA"),
|
options.GetEnv("OBISOLEXA"),
|
||||||
options.Description("Decodes quality string according to the Solexa specification."))
|
options.Description("Decodes quality string according to the Solexa specification."))
|
||||||
|
|
||||||
options.BoolVar(obidefault.CompressedPtr(), "compressed", obidefault.CompressOutput(),
|
|
||||||
options.Alias("Z"),
|
|
||||||
options.Description("Compress all the result using gzip"))
|
|
||||||
|
|
||||||
for _, o := range optionset {
|
for _, o := range optionset {
|
||||||
o(options)
|
o(options)
|
||||||
}
|
}
|
||||||
@@ -181,6 +177,15 @@ func LoadTaxonomyOptionSet(options *getoptions.GetOpt, required, alternatiive bo
|
|||||||
options.Alias("a"),
|
options.Alias("a"),
|
||||||
options.Description("Enable the search on all alternative names and not only scientific names."))
|
options.Description("Enable the search on all alternative names and not only scientific names."))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
options.BoolVar(obidefault.FailOnTaxonomyPtr(), "fail-on-taxonomy",
|
||||||
|
obidefault.FailOnTaxonomy(),
|
||||||
|
options.Description("Make obitools failing on error if a used taxid is not a currently valid one"),
|
||||||
|
)
|
||||||
|
|
||||||
|
options.BoolVar(obidefault.UpdateTaxidPtr(), "update-taxid", obidefault.UpdateTaxid(),
|
||||||
|
options.Description("Make obitools automatically updating the taxid that are declared merged to a newest one."),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
// CLIIsDebugMode returns whether the CLI is in debug mode.
|
// CLIIsDebugMode returns whether the CLI is in debug mode.
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ import (
|
|||||||
// corresponds to the last commit, and not the one when the file will be
|
// corresponds to the last commit, and not the one when the file will be
|
||||||
// committed
|
// committed
|
||||||
|
|
||||||
var _Commit = "c50a0f4"
|
var _Commit = "573acaf"
|
||||||
var _Version = "Release 4.2.0"
|
var _Version = "Release 4.2.0"
|
||||||
|
|
||||||
// Version returns the version of the obitools package.
|
// Version returns the version of the obitools package.
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package obiseq
|
package obiseq
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
"golang.org/x/exp/slices"
|
"golang.org/x/exp/slices"
|
||||||
@@ -179,3 +180,18 @@ func (s *BioSequenceSlice) SortOnLength(reverse bool) {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *BioSequenceSlice) ExtractTaxonomy(taxonomy *obitax.Taxonomy) (*obitax.Taxonomy, error) {
|
||||||
|
var err error
|
||||||
|
|
||||||
|
for _, s := range *s {
|
||||||
|
taxonomy, err = taxonomy.InsertPathString(s.Path())
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return taxonomy, nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -196,6 +196,16 @@ func IsShorterOrEqualTo(length int) SequencePredicate {
|
|||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func OccurInAtleast(sample string, n int) SequencePredicate {
|
||||||
|
desc := MakeStatsOnDescription(sample)
|
||||||
|
f := func(sequence *BioSequence) bool {
|
||||||
|
stats := sequence.StatsOn(desc, "NA")
|
||||||
|
return len(stats) >= n
|
||||||
|
}
|
||||||
|
|
||||||
|
return f
|
||||||
|
}
|
||||||
|
|
||||||
func IsSequenceMatch(pattern string) SequencePredicate {
|
func IsSequenceMatch(pattern string) SequencePredicate {
|
||||||
pat, err := regexp.Compile("(?i)" + pattern)
|
pat, err := regexp.Compile("(?i)" + pattern)
|
||||||
|
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ func TaxonomyClassifier(taxonomicRank string,
|
|||||||
if taxon != nil {
|
if taxon != nil {
|
||||||
ttaxon := taxon.TaxonAtRank(taxonomicRank)
|
ttaxon := taxon.TaxonAtRank(taxonomicRank)
|
||||||
if abortOnMissing && ttaxon == nil {
|
if abortOnMissing && ttaxon == nil {
|
||||||
log.Fatalf("Taxon at rank %s not found in taxonomy for taxid %d", taxonomicRank, taxon.String())
|
log.Fatalf("Taxon at rank %s not found in taxonomy for taxid %s", taxonomicRank, taxon.String())
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if abortOnMissing {
|
if abortOnMissing {
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"math"
|
"math"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
@@ -15,13 +16,20 @@ func (sequence *BioSequence) TaxonomicDistribution(taxonomy *obitax.Taxonomy) ma
|
|||||||
taxonomy = taxonomy.OrDefault(true)
|
taxonomy = taxonomy.OrDefault(true)
|
||||||
|
|
||||||
for taxid, v := range taxids {
|
for taxid, v := range taxids {
|
||||||
t := taxonomy.Taxon(taxid)
|
t, isAlias, err := taxonomy.Taxon(taxid)
|
||||||
if t == nil {
|
if err != nil {
|
||||||
log.Fatalf(
|
log.Fatalf(
|
||||||
"On sequence %s taxid %s is not defined in taxonomy: %s",
|
"On sequence %s taxid %s is not defined in taxonomy: %s (%v)",
|
||||||
sequence.Id(),
|
sequence.Id(),
|
||||||
taxid,
|
taxid,
|
||||||
taxonomy.Name())
|
taxonomy.Name(),
|
||||||
|
err,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
if isAlias && obidefault.FailOnTaxonomy() {
|
||||||
|
log.Fatalf("On sequence %s taxid %s is an alias on %s",
|
||||||
|
sequence.Id(), taxid, t.String())
|
||||||
}
|
}
|
||||||
taxons[t.Node] = v
|
taxons[t.Node] = v
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,7 +5,9 @@ import (
|
|||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
)
|
)
|
||||||
|
|
||||||
func (s *BioSequence) Taxon(taxonomy *obitax.Taxonomy) *obitax.Taxon {
|
func (s *BioSequence) Taxon(taxonomy *obitax.Taxonomy) *obitax.Taxon {
|
||||||
@@ -14,7 +16,10 @@ func (s *BioSequence) Taxon(taxonomy *obitax.Taxonomy) *obitax.Taxon {
|
|||||||
if taxid == "NA" {
|
if taxid == "NA" {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
return taxonomy.Taxon(taxid)
|
|
||||||
|
taxon, _, _ := taxonomy.Taxon(taxid)
|
||||||
|
|
||||||
|
return taxon
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetTaxid sets the taxid for the BioSequence.
|
// SetTaxid sets the taxid for the BioSequence.
|
||||||
@@ -23,6 +28,9 @@ func (s *BioSequence) Taxon(taxonomy *obitax.Taxonomy) *obitax.Taxon {
|
|||||||
//
|
//
|
||||||
// taxid - the taxid to set.
|
// taxid - the taxid to set.
|
||||||
func (s *BioSequence) SetTaxid(taxid string, rank ...string) {
|
func (s *BioSequence) SetTaxid(taxid string, rank ...string) {
|
||||||
|
var err error
|
||||||
|
var isAlias bool
|
||||||
|
|
||||||
if taxid == "" {
|
if taxid == "" {
|
||||||
taxid = "NA"
|
taxid = "NA"
|
||||||
} else {
|
} else {
|
||||||
@@ -30,11 +38,38 @@ func (s *BioSequence) SetTaxid(taxid string, rank ...string) {
|
|||||||
taxon := (*obitax.Taxon)(nil)
|
taxon := (*obitax.Taxon)(nil)
|
||||||
|
|
||||||
if taxonomy != nil {
|
if taxonomy != nil {
|
||||||
taxon = taxonomy.Taxon(taxid)
|
taxon, isAlias, err = taxonomy.Taxon(taxid)
|
||||||
}
|
|
||||||
|
if err != nil {
|
||||||
|
logger := log.Warnf
|
||||||
|
if obidefault.FailOnTaxonomy() {
|
||||||
|
logger = log.Fatalf
|
||||||
|
}
|
||||||
|
logger("%s: Taxid: %v is unknown from taxonomy (%v)",
|
||||||
|
s.Id(), taxid, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if isAlias {
|
||||||
|
if obidefault.FailOnTaxonomy() {
|
||||||
|
log.Fatalf("%s: Taxid: %v is an alias from taxonomy (%v) to %s",
|
||||||
|
s.Id(), taxid, taxonomy.Name(), taxon.String())
|
||||||
|
} else {
|
||||||
|
if obidefault.UpdateTaxid() {
|
||||||
|
log.Warnf("%s: Taxid: %v is updated to %s",
|
||||||
|
s.Id(), taxid, taxon.String())
|
||||||
|
taxid = taxon.String()
|
||||||
|
} else {
|
||||||
|
log.Warnf("%s: Taxid %v has to be updated to %s",
|
||||||
|
s.Id(), taxid, taxon.String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
if taxon != nil {
|
||||||
|
taxid = taxon.String()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if taxon != nil {
|
|
||||||
taxid = taxon.String()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -135,14 +170,35 @@ func (sequence *BioSequence) SetFamily(taxonomy *obitax.Taxonomy) *obitax.Taxon
|
|||||||
return sequence.SetTaxonAtRank(taxonomy, "family")
|
return sequence.SetTaxonAtRank(taxonomy, "family")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (sequence *BioSequence) SetPath(taxonomy *obitax.Taxonomy) string {
|
func (sequence *BioSequence) SetPath(taxonomy *obitax.Taxonomy) []string {
|
||||||
taxon := sequence.Taxon(taxonomy)
|
taxon := sequence.Taxon(taxonomy)
|
||||||
path := taxon.Path()
|
path := taxon.Path()
|
||||||
|
spath := make([]string, path.Len())
|
||||||
|
lpath := path.Len() - 1
|
||||||
|
|
||||||
tpath := path.String()
|
for i := lpath; i >= 0; i-- {
|
||||||
sequence.SetAttribute("taxonomic_path", tpath)
|
spath[lpath-i] = path.Get(i).String(taxonomy.Code())
|
||||||
|
}
|
||||||
|
|
||||||
return tpath
|
sequence.SetAttribute("taxonomic_path", spath)
|
||||||
|
|
||||||
|
return spath
|
||||||
|
}
|
||||||
|
|
||||||
|
func (sequence *BioSequence) Path() []string {
|
||||||
|
path, ok := sequence.GetAttribute("taxonomic_path")
|
||||||
|
|
||||||
|
if !ok {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
slice, err := obiutils.InterfaceToStringSlice(path)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("%s: taxonomic_path has the wrong type (%v)", sequence.Id(), err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return slice
|
||||||
}
|
}
|
||||||
|
|
||||||
func (sequence *BioSequence) SetScientificName(taxonomy *obitax.Taxonomy) string {
|
func (sequence *BioSequence) SetScientificName(taxonomy *obitax.Taxonomy) string {
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ func IsAValidTaxon(taxonomy *obitax.Taxonomy, withAutoCorrection ...bool) Sequen
|
|||||||
if autocorrection {
|
if autocorrection {
|
||||||
sequence.SetTaxid(ttaxid)
|
sequence.SetTaxid(ttaxid)
|
||||||
log.Printf(
|
log.Printf(
|
||||||
"Sequence %s : Taxid %d updated with %d",
|
"Sequence %s : Taxid %s updated with %s",
|
||||||
sequence.Id(),
|
sequence.Id(),
|
||||||
taxid,
|
taxid,
|
||||||
ttaxid,
|
ttaxid,
|
||||||
@@ -63,7 +63,12 @@ func IsSubCladeOfSlot(taxonomy *obitax.Taxonomy, key string) SequencePredicate {
|
|||||||
val, ok := sequence.GetStringAttribute(key)
|
val, ok := sequence.GetStringAttribute(key)
|
||||||
|
|
||||||
if ok {
|
if ok {
|
||||||
parent := taxonomy.Taxon(val)
|
parent, _, err := taxonomy.Taxon(val)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("%s: %s is unkown from the taxonomy (%v)", sequence.Id(), val, err)
|
||||||
|
}
|
||||||
|
|
||||||
taxon := sequence.Taxon(taxonomy)
|
taxon := sequence.Taxon(taxonomy)
|
||||||
return parent != nil && taxon != nil && taxon.IsSubCladeOf(parent)
|
return parent != nil && taxon != nil && taxon.IsSubCladeOf(parent)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1 +1,38 @@
|
|||||||
package obitax
|
package obitax
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/TuftsBCB/io/newick"
|
||||||
|
)
|
||||||
|
|
||||||
|
func (taxonomy *Taxonomy) Newick() string {
|
||||||
|
if taxonomy == nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
iterator := taxonomy.AsTaxonSet().Sort().Iterator()
|
||||||
|
|
||||||
|
nodes := make(map[*string]*newick.Tree, taxonomy.Len())
|
||||||
|
trees := make([]*newick.Tree, 0)
|
||||||
|
|
||||||
|
for iterator.Next() {
|
||||||
|
taxon := iterator.Get()
|
||||||
|
tree := &newick.Tree{Label: taxon.String()}
|
||||||
|
nodes[taxon.Node.id] = tree
|
||||||
|
if parent, ok := nodes[taxon.Parent().Node.id]; ok {
|
||||||
|
parent.Children = append(parent.Children, *tree)
|
||||||
|
} else {
|
||||||
|
trees = append(trees, tree)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rep := strings.Builder{}
|
||||||
|
|
||||||
|
for _, tree := range trees {
|
||||||
|
rep.WriteString(tree.String())
|
||||||
|
rep.WriteString("\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
return rep.String()
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,11 +1,14 @@
|
|||||||
package obitax
|
package obitax
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"sync"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
var __defaut_taxonomy__ *Taxonomy
|
var __defaut_taxonomy__ *Taxonomy
|
||||||
|
var __defaut_taxonomy_mutex__ sync.Mutex
|
||||||
|
|
||||||
func (taxonomy *Taxonomy) SetAsDefault() {
|
func (taxonomy *Taxonomy) SetAsDefault() {
|
||||||
log.Infof("Set as default taxonomy %s", taxonomy.Name())
|
log.Infof("Set as default taxonomy %s", taxonomy.Name())
|
||||||
@@ -32,14 +35,18 @@ func DefaultTaxonomy() *Taxonomy {
|
|||||||
var err error
|
var err error
|
||||||
if __defaut_taxonomy__ == nil {
|
if __defaut_taxonomy__ == nil {
|
||||||
if obidefault.HasSelectedTaxonomy() {
|
if obidefault.HasSelectedTaxonomy() {
|
||||||
__defaut_taxonomy__, err = LoadTaxonomy(
|
__defaut_taxonomy_mutex__.Lock()
|
||||||
obidefault.SelectedTaxonomy(),
|
defer __defaut_taxonomy_mutex__.Unlock()
|
||||||
!obidefault.AreAlternativeNamesSelected(),
|
if __defaut_taxonomy__ == nil {
|
||||||
)
|
__defaut_taxonomy__, err = LoadTaxonomy(
|
||||||
|
obidefault.SelectedTaxonomy(),
|
||||||
|
!obidefault.AreAlternativeNamesSelected(),
|
||||||
|
)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("Cannot load default taxonomy: %v", err)
|
log.Fatalf("Cannot load default taxonomy: %v", err)
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ package obitax
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
log "github.com/sirupsen/logrus"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// ITaxon represents an iterator for traversing Taxon instances.
|
// ITaxon represents an iterator for traversing Taxon instances.
|
||||||
@@ -195,7 +194,6 @@ func (taxon *Taxon) ISubTaxonomy() *ITaxon {
|
|||||||
|
|
||||||
pushed := true
|
pushed := true
|
||||||
|
|
||||||
log.Warn(parents)
|
|
||||||
for pushed {
|
for pushed {
|
||||||
itaxo := taxo.Iterator()
|
itaxo := taxo.Iterator()
|
||||||
pushed = false
|
pushed = false
|
||||||
@@ -218,9 +216,9 @@ func (taxon *Taxon) ISubTaxonomy() *ITaxon {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (taxonomy *Taxonomy) ISubTaxonomy(taxid string) *ITaxon {
|
func (taxonomy *Taxonomy) ISubTaxonomy(taxid string) *ITaxon {
|
||||||
taxon := taxonomy.Taxon(taxid)
|
taxon, _, err := taxonomy.Taxon(taxid)
|
||||||
|
|
||||||
if taxon == nil {
|
if err != nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -91,7 +91,13 @@ func loadNameTable(reader io.Reader, taxonomy *Taxonomy, onlysn bool) int {
|
|||||||
|
|
||||||
if !onlysn || classname == "scientific name" {
|
if !onlysn || classname == "scientific name" {
|
||||||
n++
|
n++
|
||||||
taxonomy.Taxon(taxid).SetName(name, classname)
|
taxon, _, err := taxonomy.Taxon(taxid)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("%s: is unknown from the taxonomy", taxid)
|
||||||
|
}
|
||||||
|
|
||||||
|
taxon.SetName(name, classname)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -125,7 +131,7 @@ func loadMergedTable(reader io.Reader, taxonomy *Taxonomy) int {
|
|||||||
oldtaxid := strings.TrimSpace(record[0])
|
oldtaxid := strings.TrimSpace(record[0])
|
||||||
newtaxid := strings.TrimSpace(record[1])
|
newtaxid := strings.TrimSpace(record[1])
|
||||||
|
|
||||||
taxonomy.AddAlias(newtaxid, oldtaxid, false)
|
taxonomy.AddAlias(oldtaxid, newtaxid, false)
|
||||||
}
|
}
|
||||||
|
|
||||||
return n
|
return n
|
||||||
@@ -196,7 +202,11 @@ func LoadNCBITaxDump(directory string, onlysn bool) (*Taxonomy, error) {
|
|||||||
n = loadMergedTable(buffered, taxonomy)
|
n = loadMergedTable(buffered, taxonomy)
|
||||||
log.Printf("%d merged taxa read\n", n)
|
log.Printf("%d merged taxa read\n", n)
|
||||||
|
|
||||||
root := taxonomy.Taxon("1")
|
root, _, err := taxonomy.Taxon("1")
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal("cannot find the root taxon (1) in the NCBI tax dump")
|
||||||
|
}
|
||||||
taxonomy.SetRoot(root)
|
taxonomy.SetRoot(root)
|
||||||
|
|
||||||
return taxonomy, nil
|
return taxonomy, nil
|
||||||
|
|||||||
@@ -134,7 +134,12 @@ func LoadNCBITarTaxDump(path string, onlysn bool) (*Taxonomy, error) {
|
|||||||
n = loadMergedTable(buffered, taxonomy)
|
n = loadMergedTable(buffered, taxonomy)
|
||||||
log.Printf("%d merged taxa read\n", n)
|
log.Printf("%d merged taxa read\n", n)
|
||||||
|
|
||||||
root := taxonomy.Taxon("1")
|
root, _, err := taxonomy.Taxon("1")
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal("cannot find the root taxon (1) in the NCBI tax dump")
|
||||||
|
}
|
||||||
|
|
||||||
taxonomy.SetRoot(root)
|
taxonomy.SetRoot(root)
|
||||||
|
|
||||||
return taxonomy, nil
|
return taxonomy, nil
|
||||||
|
|||||||
1
pkg/obitax/newick_write.go
Normal file
1
pkg/obitax/newick_write.go
Normal file
@@ -0,0 +1 @@
|
|||||||
|
package obitax
|
||||||
64
pkg/obitax/string_parser.go
Normal file
64
pkg/obitax/string_parser.go
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
package obitax
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ParseTaxonString splits a taxon description written as
// "code:taxid [scientific name]@rank" into its components. Whitespace
// surrounding any component is ignored and the "@rank" suffix is
// optional.
//
// Parameters:
//   - taxonStr: the text to parse.
//
// Returns:
//   - code: the taxonomy code (left of ':')
//   - taxid: the taxon identifier (right of ':')
//   - scientificName: the text between the first '[' and the last ']'
//   - rank: the text following '@', or "no rank" when there is no '@'
//   - error: non-nil when the text holds more than one '@', lacks the
//     square brackets, does not hold exactly one ':' before the opening
//     bracket, or when code, taxid or scientific name is empty. All
//     string results are empty in that case.
func ParseTaxonString(taxonStr string) (code, taxid, scientificName, rank string, err error) {
	taxonStr = strings.TrimSpace(taxonStr)

	// Isolate the optional "@rank" suffix; a second '@' is an error.
	mainPart, rankPart, hasRank := strings.Cut(taxonStr, "@")
	if hasRank && strings.Contains(rankPart, "@") {
		return "", "", "", "", errors.New("invalid format: multiple '@' characters found")
	}
	mainPart = strings.TrimSpace(mainPart)

	rank = "no rank"
	if hasRank {
		rank = strings.TrimSpace(rankPart)
	}

	// The scientific name spans from the first '[' to the last ']'.
	opening := strings.Index(mainPart, "[")
	closing := strings.LastIndex(mainPart, "]")
	if opening < 0 || closing < 0 || opening > closing {
		return "", "", "", "", errors.New("invalid format: scientific name must be enclosed in square brackets")
	}
	scientificName = strings.TrimSpace(mainPart[opening+1 : closing])

	// What precedes the bracket must be exactly "code:taxid".
	rawCode, rawTaxid, hasSeparator := strings.Cut(strings.TrimSpace(mainPart[:opening]), ":")
	if !hasSeparator || strings.Contains(rawTaxid, ":") {
		return "", "", "", "", errors.New("invalid format: missing taxonomy code separator ':'")
	}

	code = strings.TrimSpace(rawCode)
	taxid = strings.TrimSpace(rawTaxid)

	if code == "" || taxid == "" || scientificName == "" {
		return "", "", "", "", errors.New("invalid format: code, taxid and scientific name cannot be empty")
	}

	return code, taxid, scientificName, rank, nil
}
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
package obitax
|
package obitax
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"iter"
|
"iter"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
|
||||||
@@ -379,3 +380,29 @@ func (taxon *Taxon) SameAs(other *Taxon) bool {
|
|||||||
|
|
||||||
return taxon.Taxonomy == other.Taxonomy && taxon.Node.id == other.Node.id
|
return taxon.Taxonomy == other.Taxonomy && taxon.Node.id == other.Node.id
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (taxon *Taxon) AddChild(child string, replace bool) (*Taxon, error) {
|
||||||
|
if taxon == nil {
|
||||||
|
return nil, errors.New("nil taxon")
|
||||||
|
}
|
||||||
|
|
||||||
|
code, taxid, scientific_name, rank, err := ParseTaxonString(child)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if taxon.Taxonomy.code != code {
|
||||||
|
return nil, errors.New("taxonomy code mismatch")
|
||||||
|
}
|
||||||
|
|
||||||
|
newTaxon, err := taxon.Taxonomy.AddTaxon(taxid, *taxon.Node.id, rank, false, replace)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
newTaxon.SetName(scientific_name, "scientific name")
|
||||||
|
|
||||||
|
return newTaxon, nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -12,7 +12,6 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
log "github.com/sirupsen/logrus"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// Taxonomy represents a hierarchical classification of taxa.
|
// Taxonomy represents a hierarchical classification of taxa.
|
||||||
@@ -130,27 +129,30 @@ func (taxonomy *Taxonomy) TaxidString(id string) (string, error) {
|
|||||||
// Returns:
|
// Returns:
|
||||||
// - A pointer to the Taxon instance associated with the provided taxid.
|
// - A pointer to the Taxon instance associated with the provided taxid.
|
||||||
// - If the taxid is unknown, the method will log a fatal error.
|
// - A boolean that is true when the provided taxid is an alias of the returned taxon.
// - An error if the taxonomy is nil or if the taxid is unknown.
|
||||||
func (taxonomy *Taxonomy) Taxon(taxid string) *Taxon {
|
func (taxonomy *Taxonomy) Taxon(taxid string) (*Taxon, bool, error) {
|
||||||
taxonomy = taxonomy.OrDefault(false)
|
taxonomy = taxonomy.OrDefault(false)
|
||||||
if taxonomy == nil {
|
if taxonomy == nil {
|
||||||
return nil
|
return nil, false, errors.New("cannot extract taxon from nil taxonomy")
|
||||||
}
|
}
|
||||||
|
|
||||||
id, err := taxonomy.Id(taxid)
|
id, err := taxonomy.Id(taxid)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("Taxid %s: %v", taxid, err)
|
return nil, false, fmt.Errorf("Taxid %s: %v", taxid, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
taxon := taxonomy.nodes.Get(id)
|
taxon := taxonomy.nodes.Get(id)
|
||||||
|
isAlias := taxon.Node.id != id
|
||||||
|
|
||||||
if taxon == nil {
|
if taxon == nil {
|
||||||
log.Fatalf("Taxid %s is not part of the taxonomy %s",
|
return nil,
|
||||||
taxid,
|
false,
|
||||||
taxonomy.name)
|
fmt.Errorf("Taxid %s is not part of the taxonomy %s",
|
||||||
|
taxid,
|
||||||
|
taxonomy.name)
|
||||||
}
|
}
|
||||||
|
|
||||||
return taxon
|
return taxon, isAlias, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// AsTaxonSet returns the set of taxon nodes contained within the Taxonomy.
|
// AsTaxonSet returns the set of taxon nodes contained within the Taxonomy.
|
||||||
@@ -353,3 +355,63 @@ func (taxonomy *Taxonomy) HasRoot() bool {
|
|||||||
taxonomy = taxonomy.OrDefault(false)
|
taxonomy = taxonomy.OrDefault(false)
|
||||||
return taxonomy != nil && taxonomy.root != nil
|
return taxonomy != nil && taxonomy.root != nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (taxonomy *Taxonomy) InsertPathString(path []string) (*Taxonomy, error) {
|
||||||
|
if len(path) == 0 {
|
||||||
|
return nil, errors.New("path is empty")
|
||||||
|
}
|
||||||
|
|
||||||
|
code, taxid, scientific_name, rank, err := ParseTaxonString(path[0])
|
||||||
|
|
||||||
|
if taxonomy == nil {
|
||||||
|
taxonomy = NewTaxonomy(code, code, obiutils.AsciiAlphaNumSet)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if taxonomy.Len() == 0 {
|
||||||
|
|
||||||
|
if code != taxonomy.code {
|
||||||
|
return nil, fmt.Errorf("cannot insert taxon %s into taxonomy %s with code %s",
|
||||||
|
path[0], taxonomy.name, taxonomy.code)
|
||||||
|
}
|
||||||
|
|
||||||
|
root, err := taxonomy.AddTaxon(taxid, taxid, rank, true, true)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
root.SetName(scientific_name, "scientificName")
|
||||||
|
}
|
||||||
|
|
||||||
|
var current *Taxon
|
||||||
|
current, _, err = taxonomy.Taxon(taxid)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if !current.IsRoot() {
|
||||||
|
return nil, errors.New("path does not start with a root node")
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, id := range path[1:] {
|
||||||
|
taxon, _, err := taxonomy.Taxon(id)
|
||||||
|
if err == nil {
|
||||||
|
if !current.SameAs(taxon.Parent()) {
|
||||||
|
return nil, errors.New("path is not consistent with the taxonomy, parent mismatch")
|
||||||
|
}
|
||||||
|
current = taxon
|
||||||
|
} else {
|
||||||
|
current, err = current.AddChild(id, false)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return taxonomy, nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -151,7 +151,8 @@ func (set *TaxonSet) Alias(id *string, taxon *Taxon) {
|
|||||||
if original == nil {
|
if original == nil {
|
||||||
log.Fatalf("Original taxon %v is not part of taxon set", id)
|
log.Fatalf("Original taxon %v is not part of taxon set", id)
|
||||||
}
|
}
|
||||||
set.set[id] = taxon.Node
|
|
||||||
|
set.set[id] = original.Node
|
||||||
set.nalias++
|
set.nalias++
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -196,3 +197,30 @@ func (set *TaxonSet) Contains(id *string) bool {
|
|||||||
node := set.Get(id)
|
node := set.Get(id)
|
||||||
return node != nil
|
return node != nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (set *TaxonSet) Sort() *TaxonSlice {
|
||||||
|
if set == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
taxonomy := set.Taxonomy()
|
||||||
|
taxa := taxonomy.NewTaxonSlice(0, set.Len())
|
||||||
|
parent := make(map[*TaxNode]bool, set.Len())
|
||||||
|
|
||||||
|
pushed := true
|
||||||
|
|
||||||
|
for pushed {
|
||||||
|
pushed = false
|
||||||
|
for _, node := range set.set {
|
||||||
|
if !parent[node] && (parent[set.Get(node.parent).Node] ||
|
||||||
|
!set.Contains(node.parent) ||
|
||||||
|
node == taxonomy.Root().Node) {
|
||||||
|
pushed = true
|
||||||
|
taxa.slice = append(taxa.slice, node)
|
||||||
|
parent[node] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return taxa
|
||||||
|
}
|
||||||
|
|||||||
126
pkg/obitools/obiclean/chimera.go
Normal file
126
pkg/obitools/obiclean/chimera.go
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
package obiclean
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"sort"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
)
|
||||||
|
|
||||||
|
func commonPrefix(a, b *obiseq.BioSequence) int {
|
||||||
|
i := 0
|
||||||
|
l := min(a.Len(), b.Len())
|
||||||
|
|
||||||
|
if l == 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
as := a.Sequence()
|
||||||
|
bs := b.Sequence()
|
||||||
|
|
||||||
|
for i < l && as[i] == bs[i] {
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
|
||||||
|
if obiutils.UnsafeString(as[:i]) != obiutils.UnsafeString(bs[:i]) {
|
||||||
|
log.Fatalf("i: %d, j: %d (%s/%s)", i, i, as[:i], bs[:i])
|
||||||
|
}
|
||||||
|
|
||||||
|
return i
|
||||||
|
}
|
||||||
|
|
||||||
|
func commonSuffix(a, b *obiseq.BioSequence) int {
|
||||||
|
i := a.Len() - 1
|
||||||
|
j := b.Len() - 1
|
||||||
|
|
||||||
|
if i < 0 || j < 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
as := a.Sequence()
|
||||||
|
bs := b.Sequence()
|
||||||
|
|
||||||
|
l := 0
|
||||||
|
for i >= 0 && j >= 0 && as[i] == bs[j] {
|
||||||
|
i--
|
||||||
|
j--
|
||||||
|
l++
|
||||||
|
}
|
||||||
|
|
||||||
|
if obiutils.UnsafeString(as[i+1:]) != obiutils.UnsafeString(bs[j+1:]) {
|
||||||
|
log.Fatalf("i: %d, j: %d (%s/%s)", i, j, as[i+1:], bs[j+1:])
|
||||||
|
}
|
||||||
|
// log.Warnf("i: %d, j: %d (%s)", i, j, as[i+1:])
|
||||||
|
|
||||||
|
return l
|
||||||
|
}
|
||||||
|
|
||||||
|
func AnnotateChimera(samples map[string]*[]*seqPCR) {
|
||||||
|
|
||||||
|
w := func(sample string, seqs *[]*seqPCR) {
|
||||||
|
ls := len(*seqs)
|
||||||
|
cp := make([]int, ls)
|
||||||
|
cs := make([]int, ls)
|
||||||
|
|
||||||
|
pcrs := make([]*seqPCR, 0, ls)
|
||||||
|
|
||||||
|
for _, s := range *seqs {
|
||||||
|
if len(s.Edges) == 0 {
|
||||||
|
pcrs = append(pcrs, s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
lp := len(pcrs)
|
||||||
|
|
||||||
|
sort.Slice(pcrs, func(i, j int) bool {
|
||||||
|
return pcrs[i].Weight < pcrs[j].Weight
|
||||||
|
})
|
||||||
|
|
||||||
|
for i, s := range pcrs {
|
||||||
|
for j := i + 1; j < lp; j++ {
|
||||||
|
s2 := pcrs[j]
|
||||||
|
cp[j] = commonPrefix(s.Sequence, s2.Sequence)
|
||||||
|
cs[j] = commonSuffix(s.Sequence, s2.Sequence)
|
||||||
|
}
|
||||||
|
|
||||||
|
var cm map[string]string
|
||||||
|
var err error
|
||||||
|
|
||||||
|
chimera, ok := s.Sequence.GetAttribute("chimera")
|
||||||
|
|
||||||
|
if !ok {
|
||||||
|
cm = map[string]string{}
|
||||||
|
} else {
|
||||||
|
cm, err = obiutils.InterfaceToStringMap(chimera)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("type of chimera not map[string]string: %T (%v)",
|
||||||
|
chimera, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ls := s.Sequence.Len()
|
||||||
|
|
||||||
|
for k := i + 1; k < lp; k++ {
|
||||||
|
for l := i + 1; l < lp; l++ {
|
||||||
|
if k != l && cp[k]+cs[l] == ls {
|
||||||
|
cm[sample] = fmt.Sprintf("{%s}/{%s}@(%d)",
|
||||||
|
pcrs[k].Sequence.Id(),
|
||||||
|
pcrs[l].Sequence.Id(),
|
||||||
|
cp[k])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(cm) > 0 {
|
||||||
|
s.Sequence.SetAttribute("chimera", cm)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
for sn, sqs := range samples {
|
||||||
|
w(sn, sqs)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -13,23 +13,24 @@ import (
|
|||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obialign"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obialign"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
"github.com/schollz/progressbar/v3"
|
"github.com/schollz/progressbar/v3"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Ratio struct {
|
type Ratio struct {
|
||||||
Sample string
|
Sample string
|
||||||
SeqID string
|
SeqID string
|
||||||
status string
|
OriginalStatus string
|
||||||
From int
|
WOriginal int
|
||||||
To int
|
WMutant int
|
||||||
CFrom int
|
COriginal int
|
||||||
CTo int
|
CMutant int
|
||||||
Pos int
|
Pos int
|
||||||
Length int
|
Length int
|
||||||
A int
|
A int
|
||||||
C int
|
C int
|
||||||
G int
|
G int
|
||||||
T int
|
T int
|
||||||
}
|
}
|
||||||
|
|
||||||
type Edge struct {
|
type Edge struct {
|
||||||
@@ -52,45 +53,21 @@ func makeEdge(father, dist, pos int, from, to byte) Edge {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func abs(x int) int {
|
|
||||||
if x < 0 {
|
|
||||||
return -x
|
|
||||||
}
|
|
||||||
return x
|
|
||||||
}
|
|
||||||
|
|
||||||
func max(x, y int) int {
|
|
||||||
if x > y {
|
|
||||||
return x
|
|
||||||
}
|
|
||||||
return y
|
|
||||||
}
|
|
||||||
|
|
||||||
func min(x, y int) int {
|
|
||||||
if x < y {
|
|
||||||
return x
|
|
||||||
}
|
|
||||||
return y
|
|
||||||
}
|
|
||||||
|
|
||||||
func minMax(x, y int) (int, int) {
|
|
||||||
if x < y {
|
|
||||||
return x, y
|
|
||||||
}
|
|
||||||
return y, x
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
// It takes a filename and a 2D slice of floats produced during graph building,
|
// It takes a filename and a 2D slice of floats produced during graph building,
|
||||||
// and writes a CSV file with the first column being the
|
// and writes a CSV file with the first column being the
|
||||||
// first nucleotide, the second column being the second nucleotide, and the third column being the
|
// first nucleotide, the second column being the second nucleotide, and the third column being the
|
||||||
// ratio
|
// ratio
|
||||||
func EmpiricalDistCsv(filename string, data [][]Ratio) {
|
func EmpiricalDistCsv(filename string, data [][]Ratio, compressed bool) {
|
||||||
file, err := os.Create(filename)
|
file, err := os.Create(filename)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Println(err)
|
fmt.Println(err)
|
||||||
}
|
}
|
||||||
defer file.Close()
|
|
||||||
|
destfile, err := obiutils.CompressStream(file, true, true)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
}
|
||||||
|
defer destfile.Close()
|
||||||
|
|
||||||
pbopt := make([]progressbar.Option, 0, 5)
|
pbopt := make([]progressbar.Option, 0, 5)
|
||||||
pbopt = append(pbopt,
|
pbopt = append(pbopt,
|
||||||
@@ -103,19 +80,19 @@ func EmpiricalDistCsv(filename string, data [][]Ratio) {
|
|||||||
|
|
||||||
bar := progressbar.NewOptions(len(data), pbopt...)
|
bar := progressbar.NewOptions(len(data), pbopt...)
|
||||||
|
|
||||||
fmt.Fprintln(file, "Sample,Father_id,Father_status,From,To,Weight_from,Weight_to,Count_from,Count_to,Position,length,A,C,G,T")
|
fmt.Fprintln(destfile, "Sample,Origin_id,Origin_status,Origin,Mutant,Origin_Weight,Mutant_Weight,Origin_Count,Mutant_Count,Position,Origin_length,A,C,G,T")
|
||||||
for code, dist := range data {
|
for code, dist := range data {
|
||||||
a1, a2 := intToNucPair(code)
|
a1, a2 := intToNucPair(code)
|
||||||
for _, ratio := range dist {
|
for _, ratio := range dist {
|
||||||
fmt.Fprintf(file, "%s,%s,%s,%c,%c,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d\n",
|
fmt.Fprintf(destfile, "%s,%s,%s,%c,%c,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d\n",
|
||||||
ratio.Sample,
|
ratio.Sample,
|
||||||
ratio.SeqID,
|
ratio.SeqID,
|
||||||
ratio.status,
|
ratio.OriginalStatus,
|
||||||
a1, a2,
|
a1, a2,
|
||||||
ratio.From,
|
ratio.WOriginal,
|
||||||
ratio.To,
|
ratio.WMutant,
|
||||||
ratio.CFrom,
|
ratio.COriginal,
|
||||||
ratio.CTo,
|
ratio.CMutant,
|
||||||
ratio.Pos,
|
ratio.Pos,
|
||||||
ratio.Length,
|
ratio.Length,
|
||||||
ratio.A,
|
ratio.A,
|
||||||
@@ -478,16 +455,20 @@ func EstimateRatio(samples map[string]*[]*seqPCR, minStatRatio int) [][]Ratio {
|
|||||||
if father.Weight >= minStatRatio && edge.Dist == 1 {
|
if father.Weight >= minStatRatio && edge.Dist == 1 {
|
||||||
s := father.Sequence.Sequence()
|
s := father.Sequence.Sequence()
|
||||||
ratio[edge.NucPair] = append(ratio[edge.NucPair],
|
ratio[edge.NucPair] = append(ratio[edge.NucPair],
|
||||||
Ratio{name,
|
Ratio{
|
||||||
father.Sequence.Id(), Status(father.Sequence)[name],
|
Sample: name,
|
||||||
father.Weight, seq.Weight,
|
SeqID: father.Sequence.Id(),
|
||||||
father.Count, seq.Count,
|
OriginalStatus: Status(father.Sequence)[name],
|
||||||
edge.Pos,
|
WOriginal: father.Weight,
|
||||||
father.Sequence.Len(),
|
WMutant: seq.Weight,
|
||||||
bytes.Count(s, []byte("a")),
|
COriginal: father.Count,
|
||||||
bytes.Count(s, []byte("c")),
|
CMutant: seq.Count,
|
||||||
bytes.Count(s, []byte("g")),
|
Pos: edge.Pos,
|
||||||
bytes.Count(s, []byte("t"))})
|
Length: father.Sequence.Len(),
|
||||||
|
A: bytes.Count(s, []byte("a")),
|
||||||
|
C: bytes.Count(s, []byte("c")),
|
||||||
|
G: bytes.Count(s, []byte("g")),
|
||||||
|
T: bytes.Count(s, []byte("t"))})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ package obiclean
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"maps"
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||||
@@ -19,6 +20,7 @@ type seqPCR struct {
|
|||||||
Sequence *obiseq.BioSequence // pointer to the corresponding sequence
|
Sequence *obiseq.BioSequence // pointer to the corresponding sequence
|
||||||
SonCount int
|
SonCount int
|
||||||
AddedSons int
|
AddedSons int
|
||||||
|
IsHead bool
|
||||||
Edges []Edge
|
Edges []Edge
|
||||||
Cluster map[int]bool // used as the set of head sequences associated to that sequence
|
Cluster map[int]bool // used as the set of head sequences associated to that sequence
|
||||||
}
|
}
|
||||||
@@ -50,6 +52,7 @@ func buildSamples(dataset obiseq.BioSequenceSlice,
|
|||||||
Sequence: s,
|
Sequence: s,
|
||||||
SonCount: 0,
|
SonCount: 0,
|
||||||
AddedSons: 0,
|
AddedSons: 0,
|
||||||
|
IsHead: false,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -57,9 +60,7 @@ func buildSamples(dataset obiseq.BioSequenceSlice,
|
|||||||
return samples
|
return samples
|
||||||
}
|
}
|
||||||
|
|
||||||
func annotateOBIClean(source string, dataset obiseq.BioSequenceSlice,
|
func annotateOBIClean(source string, dataset obiseq.BioSequenceSlice) obiiter.IBioSequence {
|
||||||
sample map[string]*([]*seqPCR),
|
|
||||||
tag, NAValue string) obiiter.IBioSequence {
|
|
||||||
batchsize := 1000
|
batchsize := 1000
|
||||||
var annot = func(data obiseq.BioSequenceSlice) (obiseq.BioSequenceSlice, error) {
|
var annot = func(data obiseq.BioSequenceSlice) (obiseq.BioSequenceSlice, error) {
|
||||||
|
|
||||||
@@ -114,6 +115,28 @@ func IsHead(sequence *obiseq.BioSequence) bool {
|
|||||||
return ishead
|
return ishead
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func NotAlwaysChimera(tag string) obiseq.SequencePredicate {
|
||||||
|
descriptor := obiseq.MakeStatsOnDescription(tag)
|
||||||
|
predicat := func(sequence *obiseq.BioSequence) bool {
|
||||||
|
|
||||||
|
chimera, ok := sequence.GetStringMap("chimera")
|
||||||
|
if !ok || len(chimera) == 0 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
samples := maps.Keys(sequence.StatsOn(descriptor, "NA"))
|
||||||
|
|
||||||
|
for s := range samples {
|
||||||
|
if _, ok := chimera[s]; !ok {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
return predicat
|
||||||
|
}
|
||||||
|
|
||||||
func HeadCount(sequence *obiseq.BioSequence) int {
|
func HeadCount(sequence *obiseq.BioSequence) int {
|
||||||
var err error
|
var err error
|
||||||
annotation := sequence.Annotations()
|
annotation := sequence.Annotations()
|
||||||
@@ -237,6 +260,7 @@ func Mutation(sample map[string]*([]*seqPCR)) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func Status(sequence *obiseq.BioSequence) map[string]string {
|
func Status(sequence *obiseq.BioSequence) map[string]string {
|
||||||
|
var err error
|
||||||
annotation := sequence.Annotations()
|
annotation := sequence.Annotations()
|
||||||
iobistatus, ok := annotation["obiclean_status"]
|
iobistatus, ok := annotation["obiclean_status"]
|
||||||
var obistatus map[string]string
|
var obistatus map[string]string
|
||||||
@@ -246,9 +270,9 @@ func Status(sequence *obiseq.BioSequence) map[string]string {
|
|||||||
case map[string]string:
|
case map[string]string:
|
||||||
obistatus = iobistatus
|
obistatus = iobistatus
|
||||||
case map[string]interface{}:
|
case map[string]interface{}:
|
||||||
obistatus = make(map[string]string)
|
obistatus, err = obiutils.InterfaceToStringMap(obistatus)
|
||||||
for k, v := range iobistatus {
|
if err != nil {
|
||||||
obistatus[k] = fmt.Sprint(v)
|
log.Panicf("obiclean_status attribute of sequence %s must be castable to a map[string]string", sequence.Id())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -356,19 +380,30 @@ func CLIOBIClean(itertator obiiter.IBioSequence) obiiter.IBioSequence {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if DetectChimera() {
|
||||||
|
AnnotateChimera(samples)
|
||||||
|
}
|
||||||
|
|
||||||
if SaveGraphToFiles() {
|
if SaveGraphToFiles() {
|
||||||
SaveGMLGraphs(GraphFilesDirectory(), samples, MinCountToEvalMutationRate())
|
SaveGMLGraphs(GraphFilesDirectory(), samples, MinCountToEvalMutationRate())
|
||||||
}
|
}
|
||||||
|
|
||||||
if IsSaveRatioTable() {
|
if IsSaveRatioTable() {
|
||||||
all_ratio := EstimateRatio(samples, MinCountToEvalMutationRate())
|
all_ratio := EstimateRatio(samples, MinCountToEvalMutationRate())
|
||||||
EmpiricalDistCsv(RatioTableFilename(), all_ratio)
|
EmpiricalDistCsv(RatioTableFilename(), all_ratio, obidefault.CompressOutput())
|
||||||
}
|
}
|
||||||
|
|
||||||
iter := annotateOBIClean(source, db, samples, SampleAttribute(), "NA")
|
iter := annotateOBIClean(source, db)
|
||||||
|
|
||||||
if OnlyHead() {
|
if OnlyHead() {
|
||||||
iter = iter.FilterOn(IsHead, 1000)
|
iter = iter.FilterOn(IsHead,
|
||||||
|
obidefault.BatchSize()).FilterOn(NotAlwaysChimera(SampleAttribute()),
|
||||||
|
obidefault.BatchSize())
|
||||||
|
}
|
||||||
|
|
||||||
|
if MinSampleCount() > 1 {
|
||||||
|
sc := obiseq.OccurInAtleast(SampleAttribute(), MinSampleCount())
|
||||||
|
iter = iter.FilterOn(sc, obidefault.BatchSize())
|
||||||
}
|
}
|
||||||
|
|
||||||
return iter
|
return iter
|
||||||
|
|||||||
@@ -16,6 +16,8 @@ var _onlyHead = false
|
|||||||
|
|
||||||
var _saveGraph = "__@@NOSAVE@@__"
|
var _saveGraph = "__@@NOSAVE@@__"
|
||||||
var _saveRatio = "__@@NOSAVE@@__"
|
var _saveRatio = "__@@NOSAVE@@__"
|
||||||
|
var _minSample = 1
|
||||||
|
var _detectChimera = false
|
||||||
|
|
||||||
func ObicleanOptionSet(options *getoptions.GetOpt) {
|
func ObicleanOptionSet(options *getoptions.GetOpt) {
|
||||||
options.StringVar(&_sampleAttribute, "sample", _sampleAttribute,
|
options.StringVar(&_sampleAttribute, "sample", _sampleAttribute,
|
||||||
@@ -55,6 +57,13 @@ func ObicleanOptionSet(options *getoptions.GetOpt) {
|
|||||||
"The ratio file follows the csv format."),
|
"The ratio file follows the csv format."),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
options.IntVar(&_minSample, "min-sample-count", _minSample,
|
||||||
|
options.Description("Minimum number of samples a sequence must be present in to be considered in the analysis."),
|
||||||
|
)
|
||||||
|
|
||||||
|
options.BoolVar(&_detectChimera, "detect-chimera", _detectChimera,
|
||||||
|
options.Description("Detect chimera sequences."),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
func OptionSet(options *getoptions.GetOpt) {
|
func OptionSet(options *getoptions.GetOpt) {
|
||||||
@@ -111,3 +120,13 @@ func IsSaveRatioTable() bool {
|
|||||||
func RatioTableFilename() string {
|
func RatioTableFilename() string {
|
||||||
return _saveRatio
|
return _saveRatio
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// It returns the minimum number of samples a sequence must be present in to be considered in the analysis
|
||||||
|
func MinSampleCount() int {
|
||||||
|
return _minSample
|
||||||
|
}
|
||||||
|
|
||||||
|
// It returns true if chimera detection is enabled
|
||||||
|
func DetectChimera() bool {
|
||||||
|
return _detectChimera
|
||||||
|
}
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package obiconvert
|
|||||||
import (
|
import (
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
@@ -30,7 +31,6 @@ var __output_fastjson_format__ = false
|
|||||||
var __output_fastobi_format__ = false
|
var __output_fastobi_format__ = false
|
||||||
|
|
||||||
var __no_progress_bar__ = false
|
var __no_progress_bar__ = false
|
||||||
var __compressed__ = false
|
|
||||||
var __skip_empty__ = false
|
var __skip_empty__ = false
|
||||||
|
|
||||||
var __output_file_name__ = "-"
|
var __output_file_name__ = "-"
|
||||||
@@ -71,16 +71,16 @@ func InputOptionSet(options *getoptions.GetOpt) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func OutputModeOptionSet(options *getoptions.GetOpt) {
|
func OutputModeOptionSet(options *getoptions.GetOpt, compressed bool) {
|
||||||
options.BoolVar(&__no_progress_bar__, "no-progressbar", false,
|
options.BoolVar(&__no_progress_bar__, "no-progressbar", false,
|
||||||
options.Description("Disable the progress bar printing"))
|
options.Description("Disable the progress bar printing"))
|
||||||
|
|
||||||
options.BoolVar(&__compressed__, "compress", false,
|
if compressed {
|
||||||
options.Alias("Z"),
|
options.BoolVar(obidefault.CompressedPtr(), "compressed", obidefault.CompressOutput(),
|
||||||
options.Description("Output is compressed"))
|
options.Alias("Z"),
|
||||||
|
options.Description("Compress all the result using gzip"))
|
||||||
|
|
||||||
options.BoolVar(&__skip_empty__, "skip-empty", __skip_empty__,
|
}
|
||||||
options.Description("Sequences of length equal to zero are suppressed from the output"))
|
|
||||||
|
|
||||||
options.StringVar(&__output_file_name__, "out", __output_file_name__,
|
options.StringVar(&__output_file_name__, "out", __output_file_name__,
|
||||||
options.Alias("o"),
|
options.Alias("o"),
|
||||||
@@ -90,6 +90,9 @@ func OutputModeOptionSet(options *getoptions.GetOpt) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func OutputOptionSet(options *getoptions.GetOpt) {
|
func OutputOptionSet(options *getoptions.GetOpt) {
|
||||||
|
options.BoolVar(&__skip_empty__, "skip-empty", __skip_empty__,
|
||||||
|
options.Description("Sequences of length equal to zero are suppressed from the output"))
|
||||||
|
|
||||||
options.BoolVar(&__output_in_fasta__, "fasta-output", false,
|
options.BoolVar(&__output_in_fasta__, "fasta-output", false,
|
||||||
options.Description("Write sequence in fasta format (default if no quality data available)."))
|
options.Description("Write sequence in fasta format (default if no quality data available)."))
|
||||||
|
|
||||||
@@ -105,7 +108,7 @@ func OutputOptionSet(options *getoptions.GetOpt) {
|
|||||||
options.Alias("O"),
|
options.Alias("O"),
|
||||||
options.Description("output FASTA/FASTQ title line annotations follow OBI format."))
|
options.Description("output FASTA/FASTQ title line annotations follow OBI format."))
|
||||||
|
|
||||||
OutputModeOptionSet(options)
|
OutputModeOptionSet(options, true)
|
||||||
}
|
}
|
||||||
|
|
||||||
func PairedFilesOptionSet(options *getoptions.GetOpt) {
|
func PairedFilesOptionSet(options *getoptions.GetOpt) {
|
||||||
@@ -159,10 +162,6 @@ func CLIOutputFormat() string {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func CLICompressed() bool {
|
|
||||||
return __compressed__
|
|
||||||
}
|
|
||||||
|
|
||||||
func CLISkipEmpty() bool {
|
func CLISkipEmpty() bool {
|
||||||
return __skip_empty__
|
return __skip_empty__
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -55,6 +55,8 @@ func ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
|
|||||||
strings.HasSuffix(path, "fasta.gz") ||
|
strings.HasSuffix(path, "fasta.gz") ||
|
||||||
strings.HasSuffix(path, "fastq") ||
|
strings.HasSuffix(path, "fastq") ||
|
||||||
strings.HasSuffix(path, "fastq.gz") ||
|
strings.HasSuffix(path, "fastq.gz") ||
|
||||||
|
strings.HasSuffix(path, "fq") ||
|
||||||
|
strings.HasSuffix(path, "fq.gz") ||
|
||||||
strings.HasSuffix(path, "seq") ||
|
strings.HasSuffix(path, "seq") ||
|
||||||
strings.HasSuffix(path, "seq.gz") ||
|
strings.HasSuffix(path, "seq.gz") ||
|
||||||
strings.HasSuffix(path, "gb") ||
|
strings.HasSuffix(path, "gb") ||
|
||||||
@@ -140,7 +142,7 @@ func CLIReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
switch CLIInputFormat() {
|
switch CLIInputFormat() {
|
||||||
case "fastq":
|
case "fastq", "fq":
|
||||||
reader = obiformats.ReadFastqFromFile
|
reader = obiformats.ReadFastqFromFile
|
||||||
case "fasta":
|
case "fasta":
|
||||||
reader = obiformats.ReadFastaFromFile
|
reader = obiformats.ReadFastaFromFile
|
||||||
@@ -168,22 +170,25 @@ func CLIReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) {
|
|||||||
opts...,
|
opts...,
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
iterator, err = reader(list_of_files[0], opts...)
|
if len(list_of_files) > 0 {
|
||||||
|
iterator, err = reader(list_of_files[0], opts...)
|
||||||
if err != nil {
|
|
||||||
return obiiter.NilIBioSequence, err
|
|
||||||
}
|
|
||||||
|
|
||||||
if CLIPairedFileName() != "" {
|
|
||||||
ip, err := reader(CLIPairedFileName(), opts...)
|
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return obiiter.NilIBioSequence, err
|
return obiiter.NilIBioSequence, err
|
||||||
}
|
}
|
||||||
|
|
||||||
iterator = iterator.PairTo(ip)
|
if CLIPairedFileName() != "" {
|
||||||
}
|
ip, err := reader(CLIPairedFileName(), opts...)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return obiiter.NilIBioSequence, err
|
||||||
|
}
|
||||||
|
|
||||||
|
iterator = iterator.PairTo(ip)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
iterator = obiiter.NilIBioSequence
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ func BuildPairedFileNames(filename string) (string, string) {
|
|||||||
forward := parts[0] + "_R1"
|
forward := parts[0] + "_R1"
|
||||||
reverse := parts[0] + "_R2"
|
reverse := parts[0] + "_R2"
|
||||||
|
|
||||||
if parts[1] != "" {
|
if len(parts) > 1 && parts[1] != "" {
|
||||||
suffix := "." + parts[1]
|
suffix := "." + parts[1]
|
||||||
forward += suffix
|
forward += suffix
|
||||||
reverse += suffix
|
reverse += suffix
|
||||||
@@ -58,7 +58,7 @@ func CLIWriteBioSequences(iterator obiiter.IBioSequence,
|
|||||||
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
||||||
opts = append(opts, obiformats.OptionsBatchSize(obidefault.BatchSize()))
|
opts = append(opts, obiformats.OptionsBatchSize(obidefault.BatchSize()))
|
||||||
|
|
||||||
opts = append(opts, obiformats.OptionsCompressed(CLICompressed()))
|
opts = append(opts, obiformats.OptionsCompressed(obidefault.CompressOutput()))
|
||||||
|
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
|
|||||||
@@ -33,6 +33,7 @@ func CLIWriteSequenceCSV(iterator obiiter.IBioSequence,
|
|||||||
CSVDefinition(CLIPrintDefinition()),
|
CSVDefinition(CLIPrintDefinition()),
|
||||||
CSVKeys(CLIToBeKeptAttributes()),
|
CSVKeys(CLIToBeKeptAttributes()),
|
||||||
CSVSequence(CLIPrintSequence()),
|
CSVSequence(CLIPrintSequence()),
|
||||||
|
CSVQuality(CLIPrintQuality()),
|
||||||
CSVAutoColumn(CLIAutoColumns()),
|
CSVAutoColumn(CLIAutoColumns()),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -66,7 +66,7 @@ func CSVOptionSet(options *getoptions.GetOpt) {
|
|||||||
|
|
||||||
func OptionSet(options *getoptions.GetOpt) {
|
func OptionSet(options *getoptions.GetOpt) {
|
||||||
obiconvert.InputOptionSet(options)
|
obiconvert.InputOptionSet(options)
|
||||||
obiconvert.OutputModeOptionSet(options)
|
obiconvert.OutputModeOptionSet(options, true)
|
||||||
obioptions.LoadTaxonomyOptionSet(options, false, false)
|
obioptions.LoadTaxonomyOptionSet(options, false, false)
|
||||||
CSVOptionSet(options)
|
CSVOptionSet(options)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ func CSVSequenceHeader(opt Options) obiitercsv.CSVHeader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if opt.CSVQuality() {
|
if opt.CSVQuality() {
|
||||||
record.AppendField("quality")
|
record.AppendField("qualities")
|
||||||
}
|
}
|
||||||
|
|
||||||
return record
|
return record
|
||||||
@@ -100,9 +100,9 @@ func CSVBatchFromSequences(batch obiiter.BioSequenceBatch, opt Options) obiiterc
|
|||||||
for j := 0; j < l; j++ {
|
for j := 0; j < l; j++ {
|
||||||
ascii[j] = uint8(q[j]) + uint8(quality_shift)
|
ascii[j] = uint8(q[j]) + uint8(quality_shift)
|
||||||
}
|
}
|
||||||
record["quality"] = string(ascii)
|
record["qualities"] = string(ascii)
|
||||||
} else {
|
} else {
|
||||||
record["quality"] = opt.CSVNAValue()
|
record["qualities"] = opt.CSVNAValue()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ func CLIDistributeSequence(sequences obiiter.IBioSequence) {
|
|||||||
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers),
|
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers),
|
||||||
obiformats.OptionsBatchSize(obidefault.BatchSize()),
|
obiformats.OptionsBatchSize(obidefault.BatchSize()),
|
||||||
obiformats.OptionsAppendFile(CLIAppendSequences()),
|
obiformats.OptionsAppendFile(CLIAppendSequences()),
|
||||||
obiformats.OptionsCompressed(obiconvert.CLICompressed()))
|
obiformats.OptionsCompressed(obidefault.CompressOutput()))
|
||||||
|
|
||||||
var formater obiformats.SequenceBatchWriterToFile
|
var formater obiformats.SequenceBatchWriterToFile
|
||||||
|
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import (
|
|||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
@@ -16,6 +17,7 @@ import (
|
|||||||
var _BelongTaxa = make([]string, 0)
|
var _BelongTaxa = make([]string, 0)
|
||||||
var _NotBelongTaxa = make([]string, 0)
|
var _NotBelongTaxa = make([]string, 0)
|
||||||
var _RequiredRanks = make([]string, 0)
|
var _RequiredRanks = make([]string, 0)
|
||||||
|
var _ValidateTaxonomy = false
|
||||||
|
|
||||||
var _MinimumLength = 1
|
var _MinimumLength = 1
|
||||||
var _MaximumLength = int(2e9)
|
var _MaximumLength = int(2e9)
|
||||||
@@ -62,6 +64,9 @@ func TaxonomySelectionOptionSet(options *getoptions.GetOpt) {
|
|||||||
options.ArgName("RANK_NAME"),
|
options.ArgName("RANK_NAME"),
|
||||||
options.Description("Select sequences belonging a taxon with a rank <RANK_NAME>"))
|
options.Description("Select sequences belonging a taxon with a rank <RANK_NAME>"))
|
||||||
|
|
||||||
|
options.BoolVar(&_ValidateTaxonomy, "valid-taxid", _ValidateTaxonomy,
|
||||||
|
options.Description("Validate the taxonomic classification of the sequences."))
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func SequenceSelectionOptionSet(options *getoptions.GetOpt) {
|
func SequenceSelectionOptionSet(options *getoptions.GetOpt) {
|
||||||
@@ -248,15 +253,15 @@ func CLIRestrictTaxonomyPredicate() obiseq.SequencePredicate {
|
|||||||
if len(_BelongTaxa) > 0 {
|
if len(_BelongTaxa) > 0 {
|
||||||
taxonomy := obitax.DefaultTaxonomy()
|
taxonomy := obitax.DefaultTaxonomy()
|
||||||
|
|
||||||
taxon := taxonomy.Taxon(_BelongTaxa[0])
|
taxon, _, err := taxonomy.Taxon(_BelongTaxa[0])
|
||||||
if taxon == nil {
|
if err != nil {
|
||||||
p = obiseq.IsSubCladeOfSlot(taxonomy, _BelongTaxa[0])
|
p = obiseq.IsSubCladeOfSlot(taxonomy, _BelongTaxa[0])
|
||||||
} else {
|
} else {
|
||||||
p = obiseq.IsSubCladeOf(taxonomy, taxon)
|
p = obiseq.IsSubCladeOf(taxonomy, taxon)
|
||||||
}
|
}
|
||||||
for _, staxid := range _BelongTaxa[1:] {
|
for _, staxid := range _BelongTaxa[1:] {
|
||||||
taxon := taxonomy.Taxon(staxid)
|
taxon, _, err := taxonomy.Taxon(staxid)
|
||||||
if taxon == nil {
|
if err != nil {
|
||||||
p2 = obiseq.IsSubCladeOfSlot(taxonomy, staxid)
|
p2 = obiseq.IsSubCladeOfSlot(taxonomy, staxid)
|
||||||
} else {
|
} else {
|
||||||
p2 = obiseq.IsSubCladeOf(taxonomy, taxon)
|
p2 = obiseq.IsSubCladeOf(taxonomy, taxon)
|
||||||
@@ -271,6 +276,27 @@ func CLIRestrictTaxonomyPredicate() obiseq.SequencePredicate {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func CLIIsValidTaxonomyPredicate() obiseq.SequencePredicate {
|
||||||
|
if _ValidateTaxonomy {
|
||||||
|
if !obidefault.HasSelectedTaxonomy() {
|
||||||
|
log.Fatal("Taxonomy not found")
|
||||||
|
}
|
||||||
|
taxonomy := obitax.DefaultTaxonomy()
|
||||||
|
if taxonomy == nil {
|
||||||
|
log.Fatal("Taxonomy not found")
|
||||||
|
}
|
||||||
|
|
||||||
|
predicat := func(sequences *obiseq.BioSequence) bool {
|
||||||
|
taxon := sequences.Taxon(taxonomy)
|
||||||
|
return taxon != nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return predicat
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func CLIAvoidTaxonomyPredicate() obiseq.SequencePredicate {
|
func CLIAvoidTaxonomyPredicate() obiseq.SequencePredicate {
|
||||||
var p obiseq.SequencePredicate
|
var p obiseq.SequencePredicate
|
||||||
var p2 obiseq.SequencePredicate
|
var p2 obiseq.SequencePredicate
|
||||||
@@ -278,16 +304,16 @@ func CLIAvoidTaxonomyPredicate() obiseq.SequencePredicate {
|
|||||||
if len(_NotBelongTaxa) > 0 {
|
if len(_NotBelongTaxa) > 0 {
|
||||||
taxonomy := obitax.DefaultTaxonomy()
|
taxonomy := obitax.DefaultTaxonomy()
|
||||||
|
|
||||||
taxon := taxonomy.Taxon(_NotBelongTaxa[0])
|
taxon, _, err := taxonomy.Taxon(_NotBelongTaxa[0])
|
||||||
if taxon == nil {
|
if err != nil {
|
||||||
p = obiseq.IsSubCladeOfSlot(taxonomy, _NotBelongTaxa[0])
|
p = obiseq.IsSubCladeOfSlot(taxonomy, _NotBelongTaxa[0])
|
||||||
} else {
|
} else {
|
||||||
p = obiseq.IsSubCladeOf(taxonomy, taxon)
|
p = obiseq.IsSubCladeOf(taxonomy, taxon)
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, taxid := range _NotBelongTaxa[1:] {
|
for _, taxid := range _NotBelongTaxa[1:] {
|
||||||
taxon := taxonomy.Taxon(taxid)
|
taxon, _, err := taxonomy.Taxon(taxid)
|
||||||
if taxon == nil {
|
if err != nil {
|
||||||
p2 = obiseq.IsSubCladeOfSlot(taxonomy, taxid)
|
p2 = obiseq.IsSubCladeOfSlot(taxonomy, taxid)
|
||||||
} else {
|
} else {
|
||||||
p2 = obiseq.IsSubCladeOf(taxonomy, taxon)
|
p2 = obiseq.IsSubCladeOf(taxonomy, taxon)
|
||||||
@@ -319,7 +345,7 @@ func CLIHasRankDefinedPredicate() obiseq.SequencePredicate {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func CLITaxonomyFilterPredicate() obiseq.SequencePredicate {
|
func CLITaxonomyFilterPredicate() obiseq.SequencePredicate {
|
||||||
return CLIHasRankDefinedPredicate().And(CLIRestrictTaxonomyPredicate()).And(CLIAvoidTaxonomyPredicate())
|
return CLIIsValidTaxonomyPredicate().And(CLIAvoidTaxonomyPredicate()).And(CLIHasRankDefinedPredicate()).And(CLIRestrictTaxonomyPredicate())
|
||||||
}
|
}
|
||||||
|
|
||||||
func CLIPredicatesPredicate() obiseq.SequencePredicate {
|
func CLIPredicatesPredicate() obiseq.SequencePredicate {
|
||||||
|
|||||||
@@ -129,6 +129,7 @@ func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
|
|||||||
}
|
}
|
||||||
lcons := cons.Len()
|
lcons := cons.Len()
|
||||||
aliLength := lcons - _Abs(left) - _Abs(right)
|
aliLength := lcons - _Abs(left) - _Abs(right)
|
||||||
|
|
||||||
identity := float64(match) / float64(aliLength)
|
identity := float64(match) / float64(aliLength)
|
||||||
if aliLength == 0 {
|
if aliLength == 0 {
|
||||||
identity = 0
|
identity = 0
|
||||||
@@ -237,7 +238,7 @@ func IAssemblePESequencesBatch(iterator obiiter.IBioSequence,
|
|||||||
log.Printf("End of the sequence Pairing")
|
log.Printf("End of the sequence Pairing")
|
||||||
}()
|
}()
|
||||||
|
|
||||||
f := func(iterator obiiter.IBioSequence, wid int) {
|
f := func(iterator obiiter.IBioSequence) {
|
||||||
arena := obialign.MakePEAlignArena(150, 150)
|
arena := obialign.MakePEAlignArena(150, 150)
|
||||||
shifts := make(map[int]int)
|
shifts := make(map[int]int)
|
||||||
|
|
||||||
@@ -262,9 +263,9 @@ func IAssemblePESequencesBatch(iterator obiiter.IBioSequence,
|
|||||||
log.Printf("Start of the sequence Pairing using %d workers\n", nworkers)
|
log.Printf("Start of the sequence Pairing using %d workers\n", nworkers)
|
||||||
|
|
||||||
for i := 0; i < nworkers-1; i++ {
|
for i := 0; i < nworkers-1; i++ {
|
||||||
go f(iterator.Split(), i)
|
go f(iterator.Split())
|
||||||
}
|
}
|
||||||
go f(iterator, nworkers-1)
|
go f(iterator)
|
||||||
return newIter
|
return newIter
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -42,9 +42,10 @@ func MatchDistanceIndex(taxonomy *obitax.Taxonomy, distance int, distanceIdx map
|
|||||||
if i == len(keys) || distance > keys[len(keys)-1] {
|
if i == len(keys) || distance > keys[len(keys)-1] {
|
||||||
taxon = taxonomy.Root()
|
taxon = taxonomy.Root()
|
||||||
} else {
|
} else {
|
||||||
taxon = taxonomy.Taxon(distanceIdx[keys[i]])
|
var err error
|
||||||
if taxon == nil {
|
taxon, _, err = taxonomy.Taxon(distanceIdx[keys[i]])
|
||||||
log.Panicf("Cannot identify taxon %s in %s", distanceIdx[keys[i]], taxonomy.Name())
|
if err != nil {
|
||||||
|
log.Panicf("Cannot identify taxon %s in %s (%v)", distanceIdx[keys[i]], taxonomy.Name(), err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -72,6 +73,10 @@ func FindClosests(sequence *obiseq.BioSequence,
|
|||||||
refcounts []*obikmer.Table4mer,
|
refcounts []*obikmer.Table4mer,
|
||||||
runExact bool) (obiseq.BioSequenceSlice, int, float64, string, []int) {
|
runExact bool) (obiseq.BioSequenceSlice, int, float64, string, []int) {
|
||||||
|
|
||||||
|
if sequence.Len() < 5 {
|
||||||
|
return obiseq.BioSequenceSlice{}, 1000, 0, "NA", []int{}
|
||||||
|
}
|
||||||
|
|
||||||
var matrix []uint64
|
var matrix []uint64
|
||||||
|
|
||||||
seqwords := obikmer.Count4Mer(sequence, nil, nil)
|
seqwords := obikmer.Count4Mer(sequence, nil, nil)
|
||||||
@@ -196,9 +201,9 @@ func Identify(sequence *obiseq.BioSequence,
|
|||||||
log.Panic("Problem in identification line : ", best.Id(), "idx:", idx, "distance:", d)
|
log.Panic("Problem in identification line : ", best.Id(), "idx:", idx, "distance:", d)
|
||||||
}
|
}
|
||||||
|
|
||||||
match_taxon := taxo.Taxon(identification)
|
match_taxon, _, err := taxo.Taxon(identification)
|
||||||
|
|
||||||
if taxon != nil {
|
if err == nil {
|
||||||
taxon, _ = taxon.LCA(match_taxon)
|
taxon, _ = taxon.LCA(match_taxon)
|
||||||
} else {
|
} else {
|
||||||
taxon = match_taxon
|
taxon = match_taxon
|
||||||
@@ -255,7 +260,7 @@ func CLIAssignTaxonomy(iterator obiiter.IBioSequence,
|
|||||||
if taxon != nil {
|
if taxon != nil {
|
||||||
j++
|
j++
|
||||||
} else {
|
} else {
|
||||||
log.Warnf("Taxid %d is not described in the taxonomy %s."+
|
log.Warnf("Taxid %s is not described in the taxonomy %s."+
|
||||||
" Sequence %s is discared from the reference database",
|
" Sequence %s is discared from the reference database",
|
||||||
seq.Taxid(), taxo.Name(), seq.Id())
|
seq.Taxid(), taxo.Name(), seq.Id())
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,7 +6,6 @@ import (
|
|||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
@@ -43,7 +42,6 @@ func TagOptionSet(options *getoptions.GetOpt) {
|
|||||||
// the obiuniq command
|
// the obiuniq command
|
||||||
func OptionSet(options *getoptions.GetOpt) {
|
func OptionSet(options *getoptions.GetOpt) {
|
||||||
obiconvert.OptionSet(options)
|
obiconvert.OptionSet(options)
|
||||||
obioptions.LoadTaxonomyOptionSet(options, true, false)
|
|
||||||
TagOptionSet(options)
|
TagOptionSet(options)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,10 +1,15 @@
|
|||||||
package obitaxonomy
|
package obitaxonomy
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
|
"time"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiitercsv"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiitercsv"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obicsv"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obicsv"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
@@ -73,3 +78,18 @@ func CLICSVTaxaIterator(iterator *obitax.ITaxon) *obiitercsv.ICSVRecord {
|
|||||||
func CLICSVTaxaWriter(iterator *obitax.ITaxon, terminalAction bool) *obiitercsv.ICSVRecord {
|
func CLICSVTaxaWriter(iterator *obitax.ITaxon, terminalAction bool) *obiitercsv.ICSVRecord {
|
||||||
return obicsv.CLICSVWriter(CLICSVTaxaIterator(iterator), terminalAction)
|
return obicsv.CLICSVWriter(CLICSVTaxaIterator(iterator), terminalAction)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func CLIDownloadNCBITaxdump() error {
|
||||||
|
now := time.Now()
|
||||||
|
dateStr := now.Format("20060102") // In Go, this specific date is used as reference for formatting
|
||||||
|
|
||||||
|
filename := fmt.Sprintf("ncbitaxo_%s.tgz", dateStr)
|
||||||
|
|
||||||
|
if obiconvert.CLIOutPutFileName() != "-" {
|
||||||
|
filename = obiconvert.CLIOutPutFileName()
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Infof("Downloading NCBI Taxdump to %s", filename)
|
||||||
|
return obiutils.DownloadFile("https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump.tar.gz", filename)
|
||||||
|
|
||||||
|
}
|
||||||
@@ -5,6 +5,7 @@ import (
|
|||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"github.com/DavidGamba/go-getoptions"
|
"github.com/DavidGamba/go-getoptions"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -22,6 +23,8 @@ var __taxid_path__ = "NA"
|
|||||||
var __taxid_sons__ = "NA"
|
var __taxid_sons__ = "NA"
|
||||||
var __restrict_rank__ = ""
|
var __restrict_rank__ = ""
|
||||||
var __to_dump__ = ""
|
var __to_dump__ = ""
|
||||||
|
var __download_ncbi__ = false
|
||||||
|
var __extract_taxonomy__ = false
|
||||||
|
|
||||||
func FilterTaxonomyOptionSet(options *getoptions.GetOpt) {
|
func FilterTaxonomyOptionSet(options *getoptions.GetOpt) {
|
||||||
options.BoolVar(&__rank_list__, "rank-list", false,
|
options.BoolVar(&__rank_list__, "rank-list", false,
|
||||||
@@ -34,7 +37,8 @@ func FilterTaxonomyOptionSet(options *getoptions.GetOpt) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func OptionSet(options *getoptions.GetOpt) {
|
func OptionSet(options *getoptions.GetOpt) {
|
||||||
obioptions.LoadTaxonomyOptionSet(options, true, true)
|
obioptions.LoadTaxonomyOptionSet(options, false, true)
|
||||||
|
obiconvert.OutputModeOptionSet(options, false)
|
||||||
FilterTaxonomyOptionSet(options)
|
FilterTaxonomyOptionSet(options)
|
||||||
options.BoolVar(&__fixed_pattern__, "fixed", false,
|
options.BoolVar(&__fixed_pattern__, "fixed", false,
|
||||||
options.Alias("F"),
|
options.Alias("F"),
|
||||||
@@ -70,6 +74,12 @@ func OptionSet(options *getoptions.GetOpt) {
|
|||||||
options.ArgName("TAXID"),
|
options.ArgName("TAXID"),
|
||||||
options.Description("Dump a sub-taxonomy corresponding to the precised clade"),
|
options.Description("Dump a sub-taxonomy corresponding to the precised clade"),
|
||||||
)
|
)
|
||||||
|
options.BoolVar(&__download_ncbi__, "download-ncbi", __download_ncbi__,
|
||||||
|
options.Description("Download the current NCBI taxonomy taxdump"),
|
||||||
|
)
|
||||||
|
options.BoolVar(&__extract_taxonomy__, "extract-taxonomy", __extract_taxonomy__,
|
||||||
|
options.Description("Extract taxonomy from a sequence file"),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
func CLITaxonomicalRestrictions() (*obitax.TaxonSet, error) {
|
func CLITaxonomicalRestrictions() (*obitax.TaxonSet, error) {
|
||||||
@@ -81,13 +91,14 @@ func CLITaxonomicalRestrictions() (*obitax.TaxonSet, error) {
|
|||||||
|
|
||||||
ts := taxonomy.NewTaxonSet()
|
ts := taxonomy.NewTaxonSet()
|
||||||
for _, taxid := range __taxonomical_restriction__ {
|
for _, taxid := range __taxonomical_restriction__ {
|
||||||
tx := taxonomy.Taxon(taxid)
|
tx, _, err := taxonomy.Taxon(taxid)
|
||||||
|
|
||||||
if tx == nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf(
|
return nil, fmt.Errorf(
|
||||||
"cannot find taxon %s in taxonomy %s",
|
"cannot find taxon %s in taxonomy %s (%v)",
|
||||||
taxid,
|
taxid,
|
||||||
taxonomy.Name(),
|
taxonomy.Name(),
|
||||||
|
err,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -144,3 +155,11 @@ func CLIDumpSubtaxonomy() bool {
|
|||||||
func CLISubTaxonomyNode() string {
|
func CLISubTaxonomyNode() string {
|
||||||
return __to_dump__
|
return __to_dump__
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func CLIDownloadNCBI() bool {
|
||||||
|
return __download_ncbi__
|
||||||
|
}
|
||||||
|
|
||||||
|
func CLIExtractTaxonomy() bool {
|
||||||
|
return __extract_taxonomy__
|
||||||
|
}
|
||||||
|
|||||||
@@ -93,3 +93,145 @@ func MapToMapInterface(m interface{}) map[string]interface{} {
|
|||||||
log.Panic("Invalid map type")
|
log.Panic("Invalid map type")
|
||||||
return make(map[string]interface{})
|
return make(map[string]interface{})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// InterfaceToInt converts a interface{} to an integer value if possible.
|
||||||
|
// If not a "NotAnInteger" error is returned via the err
|
||||||
|
// return value and val is set to 0.
|
||||||
|
func InterfaceToInt(i interface{}) (val int, err error) {
|
||||||
|
|
||||||
|
err = nil
|
||||||
|
val = 0
|
||||||
|
|
||||||
|
switch t := i.(type) {
|
||||||
|
case int:
|
||||||
|
val = t
|
||||||
|
case int8:
|
||||||
|
val = int(t) // standardizes across systems
|
||||||
|
case int16:
|
||||||
|
val = int(t) // standardizes across systems
|
||||||
|
case int32:
|
||||||
|
val = int(t) // standardizes across systems
|
||||||
|
case int64:
|
||||||
|
val = int(t) // standardizes across systems
|
||||||
|
case float32:
|
||||||
|
val = int(t) // standardizes across systems
|
||||||
|
case float64:
|
||||||
|
val = int(t) // standardizes across systems
|
||||||
|
case uint8:
|
||||||
|
val = int(t) // standardizes across systems
|
||||||
|
case uint16:
|
||||||
|
val = int(t) // standardizes across systems
|
||||||
|
case uint32:
|
||||||
|
val = int(t) // standardizes across systems
|
||||||
|
case uint64:
|
||||||
|
val = int(t) // standardizes across systems
|
||||||
|
default:
|
||||||
|
err = &NotAnInteger{"value attribute cannot be casted to an integer"}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// InterfaceToInt converts a interface{} to an integer value if possible.
|
||||||
|
// If not a "NotAnInteger" error is returned via the err
|
||||||
|
// return value and val is set to 0.
|
||||||
|
func InterfaceToFloat64(i interface{}) (val float64, err error) {
|
||||||
|
|
||||||
|
err = nil
|
||||||
|
val = 0
|
||||||
|
|
||||||
|
switch t := i.(type) {
|
||||||
|
case int:
|
||||||
|
val = float64(t)
|
||||||
|
case int8:
|
||||||
|
val = float64(t) // standardizes across systems
|
||||||
|
case int16:
|
||||||
|
val = float64(t) // standardizes across systems
|
||||||
|
case int32:
|
||||||
|
val = float64(t) // standardizes across systems
|
||||||
|
case int64:
|
||||||
|
val = float64(t) // standardizes across systems
|
||||||
|
case float32:
|
||||||
|
val = float64(t) // standardizes across systems
|
||||||
|
case float64:
|
||||||
|
val = t // standardizes across systems
|
||||||
|
case uint8:
|
||||||
|
val = float64(t) // standardizes across systems
|
||||||
|
case uint16:
|
||||||
|
val = float64(t) // standardizes across systems
|
||||||
|
case uint32:
|
||||||
|
val = float64(t) // standardizes across systems
|
||||||
|
case uint64:
|
||||||
|
val = float64(t) // standardizes across systems
|
||||||
|
default:
|
||||||
|
err = &NotAnFloat64{"value attribute cannot be casted to a float value"}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func InterfaceToIntMap(i interface{}) (val map[string]int, err error) {
|
||||||
|
err = nil
|
||||||
|
|
||||||
|
switch i := i.(type) {
|
||||||
|
case map[string]int:
|
||||||
|
val = i
|
||||||
|
case map[string]interface{}:
|
||||||
|
val = make(map[string]int, len(i))
|
||||||
|
for k, v := range i {
|
||||||
|
val[k], err = InterfaceToInt(v)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case map[string]float64:
|
||||||
|
val = make(map[string]int, len(i))
|
||||||
|
for k, v := range i {
|
||||||
|
val[k] = int(v)
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
err = &NotAMapInt{"value attribute cannot be casted to a map[string]int"}
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func InterfaceToStringMap(i interface{}) (val map[string]string, err error) {
|
||||||
|
err = nil
|
||||||
|
|
||||||
|
switch i := i.(type) {
|
||||||
|
case map[string]string:
|
||||||
|
val = i
|
||||||
|
case map[string]interface{}:
|
||||||
|
val = make(map[string]string, len(i))
|
||||||
|
for k, v := range i {
|
||||||
|
val[k], err = InterfaceToString(v)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
err = &NotAMapInt{"value attribute cannot be casted to a map[string]int"}
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func InterfaceToStringSlice(i interface{}) (val []string, err error) {
|
||||||
|
err = nil
|
||||||
|
|
||||||
|
switch i := i.(type) {
|
||||||
|
case []string:
|
||||||
|
val = i
|
||||||
|
case []interface{}:
|
||||||
|
val = make([]string, len(i))
|
||||||
|
for k, v := range i {
|
||||||
|
val[k], err = InterfaceToString(v)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
err = &NotAMapInt{"value attribute cannot be casted to a []string"}
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|||||||
45
pkg/obiutils/download.go
Normal file
45
pkg/obiutils/download.go
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
package obiutils
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"github.com/schollz/progressbar/v3"
|
||||||
|
)
|
||||||
|
|
||||||
|
func DownloadFile(url string, filepath string) error {
|
||||||
|
// Get the data
|
||||||
|
resp, err := http.Get(url)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
// Check server response
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return fmt.Errorf("bad status: %s", resp.Status)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create the file
|
||||||
|
out, err := os.Create(filepath)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer out.Close()
|
||||||
|
|
||||||
|
// Create progress bar
|
||||||
|
bar := progressbar.DefaultBytes(
|
||||||
|
resp.ContentLength,
|
||||||
|
"downloading",
|
||||||
|
)
|
||||||
|
|
||||||
|
// Write the body to file while updating the progress bar
|
||||||
|
_, err = io.Copy(io.MultiWriter(out, bar), resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
@@ -25,43 +25,6 @@ func (m *NotAnInteger) Error() string {
|
|||||||
return m.message
|
return m.message
|
||||||
}
|
}
|
||||||
|
|
||||||
// InterfaceToInt converts a interface{} to an integer value if possible.
|
|
||||||
// If not a "NotAnInteger" error is returned via the err
|
|
||||||
// return value and val is set to 0.
|
|
||||||
func InterfaceToInt(i interface{}) (val int, err error) {
|
|
||||||
|
|
||||||
err = nil
|
|
||||||
val = 0
|
|
||||||
|
|
||||||
switch t := i.(type) {
|
|
||||||
case int:
|
|
||||||
val = t
|
|
||||||
case int8:
|
|
||||||
val = int(t) // standardizes across systems
|
|
||||||
case int16:
|
|
||||||
val = int(t) // standardizes across systems
|
|
||||||
case int32:
|
|
||||||
val = int(t) // standardizes across systems
|
|
||||||
case int64:
|
|
||||||
val = int(t) // standardizes across systems
|
|
||||||
case float32:
|
|
||||||
val = int(t) // standardizes across systems
|
|
||||||
case float64:
|
|
||||||
val = int(t) // standardizes across systems
|
|
||||||
case uint8:
|
|
||||||
val = int(t) // standardizes across systems
|
|
||||||
case uint16:
|
|
||||||
val = int(t) // standardizes across systems
|
|
||||||
case uint32:
|
|
||||||
val = int(t) // standardizes across systems
|
|
||||||
case uint64:
|
|
||||||
val = int(t) // standardizes across systems
|
|
||||||
default:
|
|
||||||
err = &NotAnInteger{"value attribute cannot be casted to an integer"}
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// NotAnInteger defines a new type of Error : "NotAnInteger"
|
// NotAnInteger defines a new type of Error : "NotAnInteger"
|
||||||
type NotAnFloat64 struct {
|
type NotAnFloat64 struct {
|
||||||
message string
|
message string
|
||||||
@@ -74,43 +37,6 @@ func (m *NotAnFloat64) Error() string {
|
|||||||
return m.message
|
return m.message
|
||||||
}
|
}
|
||||||
|
|
||||||
// InterfaceToInt converts a interface{} to an integer value if possible.
|
|
||||||
// If not a "NotAnInteger" error is returned via the err
|
|
||||||
// return value and val is set to 0.
|
|
||||||
func InterfaceToFloat64(i interface{}) (val float64, err error) {
|
|
||||||
|
|
||||||
err = nil
|
|
||||||
val = 0
|
|
||||||
|
|
||||||
switch t := i.(type) {
|
|
||||||
case int:
|
|
||||||
val = float64(t)
|
|
||||||
case int8:
|
|
||||||
val = float64(t) // standardizes across systems
|
|
||||||
case int16:
|
|
||||||
val = float64(t) // standardizes across systems
|
|
||||||
case int32:
|
|
||||||
val = float64(t) // standardizes across systems
|
|
||||||
case int64:
|
|
||||||
val = float64(t) // standardizes across systems
|
|
||||||
case float32:
|
|
||||||
val = float64(t) // standardizes across systems
|
|
||||||
case float64:
|
|
||||||
val = t // standardizes across systems
|
|
||||||
case uint8:
|
|
||||||
val = float64(t) // standardizes across systems
|
|
||||||
case uint16:
|
|
||||||
val = float64(t) // standardizes across systems
|
|
||||||
case uint32:
|
|
||||||
val = float64(t) // standardizes across systems
|
|
||||||
case uint64:
|
|
||||||
val = float64(t) // standardizes across systems
|
|
||||||
default:
|
|
||||||
err = &NotAnFloat64{"value attribute cannot be casted to a float value"}
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// NotABoolean defines a new type of Error : "NotAMapInt"
|
// NotABoolean defines a new type of Error : "NotAMapInt"
|
||||||
type NotAMapInt struct {
|
type NotAMapInt struct {
|
||||||
message string
|
message string
|
||||||
@@ -123,53 +49,6 @@ func (m *NotAMapInt) Error() string {
|
|||||||
return m.message
|
return m.message
|
||||||
}
|
}
|
||||||
|
|
||||||
func InterfaceToIntMap(i interface{}) (val map[string]int, err error) {
|
|
||||||
err = nil
|
|
||||||
|
|
||||||
switch i := i.(type) {
|
|
||||||
case map[string]int:
|
|
||||||
val = i
|
|
||||||
case map[string]interface{}:
|
|
||||||
val = make(map[string]int, len(i))
|
|
||||||
for k, v := range i {
|
|
||||||
val[k], err = InterfaceToInt(v)
|
|
||||||
if err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case map[string]float64:
|
|
||||||
val = make(map[string]int, len(i))
|
|
||||||
for k, v := range i {
|
|
||||||
val[k] = int(v)
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
err = &NotAMapInt{"value attribute cannot be casted to a map[string]int"}
|
|
||||||
}
|
|
||||||
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
func InterfaceToStringMap(i interface{}) (val map[string]string, err error) {
|
|
||||||
err = nil
|
|
||||||
|
|
||||||
switch i := i.(type) {
|
|
||||||
case map[string]string:
|
|
||||||
val = i
|
|
||||||
case map[string]interface{}:
|
|
||||||
val = make(map[string]string, len(i))
|
|
||||||
for k, v := range i {
|
|
||||||
val[k], err = InterfaceToString(v)
|
|
||||||
if err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
err = &NotAMapInt{"value attribute cannot be casted to a map[string]int"}
|
|
||||||
}
|
|
||||||
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// NotABoolean defines a new type of Error : "NotAMapInt"
|
// NotABoolean defines a new type of Error : "NotAMapInt"
|
||||||
type NotAMapFloat64 struct {
|
type NotAMapFloat64 struct {
|
||||||
message string
|
message string
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ func MakeSet[E comparable](vals ...E) Set[E] {
|
|||||||
// It takes a variadic parameter of type E, where E is a comparable type.
|
// It takes a variadic parameter of type E, where E is a comparable type.
|
||||||
// It returns a pointer to a Set of type E.
|
// It returns a pointer to a Set of type E.
|
||||||
func NewSet[E comparable](vals ...E) *Set[E] {
|
func NewSet[E comparable](vals ...E) *Set[E] {
|
||||||
s := MakeSet[E](vals...)
|
s := MakeSet(vals...)
|
||||||
return &s
|
return &s
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -50,7 +50,7 @@ func TestNewSet(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Test Case 2: Creating a set with multiple values
|
// Test Case 2: Creating a set with multiple values
|
||||||
set2 := NewSet[string]("apple", "banana", "cherry")
|
set2 := NewSet("apple", "banana", "cherry")
|
||||||
if len(*set2) != 3 {
|
if len(*set2) != 3 {
|
||||||
t.Errorf("Expected size to be 3, but got %d", len(*set2))
|
t.Errorf("Expected size to be 3, but got %d", len(*set2))
|
||||||
}
|
}
|
||||||
@@ -147,7 +147,7 @@ func TestMembers(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Test case 2: Set with multiple elements
|
// Test case 2: Set with multiple elements
|
||||||
set = MakeSet[int](1, 2, 3)
|
set = MakeSet(1, 2, 3)
|
||||||
expected = []int{1, 2, 3}
|
expected = []int{1, 2, 3}
|
||||||
actual = set.Members()
|
actual = set.Members()
|
||||||
sort.Ints(actual)
|
sort.Ints(actual)
|
||||||
@@ -172,7 +172,7 @@ func TestSetString(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Test set with single member
|
// Test set with single member
|
||||||
singleMemberSet := NewSet[int](42)
|
singleMemberSet := NewSet(42)
|
||||||
singleMemberSetString := singleMemberSet.String()
|
singleMemberSetString := singleMemberSet.String()
|
||||||
expectedSingleMemberSetString := "[42]"
|
expectedSingleMemberSetString := "[42]"
|
||||||
if singleMemberSetString != expectedSingleMemberSetString {
|
if singleMemberSetString != expectedSingleMemberSetString {
|
||||||
@@ -180,7 +180,7 @@ func TestSetString(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Test set with multiple members
|
// Test set with multiple members
|
||||||
multipleMembersSet := NewSet[int](1, 2, 3)
|
multipleMembersSet := NewSet(1, 2, 3)
|
||||||
multipleMembersSetString := multipleMembersSet.String()
|
multipleMembersSetString := multipleMembersSet.String()
|
||||||
expectedMultipleMembersSetString := "[1 2 3]"
|
expectedMultipleMembersSetString := "[1 2 3]"
|
||||||
if multipleMembersSetString != expectedMultipleMembersSetString {
|
if multipleMembersSetString != expectedMultipleMembersSetString {
|
||||||
@@ -213,26 +213,26 @@ func TestUnion(t *testing.T) {
|
|||||||
|
|
||||||
// Test case 2: Union of an empty set and a non-empty set should return the non-empty set
|
// Test case 2: Union of an empty set and a non-empty set should return the non-empty set
|
||||||
set1 = MakeSet[int]()
|
set1 = MakeSet[int]()
|
||||||
set2 = MakeSet[int](1, 2, 3)
|
set2 = MakeSet(1, 2, 3)
|
||||||
expected = MakeSet[int](1, 2, 3)
|
expected = MakeSet(1, 2, 3)
|
||||||
result = set1.Union(set2)
|
result = set1.Union(set2)
|
||||||
if !reflect.DeepEqual(result, expected) {
|
if !reflect.DeepEqual(result, expected) {
|
||||||
t.Errorf("Expected %v, but got %v", expected, result)
|
t.Errorf("Expected %v, but got %v", expected, result)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test case 3: Union of two non-empty sets with common elements should return a set with unique elements
|
// Test case 3: Union of two non-empty sets with common elements should return a set with unique elements
|
||||||
set1 = MakeSet[int](1, 2, 3)
|
set1 = MakeSet(1, 2, 3)
|
||||||
set2 = MakeSet[int](2, 3, 4)
|
set2 = MakeSet(2, 3, 4)
|
||||||
expected = MakeSet[int](1, 2, 3, 4)
|
expected = MakeSet(1, 2, 3, 4)
|
||||||
result = set1.Union(set2)
|
result = set1.Union(set2)
|
||||||
if !reflect.DeepEqual(result, expected) {
|
if !reflect.DeepEqual(result, expected) {
|
||||||
t.Errorf("Expected %v, but got %v", expected, result)
|
t.Errorf("Expected %v, but got %v", expected, result)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test case 4: Union of two non-empty sets with no common elements should return a set with all elements
|
// Test case 4: Union of two non-empty sets with no common elements should return a set with all elements
|
||||||
set1 = MakeSet[int](1, 2, 3)
|
set1 = MakeSet(1, 2, 3)
|
||||||
set2 = MakeSet[int](4, 5, 6)
|
set2 = MakeSet(4, 5, 6)
|
||||||
expected = MakeSet[int](1, 2, 3, 4, 5, 6)
|
expected = MakeSet(1, 2, 3, 4, 5, 6)
|
||||||
result = set1.Union(set2)
|
result = set1.Union(set2)
|
||||||
if !reflect.DeepEqual(result, expected) {
|
if !reflect.DeepEqual(result, expected) {
|
||||||
t.Errorf("Expected %v, but got %v", expected, result)
|
t.Errorf("Expected %v, but got %v", expected, result)
|
||||||
|
|||||||
Reference in New Issue
Block a user