A first version of obimicroasm...

2026-03-26 22:00:52 +00:00 · 2025-02-16 21:47:49 +01:00
46 changed files with 1635 additions and 885 deletions
--- a/.github/workflows/obitest.yml
+++ b/.github/workflows/obitest.yml
@@ -1,19 +0,0 @@
-name: "Run the obitools command test suite"
-
-on:
-  push:
-    branches:
-      - master
-      - V*
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    steps:
-    - name: Setup Go
-      uses: actions/setup-go@v2
-      with:
-        go-version: '1.23'
-    - name: Checkout obitools4 project
-      uses: actions/checkout@v4
-    - name: Run tests
-      run: make githubtests
--- a/.gitignore
+++ b/.gitignore
@@ -1,27 +1,135 @@
-**/cpu.pprof
-**/cpu.trace
-**/test
-**/bin
-**/vendor
-**/*.fastq
-**/*.fasta
-**/*.fastq.gz
-**/*.fasta.gz
-**/.DS_Store
-**/*.gml
-**/*.log
-**/xxx*
-**/*.sav
-**/*.old
-**/*.tgz
-**/*.yaml
-**/*.csv
+cpu.pprof
+cpu.trace
+test
+bin
+vendor
+*.fastq
+*.fasta
+*.fastq.gz
+*.fasta.gz
+.DS_Store
+*.gml
+*.log
+/argaly

-.rhistory
-/.vscode
+/obiconvert
+/obicount
+/obimultiplex
+/obipairing
+/obipcr
+/obifind
+/obidistribute
+/obiuniq
 /build
+/Makefile.old
+.Rproj.user
+obitools.Rproj
+Stat_error.knit.md
+.Rhistory
+Stat_error.nb.html
+Stat_error.Rmd

-/ncbitaxo
+/.luarc.json
+/doc/TAXO/
+/doc/results/
+/doc/_main.log
+/doc/_book/_main.tex
+/doc/_freeze/
+/doc/tutorial_files/
+/doc/wolf_data/
+/taxdump/
+/.vscode/

-!/obitests/**
-!/sample/**
+/Algo-Alignement.numbers
+/Estimate_proba_true_seq.html
+/Estimate_proba_true_seq.nb.html
+/Estimate_proba_true_seq.Rmd
+/modele_error_euka.qmd
+/obitools.code-workspace
+.DS_Store
+.RData
+x
+xxx
+y
+/doc/wolf_diet.tgz
+/doc/man/depends
+/sample/wolf_R1.fasta.gz
+/sample/wolf_R2.fasta.gz
+/sample/euka03.ecotag.fasta.gz
+/sample/ratio.csv
+/sample/STD_PLN_1.dat
+/sample/STD_PLN_2.dat
+/sample/subset_Pasvik_R1.fastq.gz
+/sample/subset_Pasvik_R2.fastq.gz
+/sample/test_gobitools.fasta.bz2
+euka03.csv*
+gbbct793.seq.gz
+gbinv1003.seq.gz
+gbpln210.seq
+/doc/book/OBITools-V4.aux
+/doc/book/OBITools-V4.fdb_latexmk
+/doc/book/OBITools-V4.fls
+/doc/book/OBITools-V4.log
+/doc/book/OBITools-V4.pdf
+/doc/book/OBITools-V4.synctex.gz
+/doc/book/OBITools-V4.tex
+/doc/book/OBITools-V4.toc
+getoptions.adoc
+Archive.zip
+.DS_Store
+sample/.DS_Store
+sample/consensus_graphs/specimen_hac_plants_Vern_disicolor_.gml
+93954
+Bact03.e5.gb_R254.obipcr.idx.fasta.save
+sample/test.obipcr.log
+Bact02.e3.gb_R254.obipcr.fasta.gz
+Example_Arth03.ngsfilter
+SPER01.csv
+SPER03.csv
+wolf_diet_ngsfilter.txt
+xx
+xxx.gb
+yyy_geom.csv
+yyy_LCS.csv
+yyy.json
+bug_obimultiplex/toto
+bug_obimultiplex/toto_mapping
+bug_obimultiplex/tutu
+bug_obimultiplex/tutu_mapping
+bug_obipairing/GIT1_GH_ngsfilter.txt
+doc/book/TAXO/citations.dmp
+doc/book/TAXO/delnodes.dmp
+doc/book/TAXO/division.dmp
+doc/book/TAXO/gc.prt
+doc/book/TAXO/gencode.dmp
+doc/book/TAXO/merged.dmp
+doc/book/TAXO/names.dmp
+doc/book/TAXO/nodes.dmp
+doc/book/TAXO/readme.txt
+doc/book/wolf_data/Release-253/ncbitaxo/citations.dmp
+doc/book/wolf_data/Release-253/ncbitaxo/delnodes.dmp
+doc/book/wolf_data/Release-253/ncbitaxo/division.dmp
+doc/book/wolf_data/Release-253/ncbitaxo/gc.prt
+doc/book/wolf_data/Release-253/ncbitaxo/gencode.dmp
+doc/book/wolf_data/Release-253/ncbitaxo/merged.dmp
+doc/book/wolf_data/Release-253/ncbitaxo/names.dmp
+doc/book/wolf_data/Release-253/ncbitaxo/nodes.dmp
+doc/book/wolf_data/Release-253/ncbitaxo/readme.txt
+doc/book/results/toto.tasta
+sample/.DS_Store
+GO
+ncbitaxo/citations.dmp
+ncbitaxo/delnodes.dmp
+ncbitaxo/division.dmp
+ncbitaxo/gc.prt
+ncbitaxo/gencode.dmp
+ncbitaxo/merged.dmp
+ncbitaxo/names.dmp
+ncbitaxo/nodes.dmp
+ncbitaxo/readme.txt
+template.16S
+xxx.gz
+*.sav
+*.old
+ncbitaxo.tgz
+*.csv
--- a/9
+++ b/9
@@ -63,13 +63,6 @@ update-deps:

 test:
 	$(GOTEST) ./...
-
-obitests: 
-	@for t in $$(find obitests -name test.sh -print) ; do \
-		bash $${t} ;\
-	done 
-
-githubtests: obitools obitests
 	
 man: 
 	make -C doc man
@@ -104,5 +97,5 @@ ifneq ($(strip $(COMMIT_ID)),)
 	@rm -f $(OUTPUT)
 endif

-.PHONY: all packages obitools man obibook doc update-deps obitests githubtests .FORCE
+.PHONY: all packages obitools man obibook doc update-deps .FORCE
 .FORCE:
--- a/README.md
+++ b/README.md
@@ -37,7 +37,7 @@ curl -L https://raw.githubusercontent.com/metabarcoding/obitools4/master/install
      bash -s -- --install-dir test_install --obitools-prefix k
 ```

-In this case, the binaries will be installed in the `test_install` directory and all command names will be prefixed with the letter `k`. Thus, `obigrep` will be named `kobigrep`.
+In this case, the binaries will be installed in the `test_install` directory and all command names will be prefixed with the letter `k`. Thus `obigrep` will be named `kobigrep`.

 ## Continuing the analysis...

--- a/Release-notes.md
+++ b/Release-notes.md
@@ -1,29 +1,19 @@
 # OBITools release notes

-## March 2nd, 2025. Release 4.3.0
-
-A new documentation website is available at https://obitools4.metabarcoding.org.
-Its development is still in progress. 
+## Latest changes

 ### Breaking changes

- In `obimultiplex`, the short version of the **--tag-list** option used to
-  specify the list of tags and primers to be used for the demultiplexing has
-  been changed from `-t` to `-s`.
+- In `obimultiplex`, the short version of the **--tag-list** option used to specify the list 
+  of tags and primers to be used for the demultiplexing has been changed from `-t` to `-s`.

 - The command `obifind` is now renamed `obitaxonomy`.

- The **--taxdump** option used to specify the path to the taxdump containing
-  the NCBI taxonomy has been renamed to **--taxonomy**.
+- The **--taxdump** option used to specify the path to the taxdump containing the NCBI taxonomy
+  has been renamed to **--taxonomy**.

 ### Bug fixes

- Correction of a bug when using paired sequence file with the **--out** option.
-
- Correction of a bug in `obitag` when trying to annotate very short sequences of
-  4 bases or less.
-  
-
 - In `obipairing`, correct the stats `seq_a_single` and `seq_b_single` when
  on right alignment mode

@@ -31,32 +21,12 @@ Its development is still in progress.
  the batch size and not reading the qualities from the fastq files as `obiuniq`
  is producing only fasta output without qualities.

-   In `obitag`, correct the wrong assignment of the **obitag_bestmatch**
-    attribute.
-
-   In `obiclean`, the **--no-progress-bar** option disables all progress bars,
-    not just the data.
-
-   Several fixes in reading FASTA and FASTQ files, including some code
-    simplification and factorization.
-
-   Fixed a bug in all obitools that caused the same file to be processed
-    multiple times, when specifying a directory name as input.
-
-
 ### New features

-   `obigrep` add a new **--valid-taxid** option to keep only sequence with a
-    valid taxid
-
-   `obiclean` add a new **--min-sample-count** option with a default value of 1,
-    asking to filter out sequences which are not occurring in at least the
-    specified number of samples.
-
 -   `obitoaxonomy` a new **--dump|D** option allows for dumping a sub-taxonomy.
  
-   Taxonomy dump can now be provided as a four-columns CSV file to the
-    **--taxonomy** option.
+-   Taxonomy dump can now be provided as a four-columns CSV file to the **--taxonomy**
+    option.

 -   NCBI Taxonomy dump does not need to be uncompressed and unarchived anymore. The
    path of the tar and gziped dump file can be directly specified using the
@@ -67,50 +37,54 @@ Its development is still in progress.
    allow the processing of the rare fasta and fastq files not recognized.
    
 -   In `obiscript`, adds new methods to the Lua sequence object:
-    - `md5_string()`: returning the MD5 check sum as a hexadecimal string, 
-    - `subsequence(from,to)`: allows extracting a subsequence on a 0 based
-      coordinate system, upper bound excluded like in go. 
-    - `reverse_complement`: returning a sequence object corresponding to the
-      reverse complement of the current sequence.
+    - `md5_string()`: returning the MD5 checksum as a hexadecimal string,
+    - `subsequence(from,to)`: allows extracting a subsequence on a 0-based
+      coordinate system, upper bound excluded like in Go.
+    - `reverse_complement`: returning a sequence object corresponding to the reverse complement
+      of the current sequence.

-### Enhancement
+### Change of git repository

-   In every *OBITools* command, the progress bar is automatically deactivated
-    when the standard error output is redirected.
-   Because Genbank and ENA:EMBL contain very large sequences, while OBITools4
-    are optimized As Genbank and ENA:EMBL contain very large sequences, while
-    OBITools4 is optimized for short sequences, `obipcr` faces some problems
-    with excessive consumption of computer resources, especially memory. Several
-    improvements in the tuning of the default `obipcr` parameters and some new
-    features, currently only available for FASTA and FASTQ file readers, have
-    been implemented to limit the memory impact of `obipcr` without changing the
-    computational efficiency too much.
-   Logging system and therefore format, have been homogenized.
-
-
-### Change of git repository
-
-   The OBITools4 git repository has been moved to the GitHub repository. 
+-   The OBITools4 git repository has been moved to the github repository. 
    The new address is: https://github.com/metabarcoding/obitools4.
    Take care for using the new install script for retrieving the new version.

    ```bash
-    curl -L https://metabarcoding.org/obitools4/install.sh \
+    curl -L https://raw.githubusercontent.com/metabarcoding/obitools4/master/install_obitools.sh \
      | bash
    ```

    or with options:

    ```bash
-    curl -L https://metabarcoding.org/obitools4/install.sh \
+    curl -L https://raw.githubusercontent.com/metabarcoding/obitools4/master/install_obitools.sh \
      | bash -s -- --install-dir test_install --obitools-prefix k
    ```
+    
+### CPU limitation
+
+-   By default, *OBITools4* tries to use all the computing power available on
+    your computer. In some circumstances this can be problematic (e.g. if you
+    are running on a computer cluster managed by your university). You can limit
+    the number of CPU cores used by *OBITools4* or by using the **--max-cpu**
+    option or by setting the **OBIMAXCPU** environment variable. Some strange
+    behaviour of *OBITools4* has been observed when users try to limit the
+    maximum number of usable CPU cores to one. This seems to be caused by the Go
+    language, and it is not obvious to get *OBITools4* to run correctly on a
+    single core in all circumstances. Therefore, if you ask to use a single
+    core, **OBITools4** will print a warning message and actually set this
+    parameter to two cores. If you really want a single core, you can use the
+    **--force-one-core** option. But be aware that this can lead to incorrect
+    calculations.
+
+### New features
+
 -   The output of the obitools will evolve to produce results only in standard
    formats such as fasta and fastq. For non-sequential data, the output will be
    in CSV format, with the separator `,`, the decimal separator `.`, and a
    header line with the column names. It is more convenient to use the output
    in other programs. For example, you can use the `csvtomd` command to
-    reformat the CSV output into a Markdown table. The first command to initiate
+    reformat the csv output into a markdown table. The first command to initiate
    this change is `obicount`, which now produces a 3-line CSV output.

    ```bash
@@ -122,7 +96,7 @@ Its development is still in progress.
    database for `obitag` is to use `obipcr` on a local copy of Genbank or EMBL.
    However, these sequence databases are known to contain many taxonomic
    errors, such as bacterial sequences annotated with the taxid of their host
-    species. `obicleandb` tries to detect these errors. To do this, it first keeps
+    species. `obicleandb` tries to detect these errors. To do this, it first keeps
    only sequences annotated with the taxid to which a species, genus, and
    family taxid can be assigned. Then, for each sequence, it compares the
    distance of the sequence to the other sequences belonging to the same genus
@@ -133,7 +107,7 @@ Its development is still in progress.
    with the p-value of the Mann-Whitney U test in the **obicleandb_trusted**
    slot. Later, the distribution of this p-value can be analyzed to determine a
    threshold. Empirically, a threshold of 0.05 is a good compromise and allows
-    filtering out less than 1‰ of the sequences. These sequences can then be
+    filtering out less than 1‰ of the sequences. These sequences can then be
    removed using `obigrep`.

 -   Adds a new `obijoin` utility to join information contained in a sequence
@@ -143,16 +117,16 @@ Its development is still in progress.

 -   Adds a new tool `obidemerge` to demerge a `merge_xxx` slot by recreating the 
    multiple identical sequences having the slot `xxx` recreated with its initial
-    value and the sequence count set to the number of occurrences referred in the
+    value and the sequence count set to the number of occurrences referred in the
    `merge_xxx` slot. During the operation, the `merge_xxx` slot is removed.

 -   Adds CSV as one of the input format for every obitools command. To encode
-    sequence the CSV file must include a column named `sequence` and another
+    sequence the CSV file must include a column named `sequence` and another
    column named `id`. An extra column named `qualities` can be added to specify 
-    the quality scores of the sequence following the same ASCII encoding than the
+    the quality scores of the sequence following the same ASCII encoding as the
    fastq format. All the other columns will be considered as annotations and will
    be interpreted as JSON objects encoding potentially for atomic values. If a 
-    column value can not be decoded as JSON it will be considered as a string.
+    column value can not be decoded as JSON it will be considered as a string.

 -   A new option **--version** has been added to every obitools command. It will
    print the version of the command.
@@ -161,8 +135,8 @@ Its development is still in progress.
    quality scores from a BioSequence object.\

 -   In `obimultuplex` the ngsfilter file describing the samples can be no provided
-    not only using the classical ngsfilter format but also using the CSV format.
-    When using CSV, the first line must contain the column names. 5 columns are
+    not only using the classical ngsfilter format but also using the CSV format.
+    When using CSV, the first line must contain the column names. 5 columns are
    expected:

    -   `experiment` the name of the experiment
@@ -178,34 +152,43 @@ Its development is still in progress.

    Supplementary columns are allowed. Their names and content will be used to
    annotate the sequence corresponding to the sample, as the `key=value;` did
-    in the ngsfilter format.
+    in the ngsfilter format.

    The CSV format used allows for comment lines starting with `#` character.
-    Special data lines starting with `@param` in the first column allow configuring the algorithm. The options **--template** provided an over
-    commented example of the CSV format, including all the possible options.
-    
-### CPU limitation
+    Special data lines starting with `@param` in the first column allow
+    configuring the algorithm. The **--template** option provides a heavily
+    commented example of the CSV format, including all the possible options.

-   By default, *OBITools4* tries to use all the computing power available on
-    your computer. In some circumstances this can be problematic (e.g. if you
-    are running on a computer cluster managed by your university). You can limit
-    the number of CPU cores used by *OBITools4* or by using the **--max-cpu**
-    option or by setting the **OBIMAXCPU** environment variable. Some strange
-    behavior of *OBITools4* has been observed when users try to limit the
-    maximum number of usable CPU cores to one. This seems to be caused by the Go
-    language, and it is not obvious to get *OBITools4* to run correctly on a
-    single core in all circumstances. Therefore, if you ask to use a single
-    core, **OBITools4** will print a warning message and actually set this
-    parameter to two cores. If you really want a single core, you can use the
-    **--force-one-core** option. But be aware that this can lead to incorrect
-    calculations.
+### Enhancement

+-   In every *OBITools* command, the progress bar is automatically deactivated
+    when the standard error output is redirected.
+-   Because Genbank and ENA:EMBL contain very large sequences, while
+    OBITools4 is optimized for short sequences, `obipcr` faces some
+    problems with excessive consumption of computer resources,
+    especially memory. Several
+    improvements in the tuning of the default `obipcr` parameters and some new
+    features, currently only available for FASTA and FASTQ file readers, have
+    been implemented to limit the memory impact of `obipcr` without changing the
+    computational efficiency too much.
+-   Logging system and therefore format, have been homogenized.
+
+### Bug
+
+-   In `obitag`, correct the wrong assignment of the **obitag_bestmatch**
+    attribute.
+-   In `obiclean`, the **--no-progress-bar** option disables all progress bars,
+    not just the data.
+-   Several fixes in reading FASTA and FASTQ files, including some code
+    simplification and factorization.
+-   Fixed a bug in all obitools that caused the same file to be processed
+    multiple times when specifying a directory name as input.

 ## April 2nd, 2024. Release 4.2.0

 ### New features

-   A new OBITools named `obiscript` allows processing each sequence according
+-   A new OBITools named `obiscript` allows processing each sequence according
    to a Lua script. This is an experimental tool. The **--template** option
    allows for generating an example script on the `stdout`.

@@ -213,7 +196,7 @@ Its development is still in progress.

 -   Two of the main class `obiseq.SeqWorker` and `obiseq.SeqWorker` have their
    declaration changed. Both now return two values a `obiseq.BioSequenceSlice`
-    and an `error`. This allows a worker to return potentially several sequences
+    and an `error`. This allows a worker to return potentially several sequences
    as the result of the processing of a single sequence, or zero, which is
    equivalent to filter out the input sequence.

@@ -221,12 +204,12 @@ Its development is still in progress.

 -   In `obitag` if the reference database contains sequences annotated by taxid
    not referenced in the taxonomy, the corresponding sequences are discarded
-    from the reference database and a warning indicating the sequence *id* and the
+    from the reference database and a warning indicating the sequence id and the
    wrong taxid is emitted.
 -   The bug corrected in the parsing of EMBL and Genbank files as implemented in
    version 4.1.2 of OBITools4, potentially induced some reduction in the
    performance of the parsing. This should have been now fixed.
-   In the same idea, parsing of Genbank and EMBL files were reading and storing
+-   In the same idea, parsing of Genbank and EMBL files were reading and storing
    in memory not only the sequence but also the annotations (features table).
    Up to now none of the OBITools are using this information, but with large
    complete genomes, it is occupying a lot of memory. To reduce this impact,
@@ -265,7 +248,7 @@ Its development is still in progress.

 ### New feature

-   In `obimatrix` a **--transpose** option allows transposing the produced
+-   In `obimatrix` a **--transpose** option allows transposing the produced
    matrix table in CSV format.
 -   In `obitpairing` and `obipcrtag` two new options **--exact-mode** and
    **--fast-absolute** to control the heuristic used in the alignment
@@ -273,7 +256,7 @@ Its development is still in progress.
    the exact algorithm at the cost of a speed. **--fast-absolute** change the
    scoring schema of the heuristic.
 -   In `obiannotate` adds the possibility to annotate the first match of a
-    pattern using the same algorithm as the one used in `obipcr` and
+    pattern using the same algorithm as the one used in `obipcr` and
    `obimultiplex`. For that four option were added :
    -   **--pattern** : to specify the pattern. It can use IUPAC codes and
        position with no error tolerated has to be followed by a `#` character.
@@ -354,7 +337,7 @@ Its development is still in progress.

 ### Bugs

-   In the obitools language, the `composition` function now returns a map
+-   In the obitools language, the `composition` function now returns a map
    indexed by lowercase string "a", "c", "g", "t" and "o" for other instead of
    being indexed by the ASCII codes of the corresponding letters.
 -   Correction of the reverse-complement operation. Every reverse complement of
@@ -367,18 +350,18 @@ Its development is still in progress.
    duplicating the quality values. This made `obimultiplex` to produce fastq
    files with sequences having quality values duplicated.

-### Be careful
+### Be careful

 GO 1.21.0 is out, and it includes new functionalities which are used in the
-OBITools4 code. If you use the recommended method for compiling OBITools on your
-computer, there is no problem, as the script always load the latest GO version.
-If you rely on your personal GO install, please think to update.
+OBITools4 code. If you use the recommended method for compiling OBITools on your
+computer, there is no problem, as the script always loads the latest GO version.
+If you rely on your personal GO install, please remember to update it.

 ## August 29th, 2023. Release 4.0.5

 ### Bugs

-   Patch a bug in the `obiseq.BioSequence` constructor leading to an error on
+-   Patch a bug in the `obiseq.BioSequence` constructor leading to an error on
    almost every obitools. The error message indicates : `fatal error: sync:
    unlock of unlocked mutex` This bug was introduced in the release 4.0.4

@@ -397,7 +380,7 @@ If you rely on your personal GO install, please think to update.
    data structure to limit the number of alignments actually computed. This
    increase a bit the speed of both the software. `obirefidx` is nevertheless
    still too slow compared to my expectation.
-   Switch to a parallel version of the GZIP library, allowing for high speed
+-   Switch to a parallel version of the gzip library, allowing for high speed
    compress and decompress operation on files.

 ### New feature
@@ -441,12 +424,12 @@ If you rely on your personal GO install, please think to update.
              --unidentified not_assigned.fastq
    ```

-    The command produced four files : `tagged_library_R1.fastq` and
+    The command produced four files : `tagged_library_R1.fastq` and
    `tagged_library_R2.fastq` containing the assigned reads and
    `not_assigned_R1.fastq` and `not_assigned_R2.fastq` containing the
    unassignable reads.

-    The tagged library files can then be split using `obidistribute`:
+    The tagged library files can then be split using `obidistribute`:

    ```{bash}
    mkdir pcr_reads
@@ -456,9 +439,9 @@ If you rely on your personal GO install, please think to update.

 -   Adding of two options **--add-lca-in** and **--lca-error** to `obiannotate`.
    These options aim to help during construction of reference database using
-    `obipcr`. On `obipcr` output, it is commonly run `obiuniq`. To merge identical
+    `obipcr`. On `obipcr` output, `obiuniq` is commonly run. To merge identical
    sequences annotated with different taxids, it is now possible to use the
-    following strategies :
+    following strategy :

    ```{bash}
    obiuniq -m taxid myrefdb.obipcr.fasta \
@@ -489,7 +472,7 @@ If you rely on your personal GO install, please think to update.
 -   Correction of a bug in `obiconsensus` leading into the deletion of a base
    close to the beginning of the consensus sequence.

-## March 31st, 2023. Release 4.0.2
+## March 31st, 2023. Release 4.0.2

 ### Compiler change

@@ -500,15 +483,15 @@ If you rely on your personal GO install, please think to update.
 -   Add the possibility for looking pattern with indels. This has been added to
    `obimultiplex` through the **--with-indels** option.
 -   Every obitools command has a **--pprof** option making the command
-    publishing a profiling website available at the address :
+    publishing a profiling web site available at the address :
    <http://localhost:8080/debug/pprof/>
 -   A new `obiconsensus` command has been added. It is a prototype. It aims to
    build a consensus sequence from a set of reads. The consensus is estimated
    for all the sequences contained in the input file. If several input files,
    or a directory name are provided the result contains a consensus per file.
-    The *id* of the sequence is the name of the input file depleted of its
+    The id of the sequence is the name of the input file depleted of its
    directory name and of all its extensions.
-   In `obipcr` an experimental option **--fragmented** allows for splitting very
+-   In `obipcr` an experimental option **--fragmented** allows for splitting very
    long query sequences into shorter fragments with an overlap between the two
    contiguous fragment insuring that no amplicons are missed despite the split.
    As a site effect some amplicon can be identified twice.
@@ -551,7 +534,7 @@ If you rely on your personal GO install, please think to update.
 ### Enhancement

 -   *OBITools* are automatically processing all the sequences files contained in
-    a directory and its subdirectory\
+    a directory and its sub-directory\
    recursively if its name is provided as input. To process easily Genbank
    files, the corresponding filename extensions have been added. Today the
    following extensions are recognized as sequence files : `.fasta`, `.fastq`,
@@ -568,7 +551,7 @@ If you rely on your personal GO install, please think to update.
    export OBICPUMAX=4
    ```

-   Adds a new option --out\|-o allowing to specify the name of an output file.
+-   Adds a new option --out\|-o allowing to specify the name of an output file.

    ``` bash
    obiconvert -o xyz.fasta xxx.fastq
@@ -590,10 +573,10 @@ If you rely on your personal GO install, please think to update.
    matched files remain consistent when processed.

 -   Adding of the function `ifelse` to the expression language for computing
-    conditional values.
+    conditional values.

 -   Adding two function to the expression language related to sequence
-    composition : `composition` and `gcskew`. Both are taking a sequence as
+    composition : `composition` and `gcskew`. Both are taking a sequence as
    single argument.

 ## February 18th, 2023. Release 4.0.0
@@ -601,8 +584,8 @@ If you rely on your personal GO install, please think to update.
 It is the first version of the *OBITools* version 4. I decided to tag then
 following two weeks of intensive data analysis with them allowing to discover
 many small bugs present in the previous non-official version. Obviously other
-bugs are certainly present in the code, and you are welcome to use the git
-ticket system to mention them. But they seem to produce now reliable results.
+bugs are certainly present in the code, and you are welcome to use the git
+ticket system to mention them. But they seem to produce now reliable results.

 ### Corrected bugs

@@ -610,11 +593,11 @@ ticket system to mention them. But they seem to produce now reliable results.
    of sequences and to the production of incorrect file because of the last
    sequence record, sometime truncated in its middle. This was only occurring
    when more than a single CPU was used. It was affecting every obitools.
-   The `obiparing` software had a bug in the right alignment procedure. This led
-    to the non-alignment of very sort barcode during the paring of the forward
+-   The `obipairing` software had a bug in the right alignment procedure. This led
+    to the non-alignment of very short barcodes during the pairing of the forward
    and reverse reads.
-   The `obipairing` tools had a non-deterministic comportment when aligning a
-    pair very low quality reads. This induced that the result of the same low
+-   The `obipairing` tools had a non-deterministic comportment when aligning a
+    pair of very low quality reads. This induced that the result of the same low
    quality read pair was not the same from run to run.

 ### New features
@@ -622,10 +605,11 @@ ticket system to mention them. But they seem to produce now reliable results.
 -   Adding of a `--compress|-Z` option to every obitools allowing to produce
    `gz` compressed output. OBITools were already able to deal with gziped input
    files transparently. They can now produce their results in the same format.
-    - Adding of a `--append|-A` option to the `obidistribute` tool. It allows appending the result of an `obidistribute` execution to preexisting files. -
+    - Adding of a `--append|-A` option to the `obidistribute` tool. It allows to
+    append the result of an `obidistribute` execution to preexisting files. -
    Adding of a `--directory|-d` option to the `obidistribute` tool. It allows
-    declaring a secondary classification key over the one defined by the
-    `--category\|-c\` option. This extra key leads to produce directories in
+    to declare a secondary classification key over the one defined by the
+    `--category|-c` option. This extra key leads to produce directories in
    which files produced according to the primary criterion are stored.
 -   Adding of the functions `subspc`, `printf`, `int`, `numeric`, and `bool` to
    the expression language.
--- a/cmd/obitools/obimicroasm/main.go
+++ b/cmd/obitools/obimicroasm/main.go
@@ -0,0 +1,42 @@
+package main
+
+import (
+	"os"
+
+	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
+	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats"
+	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
+	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obimicroasm"
+	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
+)
+
+func main() {
+	// Parse the command line, run obimicroasm.CLIAssemblePCR and print the formatted result.
+	// CPU profiling (disabled). Inspect with: go tool pprof -http=":8000" ./obimicroasm ./cpu.pprof
+	// f, err := os.Create("cpu.pprof")
+	// if err != nil {
+	// 	log.Fatal(err)
+	// }
+	// pprof.StartCPUProfile(f)
+	// defer pprof.StopCPUProfile()
+
+	// Execution tracing (disabled). Inspect with: go tool trace cpu.trace
+	// ftrace, err := os.Create("cpu.trace")
+	// if err != nil {
+	// 	log.Fatal(err)
+	// }
+	// trace.Start(ftrace)
+	// defer trace.Stop()
+
+	optionParser := obioptions.GenerateOptionParser(obimicroasm.OptionSet)
+
+	optionParser(os.Args)
+	// NOTE(review): presumably pins the reader/writer worker counts to 2 — confirm in obidefault.
+	obidefault.SetStrictReadWorker(2)
+	obidefault.SetStrictWriteWorker(2)
+
+	seq := obimicroasm.CLIAssemblePCR()
+	// NOTE(review): the println builtin writes to standard error, not stdout — confirm intended.
+	println(obiformats.FormatFasta(seq, obiformats.FormatFastSeqJsonHeader))
+	obiutils.WaitForLastPipe()
+}
--- a/go.mod
+++ b/go.mod
@@ -5,9 +5,7 @@ go 1.23.1
 require (
 	github.com/DavidGamba/go-getoptions v0.28.0
 	github.com/PaesslerAG/gval v1.2.2
-	github.com/TuftsBCB/io v0.0.0-20140121014543-22b94e9b23f9
 	github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df
-	github.com/buger/jsonparser v1.1.1
 	github.com/chen3feng/stl4go v0.1.1
 	github.com/dlclark/regexp2 v1.11.4
 	github.com/goccy/go-json v0.10.3
@@ -16,7 +14,7 @@ require (
 	github.com/rrethy/ahocorasick v1.0.0
 	github.com/schollz/progressbar/v3 v3.13.1
 	github.com/sirupsen/logrus v1.9.3
-	github.com/stretchr/testify v1.8.4
+	github.com/stretchr/testify v1.10.0
 	github.com/tevino/abool/v2 v2.1.0
 	github.com/yuin/gopher-lua v1.1.1
 	golang.org/x/exp v0.0.0-20231006140011-7918f672742d
@@ -26,12 +24,18 @@ require (
 )

 require (
+	github.com/Clever/csvlint v0.3.0 // indirect
+	github.com/TuftsBCB/io v0.0.0-20140121014543-22b94e9b23f9 // indirect
+	github.com/buger/jsonparser v1.1.1
+	github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e // indirect
 	github.com/davecgh/go-spew v1.1.1 // indirect
+	github.com/ef-ds/deque/v2 v2.0.2
 	github.com/goombaio/orderedmap v0.0.0-20180924084748-ba921b7e2419 // indirect
 	github.com/kr/pretty v0.3.0 // indirect
 	github.com/kr/text v0.2.0 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
 	github.com/rogpeppe/go-internal v1.6.1 // indirect
+	go.etcd.io/bbolt v1.4.0 // indirect
 )

 require (
@@ -45,7 +49,7 @@ require (
 	github.com/shopspring/decimal v1.3.1 // indirect
 	github.com/ulikunitz/xz v0.5.11
 	golang.org/x/net v0.17.0 // indirect
-	golang.org/x/sys v0.17.0 // indirect
+	golang.org/x/sys v0.29.0 // indirect
 	golang.org/x/term v0.13.0 // indirect
 	gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c
 )
--- a/go.sum
+++ b/go.sum
@@ -1,3 +1,5 @@
+github.com/Clever/csvlint v0.3.0 h1:58WEFXWy+i0fCbxTXscR2QwYESRuAUFjEGLgZs6j2iU=
+github.com/Clever/csvlint v0.3.0/go.mod h1:+wLRuW/bI8NhpRoeyUBxqKsK35OhvgJhXHSWdKp5XJU=
 github.com/DavidGamba/go-getoptions v0.28.0 h1:18wgEvfZdrlfIhVDGEBO3Dl0fkOyXqXLa0tLMCKxM1c=
 github.com/DavidGamba/go-getoptions v0.28.0/go.mod h1:zE97E3PR9P3BI/HKyNYgdMlYxodcuiC6W68KIgeYT84=
 github.com/PaesslerAG/gval v1.2.2 h1:Y7iBzhgE09IGTt5QgGQ2IdaYYYOU134YGHBThD+wm9E=
@@ -12,6 +14,8 @@ github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMU
 github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
 github.com/chen3feng/stl4go v0.1.1 h1:0L1+mDw7pomftKDruM23f1mA7miavOj6C6MZeadzN2Q=
 github.com/chen3feng/stl4go v0.1.1/go.mod h1:5ml3psLgETJjRJnMbPE+JiHLrCpt+Ajc2weeTECXzWU=
+github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e h1:fY5BOSpyZCqRo5OhCuC+XN+r/bBCmeuuJtjz+bCNIf8=
+github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
 github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
@@ -21,6 +25,8 @@ github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cn
 github.com/dsnet/compress v0.0.1 h1:PlZu0n3Tuv04TzpfPbrnI0HW/YwodEXDS+oPKahKF0Q=
 github.com/dsnet/compress v0.0.1/go.mod h1:Aw8dCMJ7RioblQeTqt88akK31OvO8Dhf5JflhBbQEHo=
 github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
+github.com/ef-ds/deque/v2 v2.0.2 h1:GQtDK1boBMu/qsNbSLQsqzwNptaioxZI39X3UxT5ALA=
+github.com/ef-ds/deque/v2 v2.0.2/go.mod h1:hoZy4VooWLhRT4uS+sSCilfgBQUNptJU2FGqr08a5sc=
 github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
 github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
 github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA=
@@ -69,9 +75,12 @@ github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ
 github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
 github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
+github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
+github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
 github.com/tevino/abool/v2 v2.1.0 h1:7w+Vf9f/5gmKT4m4qkayb33/92M+Um45F2BkHOR+L/c=
 github.com/tevino/abool/v2 v2.1.0/go.mod h1:+Lmlqk6bHDWHqN1cbxqhwEAwMPXgc8I1SDEamtseuXY=
 github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8=
@@ -79,6 +88,8 @@ github.com/ulikunitz/xz v0.5.11 h1:kpFauv27b6ynzBNT/Xy+1k+fK4WswhN/6PN5WhFAGw8=
 github.com/ulikunitz/xz v0.5.11/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
 github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M=
 github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw=
+go.etcd.io/bbolt v1.4.0 h1:TU77id3TnN/zKr7CO/uk+fBCwF2jGcMuw2B/FMAzYIk=
+go.etcd.io/bbolt v1.4.0/go.mod h1:AsD+OCi/qPN1giOX1aiLAha3o1U8rAz65bvN4j0sRuk=
 golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI=
 golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo=
 golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
@@ -88,6 +99,8 @@ golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBc
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y=
 golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU=
+golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U=
 golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek=
 golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
--- a/go.work.sum
+++ b/go.work.sum
@@ -2,12 +2,9 @@ git.sr.ht/~sbinet/gg v0.3.1 h1:LNhjNn8DerC8f9DHLz6lS0YYul/b602DUxDgGkd/Aik=
 git.sr.ht/~sbinet/gg v0.3.1/go.mod h1:KGYtlADtqsqANL9ueOFkWymvzUvLMQllU5Ixo+8v3pc=
 github.com/ajstarks/svgo v0.0.0-20211024235047-1546f124cd8b h1:slYM766cy2nI3BwyRiyQj/Ud48djTMtMebDqepE95rw=
 github.com/ajstarks/svgo v0.0.0-20211024235047-1546f124cd8b/go.mod h1:1KcenG0jGWcpt8ov532z81sp/kMMUG485J2InIOyADM=
-github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
 github.com/chzyer/logex v1.1.10 h1:Swpa1K6QvQznwJRcfTfQJmTE72DqScAa40E+fbHEXEE=
 github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
 github.com/chzyer/logex v1.2.0 h1:+eqR0HfOetur4tgnC8ftU5imRnhi4te+BadWS95c5AM=
-github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e h1:fY5BOSpyZCqRo5OhCuC+XN+r/bBCmeuuJtjz+bCNIf8=
-github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
 github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1 h1:q763qf9huN11kDQavWsoZXJNW3xEE4JJyHa5Q25/sd8=
 github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
 github.com/chzyer/test v0.0.0-20210722231415-061457976a23 h1:dZ0/VyGgQdVGAss6Ju0dt5P0QltE0SFY5Woh6hbIfiQ=
@@ -29,16 +26,21 @@ github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN
 github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
 github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
 github.com/ianlancetaylor/demangle v0.0.0-20220319035150-800ac71e25c2 h1:rcanfLhLDA8nozr/K289V1zcntHr3V+SHlXwzz1ZI2g=
+github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
 github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213 h1:qGQQKEcAR99REcMpsXCp3lJ03zYT1PkRd3kQGPn9GVg=
 github.com/klauspost/cpuid v1.2.0 h1:NMpwD2G9JSFOE1/TJjGSo5zG7Yb2bTe7eq1jH+irmeE=
 github.com/kr/pty v1.1.1 h1:VkoXIwSboBpnk99O/KFauAEILuNHv5DVFKZMBN/gUgw=
 github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
 github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng=
 github.com/smallnest/goroutine v1.1.1/go.mod h1:Fp8f6ZReubfdj0m4+NcUnW4IsAqKa+Pnrv9opEiD43E=
+github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
+github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
 github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4=
 github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c=
 github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
+github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
 github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE=
+go.etcd.io/gofail v0.2.0/go.mod h1:nL3ILMGfkXTekKI3clMBNazKnjUZjYLKmBHzsVAnC1o=
 golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc=
 golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4=
 golang.org/x/image v0.6.0 h1:bR8b5okrPI3g/gyZakLZHeWxAR8Dn5CyxXv1hLH5g/4=
@@ -46,6 +48,7 @@ golang.org/x/image v0.6.0/go.mod h1:MXLdDR43H7cDJq5GEGXEVeeNhPgi+YYEQ2pC1byI1x0=
 golang.org/x/mod v0.13.0 h1:I/DsJXRlw/8l/0c24sM9yb0T4z9liZTduXvdAWYiysY=
 golang.org/x/mod v0.13.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
 golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 h1:uVc8UZUe6tr40fFVnUP5Oj+veunVezqYl9z7DYw9xzw=
+golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
 golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
 golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
 golang.org/x/tools v0.14.0 h1:jvNa2pY0M4r62jkRQ6RwEZZyPcymeL9XZMLBbV7U2nc=
--- a/obitests/obitools/obicount/test.sh
+++ b/obitests/obitools/obicount/test.sh
@@ -1,144 +0,0 @@
-#!/bin/bash
-
-#
-# Here give the name of the test serie
-#
-
-TEST_NAME=obicount
-
-######
-#
-# Some variable and function definitions: please don't change them
-#
-######
-TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
-OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
-export PATH="${OBITOOLS_DIR}:${PATH}"
-
-
-TMPDIR="$(mktemp -d)"
-ntest=0
-success=0
-failed=0
-
-cleanup() {
-    echo "========================================" 1>&2
-    echo "## Results of the $TEST_NAME tests:" 1>&2
-
-    echo 1>&2
-    echo "- $ntest tests run" 1>&2
-    echo "- $success successfully completed" 1>&2
-    echo "- $failed failed tests" 1>&2
-    echo 1>&2
-    echo "Cleaning up the temporary directory..." 1>&2
-    echo 1>&2
-    echo "========================================" 1>&2
-
-    rm -rf "$TMPDIR"  # Suppress the temporary directory
-
-    if [ $failed -gt 0 ]; then
-       log "$TEST_NAME tests failed" 
-       exit 1
-    fi
-
-    exit 0
-}
-
-log() {
-    echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
-}
-
-log "Testing $TEST_NAME..." 
-log "Test directory is $TEST_DIR" 
-log "obitools directory is $OBITOOLS_DIR" 
-log "Temporary directory is $TMPDIR" 
-log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
-
-######################################################################
-####
-#### Below are the tests
-####
-#### Before each test :
-####  - increment the variable ntest
-####
-#### Run the command as the condition of an if / then /else
-####  - The command must return 0 on success
-####  - The command must return an exit code different from 0 on failure
-####  - The datafiles are stored in the same directory than the test script
-####  - The test script directory is stored in the TEST_DIR variable
-####  - If result files have to be produced they must be stored
-####    in the temporary directory (TMPDIR variable)
-####
-#### then clause is executed on success of the command
-####  - Write a success message using the log function
-####  - increment the variable success
-####
-#### else clause is executed on failure of the command
-####  - Write a failure message using the log function
-####  - increment the variable failed
-####
-######################################################################
-
-((ntest++))
-if obicount "${TEST_DIR}/wolf_F.fasta.gz" \
-    > "${TMPDIR}/wolf_F.fasta_count.csv" 
-then
-    log "OBICount: fasta reading OK" 
-    ((success++))
-else
-    log "OBICount: fasta reading failed" 
-    ((failed++))
-fi
-
-((ntest++))
-if obicount "${TEST_DIR}/wolf_F.fastq.gz" \
-    > "${TMPDIR}/wolf_F.fastq_count.csv"
-then
-    log "OBICount: fastq reading OK"
-    ((success++))
-else
-    log "OBICount: fastq reading failed" 
-    ((failed++))
-fi
-
-((ntest++))
-if obicount "${TEST_DIR}/wolf_F.csv.gz" \
-    > "${TMPDIR}/wolf_F.csv_count.csv"
-then
-    log "OBICount: csv reading OK" 
-    ((success++))
-else
-    log "OBICount: csv reading failed"
-    ((failed++))
-fi
-
-((ntest++))
-if diff "${TMPDIR}/wolf_F.fasta_count.csv" \
-        "${TMPDIR}/wolf_F.fastq_count.csv"  > /dev/null
-then
-    log "OBICount: counting on fasta and fastq are identical OK"
-    ((success++))
-else
-    log "OBICount: counting on fasta and fastq are different failed"
-    ((failed++))
-fi
-
-((ntest++))
-if diff "${TMPDIR}/wolf_F.fasta_count.csv" \
-        "${TMPDIR}/wolf_F.csv_count.csv" > /dev/null
-then
-    log "OBICount: counting on fasta and csv are identical OK"
-    ((success++))
-else
-    log "OBICount: counting on fasta and csv are different failed"
-    ((failed++))
-fi
-
-#########################################
-#
-# At the end of the tests
-# the cleanup function is called
-#
-#########################################
-
-cleanup
--- a/obitests/obitools/obicount/wolf_F.csv.gz
+++ b/obitests/obitools/obicount/wolf_F.csv.gz
--- a/obitests/obitools/obicount/wolf_F.fasta.gz
+++ b/obitests/obitools/obicount/wolf_F.fasta.gz
--- a/obitests/obitools/obicount/wolf_F.fastq.gz
+++ b/obitests/obitools/obicount/wolf_F.fastq.gz
--- a/obitests/obitools/obiparing/test.sh
+++ b/obitests/obitools/obiparing/test.sh
@@ -1,134 +0,0 @@
-#!/bin/bash
-
-#
-# Here give the name of the test serie
-#
-
-TEST_NAME=obiparing
-
-######
-#
-# Some variable and function definitions: please don't change them
-#
-######
-TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
-OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
-export PATH="${OBITOOLS_DIR}:${PATH}"
-
-
-TMPDIR="$(mktemp -d)"
-ntest=0
-success=0
-failed=0
-
-cleanup() {
-    echo "========================================" 1>&2
-    echo "## Results of the $TEST_NAME tests:" 1>&2
-
-    echo 1>&2
-    echo "- $ntest tests run" 1>&2
-    echo "- $success successfully completed" 1>&2
-    echo "- $failed failed tests" 1>&2
-    echo 1>&2
-    echo "Cleaning up the temporary directory..." 1>&2
-    echo 1>&2
-    echo "========================================" 1>&2
-
-    rm -rf "$TMPDIR"  # Suppress the temporary directory
-
-    if [ $failed -gt 0 ]; then
-       log "$TEST_NAME tests failed" 
-       exit 1
-    fi
-
-    exit 0
-}
-
-log() {
-    echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
-}
-
-log "Testing $TEST_NAME..." 
-log "Test directory is $TEST_DIR" 
-log "obitools directory is $OBITOOLS_DIR" 
-log "Temporary directory is $TMPDIR" 
-log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
-
-######################################################################
-####
-#### Below are the tests
-####
-#### Before each test :
-####  - increment the variable ntest
-####
-#### Run the command as the condition of an if / then /else
-####  - The command must return 0 on success
-####  - The command must return an exit code different from 0 on failure
-####  - The datafiles are stored in the same directory than the test script
-####  - The test script directory is stored in the TEST_DIR variable
-####  - If result files have to be produced they must be stored
-####    in the temporary directory (TMPDIR variable)
-####
-#### then clause is executed on success of the command
-####  - Write a success message using the log function
-####  - increment the variable success
-####
-#### else clause is executed on failure of the command
-####  - Write a failure message using the log function
-####  - increment the variable failed
-####
-######################################################################
-
-((ntest++))
-if obipairing -F "${TEST_DIR}/wolf_F.fastq.gz" \
-              -R "${TEST_DIR}/wolf_R.fastq.gz" \
-    | obidistribute -Z -c mode \
-                    -p "${TMPDIR}/wolf_paired_%s.fastq.gz" 
-then
-    log "OBIPairing: sequence pairing OK" 
-    ((success++))
-else
-    log "OBIPairing: sequence pairing failed" 
-    ((failed++))
-fi
-
-((ntest++))
-if obicsv -Z -s -i \
-          -k ali_dir -k ali_length -k paring_fast_count \
-          -k paring_fast_overlap -k paring_fast_score \
-          -k score -k score_norm -k seq_a_single \
-          -k seq_b_single -k seq_ab_match \
-          "${TMPDIR}/wolf_paired_alignment.fastq.gz" \
-    > "${TMPDIR}/wolf_paired_alignment.csv.gz" \
-    && zdiff -c "${TEST_DIR}/wolf_paired_alignment.csv.gz" \
-                "${TMPDIR}/wolf_paired_alignment.csv.gz" 
-then
-    log "OBIPairing: check aligned sequences OK" 
-    ((success++))
-else
-    log "OBIPairing: check aligned sequences failed" 
-    ((failed++))
-fi
-
-((ntest++))
-if obicsv -Z -s -i \
-          "${TMPDIR}/wolf_paired_join.fastq.gz" \
-    > "${TMPDIR}/wolf_paired_join.csv.gz" \
-    && zdiff -c "${TEST_DIR}/wolf_paired_join.csv.gz" \
-                "${TMPDIR}/wolf_paired_join.csv.gz"
-then
-    log "OBIPairing: check joined sequences OK" 
-    ((success++))
-else
-    log "OBIPairing: check joined sequences failed" 
-    ((failed++))
-fi
-
-#########################################
-#
-# At the end of the tests
-# the cleanup function is called
-#
-#########################################
-
-cleanup
--- a/obitests/obitools/obiparing/wolf_F.fastq.gz
+++ b/obitests/obitools/obiparing/wolf_F.fastq.gz
--- a/obitests/obitools/obiparing/wolf_R.fastq.gz
+++ b/obitests/obitools/obiparing/wolf_R.fastq.gz
--- a/obitests/obitools/obiparing/wolf_paired_alignment.csv.gz
+++ b/obitests/obitools/obiparing/wolf_paired_alignment.csv.gz
--- a/obitests/obitools/obiparing/wolf_paired_join.csv.gz
+++ b/obitests/obitools/obiparing/wolf_paired_join.csv.gz
--- a/pkg/obialign/alignment.go
+++ b/pkg/obialign/alignment.go
@@ -10,7 +10,6 @@ import (
 	"strings"

 	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
-	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
 )

 // // A pool of byte slices.
@@ -159,30 +158,12 @@ func BuildQualityConsensus(seqA, seqB *obiseq.BioSequence, path []int, statOnMis

 	match := 0

-	left := obiutils.Abs(path[0])
-	right := 0
-	if path[len(path)-1] == 0 {
-		right = path[len(path)-2]
-	}
-
-	right = obiutils.Abs(right)
-
-	right = len(*bufferQA) - right
-
-	// log.Warnf("BuildQualityConsensus: left = %d right = %d\n", left, right)
-
 	for i, qA = range *bufferQA {
 		nA := (*bufferSA)[i]
 		nB := (*bufferSB)[i]
 		qB = (*bufferQB)[i]

-		if statOnMismatch && i >= left && i < right && nA != nB {
-			if nA == ' ' {
-				nA = '-'
-			}
-			if nB == ' ' {
-				nB = '-'
-			}
+		if statOnMismatch && nA != nB && nA != ' ' && nB != ' ' {
 			mismatches[strings.ToUpper(fmt.Sprintf("(%c:%02d)->(%c:%02d)", nA, qA, nB, qB))] = i + 1
 		}

@@ -202,12 +183,13 @@ func BuildQualityConsensus(seqA, seqB *obiseq.BioSequence, path []int, statOnMis

 		q := qA + qB

-		if nA != nB {
-			q = qM - byte(math.Log10(1-math.Pow(10, -float64(qm)/40))*10+0.5)
-		}
-
-		if nA == nB {
-			match++
+		if qA > 0 && qB > 0 {
+			if nA != nB {
+				q = qM - byte(math.Log10(1-math.Pow(10, -float64(qm)/30))*10+0.5)
+			}
+			if nA == nB {
+				match++
+			}
 		}

 		if q > 90 {
--- a/pkg/obialign/dnamatrix.go
+++ b/pkg/obialign/dnamatrix.go
@@ -74,30 +74,6 @@ func _Logaddexp(a, b float64) float64 {
 	return b + math.Log1p(math.Exp(a-b))
 }

-func _Log1mexp(a float64) float64 {
-	if a > 0 {
-		log.Panic("Log1mexp: a > 0")
-	}
-
-	if a == 0 {
-		return 0
-	}
-
-	return (math.Log(-math.Expm1(a)))
-}
-
-func _Logdiffexp(a, b float64) float64 {
-	if a < b {
-		log.Panic("Log1mexp: a < b")
-	}
-
-	if a == b {
-		return math.Inf(-1)
-	}
-
-	return a + _Log1mexp(b-a)
-}
-
 // _MatchScoreRatio calculates the match score ratio between two bytes.
 //
 // Parameters:
@@ -107,25 +83,25 @@ func _Logdiffexp(a, b float64) float64 {
 // Returns:
 // - float64: the match score ratio when a match is observed
 // - float64: the match score ratio when a mismatch is observed
-func _MatchScoreRatio(QF, QR byte) (float64, float64) {
+func _MatchScoreRatio(a, b byte) (float64, float64) {

+	l2 := math.Log(2)
 	l3 := math.Log(3)
-	l4 := math.Log(4)
 	l10 := math.Log(10)
-	qF := -float64(QF) / 10 * l10
-	qR := -float64(QR) / 10 * l10
-	term1 := _Logaddexp(qF, qR)
-	term2 := _Logdiffexp(term1, qF+qR)
+	lalea := math.Log(4)                   // 1 / (chance of the random model)
+	lE1 := -float64(a)/10*l10 - l3         // log proba of sequencing error on A/3
+	lE2 := -float64(b)/10*l10 - l3         // log proba of sequencing error on B/3
+	lO1 := math.Log1p(-math.Exp(lE1 + l3)) // log proba of not being an error on A
+	lO2 := math.Log1p(-math.Exp(lE2 + l3)) // log proba of not being an error on B
+	lO1O2 := lO1 + lO2
+	lE1E2 := lE1 + lE2
+	lO1E2 := lO1 + lE2
+	lO2E1 := lO2 + lE1

-	// log.Warnf("MatchScoreRatio: %v, %v , %v, %v", QF, QR, term1, term2)
+	MM := _Logaddexp(lO1O2, lE1E2+l3)                    // Proba match when match observed
+	Mm := _Logaddexp(_Logaddexp(lO1E2, lO2E1), lE1E2+l2) // Proba match when mismatch observed

-	match_logp := _Log1mexp(term2 + l3 - l4)
-	match_score := match_logp - _Log1mexp(match_logp)
-
-	mismatch_logp := term2 - l4
-	mismatch_score := mismatch_logp - _Log1mexp(mismatch_logp)
-
-	return match_score, mismatch_score
+	return MM + lalea, Mm + lalea
 }

 func _InitNucPartMatch() {
--- a/pkg/obialign/fastlcs.go
+++ b/pkg/obialign/fastlcs.go
@@ -21,15 +21,15 @@ func encodeValues(score, length int, out bool) uint64 {
 	return fo
 }

-// func _isout(value uint64) bool {
-// 	const outmask = uint64(1) << dwsize
-// 	return (value & outmask) == 0
-// }
+// _isout reports whether bit dwsize of value is cleared.
+func _isout(value uint64) bool {
+	return value&(uint64(1)<<dwsize) == 0
+}

-// func _lpath(value uint64) int {
-// 	const mask = uint64(1<<wsize) - 1
-// 	return int(((value + 1) ^ mask) & mask)
-// }
+// _lpath returns the complement of value+1 restricted to its wsize low bits.
+func _lpath(value uint64) int {
+	return int(^(value + 1) & (uint64(1<<wsize) - 1))
+}

 func decodeValues(value uint64) (int, int, bool) {
 	const mask = uint64(1<<wsize) - 1
@@ -57,3 +57,4 @@ func _setout(value uint64) uint64 {
 var _empty = encodeValues(0, 0, false)
 var _out = encodeValues(0, 30000, true)
 var _notavail = encodeValues(0, 30000, false)
+
--- a/pkg/obialign/pairedendalign.go
+++ b/pkg/obialign/pairedendalign.go
@@ -625,8 +625,6 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
 			&arena.pointer.scoreMatrix,
 			&arena.pointer.pathMatrix)

-		score = scoreR
-
 		path = _Backtracking(arena.pointer.pathMatrix,
 			len(rawSeqA), len(rawSeqB),
 			&(arena.pointer.path))
@@ -643,7 +641,6 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
 				len(rawSeqA), len(rawSeqB),
 				&(arena.pointer.path))
 			isLeftAlign = true
-			score = scoreL
 		}

 	}
--- a/pkg/obiformats/fastseq_json_header.go
+++ b/pkg/obiformats/fastseq_json_header.go
@@ -13,7 +13,7 @@ import (
 	"github.com/buger/jsonparser"
 )

-func _parse_json_map_string(str []byte) (map[string]string, error) {
+func _parse_json_map_string(str []byte, sequence *obiseq.BioSequence) (map[string]string, error) {
 	values := make(map[string]string)
 	jsonparser.ObjectEach(str,
 		func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
@@ -25,7 +25,7 @@ func _parse_json_map_string(str []byte) (map[string]string, error) {
 	return values, nil
 }

-func _parse_json_map_int(str []byte) (map[string]int, error) {
+func _parse_json_map_int(str []byte, sequence *obiseq.BioSequence) (map[string]int, error) {
 	values := make(map[string]int)
 	jsonparser.ObjectEach(str,
 		func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
@@ -41,7 +41,7 @@ func _parse_json_map_int(str []byte) (map[string]int, error) {
 	return values, nil
 }

-func _parse_json_map_float(str []byte) (map[string]float64, error) {
+func _parse_json_map_float(str []byte, sequence *obiseq.BioSequence) (map[string]float64, error) {
 	values := make(map[string]float64)
 	jsonparser.ObjectEach(str,
 		func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
@@ -57,7 +57,7 @@ func _parse_json_map_float(str []byte) (map[string]float64, error) {
 	return values, nil
 }

-func _parse_json_map_bool(str []byte) (map[string]bool, error) {
+func _parse_json_map_bool(str []byte, sequence *obiseq.BioSequence) (map[string]bool, error) {
 	values := make(map[string]bool)
 	jsonparser.ObjectEach(str,
 		func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
@@ -73,7 +73,7 @@ func _parse_json_map_bool(str []byte) (map[string]bool, error) {
 	return values, nil
 }

-func _parse_json_map_interface(str []byte) (map[string]interface{}, error) {
+func _parse_json_map_interface(str []byte, sequence *obiseq.BioSequence) (map[string]interface{}, error) {
 	values := make(map[string]interface{})
 	jsonparser.ObjectEach(str,
 		func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
@@ -100,7 +100,7 @@ func _parse_json_map_interface(str []byte) (map[string]interface{}, error) {
 	return values, nil
 }

-func _parse_json_array_string(str []byte) ([]string, error) {
+func _parse_json_array_string(str []byte, sequence *obiseq.BioSequence) ([]string, error) {
 	values := make([]string, 0)
 	jsonparser.ArrayEach(str,
 		func(value []byte, dataType jsonparser.ValueType, offset int, err error) {
@@ -162,7 +162,7 @@ func _parse_json_array_bool(str []byte, sequence *obiseq.BioSequence) ([]bool, e
 	return values, nil
 }

-func _parse_json_array_interface(str []byte) ([]interface{}, error) {
+func _parse_json_array_interface(str []byte, sequence *obiseq.BioSequence) ([]interface{}, error) {
 	values := make([]interface{}, 0)
 	jsonparser.ArrayEach(str,
 		func(value []byte, dataType jsonparser.ValueType, offset int, err error) {
@@ -261,14 +261,14 @@ func _parse_json_header_(header string, sequence *obiseq.BioSequence) string {
 				sequence.SetCount(int(count))

 			case skey == "obiclean_weight":
-				weight, err := _parse_json_map_int(value)
+				weight, err := _parse_json_map_int(value, sequence)
 				if err != nil {
 					log.Fatalf("%s: Cannot parse obiclean weight %s", sequence.Id(), string(value))
 				}
 				annotations[skey] = weight

 			case skey == "obiclean_status":
-				status, err := _parse_json_map_string(value)
+				status, err := _parse_json_map_string(value, sequence)
 				if err != nil {
 					log.Fatalf("%s: Cannot parse obiclean status %s", sequence.Id(), string(value))
 				}
@@ -276,7 +276,7 @@ func _parse_json_header_(header string, sequence *obiseq.BioSequence) string {

 			case strings.HasPrefix(skey, "merged_"):
 				if dataType == jsonparser.Object {
-					data, err := _parse_json_map_int(value)
+					data, err := _parse_json_map_int(value, sequence)
 					if err != nil {
 						log.Fatalf("%s: Cannot parse merged slot %s: %v", sequence.Id(), skey, err)
 					} else {
@@ -316,9 +316,9 @@ func _parse_json_header_(header string, sequence *obiseq.BioSequence) string {
 						annotations[skey], err = strconv.ParseFloat(obiutils.UnsafeString(value), 64)
 					}
 				case jsonparser.Array:
-					annotations[skey], err = _parse_json_array_interface(value)
+					annotations[skey], err = _parse_json_array_interface(value, sequence)
 				case jsonparser.Object:
-					annotations[skey], err = _parse_json_map_interface(value)
+					annotations[skey], err = _parse_json_map_interface(value, sequence)
 				case jsonparser.Boolean:
 					annotations[skey], err = jsonparser.ParseBoolean(value)
 				case jsonparser.Null:
--- a/pkg/obiiter/fragment.go
+++ b/pkg/obiiter/fragment.go
@@ -19,7 +19,7 @@ func IFragments(minsize, length, overlap, size, nworkers int) Pipeable {
 			newiter.WaitAndClose()
 		}()

-		f := func(iterator IBioSequence) {
+		f := func(iterator IBioSequence, id int) {
 			source := ""
 			for iterator.Next() {
 				news := obiseq.MakeBioSequenceSlice()
@@ -66,9 +66,9 @@ func IFragments(minsize, length, overlap, size, nworkers int) Pipeable {
 		}

 		for i := 1; i < nworkers; i++ {
-			go f(iterator.Split())
+			go f(iterator.Split(), i)
 		}
-		go f(iterator)
+		go f(iterator, 0)

 		return newiter.SortBatches().Rebatch(size)
 	}
--- a/pkg/obikmer/debruijn.go
+++ b/pkg/obikmer/debruijn.go
@@ -8,9 +8,12 @@ import (
 	"math/bits"
 	"os"
 	"slices"
+	"sort"

 	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
 	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obistats"
+	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
+	"github.com/ef-ds/deque/v2"
 	log "github.com/sirupsen/logrus"
 )

@@ -89,12 +92,18 @@ type DeBruijnGraph struct {
 //
 //	*DeBruijnGraph - a pointer to the created De Bruijn's Graph
 func MakeDeBruijnGraph(kmersize int) *DeBruijnGraph {
+	if kmersize > 31 {
+		log.Panicf("k-mer size %d is too large", kmersize)
+	}
+
+	kmermask := (^uint64(0) << (uint64(kmersize) * 2))
+
 	g := DeBruijnGraph{
 		kmersize: kmersize,
-		kmermask: ^(^uint64(0) << (uint64(kmersize) * 2)), // k-mer mask used to set to 0 the bits that are not in the k-mer
-		prevc:    uint64(1) << (uint64(kmersize-1) * 2),
-		prevg:    uint64(2) << (uint64(kmersize-1) * 2),
-		prevt:    uint64(3) << (uint64(kmersize-1) * 2),
+		kmermask: kmermask, // k-mer mask used to set to 1 the bits that are not in the k-mer
+		prevc:    (uint64(1) << (uint64(kmersize-1) * 2)) | kmermask,
+		prevg:    (uint64(2) << (uint64(kmersize-1) * 2)) | kmermask,
+		prevt:    (uint64(3) << (uint64(kmersize-1) * 2)) | kmermask,
 		graph:    make(map[uint64]uint),
 	}

@@ -161,19 +170,34 @@ func (g *DeBruijnGraph) FilterMinWeight(min int) {
 	}
 }

+// FilterMaxWeight filters the DeBruijnGraph by removing nodes with weight
+// greater than min which, despite its name, is the maximum weight kept.
+// A negative min removes every node instead of wrapping around as a uint.
+func (g *DeBruijnGraph) FilterMaxWeight(min int) {
+	for idx, count := range g.graph {
+		// count is unsigned, hence always above a negative threshold
+		if min < 0 || count > uint(min) {
+			delete(g.graph, idx)
+		}
+	}
+}
+
 func (g *DeBruijnGraph) Previouses(index uint64) []uint64 {
 	if _, ok := g.graph[index]; !ok {
 		log.Panicf("k-mer %s (index %d) is not in graph", g.DecodeNode(index), index)
 	}

 	rep := make([]uint64, 0, 4)
+
+	index &= ^g.kmermask
 	index >>= 2

-	if _, ok := g.graph[index]; ok {
-		rep = append(rep, index)
+	key := index | g.kmermask
+	if _, ok := g.graph[key]; ok {
+		rep = append(rep, key)
 	}

-	key := index | g.prevc
+	key = index | g.prevc
 	if _, ok := g.graph[key]; ok {
 		rep = append(rep, key)
 	}
@@ -197,7 +221,7 @@ func (g *DeBruijnGraph) Nexts(index uint64) []uint64 {
 	}

 	rep := make([]uint64, 0, 4)
-	index = (index << 2) & g.kmermask
+	index = (index << 2) | g.kmermask

 	if _, ok := g.graph[index]; ok {
 		rep = append(rep, index)
@@ -268,6 +292,33 @@ func (g *DeBruijnGraph) MaxHead() (uint64, int, bool) {
 	return rep, int(max), found
 }

+// Terminals lists the nodes of the graph having no successor (see Nexts).
+func (g *DeBruijnGraph) Terminals() []uint64 {
+	terminals := make([]uint64, 0, 10)
+	for node := range g.graph {
+		if len(g.Nexts(node)) > 0 {
+			continue
+		}
+		terminals = append(terminals, node)
+	}
+	return terminals
+}
+
+// MaxTerminal returns the node without successor (see Nexts) of highest weight,
+// that weight, and false when no such node has a weight above zero.
+func (g *DeBruijnGraph) MaxTerminal() (uint64, int, bool) {
+	var best uint64
+	var heaviest uint
+	found := false
+	for node, weight := range g.graph {
+		if len(g.Nexts(node)) != 0 || weight <= heaviest {
+			continue
+		}
+		best, heaviest, found = node, weight, true
+	}
+	return best, int(heaviest), found
+}
+
 func (g *DeBruijnGraph) MaxPath() []uint64 {
 	path := make([]uint64, 0, 1000)
 	ok := false
@@ -318,7 +369,11 @@ func (g *DeBruijnGraph) LongestConsensus(id string, min_cov float64) (*obiseq.Bi
 		return nil, fmt.Errorf("graph is empty")
 	}
 	//path := g.LongestPath(max_length)
-	path := g.HaviestPath()
+	path, err := g.HaviestPath(nil, nil, false)
+
+	if err != nil {
+		return nil, err
+	}

 	spath := path

@@ -481,7 +536,7 @@ func (graph *DeBruijnGraph) append(sequence []byte, current uint64, weight int)
 	}

 	current <<= 2
-	current &= graph.kmermask
+	current |= graph.kmermask
 	b := iupac[sequence[0]]
 	current |= b[0]
 	graph.graph[current] = uint(graph.Weight(current) + weight)
@@ -495,6 +550,36 @@ func (graph *DeBruijnGraph) append(sequence []byte, current uint64, weight int)
 	}
 }

+// func (graph *DeBruijnGraph) search(current uint64, extension []byte, path []uint64, error,errormax int) ([]uint64,error) {
+
+// 	path = append(path, current)
+
+// 	if len(extension) == 0 {
+// 		return path,nil
+// 	}
+
+// 	current <<= 2
+// 	current &= graph.kmermask
+// 	b := iupac[extension[0]]
+
+// 	newPath := path
+// 	if len(b) > 1 {
+// 		newPath = slices.Clone(path)
+// 	}
+
+// 	current |= b[0]
+
+// 	_, ok := graph.graph[current]
+// 	if ok {
+// 		newPath = append(newPath, current)
+// 	}
+// 	rep, err := graph.search(current, extension[1:], newPath, error,errormax)
+// 	if err != nil {
+// 		return path,err
+// 	}
+
+// }
+
 // Push appends a BioSequence to the DeBruijnGraph.
 //
 // Parameters:
@@ -523,6 +608,7 @@ func (graph *DeBruijnGraph) Push(sequence *obiseq.BioSequence) {
 				initFirstKmer(start+1, key)
 			}
 		} else {
+			key |= graph.kmermask
 			graph.graph[key] = uint(graph.Weight(key) + w)
 			graph.append(s[graph.kmersize:], key, w)
 		}
@@ -533,6 +619,110 @@ func (graph *DeBruijnGraph) Push(sequence *obiseq.BioSequence) {
 	}
 }

+func (graph *DeBruijnGraph) search(sequence []byte, mismatch, errormax int) []uint64 {
+	var initFirstKmer func(start int, key uint64) []uint64
+
+	initFirstKmer = func(start int, key uint64) []uint64 {
+		if start == graph.kmersize {
+			key |= graph.kmermask
+			if _, ok := graph.graph[key]; ok {
+				return []uint64{key}
+			} else {
+				return []uint64{}
+			}
+		}
+
+		keys := make([]uint64, 0, 1000)
+
+		if start == 0 {
+			key = 0
+		}
+
+		key <<= 2
+		b := iupac[sequence[start]]
+
+		for _, code := range b {
+			key &= ^uint64(3)
+			key |= code
+			keys = append(keys, initFirstKmer(start+1, key)...)
+		}
+
+		// w := []string{}
+		// for _, k := range keys {
+		// 	w = append(w, graph.DecodeNode(k))
+		// }
+		// // log.Warnf("For %s found %d matches : %v", sequence, len(keys), w)
+
+		return keys
+	}
+
+	rep := initFirstKmer(0, 0)
+
+	return rep
+}
+
+func (graph *DeBruijnGraph) Search(sequence *obiseq.BioSequence, errormax int) []uint64 {
+
+	s := sequence.Sequence() // Get the sequence as a byte slice
+
+	if len(s) < graph.KmerSize() {
+		s = slices.Clone(s)
+		for len(s) < graph.KmerSize() {
+			s = append(s, 'n')
+		}
+	}
+
+	log.Warnf("searching for %s", s)
+	keys := graph.search(s, 0, errormax)
+
+	for mismatch := 1; mismatch <= errormax; mismatch++ {
+		log.Warnf("searching with %d error for %s", mismatch, s)
+		for probe := range IterateOneError(s[0:graph.kmersize]) {
+			keys = append(keys,
+				graph.search(probe, mismatch, errormax)...,
+			)
+		}
+	}
+	keys = obiutils.Unique(keys)
+
+	return keys
+}
+
+func (graph *DeBruijnGraph) BackSearch(sequence *obiseq.BioSequence, errormax int) []uint64 {
+	lkmer := graph.KmerSize()
+
+	s := sequence.Sequence() // Get the sequence as a byte slice
+
+	if len(s) < lkmer {
+		sn := []byte{}
+		ls := len(s)
+		for ls < lkmer {
+			sn = append(sn, 'n')
+			ls++
+		}
+		s = append(sn, s...)
+	} else {
+		s = s[(len(s) - lkmer):]
+	}
+
+	log.Warnf("back-searching for %s", s)
+
+	keys := graph.search(s, 0, errormax)
+
+	for mismatch := 1; mismatch <= errormax; mismatch++ {
+		log.Warnf("searching with %d error for %s", mismatch, s)
+		for probe := range IterateOneError(s[0:graph.kmersize]) {
+			// log.Warnf("searching with %d error for %s", mismatch, probe)
+			keys = append(keys,
+				graph.search(probe, mismatch, errormax)...,
+			)
+		}
+	}
+
+	keys = obiutils.Unique(keys)
+	return keys
+}
+
 func (graph *DeBruijnGraph) Gml() string {
 	buffer := bytes.NewBuffer(make([]byte, 0, 1000))

@@ -614,7 +804,7 @@ func (graph *DeBruijnGraph) WriteGml(filename string) error {
 func (g *DeBruijnGraph) HammingDistance(kmer1, kmer2 uint64) int {
 	ident := ^((kmer1 & kmer2) | (^kmer1 & ^kmer2))
 	ident |= (ident >> 1)
-	ident &= 0x5555555555555555 & g.kmermask
+	ident &= 0x5555555555555555 & ^g.kmermask
 	return bits.OnesCount64(ident)
 }

@@ -638,11 +828,23 @@ func (h *UInt64Heap) Pop() any {
 	return x
 }

-func (g *DeBruijnGraph) HaviestPath() []uint64 {
+func (g *DeBruijnGraph) HaviestPath(starts, stops []uint64, backPath bool) ([]uint64, error) {

-	if g.HasCycle() {
-		return nil
+	// if g.HasCycle() {
+	// 	return nil, fmt.Errorf("graph has a cycle")
+	// }
+
+	following := g.Nexts
+
+	if backPath {
+		following = g.Previouses
 	}
+
+	stopNodes := make(map[uint64]bool, len(stops))
+	for _, n := range stops {
+		stopNodes[n] = true
+	}
+
 	// Initialize the distance array and visited set
 	distances := make(map[uint64]int)
 	visited := make(map[uint64]bool)
@@ -654,7 +856,11 @@ func (g *DeBruijnGraph) HaviestPath() []uint64 {
 	heap.Init(queue)

 	startNodes := make(map[uint64]struct{})
-	for _, n := range g.Heads() {
+	if starts == nil {
+		starts = g.Heads()
+	}
+
+	for _, n := range starts {
 		startNodes[n] = struct{}{}
 		heap.Push(queue, n)
 		distances[n] = g.Weight(n)
@@ -686,7 +892,11 @@ func (g *DeBruijnGraph) HaviestPath() []uint64 {
 			log.Warn("current node is 0")
 		}
 		// Update the distance of the neighbors
-		nextNodes := g.Nexts(currentNode)
+
+		nextNodes := following(currentNode)
+		if _, ok := stopNodes[currentNode]; ok {
+			nextNodes = []uint64{}
+		}
 		for _, nextNode := range nextNodes {
 			if nextNode == 0 {
 				log.Warn("next node is 0")
@@ -718,16 +928,178 @@ func (g *DeBruijnGraph) HaviestPath() []uint64 {
 	}

 	if slices.Contains(heaviestPath, currentNode) {
-		log.Panicf("Cycle detected %v -> %v (%v) len(%v), graph: %v", heaviestPath, currentNode, startNodes, len(heaviestPath), g.Len())
-		return nil
+		return nil, fmt.Errorf("cycle detected in heaviest path")
 	}

 	heaviestPath = append(heaviestPath, currentNode)

 	// Reverse the path
-	slices.Reverse(heaviestPath)
+	if !backPath {
+		slices.Reverse(heaviestPath)
+	}

-	return heaviestPath
+	return heaviestPath, nil
+}
+
+func (g *DeBruijnGraph) HaviestPathDSU(starts, stops []uint64, backPath bool) ([]uint64, error) {
+	// Collect and sort edges
+	type Edge struct {
+		weight float64
+		u, v   uint64
+	}
+	edges := make([]Edge, 0)
+
+	// Function to get next nodes (either Nexts or Previouses based on backPath)
+	following := g.Nexts
+	previouses := g.Previouses
+	if backPath {
+		following = g.Previouses
+		previouses = g.Nexts
+	}
+
+	// Collect all edges
+	for u := range g.graph {
+		for _, v := range following(u) {
+			edges = append(edges, Edge{
+				weight: float64(min(g.Weight(u), g.Weight(v))),
+				u:      u,
+				v:      v,
+			})
+		}
+	}
+
+	// Sort edges by weight in descending order
+	sort.Slice(edges, func(i, j int) bool {
+		return edges[i].weight > edges[j].weight
+	})
+
+	// Initialize disjoint set data structure
+	parent := make(map[uint64]uint64)
+	for u := range g.graph {
+		parent[u] = u
+	}
+
+	// Find with path compression
+	var find func(uint64) uint64
+	find = func(node uint64) uint64 {
+		if parent[node] != node {
+			parent[node] = find(parent[node])
+		}
+		return parent[node]
+	}
+
+	// Union function that returns true if cycle is detected
+	union := func(u, v uint64) bool {
+		rootU := find(u)
+		rootV := find(v)
+		if rootU == rootV {
+			return true // Cycle detected
+		}
+		parent[rootV] = rootU
+		return false
+	}
+
+	// If no specific starts provided, use graph heads (graph terminals when backPath is set)
+	if starts == nil {
+		if !backPath {
+			starts = g.Heads()
+		} else {
+			starts = g.Terminals()
+		}
+	}
+
+	// If no specific stops provided, use graph terminals (graph heads when backPath is set)
+	if stops == nil {
+		if !backPath {
+			stops = g.Terminals()
+		} else {
+			stops = g.Heads()
+		}
+	}
+
+	// Index stops in a map for O(1) lookup; the value becomes true once an edge reaching that stop has been recorded
+	stopNodes := make(map[uint64]bool)
+	for _, stop := range stops {
+		stopNodes[stop] = false
+	}
+
+	var path []uint64
+	maxCapacity := math.Inf(-1)
+	stopEdge := []Edge{}
+
+	// Process edges in descending order of weight
+	for _, edge := range edges {
+		if stopNodes[edge.u] {
+			continue // Skip edges from stop nodes
+		}
+
+		if in, ok := stopNodes[edge.v]; ok {
+			if !in {
+				stopEdge = append(stopEdge, edge)
+				stopNodes[edge.v] = true
+			}
+		}
+
+		if union(edge.u, edge.v) {
+			continue // Skip if creates cycle
+		}
+
+		pathFound := false
+		for _, sedge := range stopEdge {
+			// Check if any start-stop pair is connected
+			fv := find(sedge.v)
+			for _, s := range starts {
+				fs := find(s)
+				//				log.Warnf("Start: %d, Stop: %d", fs, fv)
+				if fs == fv {
+					pathFound = true
+					maxCapacity = edge.weight
+
+					// Reconstruct path
+					current := sedge.v
+					path = []uint64{current}
+					for current != s {
+						oldcurrent := current
+						//						log.Warnf("Start: %d, Current: %d, Previous: %v", s, current, previouses(current))
+						for _, prev := range previouses(current) {
+							if find(prev) == fs {
+								path = append(path, prev)
+								current = prev
+								break
+							}
+						}
+						if current == oldcurrent {
+							log.Fatalf("We are stuck")
+						}
+
+					}
+					//					log.Warnf("Built path: %v", path)
+					break
+				}
+			}
+			if pathFound {
+				break
+			}
+		}
+		if pathFound {
+			break
+		}
+	}
+
+	//	log.Warnf("Stop edge: %v", stopEdge)
+
+	// No start/stop pair ended up connected: report the failure
+
+	if path == nil {
+		return nil, fmt.Errorf("no valid path found")
+	}
+
+	if !backPath {
+		slices.Reverse(path)
+	}
+	log.Warnf("Max capacity: %5.0f: %v", maxCapacity, g.DecodePath(path))
+
+	return path, nil
 }

 func (g *DeBruijnGraph) HasCycle() bool {
@@ -765,3 +1137,59 @@ func (g *DeBruijnGraph) HasCycle() bool {
 	}
 	return false
 }
+
+// HasCycleInDegree detects whether the graph contains a cycle using the in-degree method.
+// The method relies on topological sorting: if not every node can be sorted,
+// then the graph contains a cycle.
+//
+// Returns:
+// - bool: true if the graph contains a cycle, false otherwise
+func (g *DeBruijnGraph) HasCycleInDegree() bool {
+	// Create a map to store the in-degree of each node
+	inDegree := make(map[uint64]int)
+
+	// Initialise the in-degree of every node to 0
+	for node := range g.graph {
+		inDegree[node] = 0
+	}
+
+	// Compute the in-degrees
+	for node := range g.graph {
+		for _, next := range g.Nexts(node) {
+			inDegree[next]++
+		}
+	}
+
+	// Create a deque holding the nodes whose in-degree is 0
+	queue := deque.Deque[uint64]{}
+
+	// Push every node with an in-degree of 0 onto the deque
+	for node := range g.graph {
+		if inDegree[node] == 0 {
+			queue.PushBack(node)
+		}
+	}
+
+	visited := 0 // Counter of visited nodes
+
+	// BFS traversal
+	for queue.Len() > 0 {
+		// Remove the first node from the deque
+		node, _ := queue.PopFront()
+		visited++
+
+		// For each adjacent node
+		for _, next := range g.Nexts(node) {
+			// Decrease its in-degree
+			inDegree[next]--
+
+			// If the in-degree drops to 0, push it onto the deque
+			if inDegree[next] == 0 {
+				queue.PushBack(next)
+			}
+		}
+	}
+
+	// If there is a cycle, not every node could be visited
+	return visited != len(g.graph)
+}
--- a/pkg/obikmer/encodefourmer.go
+++ b/pkg/obikmer/encodefourmer.go
@@ -2,7 +2,6 @@ package obikmer

 import (
 	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
-	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
 )

 var __single_base_code__ = []byte{0,
@@ -132,39 +131,33 @@ func FastShiftFourMer(index [][]int, shifts *map[int]int, lindex int, seq *obise
 	maxshift := 0
 	maxcount := 0
 	maxscore := -1.0
-	maxrelscore := -1.0

 	for shift, count := range *shifts {
 		delete((*shifts), shift)
-		selectscore := float64(count)
-		relativescore := float64(count)
-		over := -shift
-		switch {
-		case shift > 0:
-			over += lindex
-		case shift < 0:
-			over = seq.Len() - over
-		default:
-			over = min(lindex, seq.Len())
-		}
-		relativescore = relativescore / float64(over-3)
+		score := float64(count)
 		if relscore {
-			selectscore = relativescore
+			over := -shift
+			switch {
+			case shift > 0:
+				over += lindex
+			case shift < 0:
+				over = seq.Len() - over
+			default:
+				over = min(lindex, seq.Len())
+			}
+			score = score / float64(over-3)
 		}
-
-		if selectscore > maxscore {
+		if score > maxscore {
 			maxshift = shift
 			maxcount = count
-			maxscore = selectscore
-			maxrelscore = relativescore
+			maxscore = score
 		} else {
-			if selectscore == maxscore && obiutils.Abs(shift) < obiutils.Abs(maxshift) {
+			if score == maxscore && shift < maxshift {
 				maxshift = shift
 				maxcount = count
-				maxrelscore = relativescore
 			}
 		}
 	}

-	return maxshift, maxcount, maxrelscore
+	return maxshift, maxcount, maxscore
 }
--- a/pkg/obikmer/oneerror.go
+++ b/pkg/obikmer/oneerror.go
@@ -0,0 +1,45 @@
+package obikmer
+
+import (
+	"iter"
+	"slices"
+)
+
+var baseError = map[byte]byte{
+	'a': 'b',
+	'c': 'd',
+	'g': 'h',
+	't': 'v',
+	'r': 'y',
+	'y': 'r',
+	's': 'w',
+	'w': 's',
+	'k': 'm',
+	'm': 'k',
+	'd': 'c',
+	'v': 't',
+	'h': 'g',
+	'b': 'a',
+}
+
+type BytesItem []byte
+
+func IterateOneError(kmer []byte) iter.Seq[BytesItem] {
+	lkmer := len(kmer)
+	return func(yield func(BytesItem) bool) {
+		for p := 0; p < lkmer; p++ {
+			for p < lkmer && kmer[p] == 'n' {
+				p++
+			}
+
+			if p < lkmer {
+				nkmer := slices.Clone(kmer)
+				nkmer[p] = baseError[kmer[p]]
+				if !yield(nkmer) {
+					return
+				}
+			}
+		}
+	}
+
+}
--- a/pkg/obioptions/version.go
+++ b/pkg/obioptions/version.go
@@ -8,7 +8,7 @@ import (
 // corresponds to the last commit, and not the one when the file will be
 // commited

-var _Commit = "573acaf"
+var _Commit = "4774438"
 var _Version = "Release 4.2.0"

 // Version returns the version of the obitools package.
--- a/pkg/obiseq/paired_reads.go
+++ b/pkg/obiseq/paired_reads.go
@@ -25,7 +25,7 @@ func (s *BioSequence) UnPair() {
 }

 func (s *BioSequenceSlice) IsPaired() bool {
-	return (*s)[0].paired != nil
+	return s != nil && s.Len() > 0 && (*s)[0].paired != nil
 }

 func (s *BioSequenceSlice) PairedWith() *BioSequenceSlice {
--- a/pkg/obiseq/predicate.go
+++ b/pkg/obiseq/predicate.go
@@ -196,16 +196,6 @@ func IsShorterOrEqualTo(length int) SequencePredicate {
 	return f
 }

-func OccurInAtleast(sample string, n int) SequencePredicate {
-	desc := MakeStatsOnDescription(sample)
-	f := func(sequence *BioSequence) bool {
-		stats := sequence.StatsOn(desc, "NA")
-		return len(stats) >= n
-	}
-
-	return f
-}
-
 func IsSequenceMatch(pattern string) SequencePredicate {
 	pat, err := regexp.Compile("(?i)" + pattern)

--- a/pkg/obiseq/taxonomy_classifier.go
+++ b/pkg/obiseq/taxonomy_classifier.go
@@ -31,7 +31,7 @@ func TaxonomyClassifier(taxonomicRank string,
 		if taxon != nil {
 			ttaxon := taxon.TaxonAtRank(taxonomicRank)
 			if abortOnMissing && ttaxon == nil {
-				log.Fatalf("Taxon at rank %s not found in taxonomy for taxid %s", taxonomicRank, taxon.String())
+				log.Fatalf("Taxon at rank %s not found in taxonomy for taxid %d", taxonomicRank, taxon.String())
 			}
 		} else {
 			if abortOnMissing {
--- a/pkg/obiseq/taxonomy_predicate.go
+++ b/pkg/obiseq/taxonomy_predicate.go
@@ -25,7 +25,7 @@ func IsAValidTaxon(taxonomy *obitax.Taxonomy, withAutoCorrection ...bool) Sequen
 				if autocorrection {
 					sequence.SetTaxid(ttaxid)
 					log.Printf(
-						"Sequence %s : Taxid %s updated with %s",
+						"Sequence %s : Taxid %d updated with %d",
 						sequence.Id(),
 						taxid,
 						ttaxid,
--- a/pkg/obitools/obiclean/chimera.go
+++ b/pkg/obitools/obiclean/chimera.go
@@ -1,126 +0,0 @@
-package obiclean
-
-import (
-	"fmt"
-	"sort"
-
-	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
-	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
-	log "github.com/sirupsen/logrus"
-)
-
-func commonPrefix(a, b *obiseq.BioSequence) int {
-	i := 0
-	l := min(a.Len(), b.Len())
-
-	if l == 0 {
-		return 0
-	}
-	as := a.Sequence()
-	bs := b.Sequence()
-
-	for i < l && as[i] == bs[i] {
-		i++
-	}
-
-	if obiutils.UnsafeString(as[:i]) != obiutils.UnsafeString(bs[:i]) {
-		log.Fatalf("i: %d, j: %d (%s/%s)", i, i, as[:i], bs[:i])
-	}
-
-	return i
-}
-
-func commonSuffix(a, b *obiseq.BioSequence) int {
-	i := a.Len() - 1
-	j := b.Len() - 1
-
-	if i < 0 || j < 0 {
-		return 0
-	}
-
-	as := a.Sequence()
-	bs := b.Sequence()
-
-	l := 0
-	for i >= 0 && j >= 0 && as[i] == bs[j] {
-		i--
-		j--
-		l++
-	}
-
-	if obiutils.UnsafeString(as[i+1:]) != obiutils.UnsafeString(bs[j+1:]) {
-		log.Fatalf("i: %d, j: %d (%s/%s)", i, j, as[i+1:], bs[j+1:])
-	}
-	// log.Warnf("i: %d, j: %d (%s)", i, j, as[i+1:])
-
-	return l
-}
-
-func AnnotateChimera(samples map[string]*[]*seqPCR) {
-
-	w := func(sample string, seqs *[]*seqPCR) {
-		ls := len(*seqs)
-		cp := make([]int, ls)
-		cs := make([]int, ls)
-
-		pcrs := make([]*seqPCR, 0, ls)
-
-		for _, s := range *seqs {
-			if len(s.Edges) == 0 {
-				pcrs = append(pcrs, s)
-			}
-		}
-
-		lp := len(pcrs)
-
-		sort.Slice(pcrs, func(i, j int) bool {
-			return pcrs[i].Weight < pcrs[j].Weight
-		})
-
-		for i, s := range pcrs {
-			for j := i + 1; j < lp; j++ {
-				s2 := pcrs[j]
-				cp[j] = commonPrefix(s.Sequence, s2.Sequence)
-				cs[j] = commonSuffix(s.Sequence, s2.Sequence)
-			}
-
-			var cm map[string]string
-			var err error
-
-			chimera, ok := s.Sequence.GetAttribute("chimera")
-
-			if !ok {
-				cm = map[string]string{}
-			} else {
-				cm, err = obiutils.InterfaceToStringMap(chimera)
-				if err != nil {
-					log.Fatalf("type of chimera not map[string]string: %T (%v)",
-						chimera, err)
-				}
-			}
-
-			ls := s.Sequence.Len()
-
-			for k := i + 1; k < lp; k++ {
-				for l := i + 1; l < lp; l++ {
-					if k != l && cp[k]+cs[l] == ls {
-						cm[sample] = fmt.Sprintf("{%s}/{%s}@(%d)",
-							pcrs[k].Sequence.Id(),
-							pcrs[l].Sequence.Id(),
-							cp[k])
-					}
-				}
-			}
-
-			if len(cm) > 0 {
-				s.Sequence.SetAttribute("chimera", cm)
-			}
-		}
-
-	}
-
-	for sn, sqs := range samples {
-		w(sn, sqs)
-	}
-
-}
--- a/pkg/obitools/obiclean/graph.go
+++ b/pkg/obitools/obiclean/graph.go
@@ -13,24 +13,23 @@ import (
 	log "github.com/sirupsen/logrus"

 	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obialign"
-	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
 	"github.com/schollz/progressbar/v3"
 )

 type Ratio struct {
-	Sample         string
-	SeqID          string
-	OriginalStatus string
-	WOriginal      int
-	WMutant        int
-	COriginal      int
-	CMutant        int
-	Pos            int
-	Length         int
-	A              int
-	C              int
-	G              int
-	T              int
+	Sample string
+	SeqID  string
+	status string
+	From   int
+	To     int
+	CFrom  int
+	CTo    int
+	Pos    int
+	Length int
+	A      int
+	C      int
+	G      int
+	T      int
 }

 type Edge struct {
@@ -53,21 +52,45 @@ func makeEdge(father, dist, pos int, from, to byte) Edge {
 	}
 }

+func abs(x int) int {
+	if x < 0 {
+		return -x
+	}
+	return x
+}
+
+func max(x, y int) int {
+	if x > y {
+		return x
+	}
+	return y
+}
+
+func min(x, y int) int {
+	if x < y {
+		return x
+	}
+	return y
+}
+
+func minMax(x, y int) (int, int) {
+	if x < y {
+		return x, y
+	}
+	return y, x
+
+}
+
 // It takes a filename and a 2D slice of floats pruduced during graph building,
 // and writes a CSV file with the first column being the
 // first nucleotide, the second column being the second nucleotide, and the third column being the
 // ratio
-func EmpiricalDistCsv(filename string, data [][]Ratio, compressed bool) {
+func EmpiricalDistCsv(filename string, data [][]Ratio) {
 	file, err := os.Create(filename)
 	if err != nil {
 		fmt.Println(err)
 	}
-
-	destfile, err := obiutils.CompressStream(file, true, true)
-	if err != nil {
-		fmt.Println(err)
-	}
-	defer destfile.Close()
+	defer file.Close()

 	pbopt := make([]progressbar.Option, 0, 5)
 	pbopt = append(pbopt,
@@ -80,19 +103,19 @@ func EmpiricalDistCsv(filename string, data [][]Ratio, compressed bool) {

 	bar := progressbar.NewOptions(len(data), pbopt...)

-	fmt.Fprintln(destfile, "Sample,Origin_id,Origin_status,Origin,Mutant,Origin_Weight,Mutant_Weight,Origin_Count,Mutant_Count,Position,Origin_length,A,C,G,T")
+	fmt.Fprintln(file, "Sample,Father_id,Father_status,From,To,Weight_from,Weight_to,Count_from,Count_to,Position,length,A,C,G,T")
 	for code, dist := range data {
 		a1, a2 := intToNucPair(code)
 		for _, ratio := range dist {
-			fmt.Fprintf(destfile, "%s,%s,%s,%c,%c,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d\n",
+			fmt.Fprintf(file, "%s,%s,%s,%c,%c,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d\n",
 				ratio.Sample,
 				ratio.SeqID,
-				ratio.OriginalStatus,
+				ratio.status,
 				a1, a2,
-				ratio.WOriginal,
-				ratio.WMutant,
-				ratio.COriginal,
-				ratio.CMutant,
+				ratio.From,
+				ratio.To,
+				ratio.CFrom,
+				ratio.CTo,
 				ratio.Pos,
 				ratio.Length,
 				ratio.A,
@@ -455,20 +478,16 @@ func EstimateRatio(samples map[string]*[]*seqPCR, minStatRatio int) [][]Ratio {
 				if father.Weight >= minStatRatio && edge.Dist == 1 {
 					s := father.Sequence.Sequence()
 					ratio[edge.NucPair] = append(ratio[edge.NucPair],
-						Ratio{
-							Sample:         name,
-							SeqID:          father.Sequence.Id(),
-							OriginalStatus: Status(father.Sequence)[name],
-							WOriginal:      father.Weight,
-							WMutant:        seq.Weight,
-							COriginal:      father.Count,
-							CMutant:        seq.Count,
-							Pos:            edge.Pos,
-							Length:         father.Sequence.Len(),
-							A:              bytes.Count(s, []byte("a")),
-							C:              bytes.Count(s, []byte("c")),
-							G:              bytes.Count(s, []byte("g")),
-							T:              bytes.Count(s, []byte("t"))})
+						Ratio{name,
+							father.Sequence.Id(), Status(father.Sequence)[name],
+							father.Weight, seq.Weight,
+							father.Count, seq.Count,
+							edge.Pos,
+							father.Sequence.Len(),
+							bytes.Count(s, []byte("a")),
+							bytes.Count(s, []byte("c")),
+							bytes.Count(s, []byte("g")),
+							bytes.Count(s, []byte("t"))})
 				}
 			}

--- a/pkg/obitools/obiclean/obiclean.go
+++ b/pkg/obitools/obiclean/obiclean.go
@@ -2,7 +2,6 @@ package obiclean

 import (
 	"fmt"
-	"maps"
 	"os"

 	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
@@ -20,7 +19,6 @@ type seqPCR struct {
 	Sequence  *obiseq.BioSequence // pointer to the corresponding sequence
 	SonCount  int
 	AddedSons int
-	IsHead    bool
 	Edges     []Edge
 	Cluster   map[int]bool // used as the set of head sequences associated to that sequence
 }
@@ -52,7 +50,6 @@ func buildSamples(dataset obiseq.BioSequenceSlice,
 				Sequence:  s,
 				SonCount:  0,
 				AddedSons: 0,
-				IsHead:    false,
 			})
 		}
 	}
@@ -60,7 +57,9 @@ func buildSamples(dataset obiseq.BioSequenceSlice,
 	return samples
 }

-func annotateOBIClean(source string, dataset obiseq.BioSequenceSlice) obiiter.IBioSequence {
+func annotateOBIClean(source string, dataset obiseq.BioSequenceSlice,
+	sample map[string]*([]*seqPCR),
+	tag, NAValue string) obiiter.IBioSequence {
 	batchsize := 1000
 	var annot = func(data obiseq.BioSequenceSlice) (obiseq.BioSequenceSlice, error) {

@@ -115,28 +114,6 @@ func IsHead(sequence *obiseq.BioSequence) bool {
 	return ishead
 }

-func NotAlwaysChimera(tag string) obiseq.SequencePredicate {
-	descriptor := obiseq.MakeStatsOnDescription(tag)
-	predicat := func(sequence *obiseq.BioSequence) bool {
-
-		chimera, ok := sequence.GetStringMap("chimera")
-		if !ok || len(chimera) == 0 {
-			return true
-		}
-		samples := maps.Keys(sequence.StatsOn(descriptor, "NA"))
-
-		for s := range samples {
-			if _, ok := chimera[s]; !ok {
-				return true
-			}
-		}
-
-		return false
-	}
-
-	return predicat
-}
-
 func HeadCount(sequence *obiseq.BioSequence) int {
 	var err error
 	annotation := sequence.Annotations()
@@ -260,7 +237,6 @@ func Mutation(sample map[string]*([]*seqPCR)) {
 }

 func Status(sequence *obiseq.BioSequence) map[string]string {
-	var err error
 	annotation := sequence.Annotations()
 	iobistatus, ok := annotation["obiclean_status"]
 	var obistatus map[string]string
@@ -270,9 +246,9 @@ func Status(sequence *obiseq.BioSequence) map[string]string {
 		case map[string]string:
 			obistatus = iobistatus
 		case map[string]interface{}:
-			obistatus, err = obiutils.InterfaceToStringMap(obistatus)
-			if err != nil {
-				log.Panicf("obiclean_status attribute of sequence %s must be castable to a map[string]string", sequence.Id())
+			obistatus = make(map[string]string)
+			for k, v := range iobistatus {
+				obistatus[k] = fmt.Sprint(v)
 			}
 		}
 	} else {
@@ -380,30 +356,19 @@ func CLIOBIClean(itertator obiiter.IBioSequence) obiiter.IBioSequence {
 		}
 	}

-	if DetectChimera() {
-		AnnotateChimera(samples)
-	}
-
 	if SaveGraphToFiles() {
 		SaveGMLGraphs(GraphFilesDirectory(), samples, MinCountToEvalMutationRate())
 	}

 	if IsSaveRatioTable() {
 		all_ratio := EstimateRatio(samples, MinCountToEvalMutationRate())
-		EmpiricalDistCsv(RatioTableFilename(), all_ratio, obidefault.CompressOutput())
+		EmpiricalDistCsv(RatioTableFilename(), all_ratio)
 	}

-	iter := annotateOBIClean(source, db)
+	iter := annotateOBIClean(source, db, samples, SampleAttribute(), "NA")

 	if OnlyHead() {
-		iter = iter.FilterOn(IsHead,
-			obidefault.BatchSize()).FilterOn(NotAlwaysChimera(SampleAttribute()),
-			obidefault.BatchSize())
-	}
-
-	if MinSampleCount() > 1 {
-		sc := obiseq.OccurInAtleast(SampleAttribute(), MinSampleCount())
-		iter = iter.FilterOn(sc, obidefault.BatchSize())
+		iter = iter.FilterOn(IsHead, 1000)
 	}

 	return iter
--- a/pkg/obitools/obiclean/options.go
+++ b/pkg/obitools/obiclean/options.go
@@ -16,8 +16,6 @@ var _onlyHead = false

 var _saveGraph = "__@@NOSAVE@@__"
 var _saveRatio = "__@@NOSAVE@@__"
-var _minSample = 1
-var _detectChimera = false

 func ObicleanOptionSet(options *getoptions.GetOpt) {
 	options.StringVar(&_sampleAttribute, "sample", _sampleAttribute,
@@ -57,13 +55,6 @@ func ObicleanOptionSet(options *getoptions.GetOpt) {
 			"The ratio file follows the csv format."),
 	)

-	options.IntVar(&_minSample, "min-sample-count", _minSample,
-		options.Description("Minimum number of samples a sequence must be present in to be considered in the analysis."),
-	)
-
-	options.BoolVar(&_detectChimera, "detect-chimera", _detectChimera,
-		options.Description("Detect chimera sequences."),
-	)
 }

 func OptionSet(options *getoptions.GetOpt) {
@@ -120,13 +111,3 @@ func IsSaveRatioTable() bool {
 func RatioTableFilename() string {
 	return _saveRatio
 }
-
-// It returns the minimum number of samples a sequence must be present in to be considered in the analysis
-func MinSampleCount() int {
-	return _minSample
-}
-
-// It returns true if chimera detection is enabled
-func DetectChimera() bool {
-	return _detectChimera
-}
--- a/pkg/obitools/obiconvert/options.go
+++ b/pkg/obitools/obiconvert/options.go
@@ -31,6 +31,7 @@ var __output_fastjson_format__ = false
 var __output_fastobi_format__ = false

 var __no_progress_bar__ = false
+var __compressed__ = false
 var __skip_empty__ = false

 var __output_file_name__ = "-"
--- a/pkg/obitools/obiconvert/sequence_writer.go
+++ b/pkg/obitools/obiconvert/sequence_writer.go
@@ -21,7 +21,7 @@ func BuildPairedFileNames(filename string) (string, string) {
 	forward := parts[0] + "_R1"
 	reverse := parts[0] + "_R2"

-	if len(parts) > 1 && parts[1] != "" {
+	if parts[1] != "" {
 		suffix := "." + parts[1]
 		forward += suffix
 		reverse += suffix
--- a/pkg/obitools/obigrep/options.go
+++ b/pkg/obitools/obigrep/options.go
@@ -6,7 +6,6 @@ import (
 	log "github.com/sirupsen/logrus"

 	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat"
-	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
 	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
 	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
 	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
@@ -17,7 +16,6 @@ import (
 var _BelongTaxa = make([]string, 0)
 var _NotBelongTaxa = make([]string, 0)
 var _RequiredRanks = make([]string, 0)
-var _ValidateTaxonomy = false

 var _MinimumLength = 1
 var _MaximumLength = int(2e9)
@@ -64,9 +62,6 @@ func TaxonomySelectionOptionSet(options *getoptions.GetOpt) {
 		options.ArgName("RANK_NAME"),
 		options.Description("Select sequences belonging a taxon with a rank <RANK_NAME>"))

-	options.BoolVar(&_ValidateTaxonomy, "valid-taxid", _ValidateTaxonomy,
-		options.Description("Validate the taxonomic classification of the sequences."))
-
 }

 func SequenceSelectionOptionSet(options *getoptions.GetOpt) {
@@ -276,27 +271,6 @@ func CLIRestrictTaxonomyPredicate() obiseq.SequencePredicate {
 	return nil
 }

-func CLIIsValidTaxonomyPredicate() obiseq.SequencePredicate {
-	if _ValidateTaxonomy {
-		if !obidefault.HasSelectedTaxonomy() {
-			log.Fatal("Taxonomy not found")
-		}
-		taxonomy := obitax.DefaultTaxonomy()
-		if taxonomy == nil {
-			log.Fatal("Taxonomy not found")
-		}
-
-		predicat := func(sequences *obiseq.BioSequence) bool {
-			taxon := sequences.Taxon(taxonomy)
-			return taxon != nil
-		}
-
-		return predicat
-	}
-
-	return nil
-}
-
 func CLIAvoidTaxonomyPredicate() obiseq.SequencePredicate {
 	var p obiseq.SequencePredicate
 	var p2 obiseq.SequencePredicate
@@ -345,7 +319,7 @@ func CLIHasRankDefinedPredicate() obiseq.SequencePredicate {
 }

 func CLITaxonomyFilterPredicate() obiseq.SequencePredicate {
-	return CLIIsValidTaxonomyPredicate().And(CLIAvoidTaxonomyPredicate()).And(CLIHasRankDefinedPredicate()).And(CLIRestrictTaxonomyPredicate())
+	return CLIHasRankDefinedPredicate().And(CLIRestrictTaxonomyPredicate()).And(CLIAvoidTaxonomyPredicate())
 }

 func CLIPredicatesPredicate() obiseq.SequencePredicate {
--- a/pkg/obitools/obimicroasm/microasm.go
+++ b/pkg/obitools/obimicroasm/microasm.go
@@ -0,0 +1,520 @@
+package obimicroasm
+
+import (
+	"fmt"
+	"os"
+	"path"
+	"slices"
+
+	log "github.com/sirupsen/logrus"
+
+	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat"
+	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats"
+	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
+	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obikmer"
+	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
+	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obisuffix"
+)
+
+// BuildFilterOnPatternReadPairWorker builds a SeqWorker that keeps the reads
+// carrying the forward primer and/or the reverse complement of the reverse
+// primer, and annotates them with the matched primers.
+//
+// Parameters:
+//   - forward, reverse: primer sequences; reverse is reverse-complemented
+//     before being compiled as a pattern.
+//   - errormax: maximum number of errors passed to the pattern compiler.
+//   - cutReads: when true, a read is trimmed to start at the forward match
+//     and/or to stop at the end of the reverse match.
+//
+// For every input sequence the worker returns a slice containing the
+// retained read and, when the sequence is paired, its retained mate. A read
+// that matches nothing is tried a second time after reverse complementation.
+// Any failure while compiling the primers or preparing a read stops the
+// program through log.Fatalf.
+func BuildFilterOnPatternReadPairWorker(
+	forward, reverse string,
+	errormax int,
+	cutReads bool,
+) obiseq.SeqWorker {
+	forwardPatternDir, err := obiapat.MakeApatPattern(forward, errormax, false)
+
+	if err != nil {
+		log.Fatalf("Cannot compile forward primer %s : %v", forward, err)
+	}
+
+	// The reverse primer is searched on the same strand as the forward one,
+	// hence the pattern is built from its reverse complement.
+	reverse_rev := obiseq.NewBioSequence("fp", []byte(reverse), "").ReverseComplement(true).String()
+	reveresePatternRev, err := obiapat.MakeApatPattern(reverse_rev, errormax, false)
+
+	if err != nil {
+		log.Fatalf("Cannot compile reverse complement reverse primer %s : %v", reverse, err)
+	}
+
+	// matchRead looks for both primers on a single read and returns the
+	// annotated (and optionally trimmed) read, or nil when no primer matches.
+	matchRead := func(sequence *obiseq.BioSequence) *obiseq.BioSequence {
+		var aseq obiapat.ApatSequence
+		var err error
+		var read, match *obiseq.BioSequence
+
+		aseq, err = obiapat.MakeApatSequence(sequence, false)
+
+		if err != nil {
+			log.Fatalf("Cannot prepare apat sequence from %s : %v", sequence.Id(), err)
+		}
+
+		start, end, nerr, matched := forwardPatternDir.BestMatch(aseq, 0, aseq.Len())
+
+		if matched {
+			// Forward primer found: keep the read, trimmed from the match start
+			// when cutReads is set.
+			read = sequence
+
+			if cutReads {
+				read, err = sequence.Subsequence(start, sequence.Len(), false)
+
+				if err != nil {
+					log.Fatalf("Cannot cut, on forward, forward read %s [%d,%d] : %v",
+						sequence.Id(), start, sequence.Len(), err)
+				}
+			}
+
+			read.SetAttribute("forward_primer", forward)
+			// NOTE(review): the error returned by Subsequence is ignored here;
+			// match would be nil if the extraction failed.
+			match, _ = sequence.Subsequence(start, end, false)
+			read.SetAttribute("forward_match", match.String())
+			read.SetAttribute("forward_error", nerr)
+
+			// The reverse primer is now searched on the (possibly trimmed) read,
+			// so the coordinates below are relative to read, not to sequence.
+			// The previous aseq is passed as an extra argument, presumably so
+			// that it can be recycled; confirm against obiapat.
+			aseq, err = obiapat.MakeApatSequence(read, false, aseq)
+
+			if err != nil {
+				log.Fatalf("Cannot prepare apat sequence from %s : %v", sequence.Id(), err)
+			}
+
+			start, end, nerr, matched = reveresePatternRev.BestMatch(aseq, 0, aseq.Len())
+
+			if matched {
+
+				frread := read
+
+				if cutReads {
+					frread, err = read.Subsequence(0, end, false)
+
+					if err != nil {
+						// NOTE(review): the message reports [start, read.Len()]
+						// while the attempted cut is [0, end].
+						log.Fatalf("Cannot xxx cut, on reverse, forward read %s [%d,%d] : %v",
+							sequence.Id(), start, read.Len(), err)
+					}
+				}
+
+				frread.SetAttribute("reverse_primer", reverse)
+				match, _ = read.Subsequence(start, end, false)
+				// The match is stored in the orientation of the reverse primer.
+				frread.SetAttribute("reverse_match", match.ReverseComplement(true).String())
+				frread.SetAttribute("reverse_error", nerr)
+
+				read = frread
+				//				log.Warnf("Forward-Reverse primer matched on %s : %d\n%s", read.Id(), read.Len(),
+				//					obiformats.FormatFasta(read, obiformats.FormatFastSeqJsonHeader))
+			}
+
+		} else {
+			// No forward primer: the read is kept only if it carries the
+			// reverse primer.
+			start, end, nerr, matched = reveresePatternRev.BestMatch(aseq, 0, aseq.Len())
+
+			if matched {
+				read = sequence
+				if cutReads {
+					read, err = sequence.Subsequence(0, end, false)
+
+					if err != nil {
+						log.Fatalf("Cannot yyy cut, on reverse, forward read %s [%d,%d] : %v",
+							sequence.Id(), 0, end, err)
+					}
+
+				}
+
+				read.SetAttribute("reverse_primer", reverse)
+				// The cut starts at 0, so start and end are still valid on read.
+				match, _ = read.Subsequence(start, end, false)
+				read.SetAttribute("reverse_match", match.ReverseComplement(true).String())
+				read.SetAttribute("reverse_error", nerr)
+			} else {
+				read = nil
+			}
+
+		}
+
+		return read
+	}
+
+	// w applies matchRead to a sequence and to its mate, each one being tried
+	// a second time on its reverse complement when the first attempt fails.
+	w := func(sequence *obiseq.BioSequence) (result obiseq.BioSequenceSlice, err error) {
+		result = obiseq.MakeBioSequenceSlice()
+
+		// The mate is saved before the pairing is removed so that both reads
+		// are processed independently.
+		paired := sequence.PairedWith()
+		sequence.UnPair()
+
+		read := matchRead(sequence)
+
+		if read == nil {
+			sequence = sequence.ReverseComplement(true)
+			read = matchRead(sequence)
+		}
+
+		if read != nil {
+			result = append(result, read)
+		}
+
+		if paired != nil {
+			read = matchRead(paired)
+
+			if read == nil {
+				read = matchRead(paired.ReverseComplement(true))
+			}
+
+			if read != nil {
+				result = append(result, read)
+			}
+		}
+
+		return
+	}
+
+	return w
+}
+
+// ExtractOnPatterns applies the worker built by
+// BuildFilterOnPatternReadPairWorker to every sequence provided by iter and
+// gathers all the reads it returns into a single slice.
+//
+// Parameters:
+//   - iter: iterator over the reads to filter.
+//   - forward, reverse: primer sequences.
+//   - errormax: maximum number of errors allowed when matching a primer.
+//   - cutReads: forwarded to the worker to ask for trimmed reads.
+//
+// The iterator is fully consumed before the function returns.
+func ExtractOnPatterns(iter obiiter.IBioSequence,
+	forward, reverse string,
+	errormax int,
+	cutReads bool,
+) obiseq.BioSequenceSlice {
+	worker := BuildFilterOnPatternReadPairWorker(forward, reverse, errormax, cutReads)
+	filtered := iter.MakeIWorker(worker, false)
+
+	selected := obiseq.MakeBioSequenceSlice()
+
+	// Each batch delivered by the iterator is flattened into the result.
+	for filtered.Next() {
+		selected = append(selected, filtered.Get().Slice()...)
+	}
+
+	return selected
+}
+
+// BuildPCRProduct assembles the reads of seqs with a de Bruijn graph and
+// returns, as a sequence identified by consensus_id, the path computed by
+// HaviestPath between the nodes matching the two primers.
+//
+// Parameters:
+//   - seqs: reads to assemble; an error is returned when the slice is empty.
+//   - consensus_id: identifier of the returned sequence, also used as the
+//     prefix of the files written when save_graph is true.
+//   - kmer_size: size of the k-mers; a negative value asks for an estimate
+//     computed from the suffix array of every read.
+//   - forward, reverse: primers bounding the product; reverse is
+//     reverse-complemented before being searched in the graph.
+//   - backtrack: when true the two bounds are swapped and the path is
+//     searched in the opposite direction.
+//   - save_graph, dirname: when save_graph is true, the reads (FASTA) and the
+//     graph (GML) are written in dirname ("." when dirname is empty). Saving
+//     is best effort: a failure is logged and does not abort the assembly.
+//
+// While the filtered graph contains cycles the k-mer size is increased, up
+// to 31, and every increase is recorded through SetKmerSize.
+//
+// An error is returned when seqs is empty, when none of the primers can be
+// located in the graph, or when no path can be computed.
+func BuildPCRProduct(seqs obiseq.BioSequenceSlice,
+	consensus_id string,
+	kmer_size int,
+	forward, reverse string,
+	backtrack bool,
+	save_graph bool, dirname string) (*obiseq.BioSequence, error) {
+
+	from := obiseq.NewBioSequence("forward", []byte(forward), "")
+	// The reverse parameter is used here: reading the command line option
+	// instead would silently ignore the value given by the caller.
+	to := obiseq.NewBioSequence("reverse", []byte(reverse), "").ReverseComplement(true)
+
+	if backtrack {
+		from, to = to, from
+	}
+
+	if seqs.Len() == 0 {
+		return nil, fmt.Errorf("no sequence provided")
+	}
+
+	if save_graph {
+		if dirname == "" {
+			dirname = "."
+		}
+
+		if stat, err := os.Stat(dirname); err != nil || !stat.IsDir() {
+			// path does not exist or is not directory
+			os.RemoveAll(dirname)
+
+			if err := os.Mkdir(dirname, 0755); err != nil {
+				log.Panicf("Cannot create directory %s for saving graphs : %v", dirname, err)
+			}
+		}
+
+		fastaPath := path.Join(dirname, fmt.Sprintf("%s_consensus.fasta", consensus_id))
+		fasta, err := os.Create(fastaPath)
+
+		if err != nil {
+			log.Warnf("Cannot create file %s : %v", fastaPath, err)
+		} else {
+			_, werr := fasta.Write(obiformats.FormatFastaBatch(obiiter.MakeBioSequenceBatch(
+				fmt.Sprintf("%s_consensus", consensus_id),
+				0,
+				seqs,
+			),
+				obiformats.FormatFastSeqJsonHeader, false).Bytes())
+
+			// The file is closed exactly once and the first error is reported.
+			if cerr := fasta.Close(); werr == nil {
+				werr = cerr
+			}
+
+			if werr != nil {
+				log.Warnf("Cannot write the reads to %s : %v", fastaPath, werr)
+			}
+		}
+	}
+
+	log.Debugf("Number of reads : %d\n", len(seqs))
+
+	if kmer_size < 0 {
+		longest := make([]int, len(seqs))
+
+		for i, seq := range seqs {
+			s := obiseq.BioSequenceSlice{seq}
+			sa := obisuffix.BuildSuffixArray(&s)
+			longest[i] = slices.Max(sa.CommonSuffix())
+		}
+
+		kmer_size = slices.Max(longest) + 1
+		log.Infof("estimated kmer size : %d", kmer_size)
+	}
+
+	var graph *obikmer.DeBruijnGraph
+	var starts, stops []uint64
+	fullGraphSize := 0
+
+	// searchBounds locates on the current graph the nodes where the path
+	// starts and stops, taking the search direction into account.
+	searchBounds := func() ([]uint64, []uint64) {
+		if backtrack {
+			return graph.BackSearch(from, CLIAllowedMismatch()),
+				graph.Search(to, CLIAllowedMismatch())
+		}
+
+		return graph.Search(from, CLIAllowedMismatch()),
+			graph.BackSearch(to, CLIAllowedMismatch())
+	}
+
+	for {
+		graph = obikmer.MakeDeBruijnGraph(kmer_size)
+
+		for _, s := range seqs {
+			graph.Push(s)
+		}
+
+		// Size of the graph before any filtering.
+		fullGraphSize = graph.Len()
+
+		starts, stops = searchBounds()
+
+		log.Infof("Found %d starts", len(starts))
+		pweight := map[int]int{}
+		for _, s := range starts {
+			w := graph.Weight(s)
+			pweight[w]++
+			log.Warnf("Starts : %s (%d)\n", graph.DecodeNode(s), w)
+		}
+
+		log.Infof("Found %d stops", len(stops))
+		for _, s := range stops {
+			w := graph.Weight(s)
+			pweight[w]++
+			log.Warnf("Stop : %s (%d)\n", graph.DecodeNode(s), w)
+		}
+
+		log.Infof("Weight spectrum : %v", pweight)
+
+		// Without any start or stop node the coverage thresholds cannot be
+		// computed (the mean below would divide by zero).
+		if len(pweight) == 0 {
+			return nil, fmt.Errorf(
+				"no primer match found in the de Bruijn graph built with kmer size %d",
+				kmer_size)
+		}
+
+		wmax := 0
+		sw := 0
+		for w := range pweight {
+			sw += w
+			if w > wmax {
+				wmax = w
+			}
+		}
+
+		minCoverage := sw / len(pweight)
+		maxCoverage := wmax * 2
+
+		graph.FilterMinWeight(minCoverage)
+		graph.FilterMaxWeight(maxCoverage)
+
+		log.Infof("Minimum coverage : %d", minCoverage)
+		log.Infof("Maximum coverage : %d", maxCoverage)
+
+		if !graph.HasCycleInDegree() {
+			break
+		}
+
+		// 31 is the largest k-mer size tried: kmer_size is left untouched so
+		// that it always describes the graph actually built.
+		if kmer_size >= 31 {
+			break
+		}
+
+		kmer_size++
+		SetKmerSize(kmer_size)
+		log.Warnf("Cycle detected, increasing kmer size to %d\n", kmer_size)
+	}
+
+	// The bounds are searched again on the filtered graph.
+	starts, stops = searchBounds()
+
+	hp, err := graph.HaviestPath(starts, stops, backtrack)
+
+	log.Debugf("Graph size : %d\n", graph.Len())
+
+	maxw := graph.MaxWeight()
+	modew := graph.WeightMode()
+	meanw := graph.WeightMean()
+	specw := graph.WeightSpectrum()
+	kmer := graph.KmerSize()
+
+	log.Warnf("Weigh mode: %d Weigth mean : %4.1f Weigth max : %d, kmer = %d", modew, meanw, maxw, kmer)
+	log.Warn(specw)
+
+	// The graph is saved even when no path was found, to help diagnosis.
+	if save_graph {
+		gmlPath := path.Join(dirname, fmt.Sprintf("%s_consensus.gml", consensus_id))
+		file, ferr := os.Create(gmlPath)
+
+		if ferr != nil {
+			// Reported through the logger: standard output may carry results.
+			log.Warnf("Cannot create file %s : %v", gmlPath, ferr)
+		} else {
+			_, werr := file.WriteString(graph.Gml())
+
+			if cerr := file.Close(); werr == nil {
+				werr = cerr
+			}
+
+			if werr != nil {
+				log.Warnf("Cannot write the graph to %s : %v", gmlPath, werr)
+			}
+		}
+	}
+
+	if err != nil {
+		return nil, err
+	}
+
+	seq := obiseq.NewBioSequence(consensus_id, []byte(graph.DecodePath(hp)), "")
+
+	sumCount := 0
+	for _, s := range seqs {
+		sumCount += s.Count()
+	}
+
+	seq.SetAttribute("obiconsensus_consensus", true)
+	seq.SetAttribute("obiconsensus_weight", sumCount)
+	seq.SetAttribute("obiconsensus_seq_length", seq.Len())
+	seq.SetAttribute("obiconsensus_kmer_size", kmer_size)
+	seq.SetAttribute("obiconsensus_kmer_max_occur", graph.MaxWeight())
+	seq.SetAttribute("obiconsensus_filtered_graph_size", graph.Len())
+	seq.SetAttribute("obiconsensus_full_graph_size", fullGraphSize)
+
+	log.Warnf("Consensus sequence : \n%s", obiformats.FormatFasta(seq, obiformats.FormatFastSeqJsonHeader))
+
+	return seq, nil
+}
+
+// CLIAssemblePCR assembles, from the read files given on the command line,
+// the PCR product bounded by the forward and reverse primers.
+//
+// The reads carrying a primer are assembled from the forward primer. While
+// the assembly contains the forward primer but not the reverse one, a second
+// assembly is built backward, the last k-mer of the forward assembly and the
+// first k-mer of the backward one are used as new anchors to recruit more
+// reads, and the forward assembly is rebuilt.
+//
+// The returned sequence spans from the forward match to the reverse match
+// and carries the forward_* and reverse_* attributes describing them.
+//
+// Every failure stops the program: unreadable input files, failed assembly,
+// an extension step that recruits no new read, or primers that cannot be
+// located on the final assembly.
+func CLIAssemblePCR() *obiseq.BioSequence {
+
+	pairs, err := CLIPairedSequence()
+
+	if err != nil {
+		log.Errorf("Cannot open file (%v)", err)
+		os.Exit(1)
+	}
+
+	matched := ExtractOnPatterns(pairs,
+		CLIForwardPrimer(),
+		CLIReversePrimer(),
+		CLIAllowedMismatch(),
+		true,
+	)
+
+	// NOTE(review): the graph directory is also used as the identifier of
+	// the assembled sequence; confirm that this is intended.
+	seq, err := BuildPCRProduct(
+		matched,
+		CLIGraphFilesDirectory(),
+		CLIKmerSize(),
+		CLIForwardPrimer(),
+		CLIReversePrimer(),
+		false,
+		CLISaveGraphToFiles(),
+		CLIGraphFilesDirectory())
+
+	if err != nil {
+		log.Fatalf("Cannot build the consensus sequence : %v", err)
+	}
+
+	forwardPatternDir, err := obiapat.MakeApatPattern(
+		CLIForwardPrimer(),
+		CLIAllowedMismatch(),
+		false)
+
+	if err != nil {
+		log.Fatalf("Cannot compile forward primer %s : %v", CLIForwardPrimer(), err)
+	}
+
+	reverse_rev := obiseq.NewBioSequence("fp", []byte(CLIReversePrimer()), "").ReverseComplement(true).String()
+	reveresePatternRev, err := obiapat.MakeApatPattern(reverse_rev, CLIAllowedMismatch(), false)
+
+	if err != nil {
+		log.Fatalf("Cannot compile reverse complement reverse primer %s : %v", CLIReversePrimer(), err)
+	}
+
+	aseq, err := obiapat.MakeApatSequence(seq, false)
+
+	if err != nil {
+		log.Fatalf("Cannot build apat sequence: %v", err)
+	}
+
+	fstart, fend, fnerr, hasfw := forwardPatternDir.BestMatch(aseq, 0, aseq.Len())
+	rstart, rend, rnerr, hasrev := reveresePatternRev.BestMatch(aseq, 0, aseq.Len())
+
+	for hasfw && !hasrev {
+		rseq, err := BuildPCRProduct(
+			matched,
+			CLIGraphFilesDirectory(),
+			CLIKmerSize(),
+			CLIForwardPrimer(),
+			CLIReversePrimer(),
+			true,
+			CLISaveGraphToFiles(),
+			CLIGraphFilesDirectory())
+
+		if err != nil {
+			log.Fatalf("Cannot build Reverse PCR sequence: %v", err)
+		}
+
+		// A missing attribute yields 0, which is rejected with the other
+		// sizes that would make the extractions below fail.
+		kmerSize, _ := seq.GetIntAttribute("obiconsensus_kmer_size")
+
+		if kmerSize <= 0 || kmerSize > seq.Len() || kmerSize > rseq.Len() {
+			log.Fatalf("Cannot extract anchors of size %d from the partial assemblies (forward: %d bp, backward: %d bp)",
+				kmerSize, seq.Len(), rseq.Len())
+		}
+
+		fp, err := seq.Subsequence(seq.Len()-kmerSize, seq.Len(), false)
+
+		if err != nil {
+			log.Fatalf("Cannot extract the forward anchor: %v", err)
+		}
+
+		rp, err := rseq.Subsequence(0, kmerSize, false)
+
+		if err != nil {
+			log.Fatalf("Cannot extract the reverse anchor: %v", err)
+		}
+
+		// ExtractOnPatterns reverse-complements its reverse pattern.
+		rp = rp.ReverseComplement(true)
+
+		pairs, err = CLIPairedSequence()
+
+		if err != nil {
+			log.Errorf("Cannot open file (%v)", err)
+			os.Exit(1)
+		}
+
+		nmatched := ExtractOnPatterns(pairs,
+			fp.String(),
+			rp.String(),
+			CLIAllowedMismatch(),
+			true,
+		)
+
+		known := make(map[string]bool, len(matched))
+
+		for _, s := range matched {
+			known[s.String()] = true
+		}
+
+		recruited := 0
+
+		for _, s := range nmatched {
+			if !known[s.String()] {
+				matched = append(matched, s)
+				recruited++
+			}
+		}
+
+		// With the same set of reads the next assembly would be identical
+		// and the loop would never end.
+		if recruited == 0 {
+			log.Fatalf("Cannot extend the assembly up to the reverse primer: no new read recruited")
+		}
+
+		seq, err = BuildPCRProduct(
+			matched,
+			CLIGraphFilesDirectory(),
+			CLIKmerSize(),
+			CLIForwardPrimer(),
+			CLIReversePrimer(),
+			false,
+			CLISaveGraphToFiles(),
+			CLIGraphFilesDirectory())
+
+		if err != nil {
+			log.Fatalf("Cannot build the consensus sequence : %v", err)
+		}
+
+		aseq, err = obiapat.MakeApatSequence(seq, false)
+
+		if err != nil {
+			log.Fatalf("Cannot build apat sequence: %v", err)
+		}
+
+		fstart, fend, fnerr, hasfw = forwardPatternDir.BestMatch(aseq, 0, aseq.Len())
+		rstart, rend, rnerr, hasrev = reveresePatternRev.BestMatch(aseq, 0, aseq.Len())
+	}
+
+	// The coordinates used below are meaningful only if both primers match.
+	if !hasfw || !hasrev {
+		log.Fatalf("Cannot locate both primers on the assembled sequence (forward found: %t, reverse found: %t)",
+			hasfw, hasrev)
+	}
+
+	marker, err := seq.Subsequence(fstart, rend, false)
+
+	if err != nil {
+		log.Fatalf("Cannot extract the marker [%d,%d] from the assembled sequence : %v",
+			fstart, rend, err)
+	}
+
+	marker.SetAttribute("forward_primer", CLIForwardPrimer())
+	match, err := seq.Subsequence(fstart, fend, false)
+
+	if err != nil {
+		log.Fatalf("Cannot extract the forward match [%d,%d] : %v", fstart, fend, err)
+	}
+
+	marker.SetAttribute("forward_match", match.String())
+	marker.SetAttribute("forward_error", fnerr)
+
+	marker.SetAttribute("reverse_primer", CLIReversePrimer())
+	match, err = seq.Subsequence(rstart, rend, false)
+
+	if err != nil {
+		log.Fatalf("Cannot extract the reverse match [%d,%d] : %v", rstart, rend, err)
+	}
+
+	marker.SetAttribute("reverse_match", match.ReverseComplement(true).String())
+	marker.SetAttribute("reverse_error", rnerr)
+
+	return marker
+}
--- a/pkg/obitools/obimicroasm/options.go
+++ b/pkg/obitools/obimicroasm/options.go
@@ -0,0 +1,139 @@
+package obimicroasm
+
+import (
+	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat"
+	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
+	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
+	"github.com/DavidGamba/go-getoptions"
+	log "github.com/sirupsen/logrus"
+)
+
+// Values of the obimicroasm command line options, filled by the option
+// parser through MicroAsmOptionSet.
+
+// _ForwardFile is the value of --forward-reads.
+var _ForwardFile = ""
+
+// _ReverseFile is the value of --reverse-reads.
+var _ReverseFile = ""
+
+// _ForwardPrimer is the value of --forward.
+var _ForwardPrimer string
+
+// _ReversePrimer is the value of --reverse.
+var _ReversePrimer string
+
+// _AllowedMismatch is the value of --allowed-mismatches.
+var _AllowedMismatch = 0
+
+// _kmerSize is the value of --kmer-size; it can also be changed with
+// SetKmerSize.
+var _kmerSize = -1
+
+// _saveGraph is the value of --save-graph. The default value is a sentinel
+// that CLISaveGraphToFiles interprets as "do not save".
+var _saveGraph = "__@@NOSAVE@@__"
+
+// MicroAsmOptionSet declares on options the command line options specific
+// to obimicroasm: the two read files, the two primers, the number of
+// mismatches allowed per primer, the k-mer size and the directory where the
+// graphs are saved.
+//
+// The read files and the primers are declared as required options.
+func MicroAsmOptionSet(options *getoptions.GetOpt) {
+	options.StringVar(&_ForwardFile, "forward-reads", "",
+		options.Alias("F"),
+		options.ArgName("FILENAME_F"),
+		options.Required("You must provide a forward file"),
+		options.Description("The name of the file containing the forward reads"))
+	options.StringVar(&_ReverseFile, "reverse-reads", "",
+		options.Alias("R"),
+		options.ArgName("FILENAME_R"),
+		options.Required("You must provide a reverse file"),
+		options.Description("The name of the file containing the reverse reads"))
+	options.StringVar(&_ForwardPrimer, "forward", "",
+		options.Required("You must provide a forward primer"),
+		options.Description("The forward primer used for the electronic PCR."))
+
+	options.StringVar(&_ReversePrimer, "reverse", "",
+		options.Required("You must provide a reverse primer"),
+		options.Description("The reverse primer used for the electronic PCR."))
+
+	options.IntVar(&_AllowedMismatch, "allowed-mismatches", 0,
+		options.Alias("e"),
+		options.Description("Maximum number of mismatches allowed for each primer."))
+	options.IntVar(&_kmerSize, "kmer-size", _kmerSize,
+		options.ArgName("SIZE"),
+		options.Description("The size of the kmer used to build the consensus. "+
+			"Default value = -1, which means that the kmer size is estimated from the data"),
+	)
+
+	// The help text describes what obimicroasm actually writes: the reads
+	// in FASTA and the de Bruijn graph in GML.
+	options.StringVar(&_saveGraph, "save-graph", _saveGraph,
+		options.Description("Creates a directory containing the de Bruijn graph used to assemble the PCR product "+
+			"and the reads it was built from. "+
+			"The graph files follow the GML format."),
+	)
+
+}
+
+// OptionSet declares on options all the options of the obimicroasm command:
+// the ones declared by obiconvert.OptionSet followed by the ones declared by
+// MicroAsmOptionSet.
+func OptionSet(options *getoptions.GetOpt) {
+	obiconvert.OptionSet(options)
+	MicroAsmOptionSet(options)
+}
+
+// CLIForwardPrimer gives the forward primer provided with the --forward
+// command line option.
+//
+// The primer is compiled as a pattern on every call, with the number of
+// mismatches set by --allowed-mismatches, only to check that it is valid:
+// the program stops through log.Fatalf when the compilation fails.
+func CLIForwardPrimer() string {
+	primer := _ForwardPrimer
+
+	compiled, err := obiapat.MakeApatPattern(primer, _AllowedMismatch, false)
+	if err != nil {
+		log.Fatalf("%+v", err)
+	}
+
+	// The compiled pattern is not needed any more once the check is done.
+	compiled.Free()
+
+	return primer
+}
+
+// CLIReversePrimer gives the reverse primer provided with the --reverse
+// command line option.
+//
+// The primer is compiled as a pattern on every call, with the number of
+// mismatches set by --allowed-mismatches, only to check that it is valid:
+// the program stops through log.Fatalf when the compilation fails.
+func CLIReversePrimer() string {
+	primer := _ReversePrimer
+
+	compiled, err := obiapat.MakeApatPattern(primer, _AllowedMismatch, false)
+	if err != nil {
+		log.Fatalf("%+v", err)
+	}
+
+	// The compiled pattern is not needed any more once the check is done.
+	compiled.Free()
+
+	return primer
+}
+
+// CLIAllowedMismatch returns the maximum number of mismatches allowed
+// between each primer and the sequences, as set by the
+// --allowed-mismatches|-e command line option.
+func CLIAllowedMismatch() int {
+	return _AllowedMismatch
+}
+
+// CLIPairedSequence opens the files given by --forward-reads and
+// --reverse-reads and returns an iterator where the forward reads are
+// paired to the reverse reads.
+//
+// When one of the files cannot be read, obiiter.NilIBioSequence is returned
+// with the error; the forward file is opened first.
+func CLIPairedSequence() (obiiter.IBioSequence, error) {
+	fwdReads, err := obiconvert.CLIReadBioSequences(_ForwardFile)
+	if err != nil {
+		return obiiter.NilIBioSequence, err
+	}
+
+	revReads, err := obiconvert.CLIReadBioSequences(_ReverseFile)
+	if err != nil {
+		return obiiter.NilIBioSequence, err
+	}
+
+	return fwdReads.PairTo(revReads), nil
+}
+
+// CLIForwardFile returns the name of the forward read file, as set by the
+// --forward-reads|-F command line option.
+func CLIForwardFile() string {
+	return _ForwardFile
+}
+
+// CLISaveGraphToFiles returns true if the graphs must be saved, that is if
+// the --save-graph option changed the directory from its sentinel default.
+func CLISaveGraphToFiles() bool {
+	return _saveGraph != "__@@NOSAVE@@__"
+}
+
+// CLIGraphFilesDirectory returns the directory where the graph files are
+// saved, as set by the --save-graph option. When the option is not used the
+// sentinel default value is returned: check CLISaveGraphToFiles first.
+func CLIGraphFilesDirectory() string {
+	return _saveGraph
+}
+
+// CLIKmerSize returns the size of the k-mers to use for building the
+// consensus.
+//
+// The value is set by the user with the --kmer-size option and can be
+// changed afterwards with SetKmerSize.
+// The value -1 means that the k-mer size is estimated as the minimum value
+// ensuring that no k-mer is present more than once in a sequence.
+func CLIKmerSize() int {
+	return _kmerSize
+}
+
+// SetKmerSize replaces the k-mer size returned by CLIKmerSize with kmerSize.
+// The value is stored in a package-level variable without synchronization.
+func SetKmerSize(kmerSize int) {
+	_kmerSize = kmerSize
+}
--- a/pkg/obitools/obipairing/pairing.go
+++ b/pkg/obitools/obipairing/pairing.go
@@ -129,7 +129,6 @@ func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
 	}
 	lcons := cons.Len()
 	aliLength := lcons - _Abs(left) - _Abs(right)
-
 	identity := float64(match) / float64(aliLength)
 	if aliLength == 0 {
 		identity = 0
@@ -238,7 +237,7 @@ func IAssemblePESequencesBatch(iterator obiiter.IBioSequence,
 		log.Printf("End of the sequence Pairing")
 	}()

-	f := func(iterator obiiter.IBioSequence) {
+	f := func(iterator obiiter.IBioSequence, wid int) {
 		arena := obialign.MakePEAlignArena(150, 150)
 		shifts := make(map[int]int)

@@ -263,9 +262,9 @@ func IAssemblePESequencesBatch(iterator obiiter.IBioSequence,
 	log.Printf("Start of the sequence Pairing using %d workers\n", nworkers)

 	for i := 0; i < nworkers-1; i++ {
-		go f(iterator.Split())
+		go f(iterator.Split(), i)
 	}
-	go f(iterator)
+	go f(iterator, nworkers-1)
 	return newIter

 }
--- a/pkg/obitools/obitag/obitag.go
+++ b/pkg/obitools/obitag/obitag.go
@@ -73,10 +73,6 @@ func FindClosests(sequence *obiseq.BioSequence,
 	refcounts []*obikmer.Table4mer,
 	runExact bool) (obiseq.BioSequenceSlice, int, float64, string, []int) {

-	if sequence.Len() < 5 {
-		return obiseq.BioSequenceSlice{}, 1000, 0, "NA", []int{}
-	}
-
 	var matrix []uint64

 	seqwords := obikmer.Count4Mer(sequence, nil, nil)
@@ -260,7 +256,7 @@ func CLIAssignTaxonomy(iterator obiiter.IBioSequence,
 		if taxon != nil {
 			j++
 		} else {
-			log.Warnf("Taxid %s is not described in the taxonomy %s."+
+			log.Warnf("Taxid %d is not described in the taxonomy %s."+
 				" Sequence %s is discared from the reference database",
 				seq.Taxid(), taxo.Name(), seq.Id())
 		}
--- a/pkg/obiutils/set.go
+++ b/pkg/obiutils/set.go
@@ -23,7 +23,7 @@ func MakeSet[E comparable](vals ...E) Set[E] {
 // It takes a variadic parameter of type E, where E is a comparable type.
 // It returns a pointer to a Set of type E.
 func NewSet[E comparable](vals ...E) *Set[E] {
-	s := MakeSet(vals...)
+	s := MakeSet[E](vals...)
 	return &s
 }

--- a/pkg/obiutils/set_test.go
+++ b/pkg/obiutils/set_test.go
@@ -50,7 +50,7 @@ func TestNewSet(t *testing.T) {
 	}

 	// Test Case 2: Creating a set with multiple values
-	set2 := NewSet("apple", "banana", "cherry")
+	set2 := NewSet[string]("apple", "banana", "cherry")
 	if len(*set2) != 3 {
 		t.Errorf("Expected size to be 3, but got %d", len(*set2))
 	}
@@ -147,7 +147,7 @@ func TestMembers(t *testing.T) {
 	}

 	// Test case 2: Set with multiple elements
-	set = MakeSet(1, 2, 3)
+	set = MakeSet[int](1, 2, 3)
 	expected = []int{1, 2, 3}
 	actual = set.Members()
 	sort.Ints(actual)
@@ -172,7 +172,7 @@ func TestSetString(t *testing.T) {
 	}

 	// Test set with single member
-	singleMemberSet := NewSet(42)
+	singleMemberSet := NewSet[int](42)
 	singleMemberSetString := singleMemberSet.String()
 	expectedSingleMemberSetString := "[42]"
 	if singleMemberSetString != expectedSingleMemberSetString {
@@ -180,7 +180,7 @@ func TestSetString(t *testing.T) {
 	}

 	// Test set with multiple members
-	multipleMembersSet := NewSet(1, 2, 3)
+	multipleMembersSet := NewSet[int](1, 2, 3)
 	multipleMembersSetString := multipleMembersSet.String()
 	expectedMultipleMembersSetString := "[1 2 3]"
 	if multipleMembersSetString != expectedMultipleMembersSetString {
@@ -213,26 +213,26 @@ func TestUnion(t *testing.T) {

 	// Test case 2: Union of an empty set and a non-empty set should return the non-empty set
 	set1 = MakeSet[int]()
-	set2 = MakeSet(1, 2, 3)
-	expected = MakeSet(1, 2, 3)
+	set2 = MakeSet[int](1, 2, 3)
+	expected = MakeSet[int](1, 2, 3)
 	result = set1.Union(set2)
 	if !reflect.DeepEqual(result, expected) {
 		t.Errorf("Expected %v, but got %v", expected, result)
 	}

 	// Test case 3: Union of two non-empty sets with common elements should return a set with unique elements
-	set1 = MakeSet(1, 2, 3)
-	set2 = MakeSet(2, 3, 4)
-	expected = MakeSet(1, 2, 3, 4)
+	set1 = MakeSet[int](1, 2, 3)
+	set2 = MakeSet[int](2, 3, 4)
+	expected = MakeSet[int](1, 2, 3, 4)
 	result = set1.Union(set2)
 	if !reflect.DeepEqual(result, expected) {
 		t.Errorf("Expected %v, but got %v", expected, result)
 	}

 	// Test case 4: Union of two non-empty sets with no common elements should return a set with all elements
-	set1 = MakeSet(1, 2, 3)
-	set2 = MakeSet(4, 5, 6)
-	expected = MakeSet(1, 2, 3, 4, 5, 6)
+	set1 = MakeSet[int](1, 2, 3)
+	set2 = MakeSet[int](4, 5, 6)
+	expected = MakeSet[int](1, 2, 3, 4, 5, 6)
 	result = set1.Union(set2)
 	if !reflect.DeepEqual(result, expected) {
 		t.Errorf("Expected %v, but got %v", expected, result)
--- a/pkg/obiutils/unique.go
+++ b/pkg/obiutils/unique.go
@@ -0,0 +1,20 @@
+package obiutils
+
+// Unique removes the duplicated values of a slice.
+//
+// Every value of the input is added to a Set and the members of that set
+// are returned, so the order of the result is the one produced by
+// Set.Members and is not guaranteed to follow the order of the input.
+//
+// Parameters:
+// - slice: the values to deduplicate; it is left untouched
+//
+// Returns:
+// - A new slice where each distinct value of the input appears once
+func Unique[T comparable](slice []T) []T {
+	distinct := Set[T]{}
+
+	for i := range slice {
+		distinct.Add(slice[i])
+	}
+
+	return distinct.Members()
+}