diff --git a/Release-notes.md b/Release-notes.md index 69cf654..9052c91 100644 --- a/Release-notes.md +++ b/Release-notes.md @@ -33,6 +33,8 @@ ### Bug fixes - Fix a bug in the parsing of the JSON header of FASTA and FASTQ files occurring when a string includes a curly brace. +- Fix a bug in the function looking for the closest match in `obitag`. This error led to some wrong taxonomic + assignment. ## February 16th, 2024. Release 4.1.2 diff --git a/pkg/obitools/obitag/obitag.go b/pkg/obitools/obitag/obitag.go index bdd8735..ea218a7 100644 --- a/pkg/obitools/obitag/obitag.go +++ b/pkg/obitools/obitag/obitag.go @@ -101,10 +101,6 @@ func FindClosests(sequence *obiseq.BioSequence, ref := references[order] score := int(1e9) - if maxe != -1 { - wordmin = obiutils.MaxInt(sequence.Len(), ref.Len()) - 3 - 4*maxe - } - if cw[order] < wordmin { break } @@ -124,12 +120,17 @@ func FindClosests(sequence *obiseq.BioSequence, } } + // + // We have found a better candidate than never + // if maxe == -1 || score < maxe { - bests = bests[:0] - bestidxs = bestidxs[:0] - maxe = score + bests = bests[:0] // Empty the best lists + bestidxs = bestidxs[:0] // + + maxe = score // Memorize the best scores + wordmin = max(0, max(sequence.Len(), ref.Len())-3-4*maxe) bestId = float64(lcs) / float64(alilength) - // log.Println(ref.Id(), maxe, bestId,bestidxs) + // log.Warnln(sequence.Id(), ref.Id(), cw[order], maxe, bestId, wordmin) } if score == maxe { @@ -279,7 +280,7 @@ func CLIAssignTaxonomy(iterator obiiter.IBioSequence, buffer := make([]byte, 0, 1000) var err error - j:= 0 + j := 0 for _, seq := range references { references[j] = seq refcounts[j] = obikmer.Count4Mer(seq, &buffer, nil) @@ -287,12 +288,12 @@ func CLIAssignTaxonomy(iterator obiiter.IBioSequence, if err == nil { j++ } else { - log.Warnf("Taxid %d is not described in the taxonomy. Sequence %s is discared from the reference database",seq.Taxid(),seq.Id()) + log.Warnf("Taxid %d is not described in the taxonomy. Sequence %s is discared from the reference database", seq.Taxid(), seq.Id()) } } references = references[:j] - refcounts = refcounts[:j] + refcounts = refcounts[:j] worker := IdentifySeqWorker(references, refcounts, taxa, taxo, CLIRunExact()) diff --git a/pkg/obitools/obitag/options.go b/pkg/obitools/obitag/options.go index 3f6d6d4..0e074bc 100644 --- a/pkg/obitools/obitag/options.go +++ b/pkg/obitools/obitag/options.go @@ -34,7 +34,7 @@ func TagOptionSet(options *getoptions.GetOpt) { // options.BoolVar(&_RunExact, "exact", _RunExact, // options.Alias("E"), - // options.Description("Unactivate the heuristic limatitating the sequence comparisons")) + // options.Description("Desactivate the heuristic limitating the sequence comparisons")) }