Obitag: allow reference sequences with a bad taxid; such sequences are discarded and a warning is emitted

Former-commit-id: a8781da1afa86de9c34b008959f0705ae69fc64f
This commit is contained in:
Eric Coissac
2024-03-18 19:11:41 +01:00
parent aa07ef2127
commit b428c48353
2 changed files with 16 additions and 5 deletions

View File

@@ -278,11 +278,22 @@ func CLIAssignTaxonomy(iterator obiiter.IBioSequence,
buffer := make([]byte, 0, 1000)
for i, seq := range references {
refcounts[i] = obikmer.Count4Mer(seq, &buffer, nil)
taxa[i], _ = taxo.Taxon(seq.Taxid())
var err error
j:= 0
for _, seq := range references {
references[j] = seq
refcounts[j] = obikmer.Count4Mer(seq, &buffer, nil)
taxa[j], err = taxo.Taxon(seq.Taxid())
if err == nil {
j++
} else {
log.Warnf("Taxid %d is not described in the taxonomy. Sequence %s is discared from the reference database",seq.Taxid(),seq.Id())
}
}
references = references[:j]
refcounts = refcounts[:j]
worker := IdentifySeqWorker(references, refcounts, taxa, taxo, CLIRunExact())
return iterator.MakeIWorker(worker, false, obioptions.CLIParallelWorkers(), 0)