From b428c48353f93e594bce706774cc1ffb24a716d2 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Mon, 18 Mar 2024 19:11:41 +0100 Subject: [PATCH] Obitag, allow for reference sequences with bad taxid, which are discarded with the emission of a warning Former-commit-id: a8781da1afa86de9c34b008959f0705ae69fc64f --- pkg/obitax/lca.go | 4 ++-- pkg/obitools/obitag/obitag.go | 17 ++++++++++++++--- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/pkg/obitax/lca.go b/pkg/obitax/lca.go index 55650d5..c54a6bf 100644 --- a/pkg/obitax/lca.go +++ b/pkg/obitax/lca.go @@ -12,11 +12,11 @@ import ( func (t1 *TaxNode) LCA(t2 *TaxNode) (*TaxNode, error) { if t1 == nil { - log.Fatalf("Try to get LCA of nil taxon") + log.Panicf("Try to get LCA of nil taxon") } if t2 == nil { - log.Fatalf("Try to get LCA of nil taxon") + log.Panicf("Try to get LCA of nil taxon") } p1, err1 := t1.Path() diff --git a/pkg/obitools/obitag/obitag.go b/pkg/obitools/obitag/obitag.go index 57e3869..bdd8735 100644 --- a/pkg/obitools/obitag/obitag.go +++ b/pkg/obitools/obitag/obitag.go @@ -278,11 +278,22 @@ func CLIAssignTaxonomy(iterator obiiter.IBioSequence, buffer := make([]byte, 0, 1000) - for i, seq := range references { - refcounts[i] = obikmer.Count4Mer(seq, &buffer, nil) - taxa[i], _ = taxo.Taxon(seq.Taxid()) + var err error + j:= 0 + for _, seq := range references { + references[j] = seq + refcounts[j] = obikmer.Count4Mer(seq, &buffer, nil) + taxa[j], err = taxo.Taxon(seq.Taxid()) + if err == nil { + j++ + } else { + log.Warnf("Taxid %d is not described in the taxonomy. Sequence %s is discared from the reference database",seq.Taxid(),seq.Id()) + } } + references = references[:j] + refcounts = refcounts[:j] + worker := IdentifySeqWorker(references, refcounts, taxa, taxo, CLIRunExact()) return iterator.MakeIWorker(worker, false, obioptions.CLIParallelWorkers(), 0)