Patch a bug in obitag when some reference sequences have a taxid that is absent from the taxonomy

This commit is contained in:
Eric Coissac
2025-03-27 16:45:02 +01:00
parent 2d52322876
commit 03b5ce9397
5 changed files with 27 additions and 11 deletions

View File

@ -8,7 +8,7 @@ import (
// corresponds to the last commit, and not the one when the file will be
// committed
var _Commit = "fd80249"
var _Commit = "2d52322"
var _Version = "Release 4.4.0"
// Version returns the version of the obitools package.

View File

@ -27,7 +27,7 @@ type Taxon struct {
// Returns:
// - A formatted string representing the Taxon in the form "taxonomy_code:taxon_id [scientific_name]".
func (taxon *Taxon) String() string {
if taxon == nil {
if taxon == nil || taxon.Node == nil {
return "NA"
}
return taxon.Node.String(taxon.Taxonomy.code)

View File

@ -145,3 +145,18 @@ func (slice *TaxonSlice) Set(index int, taxon *Taxon) *TaxonSlice {
return slice
}
// Push appends the node of taxon at the end of the slice and returns the
// receiver, so that calls can be chained.
//
// The taxon must reference the same taxonomy as the slice; when it does not,
// log.Panic is called with an explanatory message.
func (slice *TaxonSlice) Push(taxon *Taxon) *TaxonSlice {
	if sameTaxonomy := slice.taxonomy == taxon.Taxonomy; !sameTaxonomy {
		log.Panic("Cannot add taxon from a different taxonomy")
	}

	node := taxon.Node
	slice.slice = append(slice.slice, node)

	return slice
}
// ReduceToSize truncates the slice so that it holds at most size taxa and
// returns the receiver, so that calls can be chained.
//
// When size is greater than or equal to the current length the slice is left
// untouched: re-slicing up to the capacity would silently grow it and expose
// entries that were never set. A negative size is a programming error and
// still triggers the runtime slice-bounds panic.
func (slice *TaxonSlice) ReduceToSize(size int) *TaxonSlice {
	if size < len(slice.slice) {
		slice.slice = slice.slice[:size]
	}

	return slice
}

View File

@ -249,16 +249,16 @@ func CLIAssignTaxonomy(iterator obiiter.IBioSequence,
[]*obikmer.Table4mer,
len(references))
taxa := taxo.NewTaxonSlice(references.Len(), references.Len())
taxa := taxo.NewTaxonSlice(0, references.Len())
buffer := make([]byte, 0, 1000)
j := 0
for _, seq := range references {
references[j] = seq
refcounts[j] = obikmer.Count4Mer(seq, &buffer, nil)
taxon := seq.Taxon(taxo)
if taxon != nil {
taxa.Set(j, taxon)
if taxon != nil && taxon.Node != nil {
references[j] = seq
refcounts[j] = obikmer.Count4Mer(seq, &buffer, nil)
taxa.Push(taxon)
j++
} else {
obilog.Warnf("Taxid %s is not described in the taxonomy %s."+
@ -267,6 +267,7 @@ func CLIAssignTaxonomy(iterator obiiter.IBioSequence,
}
}
log.Infof("%d reference sequences conserved on %d", j, len(references))
references = references[:j]
refcounts = refcounts[:j]