Change obiclean algorithm for a better evaluation of ratio

This commit is contained in:
2022-08-31 20:38:03 +02:00
parent 90ba980de6
commit 6b8f4490cf
8 changed files with 371 additions and 75 deletions

View File

@ -2,6 +2,8 @@ package obitax
import (
"fmt"
log "github.com/sirupsen/logrus"
)
func (taxon *TaxNode) Path() (*TaxonSlice, error) {
@ -22,6 +24,34 @@ func (taxon *TaxNode) Path() (*TaxonSlice, error) {
return &path, nil
}
// TaxonAtRank walks from the receiver towards the root of the taxonomy by
// following the pparent links, and returns the first node whose rank equals
// rank. The receiver itself is returned when it already has that rank.
//
// It returns nil when the receiver is nil, or when the walk stops on the
// root node (the node that is its own parent). The root is therefore never
// returned, even when its own rank equals rank.
//
// It panics through log.Panicln when a nil parent link is met during the
// walk.
func (taxon *TaxNode) TaxonAtRank(rank string) *TaxNode {
	// Methods can be invoked on a nil pointer; without this guard the loop
	// condition below would dereference it.
	if taxon == nil {
		return nil
	}

	for taxon.rank != rank && taxon != taxon.pparent {
		taxon = taxon.pparent
		if taxon == nil {
			log.Panicln("Taxonomy must be reindexed")
		}
	}

	// Stopping on the self-parented root means no node of the requested
	// rank was found on the way up.
	if taxon == taxon.pparent {
		return nil
	}

	return taxon
}
// Species returns the result of TaxonAtRank for the "species" rank.
func (taxon *TaxNode) Species() *TaxNode {
	const rank = "species"
	return taxon.TaxonAtRank(rank)
}
// Genus returns the result of TaxonAtRank for the "genus" rank.
func (taxon *TaxNode) Genus() *TaxNode {
	const rank = "genus"
	return taxon.TaxonAtRank(rank)
}
// Family returns the result of TaxonAtRank for the "family" rank.
func (taxon *TaxNode) Family() *TaxNode {
	const rank = "family"
	return taxon.TaxonAtRank(rank)
}
// Returns a TaxonSlice listing the requested taxon and all
// its ancestors in the taxonomy down to the root.
func (taxonomy *Taxonomy) Path(taxid int) (*TaxonSlice, error) {

View File

@ -0,0 +1,37 @@
package obitax
import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
)
// SetTaxonAtRank annotates sequence with the taxon that TaxonAtRank finds at
// the given rank, starting from the taxon designated by the sequence's taxid.
//
// When such a taxon exists, two attributes are set on the sequence: one named
// after the rank, holding the taxid of that taxon, and a second one named
// after the rank suffixed with "_name", holding its scientific name.
//
// If the taxid of the sequence cannot be resolved by the taxonomy, or if no
// taxon is found at the requested rank, the sequence is left untouched and
// nil is returned. Otherwise the taxon found at the requested rank is
// returned.
func (taxonomy *Taxonomy) SetTaxonAtRank(sequence *obiseq.BioSequence, rank string) *TaxNode {
	taxon, err := taxonomy.Taxon(sequence.Taxid())
	// The lookup result must not be used once an error is reported: walking
	// up from an unresolved taxon could dereference an invalid node.
	if err != nil {
		return nil
	}

	taxonAtRank := taxon.TaxonAtRank(rank)
	if taxonAtRank != nil {
		sequence.SetAttribute(rank, taxonAtRank.taxid)
		sequence.SetAttribute(rank+"_name", taxonAtRank.scientificname)
	}

	return taxonAtRank
}
// SetSpecies calls SetTaxonAtRank on sequence for the "species" rank and
// returns its result.
func (taxonomy *Taxonomy) SetSpecies(sequence *obiseq.BioSequence) *TaxNode {
	const rank = "species"
	return taxonomy.SetTaxonAtRank(sequence, rank)
}
// SetGenus calls SetTaxonAtRank on sequence for the "genus" rank and returns
// its result.
func (taxonomy *Taxonomy) SetGenus(sequence *obiseq.BioSequence) *TaxNode {
	const rank = "genus"
	return taxonomy.SetTaxonAtRank(sequence, rank)
}
// SetFamily calls SetTaxonAtRank on sequence for the "family" rank and
// returns its result.
func (taxonomy *Taxonomy) SetFamily(sequence *obiseq.BioSequence) *TaxNode {
	const rank = "family"
	return taxonomy.SetTaxonAtRank(sequence, rank)
}

View File

@ -22,7 +22,10 @@ func (taxonomy *Taxonomy) IsAValidTaxon(withAutoCorrection ...bool) obiseq.Seque
if err == nil && taxon.taxid != taxid {
if autocorrection {
sequence.SetTaxid(taxon.taxid)
log.Printf("Sequence %s : Taxid %d updated with %d", taxid, taxon.taxid)
log.Printf("Sequence %s : Taxid %d updated with %d",
sequence.Id(),
taxid,
taxon.taxid)
} else {
if _, ok := deprecatedTaxidsWarning[taxid]; !ok {
deprecatedTaxidsWarning[taxid] = true

View File

@ -0,0 +1,56 @@
package obitax
import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
log "github.com/sirupsen/logrus"
)
// MakeSetTaxonAtRankWorker builds an obiiter.SeqWorker that calls
// SetTaxonAtRank with the given rank on every sequence it receives and hands
// the same sequence back.
//
// The rank is checked once, when the worker is built: if it is not listed by
// taxonomy.RankList(), the error is reported through log.Fatalf.
func (taxonomy *Taxonomy) MakeSetTaxonAtRankWorker(rank string) obiiter.SeqWorker {
	if known := goutils.Contains(taxonomy.RankList(), rank); !known {
		log.Fatalf("%s is not a valid rank (allowed ranks are %v)",
			rank,
			taxonomy.RankList())
	}

	return func(seq *obiseq.BioSequence) *obiseq.BioSequence {
		taxonomy.SetTaxonAtRank(seq, rank)
		return seq
	}
}
// MakeSetSpeciesWorker builds an obiiter.SeqWorker that calls SetSpecies on
// every sequence it receives and hands the same sequence back.
func (taxonomy *Taxonomy) MakeSetSpeciesWorker() obiiter.SeqWorker {
	return func(seq *obiseq.BioSequence) *obiseq.BioSequence {
		taxonomy.SetSpecies(seq)
		return seq
	}
}
// MakeSetGenusWorker builds an obiiter.SeqWorker that calls SetGenus on every
// sequence it receives and hands the same sequence back.
func (taxonomy *Taxonomy) MakeSetGenusWorker() obiiter.SeqWorker {
	return func(seq *obiseq.BioSequence) *obiseq.BioSequence {
		taxonomy.SetGenus(seq)
		return seq
	}
}
// MakeSetFamilyWorker builds an obiiter.SeqWorker that calls SetFamily on
// every sequence it receives and hands the same sequence back.
func (taxonomy *Taxonomy) MakeSetFamilyWorker() obiiter.SeqWorker {
	return func(seq *obiseq.BioSequence) *obiseq.BioSequence {
		taxonomy.SetFamily(seq)
		return seq
	}
}