Files
obitools4/pkg/obiseq/taxonomy_classifier.go
Eric Coissac 51b3e83d32 some cleaning
2025-02-24 11:31:49 +01:00

82 lines
1.9 KiB
Go

package obiseq
import (
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
log "github.com/sirupsen/logrus"
)
// TaxonomyClassifier is a function that creates a new instance of the BioSequenceClassifier
// for taxonomic classification based on a given taxonomic rank, taxonomy, and abort flag.
//
// Parameters:
// - taxonomicRank: the taxonomic rank to classify the sequences at.
// - taxonomy: the taxonomy object used for classification.
// - abortOnMissing: a flag indicating whether to abort if a taxon is missing in the taxonomy.
//
// Return:
// - *obiseq.BioSequenceClassifier: the new instance of the BioSequenceClassifier.
func TaxonomyClassifier(taxonomicRank string,
taxonomy *obitax.Taxonomy,
abortOnMissing bool) *BioSequenceClassifier {
taxonomy = taxonomy.OrDefault(true)
keys := make(map[*obitax.TaxNode]int)
codes := make([]*obitax.TaxNode, 1)
codes[0] = nil
keys[nil] = 0
code := func(sequence *BioSequence) int {
taxon := sequence.Taxon(taxonomy)
if taxon != nil {
ttaxon := taxon.TaxonAtRank(taxonomicRank)
if abortOnMissing && ttaxon == nil {
log.Fatalf("Taxon at rank %s not found in taxonomy for taxid %s", taxonomicRank, taxon.String())
}
} else {
if abortOnMissing {
log.Fatalf("Sequence %s: Taxid %s not found in taxonomy",
sequence.Id(),
sequence.Taxid())
}
taxon = nil
}
k, ok := keys[taxon.Node]
if ok {
return k
}
k = len(codes)
keys[taxon.Node] = k
codes = append(codes, taxon.Node)
return k
}
value := func(k int) string {
taxon := codes[k]
return taxon.ScientificName()
}
reset := func() {
keys = make(map[*obitax.TaxNode]int)
codes = make([]*obitax.TaxNode, 1)
codes[0] = nil
keys[nil] = 0
}
clone := func() *BioSequenceClassifier {
return TaxonomyClassifier(taxonomicRank, taxonomy, abortOnMissing)
}
c := BioSequenceClassifier{
Code: code,
Value: value,
Reset: reset,
Clone: clone,
Type: "TaxonomyClassifier"}
return &c
}