mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
82 lines
1.9 KiB
Go
82 lines
1.9 KiB
Go
package obiseq
|
|
|
|
import (
|
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
|
log "github.com/sirupsen/logrus"
|
|
)
|
|
|
|
// TaxonomyClassifier is a function that creates a new instance of the BioSequenceClassifier
|
|
// for taxonomic classification based on a given taxonomic rank, taxonomy, and abort flag.
|
|
//
|
|
// Parameters:
|
|
// - taxonomicRank: the taxonomic rank to classify the sequences at.
|
|
// - taxonomy: the taxonomy object used for classification.
|
|
// - abortOnMissing: a flag indicating whether to abort if a taxon is missing in the taxonomy.
|
|
//
|
|
// Return:
|
|
// - *obiseq.BioSequenceClassifier: the new instance of the BioSequenceClassifier.
|
|
func TaxonomyClassifier(taxonomicRank string,
|
|
taxonomy *obitax.Taxonomy,
|
|
abortOnMissing bool) *BioSequenceClassifier {
|
|
|
|
taxonomy = taxonomy.OrDefault(true)
|
|
|
|
keys := make(map[*obitax.TaxNode]int)
|
|
codes := make([]*obitax.TaxNode, 1)
|
|
codes[0] = nil
|
|
keys[nil] = 0
|
|
|
|
code := func(sequence *BioSequence) int {
|
|
taxon := sequence.Taxon(taxonomy)
|
|
if taxon != nil {
|
|
ttaxon := taxon.TaxonAtRank(taxonomicRank)
|
|
if abortOnMissing && ttaxon == nil {
|
|
log.Fatalf("Taxon at rank %s not found in taxonomy for taxid %s", taxonomicRank, taxon.String())
|
|
}
|
|
} else {
|
|
if abortOnMissing {
|
|
log.Fatalf("Sequence %s: Taxid %s not found in taxonomy",
|
|
sequence.Id(),
|
|
sequence.Taxid())
|
|
}
|
|
taxon = nil
|
|
}
|
|
|
|
k, ok := keys[taxon.Node]
|
|
|
|
if ok {
|
|
return k
|
|
}
|
|
|
|
k = len(codes)
|
|
keys[taxon.Node] = k
|
|
codes = append(codes, taxon.Node)
|
|
|
|
return k
|
|
}
|
|
|
|
value := func(k int) string {
|
|
taxon := codes[k]
|
|
return taxon.ScientificName()
|
|
}
|
|
|
|
reset := func() {
|
|
keys = make(map[*obitax.TaxNode]int)
|
|
codes = make([]*obitax.TaxNode, 1)
|
|
codes[0] = nil
|
|
keys[nil] = 0
|
|
}
|
|
|
|
clone := func() *BioSequenceClassifier {
|
|
return TaxonomyClassifier(taxonomicRank, taxonomy, abortOnMissing)
|
|
}
|
|
|
|
c := BioSequenceClassifier{
|
|
Code: code,
|
|
Value: value,
|
|
Reset: reset,
|
|
Clone: clone,
|
|
Type: "TaxonomyClassifier"}
|
|
return &c
|
|
}
|