Files
obitools4/pkg/obitax/taxonset.go
Eric Coissac 6cb7a5a352 Changes to be committed:
modified:   cmd/obitools/obitag/main.go
	modified:   cmd/obitools/obitaxonomy/main.go
	modified:   pkg/obiformats/csvtaxdump_read.go
	modified:   pkg/obiformats/ecopcr_read.go
	modified:   pkg/obiformats/ncbitaxdump_read.go
	modified:   pkg/obiformats/ncbitaxdump_readtar.go
	modified:   pkg/obiformats/newick_write.go
	modified:   pkg/obiformats/options.go
	modified:   pkg/obiformats/taxonomy_read.go
	modified:   pkg/obiformats/universal_read.go
	modified:   pkg/obiiter/extract_taxonomy.go
	modified:   pkg/obioptions/options.go
	modified:   pkg/obioptions/version.go
	new file:   pkg/obiphylo/tree.go
	modified:   pkg/obiseq/biosequenceslice.go
	modified:   pkg/obiseq/taxonomy_methods.go
	modified:   pkg/obitax/taxonomy.go
	modified:   pkg/obitax/taxonset.go
	modified:   pkg/obitools/obiconvert/sequence_reader.go
	modified:   pkg/obitools/obitag/obitag.go
	modified:   pkg/obitools/obitaxonomy/obitaxonomy.go
	modified:   pkg/obitools/obitaxonomy/options.go
	deleted:    sample/.DS_Store
2025-06-04 09:48:10 +02:00

265 lines
7.8 KiB
Go

/*
Package obitax provides functionality for managing taxonomic data structures,
specifically for representing and manipulating collections of taxa within a taxonomy.
It includes the TaxonSet structure, which holds mappings of taxon identifiers to their
corresponding TaxNode instances, along with methods for managing and querying these taxa.
*/
package obitax
import (
"fmt"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiphylo"
log "github.com/sirupsen/logrus"
)
// TaxonSet represents a collection of taxa within a taxonomy.
// It holds a mapping of taxon identifiers to their corresponding TaxNode instances,
// as well as a reference to the associated Taxonomy.
//
// Fields:
// - set: A map that associates taxon identifiers of type *string with their corresponding TaxNode instances.
// - nalias: The number of aliases in the TaxonSet.
// - taxonomy: A pointer to the Taxonomy instance that this TaxonSet belongs to.
type TaxonSet struct {
set map[*string]*TaxNode
nalias int
taxonomy *Taxonomy
}
// NewTaxonSet creates a new TaxonSet associated with the given Taxonomy.
// It initializes the set as an empty map and sets the alias count to zero.
//
// Returns:
// - A pointer to the newly created TaxonSet.
func (taxonomy *Taxonomy) NewTaxonSet() *TaxonSet {
return &TaxonSet{
set: make(map[*string]*TaxNode),
nalias: 0,
taxonomy: taxonomy.OrDefault(true),
}
}
// Get retrieves the TaxNode associated with the specified taxon identifier.
// It returns the TaxNode if it exists in the TaxonSet; otherwise, it returns nil.
//
// Parameters:
// - id: A pointer to the taxon identifier for which the TaxNode is to be retrieved.
//
// Returns:
// - A pointer to the TaxNode associated with the provided identifier, or nil
// if no such taxon exists in the set.
func (set *TaxonSet) Get(id *string) *Taxon {
if set == nil {
return nil
}
node := set.set[id]
if node == nil {
return nil
}
return &Taxon{
Taxonomy: set.taxonomy,
Node: set.set[id],
}
}
// Len returns the number of unique taxa in the TaxonSet.
// It calculates the count by subtracting the number of aliases from the total
// number of entries in the set.
//
// Returns:
// - An integer representing the count of unique taxa in the TaxonSet.
func (set *TaxonSet) Len() int {
if set == nil {
return 0
}
return len(set.set) - set.nalias
}
// Insert adds a TaxNode to the TaxonSet. If a taxon with the same identifier
// already exists in the set, it updates the reference. If the existing taxon was
// an alias, its alias count is decremented.
//
// Parameters:
// - taxon: A pointer to the TaxNode instance to be added to the TaxonSet.
//
// Behavior:
// - If a taxon with the same identifier already exists and is different from the
// new taxon, the alias count is decremented.
func (set *TaxonSet) Insert(node *TaxNode) *TaxonSet {
if set == nil {
log.Panic("Cannot insert node into nil TaxonSet")
}
if old := set.set[node.id]; old != nil && old.id != node.id {
set.nalias--
}
set.set[node.id] = node
return set
}
// InsertTaxon adds a Taxon to the TaxonSet. It verifies that the Taxon belongs
// to the same Taxonomy as the TaxonSet before insertion. If they do not match,
// it logs a fatal error and terminates the program.
//
// Parameters:
// - taxon: A pointer to the Taxon instance to be added to the TaxonSet.
func (set *TaxonSet) InsertTaxon(taxon *Taxon) *TaxonSet {
if set == nil {
set = taxon.Taxonomy.NewTaxonSet()
}
if set.taxonomy != taxon.Taxonomy {
log.Fatalf(
"Cannot insert taxon %s into taxon set belonging to %s taxonomy",
taxon.String(),
set.taxonomy.name,
)
}
return set.Insert(taxon.Node)
}
// Taxonomy returns a pointer to the Taxonomy instance that this TaxonSet belongs to.
//
// Returns:
// - A pointer to the Taxonomy instance that this TaxonSet belongs to.
func (set *TaxonSet) Taxonomy() *Taxonomy {
if set == nil {
return nil
}
return set.taxonomy
}
// Alias associates a given alias string with a specified TaxNode in the TaxonSet.
// It first converts the alias to its corresponding identifier using the Id method.
// If the original taxon is not part of the taxon set, it logs a fatal error and terminates the program.
//
// Parameters:
// - alias: A pointer to a string representing the alias to be associated with the taxon node.
// - node: A pointer to the TaxNode instance that the alias will refer to.
//
// Behavior:
// - If the original taxon corresponding to the alias is not part of the taxon set,
// the method will log a fatal error and terminate the program.
func (set *TaxonSet) Alias(id *string, taxon *Taxon) {
if set == nil {
log.Panic("Cannot add alias to a nil TaxonSet")
}
original := set.Get(taxon.Node.id)
if original == nil {
log.Fatalf("Original taxon %v is not part of taxon set", id)
}
set.set[id] = original.Node
set.nalias++
}
// IsAlias checks if the given identifier corresponds to an alias in the TaxonSet.
// It retrieves the TaxNode associated with the identifier and returns true if the
// node exists and its identifier is different from the provided identifier; otherwise, it returns false.
//
// Parameters:
// - id: A pointer to the identifier to be checked for alias status.
//
// Returns:
// - A boolean indicating whether the identifier corresponds to an alias in the set.
func (set *TaxonSet) IsAlias(id *string) bool {
taxon := set.Get(id)
return taxon != nil && taxon.Node.id != id
}
// IsATaxon checks if the given ID corresponds to a valid taxon node in the TaxonSet.
// It returns true if the node exists and its ID matches the provided ID; otherwise, it returns false.
// If the ID corresponds to an alias, it will return false.
//
// Parameters:
// - id: A pointer to the identifier of the taxon to check.
//
// Returns:
// - A boolean indicating whether the specified ID corresponds to a valid taxon node.
func (set *TaxonSet) IsATaxon(id *string) bool {
taxon := set.Get(id)
return taxon != nil && taxon.Node.id == id
}
// Contains checks if the TaxonSet contains a taxon node with the specified ID.
// It returns true if the node exists in the set; otherwise, it returns false.
// If the ID corresponds to an alias, it will return true if the alias exists.
//
// Parameters:
// - id: A pointer to the identifier of the taxon to check for presence in the set.
//
// Returns:
// - A boolean indicating whether the TaxonSet contains a taxon node with the specified ID.
func (set *TaxonSet) Contains(id *string) bool {
node := set.Get(id)
return node != nil
}
func (set *TaxonSet) Sort() *TaxonSlice {
if set == nil {
return nil
}
taxonomy := set.Taxonomy()
taxa := taxonomy.NewTaxonSlice(0, set.Len())
parent := make(map[*TaxNode]bool, set.Len())
pushed := true
for pushed {
pushed = false
for _, node := range set.set {
if !parent[node] && (parent[set.Get(node.parent).Node] ||
!set.Contains(node.parent) ||
node == taxonomy.Root().Node) {
pushed = true
taxa.slice = append(taxa.slice, node)
parent[node] = true
}
}
}
return taxa
}
func (taxo *TaxonSet) AsPhyloTree(root *TaxNode) (*obiphylo.PhyloNode, error) {
nodes := make(map[*string]*obiphylo.PhyloNode, taxo.Len())
tsi := taxo.Iterator()
log.Warnf("Coucou")
for tsi.Next() {
taxon := tsi.Get()
id := taxon.Node.Id()
node := obiphylo.NewPhyloNode()
rank := taxon.Rank()
node.Name = fmt.Sprintf("%s -%s@%s-", taxon.ScientificName(), *id, rank)
node.SetAttribute("rank", rank)
node.SetAttribute("parent", taxon.Parent().Node.Id())
nodes[id] = node
}
for id, node := range nodes {
if id == root.Id() {
continue
}
pid := node.GetAttribute("parent").(*string)
parent := nodes[pid]
if parent != nil {
parent.AddChild(node, 1)
} else {
return nil, fmt.Errorf("cannot find parent node for %s", *pid)
}
}
rid := root.Id()
return nodes[rid], nil
}