Files
obitools4/pkg/obiseq/biosequenceslice.go
Eric Coissac 6cb7a5a352 Changes to be committed:
modified:   cmd/obitools/obitag/main.go
	modified:   cmd/obitools/obitaxonomy/main.go
	modified:   pkg/obiformats/csvtaxdump_read.go
	modified:   pkg/obiformats/ecopcr_read.go
	modified:   pkg/obiformats/ncbitaxdump_read.go
	modified:   pkg/obiformats/ncbitaxdump_readtar.go
	modified:   pkg/obiformats/newick_write.go
	modified:   pkg/obiformats/options.go
	modified:   pkg/obiformats/taxonomy_read.go
	modified:   pkg/obiformats/universal_read.go
	modified:   pkg/obiiter/extract_taxonomy.go
	modified:   pkg/obioptions/options.go
	modified:   pkg/obioptions/version.go
	new file:   pkg/obiphylo/tree.go
	modified:   pkg/obiseq/biosequenceslice.go
	modified:   pkg/obiseq/taxonomy_methods.go
	modified:   pkg/obitax/taxonomy.go
	modified:   pkg/obitax/taxonset.go
	modified:   pkg/obitools/obiconvert/sequence_reader.go
	modified:   pkg/obitools/obitag/obitag.go
	modified:   pkg/obitools/obitaxonomy/obitaxonomy.go
	modified:   pkg/obitools/obitaxonomy/options.go
	deleted:    sample/.DS_Store
2025-06-04 09:48:10 +02:00

217 lines
5.2 KiB
Go

package obiseq
import (
"errors"
"fmt"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obilog"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
log "github.com/sirupsen/logrus"
"golang.org/x/exp/slices"
)
// BioSequenceSlice represents a collection or a set of BioSequence.
//
// BioSequenceSlice is used to define BioSequenceBatch
// a memory pool of BioSequenceSlice is managed to limit allocations.
type BioSequenceSlice []*BioSequence
// NewBioSequenceSlice returns a new BioSequenceSlice with the specified size.
//
// The size parameter is optional. If provided, the returned slice will be
// resized accordingly.
//
// Returns a pointer to the newly created BioSequenceSlice.
func NewBioSequenceSlice(size ...int) *BioSequenceSlice {
capacity := 0
if len(size) > 0 {
capacity = size[0]
}
slice := make(BioSequenceSlice, capacity)
return &slice
}
// MakeBioSequenceSlice creates a new BioSequenceSlice with the specified size(s).
//
// Parameters:
// - size: The size(s) of the BioSequenceSlice to create (optional).
//
// Return:
// A new BioSequenceSlice with the specified size(s).
func MakeBioSequenceSlice(size ...int) BioSequenceSlice {
return *NewBioSequenceSlice(size...)
}
// EnsureCapacity ensures that the BioSequenceSlice has a minimum capacity
//
// It takes an integer `capacity` as a parameter, which represents the desired minimum capacity of the BioSequenceSlice.
// It returns a pointer to the BioSequenceSlice.
func (s *BioSequenceSlice) EnsureCapacity(capacity int) *BioSequenceSlice {
var c int
if s != nil {
c = cap(*s)
} else {
c = 0
}
n := 0
for capacity > c {
old_c := c
*s = slices.Grow(*s, capacity)
c = cap(*s)
if c < capacity {
n++
if n < 4 {
obilog.Warnf("cannot allocate a Biosequence Slice of size %d (only %d from %d)", capacity, c, old_c)
} else {
log.Panicf("cannot allocate a Biosequence Slice of size %d (only %d from %d)", capacity, c, old_c)
}
}
}
return s
}
// Push appends a BioSequence to the BioSequenceSlice.
//
// It takes a pointer to a BioSequenceSlice and a BioSequence as parameters.
// It does not return anything.
func (s *BioSequenceSlice) Push(sequence *BioSequence) {
*s = append(*s, sequence)
}
// Pop returns and removes the last element from the BioSequenceSlice.
//
// It does not take any parameters.
// It returns *BioSequence, the last element of the slice.
func (s *BioSequenceSlice) Pop() *BioSequence {
// Get the length of the slice
length := len(*s)
// If the slice is empty, return nil
if length == 0 {
return nil
}
// Get the last element of the slice
lastElement := (*s)[length-1]
// Set the last element to nil
(*s)[length-1] = nil
// Remove the last element from the slice
*s = (*s)[:length-1]
// Return the last element
return lastElement
}
// Pop0 returns and removes the first element of the BioSequenceSlice.
//
// It does not take any parameters.
// It returns a pointer to a BioSequence object.
func (s *BioSequenceSlice) Pop0() *BioSequence {
if len(*s) == 0 {
return nil
}
firstElement := (*s)[0]
(*s)[0] = nil
*s = (*s)[1:]
return firstElement
}
// NotEmpty checks if the BioSequenceSlice is not empty.
//
// No parameters.
// Returns a boolean value indicating if the BioSequenceSlice is not empty.
func (s BioSequenceSlice) NotEmpty() bool {
return len(s) > 0
}
// Len returns the length of the BioSequenceSlice.
//
// It has no parameters.
// It returns an integer.
func (s BioSequenceSlice) Len() int {
return len(s)
}
// Size returns the total size of the BioSequenceSlice.
//
// It calculates the size by iterating over each BioSequence in the slice
// and summing up their lengths.
//
// Returns an integer representing the total size of the BioSequenceSlice.
func (s BioSequenceSlice) Size() int {
size := 0
for _, s := range s {
size += s.Len()
}
return size
}
func (s BioSequenceSlice) AttributeKeys(skip_map, skip_definition bool) obiutils.Set[string] {
keys := obiutils.MakeSet[string]()
for _, k := range s {
keys = keys.Union(k.AttributeKeys(skip_map, skip_definition))
}
return keys
}
func (s *BioSequenceSlice) SortOnCount(reverse bool) {
slices.SortFunc(*s, func(a, b *BioSequence) int {
if reverse {
return b.Count() - a.Count()
} else {
return a.Count() - b.Count()
}
})
}
func (s *BioSequenceSlice) SortOnLength(reverse bool) {
slices.SortFunc(*s, func(a, b *BioSequence) int {
if reverse {
return b.Len() - a.Len()
} else {
return a.Len() - b.Len()
}
})
}
func (s *BioSequenceSlice) ExtractTaxonomy(taxonomy *obitax.Taxonomy, seqAsTaxa bool) (*obitax.Taxonomy, error) {
var err error
for _, s := range *s {
path := s.Path()
if seqAsTaxa {
if len(path) == 0 {
return nil, fmt.Errorf("sequence %v has no path", s.Id())
}
last := path[len(path)-1]
taxname, _ := obiutils.SplitInTwo(last, ':')
if idx, ok := s.GetIntAttribute("seq_number"); !ok {
return nil, errors.New("sequences are not numbered")
} else {
path = append(path, fmt.Sprintf("%s:SEQ%010d [%s]@sequence", taxname, idx, s.Id()))
}
}
taxonomy, err = taxonomy.InsertPathString(path)
if err != nil {
return nil, err
}
}
return taxonomy, nil
}