mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Patch a bug in the embl reader and adds some doc
Former-commit-id: 9b5f75fb14bcc3043da1647055279987a295d271
This commit is contained in:
@ -36,6 +36,7 @@
|
||||
- In `obimultiplex`, correct a bug leading to a miss-read of the ngsfilter file when tags where written in lower case.
|
||||
- In `obitag`, correct a bug leading to the annotation by taxid 1 (root) all the sequences having a 100% match
|
||||
with one the reference sequence.
|
||||
- Correct a bug in the EMBL reader.
|
||||
|
||||
## November 16th, 2023. Release 4.1.0
|
||||
|
||||
|
@ -133,7 +133,8 @@ func _ParseEmblFile(source string, input <-chan _FileChunk, out obiiter.IBioSequ
|
||||
}
|
||||
case strings.HasPrefix(line, " "):
|
||||
parts := strings.SplitN(line[5:], " ", 7)
|
||||
for i := 0; i < 6; i++ {
|
||||
np := len(parts)
|
||||
for i := 0; i < np; i++ {
|
||||
seqBytes.WriteString(parts[i])
|
||||
}
|
||||
case line == "//":
|
||||
|
@ -762,3 +762,10 @@ func IBatchOver(data obiseq.BioSequenceSlice,
|
||||
}
|
||||
return newIter
|
||||
}
|
||||
|
||||
// func IBatchOverClasses(data obiseq.BioSequenceSlice,
|
||||
// classifier *obiseq.BioSequenceClassifier) IBioSequence {
|
||||
|
||||
// newIter := MakeIBioSequence()
|
||||
// classMap := make(map[string]int)
|
||||
// }
|
||||
|
@ -99,6 +99,11 @@ func (sequence *BioSequence) _revcmpMutation() *BioSequence {
|
||||
return sequence
|
||||
}
|
||||
|
||||
/**
|
||||
* ReverseComplementWorker is a function that returns a SeqWorker which performs reverse complement operation on given BioSequence.
|
||||
* @param inplace {bool}: If true, changes will be made to original sequence object else new sequence object will be created. Default value is false.
|
||||
* @returns {SeqWorker} A function that accepts *BioSequence and returns its reversed-complement form.
|
||||
*/
|
||||
func ReverseComplementWorker(inplace bool) SeqWorker {
|
||||
f := func(input *BioSequence) *BioSequence {
|
||||
return input.ReverseComplement(inplace)
|
||||
|
64
pkg/obitax/bioseq_classifier.go
Normal file
64
pkg/obitax/bioseq_classifier.go
Normal file
@ -0,0 +1,64 @@
|
||||
package obitax
|
||||
|
||||
import (
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// TaxonomyClassifier is a function that creates a new instance of the BioSequenceClassifier
|
||||
// for taxonomic classification based on a given taxonomic rank, taxonomy, and abort flag.
|
||||
//
|
||||
// Parameters:
|
||||
// - taxonomicRank: the taxonomic rank to classify the sequences at.
|
||||
// - taxonomy: the taxonomy object used for classification.
|
||||
// - abortOnMissing: a flag indicating whether to abort if a taxon is missing in the taxonomy.
|
||||
//
|
||||
// Return:
|
||||
// - *obiseq.BioSequenceClassifier: the new instance of the BioSequenceClassifier.
|
||||
func TaxonomyClassifier(taxonomicRank string,
|
||||
taxonomy *Taxonomy,
|
||||
abortOnMissing bool) *obiseq.BioSequenceClassifier {
|
||||
|
||||
code := func(sequence *obiseq.BioSequence) int {
|
||||
taxid := sequence.Taxid()
|
||||
taxon, err := taxonomy.Taxon(taxid)
|
||||
if err == nil {
|
||||
taxon = taxon.TaxonAtRank(taxonomicRank)
|
||||
} else {
|
||||
taxon = nil
|
||||
}
|
||||
if taxon == nil {
|
||||
if abortOnMissing {
|
||||
if err != nil {
|
||||
log.Fatalf("Taxid %d not found in taxonomy", taxid)
|
||||
} else {
|
||||
log.Fatalf("Taxon at rank %s not found in taxonomy for taxid %d", taxonomicRank, taxid)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return 0
|
||||
}
|
||||
return taxon.Taxid()
|
||||
}
|
||||
|
||||
value := func(k int) string {
|
||||
taxon, _ := taxonomy.Taxon(k)
|
||||
return taxon.ScientificName()
|
||||
}
|
||||
|
||||
reset := func() {
|
||||
}
|
||||
|
||||
clone := func() *obiseq.BioSequenceClassifier {
|
||||
return TaxonomyClassifier(taxonomicRank, taxonomy, abortOnMissing)
|
||||
}
|
||||
|
||||
c := obiseq.BioSequenceClassifier{
|
||||
Code: code,
|
||||
Value: value,
|
||||
Reset: reset,
|
||||
Clone: clone,
|
||||
Type: "TaxonomyClassifier"}
|
||||
return &c
|
||||
}
|
@ -6,6 +6,11 @@ import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// Path generates the lineage path from the current taxon up to the root.
|
||||
//
|
||||
// This method does not take parameters as it is called on a TaxNode receiver.
|
||||
// It returns a pointer to a TaxonSlice containing the path and an error if
|
||||
// the taxonomy needs reindexing.
|
||||
func (taxon *TaxNode) Path() (*TaxonSlice, error) {
|
||||
|
||||
path := make(TaxonSlice, 0, 30)
|
||||
@ -24,6 +29,18 @@ func (taxon *TaxNode) Path() (*TaxonSlice, error) {
|
||||
return &path, nil
|
||||
}
|
||||
|
||||
// TaxonAtRank traverses up the taxonomy tree starting from the current
|
||||
// node until it finds a node that matches the specified rank.
|
||||
//
|
||||
// If a node with the given rank is not found in the path to the root,
|
||||
// or if the taxonomy tree is not properly indexed (i.e., a node's parent
|
||||
// is itself), the function will return nil. In case the taxonomy needs
|
||||
// reindexing, the function will panic.
|
||||
//
|
||||
// rank: the taxonomic rank to search for (e.g., "species", "genus").
|
||||
//
|
||||
// Returns a pointer to a TaxNode representing the node at the
|
||||
// specified rank, or nil if no such node exists in the path.
|
||||
func (taxon *TaxNode) TaxonAtRank(rank string) *TaxNode {
|
||||
for taxon.rank != rank && taxon != taxon.pparent {
|
||||
taxon = taxon.pparent
|
||||
@ -33,13 +50,20 @@ func (taxon *TaxNode) TaxonAtRank(rank string) *TaxNode {
|
||||
}
|
||||
}
|
||||
|
||||
if taxon == taxon.pparent && taxon.rank != rank {
|
||||
if taxon == taxon.pparent && taxon.rank != rank {
|
||||
taxon = nil
|
||||
}
|
||||
|
||||
return taxon
|
||||
}
|
||||
|
||||
// Species retrieves the TaxNode corresponding to the species rank.
|
||||
//
|
||||
// This method does not take any parameters. It is a convenience
|
||||
// wrapper around the TaxonAtRank method, specifically retrieving
|
||||
// the species-level taxonomic classification for the calling TaxNode.
|
||||
//
|
||||
// Returns a pointer to the TaxNode representing the species.
|
||||
func (taxon *TaxNode) Species() *TaxNode {
|
||||
return taxon.TaxonAtRank("species")
|
||||
}
|
||||
@ -52,8 +76,6 @@ func (taxon *TaxNode) Family() *TaxNode {
|
||||
return taxon.TaxonAtRank("family")
|
||||
}
|
||||
|
||||
// Returns a TaxonSet listing the requested taxon and all
|
||||
// its ancestors in the taxonomy down to the root.
|
||||
func (taxonomy *Taxonomy) Path(taxid int) (*TaxonSlice, error) {
|
||||
taxon, err := taxonomy.Taxon(taxid)
|
||||
|
||||
|
1
pkg/obitools/obilandmark/taxostat.go
Normal file
1
pkg/obitools/obilandmark/taxostat.go
Normal file
@ -0,0 +1 @@
|
||||
package obilandmark
|
Reference in New Issue
Block a user