Patch a bug in the embl reader and adds some doc

Former-commit-id: 9b5f75fb14bcc3043da1647055279987a295d271
This commit is contained in:
2024-01-31 15:43:02 +01:00
parent f2f7b4574e
commit 23758b00f6
7 changed files with 105 additions and 4 deletions

View File

@ -36,6 +36,7 @@
- In `obimultiplex`, correct a bug leading to a miss-read of the ngsfilter file when tags where written in lower case.
- In `obitag`, correct a bug leading to the annotation by taxid 1 (root) all the sequences having a 100% match
with one the reference sequence.
- Correct a bug in the EMBL reader.
## November 16th, 2023. Release 4.1.0

View File

@ -133,7 +133,8 @@ func _ParseEmblFile(source string, input <-chan _FileChunk, out obiiter.IBioSequ
}
case strings.HasPrefix(line, " "):
parts := strings.SplitN(line[5:], " ", 7)
for i := 0; i < 6; i++ {
np := len(parts)
for i := 0; i < np; i++ {
seqBytes.WriteString(parts[i])
}
case line == "//":

View File

@ -762,3 +762,10 @@ func IBatchOver(data obiseq.BioSequenceSlice,
}
return newIter
}
// func IBatchOverClasses(data obiseq.BioSequenceSlice,
// classifier *obiseq.BioSequenceClassifier) IBioSequence {
// newIter := MakeIBioSequence()
// classMap := make(map[string]int)
// }

View File

@ -99,6 +99,11 @@ func (sequence *BioSequence) _revcmpMutation() *BioSequence {
return sequence
}
/**
* ReverseComplementWorker is a function that returns a SeqWorker which performs reverse complement operation on given BioSequence.
* @param inplace {bool}: If true, changes will be made to original sequence object else new sequence object will be created. Default value is false.
* @returns {SeqWorker} A function that accepts *BioSequence and returns its reversed-complement form.
*/
func ReverseComplementWorker(inplace bool) SeqWorker {
f := func(input *BioSequence) *BioSequence {
return input.ReverseComplement(inplace)

View File

@ -0,0 +1,64 @@
package obitax
import (
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
log "github.com/sirupsen/logrus"
)
// TaxonomyClassifier is a function that creates a new instance of the BioSequenceClassifier
// for taxonomic classification based on a given taxonomic rank, taxonomy, and abort flag.
//
// Parameters:
// - taxonomicRank: the taxonomic rank to classify the sequences at.
// - taxonomy: the taxonomy object used for classification.
// - abortOnMissing: a flag indicating whether to abort if a taxon is missing in the taxonomy.
//
// Return:
// - *obiseq.BioSequenceClassifier: the new instance of the BioSequenceClassifier.
func TaxonomyClassifier(taxonomicRank string,
taxonomy *Taxonomy,
abortOnMissing bool) *obiseq.BioSequenceClassifier {
code := func(sequence *obiseq.BioSequence) int {
taxid := sequence.Taxid()
taxon, err := taxonomy.Taxon(taxid)
if err == nil {
taxon = taxon.TaxonAtRank(taxonomicRank)
} else {
taxon = nil
}
if taxon == nil {
if abortOnMissing {
if err != nil {
log.Fatalf("Taxid %d not found in taxonomy", taxid)
} else {
log.Fatalf("Taxon at rank %s not found in taxonomy for taxid %d", taxonomicRank, taxid)
}
}
return 0
}
return taxon.Taxid()
}
value := func(k int) string {
taxon, _ := taxonomy.Taxon(k)
return taxon.ScientificName()
}
reset := func() {
}
clone := func() *obiseq.BioSequenceClassifier {
return TaxonomyClassifier(taxonomicRank, taxonomy, abortOnMissing)
}
c := obiseq.BioSequenceClassifier{
Code: code,
Value: value,
Reset: reset,
Clone: clone,
Type: "TaxonomyClassifier"}
return &c
}

View File

@ -6,6 +6,11 @@ import (
log "github.com/sirupsen/logrus"
)
// Path generates the lineage path from the current taxon up to the root.
//
// This method does not take parameters as it is called on a TaxNode receiver.
// It returns a pointer to a TaxonSlice containing the path and an error if
// the taxonomy needs reindexing.
func (taxon *TaxNode) Path() (*TaxonSlice, error) {
path := make(TaxonSlice, 0, 30)
@ -24,6 +29,18 @@ func (taxon *TaxNode) Path() (*TaxonSlice, error) {
return &path, nil
}
// TaxonAtRank traverses up the taxonomy tree starting from the current
// node until it finds a node that matches the specified rank.
//
// If a node with the given rank is not found in the path to the root,
// or if the taxonomy tree is not properly indexed (i.e., a node's parent
// is itself), the function will return nil. In case the taxonomy needs
// reindexing, the function will panic.
//
// rank: the taxonomic rank to search for (e.g., "species", "genus").
//
// Returns a pointer to a TaxNode representing the node at the
// specified rank, or nil if no such node exists in the path.
func (taxon *TaxNode) TaxonAtRank(rank string) *TaxNode {
for taxon.rank != rank && taxon != taxon.pparent {
taxon = taxon.pparent
@ -33,13 +50,20 @@ func (taxon *TaxNode) TaxonAtRank(rank string) *TaxNode {
}
}
if taxon == taxon.pparent && taxon.rank != rank {
if taxon == taxon.pparent && taxon.rank != rank {
taxon = nil
}
return taxon
}
// Species retrieves the TaxNode corresponding to the species rank.
//
// This method does not take any parameters. It is a convenience
// wrapper around the TaxonAtRank method, specifically retrieving
// the species-level taxonomic classification for the calling TaxNode.
//
// Returns a pointer to the TaxNode representing the species.
func (taxon *TaxNode) Species() *TaxNode {
return taxon.TaxonAtRank("species")
}
@ -52,8 +76,6 @@ func (taxon *TaxNode) Family() *TaxNode {
return taxon.TaxonAtRank("family")
}
// Returns a TaxonSet listing the requested taxon and all
// its ancestors in the taxonomy down to the root.
func (taxonomy *Taxonomy) Path(taxid int) (*TaxonSlice, error) {
taxon, err := taxonomy.Taxon(taxid)

View File

@ -0,0 +1 @@
package obilandmark