mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Correction on obiformat of bug leading to partial parsing and add godocs
Former-commit-id: b27105355f1a330eedf6eaa72c8ac94f06806c28
This commit is contained in:
@ -11,14 +11,24 @@ import (
|
||||
|
||||
// StatsOnValues associates string keys with integer counts.
// NOTE(review): presumably each key is an observed value of a sequence
// attribute and each count the summed sequence counts for that value
// (see StatsPlusOne) — confirm against callers.
type StatsOnValues map[string]int
|
||||
|
||||
// StatsOnSlotName returns the name of the annotation slot that summarizes
// the statistics of occurrence for a given attribute.
//
// The slot name is the attribute key prefixed with "merged_".
//
// Parameters:
//   - key: the attribute key.
//
// Returns:
//   - string: the slot name, i.e. "merged_" + key.
func StatsOnSlotName(key string) string {
	return "merged_" + key
}
|
||||
|
||||
/*
|
||||
Tests if the sequence has already a slot summarizing statistics
|
||||
of occurrence for a given attribute.
|
||||
*/
|
||||
// HasStatsOn tests if the sequence has already a slot summarizing statistics of occurrence for a given attribute.
|
||||
//
|
||||
// Parameters:
|
||||
// - key: the attribute key (string)
|
||||
//
|
||||
// Return type:
|
||||
// - bool
|
||||
func (sequence *BioSequence) HasStatsOn(key string) bool {
|
||||
if !sequence.HasAnnotation() {
|
||||
return false
|
||||
@ -31,7 +41,14 @@ func (sequence *BioSequence) HasStatsOn(key string) bool {
|
||||
return ok
|
||||
}
|
||||
|
||||
// A function that takes a BioSequence and a key and returns a StatsOnValues.
|
||||
// StatsOn returns the slot summarizing statistics of occurrence for a given attribute.
|
||||
//
|
||||
// Parameters:
|
||||
// - key: the attribute key (string) to be summarized
|
||||
// - na: the value to be used if the attribute is not present
|
||||
//
|
||||
// Return type:
|
||||
// - StatsOnValues
|
||||
func (sequence *BioSequence) StatsOn(key string, na string) StatsOnValues {
|
||||
mkey := StatsOnSlotName(key)
|
||||
annotations := sequence.Annotations()
|
||||
@ -77,7 +94,14 @@ func (sequence *BioSequence) StatsOn(key string, na string) StatsOnValues {
|
||||
return stats
|
||||
}
|
||||
|
||||
// Adding the count of the sequence to the count of the key in the stats.
|
||||
// StatsPlusOne adds the count of the sequence toAdd to the count of the key in the stats.
|
||||
//
|
||||
// Parameters:
|
||||
// - key: the attribute key (string) to be summarized
|
||||
// - toAdd: the BioSequence to add to the stats
|
||||
// - na: the value to be used if the attribute is not present
|
||||
// Return type:
|
||||
// - bool
|
||||
func (sequence *BioSequence) StatsPlusOne(key string, toAdd *BioSequence, na string) bool {
|
||||
sval := na
|
||||
annotations := sequence.Annotations()
|
||||
@ -109,10 +133,14 @@ func (sequence *BioSequence) StatsPlusOne(key string, toAdd *BioSequence, na str
|
||||
old = 0
|
||||
}
|
||||
stats[sval] = old + toAdd.Count()
|
||||
annotations[StatsOnSlotName(key)] = stats
|
||||
annotations[StatsOnSlotName(key)] = stats // TODO: check if this is necessary
|
||||
return retval
|
||||
}
|
||||
|
||||
// Merge merges the given StatsOnValues with the current StatsOnValues.
|
||||
//
|
||||
// It takes a parameter `toMerged` of type StatsOnValues, which represents the StatsOnValues to be merged.
|
||||
// It returns a value of type StatsOnValues, which represents the merged StatsOnValues.
|
||||
func (stats StatsOnValues) Merge(toMerged StatsOnValues) StatsOnValues {
|
||||
for k, val := range toMerged {
|
||||
old, ok := stats[k]
|
||||
@ -125,7 +153,16 @@ func (stats StatsOnValues) Merge(toMerged StatsOnValues) StatsOnValues {
|
||||
return stats
|
||||
}
|
||||
|
||||
// Merging two sequences.
|
||||
// Merge merges two sequences into a single sequence.
|
||||
//
|
||||
// Parameters:
|
||||
// - tomerge: the sequence to be merged (BioSequence)
|
||||
// - na: the value to be used if the attribute is not present (string)
|
||||
// - inplace: a boolean indicating whether to merge in place or not (bool)
|
||||
// - statsOn: a variadic string parameter representing the attributes to be summarized (string)
|
||||
//
|
||||
// Return type:
|
||||
// - *BioSequence: the merged sequence (BioSequence)
|
||||
func (sequence *BioSequence) Merge(tomerge *BioSequence, na string, inplace bool, statsOn ...string) *BioSequence {
|
||||
if !inplace {
|
||||
sequence = sequence.Copy()
|
||||
@ -184,17 +221,15 @@ func (sequence *BioSequence) Merge(tomerge *BioSequence, na string, inplace bool
|
||||
return sequence
|
||||
}
|
||||
|
||||
/*
|
||||
*
|
||||
|
||||
Merges a set of sequence into a single sequence.
|
||||
|
||||
The function assumes that every sequence in the batch is
|
||||
identical in terms of sequence. Actually the function only
|
||||
aggregates the annotations of the different sequences to be merged
|
||||
|
||||
Quality information is lost during the merge procedure.
|
||||
*/
|
||||
// Merge merges the given sequences into a single sequence.
|
||||
//
|
||||
// Parameters:
|
||||
// - sequences: a slice of BioSequence objects to be merged (BioSequenceSlice)
|
||||
// - na: the value to be used if the attribute is not present (string)
|
||||
// - statsOn: a slice of strings representing the attributes to be summarized ([]string)
|
||||
//
|
||||
// Return type:
|
||||
// - *BioSequence: the merged sequence (BioSequence)
|
||||
func (sequences BioSequenceSlice) Merge(na string, statsOn []string) *BioSequence {
|
||||
seq := sequences[0]
|
||||
//sequences[0] = nil
|
||||
|
@ -12,10 +12,46 @@ type SeqAnnotator func(*BioSequence)
|
||||
// SeqWorker is a function that takes a single *BioSequence and returns a
// BioSequenceSlice together with an error.
type SeqWorker func(*BioSequence) (BioSequenceSlice, error)
|
||||
// SeqSliceWorker is a function that takes a BioSequenceSlice and returns a
// BioSequenceSlice together with an error.
type SeqSliceWorker func(BioSequenceSlice) (BioSequenceSlice, error)
|
||||
|
||||
// NilSeqWorker returns a BioSequenceSlice containing the input sequence and a nil error value.
|
||||
// This function is typically used as a placeholder or default worker in SeqToSliceWorker when no specific worker is needed.
|
||||
//
|
||||
// Parameters:
|
||||
//
|
||||
// seq *BioSequence: A pointer to a BioSequence struct that needs processing.
|
||||
//
|
||||
// Returns:
|
||||
//
|
||||
// BioSequenceSlice, error: This function returns a slice containing the input sequence and an error value. If no error occurred during the operation, it will be nil; otherwise, it will contain details about the error.
|
||||
func NilSeqWorker(seq *BioSequence) (BioSequenceSlice, error) {
|
||||
return BioSequenceSlice{seq}, nil
|
||||
}
|
||||
|
||||
// AnnotatorToSeqWorker is a higher-order function that takes a SeqAnnotator
|
||||
// function and returns a SeqWorker function. It is used to wrap a sequence
|
||||
// annotation function and convert it into a worker function that can be used
|
||||
// in a pipeline or workflow for processing biological sequences.
|
||||
//
|
||||
// Parameters:
|
||||
//
|
||||
// function SeqAnnotator: A function that takes a pointer to a BioSequence
|
||||
// struct and performs some annotation or processing on the sequence data.
|
||||
// The SeqAnnotator type is expected to be a function with the following
|
||||
// signature:
|
||||
// func(seq *BioSequence)
|
||||
// This function should modify the input BioSequence struct in-place by adding
|
||||
// annotations, metadata, or performing any other desired operations.
|
||||
//
|
||||
// Returns:
|
||||
//
|
||||
// SeqWorker: A function that takes a pointer to a BioSequence struct and
|
||||
// returns a BioSequenceSlice containing the input BioSequence, along with
|
||||
// an error value. The SeqWorker type is expected to be a function with the
|
||||
// following signature:
|
||||
// func(seq *BioSequence) (BioSequenceSlice, error)
|
||||
// The returned SeqWorker function wraps the provided SeqAnnotator function
|
||||
// and applies it to the input BioSequence before returning the modified
|
||||
// BioSequence in a BioSequenceSlice. The error value is always nil, as the
|
||||
// function does not perform any operations that could potentially fail.
|
||||
func AnnotatorToSeqWorker(function SeqAnnotator) SeqWorker {
|
||||
f := func(seq *BioSequence) (BioSequenceSlice, error) {
|
||||
function(seq)
|
||||
@ -24,6 +60,35 @@ func AnnotatorToSeqWorker(function SeqAnnotator) SeqWorker {
|
||||
return f
|
||||
}
|
||||
|
||||
// SeqToSliceWorker is a higher-order function that takes a SeqWorker and a
|
||||
// boolean value indicating whether to break on error and returns a SeqSliceWorker.
|
||||
// It can be used in a pipeline or workflow for processing biological sequences,
|
||||
// applying the provided worker to each element of a BioSequenceSlice and returning
|
||||
// a new slice.
|
||||
//
|
||||
// Parameters:
|
||||
//
|
||||
// worker SeqWorker: A function that takes a pointer to a BioSequence struct and
|
||||
// performs some processing on it.
|
||||
// The signature for this function is func(seq *BioSequence) (BioSequenceSlice, error).
|
||||
// This function should return a modified BioSequence in a BioSequenceSlice along with
|
||||
// an error value indicating whether the operation was successful or not.
|
||||
// breakOnError bool: A boolean flag that determines whether to stop processing further
|
||||
// elements in case of an error. If set to true and an error is encountered while
|
||||
// processing any element, it stops processing remaining elements and returns the processed
|
||||
// slice so far along with the encountered error. If set to false, it logs the error and
|
||||
// continues processing remaining elements.
|
||||
//
|
||||
// Returns:
|
||||
//
|
||||
// SeqSliceWorker: A function that takes a BioSequenceSlice (a slice of pointers to
|
||||
// BioSequence structs) as input and returns a processed BioSequenceSlice along with
|
||||
// an error value indicating whether the operation was successful or not.
|
||||
// The signature for this function is func(input BioSequenceSlice) (BioSequenceSlice, error).
|
||||
// If breakOnError is set to true and any element processing results in an error, it stops
|
||||
// further processing and returns the processed slice so far along with the encountered error.
|
||||
// Otherwise, it processes all elements and returns them as a single BioSequenceSlice along with
|
||||
// a nil error value.
|
||||
func SeqToSliceWorker(worker SeqWorker,
|
||||
breakOnError bool) SeqSliceWorker {
|
||||
var f SeqSliceWorker
|
||||
@ -68,6 +133,18 @@ func SeqToSliceWorker(worker SeqWorker,
|
||||
return f
|
||||
}
|
||||
|
||||
// SeqToSliceConditionalWorker creates a new SeqSliceWorker that processes each sequence in a slice based on a condition. It takes a SequencePredicate and a worker function as arguments. The worker function is only applied to sequences that satisfy the condition.
|
||||
// If `condition` is nil, this function just behaves like SeqToSliceWorker with the provided `worker`.
|
||||
// If `breakOnError` is true, the pipeline will stop and return an error if any sequence processing fails. Otherwise, it will log a warning message for each failed sequence.
|
||||
//
|
||||
// Parameters:
|
||||
// - condition SequencePredicate: A predicate function that determines which sequences should be processed by the worker.
|
||||
// - worker SeqWorker: The worker to be applied to the sequences that satisfy the condition.
|
||||
// - breakOnError bool: If true, the pipeline will stop and return an error if any sequence processing fails. Otherwise, it will log a warning message for each failed sequence.
|
||||
//
|
||||
// Returns:
|
||||
//
|
||||
// SeqSliceWorker: A new SeqSliceWorker that processes sequences based on a condition. This function returns a single SeqSliceWorker that can be used to process BioSequences in a workflow or pipeline.
|
||||
func SeqToSliceConditionalWorker(
|
||||
condition SequencePredicate,
|
||||
worker SeqWorker, breakOnError bool) SeqSliceWorker {
|
||||
@ -112,6 +189,17 @@ func SeqToSliceConditionalWorker(
|
||||
return f
|
||||
}
|
||||
|
||||
// ChainWorkers chains two workers together and returns a new SeqWorker. It takes an existing worker (`worker`) and a next worker as arguments, combines them into a pipeline and applies it to each BioSequence in the sequence slice.
|
||||
// If `next` is nil, this function just returns the input worker.
|
||||
// If `worker` is nil, this function just returns the next worker.
|
||||
//
|
||||
// Parameters:
|
||||
// - worker SeqWorker: The initial worker to be chained. This worker will be executed first on each sequence.
|
||||
// - next SeqWorker: The next worker in the pipeline. This worker will be applied to the output of `worker` for each sequence.
|
||||
//
|
||||
// Returns:
|
||||
//
|
||||
// SeqWorker: A new SeqWorker that chains the input workers together into a pipeline. This function returns a single SeqWorker that can be used to process BioSequences in a workflow or pipeline.
|
||||
func (worker SeqWorker) ChainWorkers(next SeqWorker) SeqWorker {
|
||||
if worker == nil {
|
||||
return next
|
||||
|
Reference in New Issue
Block a user