mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Change the way sequence definitions are managed. When present, they are now stored as an attribute.
Former-commit-id: 6e618377c05b42937d2eace3c9668390980ab68c
This commit is contained in:
@ -298,6 +298,24 @@ func (pattern ApatPattern) FindAllIndex(sequence ApatSequence, begin, length int
|
||||
return loc
|
||||
}
|
||||
|
||||
// BestMatch finds the best match of a given pattern in a sequence.
|
||||
//
|
||||
// The function identifies the first occurrence of the pattern in the sequence.
|
||||
// The search can be limited to a portion of the sequence using the begin and
|
||||
// length parameters to find the next occurrences.
|
||||
//
|
||||
// The BestMatch method first calls FindAllIndex with the same sequence, begin and length.
|
||||
// It takes the following parameters:
|
||||
// - pattern: the pattern to search for (ApatPattern).
|
||||
// - sequence: the sequence to search in (ApatSequence).
|
||||
// - begin: the starting index of the search (int).
|
||||
// - length: the length of the search (int).
|
||||
//
|
||||
// It returns the following values:
|
||||
// - start: the starting index of the best match (int).
|
||||
// - end: the ending index of the best match (int).
|
||||
// - nerr: the number of errors in the best match (int).
|
||||
// - matched: a boolean indicating whether a match was found (bool).
|
||||
func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) (start int, end int, nerr int, matched bool) {
|
||||
res := pattern.FindAllIndex(sequence, begin, length)
|
||||
|
||||
|
9
pkg/obiformats/empty_file.go
Normal file
9
pkg/obiformats/empty_file.go
Normal file
@ -0,0 +1,9 @@
|
||||
package obiformats
|
||||
|
||||
import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||
|
||||
func ReadEmptyFile(options ...WithOption) (obiiter.IBioSequence, error) {
|
||||
out := obiiter.MakeIBioSequence()
|
||||
out.Close()
|
||||
return out, nil
|
||||
}
|
@ -294,6 +294,11 @@ func ReadFastaFromFile(filename string, options ...WithOption) (obiiter.IBioSequ
|
||||
|
||||
file, err := Ropen(filename)
|
||||
|
||||
if err == ErrNoContent {
|
||||
log.Infof("file %s is empty", filename)
|
||||
return ReadEmptyFile(options...)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return obiiter.NilIBioSequence, err
|
||||
}
|
||||
@ -305,6 +310,11 @@ func ReadFastaFromStdin(reader io.Reader, options ...WithOption) (obiiter.IBioSe
|
||||
options = append(options, OptionsSource(obiutils.RemoveAllExt("stdin")))
|
||||
input, err := Buf(os.Stdin)
|
||||
|
||||
if err == ErrNoContent {
|
||||
log.Infof("stdin is empty")
|
||||
return ReadEmptyFile(options...)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("open file error: %v", err)
|
||||
return obiiter.NilIBioSequence, err
|
||||
|
@ -339,6 +339,11 @@ func ReadFastqFromFile(filename string, options ...WithOption) (obiiter.IBioSequ
|
||||
|
||||
file, err := Ropen(filename)
|
||||
|
||||
if err == ErrNoContent {
|
||||
log.Infof("file %s is empty", filename)
|
||||
return ReadEmptyFile(options...)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return obiiter.NilIBioSequence, err
|
||||
}
|
||||
@ -350,6 +355,11 @@ func ReadFastqFromStdin(reader io.Reader, options ...WithOption) (obiiter.IBioSe
|
||||
options = append(options, OptionsSource(obiutils.RemoveAllExt("stdin")))
|
||||
input, err := Buf(os.Stdin)
|
||||
|
||||
if err == ErrNoContent {
|
||||
log.Infof("stdin is empty")
|
||||
return ReadEmptyFile(options...)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("open file error: %v", err)
|
||||
return obiiter.NilIBioSequence, err
|
||||
|
@ -59,9 +59,13 @@ func _parseMainNGSFilterTags(text string) obingslibrary.TagPair {
|
||||
}
|
||||
}
|
||||
|
||||
func _parseMainNGSFilter(text string) (obingslibrary.PrimerPair, obingslibrary.TagPair, string, string, bool) {
|
||||
func _parseMainNGSFilter(text string) (obingslibrary.PrimerPair, obingslibrary.TagPair, string, string, bool, bool) {
|
||||
fields := strings.Fields(text)
|
||||
|
||||
if len(fields) < 6 {
|
||||
return obingslibrary.PrimerPair{}, obingslibrary.TagPair{}, "", "", false, false
|
||||
}
|
||||
|
||||
tags := _parseMainNGSFilterTags(fields[2])
|
||||
partial := fields[5] == "T" || fields[5] == "t"
|
||||
|
||||
@ -72,7 +76,8 @@ func _parseMainNGSFilter(text string) (obingslibrary.PrimerPair, obingslibrary.T
|
||||
tags,
|
||||
fields[0],
|
||||
fields[1],
|
||||
partial
|
||||
partial,
|
||||
true
|
||||
}
|
||||
|
||||
func ReadNGSFilter(reader io.Reader) (obingslibrary.NGSLibrary, error) {
|
||||
@ -89,7 +94,15 @@ func ReadNGSFilter(reader io.Reader) (obingslibrary.NGSLibrary, error) {
|
||||
|
||||
split := strings.SplitN(line, "@", 2)
|
||||
|
||||
primers, tags, experiment, sample, partial := _parseMainNGSFilter(split[0])
|
||||
if len(split) < 1 {
|
||||
return nil, fmt.Errorf("line %d : invalid format", i+1)
|
||||
}
|
||||
|
||||
primers, tags, experiment, sample, partial, ok := _parseMainNGSFilter(split[0])
|
||||
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("line %d : invalid format", i+1)
|
||||
}
|
||||
|
||||
marker, _ := ngsfilter.GetMarker(primers.Forward, primers.Reverse)
|
||||
pcr, ok := marker.GetPCR(tags.Forward, tags.Reverse)
|
||||
|
@ -104,6 +104,16 @@ func (marker *Marker) Compile(forward, reverse string, maxError int, allowsIndel
|
||||
return nil
|
||||
}
|
||||
|
||||
// Match finds the best matching demultiplex for a given sequence.
|
||||
//
|
||||
// Parameters:
|
||||
//
|
||||
// marker - a pointer to a Marker struct that contains the forward and reverse primers.
|
||||
// sequence - a pointer to a BioSequence struct that represents the input sequence.
|
||||
//
|
||||
// Returns:
|
||||
//
|
||||
// A pointer to a DemultiplexMatch struct that contains the best matching demultiplex.
|
||||
func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch {
|
||||
aseq, _ := obiapat.MakeApatSequence(sequence, false)
|
||||
|
||||
@ -223,6 +233,15 @@ func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch {
|
||||
return nil
|
||||
}
|
||||
|
||||
// ExtractBarcode extracts the barcode from the given biosequence.
|
||||
//
|
||||
// Parameters:
|
||||
// - sequence: The biosequence from which to extract the barcode.
|
||||
// - inplace: A boolean indicating whether the barcode should be extracted in-place or not.
|
||||
//
|
||||
// Returns:
|
||||
// - The biosequence with the extracted barcode.
|
||||
// - An error indicating any issues encountered during the extraction process.
|
||||
func (match *DemultiplexMatch) ExtractBarcode(sequence *obiseq.BioSequence, inplace bool) (*obiseq.BioSequence, error) {
|
||||
if !inplace {
|
||||
sequence = sequence.Copy()
|
||||
|
@ -55,8 +55,8 @@ type Annotation map[string]interface{}
|
||||
// A BioSequence is a sequence of bytes with an identifier, a definition, a sequence, qualities,
|
||||
// features and annotations. It aims to represent a biological sequence
|
||||
type BioSequence struct {
|
||||
id string // The identidier of the sequence (private accessible through the method Id)
|
||||
definition string // The documentation of the sequence (private accessible through the method Definition)
|
||||
id string // The identifier of the sequence (private accessible through the method Id)
|
||||
//definition string // The documentation of the sequence (private accessible through the method Definition)
|
||||
source string // The filename without directory name and extension from where the sequence was read.
|
||||
sequence []byte // The sequence itself, it is accessible by the method Sequence
|
||||
qualities []byte // The quality scores of the sequence.
|
||||
@ -80,8 +80,8 @@ func NewEmptyBioSequence(preallocate int) *BioSequence {
|
||||
}
|
||||
|
||||
return &BioSequence{
|
||||
id: "",
|
||||
definition: "",
|
||||
id: "",
|
||||
//definition: "",
|
||||
source: "",
|
||||
sequence: seq,
|
||||
qualities: nil,
|
||||
@ -148,7 +148,7 @@ func (s *BioSequence) Copy() *BioSequence {
|
||||
newSeq := NewEmptyBioSequence(0)
|
||||
|
||||
newSeq.id = s.id
|
||||
newSeq.definition = s.definition
|
||||
//newSeq.definition = s.definition
|
||||
|
||||
newSeq.sequence = CopySlice(s.sequence)
|
||||
newSeq.qualities = CopySlice(s.qualities)
|
||||
@ -176,7 +176,16 @@ func (s *BioSequence) Id() string {
|
||||
// No parameters.
|
||||
// Returns a string.
|
||||
func (s *BioSequence) Definition() string {
|
||||
return s.definition
|
||||
definition := ""
|
||||
var err error
|
||||
def, ok := s.GetAttribute("definition")
|
||||
if ok {
|
||||
definition, err = obiutils.InterfaceToString(def)
|
||||
if err != nil {
|
||||
definition = ""
|
||||
}
|
||||
}
|
||||
return definition
|
||||
}
|
||||
|
||||
// Sequence returns the sequence of the BioSequence.
|
||||
@ -315,7 +324,7 @@ func (s *BioSequence) SetId(id string) {
|
||||
//
|
||||
// It takes a string parameter 'definition' and stores it under the "definition" attribute of the BioSequence.
|
||||
func (s *BioSequence) SetDefinition(definition string) {
|
||||
s.definition = definition
|
||||
s.SetAttribute("definition", definition)
|
||||
}
|
||||
|
||||
// SetSource sets the source of the BioSequence.
|
||||
|
@ -143,11 +143,10 @@ func TestBioSequence_Recycle(t *testing.T) {
|
||||
// Returns: None.
|
||||
func TestCopy(t *testing.T) {
|
||||
seq := &BioSequence{
|
||||
id: "test",
|
||||
definition: "test sequence",
|
||||
sequence: []byte("ATCG"),
|
||||
qualities: []byte("1234"),
|
||||
feature: []byte("feature1...feature2"),
|
||||
id: "test",
|
||||
sequence: []byte("ATCG"),
|
||||
qualities: []byte("1234"),
|
||||
feature: []byte("feature1...feature2"),
|
||||
annotations: Annotation{
|
||||
"annotation1": "value1",
|
||||
"annotation2": "value2",
|
||||
@ -161,10 +160,6 @@ func TestCopy(t *testing.T) {
|
||||
if newSeq.id != seq.id {
|
||||
t.Errorf("Expected id to be %v, got %v", seq.id, newSeq.id)
|
||||
}
|
||||
if newSeq.definition != seq.definition {
|
||||
t.Errorf("Expected definition to be %v, got %v", seq.definition, newSeq.definition)
|
||||
}
|
||||
|
||||
// Test if the sequence, qualities, and feature fields are copied correctly
|
||||
if !reflect.DeepEqual(newSeq.sequence, seq.sequence) {
|
||||
t.Errorf("Expected sequence to be %v, got %v", seq.sequence, newSeq.sequence)
|
||||
|
@ -32,7 +32,7 @@ func (sequence *BioSequence) Subsequence(from, to int, circular bool) (*BioSeque
|
||||
}
|
||||
|
||||
newSeq.id = fmt.Sprintf("%s_sub[%d..%d]", sequence.Id(), from+1, to)
|
||||
newSeq.definition = sequence.definition
|
||||
// newSeq.definition = sequence.definition
|
||||
} else {
|
||||
newSeq, _ = sequence.Subsequence(from, sequence.Len(), false)
|
||||
newSeq.Write(sequence.Sequence()[0:to])
|
||||
|
@ -1,5 +1,17 @@
|
||||
package obiutils
|
||||
|
||||
import "fmt"
|
||||
|
||||
// InterfaceToString converts an interface value to a string.
|
||||
//
|
||||
// The function takes an interface{} value as a parameter and returns a string representation of that value.
|
||||
// It returns the string representation and an error if any occurred during the conversion process.
|
||||
func InterfaceToString(i interface{}) (val string, err error) {
|
||||
err = nil
|
||||
val = fmt.Sprintf("%v", i)
|
||||
return
|
||||
}
|
||||
|
||||
// CastableToInt checks if the given input can be casted to an integer.
|
||||
//
|
||||
// i: the value to check for castability.
|
||||
|
@ -4,7 +4,6 @@ import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"reflect"
|
||||
@ -13,15 +12,6 @@ import (
|
||||
"github.com/barkimedes/go-deepcopy"
|
||||
)
|
||||
|
||||
// InterfaceToInt converts a interface{} to an integer value if possible.
|
||||
// If not a "NotAnInteger" error is returned via the err
|
||||
// return value and val is set to 0.
|
||||
func InterfaceToString(i interface{}) (val string, err error) {
|
||||
err = nil
|
||||
val = fmt.Sprintf("%v", i)
|
||||
return
|
||||
}
|
||||
|
||||
// NotAnInteger defines a new type of Error : "NotAnInteger"
|
||||
type NotAnInteger struct {
|
||||
message string
|
||||
|
Reference in New Issue
Block a user