mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Change the way sequence definitions are managed. When present, they are now stored as an attribute
Former-commit-id: 6e618377c05b42937d2eace3c9668390980ab68c
This commit is contained in:
@ -298,6 +298,24 @@ func (pattern ApatPattern) FindAllIndex(sequence ApatSequence, begin, length int
|
|||||||
return loc
|
return loc
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// BestMatch finds the best match of a given pattern in a sequence.
|
||||||
|
//
|
||||||
|
// The function identifies the first occurrence of the pattern in the sequence.
|
||||||
|
// The search can be limited to a portion of the sequence using the begin and
|
||||||
|
// length parameters to find the next occurrences.
|
||||||
|
//
|
||||||
|
// The BestMatch method ins
|
||||||
|
// It takes the following parameters:
|
||||||
|
// - pattern: the pattern to search for (ApatPattern).
|
||||||
|
// - sequence: the sequence to search in (ApatSequence).
|
||||||
|
// - begin: the starting index of the search (int).
|
||||||
|
// - length: the length of the search (int).
|
||||||
|
//
|
||||||
|
// It returns the following values:
|
||||||
|
// - start: the starting index of the best match (int).
|
||||||
|
// - end: the ending index of the best match (int).
|
||||||
|
// - nerr: the number of errors in the best match (int).
|
||||||
|
// - matched: a boolean indicating whether a match was found (bool).
|
||||||
func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) (start int, end int, nerr int, matched bool) {
|
func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) (start int, end int, nerr int, matched bool) {
|
||||||
res := pattern.FindAllIndex(sequence, begin, length)
|
res := pattern.FindAllIndex(sequence, begin, length)
|
||||||
|
|
||||||
|
9
pkg/obiformats/empty_file.go
Normal file
9
pkg/obiformats/empty_file.go
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
package obiformats
|
||||||
|
|
||||||
|
import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
|
|
||||||
|
func ReadEmptyFile(options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
|
out := obiiter.MakeIBioSequence()
|
||||||
|
out.Close()
|
||||||
|
return out, nil
|
||||||
|
}
|
@ -294,6 +294,11 @@ func ReadFastaFromFile(filename string, options ...WithOption) (obiiter.IBioSequ
|
|||||||
|
|
||||||
file, err := Ropen(filename)
|
file, err := Ropen(filename)
|
||||||
|
|
||||||
|
if err == ErrNoContent {
|
||||||
|
log.Infof("file %s is empty", filename)
|
||||||
|
return ReadEmptyFile(options...)
|
||||||
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return obiiter.NilIBioSequence, err
|
return obiiter.NilIBioSequence, err
|
||||||
}
|
}
|
||||||
@ -305,6 +310,11 @@ func ReadFastaFromStdin(reader io.Reader, options ...WithOption) (obiiter.IBioSe
|
|||||||
options = append(options, OptionsSource(obiutils.RemoveAllExt("stdin")))
|
options = append(options, OptionsSource(obiutils.RemoveAllExt("stdin")))
|
||||||
input, err := Buf(os.Stdin)
|
input, err := Buf(os.Stdin)
|
||||||
|
|
||||||
|
if err == ErrNoContent {
|
||||||
|
log.Infof("stdin is empty")
|
||||||
|
return ReadEmptyFile(options...)
|
||||||
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("open file error: %v", err)
|
log.Fatalf("open file error: %v", err)
|
||||||
return obiiter.NilIBioSequence, err
|
return obiiter.NilIBioSequence, err
|
||||||
|
@ -339,6 +339,11 @@ func ReadFastqFromFile(filename string, options ...WithOption) (obiiter.IBioSequ
|
|||||||
|
|
||||||
file, err := Ropen(filename)
|
file, err := Ropen(filename)
|
||||||
|
|
||||||
|
if err == ErrNoContent {
|
||||||
|
log.Infof("file %s is empty", filename)
|
||||||
|
return ReadEmptyFile(options...)
|
||||||
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return obiiter.NilIBioSequence, err
|
return obiiter.NilIBioSequence, err
|
||||||
}
|
}
|
||||||
@ -350,6 +355,11 @@ func ReadFastqFromStdin(reader io.Reader, options ...WithOption) (obiiter.IBioSe
|
|||||||
options = append(options, OptionsSource(obiutils.RemoveAllExt("stdin")))
|
options = append(options, OptionsSource(obiutils.RemoveAllExt("stdin")))
|
||||||
input, err := Buf(os.Stdin)
|
input, err := Buf(os.Stdin)
|
||||||
|
|
||||||
|
if err == ErrNoContent {
|
||||||
|
log.Infof("stdin is empty")
|
||||||
|
return ReadEmptyFile(options...)
|
||||||
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("open file error: %v", err)
|
log.Fatalf("open file error: %v", err)
|
||||||
return obiiter.NilIBioSequence, err
|
return obiiter.NilIBioSequence, err
|
||||||
|
@ -59,9 +59,13 @@ func _parseMainNGSFilterTags(text string) obingslibrary.TagPair {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func _parseMainNGSFilter(text string) (obingslibrary.PrimerPair, obingslibrary.TagPair, string, string, bool) {
|
func _parseMainNGSFilter(text string) (obingslibrary.PrimerPair, obingslibrary.TagPair, string, string, bool, bool) {
|
||||||
fields := strings.Fields(text)
|
fields := strings.Fields(text)
|
||||||
|
|
||||||
|
if len(fields) < 6 {
|
||||||
|
return obingslibrary.PrimerPair{}, obingslibrary.TagPair{}, "", "", false, false
|
||||||
|
}
|
||||||
|
|
||||||
tags := _parseMainNGSFilterTags(fields[2])
|
tags := _parseMainNGSFilterTags(fields[2])
|
||||||
partial := fields[5] == "T" || fields[5] == "t"
|
partial := fields[5] == "T" || fields[5] == "t"
|
||||||
|
|
||||||
@ -72,7 +76,8 @@ func _parseMainNGSFilter(text string) (obingslibrary.PrimerPair, obingslibrary.T
|
|||||||
tags,
|
tags,
|
||||||
fields[0],
|
fields[0],
|
||||||
fields[1],
|
fields[1],
|
||||||
partial
|
partial,
|
||||||
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
func ReadNGSFilter(reader io.Reader) (obingslibrary.NGSLibrary, error) {
|
func ReadNGSFilter(reader io.Reader) (obingslibrary.NGSLibrary, error) {
|
||||||
@ -89,7 +94,15 @@ func ReadNGSFilter(reader io.Reader) (obingslibrary.NGSLibrary, error) {
|
|||||||
|
|
||||||
split := strings.SplitN(line, "@", 2)
|
split := strings.SplitN(line, "@", 2)
|
||||||
|
|
||||||
primers, tags, experiment, sample, partial := _parseMainNGSFilter(split[0])
|
if len(split) < 1 {
|
||||||
|
return nil, fmt.Errorf("line %d : invalid format", i+1)
|
||||||
|
}
|
||||||
|
|
||||||
|
primers, tags, experiment, sample, partial, ok := _parseMainNGSFilter(split[0])
|
||||||
|
|
||||||
|
if !ok {
|
||||||
|
return nil, fmt.Errorf("line %d : invalid format", i+1)
|
||||||
|
}
|
||||||
|
|
||||||
marker, _ := ngsfilter.GetMarker(primers.Forward, primers.Reverse)
|
marker, _ := ngsfilter.GetMarker(primers.Forward, primers.Reverse)
|
||||||
pcr, ok := marker.GetPCR(tags.Forward, tags.Reverse)
|
pcr, ok := marker.GetPCR(tags.Forward, tags.Reverse)
|
||||||
|
@ -104,6 +104,16 @@ func (marker *Marker) Compile(forward, reverse string, maxError int, allowsIndel
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Match finds the best matching demultiplex for a given sequence.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
//
|
||||||
|
// marker - a pointer to a Marker struct that contains the forward and reverse primers.
|
||||||
|
// sequence - a pointer to a BioSequence struct that represents the input sequence.
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
//
|
||||||
|
// A pointer to a DemultiplexMatch struct that contains the best matching demultiplex.
|
||||||
func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch {
|
func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch {
|
||||||
aseq, _ := obiapat.MakeApatSequence(sequence, false)
|
aseq, _ := obiapat.MakeApatSequence(sequence, false)
|
||||||
|
|
||||||
@ -223,6 +233,15 @@ func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ExtractBarcode extracts the barcode from the given biosequence.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - sequence: The biosequence from which to extract the barcode.
|
||||||
|
// - inplace: A boolean indicating whether the barcode should be extracted in-place or not.
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
// - The biosequence with the extracted barcode.
|
||||||
|
// - An error indicating any issues encountered during the extraction process.
|
||||||
func (match *DemultiplexMatch) ExtractBarcode(sequence *obiseq.BioSequence, inplace bool) (*obiseq.BioSequence, error) {
|
func (match *DemultiplexMatch) ExtractBarcode(sequence *obiseq.BioSequence, inplace bool) (*obiseq.BioSequence, error) {
|
||||||
if !inplace {
|
if !inplace {
|
||||||
sequence = sequence.Copy()
|
sequence = sequence.Copy()
|
||||||
|
@ -55,8 +55,8 @@ type Annotation map[string]interface{}
|
|||||||
// A BioSequence is a sequence of bytes with an identifier, a definition, a sequence, qualities,
|
// A BioSequence is a sequence of bytes with an identifier, a definition, a sequence, qualities,
|
||||||
// features and annotations. It aims to represent a biological sequence
|
// features and annotations. It aims to represent a biological sequence
|
||||||
type BioSequence struct {
|
type BioSequence struct {
|
||||||
id string // The identifier of the sequence (private, accessible through the method Id)
|
id string // The identifier of the sequence (private, accessible through the method Id)
|
||||||
definition string // The documentation of the sequence (private accessible through the method Definition)
|
//definition string // The documentation of the sequence (private accessible through the method Definition)
|
||||||
source string // The filename without directory name and extension from where the sequence was read.
|
source string // The filename without directory name and extension from where the sequence was read.
|
||||||
sequence []byte // The sequence itself, it is accessible by the method Sequence
|
sequence []byte // The sequence itself, it is accessible by the method Sequence
|
||||||
qualities []byte // The quality scores of the sequence.
|
qualities []byte // The quality scores of the sequence.
|
||||||
@ -80,8 +80,8 @@ func NewEmptyBioSequence(preallocate int) *BioSequence {
|
|||||||
}
|
}
|
||||||
|
|
||||||
return &BioSequence{
|
return &BioSequence{
|
||||||
id: "",
|
id: "",
|
||||||
definition: "",
|
//definition: "",
|
||||||
source: "",
|
source: "",
|
||||||
sequence: seq,
|
sequence: seq,
|
||||||
qualities: nil,
|
qualities: nil,
|
||||||
@ -148,7 +148,7 @@ func (s *BioSequence) Copy() *BioSequence {
|
|||||||
newSeq := NewEmptyBioSequence(0)
|
newSeq := NewEmptyBioSequence(0)
|
||||||
|
|
||||||
newSeq.id = s.id
|
newSeq.id = s.id
|
||||||
newSeq.definition = s.definition
|
//newSeq.definition = s.definition
|
||||||
|
|
||||||
newSeq.sequence = CopySlice(s.sequence)
|
newSeq.sequence = CopySlice(s.sequence)
|
||||||
newSeq.qualities = CopySlice(s.qualities)
|
newSeq.qualities = CopySlice(s.qualities)
|
||||||
@ -176,7 +176,16 @@ func (s *BioSequence) Id() string {
|
|||||||
// No parameters.
|
// No parameters.
|
||||||
// Returns a string.
|
// Returns a string.
|
||||||
func (s *BioSequence) Definition() string {
|
func (s *BioSequence) Definition() string {
|
||||||
return s.definition
|
definition := ""
|
||||||
|
var err error
|
||||||
|
def, ok := s.GetAttribute("definition")
|
||||||
|
if ok {
|
||||||
|
definition, err = obiutils.InterfaceToString(def)
|
||||||
|
if err != nil {
|
||||||
|
definition = ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return definition
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sequence returns the sequence of the BioSequence.
|
// Sequence returns the sequence of the BioSequence.
|
||||||
@ -315,7 +324,7 @@ func (s *BioSequence) SetId(id string) {
|
|||||||
//
|
//
|
||||||
// It takes a string parameter 'definition' and assigns it to the 'definition' field of the BioSequence struct.
|
// It takes a string parameter 'definition' and stores it as the "definition" attribute of the BioSequence.
|
||||||
func (s *BioSequence) SetDefinition(definition string) {
|
func (s *BioSequence) SetDefinition(definition string) {
|
||||||
s.definition = definition
|
s.SetAttribute("definition", definition)
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetSource sets the source of the BioSequence.
|
// SetSource sets the source of the BioSequence.
|
||||||
|
@ -143,11 +143,10 @@ func TestBioSequence_Recycle(t *testing.T) {
|
|||||||
// Returns: None.
|
// Returns: None.
|
||||||
func TestCopy(t *testing.T) {
|
func TestCopy(t *testing.T) {
|
||||||
seq := &BioSequence{
|
seq := &BioSequence{
|
||||||
id: "test",
|
id: "test",
|
||||||
definition: "test sequence",
|
sequence: []byte("ATCG"),
|
||||||
sequence: []byte("ATCG"),
|
qualities: []byte("1234"),
|
||||||
qualities: []byte("1234"),
|
feature: []byte("feature1...feature2"),
|
||||||
feature: []byte("feature1...feature2"),
|
|
||||||
annotations: Annotation{
|
annotations: Annotation{
|
||||||
"annotation1": "value1",
|
"annotation1": "value1",
|
||||||
"annotation2": "value2",
|
"annotation2": "value2",
|
||||||
@ -161,10 +160,6 @@ func TestCopy(t *testing.T) {
|
|||||||
if newSeq.id != seq.id {
|
if newSeq.id != seq.id {
|
||||||
t.Errorf("Expected id to be %v, got %v", seq.id, newSeq.id)
|
t.Errorf("Expected id to be %v, got %v", seq.id, newSeq.id)
|
||||||
}
|
}
|
||||||
if newSeq.definition != seq.definition {
|
|
||||||
t.Errorf("Expected definition to be %v, got %v", seq.definition, newSeq.definition)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test if the sequence, qualities, and feature fields are copied correctly
|
// Test if the sequence, qualities, and feature fields are copied correctly
|
||||||
if !reflect.DeepEqual(newSeq.sequence, seq.sequence) {
|
if !reflect.DeepEqual(newSeq.sequence, seq.sequence) {
|
||||||
t.Errorf("Expected sequence to be %v, got %v", seq.sequence, newSeq.sequence)
|
t.Errorf("Expected sequence to be %v, got %v", seq.sequence, newSeq.sequence)
|
||||||
|
@ -32,7 +32,7 @@ func (sequence *BioSequence) Subsequence(from, to int, circular bool) (*BioSeque
|
|||||||
}
|
}
|
||||||
|
|
||||||
newSeq.id = fmt.Sprintf("%s_sub[%d..%d]", sequence.Id(), from+1, to)
|
newSeq.id = fmt.Sprintf("%s_sub[%d..%d]", sequence.Id(), from+1, to)
|
||||||
newSeq.definition = sequence.definition
|
// newSeq.definition = sequence.definition
|
||||||
} else {
|
} else {
|
||||||
newSeq, _ = sequence.Subsequence(from, sequence.Len(), false)
|
newSeq, _ = sequence.Subsequence(from, sequence.Len(), false)
|
||||||
newSeq.Write(sequence.Sequence()[0:to])
|
newSeq.Write(sequence.Sequence()[0:to])
|
||||||
|
@ -1,5 +1,17 @@
|
|||||||
package obiutils
|
package obiutils
|
||||||
|
|
||||||
|
import "fmt"
|
||||||
|
|
||||||
|
// InterfaceToString converts an interface value to a string.
|
||||||
|
//
|
||||||
|
// The function takes an interface{} value as a parameter and returns a string representation of that value.
|
||||||
|
// It returns the string representation and an error if any occurred during the conversion process.
|
||||||
|
func InterfaceToString(i interface{}) (val string, err error) {
|
||||||
|
err = nil
|
||||||
|
val = fmt.Sprintf("%v", i)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
// CastableToInt checks if the given input can be casted to an integer.
|
// CastableToInt checks if the given input can be casted to an integer.
|
||||||
//
|
//
|
||||||
// i: the value to check for castability.
|
// i: the value to check for castability.
|
||||||
|
@ -4,7 +4,6 @@ import (
|
|||||||
"bufio"
|
"bufio"
|
||||||
"bytes"
|
"bytes"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"reflect"
|
"reflect"
|
||||||
@ -13,15 +12,6 @@ import (
|
|||||||
"github.com/barkimedes/go-deepcopy"
|
"github.com/barkimedes/go-deepcopy"
|
||||||
)
|
)
|
||||||
|
|
||||||
// InterfaceToInt converts a interface{} to an integer value if possible.
|
|
||||||
// If not a "NotAnInteger" error is returned via the err
|
|
||||||
// return value and val is set to 0.
|
|
||||||
func InterfaceToString(i interface{}) (val string, err error) {
|
|
||||||
err = nil
|
|
||||||
val = fmt.Sprintf("%v", i)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// NotAnInteger defines a new type of Error : "NotAnInteger"
|
// NotAnInteger defines a new type of Error : "NotAnInteger"
|
||||||
type NotAnInteger struct {
|
type NotAnInteger struct {
|
||||||
message string
|
message string
|
||||||
|
Reference in New Issue
Block a user