Change the way sequence definitions are managed. When present, they are now stored as an attribute.

Former-commit-id: 6e618377c05b42937d2eace3c9668390980ab68c
This commit is contained in:
2023-10-05 07:21:12 +02:00
parent 5c30ec354f
commit d23a911080
11 changed files with 115 additions and 30 deletions

View File

@ -298,6 +298,24 @@ func (pattern ApatPattern) FindAllIndex(sequence ApatSequence, begin, length int
return loc
}
// BestMatch finds the best match of a given pattern in a sequence.
//
// The function identifies the first occurrence of the pattern in the sequence.
// The search can be limited to a portion of the sequence using the begin and
// length parameters to find the next occurrences.
//
// The BestMatch method takes the following parameters:
// - pattern: the pattern to search for (ApatPattern).
// - sequence: the sequence to search in (ApatSequence).
// - begin: the starting index of the search (int).
// - length: the length of the search (int).
//
// It returns the following values:
// - start: the starting index of the best match (int).
// - end: the ending index of the best match (int).
// - nerr: the number of errors in the best match (int).
// - matched: a boolean indicating whether a match was found (bool).
func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) (start int, end int, nerr int, matched bool) {
res := pattern.FindAllIndex(sequence, begin, length)

View File

@ -0,0 +1,9 @@
package obiformats
import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
// ReadEmptyFile builds a sequence iterator for an input that holds no data.
//
// The iterator is created with obiiter.MakeIBioSequence and closed right away,
// before being returned, so nothing is ever pushed to it by this function.
// The options arguments are accepted but not used here.
//
// The returned error is always nil.
func ReadEmptyFile(options ...WithOption) (obiiter.IBioSequence, error) {
	empty := obiiter.MakeIBioSequence()
	empty.Close()

	return empty, nil
}

View File

@ -294,6 +294,11 @@ func ReadFastaFromFile(filename string, options ...WithOption) (obiiter.IBioSequ
file, err := Ropen(filename)
if err == ErrNoContent {
log.Infof("file %s is empty", filename)
return ReadEmptyFile(options...)
}
if err != nil {
return obiiter.NilIBioSequence, err
}
@ -305,6 +310,11 @@ func ReadFastaFromStdin(reader io.Reader, options ...WithOption) (obiiter.IBioSe
options = append(options, OptionsSource(obiutils.RemoveAllExt("stdin")))
input, err := Buf(os.Stdin)
if err == ErrNoContent {
log.Infof("stdin is empty")
return ReadEmptyFile(options...)
}
if err != nil {
log.Fatalf("open file error: %v", err)
return obiiter.NilIBioSequence, err

View File

@ -339,6 +339,11 @@ func ReadFastqFromFile(filename string, options ...WithOption) (obiiter.IBioSequ
file, err := Ropen(filename)
if err == ErrNoContent {
log.Infof("file %s is empty", filename)
return ReadEmptyFile(options...)
}
if err != nil {
return obiiter.NilIBioSequence, err
}
@ -350,6 +355,11 @@ func ReadFastqFromStdin(reader io.Reader, options ...WithOption) (obiiter.IBioSe
options = append(options, OptionsSource(obiutils.RemoveAllExt("stdin")))
input, err := Buf(os.Stdin)
if err == ErrNoContent {
log.Infof("stdin is empty")
return ReadEmptyFile(options...)
}
if err != nil {
log.Fatalf("open file error: %v", err)
return obiiter.NilIBioSequence, err

View File

@ -59,9 +59,13 @@ func _parseMainNGSFilterTags(text string) obingslibrary.TagPair {
}
}
func _parseMainNGSFilter(text string) (obingslibrary.PrimerPair, obingslibrary.TagPair, string, string, bool) {
func _parseMainNGSFilter(text string) (obingslibrary.PrimerPair, obingslibrary.TagPair, string, string, bool, bool) {
fields := strings.Fields(text)
if len(fields) < 6 {
return obingslibrary.PrimerPair{}, obingslibrary.TagPair{}, "", "", false, false
}
tags := _parseMainNGSFilterTags(fields[2])
partial := fields[5] == "T" || fields[5] == "t"
@ -72,7 +76,8 @@ func _parseMainNGSFilter(text string) (obingslibrary.PrimerPair, obingslibrary.T
tags,
fields[0],
fields[1],
partial
partial,
true
}
func ReadNGSFilter(reader io.Reader) (obingslibrary.NGSLibrary, error) {
@ -89,7 +94,15 @@ func ReadNGSFilter(reader io.Reader) (obingslibrary.NGSLibrary, error) {
split := strings.SplitN(line, "@", 2)
primers, tags, experiment, sample, partial := _parseMainNGSFilter(split[0])
if len(split) < 1 {
return nil, fmt.Errorf("line %d : invalid format", i+1)
}
primers, tags, experiment, sample, partial, ok := _parseMainNGSFilter(split[0])
if !ok {
return nil, fmt.Errorf("line %d : invalid format", i+1)
}
marker, _ := ngsfilter.GetMarker(primers.Forward, primers.Reverse)
pcr, ok := marker.GetPCR(tags.Forward, tags.Reverse)

View File

@ -104,6 +104,16 @@ func (marker *Marker) Compile(forward, reverse string, maxError int, allowsIndel
return nil
}
// Match finds the best matching demultiplex for a given sequence.
//
// Parameters:
//
// marker - a pointer to a Marker struct that contains the forward and reverse primers.
// sequence - a pointer to a BioSequence struct that represents the input sequence.
//
// Returns:
//
// A pointer to a DemultiplexMatch struct that contains the best matching demultiplex.
func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch {
aseq, _ := obiapat.MakeApatSequence(sequence, false)
@ -223,6 +233,15 @@ func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch {
return nil
}
// ExtractBarcode extracts the barcode from the given biosequence.
//
// Parameters:
// - sequence: The biosequence from which to extract the barcode.
// - inplace: A boolean indicating whether the barcode should be extracted in-place or not.
//
// Returns:
// - The biosequence with the extracted barcode.
// - An error indicating any issues encountered during the extraction process.
func (match *DemultiplexMatch) ExtractBarcode(sequence *obiseq.BioSequence, inplace bool) (*obiseq.BioSequence, error) {
if !inplace {
sequence = sequence.Copy()

View File

@ -55,8 +55,8 @@ type Annotation map[string]interface{}
// A BioSequence is a sequence of bytes with an identifier, a definition, a sequence, qualities,
// features and annotations. It aims to represent a biological sequence
type BioSequence struct {
id string // The identifier of the sequence (private; accessible through the Id method)
definition string // The documentation of the sequence (private accessible through the method Definition)
id string // The identifier of the sequence (private; accessible through the Id method)
//definition string // The documentation of the sequence (private accessible through the method Definition)
source string // The filename without directory name and extension from where the sequence was read.
sequence []byte // The sequence itself; it is accessible through the Sequence method
qualities []byte // The quality scores of the sequence.
@ -80,8 +80,8 @@ func NewEmptyBioSequence(preallocate int) *BioSequence {
}
return &BioSequence{
id: "",
definition: "",
id: "",
//definition: "",
source: "",
sequence: seq,
qualities: nil,
@ -148,7 +148,7 @@ func (s *BioSequence) Copy() *BioSequence {
newSeq := NewEmptyBioSequence(0)
newSeq.id = s.id
newSeq.definition = s.definition
//newSeq.definition = s.definition
newSeq.sequence = CopySlice(s.sequence)
newSeq.qualities = CopySlice(s.qualities)
@ -176,7 +176,16 @@ func (s *BioSequence) Id() string {
// No parameters.
// Returns a string.
func (s *BioSequence) Definition() string {
// NOTE(review): this diff view shows the previous body (the direct field read
// on the next line) immediately followed by its replacement — confirm which
// lines are live before editing.
return s.definition
// Default to the empty string when no "definition" attribute is found.
definition := ""
var err error
def, ok := s.GetAttribute("definition")
if ok {
// The attribute value is converted to a string through obiutils.InterfaceToString.
definition, err = obiutils.InterfaceToString(def)
if err != nil {
// A failed conversion is reported to the caller as an empty definition.
definition = ""
}
}
return definition
}
// Sequence returns the sequence of the BioSequence.
@ -315,7 +324,7 @@ func (s *BioSequence) SetId(id string) {
//
// It takes a string parameter 'definition' and stores it under the "definition"
// attribute key of the BioSequence through SetAttribute.
func (s *BioSequence) SetDefinition(definition string) {
// NOTE(review): this diff view shows the previous field assignment (next line)
// immediately followed by its replacement — confirm which line is live.
s.definition = definition
s.SetAttribute("definition", definition)
}
// SetSource sets the source of the BioSequence.

View File

@ -143,11 +143,10 @@ func TestBioSequence_Recycle(t *testing.T) {
// Returns: None.
func TestCopy(t *testing.T) {
seq := &BioSequence{
id: "test",
definition: "test sequence",
sequence: []byte("ATCG"),
qualities: []byte("1234"),
feature: []byte("feature1...feature2"),
id: "test",
sequence: []byte("ATCG"),
qualities: []byte("1234"),
feature: []byte("feature1...feature2"),
annotations: Annotation{
"annotation1": "value1",
"annotation2": "value2",
@ -161,10 +160,6 @@ func TestCopy(t *testing.T) {
if newSeq.id != seq.id {
t.Errorf("Expected id to be %v, got %v", seq.id, newSeq.id)
}
if newSeq.definition != seq.definition {
t.Errorf("Expected definition to be %v, got %v", seq.definition, newSeq.definition)
}
// Test if the sequence, qualities, and feature fields are copied correctly
if !reflect.DeepEqual(newSeq.sequence, seq.sequence) {
t.Errorf("Expected sequence to be %v, got %v", seq.sequence, newSeq.sequence)

View File

@ -32,7 +32,7 @@ func (sequence *BioSequence) Subsequence(from, to int, circular bool) (*BioSeque
}
newSeq.id = fmt.Sprintf("%s_sub[%d..%d]", sequence.Id(), from+1, to)
newSeq.definition = sequence.definition
// newSeq.definition = sequence.definition
} else {
newSeq, _ = sequence.Subsequence(from, sequence.Len(), false)
newSeq.Write(sequence.Sequence()[0:to])

View File

@ -1,5 +1,17 @@
package obiutils
import "fmt"
// InterfaceToString renders any value as a string.
//
// The value i is formatted with the default "%v" verb of the fmt package, so
// the result is whatever fmt prints for that value (for example "42" for the
// integer 42, or "<nil>" for a nil interface).
//
// The err result exists for signature symmetry with the other conversion
// helpers; this implementation always returns a nil error.
func InterfaceToString(i interface{}) (val string, err error) {
	return fmt.Sprintf("%v", i), nil
}
// CastableToInt checks if the given input can be casted to an integer.
//
// i: the value to check for castability.

View File

@ -4,7 +4,6 @@ import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"io"
"os"
"reflect"
@ -13,15 +12,6 @@ import (
"github.com/barkimedes/go-deepcopy"
)
// InterfaceToString converts an interface{} to its string representation.
// The value is formatted with the "%v" verb of the fmt package; the returned
// error is always nil.
func InterfaceToString(i interface{}) (val string, err error) {
	val = fmt.Sprintf("%v", i)
	return val, nil
}
// NotAnInteger defines a new type of Error : "NotAnInteger"
type NotAnInteger struct {
message string