Change the way sequence definitions are managed. When present, they are now stored as an attribute

Former-commit-id: 6e618377c05b42937d2eace3c9668390980ab68c
This commit is contained in:
2023-10-05 07:21:12 +02:00
parent 5c30ec354f
commit d23a911080
11 changed files with 115 additions and 30 deletions

View File

@ -298,6 +298,24 @@ func (pattern ApatPattern) FindAllIndex(sequence ApatSequence, begin, length int
return loc return loc
} }
// BestMatch finds the best match of a given pattern in a sequence.
//
// The function identifies the first occurrence of the pattern in the sequence.
// The search can be limited to a portion of the sequence using the begin and
// length parameters to find the next occurrences.
//
// The BestMatch method takes the following parameters:
// - pattern: the pattern to search for (ApatPattern).
// - sequence: the sequence to search in (ApatSequence).
// - begin: the starting index of the search (int).
// - length: the length of the search (int).
//
// It returns the following values:
// - start: the starting index of the best match (int).
// - end: the ending index of the best match (int).
// - nerr: the number of errors in the best match (int).
// - matched: a boolean indicating whether a match was found (bool).
func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) (start int, end int, nerr int, matched bool) { func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) (start int, end int, nerr int, matched bool) {
res := pattern.FindAllIndex(sequence, begin, length) res := pattern.FindAllIndex(sequence, begin, length)

View File

@ -0,0 +1,9 @@
package obiformats
import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
// ReadEmptyFile builds a biosequence iterator with obiiter.MakeIBioSequence
// and closes it immediately before returning it; this function never pushes
// any sequence into the iterator.
//
// The options are accepted but not consulted here (presumably kept for
// symmetry with the other Read* functions of the package — TODO confirm).
// The returned error is always nil.
func ReadEmptyFile(options ...WithOption) (obiiter.IBioSequence, error) {
	empty := obiiter.MakeIBioSequence()
	empty.Close()

	return empty, nil
}

View File

@ -294,6 +294,11 @@ func ReadFastaFromFile(filename string, options ...WithOption) (obiiter.IBioSequ
file, err := Ropen(filename) file, err := Ropen(filename)
if err == ErrNoContent {
log.Infof("file %s is empty", filename)
return ReadEmptyFile(options...)
}
if err != nil { if err != nil {
return obiiter.NilIBioSequence, err return obiiter.NilIBioSequence, err
} }
@ -305,6 +310,11 @@ func ReadFastaFromStdin(reader io.Reader, options ...WithOption) (obiiter.IBioSe
options = append(options, OptionsSource(obiutils.RemoveAllExt("stdin"))) options = append(options, OptionsSource(obiutils.RemoveAllExt("stdin")))
input, err := Buf(os.Stdin) input, err := Buf(os.Stdin)
if err == ErrNoContent {
log.Infof("stdin is empty")
return ReadEmptyFile(options...)
}
if err != nil { if err != nil {
log.Fatalf("open file error: %v", err) log.Fatalf("open file error: %v", err)
return obiiter.NilIBioSequence, err return obiiter.NilIBioSequence, err

View File

@ -339,6 +339,11 @@ func ReadFastqFromFile(filename string, options ...WithOption) (obiiter.IBioSequ
file, err := Ropen(filename) file, err := Ropen(filename)
if err == ErrNoContent {
log.Infof("file %s is empty", filename)
return ReadEmptyFile(options...)
}
if err != nil { if err != nil {
return obiiter.NilIBioSequence, err return obiiter.NilIBioSequence, err
} }
@ -350,6 +355,11 @@ func ReadFastqFromStdin(reader io.Reader, options ...WithOption) (obiiter.IBioSe
options = append(options, OptionsSource(obiutils.RemoveAllExt("stdin"))) options = append(options, OptionsSource(obiutils.RemoveAllExt("stdin")))
input, err := Buf(os.Stdin) input, err := Buf(os.Stdin)
if err == ErrNoContent {
log.Infof("stdin is empty")
return ReadEmptyFile(options...)
}
if err != nil { if err != nil {
log.Fatalf("open file error: %v", err) log.Fatalf("open file error: %v", err)
return obiiter.NilIBioSequence, err return obiiter.NilIBioSequence, err

View File

@ -59,9 +59,13 @@ func _parseMainNGSFilterTags(text string) obingslibrary.TagPair {
} }
} }
func _parseMainNGSFilter(text string) (obingslibrary.PrimerPair, obingslibrary.TagPair, string, string, bool) { func _parseMainNGSFilter(text string) (obingslibrary.PrimerPair, obingslibrary.TagPair, string, string, bool, bool) {
fields := strings.Fields(text) fields := strings.Fields(text)
if len(fields) < 6 {
return obingslibrary.PrimerPair{}, obingslibrary.TagPair{}, "", "", false, false
}
tags := _parseMainNGSFilterTags(fields[2]) tags := _parseMainNGSFilterTags(fields[2])
partial := fields[5] == "T" || fields[5] == "t" partial := fields[5] == "T" || fields[5] == "t"
@ -72,7 +76,8 @@ func _parseMainNGSFilter(text string) (obingslibrary.PrimerPair, obingslibrary.T
tags, tags,
fields[0], fields[0],
fields[1], fields[1],
partial partial,
true
} }
func ReadNGSFilter(reader io.Reader) (obingslibrary.NGSLibrary, error) { func ReadNGSFilter(reader io.Reader) (obingslibrary.NGSLibrary, error) {
@ -89,7 +94,15 @@ func ReadNGSFilter(reader io.Reader) (obingslibrary.NGSLibrary, error) {
split := strings.SplitN(line, "@", 2) split := strings.SplitN(line, "@", 2)
primers, tags, experiment, sample, partial := _parseMainNGSFilter(split[0]) if len(split) < 1 {
return nil, fmt.Errorf("line %d : invalid format", i+1)
}
primers, tags, experiment, sample, partial, ok := _parseMainNGSFilter(split[0])
if !ok {
return nil, fmt.Errorf("line %d : invalid format", i+1)
}
marker, _ := ngsfilter.GetMarker(primers.Forward, primers.Reverse) marker, _ := ngsfilter.GetMarker(primers.Forward, primers.Reverse)
pcr, ok := marker.GetPCR(tags.Forward, tags.Reverse) pcr, ok := marker.GetPCR(tags.Forward, tags.Reverse)

View File

@ -104,6 +104,16 @@ func (marker *Marker) Compile(forward, reverse string, maxError int, allowsIndel
return nil return nil
} }
// Match finds the best matching demultiplex for a given sequence.
//
// Parameters:
//
// marker - a pointer to a Marker struct that contains the forward and reverse primers.
// sequence - a pointer to a BioSequence struct that represents the input sequence.
//
// Returns:
//
// A pointer to a DemultiplexMatch struct that contains the best matching demultiplex.
func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch { func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch {
aseq, _ := obiapat.MakeApatSequence(sequence, false) aseq, _ := obiapat.MakeApatSequence(sequence, false)
@ -223,6 +233,15 @@ func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch {
return nil return nil
} }
// ExtractBarcode extracts the barcode from the given biosequence.
//
// Parameters:
// - sequence: The biosequence from which to extract the barcode.
// - inplace: A boolean indicating whether the barcode should be extracted in-place or not.
//
// Returns:
// - The biosequence with the extracted barcode.
// - An error indicating any issues encountered during the extraction process.
func (match *DemultiplexMatch) ExtractBarcode(sequence *obiseq.BioSequence, inplace bool) (*obiseq.BioSequence, error) { func (match *DemultiplexMatch) ExtractBarcode(sequence *obiseq.BioSequence, inplace bool) (*obiseq.BioSequence, error) {
if !inplace { if !inplace {
sequence = sequence.Copy() sequence = sequence.Copy()

View File

@ -55,8 +55,8 @@ type Annotation map[string]interface{}
// A BioSequence is a sequence of bytes with an identifier, a definition, a sequence, qualities, // A BioSequence is a sequence of bytes with an identifier, a definition, a sequence, qualities,
// features and annotations. It aims to represent a biological sequence // features and annotations. It aims to represent a biological sequence
type BioSequence struct { type BioSequence struct {
id string // The identidier of the sequence (private accessible through the method Id) id string // The identidier of the sequence (private accessible through the method Id)
definition string // The documentation of the sequence (private accessible through the method Definition) //definition string // The documentation of the sequence (private accessible through the method Definition)
source string // The filename without directory name and extension from where the sequence was read. source string // The filename without directory name and extension from where the sequence was read.
sequence []byte // The sequence itself, it is accessible by the methode Sequence sequence []byte // The sequence itself, it is accessible by the methode Sequence
qualities []byte // The quality scores of the sequence. qualities []byte // The quality scores of the sequence.
@ -80,8 +80,8 @@ func NewEmptyBioSequence(preallocate int) *BioSequence {
} }
return &BioSequence{ return &BioSequence{
id: "", id: "",
definition: "", //definition: "",
source: "", source: "",
sequence: seq, sequence: seq,
qualities: nil, qualities: nil,
@ -148,7 +148,7 @@ func (s *BioSequence) Copy() *BioSequence {
newSeq := NewEmptyBioSequence(0) newSeq := NewEmptyBioSequence(0)
newSeq.id = s.id newSeq.id = s.id
newSeq.definition = s.definition //newSeq.definition = s.definition
newSeq.sequence = CopySlice(s.sequence) newSeq.sequence = CopySlice(s.sequence)
newSeq.qualities = CopySlice(s.qualities) newSeq.qualities = CopySlice(s.qualities)
@ -176,7 +176,16 @@ func (s *BioSequence) Id() string {
// No parameters. // No parameters.
// Returns a string. // Returns a string.
func (s *BioSequence) Definition() string { func (s *BioSequence) Definition() string {
return s.definition definition := ""
var err error
def, ok := s.GetAttribute("definition")
if ok {
definition, err = obiutils.InterfaceToString(def)
if err != nil {
definition = ""
}
}
return definition
} }
// Sequence returns the sequence of the BioSequence. // Sequence returns the sequence of the BioSequence.
@ -315,7 +324,7 @@ func (s *BioSequence) SetId(id string) {
// //
// It takes a string parameter 'definition' and assigns it to the 'definition' field of the BioSequence struct. // It takes a string parameter 'definition' and assigns it to the 'definition' field of the BioSequence struct.
func (s *BioSequence) SetDefinition(definition string) { func (s *BioSequence) SetDefinition(definition string) {
s.definition = definition s.SetAttribute("definition", definition)
} }
// SetSource sets the source of the BioSequence. // SetSource sets the source of the BioSequence.

View File

@ -143,11 +143,10 @@ func TestBioSequence_Recycle(t *testing.T) {
// Returns: None. // Returns: None.
func TestCopy(t *testing.T) { func TestCopy(t *testing.T) {
seq := &BioSequence{ seq := &BioSequence{
id: "test", id: "test",
definition: "test sequence", sequence: []byte("ATCG"),
sequence: []byte("ATCG"), qualities: []byte("1234"),
qualities: []byte("1234"), feature: []byte("feature1...feature2"),
feature: []byte("feature1...feature2"),
annotations: Annotation{ annotations: Annotation{
"annotation1": "value1", "annotation1": "value1",
"annotation2": "value2", "annotation2": "value2",
@ -161,10 +160,6 @@ func TestCopy(t *testing.T) {
if newSeq.id != seq.id { if newSeq.id != seq.id {
t.Errorf("Expected id to be %v, got %v", seq.id, newSeq.id) t.Errorf("Expected id to be %v, got %v", seq.id, newSeq.id)
} }
if newSeq.definition != seq.definition {
t.Errorf("Expected definition to be %v, got %v", seq.definition, newSeq.definition)
}
// Test if the sequence, qualities, and feature fields are copied correctly // Test if the sequence, qualities, and feature fields are copied correctly
if !reflect.DeepEqual(newSeq.sequence, seq.sequence) { if !reflect.DeepEqual(newSeq.sequence, seq.sequence) {
t.Errorf("Expected sequence to be %v, got %v", seq.sequence, newSeq.sequence) t.Errorf("Expected sequence to be %v, got %v", seq.sequence, newSeq.sequence)

View File

@ -32,7 +32,7 @@ func (sequence *BioSequence) Subsequence(from, to int, circular bool) (*BioSeque
} }
newSeq.id = fmt.Sprintf("%s_sub[%d..%d]", sequence.Id(), from+1, to) newSeq.id = fmt.Sprintf("%s_sub[%d..%d]", sequence.Id(), from+1, to)
newSeq.definition = sequence.definition // newSeq.definition = sequence.definition
} else { } else {
newSeq, _ = sequence.Subsequence(from, sequence.Len(), false) newSeq, _ = sequence.Subsequence(from, sequence.Len(), false)
newSeq.Write(sequence.Sequence()[0:to]) newSeq.Write(sequence.Sequence()[0:to])

View File

@ -1,5 +1,17 @@
package obiutils package obiutils
import "fmt"
// InterfaceToString renders any value as a string.
//
// The value is formatted with the fmt package's default "%v" verb, so the
// result is whatever fmt prints for that value (for example "42" for the
// integer 42 and "<nil>" for a nil interface).
//
// The error result is always nil; it is presumably kept so the signature
// matches the other InterfaceTo* converters — TODO confirm.
func InterfaceToString(i interface{}) (val string, err error) {
	return fmt.Sprintf("%v", i), nil
}
// CastableToInt checks if the given input can be casted to an integer. // CastableToInt checks if the given input can be casted to an integer.
// //
// i: the value to check for castability. // i: the value to check for castability.

View File

@ -4,7 +4,6 @@ import (
"bufio" "bufio"
"bytes" "bytes"
"encoding/json" "encoding/json"
"fmt"
"io" "io"
"os" "os"
"reflect" "reflect"
@ -13,15 +12,6 @@ import (
"github.com/barkimedes/go-deepcopy" "github.com/barkimedes/go-deepcopy"
) )
// InterfaceToString formats i with the "%v" verb of fmt.Sprintf and returns
// the resulting text.
//
// No conversion failure is reported by this function: err is always nil.
func InterfaceToString(i interface{}) (val string, err error) {
	val = fmt.Sprintf("%v", i)
	return val, nil
}
// NotAnInteger defines a new type of Error : "NotAnInteger" // NotAnInteger defines a new type of Error : "NotAnInteger"
type NotAnInteger struct { type NotAnInteger struct {
message string message string