Replace MakeBioSequence call by NewBioSequence call,

Implements a new file format guesser
Adds some more API doc


Former-commit-id: 9837bf1c28beca6ddb599b367f93548950ba83c1
This commit is contained in:
2023-08-30 19:59:46 +02:00
parent c2533667b2
commit 3f8c0d6a2f
7 changed files with 582 additions and 116 deletions

View File

@ -8,21 +8,30 @@ import (
"fmt"
"math"
"strings"
"sync"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
)
// A pool of byte slices.
var _BuildAlignArenaPool = sync.Pool{
New: func() interface{} {
bs := make([]byte, 0, 300)
return &bs
},
}
// // A pool of byte slices.
// var _BuildAlignArenaPool = sync.Pool{
// New: func() interface{} {
// bs := make([]byte, 0, 300)
// return &bs
// },
// }
// _BuildAlignment builds the alignment between two sequences.
//
// It takes two sequences, a path, a gap character, and two buffers, and it builds the alignment by
// walking the path and copying the sequences into the buffers
// walking the path and copying the sequences into the buffers.
//
// Parameters:
// - seqA: a byte slice representing the first sequence.
// - seqB: a byte slice representing the second sequence.
// - path: a slice of integers representing the alignment path.
// - gap: a byte representing the gap character.
// - bufferA: a pointer to a byte slice for storing the aligned sequence A.
// - bufferB: a pointer to a byte slice for storing the aligned sequence B.
func _BuildAlignment(seqA, seqB []byte, path []int, gap byte, bufferA, bufferB *[]byte) {
*bufferA = (*bufferA)[:0]

View File

@ -2,11 +2,14 @@ package obiformats
import (
"bufio"
gzip "github.com/klauspost/pgzip"
"bytes"
"io"
"os"
"path"
"strings"
"regexp"
"github.com/gabriel-vasile/mimetype"
gzip "github.com/klauspost/pgzip"
log "github.com/sirupsen/logrus"
@ -14,36 +17,89 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
)
func GuessSeqFileType(firstline string) string {
switch {
case strings.HasPrefix(firstline, "#@ecopcr-v2"):
return "ecopcr"
case strings.HasPrefix(firstline, "#"):
return "ecopcr"
case strings.HasPrefix(firstline, ">"):
return "fasta"
case strings.HasPrefix(firstline, "@"):
return "fastq"
case strings.HasPrefix(firstline, "ID "):
return "embl"
case strings.HasPrefix(firstline, "LOCUS "):
return "genbank"
// Special case for genbank release files
// I hope it is stringent enough
case strings.HasSuffix(firstline, " Genetic Se"):
return "genbank"
default:
return "unknown"
// OBIMimeTypeGuesser reads the head of stream and guesses the MIME type of its content.
//
// Besides the detectors built into the mimetype library, it registers the
// following detectors as children of "text/plain":
//   - "text/fasta": the data starts with ">" followed by a non-space character.
//   - "text/fastq": the data starts with "@" followed by a non-space character.
//   - "text/ecopcr2": the data starts with "#@ecopcr-v2".
//   - "text/genbank": the data starts with "LOCUS ", or with a run of non-space
//     characters followed by spaces and "Genetic Sequence Data Bank" at the end
//     of a line (GenBank release files).
//   - "text/embl": the data starts with "ID ".
//
// Any other type (for instance "text/csv") comes from the mimetype library's
// own detectors.
//
// Parameters:
//   - stream: the input stream to read data from.
//
// Returns:
//   - *mimetype.MIME: the detected MIME type of the data.
//   - io.Reader: a reader that replays the bytes consumed for the detection,
//     followed by the remainder of stream.
//   - error: any read error other than reaching the end of the stream.
func OBIMimeTypeGuesser(stream io.Reader) (*mimetype.MIME, io.Reader, error) {
	fastaDetector := func(raw []byte, limit uint32) bool {
		ok, err := regexp.Match("^>[^ ]", raw)
		return ok && err == nil
	}

	fastqDetector := func(raw []byte, limit uint32) bool {
		ok, err := regexp.Match("^@[^ ]", raw)
		return ok && err == nil
	}

	ecoPCR2Detector := func(raw []byte, limit uint32) bool {
		return bytes.HasPrefix(raw, []byte("#@ecopcr-v2"))
	}

	genbankDetector := func(raw []byte, limit uint32) bool {
		isLocus := bytes.HasPrefix(raw, []byte("LOCUS "))
		isRelease, err := regexp.Match("^[^ ]* +Genetic Sequence Data Bank *\n", raw)
		return isLocus || (isRelease && err == nil)
	}

	emblDetector := func(raw []byte, limit uint32) bool {
		return bytes.HasPrefix(raw, []byte("ID "))
	}

	// NOTE(review): Extend appears to modify the mimetype package's shared
	// detector tree, so these five detectors are registered again on every
	// call — consider registering them once at package level.
	mimetype.Lookup("text/plain").Extend(fastaDetector, "text/fasta", ".fasta")
	mimetype.Lookup("text/plain").Extend(fastqDetector, "text/fastq", ".fastq")
	mimetype.Lookup("text/plain").Extend(ecoPCR2Detector, "text/ecopcr2", ".ecopcr")
	mimetype.Lookup("text/plain").Extend(genbankDetector, "text/genbank", ".seq")
	mimetype.Lookup("text/plain").Extend(emblDetector, "text/embl", ".dat")

	// Fill the buffer as far as the stream allows: a single Read call may
	// legally return only a few bytes, which would starve the detectors.
	buf := make([]byte, 1024*128)
	n, err := io.ReadFull(stream, buf)

	// io.EOF (empty stream) and io.ErrUnexpectedEOF (stream shorter than the
	// buffer) only signal a short input; they are not failures here.
	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
		return nil, nil, err
	}

	// Keep only the bytes actually read so that the zero padding of the
	// buffer is neither analysed by the detectors nor replayed to the caller.
	buf = buf[:n]

	// Detect the MIME type using the mimetype library
	mimeType := mimetype.Detect(buf)
	if mimeType == nil {
		return nil, nil, err
	}

	// Give the consumed bytes back to the caller, ahead of the unread data.
	newReader := io.MultiReader(bytes.NewReader(buf), stream)

	return mimeType, newReader, nil
}
// ReadSequencesFromFile reads sequences from a file and returns an iterator of bio sequences and an error.
//
// Parameters:
// - filename: The name of the file to read the sequences from.
// - options: Optional parameters to customize the reading process.
//
// Returns:
// - obiiter.IBioSequence: An iterator of bio sequences.
// - error: An error if any occurred during the reading process.
func ReadSequencesFromFile(filename string,
options ...WithOption) (obiiter.IBioSequence, error) {
var file *os.File
@ -71,35 +127,28 @@ func ReadSequencesFromFile(filename string,
reader = greader
}
breader := bufio.NewReader(reader)
mime, reader, err := OBIMimeTypeGuesser(reader)
tag, _ := breader.Peek(30)
if len(tag) < 30 {
newIter := obiiter.MakeIBioSequence()
newIter.Close()
return newIter, nil
if err != nil {
return obiiter.NilIBioSequence, err
}
filetype := GuessSeqFileType(string(tag))
log.Debugf("File guessed format : %s (tag: %s)",
filetype, (strings.Split(string(tag), "\n"))[0])
reader = breader
reader = bufio.NewReader(reader)
switch filetype {
case "fastq", "fasta":
switch mime.String() {
case "text/fasta", "text/fastq":
file.Close()
is, err := ReadFastSeqFromFile(filename, options...)
return is, err
case "ecopcr":
case "text/ecopcr2":
return ReadEcoPCR(reader, options...), nil
case "embl":
case "text/embl":
return ReadEMBL(reader, options...), nil
case "genbank":
case "text/genbank":
return ReadGenbank(reader, options...), nil
default:
log.Fatalf("File %s has guessed format %s which is not yet implemented",
filename, filetype)
filename, mime.String())
}
return obiiter.NilIBioSequence, nil

View File

@ -226,13 +226,13 @@ func (g *DeBruijnGraph) LongestConsensus(id string) (*obiseq.BioSequence, error)
s := g.DecodePath(path)
if len(s) > 0 {
seq := obiseq.MakeBioSequence(
seq := obiseq.NewBioSequence(
id,
[]byte(s),
"",
)
return &seq, nil
return seq, nil
}
return nil, fmt.Errorf("cannot identify optimum path")
@ -295,13 +295,13 @@ func (g *DeBruijnGraph) BestConsensus(id string) (*obiseq.BioSequence, error) {
s := g.DecodePath(path)
if len(s) > 0 {
seq := obiseq.MakeBioSequence(
seq := obiseq.NewBioSequence(
id,
[]byte(s),
"",
)
return &seq, nil
return seq, nil
}
return nil, fmt.Errorf("cannot identify optimum path")

View File

@ -34,6 +34,10 @@ type Quality []uint8
var __default_qualities__ = make(Quality, 0, 500)
// __make_default_qualities__ generates a default quality slice of the given length.
//
// It takes an integer parameter 'length' which specifies the desired length of the quality slice.
// It returns a Quality slice which is a subset of the '__default_qualities__' slice.
func __make_default_qualities__(length int) Quality {
cl := len(__default_qualities__)
if cl < length {
@ -59,11 +63,14 @@ type BioSequence struct {
feature []byte
paired *BioSequence // A pointer to the paired sequence
annotations Annotation
annot_lock *sync.Mutex
annot_lock sync.Mutex
}
// MakeEmptyBioSequence() creates a new BioSequence object with no data
func MakeEmptyBioSequence(preallocate int) BioSequence {
// NewEmptyBioSequence creates a new BioSequence object with an empty sequence.
//
// The preallocate parameter specifies the number of bytes to preallocate for the sequence. If preallocate is greater than 0, the sequence will be preallocated with the specified number of bytes. If preallocate is 0, the sequence will not be preallocated.
// The function returns a pointer to the newly created BioSequence object.
func NewEmptyBioSequence(preallocate int) *BioSequence {
atomic.AddInt32(&_NewSeq, 1)
atomic.AddInt32(&_InMemSeq, 1)
@ -72,7 +79,7 @@ func MakeEmptyBioSequence(preallocate int) BioSequence {
seq = GetSlice(preallocate)
}
return BioSequence{
return &BioSequence{
id: "",
definition: "",
source: "",
@ -81,36 +88,33 @@ func MakeEmptyBioSequence(preallocate int) BioSequence {
feature: nil,
paired: nil,
annotations: nil,
annot_lock: &sync.Mutex{},
annot_lock: sync.Mutex{},
}
}
// `NewEmptyBioSequence()` returns a pointer to a new empty BioSequence
func NewEmptyBioSequence(preallocate int) *BioSequence {
s := MakeEmptyBioSequence(preallocate)
return &s
}
// `MakeBioSequence` creates a new `BioSequence` with the given `id`, `sequence`, and `definition`
func MakeBioSequence(id string,
// NewBioSequence creates a new BioSequence object with the given ID, sequence, and definition.
//
// Parameters:
// - id: the ID of the BioSequence.
// - sequence: the sequence data of the BioSequence.
// - definition: the definition of the BioSequence.
//
// Returns:
// - *BioSequence: the newly created BioSequence object.
func NewBioSequence(id string,
sequence []byte,
definition string) BioSequence {
bs := MakeEmptyBioSequence(0)
definition string) *BioSequence {
bs := NewEmptyBioSequence(0)
bs.SetId(id)
bs.SetSequence(sequence)
bs.SetDefinition(definition)
return bs
}
// `NewBioSequence` creates a new `BioSequence` struct and returns a pointer to it
func NewBioSequence(id string,
sequence []byte,
definition string) *BioSequence {
s := MakeBioSequence(id, sequence, definition)
return &s
}
// A method that is called when the sequence is no longer needed.
// Recycle recycles the BioSequence object.
//
// It decreases the count of in-memory sequences and increases the count of recycled sequences.
// It also recycles the various slices and annotations of the BioSequence object.
func (sequence *BioSequence) Recycle() {
atomic.AddInt32(&_RecycleSeq, 1)
@ -133,9 +137,15 @@ func (sequence *BioSequence) Recycle() {
}
}
// Copying the BioSequence.
// Copy returns a new BioSequence that is a copy of the original BioSequence.
//
// It copies the id and definition fields of the original BioSequence to the new BioSequence.
// It also creates new slices and copies the values from the original BioSequence's sequence, qualities, and feature fields to the new BioSequence.
// If the original BioSequence has annotations, it locks the annot_lock and copies the annotations to the new BioSequence.
//
// The function returns the new BioSequence.
func (s *BioSequence) Copy() *BioSequence {
newSeq := MakeEmptyBioSequence(0)
newSeq := NewEmptyBioSequence(0)
newSeq.id = s.id
newSeq.definition = s.definition
@ -150,30 +160,45 @@ func (s *BioSequence) Copy() *BioSequence {
newSeq.annotations = GetAnnotation(s.annotations)
}
return &newSeq
return newSeq
}
// Id reports the identifier stored in the BioSequence.
//
// It takes no argument and returns the identifier as a string.
func (s *BioSequence) Id() string {
	identifier := s.id
	return identifier
}
// Definition reports the definition stored in the BioSequence.
//
// It takes no argument and returns the definition as a string.
func (s *BioSequence) Definition() string {
	text := s.definition
	return text
}
// Sequence gives access to the sequence data of the BioSequence.
//
// Returns:
//   - []byte: the internal sequence slice itself, not a copy.
func (s *BioSequence) Sequence() []byte {
	data := s.sequence
	return data
}
// String converts the sequence data of the BioSequence to a string.
//
// It takes no argument and returns the sequence bytes as a string.
func (s *BioSequence) String() string {
	text := string(s.sequence)
	return text
}
// Len reports how many bytes the sequence of the BioSequence holds.
//
// It takes no argument and returns that length as an int.
func (s *BioSequence) Len() int {
	size := len(s.sequence)
	return size
}
@ -301,39 +326,47 @@ func (s *BioSequence) WriteString(data string) (int, error) {
return s.Write(bdata)
}
// WriteByte adds one byte at the end of the BioSequence's sequence.
//
// Parameters:
//   - data: the byte to append to the sequence.
//
// Returns:
//   - error: always nil.
func (s *BioSequence) WriteByte(data byte) error {
	extended := append(s.sequence, data)
	s.sequence = extended

	return nil
}
// Clear empties the sequence of the BioSequence.
//
// The sequence is re-sliced to length zero; it is not replaced by a new slice.
// It takes no argument and returns nothing.
func (s *BioSequence) Clear() {
	s.sequence = s.sequence[:0]
}
// Composition calculates the composition of the BioSequence.
//
// It counts the occurrences of each nucleotide (a, c, g, t) in the sequence
// and returns a map with the counts.
//
// No parameters.
// Returns a map of byte to int, with the counts of each nucleotide.
func (s *BioSequence) Composition() map[byte]int {
counts := map[byte]int{
'a': 0,
'c': 0,
'g': 0,
't': 0,
'o': 0,
}
a := 0
c := 0
g := 0
t := 0
other := 0
for _, char := range s.sequence {
switch char {
case 'a':
a++
case 'c':
c++
case 'g':
g++
case 't':
t++
switch char | byte(32) {
case 'a', 'c', 'g', 't':
counts[char]++
default:
other++
counts['o']++
}
}
return map[byte]int{'a': a, 'c': c, 'g': g, 't': t, 'o': other}
return counts
}

View File

@ -0,0 +1,366 @@
package obiseq
import (
"bytes"
"reflect"
"sync"
"testing"
)
// TestNewEmptyBioSequence exercises NewEmptyBioSequence with a zero, a
// positive and a negative preallocate value.
//
// For 0 and -100 the returned sequence must be non-nil and of length 0.
// For 100 the returned sequence must be non-nil with a capacity of at least 100.
func TestNewEmptyBioSequence(t *testing.T) {
	// preallocate == 0: nothing is preallocated
	if seq := NewEmptyBioSequence(0); seq == nil {
		t.Errorf("NewEmptyBioSequence(0) returned nil")
	} else if n := len(seq.sequence); n != 0 {
		t.Errorf("Expected sequence length to be 0, got %d", n)
	}

	// preallocate > 0: the sequence storage is preallocated
	if seq := NewEmptyBioSequence(100); seq == nil {
		t.Errorf("NewEmptyBioSequence(100) returned nil")
	} else if c := cap(seq.sequence); c < 100 {
		t.Errorf("Expected sequence capacity to be at least 100, got %d", c)
	}

	// preallocate < 0: nothing is preallocated
	if seq := NewEmptyBioSequence(-100); seq == nil {
		t.Errorf("NewEmptyBioSequence(-100) returned nil")
	} else if n := len(seq.sequence); n != 0 {
		t.Errorf("Expected sequence length to be 0, got %d", n)
	}
}
// TestNewBioSequence checks that NewBioSequence yields a BioSequence whose ID,
// sequence and definition match what is expected for two sets of inputs.
//
// In both cases the sequence is given in upper case and is expected back in
// lower case.
func TestNewBioSequence(t *testing.T) {
	cases := []struct {
		id           string
		sequence     []byte
		definition   string
		wantSequence []byte
	}{
		{id: "seq1", sequence: []byte("ACGT"), definition: "DNA sequence", wantSequence: []byte("acgt")},
		{id: "seq2", sequence: []byte("ATCG"), definition: "RNA sequence", wantSequence: []byte("atcg")},
	}

	for _, tc := range cases {
		bs := NewBioSequence(tc.id, tc.sequence, tc.definition)

		if got := bs.Id(); got != tc.id {
			t.Errorf("Expected ID to be %s, but got %s", tc.id, got)
		}
		if got := bs.Sequence(); !bytes.Equal(got, tc.wantSequence) {
			t.Errorf("Expected sequence to be %v, but got %v", tc.wantSequence, got)
		}
		if got := bs.Definition(); got != tc.definition {
			t.Errorf("Expected definition to be %s, but got %s", tc.definition, got)
		}
	}
}
// TestBioSequence_Recycle exercises the Recycle method of BioSequence.
//
// Case 1: a BioSequence with sequence, feature, qualities and annotations set
// must come out with empty slices and nil annotations.
// Case 2: calling Recycle on a nil *BioSequence must not panic.
// Case 3: calling Recycle on a zero-value BioSequence must not panic.
func TestBioSequence_Recycle(t *testing.T) {
	// Case 1: every recyclable field is populated
	populated := &BioSequence{
		sequence:    []byte("ACGT"),
		feature:     []byte("..."),
		qualities:   []byte{30, 30, 30, 30},
		annotations: Annotation{"description": "Test"},
	}
	populated.Recycle()

	if got := populated.sequence; len(got) != 0 {
		t.Errorf("Expected sequence to be empty, got %v", got)
	}
	if got := populated.feature; len(got) != 0 {
		t.Errorf("Expected feature to be empty, got %v", got)
	}
	if got := populated.qualities; len(got) != 0 {
		t.Errorf("Expected qualities to be empty, got %v", got)
	}
	if got := populated.annotations; got != nil {
		t.Errorf("Expected annotations to be nil, got %v", got)
	}

	// Case 2: nil receiver, no panic expected
	var missing *BioSequence
	missing.Recycle()

	// Case 3: zero-value receiver, no panic expected
	blank := new(BioSequence)
	blank.Recycle()
}
// TestCopy checks the Copy method of BioSequence.
//
// It builds a fully populated BioSequence, copies it, and verifies that the
// id, definition, sequence, qualities, feature and annotations of the copy
// are equal to those of the original.
func TestCopy(t *testing.T) {
	original := &BioSequence{
		id:         "test",
		definition: "test sequence",
		sequence:   []byte("ATCG"),
		qualities:  []byte("1234"),
		feature:    []byte("feature1...feature2"),
		annotations: Annotation{
			"annotation1": "value1",
			"annotation2": "value2",
		},
		annot_lock: sync.Mutex{},
	}

	duplicate := original.Copy()

	// Scalar fields
	if want, got := original.id, duplicate.id; got != want {
		t.Errorf("Expected id to be %v, got %v", want, got)
	}
	if want, got := original.definition, duplicate.definition; got != want {
		t.Errorf("Expected definition to be %v, got %v", want, got)
	}

	// Slice fields
	if want, got := original.sequence, duplicate.sequence; !reflect.DeepEqual(got, want) {
		t.Errorf("Expected sequence to be %v, got %v", want, got)
	}
	if want, got := original.qualities, duplicate.qualities; !reflect.DeepEqual(got, want) {
		t.Errorf("Expected qualities to be %v, got %v", want, got)
	}
	if want, got := original.feature, duplicate.feature; !reflect.DeepEqual(got, want) {
		t.Errorf("Expected feature to be %v, got %v", want, got)
	}

	// Annotations
	if want, got := original.annotations, duplicate.annotations; !reflect.DeepEqual(got, want) {
		t.Errorf("Expected annotations to be %v, got %v", want, got)
	}
}
// TestBioSequence_Id checks the Id method of BioSequence.
//
// A BioSequence is built through NewBioSequence with the ID "ABC123" and the
// Id method must return that same ID.
func TestBioSequence_Id(t *testing.T) {
	const want = "ABC123"

	// Build the BioSequence through its constructor
	subject := NewBioSequence("ABC123", []byte(""), "")

	if got := subject.Id(); got != want {
		t.Errorf("Expected ID to be %s, but got %s", want, got)
	}
}
// TestBioSequenceDefinition checks the Definition method of BioSequence.
//
// Two BioSequence objects are built through NewBioSequence, one with an empty
// definition and one with "This is a definition"; Definition must return the
// definition given at construction in both cases.
func TestBioSequenceDefinition(t *testing.T) {
	for _, want := range []string{
		"",                     // case 1: empty definition
		"This is a definition", // case 2: non-empty definition
	} {
		seq := NewBioSequence("", []byte{}, want)
		if got := seq.Definition(); got != want {
			t.Errorf("Expected %q, but got %q", want, got)
		}
	}
}
// TestBioSequenceSequence checks the Sequence method of BioSequence.
//
// Two BioSequence objects are built through NewBioSequence, one with an empty
// sequence and one with "atcg"; Sequence must return exactly those bytes.
// The comparison is byte-exact (bytes.Equal), so a difference in letter case
// is reported as a failure.
func TestBioSequenceSequence(t *testing.T) {
	cases := []struct {
		input []byte
		want  []byte
	}{
		{input: []byte{}, want: []byte{}},             // case 1: empty sequence
		{input: []byte("atcg"), want: []byte("atcg")}, // case 2: non-empty sequence
	}

	for _, tc := range cases {
		seq := NewBioSequence("", tc.input, "")
		if actual := seq.Sequence(); !bytes.Equal(actual, tc.want) {
			t.Errorf("Expected %v, but got %v", tc.want, actual)
		}
	}
}
// TestBioSequence_String checks the String method of BioSequence.
//
// Case 1: a zero-value BioSequence must yield the empty string.
// Case 2: a BioSequence whose sequence is "acgt" must yield "acgt".
func TestBioSequence_String(t *testing.T) {
	// Case 1: empty sequence
	blank := new(BioSequence)
	if got, want := blank.String(), ""; got != want {
		t.Errorf("Test case 1 failed: expected %s, got %s", want, got)
	}

	// Case 2: non-empty sequence
	filled := &BioSequence{sequence: []byte("acgt")}
	if got, want := filled.String(), "acgt"; got != want {
		t.Errorf("Test case 2 failed: expected %s, got %s", want, got)
	}
}
// TestBioSequence_Len checks the Len method of BioSequence.
//
// Each case builds a BioSequence through NewBioSequence and compares the
// value returned by Len with the expected length:
//   - a nil sequence, expected length 0;
//   - a sequence of 5 characters, expected length 5;
//   - a sequence of 10 characters, expected length 10.
func TestBioSequence_Len(t *testing.T) {
	cases := []struct {
		sequence []byte
		want     int
	}{
		{sequence: nil, want: 0},
		{sequence: []byte("ATCGT"), want: 5},
		{sequence: []byte("AGCTAGCTAG"), want: 10},
	}

	for _, tc := range cases {
		s := NewBioSequence("", tc.sequence, "")
		// The result is named got, not len, so the builtin len is not shadowed.
		if got := s.Len(); got != tc.want {
			t.Errorf("Expected length: %d, but got: %d", tc.want, got)
		}
	}
}
// TestBioSequence_Composition checks the Composition method of BioSequence.
//
// Three BioSequence objects are built through NewBioSequence:
//  1. an empty sequence, for which every count must be 0;
//  2. "acgtACGT", for which a, c, g and t must each be counted twice;
//  3. "acgtACGT1234", for which the four digits must also be counted under 'o'.
//
// The map returned by Composition is compared with the expected map using
// reflect.DeepEqual.
func TestBioSequence_Composition(t *testing.T) {
	// Empty sequence
	wantEmpty := map[byte]int{'a': 0, 'c': 0, 'g': 0, 't': 0, 'o': 0}
	gotEmpty := NewBioSequence("", []byte(""), "").Composition()
	if !reflect.DeepEqual(gotEmpty, wantEmpty) {
		t.Errorf("Composition() returned incorrect result for empty sequence. Got %v, expected %v", gotEmpty, wantEmpty)
	}

	// Sequence with valid nucleotides
	wantValid := map[byte]int{'a': 2, 'c': 2, 'g': 2, 't': 2, 'o': 0}
	gotValid := NewBioSequence("", []byte("acgtACGT"), "").Composition()
	if !reflect.DeepEqual(gotValid, wantValid) {
		t.Errorf("Composition() returned incorrect result for sequence with valid nucleotides. Got %v, expected %v", gotValid, wantValid)
	}

	// Sequence with invalid nucleotides
	wantInvalid := map[byte]int{'a': 2, 'c': 2, 'g': 2, 't': 2, 'o': 4}
	gotInvalid := NewBioSequence("", []byte("acgtACGT1234"), "").Composition()
	if !reflect.DeepEqual(gotInvalid, wantInvalid) {
		t.Errorf("Composition() returned incorrect result for sequence with invalid nucleotides. Got %v, expected %v", gotInvalid, wantInvalid)
	}
}

View File

@ -73,7 +73,12 @@ func RecycleAnnotation(a *Annotation) {
}
}
// It returns a new Annotation object, initialized with the values from the first argument
// GetAnnotation returns an Annotation from the BioSequenceAnnotationPool.
//
// It takes as argument 0 or 1 Annotation object.
// If an annotation object is passed, it is copied into the new Annotation.
//
// It returns an Annotation.
func GetAnnotation(values ...Annotation) Annotation {
a := Annotation(nil)

View File

@ -271,10 +271,14 @@ func (m *NotABoolean) Error() string {
return m.message
}
// > It copies the contents of the `src` map into the `dest` map, but if the value is a map, slice, or
// array, it makes a deep copy of it
// MustFillMap fills the destination map with the values from the source map.
//
// The function takes in two parameters:
// - dest: a map[string]interface{} representing the destination map.
// - src: a map[string]interface{} representing the source map.
//
// There is no return value.
func MustFillMap(dest, src map[string]interface{}) {
for k, v := range src {
if IsAMap(v) || IsASlice(v) || IsAnArray(v) {
v = deepcopy.MustAnything(v)