mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-12-08 16:50:27 +00:00
Patch rev complement and first implementation of --auto in obicsv
Former-commit-id: f3020e81283b1073c4d1c2d2ff0887e3998e6764
This commit is contained in:
@@ -8,6 +8,48 @@ import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// AttributeKeys returns the keys of the attributes in the BioSequence.
|
||||
//
|
||||
// It does not take any parameters.
|
||||
//
|
||||
// Returns:
|
||||
//
|
||||
// []string: The keys of the BioSequence.
|
||||
func (s *BioSequence) AttributeKeys(skip_map bool) obiutils.Set[string] {
|
||||
keys := obiutils.MakeSet[string]()
|
||||
|
||||
for k, v := range s.Annotations() {
|
||||
if !skip_map || !obiutils.IsAMap(v) {
|
||||
keys.Add(k)
|
||||
}
|
||||
}
|
||||
|
||||
return keys
|
||||
}
|
||||
|
||||
// Keys returns the keys of the BioSequence.
|
||||
//
|
||||
// It returns a slice of strings containing the keys of the BioSequence.
|
||||
// The keys include "id", "sequence", "qualities", and the attribute keys
|
||||
// of the BioSequence.
|
||||
//
|
||||
// Returns:
|
||||
//
|
||||
// []string: The keys of the BioSequence.
|
||||
func (s *BioSequence) Keys(skip_map bool) obiutils.Set[string] {
|
||||
keys := s.AttributeKeys(skip_map)
|
||||
keys.Add("id")
|
||||
|
||||
if s.HasSequence() {
|
||||
keys.Add("sequence")
|
||||
}
|
||||
if s.HasQualities() {
|
||||
keys.Add("qualities")
|
||||
}
|
||||
|
||||
return keys
|
||||
}
|
||||
|
||||
// HasAttribute checks if the BioSequence has the specified attribute.
|
||||
//
|
||||
// Parameters:
|
||||
@@ -16,6 +58,17 @@ import (
|
||||
// Returns:
|
||||
// - a boolean indicating whether the BioSequence has the attribute.
|
||||
func (s *BioSequence) HasAttribute(key string) bool {
|
||||
if key == "id" {
|
||||
return true
|
||||
}
|
||||
|
||||
if key == "sequence" && s.sequence != nil {
|
||||
return true
|
||||
}
|
||||
|
||||
if key == "qualities" && s.qualities != nil {
|
||||
return true
|
||||
}
|
||||
ok := s.annotations != nil
|
||||
|
||||
if ok {
|
||||
@@ -36,6 +89,25 @@ func (s *BioSequence) HasAttribute(key string) bool {
|
||||
// - val: The value associated with the given key.
|
||||
// - ok: A boolean indicating whether the key exists in the annotations map.
|
||||
func (s *BioSequence) GetAttribute(key string) (interface{}, bool) {
|
||||
|
||||
if key == "id" {
|
||||
return s.id, true
|
||||
}
|
||||
|
||||
if key == "sequence" {
|
||||
if s.HasSequence() {
|
||||
return s.String(), true
|
||||
}
|
||||
return nil, false
|
||||
}
|
||||
|
||||
if key == "qualities" {
|
||||
if s.HasQualities() {
|
||||
return s.QualitiesString(), true
|
||||
}
|
||||
return nil, false
|
||||
}
|
||||
|
||||
var val interface{}
|
||||
ok := s.annotations != nil
|
||||
|
||||
@@ -54,6 +126,17 @@ func (s *BioSequence) GetAttribute(key string) (interface{}, bool) {
|
||||
// - key: the key to set the value for.
|
||||
// - value: the value to set for the given key.
|
||||
func (s *BioSequence) SetAttribute(key string, value interface{}) {
|
||||
|
||||
if key == "id" {
|
||||
s.SetId(value.(string))
|
||||
return
|
||||
}
|
||||
|
||||
if key == "sequence" {
|
||||
s.SetSequence(value.([]byte))
|
||||
return
|
||||
}
|
||||
|
||||
annot := s.Annotations()
|
||||
|
||||
defer s.AnnotationsUnlock()
|
||||
|
||||
@@ -15,6 +15,7 @@ import (
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
@@ -55,8 +56,7 @@ type Annotation map[string]interface{}
|
||||
// A BioSequence is a sequence of bytes with an identifier, a definition, a sequence, qualities,
|
||||
// features and annotations. It aims to represent a biological sequence
|
||||
type BioSequence struct {
|
||||
id string // The identidier of the sequence (private accessible through the method Id)
|
||||
//definition string // The documentation of the sequence (private accessible through the method Definition)
|
||||
id string // The identidier of the sequence (private accessible through the method Id)
|
||||
source string // The filename without directory name and extension from where the sequence was read.
|
||||
sequence []byte // The sequence itself, it is accessible by the methode Sequence
|
||||
qualities []byte // The quality scores of the sequence.
|
||||
@@ -188,6 +188,14 @@ func (s *BioSequence) Definition() string {
|
||||
return definition
|
||||
}
|
||||
|
||||
// HasSequence checks if the BioSequence has a sequence.
|
||||
//
|
||||
// No parameters.
|
||||
// Returns a boolean.
|
||||
func (s *BioSequence) HasSequence() bool {
|
||||
return s.sequence != nil && len(s.sequence) > 0
|
||||
}
|
||||
|
||||
// Sequence returns the sequence of the BioSequence.
|
||||
//
|
||||
// Returns:
|
||||
@@ -217,7 +225,7 @@ func (s *BioSequence) Len() int {
|
||||
// This function does not have any parameters.
|
||||
// It returns a boolean value indicating whether the BioSequence has qualities.
|
||||
func (s *BioSequence) HasQualities() bool {
|
||||
return len(s.qualities) > 0
|
||||
return s.qualities != nil && len(s.qualities) > 0
|
||||
}
|
||||
|
||||
// Qualities returns the sequence quality scores of the BioSequence.
|
||||
@@ -235,6 +243,19 @@ func (s *BioSequence) Qualities() Quality {
|
||||
return __make_default_qualities__(len(s.sequence))
|
||||
}
|
||||
|
||||
// QualitiesString returns the string representation of the qualities of the BioSequence.
|
||||
//
|
||||
// Returns a string representing the qualities of the BioSequence after applying the shift.
|
||||
func (s *BioSequence) QualitiesString() string {
|
||||
quality_shift := obioptions.OutputQualityShift()
|
||||
qual := s.Qualities()
|
||||
qual_ascii := make([]byte, len(qual))
|
||||
for i := 0; i < len(qual); i++ {
|
||||
qual_ascii[i] = byte(qual[i] + byte(quality_shift))
|
||||
}
|
||||
return string(qual_ascii)
|
||||
}
|
||||
|
||||
// Features returns the feature string of the BioSequence.
|
||||
//
|
||||
// The feature string contains the EMBL/GenBank not parsed feature table
|
||||
|
||||
@@ -3,6 +3,7 @@ package obiseq
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"golang.org/x/exp/slices"
|
||||
)
|
||||
@@ -171,3 +172,13 @@ func (s BioSequenceSlice) Size() int {
|
||||
|
||||
return size
|
||||
}
|
||||
|
||||
func (s BioSequenceSlice) AttributeKeys(skip_map bool) obiutils.Set[string] {
|
||||
keys := obiutils.MakeSet[string]()
|
||||
|
||||
for _, k := range s {
|
||||
keys = keys.Union(k.AttributeKeys(skip_map))
|
||||
}
|
||||
|
||||
return keys
|
||||
}
|
||||
|
||||
@@ -198,6 +198,15 @@ var OBILang = gval.NewLanguage(
|
||||
composition := (args[0].(*BioSequence)).Composition()
|
||||
return float64(composition['g']-composition['c']) / float64(composition['g']+composition['c']), nil
|
||||
}),
|
||||
gval.Function("gc", func(args ...interface{}) (interface{}, error) {
|
||||
composition := (args[0].(*BioSequence)).Composition()
|
||||
return float64(composition['g']+composition['c']) / float64(args[0].(*BioSequence).Len()), nil
|
||||
}),
|
||||
gval.Function("composition", func(args ...interface{}) (interface{}, error) {
|
||||
return (args[0].(*BioSequence)).Composition(), nil
|
||||
comp := (args[0].(*BioSequence)).Composition()
|
||||
scomp := make(map[string]float64)
|
||||
for k, v := range comp {
|
||||
scomp[string(k)] = float64(v)
|
||||
}
|
||||
return scomp, nil
|
||||
}))
|
||||
|
||||
@@ -1,7 +1,17 @@
|
||||
package obiseq
|
||||
|
||||
// _revcmpDNA is the complement lookup table used by complement. It is indexed
// by the five low bits of a symbol's ASCII code (code & 31), so 'A'/'a' is at
// index 1 and 'Z'/'z' at index 26; every entry is stored in upper case.
//
//	index : 0         1         2         3
//	        01234567890123456789012345678901
//	symbol: @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_
//
// IUPAC nucleotide codes map to their complement, U is treated like T,
// letters that are not nucleotide codes map to N, and '[' and ']' are
// swapped. Only the first 32 entries can be reached through code & 31.
var _revcmpDNA = []byte(".TVGHNNCDNNMNKNNNNYSAABWNRN]N[NNN")

// complement returns the complement of a single sequence symbol.
//
// Parameters:
//   - n: the ASCII code of the symbol.
//
// Returns:
//   - n itself for the gap symbols '.' and '-';
//   - the entry of _revcmpDNA for codes between 'A' and 'z', keeping the
//     case of the input;
//   - 'n' for every other code.
func complement(n byte) byte {
	switch {
	case n == '.' || n == '-':
		// Gap symbols are their own complement.
		return n
	case n == '`':
		// The backtick lies between 'Z' and 'a' but is not a sequence
		// symbol. It would index entry 0 of the table and come out as
		// the gap symbol '.', so it is reported as an unknown base.
		return 'n'
	case n >= 'A' && n <= 'z':
		// n&31 selects the table entry; n&0x20 is the case bit of the
		// input, OR-ed back so lower case input gives lower case output.
		return _revcmpDNA[n&31] | (n & 0x20)
	}

	return 'n'
}
|
||||
|
||||
// Reverse complements a DNA sequence.
|
||||
// If the inplace parameter is true, the operation is done in place.
|
||||
@@ -18,8 +28,7 @@ func (sequence *BioSequence) ReverseComplement(inplace bool) *BioSequence {
|
||||
// ASCII code & 31 -> builds an index in which (a|A) is 1
|
||||
// ASCII code & 0x20 -> case bit: forces lower case when the input is lower case
|
||||
|
||||
s[j], s[i] = _revcmpDNA[s[i]&31]|(s[i]&0x20),
|
||||
_revcmpDNA[s[j]&31]|(s[j]&0x20)
|
||||
s[j], s[i] = complement(s[i]), complement(s[j])
|
||||
j++
|
||||
}
|
||||
|
||||
@@ -40,8 +49,7 @@ func (sequence *BioSequence) _revcmpMutation() *BioSequence {
|
||||
b := []byte(m)
|
||||
|
||||
// Exchange and reverse complement the symbols
|
||||
b[1], b[9] = _revcmpDNA[b[9]&31]|(b[9]&0x20),
|
||||
_revcmpDNA[b[1]&31]|(b[1]&0x20)
|
||||
b[1], b[9] = complement(b[9]), complement(b[1])
|
||||
|
||||
// Exchange sequencing scores
|
||||
b[3], b[4], b[11], b[12] = b[11], b[12], b[3], b[4]
|
||||
@@ -65,7 +73,6 @@ func (sequence *BioSequence) _revcmpMutation() *BioSequence {
|
||||
return sequence
|
||||
}
|
||||
|
||||
|
||||
func ReverseComplementWorker(inplace bool) SeqWorker {
|
||||
f := func(input *BioSequence) *BioSequence {
|
||||
return input.ReverseComplement(inplace)
|
||||
|
||||
Reference in New Issue
Block a user