mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-12-08 16:50:27 +00:00
Patch header parsing and formatting
This commit is contained in:
@@ -81,13 +81,7 @@ func ISequenceChunkOnDisk(iterator obiiter.IBioSequenceBatch,
|
||||
panic(err)
|
||||
}
|
||||
|
||||
//chunck := make(obiseq.BioSequenceSlice, 0, 10000)
|
||||
chunck := obiseq.MakeBioSequenceSlice()
|
||||
for iseq.Next() {
|
||||
b := iseq.Get()
|
||||
chunck = append(chunck, b.Slice()...)
|
||||
b.Recycle()
|
||||
}
|
||||
chunck := iseq.Load()
|
||||
|
||||
newIter.Push(obiiter.MakeBioSequenceBatch(order, chunck))
|
||||
log.Infof("Start processing of batch %d/%d : %d sequences",
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
package obiformats
|
||||
|
||||
import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
"math"
|
||||
"strings"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
"github.com/goccy/go-json"
|
||||
)
|
||||
@@ -42,6 +44,16 @@ func _parse_json_header_(header string, annotations obiseq.Annotation) string {
|
||||
stop++
|
||||
|
||||
err := json.Unmarshal([]byte(header)[start:stop], &annotations)
|
||||
|
||||
for k, v := range annotations {
|
||||
switch vt := v.(type) {
|
||||
case float64 :
|
||||
if vt == math.Floor(vt) {
|
||||
annotations[k] = int(vt)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("annotation parsing error on %s : %v\n", header, err)
|
||||
}
|
||||
|
||||
@@ -3,6 +3,8 @@ package obiformats
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"log"
|
||||
"math"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
@@ -251,9 +253,20 @@ func ParseOBIFeatures(text string, annotations obiseq.Annotation) string {
|
||||
} // End of not string
|
||||
} // End of not numeric
|
||||
|
||||
annotations[key] = value
|
||||
switch vt := value.(type) {
|
||||
case float64:
|
||||
if vt == math.Floor(vt) {
|
||||
annotations[key] = int(vt)
|
||||
}
|
||||
default:
|
||||
annotations[key] = value
|
||||
}
|
||||
|
||||
d = part[stop:]
|
||||
if stop < len(part) {
|
||||
d = part[stop:]
|
||||
} else {
|
||||
d = []byte{}
|
||||
}
|
||||
//m = __obi_header_key_pattern__.FindIndex(d)
|
||||
m = __match__key__(d)
|
||||
}
|
||||
@@ -280,6 +293,16 @@ func FormatFastSeqOBIHeader(sequence *obiseq.BioSequence) string {
|
||||
switch t := value.(type) {
|
||||
case string:
|
||||
text.WriteString(fmt.Sprintf("%s=%s; ", key, t))
|
||||
case map[string]int,
|
||||
map[string]interface{}:
|
||||
tv, err := json.Marshal(t)
|
||||
if err != nil {
|
||||
log.Fatalf("Cannot convert %v value", value)
|
||||
}
|
||||
tv = bytes.ReplaceAll(tv, []byte(`"`), []byte("'"))
|
||||
text.WriteString(fmt.Sprintf("%s=", key))
|
||||
text.Write(tv)
|
||||
text.WriteString("; ")
|
||||
default:
|
||||
text.WriteString(fmt.Sprintf("%s=%v; ", key, value))
|
||||
}
|
||||
|
||||
@@ -612,3 +612,15 @@ func (iterator IBioSequenceBatch) FilterOn(predicate obiseq.SequencePredicate,
|
||||
|
||||
return trueIter.Rebatch(size)
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) Load() obiseq.BioSequenceSlice {
|
||||
|
||||
chunck := obiseq.MakeBioSequenceSlice()
|
||||
for iterator.Next() {
|
||||
b := iterator.Get()
|
||||
chunck = append(chunck, b.Slice()...)
|
||||
b.Recycle()
|
||||
}
|
||||
|
||||
return chunck
|
||||
}
|
||||
@@ -257,3 +257,4 @@ func (s *BioSequence) WriteByte(data byte) error {
|
||||
s.sequence = append(s.sequence, data)
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -2,8 +2,10 @@ package obiseq
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"strings"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// StatsOnValues associates a string key with an integer value.
// NOTE(review): presumably each key is an observed attribute value and the
// integer is its occurrence count — confirm against StatsOn.
type StatsOnValues map[string]int
|
||||
@@ -33,6 +35,16 @@ func (sequence *BioSequence) StatsOn(key string, na string) StatsOnValues {
|
||||
case StatsOnValues:
|
||||
stats = istat
|
||||
newstat = false
|
||||
case map[string]interface{}:
|
||||
stats = make(StatsOnValues, len(istat))
|
||||
var err error
|
||||
for k, v := range istat {
|
||||
stats[k], err = goutils.InterfaceToInt(v)
|
||||
if err != nil {
|
||||
log.Panicf("In sequence %s : %s stat tag not only containing integer values %s",
|
||||
sequence.Id(), mkey, istat)
|
||||
}
|
||||
}
|
||||
default:
|
||||
stats = make(StatsOnValues, 100)
|
||||
annotations[mkey] = stats
|
||||
|
||||
Reference in New Issue
Block a user