Patch header parsing and formatting

This commit is contained in:
2022-05-27 11:53:29 +03:00
parent 656eda1f73
commit f14860a486
8 changed files with 91 additions and 29 deletions

View File

@ -15,7 +15,7 @@ OBITOOLS_SRC:= $(wildcard cmd/obitools/*/*.go)
OBITOOLS_DIRS:=$(sort $(patsubst %/,%,$(dir $(OBITOOLS_SRC))))
OBITOOLS:=$(notdir $(OBITOOLS_DIRS))
.PHONY: all
.PHONY: all obitools
define MAKE_PKG_RULE
pkg-$(notdir $(1)): $(1)
@ -43,6 +43,12 @@ all: obitools
packages: $(patsubst %,pkg-%,$(PACKAGES))
obitools: $(patsubst %,$(OBITOOLS_PREFIX)%,$(OBITOOLS))
# macos-pkg: run the macOS installer build script shipped under pkgs/macos.
# The two positional arguments passed to the script are "OBITools" and "0.0.1"
# (presumably the product name and version -- TODO confirm against the script).
# The leading '@' keeps make from echoing the command line itself.
macos-pkg:
@bash pkgs/macos/macos-installer-builder-master/macOS-x64/build-macos-x64.sh \
OBITools \
0.0.1
$(BUILD_DIR):
mkdir -p $@

View File

@ -1,14 +1,13 @@
package main
import (
"fmt"
"log"
"os"
"runtime/trace"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obialign"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiclean"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
)
func main() {
@ -20,13 +19,16 @@ func main() {
trace.Start(ftrace)
defer trace.Stop()
// option_parser := obioptions.GenerateOptionParser(
// obiconvert.InputOptionSet,
// )
option_parser := obioptions.GenerateOptionParser(
obiconvert.InputOptionSet,
)
//_, args, _ := option_parser(os.Args)
_, args, _ := option_parser(os.Args)
fs, _ := obiconvert.ReadBioSequencesBatch(args...)
obiclean.IOBIClean(fs)
// fs, _ := obiconvert.ReadBioSequences(args...)
// buffer := make([]byte, 0)
// fs.Next()
// s := fs.Get()
@ -41,19 +43,19 @@ func main() {
// fmt.Printf("Shift : %d Score : %d\n", maxshift, maxcount)
// }
A := []byte("ccgcctccttagaacaggctcctctagaaaaccatgtgggatatctaaagaaggcggagatagaaagagcggttcagcaggaatgccgagatggacggcgtgtgacg")
B := []byte("ccgcctccttagaacaggctcctctagaaaaaccatgtgggatatctaaagaaggcggagatagaaagagcggttcagcaggaatgccgagatggacggcgtgtgacg")
// A := []byte("ccgcctccttagaacaggctcctctagaaaaccatgtgggatatctaaagaaggcggagatagaaagagcggttcagcaggaatgccgagatggacggcgtgtgacg")
// B := []byte("ccgcctccttagaacaggctcctctagaaaaaccatgtgggatatctaaagaaggcggagatagaaagagcggttcagcaggaatgccgagatggacggcgtgtgacg")
// B := []byte("cgccaccaccgagatctacactctttccctacacgacgctcttccgatctccgcctccttagaacaggctcctctagaaaagcatagtggggtatctaaaggaggcgg")
sA := obiseq.NewBioSequence("A", A, "")
sB := obiseq.MakeBioSequence("B", B, "")
// sA := obiseq.NewBioSequence("A", A, "")
// sB := obiseq.MakeBioSequence("B", B, "")
s, l := obialign.LCSScore(sA, &sB, 2, nil)
// s, l := obialign.LCSScore(sA, &sB, 2, nil)
fmt.Printf("score : %d length : %d error : %d\n", s, l, l-s)
// fmt.Printf("score : %d length : %d error : %d\n", s, l, l-s)
s, l = obialign.LCSScore(&sB, &sB, 2, nil)
// s, l = obialign.LCSScore(&sB, &sB, 2, nil)
fmt.Printf("score : %d length : %d error : %d\n", s, l, l-s)
// fmt.Printf("score : %d length : %d error : %d\n", s, l, l-s)
// pat, _ := obiapat.MakeApatPattern("TCCTTCCAACAGGCTCCTC", 3)
// as, _ := obiapat.MakeApatSequence(sA, false)

View File

@ -81,13 +81,7 @@ func ISequenceChunkOnDisk(iterator obiiter.IBioSequenceBatch,
panic(err)
}
//chunck := make(obiseq.BioSequenceSlice, 0, 10000)
chunck := obiseq.MakeBioSequenceSlice()
for iseq.Next() {
b := iseq.Get()
chunck = append(chunck, b.Slice()...)
b.Recycle()
}
chunck := iseq.Load()
newIter.Push(obiiter.MakeBioSequenceBatch(order, chunck))
log.Infof("Start processing of batch %d/%d : %d sequences",

View File

@ -1,9 +1,11 @@
package obiformats
import (
log "github.com/sirupsen/logrus"
"math"
"strings"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
"github.com/goccy/go-json"
)
@ -42,6 +44,16 @@ func _parse_json_header_(header string, annotations obiseq.Annotation) string {
stop++
err := json.Unmarshal([]byte(header)[start:stop], &annotations)
for k, v := range annotations {
switch vt := v.(type) {
case float64 :
if vt == math.Floor(vt) {
annotations[k] = int(vt)
}
}
}
if err != nil {
log.Fatalf("annotation parsing error on %s : %v\n", header, err)
}

View File

@ -3,6 +3,8 @@ package obiformats
import (
"bytes"
"fmt"
"log"
"math"
"regexp"
"strconv"
"strings"
@ -251,9 +253,20 @@ func ParseOBIFeatures(text string, annotations obiseq.Annotation) string {
} // End of not string
} // End of not numeric
annotations[key] = value
switch vt := value.(type) {
case float64:
if vt == math.Floor(vt) {
annotations[key] = int(vt)
}
default:
annotations[key] = value
}
d = part[stop:]
if stop < len(part) {
d = part[stop:]
} else {
d = []byte{}
}
//m = __obi_header_key_pattern__.FindIndex(d)
m = __match__key__(d)
}
@ -280,6 +293,16 @@ func FormatFastSeqOBIHeader(sequence *obiseq.BioSequence) string {
switch t := value.(type) {
case string:
text.WriteString(fmt.Sprintf("%s=%s; ", key, t))
case map[string]int,
map[string]interface{}:
tv, err := json.Marshal(t)
if err != nil {
log.Fatalf("Cannot convert %v value", value)
}
tv = bytes.ReplaceAll(tv, []byte(`"`), []byte("'"))
text.WriteString(fmt.Sprintf("%s=", key))
text.Write(tv)
text.WriteString("; ")
default:
text.WriteString(fmt.Sprintf("%s=%v; ", key, value))
}

View File

@ -612,3 +612,15 @@ func (iterator IBioSequenceBatch) FilterOn(predicate obiseq.SequencePredicate,
return trueIter.Rebatch(size)
}
// Load consumes the iterator until Next reports that nothing is left and
// gathers the content of every batch into one slice.
//
// The result starts as the slice returned by obiseq.MakeBioSequenceSlice; the
// sequences of each batch (as exposed by its Slice method) are appended to it
// in iteration order. Each batch is handed to Recycle only after its content
// has been appended.
func (iterator IBioSequenceBatch) Load() obiseq.BioSequenceSlice {
	sequences := obiseq.MakeBioSequenceSlice()

	for iterator.Next() {
		batch := iterator.Get()
		content := batch.Slice()
		sequences = append(sequences, content...)
		// The batch is no longer needed once its content has been appended.
		batch.Recycle()
	}

	return sequences
}

View File

@ -257,3 +257,4 @@ func (s *BioSequence) WriteByte(data byte) error {
s.sequence = append(s.sequence, data)
return nil
}

View File

@ -2,8 +2,10 @@ package obiseq
import (
"fmt"
log "github.com/sirupsen/logrus"
"strings"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
log "github.com/sirupsen/logrus"
)
type StatsOnValues map[string]int
@ -33,6 +35,16 @@ func (sequence *BioSequence) StatsOn(key string, na string) StatsOnValues {
case StatsOnValues:
stats = istat
newstat = false
case map[string]interface{}:
stats = make(StatsOnValues, len(istat))
var err error
for k, v := range istat {
stats[k], err = goutils.InterfaceToInt(v)
if err != nil {
log.Panicf("In sequence %s : %s stat tag not only containing integer values %s",
sequence.Id(), mkey, istat)
}
}
default:
stats = make(StatsOnValues, 100)
annotations[mkey] = stats