mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Patch header parsing and formatting
This commit is contained in:
8
Makefile
8
Makefile
@ -15,7 +15,7 @@ OBITOOLS_SRC:= $(wildcard cmd/obitools/*/*.go)
|
||||
OBITOOLS_DIRS:=$(sort $(patsubst %/,%,$(dir $(OBITOOLS_SRC))))
|
||||
OBITOOLS:=$(notdir $(OBITOOLS_DIRS))
|
||||
|
||||
.PHONY: all
|
||||
.PHONY: all obitools
|
||||
|
||||
define MAKE_PKG_RULE
|
||||
pkg-$(notdir $(1)): $(1)
|
||||
@ -43,6 +43,12 @@ all: obitools
|
||||
packages: $(patsubst %,pkg-%,$(PACKAGES))
|
||||
obitools: $(patsubst %,$(OBITOOLS_PREFIX)%,$(OBITOOLS))
|
||||
|
||||
|
||||
macos-pkg:
|
||||
@bash pkgs/macos/macos-installer-builder-master/macOS-x64/build-macos-x64.sh \
|
||||
OBITools \
|
||||
0.0.1
|
||||
|
||||
$(BUILD_DIR):
|
||||
mkdir -p $@
|
||||
|
||||
|
@ -1,14 +1,13 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"runtime/trace"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obialign"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiclean"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||
)
|
||||
|
||||
func main() {
|
||||
@ -20,13 +19,16 @@ func main() {
|
||||
trace.Start(ftrace)
|
||||
defer trace.Stop()
|
||||
|
||||
// option_parser := obioptions.GenerateOptionParser(
|
||||
// obiconvert.InputOptionSet,
|
||||
// )
|
||||
option_parser := obioptions.GenerateOptionParser(
|
||||
obiconvert.InputOptionSet,
|
||||
)
|
||||
|
||||
//_, args, _ := option_parser(os.Args)
|
||||
_, args, _ := option_parser(os.Args)
|
||||
|
||||
fs, _ := obiconvert.ReadBioSequencesBatch(args...)
|
||||
|
||||
obiclean.IOBIClean(fs)
|
||||
|
||||
// fs, _ := obiconvert.ReadBioSequences(args...)
|
||||
// buffer := make([]byte, 0)
|
||||
// fs.Next()
|
||||
// s := fs.Get()
|
||||
@ -41,19 +43,19 @@ func main() {
|
||||
// fmt.Printf("Shift : %d Score : %d\n", maxshift, maxcount)
|
||||
// }
|
||||
|
||||
A := []byte("ccgcctccttagaacaggctcctctagaaaaccatgtgggatatctaaagaaggcggagatagaaagagcggttcagcaggaatgccgagatggacggcgtgtgacg")
|
||||
B := []byte("ccgcctccttagaacaggctcctctagaaaaaccatgtgggatatctaaagaaggcggagatagaaagagcggttcagcaggaatgccgagatggacggcgtgtgacg")
|
||||
// A := []byte("ccgcctccttagaacaggctcctctagaaaaccatgtgggatatctaaagaaggcggagatagaaagagcggttcagcaggaatgccgagatggacggcgtgtgacg")
|
||||
// B := []byte("ccgcctccttagaacaggctcctctagaaaaaccatgtgggatatctaaagaaggcggagatagaaagagcggttcagcaggaatgccgagatggacggcgtgtgacg")
|
||||
// B := []byte("cgccaccaccgagatctacactctttccctacacgacgctcttccgatctccgcctccttagaacaggctcctctagaaaagcatagtggggtatctaaaggaggcgg")
|
||||
sA := obiseq.NewBioSequence("A", A, "")
|
||||
sB := obiseq.MakeBioSequence("B", B, "")
|
||||
// sA := obiseq.NewBioSequence("A", A, "")
|
||||
// sB := obiseq.MakeBioSequence("B", B, "")
|
||||
|
||||
s, l := obialign.LCSScore(sA, &sB, 2, nil)
|
||||
// s, l := obialign.LCSScore(sA, &sB, 2, nil)
|
||||
|
||||
fmt.Printf("score : %d length : %d error : %d\n", s, l, l-s)
|
||||
// fmt.Printf("score : %d length : %d error : %d\n", s, l, l-s)
|
||||
|
||||
s, l = obialign.LCSScore(&sB, &sB, 2, nil)
|
||||
// s, l = obialign.LCSScore(&sB, &sB, 2, nil)
|
||||
|
||||
fmt.Printf("score : %d length : %d error : %d\n", s, l, l-s)
|
||||
// fmt.Printf("score : %d length : %d error : %d\n", s, l, l-s)
|
||||
|
||||
// pat, _ := obiapat.MakeApatPattern("TCCTTCCAACAGGCTCCTC", 3)
|
||||
// as, _ := obiapat.MakeApatSequence(sA, false)
|
||||
|
@ -81,13 +81,7 @@ func ISequenceChunkOnDisk(iterator obiiter.IBioSequenceBatch,
|
||||
panic(err)
|
||||
}
|
||||
|
||||
//chunck := make(obiseq.BioSequenceSlice, 0, 10000)
|
||||
chunck := obiseq.MakeBioSequenceSlice()
|
||||
for iseq.Next() {
|
||||
b := iseq.Get()
|
||||
chunck = append(chunck, b.Slice()...)
|
||||
b.Recycle()
|
||||
}
|
||||
chunck := iseq.Load()
|
||||
|
||||
newIter.Push(obiiter.MakeBioSequenceBatch(order, chunck))
|
||||
log.Infof("Start processing of batch %d/%d : %d sequences",
|
||||
|
@ -1,9 +1,11 @@
|
||||
package obiformats
|
||||
|
||||
import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
"math"
|
||||
"strings"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
"github.com/goccy/go-json"
|
||||
)
|
||||
@ -42,6 +44,16 @@ func _parse_json_header_(header string, annotations obiseq.Annotation) string {
|
||||
stop++
|
||||
|
||||
err := json.Unmarshal([]byte(header)[start:stop], &annotations)
|
||||
|
||||
for k, v := range annotations {
|
||||
switch vt := v.(type) {
|
||||
case float64 :
|
||||
if vt == math.Floor(vt) {
|
||||
annotations[k] = int(vt)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("annotation parsing error on %s : %v\n", header, err)
|
||||
}
|
||||
|
@ -3,6 +3,8 @@ package obiformats
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"log"
|
||||
"math"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
@ -251,9 +253,20 @@ func ParseOBIFeatures(text string, annotations obiseq.Annotation) string {
|
||||
} // End of not string
|
||||
} // End of not numeric
|
||||
|
||||
annotations[key] = value
|
||||
switch vt := value.(type) {
|
||||
case float64:
|
||||
if vt == math.Floor(vt) {
|
||||
annotations[key] = int(vt)
|
||||
}
|
||||
default:
|
||||
annotations[key] = value
|
||||
}
|
||||
|
||||
d = part[stop:]
|
||||
if stop < len(part) {
|
||||
d = part[stop:]
|
||||
} else {
|
||||
d = []byte{}
|
||||
}
|
||||
//m = __obi_header_key_pattern__.FindIndex(d)
|
||||
m = __match__key__(d)
|
||||
}
|
||||
@ -280,6 +293,16 @@ func FormatFastSeqOBIHeader(sequence *obiseq.BioSequence) string {
|
||||
switch t := value.(type) {
|
||||
case string:
|
||||
text.WriteString(fmt.Sprintf("%s=%s; ", key, t))
|
||||
case map[string]int,
|
||||
map[string]interface{}:
|
||||
tv, err := json.Marshal(t)
|
||||
if err != nil {
|
||||
log.Fatalf("Cannot convert %v value", value)
|
||||
}
|
||||
tv = bytes.ReplaceAll(tv, []byte(`"`), []byte("'"))
|
||||
text.WriteString(fmt.Sprintf("%s=", key))
|
||||
text.Write(tv)
|
||||
text.WriteString("; ")
|
||||
default:
|
||||
text.WriteString(fmt.Sprintf("%s=%v; ", key, value))
|
||||
}
|
||||
|
@ -612,3 +612,15 @@ func (iterator IBioSequenceBatch) FilterOn(predicate obiseq.SequencePredicate,
|
||||
|
||||
return trueIter.Rebatch(size)
|
||||
}
|
||||
|
||||
// Load consumes the iterator until Next reports no more batches and returns
// every sequence it yielded as one obiseq.BioSequenceSlice.
//
// The result starts from obiseq.MakeBioSequenceSlice() and grows by appending
// the content of each batch (b.Slice()) in the order the batches are
// delivered by the iterator. Each batch is recycled once its content has
// been appended.
func (iterator IBioSequenceBatch) Load() obiseq.BioSequenceSlice {
|
||||
|
||||
chunck := obiseq.MakeBioSequenceSlice()
|
||||
for iterator.Next() {
|
||||
b := iterator.Get()
|
||||
chunck = append(chunck, b.Slice()...)
|
||||
// NOTE(review): presumably Recycle hands the batch's storage back for
// reuse while the appended sequences stay valid in chunck — confirm
// against the batch implementation.
b.Recycle()
|
||||
}
|
||||
|
||||
return chunck
|
||||
}
|
@ -257,3 +257,4 @@ func (s *BioSequence) WriteByte(data byte) error {
|
||||
s.sequence = append(s.sequence, data)
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -2,8 +2,10 @@ package obiseq
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"strings"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type StatsOnValues map[string]int
|
||||
@ -33,6 +35,16 @@ func (sequence *BioSequence) StatsOn(key string, na string) StatsOnValues {
|
||||
case StatsOnValues:
|
||||
stats = istat
|
||||
newstat = false
|
||||
case map[string]interface{}:
|
||||
stats = make(StatsOnValues, len(istat))
|
||||
var err error
|
||||
for k, v := range istat {
|
||||
stats[k], err = goutils.InterfaceToInt(v)
|
||||
if err != nil {
|
||||
log.Panicf("In sequence %s : %s stat tag not only containing integer values %s",
|
||||
sequence.Id(), mkey, istat)
|
||||
}
|
||||
}
|
||||
default:
|
||||
stats = make(StatsOnValues, 100)
|
||||
annotations[mkey] = stats
|
||||
|
Reference in New Issue
Block a user