mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Adds a reader for NGS filter files and change some API for the apat library
This commit is contained in:
34
cmd/obitools/obimultiplex/main.go
Normal file
34
cmd/obitools/obimultiplex/main.go
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obipcr"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
|
||||||
|
// f, err := os.Create("cpu.pprof")
|
||||||
|
// if err != nil {
|
||||||
|
// log.Fatal(err)
|
||||||
|
// }
|
||||||
|
// pprof.StartCPUProfile(f)
|
||||||
|
// defer pprof.StopCPUProfile()
|
||||||
|
|
||||||
|
// ftrace, err := os.Create("cpu.trace")
|
||||||
|
// if err != nil {
|
||||||
|
// log.Fatal(err)
|
||||||
|
// }
|
||||||
|
// trace.Start(ftrace)
|
||||||
|
// defer trace.Stop()
|
||||||
|
|
||||||
|
optionParser := obioptions.GenerateOptionParser(obipcr.OptionSet)
|
||||||
|
|
||||||
|
_, args, _ := optionParser(os.Args)
|
||||||
|
|
||||||
|
sequences, _ := obiconvert.ReadBioSequencesBatch(args...)
|
||||||
|
amplicons, _ := obipcr.PCR(sequences)
|
||||||
|
obiconvert.WriteBioSequencesBatch(amplicons, true)
|
||||||
|
}
|
@ -1,7 +1,9 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"log"
|
||||||
"os"
|
"os"
|
||||||
|
"runtime/trace"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||||
@ -10,6 +12,7 @@ import (
|
|||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
|
|
||||||
|
// go tool pprof -http=":8000" ./obipairing ./cpu.pprof
|
||||||
// f, err := os.Create("cpu.pprof")
|
// f, err := os.Create("cpu.pprof")
|
||||||
// if err != nil {
|
// if err != nil {
|
||||||
// log.Fatal(err)
|
// log.Fatal(err)
|
||||||
@ -17,12 +20,13 @@ func main() {
|
|||||||
// pprof.StartCPUProfile(f)
|
// pprof.StartCPUProfile(f)
|
||||||
// defer pprof.StopCPUProfile()
|
// defer pprof.StopCPUProfile()
|
||||||
|
|
||||||
// ftrace, err := os.Create("cpu.trace")
|
// go tool trace cpu.trace
|
||||||
// if err != nil {
|
ftrace, err := os.Create("cpu.trace")
|
||||||
// log.Fatal(err)
|
if err != nil {
|
||||||
// }
|
log.Fatal(err)
|
||||||
// trace.Start(ftrace)
|
}
|
||||||
// defer trace.Stop()
|
trace.Start(ftrace)
|
||||||
|
defer trace.Stop()
|
||||||
|
|
||||||
optionParser := obioptions.GenerateOptionParser(obipcr.OptionSet)
|
optionParser := obioptions.GenerateOptionParser(obipcr.OptionSet)
|
||||||
|
|
||||||
|
@ -6,7 +6,8 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"runtime/trace"
|
"runtime/trace"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obialign"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiapat"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -41,27 +42,17 @@ func main() {
|
|||||||
// }
|
// }
|
||||||
|
|
||||||
A := []byte("ccgcctccttagaacaggctcctctagaaaaccatagtgggatatctaaagaaggcggagatagaaagagcggttcagcaggaatgccgagatggacggcgtgtgacg")
|
A := []byte("ccgcctccttagaacaggctcctctagaaaaccatagtgggatatctaaagaaggcggagatagaaagagcggttcagcaggaatgccgagatggacggcgtgtgacg")
|
||||||
B := []byte("cgccaccaccgagatctacactctttccctacacgacgctcttccgatctccgcctccttagaacaggctcctctagaaaagcatagtggggtatctaaaggaggcgg")
|
// B := []byte("cgccaccaccgagatctacactctttccctacacgacgctcttccgatctccgcctccttagaacaggctcctctagaaaagcatagtggggtatctaaaggaggcgg")
|
||||||
sA := obiseq.MakeBioSequence("A", A, "")
|
sA := obiseq.MakeBioSequence("A", A, "")
|
||||||
sB := obiseq.MakeBioSequence("B", B, "")
|
// sB := obiseq.MakeBioSequence("B", B, "")
|
||||||
|
|
||||||
fmt.Println(string(sA.Sequence()))
|
pat, _ := obiapat.MakeApatPattern("TCCTTCCAACAGGCTCCTC", 3)
|
||||||
fmt.Println(sA.Qualities())
|
as, _ := obiapat.MakeApatSequence(sA, false)
|
||||||
fmt.Println(string(sB.Sequence()))
|
fmt.Println(pat.FindAllIndex(as))
|
||||||
fmt.Println(sB.Qualities())
|
|
||||||
|
|
||||||
score, path := obialign.PELeftAlign(sA, sB, 2, obialign.NilPEAlignArena)
|
file, _ := os.Open("sample/wolf_diet_ngsfilter.txt")
|
||||||
fmt.Printf("Score : %d Path : %v\n", score, path)
|
xxx, _ := obiformats.ReadNGSFilter(file)
|
||||||
score, path = obialign.PERightAlign(sA, sB, 2, obialign.NilPEAlignArena)
|
|
||||||
fmt.Printf("Score : %d Path : %v\n", score, path)
|
|
||||||
|
|
||||||
fmt.Println(string(sA.Sequence()))
|
fmt.Println(xxx)
|
||||||
sA.ReverseComplement(true)
|
|
||||||
fmt.Println(string(sA.Sequence()))
|
|
||||||
fmt.Println(string(sA.Id()))
|
|
||||||
|
|
||||||
sA.Reset()
|
|
||||||
fmt.Println(sA.Length())
|
|
||||||
fmt.Println(sA.String())
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -202,8 +202,6 @@ func (pattern ApatPattern) FindAllIndex(sequence ApatSequence, limits ...int) (l
|
|||||||
C.int32_t(begin),
|
C.int32_t(begin),
|
||||||
C.int32_t(length+C.MAX_PAT_LEN)))
|
C.int32_t(length+C.MAX_PAT_LEN)))
|
||||||
|
|
||||||
//log.Printf("match count : %d\n", nhits)
|
|
||||||
|
|
||||||
if nhits == 0 {
|
if nhits == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
package obiapat
|
package obiapat
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"log"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
)
|
)
|
||||||
@ -14,6 +16,10 @@ type _Options struct {
|
|||||||
bufferSize int
|
bufferSize int
|
||||||
batchSize int
|
batchSize int
|
||||||
parallelWorkers int
|
parallelWorkers int
|
||||||
|
forward ApatPattern
|
||||||
|
cfwd ApatPattern
|
||||||
|
reverse ApatPattern
|
||||||
|
crev ApatPattern
|
||||||
}
|
}
|
||||||
|
|
||||||
// Options stores a set of option usable by the
|
// Options stores a set of option usable by the
|
||||||
@ -90,6 +96,10 @@ func MakeOptions(setters []WithOption) Options {
|
|||||||
parallelWorkers: 4,
|
parallelWorkers: 4,
|
||||||
batchSize: 100,
|
batchSize: 100,
|
||||||
bufferSize: 100,
|
bufferSize: 100,
|
||||||
|
forward: NilApatPattern,
|
||||||
|
cfwd: NilApatPattern,
|
||||||
|
reverse: NilApatPattern,
|
||||||
|
crev: NilApatPattern,
|
||||||
}
|
}
|
||||||
|
|
||||||
opt := Options{&o}
|
opt := Options{&o}
|
||||||
@ -126,19 +136,41 @@ func OptionMaxLength(maxLength int) WithOption {
|
|||||||
// OptionForwardError sets the number of
|
// OptionForwardError sets the number of
|
||||||
// error allowed when matching the forward
|
// error allowed when matching the forward
|
||||||
// primer.
|
// primer.
|
||||||
func OptionForwardError(max int) WithOption {
|
func OptionForwardPrimer(primer string, max int) WithOption {
|
||||||
f := WithOption(func(opt Options) {
|
f := WithOption(func(opt Options) {
|
||||||
|
var err error
|
||||||
|
|
||||||
|
opt.pointer.forward, err = MakeApatPattern(primer, max)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("error : %v\n", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
opt.pointer.cfwd, err = opt.pointer.forward.ReverseComplement()
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("error : %v\n", err)
|
||||||
|
}
|
||||||
opt.pointer.forwardError = max
|
opt.pointer.forwardError = max
|
||||||
})
|
})
|
||||||
|
|
||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
// OptionReverseError sets the number of
|
// OptionForwardError sets the number of
|
||||||
// error allowed when matching the reverse
|
// error allowed when matching the forward
|
||||||
// primer.
|
// primer.
|
||||||
func OptionReverseError(max int) WithOption {
|
func OptionReversePrimer(primer string, max int) WithOption {
|
||||||
f := WithOption(func(opt Options) {
|
f := WithOption(func(opt Options) {
|
||||||
|
var err error
|
||||||
|
|
||||||
|
opt.pointer.reverse, err = MakeApatPattern(primer, max)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("error : %v\n", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
opt.pointer.crev, err = opt.pointer.reverse.ReverseComplement()
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("error : %v\n", err)
|
||||||
|
}
|
||||||
opt.pointer.reverseError = max
|
opt.pointer.reverseError = max
|
||||||
})
|
})
|
||||||
|
|
||||||
@ -185,14 +217,37 @@ func OptionBatchSize(size int) WithOption {
|
|||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
func _Pcr(seq ApatSequence, sequence obiseq.BioSequence,
|
func (options Options) Free() {
|
||||||
forward, cfwd, reverse, crev ApatPattern,
|
if options.pointer.forward.pointer != nil {
|
||||||
|
options.pointer.forward.Free()
|
||||||
|
}
|
||||||
|
|
||||||
|
if options.pointer.cfwd.pointer != nil {
|
||||||
|
options.pointer.cfwd.Free()
|
||||||
|
}
|
||||||
|
|
||||||
|
if options.pointer.reverse.pointer != nil {
|
||||||
|
options.pointer.reverse.Free()
|
||||||
|
}
|
||||||
|
|
||||||
|
if options.pointer.crev.pointer != nil {
|
||||||
|
options.pointer.crev.Free()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func _Pcr(seq ApatSequence,
|
||||||
|
sequence obiseq.BioSequence,
|
||||||
opt Options) obiseq.BioSequenceSlice {
|
opt Options) obiseq.BioSequenceSlice {
|
||||||
results := make(obiseq.BioSequenceSlice, 0, 10)
|
results := make(obiseq.BioSequenceSlice, 0, 10)
|
||||||
|
|
||||||
|
forward := opt.pointer.forward
|
||||||
|
cfwd := opt.pointer.cfwd
|
||||||
|
reverse := opt.pointer.reverse
|
||||||
|
crev := opt.pointer.crev
|
||||||
|
|
||||||
forwardMatches := forward.FindAllIndex(seq)
|
forwardMatches := forward.FindAllIndex(seq)
|
||||||
|
|
||||||
if forwardMatches != nil {
|
if len(forwardMatches) > 0 {
|
||||||
|
|
||||||
begin := forwardMatches[0][0]
|
begin := forwardMatches[0][0]
|
||||||
length := seq.Length() - begin
|
length := seq.Length() - begin
|
||||||
@ -262,7 +317,6 @@ func _Pcr(seq ApatSequence, sequence obiseq.BioSequence,
|
|||||||
}
|
}
|
||||||
|
|
||||||
forwardMatches = reverse.FindAllIndex(seq)
|
forwardMatches = reverse.FindAllIndex(seq)
|
||||||
|
|
||||||
if forwardMatches != nil {
|
if forwardMatches != nil {
|
||||||
|
|
||||||
begin := forwardMatches[0][0]
|
begin := forwardMatches[0][0]
|
||||||
@ -340,28 +394,15 @@ func _Pcr(seq ApatSequence, sequence obiseq.BioSequence,
|
|||||||
// obiseq.BioSequence instance. PCR parameters are
|
// obiseq.BioSequence instance. PCR parameters are
|
||||||
// specified using the corresponding Option functions
|
// specified using the corresponding Option functions
|
||||||
// defined for the PCR algorithm.
|
// defined for the PCR algorithm.
|
||||||
func PCR(sequence obiseq.BioSequence,
|
func PCR(sequence obiseq.BioSequence, options ...WithOption) obiseq.BioSequenceSlice {
|
||||||
forward, reverse string, options ...WithOption) obiseq.BioSequenceSlice {
|
|
||||||
|
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
|
defer opt.Free()
|
||||||
|
|
||||||
seq, _ := MakeApatSequence(sequence, opt.Circular())
|
seq, _ := MakeApatSequence(sequence, opt.Circular())
|
||||||
|
defer seq.Free()
|
||||||
|
|
||||||
fwd, _ := MakeApatPattern(forward, opt.ForwardError())
|
results := _Pcr(seq, sequence, opt)
|
||||||
rev, _ := MakeApatPattern(reverse, opt.ReverseError())
|
|
||||||
cfwd, _ := fwd.ReverseComplement()
|
|
||||||
crev, _ := rev.ReverseComplement()
|
|
||||||
|
|
||||||
results := _Pcr(seq, sequence,
|
|
||||||
fwd, cfwd, rev, crev,
|
|
||||||
opt)
|
|
||||||
|
|
||||||
seq.Free()
|
|
||||||
|
|
||||||
fwd.Free()
|
|
||||||
rev.Free()
|
|
||||||
cfwd.Free()
|
|
||||||
crev.Free()
|
|
||||||
|
|
||||||
return results
|
return results
|
||||||
}
|
}
|
||||||
@ -372,32 +413,24 @@ func PCR(sequence obiseq.BioSequence,
|
|||||||
// specified using the corresponding Option functions
|
// specified using the corresponding Option functions
|
||||||
// defined for the PCR algorithm.
|
// defined for the PCR algorithm.
|
||||||
func PCRSlice(sequences obiseq.BioSequenceSlice,
|
func PCRSlice(sequences obiseq.BioSequenceSlice,
|
||||||
forward, reverse string, options ...WithOption) obiseq.BioSequenceSlice {
|
options ...WithOption) obiseq.BioSequenceSlice {
|
||||||
|
|
||||||
results := make(obiseq.BioSequenceSlice, 0, len(sequences))
|
results := make(obiseq.BioSequenceSlice, 0, len(sequences))
|
||||||
|
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
|
defer opt.Free()
|
||||||
fwd, _ := MakeApatPattern(forward, opt.ForwardError())
|
|
||||||
rev, _ := MakeApatPattern(reverse, opt.ReverseError())
|
|
||||||
cfwd, _ := fwd.ReverseComplement()
|
|
||||||
crev, _ := rev.ReverseComplement()
|
|
||||||
|
|
||||||
if len(sequences) > 0 {
|
if len(sequences) > 0 {
|
||||||
seq, _ := MakeApatSequence(sequences[0], opt.Circular())
|
seq, _ := MakeApatSequence(sequences[0], opt.Circular())
|
||||||
amplicons := _Pcr(seq, sequences[0],
|
amplicons := _Pcr(seq, sequences[0], opt)
|
||||||
fwd, cfwd, rev, crev,
|
|
||||||
opt)
|
|
||||||
|
|
||||||
if len(amplicons) > 0 {
|
if len(amplicons) > 0 {
|
||||||
results = append(results, amplicons...)
|
results = append(results, amplicons...)
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, sequence := range sequences[1:] {
|
for _, sequence := range sequences[1:] {
|
||||||
seq, _ := MakeApatSequence(sequence, opt.Circular(), seq)
|
seq, _ = MakeApatSequence(sequence, opt.Circular(), seq)
|
||||||
amplicons = _Pcr(seq, sequence,
|
amplicons = _Pcr(seq, sequence, opt)
|
||||||
fwd, cfwd, rev, crev,
|
|
||||||
opt)
|
|
||||||
if len(amplicons) > 0 {
|
if len(amplicons) > 0 {
|
||||||
results = append(results, amplicons...)
|
results = append(results, amplicons...)
|
||||||
}
|
}
|
||||||
@ -406,21 +439,15 @@ func PCRSlice(sequences obiseq.BioSequenceSlice,
|
|||||||
seq.Free()
|
seq.Free()
|
||||||
}
|
}
|
||||||
|
|
||||||
fwd.Free()
|
|
||||||
rev.Free()
|
|
||||||
cfwd.Free()
|
|
||||||
crev.Free()
|
|
||||||
|
|
||||||
return results
|
return results
|
||||||
}
|
}
|
||||||
|
|
||||||
// PCRSliceWorker is a worker function builder which produce
|
// PCRSliceWorker is a worker function builder which produce
|
||||||
// job function usable by the obiseq.MakeISliceWorker function.
|
// job function usable by the obiseq.MakeISliceWorker function.
|
||||||
func PCRSliceWorker(forward, reverse string,
|
func PCRSliceWorker(options ...WithOption) obiseq.SeqSliceWorker {
|
||||||
options ...WithOption) obiseq.SeqSliceWorker {
|
|
||||||
|
|
||||||
worker := func(sequences obiseq.BioSequenceSlice) obiseq.BioSequenceSlice {
|
worker := func(sequences obiseq.BioSequenceSlice) obiseq.BioSequenceSlice {
|
||||||
return PCRSlice(sequences, forward, reverse, options...)
|
return PCRSlice(sequences, options...)
|
||||||
}
|
}
|
||||||
|
|
||||||
return worker
|
return worker
|
||||||
|
@ -184,8 +184,8 @@ func ReadEMBLBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenceB
|
|||||||
|
|
||||||
newIter := obiseq.MakeIBioSequenceBatch(opt.BufferSize())
|
newIter := obiseq.MakeIBioSequenceBatch(opt.BufferSize())
|
||||||
|
|
||||||
// newIter.Add(opt.ParallelWorkers())
|
nworkers := opt.ParallelWorkers()
|
||||||
newIter.Add(2)
|
newIter.Add(nworkers)
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
newIter.Wait()
|
newIter.Wait()
|
||||||
@ -196,7 +196,7 @@ func ReadEMBLBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenceB
|
|||||||
}()
|
}()
|
||||||
|
|
||||||
// for j := 0; j < opt.ParallelWorkers(); j++ {
|
// for j := 0; j < opt.ParallelWorkers(); j++ {
|
||||||
for j := 0; j < 2; j++ {
|
for j := 0; j < nworkers; j++ {
|
||||||
go _ParseEmblFile(entry_channel, newIter)
|
go _ParseEmblFile(entry_channel, newIter)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -173,16 +173,11 @@ func __is_false__(text []byte) bool {
|
|||||||
bytes.Equal(text, __FALSE__)
|
bytes.Equal(text, __FALSE__)
|
||||||
}
|
}
|
||||||
|
|
||||||
func ParseFastSeqOBIHeader(sequence obiseq.BioSequence) {
|
func ParseOBIFeatures(text string, annotations obiseq.Annotation) string {
|
||||||
definition := []byte(sequence.Definition())
|
|
||||||
annotations := sequence.Annotations()
|
|
||||||
|
|
||||||
// all_matches := __obi_header_pattern__.FindAllSubmatchIndex(definition, -1)
|
|
||||||
|
|
||||||
|
definition := []byte(text)
|
||||||
d := definition
|
d := definition
|
||||||
|
|
||||||
//for m := __obi_header_key_pattern__.FindIndex(definition); len(m) > 0; {
|
|
||||||
//fmt.Println(string(definition[0:20]), __match__key__(definition))
|
|
||||||
for m := __match__key__(definition); len(m) > 0; {
|
for m := __match__key__(definition); len(m) > 0; {
|
||||||
var bvalue []byte
|
var bvalue []byte
|
||||||
var value interface{}
|
var value interface{}
|
||||||
@ -263,7 +258,16 @@ func ParseFastSeqOBIHeader(sequence obiseq.BioSequence) {
|
|||||||
m = __match__key__(d)
|
m = __match__key__(d)
|
||||||
}
|
}
|
||||||
|
|
||||||
sequence.SetDefinition(string(bytes.TrimSpace(d)))
|
return string(bytes.TrimSpace(d))
|
||||||
|
}
|
||||||
|
|
||||||
|
func ParseFastSeqOBIHeader(sequence obiseq.BioSequence) {
|
||||||
|
annotations := sequence.Annotations()
|
||||||
|
|
||||||
|
definition := ParseOBIFeatures(sequence.Definition(),
|
||||||
|
annotations)
|
||||||
|
|
||||||
|
sequence.SetDefinition(definition)
|
||||||
}
|
}
|
||||||
|
|
||||||
func FormatFastSeqOBIHeader(sequence obiseq.BioSequence) string {
|
func FormatFastSeqOBIHeader(sequence obiseq.BioSequence) string {
|
||||||
|
@ -120,10 +120,10 @@ func WriteFastaBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options
|
|||||||
}
|
}
|
||||||
|
|
||||||
log.Println("Start of the fasta file writing")
|
log.Println("Start of the fasta file writing")
|
||||||
|
go ff(iterator)
|
||||||
for i := 0; i < nwriters-1; i++ {
|
for i := 0; i < nwriters-1; i++ {
|
||||||
go ff(iterator.Split())
|
go ff(iterator.Split())
|
||||||
}
|
}
|
||||||
go ff(iterator)
|
|
||||||
|
|
||||||
next_to_send := 0
|
next_to_send := 0
|
||||||
received := make(map[int]FileChunck, 100)
|
received := make(map[int]FileChunck, 100)
|
||||||
|
@ -122,10 +122,10 @@ func WriteFastqBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options
|
|||||||
}
|
}
|
||||||
|
|
||||||
log.Println("Start of the fastq file writing")
|
log.Println("Start of the fastq file writing")
|
||||||
|
go ff(iterator)
|
||||||
for i := 0; i < nwriters-1; i++ {
|
for i := 0; i < nwriters-1; i++ {
|
||||||
go ff(iterator.Split())
|
go ff(iterator.Split())
|
||||||
}
|
}
|
||||||
go ff(iterator)
|
|
||||||
|
|
||||||
next_to_send := 0
|
next_to_send := 0
|
||||||
received := make(map[int]FileChunck, 100)
|
received := make(map[int]FileChunck, 100)
|
||||||
|
133
pkg/obiformats/ngsfilter_read.go
Normal file
133
pkg/obiformats/ngsfilter_read.go
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
package obiformats
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
|
)
|
||||||
|
|
||||||
|
type PrimerPair struct {
|
||||||
|
Forward string
|
||||||
|
Reverse string
|
||||||
|
}
|
||||||
|
|
||||||
|
type TagPair struct {
|
||||||
|
Forward string
|
||||||
|
Reverse string
|
||||||
|
}
|
||||||
|
|
||||||
|
type PCR struct {
|
||||||
|
Experiment string
|
||||||
|
Sample string
|
||||||
|
Partial bool
|
||||||
|
Annotations obiseq.Annotation
|
||||||
|
}
|
||||||
|
|
||||||
|
type PCRs map[TagPair]PCR
|
||||||
|
type NGSFilter map[PrimerPair]PCRs
|
||||||
|
|
||||||
|
func _readLines(reader io.Reader) []string {
|
||||||
|
r := bufio.NewReader(reader)
|
||||||
|
bytes := []byte{}
|
||||||
|
lines := []string{}
|
||||||
|
for {
|
||||||
|
line, isPrefix, err := r.ReadLine()
|
||||||
|
if err != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
bytes = append(bytes, line...)
|
||||||
|
if !isPrefix {
|
||||||
|
str := strings.TrimSpace(string(bytes))
|
||||||
|
if len(str) > 0 {
|
||||||
|
lines = append(lines, str)
|
||||||
|
bytes = []byte{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(bytes) > 0 {
|
||||||
|
lines = append(lines, string(bytes))
|
||||||
|
}
|
||||||
|
return lines
|
||||||
|
}
|
||||||
|
|
||||||
|
func _parseMainNGSFilterTags(text string) TagPair {
|
||||||
|
|
||||||
|
tags := strings.Split(text, ":")
|
||||||
|
|
||||||
|
if len(tags) == 1 {
|
||||||
|
return TagPair{tags[0], tags[0]}
|
||||||
|
}
|
||||||
|
|
||||||
|
if tags[0] == "-" {
|
||||||
|
tags[0] = ""
|
||||||
|
}
|
||||||
|
|
||||||
|
if tags[1] == "-" {
|
||||||
|
tags[1] = ""
|
||||||
|
}
|
||||||
|
|
||||||
|
return TagPair{tags[0], tags[1]}
|
||||||
|
}
|
||||||
|
|
||||||
|
func _parseMainNGSFilter(text string) (PrimerPair, TagPair, string, string, bool) {
|
||||||
|
fields := strings.Fields(text)
|
||||||
|
|
||||||
|
tags := _parseMainNGSFilterTags(fields[2])
|
||||||
|
partial := fields[5] == "T" || fields[5] == "t"
|
||||||
|
|
||||||
|
return PrimerPair{fields[3], fields[4]},
|
||||||
|
tags,
|
||||||
|
fields[0],
|
||||||
|
fields[1],
|
||||||
|
partial
|
||||||
|
}
|
||||||
|
|
||||||
|
func ReadNGSFilter(reader io.Reader) (NGSFilter, error) {
|
||||||
|
ngsfilter := make(NGSFilter, 10)
|
||||||
|
|
||||||
|
lines := _readLines(reader)
|
||||||
|
|
||||||
|
for _, line := range lines {
|
||||||
|
line = strings.TrimSpace(line)
|
||||||
|
|
||||||
|
if strings.HasPrefix(line, "#") || len(line) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
split := strings.SplitN(line, "@", 2)
|
||||||
|
|
||||||
|
primers, tags, experiment, sample, partial := _parseMainNGSFilter(split[0])
|
||||||
|
newPCR := PCR{
|
||||||
|
Experiment: experiment,
|
||||||
|
Sample: sample,
|
||||||
|
Partial: partial,
|
||||||
|
Annotations: nil,
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(split) > 1 && len(split[1]) > 0 {
|
||||||
|
newPCR.Annotations = obiseq.GetAnnotation()
|
||||||
|
ParseOBIFeatures(split[1], newPCR.Annotations)
|
||||||
|
}
|
||||||
|
|
||||||
|
samples, ok := ngsfilter[primers]
|
||||||
|
|
||||||
|
if ok {
|
||||||
|
pcr, ok := samples[tags]
|
||||||
|
|
||||||
|
if ok {
|
||||||
|
return nil, fmt.Errorf("pair of tags %v used for samples %s in %s and %s in %s",
|
||||||
|
tags, sample, experiment, pcr.Sample, pcr.Experiment)
|
||||||
|
}
|
||||||
|
|
||||||
|
samples[tags] = newPCR
|
||||||
|
} else {
|
||||||
|
ngsfilter[primers] = make(PCRs, 1000)
|
||||||
|
ngsfilter[primers][tags] = newPCR
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ngsfilter, nil
|
||||||
|
}
|
@ -56,19 +56,26 @@ func WriteSequenceBatch(iterator obiseq.IBioSequenceBatch,
|
|||||||
file io.Writer,
|
file io.Writer,
|
||||||
options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
||||||
|
|
||||||
var newIter obiseq.IBioSequenceBatch
|
iterator = iterator.Rebatch(1000)
|
||||||
var err error
|
|
||||||
|
|
||||||
ok := iterator.Next()
|
ok := iterator.Next()
|
||||||
|
|
||||||
if ok {
|
if ok {
|
||||||
iterator.PushBack()
|
|
||||||
batch := iterator.Get()
|
batch := iterator.Get()
|
||||||
|
iterator.PushBack()
|
||||||
|
|
||||||
|
var newIter obiseq.IBioSequenceBatch
|
||||||
|
var err error
|
||||||
|
|
||||||
|
if len(batch.Slice()) > 0 {
|
||||||
if batch.Slice()[0].HasQualities() {
|
if batch.Slice()[0].HasQualities() {
|
||||||
newIter, err = WriteFastqBatch(iterator, file, options...)
|
newIter, err = WriteFastqBatch(iterator, file, options...)
|
||||||
} else {
|
} else {
|
||||||
newIter, err = WriteFastaBatch(iterator, file, options...)
|
newIter, err = WriteFastaBatch(iterator, file, options...)
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
newIter, err = WriteFastaBatch(iterator, file, options...)
|
||||||
|
}
|
||||||
|
|
||||||
return newIter, err
|
return newIter, err
|
||||||
}
|
}
|
||||||
|
1
pkg/obitools/obimultiplex/options.go
Normal file
1
pkg/obitools/obimultiplex/options.go
Normal file
@ -0,0 +1 @@
|
|||||||
|
package obimultiplex
|
@ -10,12 +10,18 @@ import (
|
|||||||
// obiseq.BioSequenceBatch containing the selected amplicon sequences.
|
// obiseq.BioSequenceBatch containing the selected amplicon sequences.
|
||||||
func PCR(iterator obiseq.IBioSequenceBatch) (obiseq.IBioSequenceBatch, error) {
|
func PCR(iterator obiseq.IBioSequenceBatch) (obiseq.IBioSequenceBatch, error) {
|
||||||
|
|
||||||
forward := ForwardPrimer()
|
|
||||||
reverse := ReversePrimer()
|
|
||||||
opts := make([]obiapat.WithOption, 0, 10)
|
opts := make([]obiapat.WithOption, 0, 10)
|
||||||
|
|
||||||
opts = append(opts, obiapat.OptionForwardError(AllowedMismatch()),
|
opts = append(opts,
|
||||||
obiapat.OptionReverseError(AllowedMismatch()))
|
obiapat.OptionForwardPrimer(
|
||||||
|
ForwardPrimer(),
|
||||||
|
AllowedMismatch(),
|
||||||
|
),
|
||||||
|
obiapat.OptionReversePrimer(
|
||||||
|
ReversePrimer(),
|
||||||
|
AllowedMismatch(),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
if MinLength() > 0 {
|
if MinLength() > 0 {
|
||||||
opts = append(opts, obiapat.OptionMinLength(MinLength()))
|
opts = append(opts, obiapat.OptionMinLength(MinLength()))
|
||||||
@ -29,7 +35,7 @@ func PCR(iterator obiseq.IBioSequenceBatch) (obiseq.IBioSequenceBatch, error) {
|
|||||||
opts = append(opts, obiapat.OptionCircular(Circular()))
|
opts = append(opts, obiapat.OptionCircular(Circular()))
|
||||||
}
|
}
|
||||||
|
|
||||||
worker := obiapat.PCRSliceWorker(forward, reverse, opts...)
|
worker := obiapat.PCRSliceWorker(opts...)
|
||||||
|
|
||||||
return iterator.MakeISliceWorker(worker), nil
|
return iterator.MakeISliceWorker(worker), nil
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user