mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-12-08 16:50:27 +00:00
Adds a reader for NGS filter files and changes some APIs of the apat library
This commit is contained in:
@@ -184,8 +184,8 @@ func ReadEMBLBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenceB
|
||||
|
||||
newIter := obiseq.MakeIBioSequenceBatch(opt.BufferSize())
|
||||
|
||||
// newIter.Add(opt.ParallelWorkers())
|
||||
newIter.Add(2)
|
||||
nworkers := opt.ParallelWorkers()
|
||||
newIter.Add(nworkers)
|
||||
|
||||
go func() {
|
||||
newIter.Wait()
|
||||
@@ -196,7 +196,7 @@ func ReadEMBLBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenceB
|
||||
}()
|
||||
|
||||
// for j := 0; j < opt.ParallelWorkers(); j++ {
|
||||
for j := 0; j < 2; j++ {
|
||||
for j := 0; j < nworkers; j++ {
|
||||
go _ParseEmblFile(entry_channel, newIter)
|
||||
}
|
||||
|
||||
|
||||
@@ -173,16 +173,11 @@ func __is_false__(text []byte) bool {
|
||||
bytes.Equal(text, __FALSE__)
|
||||
}
|
||||
|
||||
func ParseFastSeqOBIHeader(sequence obiseq.BioSequence) {
|
||||
definition := []byte(sequence.Definition())
|
||||
annotations := sequence.Annotations()
|
||||
|
||||
// all_matches := __obi_header_pattern__.FindAllSubmatchIndex(definition, -1)
|
||||
func ParseOBIFeatures(text string, annotations obiseq.Annotation) string {
|
||||
|
||||
definition := []byte(text)
|
||||
d := definition
|
||||
|
||||
//for m := __obi_header_key_pattern__.FindIndex(definition); len(m) > 0; {
|
||||
//fmt.Println(string(definition[0:20]), __match__key__(definition))
|
||||
for m := __match__key__(definition); len(m) > 0; {
|
||||
var bvalue []byte
|
||||
var value interface{}
|
||||
@@ -263,7 +258,16 @@ func ParseFastSeqOBIHeader(sequence obiseq.BioSequence) {
|
||||
m = __match__key__(d)
|
||||
}
|
||||
|
||||
sequence.SetDefinition(string(bytes.TrimSpace(d)))
|
||||
return string(bytes.TrimSpace(d))
|
||||
}
|
||||
|
||||
func ParseFastSeqOBIHeader(sequence obiseq.BioSequence) {
|
||||
annotations := sequence.Annotations()
|
||||
|
||||
definition := ParseOBIFeatures(sequence.Definition(),
|
||||
annotations)
|
||||
|
||||
sequence.SetDefinition(definition)
|
||||
}
|
||||
|
||||
func FormatFastSeqOBIHeader(sequence obiseq.BioSequence) string {
|
||||
|
||||
@@ -120,10 +120,10 @@ func WriteFastaBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options
|
||||
}
|
||||
|
||||
log.Println("Start of the fasta file writing")
|
||||
go ff(iterator)
|
||||
for i := 0; i < nwriters-1; i++ {
|
||||
go ff(iterator.Split())
|
||||
}
|
||||
go ff(iterator)
|
||||
|
||||
next_to_send := 0
|
||||
received := make(map[int]FileChunck, 100)
|
||||
|
||||
@@ -122,10 +122,10 @@ func WriteFastqBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options
|
||||
}
|
||||
|
||||
log.Println("Start of the fastq file writing")
|
||||
go ff(iterator)
|
||||
for i := 0; i < nwriters-1; i++ {
|
||||
go ff(iterator.Split())
|
||||
}
|
||||
go ff(iterator)
|
||||
|
||||
next_to_send := 0
|
||||
received := make(map[int]FileChunck, 100)
|
||||
|
||||
133
pkg/obiformats/ngsfilter_read.go
Normal file
133
pkg/obiformats/ngsfilter_read.go
Normal file
@@ -0,0 +1,133 @@
|
||||
package obiformats
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
)
|
||||
|
||||
// PrimerPair groups the forward and reverse primer values read from one line
// of an NGS filter file. It is comparable and is used as the key type of
// NGSFilter.
type PrimerPair struct {
	Forward, Reverse string
}
|
||||
|
||||
// TagPair groups the forward and reverse tag values read from one line of an
// NGS filter file. It is comparable and is used as the key type of PCRs.
type TagPair struct {
	Forward, Reverse string
}
|
||||
|
||||
type PCR struct {
|
||||
Experiment string
|
||||
Sample string
|
||||
Partial bool
|
||||
Annotations obiseq.Annotation
|
||||
}
|
||||
|
||||
type PCRs map[TagPair]PCR
|
||||
type NGSFilter map[PrimerPair]PCRs
|
||||
|
||||
// _readLines reads reader until the first read error (normally io.EOF) and
// returns its non-blank lines with surrounding white space removed.
//
// Lines of any length are supported. Blank and white-space-only lines are
// dropped wherever they occur, including a final line that lacks a
// terminating newline. The read error itself is not reported to the caller.
// The returned slice is never nil.
func _readLines(reader io.Reader) []string {
	r := bufio.NewReader(reader)
	lines := []string{}

	for {
		// ReadString returns whatever was read before the error, so the
		// last (unterminated) line is still handled below.
		raw, err := r.ReadString('\n')

		if line := strings.TrimSpace(raw); line != "" {
			lines = append(lines, line)
		}

		if err != nil {
			return lines
		}
	}
}
|
||||
|
||||
func _parseMainNGSFilterTags(text string) TagPair {
|
||||
|
||||
tags := strings.Split(text, ":")
|
||||
|
||||
if len(tags) == 1 {
|
||||
return TagPair{tags[0], tags[0]}
|
||||
}
|
||||
|
||||
if tags[0] == "-" {
|
||||
tags[0] = ""
|
||||
}
|
||||
|
||||
if tags[1] == "-" {
|
||||
tags[1] = ""
|
||||
}
|
||||
|
||||
return TagPair{tags[0], tags[1]}
|
||||
}
|
||||
|
||||
func _parseMainNGSFilter(text string) (PrimerPair, TagPair, string, string, bool) {
|
||||
fields := strings.Fields(text)
|
||||
|
||||
tags := _parseMainNGSFilterTags(fields[2])
|
||||
partial := fields[5] == "T" || fields[5] == "t"
|
||||
|
||||
return PrimerPair{fields[3], fields[4]},
|
||||
tags,
|
||||
fields[0],
|
||||
fields[1],
|
||||
partial
|
||||
}
|
||||
|
||||
func ReadNGSFilter(reader io.Reader) (NGSFilter, error) {
|
||||
ngsfilter := make(NGSFilter, 10)
|
||||
|
||||
lines := _readLines(reader)
|
||||
|
||||
for _, line := range lines {
|
||||
line = strings.TrimSpace(line)
|
||||
|
||||
if strings.HasPrefix(line, "#") || len(line) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
split := strings.SplitN(line, "@", 2)
|
||||
|
||||
primers, tags, experiment, sample, partial := _parseMainNGSFilter(split[0])
|
||||
newPCR := PCR{
|
||||
Experiment: experiment,
|
||||
Sample: sample,
|
||||
Partial: partial,
|
||||
Annotations: nil,
|
||||
}
|
||||
|
||||
if len(split) > 1 && len(split[1]) > 0 {
|
||||
newPCR.Annotations = obiseq.GetAnnotation()
|
||||
ParseOBIFeatures(split[1], newPCR.Annotations)
|
||||
}
|
||||
|
||||
samples, ok := ngsfilter[primers]
|
||||
|
||||
if ok {
|
||||
pcr, ok := samples[tags]
|
||||
|
||||
if ok {
|
||||
return nil, fmt.Errorf("pair of tags %v used for samples %s in %s and %s in %s",
|
||||
tags, sample, experiment, pcr.Sample, pcr.Experiment)
|
||||
}
|
||||
|
||||
samples[tags] = newPCR
|
||||
} else {
|
||||
ngsfilter[primers] = make(PCRs, 1000)
|
||||
ngsfilter[primers][tags] = newPCR
|
||||
}
|
||||
}
|
||||
|
||||
return ngsfilter, nil
|
||||
}
|
||||
@@ -56,16 +56,23 @@ func WriteSequenceBatch(iterator obiseq.IBioSequenceBatch,
|
||||
file io.Writer,
|
||||
options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
||||
|
||||
var newIter obiseq.IBioSequenceBatch
|
||||
var err error
|
||||
iterator = iterator.Rebatch(1000)
|
||||
|
||||
ok := iterator.Next()
|
||||
|
||||
if ok {
|
||||
iterator.PushBack()
|
||||
batch := iterator.Get()
|
||||
if batch.Slice()[0].HasQualities() {
|
||||
newIter, err = WriteFastqBatch(iterator, file, options...)
|
||||
iterator.PushBack()
|
||||
|
||||
var newIter obiseq.IBioSequenceBatch
|
||||
var err error
|
||||
|
||||
if len(batch.Slice()) > 0 {
|
||||
if batch.Slice()[0].HasQualities() {
|
||||
newIter, err = WriteFastqBatch(iterator, file, options...)
|
||||
} else {
|
||||
newIter, err = WriteFastaBatch(iterator, file, options...)
|
||||
}
|
||||
} else {
|
||||
newIter, err = WriteFastaBatch(iterator, file, options...)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user