mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-12-08 16:50:27 +00:00
Big change in the data model, and a first version of obiuniq
This commit is contained in:
@@ -31,11 +31,12 @@ func WriterDispatcher(prototypename string,
|
||||
}
|
||||
|
||||
out, err := formater(data,
|
||||
fmt.Sprintf(prototypename, newflux),
|
||||
fmt.Sprintf(prototypename, dispatcher.Classifier().Value(newflux)),
|
||||
options...)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("cannot open the output file for key %d", newflux)
|
||||
log.Fatalf("cannot open the output file for key %s",
|
||||
dispatcher.Classifier().Value(newflux))
|
||||
}
|
||||
|
||||
out.Recycle()
|
||||
|
||||
@@ -35,12 +35,12 @@ func __readline__(stream io.Reader) string {
|
||||
return string(line[0:i])
|
||||
}
|
||||
|
||||
func __read_ecopcr_bioseq__(file *__ecopcr_file__) (obiseq.BioSequence, error) {
|
||||
func __read_ecopcr_bioseq__(file *__ecopcr_file__) (*obiseq.BioSequence, error) {
|
||||
|
||||
record, err := file.csv.Read()
|
||||
|
||||
if err != nil {
|
||||
return obiseq.NilBioSequence, err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
name := strings.TrimSpace(record[0])
|
||||
@@ -65,7 +65,7 @@ func __read_ecopcr_bioseq__(file *__ecopcr_file__) (obiseq.BioSequence, error) {
|
||||
comment = strings.TrimSpace(record[19])
|
||||
}
|
||||
|
||||
bseq := obiseq.MakeBioSequence(name, sequence, comment)
|
||||
bseq := obiseq.NewBioSequence(name, sequence, comment)
|
||||
annotation := bseq.Annotations()
|
||||
|
||||
annotation["ac"] = name
|
||||
@@ -168,7 +168,7 @@ func ReadEcoPCRBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenc
|
||||
|
||||
go func() {
|
||||
newIter.Wait()
|
||||
close(newIter.Channel())
|
||||
newIter.Close()
|
||||
}()
|
||||
|
||||
go func() {
|
||||
@@ -181,9 +181,8 @@ func ReadEcoPCRBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenc
|
||||
slice = append(slice, seq)
|
||||
ii++
|
||||
if ii >= opt.BatchSize() {
|
||||
newIter.Channel() <- obiseq.MakeBioSequenceBatch(i, slice...)
|
||||
slice = make(obiseq.BioSequenceSlice, 0, opt.BatchSize())
|
||||
|
||||
newIter.Push(obiseq.MakeBioSequenceBatch(i, slice))
|
||||
slice = obiseq.MakeBioSequenceSlice()
|
||||
i++
|
||||
ii = 0
|
||||
}
|
||||
@@ -192,7 +191,7 @@ func ReadEcoPCRBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenc
|
||||
}
|
||||
|
||||
if len(slice) > 0 {
|
||||
newIter.Channel() <- obiseq.MakeBioSequenceBatch(i, slice...)
|
||||
newIter.Push(obiseq.MakeBioSequenceBatch(i, slice))
|
||||
}
|
||||
|
||||
newIter.Done()
|
||||
|
||||
@@ -9,7 +9,6 @@ import (
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
)
|
||||
@@ -124,7 +123,7 @@ func _ParseEmblFile(input <-chan _FileChunk, out obiseq.IBioSequenceBatch) {
|
||||
seqBytes.WriteString(parts[i])
|
||||
}
|
||||
case line == "//":
|
||||
sequence := obiseq.MakeBioSequence(id,
|
||||
sequence := obiseq.NewBioSequence(id,
|
||||
seqBytes.Bytes(),
|
||||
defBytes.String())
|
||||
|
||||
@@ -140,8 +139,7 @@ func _ParseEmblFile(input <-chan _FileChunk, out obiseq.IBioSequenceBatch) {
|
||||
seqBytes = new(bytes.Buffer)
|
||||
}
|
||||
}
|
||||
out.Channel() <- obiseq.MakeBioSequenceBatch(order, sequences...)
|
||||
|
||||
out.Push(obiseq.MakeBioSequenceBatch(order, sequences))
|
||||
}
|
||||
|
||||
out.Done()
|
||||
@@ -188,11 +186,7 @@ func ReadEMBLBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenceB
|
||||
newIter.Add(nworkers)
|
||||
|
||||
go func() {
|
||||
newIter.Wait()
|
||||
for len(newIter.Channel()) > 0 {
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
close(newIter.Channel())
|
||||
newIter.WaitAndClose()
|
||||
}()
|
||||
|
||||
// for j := 0; j < opt.ParallelWorkers(); j++ {
|
||||
|
||||
@@ -6,7 +6,7 @@ import (
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
)
|
||||
|
||||
func ParseGuessedFastSeqHeader(sequence obiseq.BioSequence) {
|
||||
func ParseGuessedFastSeqHeader(sequence *obiseq.BioSequence) {
|
||||
if strings.HasPrefix(sequence.Definition(), "{") {
|
||||
ParseFastSeqJsonHeader(sequence)
|
||||
} else {
|
||||
|
||||
@@ -2,4 +2,4 @@ package obiformats
|
||||
|
||||
import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
|
||||
type FormatHeader func(sequence obiseq.BioSequence) string
|
||||
type FormatHeader func(sequence *obiseq.BioSequence) string
|
||||
|
||||
@@ -49,12 +49,12 @@ func _parse_json_header_(header string, annotations obiseq.Annotation) string {
|
||||
return strings.TrimSpace(header[stop:])
|
||||
}
|
||||
|
||||
func ParseFastSeqJsonHeader(sequence obiseq.BioSequence) {
|
||||
func ParseFastSeqJsonHeader(sequence *obiseq.BioSequence) {
|
||||
sequence.SetDefinition(_parse_json_header_(sequence.Definition(),
|
||||
sequence.Annotations()))
|
||||
}
|
||||
|
||||
func FormatFastSeqJsonHeader(sequence obiseq.BioSequence) string {
|
||||
func FormatFastSeqJsonHeader(sequence *obiseq.BioSequence) string {
|
||||
annotations := sequence.Annotations()
|
||||
|
||||
if annotations != nil {
|
||||
|
||||
@@ -261,7 +261,7 @@ func ParseOBIFeatures(text string, annotations obiseq.Annotation) string {
|
||||
return string(bytes.TrimSpace(d))
|
||||
}
|
||||
|
||||
func ParseFastSeqOBIHeader(sequence obiseq.BioSequence) {
|
||||
func ParseFastSeqOBIHeader(sequence *obiseq.BioSequence) {
|
||||
annotations := sequence.Annotations()
|
||||
|
||||
definition := ParseOBIFeatures(sequence.Definition(),
|
||||
@@ -270,7 +270,7 @@ func ParseFastSeqOBIHeader(sequence obiseq.BioSequence) {
|
||||
sequence.SetDefinition(definition)
|
||||
}
|
||||
|
||||
func FormatFastSeqOBIHeader(sequence obiseq.BioSequence) string {
|
||||
func FormatFastSeqOBIHeader(sequence *obiseq.BioSequence) string {
|
||||
annotations := sequence.Annotations()
|
||||
|
||||
if annotations != nil {
|
||||
|
||||
@@ -10,7 +10,6 @@ import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/cutils"
|
||||
@@ -24,7 +23,7 @@ func _FastseqReader(seqfile C.fast_kseq_p,
|
||||
i := 0
|
||||
ii := 0
|
||||
|
||||
slice := obiseq.GetBioSequenceSlice()
|
||||
slice := obiseq.MakeBioSequenceSlice()
|
||||
|
||||
for l := int64(C.next_fast_sek(seqfile)); l > 0; l = int64(C.next_fast_sek(seqfile)) {
|
||||
|
||||
@@ -45,7 +44,7 @@ func _FastseqReader(seqfile C.fast_kseq_p,
|
||||
comment = ""
|
||||
}
|
||||
|
||||
rep := obiseq.MakeBioSequence(name, sequence, comment)
|
||||
rep := obiseq.NewBioSequence(name, sequence, comment)
|
||||
|
||||
if s.qual.l > C.ulong(0) {
|
||||
cquality := cutils.ByteSlice(unsafe.Pointer(s.qual.s), int(s.qual.l))
|
||||
@@ -64,17 +63,17 @@ func _FastseqReader(seqfile C.fast_kseq_p,
|
||||
// log.Printf("\n==> Pushing sequence batch\n")
|
||||
// start := time.Now()
|
||||
|
||||
iterator.Channel() <- obiseq.MakeBioSequenceBatch(i, slice...)
|
||||
iterator.Push(obiseq.MakeBioSequenceBatch(i, slice))
|
||||
// elapsed := time.Since(start)
|
||||
// log.Printf("\n==>sequences pushed after %s\n", elapsed)
|
||||
|
||||
slice = make(obiseq.BioSequenceSlice, 0, batch_size)
|
||||
slice = obiseq.MakeBioSequenceSlice()
|
||||
i++
|
||||
ii = 0
|
||||
}
|
||||
}
|
||||
if len(slice) > 0 {
|
||||
iterator.Channel() <- obiseq.MakeBioSequenceBatch(i, slice...)
|
||||
iterator.Push(obiseq.MakeBioSequenceBatch(i, slice))
|
||||
}
|
||||
iterator.Done()
|
||||
|
||||
@@ -109,12 +108,7 @@ func ReadFastSeqBatchFromFile(filename string, options ...WithOption) (obiseq.IB
|
||||
newIter.Add(1)
|
||||
|
||||
go func() {
|
||||
newIter.Wait()
|
||||
for len(newIter.Channel()) > 0 {
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
close(newIter.Channel())
|
||||
|
||||
newIter.WaitAndClose()
|
||||
log.Println("End of the fastq file reading")
|
||||
}()
|
||||
|
||||
@@ -142,8 +136,7 @@ func ReadFastSeqBatchFromStdin(options ...WithOption) obiseq.IBioSequenceBatch {
|
||||
newIter.Add(1)
|
||||
|
||||
go func() {
|
||||
newIter.Wait()
|
||||
close(newIter.Channel())
|
||||
newIter.WaitAndClose()
|
||||
}()
|
||||
|
||||
go _FastseqReader(C.open_fast_sek_stdin(C.int32_t(opt.QualityShift())), newIter, opt.BatchSize())
|
||||
|
||||
@@ -7,7 +7,6 @@ import (
|
||||
"log"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
)
|
||||
@@ -19,9 +18,13 @@ func min(x, y int) int {
|
||||
return y
|
||||
}
|
||||
|
||||
func FormatFasta(seq obiseq.BioSequence, formater FormatHeader) string {
|
||||
func FormatFasta(seq *obiseq.BioSequence, formater FormatHeader) string {
|
||||
var fragments strings.Builder
|
||||
|
||||
if seq==nil {
|
||||
log.Panicln("try to format a nil BioSequence")
|
||||
}
|
||||
|
||||
s := seq.Sequence()
|
||||
l := len(s)
|
||||
|
||||
@@ -106,16 +109,8 @@ func WriteFastaBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options
|
||||
newIter.Add(nwriters)
|
||||
|
||||
go func() {
|
||||
newIter.Wait()
|
||||
for len(chunkchan) > 0 {
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
newIter.WaitAndClose()
|
||||
close(chunkchan)
|
||||
for len(newIter.Channel()) > 0 {
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
close(newIter.Channel())
|
||||
|
||||
}()
|
||||
|
||||
ff := func(iterator obiseq.IBioSequenceBatch) {
|
||||
@@ -125,7 +120,7 @@ func WriteFastaBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options
|
||||
FormatFastaBatch(batch, header_format),
|
||||
batch.Order(),
|
||||
}
|
||||
newIter.Channel() <- batch
|
||||
newIter.Push(batch)
|
||||
}
|
||||
newIter.Done()
|
||||
}
|
||||
@@ -156,7 +151,7 @@ func WriteFastaBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
if opt.CloseFile() {
|
||||
switch file := file.(type) {
|
||||
case *os.File:
|
||||
|
||||
@@ -11,7 +11,7 @@ import (
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
)
|
||||
|
||||
func FormatFastq(seq obiseq.BioSequence, quality_shift int, formater FormatHeader) string {
|
||||
func FormatFastq(seq *obiseq.BioSequence, quality_shift int, formater FormatHeader) string {
|
||||
|
||||
l := seq.Length()
|
||||
q := seq.Qualities()
|
||||
@@ -106,15 +106,11 @@ func WriteFastqBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options
|
||||
newIter.Add(nwriters)
|
||||
|
||||
go func() {
|
||||
newIter.Wait()
|
||||
newIter.WaitAndClose()
|
||||
for len(chunkchan) > 0 {
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
close(chunkchan)
|
||||
for len(newIter.Channel()) > 0 {
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
close(newIter.Channel())
|
||||
}()
|
||||
|
||||
ff := func(iterator obiseq.IBioSequenceBatch) {
|
||||
@@ -125,7 +121,7 @@ func WriteFastqBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options
|
||||
batch.Order(),
|
||||
}
|
||||
chunkchan <- chunk
|
||||
newIter.Channel() <- batch
|
||||
newIter.Push(batch)
|
||||
}
|
||||
newIter.Done()
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ import (
|
||||
|
||||
type __options__ struct {
|
||||
fastseq_header_parser obiseq.SeqAnnotator
|
||||
fastseq_header_writer func(obiseq.BioSequence) string
|
||||
fastseq_header_writer func(*obiseq.BioSequence) string
|
||||
with_progress_bar bool
|
||||
buffer_size int
|
||||
batch_size int
|
||||
@@ -62,7 +62,7 @@ func (opt Options) ParseFastSeqHeader() obiseq.SeqAnnotator {
|
||||
return opt.pointer.fastseq_header_parser
|
||||
}
|
||||
|
||||
func (opt Options) FormatFastSeqHeader() func(obiseq.BioSequence) string {
|
||||
func (opt Options) FormatFastSeqHeader() func(*obiseq.BioSequence) string {
|
||||
return opt.pointer.fastseq_header_writer
|
||||
}
|
||||
|
||||
@@ -141,7 +141,7 @@ func OptionsFastSeqDefaultHeaderParser() WithOption {
|
||||
|
||||
// OptionsFastSeqHeaderFormat allows for specifying the format
|
||||
// used to write FASTA and FASTQ sequences.
|
||||
func OptionsFastSeqHeaderFormat(format func(obiseq.BioSequence) string) WithOption {
|
||||
func OptionsFastSeqHeaderFormat(format func(*obiseq.BioSequence) string) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.fastseq_header_writer = format
|
||||
})
|
||||
|
||||
@@ -66,7 +66,7 @@ func ReadSequencesBatchFromFile(filename string, options ...WithOption) (obiseq.
|
||||
|
||||
if len(tag) < 30 {
|
||||
newIter := obiseq.MakeIBioSequenceBatch()
|
||||
close(newIter.Channel())
|
||||
newIter.Close()
|
||||
return newIter, nil
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user