Big change in the data model, and a first version of obiuniq

This commit is contained in:
2022-02-21 19:00:23 +01:00
parent 9737f97084
commit 2e7c1834b0
43 changed files with 664 additions and 440 deletions

View File

@@ -31,11 +31,12 @@ func WriterDispatcher(prototypename string,
}
out, err := formater(data,
fmt.Sprintf(prototypename, newflux),
fmt.Sprintf(prototypename, dispatcher.Classifier().Value(newflux)),
options...)
if err != nil {
log.Fatalf("cannot open the output file for key %d", newflux)
log.Fatalf("cannot open the output file for key %s",
dispatcher.Classifier().Value(newflux))
}
out.Recycle()

View File

@@ -35,12 +35,12 @@ func __readline__(stream io.Reader) string {
return string(line[0:i])
}
func __read_ecopcr_bioseq__(file *__ecopcr_file__) (obiseq.BioSequence, error) {
func __read_ecopcr_bioseq__(file *__ecopcr_file__) (*obiseq.BioSequence, error) {
record, err := file.csv.Read()
if err != nil {
return obiseq.NilBioSequence, err
return nil, err
}
name := strings.TrimSpace(record[0])
@@ -65,7 +65,7 @@ func __read_ecopcr_bioseq__(file *__ecopcr_file__) (obiseq.BioSequence, error) {
comment = strings.TrimSpace(record[19])
}
bseq := obiseq.MakeBioSequence(name, sequence, comment)
bseq := obiseq.NewBioSequence(name, sequence, comment)
annotation := bseq.Annotations()
annotation["ac"] = name
@@ -168,7 +168,7 @@ func ReadEcoPCRBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenc
go func() {
newIter.Wait()
close(newIter.Channel())
newIter.Close()
}()
go func() {
@@ -181,9 +181,8 @@ func ReadEcoPCRBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenc
slice = append(slice, seq)
ii++
if ii >= opt.BatchSize() {
newIter.Channel() <- obiseq.MakeBioSequenceBatch(i, slice...)
slice = make(obiseq.BioSequenceSlice, 0, opt.BatchSize())
newIter.Push(obiseq.MakeBioSequenceBatch(i, slice))
slice = obiseq.MakeBioSequenceSlice()
i++
ii = 0
}
@@ -192,7 +191,7 @@ func ReadEcoPCRBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenc
}
if len(slice) > 0 {
newIter.Channel() <- obiseq.MakeBioSequenceBatch(i, slice...)
newIter.Push(obiseq.MakeBioSequenceBatch(i, slice))
}
newIter.Done()

View File

@@ -9,7 +9,6 @@ import (
"os"
"strconv"
"strings"
"time"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
)
@@ -124,7 +123,7 @@ func _ParseEmblFile(input <-chan _FileChunk, out obiseq.IBioSequenceBatch) {
seqBytes.WriteString(parts[i])
}
case line == "//":
sequence := obiseq.MakeBioSequence(id,
sequence := obiseq.NewBioSequence(id,
seqBytes.Bytes(),
defBytes.String())
@@ -140,8 +139,7 @@ func _ParseEmblFile(input <-chan _FileChunk, out obiseq.IBioSequenceBatch) {
seqBytes = new(bytes.Buffer)
}
}
out.Channel() <- obiseq.MakeBioSequenceBatch(order, sequences...)
out.Push(obiseq.MakeBioSequenceBatch(order, sequences))
}
out.Done()
@@ -188,11 +186,7 @@ func ReadEMBLBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenceB
newIter.Add(nworkers)
go func() {
newIter.Wait()
for len(newIter.Channel()) > 0 {
time.Sleep(time.Millisecond)
}
close(newIter.Channel())
newIter.WaitAndClose()
}()
// for j := 0; j < opt.ParallelWorkers(); j++ {

View File

@@ -6,7 +6,7 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
)
func ParseGuessedFastSeqHeader(sequence obiseq.BioSequence) {
func ParseGuessedFastSeqHeader(sequence *obiseq.BioSequence) {
if strings.HasPrefix(sequence.Definition(), "{") {
ParseFastSeqJsonHeader(sequence)
} else {

View File

@@ -2,4 +2,4 @@ package obiformats
import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
type FormatHeader func(sequence obiseq.BioSequence) string
type FormatHeader func(sequence *obiseq.BioSequence) string

View File

@@ -49,12 +49,12 @@ func _parse_json_header_(header string, annotations obiseq.Annotation) string {
return strings.TrimSpace(header[stop:])
}
func ParseFastSeqJsonHeader(sequence obiseq.BioSequence) {
func ParseFastSeqJsonHeader(sequence *obiseq.BioSequence) {
sequence.SetDefinition(_parse_json_header_(sequence.Definition(),
sequence.Annotations()))
}
func FormatFastSeqJsonHeader(sequence obiseq.BioSequence) string {
func FormatFastSeqJsonHeader(sequence *obiseq.BioSequence) string {
annotations := sequence.Annotations()
if annotations != nil {

View File

@@ -261,7 +261,7 @@ func ParseOBIFeatures(text string, annotations obiseq.Annotation) string {
return string(bytes.TrimSpace(d))
}
func ParseFastSeqOBIHeader(sequence obiseq.BioSequence) {
func ParseFastSeqOBIHeader(sequence *obiseq.BioSequence) {
annotations := sequence.Annotations()
definition := ParseOBIFeatures(sequence.Definition(),
@@ -270,7 +270,7 @@ func ParseFastSeqOBIHeader(sequence obiseq.BioSequence) {
sequence.SetDefinition(definition)
}
func FormatFastSeqOBIHeader(sequence obiseq.BioSequence) string {
func FormatFastSeqOBIHeader(sequence *obiseq.BioSequence) string {
annotations := sequence.Annotations()
if annotations != nil {

View File

@@ -10,7 +10,6 @@ import (
"fmt"
"log"
"os"
"time"
"unsafe"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/cutils"
@@ -24,7 +23,7 @@ func _FastseqReader(seqfile C.fast_kseq_p,
i := 0
ii := 0
slice := obiseq.GetBioSequenceSlice()
slice := obiseq.MakeBioSequenceSlice()
for l := int64(C.next_fast_sek(seqfile)); l > 0; l = int64(C.next_fast_sek(seqfile)) {
@@ -45,7 +44,7 @@ func _FastseqReader(seqfile C.fast_kseq_p,
comment = ""
}
rep := obiseq.MakeBioSequence(name, sequence, comment)
rep := obiseq.NewBioSequence(name, sequence, comment)
if s.qual.l > C.ulong(0) {
cquality := cutils.ByteSlice(unsafe.Pointer(s.qual.s), int(s.qual.l))
@@ -64,17 +63,17 @@ func _FastseqReader(seqfile C.fast_kseq_p,
// log.Printf("\n==> Pushing sequence batch\n")
// start := time.Now()
iterator.Channel() <- obiseq.MakeBioSequenceBatch(i, slice...)
iterator.Push(obiseq.MakeBioSequenceBatch(i, slice))
// elapsed := time.Since(start)
// log.Printf("\n==>sequences pushed after %s\n", elapsed)
slice = make(obiseq.BioSequenceSlice, 0, batch_size)
slice = obiseq.MakeBioSequenceSlice()
i++
ii = 0
}
}
if len(slice) > 0 {
iterator.Channel() <- obiseq.MakeBioSequenceBatch(i, slice...)
iterator.Push(obiseq.MakeBioSequenceBatch(i, slice))
}
iterator.Done()
@@ -109,12 +108,7 @@ func ReadFastSeqBatchFromFile(filename string, options ...WithOption) (obiseq.IB
newIter.Add(1)
go func() {
newIter.Wait()
for len(newIter.Channel()) > 0 {
time.Sleep(time.Millisecond)
}
close(newIter.Channel())
newIter.WaitAndClose()
log.Println("End of the fastq file reading")
}()
@@ -142,8 +136,7 @@ func ReadFastSeqBatchFromStdin(options ...WithOption) obiseq.IBioSequenceBatch {
newIter.Add(1)
go func() {
newIter.Wait()
close(newIter.Channel())
newIter.WaitAndClose()
}()
go _FastseqReader(C.open_fast_sek_stdin(C.int32_t(opt.QualityShift())), newIter, opt.BatchSize())

View File

@@ -7,7 +7,6 @@ import (
"log"
"os"
"strings"
"time"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
)
@@ -19,9 +18,13 @@ func min(x, y int) int {
return y
}
func FormatFasta(seq obiseq.BioSequence, formater FormatHeader) string {
func FormatFasta(seq *obiseq.BioSequence, formater FormatHeader) string {
var fragments strings.Builder
if seq==nil {
log.Panicln("try to format a nil BioSequence")
}
s := seq.Sequence()
l := len(s)
@@ -106,16 +109,8 @@ func WriteFastaBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options
newIter.Add(nwriters)
go func() {
newIter.Wait()
for len(chunkchan) > 0 {
time.Sleep(time.Millisecond)
}
newIter.WaitAndClose()
close(chunkchan)
for len(newIter.Channel()) > 0 {
time.Sleep(time.Millisecond)
}
close(newIter.Channel())
}()
ff := func(iterator obiseq.IBioSequenceBatch) {
@@ -125,7 +120,7 @@ func WriteFastaBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options
FormatFastaBatch(batch, header_format),
batch.Order(),
}
newIter.Channel() <- batch
newIter.Push(batch)
}
newIter.Done()
}
@@ -156,7 +151,7 @@ func WriteFastaBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options
}
}
if opt.CloseFile() {
switch file := file.(type) {
case *os.File:

View File

@@ -11,7 +11,7 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
)
func FormatFastq(seq obiseq.BioSequence, quality_shift int, formater FormatHeader) string {
func FormatFastq(seq *obiseq.BioSequence, quality_shift int, formater FormatHeader) string {
l := seq.Length()
q := seq.Qualities()
@@ -106,15 +106,11 @@ func WriteFastqBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options
newIter.Add(nwriters)
go func() {
newIter.Wait()
newIter.WaitAndClose()
for len(chunkchan) > 0 {
time.Sleep(time.Millisecond)
}
close(chunkchan)
for len(newIter.Channel()) > 0 {
time.Sleep(time.Millisecond)
}
close(newIter.Channel())
}()
ff := func(iterator obiseq.IBioSequenceBatch) {
@@ -125,7 +121,7 @@ func WriteFastqBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options
batch.Order(),
}
chunkchan <- chunk
newIter.Channel() <- batch
newIter.Push(batch)
}
newIter.Done()
}

View File

@@ -6,7 +6,7 @@ import (
type __options__ struct {
fastseq_header_parser obiseq.SeqAnnotator
fastseq_header_writer func(obiseq.BioSequence) string
fastseq_header_writer func(*obiseq.BioSequence) string
with_progress_bar bool
buffer_size int
batch_size int
@@ -62,7 +62,7 @@ func (opt Options) ParseFastSeqHeader() obiseq.SeqAnnotator {
return opt.pointer.fastseq_header_parser
}
func (opt Options) FormatFastSeqHeader() func(obiseq.BioSequence) string {
func (opt Options) FormatFastSeqHeader() func(*obiseq.BioSequence) string {
return opt.pointer.fastseq_header_writer
}
@@ -141,7 +141,7 @@ func OptionsFastSeqDefaultHeaderParser() WithOption {
// OptionsFastSeqHeaderFormat allows for specifying the format
// used to write FASTA and FASTQ sequences.
func OptionsFastSeqHeaderFormat(format func(obiseq.BioSequence) string) WithOption {
func OptionsFastSeqHeaderFormat(format func(*obiseq.BioSequence) string) WithOption {
f := WithOption(func(opt Options) {
opt.pointer.fastseq_header_writer = format
})

View File

@@ -66,7 +66,7 @@ func ReadSequencesBatchFromFile(filename string, options ...WithOption) (obiseq.
if len(tag) < 30 {
newIter := obiseq.MakeIBioSequenceBatch()
close(newIter.Channel())
newIter.Close()
return newIter, nil
}