Files
obitools4/pkg/obiformats/fastseq_read.go

149 lines
3.1 KiB
Go
Raw Normal View History

2022-01-13 23:27:39 +01:00
package obiformats
// #cgo CFLAGS: -g -Wall
// #cgo LDFLAGS: -lz
// #include <stdlib.h>
// #include "fastseq_read.h"
import "C"
import (
"bytes"
2022-01-13 23:27:39 +01:00
"fmt"
"os"
"unsafe"
2022-02-24 12:14:52 +01:00
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
2022-01-13 23:43:01 +01:00
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
2022-01-13 23:27:39 +01:00
)
2022-01-16 00:21:42 +01:00
func _FastseqReader(seqfile C.fast_kseq_p,
2023-01-22 22:04:17 +01:00
iterator obiiter.IBioSequence,
2022-01-13 23:27:39 +01:00
batch_size int) {
var comment string
i := 0
ii := 0
slice := obiseq.MakeBioSequenceSlice()
2022-01-13 23:27:39 +01:00
2022-09-29 14:48:05 +02:00
for l := int64(C.next_fast_sek(seqfile)); l != 0; l = int64(C.next_fast_sek(seqfile)) {
2022-01-13 23:27:39 +01:00
s := seqfile.seq
sequence := C.GoBytes(unsafe.Pointer(s.seq.s), C.int(s.seq.l))
2022-01-13 23:27:39 +01:00
name := C.GoString(s.name.s)
if s.comment.l > C.ulong(0) {
comment = C.GoString(s.comment.s)
} else {
comment = ""
}
rep := obiseq.NewBioSequence(name, bytes.ToLower(sequence), comment)
2022-01-13 23:27:39 +01:00
if s.qual.l > C.ulong(0) {
cquality := unsafe.Slice(s.qual.s, C.int(s.qual.l))
2022-01-13 23:27:39 +01:00
l := int(s.qual.l)
quality := obiseq.GetSlice(l)
2022-01-13 23:27:39 +01:00
shift := uint8(seqfile.shift)
2022-01-13 23:27:39 +01:00
for j := 0; j < l; j++ {
func() {
defer func() {
if err := recover(); err != nil {
log.Println("cquality:", cquality,
"s.qual.s:", s.qual.s,
"quality:", quality)
log.Panic("panic occurred:", err)
}
}()
quality = append(quality, uint8(cquality[j])-shift)
}()
2022-01-13 23:27:39 +01:00
}
rep.SetQualities(quality)
}
slice = append(slice, rep)
ii++
if ii >= batch_size {
iterator.Push(obiiter.MakeBioSequenceBatch(i, slice))
slice = obiseq.MakeBioSequenceSlice()
2022-01-13 23:27:39 +01:00
i++
ii = 0
}
2022-09-29 14:48:05 +02:00
2022-01-13 23:27:39 +01:00
}
if len(slice) > 0 {
iterator.Push(obiiter.MakeBioSequenceBatch(i, slice))
2022-01-13 23:27:39 +01:00
}
iterator.Done()
}
2023-01-22 22:04:17 +01:00
func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
2022-01-13 23:27:39 +01:00
opt := MakeOptions(options)
name := C.CString(filename)
defer C.free(unsafe.Pointer(name))
pointer := C.open_fast_sek_file(name, C.int32_t(opt.QualityShift()))
var err error
err = nil
if pointer == nil {
2022-01-14 16:10:19 +01:00
err = fmt.Errorf("cannot open file %s", filename)
2023-01-22 22:04:17 +01:00
return obiiter.NilIBioSequence, err
2022-01-13 23:27:39 +01:00
}
size := int64(-1)
fi, err := os.Stat(filename)
if err == nil {
size = fi.Size()
2022-02-24 12:14:52 +01:00
log.Debugf("File size of %s is %d bytes\n", filename, size)
2022-01-13 23:27:39 +01:00
} else {
size = -1
}
2023-01-22 22:04:17 +01:00
newIter := obiiter.MakeIBioSequence(opt.BufferSize())
2022-01-14 17:32:12 +01:00
newIter.Add(1)
2022-01-13 23:27:39 +01:00
go func() {
newIter.WaitAndClose()
2022-02-24 12:14:52 +01:00
log.Debugln("End of the fastq file reading")
2022-01-13 23:27:39 +01:00
}()
2022-02-24 12:14:52 +01:00
log.Debugln("Start of the fastq file reading")
2022-01-13 23:27:39 +01:00
2022-01-16 00:21:42 +01:00
go _FastseqReader(pointer, newIter, opt.BatchSize())
2022-01-13 23:27:39 +01:00
parser := opt.ParseFastSeqHeader()
2022-02-09 22:00:38 +01:00
2022-01-13 23:27:39 +01:00
if parser != nil {
2022-01-14 17:32:12 +01:00
return IParseFastSeqHeaderBatch(newIter, options...), err
2022-02-18 22:53:09 +01:00
}
2022-01-13 23:27:39 +01:00
2022-01-14 17:32:12 +01:00
return newIter, err
2022-01-13 23:27:39 +01:00
}
2023-01-22 22:04:17 +01:00
func ReadFastSeqFromStdin(options ...WithOption) obiiter.IBioSequence {
2022-01-13 23:27:39 +01:00
opt := MakeOptions(options)
2023-01-22 22:04:17 +01:00
newIter := obiiter.MakeIBioSequence(opt.BufferSize())
2022-01-13 23:27:39 +01:00
2022-01-14 17:32:12 +01:00
newIter.Add(1)
2022-01-13 23:27:39 +01:00
go func() {
newIter.WaitAndClose()
2022-01-13 23:27:39 +01:00
}()
go _FastseqReader(C.open_fast_sek_stdin(C.int32_t(opt.QualityShift())),
newIter, opt.BatchSize())
2022-01-13 23:27:39 +01:00
2022-09-29 14:48:05 +02:00
parser := opt.ParseFastSeqHeader()
if parser != nil {
return IParseFastSeqHeaderBatch(newIter, options...)
}
2022-01-14 17:32:12 +01:00
return newIter
2022-01-13 23:27:39 +01:00
}