mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Refactoring codes for removing buffer size options. An some other changes...
Former-commit-id: 10b57cc1a27446ade3c444217341e9651e89cdce
This commit is contained in:
248
pkg/obiformats/csv_writer.go
Normal file
248
pkg/obiformats/csv_writer.go
Normal file
@ -0,0 +1,248 @@
|
||||
package obiformats
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/csv"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
func CSVRecord(sequence *obiseq.BioSequence, opt Options) []string {
|
||||
keys := opt.CSVKeys()
|
||||
record := make([]string, 0, len(keys)+4)
|
||||
|
||||
if opt.CSVId() {
|
||||
record = append(record, sequence.Id())
|
||||
}
|
||||
|
||||
if opt.CSVCount() {
|
||||
record = append(record, fmt.Sprint(sequence.Count()))
|
||||
}
|
||||
|
||||
if opt.CSVTaxon() {
|
||||
taxid := sequence.Taxid()
|
||||
sn, ok := sequence.GetAttribute("scientific_name")
|
||||
|
||||
if !ok {
|
||||
if taxid == 1 {
|
||||
sn = "root"
|
||||
} else {
|
||||
sn = opt.CSVNAValue()
|
||||
}
|
||||
}
|
||||
|
||||
record = append(record, fmt.Sprint(taxid), fmt.Sprint(sn))
|
||||
}
|
||||
|
||||
if opt.CSVDefinition() {
|
||||
record = append(record, sequence.Definition())
|
||||
}
|
||||
|
||||
for _, key := range opt.CSVKeys() {
|
||||
value, ok := sequence.GetAttribute(key)
|
||||
if !ok {
|
||||
value = opt.CSVNAValue()
|
||||
}
|
||||
|
||||
svalue, _ := goutils.InterfaceToString(value)
|
||||
record = append(record, svalue)
|
||||
}
|
||||
|
||||
if opt.CSVSequence() {
|
||||
record = append(record, string(sequence.Sequence()))
|
||||
}
|
||||
|
||||
if opt.CSVQuality() {
|
||||
if sequence.HasQualities() {
|
||||
l := sequence.Len()
|
||||
q := sequence.Qualities()
|
||||
ascii := make([]byte, l)
|
||||
quality_shift := opt.QualityShift()
|
||||
for j := 0; j < l; j++ {
|
||||
ascii[j] = uint8(q[j]) + uint8(quality_shift)
|
||||
}
|
||||
record = append(record, string(ascii))
|
||||
} else {
|
||||
record = append(record, opt.CSVNAValue())
|
||||
}
|
||||
}
|
||||
|
||||
return record
|
||||
}
|
||||
|
||||
func CSVHeader(opt Options) []string {
|
||||
keys := opt.CSVKeys()
|
||||
record := make([]string, 0, len(keys)+4)
|
||||
|
||||
if opt.CSVId() {
|
||||
record = append(record, "id")
|
||||
}
|
||||
|
||||
if opt.CSVCount() {
|
||||
record = append(record, "count")
|
||||
}
|
||||
|
||||
if opt.CSVTaxon() {
|
||||
record = append(record, "taxid", "scientific_name")
|
||||
}
|
||||
|
||||
if opt.CSVDefinition() {
|
||||
record = append(record, "definition")
|
||||
}
|
||||
|
||||
record = append(record, opt.CSVKeys()...)
|
||||
|
||||
if opt.CSVSequence() {
|
||||
record = append(record, "sequence")
|
||||
}
|
||||
|
||||
if opt.CSVQuality() {
|
||||
record = append(record, "quality")
|
||||
}
|
||||
|
||||
return record
|
||||
}
|
||||
|
||||
func FormatCVSBatch(batch obiiter.BioSequenceBatch, opt Options) []byte {
|
||||
buff := new(bytes.Buffer)
|
||||
csv := csv.NewWriter(buff)
|
||||
|
||||
if batch.Order() == 0 {
|
||||
csv.Write(CSVHeader(opt))
|
||||
}
|
||||
for _, s := range batch.Slice() {
|
||||
csv.Write(CSVRecord(s, opt))
|
||||
}
|
||||
|
||||
csv.Flush()
|
||||
|
||||
return buff.Bytes()
|
||||
}
|
||||
|
||||
func WriteCSV(iterator obiiter.IBioSequence,
|
||||
file io.WriteCloser,
|
||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||
opt := MakeOptions(options)
|
||||
|
||||
file, _ = goutils.CompressStream(file, opt.CompressedFile(), opt.CloseFile())
|
||||
|
||||
newIter := obiiter.MakeIBioSequence()
|
||||
|
||||
nwriters := opt.ParallelWorkers()
|
||||
|
||||
obiiter.RegisterAPipe()
|
||||
chunkchan := make(chan FileChunck)
|
||||
|
||||
newIter.Add(nwriters)
|
||||
var waitWriter sync.WaitGroup
|
||||
|
||||
go func() {
|
||||
newIter.WaitAndClose()
|
||||
for len(chunkchan) > 0 {
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
close(chunkchan)
|
||||
waitWriter.Wait()
|
||||
}()
|
||||
|
||||
ff := func(iterator obiiter.IBioSequence) {
|
||||
for iterator.Next() {
|
||||
|
||||
batch := iterator.Get()
|
||||
|
||||
chunkchan <- FileChunck{
|
||||
FormatCVSBatch(batch, opt),
|
||||
batch.Order(),
|
||||
}
|
||||
newIter.Push(batch)
|
||||
}
|
||||
newIter.Done()
|
||||
}
|
||||
|
||||
log.Debugln("Start of the CSV file writing")
|
||||
go ff(iterator)
|
||||
for i := 0; i < nwriters-1; i++ {
|
||||
go ff(iterator.Split())
|
||||
}
|
||||
|
||||
next_to_send := 0
|
||||
received := make(map[int]FileChunck, 100)
|
||||
|
||||
waitWriter.Add(1)
|
||||
go func() {
|
||||
for chunk := range chunkchan {
|
||||
if chunk.order == next_to_send {
|
||||
file.Write(chunk.text)
|
||||
next_to_send++
|
||||
chunk, ok := received[next_to_send]
|
||||
for ok {
|
||||
file.Write(chunk.text)
|
||||
delete(received, next_to_send)
|
||||
next_to_send++
|
||||
chunk, ok = received[next_to_send]
|
||||
}
|
||||
} else {
|
||||
received[chunk.order] = chunk
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
file.Close()
|
||||
|
||||
log.Debugln("End of the CSV file writing")
|
||||
obiiter.UnregisterPipe()
|
||||
waitWriter.Done()
|
||||
|
||||
}()
|
||||
|
||||
return newIter, nil
|
||||
}
|
||||
|
||||
func WriteCSVToStdout(iterator obiiter.IBioSequence,
|
||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||
options = append(options, OptionDontCloseFile())
|
||||
return WriteCSV(iterator, os.Stdout, options...)
|
||||
}
|
||||
|
||||
func WriteCSVToFile(iterator obiiter.IBioSequence,
|
||||
filename string,
|
||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||
|
||||
opt := MakeOptions(options)
|
||||
flags := os.O_WRONLY | os.O_CREATE
|
||||
|
||||
if opt.AppendFile() {
|
||||
flags |= os.O_APPEND
|
||||
}
|
||||
file, err := os.OpenFile(filename, flags, 0660)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("open file error: %v", err)
|
||||
return obiiter.NilIBioSequence, err
|
||||
}
|
||||
|
||||
options = append(options, OptionCloseFile())
|
||||
|
||||
iterator, err = WriteCSV(iterator, file, options...)
|
||||
|
||||
if opt.HaveToSavePaired() {
|
||||
var revfile *os.File
|
||||
|
||||
revfile, err = os.OpenFile(opt.PairedFileName(), flags, 0660)
|
||||
if err != nil {
|
||||
log.Fatalf("open file error: %v", err)
|
||||
return obiiter.NilIBioSequence, err
|
||||
}
|
||||
iterator, err = WriteCSV(iterator.PairedWith(), revfile, options...)
|
||||
}
|
||||
|
||||
return iterator, err
|
||||
}
|
Reference in New Issue
Block a user