Patch some bugs in writing files

Former-commit-id: 612868a281dc0ecf4e6c5776973735e5c71bd517
This commit is contained in:
Eric Coissac
2024-06-19 13:15:30 +02:00
parent 73996bb0b7
commit 818ce87bab
8 changed files with 75 additions and 22 deletions

View File

@ -226,7 +226,10 @@ func WriteFastaToFile(iterator obiiter.IBioSequence,
if opt.AppendFile() { if opt.AppendFile() {
flags |= os.O_APPEND flags |= os.O_APPEND
} else {
flags |= os.O_TRUNC
} }
file, err := os.OpenFile(filename, flags, 0660) file, err := os.OpenFile(filename, flags, 0660)
if err != nil { if err != nil {

View File

@ -32,17 +32,31 @@ func FormatFastq(seq *obiseq.BioSequence, formater FormatHeader) string {
q, q,
) )
if f[0] != '@' {
log.Panicln("FormatFastq: FASTQ format error")
}
return f return f
} }
func FormatFastqBatch(batch obiiter.BioSequenceBatch, func FormatFastqBatch(batch obiiter.BioSequenceBatch,
formater FormatHeader, skipEmpty bool) []byte { formater FormatHeader, skipEmpty bool) []byte {
var bs bytes.Buffer var bs bytes.Buffer
for _, seq := range batch.Slice() { for _, seq := range batch.Slice() {
if seq.Len() > 0 { if seq.Len() > 0 {
fs := FormatFastq(seq, formater) fs := FormatFastq(seq, formater)
bs.WriteString(fs) lb := bs.Len()
n, _ := bs.WriteString(fs)
if n < len(fs) {
log.Panicln("FormatFastqBatch: Cannot write all FASTQ sequences")
}
bs.WriteString("\n") bs.WriteString("\n")
if bs.Len()-lb < len(fs)+1 {
log.Panicln("FormatFastqBatch: Cannot write all FASTQ sequences correctly")
}
} else { } else {
if skipEmpty { if skipEmpty {
log.Warnf("Sequence %s is empty and skiped in output", seq.Id()) log.Warnf("Sequence %s is empty and skiped in output", seq.Id())
@ -53,7 +67,15 @@ func FormatFastqBatch(batch obiiter.BioSequenceBatch,
} }
return bs.Bytes() chunk := bs.Bytes()
chunk = chunk[:bs.Len()]
if chunk[0] != '@' {
log.Panicln("FormatFastqBatch: FASTQ format error")
}
return chunk
} }
type FileChunck struct { type FileChunck struct {
@ -118,16 +140,25 @@ func WriteFastq(iterator obiiter.IBioSequence,
go func() { go func() {
for chunk := range chunkchan { for chunk := range chunkchan {
if chunk.order == next_to_send { if chunk.order == next_to_send {
if chunk.text[0] != '@' {
log.Panicln("WriteFastq: FASTQ format error")
}
file.Write(chunk.text) file.Write(chunk.text)
next_to_send++ next_to_send++
chunk, ok := received[next_to_send] chunk, ok := received[next_to_send]
for ok { for ok {
if chunk.text[0] != '@' {
log.Panicln("WriteFastq: FASTQ format error")
}
file.Write(chunk.text) file.Write(chunk.text)
delete(received, next_to_send) delete(received, next_to_send)
next_to_send++ next_to_send++
chunk, ok = received[next_to_send] chunk, ok = received[next_to_send]
} }
} else { } else {
if _, ok := received[chunk.order]; ok {
log.Panicln("WriteFastq: Two chunks with the same number")
}
received[chunk.order] = chunk received[chunk.order] = chunk
} }
@ -158,7 +189,10 @@ func WriteFastqToFile(iterator obiiter.IBioSequence,
if opt.AppendFile() { if opt.AppendFile() {
flags |= os.O_APPEND flags |= os.O_APPEND
} else {
flags |= os.O_TRUNC
} }
file, err := os.OpenFile(filename, flags, 0660) file, err := os.OpenFile(filename, flags, 0660)
if err != nil { if err != nil {

View File

@ -174,7 +174,10 @@ func WriteJSONToFile(iterator obiiter.IBioSequence,
if opt.AppendFile() { if opt.AppendFile() {
flags |= os.O_APPEND flags |= os.O_APPEND
} else {
flags |= os.O_TRUNC
} }
file, err := os.OpenFile(filename, flags, 0660) file, err := os.OpenFile(filename, flags, 0660)
if err != nil { if err != nil {

View File

@ -58,7 +58,10 @@ func WriteSequencesToFile(iterator obiiter.IBioSequence,
if opt.AppendFile() { if opt.AppendFile() {
flags |= os.O_APPEND flags |= os.O_APPEND
} else {
flags |= os.O_TRUNC
} }
file, err := os.OpenFile(filename, flags, 0660) file, err := os.OpenFile(filename, flags, 0660)
if err != nil { if err != nil {

View File

@ -112,6 +112,7 @@ func lookForTag(seq string, delimiter byte) string {
func lookForRescueTag(seq string, delimiter byte, taglength, border, indel int) string { func lookForRescueTag(seq string, delimiter byte, taglength, border, indel int) string {
// log.Info("lookForRescueTag") // log.Info("lookForRescueTag")
// log.Infof("seq: %s", seq)
i := len(seq) - 1 i := len(seq) - 1
@ -126,10 +127,14 @@ func lookForRescueTag(seq string, delimiter byte, taglength, border, indel int)
delimlen++ delimlen++
} }
if obiutils.Abs(delimlen-border) > indel { if (border - delimlen) > indel {
return "" return ""
} }
if delimlen > border {
i += delimlen - border
}
// log.Infof("delimlen: %d", delimlen) // log.Infof("delimlen: %d", delimlen)
end := i + 1 end := i + 1
@ -146,10 +151,6 @@ func lookForRescueTag(seq string, delimiter byte, taglength, border, indel int)
delimlen++ delimlen++
} }
if obiutils.Abs(delimlen-border) > indel {
return ""
}
delimlen = min(delimlen, border) delimlen = min(delimlen, border)
// log.Infof("delimlen: %d", delimlen) // log.Infof("delimlen: %d", delimlen)
@ -161,7 +162,6 @@ func lookForRescueTag(seq string, delimiter byte, taglength, border, indel int)
} }
// log.Infof("begin: %d, end: %d", begin, end) // log.Infof("begin: %d, end: %d", begin, end)
// log.Infof("seq: %s", seq)
// log.Infof("seq[begin:end]: %s", seq[begin:end]) // log.Infof("seq[begin:end]: %s", seq[begin:end])
return seq[begin:end] return seq[begin:end]

View File

@ -7,7 +7,7 @@ import (
// TODO: The version number is extracted from git. This implies that the version // TODO: The version number is extracted from git. This implies that the version
// corresponds to the last commit, and not the one in which this file will be // corresponds to the last commit, and not the one in which this file will be
// committed // committed
var _Commit = "1cf2768" var _Commit = "58bcc67"
var _Version = "Release 4.2.0" var _Version = "Release 4.2.0"
// Version returns the version of the obitools package. // Version returns the version of the obitools package.

View File

@ -2,6 +2,7 @@ package obiseq
import ( import (
"fmt" "fmt"
"math"
"reflect" "reflect"
"strings" "strings"
@ -120,8 +121,14 @@ func (sequence *BioSequence) StatsPlusOne(key string, toAdd *BioSequence, na str
uint8, uint16, uint32, uint64, uint8, uint16, uint32, uint64,
int8, int16, int32, int64, bool: int8, int16, int32, int64, bool:
sval = fmt.Sprint(value) sval = fmt.Sprint(value)
case float64:
if math.Floor(value) == value {
sval = fmt.Sprint(int(value))
} else {
log.Fatalf("Trying to make stats on a float value (%v : %T)", value, value)
}
default: default:
log.Fatalf("Trying to make stats on a none string, integer or boolean value (%v)", value) log.Fatalf("Trying to make stats on a none string, integer or boolean value (%v : %T)", value, value)
} }
retval = true retval = true
} }

View File

@ -108,14 +108,12 @@ func BuildConsensus(seqs obiseq.BioSequenceSlice,
sumCount += s.Count() sumCount += s.Count()
} }
log.Warnf("sumCount : %d", sumCount) seq.SetAttribute("obiconsensus_weight", sumCount)
seq.SetAttribute("obiconsensus_seq_length", seq.Len())
seq.SetCount(sumCount) seq.SetAttribute("obiconsensus_kmer_size", kmer_size)
seq.SetAttribute("seq_length", seq.Len()) seq.SetAttribute("obiconsensus_kmer_max_occur", graph.MaxWeight())
seq.SetAttribute("kmer_size", kmer_size) seq.SetAttribute("obiconsensus_filtered_graph_size", graph.Len())
seq.SetAttribute("kmer_max_occur", graph.MaxWeight()) seq.SetAttribute("obiconsensus_full_graph_size", total_kmer)
seq.SetAttribute("filtered_graph_size", graph.Len())
seq.SetAttribute("full_graph_size", total_kmer)
} }
return seq, err return seq, err
} }
@ -280,19 +278,23 @@ func MinionDenoise(graph *obigraph.Graph[*obiseq.BioSequence, Mutation],
if err != nil { if err != nil {
log.Warning(err) log.Warning(err)
clean = (*graph.Vertices)[i] clean = (*graph.Vertices)[i]
clean.SetAttribute("obiminion_consensus", false) clean.SetAttribute("obiconsensus_consensus", false)
} else { } else {
clean.SetAttribute("obiminion_consensus", true) clean.SetAttribute("obiconsensus_consensus", true)
} }
pack.Recycle(false) pack.Recycle(false)
} else { } else {
clean = obiseq.NewBioSequence(v.Id(), v.Sequence(), v.Definition()) clean = obiseq.NewBioSequence(v.Id(), v.Sequence(), v.Definition())
clean.SetAttribute("obiminion_consensus", false) clean.SetAttribute("obiconsensus_consensus", false)
} }
// clean.SetCount(int(graph.VertexWeight(i))) clean.SetCount(int(graph.VertexWeight(i)))
clean.SetAttribute(sample_key, graph.Name) clean.SetAttribute(sample_key, graph.Name)
if !clean.HasAttribute("obiconsensus_weight") {
clean.SetAttribute("obiconsensus_weight", int(1))
}
denoised[i] = clean denoised[i] = clean
} }
@ -376,6 +378,7 @@ func CLIOBIMinion(itertator obiiter.IBioSequence) obiiter.IBioSequence {
}() }()
obiuniq.AddStatsOn(CLISampleAttribute()) obiuniq.AddStatsOn(CLISampleAttribute())
obiuniq.AddStatsOn("obiconsensus_weight")
obiuniq.SetUniqueInMemory(false) obiuniq.SetUniqueInMemory(false)
obiuniq.SetNoSingleton(CLINoSingleton()) obiuniq.SetNoSingleton(CLINoSingleton())
return obiuniq.CLIUnique(newIter).Pipe(obiiter.WorkerPipe(obiannotate.AddSeqLengthWorker(), false)) return obiuniq.CLIUnique(newIter).Pipe(obiiter.WorkerPipe(obiannotate.AddSeqLengthWorker(), false))