diff --git a/pkg/obiformats/fastseq_write_fasta.go b/pkg/obiformats/fastseq_write_fasta.go index 8d7735a..b50af65 100644 --- a/pkg/obiformats/fastseq_write_fasta.go +++ b/pkg/obiformats/fastseq_write_fasta.go @@ -226,7 +226,10 @@ func WriteFastaToFile(iterator obiiter.IBioSequence, if opt.AppendFile() { flags |= os.O_APPEND + } else { + flags |= os.O_TRUNC } + file, err := os.OpenFile(filename, flags, 0660) if err != nil { diff --git a/pkg/obiformats/fastseq_write_fastq.go b/pkg/obiformats/fastseq_write_fastq.go index 7d1bea7..abd6feb 100644 --- a/pkg/obiformats/fastseq_write_fastq.go +++ b/pkg/obiformats/fastseq_write_fastq.go @@ -32,17 +32,31 @@ func FormatFastq(seq *obiseq.BioSequence, formater FormatHeader) string { q, ) + if f[0] != '@' { + log.Panicln("FormatFastq: FASTQ format error") + } + return f } func FormatFastqBatch(batch obiiter.BioSequenceBatch, formater FormatHeader, skipEmpty bool) []byte { var bs bytes.Buffer + for _, seq := range batch.Slice() { if seq.Len() > 0 { fs := FormatFastq(seq, formater) - bs.WriteString(fs) + lb := bs.Len() + n, _ := bs.WriteString(fs) + + if n < len(fs) { + log.Panicln("FormatFastqBatch: Cannot write all FASTQ sequences") + } bs.WriteString("\n") + + if bs.Len()-lb < len(fs)+1 { + log.Panicln("FormatFastqBatch: Cannot write all FASTQ sequences correctly") + } } else { if skipEmpty { log.Warnf("Sequence %s is empty and skiped in output", seq.Id()) @@ -53,7 +67,15 @@ func FormatFastqBatch(batch obiiter.BioSequenceBatch, } - return bs.Bytes() + chunk := bs.Bytes() + + chunk = chunk[:bs.Len()] + + if chunk[0] != '@' { + log.Panicln("FormatFastqBatch: FASTQ format error") + } + + return chunk } type FileChunck struct { @@ -118,16 +140,25 @@ func WriteFastq(iterator obiiter.IBioSequence, go func() { for chunk := range chunkchan { if chunk.order == next_to_send { + if chunk.text[0] != '@' { + log.Panicln("WriteFastq: FASTQ format error") + } file.Write(chunk.text) next_to_send++ chunk, ok := received[next_to_send] for ok { + if chunk.text[0] != '@' { + log.Panicln("WriteFastq: FASTQ format error") + } file.Write(chunk.text) delete(received, next_to_send) next_to_send++ chunk, ok = received[next_to_send] } } else { + if _, ok := received[chunk.order]; ok { + log.Panicln("WriteFastq: Two chunks with the same number") + } received[chunk.order] = chunk } @@ -158,7 +189,10 @@ func WriteFastqToFile(iterator obiiter.IBioSequence, if opt.AppendFile() { flags |= os.O_APPEND + } else { + flags |= os.O_TRUNC } + file, err := os.OpenFile(filename, flags, 0660) if err != nil { diff --git a/pkg/obiformats/json_writer.go b/pkg/obiformats/json_writer.go index 9814247..fac57fa 100644 --- a/pkg/obiformats/json_writer.go +++ b/pkg/obiformats/json_writer.go @@ -174,7 +174,10 @@ func WriteJSONToFile(iterator obiiter.IBioSequence, if opt.AppendFile() { flags |= os.O_APPEND + } else { + flags |= os.O_TRUNC } + file, err := os.OpenFile(filename, flags, 0660) if err != nil { diff --git a/pkg/obiformats/universal_write.go b/pkg/obiformats/universal_write.go index 567b4c7..6d8da4a 100644 --- a/pkg/obiformats/universal_write.go +++ b/pkg/obiformats/universal_write.go @@ -58,7 +58,10 @@ func WriteSequencesToFile(iterator obiiter.IBioSequence, if opt.AppendFile() { flags |= os.O_APPEND + } else { + flags |= os.O_TRUNC } + file, err := os.OpenFile(filename, flags, 0660) if err != nil { diff --git a/pkg/obingslibrary/multimatch.go b/pkg/obingslibrary/multimatch.go index 092550a..1e9dcfe 100644 --- a/pkg/obingslibrary/multimatch.go +++ b/pkg/obingslibrary/multimatch.go @@ -112,6 +112,7 @@ func lookForTag(seq string, delimiter byte) string { func lookForRescueTag(seq string, delimiter byte, taglength, border, indel int) string { // log.Info("lookForRescueTag") + // log.Infof("seq: %s", seq) i := len(seq) - 1 @@ -126,10 +127,14 @@ func lookForRescueTag(seq string, delimiter byte, taglength, border, indel int) delimlen++ } - if obiutils.Abs(delimlen-border) > indel { + if (border - delimlen) > indel { return "" } + if delimlen > border { + i += delimlen - border + } + // log.Infof("delimlen: %d", delimlen) end := i + 1 @@ -146,10 +151,6 @@ func lookForRescueTag(seq string, delimiter byte, taglength, border, indel int) delimlen++ } - if obiutils.Abs(delimlen-border) > indel { - return "" - } - delimlen = min(delimlen, border) // log.Infof("delimlen: %d", delimlen) @@ -161,7 +162,6 @@ func lookForRescueTag(seq string, delimiter byte, taglength, border, indel int) } // log.Infof("begin: %d, end: %d", begin, end) - // log.Infof("seq: %s", seq) // log.Infof("seq[begin:end]: %s", seq[begin:end]) return seq[begin:end] diff --git a/pkg/obioptions/version.go b/pkg/obioptions/version.go index 3594970..46f03cd 100644 --- a/pkg/obioptions/version.go +++ b/pkg/obioptions/version.go @@ -7,7 +7,7 @@ import ( // TODO: The version number is extracted from git. This induces that the version // corresponds to the last commit, and not the one when the file will be // commited -var _Commit = "1cf2768" +var _Commit = "58bcc67" var _Version = "Release 4.2.0" // Version returns the version of the obitools package. diff --git a/pkg/obiseq/merge.go b/pkg/obiseq/merge.go index e32f6c0..cfabb37 100644 --- a/pkg/obiseq/merge.go +++ b/pkg/obiseq/merge.go @@ -2,6 +2,7 @@ package obiseq import ( "fmt" + "math" "reflect" "strings" @@ -120,8 +121,14 @@ func (sequence *BioSequence) StatsPlusOne(key string, toAdd *BioSequence, na str uint8, uint16, uint32, uint64, int8, int16, int32, int64, bool: sval = fmt.Sprint(value) + case float64: + if math.Floor(value) == value { + sval = fmt.Sprint(int(value)) + } else { + log.Fatalf("Trying to make stats on a float value (%v : %T)", value, value) + } default: - log.Fatalf("Trying to make stats on a none string, integer or boolean value (%v)", value) + log.Fatalf("Trying to make stats on a none string, integer or boolean value (%v : %T)", value, value) } retval = true } diff --git a/pkg/obitools/obiconsensus/obiconsensus.go b/pkg/obitools/obiconsensus/obiconsensus.go index 1b5b703..22e21d8 100644 --- a/pkg/obitools/obiconsensus/obiconsensus.go +++ b/pkg/obitools/obiconsensus/obiconsensus.go @@ -108,14 +108,12 @@ func BuildConsensus(seqs obiseq.BioSequenceSlice, sumCount += s.Count() } - log.Warnf("sumCount : %d", sumCount) - - seq.SetCount(sumCount) - seq.SetAttribute("seq_length", seq.Len()) - seq.SetAttribute("kmer_size", kmer_size) - seq.SetAttribute("kmer_max_occur", graph.MaxWeight()) - seq.SetAttribute("filtered_graph_size", graph.Len()) - seq.SetAttribute("full_graph_size", total_kmer) + seq.SetAttribute("obiconsensus_weight", sumCount) + seq.SetAttribute("obiconsensus_seq_length", seq.Len()) + seq.SetAttribute("obiconsensus_kmer_size", kmer_size) + seq.SetAttribute("obiconsensus_kmer_max_occur", graph.MaxWeight()) + seq.SetAttribute("obiconsensus_filtered_graph_size", graph.Len()) + seq.SetAttribute("obiconsensus_full_graph_size", total_kmer) } return seq, err } @@ -280,19 +278,23 @@ func MinionDenoise(graph *obigraph.Graph[*obiseq.BioSequence, Mutation], if err != nil { log.Warning(err) clean = (*graph.Vertices)[i] - clean.SetAttribute("obiminion_consensus", false) + clean.SetAttribute("obiconsensus_consensus", false) } else { - clean.SetAttribute("obiminion_consensus", true) + clean.SetAttribute("obiconsensus_consensus", true) } pack.Recycle(false) } else { clean = obiseq.NewBioSequence(v.Id(), v.Sequence(), v.Definition()) - clean.SetAttribute("obiminion_consensus", false) + clean.SetAttribute("obiconsensus_consensus", false) } - // clean.SetCount(int(graph.VertexWeight(i))) + clean.SetCount(int(graph.VertexWeight(i))) clean.SetAttribute(sample_key, graph.Name) + if !clean.HasAttribute("obiconsensus_weight") { + clean.SetAttribute("obiconsensus_weight", int(1)) + } + denoised[i] = clean } @@ -376,6 +378,7 @@ func CLIOBIMinion(itertator obiiter.IBioSequence) obiiter.IBioSequence { }() obiuniq.AddStatsOn(CLISampleAttribute()) + obiuniq.AddStatsOn("obiconsensus_weight") obiuniq.SetUniqueInMemory(false) obiuniq.SetNoSingleton(CLINoSingleton()) return obiuniq.CLIUnique(newIter).Pipe(obiiter.WorkerPipe(obiannotate.AddSeqLengthWorker(), false))