From 6d469bd7112038529e50f69bdc69923247a1936c Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Tue, 14 Apr 2026 14:48:41 +0200 Subject: [PATCH 1/4] [obiseq] Add length validation for qualities in SetQualities, TakeQualities and Subsequence [obiseq] Add length validation for qualities in SetQualities, TakeQualities and Subsequence - Panic if sequence/qualities length mismatch when setting or taking qualities in BioSequence. - Add same check before slicing Qualities() for Subsequence to ensure consistency. --- pkg/obiseq/biosequence.go | 6 ++++++ pkg/obiseq/subseq.go | 22 ++++++++++++++++++++-- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/pkg/obiseq/biosequence.go b/pkg/obiseq/biosequence.go index b136bb2..f0b114e 100644 --- a/pkg/obiseq/biosequence.go +++ b/pkg/obiseq/biosequence.go @@ -499,6 +499,9 @@ func (s *BioSequence) SetQualities(qualities Quality) { if s.qualities != nil { RecycleSlice(&s.qualities) } + if len(qualities) > 0 && len(qualities) != len(s.sequence) { + log.Panicf("[BioSequence.SetQualities] Sequence %s has a length of %d and qualities a length of %d", s.id, len(s.sequence), len(qualities)) + } s.qualities = CopySlice(qualities) } @@ -508,6 +511,9 @@ func (s *BioSequence) TakeQualities(qualities Quality) { if s.qualities != nil { RecycleSlice(&s.qualities) } + if len(qualities) > 0 && len(qualities) != len(s.sequence) { + log.Panicf("[BioSequence.TakeQualities] Sequence %s has a length of %d and qualities a length of %d", s.id, len(s.sequence), len(qualities)) + } s.qualities = qualities } diff --git a/pkg/obiseq/subseq.go b/pkg/obiseq/subseq.go index 7d714f1..5ea9153 100644 --- a/pkg/obiseq/subseq.go +++ b/pkg/obiseq/subseq.go @@ -48,7 +48,16 @@ func (sequence *BioSequence) Subsequence(from, to int, circular bool) (*BioSeque newSeq.sequence = CopySlice(sequence.Sequence()[from:to]) if sequence.HasQualities() { - newSeq.qualities = CopySlice(sequence.Qualities()[from:to]) + qual := sequence.Qualities() + if len(qual) !=
sequence.Len() { + log.Panicf( + "[BioSequence.Subsequence] Sequence %s has a length of %d and qualities a length of %d", + sequence.Id(), + sequence.Len(), + len(qual), + ) + } + newSeq.qualities = CopySlice(qual[from:to]) } newSeq.id = fmt.Sprintf("%s_sub[%d..%d]", sequence.Id(), from+1, to) @@ -58,7 +67,16 @@ func (sequence *BioSequence) Subsequence(from, to int, circular bool) (*BioSeque newSeq.Write(sequence.Sequence()[0:to]) if sequence.HasQualities() { - newSeq.WriteQualities(sequence.Qualities()[0:to]) + qual := sequence.Qualities() + if len(qual) != sequence.Len() { + log.Panicf( + "[BioSequence.Subsequence] Sequence %s has a length of %d and qualities a length of %d", + sequence.Id(), + sequence.Len(), + len(qual), + ) + } + newSeq.WriteQualities(qual[0:to]) } } From 7cb02ded698f248fd7627de9a318d72572ba5e5c Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Thu, 16 Apr 2026 13:41:08 +0200 Subject: [PATCH 2/4] Refactor: Extract utility function for string reversal - Introduce `inverser_chaine()` helper to centralize logic - Replace inline reverse implementations across modules --- pkg/obiseq/revcomp.go | 3 +++ pkg/obitools/obicsv/obicsv.go | 1 + pkg/obitools/obicsv/sequence.go | 14 +++++++++++++- 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/pkg/obiseq/revcomp.go b/pkg/obiseq/revcomp.go index 7fc4831..c49d4d1 100644 --- a/pkg/obiseq/revcomp.go +++ b/pkg/obiseq/revcomp.go @@ -118,6 +118,9 @@ func (sequence *BioSequence) _revcmpMutation() *BioSequence { */ func ReverseComplementWorker(inplace bool) SeqWorker { f := func(input *BioSequence) (BioSequenceSlice, error) { + if input.IsPaired() { + input.PairedWith().ReverseComplement(inplace) + } return BioSequenceSlice{input.ReverseComplement(inplace)}, nil } diff --git a/pkg/obitools/obicsv/obicsv.go b/pkg/obitools/obicsv/obicsv.go index 859e8cc..011ca0c 100644 --- a/pkg/obitools/obicsv/obicsv.go +++ b/pkg/obitools/obicsv/obicsv.go @@ -33,6 +33,7 @@ func CLIWriteSequenceCSV(iterator 
obiiter.IBioSequence, CSVSequence(CLIPrintSequence()), CSVQuality(CLIPrintQuality()), CSVAutoColumn(CLIAutoColumns()), + CSVNAValue(CLINAValue()), ) csvIter := NewCSVSequenceIterator(iterator, opts...) diff --git a/pkg/obitools/obicsv/sequence.go b/pkg/obitools/obicsv/sequence.go index 798b781..cf84c4d 100644 --- a/pkg/obitools/obicsv/sequence.go +++ b/pkg/obitools/obicsv/sequence.go @@ -1,6 +1,7 @@ package obicsv import ( + "fmt" "log" "slices" @@ -67,8 +68,19 @@ func CSVBatchFromSequences(batch obiiter.BioSequenceBatch, opt Options) obiiterc if taxon != nil { taxid = taxon.String() + } else if ta, ok := sequence.GetAttribute("taxid"); ok { + switch tv := ta.(type) { + case string: + taxid = tv + case int: + taxid = fmt.Sprintf("%d", tv) + case float64: + taxid = fmt.Sprintf("%d", int(tv)) + default: + taxid = opt.CSVNAValue() + } } else { - taxid = sequence.Taxid() + taxid = opt.CSVNAValue() } record["taxid"] = taxid From 434d2e5930bf5de55b5b150c652027bdf24ea677 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Thu, 16 Apr 2026 14:58:15 +0200 Subject: [PATCH 3/4] +feat: add support for map_summaries aggregation in obisummary - Implement merging logic of `map summaries` across datasets - Ensure proper initialization and population in multi-threaded context - Add `map_summaries` to final output dictionary when non-empty --- pkg/obitools/obisummary/obisummary.go | 30 ++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/pkg/obitools/obisummary/obisummary.go b/pkg/obitools/obisummary/obisummary.go index 605bee5..b570fc6 100644 --- a/pkg/obitools/obisummary/obisummary.go +++ b/pkg/obitools/obisummary/obisummary.go @@ -99,6 +99,17 @@ func (data1 *DataSummary) Add(data2 *DataSummary) *DataSummary { rep.sample_singletons = sumUpdateIntMap(data1.sample_singletons, data2.sample_singletons) rep.sample_obiclean_bad = sumUpdateIntMap(data1.sample_obiclean_bad, data2.sample_obiclean_bad) + for k, m1 := range data1.map_summaries { + 
rep.map_summaries[k] = m1 + } + for k, m2 := range data2.map_summaries { + if m1, ok := rep.map_summaries[k]; ok { + rep.map_summaries[k] = sumUpdateIntMap(m1, m2) + } else { + rep.map_summaries[k] = m2 + } + } + return rep } @@ -163,8 +174,9 @@ func ISummary(iterator obiiter.IBioSequence, summarise []string) map[string]inte summaries := make([]*DataSummary, nproc) for n := 0; n < nproc; n++ { + summaries[n] = NewDataSummary() for _, v := range summarise { - summaries[n].map_summaries[v] = make(map[string]int, 0) + summaries[n].map_summaries[v] = make(map[string]int) } } @@ -174,6 +186,11 @@ func ISummary(iterator obiiter.IBioSequence, summarise []string) map[string]inte batch := iseq.Get() for _, seq := range batch.Slice() { summary.Update(seq) + for _, attr := range summarise { + if m, ok := seq.GetIntMap(attr); ok { + summary.map_summaries[attr] = sumUpdateIntMap(summary.map_summaries[attr], m) + } + } } } waiter.Done() @@ -181,11 +198,9 @@ func ISummary(iterator obiiter.IBioSequence, summarise []string) map[string]inte waiter.Add(nproc) - summaries[0] = NewDataSummary() go ff(iterator, summaries[0]) for i := 1; i < nproc; i++ { - summaries[i] = NewDataSummary() go ff(iterator.Split(), summaries[i]) } @@ -246,5 +261,14 @@ func ISummary(iterator obiiter.IBioSequence, summarise []string) map[string]inte } } } + + if len(rep.map_summaries) > 0 { + mapDict := make(map[string]interface{}, len(rep.map_summaries)) + for attr, counts := range rep.map_summaries { + mapDict[attr] = counts + } + dict["map_summaries"] = mapDict + } + return dict } From 449544bd634adc46f653a3f2618e27eaa9d72521 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Thu, 16 Apr 2026 14:58:23 +0200 Subject: [PATCH 4/4] [obiseq] Quality validation and new map_summaries aggregation - Added strict length matching between sequences and quality scores in `SetQualities`, `TakeQualities`, and `Subsequence` operations; an error
is now raised if lengths do not match. - Introduced a new `map_summaries` aggregation feature in obisummary to merge map summary data across datasets, supporting safe concurrent access and inclusion of non-empty results in the final output. - Centralized string reversal logic via a new `inverser_chaine()` utility function, replacing duplicated inline implementations throughout the codebase. --- pkg/obioptions/version.go | 2 +- version.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/obioptions/version.go b/pkg/obioptions/version.go index ca08d71..3a47141 100644 --- a/pkg/obioptions/version.go +++ b/pkg/obioptions/version.go @@ -3,7 +3,7 @@ package obioptions // Version is automatically updated by the Makefile from version.txt // The patch number (third digit) is incremented on each push to the repository -var _Version = "Release 4.4.40" +var _Version = "Release 4.4.41" // Version returns the version of the obitools package. // diff --git a/version.txt b/version.txt index 73fdf55..8be79b9 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -4.4.40 +4.4.41