From b2d16721f0d6ccd27633fc260da80f41cc1acfeb Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Sun, 8 Feb 2026 15:52:18 +0100 Subject: [PATCH] Fix classifier cloning and reset in chunk processing This commit fixes an issue in the chunk processing logic where the wrong classifier instance was being reset and used to compute each sequence's classification code and key. A local clone of the classifier is now created and used to ensure correct behavior during dereplication. --- pkg/obichunk/chunk_on_disk.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pkg/obichunk/chunk_on_disk.go b/pkg/obichunk/chunk_on_disk.go index 2877beb..06bbc7a 100644 --- a/pkg/obichunk/chunk_on_disk.go +++ b/pkg/obichunk/chunk_on_disk.go @@ -110,6 +110,7 @@ func ISequenceChunkOnDisk(iterator obiiter.IBioSequence, log.Infof("Data splitted over %d batches", nbatch) go func() { + localClassifier := uniqueClassifier.Clone() for order, file := range fileNames { iseq, err := obiformats.ReadSequencesFromFile(file) @@ -121,7 +122,7 @@ func ISequenceChunkOnDisk(iterator obiiter.IBioSequence, if dereplicate { u := make(map[string]*obiseq.BioSequence) var source string - uniqueClassifier.Reset() + localClassifier.Reset() for iseq.Next() { batch := iseq.Get() @@ -129,8 +130,8 @@ func ISequenceChunkOnDisk(iterator obiiter.IBioSequence, for _, seq := range batch.Slice() { // Use composite key: sequence + categories - code := uniqueClassifier.Code(seq) - key := uniqueClassifier.Value(code) + code := localClassifier.Code(seq) + key := localClassifier.Value(code) prev, ok := u[key] if ok { prev.Merge(seq, na, true, statsOn)