Small bug in sequence counting on merge

This commit is contained in:
2022-05-30 16:28:59 +02:00
parent f14860a486
commit cf5b4baa54
5 changed files with 21 additions and 4 deletions

View File

@ -11,7 +11,7 @@ import (
) )
// //
// Interface for sorting a list of sequences accoording to // Interface for sorting a list of sequences according to
// their classes // their classes
// //

View File

@ -9,6 +9,9 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
) )
// Runs the dereplication algorithm on an obiiter.IBioSequenceBatch
// iterator.
func IUniqueSequence(iterator obiiter.IBioSequenceBatch, func IUniqueSequence(iterator obiiter.IBioSequenceBatch,
options ...WithOption) (obiiter.IBioSequenceBatch, error) { options ...WithOption) (obiiter.IBioSequenceBatch, error) {
@ -62,7 +65,9 @@ func IUniqueSequence(iterator obiiter.IBioSequenceBatch,
return neworder return neworder
} }
var ff func(obiiter.IBioSequenceBatch, *obiseq.BioSequenceClassifier, int) var ff func(obiiter.IBioSequenceBatch,
*obiseq.BioSequenceClassifier,
int)
cat := opts.Categories() cat := opts.Categories()
na := opts.NAValue() na := opts.NAValue()

View File

@ -37,6 +37,7 @@ func (sequence *BioSequence) StatsOn(key string, na string) StatsOnValues {
newstat = false newstat = false
case map[string]interface{}: case map[string]interface{}:
stats = make(StatsOnValues, len(istat)) stats = make(StatsOnValues, len(istat))
newstat = false
var err error var err error
for k, v := range istat { for k, v := range istat {
stats[k], err = goutils.InterfaceToInt(v) stats[k], err = goutils.InterfaceToInt(v)
@ -161,7 +162,7 @@ func (sequences BioSequenceSlice) Merge(na string, statsOn []string) *BioSequenc
seq.SetQualities(nil) seq.SetQualities(nil)
if len(sequences) == 1 { if len(sequences) == 1 {
seq.Annotations()["count"] = 1 seq.Annotations()["count"] = seq.Count()
for _, v := range statsOn { for _, v := range statsOn {
seq.StatsOn(v, na) seq.StatsOn(v, na)
} }

View File

@ -41,7 +41,7 @@ func UniqueOptionSet(options *getoptions.GetOpt) {
} }
// OptionSet adds to the basic option set every options declared for // OptionSet adds to the basic option set every options declared for
// the obipcr command // the obiuniq command
func OptionSet(options *getoptions.GetOpt) { func OptionSet(options *getoptions.GetOpt) {
obiconvert.OptionSet(options) obiconvert.OptionSet(options)
UniqueOptionSet(options) UniqueOptionSet(options)

View File

@ -16,6 +16,11 @@ func Unique(sequences obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
obichunk.OptionBatchCount(CLINumberOfChunks()), obichunk.OptionBatchCount(CLINumberOfChunks()),
) )
//
// Considers if data splitting must be done on disk or in memory
//
// --on-disk command line option
if CLIUniqueInMemory() { if CLIUniqueInMemory() {
log.Printf("Running dereplication in memory on %d chunks", CLINumberOfChunks()) log.Printf("Running dereplication in memory on %d chunks", CLINumberOfChunks())
options = append(options, obichunk.OptionSortOnMemory()) options = append(options, obichunk.OptionSortOnMemory())
@ -24,6 +29,12 @@ func Unique(sequences obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
options = append(options, obichunk.OptionSortOnDisk()) options = append(options, obichunk.OptionSortOnDisk())
} }
//
// Considers if sequences observed a single time in the dataset have to
// be conserved in the output
//
// --no-singleton
if CLINoSingleton() { if CLINoSingleton() {
log.Printf("Removing sigletons from the output") log.Printf("Removing sigletons from the output")
options = append(options, obichunk.OptionsNoSingleton()) options = append(options, obichunk.OptionsNoSingleton())