mirror of
https://github.com/metabarcoding/obitools4.git
synced 2026-03-26 05:50:52 +00:00
Add max frequency filtering and top-kmer saving capabilities
This commit introduces max frequency filtering to limit k-mer occurrences and adds functionality to save the N most frequent k-mers per set to CSV files. It also includes the ability to output k-mer frequency spectra as CSV and updates the CLI options accordingly.
This commit is contained in:
@@ -31,11 +31,19 @@ func runIndex(ctx context.Context, opt *getoptions.GetOpt, args []string) error
|
||||
return fmt.Errorf("invalid min-occurrence: %d (must be >= 1)", minOcc)
|
||||
}
|
||||
|
||||
maxOcc := CLIMaxOccurrence()
|
||||
|
||||
// Build options
|
||||
var opts []obikmer.BuilderOption
|
||||
if minOcc > 1 {
|
||||
opts = append(opts, obikmer.WithMinFrequency(minOcc))
|
||||
}
|
||||
if maxOcc > 0 {
|
||||
opts = append(opts, obikmer.WithMaxFrequency(maxOcc))
|
||||
}
|
||||
if topN := CLISaveFreqKmer(); topN > 0 {
|
||||
opts = append(opts, obikmer.WithSaveFreqKmers(topN))
|
||||
}
|
||||
|
||||
// Determine whether to append to existing group or create new
|
||||
var builder *obikmer.KmerSetGroupBuilder
|
||||
@@ -50,7 +58,11 @@ func runIndex(ctx context.Context, opt *getoptions.GetOpt, args []string) error
|
||||
}
|
||||
} else {
|
||||
// New group
|
||||
log.Infof("Creating new kmer index: k=%d, m=%d, min-occurrence=%d", k, m, minOcc)
|
||||
if maxOcc > 0 {
|
||||
log.Infof("Creating new kmer index: k=%d, m=%d, occurrence=[%d,%d]", k, m, minOcc, maxOcc)
|
||||
} else {
|
||||
log.Infof("Creating new kmer index: k=%d, m=%d, min-occurrence=%d", k, m, minOcc)
|
||||
}
|
||||
builder, err = obikmer.NewKmerSetGroupBuilder(outDir, k, m, 1, -1, opts...)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create kmer index builder: %w", err)
|
||||
@@ -99,6 +111,9 @@ func runIndex(ctx context.Context, opt *getoptions.GetOpt, args []string) error
|
||||
if minOcc > 1 {
|
||||
ksg.SetAttribute("min_occurrence", minOcc)
|
||||
}
|
||||
if maxOcc > 0 {
|
||||
ksg.SetAttribute("max_occurrence", maxOcc)
|
||||
}
|
||||
|
||||
if err := ksg.SaveMetadata(); err != nil {
|
||||
return fmt.Errorf("failed to save metadata: %w", err)
|
||||
|
||||
Reference in New Issue
Block a user