mirror of
https://github.com/metabarcoding/obitools4.git
synced 2026-03-28 06:40:53 +00:00
Add entropy-based filtering for k-mers
This commit introduces entropy-based filtering for k-mers to remove low-complexity sequences. It adds: - New KmerEntropy and KmerEntropyFilter functions in pkg/obikmer/entropy.go for computing and filtering k-mer entropy - Integration of entropy filtering in the k-mer set builder (pkg/obikmer/kmer_set_builder.go) - A new 'filter' command in obik tool (pkg/obitools/obik/filter.go) to apply entropy filtering on existing indices - CLI options for configuring entropy filtering during index building and filtering The entropy filter helps improve the quality of k-mer sets by removing repetitive sequences that may interfere with downstream analyses.
This commit is contained in:
@@ -33,6 +33,9 @@ func runIndex(ctx context.Context, opt *getoptions.GetOpt, args []string) error
|
||||
|
||||
maxOcc := CLIMaxOccurrence()
|
||||
|
||||
entropyThreshold := CLIIndexEntropyThreshold()
|
||||
entropySize := CLIIndexEntropySize()
|
||||
|
||||
// Build options
|
||||
var opts []obikmer.BuilderOption
|
||||
if minOcc > 1 {
|
||||
@@ -44,6 +47,9 @@ func runIndex(ctx context.Context, opt *getoptions.GetOpt, args []string) error
|
||||
if topN := CLISaveFreqKmer(); topN > 0 {
|
||||
opts = append(opts, obikmer.WithSaveFreqKmers(topN))
|
||||
}
|
||||
if entropyThreshold > 0 {
|
||||
opts = append(opts, obikmer.WithEntropyFilter(entropyThreshold, entropySize))
|
||||
}
|
||||
|
||||
// Determine whether to append to existing group or create new
|
||||
var builder *obikmer.KmerSetGroupBuilder
|
||||
@@ -115,6 +121,11 @@ func runIndex(ctx context.Context, opt *getoptions.GetOpt, args []string) error
|
||||
ksg.SetAttribute("max_occurrence", maxOcc)
|
||||
}
|
||||
|
||||
if entropyThreshold > 0 {
|
||||
ksg.SetAttribute("entropy_filter", entropyThreshold)
|
||||
ksg.SetAttribute("entropy_filter_size", entropySize)
|
||||
}
|
||||
|
||||
if err := ksg.SaveMetadata(); err != nil {
|
||||
return fmt.Errorf("failed to save metadata: %w", err)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user