mirror of
https://github.com/metabarcoding/obitools4.git
synced 2026-03-25 13:30:52 +00:00
Add entropy-based filtering for k-mers
This commit introduces entropy-based filtering for k-mers to remove low-complexity sequences. It adds:
- New KmerEntropy and KmerEntropyFilter functions in pkg/obikmer/entropy.go for computing and filtering k-mer entropy
- Integration of entropy filtering in the k-mer set builder (pkg/obikmer/kmer_set_builder.go)
- A new 'filter' command in the obik tool (pkg/obitools/obik/filter.go) to apply entropy filtering to existing indices
- CLI options for configuring entropy filtering during index building and filtering

The entropy filter helps improve the quality of k-mer sets by removing repetitive sequences that may interfere with downstream analyses.
This commit is contained in:
@@ -128,6 +128,27 @@ func OpenKmerSetGroup(directory string) (*KmerSetGroup, error) {
|
||||
return ksg, nil
|
||||
}
|
||||
|
||||
// NewFilteredKmerSetGroup creates a KmerSetGroup from pre-computed data.
|
||||
// Used by the filter command to construct a new group after filtering partitions.
|
||||
func NewFilteredKmerSetGroup(
|
||||
directory string, k, m, partitions, n int,
|
||||
setsIDs []string, counts []uint64,
|
||||
setsMetadata []map[string]interface{},
|
||||
) (*KmerSetGroup, error) {
|
||||
ksg := &KmerSetGroup{
|
||||
path: directory,
|
||||
k: k,
|
||||
m: m,
|
||||
partitions: partitions,
|
||||
n: n,
|
||||
setsIDs: setsIDs,
|
||||
counts: counts,
|
||||
setsMetadata: setsMetadata,
|
||||
Metadata: make(map[string]interface{}),
|
||||
}
|
||||
return ksg, nil
|
||||
}
|
||||
|
||||
// SaveMetadata writes the metadata.toml file. This is useful after
|
||||
// modifying attributes or IDs on an already-finalized index.
|
||||
func (ksg *KmerSetGroup) SaveMetadata() error {
|
||||
|
||||
Reference in New Issue
Block a user