Refactor k-mer index management with subcommands and enhanced metadata support
This commit refactors the k-mer index management tools to use a unified subcommand structure with obik, adds support for per-set metadata and ID management, enhances the k-mer set group builder to support appending to existing groups, and improves command-line option handling with a new global options registration system.
Key changes:
- Introduce obik command with subcommands (index, ls, summary, cp, mv, rm, super, lowmask)
- Add support for per-set metadata and ID management in kmer set groups
- Implement ability to append to existing kmer index groups
- Refactor option parsing to use a global options registration system
- Add new commands for listing, copying, moving, and removing sets
- Enhance low-complexity masking with new options and output formats
- Improve kmer index summary with Jaccard distance matrix support
- Remove deprecated obikindex and obisuperkmer commands
- Update build process to use the new subcommand structure
2026-02-09 23:10:30 +01:00
|
|
|
package obik
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"context"
|
|
|
|
|
"fmt"
|
|
|
|
|
|
|
|
|
|
log "github.com/sirupsen/logrus"
|
|
|
|
|
|
|
|
|
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
|
|
|
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obikmer"
|
|
|
|
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
|
|
|
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
|
|
|
"github.com/DavidGamba/go-getoptions"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// runSuper implements the "obik super" subcommand.
|
|
|
|
|
// It extracts super k-mers from DNA sequences.
|
|
|
|
|
func runSuper(ctx context.Context, opt *getoptions.GetOpt, args []string) error {
|
2026-02-10 09:52:28 +01:00
|
|
|
k := CLIKmerSize()
|
|
|
|
|
m := CLIMinimizerSize()
|
Refactor k-mer index management with subcommands and enhanced metadata support
This commit refactors the k-mer index management tools to use a unified subcommand structure with obik, adds support for per-set metadata and ID management, enhances the k-mer set group builder to support appending to existing groups, and improves command-line option handling with a new global options registration system.
Key changes:
- Introduce obik command with subcommands (index, ls, summary, cp, mv, rm, super, lowmask)
- Add support for per-set metadata and ID management in kmer set groups
- Implement ability to append to existing kmer index groups
- Refactor option parsing to use a global options registration system
- Add new commands for listing, copying, moving, and removing sets
- Enhance low-complexity masking with new options and output formats
- Improve kmer index summary with Jaccard distance matrix support
- Remove deprecated obikindex and obisuperkmer commands
- Update build process to use the new subcommand structure
2026-02-09 23:10:30 +01:00
|
|
|
|
|
|
|
|
if k < 2 || k > 31 {
|
|
|
|
|
return fmt.Errorf("invalid k-mer size: %d (must be between 2 and 31)", k)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if m < 1 || m >= k {
|
|
|
|
|
return fmt.Errorf("invalid parameters: minimizer size (%d) must be between 1 and k-1 (%d)", m, k-1)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
log.Printf("Extracting super k-mers with k=%d, m=%d", k, m)
|
|
|
|
|
|
|
|
|
|
sequences, err := obiconvert.CLIReadBioSequences(args...)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return fmt.Errorf("failed to open sequence files: %w", err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
worker := obikmer.SuperKmerWorker(k, m)
|
|
|
|
|
|
|
|
|
|
superkmers := sequences.MakeIWorker(
|
|
|
|
|
worker,
|
|
|
|
|
false,
|
|
|
|
|
obidefault.ParallelWorkers(),
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
obiconvert.CLIWriteBioSequences(superkmers, true)
|
|
|
|
|
obiutils.WaitForLastPipe()
|
|
|
|
|
|
|
|
|
|
return nil
|
|
|
|
|
}
|