mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-12-07 16:20:27 +00:00
Merge pull request #64 from metabarcoding/push-yurwulsmpxkq
End of obilowmask
This commit is contained in:
@@ -8,7 +8,7 @@ import (
|
||||
// corresponds to the last commit, and not the one when the file will be
|
||||
// committed
|
||||
|
||||
var _Commit = "961abce"
|
||||
var _Commit = "c1b9503"
|
||||
var _Version = "Release 4.4.0"
|
||||
|
||||
// Version returns the version of the obitools package.
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package obilowmask
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||
@@ -17,6 +18,7 @@ type MaskingMode int
|
||||
const (
|
||||
Mask MaskingMode = iota // Mask mode: replace low-complexity regions with masked characters
|
||||
Split // Split mode: split sequence into high-complexity fragments
|
||||
Extract
|
||||
)
|
||||
|
||||
// LowMaskWorker creates a worker to mask low-complexity regions in DNA sequences.
|
||||
@@ -342,6 +344,76 @@ func LowMaskWorker(kmer_size int, level_max int, threshold float64, mode Masking
|
||||
return obiseq.BioSequenceSlice{seqCopy}, nil
|
||||
}
|
||||
|
||||
// selectMasked collects every maximal run of masked positions as its own
// fragment. maskPosition is scanned left to right; each run of consecutive
// true values [start, end) is cut out of sequence with Subsequence and
// pushed onto the result. Any error from Subsequence aborts the scan and is
// returned with a nil slice.
selectMasked := func(sequence *obiseq.BioSequence, maskPosition []bool) (obiseq.BioSequenceSlice, error) {
	fragments := obiseq.NewBioSequenceSlice()

	// runStart is the index at which the current masked run began, or -1
	// while the scan is outside any masked run.
	runStart := -1

	for pos, isMasked := range maskPosition {
		switch {
		case isMasked && runStart < 0:
			// Entering a masked run.
			runStart = pos
		case !isMasked && runStart >= 0:
			// Leaving a masked run: emit [runStart, pos).
			// NOTE(review): the third argument is presumably a "circular"
			// flag — confirm against obiseq.BioSequence.Subsequence.
			piece, err := sequence.Subsequence(runStart, pos, false)
			if err != nil {
				return nil, err
			}
			fragments.Push(piece)
			runStart = -1
		}
	}

	// A run still open after the last position extends to the end of the mask.
	if runStart >= 0 {
		piece, err := sequence.Subsequence(runStart, len(maskPosition), false)
		if err != nil {
			return nil, err
		}
		fragments.Push(piece)
	}

	return *fragments, nil
}
|
||||
|
||||
// selectunmasked collects every maximal run of unmasked positions as its
// own fragment. maskPosition is scanned left to right; each run of
// consecutive false values [start, end) is cut out of sequence with
// Subsequence and pushed onto the result. Any error from Subsequence aborts
// the scan and is returned with a nil slice.
selectunmasked := func(sequence *obiseq.BioSequence, maskPosition []bool) (obiseq.BioSequenceSlice, error) {
	fragments := obiseq.NewBioSequenceSlice()

	// runStart is the index at which the current unmasked run began, or -1
	// while the scan is inside a masked region.
	runStart := -1

	for pos, isMasked := range maskPosition {
		switch {
		case !isMasked && runStart < 0:
			// Entering an unmasked run.
			runStart = pos
		case isMasked && runStart >= 0:
			// Leaving an unmasked run: emit [runStart, pos).
			// NOTE(review): the third argument is presumably a "circular"
			// flag — confirm against obiseq.BioSequence.Subsequence.
			piece, err := sequence.Subsequence(runStart, pos, false)
			if err != nil {
				return nil, err
			}
			fragments.Push(piece)
			runStart = -1
		}
	}

	// A run still open after the last position extends to the end of the mask.
	if runStart >= 0 {
		piece, err := sequence.Subsequence(runStart, len(maskPosition), false)
		if err != nil {
			return nil, err
		}
		fragments.Push(piece)
	}

	return *fragments, nil
}
|
||||
|
||||
// ========================================================================
|
||||
// FUNCTION 7: masking - Main masking function
|
||||
// ========================================================================
|
||||
@@ -425,7 +497,15 @@ func LowMaskWorker(kmer_size int, level_max int, threshold float64, mode Masking
|
||||
sequence.SetAttribute("mask", mask)
|
||||
sequence.SetAttribute("Entropies", entropies)
|
||||
|
||||
return applyMaskMode(sequence, remove, maskChar)
|
||||
switch mode {
|
||||
case Mask:
|
||||
return applyMaskMode(sequence, remove, maskChar)
|
||||
case Split:
|
||||
return selectunmasked(sequence, remove)
|
||||
case Extract:
|
||||
return selectMasked(sequence, remove)
|
||||
}
|
||||
return nil, fmt.Errorf("Unknown mode %d", mode)
|
||||
}
|
||||
|
||||
return masking
|
||||
|
||||
@@ -13,6 +13,7 @@ var __kmer_size__ = 31
|
||||
var __level_max__ = 6
|
||||
var __threshold__ = 0.5
|
||||
var __split_mode__ = false
|
||||
var __low_mode__ = false
|
||||
var __mask__ = "."
|
||||
|
||||
func LowMaskOptionSet(options *getoptions.GetOpt) {
|
||||
@@ -29,11 +30,15 @@ func LowMaskOptionSet(options *getoptions.GetOpt) {
|
||||
options.Description("entropy theshold used to mask a kmer"),
|
||||
)
|
||||
|
||||
options.BoolVar(&__split_mode__, "--split-mode", __split_mode__,
|
||||
options.BoolVar(&__split_mode__, "split-mode", __split_mode__,
|
||||
options.Description("in split mode, input sequences are splitted to remove masked regions"),
|
||||
)
|
||||
|
||||
options.StringVar(&__mask__, "--masking-char", __mask__,
|
||||
options.BoolVar(&__low_mode__, "low-mode", __low_mode__,
|
||||
options.Description("in split mode, input sequences are splitted to remove masked regions"),
|
||||
)
|
||||
|
||||
options.StringVar(&__mask__, "masking-char", __mask__,
|
||||
options.Description("Character used to mask low complexity region"),
|
||||
)
|
||||
}
|
||||
@@ -56,9 +61,12 @@ func CLIThreshold() float64 {
|
||||
}
|
||||
|
||||
func CLIMaskingMode() MaskingMode {
|
||||
if __split_mode__ {
|
||||
switch {
|
||||
case __low_mode__:
|
||||
return Extract
|
||||
case __split_mode__:
|
||||
return Split
|
||||
} else {
|
||||
default:
|
||||
return Mask
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user