mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
add the --min-sample-count option to obiclean.
This commit is contained in:
@ -21,15 +21,15 @@ func encodeValues(score, length int, out bool) uint64 {
|
||||
return fo
|
||||
}
|
||||
|
||||
func _isout(value uint64) bool {
|
||||
const outmask = uint64(1) << dwsize
|
||||
return (value & outmask) == 0
|
||||
}
|
||||
// func _isout(value uint64) bool {
|
||||
// const outmask = uint64(1) << dwsize
|
||||
// return (value & outmask) == 0
|
||||
// }
|
||||
|
||||
func _lpath(value uint64) int {
|
||||
const mask = uint64(1<<wsize) - 1
|
||||
return int(((value + 1) ^ mask) & mask)
|
||||
}
|
||||
// func _lpath(value uint64) int {
|
||||
// const mask = uint64(1<<wsize) - 1
|
||||
// return int(((value + 1) ^ mask) & mask)
|
||||
// }
|
||||
|
||||
func decodeValues(value uint64) (int, int, bool) {
|
||||
const mask = uint64(1<<wsize) - 1
|
||||
@ -57,4 +57,3 @@ func _setout(value uint64) uint64 {
|
||||
var _empty = encodeValues(0, 0, false)
|
||||
var _out = encodeValues(0, 30000, true)
|
||||
var _notavail = encodeValues(0, 30000, false)
|
||||
|
||||
|
@ -8,7 +8,7 @@ import (
|
||||
// corresponds to the last commit, and not the one when the file will be
|
||||
// committed
|
||||
|
||||
var _Commit = "fb6f857"
|
||||
var _Commit = "51d11aa"
|
||||
var _Version = "Release 4.2.0"
|
||||
|
||||
// Version returns the version of the obitools package.
|
||||
|
@ -196,6 +196,16 @@ func IsShorterOrEqualTo(length int) SequencePredicate {
|
||||
return f
|
||||
}
|
||||
|
||||
func OccurInAtleast(sample string, n int) SequencePredicate {
|
||||
desc := MakeStatsOnDescription(sample)
|
||||
f := func(sequence *BioSequence) bool {
|
||||
stats := sequence.StatsOn(desc, "NA")
|
||||
return len(stats) >= n
|
||||
}
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func IsSequenceMatch(pattern string) SequencePredicate {
|
||||
pat, err := regexp.Compile("(?i)" + pattern)
|
||||
|
||||
|
@ -368,7 +368,12 @@ func CLIOBIClean(itertator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
iter := annotateOBIClean(source, db, samples, SampleAttribute(), "NA")
|
||||
|
||||
if OnlyHead() {
|
||||
iter = iter.FilterOn(IsHead, 1000)
|
||||
iter = iter.FilterOn(IsHead, obidefault.BatchSize())
|
||||
}
|
||||
|
||||
if MinSampleCount() > 1 {
|
||||
sc := obiseq.OccurInAtleast(SampleAttribute(), MinSampleCount())
|
||||
iter = iter.FilterOn(sc, obidefault.BatchSize())
|
||||
}
|
||||
|
||||
return iter
|
||||
|
@ -16,6 +16,7 @@ var _onlyHead = false
|
||||
|
||||
// Package-level option storage with default values.
var (
	// NOTE(review): "__@@NOSAVE@@__" looks like a placeholder meaning
	// "no output file requested" — confirm where it is compared.
	_saveGraph = "__@@NOSAVE@@__"
	// _saveRatio is the value returned by RatioTableFilename.
	_saveRatio = "__@@NOSAVE@@__"
	// _minSample backs the --min-sample-count option and is returned by
	// MinSampleCount; it defaults to 1.
	_minSample = 1
)
|
||||
|
||||
func ObicleanOptionSet(options *getoptions.GetOpt) {
|
||||
options.StringVar(&_sampleAttribute, "sample", _sampleAttribute,
|
||||
@ -55,6 +56,9 @@ func ObicleanOptionSet(options *getoptions.GetOpt) {
|
||||
"The ratio file follows the csv format."),
|
||||
)
|
||||
|
||||
options.IntVar(&_minSample, "min-sample-count", _minSample,
|
||||
options.Description("Minimum number of samples a sequence must be present in to be considered in the analysis."),
|
||||
)
|
||||
}
|
||||
|
||||
func OptionSet(options *getoptions.GetOpt) {
|
||||
@ -111,3 +115,8 @@ func IsSaveRatioTable() bool {
|
||||
// RatioTableFilename returns the current value of the package-level
// _saveRatio variable.
// NOTE(review): presumably the path of the ratio table to write, as given on
// the command line — confirm against the option that sets _saveRatio.
func RatioTableFilename() string {
|
||||
return _saveRatio
|
||||
}
|
||||
|
||||
// MinSampleCount returns the minimum number of samples a sequence must be
// present in to be considered in the analysis (the value of _minSample).
|
||||
func MinSampleCount() int {
|
||||
return _minSample
|
||||
}
|
||||
|
@ -238,7 +238,7 @@ func IAssemblePESequencesBatch(iterator obiiter.IBioSequence,
|
||||
log.Printf("End of the sequence Pairing")
|
||||
}()
|
||||
|
||||
f := func(iterator obiiter.IBioSequence, wid int) {
|
||||
f := func(iterator obiiter.IBioSequence) {
|
||||
arena := obialign.MakePEAlignArena(150, 150)
|
||||
shifts := make(map[int]int)
|
||||
|
||||
@ -263,9 +263,9 @@ func IAssemblePESequencesBatch(iterator obiiter.IBioSequence,
|
||||
log.Printf("Start of the sequence Pairing using %d workers\n", nworkers)
|
||||
|
||||
for i := 0; i < nworkers-1; i++ {
|
||||
go f(iterator.Split(), i)
|
||||
go f(iterator.Split())
|
||||
}
|
||||
go f(iterator, nworkers-1)
|
||||
go f(iterator)
|
||||
return newIter
|
||||
|
||||
}
|
||||
|
Reference in New Issue
Block a user