mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-12-12 18:30:25 +00:00
Cleaning of obiminion
Former-commit-id: 75148afd70e5006cc6855bcddc86506b099761a1
This commit is contained in:
@@ -17,9 +17,7 @@ import (
|
||||
|
||||
func BuildConsensus(seqs obiseq.BioSequenceSlice,
|
||||
consensus_id string,
|
||||
kmer_size int, quorum float64,
|
||||
min_depth float64,
|
||||
max_length int,
|
||||
kmer_size int,
|
||||
save_graph bool, dirname string) (*obiseq.BioSequence, error) {
|
||||
|
||||
if save_graph {
|
||||
@@ -37,7 +35,7 @@ func BuildConsensus(seqs obiseq.BioSequenceSlice,
|
||||
}
|
||||
}
|
||||
|
||||
fasta, err := os.Create(path.Join(dirname, fmt.Sprintf("%s.fasta", consensus_id)))
|
||||
fasta, err := os.Create(path.Join(dirname, fmt.Sprintf("%s_consensus.fasta", consensus_id)))
|
||||
|
||||
if err == nil {
|
||||
defer fasta.Close()
|
||||
@@ -58,16 +56,7 @@ func BuildConsensus(seqs obiseq.BioSequenceSlice,
|
||||
longest[i] = slices.Max(sa.CommonSuffix())
|
||||
}
|
||||
|
||||
// o := obiutils.Order(sort.IntSlice(longest))
|
||||
// i := int(float64(len(seqs)) * quorum)
|
||||
|
||||
// if i >= len(o) {
|
||||
// i = len(o) - 1
|
||||
// }
|
||||
|
||||
kmer_size = slices.Max(longest) + 1
|
||||
|
||||
// kmer_size = longest[o[i]] + 1
|
||||
log.Printf("estimated kmer size : %d", kmer_size)
|
||||
}
|
||||
|
||||
@@ -90,7 +79,7 @@ func BuildConsensus(seqs obiseq.BioSequenceSlice,
|
||||
if save_graph {
|
||||
|
||||
file, err := os.Create(path.Join(dirname,
|
||||
fmt.Sprintf("%s_raw_consensus.gml", consensus_id)))
|
||||
fmt.Sprintf("%s_consensus.gml", consensus_id)))
|
||||
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
@@ -103,65 +92,7 @@ func BuildConsensus(seqs obiseq.BioSequenceSlice,
|
||||
log.Printf("Graph size : %d\n", graph.Len())
|
||||
total_kmer := graph.Len()
|
||||
|
||||
// threshold := 0
|
||||
|
||||
// switch {
|
||||
// case min_depth < 0:
|
||||
// spectrum := graph.WeightSpectrum()
|
||||
// cum := make(map[int]int)
|
||||
|
||||
// spectrum[1] = 0
|
||||
// for i := 2; i < len(spectrum); i++ {
|
||||
// spectrum[i] += spectrum[i-1]
|
||||
// cum[spectrum[i]]++
|
||||
// }
|
||||
|
||||
// max := 0
|
||||
// kmax := 0
|
||||
// for k, obs := range cum {
|
||||
// if obs > max {
|
||||
// max = obs
|
||||
// kmax = k
|
||||
// }
|
||||
// }
|
||||
|
||||
// for i, total := range spectrum {
|
||||
// if total == kmax {
|
||||
// threshold = i
|
||||
// break
|
||||
// }
|
||||
// }
|
||||
// threshold /= 2
|
||||
|
||||
// if threshold < 1 {
|
||||
// threshold = 1
|
||||
// }
|
||||
|
||||
// log.Info("Estimated kmer_min_occur = ", threshold)
|
||||
// case min_depth >= 1:
|
||||
// threshold = int(min_depth)
|
||||
// default:
|
||||
// threshold = int(float64(len(seqs)) * min_depth)
|
||||
// }
|
||||
|
||||
// graph.FilterMinWeight(threshold)
|
||||
|
||||
// log.Printf("Graph size : %d\n", graph.Len())
|
||||
|
||||
// if save_graph {
|
||||
|
||||
// file, err := os.Create(path.Join(dirname,
|
||||
// fmt.Sprintf("%s_consensus.gml", consensus_id)))
|
||||
|
||||
// if err != nil {
|
||||
// fmt.Println(err)
|
||||
// } else {
|
||||
// file.WriteString(graph.Gml())
|
||||
// file.Close()
|
||||
// }
|
||||
// }
|
||||
|
||||
seq, err := graph.LongestConsensus(consensus_id, max_length)
|
||||
seq, err := graph.LongestConsensus(consensus_id)
|
||||
|
||||
sumCount := 0
|
||||
|
||||
@@ -173,7 +104,6 @@ func BuildConsensus(seqs obiseq.BioSequenceSlice,
|
||||
seq.SetCount(sumCount)
|
||||
seq.SetAttribute("seq_length", seq.Len())
|
||||
seq.SetAttribute("kmer_size", kmer_size)
|
||||
//seq.SetAttribute("kmer_min_occur", threshold)
|
||||
seq.SetAttribute("kmer_max_occur", graph.MaxWeight())
|
||||
seq.SetAttribute("filtered_graph_size", graph.Len())
|
||||
seq.SetAttribute("full_graph_size", total_kmer)
|
||||
@@ -181,52 +111,6 @@ func BuildConsensus(seqs obiseq.BioSequenceSlice,
|
||||
return seq, err
|
||||
}
|
||||
|
||||
// func BuildConsensusWithTimeout(seqs obiseq.BioSequenceSlice,
|
||||
// kmer_size int, quorum float64,
|
||||
// min_depth float64,
|
||||
// save_graph bool, dirname string, timeout time.Duration) (*obiseq.BioSequence, error) {
|
||||
|
||||
// ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
||||
// defer cancel()
|
||||
|
||||
// consensus := func() *obiseq.BioSequence {
|
||||
// cons, err := BuildConsensus(seqs, kmer_size, quorum, min_depth, save_graph, dirname,)
|
||||
// if err != nil {
|
||||
// cons = nil
|
||||
// }
|
||||
|
||||
// return cons
|
||||
// }
|
||||
|
||||
// computation := func() <-chan *obiseq.BioSequence {
|
||||
// result := make(chan *obiseq.BioSequence)
|
||||
|
||||
// go func() {
|
||||
// select {
|
||||
// case <-ctx.Done():
|
||||
// result <- nil
|
||||
// default:
|
||||
// result <- consensus()
|
||||
|
||||
// }
|
||||
// }()
|
||||
|
||||
// return result
|
||||
// }
|
||||
|
||||
// calcResult := computation()
|
||||
|
||||
// select {
|
||||
// case result := <-calcResult:
|
||||
// if result == nil {
|
||||
// return nil, fmt.Errorf("cannot compute consensus")
|
||||
// }
|
||||
// return result, nil
|
||||
// case <-ctx.Done():
|
||||
// return nil, fmt.Errorf("compute consensus timeout, exiting")
|
||||
// }
|
||||
// }
|
||||
|
||||
func Consensus(iterator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
newIter := obiiter.MakeIBioSequence()
|
||||
size := 10
|
||||
@@ -266,9 +150,7 @@ func Consensus(iterator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
}
|
||||
consensus, err := BuildConsensus(sequences,
|
||||
id,
|
||||
CLIKmerSize(), CLIThreshold(),
|
||||
CLIKmerDepth(),
|
||||
CLIMaxConsensusLength(),
|
||||
CLIKmerSize(),
|
||||
CLISaveGraphToFiles(),
|
||||
CLIGraphFilesDirectory(),
|
||||
)
|
||||
|
||||
@@ -7,9 +7,6 @@ import (
|
||||
|
||||
var _saveGraph = "__@@NOSAVE@@__"
|
||||
var _kmerSize = -1
|
||||
var _threshold = 0.99
|
||||
var _mindepth = -1.0
|
||||
var _consensus_max_length = -1
|
||||
|
||||
func ObiconsensusOptionSet(options *getoptions.GetOpt) {
|
||||
|
||||
@@ -25,26 +22,6 @@ func ObiconsensusOptionSet(options *getoptions.GetOpt) {
|
||||
"Default value = -1, which means that the kmer size is estimated from the data"),
|
||||
)
|
||||
|
||||
options.Float64Var(&_threshold, "threshold", _threshold,
|
||||
options.ArgName("RATIO"),
|
||||
options.Description("A threshold between O and 1 used to determine the optimal "+
|
||||
"kmer size"),
|
||||
)
|
||||
|
||||
options.Float64Var(&_mindepth, "min-depth", _mindepth,
|
||||
options.ArgName("DEPTH"),
|
||||
options.Description("if DEPTH is between 0 and 1, it corresponds to fraction of the "+
|
||||
"reads in which a kmer must occurs to be conserved in the graph. If DEPTH is greater "+
|
||||
"than 1, indicate the minimum count of occurrence for a kmer to be kept. "+
|
||||
"Default value = -1, which means that the DEPTH is estimated from the data"),
|
||||
)
|
||||
|
||||
options.IntVar(&_consensus_max_length, "consensus-max-length", _consensus_max_length,
|
||||
options.ArgName("LENGTH"),
|
||||
options.Description("Maximum length of the consensus sequence. "+
|
||||
"Default value = -1, which means that no limit is applied"),
|
||||
)
|
||||
|
||||
}
|
||||
|
||||
func OptionSet(options *getoptions.GetOpt) {
|
||||
@@ -66,15 +43,3 @@ func CLIGraphFilesDirectory() string {
|
||||
func CLIKmerSize() int {
|
||||
return _kmerSize
|
||||
}
|
||||
|
||||
func CLIKmerDepth() float64 {
|
||||
return _mindepth
|
||||
}
|
||||
|
||||
func CLIThreshold() float64 {
|
||||
return _threshold
|
||||
}
|
||||
|
||||
func CLIMaxConsensusLength() int {
|
||||
return _consensus_max_length
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user