mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-12-10 09:40:27 +00:00
correction of several small bugs
This commit is contained in:
@@ -25,8 +25,19 @@ import (
|
||||
func BuildConsensus(seqs obiseq.BioSequenceSlice,
|
||||
consensus_id string,
|
||||
kmer_size int,
|
||||
filter_out float64,
|
||||
save_graph bool, dirname string) (*obiseq.BioSequence, error) {
|
||||
|
||||
if seqs.Len() == 0 {
|
||||
return nil, fmt.Errorf("no sequence provided")
|
||||
}
|
||||
|
||||
if seqs.Len() == 1 {
|
||||
seq := seqs[0].Copy()
|
||||
seq.SetAttribute("obiconsensus_consensus", false)
|
||||
return seq, nil
|
||||
}
|
||||
|
||||
if save_graph {
|
||||
if dirname == "" {
|
||||
dirname = "."
|
||||
@@ -104,7 +115,7 @@ func BuildConsensus(seqs obiseq.BioSequenceSlice,
|
||||
log.Debugf("Graph size : %d\n", graph.Len())
|
||||
total_kmer := graph.Len()
|
||||
|
||||
seq, err := graph.LongestConsensus(consensus_id)
|
||||
seq, err := graph.LongestConsensus(consensus_id, filter_out)
|
||||
|
||||
sumCount := 0
|
||||
|
||||
@@ -112,7 +123,7 @@ func BuildConsensus(seqs obiseq.BioSequenceSlice,
|
||||
for _, s := range seqs {
|
||||
sumCount += s.Count()
|
||||
}
|
||||
|
||||
seq.SetAttribute("obiconsensus_consensus", true)
|
||||
seq.SetAttribute("obiconsensus_weight", sumCount)
|
||||
seq.SetAttribute("obiconsensus_seq_length", seq.Len())
|
||||
seq.SetAttribute("obiconsensus_kmer_size", kmer_size)
|
||||
@@ -136,6 +147,10 @@ func SampleWeight(seqs *obiseq.BioSequenceSlice, sample, sample_key string) func
|
||||
|
||||
stats := (*seqs)[i].StatsOn(obiseq.MakeStatsOnDescription(sample_key), "NA")
|
||||
|
||||
if stats == nil {
|
||||
log.Panicf("Sample %s not found in sequence %d", sample, i)
|
||||
}
|
||||
|
||||
if value, ok := stats[sample]; ok {
|
||||
return float64(value)
|
||||
}
|
||||
@@ -292,16 +307,16 @@ func MinionDenoise(graph *obigraph.Graph[*obiseq.BioSequence, Mutation],
|
||||
pack[degree] = v
|
||||
clean, err = BuildConsensus(pack,
|
||||
fmt.Sprintf("%s_consensus", v.Id()),
|
||||
kmer_size,
|
||||
kmer_size, CLILowCoverage(),
|
||||
CLISaveGraphToFiles(), CLIGraphFilesDirectory())
|
||||
|
||||
if err != nil {
|
||||
log.Warning(err)
|
||||
clean = (*graph.Vertices)[i]
|
||||
clean = (*graph.Vertices)[i].Copy()
|
||||
clean.SetAttribute("obiconsensus_consensus", false)
|
||||
} else {
|
||||
clean.SetAttribute("obiconsensus_consensus", true)
|
||||
|
||||
}
|
||||
|
||||
pack.Recycle(false)
|
||||
|
||||
} else {
|
||||
@@ -318,8 +333,9 @@ func MinionDenoise(graph *obigraph.Graph[*obiseq.BioSequence, Mutation],
|
||||
|
||||
annotations := v.Annotations()
|
||||
|
||||
staton := obiseq.StatsOnSlotName(sample_key)
|
||||
for k, v := range annotations {
|
||||
if !clean.HasAttribute(k) {
|
||||
if !clean.HasAttribute(k) && k != staton {
|
||||
clean.SetAttribute(k, v)
|
||||
}
|
||||
}
|
||||
@@ -334,6 +350,83 @@ func MinionDenoise(graph *obigraph.Graph[*obiseq.BioSequence, Mutation],
|
||||
|
||||
return denoised
|
||||
}
|
||||
|
||||
func MinionClusterDenoise(graph *obigraph.Graph[*obiseq.BioSequence, Mutation],
|
||||
sample_key string, kmer_size int) obiseq.BioSequenceSlice {
|
||||
denoised := obiseq.MakeBioSequenceSlice()
|
||||
seqs := (*obiseq.BioSequenceSlice)(graph.Vertices)
|
||||
weight := SampleWeight(seqs, graph.Name, sample_key)
|
||||
seqWeights := make([]float64, len(*seqs))
|
||||
|
||||
// Compute weights for each vertex as the sum of the weights of its neighbors
|
||||
|
||||
log.Info("")
|
||||
log.Infof("Sample %s: Computing weights", graph.Name)
|
||||
for i := range *seqs {
|
||||
w := weight(i)
|
||||
for _, j := range graph.Neighbors(i) {
|
||||
w += weight(j)
|
||||
}
|
||||
|
||||
seqWeights[i] = w
|
||||
}
|
||||
|
||||
log.Infof("Sample %s: Done computing weights", graph.Name)
|
||||
|
||||
log.Infof("Sample %s: Clustering", graph.Name)
|
||||
// Look for vertex not having a neighbor with a higher weight
|
||||
for i := range *seqs {
|
||||
v := (*seqs)[i]
|
||||
head := true
|
||||
neighbors := graph.Neighbors(i)
|
||||
for _, j := range neighbors {
|
||||
if seqWeights[i] < seqWeights[j] {
|
||||
head = false
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
if head {
|
||||
pack := obiseq.MakeBioSequenceSlice(len(neighbors) + 1)
|
||||
for k, j := range neighbors {
|
||||
pack[k] = (*seqs)[j]
|
||||
}
|
||||
pack[len(neighbors)] = v
|
||||
|
||||
clean, err := BuildConsensus(pack,
|
||||
fmt.Sprintf("%s_consensus", v.Id()),
|
||||
kmer_size, CLILowCoverage(),
|
||||
CLISaveGraphToFiles(), CLIGraphFilesDirectory())
|
||||
|
||||
if err != nil {
|
||||
log.Warning(err)
|
||||
clean = (*graph.Vertices)[i].Copy()
|
||||
clean.SetAttribute("obiconsensus_consensus", false)
|
||||
}
|
||||
pack.Recycle(false)
|
||||
|
||||
clean.SetAttribute(sample_key, graph.Name)
|
||||
|
||||
annotations := v.Annotations()
|
||||
clean.SetCount(int(weight(i)))
|
||||
|
||||
staton := obiseq.StatsOnSlotName(sample_key)
|
||||
|
||||
for k, v := range annotations {
|
||||
if !clean.HasAttribute(k) && k != staton {
|
||||
clean.SetAttribute(k, v)
|
||||
}
|
||||
}
|
||||
|
||||
denoised = append(denoised, clean)
|
||||
}
|
||||
}
|
||||
|
||||
log.Infof("Sample %s: Done clustering", graph.Name)
|
||||
|
||||
return denoised
|
||||
}
|
||||
|
||||
func CLIOBIMinion(itertator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
dirname := CLIGraphFilesDirectory()
|
||||
newIter := obiiter.MakeIBioSequence()
|
||||
@@ -395,9 +488,17 @@ func CLIOBIMinion(itertator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
false, 1, 0, 3)
|
||||
}
|
||||
|
||||
denoised := MinionDenoise(graph,
|
||||
CLISampleAttribute(),
|
||||
CLIKmerSize())
|
||||
var denoised obiseq.BioSequenceSlice
|
||||
|
||||
if CLICluterDenoise() {
|
||||
denoised = MinionClusterDenoise(graph,
|
||||
CLISampleAttribute(),
|
||||
CLIKmerSize())
|
||||
} else {
|
||||
denoised = MinionDenoise(graph,
|
||||
CLISampleAttribute(),
|
||||
CLIKmerSize())
|
||||
}
|
||||
|
||||
newIter.Push(obiiter.MakeBioSequenceBatch(source, sample_order, denoised))
|
||||
|
||||
@@ -411,9 +512,14 @@ func CLIOBIMinion(itertator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
newIter.WaitAndClose()
|
||||
}()
|
||||
|
||||
obiuniq.AddStatsOn(CLISampleAttribute())
|
||||
// obiuniq.AddStatsOn("sample:obiconsensus_weight")
|
||||
obiuniq.SetUniqueInMemory(false)
|
||||
obiuniq.SetNoSingleton(CLINoSingleton())
|
||||
return obiuniq.CLIUnique(newIter).Pipe(obiiter.WorkerPipe(obiannotate.AddSeqLengthWorker(), false))
|
||||
res := newIter
|
||||
if CLIUnique() {
|
||||
obiuniq.AddStatsOn(CLISampleAttribute())
|
||||
// obiuniq.AddStatsOn("sample:obiconsensus_weight")
|
||||
obiuniq.SetUniqueInMemory(false)
|
||||
obiuniq.SetNoSingleton(CLINoSingleton())
|
||||
res = obiuniq.CLIUnique(newIter)
|
||||
}
|
||||
|
||||
return res.Pipe(obiiter.WorkerPipe(obiannotate.AddSeqLengthWorker(), false))
|
||||
}
|
||||
|
||||
@@ -8,8 +8,6 @@ import (
|
||||
var _distStepMax = 1
|
||||
var _sampleAttribute = "sample"
|
||||
|
||||
var _ratioMax = 1.0
|
||||
|
||||
var _clusterMode = false
|
||||
var _onlyHead = false
|
||||
|
||||
@@ -20,6 +18,10 @@ var _NoSingleton = false
|
||||
var _saveGraph = "__@@NOSAVE@@__"
|
||||
var _saveRatio = "__@@NOSAVE@@__"
|
||||
|
||||
// _lowCoverage stores the value of the "low-coverage" command-line option
// and is the value returned by CLILowCoverage. It defaults to 0.0.
var _lowCoverage = 0.0
|
||||
|
||||
// _unique stores the value of the "unique" command-line option and is the
// value returned by CLIUnique. It defaults to false.
var _unique = false
|
||||
|
||||
// ObiminionOptionSet sets the options for obiminion.
|
||||
//
|
||||
// options: The options for configuring obiminion.
|
||||
@@ -50,6 +52,19 @@ func ObiminionOptionSet(options *getoptions.GetOpt) {
|
||||
options.BoolVar(&_NoSingleton, "no-singleton", _NoSingleton,
|
||||
options.Description("If set, sequences occurring a single time in the data set are discarded."))
|
||||
|
||||
options.BoolVar(&_clusterMode, "cluster", _clusterMode,
|
||||
options.Alias("C"),
|
||||
options.Description("Switch obiconsensus into its clustering mode."),
|
||||
)
|
||||
|
||||
options.BoolVar(&_unique, "unique", _unique,
|
||||
options.Alias("U"),
|
||||
options.Description("If set, sequences are dereplicated on the output (obiuniq)."),
|
||||
)
|
||||
|
||||
options.Float64Var(&_lowCoverage, "low-coverage", _lowCoverage,
|
||||
options.Description("If the coverage of a sample is lower than this value, it will be discarded."),
|
||||
)
|
||||
}
|
||||
|
||||
// OptionSet sets up the options for the obiminion package.
|
||||
@@ -129,4 +144,16 @@ func CLIKmerSize() int {
|
||||
// Returns a boolean value indicating whether or not singleton sequences should be discarded.
|
||||
func CLINoSingleton() bool {
|
||||
return _NoSingleton
|
||||
}
|
||||
}
|
||||
|
||||
// CLICluterDenoise reports whether the clustering mode has been requested.
//
// It returns the current value of _clusterMode, the variable bound to the
// "cluster" command-line option.
//
// NOTE(review): the name is spelled "Cluter" (not "Cluster"); it is kept
// as-is because callers reference it under this name.
func CLICluterDenoise() bool {
|
||||
return _clusterMode
|
||||
}
|
||||
|
||||
// CLIUnique reports whether the output sequences must be dereplicated.
//
// It returns the current value of _unique, the variable bound to the
// "unique" command-line option.
func CLIUnique() bool {
|
||||
return _unique
|
||||
}
|
||||
|
||||
// CLILowCoverage returns the low-coverage threshold.
//
// It returns the current value of _lowCoverage, the variable bound to the
// "low-coverage" command-line option.
func CLILowCoverage() float64 {
|
||||
return _lowCoverage
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user