Change obiclean algorithm for a better evaluation of ratio

This commit is contained in:
2022-08-31 20:38:03 +02:00
parent 90ba980de6
commit 6b8f4490cf
8 changed files with 371 additions and 75 deletions

View File

@ -19,6 +19,14 @@ func AnnotatorToSeqWorker(function SeqAnnotator) SeqWorker {
return f return f
} }
// That method allows for applying a SeqWorker function on every sequences.
//
// Sequences are provided by the iterator and modified sequences are pushed
// on the returned IBioSequenceBatch.
//
// Moreover the SeqWorker function, the method accepted two optional integer parameters.
// - First is allowing to indicates the number of workers running in parallele (default 4)
// - The second the size of the chanel buffer. By default set to the same value than the input buffer.
func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IBioSequenceBatch { func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IBioSequenceBatch {
nworkers := 4 nworkers := 4
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
@ -61,6 +69,51 @@ func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IB
return newIter return newIter
} }
func (iterator IBioSequenceBatch) MakeIConditionalWorker(predicate obiseq.SequencePredicate,
worker SeqWorker, sizes ...int) IBioSequenceBatch {
nworkers := 4
buffsize := iterator.BufferSize()
if len(sizes) > 0 {
nworkers = sizes[0]
}
if len(sizes) > 1 {
buffsize = sizes[1]
}
newIter := MakeIBioSequenceBatch(buffsize)
newIter.Add(nworkers)
go func() {
newIter.WaitAndClose()
log.Debugln("End of the batch workers")
}()
f := func(iterator IBioSequenceBatch) {
for iterator.Next() {
batch := iterator.Get()
for i, seq := range batch.slice {
if predicate(batch.slice[i]) {
batch.slice[i] = worker(seq)
}
}
newIter.Push(batch)
}
newIter.Done()
}
log.Debugln("Start of the batch workers")
for i := 0; i < nworkers-1; i++ {
go f(iterator.Split())
}
go f(iterator)
return newIter
}
func (iterator IBioSequenceBatch) MakeISliceWorker(worker SeqSliceWorker, sizes ...int) IBioSequenceBatch { func (iterator IBioSequenceBatch) MakeISliceWorker(worker SeqSliceWorker, sizes ...int) IBioSequenceBatch {
nworkers := 4 nworkers := 4
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()

View File

@ -199,7 +199,7 @@ func (s *BioSequence) Annotations() Annotation {
} }
// A method that returns the value of the key in the annotation map. // A method that returns the value of the key in the annotation map.
func (s *BioSequence) Get(key string) (interface{}, bool) { func (s *BioSequence) GetAttribute(key string) (interface{}, bool) {
var val interface{} var val interface{}
ok := s.annotations != nil ok := s.annotations != nil
@ -210,12 +210,17 @@ func (s *BioSequence) Get(key string) (interface{}, bool) {
return val, ok return val, ok
} }
func (s *BioSequence) SetAttribute(key string, value interface{}) {
annot := s.Annotations()
annot[key] = value
}
// A method that returns the value of the key in the annotation map. // A method that returns the value of the key in the annotation map.
func (s *BioSequence) GetInt(key string) (int, bool) { func (s *BioSequence) GetIntAttribute(key string) (int, bool) {
var val int var val int
var err error var err error
v, ok := s.Get(key) v, ok := s.GetAttribute(key)
if ok { if ok {
val, err = goutils.InterfaceToInt(v) val, err = goutils.InterfaceToInt(v)
@ -226,9 +231,9 @@ func (s *BioSequence) GetInt(key string) (int, bool) {
} }
// A method that returns the value of the key in the annotation map. // A method that returns the value of the key in the annotation map.
func (s *BioSequence) GetString(key string) (string, bool) { func (s *BioSequence) GetStringAttribute(key string) (string, bool) {
var val string var val string
v, ok := s.Get(key) v, ok := s.GetAttribute(key)
if ok { if ok {
val = fmt.Sprint(v) val = fmt.Sprint(v)
@ -242,7 +247,7 @@ func (s *BioSequence) GetBool(key string) (bool, bool) {
var val bool var val bool
var err error var err error
v, ok := s.Get(key) v, ok := s.GetAttribute(key)
if ok { if ok {
val, err = goutils.InterfaceToBool(v) val, err = goutils.InterfaceToBool(v)
@ -259,7 +264,7 @@ func (s *BioSequence) MD5() [16]byte {
// Returning the number of times the sequence has been observed. // Returning the number of times the sequence has been observed.
func (s *BioSequence) Count() int { func (s *BioSequence) Count() int {
count, ok := s.GetInt("count") count, ok := s.GetIntAttribute("count")
if !ok { if !ok {
count = 1 count = 1
@ -270,7 +275,7 @@ func (s *BioSequence) Count() int {
// Returning the taxid of the sequence. // Returning the taxid of the sequence.
func (s *BioSequence) Taxid() int { func (s *BioSequence) Taxid() int {
taxid, ok := s.GetInt("taxid") taxid, ok := s.GetIntAttribute("taxid")
if !ok { if !ok {
taxid = 1 taxid = 1
@ -330,6 +335,12 @@ func (s *BioSequence) WriteByteQualities(data byte) error {
return nil return nil
} }
// Clearing the sequence.
func (s *BioSequence) ClearQualities() {
s.qualities = s.qualities[0:0]
}
// A method that appends a byte slice to the sequence. // A method that appends a byte slice to the sequence.
func (s *BioSequence) Write(data []byte) (int, error) { func (s *BioSequence) Write(data []byte) (int, error) {
s.sequence = append(s.sequence, data...) s.sequence = append(s.sequence, data...)
@ -347,3 +358,8 @@ func (s *BioSequence) WriteByte(data byte) error {
s.sequence = append(s.sequence, data) s.sequence = append(s.sequence, data)
return nil return nil
} }
// Clearing the sequence.
func (s *BioSequence) Clear() {
s.sequence = s.sequence[0:0]
}

View File

@ -2,6 +2,8 @@ package obitax
import ( import (
"fmt" "fmt"
log "github.com/sirupsen/logrus"
) )
func (taxon *TaxNode) Path() (*TaxonSlice, error) { func (taxon *TaxNode) Path() (*TaxonSlice, error) {
@ -22,6 +24,34 @@ func (taxon *TaxNode) Path() (*TaxonSlice, error) {
return &path, nil return &path, nil
} }
func (taxon *TaxNode) TaxonAtRank(rank string) *TaxNode {
for taxon.rank != rank && taxon != taxon.pparent {
taxon = taxon.pparent
if taxon == nil {
log.Panicln("Taxonomy must be reindexed")
}
}
if taxon == taxon.pparent {
taxon = nil
}
return taxon
}
func (taxon *TaxNode) Species() *TaxNode {
return taxon.TaxonAtRank("species")
}
func (taxon *TaxNode) Genus() *TaxNode {
return taxon.TaxonAtRank("genus")
}
func (taxon *TaxNode) Family() *TaxNode {
return taxon.TaxonAtRank("family")
}
// Returns a TaxonSet listing the requested taxon and all // Returns a TaxonSet listing the requested taxon and all
// its ancestors in the taxonomy down to the root. // its ancestors in the taxonomy down to the root.
func (taxonomy *Taxonomy) Path(taxid int) (*TaxonSlice, error) { func (taxonomy *Taxonomy) Path(taxid int) (*TaxonSlice, error) {

View File

@ -0,0 +1,37 @@
package obitax
import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
)
// Setting the taxon at a given rank for a given sequence.
//
// Two attributes are added to the sequence. One named by the rank name stores
// the taxid, a second named by the rank name suffixed with '_name' contains the
// Scientific name of the genus.
// If the taxon at the given rank doesn't exist for the taxonomy annotation
// of the sequence, nothing happens.
func (taxonomy *Taxonomy) SetTaxonAtRank(sequence *obiseq.BioSequence, rank string) *TaxNode {
taxid := sequence.Taxid()
taxon, err := taxonomy.Taxon(taxid)
taxonAtRank := taxon.TaxonAtRank(rank)
if err == nil && taxonAtRank != nil {
sequence.SetAttribute(rank, taxonAtRank.taxid)
sequence.SetAttribute(rank+"_name", taxonAtRank.scientificname)
}
return taxonAtRank
}
func (taxonomy *Taxonomy) SetSpecies(sequence *obiseq.BioSequence) *TaxNode {
return taxonomy.SetTaxonAtRank(sequence, "species")
}
func (taxonomy *Taxonomy) SetGenus(sequence *obiseq.BioSequence) *TaxNode {
return taxonomy.SetTaxonAtRank(sequence, "genus")
}
func (taxonomy *Taxonomy) SetFamily(sequence *obiseq.BioSequence) *TaxNode {
return taxonomy.SetTaxonAtRank(sequence, "family")
}

View File

@ -22,7 +22,10 @@ func (taxonomy *Taxonomy) IsAValidTaxon(withAutoCorrection ...bool) obiseq.Seque
if err == nil && taxon.taxid != taxid { if err == nil && taxon.taxid != taxid {
if autocorrection { if autocorrection {
sequence.SetTaxid(taxon.taxid) sequence.SetTaxid(taxon.taxid)
log.Printf("Sequence %s : Taxid %d updated with %d", taxid, taxon.taxid) log.Printf("Sequence %s : Taxid %d updated with %d",
sequence.Id(),
taxid,
taxon.taxid)
} else { } else {
if _, ok := deprecatedTaxidsWarning[taxid]; !ok { if _, ok := deprecatedTaxidsWarning[taxid]; !ok {
deprecatedTaxidsWarning[taxid] = true deprecatedTaxidsWarning[taxid] = true

View File

@ -0,0 +1,56 @@
package obitax
import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
log "github.com/sirupsen/logrus"
)
func (taxonomy *Taxonomy) MakeSetTaxonAtRankWorker(rank string) obiiter.SeqWorker {
if !goutils.Contains(taxonomy.RankList(), rank) {
log.Fatalf("%s is not a valid rank (allowed ranks are %v)",
rank,
taxonomy.RankList())
}
w := func(sequence *obiseq.BioSequence) *obiseq.BioSequence {
taxonomy.SetTaxonAtRank(sequence, rank)
return sequence
}
return w
}
func (taxonomy *Taxonomy) MakeSetSpeciesWorker() obiiter.SeqWorker {
w := func(sequence *obiseq.BioSequence) *obiseq.BioSequence {
taxonomy.SetSpecies(sequence)
return sequence
}
return w
}
func (taxonomy *Taxonomy) MakeSetGenusWorker() obiiter.SeqWorker {
w := func(sequence *obiseq.BioSequence) *obiseq.BioSequence {
taxonomy.SetGenus(sequence)
return sequence
}
return w
}
func (taxonomy *Taxonomy) MakeSetFamilyWorker() obiiter.SeqWorker {
w := func(sequence *obiseq.BioSequence) *obiseq.BioSequence {
taxonomy.SetFamily(sequence)
return sequence
}
return w
}

View File

@ -17,12 +17,35 @@ import (
) )
type Ratio struct { type Ratio struct {
Sample string
From int From int
To int To int
CFrom int
CTo int
Pos int Pos int
Length int Length int
} }
type Edge struct {
Father int
From byte
To byte
Pos int
NucPair int
Dist int
}
func makeEdge(father, dist, pos int, from, to byte) Edge {
return Edge{
Father: father,
Dist: dist,
Pos: pos,
From: from,
To: to,
NucPair: nucPair(from, to),
}
}
func abs(x int) int { func abs(x int) int {
if x < 0 { if x < 0 {
return -x return -x
@ -74,11 +97,19 @@ func EmpiricalDistCsv(filename string, data [][]Ratio) {
bar := progressbar.NewOptions(len(data), pbopt...) bar := progressbar.NewOptions(len(data), pbopt...)
fmt.Fprintln(file, "From,To,Count_from,Count_to,Position,length") fmt.Fprintln(file, "Sample,From,To,Weight_from,Weight_to,Count_from,Count_to,Position,length")
for code, dist := range data { for code, dist := range data {
a1, a2 := intToNucPair(code) a1, a2 := intToNucPair(code)
for _, ratio := range dist { for _, ratio := range dist {
fmt.Fprintf(file, "%c,%c,%d,%d,%d,%d\n", a1, a2, ratio.From, ratio.To, ratio.Pos, ratio.Length) fmt.Fprintf(file, "%s,%c,%c,%d,%d,%d,%d,%d,%d\n",
ratio.Sample,
a1, a2,
ratio.From,
ratio.To,
ratio.CFrom,
ratio.CTo,
ratio.Pos,
ratio.Length)
} }
bar.Add(1) bar.Add(1)
} }
@ -94,11 +125,11 @@ func Gml(seqs *[]*seqPCR, sample string, statThreshold int) string {
comment "Obiclean graph for sample {{ Name }}" comment "Obiclean graph for sample {{ Name }}"
directed 1 directed 1
{{range $index, $data:= .}} {{range $index, $data:= .}}
{{ if or $data.Fathers (gt $data.SonCount 0)}} {{ if or $data.Edges (gt $data.SonCount 0)}}
node [ id {{$index}} node [ id {{$index}}
graphics [ graphics [
type "{{ Shape $data.Count }}" type "{{ Shape $data.Count }}"
fill "{{ if and (gt $data.SonCount 0) (not $data.Fathers)}}#0000FF{{ else }}#00FF00{{ end }}" fill "{{ if and (gt $data.SonCount 0) (not $data.Edges)}}#0000FF{{ else }}#00FF00{{ end }}"
h {{ Sqrt $data.Count }} h {{ Sqrt $data.Count }}
w {{ Sqrt $data.Count }} w {{ Sqrt $data.Count }}
] ]
@ -108,11 +139,11 @@ func Gml(seqs *[]*seqPCR, sample string, statThreshold int) string {
{{ end }} {{ end }}
{{range $index, $data:= .}} {{range $index, $data:= .}}
{{range $i, $father:= $data.Fathers}} {{range $i, $edge:= $data.Edges}}
edge [ source {{$index}} edge [ source {{$index}}
target {{$father}} target {{$edge.Father}}
color "{{ if gt (index $data.Dist $i) 1 }}#FF0000{{ else }}#00FF00{{ end }}" color "{{ if gt (index $data.Edges $i).Dist 1 }}#FF0000{{ else }}#00FF00{{ end }}"
label "{{(index $data.Dist $i)}}" label "{{(index $data.Edges $i).Dist}}"
] ]
{{ end }} {{ end }}
{{ end }} {{ end }}
@ -226,51 +257,83 @@ func intToNucPair(code int) (a, b byte) {
return decode[c1], decode[c2] return decode[c1], decode[c2]
} }
func buildSamplePairs(seqs *[]*seqPCR, minStatRatio int, workers int) ([][]Ratio, int) { func reweightSequences(seqs *[]*seqPCR) {
for _, node := range *seqs {
node.Weight = node.Count
}
//var rfunc func(*seqPCR)
rfunc := func(node *seqPCR) {
node.AddedSons=0
nedges := len(node.Edges)
if nedges > 0 {
swf := 0.0
for k := 0; k < nedges; k++ {
swf += float64((*seqs)[node.Edges[k].Father].Count)
}
for k := 0; k < nedges; k++ {
father := (*seqs)[node.Edges[k].Father]
father.Weight += int(math.Round(float64(node.Weight) * float64(father.Count) / swf))
father.AddedSons++
// log.Println(father.AddedSons, father.SonCount)
}
}
}
for _, node := range *seqs {
if node.SonCount == 0 {
rfunc(node)
}
}
for done := true; done; {
done = false
for _, node := range *seqs {
if node.SonCount > 0 && node.SonCount == node.AddedSons {
// log.Println(node.AddedSons, node.SonCount)
rfunc(node)
done = true
}
}
}
}
func buildSamplePairs(seqs *[]*seqPCR, workers int) int {
nseq := len(*seqs) nseq := len(*seqs)
running := sync.WaitGroup{} running := sync.WaitGroup{}
linePairs := func(i int) [][]Ratio { linePairs := func(i int) {
ratio := make([][]Ratio, 25)
son := (*seqs)[i] son := (*seqs)[i]
for j := i + 1; j < nseq; j++ { for j := i + 1; j < nseq; j++ {
father := (*seqs)[j] father := (*seqs)[j]
d, pos, a1, a2 := obialign.D1Or0(son.Sequence, father.Sequence) d, pos, a1, a2 := obialign.D1Or0(son.Sequence, father.Sequence)
if d > 0 { if d > 0 {
son.Fathers = append(son.Fathers, j) son.Edges = append(son.Edges, makeEdge(j, d, pos, a1, a2))
son.Dist = append(son.Dist, d)
father.SonCount++ father.SonCount++
if father.Count > minStatRatio {
n := nucPair(a1, a2)
ratio[n] = append(ratio[n], Ratio{father.Count, son.Count, pos, father.Sequence.Length()})
}
} }
} }
return ratio
} }
lineChan := make(chan int) lineChan := make(chan int)
idxChan := make(chan [][]Ratio)
ff := func() { ff := func() {
for i := range lineChan { for i := range lineChan {
idxChan <- linePairs(i) linePairs(i)
} }
running.Done() running.Done()
} }
running.Add(workers) running.Add(workers)
go func() {
running.Wait()
close(idxChan)
}()
for i := 0; i < workers; i++ { for i := 0; i < workers; i++ {
go ff() go ff()
} }
@ -283,15 +346,12 @@ func buildSamplePairs(seqs *[]*seqPCR, minStatRatio int, workers int) ([][]Ratio
}() }()
np := nseq * (nseq - 1) / 2 np := nseq * (nseq - 1) / 2
ratio := make([][]Ratio, 25)
for data := range idxChan {
for i, r := range data {
ratio[i] = append(ratio[i], r...)
}
} running.Wait()
return ratio, np reweightSequences(seqs)
return np
} }
func extendSimilarityGraph(seqs *[]*seqPCR, step int, workers int) int { func extendSimilarityGraph(seqs *[]*seqPCR, step int, workers int) int {
@ -310,8 +370,7 @@ func extendSimilarityGraph(seqs *[]*seqPCR, step int, workers int) int {
matrix) matrix)
d := (lali - lcs) d := (lali - lcs)
if lcs >= 0 && d <= step && step > 0 { if lcs >= 0 && d <= step && step > 0 {
son.Fathers = append(son.Fathers, j) son.Edges = append(son.Edges, makeEdge(j, d, -1, '-', '-'))
son.Dist = append(son.Dist, d)
father.SonCount++ father.SonCount++
//a, b := minMax((*seqs)[i].Count, (*seqs)[j].Count) //a, b := minMax((*seqs)[i].Count, (*seqs)[j].Count)
} }
@ -340,7 +399,7 @@ func extendSimilarityGraph(seqs *[]*seqPCR, step int, workers int) int {
go func() { go func() {
for i := 0; i < nseq; i++ { for i := 0; i < nseq; i++ {
if len((*seqs)[i].Fathers) == 0 { if len((*seqs)[i].Edges) == 0 {
lineChan <- i lineChan <- i
} }
} }
@ -354,21 +413,18 @@ func extendSimilarityGraph(seqs *[]*seqPCR, step int, workers int) int {
func FilterGraphOnRatio(seqs *[]*seqPCR, ratio float64) { func FilterGraphOnRatio(seqs *[]*seqPCR, ratio float64) {
for _, s1 := range *seqs { for _, s1 := range *seqs {
c1 := float64(s1.Count) c1 := float64(s1.Weight)
f := s1.Fathers e := s1.Edges
d := s1.Dist
j := 0 j := 0
for i, s2 := range f { for i, s2 := range e {
f[j] = f[i] e[j] = e[i]
d[j] = d[i] if (c1 / float64((*seqs)[s2.Father].Weight)) <= math.Pow(ratio, float64(e[i].Dist)) {
if (c1 / float64((*seqs)[s2].Count)) <= math.Pow(ratio, float64(d[i])) {
j++ j++
} else { } else {
(*seqs)[s2].SonCount-- (*seqs)[s2.Father].SonCount--
} }
} }
s1.Fathers = f[0:j] s1.Edges = e[0:j]
s1.Dist = d[0:j]
} }
} }
@ -384,7 +440,7 @@ func sortSamples(samples map[string]*([]*seqPCR)) {
} }
func ObicleanStatus(seq *seqPCR) string { func ObicleanStatus(seq *seqPCR) string {
if len(seq.Fathers) == 0 { if len(seq.Edges) == 0 {
if seq.SonCount > 0 { if seq.SonCount > 0 {
return "h" return "h"
} else { } else {
@ -395,8 +451,28 @@ func ObicleanStatus(seq *seqPCR) string {
} }
} }
func EstimateRatio(samples map[string]*[]*seqPCR, minStatRatio int) [][]Ratio {
ratio := make([][]Ratio, 25)
for name, seqs := range samples {
for _, seq := range *seqs {
for _, edge := range seq.Edges {
father := (*seqs)[edge.Father]
if father.Weight >= minStatRatio && edge.Dist == 1 {
ratio[edge.NucPair] = append(ratio[edge.NucPair], Ratio{name, father.Weight, seq.Weight, father.Count, seq.Count, edge.Pos, father.Sequence.Length()})
}
}
}
}
return ratio
}
func BuildSeqGraph(samples map[string]*[]*seqPCR, func BuildSeqGraph(samples map[string]*[]*seqPCR,
maxError, minStatRatio, workers int) [][]Ratio { maxError, workers int) {
sortSamples(samples) sortSamples(samples)
@ -416,13 +492,8 @@ func BuildSeqGraph(samples map[string]*[]*seqPCR,
) )
bar := progressbar.NewOptions(npairs, pbopt...) bar := progressbar.NewOptions(npairs, pbopt...)
all_ratio := make([][]Ratio, 25)
for _, seqs := range samples { for _, seqs := range samples {
ratio, np := buildSamplePairs(seqs, minStatRatio, workers) np := buildSamplePairs(seqs, workers)
for i, r := range ratio {
all_ratio[i] = append(all_ratio[i], r...)
}
bar.Add(np) bar.Add(np)
} }
@ -444,6 +515,4 @@ func BuildSeqGraph(samples map[string]*[]*seqPCR,
bar.Add(np) bar.Add(np)
} }
} }
return all_ratio
} }

View File

@ -14,10 +14,11 @@ import (
type seqPCR struct { type seqPCR struct {
Count int // number of reads associated to a sequence in a PCR Count int // number of reads associated to a sequence in a PCR
Weight int // Number of reads associated to a sequence after clustering
Sequence *obiseq.BioSequence // pointer to the corresponding sequence Sequence *obiseq.BioSequence // pointer to the corresponding sequence
SonCount int SonCount int
Fathers []int AddedSons int
Dist []int Edges []Edge
} }
// buildSamples sorts the sequences by samples // buildSamples sorts the sequences by samples
@ -46,6 +47,7 @@ func buildSamples(dataset obiseq.BioSequenceSlice,
Count: v, Count: v,
Sequence: s, Sequence: s,
SonCount: 0, SonCount: 0,
AddedSons: 0,
}) })
} }
} }
@ -181,6 +183,33 @@ func Status(sequence *obiseq.BioSequence) map[string]string {
return obistatus return obistatus
} }
func Weight(sequence *obiseq.BioSequence) map[string]int {
annotation := sequence.Annotations()
iobistatus, ok := annotation["obiclean_weight"]
var weight map[string]int
var err error
if ok {
switch iobistatus := iobistatus.(type) {
case map[string]int:
weight = iobistatus
case map[string]interface{}:
weight = make(map[string]int)
for k, v := range iobistatus {
weight[k], err = goutils.InterfaceToInt(v)
if err != nil {
log.Panicf("Weight value %v cannnot be casted to an integer value\n", v)
}
}
}
} else {
weight = make(map[string]int)
annotation["obiclean_weight"] = weight
}
return weight
}
func IOBIClean(itertator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch { func IOBIClean(itertator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
db := itertator.Load() db := itertator.Load()
@ -191,9 +220,8 @@ func IOBIClean(itertator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
log.Infof("Dataset composed of %d samples\n", len(samples)) log.Infof("Dataset composed of %d samples\n", len(samples))
all_ratio := BuildSeqGraph(samples, BuildSeqGraph(samples,
DistStepMax(), DistStepMax(),
MinCountToEvalMutationRate(),
obioptions.CLIParallelWorkers()) obioptions.CLIParallelWorkers())
if RatioMax() < 1.0 { if RatioMax() < 1.0 {
@ -215,6 +243,7 @@ func IOBIClean(itertator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
} }
if IsSaveRatioTable() { if IsSaveRatioTable() {
all_ratio := EstimateRatio(samples, MinCountToEvalMutationRate())
EmpiricalDistCsv(RatioTableFilename(), all_ratio) EmpiricalDistCsv(RatioTableFilename(), all_ratio)
} }
@ -237,6 +266,9 @@ func IOBIClean(itertator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
for _, pcr := range *seqs { for _, pcr := range *seqs {
obistatus := Status(pcr.Sequence) obistatus := Status(pcr.Sequence)
obistatus[name] = ObicleanStatus(pcr) obistatus[name] = ObicleanStatus(pcr)
obiweight := Weight(pcr.Sequence)
obiweight[name] = pcr.Weight
} }
bar.Add(1) bar.Add(1)
} }