mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-12-08 16:50:27 +00:00
Force sequence reading to produce lowercase sequences.
Adds two columns (Father_id and Father_status) to the obiclean ratio CSV file.
This commit is contained in:
@@ -18,6 +18,8 @@ import (
|
||||
|
||||
type Ratio struct {
|
||||
Sample string
|
||||
SeqID string
|
||||
status string
|
||||
From int
|
||||
To int
|
||||
CFrom int
|
||||
@@ -97,12 +99,14 @@ func EmpiricalDistCsv(filename string, data [][]Ratio) {
|
||||
|
||||
bar := progressbar.NewOptions(len(data), pbopt...)
|
||||
|
||||
fmt.Fprintln(file, "Sample,From,To,Weight_from,Weight_to,Count_from,Count_to,Position,length")
|
||||
fmt.Fprintln(file, "Sample,Father_id,Father_status,From,To,Weight_from,Weight_to,Count_from,Count_to,Position,length")
|
||||
for code, dist := range data {
|
||||
a1, a2 := intToNucPair(code)
|
||||
for _, ratio := range dist {
|
||||
fmt.Fprintf(file, "%s,%c,%c,%d,%d,%d,%d,%d,%d\n",
|
||||
fmt.Fprintf(file, "%s,%s,%s,%c,%c,%d,%d,%d,%d,%d,%d\n",
|
||||
ratio.Sample,
|
||||
ratio.SeqID,
|
||||
ratio.status,
|
||||
a1, a2,
|
||||
ratio.From,
|
||||
ratio.To,
|
||||
@@ -463,7 +467,13 @@ func EstimateRatio(samples map[string]*[]*seqPCR, minStatRatio int) [][]Ratio {
|
||||
for _, edge := range seq.Edges {
|
||||
father := (*seqs)[edge.Father]
|
||||
if father.Weight >= minStatRatio && edge.Dist == 1 {
|
||||
ratio[edge.NucPair] = append(ratio[edge.NucPair], Ratio{name, father.Weight, seq.Weight, father.Count, seq.Count, edge.Pos, father.Sequence.Len()})
|
||||
ratio[edge.NucPair] = append(ratio[edge.NucPair],
|
||||
Ratio{name,
|
||||
father.Sequence.Id(), Status(father.Sequence)[name],
|
||||
father.Weight, seq.Weight,
|
||||
father.Count, seq.Count,
|
||||
edge.Pos,
|
||||
father.Sequence.Len()})
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@ type seqPCR struct {
|
||||
SonCount int
|
||||
AddedSons int
|
||||
Edges []Edge
|
||||
Cluster map[int]bool // used as the set of head sequences associated to that sequence
|
||||
}
|
||||
|
||||
// buildSamples sorts the sequences by samples
|
||||
@@ -183,13 +184,53 @@ func GetMutation(sequence *obiseq.BioSequence) map[string]string {
|
||||
return mutation
|
||||
}
|
||||
|
||||
func GetCluster(sequence *obiseq.BioSequence) map[string]string {
|
||||
annotation := sequence.Annotations()
|
||||
icluster, ok := annotation["obiclean_cluster"]
|
||||
var cluster map[string]string
|
||||
|
||||
if ok {
|
||||
switch icluster := icluster.(type) {
|
||||
case map[string]string:
|
||||
cluster = icluster
|
||||
case map[string]interface{}:
|
||||
cluster = make(map[string]string)
|
||||
for k, v := range icluster {
|
||||
cluster[k] = fmt.Sprint(v)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
cluster = make(map[string]string)
|
||||
annotation["obiclean_cluster"] = cluster
|
||||
}
|
||||
|
||||
return cluster
|
||||
}
|
||||
|
||||
|
||||
// func Cluster(sample map[string]*([]*seqPCR)) {
|
||||
// for _, graph := range sample {
|
||||
// for _, s := range *graph {
|
||||
// cluster := GetCluster(s.Sequence)
|
||||
// if len(s.Edges) > 0 {
|
||||
// for _, f := range s.Edges {
|
||||
|
||||
// }
|
||||
// } else {
|
||||
// cluster
|
||||
// }
|
||||
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
func Mutation(sample map[string]*([]*seqPCR)) {
|
||||
for _, graph := range sample {
|
||||
for _, s := range *graph {
|
||||
for _, f := range s.Edges {
|
||||
id := (*graph)[f.Father].Sequence.Id()
|
||||
GetMutation(s.Sequence)[id] = fmt.Sprintf("(%c)->(%c)@%d",
|
||||
f.From, f.To, f.Pos + 1)
|
||||
f.From, f.To, f.Pos+1)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -277,14 +318,6 @@ func IOBIClean(itertator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
|
||||
}
|
||||
}
|
||||
|
||||
if IsSaveRatioTable() {
|
||||
all_ratio := EstimateRatio(samples, MinCountToEvalMutationRate())
|
||||
EmpiricalDistCsv(RatioTableFilename(), all_ratio)
|
||||
}
|
||||
|
||||
if SaveGraphToFiles() {
|
||||
SaveGMLGraphs(GraphFilesDirectory(), samples, MinCountToEvalMutationRate())
|
||||
}
|
||||
|
||||
Mutation(samples)
|
||||
|
||||
@@ -310,6 +343,16 @@ func IOBIClean(itertator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
|
||||
bar.Add(1)
|
||||
}
|
||||
|
||||
if SaveGraphToFiles() {
|
||||
SaveGMLGraphs(GraphFilesDirectory(), samples, MinCountToEvalMutationRate())
|
||||
}
|
||||
|
||||
if IsSaveRatioTable() {
|
||||
all_ratio := EstimateRatio(samples, MinCountToEvalMutationRate())
|
||||
EmpiricalDistCsv(RatioTableFilename(), all_ratio)
|
||||
}
|
||||
|
||||
|
||||
iter := annotateOBIClean(db, samples, SampleAttribute(), "NA")
|
||||
|
||||
if OnlyHead() {
|
||||
|
||||
@@ -56,5 +56,5 @@ func IExtractBarcode(iterator obiiter.IBioSequenceBatch) (obiiter.IBioSequenceBa
|
||||
}
|
||||
log.Printf("Sequence demultiplexing using %d workers\n", obioptions.CLIParallelWorkers())
|
||||
|
||||
return newIter, nil
|
||||
return newIter.Speed("Demultiplexing"), nil
|
||||
}
|
||||
|
||||
@@ -66,7 +66,9 @@ func FindClosests(sequence *obiseq.BioSequence,
|
||||
|
||||
// log.Println(sequence.Id(),cw[j], maxe)
|
||||
if runExact || (atMost <= (maxe + 1)) {
|
||||
// if true {
|
||||
lcs, alilength := obialign.FastLCSScore(sequence, ref, maxe+1, &matrix)
|
||||
// fmt.Println(j, cw[j], lcs, alilength, alilength-lcs)
|
||||
// lcs, alilength := obialign.LCSScore(sequence, ref, maxe+1, matrix)
|
||||
n++
|
||||
if lcs == -1 {
|
||||
|
||||
Reference in New Issue
Block a user