Force sequence reading to produce lowercase sequences.

Adds two columns (Father_id and Father_status) to the obiclean ratio CSV file.
This commit is contained in:
2022-11-22 15:06:09 +01:00
parent f4daa7f97f
commit 20b16c0ba1
14 changed files with 294 additions and 23 deletions

View File

@@ -18,6 +18,8 @@ import (
type Ratio struct {
Sample string
SeqID string
status string
From int
To int
CFrom int
@@ -97,12 +99,14 @@ func EmpiricalDistCsv(filename string, data [][]Ratio) {
bar := progressbar.NewOptions(len(data), pbopt...)
fmt.Fprintln(file, "Sample,From,To,Weight_from,Weight_to,Count_from,Count_to,Position,length")
fmt.Fprintln(file, "Sample,Father_id,Father_status,From,To,Weight_from,Weight_to,Count_from,Count_to,Position,length")
for code, dist := range data {
a1, a2 := intToNucPair(code)
for _, ratio := range dist {
fmt.Fprintf(file, "%s,%c,%c,%d,%d,%d,%d,%d,%d\n",
fmt.Fprintf(file, "%s,%s,%s,%c,%c,%d,%d,%d,%d,%d,%d\n",
ratio.Sample,
ratio.SeqID,
ratio.status,
a1, a2,
ratio.From,
ratio.To,
@@ -463,7 +467,13 @@ func EstimateRatio(samples map[string]*[]*seqPCR, minStatRatio int) [][]Ratio {
for _, edge := range seq.Edges {
father := (*seqs)[edge.Father]
if father.Weight >= minStatRatio && edge.Dist == 1 {
ratio[edge.NucPair] = append(ratio[edge.NucPair], Ratio{name, father.Weight, seq.Weight, father.Count, seq.Count, edge.Pos, father.Sequence.Len()})
ratio[edge.NucPair] = append(ratio[edge.NucPair],
Ratio{name,
father.Sequence.Id(), Status(father.Sequence)[name],
father.Weight, seq.Weight,
father.Count, seq.Count,
edge.Pos,
father.Sequence.Len()})
}
}

View File

@@ -19,6 +19,7 @@ type seqPCR struct {
SonCount int
AddedSons int
Edges []Edge
Cluster map[int]bool // used as the set of head sequences associated to that sequence
}
// buildSamples sorts the sequences by samples
@@ -183,13 +184,53 @@ func GetMutation(sequence *obiseq.BioSequence) map[string]string {
return mutation
}
// GetCluster returns the "obiclean_cluster" annotation of sequence as a
// map[string]string that is always non-nil and always attached to the
// sequence's annotations, so entries added by the caller are kept.
//
// Behavior by stored annotation type:
//   - map[string]string: returned as is.
//   - map[string]interface{}: every value is rendered with fmt.Sprint into a
//     new map[string]string, which replaces the stored annotation.
//   - absent, or any other type: a new empty map is stored and returned.
func GetCluster(sequence *obiseq.BioSequence) map[string]string {
	annotation := sequence.Annotations()

	var cluster map[string]string

	// A missing key yields a nil interface value, which falls through to the
	// default case exactly like an annotation of an unexpected type.
	switch icluster := annotation["obiclean_cluster"].(type) {
	case map[string]string:
		return icluster
	case map[string]interface{}:
		cluster = make(map[string]string, len(icluster))
		for k, v := range icluster {
			cluster[k] = fmt.Sprint(v)
		}
	default:
		// Returning a nil map here would make any caller-side assignment
		// panic, so start from an empty map instead.
		cluster = make(map[string]string)
	}

	// Store the map back so that the caller's writes reach the sequence
	// rather than a detached copy.
	annotation["obiclean_cluster"] = cluster

	return cluster
}
// func Cluster(sample map[string]*([]*seqPCR)) {
// for _, graph := range sample {
// for _, s := range *graph {
// cluster := GetCluster(s.Sequence)
// if len(s.Edges) > 0 {
// for _, f := range s.Edges {
// }
// } else {
// cluster
// }
// }
// }
// }
func Mutation(sample map[string]*([]*seqPCR)) {
for _, graph := range sample {
for _, s := range *graph {
for _, f := range s.Edges {
id := (*graph)[f.Father].Sequence.Id()
GetMutation(s.Sequence)[id] = fmt.Sprintf("(%c)->(%c)@%d",
f.From, f.To, f.Pos + 1)
f.From, f.To, f.Pos+1)
}
}
}
@@ -277,14 +318,6 @@ func IOBIClean(itertator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
}
}
if IsSaveRatioTable() {
all_ratio := EstimateRatio(samples, MinCountToEvalMutationRate())
EmpiricalDistCsv(RatioTableFilename(), all_ratio)
}
if SaveGraphToFiles() {
SaveGMLGraphs(GraphFilesDirectory(), samples, MinCountToEvalMutationRate())
}
Mutation(samples)
@@ -310,6 +343,16 @@ func IOBIClean(itertator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
bar.Add(1)
}
if SaveGraphToFiles() {
SaveGMLGraphs(GraphFilesDirectory(), samples, MinCountToEvalMutationRate())
}
if IsSaveRatioTable() {
all_ratio := EstimateRatio(samples, MinCountToEvalMutationRate())
EmpiricalDistCsv(RatioTableFilename(), all_ratio)
}
iter := annotateOBIClean(db, samples, SampleAttribute(), "NA")
if OnlyHead() {

View File

@@ -56,5 +56,5 @@ func IExtractBarcode(iterator obiiter.IBioSequenceBatch) (obiiter.IBioSequenceBa
}
log.Printf("Sequence demultiplexing using %d workers\n", obioptions.CLIParallelWorkers())
return newIter, nil
return newIter.Speed("Demultiplexing"), nil
}

View File

@@ -66,7 +66,9 @@ func FindClosests(sequence *obiseq.BioSequence,
// log.Println(sequence.Id(),cw[j], maxe)
if runExact || (atMost <= (maxe + 1)) {
// if true {
lcs, alilength := obialign.FastLCSScore(sequence, ref, maxe+1, &matrix)
// fmt.Println(j, cw[j], lcs, alilength, alilength-lcs)
// lcs, alilength := obialign.LCSScore(sequence, ref, maxe+1, matrix)
n++
if lcs == -1 {