From 0067152c2b3aaf864ece06892035a424aa432e67 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Thu, 27 Feb 2025 10:19:39 +0100 Subject: [PATCH] Patch the production of the ratio file --- README.md | 2 +- pkg/obiformats/fastseq_json_header.go | 24 +++++----- pkg/obioptions/version.go | 2 +- pkg/obitools/obiclean/graph.go | 68 +++++++++++++++------------ pkg/obiutils/set.go | 2 +- pkg/obiutils/set_test.go | 24 +++++----- 6 files changed, 64 insertions(+), 58 deletions(-) diff --git a/README.md b/README.md index 3db1744..1bdf3ed 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ curl -L https://raw.githubusercontent.com/metabarcoding/obitools4/master/install bash -s -- --install-dir test_install --obitools-prefix k ``` -In this case, the binaries will be installed in the `test_install` directory and all command names will be prefixed with the letter `k`. Thus `obigrep` will be named `kobigrep`. +In this case, the binaries will be installed in the `test_install` directory and all command names will be prefixed with the letter `k`. Thus, `obigrep` will be named `kobigrep`. ## Continuing the analysis... diff --git a/pkg/obiformats/fastseq_json_header.go b/pkg/obiformats/fastseq_json_header.go index 661092a..ebdcab4 100644 --- a/pkg/obiformats/fastseq_json_header.go +++ b/pkg/obiformats/fastseq_json_header.go @@ -13,7 +13,7 @@ import ( "github.com/buger/jsonparser" ) -func _parse_json_map_string(str []byte, sequence *obiseq.BioSequence) (map[string]string, error) { +func _parse_json_map_string(str []byte) (map[string]string, error) { values := make(map[string]string) jsonparser.ObjectEach(str, func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) { @@ -25,7 +25,7 @@ func _parse_json_map_string(str []byte, sequence *obiseq.BioSequence) (map[strin return values, nil } -func _parse_json_map_int(str []byte, sequence *obiseq.BioSequence) (map[string]int, error) { +func _parse_json_map_int(str []byte) (map[string]int, error) { values := make(map[string]int) jsonparser.ObjectEach(str, func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) { @@ -41,7 +41,7 @@ func _parse_json_map_int(str []byte, sequence *obiseq.BioSequence) (map[string]i return values, nil } -func _parse_json_map_float(str []byte, sequence *obiseq.BioSequence) (map[string]float64, error) { +func _parse_json_map_float(str []byte) (map[string]float64, error) { values := make(map[string]float64) jsonparser.ObjectEach(str, func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) { @@ -57,7 +57,7 @@ func _parse_json_map_float(str []byte, sequence *obiseq.BioSequence) (map[string return values, nil } -func _parse_json_map_bool(str []byte, sequence *obiseq.BioSequence) (map[string]bool, error) { +func _parse_json_map_bool(str []byte) (map[string]bool, error) { values := make(map[string]bool) jsonparser.ObjectEach(str, func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) { @@ -73,7 +73,7 @@ func _parse_json_map_bool(str []byte, sequence *obiseq.BioSequence) (map[string] return values, nil } -func _parse_json_map_interface(str []byte, sequence *obiseq.BioSequence) (map[string]interface{}, error) { +func _parse_json_map_interface(str []byte) (map[string]interface{}, error) { values := make(map[string]interface{}) jsonparser.ObjectEach(str, func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) { @@ -100,7 +100,7 @@ func _parse_json_map_interface(str []byte, sequence *obiseq.BioSequence) (map[st return values, nil } -func _parse_json_array_string(str []byte, sequence *obiseq.BioSequence) ([]string, error) { +func _parse_json_array_string(str []byte) ([]string, error) { values := make([]string, 0) jsonparser.ArrayEach(str, func(value []byte, dataType jsonparser.ValueType, offset int, err error) { @@ -162,7 +162,7 @@ func _parse_json_array_bool(str []byte, sequence *obiseq.BioSequence) ([]bool, e return values, nil } -func _parse_json_array_interface(str []byte, sequence *obiseq.BioSequence) ([]interface{}, error) { +func _parse_json_array_interface(str []byte) ([]interface{}, error) { values := make([]interface{}, 0) jsonparser.ArrayEach(str, func(value []byte, dataType jsonparser.ValueType, offset int, err error) { @@ -261,14 +261,14 @@ func _parse_json_header_(header string, sequence *obiseq.BioSequence) string { sequence.SetCount(int(count)) case skey == "obiclean_weight": - weight, err := _parse_json_map_int(value, sequence) + weight, err := _parse_json_map_int(value) if err != nil { log.Fatalf("%s: Cannot parse obiclean weight %s", sequence.Id(), string(value)) } annotations[skey] = weight case skey == "obiclean_status": - status, err := _parse_json_map_string(value, sequence) + status, err := _parse_json_map_string(value) if err != nil { log.Fatalf("%s: Cannot parse obiclean status %s", sequence.Id(), string(value)) } @@ -276,7 +276,7 @@ func _parse_json_header_(header string, sequence *obiseq.BioSequence) string { case strings.HasPrefix(skey, "merged_"): if dataType == jsonparser.Object { - data, err := _parse_json_map_int(value, sequence) + data, err := _parse_json_map_int(value) if err != nil { log.Fatalf("%s: Cannot parse merged slot %s: %v", sequence.Id(), skey, err) } else { @@ -316,9 +316,9 @@ func _parse_json_header_(header string, sequence *obiseq.BioSequence) string { annotations[skey], err = strconv.ParseFloat(obiutils.UnsafeString(value), 64) } case jsonparser.Array: - annotations[skey], err = _parse_json_array_interface(value, sequence) + annotations[skey], err = _parse_json_array_interface(value) case jsonparser.Object: - annotations[skey], err = _parse_json_map_interface(value, sequence) + annotations[skey], err = _parse_json_map_interface(value) case jsonparser.Boolean: annotations[skey], err = jsonparser.ParseBoolean(value) case jsonparser.Null: diff --git a/pkg/obioptions/version.go b/pkg/obioptions/version.go index c3826e5..8751533 100644 --- a/pkg/obioptions/version.go +++ b/pkg/obioptions/version.go @@ -8,7 +8,7 @@ import ( // corresponds to the last commit, and not the one when the file will be // commited -var _Commit = "6245d7f" +var _Commit = "791d253" var _Version = "Release 4.2.0" // Version returns the version of the obitools package. diff --git a/pkg/obitools/obiclean/graph.go b/pkg/obitools/obiclean/graph.go index 713442f..6f790ce 100644 --- a/pkg/obitools/obiclean/graph.go +++ b/pkg/obitools/obiclean/graph.go @@ -18,19 +18,19 @@ import ( ) type Ratio struct { - Sample string - SeqID string - status string - From int - To int - CFrom int - CTo int - Pos int - Length int - A int - C int - G int - T int + Sample string + SeqID string + OriginalStatus string + WOriginal int + WMutant int + COriginal int + CMutant int + Pos int + Length int + A int + C int + G int + T int } type Edge struct { @@ -59,13 +59,15 @@ func makeEdge(father, dist, pos int, from, to byte) Edge { // ratio func EmpiricalDistCsv(filename string, data [][]Ratio, compressed bool) { file, err := os.Create(filename) - - defer file.Close() + if err != nil { + fmt.Println(err) + } destfile, err := obiutils.CompressStream(file, true, true) if err != nil { fmt.Println(err) } + defer destfile.Close() pbopt := make([]progressbar.Option, 0, 5) pbopt = append(pbopt, @@ -78,19 +80,19 @@ func EmpiricalDistCsv(filename string, data [][]Ratio, compressed bool) { bar := progressbar.NewOptions(len(data), pbopt...) - fmt.Fprintln(destfile, "Sample,Father_id,Father_status,From,To,Weight_from,Weight_to,Count_from,Count_to,Position,length,A,C,G,T") + fmt.Fprintln(destfile, "Sample,Origin_id,Origin_status,Origin,Mutant,Origin_Weight,Mutant_Weight,Origin_Count,Mutant_Count,Position,Origin_length,A,C,G,T") for code, dist := range data { a1, a2 := intToNucPair(code) for _, ratio := range dist { fmt.Fprintf(destfile, "%s,%s,%s,%c,%c,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d\n", ratio.Sample, ratio.SeqID, - ratio.status, + ratio.OriginalStatus, a1, a2, - ratio.From, - ratio.To, - ratio.CFrom, - ratio.CTo, + ratio.WOriginal, + ratio.WMutant, + ratio.COriginal, + ratio.CMutant, ratio.Pos, ratio.Length, ratio.A, @@ -453,16 +455,20 @@ func EstimateRatio(samples map[string]*[]*seqPCR, minStatRatio int) [][]Ratio { if father.Weight >= minStatRatio && edge.Dist == 1 { s := father.Sequence.Sequence() ratio[edge.NucPair] = append(ratio[edge.NucPair], - Ratio{name, - father.Sequence.Id(), Status(father.Sequence)[name], - father.Weight, seq.Weight, - father.Count, seq.Count, - edge.Pos, - father.Sequence.Len(), - bytes.Count(s, []byte("a")), - bytes.Count(s, []byte("c")), - bytes.Count(s, []byte("g")), - bytes.Count(s, []byte("t"))}) + Ratio{ + Sample: name, + SeqID: father.Sequence.Id(), + OriginalStatus: Status(father.Sequence)[name], + WOriginal: father.Weight, + WMutant: seq.Weight, + COriginal: father.Count, + CMutant: seq.Count, + Pos: edge.Pos, + Length: father.Sequence.Len(), + A: bytes.Count(s, []byte("a")), + C: bytes.Count(s, []byte("c")), + G: bytes.Count(s, []byte("g")), + T: bytes.Count(s, []byte("t"))}) } } diff --git a/pkg/obiutils/set.go b/pkg/obiutils/set.go index 4a9ef52..c6c71b3 100644 --- a/pkg/obiutils/set.go +++ b/pkg/obiutils/set.go @@ -23,7 +23,7 @@ func MakeSet[E comparable](vals ...E) Set[E] { // It takes a variadic parameter of type E, where E is a comparable type. // It returns a pointer to a Set of type E. func NewSet[E comparable](vals ...E) *Set[E] { - s := MakeSet[E](vals...) + s := MakeSet(vals...) return &s } diff --git a/pkg/obiutils/set_test.go b/pkg/obiutils/set_test.go index 327ae0b..1754bbb 100644 --- a/pkg/obiutils/set_test.go +++ b/pkg/obiutils/set_test.go @@ -50,7 +50,7 @@ func TestNewSet(t *testing.T) { } // Test Case 2: Creating a set with multiple values - set2 := NewSet[string]("apple", "banana", "cherry") + set2 := NewSet("apple", "banana", "cherry") if len(*set2) != 3 { t.Errorf("Expected size to be 3, but got %d", len(*set2)) } @@ -147,7 +147,7 @@ func TestMembers(t *testing.T) { } // Test case 2: Set with multiple elements - set = MakeSet[int](1, 2, 3) + set = MakeSet(1, 2, 3) expected = []int{1, 2, 3} actual = set.Members() sort.Ints(actual) @@ -172,7 +172,7 @@ func TestSetString(t *testing.T) { } // Test set with single member - singleMemberSet := NewSet[int](42) + singleMemberSet := NewSet(42) singleMemberSetString := singleMemberSet.String() expectedSingleMemberSetString := "[42]" if singleMemberSetString != expectedSingleMemberSetString { @@ -180,7 +180,7 @@ func TestSetString(t *testing.T) { } // Test set with multiple members - multipleMembersSet := NewSet[int](1, 2, 3) + multipleMembersSet := NewSet(1, 2, 3) multipleMembersSetString := multipleMembersSet.String() expectedMultipleMembersSetString := "[1 2 3]" if multipleMembersSetString != expectedMultipleMembersSetString { @@ -213,26 +213,26 @@ func TestUnion(t *testing.T) { // Test case 2: Union of an empty set and a non-empty set should return the non-empty set set1 = MakeSet[int]() - set2 = MakeSet[int](1, 2, 3) - expected = MakeSet[int](1, 2, 3) + set2 = MakeSet(1, 2, 3) + expected = MakeSet(1, 2, 3) result = set1.Union(set2) if !reflect.DeepEqual(result, expected) { t.Errorf("Expected %v, but got %v", expected, result) } // Test case 3: Union of two non-empty sets with common elements should return a set with unique elements - set1 = MakeSet[int](1, 2, 3) - set2 = MakeSet[int](2, 3, 4) - expected = MakeSet[int](1, 2, 3, 4) + set1 = MakeSet(1, 2, 3) + set2 = MakeSet(2, 3, 4) + expected = MakeSet(1, 2, 3, 4) result = set1.Union(set2) if !reflect.DeepEqual(result, expected) { t.Errorf("Expected %v, but got %v", expected, result) } // Test case 4: Union of two non-empty sets with no common elements should return a set with all elements - set1 = MakeSet[int](1, 2, 3) - set2 = MakeSet[int](4, 5, 6) - expected = MakeSet[int](1, 2, 3, 4, 5, 6) + set1 = MakeSet(1, 2, 3) + set2 = MakeSet(4, 5, 6) + expected = MakeSet(1, 2, 3, 4, 5, 6) result = set1.Union(set2) if !reflect.DeepEqual(result, expected) { t.Errorf("Expected %v, but got %v", expected, result)