mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
a first version of obisummary
Former-commit-id: cca1019d82a14a322f46a20890b996b5c7491d41
This commit is contained in:
61
cmd/obitools/obisummary/main.go
Normal file
61
cmd/obitools/obisummary/main.go
Normal file
@ -0,0 +1,61 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
"gopkg.in/yaml.v3"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obisummary"
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
||||
defer obiseq.LogBioSeqStatus()
|
||||
|
||||
// go tool pprof -http=":8000" ./obipairing ./cpu.pprof
|
||||
// f, err := os.Create("cpu.pprof")
|
||||
// if err != nil {
|
||||
// log.Fatal(err)
|
||||
// }
|
||||
// pprof.StartCPUProfile(f)
|
||||
// defer pprof.StopCPUProfile()
|
||||
|
||||
// go tool trace cpu.trace
|
||||
// ftrace, err := os.Create("cpu.trace")
|
||||
// if err != nil {
|
||||
// log.Fatal(err)
|
||||
// }
|
||||
// trace.Start(ftrace)
|
||||
// defer trace.Stop()
|
||||
|
||||
optionParser := obioptions.GenerateOptionParser(
|
||||
obiconvert.InputOptionSet,
|
||||
obisummary.OptionSet,
|
||||
)
|
||||
|
||||
_, args := optionParser(os.Args)
|
||||
|
||||
fs, err := obiconvert.CLIReadBioSequences(args...)
|
||||
|
||||
if err != nil {
|
||||
log.Errorf("Cannot open file (%v)", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
summary := obisummary.ISummary(fs)
|
||||
|
||||
if obisummary.CLIOutFormat() == "json" {
|
||||
output, _ := json.MarshalIndent(summary, "", " ")
|
||||
fmt.Print(string(output))
|
||||
} else {
|
||||
output, _ := yaml.Marshal(summary)
|
||||
fmt.Print(string(output))
|
||||
}
|
||||
fmt.Printf("\n")
|
||||
}
|
@ -287,6 +287,21 @@ func (s *BioSequence) GetIntMap(key string) (map[string]int, bool) {
|
||||
return val, ok
|
||||
}
|
||||
|
||||
func (s *BioSequence) GetStringMap(key string) (map[string]string, bool) {
|
||||
var val map[string]string
|
||||
|
||||
var err error
|
||||
|
||||
v, ok := s.GetAttribute(key)
|
||||
|
||||
if ok {
|
||||
val, err = obiutils.InterfaceToStringMap(v)
|
||||
ok = err == nil
|
||||
}
|
||||
|
||||
return val, ok
|
||||
}
|
||||
|
||||
// GetIntSlice returns the integer slice value associated with the given key in the BioSequence object.
|
||||
//
|
||||
// Parameters:
|
||||
|
@ -6,6 +6,18 @@ import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
// TestSubsequence tests the Subsequence function.
|
||||
//
|
||||
// The function tests various cases of the Subsequence method of a BioSequence object.
|
||||
// It checks different scenarios of subsequence slicing, including both valid and invalid parameters.
|
||||
// The function is designed for unit testing purposes and uses the Go testing package.
|
||||
// It asserts that the expected subsequence is returned for each test case and checks for any errors.
|
||||
// The function also verifies the correctness of the subsequence qualities, if applicable.
|
||||
// The test cases cover both non-circular and circular subsequence slicing.
|
||||
// It ensures that the function handles different scenarios such as when `from` is greater than `to`,
|
||||
// `from` or `to` is out of bounds, and normal subsequence slicing cases.
|
||||
//
|
||||
// TestSubsequence does not return any value.
|
||||
func TestSubsequence(t *testing.T) {
|
||||
// Test case 1: Subsequence with valid parameters and non-circular
|
||||
seq := NewBioSequence("ID1", []byte("ATCG"), "")
|
||||
|
239
pkg/obitools/obisummary/obisummary.go
Normal file
239
pkg/obitools/obisummary/obisummary.go
Normal file
@ -0,0 +1,239 @@
|
||||
package obisummary
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
||||
)
|
||||
|
||||
type DataSummary struct {
|
||||
read_count int
|
||||
variant_count int
|
||||
symbole_count int
|
||||
has_merged_sample int
|
||||
has_obiclean_status int
|
||||
has_obiclean_weight int
|
||||
tags map[string]int
|
||||
map_tags map[string]int
|
||||
vector_tags map[string]int
|
||||
samples map[string]int
|
||||
sample_variants map[string]int
|
||||
sample_singletons map[string]int
|
||||
sample_obiclean_bad map[string]int
|
||||
}
|
||||
|
||||
func NewDataSummary() *DataSummary {
|
||||
return &DataSummary{
|
||||
read_count: 0,
|
||||
variant_count: 0,
|
||||
symbole_count: 0,
|
||||
has_merged_sample: 0,
|
||||
has_obiclean_status: 0,
|
||||
has_obiclean_weight: 0,
|
||||
tags: make(map[string]int),
|
||||
map_tags: make(map[string]int),
|
||||
vector_tags: make(map[string]int),
|
||||
samples: make(map[string]int),
|
||||
sample_variants: make(map[string]int),
|
||||
sample_singletons: make(map[string]int),
|
||||
sample_obiclean_bad: make(map[string]int),
|
||||
}
|
||||
}
|
||||
|
||||
func sumUpdateIntMap(m1, m2 map[string]int) map[string]int {
|
||||
for k, v2 := range m2 {
|
||||
if v1, ok := m1[k]; ok {
|
||||
m1[k] = v1 + v2
|
||||
} else {
|
||||
m1[k] = v2
|
||||
}
|
||||
}
|
||||
|
||||
return m1
|
||||
}
|
||||
|
||||
func countUpdateIntMap(m1, m2 map[string]int) map[string]int {
|
||||
for k := range m2 {
|
||||
if v, ok := m1[k]; ok {
|
||||
m1[k] = v + 1
|
||||
} else {
|
||||
m1[k] = 1
|
||||
}
|
||||
}
|
||||
|
||||
return m1
|
||||
}
|
||||
|
||||
func plusUpdateIntMap(m1 map[string]int, key string, val int) map[string]int {
|
||||
if v, ok := m1[key]; ok {
|
||||
m1[key] = v + val
|
||||
} else {
|
||||
m1[key] = val
|
||||
}
|
||||
return m1
|
||||
}
|
||||
|
||||
func plusOneUpdateIntMap(m1 map[string]int, key string) map[string]int {
|
||||
return plusUpdateIntMap(m1, key, 1)
|
||||
}
|
||||
|
||||
func (data1 *DataSummary) Add(data2 *DataSummary) *DataSummary {
|
||||
rep := NewDataSummary()
|
||||
rep.read_count = data1.read_count + data2.read_count
|
||||
rep.variant_count = data1.variant_count + data2.variant_count
|
||||
rep.symbole_count = data1.symbole_count + data2.symbole_count
|
||||
rep.has_merged_sample = data1.has_merged_sample + data2.has_merged_sample
|
||||
rep.has_obiclean_status = data1.has_obiclean_status + data2.has_obiclean_status
|
||||
rep.has_obiclean_weight = data1.has_obiclean_weight + data2.has_obiclean_weight
|
||||
|
||||
rep.tags = sumUpdateIntMap(data1.tags, data2.tags)
|
||||
rep.map_tags = sumUpdateIntMap(data1.map_tags, data2.map_tags)
|
||||
rep.vector_tags = sumUpdateIntMap(data1.vector_tags, data2.vector_tags)
|
||||
rep.samples = sumUpdateIntMap(data1.samples, data2.samples)
|
||||
rep.sample_variants = sumUpdateIntMap(data1.sample_variants, data2.sample_variants)
|
||||
rep.sample_singletons = sumUpdateIntMap(data1.sample_singletons, data2.sample_singletons)
|
||||
rep.sample_obiclean_bad = sumUpdateIntMap(data1.sample_obiclean_bad, data2.sample_obiclean_bad)
|
||||
|
||||
return rep
|
||||
}
|
||||
|
||||
func (data *DataSummary) Update(s *obiseq.BioSequence) *DataSummary {
|
||||
data.read_count += s.Count()
|
||||
data.variant_count++
|
||||
data.symbole_count += s.Len()
|
||||
|
||||
if s.HasAttribute("merged_sample") {
|
||||
data.has_merged_sample++
|
||||
samples, _ := s.GetIntMap("merged_sample")
|
||||
obiclean, obc_ok := s.GetStringMap("obiclean_status")
|
||||
data.samples = sumUpdateIntMap(data.samples, samples)
|
||||
data.sample_variants = countUpdateIntMap(data.sample_variants, samples)
|
||||
for k, v := range samples {
|
||||
if v == 1 {
|
||||
data.sample_singletons = plusOneUpdateIntMap(data.sample_singletons, k)
|
||||
}
|
||||
if v > 1 && obc_ok && obiclean[k] == "i" {
|
||||
data.sample_obiclean_bad = plusOneUpdateIntMap(data.sample_obiclean_bad, k)
|
||||
}
|
||||
}
|
||||
} else if s.HasAttribute("sample") {
|
||||
sample, _ := s.GetStringAttribute("sample")
|
||||
data.samples = plusUpdateIntMap(data.samples, sample, s.Count())
|
||||
data.sample_variants = plusOneUpdateIntMap(data.sample_variants, sample)
|
||||
if s.Count() == 1 {
|
||||
data.sample_singletons = plusOneUpdateIntMap(data.sample_singletons, sample)
|
||||
}
|
||||
}
|
||||
|
||||
if s.HasAttribute("obiclean_status") {
|
||||
data.has_obiclean_status++
|
||||
}
|
||||
|
||||
if s.HasAttribute("obiclean_weight") {
|
||||
data.has_obiclean_weight++
|
||||
}
|
||||
|
||||
for k, v := range s.Annotations() {
|
||||
switch {
|
||||
case obiutils.IsAMap(v):
|
||||
plusOneUpdateIntMap(data.map_tags, k)
|
||||
case obiutils.IsASlice(v):
|
||||
plusOneUpdateIntMap(data.vector_tags, k)
|
||||
default:
|
||||
plusOneUpdateIntMap(data.tags, k)
|
||||
}
|
||||
}
|
||||
|
||||
return data
|
||||
}
|
||||
|
||||
func ISummary(iterator obiiter.IBioSequence) map[string]interface{} {
|
||||
|
||||
nproc := obioptions.CLIParallelWorkers()
|
||||
waiter := sync.WaitGroup{}
|
||||
|
||||
summaries := make([]*DataSummary, nproc)
|
||||
|
||||
ff := func(iseq obiiter.IBioSequence, summary *DataSummary) {
|
||||
|
||||
for iseq.Next() {
|
||||
batch := iseq.Get()
|
||||
for _, seq := range batch.Slice() {
|
||||
summary.Update(seq)
|
||||
}
|
||||
batch.Recycle(true)
|
||||
}
|
||||
waiter.Done()
|
||||
}
|
||||
|
||||
waiter.Add(nproc)
|
||||
|
||||
summaries[0] = NewDataSummary()
|
||||
go ff(iterator, summaries[0])
|
||||
|
||||
for i := 1; i < nproc; i++ {
|
||||
summaries[i] = NewDataSummary()
|
||||
go ff(iterator.Split(), summaries[i])
|
||||
}
|
||||
|
||||
waiter.Wait()
|
||||
|
||||
rep := summaries[0]
|
||||
|
||||
for i := 1; i < nproc; i++ {
|
||||
rep = rep.Add(summaries[i])
|
||||
}
|
||||
|
||||
dict := make(map[string]interface{})
|
||||
|
||||
dict["count"] = map[string]interface{}{
|
||||
"variants": rep.variant_count,
|
||||
"reads": rep.read_count,
|
||||
"total_length": rep.symbole_count,
|
||||
}
|
||||
|
||||
if len(rep.tags)+len(rep.map_tags)+len(rep.vector_tags) > 0 {
|
||||
dict["annotations"] = map[string]interface{}{
|
||||
"scalar_attributes": len(rep.tags),
|
||||
"map_attributes": len(rep.map_tags),
|
||||
"vector_attributes": len(rep.vector_tags),
|
||||
"keys": make(map[string]map[string]int, 3),
|
||||
}
|
||||
|
||||
if len(rep.tags) > 0 {
|
||||
((dict["annotations"].(map[string]interface{}))["keys"].(map[string]map[string]int))["scalar"] = rep.tags
|
||||
}
|
||||
|
||||
if len(rep.map_tags) > 0 {
|
||||
((dict["annotations"].(map[string]interface{}))["keys"].(map[string]map[string]int))["map"] = rep.map_tags
|
||||
}
|
||||
|
||||
if len(rep.vector_tags) > 0 {
|
||||
((dict["annotations"].(map[string]interface{}))["keys"].(map[string]map[string]int))["vector"] = rep.vector_tags
|
||||
}
|
||||
|
||||
if len(rep.samples) > 0 {
|
||||
dict["samples"] = map[string]interface{}{
|
||||
"sample_count": len(rep.samples),
|
||||
"sample_stats": make(map[string]map[string]int, 2),
|
||||
}
|
||||
|
||||
stats := ((dict["samples"].(map[string]interface{}))["sample_stats"].(map[string]map[string]int))
|
||||
for k, v := range rep.samples {
|
||||
stats[k] = map[string]int{
|
||||
"reads": v,
|
||||
"variants": rep.sample_variants[k],
|
||||
"singletons": rep.sample_singletons[k],
|
||||
}
|
||||
|
||||
if rep.variant_count == rep.has_obiclean_status {
|
||||
stats[k]["obiclean_bad"] = rep.sample_obiclean_bad[k]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return dict
|
||||
}
|
28
pkg/obitools/obisummary/options.go
Normal file
28
pkg/obitools/obisummary/options.go
Normal file
@ -0,0 +1,28 @@
|
||||
// obicount function utility package.
|
||||
//
|
||||
// The obitols/obicount package contains every
|
||||
// functions specificaly required by the obicount utility.
|
||||
package obisummary
|
||||
|
||||
import (
|
||||
"github.com/DavidGamba/go-getoptions"
|
||||
)
|
||||
|
||||
var __json_output__ = false
|
||||
var __yaml_output__ = false
|
||||
|
||||
func OptionSet(options *getoptions.GetOpt) {
|
||||
options.BoolVar(&__json_output__, "json-output", false,
|
||||
options.Description("Print results as JSON record."))
|
||||
|
||||
options.BoolVar(&__yaml_output__, "yaml-output", false,
|
||||
options.Description("Print results as YAML record."))
|
||||
}
|
||||
|
||||
func CLIOutFormat() string {
|
||||
if __yaml_output__ && !__json_output__ {
|
||||
return "yaml"
|
||||
}
|
||||
|
||||
return "json"
|
||||
}
|
@ -111,11 +111,11 @@ func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence,
|
||||
}
|
||||
} else {
|
||||
demultiplex_error := consensus.Annotations()["demultiplex_error"]
|
||||
if demultiplex_error!=nil {
|
||||
if demultiplex_error != nil {
|
||||
A.Annotations()["demultiplex_error"] = demultiplex_error.(string)
|
||||
B.Annotations()["demultiplex_error"] = demultiplex_error.(string)
|
||||
} else {
|
||||
log.Panicln("@@ ",wid,"Error : ",err,*consensus)
|
||||
log.Panicln("@@ ", wid, "Error : ", err, *consensus)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -160,31 +160,3 @@ func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence,
|
||||
return iout
|
||||
|
||||
}
|
||||
|
||||
// if match.IsDirect {
|
||||
// annot["direction"] = "direct"
|
||||
// } else {
|
||||
// annot["direction"] = "reverse"
|
||||
// }
|
||||
|
||||
// if match.ForwardMatch != "" {
|
||||
// annot["forward_match"] = match.ForwardMatch
|
||||
// annot["forward_mismatches"] = match.ForwardMismatches
|
||||
// annot["forward_tag"] = match.ForwardTag
|
||||
// }
|
||||
|
||||
// if match.ReverseMatch != "" {
|
||||
// annot["reverse_match"] = match.ReverseMatch
|
||||
// annot["reverse_mismatches"] = match.ReverseMismatches
|
||||
// annot["reverse_tag"] = match.ReverseTag
|
||||
// }
|
||||
|
||||
// if match.Error == nil {
|
||||
// if match.Pcr != nil {
|
||||
// annot["sample"] = match.Pcr.Sample
|
||||
// annot["experiment"] = match.Pcr.Experiment
|
||||
// for k, val := range match.Pcr.Annotations {
|
||||
// annot[k] = val
|
||||
// }
|
||||
// } else {
|
||||
// annot["demultiplex_error"]
|
||||
|
@ -148,6 +148,27 @@ func InterfaceToIntMap(i interface{}) (val map[string]int, err error) {
|
||||
return
|
||||
}
|
||||
|
||||
func InterfaceToStringMap(i interface{}) (val map[string]string, err error) {
|
||||
err = nil
|
||||
|
||||
switch i := i.(type) {
|
||||
case map[string]string:
|
||||
val = i
|
||||
case map[string]interface{}:
|
||||
val = make(map[string]string, len(i))
|
||||
for k, v := range i {
|
||||
val[k], err = InterfaceToString(v)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
default:
|
||||
err = &NotAMapInt{"value attribute cannot be casted to a map[string]int"}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// NotABoolean defines a new type of Error : "NotAMapInt"
|
||||
type NotAMapFloat64 struct {
|
||||
message string
|
||||
|
Reference in New Issue
Block a user