mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
A first functional version of obiclean
This commit is contained in:
22
cmd/obitools/obiclean/main.go
Normal file
22
cmd/obitools/obiclean/main.go
Normal file
@ -0,0 +1,22 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiclean"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||
)
|
||||
|
||||
func main() {
|
||||
optionParser := obioptions.GenerateOptionParser(obiclean.OptionSet)
|
||||
|
||||
_, args, _ := optionParser(os.Args)
|
||||
|
||||
fs, _ := obiconvert.ReadBioSequencesBatch(args...)
|
||||
|
||||
cleaned := obiclean.IOBIClean(fs)
|
||||
|
||||
obiconvert.WriteBioSequencesBatch(cleaned, true)
|
||||
}
|
@ -12,6 +12,7 @@ import (
|
||||
|
||||
func main() {
|
||||
|
||||
// Creating a file called cpu.trace.
|
||||
ftrace, err := os.Create("cpu.trace")
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
|
7
go.mod
7
go.mod
@ -1,6 +1,6 @@
|
||||
module git.metabarcoding.org/lecasofts/go/obitools
|
||||
|
||||
go 1.17
|
||||
go 1.18
|
||||
|
||||
require (
|
||||
github.com/DavidGamba/go-getoptions v0.25.3
|
||||
@ -12,10 +12,15 @@ require (
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/akualab/gjoa v0.0.0-20150423185904-0953495dbcc7 // indirect
|
||||
github.com/golang/glog v1.0.0 // indirect
|
||||
github.com/mattn/go-runewidth v0.0.13 // indirect
|
||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
|
||||
github.com/rivo/uniseg v0.2.0 // indirect
|
||||
golang.org/x/crypto v0.0.0-20220131195533-30dcbda58838 // indirect
|
||||
golang.org/x/exp v0.0.0-20191002040644-a1355ae1e2c3 // indirect
|
||||
golang.org/x/sys v0.0.0-20220204135822-1c1b9b1eba6a // indirect
|
||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 // indirect
|
||||
gonum.org/v1/gonum v0.11.0 // indirect
|
||||
scientificgo.org/special v0.0.0 // indirect
|
||||
)
|
||||
|
30
go.sum
30
go.sum
@ -1,14 +1,21 @@
|
||||
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
|
||||
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
|
||||
github.com/DavidGamba/go-getoptions v0.25.3 h1:lSPcMkwWvVZU05C+Uz4DKnKN5wz4bcD1QvJ/QHCRexo=
|
||||
github.com/DavidGamba/go-getoptions v0.25.3/go.mod h1:qLaLSYeQ8sUVOfKuu5JT5qKKS3OCwyhkYSJnoG+ggmo=
|
||||
github.com/PaesslerAG/gval v1.1.2 h1:EROKxV4/fAKWb0Qoj7NOxmHZA7gcpjOV9XgiRZMRCUU=
|
||||
github.com/PaesslerAG/gval v1.1.2/go.mod h1:Fa8gfkCmUsELXgayr8sfL/sw+VzCVoa03dcOcR/if2w=
|
||||
github.com/PaesslerAG/jsonpath v0.1.0 h1:gADYeifvlqK3R3i2cR5B4DGgxLXIPb3TRTH1mGi0jPI=
|
||||
github.com/PaesslerAG/jsonpath v0.1.0/go.mod h1:4BzmtoM/PI8fPO4aQGIusjGxGir2BzcV0grWtFzq1Y8=
|
||||
github.com/akualab/gjoa v0.0.0-20150423185904-0953495dbcc7 h1:v8r5rCWpphm5f+yaKDQvVKM46jj0SB7iDUTVS1MHSAg=
|
||||
github.com/akualab/gjoa v0.0.0-20150423185904-0953495dbcc7/go.mod h1:zhxCkmvf40lxgYc6aJuF6AOwfzuvFIANF9OQaMz5xNs=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
|
||||
github.com/goccy/go-json v0.9.4 h1:L8MLKG2mvVXiQu07qB6hmfqeSYQdOnqPot2GhsIwIaI=
|
||||
github.com/goccy/go-json v0.9.4/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
|
||||
github.com/golang/glog v1.0.0 h1:nfP3RFugxnNRyKgeWd4oI1nYvXpxrx8ck8ZrcizshdQ=
|
||||
github.com/golang/glog v1.0.0/go.mod h1:EWib/APOK0SL3dFbYqvxE3UYd8E6s1ouQ7iEp/0LWV4=
|
||||
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=
|
||||
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
|
||||
github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU=
|
||||
@ -29,9 +36,24 @@ github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/tevino/abool/v2 v2.0.1 h1:OF7FC5V5z3yAWyixbc32ecEzrgAJCsPkVOsPM2qoZPI=
|
||||
github.com/tevino/abool/v2 v2.0.1/go.mod h1:+Lmlqk6bHDWHqN1cbxqhwEAwMPXgc8I1SDEamtseuXY=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
||||
golang.org/x/crypto v0.0.0-20220131195533-30dcbda58838 h1:71vQrMauZZhcTVK6KdYM+rklehEEwb3E+ZhaE5jrPrE=
|
||||
golang.org/x/crypto v0.0.0-20220131195533-30dcbda58838/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
|
||||
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
|
||||
golang.org/x/exp v0.0.0-20191002040644-a1355ae1e2c3 h1:n9HxLrNxWWtEb1cA950nuEEj3QnKbtsCJ6KjcgisNUs=
|
||||
golang.org/x/exp v0.0.0-20191002040644-a1355ae1e2c3/go.mod h1:NOZ3BPKG0ec/BKJQgnvsSFpcKLM5xXVWnvZS97DWHgE=
|
||||
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
|
||||
golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
|
||||
golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o=
|
||||
golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
|
||||
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
@ -43,5 +65,13 @@ golang.org/x/sys v0.0.0-20220204135822-1c1b9b1eba6a/go.mod h1:oPkhp1MJrh7nUepCBc
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 h1:JGgROgKl9N8DuW20oFS5gxc+lE67/N3FcwmBPMe7ArY=
|
||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20190927191325-030b2cf1153e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
gonum.org/v1/gonum v0.11.0 h1:f1IJhK4Km5tBJmaiJXtk/PkL4cdVX6J+tGiM187uT5E=
|
||||
gonum.org/v1/gonum v0.11.0/go.mod h1:fSG4YDCxxUZQJ7rKsQrj0gMOg00Il0Z96/qMA4bVQhA=
|
||||
scientificgo.org/special v0.0.0 h1:P6WJkECo6tgtvZAEfNXl+KEB9ReAatjKAeX8U07mjSc=
|
||||
scientificgo.org/special v0.0.0/go.mod h1:LoGVh9tS431RLTJo7gFlYDKFWq44cEb7QqL+M0EKtZU=
|
||||
scientificgo.org/testutil v0.0.0/go.mod h1:Go6R4b+9YkFocMo3H3vNQ7tjbrX9Rc12wal7NZjvPXg=
|
||||
|
@ -57,6 +57,7 @@ func InterfaceToInt(i interface{}) (val int, err error) {
|
||||
return
|
||||
}
|
||||
|
||||
// If the interface{} can be cast to an int, return true.
|
||||
func CastableToInt(i interface{}) bool {
|
||||
|
||||
switch i.(type) {
|
||||
@ -70,7 +71,10 @@ func CastableToInt(i interface{}) bool {
|
||||
}
|
||||
}
|
||||
|
||||
// CopyMap makes a deep copy of a map[string]interface{}.
|
||||
// "CopyMap copies the contents of a map[string]interface{} to another map[string]interface{}."
|
||||
//
|
||||
// The function uses the gob package to encode the source map into a buffer and then decode the buffer
|
||||
// into the destination map
|
||||
func CopyMap(dest, src map[string]interface{}) {
|
||||
buf := new(bytes.Buffer)
|
||||
gob.NewEncoder(buf).Encode(src)
|
||||
@ -78,6 +82,7 @@ func CopyMap(dest, src map[string]interface{}) {
|
||||
}
|
||||
|
||||
// Read a whole file into the memory and store it as array of lines
|
||||
// It reads a file line by line, and returns a slice of strings, one for each line
|
||||
func ReadLines(path string) (lines []string, err error) {
|
||||
var (
|
||||
file *os.File
|
||||
|
78
pkg/obialign/is_d0_or_d1.go
Normal file
78
pkg/obialign/is_d0_or_d1.go
Normal file
@ -0,0 +1,78 @@
|
||||
package obialign
|
||||
|
||||
import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
|
||||
func abs(x int) int {
|
||||
if x < 0 {
|
||||
return -x
|
||||
}
|
||||
|
||||
return x
|
||||
}
|
||||
|
||||
func D1Or0(seq1, seq2 *obiseq.BioSequence) (int, int, byte, byte) {
|
||||
|
||||
pos := -1
|
||||
|
||||
l1 := seq1.Length()
|
||||
l2 := seq2.Length()
|
||||
|
||||
if abs(l1-l2) > 1 {
|
||||
return -1, pos, 0, 0
|
||||
}
|
||||
|
||||
s1 := seq1.Sequence()
|
||||
s2 := seq2.Sequence()
|
||||
|
||||
b1 := 0
|
||||
b2 := 0
|
||||
|
||||
// Scans the sequences from their beginings as long as they are identical
|
||||
|
||||
for b1 < l1 && b2 < l2 && s1[b1] == s2[b2] {
|
||||
b1++
|
||||
b2++
|
||||
}
|
||||
|
||||
if b1 == l1 && b2 == l2 {
|
||||
return 0, pos, 0, 0
|
||||
}
|
||||
|
||||
// Scans the sequences from their ends as long as they are identical
|
||||
|
||||
e1 := l1 - 1
|
||||
e2 := l2 - 1
|
||||
|
||||
for (e1 > b1 || e2 > b2) && s1[e1] == s2[e2] {
|
||||
e1--
|
||||
e2--
|
||||
}
|
||||
|
||||
if (l1 == l2 && (e1 > b1 || e2 > b2)) ||
|
||||
(l1 > l2 && e1 > b1) ||
|
||||
(l1 < l2 && e2 > b2) {
|
||||
return -1, pos, 0, 0
|
||||
}
|
||||
|
||||
if b1 >= e1 {
|
||||
if e1 > e2 {
|
||||
pos = e1
|
||||
} else {
|
||||
pos = e2
|
||||
}
|
||||
}
|
||||
|
||||
a1 := byte('-')
|
||||
a2 := byte('-')
|
||||
|
||||
if e2 >= e1 {
|
||||
a2 = s2[e2]
|
||||
}
|
||||
|
||||
if e2 <= e1 {
|
||||
a1 = s1[e1]
|
||||
}
|
||||
|
||||
return 1, pos, a1, a2
|
||||
|
||||
}
|
@ -138,6 +138,7 @@ func OptionMaxLength(maxLength int) WithOption {
|
||||
// error allowed when matching the forward
|
||||
// primer.
|
||||
func OptionForwardPrimer(primer string, max int) WithOption {
|
||||
|
||||
f := WithOption(func(opt Options) {
|
||||
var err error
|
||||
|
||||
|
@ -210,17 +210,22 @@ func ParseOBIFeatures(text string, annotations obiseq.Annotation) string {
|
||||
bvalue = bytes.TrimSpace(part[m[0]:(m[1] - 1)])
|
||||
j := bytes.ReplaceAll(bvalue, []byte("'"), []byte(`"`))
|
||||
var err error
|
||||
if strings.HasPrefix(key, "merged_") ||
|
||||
strings.HasSuffix(key, "_count") {
|
||||
switch {
|
||||
case strings.HasPrefix(key, "merged_") ||
|
||||
strings.HasSuffix(key, "_count"):
|
||||
dict := make(map[string]int)
|
||||
err = json.Unmarshal(j, &dict)
|
||||
value = dict
|
||||
} else {
|
||||
case strings.HasSuffix(key, "_status"):
|
||||
dict := make(map[string]string)
|
||||
err = json.Unmarshal(j, &dict)
|
||||
value = dict
|
||||
default:
|
||||
dict := make(map[string]interface{})
|
||||
err = json.Unmarshal(j, &dict)
|
||||
value = dict
|
||||
}
|
||||
|
||||
|
||||
if err != nil {
|
||||
value = string(bvalue)
|
||||
}
|
||||
|
@ -494,6 +494,9 @@ func (iterator IBioSequenceBatch) PairWith(reverse IBioSequenceBatch,
|
||||
return newIter
|
||||
}
|
||||
|
||||
// A function that takes a predicate and returns two IBioSequenceBatch iterators.
|
||||
// Sequences extracted from the input iterator are distributed among both the
|
||||
// iterator following the predicate value.
|
||||
func (iterator IBioSequenceBatch) DivideOn(predicate obiseq.SequencePredicate,
|
||||
size int, sizes ...int) (IBioSequenceBatch, IBioSequenceBatch) {
|
||||
buffsize := iterator.BufferSize()
|
||||
@ -560,6 +563,8 @@ func (iterator IBioSequenceBatch) DivideOn(predicate obiseq.SequencePredicate,
|
||||
return trueIter, falseIter
|
||||
}
|
||||
|
||||
// Filtering a batch of sequences.
|
||||
// A function that takes a predicate and a batch of sequences and returns a filtered batch of sequences.
|
||||
func (iterator IBioSequenceBatch) FilterOn(predicate obiseq.SequencePredicate,
|
||||
size int, sizes ...int) IBioSequenceBatch {
|
||||
buffsize := iterator.BufferSize()
|
||||
@ -615,6 +620,8 @@ func (iterator IBioSequenceBatch) FilterOn(predicate obiseq.SequencePredicate,
|
||||
return trueIter.Rebatch(size)
|
||||
}
|
||||
|
||||
// Load every sequences availables from an IBioSequenceBatch iterator into
|
||||
// a large obiseq.BioSequenceSlice.
|
||||
func (iterator IBioSequenceBatch) Load() obiseq.BioSequenceSlice {
|
||||
|
||||
chunck := obiseq.MakeBioSequenceSlice()
|
||||
@ -625,4 +632,42 @@ func (iterator IBioSequenceBatch) Load() obiseq.BioSequenceSlice {
|
||||
}
|
||||
|
||||
return chunck
|
||||
}
|
||||
}
|
||||
|
||||
// It takes a slice of BioSequence objects, and returns an iterator that will return batches of
|
||||
// BioSequence objects
|
||||
func IBatchOver(data obiseq.BioSequenceSlice,
|
||||
size int, sizes ...int) IBioSequenceBatch {
|
||||
|
||||
buffsize := 0
|
||||
|
||||
if len(sizes) > 0 {
|
||||
buffsize = sizes[1]
|
||||
}
|
||||
|
||||
trueIter := MakeIBioSequenceBatch(buffsize)
|
||||
|
||||
trueIter.Add(1)
|
||||
|
||||
go func() {
|
||||
trueIter.WaitAndClose()
|
||||
}()
|
||||
|
||||
go func() {
|
||||
ldata := len(data)
|
||||
batchid := 0
|
||||
next := 0
|
||||
for i:=0; i < ldata; i=next {
|
||||
next = i + size
|
||||
if next > ldata {
|
||||
next = ldata
|
||||
}
|
||||
trueIter.Push(MakeBioSequenceBatch(batchid,data[i:next]))
|
||||
batchid++
|
||||
}
|
||||
|
||||
trueIter.Done()
|
||||
}()
|
||||
|
||||
return trueIter
|
||||
}
|
||||
|
@ -43,6 +43,9 @@ func (sequence *BioSequence) StatsOn(key string, na string) StatsOnValues {
|
||||
case StatsOnValues:
|
||||
stats = istat
|
||||
newstat = false
|
||||
case map[string]int:
|
||||
stats = istat
|
||||
newstat = false
|
||||
case map[string]interface{}:
|
||||
stats = make(StatsOnValues, len(istat))
|
||||
newstat = false
|
||||
|
27
pkg/obistats/minmax.go
Normal file
27
pkg/obistats/minmax.go
Normal file
@ -0,0 +1,27 @@
|
||||
package obistats
|
||||
|
||||
|
||||
|
||||
func Max[T int64 | int32 | int16 | int8 | int | float32 | float64] (data []T) T {
|
||||
m := data[0]
|
||||
|
||||
for _,v := range data {
|
||||
if v > m {
|
||||
m = v
|
||||
}
|
||||
}
|
||||
|
||||
return m
|
||||
}
|
||||
|
||||
func Min[T int64 | int32 | int16 | int8 | int | float32 | float64] (data []T) T {
|
||||
m := data[0]
|
||||
|
||||
for _,v := range data {
|
||||
if v < m {
|
||||
m = v
|
||||
}
|
||||
}
|
||||
|
||||
return m
|
||||
}
|
49
pkg/obistats/utils.go
Normal file
49
pkg/obistats/utils.go
Normal file
@ -0,0 +1,49 @@
|
||||
package obistats
|
||||
|
||||
import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
"math"
|
||||
)
|
||||
|
||||
// Lchoose returns logarithms of binomial coefficients
|
||||
func Lchoose(n,x int) float64 {
|
||||
fn := float64(n)
|
||||
fx := float64(x)
|
||||
|
||||
ln1, _ := math.Lgamma(fn + 1.0)
|
||||
lx1, _ := math.Lgamma(fx + 1.0)
|
||||
lnx1, _ := math.Lgamma(fn - fx + 1.0)
|
||||
|
||||
return ln1 - lx1 - lnx1
|
||||
}
|
||||
|
||||
func Choose(n,x int) float64 {
|
||||
return math.Exp(Lchoose(x,n))
|
||||
}
|
||||
|
||||
|
||||
func LogAddExp(x, y float64) float64 {
|
||||
|
||||
tmp := x - y
|
||||
|
||||
if tmp > 0 {
|
||||
|
||||
return x + math.Log1p(math.Exp(-tmp))
|
||||
|
||||
} else if tmp <= 0 {
|
||||
|
||||
return y + math.Log1p(math.Exp(tmp))
|
||||
|
||||
} else {
|
||||
|
||||
// Nans, or infinities of the same sign involved
|
||||
|
||||
log.Errorf("logaddexp %f %f", x, y)
|
||||
|
||||
return x + y
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
448
pkg/obitools/obiclean/graph.go
Normal file
448
pkg/obitools/obiclean/graph.go
Normal file
@ -0,0 +1,448 @@
|
||||
package obiclean
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"log"
|
||||
"math"
|
||||
"os"
|
||||
"path"
|
||||
"sort"
|
||||
"sync"
|
||||
"text/template"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obialign"
|
||||
"github.com/schollz/progressbar/v3"
|
||||
)
|
||||
|
||||
type Ratio struct {
|
||||
From int
|
||||
To int
|
||||
Pos int
|
||||
Length int
|
||||
}
|
||||
|
||||
func abs(x int) int {
|
||||
if x < 0 {
|
||||
return -x
|
||||
}
|
||||
return x
|
||||
}
|
||||
|
||||
func max(x, y int) int {
|
||||
if x > y {
|
||||
return x
|
||||
}
|
||||
return y
|
||||
}
|
||||
|
||||
func min(x, y int) int {
|
||||
if x < y {
|
||||
return x
|
||||
}
|
||||
return y
|
||||
}
|
||||
|
||||
func minMax(x, y int) (int, int) {
|
||||
if x < y {
|
||||
return x, y
|
||||
}
|
||||
return y, x
|
||||
|
||||
}
|
||||
|
||||
// It takes a filename and a 2D slice of floats pruduced during graph building,
|
||||
// and writes a CSV file with the first column being the
|
||||
// first nucleotide, the second column being the second nucleotide, and the third column being the
|
||||
// ratio
|
||||
func EmpiricalDistCsv(filename string, data [][]Ratio) {
|
||||
file, err := os.Create(filename)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
pbopt := make([]progressbar.Option, 0, 5)
|
||||
pbopt = append(pbopt,
|
||||
progressbar.OptionSetWriter(os.Stderr),
|
||||
progressbar.OptionSetWidth(15),
|
||||
progressbar.OptionShowIts(),
|
||||
progressbar.OptionSetPredictTime(true),
|
||||
progressbar.OptionSetDescription("[Save CSV stat ratio file"),
|
||||
)
|
||||
|
||||
bar := progressbar.NewOptions(len(data), pbopt...)
|
||||
|
||||
fmt.Fprintln(file, "From,To,Count_from,Count_to,Position,length")
|
||||
for code, dist := range data {
|
||||
a1, a2 := intToNucPair(code)
|
||||
for _, ratio := range dist {
|
||||
fmt.Fprintf(file, "%c,%c,%d,%d,%d,%d\n", a1, a2, ratio.From, ratio.To, ratio.Pos, ratio.Length)
|
||||
}
|
||||
bar.Add(1)
|
||||
}
|
||||
}
|
||||
|
||||
// It takes a slice of sequences, a sample name and a statistical threshold and returns a string
|
||||
// containing a GML representation of the graph
|
||||
func Gml(seqs *[]*seqPCR, sample string, statThreshold int) string {
|
||||
// (*seqs)[1].Count
|
||||
var dot bytes.Buffer
|
||||
digraphTpl := template.New("gml_digraph")
|
||||
digraph := `graph [
|
||||
comment "Obiclean graph for sample {{ Name }}"
|
||||
directed 1
|
||||
{{range $index, $data:= .}}
|
||||
{{ if or $data.Fathers $data.HasSon}}
|
||||
node [ id {{$index}}
|
||||
graphics [
|
||||
type "{{ Shape $data.Count }}"
|
||||
fill "{{ if and $data.HasSon (not $data.Fathers)}}#0000FF{{ else }}#00FF00{{ end }}"
|
||||
h {{ Sqrt $data.Count }}
|
||||
w {{ Sqrt $data.Count }}
|
||||
]
|
||||
|
||||
]
|
||||
{{ end }}
|
||||
{{ end }}
|
||||
|
||||
{{range $index, $data:= .}}
|
||||
{{range $i, $father:= $data.Fathers}}
|
||||
edge [ source {{$index}}
|
||||
target {{$father}}
|
||||
color "{{ if gt (index $data.Dist $i) 1 }}#FF0000{{ else }}#00FF00{{ end }}"
|
||||
label "{{(index $data.Dist $i)}}"
|
||||
]
|
||||
{{ end }}
|
||||
{{ end }}
|
||||
]
|
||||
|
||||
`
|
||||
|
||||
tmpl, err := digraphTpl.Funcs(template.FuncMap{
|
||||
"Sqrt": func(i int) int { return 3 * int(math.Floor(math.Sqrt(float64(i)))) },
|
||||
"Name": func() string { return sample },
|
||||
"Shape": func(i int) string {
|
||||
if i >= statThreshold {
|
||||
return "circle"
|
||||
} else {
|
||||
return "rectangle"
|
||||
}
|
||||
},
|
||||
}).Parse(digraph)
|
||||
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
err = tmpl.Execute(&dot, *seqs)
|
||||
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
return dot.String()
|
||||
}
|
||||
|
||||
func SaveGMLGraphs(dirname string,
|
||||
samples map[string]*[]*seqPCR,
|
||||
statThreshold int,
|
||||
) {
|
||||
|
||||
if stat, err := os.Stat(dirname); err != nil || !stat.IsDir() {
|
||||
// path does not exist or is not directory
|
||||
os.RemoveAll(dirname)
|
||||
err := os.Mkdir(dirname, 0755)
|
||||
|
||||
if err != nil {
|
||||
log.Panicf("Cannot create directory %s for saving graphs", dirname)
|
||||
}
|
||||
}
|
||||
|
||||
pbopt := make([]progressbar.Option, 0, 5)
|
||||
pbopt = append(pbopt,
|
||||
progressbar.OptionSetWriter(os.Stderr),
|
||||
progressbar.OptionSetWidth(15),
|
||||
progressbar.OptionShowIts(),
|
||||
progressbar.OptionSetPredictTime(true),
|
||||
progressbar.OptionSetDescription("[Save GML Graph files]"),
|
||||
)
|
||||
|
||||
bar := progressbar.NewOptions(len(samples), pbopt...)
|
||||
|
||||
for name, seqs := range samples {
|
||||
|
||||
file, err := os.Create(path.Join(dirname,
|
||||
fmt.Sprintf("%s.gml", name)))
|
||||
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
}
|
||||
|
||||
file.WriteString(Gml(seqs, name, statThreshold))
|
||||
file.Close()
|
||||
|
||||
bar.Add(1)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func nucPair(a, b byte) int {
|
||||
|
||||
n1 := 0
|
||||
switch a {
|
||||
case 'a':
|
||||
n1 = 1
|
||||
case 'c':
|
||||
n1 = 2
|
||||
case 'g':
|
||||
n1 = 3
|
||||
case 't':
|
||||
n1 = 4
|
||||
}
|
||||
|
||||
n2 := 0
|
||||
switch b {
|
||||
case 'a':
|
||||
n2 = 1
|
||||
case 'c':
|
||||
n2 = 2
|
||||
case 'g':
|
||||
n2 = 3
|
||||
case 't':
|
||||
n2 = 4
|
||||
}
|
||||
|
||||
return n1*5 + n2
|
||||
|
||||
}
|
||||
|
||||
func intToNucPair(code int) (a, b byte) {
|
||||
var decode = []byte{'-', 'a', 'c', 'g', 't'}
|
||||
c1 := code / 5
|
||||
c2 := code - c1*5
|
||||
|
||||
return decode[c1], decode[c2]
|
||||
}
|
||||
|
||||
func buildSamplePairs(seqs *[]*seqPCR, minStatRatio int, workers int) ([][]Ratio, int) {
|
||||
nseq := len(*seqs)
|
||||
running := sync.WaitGroup{}
|
||||
|
||||
linePairs := func(i int) [][]Ratio {
|
||||
|
||||
ratio := make([][]Ratio, 25)
|
||||
son := (*seqs)[i]
|
||||
|
||||
for j := i + 1; j < nseq; j++ {
|
||||
father := (*seqs)[j]
|
||||
d, pos, a1, a2 := obialign.D1Or0(son.Sequence, father.Sequence)
|
||||
if d > 0 {
|
||||
son.Fathers = append(son.Fathers, j)
|
||||
son.Dist = append(son.Dist, d)
|
||||
father.SonCount++
|
||||
|
||||
if father.Count > minStatRatio {
|
||||
n := nucPair(a1, a2)
|
||||
ratio[n] = append(ratio[n], Ratio{father.Count, son.Count, pos, father.Sequence.Length()})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ratio
|
||||
}
|
||||
|
||||
lineChan := make(chan int)
|
||||
idxChan := make(chan [][]Ratio)
|
||||
|
||||
ff := func() {
|
||||
for i := range lineChan {
|
||||
idxChan <- linePairs(i)
|
||||
}
|
||||
|
||||
running.Done()
|
||||
}
|
||||
|
||||
running.Add(workers)
|
||||
|
||||
go func() {
|
||||
running.Wait()
|
||||
close(idxChan)
|
||||
}()
|
||||
|
||||
for i := 0; i < workers; i++ {
|
||||
go ff()
|
||||
}
|
||||
|
||||
go func() {
|
||||
for i := 0; i < nseq; i++ {
|
||||
lineChan <- i
|
||||
}
|
||||
close(lineChan)
|
||||
}()
|
||||
|
||||
np := nseq * (nseq - 1) / 2
|
||||
ratio := make([][]Ratio, 25)
|
||||
for data := range idxChan {
|
||||
for i, r := range data {
|
||||
ratio[i] = append(ratio[i], r...)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return ratio, np
|
||||
}
|
||||
|
||||
func extendSimilarityGraph(seqs *[]*seqPCR, step int, workers int) int {
|
||||
nseq := len(*seqs)
|
||||
running := sync.WaitGroup{}
|
||||
|
||||
linePairs := func(matrix *obialign.LCSMatrix, i int) {
|
||||
son := (*seqs)[i]
|
||||
for j := i + 1; j < nseq; j++ {
|
||||
father := (*seqs)[j]
|
||||
d, _, _, _ := obialign.D1Or0(son.Sequence, father.Sequence)
|
||||
|
||||
if d < 0 {
|
||||
lcs, lali := obialign.LCSScore(son.Sequence, father.Sequence,
|
||||
step,
|
||||
matrix)
|
||||
d := (lali - lcs)
|
||||
if lcs >= 0 && d <= step && step > 0 {
|
||||
son.Fathers = append(son.Fathers, j)
|
||||
son.Dist = append(son.Dist, d)
|
||||
father.SonCount++
|
||||
//a, b := minMax((*seqs)[i].Count, (*seqs)[j].Count)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
lineChan := make(chan int)
|
||||
// idxChan := make(chan [][]Ratio)
|
||||
|
||||
ff := func() {
|
||||
matrix := obialign.NewLCSMatrix(nil, 150, 150, step)
|
||||
for i := range lineChan {
|
||||
linePairs(matrix, i)
|
||||
}
|
||||
|
||||
running.Done()
|
||||
}
|
||||
|
||||
running.Add(workers)
|
||||
|
||||
for i := 0; i < workers; i++ {
|
||||
go ff()
|
||||
}
|
||||
|
||||
go func() {
|
||||
for i := 0; i < nseq; i++ {
|
||||
if len((*seqs)[i].Fathers) == 0 {
|
||||
lineChan <- i
|
||||
}
|
||||
}
|
||||
close(lineChan)
|
||||
}()
|
||||
|
||||
running.Wait()
|
||||
np := nseq * (nseq - 1) / 2
|
||||
return np
|
||||
}
|
||||
|
||||
func FilterGraphOnRatio(seqs *[]*seqPCR, ratio float64) {
|
||||
for _, s1 := range *seqs {
|
||||
c1 := float64(s1.Count)
|
||||
f := s1.Fathers
|
||||
d := s1.Dist
|
||||
j := 0
|
||||
for i, s2 := range f {
|
||||
f[j] = f[i]
|
||||
d[j] = d[i]
|
||||
if (c1 / float64((*seqs)[s2].Count)) <= math.Pow(ratio, float64(d[i])) {
|
||||
j++
|
||||
} else {
|
||||
(*seqs)[s2].SonCount--
|
||||
}
|
||||
}
|
||||
s1.Fathers = f[0:j]
|
||||
s1.Dist = d[0:j]
|
||||
}
|
||||
}
|
||||
|
||||
// sortSamples sorts the sequences in each sample by their increasing count
|
||||
func sortSamples(samples map[string]*([]*seqPCR)) {
|
||||
|
||||
for _, s := range samples {
|
||||
sort.SliceStable(*s, func(i, j int) bool {
|
||||
return (*s)[i].Count < (*s)[j].Count
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func ObicleanStatus(seq *seqPCR) string {
|
||||
if len(seq.Fathers) == 0 {
|
||||
if seq.SonCount > 0 {
|
||||
return "h"
|
||||
} else {
|
||||
return "s"
|
||||
}
|
||||
} else {
|
||||
return "i"
|
||||
}
|
||||
}
|
||||
|
||||
func BuildSeqGraph(samples map[string]*[]*seqPCR,
|
||||
maxError, minStatRatio, workers int) [][]Ratio {
|
||||
|
||||
sortSamples(samples)
|
||||
|
||||
npairs := 0
|
||||
for _, seqs := range samples {
|
||||
nseq := len(*seqs)
|
||||
npairs += nseq * (nseq - 1) / 2
|
||||
}
|
||||
|
||||
pbopt := make([]progressbar.Option, 0, 5)
|
||||
pbopt = append(pbopt,
|
||||
progressbar.OptionSetWriter(os.Stderr),
|
||||
progressbar.OptionSetWidth(15),
|
||||
progressbar.OptionShowIts(),
|
||||
progressbar.OptionSetPredictTime(true),
|
||||
progressbar.OptionSetDescription("[One error graph]"),
|
||||
)
|
||||
|
||||
bar := progressbar.NewOptions(npairs, pbopt...)
|
||||
all_ratio := make([][]Ratio, 25)
|
||||
for _, seqs := range samples {
|
||||
ratio, np := buildSamplePairs(seqs, minStatRatio, workers)
|
||||
|
||||
for i, r := range ratio {
|
||||
all_ratio[i] = append(all_ratio[i], r...)
|
||||
}
|
||||
|
||||
bar.Add(np)
|
||||
}
|
||||
|
||||
if maxError > 1 {
|
||||
pbopt = make([]progressbar.Option, 0, 5)
|
||||
pbopt = append(pbopt,
|
||||
progressbar.OptionSetWriter(os.Stderr),
|
||||
progressbar.OptionSetWidth(15),
|
||||
progressbar.OptionShowIts(),
|
||||
progressbar.OptionSetPredictTime(true),
|
||||
progressbar.OptionSetDescription("[Adds multiple errors]"),
|
||||
)
|
||||
|
||||
bar = progressbar.NewOptions(npairs, pbopt...)
|
||||
|
||||
for _, seqs := range samples {
|
||||
np := extendSimilarityGraph(seqs, maxError, workers)
|
||||
bar.Add(np)
|
||||
}
|
||||
}
|
||||
|
||||
return all_ratio
|
||||
}
|
251
pkg/obitools/obiclean/obiclean.go
Normal file
251
pkg/obitools/obiclean/obiclean.go
Normal file
@ -0,0 +1,251 @@
|
||||
package obiclean
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
"github.com/schollz/progressbar/v3"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type seqPCR struct {
|
||||
Count int // number of reads associated to a sequence in a PCR
|
||||
Sequence *obiseq.BioSequence // pointer to the corresponding sequence
|
||||
SonCount int
|
||||
Fathers []int
|
||||
Dist []int
|
||||
}
|
||||
|
||||
// buildSamples sorts the sequences by samples
|
||||
//
|
||||
// The sequences are distributed according to their sample association.
|
||||
// The function returns a map indexed by sample names.
|
||||
// Each sample is represented by a vector where each element associates
|
||||
// a sequence to its count of occurrences in the considered sample.
|
||||
func buildSamples(dataset obiseq.BioSequenceSlice,
|
||||
tag, NAValue string) map[string]*([]*seqPCR) {
|
||||
samples := make(map[string]*([]*seqPCR))
|
||||
|
||||
for _, s := range dataset {
|
||||
stats := s.StatsOn(tag, NAValue)
|
||||
|
||||
for k, v := range stats {
|
||||
pcr, ok := samples[k]
|
||||
|
||||
if !ok {
|
||||
p := make([]*seqPCR, 0, 10)
|
||||
pcr = &p
|
||||
samples[k] = pcr
|
||||
}
|
||||
|
||||
*pcr = append(*pcr, &seqPCR{
|
||||
Count: v,
|
||||
Sequence: s,
|
||||
SonCount: 0,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return samples
|
||||
}
|
||||
|
||||
func annotateOBIClean(dataset obiseq.BioSequenceSlice,
|
||||
sample map[string]*([]*seqPCR),
|
||||
tag, NAValue string) obiiter.IBioSequenceBatch {
|
||||
batchsize := 1000
|
||||
var annot = func(data obiseq.BioSequenceSlice) obiseq.BioSequenceSlice {
|
||||
|
||||
for _, s := range data {
|
||||
status := Status(s)
|
||||
head := 0
|
||||
internal := 0
|
||||
singleton := 0
|
||||
for _, v := range status {
|
||||
switch v {
|
||||
case "i":
|
||||
internal++
|
||||
case "h":
|
||||
head++
|
||||
case "s":
|
||||
singleton++
|
||||
}
|
||||
}
|
||||
is_head := (head + singleton) > 0
|
||||
|
||||
annotation := s.Annotations()
|
||||
annotation["obiclean_head"] = is_head
|
||||
annotation["obiclean_singletoncount"] = singleton
|
||||
annotation["obiclean_internalcount"] = internal
|
||||
annotation["obiclean_headcount"] = head
|
||||
annotation["obiclean_samplecount"] = head + internal + singleton
|
||||
|
||||
}
|
||||
return data
|
||||
}
|
||||
|
||||
iter := obiiter.IBatchOver(dataset, batchsize)
|
||||
riter := iter.MakeISliceWorker(annot)
|
||||
|
||||
return riter
|
||||
}
|
||||
|
||||
func IsHead(sequence *obiseq.BioSequence) bool {
|
||||
annotation := sequence.Annotations()
|
||||
iishead, ok := annotation["obiclean_head"]
|
||||
ishead := true
|
||||
|
||||
if ok {
|
||||
switch iishead := iishead.(type) {
|
||||
case bool:
|
||||
ishead = iishead
|
||||
default:
|
||||
log.Panic("obiclean_head attribute of sequence %s must be a boolean not : %v", sequence.Id(), iishead)
|
||||
}
|
||||
}
|
||||
|
||||
return ishead
|
||||
}
|
||||
|
||||
func HeadCount(sequence *obiseq.BioSequence) int {
|
||||
var err error
|
||||
annotation := sequence.Annotations()
|
||||
ivalue, ok := annotation["obiclean_headcount"]
|
||||
value := 0
|
||||
|
||||
if ok {
|
||||
value, err = goutils.InterfaceToInt(value)
|
||||
if err != nil {
|
||||
log.Panic("obiclean_headcount attribute of sequence %s must be an integer value not : %v", sequence.Id(), ivalue)
|
||||
}
|
||||
}
|
||||
|
||||
return value
|
||||
}
|
||||
|
||||
func InternalCount(sequence *obiseq.BioSequence) int {
|
||||
var err error
|
||||
annotation := sequence.Annotations()
|
||||
ivalue, ok := annotation["obiclean_internalcount"]
|
||||
value := 0
|
||||
|
||||
if ok {
|
||||
value, err = goutils.InterfaceToInt(value)
|
||||
if err != nil {
|
||||
log.Panic("obiclean_internalcount attribute of sequence %s must be an integer value not : %v", sequence.Id(), ivalue)
|
||||
}
|
||||
}
|
||||
|
||||
return value
|
||||
}
|
||||
|
||||
func SingletonCount(sequence *obiseq.BioSequence) int {
|
||||
var err error
|
||||
annotation := sequence.Annotations()
|
||||
ivalue, ok := annotation["obiclean_samplecount"]
|
||||
value := 0
|
||||
|
||||
if ok {
|
||||
value, err = goutils.InterfaceToInt(value)
|
||||
if err != nil {
|
||||
log.Panic("obiclean_samplecount attribute of sequence %s must be an integer value not : %v", sequence.Id(), ivalue)
|
||||
}
|
||||
}
|
||||
|
||||
return value
|
||||
}
|
||||
|
||||
func Status(sequence *obiseq.BioSequence) map[string]string {
|
||||
annotation := sequence.Annotations()
|
||||
iobistatus, ok := annotation["obiclean_status"]
|
||||
var obistatus map[string]string
|
||||
|
||||
if ok {
|
||||
switch iobistatus := iobistatus.(type) {
|
||||
case map[string]string:
|
||||
obistatus = iobistatus
|
||||
case map[string]interface{}:
|
||||
obistatus = make(map[string]string)
|
||||
for k, v := range iobistatus {
|
||||
obistatus[k] = fmt.Sprint(v)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
obistatus = make(map[string]string)
|
||||
annotation["obiclean_status"] = obistatus
|
||||
}
|
||||
|
||||
return obistatus
|
||||
}
|
||||
|
||||
func IOBIClean(itertator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
|
||||
|
||||
db := itertator.Load()
|
||||
|
||||
log.Infof("Sequence dataset of %d sequeences loaded\n", len(db))
|
||||
|
||||
samples := buildSamples(db, SampleAttribute(), "NA")
|
||||
|
||||
log.Infof("Dataset composed of %d samples\n", len(samples))
|
||||
|
||||
all_ratio := BuildSeqGraph(samples,
|
||||
DistStepMax(),
|
||||
MinCountToEvalMutationRate(),
|
||||
obioptions.CLIParallelWorkers())
|
||||
|
||||
if RatioMax() < 1.0 {
|
||||
pbopt := make([]progressbar.Option, 0, 5)
|
||||
pbopt = append(pbopt,
|
||||
progressbar.OptionSetWriter(os.Stderr),
|
||||
progressbar.OptionSetWidth(15),
|
||||
progressbar.OptionShowIts(),
|
||||
progressbar.OptionSetPredictTime(true),
|
||||
progressbar.OptionSetDescription("[Filter graph on abundance ratio]"),
|
||||
)
|
||||
|
||||
bar := progressbar.NewOptions(len(samples), pbopt...)
|
||||
|
||||
for _, seqs := range samples {
|
||||
FilterGraphOnRatio(seqs, RatioMax())
|
||||
bar.Add(1)
|
||||
}
|
||||
}
|
||||
|
||||
if IsSaveRatioTable() {
|
||||
EmpiricalDistCsv(RatioTableFilename(), all_ratio)
|
||||
}
|
||||
|
||||
if SaveGraphToFiles() {
|
||||
SaveGMLGraphs(GraphFilesDirectory(), samples, MinCountToEvalMutationRate())
|
||||
}
|
||||
|
||||
pbopt := make([]progressbar.Option, 0, 5)
|
||||
pbopt = append(pbopt,
|
||||
progressbar.OptionSetWriter(os.Stderr),
|
||||
progressbar.OptionSetWidth(15),
|
||||
progressbar.OptionShowIts(),
|
||||
progressbar.OptionSetPredictTime(true),
|
||||
progressbar.OptionSetDescription("[Annotate sequence status]"),
|
||||
)
|
||||
|
||||
bar := progressbar.NewOptions(len(samples), pbopt...)
|
||||
|
||||
for name, seqs := range samples {
|
||||
for _, pcr := range *seqs {
|
||||
obistatus := Status(pcr.Sequence)
|
||||
obistatus[name] = ObicleanStatus(pcr)
|
||||
}
|
||||
bar.Add(1)
|
||||
}
|
||||
|
||||
iter := annotateOBIClean(db, samples, SampleAttribute(), "NA")
|
||||
|
||||
if OnlyHead() {
|
||||
iter = iter.FilterOn(IsHead, 1000)
|
||||
}
|
||||
|
||||
return iter
|
||||
}
|
113
pkg/obitools/obiclean/options.go
Normal file
113
pkg/obitools/obiclean/options.go
Normal file
@ -0,0 +1,113 @@
|
||||
package obiclean
|
||||
|
||||
import (
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||
"github.com/DavidGamba/go-getoptions"
|
||||
)
|
||||
|
||||
var _distStepMax = 1
|
||||
var _sampleAttribute = "sample"
|
||||
|
||||
var _ratioMax = 1.0
|
||||
var _minEvalRate = 1000
|
||||
|
||||
var _clusterMode = false
|
||||
var _onlyHead = false
|
||||
|
||||
var _saveGraph = "__@@NOSAVE@@__"
|
||||
var _saveRatio = "__@@NOSAVE@@__"
|
||||
|
||||
func ObicleanOptionSet(options *getoptions.GetOpt) {
|
||||
options.StringVar(&_sampleAttribute, "sample", _sampleAttribute,
|
||||
options.Alias("s"),
|
||||
options.Description("Attribute containing sample descriptions (default %s)."))
|
||||
|
||||
options.IntVar(&_distStepMax, "distance", _distStepMax,
|
||||
options.Alias("d"),
|
||||
options.Description("Maximum numbers of differences between two variant sequences (default: %d)."))
|
||||
|
||||
options.Float64Var(&_ratioMax, "ratio", _ratioMax,
|
||||
options.Alias("r"),
|
||||
options.Description("Threshold ratio between counts (rare/abundant counts)"+
|
||||
" of two sequence records so that the less abundant one is a variant of "+
|
||||
"the more abundant (default: %3.2f)."))
|
||||
|
||||
options.IntVar(&_minEvalRate, "min-eval-rate", _minEvalRate,
|
||||
options.Description("Minimum abundance of a sequence to be used to evaluate mutation rate."))
|
||||
|
||||
// options.BoolVar(&_clusterMode, "cluster", _clusterMode,
|
||||
// options.Alias("C"),
|
||||
// options.Description("Switch obiclean into its clustering mode. This adds information to each sequence about the true."),
|
||||
// )
|
||||
|
||||
options.BoolVar(&_onlyHead, "head", _onlyHead,
|
||||
options.Alias("H"),
|
||||
options.Description("Select only sequences with the head status in a least one sample."),
|
||||
)
|
||||
|
||||
options.StringVar(&_saveGraph, "save-graph", _saveGraph,
|
||||
options.Description("Creates a directory containing the set of DAG used by the obiclean clustering algorithm. "+
|
||||
"The graph files follow the graphml format."),
|
||||
)
|
||||
|
||||
options.StringVar(&_saveRatio, "save-ratio", _saveRatio,
|
||||
options.Description("Creates a file containing the set of abundance ratio on the graph edge. "+
|
||||
"The ratio file follows the csv format."),
|
||||
)
|
||||
|
||||
}
|
||||
|
||||
func OptionSet(options *getoptions.GetOpt) {
|
||||
obiconvert.InputOptionSet(options)
|
||||
obiconvert.OutputOptionSet(options)
|
||||
ObicleanOptionSet(options)
|
||||
}
|
||||
|
||||
func DistStepMax() int {
|
||||
return _distStepMax
|
||||
}
|
||||
|
||||
// It returns the name of the attibute used to store sample name
|
||||
func SampleAttribute() string {
|
||||
return _sampleAttribute
|
||||
}
|
||||
|
||||
// Return the maximum abundance ratio between read counts to consider sequences as error.
|
||||
func RatioMax() float64 {
|
||||
return _ratioMax
|
||||
}
|
||||
|
||||
// > The function `MinCountToEvalMutationRate()` returns the minimum number of reads that must be
|
||||
// observed before the mutation rate can be evaluated
|
||||
func MinCountToEvalMutationRate() int {
|
||||
return _minEvalRate
|
||||
}
|
||||
|
||||
func ClusterMode() bool {
|
||||
return _clusterMode
|
||||
}
|
||||
|
||||
// `OnlyHead()` returns a boolean value that indicates whether the `-h` flag was passed to the program
|
||||
func OnlyHead() bool {
|
||||
return _onlyHead
|
||||
}
|
||||
|
||||
// Returns true it the obliclean graphs must be saved
|
||||
func SaveGraphToFiles() bool {
|
||||
return _saveGraph != "__@@NOSAVE@@__"
|
||||
}
|
||||
|
||||
// It returns the directory where the graph files are saved
|
||||
func GraphFilesDirectory() string {
|
||||
return _saveGraph
|
||||
}
|
||||
|
||||
// Returns true it the table of ratio must be saved
|
||||
func IsSaveRatioTable() bool {
|
||||
return _saveRatio != "__@@NOSAVE@@__"
|
||||
}
|
||||
|
||||
// It returns the filename of the file that stores the ratio table
|
||||
func RatioTableFilename() string {
|
||||
return _saveRatio
|
||||
}
|
@ -60,7 +60,7 @@ func OptionSet(options *getoptions.GetOpt) {
|
||||
// --forward command line option
|
||||
func ForwardPrimer() string {
|
||||
pattern, err := obiapat.MakeApatPattern(_ForwardPrimer, _AllowedMismatch)
|
||||
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("%+v", err)
|
||||
}
|
||||
|
Reference in New Issue
Block a user