Some cleaning of the new tools

Former-commit-id: f265a3910d2872c56af81c9b873669f2cb627781
This commit is contained in:
Eric Coissac
2024-07-11 10:46:11 +02:00
parent c7ed47e110
commit 42c5881ddc
5 changed files with 99 additions and 17 deletions

View File

@ -5,6 +5,7 @@ import (
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obicleandb"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
@ -12,6 +13,8 @@ import (
)
func main() {
obioptions.SetBatchSize(10)
optionParser := obioptions.GenerateOptionParser(obicleandb.OptionSet)
_, args := optionParser(os.Args)
@ -25,5 +28,8 @@ func main() {
cleaned := obicleandb.ICleanDB(fs)
obiconvert.CLIWriteBioSequences(cleaned, true)
toconsume, _ := obiconvert.CLIWriteBioSequences(cleaned, false)
toconsume.Consume()
obiiter.WaitForLastPipe()
}

View File

@ -0,0 +1,36 @@
package main
import (
"os"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obidemerge"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
)
// main is the entry point of the obidemerge command.
//
// It calls SetStrictReadWorker(2) and SetStrictWriteWorker(2), parses the
// command line with the obidemerge option set, reads the sequences designated
// by the remaining arguments, passes them through
// obidemerge.CLIDemergeSequences and writes the result.
//
// The process exits with status 1 when the input cannot be opened or when the
// writer reports an error.
func main() {
	obioptions.SetStrictReadWorker(2)
	obioptions.SetStrictWriteWorker(2)

	optionParser := obioptions.GenerateOptionParser(obidemerge.OptionSet)

	// Only the arguments left after option parsing are used; the first
	// value returned by the parser is not needed here.
	_, args := optionParser(os.Args)

	fs, err := obiconvert.CLIReadBioSequences(args...)
	if err != nil {
		log.Errorf("Cannot open file (%v)", err)
		os.Exit(1)
	}

	demerged := obidemerge.CLIDemergeSequences(fs)

	// The returned iterator is not needed here.
	// NOTE(review): passing true presumably makes the writer consume the
	// stream itself — confirm against obiconvert.
	// The error is checked so that a failed write does not end with a
	// success exit status.
	if _, err = obiconvert.CLIWriteBioSequences(demerged, true); err != nil {
		log.Errorf("Cannot write sequences (%v)", err)
		os.Exit(1)
	}

	obiiter.WaitForLastPipe()
}

View File

@ -0,0 +1,36 @@
package main
import (
"os"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obijoin"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
)
// main is the entry point of the obijoin command.
//
// It calls SetStrictReadWorker(2) and SetStrictWriteWorker(2), parses the
// command line with the obijoin option set, reads the sequences designated by
// the remaining arguments, passes them through obijoin.CLIJoinSequences and
// writes the result.
//
// The process exits with status 1 when the input cannot be opened or when the
// writer reports an error.
func main() {
	obioptions.SetStrictReadWorker(2)
	obioptions.SetStrictWriteWorker(2)

	optionParser := obioptions.GenerateOptionParser(obijoin.OptionSet)

	// Only the arguments left after option parsing are used; the first
	// value returned by the parser is not needed here.
	_, args := optionParser(os.Args)

	fs, err := obiconvert.CLIReadBioSequences(args...)
	if err != nil {
		log.Errorf("Cannot open file (%v)", err)
		os.Exit(1)
	}

	joined := obijoin.CLIJoinSequences(fs)

	// The returned iterator is not needed here.
	// NOTE(review): passing true presumably makes the writer consume the
	// stream itself — confirm against obiconvert.
	// The error is checked so that a failed write does not end with a
	// success exit status.
	if _, err = obiconvert.CLIWriteBioSequences(joined, true); err != nil {
		log.Errorf("Cannot write sequences (%v)", err)
		os.Exit(1)
	}

	obiiter.WaitForLastPipe()
}

View File

@ -7,7 +7,7 @@ import (
// TODO: The version number is extracted from git. This means that the version
// corresponds to the last commit, not to the commit in which this file will
// be committed.
var _Commit = "a365bb6"
var _Commit = "8a1ed26"
var _Version = "Release 4.2.0"
// Version returns the version of the obitools package.

View File

@ -26,8 +26,8 @@ func MakeSequenceFamilyGenusWorker(references obiseq.BioSequenceSlice) obiseq.Se
family := make(map[int]*obiseq.BioSequenceSlice)
for _, ref := range references {
g, ok := ref.GetIntAttribute("genus_taxid")
f, ok := ref.GetIntAttribute("family_taxid")
g, _ := ref.GetIntAttribute("genus_taxid")
f, _ := ref.GetIntAttribute("family_taxid")
gs, ok := genus[g]
if !ok {
@ -47,6 +47,9 @@ func MakeSequenceFamilyGenusWorker(references obiseq.BioSequenceSlice) obiseq.Se
}
f := func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
sequence.SetAttribute("obicleandb_level", "none")
pval := 0.0
g, _ := sequence.GetIntAttribute("genus_taxid")
sequence.SetAttribute("obicleandb_level", "genus")
@ -61,8 +64,6 @@ func MakeSequenceFamilyGenusWorker(references obiseq.BioSequenceSlice) obiseq.Se
}
nindist := len(indist)
pval := 0.0
f, _ := sequence.GetIntAttribute("family_taxid")
fs := family[f]
@ -87,7 +88,7 @@ func MakeSequenceFamilyGenusWorker(references obiseq.BioSequenceSlice) obiseq.Se
next = 20
}
outdist := make([]float64, 0, nindist)
outdist := make([]float64, 0, next)
p := rand.Perm(references.Len())
i := 0
for _, ir := range p {
@ -110,13 +111,15 @@ func MakeSequenceFamilyGenusWorker(references obiseq.BioSequenceSlice) obiseq.Se
pval = res.P
}
level, _ := sequence.GetAttribute("obicleandb_level")
log.Warnf("%s - level: %v", sequence.Id(), level)
log.Warnf("%s - gdist: %v", sequence.Id(), indist)
log.Warnf("%s - fdist: %v", sequence.Id(), outdist)
log.Warnf("%s - pval: %f", sequence.Id(), pval)
} else {
sequence.SetAttribute("obicleandb_level", "none")
// level, _ := sequence.GetAttribute("obicleandb_level")
// log.Warnf("%s - level: %v", sequence.Id(), level)
// log.Warnf("%s - gdist: %v", sequence.Id(), indist)
// log.Warnf("%s - fdist: %v", sequence.Id(), outdist)
// log.Warnf("%s - pval: %f", sequence.Id(), pval)
}
if pval < 0.0 {
pval = 0.0
}
sequence.SetAttribute("obicleandb_trusted", pval)
@ -265,10 +268,11 @@ func ICleanDB(itertator obiiter.IBioSequence) obiiter.IBioSequence {
).MakeIWorker(taxonomy.MakeSetFamilyWorker(),
false,
obioptions.CLIParallelWorkers(),
).MakeIWorker(SequenceTrust,
false,
obioptions.CLIParallelWorkers(),
)
// .MakeIWorker(SequenceTrust,
// false,
// obioptions.CLIParallelWorkers(),
// )
references := annotated.Load()