From 42c5881ddcef4a0393ba8555468a134b66438d89 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Thu, 11 Jul 2024 10:46:11 +0200 Subject: [PATCH] some cleaning on the new tools Former-commit-id: f265a3910d2872c56af81c9b873669f2cb627781 --- cmd/obitools/obicleandb/main.go | 8 +++++- cmd/obitools/obidemerge/main.go | 36 +++++++++++++++++++++++++++ cmd/obitools/obijoin/main.go | 36 +++++++++++++++++++++++++++ pkg/obioptions/version.go | 2 +- pkg/obitools/obicleandb/obicleandb.go | 34 ++++++++++++++----------- 5 files changed, 99 insertions(+), 17 deletions(-) create mode 100644 cmd/obitools/obidemerge/main.go create mode 100644 cmd/obitools/obijoin/main.go diff --git a/cmd/obitools/obicleandb/main.go b/cmd/obitools/obicleandb/main.go index 53bc3c8..94ba435 100644 --- a/cmd/obitools/obicleandb/main.go +++ b/cmd/obitools/obicleandb/main.go @@ -5,6 +5,7 @@ import ( log "github.com/sirupsen/logrus" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obicleandb" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert" @@ -12,6 +13,8 @@ import ( ) func main() { + obioptions.SetBatchSize(10) + optionParser := obioptions.GenerateOptionParser(obicleandb.OptionSet) _, args := optionParser(os.Args) @@ -25,5 +28,8 @@ func main() { cleaned := obicleandb.ICleanDB(fs) - obiconvert.CLIWriteBioSequences(cleaned, true) + toconsume, _ := obiconvert.CLIWriteBioSequences(cleaned, false) + toconsume.Consume() + + obiiter.WaitForLastPipe() } diff --git a/cmd/obitools/obidemerge/main.go b/cmd/obitools/obidemerge/main.go new file mode 100644 index 0000000..7746be0 --- /dev/null +++ b/cmd/obitools/obidemerge/main.go @@ -0,0 +1,36 @@ +package main + +import ( + "os" + + log "github.com/sirupsen/logrus" + + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert" + 
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obidemerge" + + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" +) + +func main() { + obioptions.SetStrictReadWorker(2) + obioptions.SetStrictWriteWorker(2) + + optionParser := obioptions.GenerateOptionParser(obidemerge.OptionSet) + + _, args := optionParser(os.Args) + + fs, err := obiconvert.CLIReadBioSequences(args...) + + if err != nil { + log.Errorf("Cannot open file (%v)", err) + os.Exit(1) + } + + demerged := obidemerge.CLIDemergeSequences(fs) + + obiconvert.CLIWriteBioSequences(demerged, true) + + obiiter.WaitForLastPipe() + +} diff --git a/cmd/obitools/obijoin/main.go b/cmd/obitools/obijoin/main.go new file mode 100644 index 0000000..c83442c --- /dev/null +++ b/cmd/obitools/obijoin/main.go @@ -0,0 +1,36 @@ +package main + +import ( + "os" + + log "github.com/sirupsen/logrus" + + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obijoin" + + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" +) + +func main() { + obioptions.SetStrictReadWorker(2) + obioptions.SetStrictWriteWorker(2) + + optionParser := obioptions.GenerateOptionParser(obijoin.OptionSet) + + _, args := optionParser(os.Args) + + fs, err := obiconvert.CLIReadBioSequences(args...) + + if err != nil { + log.Errorf("Cannot open file (%v)", err) + os.Exit(1) + } + + joined := obijoin.CLIJoinSequences(fs) + + obiconvert.CLIWriteBioSequences(joined, true) + + obiiter.WaitForLastPipe() + +} diff --git a/pkg/obioptions/version.go b/pkg/obioptions/version.go index fd801d0..c58660b 100644 --- a/pkg/obioptions/version.go +++ b/pkg/obioptions/version.go @@ -7,7 +7,7 @@ import ( // TODO: The version number is extracted from git. 
This induces that the version // corresponds to the last commit, and not the one when the file will be // commited -var _Commit = "a365bb6" +var _Commit = "8a1ed26" var _Version = "Release 4.2.0" // Version returns the version of the obitools package. diff --git a/pkg/obitools/obicleandb/obicleandb.go b/pkg/obitools/obicleandb/obicleandb.go index 0f752d0..0ded75e 100644 --- a/pkg/obitools/obicleandb/obicleandb.go +++ b/pkg/obitools/obicleandb/obicleandb.go @@ -26,8 +26,8 @@ func MakeSequenceFamilyGenusWorker(references obiseq.BioSequenceSlice) obiseq.Se family := make(map[int]*obiseq.BioSequenceSlice) for _, ref := range references { - g, ok := ref.GetIntAttribute("genus_taxid") - f, ok := ref.GetIntAttribute("family_taxid") + g, _ := ref.GetIntAttribute("genus_taxid") + f, _ := ref.GetIntAttribute("family_taxid") gs, ok := genus[g] if !ok { @@ -47,6 +47,9 @@ func MakeSequenceFamilyGenusWorker(references obiseq.BioSequenceSlice) obiseq.Se } f := func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) { + sequence.SetAttribute("obicleandb_level", "none") + pval := 0.0 + g, _ := sequence.GetIntAttribute("genus_taxid") sequence.SetAttribute("obicleandb_level", "genus") @@ -61,8 +64,6 @@ func MakeSequenceFamilyGenusWorker(references obiseq.BioSequenceSlice) obiseq.Se } nindist := len(indist) - pval := 0.0 - f, _ := sequence.GetIntAttribute("family_taxid") fs := family[f] @@ -87,7 +88,7 @@ func MakeSequenceFamilyGenusWorker(references obiseq.BioSequenceSlice) obiseq.Se next = 20 } - outdist := make([]float64, 0, nindist) + outdist := make([]float64, 0, next) p := rand.Perm(references.Len()) i := 0 for _, ir := range p { @@ -110,13 +111,15 @@ func MakeSequenceFamilyGenusWorker(references obiseq.BioSequenceSlice) obiseq.Se pval = res.P } - level, _ := sequence.GetAttribute("obicleandb_level") - log.Warnf("%s - level: %v", sequence.Id(), level) - log.Warnf("%s - gdist: %v", sequence.Id(), indist) - log.Warnf("%s - fdist: %v", sequence.Id(), outdist) - 
log.Warnf("%s - pval: %f", sequence.Id(), pval) - } else { - sequence.SetAttribute("obicleandb_level", "none") + // level, _ := sequence.GetAttribute("obicleandb_level") + // log.Warnf("%s - level: %v", sequence.Id(), level) + // log.Warnf("%s - gdist: %v", sequence.Id(), indist) + // log.Warnf("%s - fdist: %v", sequence.Id(), outdist) + // log.Warnf("%s - pval: %f", sequence.Id(), pval) + } + + if pval < 0.0 { + pval = 0.0 } sequence.SetAttribute("obicleandb_trusted", pval) @@ -265,10 +268,11 @@ func ICleanDB(itertator obiiter.IBioSequence) obiiter.IBioSequence { ).MakeIWorker(taxonomy.MakeSetFamilyWorker(), false, obioptions.CLIParallelWorkers(), - ).MakeIWorker(SequenceTrust, - false, - obioptions.CLIParallelWorkers(), ) + // .MakeIWorker(SequenceTrust, + // false, + // obioptions.CLIParallelWorkers(), + // ) references := annotated.Load()