From 8a28c9ae7cd8e11743931d3d64b9c8636f13a47c Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Wed, 29 Jan 2025 12:38:39 +0100 Subject: [PATCH] add the --download-ncbi option to obitaxonomy --- cmd/obitools/obitaxonomy/main.go | 12 ++++- pkg/obioptions/version.go | 2 +- .../{obifind.go => obitaxonomy.go} | 20 +++++++++ pkg/obitools/obitaxonomy/options.go | 11 ++++- pkg/obiutils/download.go | 45 +++++++++++++++++++ 5 files changed, 87 insertions(+), 3 deletions(-) rename pkg/obitools/obitaxonomy/{obifind.go => obitaxonomy.go} (75%) create mode 100644 pkg/obiutils/download.go diff --git a/cmd/obitools/obitaxonomy/main.go b/cmd/obitools/obitaxonomy/main.go index d72f5b6..65fd04a 100644 --- a/cmd/obitools/obitaxonomy/main.go +++ b/cmd/obitools/obitaxonomy/main.go @@ -1,13 +1,14 @@ package main import ( - "log" "os" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitaxonomy" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" + + log "github.com/sirupsen/logrus" ) func main() { @@ -19,6 +20,15 @@ func main() { switch { + case obitaxonomy.CLIDownloadNCBI(): + err := obitaxonomy.CLIDownloadNCBITaxdump() + if err != nil { + log.Errorf("Cannot download NCBI taxonomy: %s", err.Error()) + os.Exit(1) + } + + os.Exit(0) + case obitaxonomy.CLIDumpSubtaxonomy(): iterator = obitaxonomy.CLISubTaxonomyIterator() diff --git a/pkg/obioptions/version.go b/pkg/obioptions/version.go index 6c3cfb6..e3c9ef1 100644 --- a/pkg/obioptions/version.go +++ b/pkg/obioptions/version.go @@ -8,7 +8,7 @@ import ( // corresponds to the last commit, and not the one when the file will be // commited -var _Commit = "67e2758" +var _Commit = "b6b18c0" var _Version = "Release 4.2.0" // Version returns the version of the obitools package. diff --git a/pkg/obitools/obitaxonomy/obifind.go b/pkg/obitools/obitaxonomy/obitaxonomy.go similarity index 75% rename from pkg/obitools/obitaxonomy/obifind.go rename to pkg/obitools/obitaxonomy/obitaxonomy.go index ffffd6e..1cdf668 100644 --- a/pkg/obitools/obitaxonomy/obifind.go +++ b/pkg/obitools/obitaxonomy/obitaxonomy.go @@ -1,10 +1,15 @@ package obitaxonomy import ( + "fmt" + "time" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiitercsv" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obicsv" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" log "github.com/sirupsen/logrus" ) @@ -73,3 +78,18 @@ func CLICSVTaxaIterator(iterator *obitax.ITaxon) *obiitercsv.ICSVRecord { func CLICSVTaxaWriter(iterator *obitax.ITaxon, terminalAction bool) *obiitercsv.ICSVRecord { return obicsv.CLICSVWriter(CLICSVTaxaIterator(iterator), terminalAction) } + +func CLIDownloadNCBITaxdump() error { + now := time.Now() + dateStr := now.Format("20060102") // In Go, this specific date is used as reference for formatting + + filename := fmt.Sprintf("ncbitaxo_%s.tgz", dateStr) + + if obiconvert.CLIOutPutFileName() != "-" { + filename = obiconvert.CLIOutPutFileName() + } + + log.Infof("Downloading NCBI Taxdump to %s", filename) + return obiutils.DownloadFile("https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump.tar.gz", filename) + +} diff --git a/pkg/obitools/obitaxonomy/options.go b/pkg/obitools/obitaxonomy/options.go index 1c868d0..634e39b 100644 --- a/pkg/obitools/obitaxonomy/options.go +++ b/pkg/obitools/obitaxonomy/options.go @@ -22,6 +22,7 @@ var __taxid_path__ = "NA" var __taxid_sons__ = "NA" var __restrict_rank__ = "" var __to_dump__ = "" +var __download_ncbi__ = false func FilterTaxonomyOptionSet(options *getoptions.GetOpt) { options.BoolVar(&__rank_list__, "rank-list", false, @@ -34,7 +35,7 @@ func FilterTaxonomyOptionSet(options *getoptions.GetOpt) { } func OptionSet(options *getoptions.GetOpt) { - obioptions.LoadTaxonomyOptionSet(options, true, true) + obioptions.LoadTaxonomyOptionSet(options, false, true) FilterTaxonomyOptionSet(options) options.BoolVar(&__fixed_pattern__, "fixed", false, options.Alias("F"), @@ -70,6 +71,10 @@ func OptionSet(options *getoptions.GetOpt) { options.ArgName("TAXID"), options.Description("Dump a sub-taxonomy corresponding to the precised clade"), ) + options.BoolVar(&__download_ncbi__, "download-ncbi", __download_ncbi__, + options.Description("Download the current NCBI taxonomy taxdump"), + ) + } func CLITaxonomicalRestrictions() (*obitax.TaxonSet, error) { @@ -144,3 +149,7 @@ func CLIDumpSubtaxonomy() bool { func CLISubTaxonomyNode() string { return __to_dump__ } + +func CLIDownloadNCBI() bool { + return __download_ncbi__ +} diff --git a/pkg/obiutils/download.go b/pkg/obiutils/download.go new file mode 100644 index 0000000..7fea1c6 --- /dev/null +++ b/pkg/obiutils/download.go @@ -0,0 +1,45 @@ +package obiutils + +import ( + "fmt" + "io" + "net/http" + "os" + + "github.com/schollz/progressbar/v3" +) + +func DownloadFile(url string, filepath string) error { + // Get the data + resp, err := http.Get(url) + if err != nil { + return err + } + defer resp.Body.Close() + + // Check server response + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("bad status: %s", resp.Status) + } + + // Create the file + out, err := os.Create(filepath) + if err != nil { + return err + } + defer out.Close() + + // Create progress bar + bar := progressbar.DefaultBytes( + resp.ContentLength, + "downloading", + ) + + // Write the body to file while updating the progress bar + _, err = io.Copy(io.MultiWriter(out, bar), resp.Body) + if err != nil { + return err + } + + return nil +}