Make sequence files recognized as a taxonomy

This commit is contained in:
Eric Coissac
2025-03-14 14:22:22 +01:00
parent d1c31c54de
commit 8448783499
21 changed files with 657 additions and 467 deletions

View File

@ -1,299 +0,0 @@
package obitax
import (
"slices"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiitercsv"
)
// __options__ holds the raw configuration consumed by CSVTaxaIterator when
// rendering taxa as CSV records. It is always manipulated through the
// Options wrapper, so every copy of an Options shares this single instance.
type __options__ struct {
	batch_size           int      // Number of items to process in a batch
	with_pattern         bool     // Emit the "query" column (pattern matched against the taxon)
	with_parent          bool     // Emit the "parent" column
	with_path            bool     // Emit the "path" column (full taxonomic path)
	with_rank            bool     // Emit the "taxonomic_rank" column
	with_scientific_name bool     // Emit the "scientific_name" column
	raw_taxid            bool     // Emit bare taxids instead of formatted taxon strings
	with_metadata        []string // Extra metadata keys emitted as additional columns
	source               string   // Source of the data
}
// Options wraps the __options__ struct to provide a pointer to the options.
// Copying an Options value is cheap, and all copies share (and mutate) the
// same underlying __options__ instance through this pointer.
type Options struct {
	pointer *__options__ // Pointer to the underlying options
}
// WithOption is a function type that takes an Options parameter and modifies it.
// Values of this type are produced by the Options* constructors below and are
// applied in order by MakeOptions.
type WithOption func(Options)
// MakeOptions builds an Options value initialized with the default settings,
// then applies each provided setter in order.
//
// Parameters:
//   - setters: A slice of WithOption functions to customize the options.
//
// Returns:
//   - An Options instance with the specified settings.
func MakeOptions(setters []WithOption) Options {
	defaults := __options__{
		batch_size:           obidefault.BatchSize(), // Number of items to process in a batch
		with_pattern:         true,
		with_parent:          false,
		with_path:            false,
		with_rank:            true,
		with_scientific_name: false,
		raw_taxid:            false,
		source:               "unknown",
	}

	options := Options{pointer: &defaults}
	for _, setter := range setters {
		setter(options)
	}

	return options
}
// BatchSize reports how many records are grouped into one processing batch.
func (o *Options) BatchSize() int {
	size := o.pointer.batch_size
	return size
}
// WithPattern reports whether the "query" pattern column is enabled.
func (o *Options) WithPattern() bool {
	enabled := o.pointer.with_pattern
	return enabled
}
// WithParent reports whether the "parent" column is enabled.
func (o *Options) WithParent() bool {
	enabled := o.pointer.with_parent
	return enabled
}
// WithPath reports whether the taxonomic "path" column is enabled.
func (o *Options) WithPath() bool {
	enabled := o.pointer.with_path
	return enabled
}
// WithRank reports whether the "taxonomic_rank" column is enabled.
func (o *Options) WithRank() bool {
	enabled := o.pointer.with_rank
	return enabled
}
// WithScientificName reports whether the "scientific_name" column is enabled.
func (o *Options) WithScientificName() bool {
	enabled := o.pointer.with_scientific_name
	return enabled
}
// RawTaxid reports whether taxids are emitted raw rather than formatted.
func (o *Options) RawTaxid() bool {
	enabled := o.pointer.raw_taxid
	return enabled
}
// Source returns the configured origin label attached to produced batches.
func (o *Options) Source() string {
	src := o.pointer.source
	return src
}
// WithMetadata returns the metadata keys that must be emitted as extra
// columns. When the pattern option is active, the internally managed
// "query" key is stripped from the stored slice before it is returned
// (CSVTaxaIterator handles that column itself).
func (o *Options) WithMetadata() []string {
	if o.WithPattern() {
		if idx := slices.Index(o.pointer.with_metadata, "query"); idx >= 0 {
			o.pointer.with_metadata = slices.Delete(o.pointer.with_metadata, idx, idx+1)
		}
	}
	return o.pointer.with_metadata
}
// OptionsBatchSize returns a WithOption function that sets the batch_size option.
//
// Parameters:
//   - size: An integer specifying the size of the batch to be processed.
func OptionsBatchSize(size int) WithOption {
	return func(opt Options) {
		opt.pointer.batch_size = size
	}
}
// OptionsWithPattern returns a WithOption that enables or disables the
// "query" pattern column.
func OptionsWithPattern(value bool) WithOption {
	return func(opt Options) {
		opt.pointer.with_pattern = value
	}
}
// OptionsWithParent returns a WithOption that enables or disables the
// "parent" column.
func OptionsWithParent(value bool) WithOption {
	return func(opt Options) {
		opt.pointer.with_parent = value
	}
}
// OptionsWithPath returns a WithOption that enables or disables the
// taxonomic "path" column.
func OptionsWithPath(value bool) WithOption {
	return func(opt Options) {
		opt.pointer.with_path = value
	}
}
// OptionsWithRank returns a WithOption that enables or disables the
// "taxonomic_rank" column.
func OptionsWithRank(value bool) WithOption {
	return func(opt Options) {
		opt.pointer.with_rank = value
	}
}
// OptionsWithScientificName returns a WithOption that enables or disables
// the "scientific_name" column.
func OptionsWithScientificName(value bool) WithOption {
	return func(opt Options) {
		opt.pointer.with_scientific_name = value
	}
}
// OptionsRawTaxid returns a WithOption that selects raw taxids instead of
// formatted taxon strings.
func OptionsRawTaxid(value bool) WithOption {
	return func(opt Options) {
		opt.pointer.raw_taxid = value
	}
}
// OptionsSource returns a WithOption that sets the origin label attached
// to produced batches.
func OptionsSource(value string) WithOption {
	return func(opt Options) {
		opt.pointer.source = value
	}
}
// OptionsWithMetadata returns a WithOption that sets the metadata keys to
// emit as extra columns.
func OptionsWithMetadata(values ...string) WithOption {
	return func(opt Options) {
		opt.pointer.with_metadata = values
	}
}
// CSVTaxaIterator consumes the taxon iterator and produces an iterator of
// CSV records, one record per taxon, pushed downstream in batches of
// BatchSize() records. The column order is: "query" (if the pattern option
// is set), "taxid", "parent", "taxonomic_rank", "scientific_name", any
// metadata columns, and finally "path" — each controlled by the
// corresponding option.
func (iterator *ITaxon) CSVTaxaIterator(options ...WithOption) *obiitercsv.ICSVRecord {
	opt := MakeOptions(options)
	metakeys := make([]string, 0)

	newIter := obiitercsv.NewICSVRecord()

	// Register the single producer goroutine started below; it calls Done()
	// when the source iterator is exhausted.
	newIter.Add(1)
	batch_size := opt.BatchSize()

	if opt.WithPattern() {
		newIter.AppendField("query")
		// NOTE(review): "query" is appended to with_metadata here and removed
		// again inside WithMetadata(), so it never reaches metakeys and the
		// column is not emitted twice — presumably this marks the key as
		// consumed for other readers of the options; confirm intent.
		opt.pointer.with_metadata = append(opt.pointer.with_metadata, "query")
	}

	newIter.AppendField("taxid")
	// Captured once so the producer goroutine does not re-read the options.
	rawtaxid := opt.RawTaxid()

	if opt.WithParent() {
		newIter.AppendField("parent")
	}

	if opt.WithRank() {
		newIter.AppendField("taxonomic_rank")
	}

	if opt.WithScientificName() {
		newIter.AppendField("scientific_name")
	}

	if opt.WithMetadata() != nil {
		metakeys = opt.WithMetadata()
		for _, metadata := range metakeys {
			newIter.AppendField(metadata)
		}
	}

	if opt.WithPath() {
		newIter.AppendField("path")
	}

	// Close the output iterator once the producer goroutine has finished.
	go func() {
		newIter.WaitAndClose()
	}()

	// Producer: convert each taxon to a CSVRecord and push full batches.
	go func() {
		o := 0 // sequential batch order number
		data := make([]obiitercsv.CSVRecord, 0, batch_size)

		for iterator.Next() {
			taxon := iterator.Get()
			record := make(obiitercsv.CSVRecord)

			if opt.WithPattern() {
				record["query"] = taxon.MetadataAsString("query")
			}

			// Either the bare taxid or the formatted taxon string.
			if rawtaxid {
				record["taxid"] = *taxon.Node.Id()
			} else {
				record["taxid"] = taxon.String()
			}

			if opt.WithParent() {
				if rawtaxid {
					record["parent"] = *taxon.Node.ParentId()
				} else {
					record["parent"] = taxon.Parent().String()
				}
			}

			if opt.WithRank() {
				record["taxonomic_rank"] = taxon.Rank()
			}

			if opt.WithScientificName() {
				record["scientific_name"] = taxon.ScientificName()
			}

			if opt.WithPath() {
				record["path"] = taxon.Path().String()
			}

			for _, key := range metakeys {
				record[key] = taxon.MetadataAsString(key)
			}

			data = append(data, record)

			// Push a full batch and start a fresh one.
			if len(data) >= batch_size {
				newIter.Push(obiitercsv.MakeCSVRecordBatch(opt.Source(), o, data))
				data = make([]obiitercsv.CSVRecord, 0, batch_size)
				o++
			}
		}

		// Push the last, possibly partial, batch.
		if len(data) > 0 {
			newIter.Push(obiitercsv.MakeCSVRecordBatch(opt.Source(), o, data))
		}

		newIter.Done()
	}()

	return newIter
}

View File

@ -1,119 +0,0 @@
package obitax
import (
	"encoding/csv"
	"errors"
	"fmt"
	"io"
	"strings"

	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"

	log "github.com/sirupsen/logrus"
)
// LoadCSVTaxonomy loads a taxonomy from a CSV file.
//
// The file must contain at least the columns "taxid", "parent",
// "scientific_name" and "taxonomic_rank"; extra columns are ignored.
// The taxon code is deduced from the first taxid when it has the form
// "code:id".
//
// Parameters:
//   - path: path of the CSV file (possibly compressed; opened via obiutils.Ropen).
//   - onlysn: kept for TaxonomyLoader signature compatibility; CSV files
//     only carry scientific names, so the flag is unused here.
//
// Returns the loaded taxonomy, or an error if the file cannot be read,
// a mandatory column is missing, a record is malformed, or no root node
// is present. Read errors other than EOF are now reported instead of
// silently truncating the taxonomy, and taxon-insertion errors are
// returned rather than terminating the process.
func LoadCSVTaxonomy(path string, onlysn bool) (*Taxonomy, error) {
	log.Infof("Loading taxonomy from csv file: %s", path)

	file, err := obiutils.Ropen(path)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	csvfile := csv.NewReader(file)
	csvfile.Comma = ','
	csvfile.ReuseRecord = false
	csvfile.LazyQuotes = true
	csvfile.Comment = '#'
	csvfile.FieldsPerRecord = -1 // records may have a variable number of fields
	csvfile.TrimLeadingSpace = true

	header, err := csvfile.Read()
	if err != nil {
		return nil, fmt.Errorf("cannot read header of taxonomy file %s: %w", path, err)
	}

	taxidColIndex := -1
	parentColIndex := -1
	scientific_nameColIndex := -1
	rankColIndex := -1

	for i, colName := range header {
		switch colName {
		case "taxid":
			taxidColIndex = i
		case "parent":
			parentColIndex = i
		case "scientific_name":
			scientific_nameColIndex = i
		case "taxonomic_rank":
			rankColIndex = i
		}
	}

	if taxidColIndex == -1 {
		return nil, errors.New("taxonomy file does not contain taxid column")
	}
	if parentColIndex == -1 {
		return nil, errors.New("taxonomy file does not contain parent column")
	}
	if scientific_nameColIndex == -1 {
		return nil, errors.New("taxonomy file does not contain scientific_name column")
	}
	if rankColIndex == -1 {
		// The header name looked for is "taxonomic_rank", not "rank".
		return nil, errors.New("taxonomy file does not contain taxonomic_rank column")
	}

	// Every record must reach the right-most mandatory column; shorter
	// records previously caused an index-out-of-range panic.
	minFields := max(taxidColIndex, parentColIndex, scientific_nameColIndex, rankColIndex) + 1

	name := obiutils.RemoveAllExt(path)
	short := obiutils.Basename(path)

	line, err := csvfile.Read()
	if err != nil && err != io.EOF {
		return nil, fmt.Errorf("cannot read taxonomy file %s: %w", path, err)
	}
	if err == nil && len(line) > taxidColIndex {
		// If taxids look like "code:1234", use "code" as the taxon code.
		parts := strings.Split(line[taxidColIndex], " ")
		parts = strings.Split(parts[0], ":")
		if len(parts) > 1 {
			short = parts[0]
		}
	}

	log.Infof("Taxonomy name: %s", name)
	log.Infof("Taxon code: %s", short)

	taxonomy := NewTaxonomy(name, short, obiutils.AsciiAlphaNumSet)

	root := true // the first record is registered as the root taxon
	for err == nil {
		if len(line) < minFields {
			return nil, fmt.Errorf("taxonomy file %s: record %v has fewer than %d fields", path, line, minFields)
		}

		taxid := line[taxidColIndex]
		parent := line[parentColIndex]
		scientific_name := line[scientific_nameColIndex]
		rank := line[rankColIndex]

		taxon, aerr := taxonomy.AddTaxon(taxid, parent, rank, root, false)
		if aerr != nil {
			return nil, fmt.Errorf("cannot add taxon %s: %w", taxid, aerr)
		}
		root = false

		taxon.SetName(scientific_name, "scientific name")

		line, err = csvfile.Read()
	}

	// Previously any read error other than EOF silently truncated the
	// taxonomy, which was then returned as a success.
	if err != io.EOF {
		return nil, fmt.Errorf("error while reading taxonomy file %s: %w", path, err)
	}

	log.Infof("%d Taxa loaded", taxonomy.Len())

	if !taxonomy.HasRoot() {
		return nil, errors.New("taxonomy file does not contain root node")
	}

	return taxonomy, nil
}

View File

@ -1,38 +0,0 @@
package obitax
import (
"strings"
"github.com/TuftsBCB/io/newick"
)
// Newick renders the taxonomy as one Newick tree string per root, each
// terminated by a newline.
//
// The previous implementation appended *tree (a value copy) to the
// parent's Children slice while later descendants were attached to the
// heap node held in the map — so every grandchild was silently lost.
// The tree is now materialized bottom-up from an id->children index once
// all taxa have been seen, and a self-parented root no longer registers
// itself as its own child.
func (taxonomy *Taxonomy) Newick() string {
	if taxonomy == nil {
		return ""
	}

	iterator := taxonomy.AsTaxonSet().Sort().Iterator()

	labels := make(map[*string]string, taxonomy.Len())
	children := make(map[*string][]*string, taxonomy.Len())
	roots := make([]*string, 0)

	// First pass: record every taxon's label and its parent/child edge.
	// The iterator is sorted, so parents are seen before their children.
	for iterator.Next() {
		taxon := iterator.Get()
		id := taxon.Node.id
		labels[id] = taxon.String()

		parent := taxon.Parent()
		switch {
		case parent == nil || parent.Node.id == id:
			// Self-parented (NCBI-style root) or parentless: a tree root.
			roots = append(roots, id)
		default:
			pid := parent.Node.id
			if _, known := labels[pid]; known {
				children[pid] = append(children[pid], id)
			} else {
				// Parent not part of this taxonomy view: treat as a root.
				roots = append(roots, id)
			}
		}
	}

	// Second pass: build each newick.Tree bottom-up so children are
	// complete before being embedded by value into their parent.
	var build func(id *string) newick.Tree
	build = func(id *string) newick.Tree {
		tree := newick.Tree{Label: labels[id]}
		for _, cid := range children[id] {
			tree.Children = append(tree.Children, build(cid))
		}
		return tree
	}

	rep := strings.Builder{}
	for _, rootID := range roots {
		tree := build(rootID)
		rep.WriteString(tree.String())
		rep.WriteString("\n")
	}

	return rep.String()
}

View File

@ -3,7 +3,6 @@ package obitax
import (
"sync"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
log "github.com/sirupsen/logrus"
)
@ -32,24 +31,5 @@ func IsDefaultTaxonomyDefined() bool {
}
// DefaultTaxonomy returns the process-wide default taxonomy, lazily loading
// it on first use from the taxonomy selected in obidefault. It returns nil
// when no taxonomy has been selected, and terminates the process if the
// selected taxonomy cannot be loaded.
//
// The previous implementation read __defaut_taxonomy__ outside the mutex
// (broken double-checked locking), which is a data race; the lock is now
// taken before the first read.
func DefaultTaxonomy() *Taxonomy {
	__defaut_taxonomy_mutex__.Lock()
	defer __defaut_taxonomy_mutex__.Unlock()

	if __defaut_taxonomy__ == nil && obidefault.HasSelectedTaxonomy() {
		taxonomy, err := LoadTaxonomy(
			obidefault.SelectedTaxonomy(),
			!obidefault.AreAlternativeNamesSelected(),
		)
		if err != nil {
			log.Fatalf("Cannot load default taxonomy: %v", err)
		}
		__defaut_taxonomy__ = taxonomy
	}

	return __defaut_taxonomy__
}

View File

@ -224,3 +224,9 @@ func (taxonomy *Taxonomy) ISubTaxonomy(taxid string) *ITaxon {
return taxon.ISubTaxonomy()
}
// Consume exhausts the iterator, fetching and discarding every remaining taxon.
func (iterator *ITaxon) Consume() {
	for {
		if !iterator.Next() {
			return
		}
		_ = iterator.Get()
	}
}

View File

@ -1,213 +0,0 @@
package obitax
import (
"bufio"
"encoding/csv"
"fmt"
"io"
"os"
"path"
"strings"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
)
// loadNodeTable reads a node table from the provided reader and populates the given taxonomy.
// It is an internal function and should not be called directly. It is part of the NCBI taxdump reader.
// The node table is expected to be in CSV format with a custom delimiter ('|') and comments
// starting with '#'. Each record represents a taxon with its taxid, parent taxid, and rank.
//
// Parameters:
//   - reader: An io.Reader from which the node table is read.
//   - taxonomy: A pointer to the Taxonomy instance where the taxon data will be added.
//
// The function trims whitespace from the taxid, parent, and rank and adds each taxon to the
// taxonomy; the taxon with taxid "1" is registered as the root. Any read or insertion error
// logs a fatal error and terminates the program — previously, read errors other than EOF
// silently ended the loop and the truncated taxonomy was used as if complete.
func loadNodeTable(reader io.Reader, taxonomy *Taxonomy) {
	file := csv.NewReader(reader)
	file.Comma = '|'
	file.Comment = '#'
	file.TrimLeadingSpace = true
	file.ReuseRecord = true

	n := 0
	for {
		record, err := file.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Fatalf("Error reading node table after %d records: %v", n, err)
		}
		n++

		// Guard against malformed records that would otherwise panic.
		if len(record) < 3 {
			log.Fatalf("Node table record %d has only %d fields (3 required)", n, len(record))
		}

		taxid := strings.TrimSpace(record[0])
		parent := strings.TrimSpace(record[1])
		rank := strings.TrimSpace(record[2])

		if _, err := taxonomy.AddTaxon(taxid, parent, rank, taxid == "1", false); err != nil {
			log.Fatalf("Error adding taxon %s: %v\n", taxid, err)
		}
	}
}
// loadNameTable reads a name table from the provided reader and populates the given taxonomy.
// It is an internal function and should not be called directly. It is part of the NCBI taxdump reader.
// The name table is a '|'-separated format where each record carries a taxid, a name, and a
// class name (field 4).
//
// Parameters:
//   - reader: An io.Reader from which the name table is read.
//   - taxonomy: A pointer to the Taxonomy instance where the taxon names will be set.
//   - onlysn: When true, only records whose class name is "scientific name" are processed.
//
// Returns the number of taxon names loaded into the taxonomy, or -1 if a line exceeds the
// buffer size. Malformed records (fewer than 4 fields) previously caused an
// index-out-of-range panic; they now terminate with a diagnostic that includes the
// line number.
func loadNameTable(reader io.Reader, taxonomy *Taxonomy, onlysn bool) int {
	file := bufio.NewReader(reader)

	n := 0 // number of names actually set
	l := 0 // current line number, for error reporting
	for {
		line, prefix, err := file.ReadLine()
		if err != nil {
			break
		}
		l++

		// A true prefix means the line was longer than the buffer.
		if prefix {
			return -1
		}

		record := strings.Split(string(line), "|")
		if len(record) < 4 {
			log.Fatalf("Name table line %d has only %d fields (4 required)", l, len(record))
		}

		taxid := strings.TrimSpace(record[0])
		name := strings.TrimSpace(record[1])
		classname := strings.TrimSpace(record[3])

		if !onlysn || classname == "scientific name" {
			n++
			taxon, _, err := taxonomy.Taxon(taxid)
			if err != nil {
				log.Fatalf("%s: is unknown from the taxonomy", taxid)
			}
			taxon.SetName(name, classname)
		}
	}

	return n
}
// loadMergedTable reads a merged table from the provided reader and populates the given taxonomy.
// It is an internal function and should not be called directly. It is part of the NCBI taxdump reader.
// The merged table is expected to be in CSV format with a custom delimiter ('|') and comments
// starting with '#'. Each record maps an old taxid to a new taxid.
//
// Parameters:
//   - reader: An io.Reader from which the merged table is read.
//   - taxonomy: A pointer to the Taxonomy instance where the alias mappings will be added.
//
// Returns the number of alias mappings loaded. Read errors other than EOF and malformed
// records now terminate with a diagnostic — previously they silently ended the loop,
// leaving the alias table incomplete.
func loadMergedTable(reader io.Reader, taxonomy *Taxonomy) int {
	file := csv.NewReader(reader)
	file.Comma = '|'
	file.Comment = '#'
	file.TrimLeadingSpace = true
	file.ReuseRecord = true

	n := 0
	for {
		record, err := file.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Fatalf("Error reading merged table after %d records: %v", n, err)
		}
		if len(record) < 2 {
			log.Fatalf("Merged table record %d has only %d fields (2 required)", n+1, len(record))
		}
		n++

		oldtaxid := strings.TrimSpace(record[0])
		newtaxid := strings.TrimSpace(record[1])

		taxonomy.AddAlias(oldtaxid, newtaxid, false)
	}

	return n
}
// LoadNCBITaxDump loads the NCBI taxonomy data from the specified directory.
// It reads the taxonomy nodes (nodes.dmp), taxon names (names.dmp), and merged
// taxa (merged.dmp) and constructs a Taxonomy object rooted at taxid "1".
//
// Parameters:
//   - directory: path to the directory containing the NCBI taxonomy dump files.
//   - onlysn: whether to load only scientific names (true) or all names (false).
//
// Returns the loaded taxonomy, or an error if any of the files cannot be
// opened or the root taxon is missing. Underlying open errors are now
// wrapped with %w instead of being discarded.
func LoadNCBITaxDump(directory string, onlysn bool) (*Taxonomy, error) {
	taxonomy := NewTaxonomy("NCBI Taxonomy", "taxon", obiutils.AsciiDigitSet)

	//
	// Load the Taxonomy nodes
	//
	log.Printf("Loading Taxonomy nodes\n")

	nodefile, err := os.Open(path.Join(directory, "nodes.dmp"))
	if err != nil {
		return nil, fmt.Errorf("cannot open nodes file from '%s': %w", directory, err)
	}
	defer nodefile.Close()

	loadNodeTable(bufio.NewReader(nodefile), taxonomy)
	log.Printf("%d Taxonomy nodes read\n", taxonomy.Len())

	//
	// Load the taxon names
	//
	log.Printf("Loading Taxon names\n")

	namefile, nerr := os.Open(path.Join(directory, "names.dmp"))
	if nerr != nil {
		return nil, fmt.Errorf("cannot open names file from '%s': %w", directory, nerr)
	}
	defer namefile.Close()

	// Buffer the reader for consistency with the other tables.
	n := loadNameTable(bufio.NewReader(namefile), taxonomy, onlysn)
	log.Printf("%d taxon names read\n", n)

	//
	// Load the merged taxa
	//
	log.Printf("Loading Merged taxa\n")

	aliasfile, aerr := os.Open(path.Join(directory, "merged.dmp"))
	if aerr != nil {
		return nil, fmt.Errorf("cannot open merged file from '%s': %w", directory, aerr)
	}
	defer aliasfile.Close()

	n = loadMergedTable(bufio.NewReader(aliasfile), taxonomy)
	log.Printf("%d merged taxa read\n", n)

	root, _, err := taxonomy.Taxon("1")
	if err != nil {
		return nil, fmt.Errorf("cannot find the root taxon (1) in the NCBI tax dump: %w", err)
	}
	taxonomy.SetRoot(root)

	return taxonomy, nil
}

View File

@ -1,146 +0,0 @@
package obitax
import (
"archive/tar"
"bufio"
"fmt"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
log "github.com/sirupsen/logrus"
)
// IsNCBITarTaxDump reports whether the (possibly compressed) tar archive at
// path contains every regular file expected in an NCBI taxdump archive.
// It returns false when the archive cannot be opened.
func IsNCBITarTaxDump(path string) bool {
	file, err := obiutils.Ropen(path)
	if err != nil {
		return false
	}
	defer file.Close()

	// Every required member starts as unseen.
	required := map[string]bool{
		"citations.dmp": false,
		"division.dmp":  false,
		"gencode.dmp":   false,
		"names.dmp":     false,
		"delnodes.dmp":  false,
		"gc.prt":        false,
		"merged.dmp":    false,
		"nodes.dmp":     false,
	}

	archive := tar.NewReader(file)
	for {
		header, err := archive.Next()
		if err != nil {
			break
		}
		if header.Typeflag != tar.TypeReg {
			continue
		}
		if _, wanted := required[header.Name]; wanted {
			required[header.Name] = true
		}
	}

	for _, seen := range required {
		if !seen {
			return false
		}
	}
	return true
}
// LoadNCBITarTaxDump loads the NCBI taxonomy from a (possibly compressed)
// tar archive of the taxdump files.
//
// Parameters:
//   - path: path to the tar archive containing nodes.dmp, names.dmp and merged.dmp.
//   - onlysn: when true, only scientific names are loaded.
//
// Returns the loaded taxonomy, or an error if the archive or one of its
// members cannot be opened, or the root taxon is missing. The archive is
// reopened for each member because a tar stream can only be read forward.
// Underlying errors are now wrapped with %w, and the handle used for the
// merged table — previously leaked — is closed.
func LoadNCBITarTaxDump(path string, onlysn bool) (*Taxonomy, error) {
	taxonomy := NewTaxonomy("NCBI Taxonomy", "taxon", obiutils.AsciiDigitSet)

	//
	// Load the Taxonomy nodes
	//
	log.Printf("Loading Taxonomy nodes\n")

	file, err := obiutils.Ropen(path)
	if err != nil {
		return nil, fmt.Errorf("cannot open taxonomy file from '%s': %w", path, err)
	}
	nodefile, err := obiutils.TarFileReader(file, "nodes.dmp")
	if err != nil {
		file.Close()
		return nil, fmt.Errorf("cannot open nodes file from '%s': %w", path, err)
	}
	loadNodeTable(bufio.NewReader(nodefile), taxonomy)
	log.Printf("%d Taxonomy nodes read\n", taxonomy.Len())
	file.Close()

	//
	// Load the taxon names
	//
	log.Printf("Loading Taxon names\n")

	file, err = obiutils.Ropen(path)
	if err != nil {
		return nil, fmt.Errorf("cannot open taxonomy file from '%s': %w", path, err)
	}
	namefile, nerr := obiutils.TarFileReader(file, "names.dmp")
	if nerr != nil {
		file.Close()
		return nil, fmt.Errorf("cannot open names file from '%s': %w", path, nerr)
	}
	n := loadNameTable(namefile, taxonomy, onlysn)
	log.Printf("%d taxon names read\n", n)
	file.Close()

	//
	// Load the merged taxa
	//
	log.Printf("Loading Merged taxa\n")

	file, err = obiutils.Ropen(path)
	if err != nil {
		return nil, fmt.Errorf("cannot open taxonomy file from '%s': %w", path, err)
	}
	aliasfile, aerr := obiutils.TarFileReader(file, "merged.dmp")
	if aerr != nil {
		file.Close()
		return nil, fmt.Errorf("cannot open merged file from '%s': %w", path, aerr)
	}
	n = loadMergedTable(bufio.NewReader(aliasfile), taxonomy)
	log.Printf("%d merged taxa read\n", n)
	// This handle was previously never closed.
	file.Close()

	root, _, err := taxonomy.Taxon("1")
	if err != nil {
		return nil, fmt.Errorf("cannot find the root taxon (1) in the NCBI tax dump: %w", err)
	}
	taxonomy.SetRoot(root)

	return taxonomy, nil
}

View File

@ -1 +0,0 @@
package obitax

View File

@ -1,84 +0,0 @@
package obitax
import (
"fmt"
"os"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
"github.com/gabriel-vasile/mimetype"
log "github.com/sirupsen/logrus"
)
type TaxonomyLoader func(path string, onlysn bool) (*Taxonomy, error)
// DetectTaxonomyTarFormat inspects a tar archive and returns the loader
// able to read the taxonomy it contains, or an error when no known
// taxonomy layout is recognized.
func DetectTaxonomyTarFormat(path string) (TaxonomyLoader, error) {
	if IsNCBITarTaxDump(path) {
		log.Infof("NCBI Taxdump Tar Archive detected: %s", path)
		return LoadNCBITarTaxDump, nil
	}

	return nil, fmt.Errorf("unknown taxonomy format: %s", path)
}
// DetectTaxonomyFormat determines which TaxonomyLoader can read the
// taxonomy at path. A directory is assumed to be an NCBI taxdump; a file
// is sniffed by MIME type (CSV or tar archive).
//
// Returns the matching loader, or an error when the path cannot be
// inspected or the format is unknown. Previously an unknown format hit
// log.Fatalf (killing the process) and the trailing `return nil, nil`
// could hand callers a nil loader; both paths now return an error.
func DetectTaxonomyFormat(path string) (TaxonomyLoader, error) {
	// os.Stat replaces the former open/stat/close dance.
	fileInfo, err := os.Stat(path)
	if err != nil {
		return nil, err
	}

	if fileInfo.IsDir() {
		// For the moment, we only support the NCBI Taxdump directory format.
		log.Infof("NCBI Taxdump detected: %s", path)
		return LoadNCBITaxDump, nil
	}

	file, err := obiutils.Ropen(path)
	if err != nil {
		return nil, err
	}
	mime, err := mimetype.DetectReader(file)
	file.Close()
	if err != nil {
		return nil, err
	}

	switch mime.String() {
	case "text/csv":
		return LoadCSVTaxonomy, nil
	case "application/x-tar":
		return DetectTaxonomyTarFormat(path)
	}

	return nil, fmt.Errorf("unknown taxonomy format %s: %s", mime.String(), path)
}
// LoadTaxonomy reads the taxonomy stored at path, auto-detecting its
// format. onlysn restricts name loading to scientific names when true.
func LoadTaxonomy(path string, onlysn bool) (*Taxonomy, error) {
	loader, err := DetectTaxonomyFormat(path)
	if err != nil {
		return nil, err
	}

	return loader(path, onlysn)
}