Files
obitools4/pkg/obitax/taxonomy_read.go
2025-01-27 17:12:45 +01:00

85 lines
1.5 KiB
Go

package obitax
import (
"fmt"
"os"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
"github.com/gabriel-vasile/mimetype"
log "github.com/sirupsen/logrus"
)
// TaxonomyLoader is the signature of a function that loads a taxonomy from
// path and returns it, or an error if loading fails. The format detection
// functions of this file return values of this type.
//
// NOTE(review): onlysn presumably restricts loading to scientific names;
// confirm against the concrete loaders.
type TaxonomyLoader func(path string, onlysn bool) (*Taxonomy, error)
// DetectTaxonomyTarFormat selects the loader matching the tar archive found
// at path.
//
// The only layout currently recognised is the NCBI Taxdump archive, as
// reported by IsNCBITarTaxDump; in that case LoadNCBITarTaxDump is returned.
// Any other archive yields a nil loader and an "unknown taxonomy format"
// error.
func DetectTaxonomyTarFormat(path string) (TaxonomyLoader, error) {
	if IsNCBITarTaxDump(path) {
		log.Infof("NCBI Taxdump Tar Archive detected: %s", path)
		return LoadNCBITarTaxDump, nil
	}

	return nil, fmt.Errorf("unknown taxonomy format: %s", path)
}
// DetectTaxonomyFormat inspects path and returns the loader able to read the
// taxonomy stored there.
//
// A directory is always treated as an NCBI Taxdump directory and maps to
// LoadNCBITaxDump. Any other path is opened with obiutils.Ropen and its MIME
// type is detected from the content: "text/csv" maps to LoadCSVTaxonomy and
// "application/x-tar" is delegated to DetectTaxonomyTarFormat.
//
// An error is returned when path cannot be inspected or opened, when MIME
// detection fails, or when the detected MIME type is not a supported format.
// When the returned error is nil the returned loader is never nil.
func DetectTaxonomyFormat(path string) (TaxonomyLoader, error) {
	// os.Stat is enough to tell directories from files; there is no need to
	// open the path just to query its metadata.
	info, err := os.Stat(path)
	if err != nil {
		return nil, err
	}

	if info.IsDir() {
		// For the moment, we only support NCBI Taxdump directory format
		log.Infof("NCBI Taxdump detected: %s", path)
		return LoadNCBITaxDump, nil
	}

	file, err := obiutils.Ropen(path)
	if err != nil {
		return nil, err
	}

	// The result is named "detected" so that it does not shadow the mimetype
	// package. The file is closed right after sniffing, on both the success
	// and the error path, because the loaders reopen path themselves.
	detected, err := mimetype.DetectReader(file)
	file.Close()
	if err != nil {
		return nil, err
	}

	switch detected.String() {
	case "text/csv":
		return LoadCSVTaxonomy, nil
	case "application/x-tar":
		return DetectTaxonomyTarFormat(path)
	}

	// Report unsupported formats to the caller instead of terminating the
	// process: this is library code and the caller decides how to react.
	return nil, fmt.Errorf("unknown taxonomy format: %s (detected file format: %s)", path, detected.String())
}
// LoadTaxonomy loads the taxonomy located at path.
//
// The storage format is determined by DetectTaxonomyFormat, and the loader it
// selects is then invoked with path and onlysn. A detection failure is
// returned as is, with a nil taxonomy; otherwise the loader's own result and
// error are returned unchanged.
func LoadTaxonomy(path string, onlysn bool) (*Taxonomy, error) {
	load, detectErr := DetectTaxonomyFormat(path)
	if detectErr != nil {
		return nil, detectErr
	}

	return load(path, onlysn)
}