mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00

modified: cmd/obitools/obitag/main.go modified: cmd/obitools/obitaxonomy/main.go modified: pkg/obiformats/csvtaxdump_read.go modified: pkg/obiformats/ecopcr_read.go modified: pkg/obiformats/ncbitaxdump_read.go modified: pkg/obiformats/ncbitaxdump_readtar.go modified: pkg/obiformats/newick_write.go modified: pkg/obiformats/options.go modified: pkg/obiformats/taxonomy_read.go modified: pkg/obiformats/universal_read.go modified: pkg/obiiter/extract_taxonomy.go modified: pkg/obioptions/options.go modified: pkg/obioptions/version.go new file: pkg/obiphylo/tree.go modified: pkg/obiseq/biosequenceslice.go modified: pkg/obiseq/taxonomy_methods.go modified: pkg/obitax/taxonomy.go modified: pkg/obitax/taxonset.go modified: pkg/obitools/obiconvert/sequence_reader.go modified: pkg/obitools/obitag/obitag.go modified: pkg/obitools/obitaxonomy/obitaxonomy.go modified: pkg/obitools/obitaxonomy/options.go deleted: sample/.DS_Store
112 lines
2.3 KiB
Go
112 lines
2.3 KiB
Go
package obiformats
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
|
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
"github.com/gabriel-vasile/mimetype"
|
|
|
|
log "github.com/sirupsen/logrus"
|
|
)
|
|
|
|
type TaxonomyLoader func(path string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error)
|
|
|
|
func DetectTaxonomyTarFormat(path string) (TaxonomyLoader, error) {
|
|
|
|
switch {
|
|
case IsNCBITarTaxDump(path):
|
|
log.Infof("NCBI Taxdump Tar Archive detected: %s", path)
|
|
return LoadNCBITarTaxDump, nil
|
|
}
|
|
|
|
return nil, fmt.Errorf("unknown taxonomy format: %s", path)
|
|
}
|
|
|
|
func DetectTaxonomyFormat(path string) (TaxonomyLoader, error) {
|
|
|
|
obiutils.RegisterOBIMimeType()
|
|
|
|
file, err := os.Open(path)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
fileInfo, err := file.Stat()
|
|
if err != nil {
|
|
file.Close()
|
|
return nil, err
|
|
}
|
|
|
|
file.Close()
|
|
|
|
if fileInfo.IsDir() {
|
|
// For the moment, we only support NCBI Taxdump directory format
|
|
log.Infof("NCBI Taxdump detected: %s", path)
|
|
return LoadNCBITaxDump, nil
|
|
} else {
|
|
file, err := obiutils.Ropen(path)
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
mimetype, err := mimetype.DetectReader(file)
|
|
|
|
if err != nil {
|
|
file.Close()
|
|
return nil, err
|
|
}
|
|
|
|
file.Close()
|
|
|
|
switch mimetype.String() {
|
|
case "text/csv":
|
|
return LoadCSVTaxonomy, nil
|
|
case "application/x-tar":
|
|
return DetectTaxonomyTarFormat(path)
|
|
case "text/fasta":
|
|
return func(path string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error) {
|
|
input, err := ReadFastaFromFile(path)
|
|
input = input.NumberSequences(1, true)
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
_, data := input.Load()
|
|
|
|
return data.ExtractTaxonomy(nil, seqAsTaxa)
|
|
}, nil
|
|
case "text/fastq":
|
|
return func(path string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error) {
|
|
input, err := ReadFastqFromFile(path)
|
|
input = input.NumberSequences(1, true)
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
_, data := input.Load()
|
|
|
|
return data.ExtractTaxonomy(nil, seqAsTaxa)
|
|
}, nil
|
|
}
|
|
|
|
log.Fatalf("Detected file format: %s", mimetype.String())
|
|
}
|
|
|
|
return nil, nil
|
|
}
|
|
|
|
func LoadTaxonomy(path string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error) {
|
|
loader, err := DetectTaxonomyFormat(path)
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
taxonomy, err := loader(path, onlysn, seqAsTaxa)
|
|
|
|
return taxonomy, err
|
|
}
|