mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00

modified: cmd/obitools/obitag/main.go modified: cmd/obitools/obitaxonomy/main.go modified: pkg/obiformats/csvtaxdump_read.go modified: pkg/obiformats/ecopcr_read.go modified: pkg/obiformats/ncbitaxdump_read.go modified: pkg/obiformats/ncbitaxdump_readtar.go modified: pkg/obiformats/newick_write.go modified: pkg/obiformats/options.go modified: pkg/obiformats/taxonomy_read.go modified: pkg/obiformats/universal_read.go modified: pkg/obiiter/extract_taxonomy.go modified: pkg/obioptions/options.go modified: pkg/obioptions/version.go new file: pkg/obiphylo/tree.go modified: pkg/obiseq/biosequenceslice.go modified: pkg/obiseq/taxonomy_methods.go modified: pkg/obitax/taxonomy.go modified: pkg/obitax/taxonset.go modified: pkg/obitools/obiconvert/sequence_reader.go modified: pkg/obitools/obitag/obitag.go modified: pkg/obitools/obitaxonomy/obitaxonomy.go modified: pkg/obitools/obitaxonomy/options.go deleted: sample/.DS_Store
148 lines
2.9 KiB
Go
148 lines
2.9 KiB
Go
package obiformats
|
|
|
|
import (
|
|
"archive/tar"
|
|
"bufio"
|
|
"fmt"
|
|
|
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
|
|
log "github.com/sirupsen/logrus"
|
|
)
|
|
|
|
func IsNCBITarTaxDump(path string) bool {
|
|
|
|
file, err := obiutils.Ropen(path)
|
|
|
|
if err != nil {
|
|
return false
|
|
}
|
|
|
|
defer file.Close()
|
|
|
|
citations := false
|
|
division := false
|
|
gencode := false
|
|
names := false
|
|
delnodes := false
|
|
gc := false
|
|
merged := false
|
|
nodes := false
|
|
|
|
tarfile := tar.NewReader(file)
|
|
|
|
header, err := tarfile.Next()
|
|
|
|
for err == nil {
|
|
name := header.Name
|
|
|
|
if header.Typeflag == tar.TypeReg {
|
|
switch name {
|
|
case "citations.dmp":
|
|
citations = true
|
|
case "division.dmp":
|
|
division = true
|
|
case "gencode.dmp":
|
|
gencode = true
|
|
case "names.dmp":
|
|
names = true
|
|
case "delnodes.dmp":
|
|
delnodes = true
|
|
case "gc.prt":
|
|
gc = true
|
|
case "merged.dmp":
|
|
merged = true
|
|
case "nodes.dmp":
|
|
nodes = true
|
|
}
|
|
}
|
|
header, err = tarfile.Next()
|
|
}
|
|
|
|
return citations && division && gencode && names && delnodes && gc && merged && nodes
|
|
}
|
|
|
|
func LoadNCBITarTaxDump(path string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error) {
|
|
|
|
taxonomy := obitax.NewTaxonomy("NCBI Taxonomy", "taxon", obiutils.AsciiDigitSet)
|
|
|
|
//
|
|
// Load the Taxonomy nodes
|
|
//
|
|
|
|
log.Printf("Loading Taxonomy nodes\n")
|
|
|
|
file, err := obiutils.Ropen(path)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("cannot open taxonomy file from '%s'",
|
|
path)
|
|
}
|
|
|
|
nodefile, err := obiutils.TarFileReader(file, "nodes.dmp")
|
|
if err != nil {
|
|
file.Close()
|
|
return nil, fmt.Errorf("cannot open nodes file from '%s'",
|
|
path)
|
|
}
|
|
|
|
buffered := bufio.NewReader(nodefile)
|
|
loadNodeTable(buffered, taxonomy)
|
|
log.Printf("%d Taxonomy nodes read\n", taxonomy.Len())
|
|
file.Close()
|
|
|
|
//
|
|
// Load the Taxonomy nodes
|
|
//
|
|
|
|
log.Printf("Loading Taxon names\n")
|
|
|
|
file, err = obiutils.Ropen(path)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("cannot open taxonomy file from '%s'",
|
|
path)
|
|
}
|
|
|
|
namefile, nerr := obiutils.TarFileReader(file, "names.dmp")
|
|
if nerr != nil {
|
|
file.Close()
|
|
return nil, fmt.Errorf("cannot open names file from '%s'",
|
|
path)
|
|
}
|
|
n := loadNameTable(namefile, taxonomy, onlysn)
|
|
log.Printf("%d taxon names read\n", n)
|
|
file.Close()
|
|
|
|
//
|
|
// Load the merged taxa
|
|
//
|
|
|
|
log.Printf("Loading Merged taxa\n")
|
|
file, err = obiutils.Ropen(path)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("cannot open taxonomy file from '%s'",
|
|
path)
|
|
}
|
|
|
|
aliasfile, aerr := obiutils.TarFileReader(file, "merged.dmp")
|
|
if aerr != nil {
|
|
file.Close()
|
|
return nil, fmt.Errorf("cannot open merged file from '%s'",
|
|
path)
|
|
}
|
|
|
|
buffered = bufio.NewReader(aliasfile)
|
|
n = loadMergedTable(buffered, taxonomy)
|
|
log.Printf("%d merged taxa read\n", n)
|
|
|
|
root, _, err := taxonomy.Taxon("1")
|
|
|
|
if err != nil {
|
|
log.Fatal("cannot find the root taxon (1) in the NCBI tax dump")
|
|
}
|
|
|
|
taxonomy.SetRoot(root)
|
|
|
|
return taxonomy, nil
|
|
}
|