mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-12-08 08:40:26 +00:00
Changes to be committed:
	modified: cmd/obitools/obitag/main.go
	modified: cmd/obitools/obitaxonomy/main.go
	modified: pkg/obiformats/csvtaxdump_read.go
	modified: pkg/obiformats/ecopcr_read.go
	modified: pkg/obiformats/ncbitaxdump_read.go
	modified: pkg/obiformats/ncbitaxdump_readtar.go
	modified: pkg/obiformats/newick_write.go
	modified: pkg/obiformats/options.go
	modified: pkg/obiformats/taxonomy_read.go
	modified: pkg/obiformats/universal_read.go
	modified: pkg/obiiter/extract_taxonomy.go
	modified: pkg/obioptions/options.go
	modified: pkg/obioptions/version.go
	new file: pkg/obiphylo/tree.go
	modified: pkg/obiseq/biosequenceslice.go
	modified: pkg/obiseq/taxonomy_methods.go
	modified: pkg/obitax/taxonomy.go
	modified: pkg/obitax/taxonset.go
	modified: pkg/obitools/obiconvert/sequence_reader.go
	modified: pkg/obitools/obitag/obitag.go
	modified: pkg/obitools/obitaxonomy/obitaxonomy.go
	modified: pkg/obitools/obitaxonomy/options.go
	deleted: sample/.DS_Store
This commit is contained in:
@@ -10,7 +10,7 @@ import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
func LoadCSVTaxonomy(path string, onlysn bool) (*obitax.Taxonomy, error) {
|
||||
func LoadCSVTaxonomy(path string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error) {
|
||||
|
||||
log.Infof("Loading taxonomy from csv file: %s", path)
|
||||
|
||||
|
||||
@@ -4,13 +4,10 @@ import (
|
||||
"encoding/csv"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
gzip "github.com/klauspost/pgzip"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||
@@ -177,8 +174,10 @@ func ReadEcoPCR(reader io.Reader, options ...WithOption) (obiiter.IBioSequence,
|
||||
}()
|
||||
|
||||
go func() {
|
||||
var err error = nil
|
||||
var seq *obiseq.BioSequence
|
||||
|
||||
seq, err := __read_ecopcr_bioseq__(&ecopcr)
|
||||
seq, err = __read_ecopcr_bioseq__(&ecopcr)
|
||||
seq.SetSource(opt.Source())
|
||||
slice := make(obiseq.BioSequenceSlice, 0, opt.BatchSize())
|
||||
i := 0
|
||||
@@ -194,7 +193,12 @@ func ReadEcoPCR(reader io.Reader, options ...WithOption) (obiiter.IBioSequence,
|
||||
}
|
||||
|
||||
seq, err = __read_ecopcr_bioseq__(&ecopcr)
|
||||
seq.SetSource(opt.Source())
|
||||
|
||||
if err == nil {
|
||||
seq.SetSource(opt.Source())
|
||||
} else if err != io.EOF {
|
||||
log.Panicf("%+v", err)
|
||||
}
|
||||
}
|
||||
|
||||
if len(slice) > 0 {
|
||||
@@ -218,22 +222,21 @@ func ReadEcoPCR(reader io.Reader, options ...WithOption) (obiiter.IBioSequence,
|
||||
|
||||
func ReadEcoPCRFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
|
||||
var reader io.Reader
|
||||
var greader io.Reader
|
||||
var err error
|
||||
|
||||
options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))
|
||||
|
||||
reader, err = os.Open(filename)
|
||||
reader, err = obiutils.Ropen(filename)
|
||||
|
||||
if err == obiutils.ErrNoContent {
|
||||
log.Infof("file %s is empty", filename)
|
||||
return ReadEmptyFile(options...)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
log.Printf("open file error: %+v", err)
|
||||
return obiiter.NilIBioSequence, err
|
||||
}
|
||||
|
||||
// Test if the flux is compressed by gzip
|
||||
greader, err = gzip.NewReader(reader)
|
||||
if err == nil {
|
||||
reader = greader
|
||||
}
|
||||
|
||||
return ReadEcoPCR(reader, options...)
|
||||
}
|
||||
|
||||
@@ -149,7 +149,7 @@ func loadMergedTable(reader io.Reader, taxonomy *obitax.Taxonomy) int {
|
||||
// Returns:
|
||||
// - A pointer to the obitax.Taxonomy object containing the loaded taxonomy data, or an error
|
||||
// if any of the files cannot be opened or read.
|
||||
func LoadNCBITaxDump(directory string, onlysn bool) (*obitax.Taxonomy, error) {
|
||||
func LoadNCBITaxDump(directory string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error) {
|
||||
|
||||
taxonomy := obitax.NewTaxonomy("NCBI Taxonomy", "taxon", obiutils.AsciiDigitSet)
|
||||
|
||||
|
||||
@@ -63,7 +63,7 @@ func IsNCBITarTaxDump(path string) bool {
|
||||
return citations && division && gencode && names && delnodes && gc && merged && nodes
|
||||
}
|
||||
|
||||
func LoadNCBITarTaxDump(path string, onlysn bool) (*obitax.Taxonomy, error) {
|
||||
func LoadNCBITarTaxDump(path string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error) {
|
||||
|
||||
taxonomy := obitax.NewTaxonomy("NCBI Taxonomy", "taxon", obiutils.AsciiDigitSet)
|
||||
|
||||
|
||||
@@ -54,7 +54,7 @@ func (tree *Tree) Newick(level int, taxid, scientific_name, rank bool) string {
|
||||
if scientific_name {
|
||||
buffer.WriteByte(' ')
|
||||
}
|
||||
buffer.WriteByte('-')
|
||||
// buffer.WriteByte('-')
|
||||
if taxid {
|
||||
buffer.WriteString(*tree.TaxNode.Id())
|
||||
if rank {
|
||||
@@ -64,7 +64,7 @@ func (tree *Tree) Newick(level int, taxid, scientific_name, rank bool) string {
|
||||
if rank {
|
||||
buffer.WriteString(tree.TaxNode.Rank())
|
||||
}
|
||||
buffer.WriteByte('-')
|
||||
//buffer.WriteByte('-')
|
||||
}
|
||||
if scientific_name || taxid || rank {
|
||||
buffer.WriteByte('\'')
|
||||
@@ -85,24 +85,14 @@ func Newick(taxa *obitax.TaxonSet, taxid, scientific_name, rank bool) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
iterator := taxa.Sort().Iterator()
|
||||
root := taxa.Sort().Get(0)
|
||||
tree, err := taxa.AsPhyloTree(root)
|
||||
|
||||
nodes := make(map[*string]*Tree, taxa.Len())
|
||||
trees := make([]*Tree, 0)
|
||||
|
||||
for iterator.Next() {
|
||||
taxon := iterator.Get()
|
||||
|
||||
tree := &Tree{TaxNode: taxon.Node}
|
||||
if parent, ok := nodes[taxon.Parent().Node.Id()]; ok {
|
||||
parent.Children = append(parent.Children, tree)
|
||||
} else {
|
||||
trees = append(trees, tree)
|
||||
}
|
||||
nodes[taxon.Node.Id()] = tree
|
||||
if err != nil {
|
||||
log.Fatalf("Cannot build taxonomy tree: %v", err)
|
||||
}
|
||||
|
||||
return trees[0].Newick(0, taxid, scientific_name, rank)
|
||||
return tree.Newick(0)
|
||||
}
|
||||
|
||||
func WriteNewick(iterator *obitax.ITaxon,
|
||||
|
||||
@@ -42,6 +42,7 @@ type __options__ struct {
|
||||
with_rank bool
|
||||
with_taxid bool
|
||||
with_scientific_name bool
|
||||
without_root_path bool
|
||||
raw_taxid bool
|
||||
with_metadata []string
|
||||
}
|
||||
@@ -88,6 +89,7 @@ func MakeOptions(setters []WithOption) Options {
|
||||
with_rank: true,
|
||||
with_taxid: true,
|
||||
with_scientific_name: false,
|
||||
without_root_path: false,
|
||||
raw_taxid: false,
|
||||
}
|
||||
|
||||
@@ -250,6 +252,11 @@ func (o *Options) WithScientificName() bool {
|
||||
return o.pointer.with_scientific_name
|
||||
}
|
||||
|
||||
// WithoutRootPath reports whether the without_root_path option is set.
// It returns the without_root_path field of the underlying options struct.
|
||||
func (o *Options) WithoutRootPath() bool {
|
||||
return o.pointer.without_root_path
|
||||
}
|
||||
|
||||
// RawTaxid returns whether the raw taxid option is enabled.
|
||||
// It retrieves the setting from the underlying options.
|
||||
func (o *Options) RawTaxid() bool {
|
||||
@@ -576,6 +583,13 @@ func OptionsWithScientificName(value bool) WithOption {
|
||||
return f
|
||||
}
|
||||
|
||||
// OptionWithoutRootPath returns a WithOption that, when applied, stores value
// in the without_root_path field of the options it is given.
func OptionWithoutRootPath(value bool) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.without_root_path = value
|
||||
})
|
||||
return f
|
||||
}
|
||||
|
||||
func OptionsRawTaxid(value bool) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.raw_taxid = value
|
||||
|
||||
@@ -11,7 +11,7 @@ import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type TaxonomyLoader func(path string, onlysn bool) (*obitax.Taxonomy, error)
|
||||
type TaxonomyLoader func(path string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error)
|
||||
|
||||
func DetectTaxonomyTarFormat(path string) (TaxonomyLoader, error) {
|
||||
|
||||
@@ -67,26 +67,28 @@ func DetectTaxonomyFormat(path string) (TaxonomyLoader, error) {
|
||||
case "application/x-tar":
|
||||
return DetectTaxonomyTarFormat(path)
|
||||
case "text/fasta":
|
||||
return func(path string, onlysn bool) (*obitax.Taxonomy, error) {
|
||||
return func(path string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error) {
|
||||
input, err := ReadFastaFromFile(path)
|
||||
input = input.NumberSequences(1, true)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
_, data := input.Load()
|
||||
|
||||
return data.ExtractTaxonomy(nil)
|
||||
return data.ExtractTaxonomy(nil, seqAsTaxa)
|
||||
}, nil
|
||||
case "text/fastq":
|
||||
return func(path string, onlysn bool) (*obitax.Taxonomy, error) {
|
||||
return func(path string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error) {
|
||||
input, err := ReadFastqFromFile(path)
|
||||
input = input.NumberSequences(1, true)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
_, data := input.Load()
|
||||
|
||||
return data.ExtractTaxonomy(nil)
|
||||
return data.ExtractTaxonomy(nil, seqAsTaxa)
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -96,14 +98,14 @@ func DetectTaxonomyFormat(path string) (TaxonomyLoader, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func LoadTaxonomy(path string, onlysn bool) (*obitax.Taxonomy, error) {
|
||||
func LoadTaxonomy(path string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error) {
|
||||
loader, err := DetectTaxonomyFormat(path)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
taxonomy, err := loader(path, onlysn)
|
||||
taxonomy, err := loader(path, onlysn, seqAsTaxa)
|
||||
|
||||
return taxonomy, err
|
||||
}
|
||||
|
||||
@@ -154,8 +154,7 @@ func ReadSequencesFromFile(filename string,
|
||||
return obiiter.NilIBioSequence, nil
|
||||
}
|
||||
|
||||
// func ReadSequencesFromStdin(options ...WithOption) obiiter.IBioSequence {
|
||||
|
||||
// options = append(options, OptionsSource("stdin"))
|
||||
|
||||
// }
|
||||
// ReadSequencesFromStdin appends OptionCloseFile() to the supplied options and
// delegates to ReadSequencesFromFile with the filename "-", returning its
// iterator and error unchanged.
// NOTE(review): "-" is presumably treated as standard input by
// ReadSequencesFromFile — confirm in that function.
func ReadSequencesFromStdin(options ...WithOption) (obiiter.IBioSequence, error) {
|
||||
options = append(options, OptionCloseFile())
|
||||
return ReadSequencesFromFile("-", options...)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user