mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Changes to be committed:
modified: cmd/obitools/obitag/main.go modified: cmd/obitools/obitaxonomy/main.go modified: pkg/obiformats/csvtaxdump_read.go modified: pkg/obiformats/ecopcr_read.go modified: pkg/obiformats/ncbitaxdump_read.go modified: pkg/obiformats/ncbitaxdump_readtar.go modified: pkg/obiformats/newick_write.go modified: pkg/obiformats/options.go modified: pkg/obiformats/taxonomy_read.go modified: pkg/obiformats/universal_read.go modified: pkg/obiiter/extract_taxonomy.go modified: pkg/obioptions/options.go modified: pkg/obioptions/version.go new file: pkg/obiphylo/tree.go modified: pkg/obiseq/biosequenceslice.go modified: pkg/obiseq/taxonomy_methods.go modified: pkg/obitax/taxonomy.go modified: pkg/obitax/taxonset.go modified: pkg/obitools/obiconvert/sequence_reader.go modified: pkg/obitools/obitag/obitag.go modified: pkg/obitools/obitaxonomy/obitaxonomy.go modified: pkg/obitools/obitaxonomy/options.go deleted: sample/.DS_Store
This commit is contained in:
@ -11,6 +11,7 @@ import (
|
|||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitag"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitag"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitaxonomy"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
@ -58,7 +59,7 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if taxo == nil {
|
if taxo == nil {
|
||||||
taxo, err = references.ExtractTaxonomy(nil)
|
taxo, err = references.ExtractTaxonomy(nil, obitaxonomy.CLINewickWithLeaves())
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("No taxonomy specified or extractable from reference database: %v", err)
|
log.Fatalf("No taxonomy specified or extractable from reference database: %v", err)
|
||||||
|
@ -62,12 +62,13 @@ func main() {
|
|||||||
|
|
||||||
case obitaxonomy.CLIExtractTaxonomy():
|
case obitaxonomy.CLIExtractTaxonomy():
|
||||||
iter, err := obiconvert.CLIReadBioSequences(args...)
|
iter, err := obiconvert.CLIReadBioSequences(args...)
|
||||||
|
iter = iter.NumberSequences(1, true)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("Cannot extract taxonomy: %v", err)
|
log.Fatalf("Cannot extract taxonomy: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
taxonomy, err := iter.ExtractTaxonomy()
|
taxonomy, err := iter.ExtractTaxonomy(obitaxonomy.CLINewickWithLeaves())
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("Cannot extract taxonomy: %v", err)
|
log.Fatalf("Cannot extract taxonomy: %v", err)
|
||||||
|
@ -10,7 +10,7 @@ import (
|
|||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
func LoadCSVTaxonomy(path string, onlysn bool) (*obitax.Taxonomy, error) {
|
func LoadCSVTaxonomy(path string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error) {
|
||||||
|
|
||||||
log.Infof("Loading taxonomy from csv file: %s", path)
|
log.Infof("Loading taxonomy from csv file: %s", path)
|
||||||
|
|
||||||
|
@ -4,13 +4,10 @@ import (
|
|||||||
"encoding/csv"
|
"encoding/csv"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
|
||||||
"path"
|
"path"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
gzip "github.com/klauspost/pgzip"
|
|
||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
@ -177,8 +174,10 @@ func ReadEcoPCR(reader io.Reader, options ...WithOption) (obiiter.IBioSequence,
|
|||||||
}()
|
}()
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
|
var err error = nil
|
||||||
|
var seq *obiseq.BioSequence
|
||||||
|
|
||||||
seq, err := __read_ecopcr_bioseq__(&ecopcr)
|
seq, err = __read_ecopcr_bioseq__(&ecopcr)
|
||||||
seq.SetSource(opt.Source())
|
seq.SetSource(opt.Source())
|
||||||
slice := make(obiseq.BioSequenceSlice, 0, opt.BatchSize())
|
slice := make(obiseq.BioSequenceSlice, 0, opt.BatchSize())
|
||||||
i := 0
|
i := 0
|
||||||
@ -194,7 +193,12 @@ func ReadEcoPCR(reader io.Reader, options ...WithOption) (obiiter.IBioSequence,
|
|||||||
}
|
}
|
||||||
|
|
||||||
seq, err = __read_ecopcr_bioseq__(&ecopcr)
|
seq, err = __read_ecopcr_bioseq__(&ecopcr)
|
||||||
seq.SetSource(opt.Source())
|
|
||||||
|
if err == nil {
|
||||||
|
seq.SetSource(opt.Source())
|
||||||
|
} else if err != io.EOF {
|
||||||
|
log.Panicf("%+v", err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(slice) > 0 {
|
if len(slice) > 0 {
|
||||||
@ -218,22 +222,21 @@ func ReadEcoPCR(reader io.Reader, options ...WithOption) (obiiter.IBioSequence,
|
|||||||
|
|
||||||
func ReadEcoPCRFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
|
func ReadEcoPCRFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
var reader io.Reader
|
var reader io.Reader
|
||||||
var greader io.Reader
|
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))
|
options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))
|
||||||
|
|
||||||
reader, err = os.Open(filename)
|
reader, err = obiutils.Ropen(filename)
|
||||||
|
|
||||||
|
if err == obiutils.ErrNoContent {
|
||||||
|
log.Infof("file %s is empty", filename)
|
||||||
|
return ReadEmptyFile(options...)
|
||||||
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("open file error: %+v", err)
|
log.Printf("open file error: %+v", err)
|
||||||
return obiiter.NilIBioSequence, err
|
return obiiter.NilIBioSequence, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test if the flux is compressed by gzip
|
|
||||||
greader, err = gzip.NewReader(reader)
|
|
||||||
if err == nil {
|
|
||||||
reader = greader
|
|
||||||
}
|
|
||||||
|
|
||||||
return ReadEcoPCR(reader, options...)
|
return ReadEcoPCR(reader, options...)
|
||||||
}
|
}
|
||||||
|
@ -149,7 +149,7 @@ func loadMergedTable(reader io.Reader, taxonomy *obitax.Taxonomy) int {
|
|||||||
// Returns:
|
// Returns:
|
||||||
// - A pointer to the obitax.Taxonomy object containing the loaded taxonomy data, or an error
|
// - A pointer to the obitax.Taxonomy object containing the loaded taxonomy data, or an error
|
||||||
// if any of the files cannot be opened or read.
|
// if any of the files cannot be opened or read.
|
||||||
func LoadNCBITaxDump(directory string, onlysn bool) (*obitax.Taxonomy, error) {
|
func LoadNCBITaxDump(directory string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error) {
|
||||||
|
|
||||||
taxonomy := obitax.NewTaxonomy("NCBI Taxonomy", "taxon", obiutils.AsciiDigitSet)
|
taxonomy := obitax.NewTaxonomy("NCBI Taxonomy", "taxon", obiutils.AsciiDigitSet)
|
||||||
|
|
||||||
|
@ -63,7 +63,7 @@ func IsNCBITarTaxDump(path string) bool {
|
|||||||
return citations && division && gencode && names && delnodes && gc && merged && nodes
|
return citations && division && gencode && names && delnodes && gc && merged && nodes
|
||||||
}
|
}
|
||||||
|
|
||||||
func LoadNCBITarTaxDump(path string, onlysn bool) (*obitax.Taxonomy, error) {
|
func LoadNCBITarTaxDump(path string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error) {
|
||||||
|
|
||||||
taxonomy := obitax.NewTaxonomy("NCBI Taxonomy", "taxon", obiutils.AsciiDigitSet)
|
taxonomy := obitax.NewTaxonomy("NCBI Taxonomy", "taxon", obiutils.AsciiDigitSet)
|
||||||
|
|
||||||
|
@ -54,7 +54,7 @@ func (tree *Tree) Newick(level int, taxid, scientific_name, rank bool) string {
|
|||||||
if scientific_name {
|
if scientific_name {
|
||||||
buffer.WriteByte(' ')
|
buffer.WriteByte(' ')
|
||||||
}
|
}
|
||||||
buffer.WriteByte('-')
|
// buffer.WriteByte('-')
|
||||||
if taxid {
|
if taxid {
|
||||||
buffer.WriteString(*tree.TaxNode.Id())
|
buffer.WriteString(*tree.TaxNode.Id())
|
||||||
if rank {
|
if rank {
|
||||||
@ -64,7 +64,7 @@ func (tree *Tree) Newick(level int, taxid, scientific_name, rank bool) string {
|
|||||||
if rank {
|
if rank {
|
||||||
buffer.WriteString(tree.TaxNode.Rank())
|
buffer.WriteString(tree.TaxNode.Rank())
|
||||||
}
|
}
|
||||||
buffer.WriteByte('-')
|
//buffer.WriteByte('-')
|
||||||
}
|
}
|
||||||
if scientific_name || taxid || rank {
|
if scientific_name || taxid || rank {
|
||||||
buffer.WriteByte('\'')
|
buffer.WriteByte('\'')
|
||||||
@ -85,24 +85,14 @@ func Newick(taxa *obitax.TaxonSet, taxid, scientific_name, rank bool) string {
|
|||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
iterator := taxa.Sort().Iterator()
|
root := taxa.Sort().Get(0)
|
||||||
|
tree, err := taxa.AsPhyloTree(root)
|
||||||
|
|
||||||
nodes := make(map[*string]*Tree, taxa.Len())
|
if err != nil {
|
||||||
trees := make([]*Tree, 0)
|
log.Fatalf("Cannot build taxonomy tree: %v", err)
|
||||||
|
|
||||||
for iterator.Next() {
|
|
||||||
taxon := iterator.Get()
|
|
||||||
|
|
||||||
tree := &Tree{TaxNode: taxon.Node}
|
|
||||||
if parent, ok := nodes[taxon.Parent().Node.Id()]; ok {
|
|
||||||
parent.Children = append(parent.Children, tree)
|
|
||||||
} else {
|
|
||||||
trees = append(trees, tree)
|
|
||||||
}
|
|
||||||
nodes[taxon.Node.Id()] = tree
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return trees[0].Newick(0, taxid, scientific_name, rank)
|
return tree.Newick(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteNewick(iterator *obitax.ITaxon,
|
func WriteNewick(iterator *obitax.ITaxon,
|
||||||
|
@ -42,6 +42,7 @@ type __options__ struct {
|
|||||||
with_rank bool
|
with_rank bool
|
||||||
with_taxid bool
|
with_taxid bool
|
||||||
with_scientific_name bool
|
with_scientific_name bool
|
||||||
|
without_root_path bool
|
||||||
raw_taxid bool
|
raw_taxid bool
|
||||||
with_metadata []string
|
with_metadata []string
|
||||||
}
|
}
|
||||||
@ -88,6 +89,7 @@ func MakeOptions(setters []WithOption) Options {
|
|||||||
with_rank: true,
|
with_rank: true,
|
||||||
with_taxid: true,
|
with_taxid: true,
|
||||||
with_scientific_name: false,
|
with_scientific_name: false,
|
||||||
|
without_root_path: false,
|
||||||
raw_taxid: false,
|
raw_taxid: false,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -250,6 +252,11 @@ func (o *Options) WithScientificName() bool {
|
|||||||
return o.pointer.with_scientific_name
|
return o.pointer.with_scientific_name
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// WithoutRootPath returns the current value of the without_root_path option.
// The value is read from the underlying options; it is true when the
// "without root path" behaviour has been requested, not when a root path
// is enabled.
|
||||||
|
func (o *Options) WithoutRootPath() bool {
|
||||||
|
return o.pointer.without_root_path
|
||||||
|
}
|
||||||
|
|
||||||
// RawTaxid returns whether the raw taxid option is enabled.
|
// RawTaxid returns whether the raw taxid option is enabled.
|
||||||
// It retrieves the setting from the underlying options.
|
// It retrieves the setting from the underlying options.
|
||||||
func (o *Options) RawTaxid() bool {
|
func (o *Options) RawTaxid() bool {
|
||||||
@ -576,6 +583,13 @@ func OptionsWithScientificName(value bool) WithOption {
|
|||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// OptionWithoutRootPath returns a WithOption setter that stores value in the
// without_root_path field of the options it is applied to.
func OptionWithoutRootPath(value bool) WithOption {
|
||||||
|
f := WithOption(func(opt Options) {
|
||||||
|
opt.pointer.without_root_path = value
|
||||||
|
})
|
||||||
|
return f
|
||||||
|
}
|
||||||
|
|
||||||
func OptionsRawTaxid(value bool) WithOption {
|
func OptionsRawTaxid(value bool) WithOption {
|
||||||
f := WithOption(func(opt Options) {
|
f := WithOption(func(opt Options) {
|
||||||
opt.pointer.raw_taxid = value
|
opt.pointer.raw_taxid = value
|
||||||
|
@ -11,7 +11,7 @@ import (
|
|||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
type TaxonomyLoader func(path string, onlysn bool) (*obitax.Taxonomy, error)
|
type TaxonomyLoader func(path string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error)
|
||||||
|
|
||||||
func DetectTaxonomyTarFormat(path string) (TaxonomyLoader, error) {
|
func DetectTaxonomyTarFormat(path string) (TaxonomyLoader, error) {
|
||||||
|
|
||||||
@ -67,26 +67,28 @@ func DetectTaxonomyFormat(path string) (TaxonomyLoader, error) {
|
|||||||
case "application/x-tar":
|
case "application/x-tar":
|
||||||
return DetectTaxonomyTarFormat(path)
|
return DetectTaxonomyTarFormat(path)
|
||||||
case "text/fasta":
|
case "text/fasta":
|
||||||
return func(path string, onlysn bool) (*obitax.Taxonomy, error) {
|
return func(path string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error) {
|
||||||
input, err := ReadFastaFromFile(path)
|
input, err := ReadFastaFromFile(path)
|
||||||
|
input = input.NumberSequences(1, true)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
_, data := input.Load()
|
_, data := input.Load()
|
||||||
|
|
||||||
return data.ExtractTaxonomy(nil)
|
return data.ExtractTaxonomy(nil, seqAsTaxa)
|
||||||
}, nil
|
}, nil
|
||||||
case "text/fastq":
|
case "text/fastq":
|
||||||
return func(path string, onlysn bool) (*obitax.Taxonomy, error) {
|
return func(path string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error) {
|
||||||
input, err := ReadFastqFromFile(path)
|
input, err := ReadFastqFromFile(path)
|
||||||
|
input = input.NumberSequences(1, true)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
_, data := input.Load()
|
_, data := input.Load()
|
||||||
|
|
||||||
return data.ExtractTaxonomy(nil)
|
return data.ExtractTaxonomy(nil, seqAsTaxa)
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -96,14 +98,14 @@ func DetectTaxonomyFormat(path string) (TaxonomyLoader, error) {
|
|||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func LoadTaxonomy(path string, onlysn bool) (*obitax.Taxonomy, error) {
|
func LoadTaxonomy(path string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error) {
|
||||||
loader, err := DetectTaxonomyFormat(path)
|
loader, err := DetectTaxonomyFormat(path)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
taxonomy, err := loader(path, onlysn)
|
taxonomy, err := loader(path, onlysn, seqAsTaxa)
|
||||||
|
|
||||||
return taxonomy, err
|
return taxonomy, err
|
||||||
}
|
}
|
||||||
|
@ -154,8 +154,7 @@ func ReadSequencesFromFile(filename string,
|
|||||||
return obiiter.NilIBioSequence, nil
|
return obiiter.NilIBioSequence, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// func ReadSequencesFromStdin(options ...WithOption) obiiter.IBioSequence {
|
func ReadSequencesFromStdin(options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
|
options = append(options, OptionCloseFile())
|
||||||
// options = append(options, OptionsSource("stdin"))
|
return ReadSequencesFromFile("-", options...)
|
||||||
|
}
|
||||||
// }
|
|
||||||
|
@ -2,12 +2,12 @@ package obiiter
|
|||||||
|
|
||||||
import "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
import "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
|
|
||||||
func (iterator *IBioSequence) ExtractTaxonomy() (taxonomy *obitax.Taxonomy, err error) {
|
func (iterator *IBioSequence) ExtractTaxonomy(seqAsTaxa bool) (taxonomy *obitax.Taxonomy, err error) {
|
||||||
|
|
||||||
for iterator.Next() {
|
for iterator.Next() {
|
||||||
slice := iterator.Get().Slice()
|
slice := iterator.Get().Slice()
|
||||||
|
|
||||||
taxonomy, err = slice.ExtractTaxonomy(taxonomy)
|
taxonomy, err = slice.ExtractTaxonomy(taxonomy, seqAsTaxa)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
|
@ -17,10 +17,10 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var _Debug = false
|
var _Debug = false
|
||||||
var _BatchSize = 2000
|
|
||||||
var _Pprof = false
|
var _Pprof = false
|
||||||
var _PprofMudex = 10
|
var _PprofMudex = 10
|
||||||
var _PprofGoroutine = 6060
|
var _PprofGoroutine = 6060
|
||||||
|
var __seq_as_taxa__ = false
|
||||||
|
|
||||||
var __defaut_taxonomy_mutex__ sync.Mutex
|
var __defaut_taxonomy_mutex__ sync.Mutex
|
||||||
|
|
||||||
@ -102,6 +102,7 @@ func GenerateOptionParser(program string,
|
|||||||
taxonomy, err := obiformats.LoadTaxonomy(
|
taxonomy, err := obiformats.LoadTaxonomy(
|
||||||
obidefault.SelectedTaxonomy(),
|
obidefault.SelectedTaxonomy(),
|
||||||
!obidefault.AreAlternativeNamesSelected(),
|
!obidefault.AreAlternativeNamesSelected(),
|
||||||
|
SeqAsTaxa(),
|
||||||
)
|
)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -218,6 +219,9 @@ func LoadTaxonomyOptionSet(options *getoptions.GetOpt, required, alternatiive bo
|
|||||||
options.BoolVar(obidefault.UseRawTaxidsPtr(), "raw-taxid", obidefault.UseRawTaxids(),
|
options.BoolVar(obidefault.UseRawTaxidsPtr(), "raw-taxid", obidefault.UseRawTaxids(),
|
||||||
options.Description("When set, taxids are printed in files with any supplementary information (taxon name and rank)"),
|
options.Description("When set, taxids are printed in files with any supplementary information (taxon name and rank)"),
|
||||||
)
|
)
|
||||||
|
options.BoolVar(&__seq_as_taxa__, "with-leaves", __seq_as_taxa__,
|
||||||
|
options.Description("If taxonomy is extracted from a sequence file, sequences are added as leave of their taxid annotation"),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
// CLIIsDebugMode returns whether the CLI is in debug mode.
|
// CLIIsDebugMode returns whether the CLI is in debug mode.
|
||||||
@ -232,6 +236,10 @@ func CLIIsDebugMode() bool {
|
|||||||
return _Debug
|
return _Debug
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SeqAsTaxa returns the current value of the package-level __seq_as_taxa__
// flag, which LoadTaxonomyOptionSet binds to the "with-leaves" command line
// option.
func SeqAsTaxa() bool {
|
||||||
|
return __seq_as_taxa__
|
||||||
|
}
|
||||||
|
|
||||||
// SetDebugOn sets the debug mode on.
|
// SetDebugOn sets the debug mode on.
|
||||||
func SetDebugOn() {
|
func SetDebugOn() {
|
||||||
_Debug = true
|
_Debug = true
|
||||||
|
@ -8,7 +8,7 @@ import (
|
|||||||
// corresponds to the last commit, and not the one when the file will be
|
// corresponds to the last commit, and not the one when the file will be
|
||||||
// commited
|
// commited
|
||||||
|
|
||||||
var _Commit = "f9324dd"
|
var _Commit = "3424d30"
|
||||||
var _Version = "Release 4.4.0"
|
var _Version = "Release 4.4.0"
|
||||||
|
|
||||||
// Version returns the version of the obitools package.
|
// Version returns the version of the obitools package.
|
||||||
|
71
pkg/obiphylo/tree.go
Normal file
71
pkg/obiphylo/tree.go
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
package obiphylo
|
||||||
|
|
||||||
|
import (
	"fmt"
	"math"
	"sort"
	"strings"
)
|
||||||
|
|
||||||
|
// PhyloNode is a node of a rooted tree.
//
// Children maps each direct child to the length of the branch leading to it;
// a NaN length is treated as "no length" by Newick. Attributes holds free-form
// annotations attached to the node. The zero value is a usable, unnamed leaf:
// both maps are allocated lazily by AddChild and SetAttribute.
type PhyloNode struct {
	Name       string
	Children   map[*PhyloNode]float64
	Attributes map[string]any
}

// NewPhyloNode returns a new unnamed node without children or attributes.
func NewPhyloNode() *PhyloNode {
	return &PhyloNode{}
}

// AddChild registers child as a direct descendant of n, reached through a
// branch of length distance. Adding the same child again overwrites the
// previously stored distance.
func (n *PhyloNode) AddChild(child *PhyloNode, distance float64) {
	if n.Children == nil {
		n.Children = map[*PhyloNode]float64{}
	}
	n.Children[child] = distance
}

// SetAttribute stores value under key in the attributes of n, replacing any
// value previously stored under the same key.
func (n *PhyloNode) SetAttribute(key string, value any) {
	if n.Attributes == nil {
		n.Attributes = make(map[string]any)
	}
	n.Attributes[key] = value
}

// GetDistanceToChild returns the branch length recorded for child.
// It returns 0 when child is not a direct child of n.
func (n *PhyloNode) GetDistanceToChild(child *PhyloNode) float64 {
	return n.Children[child]
}

// GetAttribute returns the value stored under key, or nil when no such
// attribute exists.
func (n *PhyloNode) GetAttribute(key string) any {
	return n.Attributes[key]
}

// Newick renders the subtree rooted at n in Newick notation.
//
// level is the nesting depth of n: every line is indented by level spaces and
// the terminating ";\n" is appended only when level is 0, so callers pass 0
// for the root. Branch lengths are written as ":%.5f" and omitted when NaN.
//
// Children are emitted ordered by name (ties broken by their rendered text)
// so that the same tree always produces the same output; ranging directly
// over the Children map would yield a different sibling order on each call.
//
// NOTE(review): node names are written verbatim, without Newick quoting —
// confirm that callers only use names that are valid unquoted labels.
func (n *PhyloNode) Newick(level int) string {
	var result strings.Builder

	indent := strings.Repeat(" ", level)
	result.WriteString(indent)

	if len(n.Children) > 0 {
		type renderedChild struct {
			name string
			text string
		}

		// Render every child first, then sort: map iteration order is random.
		children := make([]renderedChild, 0, len(n.Children))
		for child, distance := range n.Children {
			text := child.Newick(level + 1)
			if !math.IsNaN(distance) {
				text += fmt.Sprintf(":%.5f", distance)
			}
			children = append(children, renderedChild{name: child.Name, text: text})
		}

		sort.Slice(children, func(i, j int) bool {
			if children[i].name != children[j].name {
				return children[i].name < children[j].name
			}
			return children[i].text < children[j].text
		})

		result.WriteString("(\n")
		last := len(children) - 1
		for i, child := range children {
			result.WriteString(child.text)
			if i < last {
				result.WriteByte(',')
			}
			result.WriteString("\n")
		}
		result.WriteString(indent)
		result.WriteByte(')')
	}

	result.WriteString(n.Name)

	if level == 0 {
		result.WriteString(";\n")
	}

	return result.String()
}
|
@ -1,6 +1,9 @@
|
|||||||
package obiseq
|
package obiseq
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obilog"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obilog"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
@ -182,11 +185,26 @@ func (s *BioSequenceSlice) SortOnLength(reverse bool) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *BioSequenceSlice) ExtractTaxonomy(taxonomy *obitax.Taxonomy) (*obitax.Taxonomy, error) {
|
func (s *BioSequenceSlice) ExtractTaxonomy(taxonomy *obitax.Taxonomy, seqAsTaxa bool) (*obitax.Taxonomy, error) {
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
for _, s := range *s {
|
for _, s := range *s {
|
||||||
taxonomy, err = taxonomy.InsertPathString(s.Path())
|
path := s.Path()
|
||||||
|
if seqAsTaxa {
|
||||||
|
if len(path) == 0 {
|
||||||
|
return nil, fmt.Errorf("sequence %v has no path", s.Id())
|
||||||
|
}
|
||||||
|
last := path[len(path)-1]
|
||||||
|
taxname, _ := obiutils.SplitInTwo(last, ':')
|
||||||
|
if idx, ok := s.GetIntAttribute("seq_number"); !ok {
|
||||||
|
return nil, errors.New("sequences are not numbered")
|
||||||
|
} else {
|
||||||
|
path = append(path, fmt.Sprintf("%s:SEQ%010d [%s]@sequence", taxname, idx, s.Id()))
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
taxonomy, err = taxonomy.InsertPathString(path)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@ -189,7 +189,11 @@ func (sequence *BioSequence) Path() []string {
|
|||||||
path, ok := sequence.GetAttribute("taxonomic_path")
|
path, ok := sequence.GetAttribute("taxonomic_path")
|
||||||
|
|
||||||
if !ok {
|
if !ok {
|
||||||
return nil
|
if taxo := obitax.DefaultTaxonomy(); taxo != nil {
|
||||||
|
path = sequence.SetPath(taxo)
|
||||||
|
} else {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
slice, err := obiutils.InterfaceToStringSlice(path)
|
slice, err := obiutils.InterfaceToStringSlice(path)
|
||||||
|
@ -11,6 +11,7 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiphylo"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -415,3 +416,10 @@ func (taxonomy *Taxonomy) InsertPathString(path []string) (*Taxonomy, error) {
|
|||||||
|
|
||||||
return taxonomy, nil
|
return taxonomy, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AsPhyloTree converts the whole taxonomy into an obiphylo tree.
// It takes the node of taxo.Root() as the tree root, turns the taxonomy into
// a taxon set with AsTaxonSet and delegates the construction to
// TaxonSet.AsPhyloTree, whose result and error are returned unchanged.
//
// NOTE(review): the result of taxo.Root() is dereferenced without a nil
// check — confirm that Root() never returns nil for a rootless taxonomy.
func (taxo *Taxonomy) AsPhyloTree() (*obiphylo.PhyloNode, error) {
|
||||||
|
root := taxo.Root().Node
|
||||||
|
taxa := taxo.AsTaxonSet()
|
||||||
|
|
||||||
|
return taxa.AsPhyloTree(root)
|
||||||
|
}
|
||||||
|
@ -7,7 +7,12 @@ corresponding TaxNode instances, along with methods for managing and querying th
|
|||||||
|
|
||||||
package obitax
|
package obitax
|
||||||
|
|
||||||
import log "github.com/sirupsen/logrus"
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiphylo"
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
)
|
||||||
|
|
||||||
// TaxonSet represents a collection of taxa within a taxonomy.
|
// TaxonSet represents a collection of taxa within a taxonomy.
|
||||||
// It holds a mapping of taxon identifiers to their corresponding TaxNode instances,
|
// It holds a mapping of taxon identifiers to their corresponding TaxNode instances,
|
||||||
@ -224,3 +229,36 @@ func (set *TaxonSet) Sort() *TaxonSlice {
|
|||||||
|
|
||||||
return taxa
|
return taxa
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (taxo *TaxonSet) AsPhyloTree(root *TaxNode) (*obiphylo.PhyloNode, error) {
|
||||||
|
nodes := make(map[*string]*obiphylo.PhyloNode, taxo.Len())
|
||||||
|
tsi := taxo.Iterator()
|
||||||
|
|
||||||
|
log.Warnf("Coucou")
|
||||||
|
for tsi.Next() {
|
||||||
|
taxon := tsi.Get()
|
||||||
|
id := taxon.Node.Id()
|
||||||
|
node := obiphylo.NewPhyloNode()
|
||||||
|
rank := taxon.Rank()
|
||||||
|
node.Name = fmt.Sprintf("%s -%s@%s-", taxon.ScientificName(), *id, rank)
|
||||||
|
node.SetAttribute("rank", rank)
|
||||||
|
node.SetAttribute("parent", taxon.Parent().Node.Id())
|
||||||
|
nodes[id] = node
|
||||||
|
}
|
||||||
|
|
||||||
|
for id, node := range nodes {
|
||||||
|
if id == root.Id() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
pid := node.GetAttribute("parent").(*string)
|
||||||
|
parent := nodes[pid]
|
||||||
|
if parent != nil {
|
||||||
|
parent.AddChild(node, 1)
|
||||||
|
} else {
|
||||||
|
return nil, fmt.Errorf("cannot find parent node for %s", *pid)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rid := root.Id()
|
||||||
|
return nodes[rid], nil
|
||||||
|
}
|
||||||
|
@ -133,7 +133,7 @@ func CLIReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) {
|
|||||||
case "csv":
|
case "csv":
|
||||||
iterator, err = obiformats.ReadCSV(os.Stdin, opts...)
|
iterator, err = obiformats.ReadCSV(os.Stdin, opts...)
|
||||||
default:
|
default:
|
||||||
iterator = obiformats.ReadFastSeqFromStdin(opts...)
|
iterator, err = obiformats.ReadSequencesFromStdin(opts...)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -186,6 +186,9 @@ func Identify(sequence *obiseq.BioSequence,
|
|||||||
// log.Debugln("Need of indexing")
|
// log.Debugln("Need of indexing")
|
||||||
newidx++
|
newidx++
|
||||||
idx = obirefidx.IndexSequence(seqidxs[i], references, &refcounts, taxa, taxo)
|
idx = obirefidx.IndexSequence(seqidxs[i], references, &refcounts, taxa, taxo)
|
||||||
|
if len(idx) == 0 {
|
||||||
|
log.Panicf("%s idx: %v", references[seqidxs[i]].Id(), idx)
|
||||||
|
}
|
||||||
references[seqidxs[i]].SetOBITagRefIndex(idx)
|
references[seqidxs[i]].SetOBITagRefIndex(idx)
|
||||||
log.Debugln(references[seqidxs[i]].Id(), idx)
|
log.Debugln(references[seqidxs[i]].Id(), idx)
|
||||||
}
|
}
|
||||||
|
@ -91,6 +91,7 @@ func CLINewickWriter(iterator *obitax.ITaxon,
|
|||||||
obiformats.OptionsWithRank(CLIWithRank()),
|
obiformats.OptionsWithRank(CLIWithRank()),
|
||||||
obiformats.OptionsWithScientificName(CLIWithScientificName()),
|
obiformats.OptionsWithScientificName(CLIWithScientificName()),
|
||||||
obiformats.OptionsWithTaxid(true),
|
obiformats.OptionsWithTaxid(true),
|
||||||
|
obiformats.OptionWithoutRootPath(CLINewickWithoutRoot()),
|
||||||
)
|
)
|
||||||
|
|
||||||
filename := obiconvert.CLIOutPutFileName()
|
filename := obiconvert.CLIOutPutFileName()
|
||||||
|
@ -25,6 +25,8 @@ var __to_dump__ = ""
|
|||||||
var __download_ncbi__ = false
|
var __download_ncbi__ = false
|
||||||
var __extract_taxonomy__ = false
|
var __extract_taxonomy__ = false
|
||||||
var __newick__ = false
|
var __newick__ = false
|
||||||
|
var __newick_with_leaves__ = false
|
||||||
|
var __newick_without_root__ = false
|
||||||
|
|
||||||
func FilterTaxonomyOptionSet(options *getoptions.GetOpt) {
|
func FilterTaxonomyOptionSet(options *getoptions.GetOpt) {
|
||||||
options.BoolVar(&__rank_list__, "rank-list", false,
|
options.BoolVar(&__rank_list__, "rank-list", false,
|
||||||
@ -81,6 +83,9 @@ func OptionSet(options *getoptions.GetOpt) {
|
|||||||
options.BoolVar(&__newick__, "newick-output", __newick__,
|
options.BoolVar(&__newick__, "newick-output", __newick__,
|
||||||
options.Description("Format the resulting taxonomy as a newick tree"),
|
options.Description("Format the resulting taxonomy as a newick tree"),
|
||||||
)
|
)
|
||||||
|
options.BoolVar(&__newick_without_root__, "without-root", __newick_without_root__,
|
||||||
|
options.Description("If used, do not include the non-branched path to the root in the output"),
|
||||||
|
)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -166,6 +171,14 @@ func CLIAsNewick() bool {
|
|||||||
return __newick__
|
return __newick__
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CLINewickWithLeaves returns the current value of the package-level
// __newick_with_leaves__ flag, which is initialised to false.
//
// NOTE(review): no option registration for __newick_with_leaves__ is visible
// here; the "with-leaves" option appears to be bound to a different variable
// in obioptions — confirm that this flag is actually set somewhere.
func CLINewickWithLeaves() bool {
|
||||||
|
return __newick_with_leaves__
|
||||||
|
}
|
||||||
|
|
||||||
|
// CLINewickWithoutRoot returns the current value of the package-level
// __newick_without_root__ flag, which OptionSet binds to the "without-root"
// command line option.
func CLINewickWithoutRoot() bool {
|
||||||
|
return __newick_without_root__
|
||||||
|
}
|
||||||
|
|
||||||
func CLIAskForRankList() bool {
|
func CLIAskForRankList() bool {
|
||||||
return __rank_list__
|
return __rank_list__
|
||||||
}
|
}
|
||||||
|
BIN
sample/.DS_Store
vendored
BIN
sample/.DS_Store
vendored
Binary file not shown.
Reference in New Issue
Block a user