From 6cb7a5a3529c7e967e628f32d0f739ec72c168e1 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Wed, 4 Jun 2025 09:48:10 +0200 Subject: [PATCH] Changes to be committed: modified: cmd/obitools/obitag/main.go modified: cmd/obitools/obitaxonomy/main.go modified: pkg/obiformats/csvtaxdump_read.go modified: pkg/obiformats/ecopcr_read.go modified: pkg/obiformats/ncbitaxdump_read.go modified: pkg/obiformats/ncbitaxdump_readtar.go modified: pkg/obiformats/newick_write.go modified: pkg/obiformats/options.go modified: pkg/obiformats/taxonomy_read.go modified: pkg/obiformats/universal_read.go modified: pkg/obiiter/extract_taxonomy.go modified: pkg/obioptions/options.go modified: pkg/obioptions/version.go new file: pkg/obiphylo/tree.go modified: pkg/obiseq/biosequenceslice.go modified: pkg/obiseq/taxonomy_methods.go modified: pkg/obitax/taxonomy.go modified: pkg/obitax/taxonset.go modified: pkg/obitools/obiconvert/sequence_reader.go modified: pkg/obitools/obitag/obitag.go modified: pkg/obitools/obitaxonomy/obitaxonomy.go modified: pkg/obitools/obitaxonomy/options.go deleted: sample/.DS_Store --- cmd/obitools/obitag/main.go | 3 +- cmd/obitools/obitaxonomy/main.go | 3 +- pkg/obiformats/csvtaxdump_read.go | 2 +- pkg/obiformats/ecopcr_read.go | 29 +++++---- pkg/obiformats/ncbitaxdump_read.go | 2 +- pkg/obiformats/ncbitaxdump_readtar.go | 2 +- pkg/obiformats/newick_write.go | 24 ++----- pkg/obiformats/options.go | 14 ++++ pkg/obiformats/taxonomy_read.go | 16 +++-- pkg/obiformats/universal_read.go | 9 ++- pkg/obiiter/extract_taxonomy.go | 4 +- pkg/obioptions/options.go | 10 ++- pkg/obioptions/version.go | 2 +- pkg/obiphylo/tree.go | 71 +++++++++++++++++++++ pkg/obiseq/biosequenceslice.go | 22 ++++++- pkg/obiseq/taxonomy_methods.go | 6 +- pkg/obitax/taxonomy.go | 8 +++ pkg/obitax/taxonset.go | 40 +++++++++++- pkg/obitools/obiconvert/sequence_reader.go | 2 +- pkg/obitools/obitag/obitag.go | 3 + pkg/obitools/obitaxonomy/obitaxonomy.go | 1 + pkg/obitools/obitaxonomy/options.go | 13 ++++ sample/.DS_Store | Bin 10244 -> 0 bytes 23 files changed, 230 insertions(+), 56 deletions(-) create mode 100644 pkg/obiphylo/tree.go delete mode 100644 sample/.DS_Store diff --git a/cmd/obitools/obitag/main.go b/cmd/obitools/obitag/main.go index c93e422..302cffb 100644 --- a/cmd/obitools/obitag/main.go +++ b/cmd/obitools/obitag/main.go @@ -11,6 +11,7 @@ import ( "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitag" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitaxonomy" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" @@ -58,7 +59,7 @@ func main() { } if taxo == nil { - taxo, err = references.ExtractTaxonomy(nil) + taxo, err = references.ExtractTaxonomy(nil, obitaxonomy.CLINewickWithLeaves()) if err != nil { log.Fatalf("No taxonomy specified or extractable from reference database: %v", err) diff --git a/cmd/obitools/obitaxonomy/main.go b/cmd/obitools/obitaxonomy/main.go index 8e1bc7a..3901d3c 100644 --- a/cmd/obitools/obitaxonomy/main.go +++ b/cmd/obitools/obitaxonomy/main.go @@ -62,12 +62,13 @@ func main() { case obitaxonomy.CLIExtractTaxonomy(): iter, err := obiconvert.CLIReadBioSequences(args...) + iter = iter.NumberSequences(1, true) if err != nil { log.Fatalf("Cannot extract taxonomy: %v", err) } - taxonomy, err := iter.ExtractTaxonomy() + taxonomy, err := iter.ExtractTaxonomy(obitaxonomy.CLINewickWithLeaves()) if err != nil { log.Fatalf("Cannot extract taxonomy: %v", err) diff --git a/pkg/obiformats/csvtaxdump_read.go b/pkg/obiformats/csvtaxdump_read.go index bc17aa6..a0a5f26 100644 --- a/pkg/obiformats/csvtaxdump_read.go +++ b/pkg/obiformats/csvtaxdump_read.go @@ -10,7 +10,7 @@ import ( log "github.com/sirupsen/logrus" ) -func LoadCSVTaxonomy(path string, onlysn bool) (*obitax.Taxonomy, error) { +func LoadCSVTaxonomy(path string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error) { log.Infof("Loading taxonomy from csv file: %s", path) diff --git a/pkg/obiformats/ecopcr_read.go b/pkg/obiformats/ecopcr_read.go index 5706805..9dd4536 100644 --- a/pkg/obiformats/ecopcr_read.go +++ b/pkg/obiformats/ecopcr_read.go @@ -4,13 +4,10 @@ import ( "encoding/csv" "fmt" "io" - "os" "path" "strconv" "strings" - gzip "github.com/klauspost/pgzip" - log "github.com/sirupsen/logrus" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" @@ -177,8 +174,10 @@ func ReadEcoPCR(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, }() go func() { + var err error = nil + var seq *obiseq.BioSequence - seq, err := __read_ecopcr_bioseq__(&ecopcr) + seq, err = __read_ecopcr_bioseq__(&ecopcr) seq.SetSource(opt.Source()) slice := make(obiseq.BioSequenceSlice, 0, opt.BatchSize()) i := 0 @@ -194,7 +193,12 @@ func ReadEcoPCR(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, } seq, err = __read_ecopcr_bioseq__(&ecopcr) - seq.SetSource(opt.Source()) + + if err == nil { + seq.SetSource(opt.Source()) + } else if err != io.EOF { + log.Panicf("%+v", err) + } } if len(slice) > 0 { @@ -218,22 +222,21 @@ func ReadEcoPCR(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, func ReadEcoPCRFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) { var reader io.Reader - var greader io.Reader var err error options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename))))) - reader, err = os.Open(filename) + reader, err = obiutils.Ropen(filename) + + if err == obiutils.ErrNoContent { + log.Infof("file %s is empty", filename) + return ReadEmptyFile(options...) + } + if err != nil { log.Printf("open file error: %+v", err) return obiiter.NilIBioSequence, err } - // Test if the flux is compressed by gzip - greader, err = gzip.NewReader(reader) - if err == nil { - reader = greader - } - return ReadEcoPCR(reader, options...) } diff --git a/pkg/obiformats/ncbitaxdump_read.go b/pkg/obiformats/ncbitaxdump_read.go index 7e6c3f0..82c40bc 100644 --- a/pkg/obiformats/ncbitaxdump_read.go +++ b/pkg/obiformats/ncbitaxdump_read.go @@ -149,7 +149,7 @@ func loadMergedTable(reader io.Reader, taxonomy *obitax.Taxonomy) int { // Returns: // - A pointer to the obitax.Taxonomy object containing the loaded taxonomy data, or an error // if any of the files cannot be opened or read. -func LoadNCBITaxDump(directory string, onlysn bool) (*obitax.Taxonomy, error) { +func LoadNCBITaxDump(directory string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error) { taxonomy := obitax.NewTaxonomy("NCBI Taxonomy", "taxon", obiutils.AsciiDigitSet) diff --git a/pkg/obiformats/ncbitaxdump_readtar.go b/pkg/obiformats/ncbitaxdump_readtar.go index 6b3dba2..8dad31a 100644 --- a/pkg/obiformats/ncbitaxdump_readtar.go +++ b/pkg/obiformats/ncbitaxdump_readtar.go @@ -63,7 +63,7 @@ func IsNCBITarTaxDump(path string) bool { return citations && division && gencode && names && delnodes && gc && merged && nodes } -func LoadNCBITarTaxDump(path string, onlysn bool) (*obitax.Taxonomy, error) { +func LoadNCBITarTaxDump(path string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error) { taxonomy := obitax.NewTaxonomy("NCBI Taxonomy", "taxon", obiutils.AsciiDigitSet) diff --git a/pkg/obiformats/newick_write.go b/pkg/obiformats/newick_write.go index e843189..c82fee7 100644 --- a/pkg/obiformats/newick_write.go +++ b/pkg/obiformats/newick_write.go @@ -54,7 +54,7 @@ func (tree *Tree) Newick(level int, taxid, scientific_name, rank bool) string { if scientific_name { buffer.WriteByte(' ') } - buffer.WriteByte('-') + // buffer.WriteByte('-') if taxid { buffer.WriteString(*tree.TaxNode.Id()) if rank { @@ -64,7 +64,7 @@ func (tree *Tree) Newick(level int, taxid, scientific_name, rank bool) string { if rank { buffer.WriteString(tree.TaxNode.Rank()) } - buffer.WriteByte('-') + //buffer.WriteByte('-') } if scientific_name || taxid || rank { buffer.WriteByte('\'') @@ -85,24 +85,14 @@ func Newick(taxa *obitax.TaxonSet, taxid, scientific_name, rank bool) string { return "" } - iterator := taxa.Sort().Iterator() + root := taxa.Sort().Get(0) + tree, err := taxa.AsPhyloTree(root) - nodes := make(map[*string]*Tree, taxa.Len()) - trees := make([]*Tree, 0) - - for iterator.Next() { - taxon := iterator.Get() - - tree := &Tree{TaxNode: taxon.Node} - if parent, ok := nodes[taxon.Parent().Node.Id()]; ok { - parent.Children = append(parent.Children, tree) - } else { - trees = append(trees, tree) - } - nodes[taxon.Node.Id()] = tree + if err != nil { + log.Fatalf("Cannot build taxonomy tree: %v", err) } - return trees[0].Newick(0, taxid, scientific_name, rank) + return tree.Newick(0) } func WriteNewick(iterator *obitax.ITaxon, diff --git a/pkg/obiformats/options.go b/pkg/obiformats/options.go index 243390a..69345e9 100644 --- a/pkg/obiformats/options.go +++ b/pkg/obiformats/options.go @@ -42,6 +42,7 @@ type __options__ struct { with_rank bool with_taxid bool with_scientific_name bool + without_root_path bool raw_taxid bool with_metadata []string } @@ -88,6 +89,7 @@ func MakeOptions(setters []WithOption) Options { with_rank: true, with_taxid: true, with_scientific_name: false, + without_root_path: false, raw_taxid: false, } @@ -250,6 +252,11 @@ func (o *Options) WithScientificName() bool { return o.pointer.with_scientific_name } +// WithoutRootPath returns whether the root path option is enabled. +func (o *Options) WithoutRootPath() bool { + return o.pointer.without_root_path +} + // RawTaxid returns whether the raw taxid option is enabled. // It retrieves the setting from the underlying options. func (o *Options) RawTaxid() bool { @@ -576,6 +583,13 @@ func OptionsWithScientificName(value bool) WithOption { return f } +func OptionWithoutRootPath(value bool) WithOption { + f := WithOption(func(opt Options) { + opt.pointer.without_root_path = value + }) + return f +} + func OptionsRawTaxid(value bool) WithOption { f := WithOption(func(opt Options) { opt.pointer.raw_taxid = value diff --git a/pkg/obiformats/taxonomy_read.go b/pkg/obiformats/taxonomy_read.go index fe99526..4dbb46e 100644 --- a/pkg/obiformats/taxonomy_read.go +++ b/pkg/obiformats/taxonomy_read.go @@ -11,7 +11,7 @@ import ( log "github.com/sirupsen/logrus" ) -type TaxonomyLoader func(path string, onlysn bool) (*obitax.Taxonomy, error) +type TaxonomyLoader func(path string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error) func DetectTaxonomyTarFormat(path string) (TaxonomyLoader, error) { @@ -67,26 +67,28 @@ func DetectTaxonomyFormat(path string) (TaxonomyLoader, error) { case "application/x-tar": return DetectTaxonomyTarFormat(path) case "text/fasta": - return func(path string, onlysn bool) (*obitax.Taxonomy, error) { + return func(path string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error) { input, err := ReadFastaFromFile(path) + input = input.NumberSequences(1, true) if err != nil { return nil, err } _, data := input.Load() - return data.ExtractTaxonomy(nil) + return data.ExtractTaxonomy(nil, seqAsTaxa) }, nil case "text/fastq": - return func(path string, onlysn bool) (*obitax.Taxonomy, error) { + return func(path string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error) { input, err := ReadFastqFromFile(path) + input = input.NumberSequences(1, true) if err != nil { return nil, err } _, data := input.Load() - return data.ExtractTaxonomy(nil) + return data.ExtractTaxonomy(nil, seqAsTaxa) }, nil } @@ -96,14 +98,14 @@ func DetectTaxonomyFormat(path string) (TaxonomyLoader, error) { return nil, nil } -func LoadTaxonomy(path string, onlysn bool) (*obitax.Taxonomy, error) { +func LoadTaxonomy(path string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error) { loader, err := DetectTaxonomyFormat(path) if err != nil { return nil, err } - taxonomy, err := loader(path, onlysn) + taxonomy, err := loader(path, onlysn, seqAsTaxa) return taxonomy, err } diff --git a/pkg/obiformats/universal_read.go b/pkg/obiformats/universal_read.go index 0a09f89..b6c5196 100644 --- a/pkg/obiformats/universal_read.go +++ b/pkg/obiformats/universal_read.go @@ -154,8 +154,7 @@ func ReadSequencesFromFile(filename string, return obiiter.NilIBioSequence, nil } -// func ReadSequencesFromStdin(options ...WithOption) obiiter.IBioSequence { - -// options = append(options, OptionsSource("stdin")) - -// } +func ReadSequencesFromStdin(options ...WithOption) (obiiter.IBioSequence, error) { + options = append(options, OptionCloseFile()) + return ReadSequencesFromFile("-", options...) +} diff --git a/pkg/obiiter/extract_taxonomy.go b/pkg/obiiter/extract_taxonomy.go index 868f527..fd05eaf 100644 --- a/pkg/obiiter/extract_taxonomy.go +++ b/pkg/obiiter/extract_taxonomy.go @@ -2,12 +2,12 @@ package obiiter import "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" -func (iterator *IBioSequence) ExtractTaxonomy() (taxonomy *obitax.Taxonomy, err error) { +func (iterator *IBioSequence) ExtractTaxonomy(seqAsTaxa bool) (taxonomy *obitax.Taxonomy, err error) { for iterator.Next() { slice := iterator.Get().Slice() - taxonomy, err = slice.ExtractTaxonomy(taxonomy) + taxonomy, err = slice.ExtractTaxonomy(taxonomy, seqAsTaxa) if err != nil { return diff --git a/pkg/obioptions/options.go b/pkg/obioptions/options.go index 77d4886..a7d5826 100644 --- a/pkg/obioptions/options.go +++ b/pkg/obioptions/options.go @@ -17,10 +17,10 @@ import ( ) var _Debug = false -var _BatchSize = 2000 var _Pprof = false var _PprofMudex = 10 var _PprofGoroutine = 6060 +var __seq_as_taxa__ = false var __defaut_taxonomy_mutex__ sync.Mutex @@ -102,6 +102,7 @@ func GenerateOptionParser(program string, taxonomy, err := obiformats.LoadTaxonomy( obidefault.SelectedTaxonomy(), !obidefault.AreAlternativeNamesSelected(), + SeqAsTaxa(), ) if err != nil { @@ -218,6 +219,9 @@ func LoadTaxonomyOptionSet(options *getoptions.GetOpt, required, alternatiive bo options.BoolVar(obidefault.UseRawTaxidsPtr(), "raw-taxid", obidefault.UseRawTaxids(), options.Description("When set, taxids are printed in files with any supplementary information (taxon name and rank)"), ) + options.BoolVar(&__seq_as_taxa__, "with-leaves", __seq_as_taxa__, + options.Description("If taxonomy is extracted from a sequence file, sequences are added as leave of their taxid annotation"), + ) } // CLIIsDebugMode returns whether the CLI is in debug mode. @@ -232,6 +236,10 @@ func CLIIsDebugMode() bool { return _Debug } +func SeqAsTaxa() bool { + return __seq_as_taxa__ +} + // SetDebugOn sets the debug mode on. func SetDebugOn() { _Debug = true diff --git a/pkg/obioptions/version.go b/pkg/obioptions/version.go index 92b90d4..c2ef408 100644 --- a/pkg/obioptions/version.go +++ b/pkg/obioptions/version.go @@ -8,7 +8,7 @@ import ( // corresponds to the last commit, and not the one when the file will be // commited -var _Commit = "f9324dd" +var _Commit = "3424d30" var _Version = "Release 4.4.0" // Version returns the version of the obitools package. diff --git a/pkg/obiphylo/tree.go b/pkg/obiphylo/tree.go new file mode 100644 index 0000000..47433cd --- /dev/null +++ b/pkg/obiphylo/tree.go @@ -0,0 +1,71 @@ +package obiphylo + +import ( + "fmt" + "math" + "strings" +) + +type PhyloNode struct { + Name string + Children map[*PhyloNode]float64 + Attributes map[string]any +} + +func NewPhyloNode() *PhyloNode { + return &PhyloNode{} +} + +func (n *PhyloNode) AddChild(child *PhyloNode, distance float64) { + if n.Children == nil { + n.Children = map[*PhyloNode]float64{} + } + n.Children[child] = distance +} + +func (n *PhyloNode) SetAttribute(key string, value any) { + if n.Attributes == nil { + n.Attributes = make(map[string]any) + } + n.Attributes[key] = value +} + +func (n *PhyloNode) GetDistanceToChild(child *PhyloNode) float64 { + return n.Children[child] +} + +func (n *PhyloNode) GetAttribute(key string) any { + return n.Attributes[key] +} + +func (n *PhyloNode) Newick(level int) string { + nc := len(n.Children) + result := strings.Builder{} + result.WriteString(strings.Repeat(" ", level)) + if nc > 0 { + result.WriteString("(\n") + i := 0 + for child, distance := range n.Children { + result.WriteString(child.Newick(level + 1)) + if !math.IsNaN(distance) { + result.WriteString(fmt.Sprintf(":%.5f", distance)) + } + i++ + if i < nc { + result.WriteByte(',') + } + result.WriteString("\n") + } + result.WriteString(strings.Repeat(" ", level)) + result.WriteByte(')') + } + if n.Name != "" { + result.WriteString(n.Name) + } + + if level == 0 { + result.WriteString(";\n") + } + + return result.String() +} diff --git a/pkg/obiseq/biosequenceslice.go b/pkg/obiseq/biosequenceslice.go index ca666b2..597a6dd 100644 --- a/pkg/obiseq/biosequenceslice.go +++ b/pkg/obiseq/biosequenceslice.go @@ -1,6 +1,9 @@ package obiseq import ( + "errors" + "fmt" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obilog" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" @@ -182,11 +185,26 @@ func (s *BioSequenceSlice) SortOnLength(reverse bool) { }) } -func (s *BioSequenceSlice) ExtractTaxonomy(taxonomy *obitax.Taxonomy) (*obitax.Taxonomy, error) { +func (s *BioSequenceSlice) ExtractTaxonomy(taxonomy *obitax.Taxonomy, seqAsTaxa bool) (*obitax.Taxonomy, error) { var err error for _, s := range *s { - taxonomy, err = taxonomy.InsertPathString(s.Path()) + path := s.Path() + if seqAsTaxa { + if len(path) == 0 { + return nil, fmt.Errorf("sequence %v has no path", s.Id()) + } + last := path[len(path)-1] + taxname, _ := obiutils.SplitInTwo(last, ':') + if idx, ok := s.GetIntAttribute("seq_number"); !ok { + return nil, errors.New("sequences are not numbered") + } else { + path = append(path, fmt.Sprintf("%s:SEQ%010d [%s]@sequence", taxname, idx, s.Id())) + } + + } + + taxonomy, err = taxonomy.InsertPathString(path) if err != nil { return nil, err diff --git a/pkg/obiseq/taxonomy_methods.go b/pkg/obiseq/taxonomy_methods.go index 5d0c639..ef1b0d9 100644 --- a/pkg/obiseq/taxonomy_methods.go +++ b/pkg/obiseq/taxonomy_methods.go @@ -189,7 +189,11 @@ func (sequence *BioSequence) Path() []string { path, ok := sequence.GetAttribute("taxonomic_path") if !ok { - return nil + if taxo := obitax.DefaultTaxonomy(); taxo != nil { + path = sequence.SetPath(taxo) + } else { + return nil + } } slice, err := obiutils.InterfaceToStringSlice(path) diff --git a/pkg/obitax/taxonomy.go b/pkg/obitax/taxonomy.go index 9860236..76fdb2f 100644 --- a/pkg/obitax/taxonomy.go +++ b/pkg/obitax/taxonomy.go @@ -11,6 +11,7 @@ import ( "errors" "fmt" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiphylo" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" ) @@ -415,3 +416,10 @@ func (taxonomy *Taxonomy) InsertPathString(path []string) (*Taxonomy, error) { return taxonomy, nil } + +func (taxo *Taxonomy) AsPhyloTree() (*obiphylo.PhyloNode, error) { + root := taxo.Root().Node + taxa := taxo.AsTaxonSet() + + return taxa.AsPhyloTree(root) +} diff --git a/pkg/obitax/taxonset.go b/pkg/obitax/taxonset.go index 6b6c536..44288ec 100644 --- a/pkg/obitax/taxonset.go +++ b/pkg/obitax/taxonset.go @@ -7,7 +7,12 @@ corresponding TaxNode instances, along with methods for managing and querying th package obitax -import log "github.com/sirupsen/logrus" +import ( + "fmt" + + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiphylo" + log "github.com/sirupsen/logrus" +) // TaxonSet represents a collection of taxa within a taxonomy. // It holds a mapping of taxon identifiers to their corresponding TaxNode instances, @@ -224,3 +229,36 @@ func (set *TaxonSet) Sort() *TaxonSlice { return taxa } + +func (taxo *TaxonSet) AsPhyloTree(root *TaxNode) (*obiphylo.PhyloNode, error) { + nodes := make(map[*string]*obiphylo.PhyloNode, taxo.Len()) + tsi := taxo.Iterator() + + log.Warnf("Coucou") + for tsi.Next() { + taxon := tsi.Get() + id := taxon.Node.Id() + node := obiphylo.NewPhyloNode() + rank := taxon.Rank() + node.Name = fmt.Sprintf("%s -%s@%s-", taxon.ScientificName(), *id, rank) + node.SetAttribute("rank", rank) + node.SetAttribute("parent", taxon.Parent().Node.Id()) + nodes[id] = node + } + + for id, node := range nodes { + if id == root.Id() { + continue + } + pid := node.GetAttribute("parent").(*string) + parent := nodes[pid] + if parent != nil { + parent.AddChild(node, 1) + } else { + return nil, fmt.Errorf("cannot find parent node for %s", *pid) + } + } + + rid := root.Id() + return nodes[rid], nil +} diff --git a/pkg/obitools/obiconvert/sequence_reader.go b/pkg/obitools/obiconvert/sequence_reader.go index 2d4c690..48b8306 100644 --- a/pkg/obitools/obiconvert/sequence_reader.go +++ b/pkg/obitools/obiconvert/sequence_reader.go @@ -133,7 +133,7 @@ func CLIReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) { case "csv": iterator, err = obiformats.ReadCSV(os.Stdin, opts...) default: - iterator = obiformats.ReadFastSeqFromStdin(opts...) + iterator, err = obiformats.ReadSequencesFromStdin(opts...) } if err != nil { diff --git a/pkg/obitools/obitag/obitag.go b/pkg/obitools/obitag/obitag.go index 887fb08..884960f 100644 --- a/pkg/obitools/obitag/obitag.go +++ b/pkg/obitools/obitag/obitag.go @@ -186,6 +186,9 @@ func Identify(sequence *obiseq.BioSequence, // log.Debugln("Need of indexing") newidx++ idx = obirefidx.IndexSequence(seqidxs[i], references, &refcounts, taxa, taxo) + if len(idx) == 0 { + log.Panicf("%s idx: %v", references[seqidxs[i]].Id(), idx) + } references[seqidxs[i]].SetOBITagRefIndex(idx) log.Debugln(references[seqidxs[i]].Id(), idx) } diff --git a/pkg/obitools/obitaxonomy/obitaxonomy.go b/pkg/obitools/obitaxonomy/obitaxonomy.go index 2ff330a..c06ac6b 100644 --- a/pkg/obitools/obitaxonomy/obitaxonomy.go +++ b/pkg/obitools/obitaxonomy/obitaxonomy.go @@ -91,6 +91,7 @@ func CLINewickWriter(iterator *obitax.ITaxon, obiformats.OptionsWithRank(CLIWithRank()), obiformats.OptionsWithScientificName(CLIWithScientificName()), obiformats.OptionsWithTaxid(true), + obiformats.OptionWithoutRootPath(CLINewickWithoutRoot()), ) filename := obiconvert.CLIOutPutFileName() diff --git a/pkg/obitools/obitaxonomy/options.go b/pkg/obitools/obitaxonomy/options.go index 0f9d643..27f6b60 100644 --- a/pkg/obitools/obitaxonomy/options.go +++ b/pkg/obitools/obitaxonomy/options.go @@ -25,6 +25,8 @@ var __to_dump__ = "" var __download_ncbi__ = false var __extract_taxonomy__ = false var __newick__ = false +var __newick_with_leaves__ = false +var __newick_without_root__ = false func FilterTaxonomyOptionSet(options *getoptions.GetOpt) { options.BoolVar(&__rank_list__, "rank-list", false, @@ -81,6 +83,9 @@ func OptionSet(options *getoptions.GetOpt) { options.BoolVar(&__newick__, "newick-output", __newick__, options.Description("Format the resulting taxonomy as a newick tree"), ) + options.BoolVar(&__newick_without_root__, "without-root", __newick_without_root__, + options.Description("If used, do not include the non-branched path to the root in the output"), + ) } @@ -166,6 +171,14 @@ func CLIAsNewick() bool { return __newick__ } +func CLINewickWithLeaves() bool { + return __newick_with_leaves__ +} + +func CLINewickWithoutRoot() bool { + return __newick_without_root__ +} + func CLIAskForRankList() bool { return __rank_list__ } diff --git a/sample/.DS_Store b/sample/.DS_Store deleted file mode 100644 index 0db93af213e5ed8551d42f8aa8ed31e41986c756..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 10244 zcmeHMTWl0n8205S!Z{q?S$#fc4v0W zCB&-ei^OQ6Q4=K^^}z?hOEi(F(Kn)o2ZKg5@j;9)`rv~x(U^Tbn*p|~90b&5aC~~XJqQ`XtujlcaAO$5HhlB?(zy=p`6qDDq4hupC zLIy$xLIy$xLI!RJ258Ubft3pzK0^jV20{j|Fu=bLF^b&A13MwC|LLHIe*_>~O2vMl zG@S!{Of0bRz)lG3L#a=3dcXjR!4U%moa$q|IoWt%CxjJnKmi8~M#kWTg8t})i}}p~ z5YcLmbm_$T+`^ZPJlL4yp8fSVe3GbzhSQQMDF8OwCNhK6rZDk+^Y zvy7Foa&|B=;*2Le&C9uMT6RCr^*WA~Dx|g4KGPUaDwXYy?P;cMWPO0C8`R`LpK0sP zc$<^6b=SuxLSh+IyJ;&$6X)1}WyNyy?Shq(ssbzD6W~S?bv&=2Cwf+J63Mgc9Tj)TC>tQOY@9Y%SxEX47cAij(9vZ z=sH=i-EY`$5|)=G6(#2yJq7{HywO20w{qIbF>+?sg6KWVYa5z3KG?B+_d^$F&zY;p z^8Cuayy==L%NWqyv|;O}J-m0^)V+~G;@|enA=Ai4A{F>eudR(57Z)yCtf*D$k^)M! zy^3W@HS5o%%(0y59Tj7!v1Jjad~iUgdF^&|-lOiyKXbG`12VPBb5XBX9-A9P2wtkFGAIt?(LyBPD|8kKXjs?w`5?Yv1|L9bjmhw?RJtUA6@ydj^~>Ue|5MQ8_9p`P}; zE;v9pX&E>Qr{EPh4R6DT@F|>!3vda(haccaxD3C+AMiJp;tVW9h80+aci|FTinX{F zn{ges;ZEFzyRjR4@gN?;Bx*Q>4rXu+T|A19;p2D`pTwu}1$+@-!k6()JcDQP9efWz z$1m_p{0cANCHx65N~u<=lbWTCQoESERB*ffQ^hBBQ2u5- zDfqrfW7qSf=(f(T+ryJC&EmdOF>ik5f~D0fRyD2L6u%-bA;^LTi?!{`d5RPGa`xZ< zbRCkHsa5il=w8yhapbCZ32!U-o>In@&A0&7yX9q3Wd?bK$lfb2k18|C8$`AyS{qd) zazc@@(7x~s`dsN}Ba|;rF7tXRV3RD*n*pIGj7FB?7|%+;$GZ`eK>%Fm_~yn zJc1@>(Zd|(@ezEKgnSa8AQ?Y{&*F3VJf6l^@ilxM-|z+fK7Kq+pgX1t6hFBcf#wrg z+j8t-YRiSr=z_bETy=;xNEbZNwp&g**dY1u&$ATkC;dPvfJHD)8>~g`lc0l1&h1gk zhG9YvQQo4wLECLT*(2ah?M-@%dN4(AlarL5Th!aOqFIbldKivPMPrl@#dE1BLT70H zCXH**tgfzRY+4U|XD#lbc>JDX+;zwQYZ|(Ed}5AfJbdHvp_O_#UBqVb_&H+H1itbc zy^CTrbryz@fslcafslcafjgOj60!IgzyIG#KhybLKo~*>LI&g zFQ4DF$0$Bbkq6#xLRcR{58i~A``7W5;n(pig0kR(!>^AAWfQ{saKia;vf?)XKmRk} RmT%I+^M82$|9_nS{{b#m3Euz!