Changes to be committed:

	modified:   cmd/obitools/obitag/main.go
	modified:   cmd/obitools/obitaxonomy/main.go
	modified:   pkg/obiformats/csvtaxdump_read.go
	modified:   pkg/obiformats/ecopcr_read.go
	modified:   pkg/obiformats/ncbitaxdump_read.go
	modified:   pkg/obiformats/ncbitaxdump_readtar.go
	modified:   pkg/obiformats/newick_write.go
	modified:   pkg/obiformats/options.go
	modified:   pkg/obiformats/taxonomy_read.go
	modified:   pkg/obiformats/universal_read.go
	modified:   pkg/obiiter/extract_taxonomy.go
	modified:   pkg/obioptions/options.go
	modified:   pkg/obioptions/version.go
	new file:   pkg/obiphylo/tree.go
	modified:   pkg/obiseq/biosequenceslice.go
	modified:   pkg/obiseq/taxonomy_methods.go
	modified:   pkg/obitax/taxonomy.go
	modified:   pkg/obitax/taxonset.go
	modified:   pkg/obitools/obiconvert/sequence_reader.go
	modified:   pkg/obitools/obitag/obitag.go
	modified:   pkg/obitools/obitaxonomy/obitaxonomy.go
	modified:   pkg/obitools/obitaxonomy/options.go
	deleted:    sample/.DS_Store
This commit is contained in:
Eric Coissac
2025-06-04 09:48:10 +02:00
parent 3424d3057f
commit 6cb7a5a352
23 changed files with 230 additions and 56 deletions

View File

@@ -10,7 +10,7 @@ import (
log "github.com/sirupsen/logrus"
)
func LoadCSVTaxonomy(path string, onlysn bool) (*obitax.Taxonomy, error) {
func LoadCSVTaxonomy(path string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error) {
log.Infof("Loading taxonomy from csv file: %s", path)

View File

@@ -4,13 +4,10 @@ import (
"encoding/csv"
"fmt"
"io"
"os"
"path"
"strconv"
"strings"
gzip "github.com/klauspost/pgzip"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
@@ -177,8 +174,10 @@ func ReadEcoPCR(reader io.Reader, options ...WithOption) (obiiter.IBioSequence,
}()
go func() {
var err error = nil
var seq *obiseq.BioSequence
seq, err := __read_ecopcr_bioseq__(&ecopcr)
seq, err = __read_ecopcr_bioseq__(&ecopcr)
seq.SetSource(opt.Source())
slice := make(obiseq.BioSequenceSlice, 0, opt.BatchSize())
i := 0
@@ -194,7 +193,12 @@ func ReadEcoPCR(reader io.Reader, options ...WithOption) (obiiter.IBioSequence,
}
seq, err = __read_ecopcr_bioseq__(&ecopcr)
seq.SetSource(opt.Source())
if err == nil {
seq.SetSource(opt.Source())
} else if err != io.EOF {
log.Panicf("%+v", err)
}
}
if len(slice) > 0 {
@@ -218,22 +222,21 @@ func ReadEcoPCR(reader io.Reader, options ...WithOption) (obiiter.IBioSequence,
func ReadEcoPCRFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
var reader io.Reader
var greader io.Reader
var err error
options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))
reader, err = os.Open(filename)
reader, err = obiutils.Ropen(filename)
if err == obiutils.ErrNoContent {
log.Infof("file %s is empty", filename)
return ReadEmptyFile(options...)
}
if err != nil {
log.Printf("open file error: %+v", err)
return obiiter.NilIBioSequence, err
}
// Test if the flux is compressed by gzip
greader, err = gzip.NewReader(reader)
if err == nil {
reader = greader
}
return ReadEcoPCR(reader, options...)
}

View File

@@ -149,7 +149,7 @@ func loadMergedTable(reader io.Reader, taxonomy *obitax.Taxonomy) int {
// Returns:
// - A pointer to the obitax.Taxonomy object containing the loaded taxonomy data, or an error
// if any of the files cannot be opened or read.
func LoadNCBITaxDump(directory string, onlysn bool) (*obitax.Taxonomy, error) {
func LoadNCBITaxDump(directory string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error) {
taxonomy := obitax.NewTaxonomy("NCBI Taxonomy", "taxon", obiutils.AsciiDigitSet)

View File

@@ -63,7 +63,7 @@ func IsNCBITarTaxDump(path string) bool {
return citations && division && gencode && names && delnodes && gc && merged && nodes
}
func LoadNCBITarTaxDump(path string, onlysn bool) (*obitax.Taxonomy, error) {
func LoadNCBITarTaxDump(path string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error) {
taxonomy := obitax.NewTaxonomy("NCBI Taxonomy", "taxon", obiutils.AsciiDigitSet)

View File

@@ -54,7 +54,7 @@ func (tree *Tree) Newick(level int, taxid, scientific_name, rank bool) string {
if scientific_name {
buffer.WriteByte(' ')
}
buffer.WriteByte('-')
// buffer.WriteByte('-')
if taxid {
buffer.WriteString(*tree.TaxNode.Id())
if rank {
@@ -64,7 +64,7 @@ func (tree *Tree) Newick(level int, taxid, scientific_name, rank bool) string {
if rank {
buffer.WriteString(tree.TaxNode.Rank())
}
buffer.WriteByte('-')
//buffer.WriteByte('-')
}
if scientific_name || taxid || rank {
buffer.WriteByte('\'')
@@ -85,24 +85,14 @@ func Newick(taxa *obitax.TaxonSet, taxid, scientific_name, rank bool) string {
return ""
}
iterator := taxa.Sort().Iterator()
root := taxa.Sort().Get(0)
tree, err := taxa.AsPhyloTree(root)
nodes := make(map[*string]*Tree, taxa.Len())
trees := make([]*Tree, 0)
for iterator.Next() {
taxon := iterator.Get()
tree := &Tree{TaxNode: taxon.Node}
if parent, ok := nodes[taxon.Parent().Node.Id()]; ok {
parent.Children = append(parent.Children, tree)
} else {
trees = append(trees, tree)
}
nodes[taxon.Node.Id()] = tree
if err != nil {
log.Fatalf("Cannot build taxonomy tree: %v", err)
}
return trees[0].Newick(0, taxid, scientific_name, rank)
return tree.Newick(0)
}
func WriteNewick(iterator *obitax.ITaxon,

View File

@@ -42,6 +42,7 @@ type __options__ struct {
with_rank bool
with_taxid bool
with_scientific_name bool
without_root_path bool
raw_taxid bool
with_metadata []string
}
@@ -88,6 +89,7 @@ func MakeOptions(setters []WithOption) Options {
with_rank: true,
with_taxid: true,
with_scientific_name: false,
without_root_path: false,
raw_taxid: false,
}
@@ -250,6 +252,11 @@ func (o *Options) WithScientificName() bool {
return o.pointer.with_scientific_name
}
// WithoutRootPath reports the current value of the without_root_path
// setting held by the underlying options structure.
// NOTE(review): MakeOptions initializes this setting to false; what the flag
// changes in the produced output is not visible here — confirm with callers.
func (o *Options) WithoutRootPath() bool {
return o.pointer.without_root_path
}
// RawTaxid returns whether the raw taxid option is enabled.
// It retrieves the setting from the underlying options.
func (o *Options) RawTaxid() bool {
@@ -576,6 +583,13 @@ func OptionsWithScientificName(value bool) WithOption {
return f
}
// OptionWithoutRootPath builds a WithOption setter that stores value in the
// without_root_path field of the options it is applied to.
func OptionWithoutRootPath(value bool) WithOption {
	return WithOption(func(opt Options) {
		opt.pointer.without_root_path = value
	})
}
func OptionsRawTaxid(value bool) WithOption {
f := WithOption(func(opt Options) {
opt.pointer.raw_taxid = value

View File

@@ -11,7 +11,7 @@ import (
log "github.com/sirupsen/logrus"
)
type TaxonomyLoader func(path string, onlysn bool) (*obitax.Taxonomy, error)
type TaxonomyLoader func(path string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error)
func DetectTaxonomyTarFormat(path string) (TaxonomyLoader, error) {
@@ -67,26 +67,28 @@ func DetectTaxonomyFormat(path string) (TaxonomyLoader, error) {
case "application/x-tar":
return DetectTaxonomyTarFormat(path)
case "text/fasta":
return func(path string, onlysn bool) (*obitax.Taxonomy, error) {
return func(path string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error) {
input, err := ReadFastaFromFile(path)
input = input.NumberSequences(1, true)
if err != nil {
return nil, err
}
_, data := input.Load()
return data.ExtractTaxonomy(nil)
return data.ExtractTaxonomy(nil, seqAsTaxa)
}, nil
case "text/fastq":
return func(path string, onlysn bool) (*obitax.Taxonomy, error) {
return func(path string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error) {
input, err := ReadFastqFromFile(path)
input = input.NumberSequences(1, true)
if err != nil {
return nil, err
}
_, data := input.Load()
return data.ExtractTaxonomy(nil)
return data.ExtractTaxonomy(nil, seqAsTaxa)
}, nil
}
@@ -96,14 +98,14 @@ func DetectTaxonomyFormat(path string) (TaxonomyLoader, error) {
return nil, nil
}
func LoadTaxonomy(path string, onlysn bool) (*obitax.Taxonomy, error) {
func LoadTaxonomy(path string, onlysn, seqAsTaxa bool) (*obitax.Taxonomy, error) {
loader, err := DetectTaxonomyFormat(path)
if err != nil {
return nil, err
}
taxonomy, err := loader(path, onlysn)
taxonomy, err := loader(path, onlysn, seqAsTaxa)
return taxonomy, err
}

View File

@@ -154,8 +154,7 @@ func ReadSequencesFromFile(filename string,
return obiiter.NilIBioSequence, nil
}
// func ReadSequencesFromStdin(options ...WithOption) obiiter.IBioSequence {
// options = append(options, OptionsSource("stdin"))
// }
// ReadSequencesFromStdin delegates to ReadSequencesFromFile with "-" as the
// file name, after appending OptionCloseFile() to the supplied options.
// It returns whatever iterator and error ReadSequencesFromFile returns.
// NOTE(review): "-" presumably designates standard input — confirm in
// ReadSequencesFromFile.
func ReadSequencesFromStdin(options ...WithOption) (obiiter.IBioSequence, error) {
	withClose := append(options, OptionCloseFile())
	return ReadSequencesFromFile("-", withClose...)
}