Adds the possibility to extract a taxonomy from the taxonomic paths included in sequence files

This commit is contained in:
Eric Coissac
2025-01-30 11:18:21 +01:00
parent 2452aef7a9
commit 0df082da06
20 changed files with 460 additions and 173 deletions

View File

@@ -248,15 +248,15 @@ func CLIRestrictTaxonomyPredicate() obiseq.SequencePredicate {
if len(_BelongTaxa) > 0 {
taxonomy := obitax.DefaultTaxonomy()
taxon := taxonomy.Taxon(_BelongTaxa[0])
if taxon == nil {
taxon, err := taxonomy.Taxon(_BelongTaxa[0])
if err != nil {
p = obiseq.IsSubCladeOfSlot(taxonomy, _BelongTaxa[0])
} else {
p = obiseq.IsSubCladeOf(taxonomy, taxon)
}
for _, staxid := range _BelongTaxa[1:] {
taxon := taxonomy.Taxon(staxid)
if taxon == nil {
taxon, err := taxonomy.Taxon(staxid)
if err != nil {
p2 = obiseq.IsSubCladeOfSlot(taxonomy, staxid)
} else {
p2 = obiseq.IsSubCladeOf(taxonomy, taxon)
@@ -278,16 +278,16 @@ func CLIAvoidTaxonomyPredicate() obiseq.SequencePredicate {
if len(_NotBelongTaxa) > 0 {
taxonomy := obitax.DefaultTaxonomy()
taxon := taxonomy.Taxon(_NotBelongTaxa[0])
if taxon == nil {
taxon, err := taxonomy.Taxon(_NotBelongTaxa[0])
if err != nil {
p = obiseq.IsSubCladeOfSlot(taxonomy, _NotBelongTaxa[0])
} else {
p = obiseq.IsSubCladeOf(taxonomy, taxon)
}
for _, taxid := range _NotBelongTaxa[1:] {
taxon := taxonomy.Taxon(taxid)
if taxon == nil {
taxon, err := taxonomy.Taxon(taxid)
if err != nil {
p2 = obiseq.IsSubCladeOfSlot(taxonomy, taxid)
} else {
p2 = obiseq.IsSubCladeOf(taxonomy, taxon)

View File

@@ -42,9 +42,10 @@ func MatchDistanceIndex(taxonomy *obitax.Taxonomy, distance int, distanceIdx map
if i == len(keys) || distance > keys[len(keys)-1] {
taxon = taxonomy.Root()
} else {
taxon = taxonomy.Taxon(distanceIdx[keys[i]])
if taxon == nil {
log.Panicf("Cannot identify taxon %s in %s", distanceIdx[keys[i]], taxonomy.Name())
var err error
taxon, err = taxonomy.Taxon(distanceIdx[keys[i]])
if err != nil {
log.Panicf("Cannot identify taxon %s in %s (%v)", distanceIdx[keys[i]], taxonomy.Name(), err)
}
}
@@ -196,9 +197,9 @@ func Identify(sequence *obiseq.BioSequence,
log.Panic("Problem in identification line : ", best.Id(), "idx:", idx, "distance:", d)
}
match_taxon := taxo.Taxon(identification)
match_taxon, err := taxo.Taxon(identification)
if taxon != nil {
if err == nil {
taxon, _ = taxon.LCA(match_taxon)
} else {
taxon = match_taxon

View File

@@ -24,6 +24,7 @@ var __taxid_sons__ = "NA"
var __restrict_rank__ = ""
var __to_dump__ = ""

// __download_ncbi__ stores the value of the "download-ncbi" boolean option
// ("Download the current NCBI taxonomy taxdump"); it defaults to false.
var __download_ncbi__ = false

// __extract_taxonomy__ stores the value of the "extract-taxonomy" boolean
// option ("Extract taxonomy from a sequence file"); it defaults to false.
var __extract_taxonomy__ = false
func FilterTaxonomyOptionSet(options *getoptions.GetOpt) {
options.BoolVar(&__rank_list__, "rank-list", false,
@@ -76,7 +77,9 @@ func OptionSet(options *getoptions.GetOpt) {
options.BoolVar(&__download_ncbi__, "download-ncbi", __download_ncbi__,
options.Description("Download the current NCBI taxonomy taxdump"),
)
options.BoolVar(&__extract_taxonomy__, "extract-taxonomy", __extract_taxonomy__,
options.Description("Extract taxonomy from a sequence file"),
)
}
func CLITaxonomicalRestrictions() (*obitax.TaxonSet, error) {
@@ -88,13 +91,14 @@ func CLITaxonomicalRestrictions() (*obitax.TaxonSet, error) {
ts := taxonomy.NewTaxonSet()
for _, taxid := range __taxonomical_restriction__ {
tx := taxonomy.Taxon(taxid)
tx, err := taxonomy.Taxon(taxid)
if tx == nil {
if err != nil {
return nil, fmt.Errorf(
"cannot find taxon %s in taxonomy %s",
"cannot find taxon %s in taxonomy %s (%v)",
taxid,
taxonomy.Name(),
err,
)
}
@@ -155,3 +159,7 @@ func CLISubTaxonomyNode() string {
// CLIDownloadNCBI returns the current value of the package-level
// __download_ncbi__ flag, which is bound to the "download-ncbi" option.
func CLIDownloadNCBI() bool {
return __download_ncbi__
}
// CLIExtractTaxonomy returns the current value of the package-level
// __extract_taxonomy__ flag, which is bound to the "extract-taxonomy" option.
func CLIExtractTaxonomy() bool {
return __extract_taxonomy__
}