mirror of
https://github.com/metabarcoding/obitools4.git
synced 2026-05-01 04:20:40 +00:00
feat(obiconvert): add --raw-taxid option and refactor taxID formatting
- Add new `--raw-taxid` mode (`obiconvert --raw-taxid`) to output bare numeric taxIDs instead of full-format strings. - Introduce `TaxNode.FullString()` to always return the complete "code:id [name]@rank" format, regardless of the global `UseRawTaxids()` setting. - Update `.String(taxonomyCode)` to respect the global flag, returning the bare ID when `--raw-taxid` is active. - Extract the raw taxID from full-format strings in taxonomy methods when needed (e.g., fallback without a loaded DB). - Add a comprehensive test suite covering: a) `--raw-taxid` execution and idempotency; b) full-format taxID output with `--taxonomy`; c) interaction of both flags; d) format validation. - Add test data: new reference files `out_ecotag.fasta` and `taxonomy.csv`, and an updated shell script.
This commit is contained in:
@@ -70,6 +70,12 @@ func (s *BioSequence) SetTaxid(taxid string, rank ...string) {
|
||||
}
|
||||
}
|
||||
|
||||
} else if obidefault.UseRawTaxids() {
|
||||
// Without a loaded taxonomy, extract the bare ID from full-format strings
|
||||
// like "code:12345 [Name]@rank" so that --raw-taxid is honoured everywhere.
|
||||
if _, rawID, _, _, parseErr := obitax.ParseTaxonString(taxid); parseErr == nil {
|
||||
taxid = rawID
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -177,7 +183,7 @@ func (sequence *BioSequence) SetPath(taxonomy *obitax.Taxonomy) []string {
|
||||
lpath := path.Len() - 1
|
||||
|
||||
for i := lpath; i >= 0; i-- {
|
||||
spath[lpath-i] = path.Get(i).String(taxonomy.Code())
|
||||
spath[lpath-i] = path.Get(i).FullString(taxonomy.Code())
|
||||
}
|
||||
|
||||
sequence.SetAttribute("taxonomic_path", spath)
|
||||
|
||||
+19
-13
@@ -29,6 +29,24 @@ type TaxNode struct {
|
||||
alternatenames *map[*string]*string
|
||||
}
|
||||
|
||||
// FullString returns the full string representation of the TaxNode in the form
|
||||
// "taxonomyCode:id [scientificName]@rank", regardless of the UseRawTaxids setting.
|
||||
// This is used internally when a parseable format is required (e.g. taxonomic_path).
|
||||
func (node *TaxNode) FullString(taxonomyCode string) string {
|
||||
if node.HasScientificName() {
|
||||
return fmt.Sprintf("%s:%v [%s]@%s",
|
||||
taxonomyCode,
|
||||
*node.id,
|
||||
node.ScientificName(),
|
||||
node.Rank(),
|
||||
)
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%s:%v",
|
||||
taxonomyCode,
|
||||
*node.id)
|
||||
}
|
||||
|
||||
// String returns a string representation of the TaxNode, including the taxonomy code,
|
||||
// the node ID, and the scientific name. The output format is "taxonomyCode:id [scientificName]@rank".
|
||||
//
|
||||
@@ -42,19 +60,7 @@ func (node *TaxNode) String(taxonomyCode string) string {
|
||||
return *node.id
|
||||
}
|
||||
|
||||
if node.HasScientificName() {
|
||||
return fmt.Sprintf("%s:%v [%s]@%s",
|
||||
taxonomyCode,
|
||||
*node.id,
|
||||
node.ScientificName(),
|
||||
node.Rank(),
|
||||
)
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%s:%v",
|
||||
taxonomyCode,
|
||||
*node.id)
|
||||
|
||||
return node.FullString(taxonomyCode)
|
||||
}
|
||||
|
||||
// Id returns the unique identifier of the TaxNode.
|
||||
|
||||
Reference in New Issue
Block a user