feat(obiconvert): add --raw-taxid option and refactor taxID formatting

- Add new `--raw-taxid` option (`obiconvert --raw-taxid`) to output bare numeric taxIDs instead of full-format strings.
- Introduce `TaxNode.FullString()` to always return the complete "code:id [name]@rank" format, regardless of global `UseRawTaxids()` setting.
- Update `.String(taxonomyCode)` to respect the global flag, returning bare ID when `--raw-taxid` is active.
- Extract raw taxID from full-format strings in taxonomy methods when needed (e.g., fallback without loaded DB).
- Add comprehensive test suite covering:
a) `--raw-taxid` execution and idempotency
b) full-format taxID output with `--taxonomy`
c) interaction of both flags
d) format validation
- Add test data: new reference files `out_ecotag.fasta` and `taxonomy.csv`, and an updated shell script.
This commit is contained in:
Eric Coissac
2026-04-30 16:44:28 +02:00
parent 14e2840a2d
commit 60b3753673
6 changed files with 261 additions and 29 deletions
+19 -13
View File
@@ -29,6 +29,24 @@ type TaxNode struct {
alternatenames *map[*string]*string
}
// FullString formats the node as "taxonomyCode:id [scientificName]@rank".
//
// Unlike a formatter that honours a global raw-taxid switch, this method does
// not consult any global setting: it always emits the taxonomy code and the
// node id. When the node has no scientific name, the bracketed name and the
// "@rank" suffix are omitted and the result is just "taxonomyCode:id".
//
// Parameters:
//   - taxonomyCode: the prefix written before the colon.
//
// Returns the formatted identifier string.
func (node *TaxNode) FullString(taxonomyCode string) string {
	// Short form first: nothing but code and id when the name is unavailable.
	if !node.HasScientificName() {
		return fmt.Sprintf("%s:%v", taxonomyCode, *node.id)
	}

	return fmt.Sprintf("%s:%v [%s]@%s",
		taxonomyCode, *node.id, node.ScientificName(), node.Rank())
}
// String returns a string representation of the TaxNode, including the taxonomy code,
// the node ID, the scientific name, and the rank. The output format is "taxonomyCode:id [scientificName]@rank".
//
@@ -42,19 +60,7 @@ func (node *TaxNode) String(taxonomyCode string) string {
return *node.id
}
if node.HasScientificName() {
return fmt.Sprintf("%s:%v [%s]@%s",
taxonomyCode,
*node.id,
node.ScientificName(),
node.Rank(),
)
}
return fmt.Sprintf("%s:%v",
taxonomyCode,
*node.id)
return node.FullString(taxonomyCode)
}
// Id returns the unique identifier of the TaxNode.