mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
65 lines
2.0 KiB
Go
65 lines
2.0 KiB
Go
package obitax
|
|
|
|
import (
|
|
"errors"
|
|
"strings"
|
|
)
|
|
|
|
// ParseTaxonString parses a string in the format "code:taxid [scientific name]@rank"
|
|
// and returns the individual components. It handles extra whitespace around components.
|
|
//
|
|
// Parameters:
|
|
// - taxonStr: The string to parse in the format "code:taxid [scientific name]@rank"
|
|
//
|
|
// Returns:
|
|
// - code: The taxonomy code
|
|
// - taxid: The taxon identifier
|
|
// - scientificName: The scientific name (without brackets)
|
|
// - rank: The rank
|
|
// - error: An error if the string format is invalid
|
|
func ParseTaxonString(taxonStr string) (code, taxid, scientificName, rank string, err error) {
|
|
// Trim any leading/trailing whitespace from the entire string
|
|
taxonStr = strings.TrimSpace(taxonStr)
|
|
|
|
// Split by '@' to separate rank
|
|
parts := strings.Split(taxonStr, "@")
|
|
if len(parts) > 2 {
|
|
return "", "", "", "", errors.New("invalid format: multiple '@' characters found")
|
|
}
|
|
|
|
mainPart := strings.TrimSpace(parts[0])
|
|
if len(parts) == 2 {
|
|
rank = strings.TrimSpace(parts[1])
|
|
} else {
|
|
rank = "no rank"
|
|
}
|
|
|
|
// Find scientific name part (enclosed in square brackets)
|
|
startBracket := strings.Index(mainPart, "[")
|
|
endBracket := strings.LastIndex(mainPart, "]")
|
|
|
|
if startBracket == -1 || endBracket == -1 || startBracket > endBracket {
|
|
return "", "", "", "", errors.New("invalid format: scientific name must be enclosed in square brackets")
|
|
}
|
|
|
|
// Extract and clean scientific name
|
|
scientificName = strings.TrimSpace(mainPart[startBracket+1 : endBracket])
|
|
|
|
// Process code:taxid part
|
|
idPart := strings.TrimSpace(mainPart[:startBracket])
|
|
idComponents := strings.Split(idPart, ":")
|
|
|
|
if len(idComponents) != 2 {
|
|
return "", "", "", "", errors.New("invalid format: missing taxonomy code separator ':'")
|
|
}
|
|
|
|
code = strings.TrimSpace(idComponents[0])
|
|
taxid = strings.TrimSpace(idComponents[1])
|
|
|
|
if code == "" || taxid == "" || scientificName == "" {
|
|
return "", "", "", "", errors.New("invalid format: code, taxid and scientific name cannot be empty")
|
|
}
|
|
|
|
return code, taxid, scientificName, rank, nil
|
|
}
|