mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
104 lines
2.0 KiB
Go
104 lines
2.0 KiB
Go
package obitax
|
|
|
|
import (
	"encoding/csv"
	"errors"
	"fmt"
	"io"
	"strings"

	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
	log "github.com/sirupsen/logrus"
)
|
|
|
|
func LoadCSVTaxonomy(path string, onlysn bool) (*Taxonomy, error) {
|
|
|
|
file, err := obiutils.Ropen(path)
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
defer file.Close()
|
|
|
|
csvfile := csv.NewReader(file)
|
|
|
|
csvfile.Comma = ','
|
|
csvfile.ReuseRecord = false
|
|
csvfile.LazyQuotes = true
|
|
csvfile.Comment = '#'
|
|
csvfile.FieldsPerRecord = -1
|
|
csvfile.TrimLeadingSpace = true
|
|
|
|
header, err := csvfile.Read()
|
|
|
|
if err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
|
|
taxidColIndex := -1
|
|
parentColIndex := -1
|
|
scientific_nameColIndex := -1
|
|
rankColIndex := -1
|
|
|
|
for i, colName := range header {
|
|
switch colName {
|
|
case "taxid":
|
|
taxidColIndex = i
|
|
case "parent":
|
|
parentColIndex = i
|
|
case "scientific_name":
|
|
scientific_nameColIndex = i
|
|
case "rank":
|
|
rankColIndex = i
|
|
}
|
|
}
|
|
|
|
if taxidColIndex == -1 {
|
|
return nil, errors.New("taxonomy file does not contain taxid column")
|
|
}
|
|
|
|
if parentColIndex == -1 {
|
|
return nil, errors.New("taxonomy file does not contain parent column")
|
|
}
|
|
|
|
if scientific_nameColIndex == -1 {
|
|
return nil, errors.New("taxonomy file does not contain scientific_name column")
|
|
}
|
|
|
|
if rankColIndex == -1 {
|
|
return nil, errors.New("taxonomy file does not contain rank column")
|
|
}
|
|
|
|
name := obiutils.RemoveAllExt(path)
|
|
short := obiutils.Basename(path)
|
|
taxonomy := NewTaxonomy(name, short, obiutils.AsciiAlphaNumSet)
|
|
|
|
line, err := csvfile.Read()
|
|
|
|
for err != nil {
|
|
taxid := line[taxidColIndex]
|
|
parent := line[parentColIndex]
|
|
scientific_name := line[scientific_nameColIndex]
|
|
rank := line[rankColIndex]
|
|
|
|
parts := strings.Split(rank, ":")
|
|
|
|
rank = parts[0]
|
|
|
|
root := len(parts) > 1 && parts[1] == "root"
|
|
|
|
taxon, err := taxonomy.AddTaxon(taxid, parent, rank, false, root)
|
|
taxon.SetName(scientific_name, "scientific name")
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
}
|
|
|
|
if !taxonomy.HasRoot() {
|
|
return nil, errors.New("taxonomy file does not contain root node")
|
|
}
|
|
|
|
return taxonomy, nil
|
|
}
|