mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
introduce obidefault
This commit is contained in:
103
pkg/obitax/csvtaxdump_read.go
Normal file
103
pkg/obitax/csvtaxdump_read.go
Normal file
@ -0,0 +1,103 @@
|
||||
package obitax
|
||||
|
||||
import (
	"encoding/csv"
	"errors"
	"fmt"
	"io"
	"strings"

	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
	log "github.com/sirupsen/logrus"
)
|
||||
|
||||
func LoadCSVTaxonomy(path string, onlysn bool) (*Taxonomy, error) {
|
||||
|
||||
file, err := obiutils.Ropen(path)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
defer file.Close()
|
||||
|
||||
csvfile := csv.NewReader(file)
|
||||
|
||||
csvfile.Comma = ','
|
||||
csvfile.ReuseRecord = false
|
||||
csvfile.LazyQuotes = true
|
||||
csvfile.Comment = '#'
|
||||
csvfile.FieldsPerRecord = -1
|
||||
csvfile.TrimLeadingSpace = true
|
||||
|
||||
header, err := csvfile.Read()
|
||||
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
taxidColIndex := -1
|
||||
parentColIndex := -1
|
||||
scientific_nameColIndex := -1
|
||||
rankColIndex := -1
|
||||
|
||||
for i, colName := range header {
|
||||
switch colName {
|
||||
case "taxid":
|
||||
taxidColIndex = i
|
||||
case "parent":
|
||||
parentColIndex = i
|
||||
case "scientific_name":
|
||||
scientific_nameColIndex = i
|
||||
case "rank":
|
||||
rankColIndex = i
|
||||
}
|
||||
}
|
||||
|
||||
if taxidColIndex == -1 {
|
||||
return nil, errors.New("taxonomy file does not contain taxid column")
|
||||
}
|
||||
|
||||
if parentColIndex == -1 {
|
||||
return nil, errors.New("taxonomy file does not contain parent column")
|
||||
}
|
||||
|
||||
if scientific_nameColIndex == -1 {
|
||||
return nil, errors.New("taxonomy file does not contain scientific_name column")
|
||||
}
|
||||
|
||||
if rankColIndex == -1 {
|
||||
return nil, errors.New("taxonomy file does not contain rank column")
|
||||
}
|
||||
|
||||
name := obiutils.RemoveAllExt(path)
|
||||
short := obiutils.Basename(path)
|
||||
taxonomy := NewTaxonomy(name, short, obiutils.AsciiAlphaNumSet)
|
||||
|
||||
line, err := csvfile.Read()
|
||||
|
||||
for err != nil {
|
||||
taxid := line[taxidColIndex]
|
||||
parent := line[parentColIndex]
|
||||
scientific_name := line[scientific_nameColIndex]
|
||||
rank := line[rankColIndex]
|
||||
|
||||
parts := strings.Split(rank, ":")
|
||||
|
||||
rank = parts[0]
|
||||
|
||||
root := len(parts) > 1 && parts[1] == "root"
|
||||
|
||||
taxon, err := taxonomy.AddTaxon(taxid, parent, rank, false, root)
|
||||
taxon.SetName(scientific_name, "scientific name")
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if !taxonomy.HasRoot() {
|
||||
return nil, errors.New("taxonomy file does not contain root node")
|
||||
}
|
||||
|
||||
return taxonomy, nil
|
||||
}
|
Reference in New Issue
Block a user