mirror of https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00

Patch a small bug on json write
@@ -2,18 +2,208 @@ package obiformats

import (
    "bytes"
    "math"
    "strconv"
    "strings"
    "unsafe"

    log "github.com/sirupsen/logrus"

    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
    "github.com/goccy/go-json"
    "github.com/buger/jsonparser"
)

func _parse_json_header_(header string, annotations obiseq.Annotation) string {
func _parse_json_map_string(str []byte, sequence *obiseq.BioSequence) (map[string]string, error) {
    values := make(map[string]string)
    jsonparser.ObjectEach(str,
        func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
            skey := string(key)
            values[skey] = string(value)
            return
        },
    )
    return values, nil
}

func _parse_json_map_int(str []byte, sequence *obiseq.BioSequence) (map[string]int, error) {
    values := make(map[string]int)
    jsonparser.ObjectEach(str,
        func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
            skey := string(key)
            intval, err := jsonparser.ParseInt(value)
            if err != nil {
                return err
            }
            values[skey] = int(intval)
            return nil
        },
    )
    return values, nil
}

func _parse_json_map_float(str []byte, sequence *obiseq.BioSequence) (map[string]float64, error) {
    values := make(map[string]float64)
    jsonparser.ObjectEach(str,
        func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
            skey := string(key)
            floatval, err := strconv.ParseFloat(obiutils.UnsafeString(value), 64)
            if err != nil {
                return err
            }
            values[skey] = float64(floatval)
            return nil
        },
    )
    return values, nil
}

func _parse_json_map_bool(str []byte, sequence *obiseq.BioSequence) (map[string]bool, error) {
    values := make(map[string]bool)
    jsonparser.ObjectEach(str,
        func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
            skey := string(key)
            boolval, err := jsonparser.ParseBoolean(value)
            if err != nil {
                return err
            }
            values[skey] = boolval
            return nil
        },
    )
    return values, nil
}

func _parse_json_map_interface(str []byte, sequence *obiseq.BioSequence) (map[string]interface{}, error) {
    values := make(map[string]interface{})
    jsonparser.ObjectEach(str,
        func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
            skey := string(key)
            switch dataType {
            case jsonparser.String:
                values[skey] = string(value)
            case jsonparser.Number:
                // Try to parse the number as an int at first then as float if that fails.
                values[skey], err = jsonparser.ParseInt(value)
                if err != nil {
                    values[skey], err = strconv.ParseFloat(obiutils.UnsafeString(value), 64)
                }
                if err != nil {
                    return
                }
            case jsonparser.Boolean:
            default:
                values[skey] = string(value)
            }
            return
        },
    )
    return values, nil
}

func _parse_json_array_string(str []byte, sequence *obiseq.BioSequence) ([]string, error) {
    values := make([]string, 0)
    jsonparser.ArrayEach(str,
        func(value []byte, dataType jsonparser.ValueType, offset int, err error) {
            if dataType == jsonparser.String {
                skey := string(value)
                values = append(values, skey)
            }
        },
    )
    return values, nil
}

func _parse_json_array_int(str []byte, sequence *obiseq.BioSequence) ([]int, error) {
    values := make([]int, 0)
    jsonparser.ArrayEach(str,
        func(value []byte, dataType jsonparser.ValueType, offset int, err error) {
            if dataType == jsonparser.Number {
                intval, err := jsonparser.ParseInt(value)
                if err != nil {
                    log.Fatalf("%s: Parsing int failed on value %s: %s", sequence.Id(), value, err)
                }
                values = append(values, int(intval))
            }
        },
    )
    return values, nil
}

func _parse_json_array_float(str []byte, sequence *obiseq.BioSequence) ([]float64, error) {
    values := make([]float64, 0)
    jsonparser.ArrayEach(str,
        func(value []byte, dataType jsonparser.ValueType, offset int, err error) {
            if dataType == jsonparser.Number {
                floatval, err := strconv.ParseFloat(obiutils.UnsafeString(value), 64)
                if err == nil {
                    values = append(values, float64(floatval))
                } else {
                    log.Fatalf("%s: Parsing float failed on value %s: %s", sequence.Id(), value, err)
                }
            }
        },
    )
    return values, nil
}

func _parse_json_array_bool(str []byte, sequence *obiseq.BioSequence) ([]bool, error) {
    values := make([]bool, 0)
    jsonparser.ArrayEach(str,
        func(value []byte, dataType jsonparser.ValueType, offset int, err error) {
            if dataType == jsonparser.Boolean {
                boolval, err := jsonparser.ParseBoolean(value)
                if err != nil {
                    log.Fatalf("%s: Parsing bool failed on value %s: %s", sequence.Id(), value, err)
                }
                values = append(values, boolval)
            }
        },
    )
    return values, nil
}

func _parse_json_array_interface(str []byte, sequence *obiseq.BioSequence) ([]interface{}, error) {
    values := make([]interface{}, 0)
    jsonparser.ArrayEach(str,
        func(value []byte, dataType jsonparser.ValueType, offset int, err error) {
            switch dataType {
            case jsonparser.String:
                values = append(values, string(value))
            case jsonparser.Number:
                // Try to parse the number as an int at first then as float if that fails.
                intval, err := jsonparser.ParseInt(value)
                if err != nil {
                    floatval, err := strconv.ParseFloat(obiutils.UnsafeString(value), 64)
                    if err != nil {
                        values = append(values, string(value))
                    } else {
                        values = append(values, floatval)
                    }
                } else {
                    values = append(values, intval)
                }
            case jsonparser.Boolean:
                boolval, err := jsonparser.ParseBoolean(value)
                if err != nil {
                    values = append(values, string(value))
                } else {
                    values = append(values, boolval)
                }

            default:
                values = append(values, string(value))
            }

        },
    )
    return values, nil
}

func _parse_json_header_(header string, sequence *obiseq.BioSequence) string {
    taxonomy := obitax.DefaultTaxonomy()

    annotations := sequence.Annotations()
    start := -1
    stop := -1
    level := 0
@@ -51,23 +241,136 @@ func _parse_json_header_(header string, annotations obiseq.Annotation) string {

    stop++

    err := json.Unmarshal([]byte(header)[start:stop], &annotations)
    jsonparser.ObjectEach(obiutils.UnsafeBytes(header[start:stop]),
        func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) error {
            var err error

    for k, v := range annotations {
        switch vt := v.(type) {
        case float64:
            if vt == math.Floor(vt) {
                annotations[k] = int(vt)
            }
            {
                annotations[k] = vt
            }
        }
    }
            skey := obiutils.UnsafeString(key)

    if err != nil {
        log.Fatalf("annotation parsing error on %s : %v\n", header, err)
    }
            switch {
            case skey == "id":
                sequence.SetId(string(value))
            case skey == "definition":
                sequence.SetDefinition(string(value))

            case skey == "count":
                if dataType != jsonparser.Number {
                    log.Fatalf("%s: Count attribute must be numeric: %s", sequence.Id(), string(value))
                }
                count, err := jsonparser.ParseInt(value)
                if err != nil {
                    log.Fatalf("%s: Cannot parse count %s", sequence.Id(), string(value))
                }
                sequence.SetCount(int(count))

            case skey == "obiclean_weight":
                weight, err := _parse_json_map_int(value, sequence)
                if err != nil {
                    log.Fatalf("%s: Cannot parse obiclean weight %s", sequence.Id(), string(value))
                }
                annotations[skey] = weight

            case skey == "obiclean_status":
                status, err := _parse_json_map_string(value, sequence)
                if err != nil {
                    log.Fatalf("%s: Cannot parse obiclean status %s", sequence.Id(), string(value))
                }
                annotations[skey] = status

            case strings.HasPrefix(skey, "merged_"):
                if dataType == jsonparser.Object {
                    data, err := _parse_json_map_int(value, sequence)
                    if err != nil {
                        log.Fatalf("%s: Cannot parse merged slot %s: %v", sequence.Id(), skey, err)
                    } else {
                        annotations[skey] = data
                    }
                } else {
                    log.Fatalf("%s: Cannot parse merged slot %s", sequence.Id(), skey)
                }

case skey == "taxid":
|
||||
if dataType == jsonparser.Number || dataType == jsonparser.String {
|
||||
taxid := obiutils.UnsafeString(value)
|
||||
taxon := taxonomy.Taxon(taxid)
|
||||
if taxon != nil {
|
||||
sequence.SetTaxon(taxon)
|
||||
} else {
|
||||
sequence.SetTaxid(string(value))
|
||||
}
|
||||
} else {
|
||||
log.Fatalf("%s: Cannot parse taxid %s", sequence.Id(), string(value))
|
||||
}
|
||||
|
||||
case strings.HasSuffix(skey, "_taxid"):
|
||||
if dataType == jsonparser.Number || dataType == jsonparser.String {
|
||||
rank, _ := obiutils.SplitInTwo(skey, '_')
|
||||
|
||||
taxid := obiutils.UnsafeString(value)
|
||||
taxon := taxonomy.Taxon(taxid)
|
||||
|
||||
if taxon != nil {
|
||||
taxid = taxon.String()
|
||||
} else {
|
||||
taxid = string(value)
|
||||
}
|
||||
|
||||
sequence.SetTaxid(taxid, rank)
|
||||
} else {
|
||||
log.Fatalf("%s: Cannot parse taxid %s", sequence.Id(), string(value))
|
||||
}
|
||||
|
||||
            default:
                skey = strings.Clone(skey)
                switch dataType {
                case jsonparser.String:
                    annotations[skey] = string(value)
                case jsonparser.Number:
                    // Try to parse the number as an int at first then as float if that fails.
                    annotations[skey], err = jsonparser.ParseInt(value)
                    if err != nil {
                        annotations[skey], err = strconv.ParseFloat(obiutils.UnsafeString(value), 64)
                    }
                case jsonparser.Array:
                    annotations[skey], err = _parse_json_array_interface(value, sequence)
                case jsonparser.Object:
                    annotations[skey], err = _parse_json_map_interface(value, sequence)
                case jsonparser.Boolean:
                    annotations[skey], err = jsonparser.ParseBoolean(value)
                case jsonparser.Null:
                    annotations[skey] = nil
                default:
                    log.Fatalf("Unknown data type %v", dataType)
                }
            }

            if err != nil {
                annotations[skey] = "NaN"
                log.Fatalf("%s: Cannot parse value %s associated to key %s into a %s value",
                    sequence.Id(), string(value), skey, dataType.String())
            }

            return err
        },
    )

    // err := json.Unmarshal([]byte(header)[start:stop], &annotations)

    // for k, v := range annotations {
    //     switch vt := v.(type) {
    //     case float64:
    //         if vt == math.Floor(vt) {
    //             annotations[k] = int(vt)
    //         }
    //         {
    //             annotations[k] = vt
    //         }
    //     }
    // }

    // if err != nil {
    //     log.Fatalf("annotation parsing error on %s : %v\n", header, err)
    // }

    return strings.TrimSpace(header[stop:])
}
@@ -78,7 +381,9 @@ func ParseFastSeqJsonHeader(sequence *obiseq.BioSequence) {

    definition_part := _parse_json_header_(
        definition,
        sequence.Annotations())
        sequence,
    )

    if len(definition_part) > 0 {
        if sequence.HasDefinition() {
            definition_part = sequence.Definition() + " " + definition_part
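Editor's note: the reworked header parser above is built on buger/jsonparser's ObjectEach callback and its "int first, float as fallback" number handling. The standalone sketch below is illustrative only and not part of the commit; the sample JSON and variable names are invented, only the jsonparser calls mirror the code above.

package main

import (
    "fmt"
    "strconv"

    "github.com/buger/jsonparser"
)

func main() {
    header := []byte(`{"count": 4, "score": 0.83, "sample": "A01", "merged_sample": {"A01": 3, "B02": 1}}`)

    // Walk every key/value pair of the JSON object; value holds the raw bytes of each field.
    jsonparser.ObjectEach(header,
        func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) error {
            switch dataType {
            case jsonparser.Number:
                // Same strategy as _parse_json_header_: try int first, fall back to float.
                if i, err := jsonparser.ParseInt(value); err == nil {
                    fmt.Printf("%s -> int %d\n", key, i)
                } else if f, err := strconv.ParseFloat(string(value), 64); err == nil {
                    fmt.Printf("%s -> float %g\n", key, f)
                }
            case jsonparser.Object:
                fmt.Printf("%s -> nested object %s\n", key, value)
            default:
                fmt.Printf("%s -> %s (%s)\n", key, value, dataType.String())
            }
            return nil
        },
    )
}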
@@ -15,6 +15,19 @@ import (
    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
)

// loadNodeTable reads a node table from the provided reader and populates the given taxonomy.
// It is an internal function and should not be called directly. It is part of the NCBI taxdump reader.
// The node table is expected to be in CSV format with a custom delimiter ('|') and comments
// starting with '#'. Each record in the table represents a taxon with its taxid, parent taxid,
// and rank.
//
// Parameters:
//   - reader: An io.Reader from which the node table is read.
//   - taxonomy: A pointer to an obitax.Taxonomy instance where the taxon data will be added.
//
// The function reads each record from the input, trims whitespace from the taxid, parent, and rank,
// and adds the taxon to the taxonomy. If an error occurs while adding a taxon, the function logs
// a fatal error and terminates the program.
func loadNodeTable(reader io.Reader, taxonomy *obitax.Taxonomy) {
    file := csv.NewReader(reader)
    file.Comma = '|'
@@ -38,6 +51,21 @@ func loadNodeTable(reader io.Reader, taxonomy *obitax.Taxonomy) {
    }
}

// loadNameTable reads a name table from the provided reader and populates the given taxonomy.
// It is an internal function and should not be called directly. It is part of the NCBI taxdump reader.
// The name table is expected to be in a custom format with fields separated by the '|' character.
// Each record in the table represents a taxon with its taxid, name, and class name.
//
// Parameters:
//   - reader: An io.Reader from which the name table is read.
//   - taxonomy: A pointer to an obitax.Taxonomy instance where the taxon names will be set.
//   - onlysn: A boolean flag indicating whether to only process records with the class name "scientific name".
//
// Returns:
//
// The number of taxon names successfully loaded into the taxonomy. If a line is too long, -1 is returned.
// The function processes each line, trims whitespace from the taxid, name, and class name, and sets
// the name in the taxonomy if the conditions are met.
func loadNameTable(reader io.Reader, taxonomy *obitax.Taxonomy, onlysn bool) int {
    // file := csv.NewReader(reader)
    // file.Comma = '|'
@@ -71,6 +99,19 @@ func loadNameTable(reader io.Reader, taxonomy *obitax.Taxonomy, onlysn bool) int
    return n
}

// loadMergedTable reads a merged table from the provided reader and populates the given taxonomy.
// It is an internal function and should not be called directly. It is part of the NCBI taxdump reader.
// The merged table is expected to be in CSV format with a custom delimiter ('|') and comments
// starting with '#'. Each record in the table represents a mapping between an old taxid and a new taxid.
//
// Parameters:
//   - reader: An io.Reader from which the merged table is read.
//   - taxonomy: A pointer to an obitax.Taxonomy instance where the alias mappings will be added.
//
// Returns:
//
// The number of alias mappings successfully loaded into the taxonomy. The function processes
// each record, trims whitespace from the old and new taxid, and adds the alias to the taxonomy.
func loadMergedTable(reader io.Reader, taxonomy *obitax.Taxonomy) int {
    file := csv.NewReader(reader)
    file.Comma = '|'
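Editor's note: the loaders above describe their input as '|'-delimited records with '#' comment lines and whitespace padding around each field. The self-contained sketch below shows that reading pattern with encoding/csv; the sample data and field layout are illustrative, not the actual taxdump contents.

package main

import (
    "encoding/csv"
    "fmt"
    "strings"
)

func main() {
    // Two fake node records in the nodes.dmp style: taxid | parent taxid | rank | ...
    data := "# comment lines start with '#'\n" +
        "1\t|\t1\t|\tno rank\t|\n" +
        "2\t|\t131567\t|\tsuperkingdom\t|\n"

    file := csv.NewReader(strings.NewReader(data))
    file.Comma = '|'   // custom delimiter, as set in loadNodeTable
    file.Comment = '#' // skip comment lines

    records, err := file.ReadAll()
    if err != nil {
        panic(err)
    }

    for _, record := range records {
        // Fields keep their tab padding, hence the TrimSpace calls, as in the loaders above.
        taxid := strings.TrimSpace(record[0])
        parent := strings.TrimSpace(record[1])
        rank := strings.TrimSpace(record[2])
        fmt.Printf("taxon %s (parent %s, rank %s)\n", taxid, parent, rank)
    }
}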
@@ -7,7 +7,7 @@ import (
// TODO: The version number is extracted from git. This induces that the version
// corresponds to the last commit, and not the one when the file will be
// committed
var _Commit = "795df34"
var _Commit = "abfa8f3"
var _Version = "Release 4.2.0"

// Version returns the version of the obitools package.
@@ -19,6 +19,8 @@ func TaxonomyClassifier(taxonomicRank string,
    taxonomy *obitax.Taxonomy,
    abortOnMissing bool) *BioSequenceClassifier {

    taxonomy = taxonomy.OrDefault(true)

    keys := make(map[*obitax.TaxNode]int)
    codes := make([]*obitax.TaxNode, 1)
    codes[0] = nil
@@ -12,6 +12,8 @@ func (sequence *BioSequence) TaxonomicDistribution(taxonomy *obitax.Taxonomy) ma
    taxids := sequence.StatsOn(MakeStatsOnDescription("taxid"), "na")
    taxons := make(map[*obitax.TaxNode]int, len(taxids))

    taxonomy = taxonomy.OrDefault(true)

    for taxid, v := range taxids {
        t := taxonomy.Taxon(taxid)
        if t == nil {
@@ -27,6 +29,9 @@ func (sequence *BioSequence) TaxonomicDistribution(taxonomy *obitax.Taxonomy) ma
    }
}

func (sequence *BioSequence) LCA(taxonomy *obitax.Taxonomy, threshold float64) (*obitax.Taxon, float64, int) {

    taxonomy = taxonomy.OrDefault(true)

    taxons := sequence.TaxonomicDistribution(taxonomy)
    paths := make(map[*obitax.TaxNode]*obitax.TaxonSlice, len(taxons))
    answer := (*obitax.TaxNode)(nil)
@@ -34,11 +39,11 @@ func (sequence *BioSequence) LCA(taxonomy *obitax.Taxonomy, threshold float64) (
    granTotal := 0

    for t, w := range taxons {
        p := (&obitax.Taxon{Taxonomy: taxonomy,
            Node: t,
        }).Path()
        taxon := &obitax.Taxon{Taxonomy: taxonomy, Node: t}
        p := taxon.Path()

        if p == nil {
            log.Panicf("Sequence %s: taxonomic path cannot be retrieved from Taxid %d : %v", sequence.Id(), t.String(taxonomy.Code()))
            log.Panicf("Sequence %s: taxonomic path cannot be retrieved from Taxid : %s", sequence.Id(), taxon.String())
        }

        p.Reverse(true)
@@ -103,6 +108,8 @@ func (sequence *BioSequence) LCA(taxonomy *obitax.Taxonomy, threshold float64) (

func AddLCAWorker(taxonomy *obitax.Taxonomy, slot_name string, threshold float64) SeqWorker {

    taxonomy = taxonomy.OrDefault(true)

    if !strings.HasSuffix(slot_name, "taxid") {
        slot_name = slot_name + "_taxid"
    }
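Editor's note: several hunks in this commit replace an explicitly loaded taxonomy with `taxonomy = taxonomy.OrDefault(true)`. The obitax implementation of OrDefault is not shown in this diff; the standalone sketch below only illustrates the nil-safe "fall back to a package default" pattern such a method typically follows, with assumed names and behaviour.

package main

import "log"

// Taxonomy is a simplified stand-in type for the purpose of this sketch.
type Taxonomy struct{ name string }

var defaultTaxonomy *Taxonomy

// OrDefault returns the receiver when it is non-nil, otherwise the package-level
// default. With required == true, a missing default is treated as a fatal error.
func (t *Taxonomy) OrDefault(required bool) *Taxonomy {
    if t != nil {
        return t
    }
    if defaultTaxonomy == nil && required {
        log.Fatal("no taxonomy loaded and no default available")
    }
    return defaultTaxonomy
}

func main() {
    defaultTaxonomy = &Taxonomy{name: "ncbi"}

    var t *Taxonomy       // nil: the caller did not provide a taxonomy
    t = t.OrDefault(true) // falls back to the package default
    log.Printf("using taxonomy %q", t.name)
}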
@@ -9,6 +9,7 @@ import (
)

func (s *BioSequence) Taxon(taxonomy *obitax.Taxonomy) *obitax.Taxon {

    taxid := s.Taxid()
    if taxid == "NA" {
        return nil
@@ -21,16 +22,39 @@ func (s *BioSequence) Taxon(taxonomy *obitax.Taxonomy) *obitax.Taxon {
// Parameters:
//
//   taxid - the taxid to set.
func (s *BioSequence) SetTaxid(taxid string) {
func (s *BioSequence) SetTaxid(taxid string, rank ...string) {
    if taxid == "" {
        taxid = "NA"
    } else {
        taxonomy := obitax.DefaultTaxonomy()
        taxon := (*obitax.Taxon)(nil)

        if taxonomy != nil {
            taxon = taxonomy.Taxon(taxid)
        }

        if taxon != nil {
            taxid = taxon.String()
        }
    }

    if len(rank) > 0 {
        r := rank[0]
        s.SetAttribute(r+"_taxid", taxid)
    } else {
        s.SetAttribute("taxid", taxid)
    }
    s.SetAttribute("taxid", taxid)
}

func (s *BioSequence) SetTaxon(taxon *obitax.Taxon) {
func (s *BioSequence) SetTaxon(taxon *obitax.Taxon, rank ...string) {
    taxid := taxon.String()
    s.SetTaxid(taxid)

    if len(rank) > 0 {
        r := rank[0]
        s.SetAttribute(r+"_taxid", taxid)
    } else {
        s.SetAttribute("taxid", taxid)
    }
}

// Taxid returns the taxonomic ID associated with the BioSequence.
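Editor's note: SetTaxid and SetTaxon now take an optional rank through a variadic string parameter and route the value either to the generic `taxid` attribute or to a rank-specific `<rank>_taxid` slot. The minimal sketch below only illustrates that variadic-optional-argument pattern; the type and attribute store are simplified stand-ins, not the actual obiseq implementation.

package main

import "fmt"

// Sequence is a simplified stand-in with a plain map as its attribute store.
type Sequence struct{ attributes map[string]string }

func (s *Sequence) SetAttribute(key, value string) {
    if s.attributes == nil {
        s.attributes = make(map[string]string)
    }
    s.attributes[key] = value
}

// SetTaxid stores the taxid either under "taxid" or, when a rank is given,
// under "<rank>_taxid", mirroring the optional-rank signature added above.
func (s *Sequence) SetTaxid(taxid string, rank ...string) {
    if len(rank) > 0 {
        s.SetAttribute(rank[0]+"_taxid", taxid)
    } else {
        s.SetAttribute("taxid", taxid)
    }
}

func main() {
    s := &Sequence{}
    s.SetTaxid("taxon:9606 [Homo sapiens]@species")           // default "taxid" slot
    s.SetTaxid("taxon:9604 [Hominidae]@family", "family")     // rank-specific "family_taxid" slot
    fmt.Println(s.attributes)
}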
@@ -35,13 +35,15 @@ type TaxNode struct {
// - taxonomyCode: A string representing the code of the taxonomy to which the node belongs.
//
// Returns:
//   - A formatted string representing the TaxNode in the form "taxonomyCode:id [scientificName]".
//   - A formatted string representing the TaxNode in the form "taxonomyCode:id [scientificName]@rank".
func (node *TaxNode) String(taxonomyCode string) string {
    if node.HasScientificName() {
        return fmt.Sprintf("%s:%v [%s]",
        return fmt.Sprintf("%s:%v [%s]@%s",
            taxonomyCode,
            *node.id,
            node.ScientificName())
            node.ScientificName(),
            node.Rank(),
        )
    }

    return fmt.Sprintf("%s:%v",
@@ -273,31 +273,31 @@ func CLIAnnotationWorker() obiseq.SeqWorker {
    }

    if CLIHasTaxonAtRank() {
        taxo := obigrep.CLILoadSelectedTaxonomy()
        taxo := obitax.DefaultTaxonomy()
        w := AddTaxonAtRankWorker(taxo, CLITaxonAtRank()...)
        annotator = annotator.ChainWorkers(w)
    }

    if CLISetTaxonomicPath() {
        taxo := obigrep.CLILoadSelectedTaxonomy()
        taxo := obitax.DefaultTaxonomy()
        w := obiseq.MakeSetPathWorker(taxo)
        annotator = annotator.ChainWorkers(w)
    }

    if CLISetTaxonomicRank() {
        taxo := obigrep.CLILoadSelectedTaxonomy()
        taxo := obitax.DefaultTaxonomy()
        w := AddTaxonRankWorker(taxo)
        annotator = annotator.ChainWorkers(w)
    }

    if CLISetScientificName() {
        taxo := obigrep.CLILoadSelectedTaxonomy()
        taxo := obitax.DefaultTaxonomy()
        w := AddScientificNameWorker(taxo)
        annotator = annotator.ChainWorkers(w)
    }

    if CLIHasAddLCA() {
        taxo := obigrep.CLILoadSelectedTaxonomy()
        taxo := obitax.DefaultTaxonomy()
        w := obiseq.AddLCAWorker(taxo, CLILCASlotName(), CLILCAThreshold())
        annotator = annotator.ChainWorkers(w)
    }
@@ -11,6 +11,7 @@ import (
    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obistats"
    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obigrep"
)

@@ -245,7 +246,7 @@ func ICleanDB(itertator obiiter.IBioSequence) obiiter.IBioSequence {
        log.Fatal(err)
    }

    taxonomy := obigrep.CLILoadSelectedTaxonomy()
    taxonomy := obitax.DefaultTaxonomy()

    if len(obigrep.CLIRequiredRanks()) > 0 {
        rankPredicate = obigrep.CLIHasRankDefinedPredicate()
@@ -3,6 +3,7 @@ package obiconvert
import (
    "os"

    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
    log "github.com/sirupsen/logrus"

    "github.com/DavidGamba/go-getoptions"
@@ -115,6 +116,7 @@ func PairedFilesOptionSet(options *getoptions.GetOpt) {
}

func OptionSet(options *getoptions.GetOpt) {
    obioptions.LoadTaxonomyOptionSet(options, false, false)
    InputOptionSet(options)
    OutputOptionSet(options)
    PairedFilesOptionSet(options)
@@ -6,7 +6,7 @@ import (
    log "github.com/sirupsen/logrus"

    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat"
    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats/ncbitaxdump"
    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
@@ -33,7 +33,6 @@ var _Predicats = make([]string, 0)
var _IdList = ""

var _Taxdump = ""
var _Taxonomy = (*obitax.Taxonomy)(nil)

var _RequiredAttributes = make([]string, 0)
var _AttributePatterns = make(map[string]string, 0)
@@ -49,10 +48,7 @@ var _pattern_indel = false
var _pattern_only_forward = false

func TaxonomySelectionOptionSet(options *getoptions.GetOpt) {

    options.StringVar(&_Taxdump, "taxdump", _Taxdump,
        options.Alias("t"),
        options.Description("Points to the directory containing the NCBI Taxonomy database dump."))
    obioptions.LoadTaxonomyOptionSet(options, false, false)

    options.StringSliceVar(&_BelongTaxa, "restrict-to-taxon", 1, 1,
        options.Alias("r"),
@@ -246,31 +242,12 @@ func CLIPatternBothStrand() bool {
    return !_pattern_only_forward
}

func CLILoadSelectedTaxonomy() *obitax.Taxonomy {
    if CLISelectedNCBITaxDump() != "" {
        if _Taxonomy == nil {
            var err error
            _Taxonomy, err = ncbitaxdump.LoadNCBITaxDump(CLISelectedNCBITaxDump(), true)
            if err != nil {
                log.Fatalf("cannot load taxonomy %s : %v",
                    CLISelectedNCBITaxDump(), err)
                return nil
            }
        }
        return _Taxonomy
    }

    log.Fatalln("no NCBI taxdump selected using option -t|--taxdump")

    return nil
}

func CLIRestrictTaxonomyPredicate() obiseq.SequencePredicate {
    var p obiseq.SequencePredicate
    var p2 obiseq.SequencePredicate

    if len(_BelongTaxa) > 0 {
        taxonomy := CLILoadSelectedTaxonomy()
        taxonomy := obitax.DefaultTaxonomy()

        taxon := taxonomy.Taxon(_BelongTaxa[0])
        if taxon == nil {
@@ -300,7 +277,7 @@ func CLIAvoidTaxonomyPredicate() obiseq.SequencePredicate {
    var p2 obiseq.SequencePredicate

    if len(_NotBelongTaxa) > 0 {
        taxonomy := CLILoadSelectedTaxonomy()
        taxonomy := obitax.DefaultTaxonomy()

        taxon := taxonomy.Taxon(_NotBelongTaxa[0])
        if taxon == nil {
@@ -329,7 +306,7 @@ func CLIAvoidTaxonomyPredicate() obiseq.SequencePredicate {
func CLIHasRankDefinedPredicate() obiseq.SequencePredicate {

    if len(_RequiredRanks) > 0 {
        taxonomy := CLILoadSelectedTaxonomy()
        taxonomy := obitax.DefaultTaxonomy()
        p := obiseq.HasRequiredRank(taxonomy, _RequiredRanks[0])

        for _, rank := range _RequiredRanks[1:] {
@@ -70,11 +70,7 @@ func GeomIndexSesquence(seqidx int,
        new_lca, _ := lca.LCA(taxa.Taxon(o))
        if new_lca.SameAs(lca) {
            lca = new_lca
            index[int(seq_dist[o])] = fmt.Sprintf(
                "%s@%s",
                lca.String(),
                lca.Rank(),
            )
            index[int(seq_dist[o])] = lca.String()

            if lca.IsRoot() {
                break
@@ -1,7 +1,6 @@
package obirefidx

import (
    "fmt"
    "os"

    log "github.com/sirupsen/logrus"
@@ -172,11 +171,7 @@ func IndexSequence(seqidx int,
    for i, d := range closest {
        if i < (len(closest)-1) && d < closest[i+1] {
            current_taxon := pseq.Taxon(i)
            obitag_index[d] = fmt.Sprintf(
                "%s@%s",
                current_taxon.String(),
                current_taxon.Rank(),
            )
            obitag_index[d] = current_taxon.String()
        }
    }

@@ -197,9 +192,10 @@ func IndexReferenceDB(iterator obiiter.IBioSequence) obiiter.IBioSequence {
    source, references := iterator.Load()
    log.Infof("Done. Database contains %d sequences", len(references))

    taxo, error := obioptions.CLILoadSelectedTaxonomy()
    if error != nil {
        log.Panicln(error)
    taxo := obitax.DefaultTaxonomy()

    if taxo == nil {
        log.Fatal("No taxonomy loaded.")
    }

    log.Infoln("Indexing sequence taxids...")
pkg/obiutils/unsafe.go (new file, 26 lines)
@@ -0,0 +1,26 @@
package obiutils

import "unsafe"

// UnsafeBytes converts a string into a byte slice without making a copy of the data.
// This function is considered unsafe because it directly manipulates memory and does not
// perform any checks on the string's contents. It should be used with caution.
//
// Parameters:
//   - str: The input string to be converted into a byte slice.
//
// Returns:
//
// A byte slice representation of the input string. The returned slice shares the same
// underlying data as the original string, so modifications to the byte slice may affect
// the original string and vice versa.
func UnsafeBytes(str string) []byte {
    d := unsafe.StringData(str)
    b := unsafe.Slice(d, len(str))

    return b
}

func UnsafeString(b []byte) string {
    return unsafe.String(unsafe.SliceData(b), len(b))
}
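Editor's note: these zero-copy helpers are what the JSON header parser above uses to convert between string and []byte without allocating. A short usage sketch follows; the surrounding main function is illustrative, only UnsafeBytes and UnsafeString come from the commit.

package main

import (
    "fmt"

    "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
)

func main() {
    header := `{"count": 10}`

    // Zero-copy view of the string's bytes: no allocation, but the slice must be
    // treated as read-only because it aliases the string's backing memory.
    raw := obiutils.UnsafeBytes(header)
    fmt.Println(len(raw), string(raw[:8]))

    // And back again: wraps the byte slice without copying.
    back := obiutils.UnsafeString(raw)
    fmt.Println(back == header)
}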