Patch a small bug in JSON write

This commit is contained in:
Eric Coissac
2024-12-20 19:42:03 +01:00
parent abfa8f357a
commit 5d0f996625
14 changed files with 458 additions and 79 deletions

View File

@ -2,18 +2,208 @@ package obiformats
import (
"bytes"
"math"
"strconv"
"strings"
"unsafe"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
"github.com/goccy/go-json"
"github.com/buger/jsonparser"
)
func _parse_json_header_(header string, annotations obiseq.Annotation) string {
// _parse_json_map_string parses a JSON object whose values are all strings
// into a map[string]string. The sequence argument is unused here but kept for
// signature consistency with the other _parse_json_map_* helpers.
//
// Fix: the error returned by jsonparser.ObjectEach was silently discarded,
// so malformed JSON produced a partial map with a nil error. It is now
// propagated to the caller.
func _parse_json_map_string(str []byte, sequence *obiseq.BioSequence) (map[string]string, error) {
	values := make(map[string]string)

	err := jsonparser.ObjectEach(str,
		func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) error {
			values[string(key)] = string(value)
			return nil
		},
	)

	if err != nil {
		return nil, err
	}

	return values, nil
}
// _parse_json_map_int parses a JSON object whose values are all integers
// into a map[string]int. A value that cannot be parsed as an int aborts the
// iteration and the error is returned.
//
// Fix: the error returned by jsonparser.ObjectEach was silently discarded
// (callback errors stop the walk but were never surfaced). It is now
// propagated to the caller.
func _parse_json_map_int(str []byte, sequence *obiseq.BioSequence) (map[string]int, error) {
	values := make(map[string]int)

	err := jsonparser.ObjectEach(str,
		func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) error {
			intval, err := jsonparser.ParseInt(value)
			if err != nil {
				return err
			}
			values[string(key)] = int(intval)
			return nil
		},
	)

	if err != nil {
		return nil, err
	}

	return values, nil
}
// _parse_json_map_float parses a JSON object whose values are all numeric
// into a map[string]float64. A value that cannot be parsed as a float aborts
// the iteration and the error is returned.
//
// Fixes: the error returned by jsonparser.ObjectEach was silently discarded;
// it is now propagated. The redundant float64() conversion of an already
// float64 value has been removed.
func _parse_json_map_float(str []byte, sequence *obiseq.BioSequence) (map[string]float64, error) {
	values := make(map[string]float64)

	err := jsonparser.ObjectEach(str,
		func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) error {
			floatval, err := strconv.ParseFloat(obiutils.UnsafeString(value), 64)
			if err != nil {
				return err
			}
			values[string(key)] = floatval
			return nil
		},
	)

	if err != nil {
		return nil, err
	}

	return values, nil
}
// _parse_json_map_bool parses a JSON object whose values are all booleans
// into a map[string]bool. A value that cannot be parsed as a boolean aborts
// the iteration and the error is returned.
//
// Fix: the error returned by jsonparser.ObjectEach was silently discarded;
// it is now propagated to the caller.
func _parse_json_map_bool(str []byte, sequence *obiseq.BioSequence) (map[string]bool, error) {
	values := make(map[string]bool)

	err := jsonparser.ObjectEach(str,
		func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) error {
			boolval, err := jsonparser.ParseBoolean(value)
			if err != nil {
				return err
			}
			values[string(key)] = boolval
			return nil
		},
	)

	if err != nil {
		return nil, err
	}

	return values, nil
}
// _parse_json_map_interface parses a JSON object with heterogeneous values
// into a map[string]interface{}. Strings stay strings, numbers are stored as
// int64 when integral and float64 otherwise, booleans as bool; any other
// value type is stored as its raw string representation.
//
// Fixes:
//   - the `case jsonparser.Boolean:` arm was empty, so boolean attributes
//     were silently dropped from the map (Go switch cases do not fall
//     through). Booleans are now parsed and stored.
//   - the error returned by jsonparser.ObjectEach was silently discarded;
//     it is now propagated to the caller.
func _parse_json_map_interface(str []byte, sequence *obiseq.BioSequence) (map[string]interface{}, error) {
	values := make(map[string]interface{})

	err := jsonparser.ObjectEach(str,
		func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) error {
			skey := string(key)
			switch dataType {
			case jsonparser.String:
				values[skey] = string(value)
			case jsonparser.Number:
				// Try to parse the number as an int at first,
				// then as a float if that fails.
				if intval, err := jsonparser.ParseInt(value); err == nil {
					values[skey] = intval
				} else if floatval, ferr := strconv.ParseFloat(obiutils.UnsafeString(value), 64); ferr == nil {
					values[skey] = floatval
				} else {
					return ferr
				}
			case jsonparser.Boolean:
				boolval, err := jsonparser.ParseBoolean(value)
				if err != nil {
					return err
				}
				values[skey] = boolval
			default:
				values[skey] = string(value)
			}
			return nil
		},
	)

	if err != nil {
		return nil, err
	}

	return values, nil
}
// _parse_json_array_string parses a JSON array into a []string. Only
// elements of JSON type string are kept; elements of any other type are
// silently skipped (intentional: callers use this for homogeneous arrays).
//
// Fix: the error returned by jsonparser.ArrayEach was silently discarded,
// so malformed JSON produced a partial slice with a nil error. It is now
// propagated to the caller.
func _parse_json_array_string(str []byte, sequence *obiseq.BioSequence) ([]string, error) {
	values := make([]string, 0)

	_, err := jsonparser.ArrayEach(str,
		func(value []byte, dataType jsonparser.ValueType, offset int, _ error) {
			if dataType == jsonparser.String {
				values = append(values, string(value))
			}
		},
	)

	if err != nil {
		return nil, err
	}

	return values, nil
}
// _parse_json_array_int parses a JSON array into a []int. Only elements of
// JSON type number are kept; a number that cannot be parsed as an int is a
// fatal error (the program terminates via log.Fatalf, matching the other
// array parsers in this file).
//
// Fix: the error returned by jsonparser.ArrayEach was silently discarded;
// it is now propagated to the caller.
func _parse_json_array_int(str []byte, sequence *obiseq.BioSequence) ([]int, error) {
	values := make([]int, 0)

	_, err := jsonparser.ArrayEach(str,
		func(value []byte, dataType jsonparser.ValueType, offset int, _ error) {
			if dataType == jsonparser.Number {
				intval, err := jsonparser.ParseInt(value)
				if err != nil {
					log.Fatalf("%s: Parsing int failed on value %s: %s", sequence.Id(), value, err)
				}
				values = append(values, int(intval))
			}
		},
	)

	if err != nil {
		return nil, err
	}

	return values, nil
}
// _parse_json_array_float parses a JSON array into a []float64. Only
// elements of JSON type number are kept; a number that cannot be parsed as
// a float is a fatal error (the program terminates via log.Fatalf, matching
// the other array parsers in this file).
//
// Fixes: the error returned by jsonparser.ArrayEach was silently discarded;
// it is now propagated. The redundant float64() conversion of an already
// float64 value has been removed.
func _parse_json_array_float(str []byte, sequence *obiseq.BioSequence) ([]float64, error) {
	values := make([]float64, 0)

	_, err := jsonparser.ArrayEach(str,
		func(value []byte, dataType jsonparser.ValueType, offset int, _ error) {
			if dataType == jsonparser.Number {
				floatval, err := strconv.ParseFloat(obiutils.UnsafeString(value), 64)
				if err != nil {
					log.Fatalf("%s: Parsing float failed on value %s: %s", sequence.Id(), value, err)
				}
				values = append(values, floatval)
			}
		},
	)

	if err != nil {
		return nil, err
	}

	return values, nil
}
// _parse_json_array_bool parses a JSON array into a []bool. Only elements of
// JSON type boolean are kept; a boolean that cannot be parsed is a fatal
// error (the program terminates via log.Fatalf, matching the other array
// parsers in this file).
//
// Fix: the error returned by jsonparser.ArrayEach was silently discarded;
// it is now propagated to the caller.
func _parse_json_array_bool(str []byte, sequence *obiseq.BioSequence) ([]bool, error) {
	values := make([]bool, 0)

	_, err := jsonparser.ArrayEach(str,
		func(value []byte, dataType jsonparser.ValueType, offset int, _ error) {
			if dataType == jsonparser.Boolean {
				boolval, err := jsonparser.ParseBoolean(value)
				if err != nil {
					log.Fatalf("%s: Parsing bool failed on value %s: %s", sequence.Id(), value, err)
				}
				values = append(values, boolval)
			}
		},
	)

	if err != nil {
		return nil, err
	}

	return values, nil
}
// _parse_json_array_interface parses a JSON array with heterogeneous
// elements into a []interface{}. Strings stay strings; numbers are stored as
// int64 when integral and float64 otherwise; booleans as bool. Any element
// that fails to parse as its detected type, and any element of another type,
// falls back to its raw string representation (lenient by design).
//
// Fix: the error returned by jsonparser.ArrayEach was silently discarded;
// it is now propagated to the caller.
func _parse_json_array_interface(str []byte, sequence *obiseq.BioSequence) ([]interface{}, error) {
	values := make([]interface{}, 0)

	_, err := jsonparser.ArrayEach(str,
		func(value []byte, dataType jsonparser.ValueType, offset int, _ error) {
			switch dataType {
			case jsonparser.String:
				values = append(values, string(value))
			case jsonparser.Number:
				// Try to parse the number as an int at first,
				// then as a float if that fails.
				if intval, err := jsonparser.ParseInt(value); err == nil {
					values = append(values, intval)
				} else if floatval, ferr := strconv.ParseFloat(obiutils.UnsafeString(value), 64); ferr == nil {
					values = append(values, floatval)
				} else {
					values = append(values, string(value))
				}
			case jsonparser.Boolean:
				if boolval, err := jsonparser.ParseBoolean(value); err == nil {
					values = append(values, boolval)
				} else {
					values = append(values, string(value))
				}
			default:
				values = append(values, string(value))
			}
		},
	)

	if err != nil {
		return nil, err
	}

	return values, nil
}
func _parse_json_header_(header string, sequence *obiseq.BioSequence) string {
taxonomy := obitax.DefaultTaxonomy()
annotations := sequence.Annotations()
start := -1
stop := -1
level := 0
@ -51,23 +241,136 @@ func _parse_json_header_(header string, annotations obiseq.Annotation) string {
stop++
err := json.Unmarshal([]byte(header)[start:stop], &annotations)
jsonparser.ObjectEach(obiutils.UnsafeBytes(header[start:stop]),
func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) error {
var err error
for k, v := range annotations {
switch vt := v.(type) {
case float64:
if vt == math.Floor(vt) {
annotations[k] = int(vt)
}
{
annotations[k] = vt
}
}
}
skey := obiutils.UnsafeString(key)
if err != nil {
log.Fatalf("annotation parsing error on %s : %v\n", header, err)
}
switch {
case skey == "id":
sequence.SetId(string(value))
case skey == "definition":
sequence.SetDefinition(string(value))
case skey == "count":
if dataType != jsonparser.Number {
log.Fatalf("%s: Count attribut must be numeric: %s", sequence.Id(), string(value))
}
count, err := jsonparser.ParseInt(value)
if err != nil {
log.Fatalf("%s: Cannot parse count %s", sequence.Id(), string(value))
}
sequence.SetCount(int(count))
case skey == "obiclean_weight":
weight, err := _parse_json_map_int(value, sequence)
if err != nil {
log.Fatalf("%s: Cannot parse obiclean weight %s", sequence.Id(), string(value))
}
annotations[skey] = weight
case skey == "obiclean_status":
status, err := _parse_json_map_string(value, sequence)
if err != nil {
log.Fatalf("%s: Cannot parse obiclean status %s", sequence.Id(), string(value))
}
annotations[skey] = status
case strings.HasPrefix(skey, "merged_"):
if dataType == jsonparser.Object {
data, err := _parse_json_map_int(value, sequence)
if err != nil {
log.Fatalf("%s: Cannot parse merged slot %s: %v", sequence.Id(), skey, err)
} else {
annotations[skey] = data
}
} else {
log.Fatalf("%s: Cannot parse merged slot %s", sequence.Id(), skey)
}
case skey == "taxid":
if dataType == jsonparser.Number || dataType == jsonparser.String {
taxid := obiutils.UnsafeString(value)
taxon := taxonomy.Taxon(taxid)
if taxon != nil {
sequence.SetTaxon(taxon)
} else {
sequence.SetTaxid(string(value))
}
} else {
log.Fatalf("%s: Cannot parse taxid %s", sequence.Id(), string(value))
}
case strings.HasSuffix(skey, "_taxid"):
if dataType == jsonparser.Number || dataType == jsonparser.String {
rank, _ := obiutils.SplitInTwo(skey, '_')
taxid := obiutils.UnsafeString(value)
taxon := taxonomy.Taxon(taxid)
if taxon != nil {
taxid = taxon.String()
} else {
taxid = string(value)
}
sequence.SetTaxid(taxid, rank)
} else {
log.Fatalf("%s: Cannot parse taxid %s", sequence.Id(), string(value))
}
default:
skey = strings.Clone(skey)
switch dataType {
case jsonparser.String:
annotations[skey] = string(value)
case jsonparser.Number:
// Try to parse the number as an int at first then as float if that fails.
annotations[skey], err = jsonparser.ParseInt(value)
if err != nil {
annotations[skey], err = strconv.ParseFloat(obiutils.UnsafeString(value), 64)
}
case jsonparser.Array:
annotations[skey], err = _parse_json_array_interface(value, sequence)
case jsonparser.Object:
annotations[skey], err = _parse_json_map_interface(value, sequence)
case jsonparser.Boolean:
annotations[skey], err = jsonparser.ParseBoolean(value)
case jsonparser.Null:
annotations[skey] = nil
default:
log.Fatalf("Unknown data type %v", dataType)
}
}
if err != nil {
annotations[skey] = "NaN"
log.Fatalf("%s: Cannot parse value %s assicated to key %s into a %s value",
sequence.Id(), string(value), skey, dataType.String())
}
return err
},
)
// err := json.Unmarshal([]byte(header)[start:stop], &annotations)
// for k, v := range annotations {
// switch vt := v.(type) {
// case float64:
// if vt == math.Floor(vt) {
// annotations[k] = int(vt)
// }
// {
// annotations[k] = vt
// }
// }
// }
// if err != nil {
// log.Fatalf("annotation parsing error on %s : %v\n", header, err)
// }
return strings.TrimSpace(header[stop:])
}
@ -78,7 +381,9 @@ func ParseFastSeqJsonHeader(sequence *obiseq.BioSequence) {
definition_part := _parse_json_header_(
definition,
sequence.Annotations())
sequence,
)
if len(definition_part) > 0 {
if sequence.HasDefinition() {
definition_part = sequence.Definition() + " " + definition_part

View File

@ -15,6 +15,19 @@ import (
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
)
// loadNodeTable reads a node table from the provided reader and populates the given taxonomy.
// It is an internal function and should not be called directly. It is part of the NCBI taxdump reader.
// The node table is expected to be in CSV format with a custom delimiter ('|') and comments
// starting with '#'. Each record in the table represents a taxon with its taxid, parent taxid,
// and rank.
//
// Parameters:
// - reader: An io.Reader from which the node table is read.
// - taxonomy: A pointer to an obitax.Taxonomy instance where the taxon data will be added.
//
// The function reads each record from the input, trims whitespace from the taxid, parent, and rank,
// and adds the taxon to the taxonomy. If an error occurs while adding a taxon, the function logs
// a fatal error and terminates the program.
func loadNodeTable(reader io.Reader, taxonomy *obitax.Taxonomy) {
file := csv.NewReader(reader)
file.Comma = '|'
@ -38,6 +51,21 @@ func loadNodeTable(reader io.Reader, taxonomy *obitax.Taxonomy) {
}
}
// loadNameTable reads a name table from the provided reader and populates the given taxonomy.
// It is an internal function and should not be called directly. It is part of the NCBI taxdump reader.
// The name table is expected to be in a custom format with fields separated by the '|' character.
// Each record in the table represents a taxon with its taxid, name, and class name.
//
// Parameters:
// - reader: An io.Reader from which the name table is read.
// - taxonomy: A pointer to an obitax.Taxonomy instance where the taxon names will be set.
// - onlysn: A boolean flag indicating whether to only process records with the class name "scientific name".
//
// Returns:
//
// The number of taxon names successfully loaded into the taxonomy. If a line is too long, -1 is returned.
// The function processes each line, trims whitespace from the taxid, name, and class name, and sets
// the name in the taxonomy if the conditions are met.
func loadNameTable(reader io.Reader, taxonomy *obitax.Taxonomy, onlysn bool) int {
// file := csv.NewReader(reader)
// file.Comma = '|'
@ -71,6 +99,19 @@ func loadNameTable(reader io.Reader, taxonomy *obitax.Taxonomy, onlysn bool) int
return n
}
// loadMergedTable reads a merged table from the provided reader and populates the given taxonomy.
// It is an internal function and should not be called directly. It is part of the NCBI taxdump reader.
// The merged table is expected to be in CSV format with a custom delimiter ('|') and comments
// starting with '#'. Each record in the table represents a mapping between an old taxid and a new taxid.
//
// Parameters:
// - reader: An io.Reader from which the merged table is read.
// - taxonomy: A pointer to an obitax.Taxonomy instance where the alias mappings will be added.
//
// Returns:
//
// The number of alias mappings successfully loaded into the taxonomy. The function processes
// each record, trims whitespace from the old and new taxid, and adds the alias to the taxonomy.
func loadMergedTable(reader io.Reader, taxonomy *obitax.Taxonomy) int {
file := csv.NewReader(reader)
file.Comma = '|'

View File

@ -7,7 +7,7 @@ import (
// TODO: The version number is extracted from git. This induces that the version
// corresponds to the last commit, and not the one when the file will be
// commited
var _Commit = "795df34"
var _Commit = "abfa8f3"
var _Version = "Release 4.2.0"
// Version returns the version of the obitools package.

View File

@ -19,6 +19,8 @@ func TaxonomyClassifier(taxonomicRank string,
taxonomy *obitax.Taxonomy,
abortOnMissing bool) *BioSequenceClassifier {
taxonomy = taxonomy.OrDefault(true)
keys := make(map[*obitax.TaxNode]int)
codes := make([]*obitax.TaxNode, 1)
codes[0] = nil

View File

@ -12,6 +12,8 @@ func (sequence *BioSequence) TaxonomicDistribution(taxonomy *obitax.Taxonomy) ma
taxids := sequence.StatsOn(MakeStatsOnDescription("taxid"), "na")
taxons := make(map[*obitax.TaxNode]int, len(taxids))
taxonomy = taxonomy.OrDefault(true)
for taxid, v := range taxids {
t := taxonomy.Taxon(taxid)
if t == nil {
@ -27,6 +29,9 @@ func (sequence *BioSequence) TaxonomicDistribution(taxonomy *obitax.Taxonomy) ma
}
func (sequence *BioSequence) LCA(taxonomy *obitax.Taxonomy, threshold float64) (*obitax.Taxon, float64, int) {
taxonomy = taxonomy.OrDefault(true)
taxons := sequence.TaxonomicDistribution(taxonomy)
paths := make(map[*obitax.TaxNode]*obitax.TaxonSlice, len(taxons))
answer := (*obitax.TaxNode)(nil)
@ -34,11 +39,11 @@ func (sequence *BioSequence) LCA(taxonomy *obitax.Taxonomy, threshold float64) (
granTotal := 0
for t, w := range taxons {
p := (&obitax.Taxon{Taxonomy: taxonomy,
Node: t,
}).Path()
taxon := &obitax.Taxon{Taxonomy: taxonomy, Node: t}
p := taxon.Path()
if p == nil {
log.Panicf("Sequence %s: taxonomic path cannot be retreived from Taxid %d : %v", sequence.Id(), t.String(taxonomy.Code()))
log.Panicf("Sequence %s: taxonomic path cannot be retreived from Taxid : %s", sequence.Id(), taxon.String())
}
p.Reverse(true)
@ -103,6 +108,8 @@ func (sequence *BioSequence) LCA(taxonomy *obitax.Taxonomy, threshold float64) (
func AddLCAWorker(taxonomy *obitax.Taxonomy, slot_name string, threshold float64) SeqWorker {
taxonomy = taxonomy.OrDefault(true)
if !strings.HasSuffix(slot_name, "taxid") {
slot_name = slot_name + "_taxid"
}

View File

@ -9,6 +9,7 @@ import (
)
func (s *BioSequence) Taxon(taxonomy *obitax.Taxonomy) *obitax.Taxon {
taxid := s.Taxid()
if taxid == "NA" {
return nil
@ -21,16 +22,39 @@ func (s *BioSequence) Taxon(taxonomy *obitax.Taxonomy) *obitax.Taxon {
// Parameters:
//
// taxid - the taxid to set.
func (s *BioSequence) SetTaxid(taxid string) {
func (s *BioSequence) SetTaxid(taxid string, rank ...string) {
if taxid == "" {
taxid = "NA"
} else {
taxonomy := obitax.DefaultTaxonomy()
taxon := (*obitax.Taxon)(nil)
if taxonomy != nil {
taxon = taxonomy.Taxon(taxid)
}
if taxon != nil {
taxid = taxon.String()
}
}
if len(rank) > 0 {
r := rank[0]
s.SetAttribute(r+"_taxid", taxid)
} else {
s.SetAttribute("taxid", taxid)
}
s.SetAttribute("taxid", taxid)
}
func (s *BioSequence) SetTaxon(taxon *obitax.Taxon) {
func (s *BioSequence) SetTaxon(taxon *obitax.Taxon, rank ...string) {
taxid := taxon.String()
s.SetTaxid(taxid)
if len(rank) > 0 {
r := rank[0]
s.SetAttribute(r+"_taxid", taxid)
} else {
s.SetAttribute("taxid", taxid)
}
}
// Taxid returns the taxonomic ID associated with the BioSequence.

View File

@ -35,13 +35,15 @@ type TaxNode struct {
// - taxonomyCode: A string representing the code of the taxonomy to which the node belongs.
//
// Returns:
// - A formatted string representing the TaxNode in the form "taxonomyCode:id [scientificName]".
// - A formatted string representing the TaxNode in the form "taxonomyCode:id [scientificName]@rank".
func (node *TaxNode) String(taxonomyCode string) string {
if node.HasScientificName() {
return fmt.Sprintf("%s:%v [%s]",
return fmt.Sprintf("%s:%v [%s]@%s",
taxonomyCode,
*node.id,
node.ScientificName())
node.ScientificName(),
node.Rank(),
)
}
return fmt.Sprintf("%s:%v",

View File

@ -273,31 +273,31 @@ func CLIAnnotationWorker() obiseq.SeqWorker {
}
if CLIHasTaxonAtRank() {
taxo := obigrep.CLILoadSelectedTaxonomy()
taxo := obitax.DefaultTaxonomy()
w := AddTaxonAtRankWorker(taxo, CLITaxonAtRank()...)
annotator = annotator.ChainWorkers(w)
}
if CLISetTaxonomicPath() {
taxo := obigrep.CLILoadSelectedTaxonomy()
taxo := obitax.DefaultTaxonomy()
w := obiseq.MakeSetPathWorker(taxo)
annotator = annotator.ChainWorkers(w)
}
if CLISetTaxonomicRank() {
taxo := obigrep.CLILoadSelectedTaxonomy()
taxo := obitax.DefaultTaxonomy()
w := AddTaxonRankWorker(taxo)
annotator = annotator.ChainWorkers(w)
}
if CLISetScientificName() {
taxo := obigrep.CLILoadSelectedTaxonomy()
taxo := obitax.DefaultTaxonomy()
w := AddScientificNameWorker(taxo)
annotator = annotator.ChainWorkers(w)
}
if CLIHasAddLCA() {
taxo := obigrep.CLILoadSelectedTaxonomy()
taxo := obitax.DefaultTaxonomy()
w := obiseq.AddLCAWorker(taxo, CLILCASlotName(), CLILCAThreshold())
annotator = annotator.ChainWorkers(w)
}

View File

@ -11,6 +11,7 @@ import (
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obistats"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obigrep"
)
@ -245,7 +246,7 @@ func ICleanDB(itertator obiiter.IBioSequence) obiiter.IBioSequence {
log.Fatal(err)
}
taxonomy := obigrep.CLILoadSelectedTaxonomy()
taxonomy := obitax.DefaultTaxonomy()
if len(obigrep.CLIRequiredRanks()) > 0 {
rankPredicate = obigrep.CLIHasRankDefinedPredicate()

View File

@ -3,6 +3,7 @@ package obiconvert
import (
"os"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
log "github.com/sirupsen/logrus"
"github.com/DavidGamba/go-getoptions"
@ -115,6 +116,7 @@ func PairedFilesOptionSet(options *getoptions.GetOpt) {
}
func OptionSet(options *getoptions.GetOpt) {
obioptions.LoadTaxonomyOptionSet(options, false, false)
InputOptionSet(options)
OutputOptionSet(options)
PairedFilesOptionSet(options)

View File

@ -6,7 +6,7 @@ import (
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats/ncbitaxdump"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
@ -33,7 +33,6 @@ var _Predicats = make([]string, 0)
var _IdList = ""
var _Taxdump = ""
var _Taxonomy = (*obitax.Taxonomy)(nil)
var _RequiredAttributes = make([]string, 0)
var _AttributePatterns = make(map[string]string, 0)
@ -49,10 +48,7 @@ var _pattern_indel = false
var _pattern_only_forward = false
func TaxonomySelectionOptionSet(options *getoptions.GetOpt) {
options.StringVar(&_Taxdump, "taxdump", _Taxdump,
options.Alias("t"),
options.Description("Points to the directory containing the NCBI Taxonomy database dump."))
obioptions.LoadTaxonomyOptionSet(options, false, false)
options.StringSliceVar(&_BelongTaxa, "restrict-to-taxon", 1, 1,
options.Alias("r"),
@ -246,31 +242,12 @@ func CLIPatternBothStrand() bool {
return !_pattern_only_forward
}
func CLILoadSelectedTaxonomy() *obitax.Taxonomy {
if CLISelectedNCBITaxDump() != "" {
if _Taxonomy == nil {
var err error
_Taxonomy, err = ncbitaxdump.LoadNCBITaxDump(CLISelectedNCBITaxDump(), true)
if err != nil {
log.Fatalf("cannot load taxonomy %s : %v",
CLISelectedNCBITaxDump(), err)
return nil
}
}
return _Taxonomy
}
log.Fatalln("no NCBI taxdump selected using option -t|--taxdump")
return nil
}
func CLIRestrictTaxonomyPredicate() obiseq.SequencePredicate {
var p obiseq.SequencePredicate
var p2 obiseq.SequencePredicate
if len(_BelongTaxa) > 0 {
taxonomy := CLILoadSelectedTaxonomy()
taxonomy := obitax.DefaultTaxonomy()
taxon := taxonomy.Taxon(_BelongTaxa[0])
if taxon == nil {
@ -300,7 +277,7 @@ func CLIAvoidTaxonomyPredicate() obiseq.SequencePredicate {
var p2 obiseq.SequencePredicate
if len(_NotBelongTaxa) > 0 {
taxonomy := CLILoadSelectedTaxonomy()
taxonomy := obitax.DefaultTaxonomy()
taxon := taxonomy.Taxon(_NotBelongTaxa[0])
if taxon == nil {
@ -329,7 +306,7 @@ func CLIAvoidTaxonomyPredicate() obiseq.SequencePredicate {
func CLIHasRankDefinedPredicate() obiseq.SequencePredicate {
if len(_RequiredRanks) > 0 {
taxonomy := CLILoadSelectedTaxonomy()
taxonomy := obitax.DefaultTaxonomy()
p := obiseq.HasRequiredRank(taxonomy, _RequiredRanks[0])
for _, rank := range _RequiredRanks[1:] {

View File

@ -70,11 +70,7 @@ func GeomIndexSesquence(seqidx int,
new_lca, _ := lca.LCA(taxa.Taxon(o))
if new_lca.SameAs(lca) {
lca = new_lca
index[int(seq_dist[o])] = fmt.Sprintf(
"%s@%s",
lca.String(),
lca.Rank(),
)
index[int(seq_dist[o])] = lca.String()
if lca.IsRoot() {
break

View File

@ -1,7 +1,6 @@
package obirefidx
import (
"fmt"
"os"
log "github.com/sirupsen/logrus"
@ -172,11 +171,7 @@ func IndexSequence(seqidx int,
for i, d := range closest {
if i < (len(closest)-1) && d < closest[i+1] {
current_taxon := pseq.Taxon(i)
obitag_index[d] = fmt.Sprintf(
"%s@%s",
current_taxon.String(),
current_taxon.Rank(),
)
obitag_index[d] = current_taxon.String()
}
}
@ -197,9 +192,10 @@ func IndexReferenceDB(iterator obiiter.IBioSequence) obiiter.IBioSequence {
source, references := iterator.Load()
log.Infof("Done. Database contains %d sequences", len(references))
taxo, error := obioptions.CLILoadSelectedTaxonomy()
if error != nil {
log.Panicln(error)
taxo := obitax.DefaultTaxonomy()
if taxo == nil {
log.Fatal("No taxonomy loaded.")
}
log.Infoln("Indexing sequence taxids...")

26
pkg/obiutils/unsafe.go Normal file
View File

@ -0,0 +1,26 @@
package obiutils
import "unsafe"
// UnsafeBytes converts a string into a byte slice without making a copy of the data.
// This function is considered unsafe because it directly manipulates memory and does not
// perform any checks on the string's contents. It should be used with caution.
//
// Parameters:
// - str: The input string to be converted into a byte slice.
//
// Returns:
//
// A byte slice representation of the input string. The returned slice shares the same
// underlying data as the original string, so modifications to the byte slice may affect
// the original string and vice versa.
func UnsafeBytes(str string) []byte {
	// Build the slice header directly over the string's backing bytes:
	// zero-copy, so the result must never be mutated.
	return unsafe.Slice(unsafe.StringData(str), len(str))
}
// UnsafeString converts a byte slice into a string without copying the data.
// The returned string shares the same underlying memory as b, so any later
// modification of b changes the string's contents, violating Go's
// string-immutability assumption. It should be used with caution, and only
// when b is guaranteed not to be mutated for the lifetime of the returned
// string.
//
// Parameters:
//   - b: The byte slice to view as a string.
//
// Returns:
//
//	A string sharing b's backing array (zero-copy).
func UnsafeString(b []byte) string {
	return unsafe.String(unsafe.SliceData(b), len(b))
}