mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
403 lines
10 KiB
Go
403 lines
10 KiB
Go
package obiformats
|
|
|
|
import (
|
|
"bytes"
|
|
"strconv"
|
|
"strings"
|
|
"unsafe"
|
|
|
|
log "github.com/sirupsen/logrus"
|
|
|
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
|
"github.com/buger/jsonparser"
|
|
)
|
|
|
|
func _parse_json_map_string(str []byte) (map[string]string, error) {
|
|
values := make(map[string]string)
|
|
jsonparser.ObjectEach(str,
|
|
func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
|
|
skey := string(key)
|
|
values[skey] = string(value)
|
|
return
|
|
},
|
|
)
|
|
return values, nil
|
|
}
|
|
|
|
func _parse_json_map_int(str []byte) (map[string]int, error) {
|
|
values := make(map[string]int)
|
|
jsonparser.ObjectEach(str,
|
|
func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
|
|
skey := string(key)
|
|
intval, err := jsonparser.ParseInt(value)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
values[skey] = int(intval)
|
|
return nil
|
|
},
|
|
)
|
|
return values, nil
|
|
}
|
|
|
|
func _parse_json_map_float(str []byte) (map[string]float64, error) {
|
|
values := make(map[string]float64)
|
|
jsonparser.ObjectEach(str,
|
|
func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
|
|
skey := string(key)
|
|
floatval, err := strconv.ParseFloat(obiutils.UnsafeString(value), 64)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
values[skey] = float64(floatval)
|
|
return nil
|
|
},
|
|
)
|
|
return values, nil
|
|
}
|
|
|
|
func _parse_json_map_bool(str []byte) (map[string]bool, error) {
|
|
values := make(map[string]bool)
|
|
jsonparser.ObjectEach(str,
|
|
func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
|
|
skey := string(key)
|
|
boolval, err := jsonparser.ParseBoolean(value)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
values[skey] = boolval
|
|
return nil
|
|
},
|
|
)
|
|
return values, nil
|
|
}
|
|
|
|
func _parse_json_map_interface(str []byte) (map[string]interface{}, error) {
|
|
values := make(map[string]interface{})
|
|
jsonparser.ObjectEach(str,
|
|
func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) (err error) {
|
|
skey := string(key)
|
|
switch dataType {
|
|
case jsonparser.String:
|
|
values[skey] = string(value)
|
|
case jsonparser.Number:
|
|
// Try to parse the number as an int at first then as float if that fails.
|
|
values[skey], err = jsonparser.ParseInt(value)
|
|
if err != nil {
|
|
values[skey], err = strconv.ParseFloat(obiutils.UnsafeString(value), 64)
|
|
}
|
|
if err != nil {
|
|
return
|
|
}
|
|
case jsonparser.Boolean:
|
|
default:
|
|
values[skey] = string(value)
|
|
}
|
|
return
|
|
},
|
|
)
|
|
return values, nil
|
|
}
|
|
|
|
func _parse_json_array_string(str []byte) ([]string, error) {
|
|
values := make([]string, 0)
|
|
jsonparser.ArrayEach(str,
|
|
func(value []byte, dataType jsonparser.ValueType, offset int, err error) {
|
|
if dataType == jsonparser.String {
|
|
skey := string(value)
|
|
values = append(values, skey)
|
|
}
|
|
},
|
|
)
|
|
return values, nil
|
|
}
|
|
|
|
func _parse_json_array_int(str []byte, sequence *obiseq.BioSequence) ([]int, error) {
|
|
values := make([]int, 0)
|
|
jsonparser.ArrayEach(str,
|
|
func(value []byte, dataType jsonparser.ValueType, offset int, err error) {
|
|
if dataType == jsonparser.Number {
|
|
intval, err := jsonparser.ParseInt(value)
|
|
if err != nil {
|
|
log.Fatalf("%s: Parsing int failed on value %s: %s", sequence.Id(), value, err)
|
|
}
|
|
values = append(values, int(intval))
|
|
}
|
|
},
|
|
)
|
|
return values, nil
|
|
}
|
|
|
|
func _parse_json_array_float(str []byte, sequence *obiseq.BioSequence) ([]float64, error) {
|
|
values := make([]float64, 0)
|
|
jsonparser.ArrayEach(str,
|
|
func(value []byte, dataType jsonparser.ValueType, offset int, err error) {
|
|
if dataType == jsonparser.Number {
|
|
floatval, err := strconv.ParseFloat(obiutils.UnsafeString(value), 64)
|
|
if err == nil {
|
|
values = append(values, float64(floatval))
|
|
} else {
|
|
log.Fatalf("%s: Parsing float failed on value %s: %s", sequence.Id(), value, err)
|
|
}
|
|
}
|
|
},
|
|
)
|
|
return values, nil
|
|
}
|
|
|
|
func _parse_json_array_bool(str []byte, sequence *obiseq.BioSequence) ([]bool, error) {
|
|
values := make([]bool, 0)
|
|
jsonparser.ArrayEach(str,
|
|
func(value []byte, dataType jsonparser.ValueType, offset int, err error) {
|
|
if dataType == jsonparser.Boolean {
|
|
boolval, err := jsonparser.ParseBoolean(value)
|
|
if err != nil {
|
|
log.Fatalf("%s: Parsing bool failed on value %s: %s", sequence.Id(), value, err)
|
|
}
|
|
values = append(values, boolval)
|
|
}
|
|
},
|
|
)
|
|
return values, nil
|
|
}
|
|
|
|
func _parse_json_array_interface(str []byte) ([]interface{}, error) {
|
|
values := make([]interface{}, 0)
|
|
jsonparser.ArrayEach(str,
|
|
func(value []byte, dataType jsonparser.ValueType, offset int, err error) {
|
|
switch dataType {
|
|
case jsonparser.String:
|
|
values = append(values, string(value))
|
|
case jsonparser.Number:
|
|
// Try to parse the number as an int at first then as float if that fails.
|
|
intval, err := jsonparser.ParseInt(value)
|
|
if err != nil {
|
|
floatval, err := strconv.ParseFloat(obiutils.UnsafeString(value), 64)
|
|
if err != nil {
|
|
values = append(values, string(value))
|
|
} else {
|
|
values = append(values, floatval)
|
|
}
|
|
} else {
|
|
values = append(values, intval)
|
|
}
|
|
case jsonparser.Boolean:
|
|
boolval, err := jsonparser.ParseBoolean(value)
|
|
if err != nil {
|
|
values = append(values, string(value))
|
|
} else {
|
|
values = append(values, boolval)
|
|
}
|
|
|
|
default:
|
|
values = append(values, string(value))
|
|
}
|
|
|
|
},
|
|
)
|
|
return values, nil
|
|
}
|
|
|
|
func _parse_json_header_(header string, sequence *obiseq.BioSequence) string {
|
|
annotations := sequence.Annotations()
|
|
start := -1
|
|
stop := -1
|
|
level := 0
|
|
lh := len(header)
|
|
inquote := false
|
|
|
|
for i := 0; (i < lh) && (stop < 0); i++ {
|
|
// fmt.Printf("[%d,%d-%d] : %d (%c) (%d,%c)\n", i, start, stop, header[i], header[i], '{', '{')
|
|
if level == 0 && header[i] == '{' && !inquote {
|
|
start = i
|
|
}
|
|
|
|
// TODO: escaped double quotes are not considered
|
|
if start > -1 && header[i] == '"' {
|
|
inquote = !inquote
|
|
}
|
|
|
|
if header[i] == '{' && !inquote {
|
|
level++
|
|
}
|
|
|
|
if header[i] == '}' && !inquote {
|
|
level--
|
|
}
|
|
|
|
if start >= 0 && level == 0 {
|
|
stop = i
|
|
}
|
|
|
|
}
|
|
|
|
if start < 0 || stop < 0 {
|
|
return header
|
|
}
|
|
|
|
stop++
|
|
|
|
jsonparser.ObjectEach(obiutils.UnsafeBytes(header[start:stop]),
|
|
func(key []byte, value []byte, dataType jsonparser.ValueType, offset int) error {
|
|
var err error
|
|
|
|
skey := obiutils.UnsafeString(key)
|
|
|
|
switch {
|
|
case skey == "id":
|
|
sequence.SetId(string(value))
|
|
case skey == "definition":
|
|
sequence.SetDefinition(string(value))
|
|
|
|
case skey == "count":
|
|
if dataType != jsonparser.Number {
|
|
log.Fatalf("%s: Count attribut must be numeric: %s", sequence.Id(), string(value))
|
|
}
|
|
count, err := jsonparser.ParseInt(value)
|
|
if err != nil {
|
|
log.Fatalf("%s: Cannot parse count %s", sequence.Id(), string(value))
|
|
}
|
|
sequence.SetCount(int(count))
|
|
|
|
case skey == "obiclean_weight":
|
|
weight, err := _parse_json_map_int(value)
|
|
if err != nil {
|
|
log.Fatalf("%s: Cannot parse obiclean weight %s", sequence.Id(), string(value))
|
|
}
|
|
annotations[skey] = weight
|
|
|
|
case skey == "obiclean_status":
|
|
status, err := _parse_json_map_string(value)
|
|
if err != nil {
|
|
log.Fatalf("%s: Cannot parse obiclean status %s", sequence.Id(), string(value))
|
|
}
|
|
annotations[skey] = status
|
|
|
|
case strings.HasPrefix(skey, "merged_"):
|
|
if dataType == jsonparser.Object {
|
|
data, err := _parse_json_map_int(value)
|
|
if err != nil {
|
|
log.Fatalf("%s: Cannot parse merged slot %s: %v", sequence.Id(), skey, err)
|
|
} else {
|
|
annotations[skey] = obiseq.MapAsStatsOnValues(data)
|
|
}
|
|
} else {
|
|
log.Fatalf("%s: Cannot parse merged slot %s", sequence.Id(), skey)
|
|
}
|
|
|
|
case skey == "taxid":
|
|
if dataType == jsonparser.Number || dataType == jsonparser.String {
|
|
taxid := string(value)
|
|
sequence.SetTaxid(taxid)
|
|
} else {
|
|
log.Fatalf("%s: Cannot parse taxid %s", sequence.Id(), string(value))
|
|
}
|
|
|
|
case strings.HasSuffix(skey, "_taxid"):
|
|
if dataType == jsonparser.Number || dataType == jsonparser.String {
|
|
rank, _ := obiutils.SplitInTwo(skey, '_')
|
|
|
|
taxid := string(value)
|
|
sequence.SetTaxid(taxid, rank)
|
|
} else {
|
|
log.Fatalf("%s: Cannot parse taxid %s", sequence.Id(), string(value))
|
|
}
|
|
|
|
default:
|
|
skey = strings.Clone(skey)
|
|
switch dataType {
|
|
case jsonparser.String:
|
|
annotations[skey] = string(value)
|
|
case jsonparser.Number:
|
|
// Try to parse the number as an int at first then as float if that fails.
|
|
annotations[skey], err = jsonparser.ParseInt(value)
|
|
if err != nil {
|
|
annotations[skey], err = strconv.ParseFloat(obiutils.UnsafeString(value), 64)
|
|
}
|
|
case jsonparser.Array:
|
|
annotations[skey], err = _parse_json_array_interface(value)
|
|
case jsonparser.Object:
|
|
annotations[skey], err = _parse_json_map_interface(value)
|
|
case jsonparser.Boolean:
|
|
annotations[skey], err = jsonparser.ParseBoolean(value)
|
|
case jsonparser.Null:
|
|
annotations[skey] = nil
|
|
default:
|
|
log.Fatalf("Unknown data type %v", dataType)
|
|
}
|
|
}
|
|
|
|
if err != nil {
|
|
annotations[skey] = "NaN"
|
|
log.Fatalf("%s: Cannot parse value %s assicated to key %s into a %s value",
|
|
sequence.Id(), string(value), skey, dataType.String())
|
|
}
|
|
|
|
return err
|
|
},
|
|
)
|
|
|
|
// err := json.Unmarshal([]byte(header)[start:stop], &annotations)
|
|
|
|
// for k, v := range annotations {
|
|
// switch vt := v.(type) {
|
|
// case float64:
|
|
// if vt == math.Floor(vt) {
|
|
// annotations[k] = int(vt)
|
|
// }
|
|
// {
|
|
// annotations[k] = vt
|
|
// }
|
|
// }
|
|
// }
|
|
|
|
// if err != nil {
|
|
// log.Fatalf("annotation parsing error on %s : %v\n", header, err)
|
|
// }
|
|
|
|
return strings.TrimSpace(header[stop:])
|
|
}
|
|
|
|
func ParseFastSeqJsonHeader(sequence *obiseq.BioSequence) {
|
|
definition := sequence.Definition()
|
|
sequence.SetDefinition("")
|
|
|
|
definition_part := _parse_json_header_(
|
|
definition,
|
|
sequence,
|
|
)
|
|
|
|
if len(definition_part) > 0 {
|
|
if sequence.HasDefinition() {
|
|
definition_part = sequence.Definition() + " " + definition_part
|
|
}
|
|
sequence.SetDefinition(definition_part)
|
|
}
|
|
}
|
|
|
|
func WriteFastSeqJsonHeader(buffer *bytes.Buffer, sequence *obiseq.BioSequence) {
|
|
|
|
annotations := sequence.Annotations()
|
|
|
|
if len(annotations) > 0 {
|
|
err := obiutils.JsonMarshalByteBuffer(buffer, sequence.Annotations())
|
|
|
|
if err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
}
|
|
}
|
|
|
|
func FormatFastSeqJsonHeader(sequence *obiseq.BioSequence) string {
|
|
annotations := sequence.Annotations()
|
|
buffer := bytes.Buffer{}
|
|
|
|
if len(annotations) > 0 {
|
|
obiutils.JsonMarshalByteBuffer(&buffer, sequence.Annotations())
|
|
return unsafe.String(unsafe.SliceData(buffer.Bytes()), len(buffer.Bytes()))
|
|
}
|
|
|
|
return ""
|
|
}
|