mirror of
https://github.com/metabarcoding/obitools4.git
synced 2026-06-24 01:31:00 +00:00
feat: support map and slice types in OBI attributes
Extends OBI header parsing to recognize and deserialize JSON-like arrays and objects. Introduces safe conversion utilities in `obiutils` to cast generic interface values into typed maps, and exposes them via new `BioSequence` methods. Header values are now marshaled, quote-normalized, and formatted for map and slice types.
This commit is contained in:
@@ -146,6 +146,65 @@ func __match__key__(text []byte) []int {
|
||||
return []int{} // Not a key
|
||||
}
|
||||
|
||||
// __match__array__ checks whether text starts with a bracketed array value
// followed by a semicolon, optionally surrounded by blanks (spaces or tabs).
//
// On success it returns a two-element slice {start, end} where start is the
// index of the opening '[' and end is the index just past the terminating
// ';'. When text does not have that shape (anything other than blanks before
// the '[', an unbalanced array, anything other than blanks between the
// closing bracket and the ';', or no ';' at all) it returns an empty slice.
//
// Nested '[' / '{' and ']' / '}' are counted together to find the end of the
// array; the kind of bracket is not checked for pairing. Brackets, braces and
// semicolons located inside single- or double-quoted strings are ignored, and
// a backslash inside a string escapes the following byte so that an escaped
// quote does not terminate the string.
func __match__array__(text []byte) []int {
	const (
		beforeArray = iota // only blanks seen so far
		insideArray        // between the opening '[' and its matching closer
		afterArray         // array closed, waiting for the ';'
	)

	state := beforeArray
	level := 0          // current bracket/brace nesting depth
	start := 0          // index of the opening '['
	instring := byte(0) // quote byte of the string being scanned, 0 if none
	escaped := false    // true when the previous string byte was a backslash

	for i, r := range text {
		switch state {
		case beforeArray:
			switch r {
			case '[':
				level = 1
				state = insideArray
				start = i
			case ' ', '\t':
				// leading blanks are skipped
			default:
				return []int{}
			}

		case afterArray:
			switch r {
			case ';':
				return []int{start, i + 1}
			case ' ', '\t':
				// blanks between the array and the ';' are allowed
			default:
				return []int{}
			}

		default: // insideArray
			switch {
			case instring != 0:
				switch {
				case escaped:
					// the byte following a backslash is taken literally
					escaped = false
				case r == '\\':
					escaped = true
				case r == instring:
					instring = 0
				}
			case r == '"' || r == '\'':
				instring = r
			case r == '[' || r == '{':
				level++
			case r == ']' || r == '}':
				level--
				if level == 0 {
					state = afterArray
				}
			}
		}
	}

	// Input exhausted before a terminating ';' was found.
	return []int{}
}
|
||||
|
||||
func __match__general__(text []byte) []int {
|
||||
|
||||
for i, r := range text {
|
||||
@@ -242,6 +301,21 @@ func ParseOBIFeatures(text string, annotations obiseq.Annotation) string {
|
||||
stop = m[1] + 1
|
||||
} else {
|
||||
|
||||
// array value
|
||||
m = __match__array__(part)
|
||||
if len(m) > 0 {
|
||||
bvalue = bytes.TrimSpace(part[m[0]:(m[1] - 1)])
|
||||
j := bytes.ReplaceAll(bvalue, []byte("'"), []byte(`"`))
|
||||
j = __obi_header_map_int_key__.ReplaceAll(j, []byte(`$1"$2":`))
|
||||
arr, err := _parse_json_array_interface(j)
|
||||
if err != nil {
|
||||
value = string(bvalue)
|
||||
} else {
|
||||
value = arr
|
||||
}
|
||||
stop = m[1] + 1
|
||||
} else {
|
||||
|
||||
// Generic value
|
||||
|
||||
// m = __obi_header_value_general_pattern__.FindIndex(part)
|
||||
@@ -264,6 +338,7 @@ func ParseOBIFeatures(text string, annotations obiseq.Annotation) string {
|
||||
// no value
|
||||
break
|
||||
} // End of No value
|
||||
} // End of not array
|
||||
} // End of not dict
|
||||
} // End of not string
|
||||
} // End of not numeric
|
||||
@@ -327,9 +402,8 @@ func WriteFastSeqOBIHeade(buffer *bytes.Buffer, sequence *obiseq.BioSequence) {
|
||||
buffer.WriteString(fmt.Sprintf("%s=", key))
|
||||
buffer.Write(tv)
|
||||
buffer.WriteString("; ")
|
||||
case map[string]int,
|
||||
map[string]string,
|
||||
map[string]interface{}:
|
||||
default:
|
||||
if obiutils.IsAMap(value) || obiutils.IsASlice(value) || obiutils.IsAnArray(value) {
|
||||
tv, err := obiutils.JsonMarshal(t)
|
||||
if err != nil {
|
||||
log.Fatalf("Cannot convert %v value", value)
|
||||
@@ -338,11 +412,12 @@ func WriteFastSeqOBIHeade(buffer *bytes.Buffer, sequence *obiseq.BioSequence) {
|
||||
buffer.WriteString(fmt.Sprintf("%s=", key))
|
||||
buffer.Write(tv)
|
||||
buffer.WriteString("; ")
|
||||
default:
|
||||
} else {
|
||||
buffer.WriteString(fmt.Sprintf("%s=%v; ", key, value))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if sequence.HasDefinition() {
|
||||
buffer.WriteByte(' ')
|
||||
|
||||
@@ -364,6 +364,24 @@ func (s *BioSequence) GetIntSlice(key string) ([]int, bool) {
|
||||
return val, ok
|
||||
}
|
||||
|
||||
func (s *BioSequence) GetMapOfIntSlice(key string) (map[string][]int, bool) {
|
||||
v, ok := s.GetAttribute(key)
|
||||
if !ok {
|
||||
return nil, false
|
||||
}
|
||||
val, err := obiutils.InterfaceToMapOfIntSlice(v)
|
||||
return val, err == nil
|
||||
}
|
||||
|
||||
func (s *BioSequence) GetMapOfStringSlice(key string) (map[string][]string, bool) {
|
||||
v, ok := s.GetAttribute(key)
|
||||
if !ok {
|
||||
return nil, false
|
||||
}
|
||||
val, err := obiutils.InterfaceToMapOfStringSlice(v)
|
||||
return val, err == nil
|
||||
}
|
||||
|
||||
// Count returns the value of the "count" attribute of the BioSequence.
|
||||
//
|
||||
// The count of a sequence is the number of times it has been observed in the dataset.
|
||||
|
||||
@@ -103,7 +103,7 @@ func TestNewBioSequence(t *testing.T) {
|
||||
// Return type: None.
|
||||
func TestNewBioSequenceWithQualities(t *testing.T) {
|
||||
id := "123"
|
||||
sequence := []byte("ATGC")
|
||||
sequence := []byte("atgc")
|
||||
definition := "DNA sequence"
|
||||
qualities := []byte("1234")
|
||||
|
||||
|
||||
@@ -276,6 +276,44 @@ func InterfaceToStringMap(i interface{}) (val map[string]string, err error) {
|
||||
return
|
||||
}
|
||||
|
||||
func InterfaceToMapOfIntSlice(i interface{}) (val map[string][]int, err error) {
|
||||
err = nil
|
||||
switch m := i.(type) {
|
||||
case map[string][]int:
|
||||
val = m
|
||||
case map[string]interface{}:
|
||||
val = make(map[string][]int, len(m))
|
||||
for k, v := range m {
|
||||
val[k], err = InterfaceToIntSlice(v)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
default:
|
||||
err = &NotAMapInt{"value attribute cannot be casted to a map[string][]int"}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func InterfaceToMapOfStringSlice(i interface{}) (val map[string][]string, err error) {
|
||||
err = nil
|
||||
switch m := i.(type) {
|
||||
case map[string][]string:
|
||||
val = m
|
||||
case map[string]interface{}:
|
||||
val = make(map[string][]string, len(m))
|
||||
for k, v := range m {
|
||||
val[k], err = InterfaceToStringSlice(v)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
default:
|
||||
err = &NotAMapInt{"value attribute cannot be casted to a map[string][]string"}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func InterfaceToStringSlice(i interface{}) (val []string, err error) {
|
||||
err = nil
|
||||
|
||||
|
||||
Reference in New Issue
Block a user