feat: support map and slice types in OBI attributes

Extends OBI header parsing to recognize and deserialize JSON-like arrays and objects. Introduces safe conversion utilities in `obiutils` to cast generic interface values into typed maps, and exposes them via new `BioSequence` methods. Header values are now marshaled, quote-normalized, and formatted for map and slice types.
This commit is contained in:
Eric Coissac
2026-06-01 13:18:44 +02:00
parent af7ae3d60c
commit dcdaf9e372
4 changed files with 144 additions and 13 deletions
+87 -12
View File
@@ -146,6 +146,65 @@ func __match__key__(text []byte) []int {
return []int{} // Not a key return []int{} // Not a key
} }
// __match__array__ recognizes a bracketed array value at the start of text.
//
// The expected layout is: optional spaces/tabs, a '[' opening the array, the
// array content, the closing bracket that brings the nesting depth back to
// zero, optional spaces/tabs, and a terminating ';'.
//
// While inside the array, '[' and '{' increase the nesting depth and ']' and
// '}' decrease it. Text between matching single or double quotes is treated
// as a string: brackets inside it are ignored. A backslash inside a string
// escapes the following byte, so an escaped quote (\' or \") does not end the
// string.
//
// On success the result is []int{start, end} where start is the index of the
// opening '[' and end is one past the terminating ';'. When text does not
// have this layout (including when the ';' is missing) an empty, non-nil
// slice is returned.
func __match__array__(text []byte) []int {
	const (
		beforeArray = iota // skipping leading blanks, waiting for '['
		insideArray        // between the opening '[' and its closing bracket
		afterArray         // array closed, waiting for ';'
	)

	phase := beforeArray
	depth := 0
	begin := 0
	quote := byte(0) // quote byte that opened the current string, 0 outside strings
	escaped := false // true when the previous byte in a string was a backslash

	for pos, c := range text {
		switch phase {
		case beforeArray:
			switch c {
			case '[':
				depth++
				phase = insideArray
				begin = pos
			case ' ', '\t':
				// leading blank: keep looking for the opening bracket
			default:
				return []int{}
			}

		case insideArray:
			switch {
			case quote != 0:
				switch {
				case escaped:
					// This byte is escaped: it can neither close the string
					// nor start a new escape sequence.
					escaped = false
				case c == '\\':
					escaped = true
				case c == quote:
					quote = 0
				}
			case c == '"' || c == '\'':
				quote = c
			case c == '[' || c == '{':
				depth++
			case c == ']' || c == '}':
				depth--
				if depth == 0 {
					phase = afterArray
				}
			}

		case afterArray:
			switch c {
			case ';':
				return []int{begin, pos + 1}
			case ' ', '\t':
				// blank between the closing bracket and the ';'
			default:
				return []int{}
			}
		}
	}

	// Ran out of input before finding the terminating ';'.
	return []int{}
}
func __match__general__(text []byte) []int { func __match__general__(text []byte) []int {
for i, r := range text { for i, r := range text {
@@ -242,6 +301,21 @@ func ParseOBIFeatures(text string, annotations obiseq.Annotation) string {
stop = m[1] + 1 stop = m[1] + 1
} else { } else {
// array value
m = __match__array__(part)
if len(m) > 0 {
bvalue = bytes.TrimSpace(part[m[0]:(m[1] - 1)])
j := bytes.ReplaceAll(bvalue, []byte("'"), []byte(`"`))
j = __obi_header_map_int_key__.ReplaceAll(j, []byte(`$1"$2":`))
arr, err := _parse_json_array_interface(j)
if err != nil {
value = string(bvalue)
} else {
value = arr
}
stop = m[1] + 1
} else {
// Generic value // Generic value
// m = __obi_header_value_general_pattern__.FindIndex(part) // m = __obi_header_value_general_pattern__.FindIndex(part)
@@ -264,6 +338,7 @@ func ParseOBIFeatures(text string, annotations obiseq.Annotation) string {
// no value // no value
break break
} // End of No value } // End of No value
} // End of not array
} // End of not dict } // End of not dict
} // End of not string } // End of not string
} // End of not numeric } // End of not numeric
@@ -327,19 +402,19 @@ func WriteFastSeqOBIHeade(buffer *bytes.Buffer, sequence *obiseq.BioSequence) {
buffer.WriteString(fmt.Sprintf("%s=", key)) buffer.WriteString(fmt.Sprintf("%s=", key))
buffer.Write(tv) buffer.Write(tv)
buffer.WriteString("; ") buffer.WriteString("; ")
case map[string]int,
map[string]string,
map[string]interface{}:
tv, err := obiutils.JsonMarshal(t)
if err != nil {
log.Fatalf("Cannot convert %v value", value)
}
tv = bytes.ReplaceAll(tv, []byte(`"`), []byte("'"))
buffer.WriteString(fmt.Sprintf("%s=", key))
buffer.Write(tv)
buffer.WriteString("; ")
default: default:
buffer.WriteString(fmt.Sprintf("%s=%v; ", key, value)) if obiutils.IsAMap(value) || obiutils.IsASlice(value) || obiutils.IsAnArray(value) {
tv, err := obiutils.JsonMarshal(t)
if err != nil {
log.Fatalf("Cannot convert %v value", value)
}
tv = bytes.ReplaceAll(tv, []byte(`"`), []byte("'"))
buffer.WriteString(fmt.Sprintf("%s=", key))
buffer.Write(tv)
buffer.WriteString("; ")
} else {
buffer.WriteString(fmt.Sprintf("%s=%v; ", key, value))
}
} }
} }
} }
+18
View File
@@ -364,6 +364,24 @@ func (s *BioSequence) GetIntSlice(key string) ([]int, bool) {
return val, ok return val, ok
} }
// GetMapOfIntSlice looks up the attribute named key with GetAttribute and
// converts it with obiutils.InterfaceToMapOfIntSlice.
//
// The boolean is false when the attribute is absent (the map is then nil) or
// when the conversion reports an error (the map is then whatever the
// converter returned).
func (s *BioSequence) GetMapOfIntSlice(key string) (map[string][]int, bool) {
	raw, found := s.GetAttribute(key)
	if found {
		converted, convErr := obiutils.InterfaceToMapOfIntSlice(raw)
		return converted, convErr == nil
	}
	return nil, false
}
// GetMapOfStringSlice looks up the attribute named key with GetAttribute and
// converts it with obiutils.InterfaceToMapOfStringSlice.
//
// The boolean is false when the attribute is absent (the map is then nil) or
// when the conversion reports an error (the map is then whatever the
// converter returned).
func (s *BioSequence) GetMapOfStringSlice(key string) (map[string][]string, bool) {
	raw, found := s.GetAttribute(key)
	if found {
		converted, convErr := obiutils.InterfaceToMapOfStringSlice(raw)
		return converted, convErr == nil
	}
	return nil, false
}
// Count returns the value of the "count" attribute of the BioSequence. // Count returns the value of the "count" attribute of the BioSequence.
// //
// The count of a sequence is the number of times it has been observed in the dataset. // The count of a sequence is the number of times it has been observed in the dataset.
+1 -1
View File
@@ -103,7 +103,7 @@ func TestNewBioSequence(t *testing.T) {
// Return type: None. // Return type: None.
func TestNewBioSequenceWithQualities(t *testing.T) { func TestNewBioSequenceWithQualities(t *testing.T) {
id := "123" id := "123"
sequence := []byte("ATGC") sequence := []byte("atgc")
definition := "DNA sequence" definition := "DNA sequence"
qualities := []byte("1234") qualities := []byte("1234")
+38
View File
@@ -276,6 +276,44 @@ func InterfaceToStringMap(i interface{}) (val map[string]string, err error) {
return return
} }
// InterfaceToMapOfIntSlice converts a generic value into a map[string][]int.
//
// A map[string][]int is returned as is (the same map, not a copy). A
// map[string]interface{} is converted entry by entry with
// InterfaceToIntSlice. Any other dynamic type yields a *NotAMapInt error.
//
// Whenever err is non-nil, val is nil: a partially converted map is never
// returned alongside an error.
func InterfaceToMapOfIntSlice(i interface{}) (val map[string][]int, err error) {
	switch m := i.(type) {
	case map[string][]int:
		return m, nil
	case map[string]interface{}:
		converted := make(map[string][]int, len(m))
		for k, v := range m {
			ints, convErr := InterfaceToIntSlice(v)
			if convErr != nil {
				// Drop the entries converted so far so that callers never
				// see a half-filled map.
				return nil, convErr
			}
			converted[k] = ints
		}
		return converted, nil
	default:
		return nil, &NotAMapInt{"value attribute cannot be casted to a map[string][]int"}
	}
}
// InterfaceToMapOfStringSlice converts a generic value into a
// map[string][]string.
//
// A map[string][]string is returned as is (the same map, not a copy). A
// map[string]interface{} is converted entry by entry with
// InterfaceToStringSlice. Any other dynamic type yields a *NotAMapInt error.
//
// Whenever err is non-nil, val is nil: a partially converted map is never
// returned alongside an error.
func InterfaceToMapOfStringSlice(i interface{}) (val map[string][]string, err error) {
	switch m := i.(type) {
	case map[string][]string:
		return m, nil
	case map[string]interface{}:
		converted := make(map[string][]string, len(m))
		for k, v := range m {
			strs, convErr := InterfaceToStringSlice(v)
			if convErr != nil {
				// Drop the entries converted so far so that callers never
				// see a half-filled map.
				return nil, convErr
			}
			converted[k] = strs
		}
		return converted, nil
	default:
		return nil, &NotAMapInt{"value attribute cannot be casted to a map[string][]string"}
	}
}
func InterfaceToStringSlice(i interface{}) (val []string, err error) { func InterfaceToStringSlice(i interface{}) (val []string, err error) {
err = nil err = nil