From dcdaf9e372ccca750b3b38f19cec2b76dc38c3bd Mon Sep 17 00:00:00 2001
From: Eric Coissac
Date: Mon, 1 Jun 2026 13:18:44 +0200
Subject: [PATCH] feat: support map and slice types in OBI attributes

Extends OBI header parsing to recognize and deserialize JSON-like arrays and
objects. Introduces safe conversion utilities in `obiutils` to cast generic
interface values into typed maps, and exposes them via new `BioSequence`
methods. Header values are now marshaled, quote-normalized, and formatted for
map and slice types.
---
 pkg/obiformats/fastseq_obi_header.go | 119 ++++++++++++++++++++++++++++---
 pkg/obiseq/attributes.go             |  26 +++++++
 pkg/obiseq/biosequence_test.go       |   2 +-
 pkg/obiutils/cast_interface.go       |  54 +++++++++++++++
 4 files changed, 188 insertions(+), 13 deletions(-)

diff --git a/pkg/obiformats/fastseq_obi_header.go b/pkg/obiformats/fastseq_obi_header.go
index 79a45b1..fb2fb78 100644
--- a/pkg/obiformats/fastseq_obi_header.go
+++ b/pkg/obiformats/fastseq_obi_header.go
@@ -146,6 +146,85 @@ func __match__key__(text []byte) []int {
 	return []int{} // Not a key
 }

+// __match__array__ locates a bracketed array value at the start of text.
+//
+// Leading spaces and tabs are skipped; the value must open with '[' and is
+// followed up to the bracket that brings the nesting level back to zero.
+// Brackets and braces inside single- or double-quoted strings are ignored,
+// and a backslash inside a string escapes the next byte so that an escaped
+// quote does not end the string. Only spaces and tabs may separate the
+// closing bracket from the terminating ';'.
+//
+// It returns []int{start, end}, start being the index of the opening '[' and
+// end one past the terminating ';', or an empty slice when no such value
+// starts text.
+func __match__array__(text []byte) []int {
+
+	state := 0
+	level := 0
+	start := 0
+	instring := byte(0)
+	escaped := false
+
+	for i, r := range text {
+		if state == 2 {
+			// Past the closing bracket: only blanks may precede the ';'.
+			if r == ';' {
+				return []int{start, i + 1}
+			}
+			if r != ' ' && r != '\t' {
+				return []int{}
+			}
+			continue
+		}
+
+		if state == 0 {
+			if r == '[' {
+				level++
+				state++
+				start = i
+				continue
+			}
+			if r != ' ' && r != '\t' {
+				return []int{}
+			}
+			continue
+		}
+
+		// state == 1: inside the array
+		if instring != 0 {
+			switch {
+			case escaped:
+				escaped = false
+			case r == '\\':
+				escaped = true
+			case r == instring:
+				instring = 0
+			}
+			continue
+		}
+
+		if r == '"' || r == '\'' {
+			instring = r
+			continue
+		}
+
+		if r == '[' || r == '{' {
+			level++
+			continue
+		}
+
+		if r == ']' || r == '}' {
+			level--
+			if level == 0 {
+				state++
+			}
+		}
+	}
+
+	return []int{}
+}
+
 func __match__general__(text []byte) []int {

 	for i, r := range text {
@@ -242,6 +321,21 @@ func ParseOBIFeatures(text string, annotations obiseq.Annotation) string {

 					stop = m[1] + 1
 				} else {
+					// array value
+					m = __match__array__(part)
+					if len(m) > 0 {
+						bvalue = bytes.TrimSpace(part[m[0]:(m[1] - 1)])
+						j := bytes.ReplaceAll(bvalue, []byte("'"), []byte(`"`))
+						j = __obi_header_map_int_key__.ReplaceAll(j, []byte(`$1"$2":`))
+						arr, err := _parse_json_array_interface(j)
+						if err != nil {
+							value = string(bvalue)
+						} else {
+							value = arr
+						}
+						stop = m[1] + 1
+					} else {
+
 					// Generic value
 					// m = __obi_header_value_general_pattern__.FindIndex(part)

@@ -264,6 +358,7 @@ func ParseOBIFeatures(text string, annotations obiseq.Annotation) string {
 						// no value
 						break
 					} // End of No value
+					} // End of not array
 				} // End of not dict
 			} // End of not string
 		} // End of not numeric
@@ -327,19 +422,19 @@ func WriteFastSeqOBIHeade(buffer *bytes.Buffer, sequence *obiseq.BioSequence) {
 				buffer.WriteString(fmt.Sprintf("%s=", key))
 				buffer.Write(tv)
 				buffer.WriteString("; ")
-			case map[string]int,
-				map[string]string,
-				map[string]interface{}:
-				tv, err := obiutils.JsonMarshal(t)
-				if err != nil {
-					log.Fatalf("Cannot convert %v value", value)
-				}
-				tv = bytes.ReplaceAll(tv, []byte(`"`), []byte("'"))
-				buffer.WriteString(fmt.Sprintf("%s=", key))
-				buffer.Write(tv)
-				buffer.WriteString("; ")
 			default:
-				buffer.WriteString(fmt.Sprintf("%s=%v; ", key, value))
+				if obiutils.IsAMap(value) || obiutils.IsASlice(value) || obiutils.IsAnArray(value) {
+					tv, err := obiutils.JsonMarshal(t)
+					if err != nil {
+						log.Fatalf("Cannot convert %v value", value)
+					}
+					tv = bytes.ReplaceAll(tv, []byte(`"`), []byte("'"))
+					buffer.WriteString(fmt.Sprintf("%s=", key))
+					buffer.Write(tv)
+					buffer.WriteString("; ")
+				} else {
+					buffer.WriteString(fmt.Sprintf("%s=%v; ", key, value))
+				}
 			}
 		}
 	}
diff --git a/pkg/obiseq/attributes.go b/pkg/obiseq/attributes.go
index dd4699f..d4d6884 100644
--- a/pkg/obiseq/attributes.go
+++ b/pkg/obiseq/attributes.go
@@ -364,6 +364,32 @@ func (s *BioSequence) GetIntSlice(key string) ([]int, bool) {
 	return val, ok
 }

+// GetMapOfIntSlice returns the attribute stored under key as a
+// map[string][]int. The boolean is false when the attribute is missing or
+// when obiutils.InterfaceToMapOfIntSlice cannot convert it, in which case
+// the returned map is nil.
+func (s *BioSequence) GetMapOfIntSlice(key string) (map[string][]int, bool) {
+	v, ok := s.GetAttribute(key)
+	if !ok {
+		return nil, false
+	}
+	val, err := obiutils.InterfaceToMapOfIntSlice(v)
+	return val, err == nil
+}
+
+// GetMapOfStringSlice returns the attribute stored under key as a
+// map[string][]string. The boolean is false when the attribute is missing or
+// when obiutils.InterfaceToMapOfStringSlice cannot convert it, in which case
+// the returned map is nil.
+func (s *BioSequence) GetMapOfStringSlice(key string) (map[string][]string, bool) {
+	v, ok := s.GetAttribute(key)
+	if !ok {
+		return nil, false
+	}
+	val, err := obiutils.InterfaceToMapOfStringSlice(v)
+	return val, err == nil
+}
+
 // Count returns the value of the "count" attribute of the BioSequence.
 //
 // The count of a sequence is the number of times it has been observed in the dataset.
diff --git a/pkg/obiseq/biosequence_test.go b/pkg/obiseq/biosequence_test.go
index 5e51203..8aae1d6 100644
--- a/pkg/obiseq/biosequence_test.go
+++ b/pkg/obiseq/biosequence_test.go
@@ -103,7 +103,7 @@ func TestNewBioSequence(t *testing.T) {
 // Return type: None.
 func TestNewBioSequenceWithQualities(t *testing.T) {
 	id := "123"
-	sequence := []byte("ATGC")
+	sequence := []byte("atgc")
 	definition := "DNA sequence"
 	qualities := []byte("1234")

diff --git a/pkg/obiutils/cast_interface.go b/pkg/obiutils/cast_interface.go
index 0218ffd..b82e4fe 100644
--- a/pkg/obiutils/cast_interface.go
+++ b/pkg/obiutils/cast_interface.go
@@ -276,6 +276,60 @@ func InterfaceToStringMap(i interface{}) (val map[string]string, err error) {
 	return
 }

+// InterfaceToMapOfIntSlice converts i to a map[string][]int.
+//
+// A map[string][]int is returned unchanged. A map[string]interface{} is
+// converted entry by entry with InterfaceToIntSlice; the first entry that
+// fails aborts the conversion and its error is returned together with a nil
+// map, never a partially filled one. Any other type yields a NotAMapInt
+// error.
+func InterfaceToMapOfIntSlice(i interface{}) (val map[string][]int, err error) {
+	err = nil
+	switch m := i.(type) {
+	case map[string][]int:
+		val = m
+	case map[string]interface{}:
+		val = make(map[string][]int, len(m))
+		for k, v := range m {
+			val[k], err = InterfaceToIntSlice(v)
+			if err != nil {
+				val = nil
+				return
+			}
+		}
+	default:
+		err = &NotAMapInt{"value attribute cannot be casted to a map[string][]int"}
+	}
+	return
+}
+
+// InterfaceToMapOfStringSlice converts i to a map[string][]string.
+//
+// A map[string][]string is returned unchanged. A map[string]interface{} is
+// converted entry by entry with InterfaceToStringSlice; the first entry that
+// fails aborts the conversion and its error is returned together with a nil
+// map, never a partially filled one. Any other type yields a NotAMapInt
+// error.
+func InterfaceToMapOfStringSlice(i interface{}) (val map[string][]string, err error) {
+	err = nil
+	switch m := i.(type) {
+	case map[string][]string:
+		val = m
+	case map[string]interface{}:
+		val = make(map[string][]string, len(m))
+		for k, v := range m {
+			val[k], err = InterfaceToStringSlice(v)
+			if err != nil {
+				val = nil
+				return
+			}
+		}
+	default:
+		err = &NotAMapInt{"value attribute cannot be casted to a map[string][]string"}
+	}
+	return
+}
+
 func InterfaceToStringSlice(i interface{}) (val []string, err error) {
 	err = nil
