Compare commits

..

10 Commits

Author SHA1 Message Date
Eric Coissac e9210e28a3 Release 4.4.44 2026-06-02 14:40:09 +02:00
Eric Coissac 13a93fce11 feat: add which_max and which_min to retrieve extreme element indices
Implement reflection-based WhichMax and WhichMin to dynamically find the index or key of the maximum/minimum element in slices, arrays, or maps. Functions validate orderability, handle empty collections, and dispatch via reflect.Kind. Expose as which_max and which_min GVal functions, with float64 type assertions for compatibility and preserved error handling.
2026-06-02 14:39:31 +02:00
Eric Coissac 14064c919e fix(obiutils): correctly unwrap interface values in min/max
Introduces an `unwrapInterface` reflection helper to dereference `interface{}`-wrapped values before type validation. Updates slice and map iteration loops in min/max functions to apply this helper, ensuring `isOrderedKind` accurately identifies underlying concrete types instead of incorrectly rejecting `reflect.Interface` elements.
2026-06-02 14:34:00 +02:00
coissac 1dfd68aa6d Merge pull request #117 from metabarcoding/push-wvlmzvomslzv
Release 4.4.43
2026-06-01 14:14:40 +02:00
Eric Coissac 930fe5f1ba Release 4.4.43 2026-06-01 13:22:58 +02:00
Eric Coissac dcdaf9e372 feat: support map and slice types in OBI attributes
Extends OBI header parsing to recognize and deserialize JSON-like arrays and objects. Introduces safe conversion utilities in `obiutils` to cast generic interface values into typed maps, and exposes them via new `BioSequence` methods. Header values are now marshaled, quote-normalized, and formatted for map and slice types.
2026-06-01 13:21:11 +02:00
Eric Coissac af7ae3d60c Correct Shannon entropy bias for canonical k-mers
Multiple raw k-mers collapsing into identical circular canonical forms introduce bias into complexity estimates. This change pre-computes `log(class_size)` tables and per-word-size maximum entropy bounds. The `KmerEntropy` function and `KmerEntropyFilter` are updated to apply the corrected formula `(log(N) + Σf·log(s) - Σf·log(f))/N / emax`, ensuring accurate sequence complexity estimation.
2026-05-17 14:54:57 +08:00
Eric Coissac cecf90fa40 feat: add min/max filtering and saturating subtraction utilities
Introduce generic and reflection-based utilities for filtering slices and maps by minimum/maximum thresholds, along with saturating subtraction. The `obiutils` package provides type-safe generic implementations alongside dynamic reflection dispatchers to handle arbitrary ordered and numeric types. These are exposed as GVAL expression functions in `obiseq`, extending the language's built-in filtering and numeric capabilities.
2026-05-14 20:58:24 +08:00
Eric Coissac a186bd1c92 fix: validate non-empty sequence IDs in FASTA and FASTQ writers
Adds a pre-processing guard that checks for empty sequence identifiers before formatting. This prevents malformed FASTA output and stops downstream processing of invalid FASTQ data by terminating early. The check is placed before existing sequence-length validations to enforce non-empty IDs during batch processing.
2026-05-05 18:07:58 +02:00
coissac 46d60c1a44 Merge pull request #115 from metabarcoding/push-lkzqoskvyqtr
[4.4.2] Enhanced taxonomy handling, input robustness & PCR tag validation
2026-04-30 16:59:49 +02:00
11 changed files with 634 additions and 74 deletions
+79 -4
View File
@@ -146,6 +146,65 @@ func __match__key__(text []byte) []int {
return []int{} // Not a key
}
// __match__array__ reports the span of a bracketed array value located at the
// start of text.
//
// Leading spaces and tabs are skipped, then the value must open with '['.
// Nested '[' and '{' groups are tracked by depth, and quoted strings (single
// or double quotes) are skipped so that brackets inside them are ignored.
// Inside a quoted string a backslash escapes the following byte, so an
// escaped quote such as \" does not terminate the string. Once the opening
// bracket is balanced, only spaces and tabs may precede the terminating ';'.
//
// It returns []int{start, end} where start is the index of the opening '['
// and end is one past the terminating ';'. An empty slice is returned when
// text does not start with such an array or when the ';' is missing.
func __match__array__(text []byte) []int {
	const (
		beforeArray = iota
		insideArray
		afterArray
	)

	state := beforeArray
	depth := 0
	start := 0
	quote := byte(0) // active string delimiter, 0 when outside a string
	escaped := false // true when the previous byte inside a string was '\'

	for i, c := range text {
		switch state {
		case beforeArray:
			switch c {
			case '[':
				depth = 1
				start = i
				state = insideArray
			case ' ', '\t':
				// leading blank: keep looking for the opening bracket
			default:
				return []int{}
			}

		case insideArray:
			switch {
			case quote != 0:
				switch {
				case escaped:
					escaped = false
				case c == '\\':
					escaped = true
				case c == quote:
					quote = 0
				}
			case c == '"' || c == '\'':
				quote = c
			case c == '[' || c == '{':
				depth++
			case c == ']' || c == '}':
				depth--
				if depth == 0 {
					state = afterArray
				}
			}

		case afterArray:
			switch c {
			case ';':
				return []int{start, i + 1}
			case ' ', '\t':
				// trailing blank before the terminator
			default:
				return []int{}
			}
		}
	}

	// Input exhausted before the terminating ';' was seen.
	return []int{}
}
func __match__general__(text []byte) []int {
for i, r := range text {
@@ -242,6 +301,21 @@ func ParseOBIFeatures(text string, annotations obiseq.Annotation) string {
stop = m[1] + 1
} else {
// array value
m = __match__array__(part)
if len(m) > 0 {
bvalue = bytes.TrimSpace(part[m[0]:(m[1] - 1)])
j := bytes.ReplaceAll(bvalue, []byte("'"), []byte(`"`))
j = __obi_header_map_int_key__.ReplaceAll(j, []byte(`$1"$2":`))
arr, err := _parse_json_array_interface(j)
if err != nil {
value = string(bvalue)
} else {
value = arr
}
stop = m[1] + 1
} else {
// Generic value
// m = __obi_header_value_general_pattern__.FindIndex(part)
@@ -264,6 +338,7 @@ func ParseOBIFeatures(text string, annotations obiseq.Annotation) string {
// no value
break
} // End of No value
} // End of not array
} // End of not dict
} // End of not string
} // End of not numeric
@@ -327,9 +402,8 @@ func WriteFastSeqOBIHeade(buffer *bytes.Buffer, sequence *obiseq.BioSequence) {
buffer.WriteString(fmt.Sprintf("%s=", key))
buffer.Write(tv)
buffer.WriteString("; ")
case map[string]int,
map[string]string,
map[string]interface{}:
default:
if obiutils.IsAMap(value) || obiutils.IsASlice(value) || obiutils.IsAnArray(value) {
tv, err := obiutils.JsonMarshal(t)
if err != nil {
log.Fatalf("Cannot convert %v value", value)
@@ -338,11 +412,12 @@ func WriteFastSeqOBIHeade(buffer *bytes.Buffer, sequence *obiseq.BioSequence) {
buffer.WriteString(fmt.Sprintf("%s=", key))
buffer.Write(tv)
buffer.WriteString("; ")
default:
} else {
buffer.WriteString(fmt.Sprintf("%s=%v; ", key, value))
}
}
}
}
if sequence.HasDefinition() {
buffer.WriteByte(' ')
+3
View File
@@ -90,6 +90,9 @@ func FormatFastaBatch(batch obiiter.BioSequenceBatch, formater FormatHeader, ski
log.Debugf("FormatFastaBatch: #%d : %d seqs", batch.Order(), batch.Len())
for _, seq := range batch.Slice() {
if len(seq.Id()) == 0 {
log.Fatalf("Sequence identifier is empty")
}
if seq.Len() > 0 {
// Write header directly into bs — no intermediate string
bs.WriteByte('>')
+3
View File
@@ -64,6 +64,9 @@ func FormatFastqBatch(batch obiiter.BioSequenceBatch,
first := true
for _, seq := range batch.Slice() {
if len(seq.Id()) == 0 {
log.Fatalf("Sequence identifier is empty")
}
if seq.Len() > 0 {
_formatFastq(&bs, seq, formater)
+75 -40
View File
@@ -4,22 +4,21 @@ import "math"
// KmerEntropy computes the entropy of a single encoded k-mer.
//
// The algorithm mirrors the lowmask entropy calculation: it decodes the k-mer
// The algorithm mirrors the Rust obiskbuilder entropy: it decodes the k-mer
// to a DNA sequence, extracts all sub-words of each size from 1 to levelMax,
// normalizes them by circular canonical form, counts their frequencies, and
// computes Shannon entropy normalized by the maximum possible entropy.
// computes Shannon entropy corrected for class sizes, normalized by the
// maximum possible entropy over 4^ws raw bins.
// The returned value is the minimum entropy across all word sizes.
//
// Correction for small sequences: the raw entropy H = log(N) - Σ f·log(f)/N
// under-estimates the true complexity when many raw words collapse to the same
// canonical form. Adding Σ f·log(class_size)/N recovers the entropy of the
// underlying uncollapsed distribution (assuming uniform mixing within each
// equivalence class).
//
// A value close to 0 indicates very low complexity (e.g. "AAAA..."),
// while a value close to 1 indicates high complexity.
//
// Parameters:
// - kmer: the encoded k-mer (2 bits per base)
// - k: the k-mer size
// - levelMax: maximum sub-word size for entropy (typically 6)
//
// Returns:
// - minimum normalized entropy across all word sizes 1..levelMax
func KmerEntropy(kmer uint64, k int, levelMax int) float64 {
if k < 1 || levelMax < 1 {
return 1.0
@@ -35,7 +34,7 @@ func KmerEntropy(kmer uint64, k int, levelMax int) float64 {
var seqBuf [32]byte
seq := DecodeKmer(kmer, k, seqBuf[:])
// Pre-compute nLogN lookup (same as lowmask)
// Pre-compute nLogN lookup
nLogN := make([]float64, k+1)
for i := 1; i <= k; i++ {
nLogN[i] = float64(i) * math.Log(float64(i))
@@ -51,6 +50,23 @@ func KmerEntropy(kmer uint64, k int, levelMax int) float64 {
}
}
// Build ln(class_size) tables: for each canonical form, how many raw
// words map to it under circular normalization.
classLogSizeTables := make([][]float64, levelMax+1)
for ws := 1; ws <= levelMax; ws++ {
tableSize := 1 << (ws * 2)
classSize := make([]int, tableSize)
for code := 0; code < tableSize; code++ {
classSize[normTables[ws][code]]++
}
classLogSizeTables[ws] = make([]float64, tableSize)
for j := 0; j < tableSize; j++ {
if classSize[j] > 0 {
classLogSizeTables[ws][j] = math.Log(float64(classSize[j]))
}
}
}
minEntropy := math.MaxFloat64
for ws := 1; ws <= levelMax; ws++ {
@@ -75,23 +91,13 @@ func KmerEntropy(kmer uint64, k int, levelMax int) float64 {
table[normWord]++
}
// Compute Shannon entropy
// Compute emax over 4^ws raw bins (uncollapsed distribution).
floatNwords := float64(nwords)
logNwords := math.Log(floatNwords)
var sumNLogN float64
for j := 0; j < tableSize; j++ {
n := table[j]
if n > 0 {
sumNLogN += nLogN[n]
}
}
// Compute emax (maximum possible entropy for this word size)
na := CanonicalCircularKmerCount(ws)
na := tableSize // 4^ws
var emax float64
if nwords < na {
emax = math.Log(float64(nwords))
emax = logNwords
} else {
cov := nwords / na
remains := nwords - (na * cov)
@@ -105,7 +111,19 @@ func KmerEntropy(kmer uint64, k int, levelMax int) float64 {
continue
}
entropy := (logNwords - sumNLogN/floatNwords) / emax
// Accumulate Σ f·log(f) and Σ f·log(class_size) over canonical forms.
classLogSize := classLogSizeTables[ws]
var sumNLogN, sumClassLogN float64
for j := 0; j < tableSize; j++ {
n := table[j]
if n > 0 {
sumNLogN += nLogN[n]
sumClassLogN += float64(n) * classLogSize[j]
}
}
// Corrected entropy: H_raw ≈ log(N) + (Σf·log(s) - Σf·log(f)) / N
entropy := (logNwords + sumClassLogN/floatNwords - sumNLogN/floatNwords) / emax
if entropy < 0 {
entropy = 0
}
@@ -134,6 +152,7 @@ type KmerEntropyFilter struct {
threshold float64
nLogN []float64
normTables [][]int
classLogSizeTables [][]float64
emaxValues []float64
logNwords []float64
// Pre-allocated frequency tables reused across Entropy() calls.
@@ -142,11 +161,6 @@ type KmerEntropyFilter struct {
}
// NewKmerEntropyFilter creates an entropy filter with pre-computed tables.
//
// Parameters:
// - k: the k-mer size
// - levelMax: maximum sub-word size for entropy (typically 6)
// - threshold: entropy threshold (k-mers with entropy <= threshold are rejected)
func NewKmerEntropyFilter(k, levelMax int, threshold float64) *KmerEntropyFilter {
if levelMax >= k {
levelMax = k - 1
@@ -169,20 +183,38 @@ func NewKmerEntropyFilter(k, levelMax int, threshold float64) *KmerEntropyFilter
}
}
// ln(class_size) for each canonical form under circular normalization.
classLogSizeTables := make([][]float64, levelMax+1)
for ws := 1; ws <= levelMax; ws++ {
tableSize := 1 << (ws * 2)
classSize := make([]int, tableSize)
for code := 0; code < tableSize; code++ {
classSize[normTables[ws][code]]++
}
classLogSizeTables[ws] = make([]float64, tableSize)
for j := 0; j < tableSize; j++ {
if classSize[j] > 0 {
classLogSizeTables[ws][j] = math.Log(float64(classSize[j]))
}
}
}
// Pre-compute emax and logNwords per word size.
// emax uses 4^ws raw bins to match the corrected entropy.
emaxValues := make([]float64, levelMax+1)
logNwords := make([]float64, levelMax+1)
for ws := 1; ws <= levelMax; ws++ {
nw := k - ws + 1
na := CanonicalCircularKmerCount(ws)
na := 1 << (ws * 2) // 4^ws raw bins
floatNw := float64(nw)
logNwords[ws] = math.Log(floatNw)
if nw < na {
logNwords[ws] = math.Log(float64(nw))
emaxValues[ws] = math.Log(float64(nw))
emaxValues[ws] = logNwords[ws]
} else {
cov := nw / na
remains := nw - (na * cov)
f1 := float64(cov) / float64(nw)
f2 := float64(cov+1) / float64(nw)
logNwords[ws] = math.Log(float64(nw))
f1 := float64(cov) / floatNw
f2 := float64(cov+1) / floatNw
emaxValues[ws] = -(float64(na-remains)*f1*math.Log(f1) +
float64(remains)*f2*math.Log(f2))
}
@@ -200,6 +232,7 @@ func NewKmerEntropyFilter(k, levelMax int, threshold float64) *KmerEntropyFilter
threshold: threshold,
nLogN: nLogN,
normTables: normTables,
classLogSizeTables: classLogSizeTables,
emaxValues: emaxValues,
logNwords: logNwords,
freqTables: freqTables,
@@ -236,7 +269,7 @@ func (ef *KmerEntropyFilter) Entropy(kmer uint64) float64 {
// Count circular-canonical sub-word frequencies
tableSize := 1 << (ws * 2)
table := ef.freqTables[ws]
clear(table) // reset to zero
clear(table)
mask := (1 << (ws * 2)) - 1
normTable := ef.normTables[ws]
@@ -251,19 +284,21 @@ func (ef *KmerEntropyFilter) Entropy(kmer uint64) float64 {
table[normWord]++
}
// Compute Shannon entropy
floatNwords := float64(nwords)
logNwords := ef.logNwords[ws]
classLogSize := ef.classLogSizeTables[ws]
var sumNLogN float64
var sumNLogN, sumClassLogN float64
for j := 0; j < tableSize; j++ {
n := table[j]
if n > 0 {
sumNLogN += ef.nLogN[n]
sumClassLogN += float64(n) * classLogSize[j]
}
}
entropy := (logNwords - sumNLogN/floatNwords) / emax
// Corrected entropy: H_raw ≈ log(N) + (Σf·log(s) - Σf·log(f)) / N
entropy := (logNwords + sumClassLogN/floatNwords - sumNLogN/floatNwords) / emax
if entropy < 0 {
entropy = 0
}
+1 -1
View File
@@ -3,7 +3,7 @@ package obioptions
// Version is automatically updated by the Makefile from version.txt
// The patch number (third digit) is incremented on each push to the repository
var _Version = "Release 4.4.42"
var _Version = "Release 4.4.44"
// Version returns the version of the obitools package.
//
+18
View File
@@ -364,6 +364,24 @@ func (s *BioSequence) GetIntSlice(key string) ([]int, bool) {
return val, ok
}
// GetMapOfIntSlice looks up the attribute stored under key and converts it
// with obiutils.InterfaceToMapOfIntSlice.
//
// The boolean is false when the attribute is absent or when the conversion
// reports an error; in the latter case the map is whatever the converter
// returned.
func (s *BioSequence) GetMapOfIntSlice(key string) (map[string][]int, bool) {
	raw, found := s.GetAttribute(key)
	if found {
		converted, convErr := obiutils.InterfaceToMapOfIntSlice(raw)
		return converted, convErr == nil
	}
	return nil, false
}
// GetMapOfStringSlice looks up the attribute stored under key and converts it
// with obiutils.InterfaceToMapOfStringSlice.
//
// The boolean is false when the attribute is absent or when the conversion
// reports an error; in the latter case the map is whatever the converter
// returned.
func (s *BioSequence) GetMapOfStringSlice(key string) (map[string][]string, bool) {
	raw, found := s.GetAttribute(key)
	if found {
		converted, convErr := obiutils.InterfaceToMapOfStringSlice(raw)
		return converted, convErr == nil
	}
	return nil, false
}
// Count returns the value of the "count" attribute of the BioSequence.
//
// The count of a sequence is the number of times it has been observed in the dataset.
+1 -1
View File
@@ -103,7 +103,7 @@ func TestNewBioSequence(t *testing.T) {
// Return type: None.
func TestNewBioSequenceWithQualities(t *testing.T) {
id := "123"
sequence := []byte("ATGC")
sequence := []byte("atgc")
definition := "DNA sequence"
qualities := []byte("1234")
+27
View File
@@ -141,6 +141,33 @@ var OBILang = gval.NewLanguage(
gval.Function("max", func(args ...interface{}) (interface{}, error) {
return obiutils.Max(args[0])
}),
gval.Function("which_max", func(args ...interface{}) (interface{}, error) {
result, err := obiutils.WhichMax(args[0])
if idx, ok := result.(int); ok {
return float64(idx), nil
}
return result, err
}),
gval.Function("which_min", func(args ...interface{}) (interface{}, error) {
result, err := obiutils.WhichMin(args[0])
if idx, ok := result.(int); ok {
return float64(idx), nil
}
return result, err
}),
gval.Function("filtermin", func(args ...interface{}) (interface{}, error) {
return obiutils.FilterMin(args[0], args[1])
}),
gval.Function("filtermax", func(args ...interface{}) (interface{}, error) {
return obiutils.FilterMax(args[0], args[1])
}),
gval.Function("saturatingsub", func(args ...interface{}) (interface{}, error) {
return obiutils.SaturatingSub(args[0], args[1])
}),
gval.Function("contains", func(args ...interface{}) (interface{}, error) {
if obiutils.IsAMap(args[0]) {
val := reflect.ValueOf(args[0]).MapIndex(reflect.ValueOf(args[1]))
+38
View File
@@ -276,6 +276,44 @@ func InterfaceToStringMap(i interface{}) (val map[string]string, err error) {
return
}
// InterfaceToMapOfIntSlice converts i into a map[string][]int.
//
// A map[string][]int is returned as is (it is not copied). A
// map[string]interface{} is converted entry by entry with
// InterfaceToIntSlice; the first entry that cannot be converted aborts the
// conversion and its error is returned. Any other dynamic type yields a
// *NotAMapInt error.
//
// On error the returned map is always nil, so callers never observe a
// partially converted result.
func InterfaceToMapOfIntSlice(i interface{}) (val map[string][]int, err error) {
	switch m := i.(type) {
	case map[string][]int:
		return m, nil
	case map[string]interface{}:
		converted := make(map[string][]int, len(m))
		for k, v := range m {
			ints, convErr := InterfaceToIntSlice(v)
			if convErr != nil {
				return nil, convErr
			}
			converted[k] = ints
		}
		return converted, nil
	default:
		return nil, &NotAMapInt{"value attribute cannot be casted to a map[string][]int"}
	}
}
// InterfaceToMapOfStringSlice converts i into a map[string][]string.
//
// A map[string][]string is returned as is (it is not copied). A
// map[string]interface{} is converted entry by entry with
// InterfaceToStringSlice; the first entry that cannot be converted aborts the
// conversion and its error is returned. Any other dynamic type yields a
// *NotAMapInt error.
//
// On error the returned map is always nil, so callers never observe a
// partially converted result.
func InterfaceToMapOfStringSlice(i interface{}) (val map[string][]string, err error) {
	switch m := i.(type) {
	case map[string][]string:
		return m, nil
	case map[string]interface{}:
		converted := make(map[string][]string, len(m))
		for k, v := range m {
			strs, convErr := InterfaceToStringSlice(v)
			if convErr != nil {
				return nil, convErr
			}
			converted[k] = strs
		}
		return converted, nil
	default:
		return nil, &NotAMapInt{"value attribute cannot be casted to a map[string][]string"}
	}
}
func InterfaceToStringSlice(i interface{}) (val []string, err error) {
err = nil
+365 -4
View File
@@ -34,6 +34,26 @@ func MinMaxSlice[T constraints.Ordered](vec []T) (min, max T) {
return
}
// FilterMinSlice returns a new slice holding, in their original order, the
// elements of vec that are greater than or equal to minimum. The input slice
// is left untouched and the result is never nil.
func FilterMinSlice[T constraints.Ordered](vec []T, minimum T) []T {
	kept := make([]T, 0, len(vec))
	for i := range vec {
		if item := vec[i]; item >= minimum {
			kept = append(kept, item)
		}
	}
	return kept
}
// FilterMaxSlice returns a new slice holding, in their original order, the
// elements of vec that are less than or equal to maximum. The input slice is
// left untouched and the result is never nil.
func FilterMaxSlice[T constraints.Ordered](vec []T, maximum T) []T {
	kept := make([]T, 0, len(vec))
	for i := range vec {
		if item := vec[i]; item <= maximum {
			kept = append(kept, item)
		}
	}
	return kept
}
func MaxMap[K comparable, T constraints.Ordered](values map[K]T) (K, T, error) {
var maxKey K
var maxValue T
@@ -73,6 +93,46 @@ func MinMap[K comparable, T constraints.Ordered](values map[K]T) (K, T, error) {
return minKey, minValue, nil
}
// FilterMinMap returns a new map containing the entries of values whose value
// is greater than or equal to minimum. The input map is not modified.
func FilterMinMap[K comparable, T constraints.Ordered](values map[K]T, minimum T) map[K]T {
	kept := map[K]T{}
	for key, val := range values {
		if !(val >= minimum) {
			continue
		}
		kept[key] = val
	}
	return kept
}
// FilterMaxMap returns a new map containing the entries of values whose value
// is less than or equal to maximum. The input map is not modified.
func FilterMaxMap[K comparable, T constraints.Ordered](values map[K]T, maximum T) map[K]T {
	kept := map[K]T{}
	for key, val := range values {
		if !(val <= maximum) {
			continue
		}
		kept[key] = val
	}
	return kept
}
// SaturatingSubSlice returns a new slice of the same length as vec in which
// each element is vec[i]-sub when vec[i] is strictly greater than sub, and
// the zero value of T otherwise.
func SaturatingSubSlice[T Numeric](vec []T, sub T) []T {
	out := make([]T, len(vec))
	for i := range vec {
		// Positions that are not strictly above sub keep the zero value.
		if x := vec[i]; x > sub {
			out[i] = x - sub
		}
	}
	return out
}
// SaturatingSubMap returns a new map holding value-sub for every entry of
// values whose value is strictly greater than sub. Entries that are not
// strictly greater than sub are left out of the result.
func SaturatingSubMap[K comparable, T Numeric](values map[K]T, sub T) map[K]T {
	out := map[K]T{}
	for key, val := range values {
		if !(val > sub) {
			continue
		}
		out[key] = val - sub
	}
	return out
}
// Min returns the smallest element in a slice/array or map,
// or the value itself if data is a single comparable value.
// Returns an error if the container is empty or the type is unsupported.
@@ -135,11 +195,121 @@ func Max(data interface{}) (interface{}, error) {
}
}
// FilterMin dispatches on the dynamic kind of data.
//
// Slices and arrays are handed to filterMinFromIterable and maps to
// filterMinFromMap; an empty container is reported as an error. Any other
// value is returned unchanged when its kind is ordered, and rejected with an
// "unsupported type" error otherwise.
func FilterMin(data interface{}, minimum interface{}) (interface{}, error) {
	value := reflect.ValueOf(data)
	kind := value.Kind()

	if kind == reflect.Slice || kind == reflect.Array {
		if value.Len() == 0 {
			return nil, errors.New("empty slice or array")
		}
		return filterMinFromIterable(value, minimum)
	}

	if kind == reflect.Map {
		if value.Len() == 0 {
			return nil, errors.New("empty map")
		}
		return filterMinFromMap(value, minimum)
	}

	if isOrderedKind(kind) {
		return data, nil
	}
	return nil, fmt.Errorf("unsupported type: %s", kind)
}
// FilterMax dispatches on the dynamic kind of data.
//
// Slices and arrays are handed to filterMaxFromIterable and maps to
// filterMaxFromMap; an empty container is reported as an error. Any other
// value is returned unchanged when its kind is ordered, and rejected with an
// "unsupported type" error otherwise.
func FilterMax(data interface{}, maximum interface{}) (interface{}, error) {
	value := reflect.ValueOf(data)
	kind := value.Kind()

	if kind == reflect.Slice || kind == reflect.Array {
		if value.Len() == 0 {
			return nil, errors.New("empty slice or array")
		}
		return filterMaxFromIterable(value, maximum)
	}

	if kind == reflect.Map {
		if value.Len() == 0 {
			return nil, errors.New("empty map")
		}
		return filterMaxFromMap(value, maximum)
	}

	if isOrderedKind(kind) {
		return data, nil
	}
	return nil, fmt.Errorf("unsupported type: %s", kind)
}
// SaturatingSub dispatches on the dynamic kind of data.
//
// Slices and arrays are handed to saturatingSubFromIterable and maps to
// saturatingSubFromMap. A single numeric value is processed by
// saturatingSubValues and returned as an interface{}; any other kind is
// rejected with an "unsupported type" error.
func SaturatingSub(data interface{}, sub interface{}) (interface{}, error) {
	value := reflect.ValueOf(data)
	kind := value.Kind()

	if kind == reflect.Slice || kind == reflect.Array {
		return saturatingSubFromIterable(value, sub)
	}
	if kind == reflect.Map {
		return saturatingSubFromMap(value, sub)
	}
	if !isNumericKind(kind) {
		return nil, fmt.Errorf("unsupported type: %s", kind)
	}

	diff, err := saturatingSubValues(value, reflect.ValueOf(sub))
	if err != nil {
		return nil, err
	}
	return diff.Interface(), nil
}
func saturatingSubFromIterable(v reflect.Value, sub interface{}) (interface{}, error) {
subVal := reflect.ValueOf(sub)
result := reflect.MakeSlice(v.Type(), v.Len(), v.Len())
for i := 0; i < v.Len(); i++ {
r, err := saturatingSubValues(v.Index(i), subVal)
if err != nil {
return nil, err
}
result.Index(i).Set(r)
}
return result.Interface(), nil
}
func saturatingSubFromMap(v reflect.Value, sub interface{}) (interface{}, error) {
subVal := reflect.ValueOf(sub)
result := reflect.MakeMap(v.Type())
for _, key := range v.MapKeys() {
r, err := saturatingSubValues(v.MapIndex(key), subVal)
if err != nil {
return nil, err
}
if !r.IsZero() {
result.SetMapIndex(key, r)
}
}
return result.Interface(), nil
}
// saturatingSubValues computes a-b clamped at zero: the result is a-b when a
// is strictly greater than b, and the zero value otherwise. The result has
// the (concrete) type of a.
//
// Both operands are unwrapped when they are held behind an interface. When b
// belongs to the same numeric family as a (signed, unsigned or float) the
// subtraction is done exactly in that family. When the families differ
// (e.g. an int element with a float64 subtrahend) the operands are compared
// and subtracted as float64 and the difference is converted back to the type
// of a, truncating any fractional part for integer results.
//
// An error is returned when a is invalid or not numeric, or when b is not
// numeric.
func saturatingSubValues(a, b reflect.Value) (reflect.Value, error) {
	if a.Kind() == reflect.Interface {
		a = a.Elem()
	}
	if b.Kind() == reflect.Interface {
		b = b.Elem()
	}
	if !a.IsValid() {
		// a.Type() would panic on the zero Value.
		return reflect.Value{}, fmt.Errorf("unsupported type for saturating subtraction: %s", a.Kind())
	}

	// asFloat widens any numeric value to float64; ok is false otherwise.
	asFloat := func(x reflect.Value) (f float64, ok bool) {
		switch {
		case x.CanInt():
			return float64(x.Int()), true
		case x.CanUint():
			return float64(x.Uint()), true
		case x.CanFloat():
			return x.Float(), true
		}
		return 0, false
	}
	badSubtrahend := func() (reflect.Value, error) {
		return reflect.Value{}, fmt.Errorf("unsupported subtrahend type for saturating subtraction: %s", b.Kind())
	}

	result := reflect.New(a.Type()).Elem()
	switch a.Kind() {
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		av := a.Int()
		if b.CanInt() {
			if bv := b.Int(); av > bv {
				result.SetInt(av - bv)
			}
			break
		}
		bf, ok := asFloat(b)
		if !ok {
			return badSubtrahend()
		}
		if af := float64(av); af > bf {
			result.SetInt(int64(af - bf))
		}
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
		av := a.Uint()
		if b.CanUint() {
			if bv := b.Uint(); av > bv {
				result.SetUint(av - bv)
			}
			break
		}
		bf, ok := asFloat(b)
		if !ok {
			return badSubtrahend()
		}
		if af := float64(av); af > bf {
			result.SetUint(uint64(af - bf))
		}
	case reflect.Float32, reflect.Float64:
		bf, ok := asFloat(b)
		if !ok {
			return badSubtrahend()
		}
		if av := a.Float(); av > bf {
			result.SetFloat(av - bf)
		}
	default:
		return reflect.Value{}, fmt.Errorf("unsupported type for saturating subtraction: %s", a.Kind())
	}
	return result, nil
}
// maxFromIterable scans a slice/array to find the maximum.
func maxFromIterable(v reflect.Value) (interface{}, error) {
var best reflect.Value
for i := 0; i < v.Len(); i++ {
elem := v.Index(i)
elem := unwrapInterface(v.Index(i))
if !isOrderedKind(elem.Kind()) {
return nil, fmt.Errorf("unsupported element type: %s", elem.Kind())
}
@@ -154,7 +324,7 @@ func maxFromIterable(v reflect.Value) (interface{}, error) {
func minFromIterable(v reflect.Value) (interface{}, error) {
var minVal reflect.Value
for i := 0; i < v.Len(); i++ {
elem := v.Index(i)
elem := unwrapInterface(v.Index(i))
if !isOrderedKind(elem.Kind()) {
return nil, fmt.Errorf("unsupported element type: %s", elem.Kind())
}
@@ -165,12 +335,182 @@ func minFromIterable(v reflect.Value) (interface{}, error) {
return minVal.Interface(), nil
}
// filterMinFromIterable returns a new slice with the elements of the slice
// or array v that are not less than minimum, in their original order.
//
// The result is always a slice: for an array input the slice type is derived
// from the array's element type, because reflect.MakeSlice panics when given
// a non-slice type. Interface-wrapped elements are unwrapped before being
// checked and compared; an element whose kind is not ordered aborts the
// filtering with an error.
func filterMinFromIterable(v reflect.Value, minimum interface{}) (interface{}, error) {
	minVal := reflect.ValueOf(minimum)

	sliceType := v.Type()
	if v.Kind() == reflect.Array {
		sliceType = reflect.SliceOf(sliceType.Elem())
	}

	result := reflect.MakeSlice(sliceType, 0, v.Len())
	for i := 0; i < v.Len(); i++ {
		elem := unwrapInterface(v.Index(i))
		if !isOrderedKind(elem.Kind()) {
			return nil, fmt.Errorf("unsupported element type: %s", elem.Kind())
		}
		if !less(elem, minVal) { // elem >= minimum
			result = reflect.Append(result, elem)
		}
	}
	return result.Interface(), nil
}
// filterMaxFromIterable returns a new slice with the elements of the slice
// or array v that are not greater than maximum, in their original order.
//
// The result is always a slice: for an array input the slice type is derived
// from the array's element type, because reflect.MakeSlice panics when given
// a non-slice type. Interface-wrapped elements are unwrapped before being
// checked and compared; an element whose kind is not ordered aborts the
// filtering with an error.
func filterMaxFromIterable(v reflect.Value, maximum interface{}) (interface{}, error) {
	maxVal := reflect.ValueOf(maximum)

	sliceType := v.Type()
	if v.Kind() == reflect.Array {
		sliceType = reflect.SliceOf(sliceType.Elem())
	}

	result := reflect.MakeSlice(sliceType, 0, v.Len())
	for i := 0; i < v.Len(); i++ {
		elem := unwrapInterface(v.Index(i))
		if !isOrderedKind(elem.Kind()) {
			return nil, fmt.Errorf("unsupported element type: %s", elem.Kind())
		}
		if !greater(elem, maxVal) { // elem <= maximum
			result = reflect.Append(result, elem)
		}
	}
	return result.Interface(), nil
}
// whichMaxFromIterable scans the slice or array v and reports the position
// of its largest element. When several elements compare equal to the
// maximum, the first one wins. An element whose kind is not ordered stops
// the scan with an error. For an empty v the returned position is 0.
func whichMaxFromIterable(v reflect.Value) (int, error) {
	var (
		champion reflect.Value
		position int
	)
	for i, n := 0, v.Len(); i < n; i++ {
		candidate := unwrapInterface(v.Index(i))
		if kind := candidate.Kind(); !isOrderedKind(kind) {
			return 0, fmt.Errorf("unsupported element type: %s", kind)
		}
		// The first element always seeds the search.
		if i > 0 && !greater(candidate, champion) {
			continue
		}
		champion, position = candidate, i
	}
	return position, nil
}
// whichMinFromIterable scans the slice or array v and reports the position
// of its smallest element. When several elements compare equal to the
// minimum, the first one wins. An element whose kind is not ordered stops
// the scan with an error. For an empty v the returned position is 0.
func whichMinFromIterable(v reflect.Value) (int, error) {
	var (
		lowest   reflect.Value
		position int
	)
	for i, n := 0, v.Len(); i < n; i++ {
		candidate := unwrapInterface(v.Index(i))
		if kind := candidate.Kind(); !isOrderedKind(kind) {
			return 0, fmt.Errorf("unsupported element type: %s", kind)
		}
		// The first element always seeds the search.
		if i > 0 && !less(candidate, lowest) {
			continue
		}
		lowest, position = candidate, i
	}
	return position, nil
}
// whichMaxFromMap walks the values of the map v and reports the key whose
// value is the largest. Interface-wrapped values are unwrapped before being
// checked and compared; a value whose kind is not ordered stops the walk
// with an error.
func whichMaxFromMap(v reflect.Value) (interface{}, error) {
	var topKey, topVal reflect.Value
	for n, key := range v.MapKeys() {
		candidate := unwrapInterface(v.MapIndex(key))
		if kind := candidate.Kind(); !isOrderedKind(kind) {
			return nil, fmt.Errorf("unsupported element type: %s", kind)
		}
		// The first visited entry always seeds the search.
		if n == 0 || greater(candidate, topVal) {
			topKey, topVal = key, candidate
		}
	}
	return topKey.Interface(), nil
}
// whichMinFromMap walks the values of the map v and reports the key whose
// value is the smallest. Interface-wrapped values are unwrapped before being
// checked and compared; a value whose kind is not ordered stops the walk
// with an error.
func whichMinFromMap(v reflect.Value) (interface{}, error) {
	var lowKey, lowVal reflect.Value
	for n, key := range v.MapKeys() {
		candidate := unwrapInterface(v.MapIndex(key))
		if kind := candidate.Kind(); !isOrderedKind(kind) {
			return nil, fmt.Errorf("unsupported element type: %s", kind)
		}
		// The first visited entry always seeds the search.
		if n == 0 || less(candidate, lowVal) {
			lowKey, lowVal = key, candidate
		}
	}
	return lowKey.Interface(), nil
}
// WhichMax returns the key (for a map) or the index (for a slice/array) of
// the maximum value held by data.
//
// An empty container, an element of unsupported type, or a data value that
// is neither a slice, an array nor a map is reported as an error. Whenever
// an error is returned the first result is a nil interface, so a caller that
// type-asserts the result to int before looking at the error cannot mistake
// a failed lookup for index 0.
func WhichMax(data interface{}) (interface{}, error) {
	v := reflect.ValueOf(data)
	switch v.Kind() {
	case reflect.Slice, reflect.Array:
		if v.Len() == 0 {
			return nil, errors.New("empty slice or array")
		}
		idx, err := whichMaxFromIterable(v)
		if err != nil {
			// Do not box the placeholder index alongside the error.
			return nil, err
		}
		return idx, nil
	case reflect.Map:
		if v.Len() == 0 {
			return nil, errors.New("empty map")
		}
		return whichMaxFromMap(v)
	default:
		return nil, fmt.Errorf("unsupported type: %s", v.Kind())
	}
}
// WhichMin returns the key (for a map) or the index (for a slice/array) of
// the minimum value held by data.
//
// An empty container, an element of unsupported type, or a data value that
// is neither a slice, an array nor a map is reported as an error. Whenever
// an error is returned the first result is a nil interface, so a caller that
// type-asserts the result to int before looking at the error cannot mistake
// a failed lookup for index 0.
func WhichMin(data interface{}) (interface{}, error) {
	v := reflect.ValueOf(data)
	switch v.Kind() {
	case reflect.Slice, reflect.Array:
		if v.Len() == 0 {
			return nil, errors.New("empty slice or array")
		}
		idx, err := whichMinFromIterable(v)
		if err != nil {
			// Do not box the placeholder index alongside the error.
			return nil, err
		}
		return idx, nil
	case reflect.Map:
		if v.Len() == 0 {
			return nil, errors.New("empty map")
		}
		return whichMinFromMap(v)
	default:
		return nil, fmt.Errorf("unsupported type: %s", v.Kind())
	}
}
// filterMinFromMap builds a new map, of the same type as v, holding the
// entries of v whose value is not less than minimum. Interface-wrapped
// values are unwrapped before being checked and compared; a value whose kind
// is not ordered aborts the filtering with an error.
func filterMinFromMap(v reflect.Value, minimum interface{}) (interface{}, error) {
	threshold := reflect.ValueOf(minimum)
	kept := reflect.MakeMap(v.Type())
	for _, key := range v.MapKeys() {
		candidate := unwrapInterface(v.MapIndex(key))
		if kind := candidate.Kind(); !isOrderedKind(kind) {
			return nil, fmt.Errorf("unsupported element type: %s", kind)
		}
		if less(candidate, threshold) {
			continue // strictly below the threshold: dropped
		}
		kept.SetMapIndex(key, candidate)
	}
	return kept.Interface(), nil
}
// filterMaxFromMap builds a new map, of the same type as v, holding the
// entries of v whose value is not greater than maximum. Interface-wrapped
// values are unwrapped before being checked and compared; a value whose kind
// is not ordered aborts the filtering with an error.
func filterMaxFromMap(v reflect.Value, maximum interface{}) (interface{}, error) {
	threshold := reflect.ValueOf(maximum)
	kept := reflect.MakeMap(v.Type())
	for _, key := range v.MapKeys() {
		candidate := unwrapInterface(v.MapIndex(key))
		if kind := candidate.Kind(); !isOrderedKind(kind) {
			return nil, fmt.Errorf("unsupported element type: %s", kind)
		}
		if greater(candidate, threshold) {
			continue // strictly above the threshold: dropped
		}
		kept.SetMapIndex(key, candidate)
	}
	return kept.Interface(), nil
}
// maxFromMap scans map values to find the maximum.
func maxFromMap(v reflect.Value) (interface{}, error) {
var best reflect.Value
first := true
for _, key := range v.MapKeys() {
elem := v.MapIndex(key)
elem := unwrapInterface(v.MapIndex(key))
if !isOrderedKind(elem.Kind()) {
return nil, fmt.Errorf("unsupported element type: %s", elem.Kind())
}
@@ -187,7 +527,7 @@ func minFromMap(v reflect.Value) (interface{}, error) {
var minVal reflect.Value
first := true
for _, key := range v.MapKeys() {
elem := v.MapIndex(key)
elem := unwrapInterface(v.MapIndex(key))
if !isOrderedKind(elem.Kind()) {
return nil, fmt.Errorf("unsupported element type: %s", elem.Kind())
}
@@ -199,6 +539,27 @@ func minFromMap(v reflect.Value) (interface{}, error) {
return minVal.Interface(), nil
}
// isNumericKind reports whether k is one of the sized or unsized signed
// integer kinds, one of the sized or unsized unsigned integer kinds
// (reflect.Uintptr excluded), or a floating-point kind.
func isNumericKind(k reflect.Kind) bool {
	switch {
	case k >= reflect.Int && k <= reflect.Int64:
		return true
	case k >= reflect.Uint && k <= reflect.Uint64:
		return true
	case k == reflect.Float32 || k == reflect.Float64:
		return true
	}
	return false
}
// unwrapInterface strips one level of interface wrapping from v.
//
// Indexing a []interface{} or a map[string]interface{} through reflection
// yields values whose Kind is reflect.Interface rather than the kind of the
// concrete value they hold. For such a value the contained element is
// returned; every other value is handed back untouched.
func unwrapInterface(v reflect.Value) reflect.Value {
	switch v.Kind() {
	case reflect.Interface:
		return v.Elem()
	default:
		return v
	}
}
// isOrderedKind reports whether k supports comparison ordering.
func isOrderedKind(k reflect.Kind) bool {
switch k {
+1 -1
View File
@@ -1 +1 @@
4.4.42
4.4.44