mirror of
https://github.com/metabarcoding/obitools4.git
synced 2026-06-24 09:41:00 +00:00
Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| e9210e28a3 | |||
| 13a93fce11 | |||
| 14064c919e | |||
| 1dfd68aa6d | |||
| 930fe5f1ba | |||
| dcdaf9e372 | |||
| af7ae3d60c | |||
| cecf90fa40 | |||
| a186bd1c92 | |||
| 46d60c1a44 |
@@ -146,6 +146,65 @@ func __match__key__(text []byte) []int {
|
||||
return []int{} // Not a key
|
||||
}
|
||||
|
||||
// __match__array__ locates a bracketed array value at the start of text.
//
// Leading spaces and tabs are skipped, then the value must open with '['.
// Nested '[' / '{' and ']' / '}' pairs are tracked by depth, and brackets
// appearing inside single- or double-quoted strings are ignored. After the
// closing bracket only spaces and tabs may precede the terminating ';'.
//
// On success it returns {start, end} where start is the index of the opening
// '[' and end is one past the terminating ';'. In every other case (no
// opening bracket, unbalanced input, missing ';') it returns an empty,
// non-nil slice.
func __match__array__(text []byte) []int {
	const (
		beforeArray = iota // still looking for the opening '['
		insideArray        // between the opening '[' and its matching close
		afterArray         // array closed, waiting for the terminating ';'
	)

	phase := beforeArray
	depth := 0
	begin := 0
	var quote byte // active string delimiter, 0 when outside a string

	for pos, c := range text {
		blank := c == ' ' || c == '\t'

		switch phase {
		case afterArray:
			switch {
			case c == ';':
				return []int{begin, pos + 1}
			case !blank:
				return []int{}
			}

		case beforeArray:
			switch {
			case c == '[':
				depth++
				phase = insideArray
				begin = pos
			case !blank:
				return []int{}
			}

		default: // insideArray
			switch {
			case quote != 0:
				if c == quote {
					quote = 0
				}
			case c == '"' || c == '\'':
				quote = c
			case c == '[' || c == '{':
				depth++
			case c == ']' || c == '}':
				depth--
				if depth == 0 {
					phase = afterArray
				}
			}
		}
	}

	return []int{}
}
|
||||
|
||||
func __match__general__(text []byte) []int {
|
||||
|
||||
for i, r := range text {
|
||||
@@ -242,6 +301,21 @@ func ParseOBIFeatures(text string, annotations obiseq.Annotation) string {
|
||||
stop = m[1] + 1
|
||||
} else {
|
||||
|
||||
// array value
|
||||
m = __match__array__(part)
|
||||
if len(m) > 0 {
|
||||
bvalue = bytes.TrimSpace(part[m[0]:(m[1] - 1)])
|
||||
j := bytes.ReplaceAll(bvalue, []byte("'"), []byte(`"`))
|
||||
j = __obi_header_map_int_key__.ReplaceAll(j, []byte(`$1"$2":`))
|
||||
arr, err := _parse_json_array_interface(j)
|
||||
if err != nil {
|
||||
value = string(bvalue)
|
||||
} else {
|
||||
value = arr
|
||||
}
|
||||
stop = m[1] + 1
|
||||
} else {
|
||||
|
||||
// Generic value
|
||||
|
||||
// m = __obi_header_value_general_pattern__.FindIndex(part)
|
||||
@@ -264,6 +338,7 @@ func ParseOBIFeatures(text string, annotations obiseq.Annotation) string {
|
||||
// no value
|
||||
break
|
||||
} // End of No value
|
||||
} // End of not array
|
||||
} // End of not dict
|
||||
} // End of not string
|
||||
} // End of not numeric
|
||||
@@ -327,19 +402,19 @@ func WriteFastSeqOBIHeade(buffer *bytes.Buffer, sequence *obiseq.BioSequence) {
|
||||
buffer.WriteString(fmt.Sprintf("%s=", key))
|
||||
buffer.Write(tv)
|
||||
buffer.WriteString("; ")
|
||||
case map[string]int,
|
||||
map[string]string,
|
||||
map[string]interface{}:
|
||||
tv, err := obiutils.JsonMarshal(t)
|
||||
if err != nil {
|
||||
log.Fatalf("Cannot convert %v value", value)
|
||||
}
|
||||
tv = bytes.ReplaceAll(tv, []byte(`"`), []byte("'"))
|
||||
buffer.WriteString(fmt.Sprintf("%s=", key))
|
||||
buffer.Write(tv)
|
||||
buffer.WriteString("; ")
|
||||
default:
|
||||
buffer.WriteString(fmt.Sprintf("%s=%v; ", key, value))
|
||||
if obiutils.IsAMap(value) || obiutils.IsASlice(value) || obiutils.IsAnArray(value) {
|
||||
tv, err := obiutils.JsonMarshal(t)
|
||||
if err != nil {
|
||||
log.Fatalf("Cannot convert %v value", value)
|
||||
}
|
||||
tv = bytes.ReplaceAll(tv, []byte(`"`), []byte("'"))
|
||||
buffer.WriteString(fmt.Sprintf("%s=", key))
|
||||
buffer.Write(tv)
|
||||
buffer.WriteString("; ")
|
||||
} else {
|
||||
buffer.WriteString(fmt.Sprintf("%s=%v; ", key, value))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -90,6 +90,9 @@ func FormatFastaBatch(batch obiiter.BioSequenceBatch, formater FormatHeader, ski
|
||||
log.Debugf("FormatFastaBatch: #%d : %d seqs", batch.Order(), batch.Len())
|
||||
|
||||
for _, seq := range batch.Slice() {
|
||||
if len(seq.Id()) == 0 {
|
||||
log.Fatalf("Sequence identifier is empty")
|
||||
}
|
||||
if seq.Len() > 0 {
|
||||
// Write header directly into bs — no intermediate string
|
||||
bs.WriteByte('>')
|
||||
|
||||
@@ -64,6 +64,9 @@ func FormatFastqBatch(batch obiiter.BioSequenceBatch,
|
||||
first := true
|
||||
|
||||
for _, seq := range batch.Slice() {
|
||||
if len(seq.Id()) == 0 {
|
||||
log.Fatalf("Sequence identifier is empty")
|
||||
}
|
||||
if seq.Len() > 0 {
|
||||
_formatFastq(&bs, seq, formater)
|
||||
|
||||
|
||||
+90
-55
@@ -4,22 +4,21 @@ import "math"
|
||||
|
||||
// KmerEntropy computes the entropy of a single encoded k-mer.
|
||||
//
|
||||
// The algorithm mirrors the lowmask entropy calculation: it decodes the k-mer
|
||||
// The algorithm mirrors the Rust obiskbuilder entropy: it decodes the k-mer
|
||||
// to a DNA sequence, extracts all sub-words of each size from 1 to levelMax,
|
||||
// normalizes them by circular canonical form, counts their frequencies, and
|
||||
// computes Shannon entropy normalized by the maximum possible entropy.
|
||||
// computes Shannon entropy corrected for class sizes, normalized by the
|
||||
// maximum possible entropy over 4^ws raw bins.
|
||||
// The returned value is the minimum entropy across all word sizes.
|
||||
//
|
||||
// Correction for small sequences: the raw entropy H = log(N) - Σ f·log(f)/N
|
||||
// under-estimates the true complexity when many raw words collapse to the same
|
||||
// canonical form. Adding Σ f·log(class_size)/N recovers the entropy of the
|
||||
// underlying uncollapsed distribution (assuming uniform mixing within each
|
||||
// equivalence class).
|
||||
//
|
||||
// A value close to 0 indicates very low complexity (e.g. "AAAA..."),
|
||||
// while a value close to 1 indicates high complexity.
|
||||
//
|
||||
// Parameters:
|
||||
// - kmer: the encoded k-mer (2 bits per base)
|
||||
// - k: the k-mer size
|
||||
// - levelMax: maximum sub-word size for entropy (typically 6)
|
||||
//
|
||||
// Returns:
|
||||
// - minimum normalized entropy across all word sizes 1..levelMax
|
||||
func KmerEntropy(kmer uint64, k int, levelMax int) float64 {
|
||||
if k < 1 || levelMax < 1 {
|
||||
return 1.0
|
||||
@@ -35,7 +34,7 @@ func KmerEntropy(kmer uint64, k int, levelMax int) float64 {
|
||||
var seqBuf [32]byte
|
||||
seq := DecodeKmer(kmer, k, seqBuf[:])
|
||||
|
||||
// Pre-compute nLogN lookup (same as lowmask)
|
||||
// Pre-compute nLogN lookup
|
||||
nLogN := make([]float64, k+1)
|
||||
for i := 1; i <= k; i++ {
|
||||
nLogN[i] = float64(i) * math.Log(float64(i))
|
||||
@@ -51,6 +50,23 @@ func KmerEntropy(kmer uint64, k int, levelMax int) float64 {
|
||||
}
|
||||
}
|
||||
|
||||
// Build ln(class_size) tables: for each canonical form, how many raw
|
||||
// words map to it under circular normalization.
|
||||
classLogSizeTables := make([][]float64, levelMax+1)
|
||||
for ws := 1; ws <= levelMax; ws++ {
|
||||
tableSize := 1 << (ws * 2)
|
||||
classSize := make([]int, tableSize)
|
||||
for code := 0; code < tableSize; code++ {
|
||||
classSize[normTables[ws][code]]++
|
||||
}
|
||||
classLogSizeTables[ws] = make([]float64, tableSize)
|
||||
for j := 0; j < tableSize; j++ {
|
||||
if classSize[j] > 0 {
|
||||
classLogSizeTables[ws][j] = math.Log(float64(classSize[j]))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
minEntropy := math.MaxFloat64
|
||||
|
||||
for ws := 1; ws <= levelMax; ws++ {
|
||||
@@ -75,23 +91,13 @@ func KmerEntropy(kmer uint64, k int, levelMax int) float64 {
|
||||
table[normWord]++
|
||||
}
|
||||
|
||||
// Compute Shannon entropy
|
||||
// Compute emax over 4^ws raw bins (uncollapsed distribution).
|
||||
floatNwords := float64(nwords)
|
||||
logNwords := math.Log(floatNwords)
|
||||
|
||||
var sumNLogN float64
|
||||
for j := 0; j < tableSize; j++ {
|
||||
n := table[j]
|
||||
if n > 0 {
|
||||
sumNLogN += nLogN[n]
|
||||
}
|
||||
}
|
||||
|
||||
// Compute emax (maximum possible entropy for this word size)
|
||||
na := CanonicalCircularKmerCount(ws)
|
||||
na := tableSize // 4^ws
|
||||
var emax float64
|
||||
if nwords < na {
|
||||
emax = math.Log(float64(nwords))
|
||||
emax = logNwords
|
||||
} else {
|
||||
cov := nwords / na
|
||||
remains := nwords - (na * cov)
|
||||
@@ -105,7 +111,19 @@ func KmerEntropy(kmer uint64, k int, levelMax int) float64 {
|
||||
continue
|
||||
}
|
||||
|
||||
entropy := (logNwords - sumNLogN/floatNwords) / emax
|
||||
// Accumulate Σ f·log(f) and Σ f·log(class_size) over canonical forms.
|
||||
classLogSize := classLogSizeTables[ws]
|
||||
var sumNLogN, sumClassLogN float64
|
||||
for j := 0; j < tableSize; j++ {
|
||||
n := table[j]
|
||||
if n > 0 {
|
||||
sumNLogN += nLogN[n]
|
||||
sumClassLogN += float64(n) * classLogSize[j]
|
||||
}
|
||||
}
|
||||
|
||||
// Corrected entropy: H_raw ≈ log(N) + (Σf·log(s) - Σf·log(f)) / N
|
||||
entropy := (logNwords + sumClassLogN/floatNwords - sumNLogN/floatNwords) / emax
|
||||
if entropy < 0 {
|
||||
entropy = 0
|
||||
}
|
||||
@@ -129,24 +147,20 @@ func KmerEntropy(kmer uint64, k int, levelMax int) float64 {
|
||||
// IMPORTANT: a KmerEntropyFilter is NOT safe for concurrent use.
|
||||
// Each goroutine must create its own instance via NewKmerEntropyFilter.
|
||||
type KmerEntropyFilter struct {
|
||||
k int
|
||||
levelMax int
|
||||
threshold float64
|
||||
nLogN []float64
|
||||
normTables [][]int
|
||||
emaxValues []float64
|
||||
logNwords []float64
|
||||
k int
|
||||
levelMax int
|
||||
threshold float64
|
||||
nLogN []float64
|
||||
normTables [][]int
|
||||
classLogSizeTables [][]float64
|
||||
emaxValues []float64
|
||||
logNwords []float64
|
||||
// Pre-allocated frequency tables reused across Entropy() calls.
|
||||
// One per word size (index 0 unused). Reset to zero before each use.
|
||||
freqTables [][]int
|
||||
}
|
||||
|
||||
// NewKmerEntropyFilter creates an entropy filter with pre-computed tables.
|
||||
//
|
||||
// Parameters:
|
||||
// - k: the k-mer size
|
||||
// - levelMax: maximum sub-word size for entropy (typically 6)
|
||||
// - threshold: entropy threshold (k-mers with entropy <= threshold are rejected)
|
||||
func NewKmerEntropyFilter(k, levelMax int, threshold float64) *KmerEntropyFilter {
|
||||
if levelMax >= k {
|
||||
levelMax = k - 1
|
||||
@@ -169,20 +183,38 @@ func NewKmerEntropyFilter(k, levelMax int, threshold float64) *KmerEntropyFilter
|
||||
}
|
||||
}
|
||||
|
||||
// ln(class_size) for each canonical form under circular normalization.
|
||||
classLogSizeTables := make([][]float64, levelMax+1)
|
||||
for ws := 1; ws <= levelMax; ws++ {
|
||||
tableSize := 1 << (ws * 2)
|
||||
classSize := make([]int, tableSize)
|
||||
for code := 0; code < tableSize; code++ {
|
||||
classSize[normTables[ws][code]]++
|
||||
}
|
||||
classLogSizeTables[ws] = make([]float64, tableSize)
|
||||
for j := 0; j < tableSize; j++ {
|
||||
if classSize[j] > 0 {
|
||||
classLogSizeTables[ws][j] = math.Log(float64(classSize[j]))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Pre-compute emax and logNwords per word size.
|
||||
// emax uses 4^ws raw bins to match the corrected entropy.
|
||||
emaxValues := make([]float64, levelMax+1)
|
||||
logNwords := make([]float64, levelMax+1)
|
||||
for ws := 1; ws <= levelMax; ws++ {
|
||||
nw := k - ws + 1
|
||||
na := CanonicalCircularKmerCount(ws)
|
||||
na := 1 << (ws * 2) // 4^ws raw bins
|
||||
floatNw := float64(nw)
|
||||
logNwords[ws] = math.Log(floatNw)
|
||||
if nw < na {
|
||||
logNwords[ws] = math.Log(float64(nw))
|
||||
emaxValues[ws] = math.Log(float64(nw))
|
||||
emaxValues[ws] = logNwords[ws]
|
||||
} else {
|
||||
cov := nw / na
|
||||
remains := nw - (na * cov)
|
||||
f1 := float64(cov) / float64(nw)
|
||||
f2 := float64(cov+1) / float64(nw)
|
||||
logNwords[ws] = math.Log(float64(nw))
|
||||
f1 := float64(cov) / floatNw
|
||||
f2 := float64(cov+1) / floatNw
|
||||
emaxValues[ws] = -(float64(na-remains)*f1*math.Log(f1) +
|
||||
float64(remains)*f2*math.Log(f2))
|
||||
}
|
||||
@@ -195,14 +227,15 @@ func NewKmerEntropyFilter(k, levelMax int, threshold float64) *KmerEntropyFilter
|
||||
}
|
||||
|
||||
return &KmerEntropyFilter{
|
||||
k: k,
|
||||
levelMax: levelMax,
|
||||
threshold: threshold,
|
||||
nLogN: nLogN,
|
||||
normTables: normTables,
|
||||
emaxValues: emaxValues,
|
||||
logNwords: logNwords,
|
||||
freqTables: freqTables,
|
||||
k: k,
|
||||
levelMax: levelMax,
|
||||
threshold: threshold,
|
||||
nLogN: nLogN,
|
||||
normTables: normTables,
|
||||
classLogSizeTables: classLogSizeTables,
|
||||
emaxValues: emaxValues,
|
||||
logNwords: logNwords,
|
||||
freqTables: freqTables,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -236,7 +269,7 @@ func (ef *KmerEntropyFilter) Entropy(kmer uint64) float64 {
|
||||
// Count circular-canonical sub-word frequencies
|
||||
tableSize := 1 << (ws * 2)
|
||||
table := ef.freqTables[ws]
|
||||
clear(table) // reset to zero
|
||||
clear(table)
|
||||
mask := (1 << (ws * 2)) - 1
|
||||
normTable := ef.normTables[ws]
|
||||
|
||||
@@ -251,19 +284,21 @@ func (ef *KmerEntropyFilter) Entropy(kmer uint64) float64 {
|
||||
table[normWord]++
|
||||
}
|
||||
|
||||
// Compute Shannon entropy
|
||||
floatNwords := float64(nwords)
|
||||
logNwords := ef.logNwords[ws]
|
||||
classLogSize := ef.classLogSizeTables[ws]
|
||||
|
||||
var sumNLogN float64
|
||||
var sumNLogN, sumClassLogN float64
|
||||
for j := 0; j < tableSize; j++ {
|
||||
n := table[j]
|
||||
if n > 0 {
|
||||
sumNLogN += ef.nLogN[n]
|
||||
sumClassLogN += float64(n) * classLogSize[j]
|
||||
}
|
||||
}
|
||||
|
||||
entropy := (logNwords - sumNLogN/floatNwords) / emax
|
||||
// Corrected entropy: H_raw ≈ log(N) + (Σf·log(s) - Σf·log(f)) / N
|
||||
entropy := (logNwords + sumClassLogN/floatNwords - sumNLogN/floatNwords) / emax
|
||||
if entropy < 0 {
|
||||
entropy = 0
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@ package obioptions
|
||||
// Version is automatically updated by the Makefile from version.txt
|
||||
// The patch number (third digit) is incremented on each push to the repository
|
||||
|
||||
var _Version = "Release 4.4.42"
|
||||
var _Version = "Release 4.4.44"
|
||||
|
||||
// Version returns the version of the obitools package.
|
||||
//
|
||||
|
||||
@@ -364,6 +364,24 @@ func (s *BioSequence) GetIntSlice(key string) ([]int, bool) {
|
||||
return val, ok
|
||||
}
|
||||
|
||||
func (s *BioSequence) GetMapOfIntSlice(key string) (map[string][]int, bool) {
|
||||
v, ok := s.GetAttribute(key)
|
||||
if !ok {
|
||||
return nil, false
|
||||
}
|
||||
val, err := obiutils.InterfaceToMapOfIntSlice(v)
|
||||
return val, err == nil
|
||||
}
|
||||
|
||||
func (s *BioSequence) GetMapOfStringSlice(key string) (map[string][]string, bool) {
|
||||
v, ok := s.GetAttribute(key)
|
||||
if !ok {
|
||||
return nil, false
|
||||
}
|
||||
val, err := obiutils.InterfaceToMapOfStringSlice(v)
|
||||
return val, err == nil
|
||||
}
|
||||
|
||||
// Count returns the value of the "count" attribute of the BioSequence.
|
||||
//
|
||||
// The count of a sequence is the number of times it has been observed in the dataset.
|
||||
|
||||
@@ -103,7 +103,7 @@ func TestNewBioSequence(t *testing.T) {
|
||||
// Return type: None.
|
||||
func TestNewBioSequenceWithQualities(t *testing.T) {
|
||||
id := "123"
|
||||
sequence := []byte("ATGC")
|
||||
sequence := []byte("atgc")
|
||||
definition := "DNA sequence"
|
||||
qualities := []byte("1234")
|
||||
|
||||
|
||||
@@ -141,6 +141,33 @@ var OBILang = gval.NewLanguage(
|
||||
gval.Function("max", func(args ...interface{}) (interface{}, error) {
|
||||
return obiutils.Max(args[0])
|
||||
}),
|
||||
gval.Function("which_max", func(args ...interface{}) (interface{}, error) {
|
||||
result, err := obiutils.WhichMax(args[0])
|
||||
if idx, ok := result.(int); ok {
|
||||
return float64(idx), nil
|
||||
}
|
||||
return result, err
|
||||
}),
|
||||
gval.Function("which_min", func(args ...interface{}) (interface{}, error) {
|
||||
result, err := obiutils.WhichMin(args[0])
|
||||
if idx, ok := result.(int); ok {
|
||||
return float64(idx), nil
|
||||
}
|
||||
return result, err
|
||||
}),
|
||||
|
||||
gval.Function("filtermin", func(args ...interface{}) (interface{}, error) {
|
||||
return obiutils.FilterMin(args[0], args[1])
|
||||
}),
|
||||
|
||||
gval.Function("filtermax", func(args ...interface{}) (interface{}, error) {
|
||||
return obiutils.FilterMax(args[0], args[1])
|
||||
}),
|
||||
|
||||
gval.Function("saturatingsub", func(args ...interface{}) (interface{}, error) {
|
||||
return obiutils.SaturatingSub(args[0], args[1])
|
||||
}),
|
||||
|
||||
gval.Function("contains", func(args ...interface{}) (interface{}, error) {
|
||||
if obiutils.IsAMap(args[0]) {
|
||||
val := reflect.ValueOf(args[0]).MapIndex(reflect.ValueOf(args[1]))
|
||||
|
||||
@@ -276,6 +276,44 @@ func InterfaceToStringMap(i interface{}) (val map[string]string, err error) {
|
||||
return
|
||||
}
|
||||
|
||||
func InterfaceToMapOfIntSlice(i interface{}) (val map[string][]int, err error) {
|
||||
err = nil
|
||||
switch m := i.(type) {
|
||||
case map[string][]int:
|
||||
val = m
|
||||
case map[string]interface{}:
|
||||
val = make(map[string][]int, len(m))
|
||||
for k, v := range m {
|
||||
val[k], err = InterfaceToIntSlice(v)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
default:
|
||||
err = &NotAMapInt{"value attribute cannot be casted to a map[string][]int"}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func InterfaceToMapOfStringSlice(i interface{}) (val map[string][]string, err error) {
|
||||
err = nil
|
||||
switch m := i.(type) {
|
||||
case map[string][]string:
|
||||
val = m
|
||||
case map[string]interface{}:
|
||||
val = make(map[string][]string, len(m))
|
||||
for k, v := range m {
|
||||
val[k], err = InterfaceToStringSlice(v)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
default:
|
||||
err = &NotAMapInt{"value attribute cannot be casted to a map[string][]string"}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func InterfaceToStringSlice(i interface{}) (val []string, err error) {
|
||||
err = nil
|
||||
|
||||
|
||||
+365
-4
@@ -34,6 +34,26 @@ func MinMaxSlice[T constraints.Ordered](vec []T) (min, max T) {
|
||||
return
|
||||
}
|
||||
|
||||
func FilterMinSlice[T constraints.Ordered](vec []T, minimum T) []T {
|
||||
result := make([]T, 0, len(vec))
|
||||
for _, v := range vec {
|
||||
if v >= minimum {
|
||||
result = append(result, v)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func FilterMaxSlice[T constraints.Ordered](vec []T, maximum T) []T {
|
||||
result := make([]T, 0, len(vec))
|
||||
for _, v := range vec {
|
||||
if v <= maximum {
|
||||
result = append(result, v)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func MaxMap[K comparable, T constraints.Ordered](values map[K]T) (K, T, error) {
|
||||
var maxKey K
|
||||
var maxValue T
|
||||
@@ -73,6 +93,46 @@ func MinMap[K comparable, T constraints.Ordered](values map[K]T) (K, T, error) {
|
||||
return minKey, minValue, nil
|
||||
}
|
||||
|
||||
func FilterMinMap[K comparable, T constraints.Ordered](values map[K]T, minimum T) map[K]T {
|
||||
result := make(map[K]T)
|
||||
for k, v := range values {
|
||||
if v >= minimum {
|
||||
result[k] = v
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func FilterMaxMap[K comparable, T constraints.Ordered](values map[K]T, maximum T) map[K]T {
|
||||
result := make(map[K]T)
|
||||
for k, v := range values {
|
||||
if v <= maximum {
|
||||
result[k] = v
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func SaturatingSubSlice[T Numeric](vec []T, sub T) []T {
|
||||
result := make([]T, len(vec))
|
||||
for i, v := range vec {
|
||||
if v > sub {
|
||||
result[i] = v - sub
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func SaturatingSubMap[K comparable, T Numeric](values map[K]T, sub T) map[K]T {
|
||||
result := make(map[K]T)
|
||||
for k, v := range values {
|
||||
if v > sub {
|
||||
result[k] = v - sub
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// Min returns the smallest element in a slice/array or map,
|
||||
// or the value itself if data is a single comparable value.
|
||||
// Returns an error if the container is empty or the type is unsupported.
|
||||
@@ -135,11 +195,121 @@ func Max(data interface{}) (interface{}, error) {
|
||||
}
|
||||
}
|
||||
|
||||
func FilterMin(data interface{}, minimum interface{}) (interface{}, error) {
|
||||
v := reflect.ValueOf(data)
|
||||
switch v.Kind() {
|
||||
case reflect.Slice, reflect.Array:
|
||||
if v.Len() == 0 {
|
||||
return nil, errors.New("empty slice or array")
|
||||
}
|
||||
return filterMinFromIterable(v, minimum)
|
||||
case reflect.Map:
|
||||
if v.Len() == 0 {
|
||||
return nil, errors.New("empty map")
|
||||
}
|
||||
return filterMinFromMap(v, minimum)
|
||||
default:
|
||||
if !isOrderedKind(v.Kind()) {
|
||||
return nil, fmt.Errorf("unsupported type: %s", v.Kind())
|
||||
}
|
||||
return data, nil
|
||||
}
|
||||
}
|
||||
|
||||
func FilterMax(data interface{}, maximum interface{}) (interface{}, error) {
|
||||
v := reflect.ValueOf(data)
|
||||
switch v.Kind() {
|
||||
case reflect.Slice, reflect.Array:
|
||||
if v.Len() == 0 {
|
||||
return nil, errors.New("empty slice or array")
|
||||
}
|
||||
return filterMaxFromIterable(v, maximum)
|
||||
case reflect.Map:
|
||||
if v.Len() == 0 {
|
||||
return nil, errors.New("empty map")
|
||||
}
|
||||
return filterMaxFromMap(v, maximum)
|
||||
default:
|
||||
if !isOrderedKind(v.Kind()) {
|
||||
return nil, fmt.Errorf("unsupported type: %s", v.Kind())
|
||||
}
|
||||
return data, nil
|
||||
}
|
||||
}
|
||||
|
||||
func SaturatingSub(data interface{}, sub interface{}) (interface{}, error) {
|
||||
v := reflect.ValueOf(data)
|
||||
switch v.Kind() {
|
||||
case reflect.Slice, reflect.Array:
|
||||
return saturatingSubFromIterable(v, sub)
|
||||
case reflect.Map:
|
||||
return saturatingSubFromMap(v, sub)
|
||||
default:
|
||||
if !isNumericKind(v.Kind()) {
|
||||
return nil, fmt.Errorf("unsupported type: %s", v.Kind())
|
||||
}
|
||||
r, err := saturatingSubValues(v, reflect.ValueOf(sub))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return r.Interface(), nil
|
||||
}
|
||||
}
|
||||
|
||||
func saturatingSubFromIterable(v reflect.Value, sub interface{}) (interface{}, error) {
|
||||
subVal := reflect.ValueOf(sub)
|
||||
result := reflect.MakeSlice(v.Type(), v.Len(), v.Len())
|
||||
for i := 0; i < v.Len(); i++ {
|
||||
r, err := saturatingSubValues(v.Index(i), subVal)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
result.Index(i).Set(r)
|
||||
}
|
||||
return result.Interface(), nil
|
||||
}
|
||||
|
||||
func saturatingSubFromMap(v reflect.Value, sub interface{}) (interface{}, error) {
|
||||
subVal := reflect.ValueOf(sub)
|
||||
result := reflect.MakeMap(v.Type())
|
||||
for _, key := range v.MapKeys() {
|
||||
r, err := saturatingSubValues(v.MapIndex(key), subVal)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if !r.IsZero() {
|
||||
result.SetMapIndex(key, r)
|
||||
}
|
||||
}
|
||||
return result.Interface(), nil
|
||||
}
|
||||
|
||||
// saturatingSubValues returns a - b clamped at zero, as a new value of a's
// concrete type.
//
// Both operands may be boxed in an interface; they are unwrapped first.
// When a and b belong to the same numeric family (signed, unsigned or
// float) the subtraction is exact. When the families differ (for example an
// int element with a float64 operand) the operands are compared and
// subtracted as float64 and the difference is converted back to a's type,
// truncating any fractional part; a difference that does not fit a's type is
// reported as an error instead of being silently wrapped.
//
// An error is returned when either operand is not numeric.
func saturatingSubValues(a, b reflect.Value) (reflect.Value, error) {
	if a.Kind() == reflect.Interface {
		a = a.Elem()
	}
	if b.Kind() == reflect.Interface {
		b = b.Elem()
	}

	const (
		famNone = iota
		famInt
		famUint
		famFloat
	)
	family := func(k reflect.Kind) int {
		switch k {
		case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
			return famInt
		case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
			return famUint
		case reflect.Float32, reflect.Float64:
			return famFloat
		}
		return famNone
	}
	asFloat := func(v reflect.Value, fam int) float64 {
		switch fam {
		case famInt:
			return float64(v.Int())
		case famUint:
			return float64(v.Uint())
		default:
			return v.Float()
		}
	}

	fa, fb := family(a.Kind()), family(b.Kind())
	if fa == famNone {
		return reflect.Value{}, fmt.Errorf("unsupported type for saturating subtraction: %s", a.Kind())
	}
	if fb == famNone {
		return reflect.Value{}, fmt.Errorf("unsupported type for saturating subtraction: %s", b.Kind())
	}

	// Created only after validation: a.Type() panics on an invalid Value.
	result := reflect.New(a.Type()).Elem()

	if fa == fb {
		switch fa {
		case famInt:
			if av, bv := a.Int(), b.Int(); av > bv {
				result.SetInt(av - bv)
			}
		case famUint:
			if av, bv := a.Uint(), b.Uint(); av > bv {
				result.SetUint(av - bv)
			}
		case famFloat:
			if av, bv := a.Float(), b.Float(); av > bv {
				result.SetFloat(av - bv)
			}
		}
		return result, nil
	}

	// Mixed families: calling b.Int()/b.Uint()/b.Float() on the wrong kind
	// would panic, so go through float64.
	av, bv := asFloat(a, fa), asFloat(b, fb)
	if !(av > bv) {
		return result, nil
	}
	diff := av - bv

	switch fa {
	case famInt:
		const limit = float64(1 << 63)
		if diff >= limit {
			return reflect.Value{}, fmt.Errorf("saturating subtraction overflows %s", a.Kind())
		}
		n := int64(diff)
		if result.OverflowInt(n) {
			return reflect.Value{}, fmt.Errorf("saturating subtraction overflows %s", a.Kind())
		}
		result.SetInt(n)
	case famUint:
		const limit = float64(1 << 64)
		if diff >= limit {
			return reflect.Value{}, fmt.Errorf("saturating subtraction overflows %s", a.Kind())
		}
		n := uint64(diff)
		if result.OverflowUint(n) {
			return reflect.Value{}, fmt.Errorf("saturating subtraction overflows %s", a.Kind())
		}
		result.SetUint(n)
	case famFloat:
		result.SetFloat(diff)
	}
	return result, nil
}
|
||||
|
||||
// maxFromIterable scans a slice/array to find the maximum.
|
||||
func maxFromIterable(v reflect.Value) (interface{}, error) {
|
||||
var best reflect.Value
|
||||
for i := 0; i < v.Len(); i++ {
|
||||
elem := v.Index(i)
|
||||
elem := unwrapInterface(v.Index(i))
|
||||
if !isOrderedKind(elem.Kind()) {
|
||||
return nil, fmt.Errorf("unsupported element type: %s", elem.Kind())
|
||||
}
|
||||
@@ -154,7 +324,7 @@ func maxFromIterable(v reflect.Value) (interface{}, error) {
|
||||
func minFromIterable(v reflect.Value) (interface{}, error) {
|
||||
var minVal reflect.Value
|
||||
for i := 0; i < v.Len(); i++ {
|
||||
elem := v.Index(i)
|
||||
elem := unwrapInterface(v.Index(i))
|
||||
if !isOrderedKind(elem.Kind()) {
|
||||
return nil, fmt.Errorf("unsupported element type: %s", elem.Kind())
|
||||
}
|
||||
@@ -165,12 +335,182 @@ func minFromIterable(v reflect.Value) (interface{}, error) {
|
||||
return minVal.Interface(), nil
|
||||
}
|
||||
|
||||
func filterMinFromIterable(v reflect.Value, minimum interface{}) (interface{}, error) {
|
||||
minVal := reflect.ValueOf(minimum)
|
||||
result := reflect.MakeSlice(v.Type(), 0, v.Len())
|
||||
for i := 0; i < v.Len(); i++ {
|
||||
elem := unwrapInterface(v.Index(i))
|
||||
if !isOrderedKind(elem.Kind()) {
|
||||
return nil, fmt.Errorf("unsupported element type: %s", elem.Kind())
|
||||
}
|
||||
if !less(elem, minVal) { // elem >= minimum
|
||||
result = reflect.Append(result, elem)
|
||||
}
|
||||
}
|
||||
return result.Interface(), nil
|
||||
}
|
||||
|
||||
func filterMaxFromIterable(v reflect.Value, maximum interface{}) (interface{}, error) {
|
||||
maxVal := reflect.ValueOf(maximum)
|
||||
result := reflect.MakeSlice(v.Type(), 0, v.Len())
|
||||
for i := 0; i < v.Len(); i++ {
|
||||
elem := unwrapInterface(v.Index(i))
|
||||
if !isOrderedKind(elem.Kind()) {
|
||||
return nil, fmt.Errorf("unsupported element type: %s", elem.Kind())
|
||||
}
|
||||
if !greater(elem, maxVal) { // elem <= maximum
|
||||
result = reflect.Append(result, elem)
|
||||
}
|
||||
}
|
||||
return result.Interface(), nil
|
||||
}
|
||||
|
||||
// whichMaxFromIterable returns the index of the maximum element in a slice/array.
|
||||
func whichMaxFromIterable(v reflect.Value) (int, error) {
|
||||
var best reflect.Value
|
||||
bestIdx := 0
|
||||
for i := 0; i < v.Len(); i++ {
|
||||
elem := unwrapInterface(v.Index(i))
|
||||
if !isOrderedKind(elem.Kind()) {
|
||||
return 0, fmt.Errorf("unsupported element type: %s", elem.Kind())
|
||||
}
|
||||
if i == 0 || greater(elem, best) {
|
||||
best = elem
|
||||
bestIdx = i
|
||||
}
|
||||
}
|
||||
return bestIdx, nil
|
||||
}
|
||||
|
||||
// whichMinFromIterable returns the index of the minimum element in a slice/array.
|
||||
func whichMinFromIterable(v reflect.Value) (int, error) {
|
||||
var minVal reflect.Value
|
||||
minIdx := 0
|
||||
for i := 0; i < v.Len(); i++ {
|
||||
elem := unwrapInterface(v.Index(i))
|
||||
if !isOrderedKind(elem.Kind()) {
|
||||
return 0, fmt.Errorf("unsupported element type: %s", elem.Kind())
|
||||
}
|
||||
if i == 0 || less(elem, minVal) {
|
||||
minVal = elem
|
||||
minIdx = i
|
||||
}
|
||||
}
|
||||
return minIdx, nil
|
||||
}
|
||||
|
||||
// whichMaxFromMap returns the key associated with the maximum value in a map.
|
||||
func whichMaxFromMap(v reflect.Value) (interface{}, error) {
|
||||
var best reflect.Value
|
||||
var bestKey reflect.Value
|
||||
first := true
|
||||
for _, key := range v.MapKeys() {
|
||||
elem := unwrapInterface(v.MapIndex(key))
|
||||
if !isOrderedKind(elem.Kind()) {
|
||||
return nil, fmt.Errorf("unsupported element type: %s", elem.Kind())
|
||||
}
|
||||
if first || greater(elem, best) {
|
||||
best = elem
|
||||
bestKey = key
|
||||
first = false
|
||||
}
|
||||
}
|
||||
return bestKey.Interface(), nil
|
||||
}
|
||||
|
||||
// whichMinFromMap returns the key associated with the minimum value in a map.
|
||||
func whichMinFromMap(v reflect.Value) (interface{}, error) {
|
||||
var minVal reflect.Value
|
||||
var minKey reflect.Value
|
||||
first := true
|
||||
for _, key := range v.MapKeys() {
|
||||
elem := unwrapInterface(v.MapIndex(key))
|
||||
if !isOrderedKind(elem.Kind()) {
|
||||
return nil, fmt.Errorf("unsupported element type: %s", elem.Kind())
|
||||
}
|
||||
if first || less(elem, minVal) {
|
||||
minVal = elem
|
||||
minKey = key
|
||||
first = false
|
||||
}
|
||||
}
|
||||
return minKey.Interface(), nil
|
||||
}
|
||||
|
||||
// WhichMax returns the key (for a map) or index (for a slice/array) of the maximum value.
|
||||
func WhichMax(data interface{}) (interface{}, error) {
|
||||
v := reflect.ValueOf(data)
|
||||
switch v.Kind() {
|
||||
case reflect.Slice, reflect.Array:
|
||||
if v.Len() == 0 {
|
||||
return nil, errors.New("empty slice or array")
|
||||
}
|
||||
return whichMaxFromIterable(v)
|
||||
case reflect.Map:
|
||||
if v.Len() == 0 {
|
||||
return nil, errors.New("empty map")
|
||||
}
|
||||
return whichMaxFromMap(v)
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported type: %s", v.Kind())
|
||||
}
|
||||
}
|
||||
|
||||
// WhichMin returns the key (for a map) or index (for a slice/array) of the minimum value.
|
||||
func WhichMin(data interface{}) (interface{}, error) {
|
||||
v := reflect.ValueOf(data)
|
||||
switch v.Kind() {
|
||||
case reflect.Slice, reflect.Array:
|
||||
if v.Len() == 0 {
|
||||
return nil, errors.New("empty slice or array")
|
||||
}
|
||||
return whichMinFromIterable(v)
|
||||
case reflect.Map:
|
||||
if v.Len() == 0 {
|
||||
return nil, errors.New("empty map")
|
||||
}
|
||||
return whichMinFromMap(v)
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported type: %s", v.Kind())
|
||||
}
|
||||
}
|
||||
|
||||
func filterMinFromMap(v reflect.Value, minimum interface{}) (interface{}, error) {
|
||||
minVal := reflect.ValueOf(minimum)
|
||||
result := reflect.MakeMap(v.Type())
|
||||
for _, key := range v.MapKeys() {
|
||||
elem := unwrapInterface(v.MapIndex(key))
|
||||
if !isOrderedKind(elem.Kind()) {
|
||||
return nil, fmt.Errorf("unsupported element type: %s", elem.Kind())
|
||||
}
|
||||
if !less(elem, minVal) { // elem >= minimum
|
||||
result.SetMapIndex(key, elem)
|
||||
}
|
||||
}
|
||||
return result.Interface(), nil
|
||||
}
|
||||
|
||||
func filterMaxFromMap(v reflect.Value, maximum interface{}) (interface{}, error) {
|
||||
maxVal := reflect.ValueOf(maximum)
|
||||
result := reflect.MakeMap(v.Type())
|
||||
for _, key := range v.MapKeys() {
|
||||
elem := unwrapInterface(v.MapIndex(key))
|
||||
if !isOrderedKind(elem.Kind()) {
|
||||
return nil, fmt.Errorf("unsupported element type: %s", elem.Kind())
|
||||
}
|
||||
if !greater(elem, maxVal) { // elem <= maximum
|
||||
result.SetMapIndex(key, elem)
|
||||
}
|
||||
}
|
||||
return result.Interface(), nil
|
||||
}
|
||||
|
||||
// maxFromMap scans map values to find the maximum.
|
||||
func maxFromMap(v reflect.Value) (interface{}, error) {
|
||||
var best reflect.Value
|
||||
first := true
|
||||
for _, key := range v.MapKeys() {
|
||||
elem := v.MapIndex(key)
|
||||
elem := unwrapInterface(v.MapIndex(key))
|
||||
if !isOrderedKind(elem.Kind()) {
|
||||
return nil, fmt.Errorf("unsupported element type: %s", elem.Kind())
|
||||
}
|
||||
@@ -187,7 +527,7 @@ func minFromMap(v reflect.Value) (interface{}, error) {
|
||||
var minVal reflect.Value
|
||||
first := true
|
||||
for _, key := range v.MapKeys() {
|
||||
elem := v.MapIndex(key)
|
||||
elem := unwrapInterface(v.MapIndex(key))
|
||||
if !isOrderedKind(elem.Kind()) {
|
||||
return nil, fmt.Errorf("unsupported element type: %s", elem.Kind())
|
||||
}
|
||||
@@ -199,6 +539,27 @@ func minFromMap(v reflect.Value) (interface{}, error) {
|
||||
return minVal.Interface(), nil
|
||||
}
|
||||
|
||||
// isNumericKind reports whether k is one of the signed integer, unsigned
// integer (uintptr excluded) or floating-point kinds.
func isNumericKind(k reflect.Kind) bool {
	signed := k >= reflect.Int && k <= reflect.Int64
	unsigned := k >= reflect.Uint && k <= reflect.Uint64
	floating := k == reflect.Float32 || k == reflect.Float64
	return signed || unsigned || floating
}
|
||||
|
||||
// unwrapInterface gives back the value held inside v when v is of interface
// kind, and v itself otherwise.
// Elements obtained by reflection from a map[string]interface{} or a
// []interface{} report reflect.Interface as their Kind rather than the kind
// of the concrete value they carry, hence the need for this step.
func unwrapInterface(v reflect.Value) reflect.Value {
	if v.Kind() != reflect.Interface {
		return v
	}
	return v.Elem()
}
|
||||
|
||||
// isOrderedKind reports whether k supports comparison ordering.
|
||||
func isOrderedKind(k reflect.Kind) bool {
|
||||
switch k {
|
||||
|
||||
+1
-1
@@ -1 +1 @@
|
||||
4.4.42
|
||||
4.4.44
|
||||
|
||||
Reference in New Issue
Block a user