mirror of
https://github.com/metabarcoding/obitools4.git
synced 2026-03-25 21:40:52 +00:00
273 lines
6.9 KiB
Go
273 lines
6.9 KiB
Go
|
|
package obidist
|
|||
|
|
|
|||
|
|
import (
|
|||
|
|
"fmt"
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
// DistMatrix represents a symmetric matrix stored as a triangular matrix.
|
|||
|
|
// The diagonal has a constant value (typically 0 for distances, 1 for similarities).
|
|||
|
|
// Only the upper triangle (i < j) is stored to save memory.
|
|||
|
|
//
|
|||
|
|
// For an n×n matrix, we store n(n-1)/2 values.
|
|||
|
|
type DistMatrix struct {
|
|||
|
|
n int // Number of elements (matrix dimension)
|
|||
|
|
data []float64 // Triangular storage: upper triangle only
|
|||
|
|
labels []string // Optional labels for rows/columns
|
|||
|
|
diagonalValue float64 // Value on the diagonal
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// NewDistMatrix creates a new distance matrix of size n×n.
|
|||
|
|
// All distances are initialized to 0.0, diagonal is 0.0.
|
|||
|
|
func NewDistMatrix(n int) *DistMatrix {
|
|||
|
|
if n < 0 {
|
|||
|
|
panic("matrix size must be non-negative")
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Number of elements in upper triangle: n(n-1)/2
|
|||
|
|
size := n * (n - 1) / 2
|
|||
|
|
|
|||
|
|
return &DistMatrix{
|
|||
|
|
n: n,
|
|||
|
|
data: make([]float64, size),
|
|||
|
|
labels: make([]string, n),
|
|||
|
|
diagonalValue: 0.0,
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// NewDistMatrixWithLabels creates a new distance matrix with labels.
|
|||
|
|
// Diagonal is 0.0 by default.
|
|||
|
|
func NewDistMatrixWithLabels(labels []string) *DistMatrix {
|
|||
|
|
dm := NewDistMatrix(len(labels))
|
|||
|
|
copy(dm.labels, labels)
|
|||
|
|
return dm
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// NewSimilarityMatrix creates a new similarity matrix of size n×n.
|
|||
|
|
// All off-diagonal values are initialized to 0.0, diagonal is 1.0.
|
|||
|
|
func NewSimilarityMatrix(n int) *DistMatrix {
|
|||
|
|
if n < 0 {
|
|||
|
|
panic("matrix size must be non-negative")
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Number of elements in upper triangle: n(n-1)/2
|
|||
|
|
size := n * (n - 1) / 2
|
|||
|
|
|
|||
|
|
return &DistMatrix{
|
|||
|
|
n: n,
|
|||
|
|
data: make([]float64, size),
|
|||
|
|
labels: make([]string, n),
|
|||
|
|
diagonalValue: 1.0,
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// NewSimilarityMatrixWithLabels creates a new similarity matrix with labels.
|
|||
|
|
// Diagonal is 1.0.
|
|||
|
|
func NewSimilarityMatrixWithLabels(labels []string) *DistMatrix {
|
|||
|
|
dm := NewSimilarityMatrix(len(labels))
|
|||
|
|
copy(dm.labels, labels)
|
|||
|
|
return dm
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Size returns the dimension of the matrix (n for an n×n matrix).
|
|||
|
|
func (dm *DistMatrix) Size() int {
|
|||
|
|
return dm.n
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// indexFor computes the index in the data array for element (i, j).
|
|||
|
|
// Assumes i < j (upper triangle).
|
|||
|
|
//
|
|||
|
|
// The upper triangle is stored row by row:
|
|||
|
|
// (0,1), (0,2), ..., (0,n-1), (1,2), (1,3), ..., (1,n-1), (2,3), ...
|
|||
|
|
//
|
|||
|
|
// For element (i, j) where i < j:
|
|||
|
|
// index = i*(n-1) + j - 1 - i*(i+1)/2
|
|||
|
|
//
|
|||
|
|
// This can be simplified to:
|
|||
|
|
// index = i*n - i*(i+1)/2 + j - i - 1
|
|||
|
|
// = i*(n - (i+1)/2 - 1) + j - 1
|
|||
|
|
// = i*(n - 1 - i/2 - 1/2) + j - 1
|
|||
|
|
//
|
|||
|
|
// But the clearest formula is:
|
|||
|
|
// index = i*n - i*(i+3)/2 + j - 1
|
|||
|
|
func (dm *DistMatrix) indexFor(i, j int) int {
|
|||
|
|
if i >= j {
|
|||
|
|
panic(fmt.Sprintf("indexFor expects i < j, got i=%d, j=%d", i, j))
|
|||
|
|
}
|
|||
|
|
// Formula: number of elements in previous rows + position in current row
|
|||
|
|
// Previous rows (0 to i-1): sum from k=0 to i-1 of (n-1-k) = i*n - i*(i+1)/2
|
|||
|
|
// Current row position: j - i - 1
|
|||
|
|
return i*dm.n - i*(i+1)/2 + j - i - 1
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Get returns the value at position (i, j).
|
|||
|
|
// The matrix is symmetric, so Get(i, j) == Get(j, i).
|
|||
|
|
// The diagonal returns the diagonalValue (0.0 for distances, 1.0 for similarities).
|
|||
|
|
func (dm *DistMatrix) Get(i, j int) float64 {
|
|||
|
|
if i < 0 || i >= dm.n || j < 0 || j >= dm.n {
|
|||
|
|
panic(fmt.Sprintf("indices out of bounds: i=%d, j=%d, n=%d", i, j, dm.n))
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Diagonal: return the diagonal value
|
|||
|
|
if i == j {
|
|||
|
|
return dm.diagonalValue
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Ensure i < j for indexing
|
|||
|
|
if i > j {
|
|||
|
|
i, j = j, i
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
return dm.data[dm.indexFor(i, j)]
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Set sets the value at position (i, j).
|
|||
|
|
// The matrix is symmetric, so Set(i, j, v) also sets (j, i) to v.
|
|||
|
|
// Setting the diagonal (i == j) is ignored (diagonal has a fixed value).
|
|||
|
|
func (dm *DistMatrix) Set(i, j int, value float64) {
|
|||
|
|
if i < 0 || i >= dm.n || j < 0 || j >= dm.n {
|
|||
|
|
panic(fmt.Sprintf("indices out of bounds: i=%d, j=%d, n=%d", i, j, dm.n))
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Ignore diagonal assignments (diagonal has a fixed value)
|
|||
|
|
if i == j {
|
|||
|
|
return
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Ensure i < j for indexing
|
|||
|
|
if i > j {
|
|||
|
|
i, j = j, i
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
dm.data[dm.indexFor(i, j)] = value
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// GetLabel returns the label for element i.
|
|||
|
|
func (dm *DistMatrix) GetLabel(i int) string {
|
|||
|
|
if i < 0 || i >= dm.n {
|
|||
|
|
panic(fmt.Sprintf("index out of bounds: i=%d, n=%d", i, dm.n))
|
|||
|
|
}
|
|||
|
|
return dm.labels[i]
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// SetLabel sets the label for element i.
|
|||
|
|
func (dm *DistMatrix) SetLabel(i int, label string) {
|
|||
|
|
if i < 0 || i >= dm.n {
|
|||
|
|
panic(fmt.Sprintf("index out of bounds: i=%d, n=%d", i, dm.n))
|
|||
|
|
}
|
|||
|
|
dm.labels[i] = label
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Labels returns a copy of all labels.
|
|||
|
|
func (dm *DistMatrix) Labels() []string {
|
|||
|
|
labels := make([]string, dm.n)
|
|||
|
|
copy(labels, dm.labels)
|
|||
|
|
return labels
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// GetRow returns the i-th row of the distance matrix.
|
|||
|
|
// The returned slice is a copy.
|
|||
|
|
func (dm *DistMatrix) GetRow(i int) []float64 {
|
|||
|
|
if i < 0 || i >= dm.n {
|
|||
|
|
panic(fmt.Sprintf("index out of bounds: i=%d, n=%d", i, dm.n))
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
row := make([]float64, dm.n)
|
|||
|
|
for j := 0; j < dm.n; j++ {
|
|||
|
|
row[j] = dm.Get(i, j)
|
|||
|
|
}
|
|||
|
|
return row
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// GetColumn returns the j-th column of the distance matrix.
|
|||
|
|
// Since the matrix is symmetric, GetColumn(j) == GetRow(j).
|
|||
|
|
// The returned slice is a copy.
|
|||
|
|
func (dm *DistMatrix) GetColumn(j int) []float64 {
|
|||
|
|
return dm.GetRow(j)
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// MinDistance returns the minimum non-zero distance in the matrix,
|
|||
|
|
// along with the indices (i, j) where it occurs.
|
|||
|
|
// Returns (0.0, -1, -1) if the matrix is empty or all distances are 0.
|
|||
|
|
func (dm *DistMatrix) MinDistance() (float64, int, int) {
|
|||
|
|
if dm.n <= 1 {
|
|||
|
|
return 0.0, -1, -1
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
minDist := -1.0
|
|||
|
|
minI, minJ := -1, -1
|
|||
|
|
|
|||
|
|
for i := 0; i < dm.n-1; i++ {
|
|||
|
|
for j := i + 1; j < dm.n; j++ {
|
|||
|
|
dist := dm.Get(i, j)
|
|||
|
|
if minDist < 0 || dist < minDist {
|
|||
|
|
minDist = dist
|
|||
|
|
minI = i
|
|||
|
|
minJ = j
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
if minI < 0 {
|
|||
|
|
return 0.0, -1, -1
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
return minDist, minI, minJ
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// MaxDistance returns the maximum distance in the matrix,
|
|||
|
|
// along with the indices (i, j) where it occurs.
|
|||
|
|
// Returns (0.0, -1, -1) if the matrix is empty.
|
|||
|
|
func (dm *DistMatrix) MaxDistance() (float64, int, int) {
|
|||
|
|
if dm.n <= 1 {
|
|||
|
|
return 0.0, -1, -1
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
maxDist := -1.0
|
|||
|
|
maxI, maxJ := -1, -1
|
|||
|
|
|
|||
|
|
for i := 0; i < dm.n-1; i++ {
|
|||
|
|
for j := i + 1; j < dm.n; j++ {
|
|||
|
|
dist := dm.Get(i, j)
|
|||
|
|
if maxDist < 0 || dist > maxDist {
|
|||
|
|
maxDist = dist
|
|||
|
|
maxI = i
|
|||
|
|
maxJ = j
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
if maxI < 0 {
|
|||
|
|
return 0.0, -1, -1
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
return maxDist, maxI, maxJ
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Copy creates a deep copy of the matrix.
|
|||
|
|
func (dm *DistMatrix) Copy() *DistMatrix {
|
|||
|
|
newDM := &DistMatrix{
|
|||
|
|
n: dm.n,
|
|||
|
|
data: make([]float64, len(dm.data)),
|
|||
|
|
labels: make([]string, dm.n),
|
|||
|
|
diagonalValue: dm.diagonalValue,
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
copy(newDM.data, dm.data)
|
|||
|
|
copy(newDM.labels, dm.labels)
|
|||
|
|
|
|||
|
|
return newDM
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// ToFullMatrix returns a full n×n matrix representation.
|
|||
|
|
// This allocates n² values, so use only when needed.
|
|||
|
|
func (dm *DistMatrix) ToFullMatrix() [][]float64 {
|
|||
|
|
matrix := make([][]float64, dm.n)
|
|||
|
|
for i := 0; i < dm.n; i++ {
|
|||
|
|
matrix[i] = make([]float64, dm.n)
|
|||
|
|
for j := 0; j < dm.n; j++ {
|
|||
|
|
matrix[i][j] = dm.Get(i, j)
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
return matrix
|
|||
|
|
}
|