update the geometric obitag

Former-commit-id: acd8fe1c8c1cf443098432d818397b0b5d02df33
This commit is contained in:
2024-01-17 23:38:51 +01:00
parent 13cfebd737
commit f2f7b4574e
5 changed files with 36 additions and 25 deletions

1
.gitignore vendored
View File

@ -116,3 +116,4 @@ doc/book/wolf_data/Release-253/ncbitaxo/names.dmp
doc/book/wolf_data/Release-253/ncbitaxo/nodes.dmp
doc/book/wolf_data/Release-253/ncbitaxo/readme.txt
doc/book/results/toto.tasta
sample/.DS_Store

View File

@ -15,14 +15,14 @@ import (
// SquareDist calculates the squared Euclidean distance between
// two vectors 'a' and 'b'.
//
// 'a' and 'b' are slices of float64 values representing
// 'a' and 'b' are slices of float64 or int values representing
// coordinate points in space. It is assumed that both slices
// have the same length.
// Returns the calculated squared distance as a value of type T (float64 or int).
func SquareDist[T float64 | int](a, b []T) float64 {
sum := 0.0
func SquareDist[T float64 | int](a, b []T) T {
sum := T(0)
for i, v := range a {
diff := float64(v - b[i])
diff := v - b[i]
sum += diff * diff
}
return sum
@ -35,7 +35,7 @@ func SquareDist[T float64 | int](a, b []T) float64 {
// is paired with the corresponding element of `b`.
// Returns the Euclidean distance, i.e. the square root of the sum of the squared differences, as a float64.
func EuclideanDist[T float64 | int](a, b []T) float64 {
return math.Sqrt(SquareDist(a, b))
return math.Sqrt(float64(SquareDist(a, b)))
}
// DefaultRG creates and returns a new instance of *rand.Rand.

View File

@ -6,6 +6,7 @@ import (
"sort"
"sync"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obistats"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
@ -24,26 +25,36 @@ func GeomIndexSesquence(seqidx int,
log.Fatalf("Sequence %s does not have a coordinate", sequence.Id())
}
seq_dist := make([]float64, len(references))
seq_dist := make([]int, len(references))
var wg sync.WaitGroup
for i, ref := range references {
wg.Add(1)
go func(i int, ref *obiseq.BioSequence) {
defer wg.Done()
reflocation := ref.GetCoordinate()
if reflocation == nil {
log.Fatalf("Sequence %s does not have a coordinate", ref.Id())
}
iseq_channel := make(chan int)
seq_dist[i] = obistats.SquareDist(location, reflocation)
}(i, ref)
for k := 0; k < obioptions.CLIParallelWorkers(); k++ {
wg.Add(1)
go func() {
defer wg.Done()
for i := range iseq_channel {
ref := references[i]
reflocation := ref.GetCoordinate()
if reflocation == nil {
log.Fatalf("Sequence %s does not have a coordinate", ref.Id())
}
seq_dist[i] = obistats.SquareDist(location, reflocation)
}
}()
}
for i := range references {
iseq_channel <- i
}
close(iseq_channel)
wg.Wait()
order := obiutils.Order(sort.Float64Slice(seq_dist))
order := obiutils.Order(sort.IntSlice(seq_dist))
lca := (*taxa)[seqidx]

View File

@ -1,7 +1,7 @@
package obitag
import (
"log"
log "github.com/sirupsen/logrus"
"math"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obialign"
@ -99,9 +99,9 @@ func MapOnLandmarkSequences(sequence *obiseq.BioSequence, landmarks *obiseq.BioS
func FindGeomClosest(sequence *obiseq.BioSequence,
landmarks *obiseq.BioSequenceSlice,
references *obiseq.BioSequenceSlice,
buffer *[]uint64) (*obiseq.BioSequence, float64, float64, []int, *obiseq.BioSequenceSlice) {
buffer *[]uint64) (*obiseq.BioSequence, int, float64, []int, *obiseq.BioSequenceSlice) {
min_dist := math.MaxFloat64
min_dist := math.MaxInt
min_idx := make([]int, 0)
query_location := MapOnLandmarkSequences(sequence, landmarks, buffer)
@ -129,7 +129,7 @@ func FindGeomClosest(sequence *obiseq.BioSequence,
for _, i := range min_idx {
seq := (*references)[i]
lcs, length := obialign.FastLCSEGFScore(sequence, seq, -1, buffer)
lcs, length := obialign.FastLCSScore(sequence, seq, -1, buffer)
ident := float64(lcs) / float64(length)
if ident > best_id {
best_id = ident

View File

@ -29,20 +29,19 @@ import (
// - taxid: The taxid associated with the matched distance.
// - rank: The rank associated with the matched distance.
// - scientificName: The scientific name associated with the matched distance.
func MatchDistanceIndex(distance float64, distanceIdx map[int]string) (int, string, string) {
idist := int(distance)
func MatchDistanceIndex(distance int, distanceIdx map[int]string) (int, string, string) {
keys := maps.Keys(distanceIdx)
slices.Sort(keys)
i := sort.Search(len(keys), func(i int) bool {
return idist <= keys[i]
return distance <= keys[i]
})
var taxid int
var rank string
var scientificName string
if i == len(keys) || idist > keys[len(keys)-1] {
if i == len(keys) || distance > keys[len(keys)-1] {
taxid = 1
rank = "no rank"
scientificName = "root"