update the geometric obitag

Former-commit-id: acd8fe1c8c1cf443098432d818397b0b5d02df33
This commit is contained in:
2024-01-17 23:38:51 +01:00
parent 13cfebd737
commit f2f7b4574e
5 changed files with 36 additions and 25 deletions

1
.gitignore vendored
View File

@ -116,3 +116,4 @@ doc/book/wolf_data/Release-253/ncbitaxo/names.dmp
doc/book/wolf_data/Release-253/ncbitaxo/nodes.dmp doc/book/wolf_data/Release-253/ncbitaxo/nodes.dmp
doc/book/wolf_data/Release-253/ncbitaxo/readme.txt doc/book/wolf_data/Release-253/ncbitaxo/readme.txt
doc/book/results/toto.tasta doc/book/results/toto.tasta
sample/.DS_Store

View File

@ -15,14 +15,14 @@ import (
// SquareDist calculates the squared Euclidean distance between // SquareDist calculates the squared Euclidean distance between
// two vectors 'a' and 'b'. // two vectors 'a' and 'b'.
// //
// 'a' and 'b' are slices of float64 values representing // 'a' and 'b' are slices of float64 or int values representing
// coordinate points in space. It is assumed that both slices // coordinate points in space. It is assumed that both slices
// have the same length. // have the same length.
// Returns the calculated squared distance as a float64. // Returns the calculated squared distance as a float64.
func SquareDist[T float64 | int](a, b []T) float64 { func SquareDist[T float64 | int](a, b []T) T {
sum := 0.0 sum := T(0)
for i, v := range a { for i, v := range a {
diff := float64(v - b[i]) diff := v - b[i]
sum += diff * diff sum += diff * diff
} }
return sum return sum
@ -35,7 +35,7 @@ func SquareDist[T float64 | int](a, b []T) float64 {
// is paired with the corresponding element of `b`. // is paired with the corresponding element of `b`.
// Returns the squared sum of the differences. // Returns the squared sum of the differences.
func EuclideanDist[T float64 | int](a, b []T) float64 { func EuclideanDist[T float64 | int](a, b []T) float64 {
return math.Sqrt(SquareDist(a, b)) return math.Sqrt(float64(SquareDist(a, b)))
} }
// DefaultRG creates and returns a new instance of *rand.Rand. // DefaultRG creates and returns a new instance of *rand.Rand.

View File

@ -6,6 +6,7 @@ import (
"sort" "sort"
"sync" "sync"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obistats" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obistats"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
@ -24,26 +25,36 @@ func GeomIndexSesquence(seqidx int,
log.Fatalf("Sequence %s does not have a coordinate", sequence.Id()) log.Fatalf("Sequence %s does not have a coordinate", sequence.Id())
} }
seq_dist := make([]float64, len(references)) seq_dist := make([]int, len(references))
var wg sync.WaitGroup var wg sync.WaitGroup
for i, ref := range references { iseq_channel := make(chan int)
wg.Add(1)
go func(i int, ref *obiseq.BioSequence) {
defer wg.Done()
reflocation := ref.GetCoordinate()
if reflocation == nil {
log.Fatalf("Sequence %s does not have a coordinate", ref.Id())
}
seq_dist[i] = obistats.SquareDist(location, reflocation) for k := 0; k < obioptions.CLIParallelWorkers(); k++ {
}(i, ref) wg.Add(1)
go func() {
defer wg.Done()
for i := range iseq_channel {
ref := references[i]
reflocation := ref.GetCoordinate()
if reflocation == nil {
log.Fatalf("Sequence %s does not have a coordinate", ref.Id())
}
seq_dist[i] = obistats.SquareDist(location, reflocation)
}
}()
} }
for i := range references {
iseq_channel <- i
}
close(iseq_channel)
wg.Wait() wg.Wait()
order := obiutils.Order(sort.Float64Slice(seq_dist)) order := obiutils.Order(sort.IntSlice(seq_dist))
lca := (*taxa)[seqidx] lca := (*taxa)[seqidx]

View File

@ -1,7 +1,7 @@
package obitag package obitag
import ( import (
"log" log "github.com/sirupsen/logrus"
"math" "math"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obialign" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obialign"
@ -99,9 +99,9 @@ func MapOnLandmarkSequences(sequence *obiseq.BioSequence, landmarks *obiseq.BioS
func FindGeomClosest(sequence *obiseq.BioSequence, func FindGeomClosest(sequence *obiseq.BioSequence,
landmarks *obiseq.BioSequenceSlice, landmarks *obiseq.BioSequenceSlice,
references *obiseq.BioSequenceSlice, references *obiseq.BioSequenceSlice,
buffer *[]uint64) (*obiseq.BioSequence, float64, float64, []int, *obiseq.BioSequenceSlice) { buffer *[]uint64) (*obiseq.BioSequence, int, float64, []int, *obiseq.BioSequenceSlice) {
min_dist := math.MaxFloat64 min_dist := math.MaxInt
min_idx := make([]int, 0) min_idx := make([]int, 0)
query_location := MapOnLandmarkSequences(sequence, landmarks, buffer) query_location := MapOnLandmarkSequences(sequence, landmarks, buffer)
@ -129,7 +129,7 @@ func FindGeomClosest(sequence *obiseq.BioSequence,
for _, i := range min_idx { for _, i := range min_idx {
seq := (*references)[i] seq := (*references)[i]
lcs, length := obialign.FastLCSEGFScore(sequence, seq, -1, buffer) lcs, length := obialign.FastLCSScore(sequence, seq, -1, buffer)
ident := float64(lcs) / float64(length) ident := float64(lcs) / float64(length)
if ident > best_id { if ident > best_id {
best_id = ident best_id = ident

View File

@ -29,20 +29,19 @@ import (
// - taxid: The taxid associated with the matched distance. // - taxid: The taxid associated with the matched distance.
// - rank: The rank associated with the matched distance. // - rank: The rank associated with the matched distance.
// - scientificName: The scientific name associated with the matched distance. // - scientificName: The scientific name associated with the matched distance.
func MatchDistanceIndex(distance float64, distanceIdx map[int]string) (int, string, string) { func MatchDistanceIndex(distance int, distanceIdx map[int]string) (int, string, string) {
idist := int(distance)
keys := maps.Keys(distanceIdx) keys := maps.Keys(distanceIdx)
slices.Sort(keys) slices.Sort(keys)
i := sort.Search(len(keys), func(i int) bool { i := sort.Search(len(keys), func(i int) bool {
return idist <= keys[i] return distance <= keys[i]
}) })
var taxid int var taxid int
var rank string var rank string
var scientificName string var scientificName string
if i == len(keys) || idist > keys[len(keys)-1] { if i == len(keys) || distance > keys[len(keys)-1] {
taxid = 1 taxid = 1
rank = "no rank" rank = "no rank"
scientificName = "root" scientificName = "root"