mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
update the geometric obitag
Former-commit-id: acd8fe1c8c1cf443098432d818397b0b5d02df33
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@ -116,3 +116,4 @@ doc/book/wolf_data/Release-253/ncbitaxo/names.dmp
|
||||
doc/book/wolf_data/Release-253/ncbitaxo/nodes.dmp
|
||||
doc/book/wolf_data/Release-253/ncbitaxo/readme.txt
|
||||
doc/book/results/toto.tasta
|
||||
sample/.DS_Store
|
||||
|
@ -15,14 +15,14 @@ import (
|
||||
// SquareDist calculates the squared Euclidean distance between
|
||||
// two vectors 'a' and 'b'.
|
||||
//
|
||||
// 'a' and 'b' are slices of float64 values representing
|
||||
// 'a' and 'b' are slices of float64 or int values representing
|
||||
// coordinate points in space. It is assumed that both slices
|
||||
// have the same length.
|
||||
// Returns the calculated squared distance as a float64.
|
||||
func SquareDist[T float64 | int](a, b []T) float64 {
|
||||
sum := 0.0
|
||||
func SquareDist[T float64 | int](a, b []T) T {
|
||||
sum := T(0)
|
||||
for i, v := range a {
|
||||
diff := float64(v - b[i])
|
||||
diff := v - b[i]
|
||||
sum += diff * diff
|
||||
}
|
||||
return sum
|
||||
@ -35,7 +35,7 @@ func SquareDist[T float64 | int](a, b []T) float64 {
|
||||
// is paired with the corresponding element of `b`.
|
||||
// Returns the squared sum of the differences.
|
||||
func EuclideanDist[T float64 | int](a, b []T) float64 {
|
||||
return math.Sqrt(SquareDist(a, b))
|
||||
return math.Sqrt(float64(SquareDist(a, b)))
|
||||
}
|
||||
|
||||
// DefaultRG creates and returns a new instance of *rand.Rand.
|
||||
|
@ -6,6 +6,7 @@ import (
|
||||
"sort"
|
||||
"sync"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obistats"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||
@ -24,26 +25,36 @@ func GeomIndexSesquence(seqidx int,
|
||||
log.Fatalf("Sequence %s does not have a coordinate", sequence.Id())
|
||||
}
|
||||
|
||||
seq_dist := make([]float64, len(references))
|
||||
seq_dist := make([]int, len(references))
|
||||
|
||||
var wg sync.WaitGroup
|
||||
|
||||
for i, ref := range references {
|
||||
wg.Add(1)
|
||||
go func(i int, ref *obiseq.BioSequence) {
|
||||
defer wg.Done()
|
||||
reflocation := ref.GetCoordinate()
|
||||
if reflocation == nil {
|
||||
log.Fatalf("Sequence %s does not have a coordinate", ref.Id())
|
||||
}
|
||||
iseq_channel := make(chan int)
|
||||
|
||||
seq_dist[i] = obistats.SquareDist(location, reflocation)
|
||||
}(i, ref)
|
||||
for k := 0; k < obioptions.CLIParallelWorkers(); k++ {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for i := range iseq_channel {
|
||||
ref := references[i]
|
||||
reflocation := ref.GetCoordinate()
|
||||
if reflocation == nil {
|
||||
log.Fatalf("Sequence %s does not have a coordinate", ref.Id())
|
||||
}
|
||||
|
||||
seq_dist[i] = obistats.SquareDist(location, reflocation)
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
for i := range references {
|
||||
iseq_channel <- i
|
||||
}
|
||||
|
||||
close(iseq_channel)
|
||||
wg.Wait()
|
||||
|
||||
order := obiutils.Order(sort.Float64Slice(seq_dist))
|
||||
order := obiutils.Order(sort.IntSlice(seq_dist))
|
||||
|
||||
lca := (*taxa)[seqidx]
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
package obitag
|
||||
|
||||
import (
|
||||
"log"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"math"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obialign"
|
||||
@ -99,9 +99,9 @@ func MapOnLandmarkSequences(sequence *obiseq.BioSequence, landmarks *obiseq.BioS
|
||||
func FindGeomClosest(sequence *obiseq.BioSequence,
|
||||
landmarks *obiseq.BioSequenceSlice,
|
||||
references *obiseq.BioSequenceSlice,
|
||||
buffer *[]uint64) (*obiseq.BioSequence, float64, float64, []int, *obiseq.BioSequenceSlice) {
|
||||
buffer *[]uint64) (*obiseq.BioSequence, int, float64, []int, *obiseq.BioSequenceSlice) {
|
||||
|
||||
min_dist := math.MaxFloat64
|
||||
min_dist := math.MaxInt
|
||||
min_idx := make([]int, 0)
|
||||
|
||||
query_location := MapOnLandmarkSequences(sequence, landmarks, buffer)
|
||||
@ -129,7 +129,7 @@ func FindGeomClosest(sequence *obiseq.BioSequence,
|
||||
|
||||
for _, i := range min_idx {
|
||||
seq := (*references)[i]
|
||||
lcs, length := obialign.FastLCSEGFScore(sequence, seq, -1, buffer)
|
||||
lcs, length := obialign.FastLCSScore(sequence, seq, -1, buffer)
|
||||
ident := float64(lcs) / float64(length)
|
||||
if ident > best_id {
|
||||
best_id = ident
|
||||
|
@ -29,20 +29,19 @@ import (
|
||||
// - taxid: The taxid associated with the matched distance.
|
||||
// - rank: The rank associated with the matched distance.
|
||||
// - scientificName: The scientific name associated with the matched distance.
|
||||
func MatchDistanceIndex(distance float64, distanceIdx map[int]string) (int, string, string) {
|
||||
idist := int(distance)
|
||||
func MatchDistanceIndex(distance int, distanceIdx map[int]string) (int, string, string) {
|
||||
keys := maps.Keys(distanceIdx)
|
||||
slices.Sort(keys)
|
||||
|
||||
i := sort.Search(len(keys), func(i int) bool {
|
||||
return idist <= keys[i]
|
||||
return distance <= keys[i]
|
||||
})
|
||||
|
||||
var taxid int
|
||||
var rank string
|
||||
var scientificName string
|
||||
|
||||
if i == len(keys) || idist > keys[len(keys)-1] {
|
||||
if i == len(keys) || distance > keys[len(keys)-1] {
|
||||
taxid = 1
|
||||
rank = "no rank"
|
||||
scientificName = "root"
|
||||
|
Reference in New Issue
Block a user