Small change to slightly improve efficiency

Former-commit-id: fa54c9bf82b8ba60653255ab2a5f671619dcfdcb
This commit is contained in:
2023-05-05 13:33:15 +02:00
parent 3778ae9266
commit e9574bae07
2 changed files with 44 additions and 35 deletions

View File

@ -48,22 +48,29 @@ func IndexSequence(seqidx int,
obiutils.Reverse(*pseq, true) obiutils.Reverse(*pseq, true)
// score := make([]int, len(references)) // score := make([]int, len(references))
mindiff := make([]int, len(*pseq)) mindiff := make([]int, len(*pseq))
/* nseq := make([]int, len(*pseq)) /*
nali := make([]int, len(*pseq)) nseq := make([]int, len(*pseq))
nok := make([]int, len(*pseq)) nali := make([]int, len(*pseq))
nfast := make([]int, len(*pseq)) nok := make([]int, len(*pseq))
nfastok := make([]int, len(*pseq)) nfast := make([]int, len(*pseq))
*/ lseq := sequence.Len() nfastok := make([]int, len(*pseq))
*/lseq := sequence.Len()
mini := -1 mini := -1
wordmin := 0
for i, ancestor := range *pseq { for i, ancestor := range *pseq {
for _, order := range ow { for _, order := range ow {
if lca[order] == ancestor { if lca[order] == ancestor {
// nseq[i]++ // nseq[i]++
wordmin := 0
if mini != -1 { if mini != -1 {
wordmin = obiutils.MaxInt(lseq-3-mini*4, 0) wordmin = obiutils.MaxInt(sequence.Len(), references[order].Len()) - 3 - 4*mini
} }
if cw[order] < wordmin {
break
}
lcs, alilength := -1, -1 lcs, alilength := -1, -1
errs := int(1e9) errs := int(1e9)
if mini != -1 && mini <= 1 { if mini != -1 && mini <= 1 {
@ -74,14 +81,13 @@ func IndexSequence(seqidx int,
// nfastok[i]++ // nfastok[i]++
} }
} else { } else {
if cw[order] >= wordmin { // nali[i]++
// nali[i]++ lcs, alilength = obialign.FastLCSScore(sequence, references[order], mini, &matrix)
lcs, alilength = obialign.FastLCSScore(sequence, references[order], mini, &matrix) if lcs >= 0 {
if lcs >= 0 { // nok[i]++
// nok[i]++ errs = alilength - lcs
errs = alilength - lcs
}
} }
} }
if mini == -1 || errs < mini { if mini == -1 || errs < mini {
mini = errs mini = errs
@ -106,13 +112,14 @@ func IndexSequence(seqidx int,
} }
} }
/* log.Println(sequence.Id(), tseq.Taxid(), tseq.ScientificName(), tseq.Rank(), obitag_index) /*
log.Println(sequence.Id(), tseq.Taxid(), tseq.ScientificName(), tseq.Rank(), nseq) log.Println(sequence.Id(), tseq.Taxid(), tseq.ScientificName(), tseq.Rank(), obitag_index)
log.Println(sequence.Id(), tseq.Taxid(), tseq.ScientificName(), tseq.Rank(), nfast) log.Println(sequence.Id(), tseq.Taxid(), tseq.ScientificName(), tseq.Rank(), nseq)
log.Println(sequence.Id(), tseq.Taxid(), tseq.ScientificName(), tseq.Rank(), nfastok) log.Println(sequence.Id(), tseq.Taxid(), tseq.ScientificName(), tseq.Rank(), nfast)
log.Println(sequence.Id(), tseq.Taxid(), tseq.ScientificName(), tseq.Rank(), nali) log.Println(sequence.Id(), tseq.Taxid(), tseq.ScientificName(), tseq.Rank(), nfastok)
log.Println(sequence.Id(), tseq.Taxid(), tseq.ScientificName(), tseq.Rank(), nok) log.Println(sequence.Id(), tseq.Taxid(), tseq.ScientificName(), tseq.Rank(), nali)
*/ return obitag_index log.Println(sequence.Id(), tseq.Taxid(), tseq.ScientificName(), tseq.Rank(), nok)
*/return obitag_index
} }
func IndexReferenceDB(iterator obiiter.IBioSequence) obiiter.IBioSequence { func IndexReferenceDB(iterator obiiter.IBioSequence) obiiter.IBioSequence {

View File

@ -34,9 +34,9 @@ func FindClosests(sequence *obiseq.BioSequence,
o := obiutils.Reverse(obiutils.IntOrder(cw), true) o := obiutils.Reverse(obiutils.IntOrder(cw), true)
bests := obiseq.MakeBioSequenceSlice() bests := obiseq.MakeBioSequenceSlice()
// bests = append(bests, references[o[0]]) // bests = append(bests, references[o[0]])
bestidxs := make([]int, 0) bestidxs := make([]int, 0)
// bestidxs = append(bestidxs, o[0]) // bestidxs = append(bestidxs, o[0])
bestId := 0.0 bestId := 0.0
bestmatch := references[o[0]].Id() bestmatch := references[o[0]].Id()
@ -45,13 +45,17 @@ func FindClosests(sequence *obiseq.BioSequence,
for _, order := range o { for _, order := range o {
ref := references[order] ref := references[order]
score := int(1e9)
if maxe != -1 { if maxe != -1 {
wordmin = obiutils.MaxInt(sequence.Len(), ref.Len()) - 4*maxe wordmin = obiutils.MaxInt(sequence.Len(), ref.Len()) - 3 - 4*maxe
}
if cw[order] < wordmin {
break
} }
lcs, alilength := -1, -1 lcs, alilength := -1, -1
score := int(1e9)
if maxe == 0 || maxe == 1 { if maxe == 0 || maxe == 1 {
d, _, _, _ := obialign.D1Or0(sequence, references[order]) d, _, _, _ := obialign.D1Or0(sequence, references[order])
if d >= 0 { if d >= 0 {
@ -60,11 +64,9 @@ func FindClosests(sequence *obiseq.BioSequence,
lcs = alilength - score lcs = alilength - score
} }
} else { } else {
if cw[order] >= wordmin { lcs, alilength = obialign.FastLCSScore(sequence, references[order], maxe, &matrix)
lcs, alilength = obialign.FastLCSScore(sequence, references[order], maxe, &matrix) if lcs >= 0 {
if lcs >= 0 { score = alilength - lcs
score = alilength - lcs
}
} }
} }
@ -73,7 +75,7 @@ func FindClosests(sequence *obiseq.BioSequence,
bestidxs = bestidxs[:0] bestidxs = bestidxs[:0]
maxe = score maxe = score
bestId = float64(lcs) / float64(alilength) bestId = float64(lcs) / float64(alilength)
// log.Println(ref.Id(), maxe, bestId,bestidxs) // log.Println(ref.Id(), maxe, bestId,bestidxs)
} }
if score == maxe { if score == maxe {
@ -84,12 +86,12 @@ func FindClosests(sequence *obiseq.BioSequence,
bestId = id bestId = id
bestmatch = ref.Id() bestmatch = ref.Id()
} }
// log.Println(ref.Id(), maxe, bestId,bestidxs) // log.Println(ref.Id(), maxe, bestId,bestidxs)
} }
} }
//log.Println("that's all folks", maxe, bestId, bestidxs) //log.Println("that's all folks", maxe, bestId, bestidxs)
return bests, maxe, bestId, bestmatch, bestidxs return bests, maxe, bestId, bestmatch, bestidxs
} }