diff --git a/pkg/obitools/obirefidx/obirefidx.go b/pkg/obitools/obirefidx/obirefidx.go index e3e7dde..d3dfc05 100644 --- a/pkg/obitools/obirefidx/obirefidx.go +++ b/pkg/obitools/obirefidx/obirefidx.go @@ -48,22 +48,29 @@ func IndexSequence(seqidx int, obiutils.Reverse(*pseq, true) // score := make([]int, len(references)) mindiff := make([]int, len(*pseq)) -/* nseq := make([]int, len(*pseq)) - nali := make([]int, len(*pseq)) - nok := make([]int, len(*pseq)) - nfast := make([]int, len(*pseq)) - nfastok := make([]int, len(*pseq)) - */ lseq := sequence.Len() + /* + nseq := make([]int, len(*pseq)) + nali := make([]int, len(*pseq)) + nok := make([]int, len(*pseq)) + nfast := make([]int, len(*pseq)) + nfastok := make([]int, len(*pseq)) + */lseq := sequence.Len() mini := -1 + wordmin := 0 + for i, ancestor := range *pseq { for _, order := range ow { if lca[order] == ancestor { // nseq[i]++ - wordmin := 0 if mini != -1 { - wordmin = obiutils.MaxInt(lseq-3-mini*4, 0) + wordmin = obiutils.MaxInt(sequence.Len(), references[order].Len()) - 3 - 4*mini } + + if cw[order] < wordmin { + break + } + lcs, alilength := -1, -1 errs := int(1e9) if mini != -1 && mini <= 1 { @@ -74,14 +81,13 @@ func IndexSequence(seqidx int, // nfastok[i]++ } } else { - if cw[order] >= wordmin { - // nali[i]++ - lcs, alilength = obialign.FastLCSScore(sequence, references[order], mini, &matrix) - if lcs >= 0 { - // nok[i]++ - errs = alilength - lcs - } + // nali[i]++ + lcs, alilength = obialign.FastLCSScore(sequence, references[order], mini, &matrix) + if lcs >= 0 { + // nok[i]++ + errs = alilength - lcs } + } if mini == -1 || errs < mini { mini = errs @@ -106,13 +112,14 @@ func IndexSequence(seqidx int, } } -/* log.Println(sequence.Id(), tseq.Taxid(), tseq.ScientificName(), tseq.Rank(), obitag_index) - log.Println(sequence.Id(), tseq.Taxid(), tseq.ScientificName(), tseq.Rank(), nseq) - log.Println(sequence.Id(), tseq.Taxid(), tseq.ScientificName(), tseq.Rank(), nfast) - log.Println(sequence.Id(), tseq.Taxid(), tseq.ScientificName(), tseq.Rank(), nfastok) - log.Println(sequence.Id(), tseq.Taxid(), tseq.ScientificName(), tseq.Rank(), nali) - log.Println(sequence.Id(), tseq.Taxid(), tseq.ScientificName(), tseq.Rank(), nok) - */ return obitag_index + /* + log.Println(sequence.Id(), tseq.Taxid(), tseq.ScientificName(), tseq.Rank(), obitag_index) + log.Println(sequence.Id(), tseq.Taxid(), tseq.ScientificName(), tseq.Rank(), nseq) + log.Println(sequence.Id(), tseq.Taxid(), tseq.ScientificName(), tseq.Rank(), nfast) + log.Println(sequence.Id(), tseq.Taxid(), tseq.ScientificName(), tseq.Rank(), nfastok) + log.Println(sequence.Id(), tseq.Taxid(), tseq.ScientificName(), tseq.Rank(), nali) + log.Println(sequence.Id(), tseq.Taxid(), tseq.ScientificName(), tseq.Rank(), nok) + */return obitag_index } func IndexReferenceDB(iterator obiiter.IBioSequence) obiiter.IBioSequence { diff --git a/pkg/obitools/obitag/obitag.go b/pkg/obitools/obitag/obitag.go index 0ebac39..8af304c 100644 --- a/pkg/obitools/obitag/obitag.go +++ b/pkg/obitools/obitag/obitag.go @@ -34,9 +34,9 @@ func FindClosests(sequence *obiseq.BioSequence, o := obiutils.Reverse(obiutils.IntOrder(cw), true) bests := obiseq.MakeBioSequenceSlice() -// bests = append(bests, references[o[0]]) + // bests = append(bests, references[o[0]]) bestidxs := make([]int, 0) -// bestidxs = append(bestidxs, o[0]) + // bestidxs = append(bestidxs, o[0]) bestId := 0.0 bestmatch := references[o[0]].Id() @@ -45,26 +45,28 @@ func FindClosests(sequence *obiseq.BioSequence, for _, order := range o { ref := references[order] + score := int(1e9) if maxe != -1 { - wordmin = obiutils.MaxInt(sequence.Len(), ref.Len()) - 4*maxe + wordmin = obiutils.MaxInt(sequence.Len(), ref.Len()) - 3 - 4*maxe + } + + if cw[order] < wordmin { + break } lcs, alilength := -1, -1 - score := int(1e9) if maxe == 0 || maxe == 1 { d, _, _, _ := obialign.D1Or0(sequence, references[order]) if d >= 0 { score = d - alilength = obiutils.MaxInt(sequence.Len(), ref.Len()) + alilength = obiutils.MaxInt(sequence.Len(), ref.Len()) lcs = alilength - score } } else { - if cw[order] >= wordmin { - lcs, alilength = obialign.FastLCSScore(sequence, references[order], maxe, &matrix) - if lcs >= 0 { - score = alilength - lcs - } + lcs, alilength = obialign.FastLCSScore(sequence, references[order], maxe, &matrix) + if lcs >= 0 { + score = alilength - lcs } } @@ -73,7 +75,7 @@ func FindClosests(sequence *obiseq.BioSequence, bestidxs = bestidxs[:0] maxe = score bestId = float64(lcs) / float64(alilength) - // log.Println(ref.Id(), maxe, bestId,bestidxs) + // log.Println(ref.Id(), maxe, bestId,bestidxs) } if score == maxe { @@ -84,12 +86,12 @@ func FindClosests(sequence *obiseq.BioSequence, bestId = id bestmatch = ref.Id() } - // log.Println(ref.Id(), maxe, bestId,bestidxs) + // log.Println(ref.Id(), maxe, bestId,bestidxs) } } - //log.Println("that's all falks", maxe, bestId, bestidxs) + //log.Println("that's all falks", maxe, bestId, bestidxs) return bests, maxe, bestId, bestmatch, bestidxs }