mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Took better advantage of the taxonomy structure and of an LCA upper bound based on common words. Acceleration of obirefidx by a factor of two compared to the previous version.
Former-commit-id: 35f40498d642058e9dbff20128d11303a314018d
This commit is contained in:
@ -31,118 +31,73 @@ func IndexSequence(seqidx int,
|
|||||||
var matrix []uint64
|
var matrix []uint64
|
||||||
|
|
||||||
lca := make(obitax.TaxonSet, len(references))
|
lca := make(obitax.TaxonSet, len(references))
|
||||||
tref := (*taxa)[seqidx]
|
tseq := (*taxa)[seqidx]
|
||||||
|
|
||||||
for i, taxon := range (*taxa) {
|
for i, taxon := range *taxa {
|
||||||
lca[i],_ = tref.LCA(taxon)
|
lca[i], _ = tseq.LCA(taxon)
|
||||||
}
|
}
|
||||||
|
|
||||||
cw := make([]int, len(references))
|
cw := make([]int, len(references))
|
||||||
sw := (*kmers)[seqidx]
|
sw := (*kmers)[seqidx]
|
||||||
for i, ref := range *kmers {
|
for i, ref := range *kmers {
|
||||||
cw[i] = obikmer.Common4Mer(sw,ref)
|
cw[i] = obikmer.Common4Mer(sw, ref)
|
||||||
}
|
}
|
||||||
|
|
||||||
ow := obiutils.Reverse(obiutils.IntOrder(cw),true)
|
ow := obiutils.Reverse(obiutils.IntOrder(cw), true)
|
||||||
pref,_ := tref.Path()
|
pseq, _ := tseq.Path()
|
||||||
obiutils.Reverse(*pref,true)
|
obiutils.Reverse(*pseq, true)
|
||||||
// score := make([]int, len(references))
|
// score := make([]int, len(references))
|
||||||
mindiff := make([]int, len(*pref))
|
mindiff := make([]int, len(*pseq))
|
||||||
|
nseq := make([]int, len(*pseq))
|
||||||
|
nali := make([]int, len(*pseq))
|
||||||
|
nok := make([]int, len(*pseq))
|
||||||
|
lseq := sequence.Len()
|
||||||
|
|
||||||
|
mini := -1
|
||||||
for i,ancestor := range *pref {
|
for i, ancestor := range *pseq {
|
||||||
mini := -1
|
for _, order := range ow {
|
||||||
for _,order := range ow {
|
|
||||||
if lca[order] == ancestor {
|
if lca[order] == ancestor {
|
||||||
lcs, alilength := obialign.FastLCSScore(sequence, references[order], mini, &matrix)
|
nseq[i]++
|
||||||
if lcs >= 0 {
|
wordmin := 0
|
||||||
errs := alilength - lcs
|
if mini != -1 {
|
||||||
if mini== -1 || errs < mini {
|
wordmin = obiutils.MaxInt(lseq-3-mini*4, 0)
|
||||||
mini = errs
|
}
|
||||||
}
|
lcs, alilength := -1, -1
|
||||||
|
if cw[order] >= wordmin {
|
||||||
|
nali[i]++
|
||||||
|
lcs, alilength = obialign.FastLCSScore(sequence, references[order], mini, &matrix)
|
||||||
|
if lcs >= 0 {
|
||||||
|
nok[i]++
|
||||||
|
errs := alilength - lcs
|
||||||
|
if mini == -1 || errs < mini {
|
||||||
|
mini = errs
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if mini != -1 {
|
mindiff[i] = mini
|
||||||
mindiff[i] = mini
|
}
|
||||||
} else {
|
|
||||||
mindiff[i] = 1e6
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
obitag_index := make(map[int]string, len(*pref))
|
obitag_index := make(map[int]string, len(*pseq))
|
||||||
|
|
||||||
old := sequence.Len()
|
old := lseq
|
||||||
for i,d := range mindiff {
|
for i, d := range mindiff {
|
||||||
if d < old {
|
if d != -1 && d < old {
|
||||||
current_taxid :=(*pref)[i]
|
current_taxid := (*pseq)[i]
|
||||||
obitag_index[d] = fmt.Sprintf(
|
obitag_index[d] = fmt.Sprintf(
|
||||||
"%d@%s@%s",
|
"%d@%s@%s",
|
||||||
current_taxid.Taxid(),
|
current_taxid.Taxid(),
|
||||||
current_taxid.ScientificName(),
|
current_taxid.ScientificName(),
|
||||||
current_taxid.Rank())
|
current_taxid.Rank())
|
||||||
old = d
|
old = d
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* // t := 0
|
// log.Println(sequence.Id(), tseq.Taxid(), tseq.ScientificName(), tseq.Rank(), obitag_index)
|
||||||
// r := 0
|
// log.Println(sequence.Id(), tseq.Taxid(), tseq.ScientificName(), tseq.Rank(), nseq)
|
||||||
// w := 0
|
// log.Println(sequence.Id(), tseq.Taxid(), tseq.ScientificName(), tseq.Rank(), nali)
|
||||||
for i, ref := range references {
|
// log.Println(sequence.Id(), tseq.Taxid(), tseq.ScientificName(), tseq.Rank(), nok)
|
||||||
lcs, alilength := obialign.FastLCSScore(sequence, ref, -1, &matrix)
|
|
||||||
score[i] = alilength - lcs
|
|
||||||
}
|
|
||||||
|
|
||||||
// log.Println("Redone : ",r,"/",t,"(",w,")")
|
|
||||||
|
|
||||||
o := obiutils.IntOrder(score)
|
|
||||||
|
|
||||||
current_taxid, err := taxo.Taxon(references[o[0]].Taxid())
|
|
||||||
current_score := score[o[0]]
|
|
||||||
current_idx := o[0]
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
log.Panicln(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
obitag_index := make(map[int]string)
|
|
||||||
|
|
||||||
for _, idx := range o {
|
|
||||||
new_taxid, err := taxo.Taxon(references[idx].Taxid())
|
|
||||||
if err != nil {
|
|
||||||
log.Panicln(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
new_taxid, err = current_taxid.LCA(new_taxid)
|
|
||||||
if err != nil {
|
|
||||||
log.Panicln(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
new_score := score[idx]
|
|
||||||
|
|
||||||
if current_taxid.Taxid() != new_taxid.Taxid() {
|
|
||||||
|
|
||||||
if new_score > current_score {
|
|
||||||
obitag_index[score[current_idx]] = fmt.Sprintf(
|
|
||||||
"%d@%s@%s",
|
|
||||||
current_taxid.Taxid(),
|
|
||||||
current_taxid.ScientificName(),
|
|
||||||
current_taxid.Rank())
|
|
||||||
current_score = new_score
|
|
||||||
}
|
|
||||||
|
|
||||||
current_taxid = new_taxid
|
|
||||||
current_idx = idx
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
obitag_index[score[current_idx]] = fmt.Sprintf(
|
|
||||||
"%d@%s@%s",
|
|
||||||
current_taxid.Taxid(),
|
|
||||||
current_taxid.ScientificName(),
|
|
||||||
current_taxid.Rank())
|
|
||||||
*/
|
|
||||||
//log.Println(obitag_index)
|
|
||||||
return obitag_index
|
return obitag_index
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user