New version 0.3 with filtering on short words

git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/trunk@213 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
2009-05-13 06:51:25 +00:00
parent 5dc55c7f53
commit b7c1640042
12 changed files with 330 additions and 34 deletions

View File

@ -41,7 +41,8 @@ void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,ui
(void)mergeInit(&merged,data,s1,s2);
(void)newQueue(&queue,MINI(s1,s2));
while (merged.read1 < s1 && merged.read2 < merged.size)
while (merged.read1 < s1 || merged.read2 < merged.size)
{
if (! queue.empty)
{
@ -56,7 +57,8 @@ void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,ui
source=S1;
}
if (WORD(currentword) > WORD(merged.words[merged.read2]))
if (merged.read2 < merged.size &&
WORD(currentword) > WORD(merged.words[merged.read2]))
{
currentword = merged.words[merged.read2];
currentcount = merged.count[merged.read2];
@ -114,6 +116,8 @@ void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,ui
if (merged.read2 < merged.size)
{
//DEBUG_LOG("end1 %d %d/%d %d/%d",merged.write,merged.read1,s1,merged.read2,merged.size);
for (;merged.read2 < merged.size;merged.read2++)
{
merged.words[merged.write]=merged.words[merged.read2];
@ -122,7 +126,10 @@ void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,ui
merged.write++;
}
else while (! queue.empty)
}
else {
//DEBUG_LOG("end2 %d %d/%d %d/%d",merged.write,merged.read1,s1,merged.read2,merged.size);
while (! queue.empty)
{
// DEBUG_LOG("write : %s count : %d write : %d size : %d pop : %d push : %d empty : %d",ecoUnhashWord(queue.words[queue.pop],18),queue.count[queue.pop],merged.write,queue.size,queue.pop,queue.push,queue.empty)
merged.words[merged.write]=queue.words[queue.pop];
@ -131,6 +138,7 @@ void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,ui
if (remainingSeq + merged.count[merged.write] >= seqQuorum)
merged.write++;
}
}
data->size = merged.write;