Added some comments
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/trunk@399 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
@ -10,7 +10,7 @@
|
|||||||
static pmerge_t mergeInit(pmerge_t merge,pwordcount_t data,uint32_t s1,uint32_t s2);
|
static pmerge_t mergeInit(pmerge_t merge,pwordcount_t data,uint32_t s1,uint32_t s2);
|
||||||
|
|
||||||
|
|
||||||
static pmerge_t mergeInit(pmerge_t merge, pwordcount_t data,uint32_t s1,uint32_t s2)
|
static pmerge_t mergeInit(pmerge_t merge, pwordcount_t data, uint32_t s1, uint32_t s2)
|
||||||
{
|
{
|
||||||
merge->words = data->words;
|
merge->words = data->words;
|
||||||
merge->count = data->strictcount;
|
merge->count = data->strictcount;
|
||||||
@ -26,6 +26,15 @@ typedef enum {S1=1,S2=2,STACK=3} source_t;
|
|||||||
|
|
||||||
void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,uint32_t seqQuorum)
|
void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,uint32_t seqQuorum)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
/*
|
||||||
|
* data / in out : the table that contains the two parts to be merged
|
||||||
|
* s1 / in : end of the first part of the table
|
||||||
|
* s2 / in : end of the second part of the table
|
||||||
|
* remainingSeq / in : the number of remaining seqs to be added to the table
|
||||||
|
* seqQuorum / in : the minimum number of sequences in which a pattern must appear
|
||||||
|
*/
|
||||||
|
|
||||||
merge_t merged;
|
merge_t merged;
|
||||||
source_t source;
|
source_t source;
|
||||||
word_t currentword,tmpword;
|
word_t currentword,tmpword;
|
||||||
@ -38,12 +47,31 @@ void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,ui
|
|||||||
|
|
||||||
// DEBUG_LOG("Coucou %p s1= %d s2= %d",data,s1,s2)
|
// DEBUG_LOG("Coucou %p s1= %d s2= %d",data,s1,s2)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* init the merged structure (used only for coding convenience, never returned, allocated on the C-stack)
|
||||||
|
* note that :
|
||||||
|
* merged.words : hashcodes (initialized to data->words)
|
||||||
|
* merged.count : counts of each word (initialized to data->strictcount)
|
||||||
|
* merged.read1 : index of the first word of the first subtable (initialized to 0)
|
||||||
|
* merged.read1 : index of the first word of the first subtable (initialized to 0)
|
||||||
|
* merged.read2 : index of the first word of the second subtable (initialized to s1)
|
||||||
|
* merged.size : total size of the table (initialized to s1+s2)
|
||||||
|
*
|
||||||
|
* allocate a new stack of size min(s1, s2)
|
||||||
|
*/
|
||||||
|
|
||||||
(void)mergeInit(&merged,data,s1,s2);
|
(void)mergeInit(&merged,data,s1,s2);
|
||||||
(void)newQueue(&queue,MINI(s1,s2));
|
(void)newQueue(&queue,MINI(s1,s2));
|
||||||
|
|
||||||
|
|
||||||
|
/* true until
|
||||||
|
* merged.read1 == s1 AND merged.read2 == merged.size, i.e. ALL words have been processed
|
||||||
|
*/
|
||||||
while (merged.read1 < s1 || merged.read2 < merged.size)
|
while (merged.read1 < s1 || merged.read2 < merged.size)
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
* initialize current{word,count} from either STACK (if not empty) or first table (S1)
|
||||||
|
*/
|
||||||
if (! queue.empty)
|
if (! queue.empty)
|
||||||
{
|
{
|
||||||
currentword = queue.words[queue.pop];
|
currentword = queue.words[queue.pop];
|
||||||
@ -57,6 +85,12 @@ void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,ui
|
|||||||
source=S1;
|
source=S1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* IF there are some words in the second subtable remaining to be processed AND
|
||||||
|
* its first word is lower than current word
|
||||||
|
* THEN initialize current{word,count} from the second table (S2)
|
||||||
|
*
|
||||||
|
*/
|
||||||
if (merged.read2 < merged.size &&
|
if (merged.read2 < merged.size &&
|
||||||
WORD(currentword) > WORD(merged.words[merged.read2]))
|
WORD(currentword) > WORD(merged.words[merged.read2]))
|
||||||
{
|
{
|
||||||
@ -65,11 +99,20 @@ void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,ui
|
|||||||
source = S2;
|
source = S2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* record if the two words in the both subtable are the same
|
||||||
|
*/
|
||||||
same = (source != S2) && (WORD(currentword) == WORD(merged.words[merged.read2]));
|
same = (source != S2) && (WORD(currentword) == WORD(merged.words[merged.read2]));
|
||||||
nsame+=same;
|
nsame+=same;
|
||||||
|
|
||||||
// DEBUG_LOG("Merging : r1 = %d s1 = %d r2 = %d size = %d word = %s source=%u same=%u",merged.read1,s1,merged.read2-s1,merged.size,ecoUnhashWord(currentword,18),source,same)
|
// DEBUG_LOG("Merging : r1 = %d s1 = %d r2 = %d size = %d word = %s source=%u same=%u",merged.read1,s1,merged.read2-s1,merged.size,ecoUnhashWord(currentword,18),source,same)
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* merge step (AND apply the quorum property)
|
||||||
|
* update merged.read1 AND/OR merged.read2
|
||||||
|
* record the word and its count in the table
|
||||||
|
*/
|
||||||
tmpword = merged.words[merged.write];
|
tmpword = merged.words[merged.write];
|
||||||
tmpcount= merged.count[merged.write];
|
tmpcount= merged.count[merged.write];
|
||||||
|
|
||||||
@ -115,6 +158,12 @@ void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,ui
|
|||||||
// DEBUG_LOG("r1 : %d r2 : %d qsize : %d nsame : %d tot : %d write : %s count : %d source : %d size : %d pop : %d push : %d empty : %d",merged.read1,merged.read2-s1,qsize,nsame,qsize+nsame,ecoUnhashWord(currentword,18),currentcount,source,queue.size,queue.pop,queue.push,queue.empty)
|
// DEBUG_LOG("r1 : %d r2 : %d qsize : %d nsame : %d tot : %d write : %s count : %d source : %d size : %d pop : %d push : %d empty : %d",merged.read1,merged.read2-s1,qsize,nsame,qsize+nsame,ecoUnhashWord(currentword,18),currentcount,source,queue.size,queue.pop,queue.push,queue.empty)
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* finish the merging with words not processed (AND apply the quorum property)
|
||||||
|
* they are stored in the second subtable (IF)
|
||||||
|
* OR in the queue (ELSE)
|
||||||
|
*/
|
||||||
|
|
||||||
if (merged.read2 < merged.size)
|
if (merged.read2 < merged.size)
|
||||||
{
|
{
|
||||||
//DEBUG_LOG("end1 %d %d/%d %d/%d",merged.write,merged.read1,s1,merged.read2,merged.size);
|
//DEBUG_LOG("end1 %d %d/%d %d/%d",merged.write,merged.read1,s1,merged.read2,merged.size);
|
||||||
|
@ -91,7 +91,9 @@ void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circ
|
|||||||
table->words = ECOREALLOC(table->words,buffersize*sizeof(word_t),
|
table->words = ECOREALLOC(table->words,buffersize*sizeof(word_t),
|
||||||
"\n\nCannot allocate memory to extend word table" );
|
"\n\nCannot allocate memory to extend word table" );
|
||||||
|
|
||||||
|
/*
|
||||||
|
* newtable is a pointer on the memory planed to be used for the new sequence (ecoWordCount new hash codes max)
|
||||||
|
*/
|
||||||
newtable = table->words + table->size;
|
newtable = table->words + table->size;
|
||||||
|
|
||||||
// DEBUG_LOG("Words = %x (%u) new = %x", table->words,table->size,newtable);
|
// DEBUG_LOG("Words = %x (%u) new = %x", table->words,table->size,newtable);
|
||||||
@ -99,11 +101,24 @@ void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circ
|
|||||||
(void)ecoHashSequence(newtable,wordsize,circular,doublestrand,seq,&newsize,neededWords,neededWordCount,seqQuorum);
|
(void)ecoHashSequence(newtable,wordsize,circular,doublestrand,seq,&newsize,neededWords,neededWordCount,seqQuorum);
|
||||||
// DEBUG_LOG("new seq wordCount : %d",newsize);
|
// DEBUG_LOG("new seq wordCount : %d",newsize);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* at this stage, new hash codes have been added in the table but the table is not sorted
|
||||||
|
*/
|
||||||
|
|
||||||
newsize = ecoCompactHashSequence(newtable,newsize);
|
newsize = ecoCompactHashSequence(newtable,newsize);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* new hash codes have now been sorted BUT the whole table is not.
|
||||||
|
* MULTIWORDS have been tagged (and compacted)
|
||||||
|
*/
|
||||||
|
|
||||||
// DEBUG_LOG("compacted wordCount : %d",newsize);
|
// DEBUG_LOG("compacted wordCount : %d",newsize);
|
||||||
buffersize = table->size + newsize;
|
buffersize = table->size + newsize;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* buffersize is now set to the REAL size used by the table (but the memory chunck may be larger)
|
||||||
|
*/
|
||||||
|
|
||||||
// resize the count buffer
|
// resize the count buffer
|
||||||
|
|
||||||
table->inseqcount++;
|
table->inseqcount++;
|
||||||
@ -113,10 +128,13 @@ void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circ
|
|||||||
"Cannot allocate memory to extend example word count table");
|
"Cannot allocate memory to extend example word count table");
|
||||||
//fprintf (stderr, " NewAddress: %x\n", table->strictcount);
|
//fprintf (stderr, " NewAddress: %x\n", table->strictcount);
|
||||||
|
|
||||||
|
|
||||||
for (i=table->size; i < buffersize; i++)
|
for (i=table->size; i < buffersize; i++)
|
||||||
table->strictcount[i]=1;
|
table->strictcount[i]=1;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* new words in the table are set to a count of ONE
|
||||||
|
*/
|
||||||
|
|
||||||
// Now we have to merge in situ the two tables
|
// Now we have to merge in situ the two tables
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user