Added some comments

git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/trunk@399 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
Frédéric Boyer
2012-02-27 14:57:03 +00:00
parent 9262e954cf
commit 9e36754fef
2 changed files with 70 additions and 3 deletions

View File

@ -10,7 +10,7 @@
static pmerge_t mergeInit(pmerge_t merge,pwordcount_t data,uint32_t s1,uint32_t s2); static pmerge_t mergeInit(pmerge_t merge,pwordcount_t data,uint32_t s1,uint32_t s2);
static pmerge_t mergeInit(pmerge_t merge, pwordcount_t data,uint32_t s1,uint32_t s2) static pmerge_t mergeInit(pmerge_t merge, pwordcount_t data, uint32_t s1, uint32_t s2)
{ {
merge->words = data->words; merge->words = data->words;
merge->count = data->strictcount; merge->count = data->strictcount;
@ -26,6 +26,15 @@ typedef enum {S1=1,S2=2,STACK=3} source_t;
void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,uint32_t seqQuorum) void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,uint32_t seqQuorum)
{ {
/*
* data / in out : the table that contains the two parts to be merged
* s1 / in : size of the first part of the table (i.e. the index at which the second part starts)
* s2 / in : size of the second part of the table (the second part ends at index s1+s2)
* remainingSeq / in : the number of remaining seqs to be added to the table
* seqQuorum / in : the minimum number of sequences in which a pattern must appear
*/
merge_t merged; merge_t merged;
source_t source; source_t source;
word_t currentword,tmpword; word_t currentword,tmpword;
@ -38,12 +47,31 @@ void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,ui
// DEBUG_LOG("Coucou %p s1= %d s2= %d",data,s1,s2) // DEBUG_LOG("Coucou %p s1= %d s2= %d",data,s1,s2)
/*
* init the merged structure (used only for coding convenience, never returned, allocated on the C-stack)
* note that :
* merged.words : hashcodes (initialized to data->words)
* merged.count : counts of each word (initialized to data->strictcount)
* merged.read1 : index of the first word of the first subtable (initialized to 0)
* merged.write : index of the next slot to be written in the table (presumably initialized to 0 -- to be confirmed in mergeInit)
* merged.read2 : index of the first word of the second subtable (initialized to s1)
* merged.size : total size of the table (initialized to s1+s2)
*
* allocate a new queue (the STACK source) of size min(s1, s2)
*/
(void)mergeInit(&merged,data,s1,s2); (void)mergeInit(&merged,data,s1,s2);
(void)newQueue(&queue,MINI(s1,s2)); (void)newQueue(&queue,MINI(s1,s2));
/* true until
* merged.read1 == s1 AND merged.read2 == merged.size, i.e. ALL words have been processed
*/
while (merged.read1 < s1 || merged.read2 < merged.size) while (merged.read1 < s1 || merged.read2 < merged.size)
{ {
/*
* initialize current{word,count} from either STACK (if not empty) or first table (S1)
*/
if (! queue.empty) if (! queue.empty)
{ {
currentword = queue.words[queue.pop]; currentword = queue.words[queue.pop];
@ -57,6 +85,12 @@ void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,ui
source=S1; source=S1;
} }
/*
* IF there are some words in the second subtable remaining to be processed AND
* its first word is lower than current word
* THEN initialize current{word,count} from the second table (S2)
*
*/
if (merged.read2 < merged.size && if (merged.read2 < merged.size &&
WORD(currentword) > WORD(merged.words[merged.read2])) WORD(currentword) > WORD(merged.words[merged.read2]))
{ {
@ -65,11 +99,20 @@ void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,ui
source = S2; source = S2;
} }
/*
* record whether the current word (when not taken from S2) is equal to the first unread word of the second subtable
*/
same = (source != S2) && (WORD(currentword) == WORD(merged.words[merged.read2])); same = (source != S2) && (WORD(currentword) == WORD(merged.words[merged.read2]));
nsame+=same; nsame+=same;
// DEBUG_LOG("Merging : r1 = %d s1 = %d r2 = %d size = %d word = %s source=%u same=%u",merged.read1,s1,merged.read2-s1,merged.size,ecoUnhashWord(currentword,18),source,same) // DEBUG_LOG("Merging : r1 = %d s1 = %d r2 = %d size = %d word = %s source=%u same=%u",merged.read1,s1,merged.read2-s1,merged.size,ecoUnhashWord(currentword,18),source,same)
/*
* merge step (AND apply the quorum property)
* update merged.read1 AND/OR merged.read2
* record the word and its count in the table
*/
tmpword = merged.words[merged.write]; tmpword = merged.words[merged.write];
tmpcount= merged.count[merged.write]; tmpcount= merged.count[merged.write];
@ -115,6 +158,12 @@ void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,ui
// DEBUG_LOG("r1 : %d r2 : %d qsize : %d nsame : %d tot : %d write : %s count : %d source : %d size : %d pop : %d push : %d empty : %d",merged.read1,merged.read2-s1,qsize,nsame,qsize+nsame,ecoUnhashWord(currentword,18),currentcount,source,queue.size,queue.pop,queue.push,queue.empty) // DEBUG_LOG("r1 : %d r2 : %d qsize : %d nsame : %d tot : %d write : %s count : %d source : %d size : %d pop : %d push : %d empty : %d",merged.read1,merged.read2-s1,qsize,nsame,qsize+nsame,ecoUnhashWord(currentword,18),currentcount,source,queue.size,queue.pop,queue.push,queue.empty)
/*
* finish the merging with words not processed (AND apply the quorum property)
* they are stored in the second subtable (IF)
* OR in the queue (ELSE)
*/
if (merged.read2 < merged.size) if (merged.read2 < merged.size)
{ {
//DEBUG_LOG("end1 %d %d/%d %d/%d",merged.write,merged.read1,s1,merged.read2,merged.size); //DEBUG_LOG("end1 %d %d/%d %d/%d",merged.write,merged.read1,s1,merged.read2,merged.size);

View File

@ -91,7 +91,9 @@ void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circ
table->words = ECOREALLOC(table->words,buffersize*sizeof(word_t), table->words = ECOREALLOC(table->words,buffersize*sizeof(word_t),
"\n\nCannot allocate memory to extend word table" ); "\n\nCannot allocate memory to extend word table" );
/*
* newtable is a pointer to the memory planned to be used for the new sequence (at most ecoWordCount new hash codes)
*/
newtable = table->words + table->size; newtable = table->words + table->size;
// DEBUG_LOG("Words = %x (%u) new = %x", table->words,table->size,newtable); // DEBUG_LOG("Words = %x (%u) new = %x", table->words,table->size,newtable);
@ -99,11 +101,24 @@ void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circ
(void)ecoHashSequence(newtable,wordsize,circular,doublestrand,seq,&newsize,neededWords,neededWordCount,seqQuorum); (void)ecoHashSequence(newtable,wordsize,circular,doublestrand,seq,&newsize,neededWords,neededWordCount,seqQuorum);
// DEBUG_LOG("new seq wordCount : %d",newsize); // DEBUG_LOG("new seq wordCount : %d",newsize);
/*
* at this stage, new hash codes have been added in the table but the table is not sorted
*/
newsize = ecoCompactHashSequence(newtable,newsize); newsize = ecoCompactHashSequence(newtable,newsize);
/*
* new hash codes have now been sorted BUT the whole table is not.
* MULTIWORDS have been tagged (and compacted)
*/
// DEBUG_LOG("compacted wordCount : %d",newsize); // DEBUG_LOG("compacted wordCount : %d",newsize);
buffersize = table->size + newsize; buffersize = table->size + newsize;
/*
* buffersize is now set to the REAL size used by the table (but the memory chunk may be larger)
*/
// resize the count buffer // resize the count buffer
table->inseqcount++; table->inseqcount++;
@ -113,10 +128,13 @@ void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circ
"Cannot allocate memory to extend example word count table"); "Cannot allocate memory to extend example word count table");
//fprintf (stderr, " NewAddress: %x\n", table->strictcount); //fprintf (stderr, " NewAddress: %x\n", table->strictcount);
for (i=table->size; i < buffersize; i++) for (i=table->size; i < buffersize; i++)
table->strictcount[i]=1; table->strictcount[i]=1;
/*
* new words in the table are set to a count of ONE
*/
// Now we have to merge in situ the two tables // Now we have to merge in situ the two tables