Improved the FASTA parser for better memory handling and better parsing of the last parts of FASTA headers (definitions)
This commit is contained in:
Celine Mercier
2017-10-13 18:54:17 +02:00
parent 52e94bbea7
commit a258d334b1
5 changed files with 83 additions and 84 deletions

View File

@ -69,15 +69,34 @@ char* fastaSeqPtr_header_add_field(fastaSeqPtr seq, char* name, char* value)
element_from_header* table_header_add_field(element_from_header* header, char* name, char* value) element_from_header* table_header_add_field(element_from_header* header, char* name, char* value)
{ {
int nbf; int nbf;
int i, j;
element_from_header* new_header;
nbf = atoi(header[0].value); nbf = atoi(header[0].value);
nbf++; new_header = (element_from_header*) realloc(header, ((nbf+1)*sizeof(element_from_header)));
header = (element_from_header*) realloc(header, (nbf+1)*sizeof(element_from_header));
header[nbf].name = (char*) malloc((1+strlen(name))*sizeof(char)); i=0;
strcpy(header[nbf].name, name); while ((strcmp(new_header[i].name, "definition") != 0) && (i < nbf))
header[nbf].value = (char*) malloc((1+strlen(value))*sizeof(char)); i++;
strcpy(header[nbf].value, value);
sprintf(header[0].value, "%d", nbf); if (strcmp(new_header[i].name, "definition") == 0)
return(header); {
j = nbf-1;
while (strcmp(new_header[j].name, "definition") == 0)
{
new_header[j+1].name = new_header[j].name;
new_header[j+1].value = new_header[j].value;
j--;
}
}
new_header[i].name = (char*) malloc((1+strlen(name))*sizeof(char));
strcpy(new_header[i].name, name);
new_header[i].value = (char*) malloc((1+strlen(value))*sizeof(char));
strcpy(new_header[i].value, value);
sprintf(new_header[0].value, "%d", nbf+1);
return(new_header);
} }
@ -86,7 +105,7 @@ void free_header_table(element_from_header* header)
int i; int i;
int nbf = atoi(header[0].value); int nbf = atoi(header[0].value);
for (i = 0; i <= nbf; i++) for (i = 0; i < nbf; i++)
{ {
free((header[i]).name); free((header[i]).name);
free((header[i]).value); free((header[i]).value);
@ -101,7 +120,7 @@ char* getItemFromHeader(char* name, element_from_header* header)
int nbf; int nbf;
int i; int i;
nbf = atoi(header[0].value); nbf = atoi(header[0].value);
for (i = 1; i <= nbf; i++) for (i = 1; i < nbf; i++)
{ {
if (strcmp(header[i].name,name)==0) if (strcmp(header[i].name,name)==0)
value = header[i].value; value = header[i].value;
@ -115,7 +134,7 @@ void changeValue(element_from_header* header, char* name, char* newValue)
int i; int i;
int nbf = atoi(header[0].value); int nbf = atoi(header[0].value);
for (i = 1; i <= nbf; i++) for (i = 1; i < nbf; i++)
{ {
if (strcmp(header[i].name, name)==0) if (strcmp(header[i].name, name)==0)
{ {

View File

@ -798,7 +798,7 @@ YY_RULE_SETUP
(*p_header)[*nbf].value = (char*) malloc(sizeof(char)*size_needed); (*p_header)[*nbf].value = (char*) malloc(sizeof(char)*size_needed);
strcpy(((*p_header)[*nbf]).value,header_yytext); strcpy(((*p_header)[*nbf]).value,header_yytext);
(*nbf)++; p_header = check_and_realloc_mem_in_header_table(p_header, nbf, memory_allocated);
} }
YY_BREAK YY_BREAK
case 3: case 3:
@ -820,7 +820,7 @@ YY_RULE_SETUP
case 5: case 5:
YY_RULE_SETUP YY_RULE_SETUP
#line 69 "fasta_header_parser.l" #line 69 "fasta_header_parser.l"
{ { // TODO
/*fprintf(stderr,"\n<REGNAME>{SPACE} **%s**",header_yytext);*/ /*fprintf(stderr,"\n<REGNAME>{SPACE} **%s**",header_yytext);*/
if (i != 0) if (i != 0)
field = store_in_field(field,header_yytext,&free_size,&i); field = store_in_field(field,header_yytext,&free_size,&i);
@ -886,24 +886,21 @@ case YY_STATE_EOF(REGVAL):
#line 113 "fasta_header_parser.l" #line 113 "fasta_header_parser.l"
{ {
field = store_in_header_table(field, &((*p_header)[*nbf].value), &free_size, &i); field = store_in_header_table(field, &((*p_header)[*nbf].value), &free_size, &i);
p_header = check_and_realloc_mem_in_header_table(p_header, nbf, memory_allocated); (*nbf)++;
end_header_table(p_header, *nbf); end_header_table(p_header, *nbf);
free(field); free(field);
BEGIN(INITIAL); BEGIN(INITIAL);
return 0; return 0;
} }
YY_BREAK YY_BREAK
case YY_STATE_EOF(REGNAME): case YY_STATE_EOF(REGNAME):
#line 123 "fasta_header_parser.l" #line 122 "fasta_header_parser.l"
{ {
/*(*p_header)[*nbf].name = (char*) malloc(sizeof(char)*19); (*p_header)[*nbf].name = (char*) malloc(sizeof(char)*19);
strcpy((*p_header)[*nbf].name,"other_informations"); strcpy((*p_header)[*nbf].name,"definition");
field = store_in_header_table(field, &((*p_header)[*nbf].value), &free_size, &i); field = store_in_header_table(field, &((*p_header)[*nbf].value), &free_size, &i);
p_header = check_and_realloc_mem_in_header_table(p_header, nbf, memory_allocated); (*nbf)++;
*/ end_header_table(p_header, nbf);
end_header_table(p_header, *nbf);
free(field); free(field);
BEGIN(INITIAL); BEGIN(INITIAL);
return 0; return 0;
@ -911,10 +908,10 @@ case YY_STATE_EOF(REGNAME):
YY_BREAK YY_BREAK
case 12: case 12:
YY_RULE_SETUP YY_RULE_SETUP
#line 136 "fasta_header_parser.l" #line 133 "fasta_header_parser.l"
ECHO; ECHO;
YY_BREAK YY_BREAK
#line 918 "<stdout>" #line 915 "<stdout>"
case YY_STATE_EOF(INITIAL): case YY_STATE_EOF(INITIAL):
case YY_STATE_EOF(REGID): case YY_STATE_EOF(REGID):
yyterminate(); yyterminate();
@ -1912,7 +1909,7 @@ void header_yyfree (void * ptr )
#define YYTABLES_NAME "yytables" #define YYTABLES_NAME "yytables"
#line 136 "fasta_header_parser.l" #line 133 "fasta_header_parser.l"
@ -1923,38 +1920,32 @@ int header_yywrap()
element_from_header* header_parser_main(char *h) element_from_header* header_parser_main(char *h)
{ {
int nbfields,memory_allocated; int nbfields, memory_allocated;
element_from_header* header; element_from_header* header;
char* nbfields_n; char* nbfields_n;
char* nbfields_v; YY_BUFFER_STATE state;
nbfields_n = (char*) malloc(9*sizeof(char)); state=header_yy_scan_string(h);
nbfields_v = (char*) malloc(5*sizeof(char));
memory_allocated=MEMALLOCATED; memory_allocated=MEMALLOCATED;
header = (element_from_header*) malloc(memory_allocated * sizeof(element_from_header));
nbfields_n = (char*) malloc(9*sizeof(char));
strcpy(nbfields_n, "nbfields");
header[0].name = nbfields_n;
// Initialize memory to store the number of fields
header[0].value = (char*) malloc(10*sizeof(char));
nbfields=1; nbfields=1;
strcpy(nbfields_n, "nbfields");
strcpy(nbfields_v, "1");
header = (element_from_header*) malloc(memory_allocated * sizeof(element_from_header));
header[0].name = nbfields_n;
header[0].value = nbfields_v;
YY_BUFFER_STATE state;
state=header_yy_scan_string(h);
header_parser(&nbfields, &memory_allocated, &header); header_parser(&nbfields, &memory_allocated, &header);
header_yy_delete_buffer(state); header_yy_delete_buffer(state);
return header; return header;
} }

View File

@ -53,7 +53,7 @@ EQUAL =
(*p_header)[*nbf].value = (char*) malloc(sizeof(char)*size_needed); (*p_header)[*nbf].value = (char*) malloc(sizeof(char)*size_needed);
strcpy(((*p_header)[*nbf]).value,yytext); strcpy(((*p_header)[*nbf]).value,yytext);
(*nbf)++; p_header = check_and_realloc_mem_in_header_table(p_header, nbf, memory_allocated);
} }
@ -66,7 +66,7 @@ EQUAL =
field = store_in_field(field,yytext,&free_size,&i); field = store_in_field(field,yytext,&free_size,&i);
} }
<REGNAME>{SPACE} { <REGNAME>{SPACE} { // TODO
/*fprintf(stderr,"\n<REGNAME>{SPACE} **%s**",yytext);*/ /*fprintf(stderr,"\n<REGNAME>{SPACE} **%s**",yytext);*/
if (i != 0) if (i != 0)
field = store_in_field(field,yytext,&free_size,&i); field = store_in_field(field,yytext,&free_size,&i);
@ -112,22 +112,19 @@ EQUAL =
<REGVAL><<EOF>> { <REGVAL><<EOF>> {
field = store_in_header_table(field, &((*p_header)[*nbf].value), &free_size, &i); field = store_in_header_table(field, &((*p_header)[*nbf].value), &free_size, &i);
p_header = check_and_realloc_mem_in_header_table(p_header, nbf, memory_allocated); (*nbf)++;
end_header_table(p_header, *nbf); end_header_table(p_header, *nbf);
free(field); free(field);
BEGIN(INITIAL); BEGIN(INITIAL);
return 0; return 0;
} }
<REGNAME><<EOF>> { <REGNAME><<EOF>> {
/*(*p_header)[*nbf].name = (char*) malloc(sizeof(char)*19); (*p_header)[*nbf].name = (char*) malloc(sizeof(char)*19);
strcpy((*p_header)[*nbf].name,"other_informations"); strcpy((*p_header)[*nbf].name,"definition");
field = store_in_header_table(field, &((*p_header)[*nbf].value), &free_size, &i); field = store_in_header_table(field, &((*p_header)[*nbf].value), &free_size, &i);
p_header = check_and_realloc_mem_in_header_table(p_header, nbf, memory_allocated); (*nbf)++;
*/ end_header_table(p_header, nbf);
end_header_table(p_header, *nbf);
free(field); free(field);
BEGIN(INITIAL); BEGIN(INITIAL);
return 0; return 0;
@ -142,37 +139,31 @@ int header_yywrap()
element_from_header* header_parser_main(char *h) element_from_header* header_parser_main(char *h)
{ {
int nbfields,memory_allocated; int nbfields, memory_allocated;
element_from_header* header; element_from_header* header;
char* nbfields_n; char* nbfields_n;
char* nbfields_v; YY_BUFFER_STATE state;
nbfields_n = (char*) malloc(9*sizeof(char)); state=yy_scan_string(h);
nbfields_v = (char*) malloc(5*sizeof(char));
memory_allocated=MEMALLOCATED; memory_allocated=MEMALLOCATED;
header = (element_from_header*) malloc(memory_allocated * sizeof(element_from_header));
nbfields_n = (char*) malloc(9*sizeof(char));
strcpy(nbfields_n, "nbfields");
header[0].name = nbfields_n;
// Initialize memory to store the number of fields
header[0].value = (char*) malloc(10*sizeof(char));
nbfields=1; nbfields=1;
strcpy(nbfields_n, "nbfields");
strcpy(nbfields_v, "1");
header = (element_from_header*) malloc(memory_allocated * sizeof(element_from_header));
header[0].name = nbfields_n;
header[0].value = nbfields_v;
YY_BUFFER_STATE state;
state=yy_scan_string(h);
header_parser(&nbfields, &memory_allocated, &header); header_parser(&nbfields, &memory_allocated, &header);
yy_delete_buffer(state); yy_delete_buffer(state);
return header; return header;
} }

View File

@ -52,7 +52,7 @@ void printOnlyHeaderFromTable(element_from_header* header, FILE* output)
fprintf(output,">%s ",header[1].value); fprintf(output,">%s ",header[1].value);
for (i = 2; i <= nbf; i++) for (i = 2; i < nbf; i++)
{ {
if (strcmp(header[i].name, "definition") != 0) if (strcmp(header[i].name, "definition") != 0)
{ {
@ -60,11 +60,10 @@ void printOnlyHeaderFromTable(element_from_header* header, FILE* output)
fprintf(output,"="); fprintf(output,"=");
fprintf(output,"%s; ",header[i].value); fprintf(output,"%s; ",header[i].value);
} }
else if (strcmp(header[i].name, "definition") == 0)
fprintf(output,"%s ", header[i].value);
} }
if (strcmp(header[nbf].name, "definition") == 0)
fprintf(output,"%s; ",header[nbf].value);
fprintf(output,"\n"); fprintf(output,"\n");
} }

View File

@ -76,7 +76,7 @@ element_from_header** check_and_realloc_mem_in_header_table(element_from_header*
{ {
(*nbf)++; (*nbf)++;
if (*nbf == *memory_allocated) if ((*nbf)+1 == *memory_allocated)
{ {
(*memory_allocated)++; (*memory_allocated)++;
*p_header = (element_from_header*) realloc(*p_header, (*memory_allocated) * sizeof(element_from_header)); *p_header = (element_from_header*) realloc(*p_header, (*memory_allocated) * sizeof(element_from_header));
@ -87,7 +87,6 @@ element_from_header** check_and_realloc_mem_in_header_table(element_from_header*
void end_header_table(element_from_header** p_header, int nbf) void end_header_table(element_from_header** p_header, int nbf)
{ {
nbf = nbf - 1; *p_header = (element_from_header*) realloc(*p_header, nbf * sizeof(element_from_header));
//fprintf(stderr, "nbf = %d", nbf);
sprintf((*p_header)->value, "%d", nbf); sprintf((*p_header)->value, "%d", nbf);
} }