Accept to deal with sequence in lower case
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@217 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
BIN
src/ecoPCR.gz
Executable file
BIN
src/ecoPCR.gz
Executable file
Binary file not shown.
BIN
src/ecofind.gz
Executable file
BIN
src/ecofind.gz
Executable file
Binary file not shown.
BIN
src/ecogrep
Executable file
BIN
src/ecogrep
Executable file
Binary file not shown.
BIN
src/ecogrep.gz
Executable file
BIN
src/ecogrep.gz
Executable file
Binary file not shown.
BIN
src/ecoisundertaxon
Executable file
BIN
src/ecoisundertaxon
Executable file
Binary file not shown.
@ -4,6 +4,7 @@
|
||||
#include <zlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
|
||||
static FILE *open_seqfile(const char *prefix,int32_t index);
|
||||
|
||||
@ -11,32 +12,32 @@ static FILE *open_seqfile(const char *prefix,int32_t index);
|
||||
ecoseq_t *new_ecoseq()
|
||||
{
|
||||
void *tmp;
|
||||
|
||||
|
||||
tmp = ECOMALLOC(sizeof(ecoseq_t),"Allocate new ecoseq structure");
|
||||
|
||||
|
||||
return tmp;
|
||||
}
|
||||
|
||||
int32_t delete_ecoseq(ecoseq_t * seq)
|
||||
{
|
||||
|
||||
|
||||
if (seq)
|
||||
{
|
||||
if (seq->AC)
|
||||
ECOFREE(seq->AC,"Free sequence AC");
|
||||
|
||||
|
||||
if (seq->DE)
|
||||
ECOFREE(seq->DE,"Free sequence DE");
|
||||
|
||||
|
||||
if (seq->SQ)
|
||||
ECOFREE(seq->SQ,"Free sequence SQ");
|
||||
|
||||
|
||||
ECOFREE(seq,"Free sequence structure");
|
||||
|
||||
|
||||
return 0;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -49,9 +50,9 @@ ecoseq_t *new_ecoseq_with_data( char *AC,
|
||||
ecoseq_t *tmp;
|
||||
int32_t lstr;
|
||||
tmp = new_ecoseq();
|
||||
|
||||
|
||||
tmp->taxid=taxid_idx;
|
||||
|
||||
|
||||
if (AC)
|
||||
{
|
||||
lstr =strlen(AC);
|
||||
@ -97,12 +98,14 @@ ecoseq_t *readnext_ecoseq(FILE *f)
|
||||
int32_t comp_status;
|
||||
unsigned long int seqlength;
|
||||
int32_t rs;
|
||||
|
||||
char *c;
|
||||
int32_t i;
|
||||
|
||||
raw = read_ecorecord(f,&rs);
|
||||
|
||||
|
||||
if (!raw)
|
||||
return NULL;
|
||||
|
||||
|
||||
if (is_big_endian())
|
||||
{
|
||||
raw->CSQ_length = swap_int32_t(raw->CSQ_length);
|
||||
@ -110,44 +113,48 @@ ecoseq_t *readnext_ecoseq(FILE *f)
|
||||
raw->SQ_length = swap_int32_t(raw->SQ_length);
|
||||
raw->taxid = swap_int32_t(raw->taxid);
|
||||
}
|
||||
|
||||
|
||||
seq = new_ecoseq();
|
||||
|
||||
|
||||
seq->taxid = raw->taxid;
|
||||
|
||||
|
||||
seq->AC = ECOMALLOC(strlen(raw->AC) +1,
|
||||
"Allocate Sequence Accesion number");
|
||||
strncpy(seq->AC,raw->AC,strlen(raw->AC));
|
||||
|
||||
|
||||
|
||||
seq->DE = ECOMALLOC(raw->DE_length+1,
|
||||
"Allocate Sequence definition");
|
||||
strncpy(seq->DE,raw->data,raw->DE_length);
|
||||
|
||||
|
||||
seqlength = seq->SQ_length = raw->SQ_length;
|
||||
|
||||
|
||||
compressed = raw->data + raw->DE_length;
|
||||
|
||||
seq->SQ = ECOMALLOC(seqlength+1,
|
||||
"Allocate sequence buffer");
|
||||
|
||||
|
||||
comp_status = uncompress((unsigned char*)seq->SQ,
|
||||
&seqlength,
|
||||
(unsigned char*)compressed,
|
||||
raw->CSQ_length);
|
||||
|
||||
|
||||
if (comp_status != Z_OK)
|
||||
ECOERROR(ECO_IO_ERROR,"I cannot uncompress sequence data");
|
||||
|
||||
|
||||
for (c=seq->SQ,i=0;i<seqlength;c++,i++)
|
||||
*c=toupper(*c);
|
||||
|
||||
|
||||
return seq;
|
||||
}
|
||||
|
||||
/**
|
||||
* Open the sequences database (.sdx file)
|
||||
* @param prefix name of the database (radical without extension)
|
||||
* @param prefix name of the database (radical without extension)
|
||||
* @param index integer
|
||||
*
|
||||
* @return file object
|
||||
*
|
||||
* @return file object
|
||||
*/
|
||||
FILE *open_seqfile(const char *prefix,int32_t index)
|
||||
{
|
||||
@ -161,22 +168,22 @@ FILE *open_seqfile(const char *prefix,int32_t index)
|
||||
"%s_%03d.sdx",
|
||||
prefix,
|
||||
index);
|
||||
|
||||
|
||||
fprintf(stderr,"# Coucou %s\n",filename_buffer);
|
||||
|
||||
|
||||
|
||||
if (filename_length >= 1024)
|
||||
ECOERROR(ECO_ASSERT_ERROR,"file name is too long");
|
||||
|
||||
filename_buffer[filename_length]=0;
|
||||
|
||||
|
||||
input=open_ecorecorddb(filename_buffer,&seqcount,0);
|
||||
|
||||
|
||||
if (input)
|
||||
fprintf(stderr,"# Reading file %s containing %d sequences...\n",
|
||||
filename_buffer,
|
||||
seqcount);
|
||||
|
||||
|
||||
return input;
|
||||
}
|
||||
|
||||
@ -186,38 +193,38 @@ ecoseq_t *ecoseq_iterator(const char *prefix)
|
||||
static int32_t current_file_idx = 1;
|
||||
static char current_prefix[1024];
|
||||
ecoseq_t *seq;
|
||||
|
||||
|
||||
if (prefix)
|
||||
{
|
||||
current_file_idx = 1;
|
||||
|
||||
if (current_seq_file)
|
||||
fclose(current_seq_file);
|
||||
|
||||
|
||||
strncpy(current_prefix,prefix,1023);
|
||||
current_prefix[1024]=0;
|
||||
|
||||
|
||||
current_seq_file = open_seqfile(current_prefix,
|
||||
current_file_idx);
|
||||
|
||||
|
||||
if (!current_seq_file)
|
||||
return NULL;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
seq = readnext_ecoseq(current_seq_file);
|
||||
|
||||
|
||||
if (!seq && feof(current_seq_file))
|
||||
{
|
||||
current_file_idx++;
|
||||
fclose(current_seq_file);
|
||||
current_seq_file = open_seqfile(current_prefix,
|
||||
current_file_idx);
|
||||
|
||||
|
||||
|
||||
|
||||
if (current_seq_file)
|
||||
seq = readnext_ecoseq(current_seq_file);
|
||||
}
|
||||
|
||||
|
||||
return seq;
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user