Files
ecoprimers/src/libecoPCR/ecoseq.c
2008-10-13 10:39:14 +00:00

223 lines
4.5 KiB
C

#include "ecoPCR.h"
#include <stdlib.h>
#include <string.h>
#include <zlib.h>
#include <string.h>
#include <stdio.h>
static FILE *open_seqfile(const char *prefix,int32_t index);
ecoseq_t *new_ecoseq()
{
void *tmp;
tmp = ECOMALLOC(sizeof(ecoseq_t),"Allocate new ecoseq structure");
return tmp;
}
int32_t delete_ecoseq(ecoseq_t * seq)
{
if (seq)
{
if (seq->AC)
ECOFREE(seq->AC,"Free sequence AC");
if (seq->DE)
ECOFREE(seq->DE,"Free sequence DE");
if (seq->SQ)
ECOFREE(seq->SQ,"Free sequence SQ");
ECOFREE(seq,"Free sequence structure");
return 0;
}
return 1;
}
ecoseq_t *new_ecoseq_with_data( char *AC,
char *DE,
char *SQ,
int32_t taxid_idx
)
{
ecoseq_t *tmp;
int32_t lstr;
tmp = new_ecoseq();
tmp->taxid=taxid_idx;
if (AC)
{
lstr =strlen(AC);
tmp->AC=ECOMALLOC((lstr+1) * sizeof(char),
"Allocate sequence accession");
strcpy(tmp->AC,AC);
}
if (DE)
{
lstr =strlen(DE);
tmp->DE=ECOMALLOC((lstr+1) * sizeof(char),
"Allocate sequence definition");
strcpy(tmp->DE,DE);
}
if (SQ)
{
lstr =strlen(SQ);
tmp->SQ=ECOMALLOC((lstr+1) * sizeof(char),
"Allocate sequence data");
strcpy(tmp->SQ,SQ);
}
return tmp;
}
/**
* ?? used ??
**/
FILE *open_ecoseqdb(const char *filename,
int32_t *sequencecount)
{
return open_ecorecorddb(filename,sequencecount,1);
}
ecoseq_t *readnext_ecoseq(FILE *f)
{
char *compressed=NULL;
ecoseqformat_t *raw;
ecoseq_t *seq;
int32_t comp_status;
unsigned long int seqlength;
int32_t rs;
raw = read_ecorecord(f,&rs);
if (!raw)
return NULL;
if (is_big_endian())
{
raw->CSQ_length = swap_int32_t(raw->CSQ_length);
raw->DE_length = swap_int32_t(raw->DE_length);
raw->SQ_length = swap_int32_t(raw->SQ_length);
raw->taxid = swap_int32_t(raw->taxid);
}
seq = new_ecoseq();
seq->taxid = raw->taxid;
seq->AC = ECOMALLOC(strlen(raw->AC) +1,
"Allocate Sequence Accesion number");
strncpy(seq->AC,raw->AC,strlen(raw->AC));
seq->DE = ECOMALLOC(raw->DE_length+1,
"Allocate Sequence definition");
strncpy(seq->DE,raw->data,raw->DE_length);
seqlength = seq->SQ_length = raw->SQ_length;
compressed = raw->data + raw->DE_length;
seq->SQ = ECOMALLOC(seqlength+1,
"Allocate sequence buffer");
comp_status = uncompress((unsigned char*)seq->SQ,
&seqlength,
(unsigned char*)compressed,
raw->CSQ_length);
if (comp_status != Z_OK)
ECOERROR(ECO_IO_ERROR,"I cannot uncompress sequence data");
return seq;
}
/**
* Open the sequences database (.sdx file)
* @param prefix name of the database (radical without extension)
* @param index integer
*
* @return file object
*/
FILE *open_seqfile(const char *prefix,int32_t index)
{
char filename_buffer[1024];
int32_t filename_length;
FILE *input;
int32_t seqcount;
filename_length = snprintf(filename_buffer,
1023,
"%s_%03d.sdx",
prefix,
index);
fprintf(stderr,"# Coucou %s\n",filename_buffer);
if (filename_length >= 1024)
ECOERROR(ECO_ASSERT_ERROR,"file name is too long");
filename_buffer[filename_length]=0;
input=open_ecorecorddb(filename_buffer,&seqcount,0);
if (input)
fprintf(stderr,"# Reading file %s containing %d sequences...\n",
filename_buffer,
seqcount);
return input;
}
ecoseq_t *ecoseq_iterator(const char *prefix)
{
static FILE *current_seq_file= NULL;
static int32_t current_file_idx = 1;
static char current_prefix[1024];
ecoseq_t *seq;
if (prefix)
{
current_file_idx = 1;
if (current_seq_file)
fclose(current_seq_file);
strncpy(current_prefix,prefix,1023);
current_prefix[1024]=0;
current_seq_file = open_seqfile(current_prefix,
current_file_idx);
if (!current_seq_file)
return NULL;
}
seq = readnext_ecoseq(current_seq_file);
if (!seq && feof(current_seq_file))
{
current_file_idx++;
fclose(current_seq_file);
current_seq_file = open_seqfile(current_prefix,
current_file_idx);
if (current_seq_file)
seq = readnext_ecoseq(current_seq_file);
}
return seq;
}