Initial commit for this soft
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/trunk@175 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
223
src/libecoPCR/ecoseq.c
Normal file
223
src/libecoPCR/ecoseq.c
Normal file
@ -0,0 +1,223 @@
|
||||
#include "ecoPCR.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <zlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
static FILE *open_seqfile(const char *prefix,int32_t index);
|
||||
|
||||
|
||||
ecoseq_t *new_ecoseq()
|
||||
{
|
||||
void *tmp;
|
||||
|
||||
tmp = ECOMALLOC(sizeof(ecoseq_t),"Allocate new ecoseq structure");
|
||||
|
||||
return tmp;
|
||||
}
|
||||
|
||||
int32_t delete_ecoseq(ecoseq_t * seq)
|
||||
{
|
||||
|
||||
if (seq)
|
||||
{
|
||||
if (seq->AC)
|
||||
ECOFREE(seq->AC,"Free sequence AC");
|
||||
|
||||
if (seq->DE)
|
||||
ECOFREE(seq->DE,"Free sequence DE");
|
||||
|
||||
if (seq->SQ)
|
||||
ECOFREE(seq->SQ,"Free sequence SQ");
|
||||
|
||||
ECOFREE(seq,"Free sequence structure");
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
ecoseq_t *new_ecoseq_with_data( char *AC,
|
||||
char *DE,
|
||||
char *SQ,
|
||||
int32_t taxid_idx
|
||||
)
|
||||
{
|
||||
ecoseq_t *tmp;
|
||||
int32_t lstr;
|
||||
tmp = new_ecoseq();
|
||||
|
||||
tmp->taxid=taxid_idx;
|
||||
|
||||
if (AC)
|
||||
{
|
||||
lstr =strlen(AC);
|
||||
tmp->AC=ECOMALLOC((lstr+1) * sizeof(char),
|
||||
"Allocate sequence accession");
|
||||
strcpy(tmp->AC,AC);
|
||||
}
|
||||
|
||||
if (DE)
|
||||
{
|
||||
lstr =strlen(DE);
|
||||
tmp->DE=ECOMALLOC((lstr+1) * sizeof(char),
|
||||
"Allocate sequence definition");
|
||||
strcpy(tmp->DE,DE);
|
||||
}
|
||||
|
||||
if (SQ)
|
||||
{
|
||||
lstr =strlen(SQ);
|
||||
tmp->SQ=ECOMALLOC((lstr+1) * sizeof(char),
|
||||
"Allocate sequence data");
|
||||
strcpy(tmp->SQ,SQ);
|
||||
}
|
||||
return tmp;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* ?? used ??
|
||||
**/
|
||||
FILE *open_ecoseqdb(const char *filename,
|
||||
int32_t *sequencecount)
|
||||
{
|
||||
return open_ecorecorddb(filename,sequencecount,1);
|
||||
}
|
||||
|
||||
ecoseq_t *readnext_ecoseq(FILE *f)
|
||||
{
|
||||
char *compressed=NULL;
|
||||
|
||||
ecoseqformat_t *raw;
|
||||
ecoseq_t *seq;
|
||||
int32_t comp_status;
|
||||
unsigned long int seqlength;
|
||||
int32_t rs;
|
||||
|
||||
raw = read_ecorecord(f,&rs);
|
||||
|
||||
if (!raw)
|
||||
return NULL;
|
||||
|
||||
if (is_big_endian())
|
||||
{
|
||||
raw->CSQ_length = swap_int32_t(raw->CSQ_length);
|
||||
raw->DE_length = swap_int32_t(raw->DE_length);
|
||||
raw->SQ_length = swap_int32_t(raw->SQ_length);
|
||||
raw->taxid = swap_int32_t(raw->taxid);
|
||||
}
|
||||
|
||||
seq = new_ecoseq();
|
||||
|
||||
seq->taxid = raw->taxid;
|
||||
|
||||
seq->AC = ECOMALLOC(strlen(raw->AC) +1,
|
||||
"Allocate Sequence Accesion number");
|
||||
strncpy(seq->AC,raw->AC,strlen(raw->AC));
|
||||
|
||||
|
||||
seq->DE = ECOMALLOC(raw->DE_length+1,
|
||||
"Allocate Sequence definition");
|
||||
strncpy(seq->DE,raw->data,raw->DE_length);
|
||||
|
||||
seqlength = seq->SQ_length = raw->SQ_length;
|
||||
|
||||
compressed = raw->data + raw->DE_length;
|
||||
|
||||
seq->SQ = ECOMALLOC(seqlength+1,
|
||||
"Allocate sequence buffer");
|
||||
|
||||
comp_status = uncompress((unsigned char*)seq->SQ,
|
||||
&seqlength,
|
||||
(unsigned char*)compressed,
|
||||
raw->CSQ_length);
|
||||
|
||||
if (comp_status != Z_OK)
|
||||
ECOERROR(ECO_IO_ERROR,"I cannot uncompress sequence data");
|
||||
|
||||
return seq;
|
||||
}
|
||||
|
||||
/**
|
||||
* Open the sequences database (.sdx file)
|
||||
* @param prefix name of the database (radical without extension)
|
||||
* @param index integer
|
||||
*
|
||||
* @return file object
|
||||
*/
|
||||
FILE *open_seqfile(const char *prefix,int32_t index)
|
||||
{
|
||||
char filename_buffer[1024];
|
||||
int32_t filename_length;
|
||||
FILE *input;
|
||||
int32_t seqcount;
|
||||
|
||||
filename_length = snprintf(filename_buffer,
|
||||
1023,
|
||||
"%s_%03d.sdx",
|
||||
prefix,
|
||||
index);
|
||||
|
||||
fprintf(stderr,"# Coucou %s\n",filename_buffer);
|
||||
|
||||
|
||||
if (filename_length >= 1024)
|
||||
ECOERROR(ECO_ASSERT_ERROR,"file name is too long");
|
||||
|
||||
filename_buffer[filename_length]=0;
|
||||
|
||||
input=open_ecorecorddb(filename_buffer,&seqcount,0);
|
||||
|
||||
if (input)
|
||||
fprintf(stderr,"# Reading file %s containing %d sequences...\n",
|
||||
filename_buffer,
|
||||
seqcount);
|
||||
|
||||
return input;
|
||||
}
|
||||
|
||||
ecoseq_t *ecoseq_iterator(const char *prefix)
|
||||
{
|
||||
static FILE *current_seq_file= NULL;
|
||||
static int32_t current_file_idx = 1;
|
||||
static char current_prefix[1024];
|
||||
ecoseq_t *seq;
|
||||
|
||||
if (prefix)
|
||||
{
|
||||
current_file_idx = 1;
|
||||
|
||||
if (current_seq_file)
|
||||
fclose(current_seq_file);
|
||||
|
||||
strncpy(current_prefix,prefix,1023);
|
||||
current_prefix[1024]=0;
|
||||
|
||||
current_seq_file = open_seqfile(current_prefix,
|
||||
current_file_idx);
|
||||
|
||||
if (!current_seq_file)
|
||||
return NULL;
|
||||
|
||||
}
|
||||
|
||||
seq = readnext_ecoseq(current_seq_file);
|
||||
|
||||
if (!seq && feof(current_seq_file))
|
||||
{
|
||||
current_file_idx++;
|
||||
fclose(current_seq_file);
|
||||
current_seq_file = open_seqfile(current_prefix,
|
||||
current_file_idx);
|
||||
|
||||
|
||||
if (current_seq_file)
|
||||
seq = readnext_ecoseq(current_seq_file);
|
||||
}
|
||||
|
||||
return seq;
|
||||
}
|
Reference in New Issue
Block a user