Accept sequences in lower case (sequence data is converted to upper case when read)

git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@217 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
2009-05-13 13:21:10 +00:00
parent 1428cd7499
commit ad6f493d0f
9 changed files with 261 additions and 42 deletions

122
.cproject Normal file
View File

@ -0,0 +1,122 @@
<?xml version="1.0" encoding="UTF-8"?>
<?fileVersion 4.0.0?>
<cproject>
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.toolchain.gnu.macosx.base.985481067">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.toolchain.gnu.macosx.base.985481067" moduleId="org.eclipse.cdt.core.settings" name="MacOSX GCC">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.MachO" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.MakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="ecoPCR" buildProperties="" id="cdt.managedbuild.toolchain.gnu.macosx.base.985481067" name="MacOSX GCC" parent="org.eclipse.cdt.build.core.emptycfg">
<folderInfo id="cdt.managedbuild.toolchain.gnu.macosx.base.985481067.141857048" name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.base.1673936174" name="cdt.managedbuild.toolchain.gnu.macosx.base" superClass="cdt.managedbuild.toolchain.gnu.macosx.base">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.MachO" id="cdt.managedbuild.target.gnu.platform.macosx.base.584997877" name="Debug Platform" osList="macosx" superClass="cdt.managedbuild.target.gnu.platform.macosx.base"/>
<builder id="cdt.managedbuild.target.gnu.builder.macosx.base.328283627" managedBuildOn="false" name="Gnu Make Builder.MacOSX GCC" superClass="cdt.managedbuild.target.gnu.builder.macosx.base"/>
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.base.627652869" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.base"/>
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.base.815782479" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.base.536333148" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.202459766" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.base.1142106025" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.base.845498516" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.base"/>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile"/>
<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
<buildOutputProvider>
<openAction enabled="true" filePath=""/>
<parser enabled="true"/>
</buildOutputProvider>
<scannerInfoProvider id="specsFile">
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
<buildOutputProvider>
<openAction enabled="true" filePath=""/>
<parser enabled="true"/>
</buildOutputProvider>
<scannerInfoProvider id="makefileGenerator">
<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
<buildOutputProvider>
<openAction enabled="false" filePath=""/>
<parser enabled="false"/>
</buildOutputProvider>
<scannerInfoProvider id="specsFile">
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
<buildOutputProvider>
<openAction enabled="false" filePath=""/>
<parser enabled="false"/>
</buildOutputProvider>
<scannerInfoProvider id="specsFile">
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
<buildOutputProvider>
<openAction enabled="false" filePath=""/>
<parser enabled="false"/>
</buildOutputProvider>
<scannerInfoProvider id="specsFile">
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfile">
<buildOutputProvider>
<openAction enabled="false" filePath=""/>
<parser enabled="false"/>
</buildOutputProvider>
<scannerInfoProvider id="specsFile">
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileCPP">
<buildOutputProvider>
<openAction enabled="false" filePath=""/>
<parser enabled="false"/>
</buildOutputProvider>
<scannerInfoProvider id="specsFile">
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileC">
<buildOutputProvider>
<openAction enabled="false" filePath=""/>
<parser enabled="false"/>
</buildOutputProvider>
<scannerInfoProvider id="specsFile">
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="ecoPCR.null.1320766773" name="ecoPCR"/>
</storageModule>
</cproject>

83
.project Normal file
View File

@ -0,0 +1,83 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>ecoPCR</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.python.pydev.PyDevBuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
<triggers>clean,full,incremental,</triggers>
<arguments>
<dictionary>
<key>org.eclipse.cdt.make.core.fullBuildTarget</key>
<value>all</value>
</dictionary>
<dictionary>
<key>?name?</key>
<value></value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableAutoBuild</key>
<value>false</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableFullBuild</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableCleanBuild</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.cleanBuildTarget</key>
<value>clean</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.append_environment</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.contents</key>
<value>org.eclipse.cdt.make.core.activeConfigSettings</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.useDefaultBuildCmd</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.buildArguments</key>
<value></value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.buildCommand</key>
<value>make</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.autoBuildTarget</key>
<value>all</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.stopOnError</key>
<value>true</value>
</dictionary>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.cdt.core.cnature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
<nature>org.python.pydev.pythonNature</nature>
</natures>
</projectDescription>

7
.pydevproject Normal file
View File

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<?eclipse-pydev version="1.0"?>
<pydev_project>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.4</pydev_property>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
</pydev_project>

BIN
src/ecoPCR.gz Executable file

Binary file not shown.

BIN
src/ecofind.gz Executable file

Binary file not shown.

BIN
src/ecogrep Executable file

Binary file not shown.

BIN
src/ecogrep.gz Executable file

Binary file not shown.

BIN
src/ecoisundertaxon Executable file

Binary file not shown.

View File

@ -4,6 +4,7 @@
#include <zlib.h> #include <zlib.h>
#include <string.h> #include <string.h>
#include <stdio.h> #include <stdio.h>
#include <ctype.h>
static FILE *open_seqfile(const char *prefix,int32_t index); static FILE *open_seqfile(const char *prefix,int32_t index);
@ -11,32 +12,32 @@ static FILE *open_seqfile(const char *prefix,int32_t index);
ecoseq_t *new_ecoseq() ecoseq_t *new_ecoseq()
{ {
void *tmp; void *tmp;
tmp = ECOMALLOC(sizeof(ecoseq_t),"Allocate new ecoseq structure"); tmp = ECOMALLOC(sizeof(ecoseq_t),"Allocate new ecoseq structure");
return tmp; return tmp;
} }
/**
 * Release an ecoseq structure
 *
 * Each of the AC, DE and SQ members that is non-null is handed to
 * ECOFREE, then the structure itself is.
 *
 * @param	seq	structure to release, may be a null pointer
 *
 * @return 0 when the structure was released, 1 when seq was null
 */
int32_t delete_ecoseq(ecoseq_t * seq)
{
	/* Nothing to release: report it to the caller */
	if (!seq)
		return 1;

	if (seq->AC)
	{
		ECOFREE(seq->AC,"Free sequence AC");
	}

	if (seq->DE)
	{
		ECOFREE(seq->DE,"Free sequence DE");
	}

	if (seq->SQ)
	{
		ECOFREE(seq->SQ,"Free sequence SQ");
	}

	/* Members first, container last */
	ECOFREE(seq,"Free sequence structure");

	return 0;
}
@ -49,9 +50,9 @@ ecoseq_t *new_ecoseq_with_data( char *AC,
ecoseq_t *tmp; ecoseq_t *tmp;
int32_t lstr; int32_t lstr;
tmp = new_ecoseq(); tmp = new_ecoseq();
tmp->taxid=taxid_idx; tmp->taxid=taxid_idx;
if (AC) if (AC)
{ {
lstr =strlen(AC); lstr =strlen(AC);
@ -97,12 +98,14 @@ ecoseq_t *readnext_ecoseq(FILE *f)
int32_t comp_status; int32_t comp_status;
unsigned long int seqlength; unsigned long int seqlength;
int32_t rs; int32_t rs;
char *c;
int32_t i;
raw = read_ecorecord(f,&rs); raw = read_ecorecord(f,&rs);
if (!raw) if (!raw)
return NULL; return NULL;
if (is_big_endian()) if (is_big_endian())
{ {
raw->CSQ_length = swap_int32_t(raw->CSQ_length); raw->CSQ_length = swap_int32_t(raw->CSQ_length);
@ -110,44 +113,48 @@ ecoseq_t *readnext_ecoseq(FILE *f)
raw->SQ_length = swap_int32_t(raw->SQ_length); raw->SQ_length = swap_int32_t(raw->SQ_length);
raw->taxid = swap_int32_t(raw->taxid); raw->taxid = swap_int32_t(raw->taxid);
} }
seq = new_ecoseq(); seq = new_ecoseq();
seq->taxid = raw->taxid; seq->taxid = raw->taxid;
seq->AC = ECOMALLOC(strlen(raw->AC) +1, seq->AC = ECOMALLOC(strlen(raw->AC) +1,
"Allocate Sequence Accesion number"); "Allocate Sequence Accesion number");
strncpy(seq->AC,raw->AC,strlen(raw->AC)); strncpy(seq->AC,raw->AC,strlen(raw->AC));
seq->DE = ECOMALLOC(raw->DE_length+1, seq->DE = ECOMALLOC(raw->DE_length+1,
"Allocate Sequence definition"); "Allocate Sequence definition");
strncpy(seq->DE,raw->data,raw->DE_length); strncpy(seq->DE,raw->data,raw->DE_length);
seqlength = seq->SQ_length = raw->SQ_length; seqlength = seq->SQ_length = raw->SQ_length;
compressed = raw->data + raw->DE_length; compressed = raw->data + raw->DE_length;
seq->SQ = ECOMALLOC(seqlength+1, seq->SQ = ECOMALLOC(seqlength+1,
"Allocate sequence buffer"); "Allocate sequence buffer");
comp_status = uncompress((unsigned char*)seq->SQ, comp_status = uncompress((unsigned char*)seq->SQ,
&seqlength, &seqlength,
(unsigned char*)compressed, (unsigned char*)compressed,
raw->CSQ_length); raw->CSQ_length);
if (comp_status != Z_OK) if (comp_status != Z_OK)
ECOERROR(ECO_IO_ERROR,"I cannot uncompress sequence data"); ECOERROR(ECO_IO_ERROR,"I cannot uncompress sequence data");
for (c=seq->SQ,i=0;i<seqlength;c++,i++)
*c=toupper(*c);
return seq; return seq;
} }
/** /**
* Open the sequences database (.sdx file) * Open the sequences database (.sdx file)
* @param prefix name of the database (radical without extension) * @param prefix name of the database (radical without extension)
* @param index integer * @param index integer
* *
* @return file object * @return file object
*/ */
FILE *open_seqfile(const char *prefix,int32_t index) FILE *open_seqfile(const char *prefix,int32_t index)
{ {
@ -161,22 +168,22 @@ FILE *open_seqfile(const char *prefix,int32_t index)
"%s_%03d.sdx", "%s_%03d.sdx",
prefix, prefix,
index); index);
fprintf(stderr,"# Coucou %s\n",filename_buffer); fprintf(stderr,"# Coucou %s\n",filename_buffer);
if (filename_length >= 1024) if (filename_length >= 1024)
ECOERROR(ECO_ASSERT_ERROR,"file name is too long"); ECOERROR(ECO_ASSERT_ERROR,"file name is too long");
filename_buffer[filename_length]=0; filename_buffer[filename_length]=0;
input=open_ecorecorddb(filename_buffer,&seqcount,0); input=open_ecorecorddb(filename_buffer,&seqcount,0);
if (input) if (input)
fprintf(stderr,"# Reading file %s containing %d sequences...\n", fprintf(stderr,"# Reading file %s containing %d sequences...\n",
filename_buffer, filename_buffer,
seqcount); seqcount);
return input; return input;
} }
@ -186,38 +193,38 @@ ecoseq_t *ecoseq_iterator(const char *prefix)
static int32_t current_file_idx = 1; static int32_t current_file_idx = 1;
static char current_prefix[1024]; static char current_prefix[1024];
ecoseq_t *seq; ecoseq_t *seq;
if (prefix) if (prefix)
{ {
current_file_idx = 1; current_file_idx = 1;
if (current_seq_file) if (current_seq_file)
fclose(current_seq_file); fclose(current_seq_file);
strncpy(current_prefix,prefix,1023); strncpy(current_prefix,prefix,1023);
current_prefix[1024]=0; current_prefix[1024]=0;
current_seq_file = open_seqfile(current_prefix, current_seq_file = open_seqfile(current_prefix,
current_file_idx); current_file_idx);
if (!current_seq_file) if (!current_seq_file)
return NULL; return NULL;
} }
seq = readnext_ecoseq(current_seq_file); seq = readnext_ecoseq(current_seq_file);
if (!seq && feof(current_seq_file)) if (!seq && feof(current_seq_file))
{ {
current_file_idx++; current_file_idx++;
fclose(current_seq_file); fclose(current_seq_file);
current_seq_file = open_seqfile(current_prefix, current_seq_file = open_seqfile(current_prefix,
current_file_idx); current_file_idx);
if (current_seq_file) if (current_seq_file)
seq = readnext_ecoseq(current_seq_file); seq = readnext_ecoseq(current_seq_file);
} }
return seq; return seq;
} }