77 Commits

Author SHA1 Message Date
b000ae68d9 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@90 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-29 15:35:04 +00:00
2e8c634b80 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@89 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-29 15:34:23 +00:00
aaaf31063d git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@88 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-29 15:34:06 +00:00
ddcf0c2fc4 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@87 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-29 12:04:27 +00:00
9fe0c1ce48 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@86 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-29 07:17:45 +00:00
8eccd87035 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@85 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-29 07:14:53 +00:00
cacd9854f3 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@84 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-29 07:14:44 +00:00
212c1fbb7f git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@83 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-29 07:14:31 +00:00
2e7e0f625f git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@82 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-21 14:04:54 +00:00
d6fcc7673b git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@81 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-21 13:58:27 +00:00
379ca7988b git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@80 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-21 13:58:19 +00:00
24f958ce1f git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@79 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-21 13:58:06 +00:00
36d536305e git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@78 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-21 13:57:59 +00:00
1c421f5903 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@77 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-21 13:57:53 +00:00
c77c360782 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@76 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-20 07:39:40 +00:00
163693a132 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@75 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-20 07:39:25 +00:00
7f39141289 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@74 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-20 07:39:12 +00:00
433a19fd87 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@73 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-20 07:38:54 +00:00
dc6e7bba69 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@72 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-20 07:37:52 +00:00
f91c7804ab git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@71 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-20 07:37:40 +00:00
d94fc0daa4 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@70 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-20 07:37:33 +00:00
d8696f0566 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@69 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-20 07:37:27 +00:00
388fe75522 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@68 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-18 08:46:15 +00:00
fc26a9fda3 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@67 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-15 13:40:14 +00:00
8455ed0a99 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@66 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-14 14:11:13 +00:00
ab46791915 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@65 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-14 13:38:31 +00:00
86253e3712 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@64 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-14 13:34:45 +00:00
9f4aa93249 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@63 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-14 13:01:41 +00:00
087ee70620 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@62 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-13 11:22:31 +00:00
c264556629 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@61 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-13 09:56:09 +00:00
697e16ae98 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@60 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-12 08:39:10 +00:00
cb71eb72f0 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@59 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-12 08:38:28 +00:00
7fbd940ba2 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@58 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-12 08:38:22 +00:00
cdc0375356 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@57 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-12 08:38:16 +00:00
51a9cd7399 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@56 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-08 14:01:11 +00:00
88f0fff090 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@55 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-08 14:01:04 +00:00
82c93c6d10 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@54 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-08 14:00:59 +00:00
7bd4139a94 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@53 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-08 14:00:52 +00:00
0e5b94ad80 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@52 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-08 13:07:33 +00:00
c736f5b59d git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@51 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-08 12:57:49 +00:00
82f8b79cd2 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@50 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-08 12:56:40 +00:00
f3b14b9a8b git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@49 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-08 12:54:52 +00:00
255ab0c670 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@48 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-08 12:53:41 +00:00
6b28897ff5 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@47 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-06 12:37:17 +00:00
73786523bb git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@46 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-06 12:28:47 +00:00
d1051d2123 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@45 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-06 10:16:08 +00:00
6a8bf94854 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@44 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-06 10:14:29 +00:00
3117f978c2 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@43 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-06 10:13:43 +00:00
dcee664315 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@42 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-06 10:12:30 +00:00
cd4ea3fb32 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@41 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-06 10:12:18 +00:00
a827cc0cfe git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@40 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-05 14:27:28 +00:00
065f4fe0c6 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@39 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-05 14:10:06 +00:00
2a433afba2 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@38 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-05 14:00:14 +00:00
45f27e8c87 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@37 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-05 13:40:06 +00:00
d36821796f git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@36 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-05 13:39:23 +00:00
f2d3a6c5f6 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@35 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-05 13:39:08 +00:00
8b455b0954 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@34 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-05 13:38:33 +00:00
38030f2298 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@33 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-05 13:38:21 +00:00
1a768aa97e git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@32 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-05 13:37:41 +00:00
65e22a235d git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@31 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-04 15:51:49 +00:00
1673e76781 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@30 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-04 15:34:47 +00:00
0cbe831e0e git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@29 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-04 15:34:31 +00:00
adca6d14d9 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@28 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-04 15:34:14 +00:00
7d9ab96dc1 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@27 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-04 15:34:00 +00:00
5094a9a9ce git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@26 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-04 15:33:49 +00:00
4fd532e05c git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@25 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-04 15:33:30 +00:00
bcfa42d009 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@24 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-04 15:33:22 +00:00
64c1bf5151 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@23 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-01 15:08:42 +00:00
f64bb16909 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@22 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-01 15:08:15 +00:00
736a5b384f git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@21 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-01 15:07:24 +00:00
079d097fb2 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@20 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-01 14:33:04 +00:00
f4cce78d05 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@19 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-01 14:32:21 +00:00
22216dc3a0 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@18 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-06-01 14:16:06 +00:00
4b40069c51 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@17 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-05-28 09:09:47 +00:00
c81a591889 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@16 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-05-25 15:56:13 +00:00
33984922bd new makefile file
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@15 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2007-05-25 14:47:28 +00:00
e599fc29a0 Nouvelle branche ouverte pour que julien fasse le menage
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/branches/refactoring@14 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2007-05-25 14:43:12 +00:00
32 changed files with 2726 additions and 702 deletions

View File

@ -1,62 +0,0 @@
r -1acghtgd -2gctcagctagcta /Users/coissac/encours/ecoPCR/tools/ecodb
print put
print out
f ecoPCR
r -1GGGCAATCCTGAGCCAA -2CCATTGAGTCTCTGCACCTATC -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
bt
print label1
up
print label1
print (char*)label1
r -1GGGCAATCCTGAGCCAA -2CCATTGAGTCTCTGCACCTATC -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
bt
up 3
l
b 38
r
r -1GGGCAATCCTGAGCCAA -2CCATTGAGTCTCTGCACCTATC -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
print label
print ranks
print *ranks
print ranks->label +1
print *ranks->label +1
print *(ranks->label +1)
print *(ranks->label +2)
b ecotax.c:147
r -1GGGCAATCCTGAGCCAA -2CCATTGAGTCTCTGCACCTATC -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
print current_taxon ;
print current_taxon
n
print current_taxon ;
print current_taxon
print *current_taxon
print taxonomy->taxons[11]
print taxonomy->taxons[2952]
print taxonomy->taxons->taxon[2952]
print taxonomy->ranks->rank[11]
print taxonomy->ranks->label[11]
print taxonomy->ranks->label[3]
b ecotax.c:147
r -1GGGCAATCCTGAGCCAA -2CCATTGAGTCTCTGCACCTATC -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
print current_taxon
print *current_taxon
n
print *current_taxon
b ecotax.c:147
r -1GGGCAATCCTGAGCCAA -2CCATTGAGTCTCTGCACCTATC -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
n
print *current_taxon
r -1GGGCAATCCTGAGCCAA -2CCATTGAGTCTCTGCACCTATC -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
n
print *current_taxon
b ecodna.c:70
r -1GGGCAATCCTGAGCC#A#A# -2CCATTGAGTCTCTGCACCTA#T#C# -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
*sb
print *sb
print sb
print str
b 52
r -1GGGCAATCCTGAGCC#A#A# -2CCATTGAGTCTCTGCACCTA#T#C# -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
print str
r -1GGGCAATCCTGAGCC#A#A# -2CCATTGAGTCTCTGCACCTA#T#C# -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
bt

View File

@ -1,46 +0,0 @@
################################################################################
# Automatically-generated file. Do not edit!
################################################################################
-include ../makefile.init
RM := rm -rf
# All of the sources participating in the build are defined here
-include sources.mk
-include subdir.mk
-include src/libecoPCR/subdir.mk
-include src/libapat/subdir.mk
-include src/subdir.mk
-include objects.mk
ifneq ($(MAKECMDGOALS),clean)
ifneq ($(strip $(C_DEPS)),)
-include $(C_DEPS)
endif
endif
-include ../makefile.defs
# Add inputs and outputs from these tool invocations to the build variables
# All Target
all: ecoPCR
# Tool invocations
ecoPCR: $(OBJS) $(USER_OBJS)
@echo 'Building target: $@'
@echo 'Invoking: MacOS X C Linker'
gcc -o "ecoPCR" $(OBJS) $(USER_OBJS) $(LIBS)
@echo 'Finished building target: $@'
@echo ' '
# Other Targets
clean:
-$(RM) $(OBJS)$(EXECUTABLES)$(C_DEPS) ecoPCR
-@echo ' '
.PHONY: all clean dependents
.SECONDARY:
-include ../makefile.targets

View File

@ -1,7 +0,0 @@
################################################################################
# Automatically-generated file. Do not edit!
################################################################################
LIBS := -lz
USER_OBJS :=

View File

@ -1,19 +0,0 @@
################################################################################
# Automatically-generated file. Do not edit!
################################################################################
C_SRCS :=
O_SRCS :=
ASM_SRCS :=
S_SRCS :=
OBJ_SRCS :=
OBJS :=
EXECUTABLES :=
C_DEPS :=
# Every subdirectory with source files must be described here
SUBDIRS := \
src/libecoPCR \
src/libapat \
src \

View File

@ -1,24 +0,0 @@
################################################################################
# Automatically-generated file. Do not edit!
################################################################################
# Add inputs and outputs from these tool invocations to the build variables
C_SRCS += \
../src/libapat/CODES/gendual.c
OBJS += \
./src/libapat/CODES/gendual.o
C_DEPS += \
./src/libapat/CODES/gendual.d
# Each subdirectory must supply rules for building sources it contributes
src/libapat/CODES/%.o: ../src/libapat/CODES/%.c
@echo 'Building file: $<'
@echo 'Invoking: GCC C Compiler'
gcc -DMAC_OS_X -O0 -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
@echo 'Finished building: $<'
@echo ' '

View File

@ -1,30 +0,0 @@
################################################################################
# Automatically-generated file. Do not edit!
################################################################################
# Add inputs and outputs from these tool invocations to the build variables
C_SRCS += \
../src/libapat/apat_parse.c \
../src/libapat/apat_search.c \
../src/libapat/libstki.c
OBJS += \
./src/libapat/apat_parse.o \
./src/libapat/apat_search.o \
./src/libapat/libstki.o
C_DEPS += \
./src/libapat/apat_parse.d \
./src/libapat/apat_search.d \
./src/libapat/libstki.d
# Each subdirectory must supply rules for building sources it contributes
src/libapat/%.o: ../src/libapat/%.c
@echo 'Building file: $<'
@echo 'Invoking: GCC C Compiler'
gcc -DMAC_OS_X -O0 -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
@echo 'Finished building: $<'
@echo ' '

View File

@ -1,45 +0,0 @@
################################################################################
# Automatically-generated file. Do not edit!
################################################################################
# Add inputs and outputs from these tool invocations to the build variables
C_SRCS += \
../src/libecoPCR/ecoError.c \
../src/libecoPCR/ecoIOUtils.c \
../src/libecoPCR/ecoMalloc.c \
../src/libecoPCR/ecoapat.c \
../src/libecoPCR/ecodna.c \
../src/libecoPCR/ecorank.c \
../src/libecoPCR/ecoseq.c \
../src/libecoPCR/ecotax.c
OBJS += \
./src/libecoPCR/ecoError.o \
./src/libecoPCR/ecoIOUtils.o \
./src/libecoPCR/ecoMalloc.o \
./src/libecoPCR/ecoapat.o \
./src/libecoPCR/ecodna.o \
./src/libecoPCR/ecorank.o \
./src/libecoPCR/ecoseq.o \
./src/libecoPCR/ecotax.o
C_DEPS += \
./src/libecoPCR/ecoError.d \
./src/libecoPCR/ecoIOUtils.d \
./src/libecoPCR/ecoMalloc.d \
./src/libecoPCR/ecoapat.d \
./src/libecoPCR/ecodna.d \
./src/libecoPCR/ecorank.d \
./src/libecoPCR/ecoseq.d \
./src/libecoPCR/ecotax.d
# Each subdirectory must supply rules for building sources it contributes
src/libecoPCR/%.o: ../src/libecoPCR/%.c
@echo 'Building file: $<'
@echo 'Invoking: GCC C Compiler'
gcc -DMAC_OS_X -O0 -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
@echo 'Finished building: $<'
@echo ' '

View File

@ -1,24 +0,0 @@
################################################################################
# Automatically-generated file. Do not edit!
################################################################################
# Add inputs and outputs from these tool invocations to the build variables
C_SRCS += \
../src/ecopcr.c
OBJS += \
./src/ecopcr.o
C_DEPS += \
./src/ecopcr.d
# Each subdirectory must supply rules for building sources it contributes
src/%.o: ../src/%.c
@echo 'Building file: $<'
@echo 'Invoking: GCC C Compiler'
gcc -DMAC_OS_X -O0 -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
@echo 'Finished building: $<'
@echo ' '

View File

@ -1,46 +0,0 @@
################################################################################
# Automatically-generated file. Do not edit!
################################################################################
-include ../makefile.init
RM := rm -rf
# All of the sources participating in the build are defined here
-include sources.mk
-include subdir.mk
-include src/libecoPCR/subdir.mk
-include src/libapat/subdir.mk
-include src/subdir.mk
-include objects.mk
ifneq ($(MAKECMDGOALS),clean)
ifneq ($(strip $(C_DEPS)),)
-include $(C_DEPS)
endif
endif
-include ../makefile.defs
# Add inputs and outputs from these tool invocations to the build variables
# All Target
all: ecoPCR
# Tool invocations
ecoPCR: $(OBJS) $(USER_OBJS)
@echo 'Building target: $@'
@echo 'Invoking: MacOS X C Linker'
gcc -o "ecoPCR" $(OBJS) $(USER_OBJS) $(LIBS)
@echo 'Finished building target: $@'
@echo ' '
# Other Targets
clean:
-$(RM) $(OBJS)$(EXECUTABLES)$(C_DEPS) ecoPCR
-@echo ' '
.PHONY: all clean dependents
.SECONDARY:
-include ../makefile.targets

View File

@ -1,7 +0,0 @@
################################################################################
# Automatically-generated file. Do not edit!
################################################################################
LIBS := -lz
USER_OBJS :=

View File

@ -1,19 +0,0 @@
################################################################################
# Automatically-generated file. Do not edit!
################################################################################
C_SRCS :=
O_SRCS :=
ASM_SRCS :=
S_SRCS :=
OBJ_SRCS :=
OBJS :=
EXECUTABLES :=
C_DEPS :=
# Every subdirectory with source files must be described here
SUBDIRS := \
src/libecoPCR \
src/libapat \
src \

View File

@ -1,30 +0,0 @@
################################################################################
# Automatically-generated file. Do not edit!
################################################################################
# Add inputs and outputs from these tool invocations to the build variables
C_SRCS += \
../src/libapat/apat_parse.c \
../src/libapat/apat_search.c \
../src/libapat/libstki.c
OBJS += \
./src/libapat/apat_parse.o \
./src/libapat/apat_search.o \
./src/libapat/libstki.o
C_DEPS += \
./src/libapat/apat_parse.d \
./src/libapat/apat_search.d \
./src/libapat/libstki.d
# Each subdirectory must supply rules for building sources it contributes
src/libapat/%.o: ../src/libapat/%.c
@echo 'Building file: $<'
@echo 'Invoking: GCC C Compiler'
gcc -DMAC_OS_X -O3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
@echo 'Finished building: $<'
@echo ' '

View File

@ -1,45 +0,0 @@
################################################################################
# Automatically-generated file. Do not edit!
################################################################################
# Add inputs and outputs from these tool invocations to the build variables
C_SRCS += \
../src/libecoPCR/ecoError.c \
../src/libecoPCR/ecoIOUtils.c \
../src/libecoPCR/ecoMalloc.c \
../src/libecoPCR/ecoapat.c \
../src/libecoPCR/ecodna.c \
../src/libecoPCR/ecorank.c \
../src/libecoPCR/ecoseq.c \
../src/libecoPCR/ecotax.c
OBJS += \
./src/libecoPCR/ecoError.o \
./src/libecoPCR/ecoIOUtils.o \
./src/libecoPCR/ecoMalloc.o \
./src/libecoPCR/ecoapat.o \
./src/libecoPCR/ecodna.o \
./src/libecoPCR/ecorank.o \
./src/libecoPCR/ecoseq.o \
./src/libecoPCR/ecotax.o
C_DEPS += \
./src/libecoPCR/ecoError.d \
./src/libecoPCR/ecoIOUtils.d \
./src/libecoPCR/ecoMalloc.d \
./src/libecoPCR/ecoapat.d \
./src/libecoPCR/ecodna.d \
./src/libecoPCR/ecorank.d \
./src/libecoPCR/ecoseq.d \
./src/libecoPCR/ecotax.d
# Each subdirectory must supply rules for building sources it contributes
src/libecoPCR/%.o: ../src/libecoPCR/%.c
@echo 'Building file: $<'
@echo 'Invoking: GCC C Compiler'
gcc -DMAC_OS_X -O3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
@echo 'Finished building: $<'
@echo ' '

View File

@ -1,24 +0,0 @@
################################################################################
# Automatically-generated file. Do not edit!
################################################################################
# Add inputs and outputs from these tool invocations to the build variables
C_SRCS += \
../src/ecopcr.c
OBJS += \
./src/ecopcr.o
C_DEPS += \
./src/ecopcr.d
# Each subdirectory must supply rules for building sources it contributes
src/%.o: ../src/%.c
@echo 'Building file: $<'
@echo 'Invoking: GCC C Compiler'
gcc -DMAC_OS_X -O3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
@echo 'Finished building: $<'
@echo ' '

84
src/Makefile Normal file
View File

@ -0,0 +1,84 @@
EXEC=ecoPCR ecofind ecoisundertaxon
PCR_SRC= ecopcr.c
PCR_OBJ= $(patsubst %.c,%.o,$(PCR_SRC))
FIND_SRC= ecofind.c
FIND_OBJ= $(patsubst %.c,%.o,$(FIND_SRC))
IUT_SRC= ecoisundertaxon.c
IUT_OBJ= $(patsubst %.c,%.o,$(IUT_SRC))
SRCS= $(PCR_SRC) $(FIND_SRC) $(IUT_SRC)
LIB= -lecoPCR -lapat -lz -lm
LIBFILE= libapat/libapat.a \
libecoPCR/libecoPCR.a
include global.mk
all: $(EXEC)
########
#
# ecoPCR compilation
#
########
# executable compilation and link
ecoPCR: $(PCR_OBJ) $(LIBFILE)
$(CC) $(LDFLAGS) -o $@ $< $(LIBPATH) $(LIB)
########
#
# ecofind compilation
#
########
# executable compilation and link
ecofind: $(FIND_OBJ) $(LIBFILE)
$(CC) $(LDFLAGS) -o $@ $< $(LIBPATH) $(LIB)
########
#
# IsUnderTaxon compilation
#
########
# executable compilation and link
ecoisundertaxon: $(IUT_OBJ) $(LIBFILE)
$(CC) $(LDFLAGS) -o $@ $< $(LIBPATH) $(LIB)
########
#
# library compilation
#
########
libapat/libapat.a:
$(MAKE) -C libapat
libecoPCR/libecoPCR.a:
$(MAKE) -C libecoPCR
########
#
# project management
#
########
clean:
rm -f *.o
rm -f $(EXEC)
$(MAKE) -C libapat clean
$(MAKE) -C libecoPCR clean

345
src/ecofind.c Normal file
View File

@ -0,0 +1,345 @@
#include "libecoPCR/ecoPCR.h"
#include <regex.h>
#include <string.h>
#include <stdlib.h>
#include <getopt.h>
#include <stdio.h>
#define VERSION "0.1"
/**
* display the result
**/
static void printresult(ecotx_t *taxon,econame_t* name,ecotaxonomy_t *taxonomy)
{
char* rankname;
char* classname;
char* matchedname=taxon->name;
classname="scientific name";
if (name)
{
classname=name->classname;
matchedname=name->name;
}
rankname= taxonomy->ranks->label[taxon->rank];
printf("%10d \t| %15s \t|\t %-50s \t|\t %15s \t|\t %s\n",
taxon->taxid,
rankname,
matchedname,
classname,
taxon->name);
}
/**
* display header before printing any result
**/
static void printheader(void)
{
printf("# %12s \t| %15s \t|\t %-50s \t|\t %-15s \t|\t %s\n#\n",
"taxonomy id",
"taxonomy rank",
"name",
"class name",
"scientific name");
}
/**
* display son's list for given taxon
**/
static void get_son(ecotaxonomy_t *taxonomy, ecotx_t *taxon, int32_t *count, char *rankname)
{
int32_t i;
ecotx_t *current_taxon;
for ( i = 0, current_taxon = taxonomy->taxons->taxon;
i < taxonomy->taxons->count;
i++, current_taxon++)
{
if (taxon->taxid == current_taxon->parent->taxid)
{
if (rankname == NULL || !strcmp(rankname,taxonomy->ranks->label[current_taxon->rank]))
{
printresult(current_taxon, NULL, taxonomy);
(*count)++;
}
get_son(taxonomy,current_taxon,count,rankname);
}
}
}
/**
* display list of rank filter option (-l option)
**/
static void listfilteroptions(ecorankidx_t *ranks)
{
int32_t i;
printf("#\n");
for ( i=0;
i < ranks->count;
i++)
{
printf("# %s \n",ranks->label[i]);
}
printf("#\n");
}
/* ---------------------------------------- */
/* get back on given taxid taxonomic parent */
/* and display it */
/* ---------------------------------------- */
void gettaxidparents(int32_t taxid, ecotaxonomy_t *taxonomy, char *rankname)
{
ecotx_t *next_parent;
int32_t c = 0;
next_parent = eco_findtaxonbytaxid(taxonomy, taxid);
printheader();
printresult(next_parent, NULL,taxonomy);
while ( strcmp(next_parent->name, "root") )
{
next_parent = next_parent->parent;
if (rankname == NULL || !strcmp(rankname,taxonomy->ranks->label[next_parent->rank]))
{
printresult(next_parent, NULL,taxonomy);
c++;
}
}
printf("# %d parent(s) found\n#\n",c);
}
/**
* printout usage and exit
**/
#define PP fprintf(stderr,
static void ExitUsage(stat)
int stat;
{
PP "usage: ecofind [-d database] [-h] [-l] [-r taxonomic rank] [-p taxid] [-s taxid] <taxon name pattern> ... \n");
PP "type \"ecofind -h\" for help\n");
if (stat)
exit(stat);
}
#undef PP
/**
* printout help
**/
#define PP fprintf(stdout,
static void PrintHelp()
{
PP "------------------------------------------\n");
PP " ecofind Version %s\n", VERSION);
PP "------------------------------------------\n");
PP "synopsis : searching for taxonomic and rank and\n");
PP " taxonomy id for given regular expression patterns\n\n");
PP "usage: ecofind [options] <patterns>\n");
PP "------------------------------------------\n");
PP "options:\n");
PP "-a : [A]ll enable the search on all alternative names and not only scientific names.\n\n");
PP "-d : [D]atabase containing the taxonomy.\n");
PP " To match the expected format, the database\n");
PP " has to be formated first by the ecoPCRFormat.py\n");
PP " program located in the tools directory.\n");
PP " Write the database radical without any extension.\n\n");
PP "-h : [H]elp - print <this> help\n\n");
PP "-l : [L]ist all taxonomic rank available for -r option\n\n");
PP "-p : [P]arents : specifiying this option displays all parental tree's information for the given taxid.\n\n");
PP "-r : [R]estrict to given taxonomic rank\n\n");
PP "-s : [S]ons: specifiying this option displays all subtree's information for the given taxid.\n\n");
PP "arguments:\n");
PP "<taxon> name pattern bearing regular expressions\n\n");
PP "------------------------------------------\n");
PP " http://www.grenoble.prabi.fr/trac/ecoPCR/\n");
PP "------------------------------------------\n\n");
}
/* ----------------------------------------------- */
#define PATTERN_NUMBER 10
#define PATTERN_LENGHT 40
#define RESULT_LENGTH 100
int main(int argc, char **argv)
{
int32_t carg = 0;
int32_t nummatch = 0;
int32_t k,j = 0;
int32_t errflag = 0;
int32_t tax_count = 0;
int32_t alternative = 0;
char *prefix = NULL;
ecotaxonomy_t *taxonomy;
econame_t *name;
int32_t name_count;
int re_error;
int re_match;
regex_t re_preg;
int32_t uptree = 0;
int32_t subtree = 0;
char *rankname = NULL;
int32_t rankfilter = 1;
int32_t list = 0;
ecotx_t *subtree_parent;
int32_t count_son = 0;
while ((carg = getopt(argc, argv, "had:p:s:r:l")) != -1) {
switch (carg) {
case 's': /* path to the database */
sscanf(optarg,"%d",&subtree);
break;
case 'r': /* rank filter */
rankname = ECOMALLOC(strlen(optarg),"allocation rankname");
strcpy(rankname,optarg);
rankfilter = 0;
break;
case 'd': /* path to the database */
prefix = ECOMALLOC(strlen(optarg),"allocation prefix");
strcpy(prefix,optarg);
break;
case 'l': /* list rank filter options */
list = 1;
break;
case 'a': /* allow alternative names */
alternative = 1;
break;
case 'h': /* display help */
PrintHelp();
exit(0);
break;
case 'p': /* taxid */
sscanf(optarg,"%d",&uptree);
break;
case '?': /* bad option */
errflag++;
}
}
if ((argc - optind) < 1)
errflag++;
if (prefix == NULL)
{
prefix = getenv("ECOPCRDB");
if (prefix == NULL)
errflag++;
}
if (errflag && !uptree && !rankname && !subtree && !list)
ExitUsage(errflag);
/**
* load taxonomy using libecoPCR functions
**/
printf("# \n# opening %s database\n",prefix);
taxonomy = read_taxonomy(prefix,1);
tax_count = taxonomy->taxons->count;
name_count = taxonomy->names->count;
/* ---------------------------------------- */
/* list -r option possibility */
/* ---------------------------------------- */
if (list)
{
listfilteroptions(taxonomy->ranks);
return 0;
}
/* ---------------------------------------- */
/* display taxid parent if -t option */
/* specified in command line */
/* ---------------------------------------- */
if (uptree)
{
gettaxidparents(uptree,taxonomy,rankname);
return 0;
}
/* ---------------------------------------- */
/* display taxid sons if -s option */
/* specified in command line */
/* ---------------------------------------- */
if (subtree)
{
printheader();
subtree_parent = eco_findtaxonbytaxid(taxonomy,subtree);
printresult(subtree_parent, NULL,taxonomy);
get_son(taxonomy, subtree_parent,&count_son,rankname);
printf("# %d son(s) found\n#\n",count_son);
return 0;
}
printf("# %d taxons\n", tax_count);
/**
* parse taxonomy
**/
for (k=optind;k<argc;k++)
{
printf("#\n# searching for '%s' pattern\n",argv[k]);
re_error = regcomp (&re_preg, argv[k], REG_NOSUB | REG_EXTENDED | REG_ICASE);
if (re_error)
{
fprintf(stderr,"# misformed pattern '%s'\n",argv[k]);
exit(1);
}
nummatch=0;
printheader();
for (j=0,name=taxonomy->names->names;
j < name_count;
name++,j++)
{
if(rankname)
rankfilter = !(strcmp(rankname,taxonomy->ranks->label[name->taxon->rank]));
re_match = regexec (&re_preg, name->name, 0, NULL, 0);
if (!re_match && (alternative || name->is_scientificname) && rankfilter)
{
printresult(name->taxon,name,taxonomy);
nummatch++;
}
}
printf("# %d records found \n",nummatch);
regfree(&re_preg);
}
return 0;
}

403
src/ecogrep.c Normal file
View File

@ -0,0 +1,403 @@
#include "libecoPCR/ecoPCR.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <getopt.h>
#include <stdlib.h>
#include <sys/stat.h>
#define VERSION "0.1"
void getLineContent(char *stream, ecoseq_t *seq, ecoseq_t *oligoseq_1, ecoseq_t *oligoseq_2){
int i;
char *buffer;
for( i=0, buffer = strtok(stream,"|");
buffer != NULL;
i++, buffer = strtok(NULL,"|"))
{
switch (i) {
case 0:
seq->AC = strdup(buffer);
break;
case 4:
sscanf(buffer,"%d",&seq->taxid);
break;
case 13:
oligoseq_1->SQ = strdup(buffer);
oligoseq_1->SQ_length = strlen(buffer);
break;
case 15:
oligoseq_2->SQ = strdup(buffer);
oligoseq_2->SQ_length = strlen(buffer);
break;
case 18:
seq->SQ = strdup(buffer);
seq->SQ_length = strlen(buffer);
break;
default:
break;
}
}
}
void freememory(char **tab, int32_t num){
int32_t i;
for (i=0;i<num-1;i++){
ECOFREE(tab[i],"Error in freememory function");
}
}
/**
* Check if pattern match a string using mamberall libapat function
* @param line array containing sequence information
* @param pattern array containing patterns to test on the sequence
* @param numpattern number of pattern in pattern array
* @param error_max error rate allowed by the user
*
* @return int 1 if a pattern match, else 0
**/
int ispatternmatching(ecoseq_t *seq, PatternPtr pattern){
if (pattern != NULL)
{
SeqPtr apatseq = NULL;
apatseq=ecoseq2apatseq(seq,apatseq);
return ManberAll(apatseq,pattern,0,0,apatseq->seqlen) > 0;
}
else return 0;
}
/* ----------------------------------------------- */
/* printout help */
/* ----------------------------------------------- */
#define PP fprintf(stdout,
static void PrintHelp()
{
PP "\n------------------------------------------\n");
PP " ecogrep Version %s\n", VERSION);
PP "------------------------------------------\n");
PP " synopsis : filtering ecoPCR result based on\n");
PP " taxonomic id filter and regular expression pattern\n");
PP " usage: ecogrep [options] filename\n");
PP "------------------------------------------\n");
PP " options:\n");
PP " -1 : [FIRST] : compare the given pattern with direct strand oligonucleotide\n\n");
PP " -2 : [SECOND] : compare the given pattern with reverse strand oligonucleotide\n\n");
PP " -d : [D]atabase containing taxonomic information\n\n");
PP " -e : [E]rrors : max error allowed in pattern match (option-1, -2 and -p) (0 by default)\n\n");
PP " -p : [P]attern : oligonucleotide pattern\n\n");
PP " -h : [H]elp : print <this> help\n\n");
PP " -i : [I]gnore subtree under given taxonomic id\n\n");
PP " -r : [R]estrict search to subtree under given taxomic id\n\n");
PP " -v : in[V]ert the sense of matching, to select non-matching lines.\n\n");
PP " argument:\n");
PP " ecoPCR ouput file name\n");
PP "------------------------------------------\n\n");
PP " http://www.grenoble.prabi.fr/trac/ecoPCR/\n");
PP "------------------------------------------\n\n");
}
#undef PP
/* ----------------------------------------------- */
/* printout usage and exit */
/* ----------------------------------------------- */
#define PP fprintf(stderr,
static void ExitUsage(stat)
int stat;
{
PP "usage: ecogrep [-d database] [-p pattern] [-i taxid] [-r taxid] [-v] [-h] <file name>\n");
PP "type \"ecogrep -h\" for help\n");
if (stat)
exit(stat);
}
#undef PP
/* ----------------------------------------------- */
/* MAIN */
/* ----------------------------------------------- */
#define LINE_BUFF_SIZE 10000
int main(int argc, char **argv){
int32_t carg = 0;
int32_t r = 0; // number of restricted taxid
int32_t i = 0; // number of ignored taxid
int32_t v = 0; // stores if -v mode is active
int32_t k = 0; // file counter
int32_t errflag = 0;
int32_t error_max = 0; // stores the error rate allowed by the user
int32_t matchingresult = 0; // stores number of matching result
ecotaxonomy_t *taxonomy; // stores the taxonomy
ecoseq_t *seq = NULL; // stores sequence info
ecoseq_t *oligoseq_1 = NULL; // stores the oligo_1 info
ecoseq_t *oligoseq_2 = NULL; // stores the oligo_2 info
char *database = NULL; // stores the database path (for taxonomy)
char *p = NULL; // pattern for sequence
PatternPtr pattern = NULL; // stores the build pattern for sequence
char *o1 = NULL; // pattern for direct strand oligo
PatternPtr oligo_1 = NULL; // stores the build pattern for direct strand oligo
char *o2 = NULL; // pattern for reverse strand oligo
PatternPtr oligo_2 = NULL; // stores the build pattern for reverse strand oligo
int32_t *restricted_taxid = NULL; // stores the restricted taxid
int32_t *ignored_taxid = NULL; // stores the ignored taxid
FILE *file = NULL; // stores the data stream, stdin by default
char *stream = ECOMALLOC(sizeof(char *)*LINE_BUFF_SIZE,"error stream buffer allocation");
char *orig = ECOMALLOC(sizeof(char *)*LINE_BUFF_SIZE,"error orig buffer allocation");
int is_ignored = 0;
int is_included = 0;
int is_matching = 0;
int match_o1 = 0;
int match_o2 = 0;
int good = 0;
seq = new_ecoseq();
oligoseq_1 = new_ecoseq();
oligoseq_2 = new_ecoseq();
/**
* Parse commande line options
**/
while ((carg = getopt(argc, argv, "1:2:p:d:i:r:e:vh")) != -1) {
switch (carg) {
case '1':
o1 = ECOMALLOC(strlen(optarg)+1,
"Error on o1 allocation");
strcpy(o1,optarg);
break;
case '2':
o2 = ECOMALLOC(strlen(optarg)+1,
"Error on o2 allocation");
strcpy(o2,optarg);
break;
case 'd':
database = ECOMALLOC(strlen(optarg)+1,
"Error on datafile allocation");
strcpy(database,optarg);
break;
case 'i':
ignored_taxid = ECOREALLOC( ignored_taxid,
sizeof(int32_t)*(i+1),
"Error on ignored_taxid reallocation");
sscanf(optarg,"%d",&ignored_taxid[i]);
i++;
break;
case 'r':
restricted_taxid = ECOREALLOC( restricted_taxid,
sizeof(int32_t)*(r+1),
"Error on restricted_taxid reallocation");
sscanf(optarg,"%d",&restricted_taxid[r]);
r++;
break;
case 'v':
v = 1;
break;
case 'h':
PrintHelp();
exit(0);
break;
case 'e':
sscanf(optarg,"%d",&error_max);
break;
case 'p':
p = ECOMALLOC(strlen(optarg)+1,
"Error on pattern allocation");
strcpy(p,optarg);
break;
case '?':
errflag++;
}
}
/**
* Check sequence pattern length and build it in PatternPtr format
**/
if(p)
{
if (strlen(p) > 32){
printf("# Sorry, ecogrep doesn't handle pattern longer than 32 characters.\
\n# Please check it out : %s\n",p);
exit(EXIT_FAILURE);
}
else if ( (pattern = buildPattern(p,error_max)) == NULL)
exit(EXIT_FAILURE);
}
/**
* Check o1 pattern length and build it in PatternPtr format
**/
if(o1)
{
if (strlen(o1) > 32){
printf("# Sorry, ecogrep doesn't handle pattern longer than 32 characters.\
\n# Please check it out : %s\n",o1);
exit(EXIT_FAILURE);
}
else if ( (oligo_1 = buildPattern(o1,error_max)) == NULL)
exit(EXIT_FAILURE);
}
/**
* Check o2 pattern length and build it in PatternPtr format
**/
if(o2)
{
if (strlen(o2) > 32){
printf("# Sorry, ecogrep doesn't handle pattern longer than 32 characters.\
\n# Please check it out : %s\n",o2);
exit(EXIT_FAILURE);
}
else if ( (oligo_2 = buildPattern(o2,error_max)) == NULL)
exit(EXIT_FAILURE);
}
/**
* try to get the database name from environment variable
* if no database name specified in the -d option
**/
if (database == NULL)
{
database = getenv("ECOPCRDB");
if (database == NULL)
errflag++;
}
/**
* check at leat one processing is asked
* either patterns or taxid filters
**/
if ( !p && !o1 && !o2 && restricted_taxid == NULL && ignored_taxid == NULL )
{
errflag++;
}
if (errflag)
ExitUsage(errflag);
/**
* Get the taxonomy back
**/
taxonomy = read_taxonomy(database,0);
/**
* Parse the stream
**/
for (k=0 ; argc >= optind ; optind++, k++){
matchingresult = 0;
if ( (file = fopen(argv[optind], "r")) == NULL)
{
if (isatty(fileno(stdin)) == 0)
{
file = stdin;
printf("# Processing standard input...\n");
}
else
break;
}
else
printf("# Processing %s...\n",argv[optind]);
while( fgets(stream, LINE_BUFF_SIZE, file) != NULL ){
if (stream[0]!= '#')
{
stream[LINE_BUFF_SIZE-1]=0;
strcpy(orig,stream);
getLineContent(stream,seq,oligoseq_1,oligoseq_2);
/* -----------------------------------------------*/
/* is ignored if at least one option -i */
/* AND */
/* if current sequence is son of taxid */
/* -----------------------------------------------*/
is_ignored = ( (i > 0) && (eco_is_taxid_included( taxonomy,
ignored_taxid,
i,
seq->taxid))
);
/* -----------------------------------------------*/
/* is included if no -r option */
/* OR */
/* if current sequence is son of taxid */
/* -----------------------------------------------*/
is_included = ( (r == 0) || (eco_is_taxid_included( taxonomy,
restricted_taxid,
r,
seq->taxid))
);
/* ----------------------------------------------------------- */
/* match if no pattern or if pattern match current sequence */
/* ----------------------------------------------------------- */
is_matching = ( !p || (ispatternmatching(seq,pattern)));
/* ---------------------------------------------------------------------------- */
/* match if no direct oligo pattern or if pattern match current direct oligo */
/* ---------------------------------------------------------------------------- */
match_o1 = (!o1 || (ispatternmatching(oligoseq_1,oligo_1)));
/* ------------------------------------------------------------------------------- */
/* match if no revesrse oligo pattern or if pattern match current reverse oligo */
/* ------------------------------------------------------------------------------- */
match_o2 = (!o2 || (ispatternmatching(oligoseq_2,oligo_2)));
good = (is_included && is_matching && !is_ignored && match_o1 && match_o2);
if (v)
good=!good;
if ( good )
{
printf("%s",orig);
matchingresult++;
}
}
}
if ( file != stdin )
fclose(file);
printf("# %d matching result(s)\n#\n",matchingresult);
}
/**
* clean and free before leaving
**/
ECOFREE(orig,"Error in free orig");
ECOFREE(stream,"Error in free stream");
ECOFREE(ignored_taxid,"Error in free stream");
ECOFREE(restricted_taxid,"Error in free stream");
return 0;
}

123
src/ecoisundertaxon.c Normal file
View File

@ -0,0 +1,123 @@
#include "libecoPCR/ecoPCR.h"
#include <getopt.h>
#include <stdlib.h>
#define VERSION "0.1"
/* ----------------------------------------------- */
/* printout verbose mode */
/* ----------------------------------------------- */
static void printTaxon(ecotx_t *taxon){
printf("# taxid : %d | rank : %d | name : %s \n\n",taxon->taxid, taxon->rank, taxon->name);
}
/* ----------------------------------------------- */
/* printout help */
/* ----------------------------------------------- */
#define PP fprintf(stdout,
static void PrintHelp()
{
PP "\n------------------------------------------\n");
PP " ecoisundertaxon Version %s\n", VERSION);
PP "------------------------------------------\n");
PP " synopsis : searching relationship in taxonomy\n");
PP " usage: ecoisundertaxon [options] database\n");
PP "------------------------------------------\n");
PP " options:\n");
PP " -1 : [FIRST] taxomic id of the hypothetical son\n\n");
PP " -2 : [SECOND] taxonomic id of the hypothetical parent\n\n");
PP " -h : [H]elp - print <this> help\n\n");
PP " -v : [V]erbose mode. Display taxonomic information for both\n");
PP " : taxonomic id.\n\n");
PP "------------------------------------------\n");
PP " database : to match the expected format, the database\n");
PP " has to be formated first by the ecoPCRFormat.py program located.\n");
PP " in the tools directory. Type the radical only, leaving out the extension\n");
PP "------------------------------------------\n\n");
PP " https://www.grenoble.prabi.fr/trac/ecoPCR/wiki");
PP "------------------------------------------\n\n");
}
#undef PP
/* ----------------------------------------------- */
/* printout usage and exit */
/* ----------------------------------------------- */
#define PP fprintf(stderr,
static void ExitUsage(stat)
int stat;
{
PP "usage: ecoisundertaxon [-1 taxid] [-2 taxid] [-v] [-h] datafile\n");
PP "type \"ecoisundertaxon -h\" for help\n");
if (stat)
exit(stat);
}
#undef PP
/* ----------------------------------------------- */
/* MAIN */
/* ----------------------------------------------- */
int main(int argc, char **argv){
int32_t carg = 0;
int32_t taxid_1 = 0;
int32_t taxid_2 = 0;
int32_t verbose = 0;
int32_t errflag = 0;
ecotaxonomy_t *taxonomy = NULL;
ecotx_t *son = NULL;
ecotx_t *parent = NULL;
while ((carg = getopt(argc, argv, "1:2:vh")) != -1) {
switch (carg) {
case '1':
sscanf(optarg,"%d",&taxid_1);
break;
case '2':
sscanf(optarg,"%d",&taxid_2);
break;
case 'v':
verbose = 1;
break;
case 'h':
PrintHelp();
exit(0);
break;
case '?':
errflag++;
}
}
if ((argc -= optind) != 1)
errflag++;
if (errflag)
ExitUsage(errflag);
taxonomy = read_taxonomy(argv[optind],0);
son = eco_findtaxonbytaxid(taxonomy, taxid_1);
if (verbose){
parent = eco_findtaxonbytaxid(taxonomy, taxid_2);
printTaxon(son);
printTaxon(parent);
}
if (eco_isundertaxon(son, taxid_2))
printf("# taxid_1 (%d) is son of taxid_2 (%d)\n",taxid_1, taxid_2);
else
printf("# taxid_1 (%d) is NOT son of taxid_2 (%d)\n",taxid_1, taxid_2);
return 0;
}

View File

@ -8,104 +8,69 @@
#define VERSION "0.1"
/* ----------------------------------------------- */
/* printout help */ /* ----------------------------------------------- */
/* printout help */
/* ----------------------------------------------- */
#define PP fprintf(stdout,
static void PrintHelp()
{
PP "------------------------------------------\n");
PP " Apat Version %s\n", VERSION);
PP " ecoPCR Version %s\n", VERSION);
PP "------------------------------------------\n");
PP "synopsis : pattern(s) searching program\n");
PP "usage: apat [options] patfile datafile\n");
PP "synopsis : searching for sequence and taxonomy hybriding with given primers\n");
PP "usage: ecoPCR [options] <nucleotidic patterns>\n");
PP "------------------------------------------\n");
PP "options:\n");
PP "-a code : [A]lphabet encoding for pattern\n");
PP " code is one of : \n");
PP " dna: use IUPAC equivalences for dna/rna\n");
PP " prot: use IUPAC equivalences for proteins\n");
PP " alpha: no equivalences, just treat plain symbols\n");
PP " note: the equivalences are used in pattern only\n");
PP " *not* in sequence(s) (see note (4) below)\n");
PP " dft: alpha\n");
PP "-c : [C]ooccurences\n");
PP " print patterns cooccurence matrix \n");
PP " dft: off\n");
PP "-h : [H]elp - print <this> help\n");
PP "-m : [M]ultiple occurences\n");
PP " see -q option \n");
PP " dft: off\n");
PP "-o file : [O]utput sequences\n");
PP " additionaly output sequence(s) that match into\n");
PP " 'file' in fasta format\n");
PP " dft: off\n");
PP "-p : no [Print] - don't printout hits\n");
PP " when just counts are needed\n");
PP " dft: off\n");
PP "-q nn : [Quorum]\n");
PP " printout result if at least nn\n");
PP " different patterns are found on the sequence\n");
PP " (with -m : at least nn different <hits>)\n");
PP " dft: # of patterns read\n");
PP "-s : no [Sort] - don't sort hits before printing\n");
PP " usually hits are printed by increasing position\n");
PP " this option will list them by pattern\n");
PP " dft: off\n");
PP "-t : [T]est sequence\n");
PP " additionnaly check if sequences are uppercase\n");
PP " this is mostly used for testing\n");
PP " dft: off\n");
PP "-u : [U]pper\n");
PP " force lower->upper sequence conversion\n");
PP " without this option lowercase symbols in sequence\n");
PP " will not be considered to as matches\n");
PP " dft: off\n");
PP "-v : [V]erbose\n");
PP " just display a kind of progress clock on stderr\n");
PP " (this is only useful if you redirect stdout)\n");
PP "\n");
PP "patfile : pattern file (see below)\n");
PP "datafile : database file (see below)\n");
PP "------------------------------------------\n");
PP "pattern file format :\n");
PP " one pattern/line\n");
PP " format : <pattern> <space> #errors\n");
PP " <pattern> := pattern<symbol>\n");
PP " or !pattern<symbol>\n");
PP " or pattern<symbol>#\n");
PP " or !pattern<symbol>#\n");
PP " <symbol> := <letter>\n");
PP " or [<letter>....<letter>]\n");
PP " <letter> := uppercase letter (A-Z)\n");
PP " <number> := a positive number indicates max number of mismatches\n");
PP " a negative number indicates max number of mismatches or indels\n");
PP " # means that no error is allowed at this position\n");
PP " ! complement the <symbol>\n");
PP " [...] means that all symbols within [] are allowed\n");
PP " in addition IUPAC equivalences may be used as symbols\n");
PP " with the -a option\n");
PP "\n");
PP "example: G[DE]S#[GIV]!HP![DE]# 1\n");
PP "-d : [D]atabase : to match the expected format, the database\n");
PP " has to be formated first by the ecoPCRFormat.py program located.\n");
PP " in the tools directory.\n");
PP " ecoPCRFormat.py creates three file types :\n");
PP " .sdx : contains the sequences\n");
PP " .tdx : contains information concerning the taxonomy\n");
PP " .rdx : contains the taxonomy rank\n\n");
PP " ecoPCR needs all the file type. As a result, you have to write the\n");
PP " database radical without any extension. For example /ecoPCRDB/gbmam\n\n");
PP "-e : [E]rror : max error allowed by oligonucleotide (0 by default)\n\n");
PP "-h : [H]elp - print <this> help\n\n");
PP "-i : [I]gnore the given taxonomy id.\n");
PP " Taxonomy id are available using the ecofind program.\n");
PP " see its help typing ecofind -h for more information.\n\n");
PP "-k : [K]ingdom mode : set the kingdom mode\n");
PP " super kingdom mode by default.\n\n");
PP "-l : minimum [L]ength : define the minimum amplication length. \n\n");
PP "-L : maximum [L]ength : define the maximum amplicationlength. \n\n");
PP "-r : [R]estricts the search to the given taxonomic id.\n");
PP " Taxonomy id are available using the ecofind program.\n");
PP " see its help typing ecofind -h for more information.\n");
PP "\n");
PP "------------------------------------------\n");
PP "datafile contains one or more sequences in\n");
PP "Fasta format, with *uppercase* symbols \n");
PP "\n");
PP "first argument : oligonucleotide for direct strand\n\n");
PP "second argument : oligonucleotide for reverse strand\n\n");
PP "------------------------------------------\n");
PP "note (1): the maximum number of patterns is %d\n", MAX_PATTERN);
PP "\n");
PP "note (2): the maximum length for one pattern is %d\n", MAX_PAT_LEN);
PP "\n");
PP "note (3): indels are still experimental and are :\n");
PP " not handled gracefully with the # syntax\n");
PP " and hits are not printed very nicely\n");
PP "\n");
PP "note (4): the IUPAC equivalences (-a option) are used\n");
PP " in pattern only *not* in sequence(s).\n");
PP " for instance GATN (with option -a dna) is equivalent to GAT[ACGT]\n");
PP " and will match GATA/GATC/GATG/GATC but will not match GATN\n");
PP " (nor NNNN) in sequence.\n");
PP "Table result description : \n");
PP "column 1 : accession number\n");
PP "column 2 : sequence length\n");
PP "column 3 : taxonomic id\n");
PP "column 4 : rank\n");
PP "column 5 : species taxonomic id\n");
PP "column 6 : scientific name\n");
PP "column 7 : genus taxonomic id\n");
PP "column 8 : genus name\n");
PP "column 9 : family taxonomic id\n");
PP "column 10 : family name\n");
PP "column 11 : super kingdom taxonomic id\n");
PP "column 12 : super kingdom name\n");
PP "column 13 : strand (direct or reverse)\n");
PP "column 14 : first oligonucleotide\n");
PP "column 15 : number of errors for the first strand\n");
PP "column 16 : second oligonucleotide\n");
PP "column 17 : number of errors for the second strand\n");
PP "column 18 : amplification length\n");
PP "column 19 : sequence\n");
PP "column 20 : definition\n");
PP "------------------------------------------\n");
PP " http://www.grenoble.prabi.fr/trac/ecoPCR/\n");
PP "------------------------------------------\n\n");
PP "\n");
}
@ -121,10 +86,8 @@ static void PrintHelp()
static void ExitUsage(stat)
int stat;
{
PP "usage: apat [-a dna|prot] [-c] [-h] [-m] [-o file] [-p]\n");
PP " [-q nn] [-t] [-u] [-v]\n");
PP " patfile datafile\n");
PP "type \"apat -h\" for help\n");
PP "usage: ecoPCR [-d database] [-l value] [-L value] [-e value] [-r taxid] [-i taxid] [-k] oligo1 oligo2\n");
PP "type \"ecoPCR -h\" for help\n");
if (stat)
exit(stat);
@ -306,7 +269,7 @@ int main(int argc, char **argv)
int32_t errflag=0;
char kingdom_mode=0;
char *prefix;
char *prefix = NULL;
int32_t checkedSequence = 0;
int32_t positiveSequence= 0;
@ -330,43 +293,39 @@ int main(int argc, char **argv)
int32_t erri;
int32_t errj;
int32_t *restricted_taxid = NULL;
int32_t *ignored_taxid = NULL;
int32_t r=0;
int32_t g=0;
while ((carg = getopt(argc, argv, "h1:2:l:L:e:k")) != -1) {
while ((carg = getopt(argc, argv, "hd:l:L:e:i:r:k")) != -1) {
switch (carg) {
/* -------------------- */
case '1': /* prenier oligo */
case 'd': /* database name */
/* -------------------- */
oligo1 = ECOMALLOC(strlen(optarg)+1,
"Error on oligo 1 allocation");
strcpy(oligo1,optarg);
break;
/* -------------------- */
case '2': /* coocurence option */
/* -------------------- */
oligo2 = ECOMALLOC(strlen(optarg)+1,
"Error on oligo 1 allocation");
strcpy(oligo2,optarg);
prefix = ECOMALLOC(strlen(optarg)+1,
"Error on prefix allocation");
strcpy(prefix,optarg);
break;
/* -------------------- */
case 'h': /* help */
/* -------------------- */
PrintHelp();
exit(0);
break;
/* -------------------- */
case 'l': /* lmin amplification */
/* -------------------- */
/* ------------------------- */
case 'l': /* min amplification lenght */
/* ------------------------- */
sscanf(optarg,"%d",&lmin);
break;
/* -------------------- */
case 'L': /* lmax amplification */
/* -------------------- */
/* -------------------------- */
case 'L': /* max amplification lenght */
/* -------------------------- */
sscanf(optarg,"%d",&lmax);
break;
@ -376,9 +335,27 @@ int main(int argc, char **argv)
sscanf(optarg,"%d",&error_max);
break;
case 'k': /* error max */
/* -------------------- */
kingdom_mode = 1;
case 'k': /* set the kingdom mode */
kingdom_mode = 1; /* -------------------- */
break;
/* ------------------------------------------ */
case 'r': /* stores the restricting search taxonomic id */
/* ------------------------------------------ */
restricted_taxid = ECOREALLOC(restricted_taxid,sizeof(int32_t)*(r+1),
"Error on restricted_taxid reallocation");
sscanf(optarg,"%d",&restricted_taxid[r]);
r++;
break;
/* --------------------------------- */
case 'i': /* stores the taxonomic id to ignore */
/* --------------------------------- */
ignored_taxid = ECOREALLOC(ignored_taxid,sizeof(int32_t)*(g+1),
"Error on excluded_taxid reallocation");
sscanf(optarg,"%d",&ignored_taxid[g]);
g++;
break;
/* -------------------- */
@ -389,25 +366,43 @@ int main(int argc, char **argv)
}
if ((argc -= optind) != 1)
/**
* check the path to the database is given as last argument
*/
if ((argc -= optind) == 2)
{
oligo1 = ECOMALLOC(strlen(argv[optind])+1,
"Error on oligo1 allocation");
strcpy(oligo1,argv[optind]);
optind++;
oligo2 = ECOMALLOC(strlen(argv[optind])+1,
"Error on oligo1 allocation");
strcpy(oligo2,argv[optind]);
}
else
errflag++;
if (prefix == NULL)
{
prefix = getenv("ECOPCRDB");
if (prefix == NULL)
errflag++;
}
if (!oligo1 || !oligo2)
errflag++;
if (errflag)
ExitUsage(errflag);
prefix = argv[optind];
o1 = buildPattern(oligo1,error_max);
o2 = buildPattern(oligo2,error_max);
o1c = complementPattern(o1);
o2c = complementPattern(o2);
printf("#\n");
printf("# ecoPCR version %s\n",VERSION);
printf("# direct strand oligo1 : %-32s ; oligo2c : %32s\n", o1->cpat,o2c->cpat);
@ -427,11 +422,10 @@ int main(int argc, char **argv)
printf("# output in superkingdom mode\n");
printf("#\n");
taxonomy = read_taxonomy(prefix);
taxonomy = read_taxonomy(prefix,0);
seq = ecoseq_iterator(prefix);
checkedSequence = 0;
positiveSequence= 0;
amplifiatCount = 0;
@ -439,6 +433,24 @@ int main(int argc, char **argv)
while(seq)
{
checkedSequence++;
/**
* check if current sequence should be included
**/
if ( (r == 0) ||
(eco_is_taxid_included(taxonomy,
restricted_taxid,
r,
taxonomy->taxons->taxon[seq->taxid].taxid)
)
)
if ((g == 0) ||
!(eco_is_taxid_included(taxonomy,
ignored_taxid,
g,
taxonomy->taxons->taxon[seq->taxid].taxid)
)
)
{
scname = taxonomy->taxons->taxon[seq->taxid].name;
strncpy(head,seq->SQ,10);
@ -446,13 +458,11 @@ int main(int argc, char **argv)
strncpy(tail,seq->SQ+seq->SQ_length-10,10);
tail[10]=0;
apatseq=ecoseq2apatseq(seq,apatseq);
o1Hits = ManberAll(apatseq,o1,0,0,apatseq->seqlen);
o2cHits= 0;
if (o1Hits)
{
stktmp = apatseq->hitpos[0];
@ -517,12 +527,15 @@ int main(int argc, char **argv)
}
}
} /* End of taxonomic selection */
delete_ecoseq(seq);
seq = ecoseq_iterator(NULL);
}
ECOFREE(restricted_taxid, "Error: could not free restricted_taxid\n");
ECOFREE(ignored_taxid, "Error: could not free excluded_taxid\n");
return 0;
}

18
src/global.mk Normal file
View File

@ -0,0 +1,18 @@
MACHINE=MAC_OS_X
LIBPATH= -Llibapat -LlibecoPCR
MAKEDEPEND = gcc -D$(MACHINE) -M $(CPPFLAGS) -o $*.d $<
CC=gcc
CFLAGS= -W -Wall -O2 -g
default: all
%.o: %.c
$(CC) -D$(MACHINE) $(CFLAGS) -c -o $@ $<
%.P : %.c
$(MAKEDEPEND)
@sed 's/\($*\)\.o[ :]*/\1.o $@ : /g' < $*.d > $@; \
rm -f $*.d; [ -s $@ ] || rm -f $@
include $(SRCS:.c=.P)

23
src/libapat/Makefile Normal file
View File

@ -0,0 +1,23 @@
SOURCES = apat_parse.c \
apat_search.c \
libstki.c
SRCS=$(SOURCES)
OBJECTS= $(patsubst %.c,%.o,$(SOURCES))
LIBFILE= libapat.a
include ../global.mk
all: $(LIBFILE)
clean:
rm -rf $(OBJECTS) $(LIBFILE)
$(LIBFILE): $(OBJECTS)
ar -cr $@ $?

29
src/libecoPCR/Makefile Normal file
View File

@ -0,0 +1,29 @@
SOURCES = ecoapat.c \
ecodna.c \
ecoError.c \
ecoIOUtils.c \
ecoMalloc.c \
ecorank.c \
ecoseq.c \
ecotax.c \
ecofilter.c \
econame.c
SRCS=$(SOURCES)
OBJECTS= $(patsubst %.c,%.o,$(SOURCES))
LIBFILE= libecoPCR.a
include ../global.mk
all: $(LIBFILE)
clean:
rm -rf $(OBJECTS) $(LIBFILE)
$(LIBFILE): $(OBJECTS)
ar -cr $@ $?

View File

@ -2,7 +2,15 @@
#include <stdio.h>
#include <stdlib.h>
/*
* print the message given as argument and exit the program
* @param error error number
* @param message the text explaining what's going on
* @param filename the file source where the program failed
* @param linenumber the line where it has failed
* filename and linenumber are written at pre-processing
* time by a macro
*/
void ecoError(int32_t error,
const char* message,
const char * filename,

View File

@ -16,20 +16,19 @@ int32_t is_big_endian()
int32_t swap_int32_t(int32_t i)
{
return SWAPINT32(i);
}
/**
* Read part of the file
* @param *f the database
* @param recordSize the size to be read
*
* @return buffer
*/
void *read_ecorecord(FILE *f,int32_t *recordSize)
{
static void *buffer =NULL;
@ -79,10 +78,14 @@ void *read_ecorecord(FILE *f,int32_t *recordSize)
/**
* Open the database and check it's readable
* @param filename name of the database (.sdx, .rdx, .tbx)
* @param sequencecount buffer - pointer to variable storing the number of occurence
* @param abort_on_open_error boolean to define the behaviour in case of error
* while opening the database
* @return FILE type
**/
FILE *open_ecorecorddb(const char *filename,
int32_t *sequencecount,
int32_t abort_on_open_error)

View File

@ -56,10 +56,10 @@ typedef struct {
} ecotxformat_t;
typedef struct {
typedef struct ecotxnode {
int32_t taxid;
int32_t rank;
int32_t parent;
struct ecotxnode *parent;
char *name;
} ecotx_t;
@ -80,8 +80,38 @@ typedef struct {
char* label[1];
} ecorankidx_t;
/*
*
* Taxonomy name types
*
*/
typedef struct {
int32_t is_scientificname;
int32_t namelength;
int32_t classlength;
int32_t taxid;
char names[1];
} econameformat_t;
typedef struct {
char *name;
char *classname;
int32_t is_scientificname;
struct ecotxnode *taxon;
} econame_t;
typedef struct {
int32_t count;
econame_t names[1];
} econameidx_t;
typedef struct {
ecorankidx_t *ranks;
econameidx_t *names;
ecotxidx_t *taxons;
} ecotaxonomy_t;
@ -175,6 +205,9 @@ ecoseq_t *readnext_ecoseq(FILE *);
ecorankidx_t *read_rankidx(const char *filename);
econameidx_t *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy);
/**
* Read taxonomy data as formated by the ecoPCRFormat.py script.
@ -189,8 +222,11 @@ ecorankidx_t *read_rankidx(const char *filename);
ecotxidx_t *read_taxonomyidx(const char *filename);
ecotaxonomy_t *read_taxonomy(const char *prefix);
ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName);
ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy, int32_t taxid);
int eco_isundertaxon(ecotx_t *taxon, int other_taxid);
ecoseq_t *ecoseq_iterator(const char *prefix);
@ -227,4 +263,7 @@ ecotx_t *eco_getfamily(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
ecotx_t *eco_getkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
int eco_is_taxid_ignored(int32_t *ignored_taxid, int32_t tab_len, int32_t taxid);
int eco_is_taxid_included(ecotaxonomy_t *taxonomy, int32_t *included_taxid, int32_t tab_len, int32_t taxid);
#endif /*ECOPCR_H_*/

19
src/libecoPCR/ecofilter.c Normal file
View File

@ -0,0 +1,19 @@
#include "ecoPCR.h"
int eco_is_taxid_included( ecotaxonomy_t *taxonomy,
int32_t *restricted_taxid,
int32_t tab_len,
int32_t taxid)
{
int i;
ecotx_t *taxon;
taxon = eco_findtaxonbytaxid(taxonomy, taxid);
for (i=0; i < tab_len; i++)
if ( (taxon->taxid == restricted_taxid[i]) ||
(eco_isundertaxon(taxon, restricted_taxid[i])) )
return 1;
return 0;
}

61
src/libecoPCR/econame.c Normal file
View File

@ -0,0 +1,61 @@
#include "ecoPCR.h"
#include <string.h>
#include <stdlib.h>
static econame_t *readnext_econame(FILE *f,econame_t *name,ecotaxonomy_t *taxonomy);
econameidx_t *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy)
{
int32_t count;
FILE *f;
econameidx_t *indexname;
int32_t i;
f = open_ecorecorddb(filename,&count,1);
indexname = (econameidx_t*) ECOMALLOC(sizeof(econameidx_t) + sizeof(econame_t) * (count-1),"Allocate names");
indexname->count=count;
for (i=0; i < count; i++){
readnext_econame(f,(indexname->names)+i,taxonomy);
}
return indexname;
}
econame_t *readnext_econame(FILE *f,econame_t *name,ecotaxonomy_t *taxonomy)
{
econameformat_t *raw;
int32_t rs;
raw = read_ecorecord(f,&rs);
if (!raw)
return NULL;
if (is_big_endian())
{
raw->is_scientificname = swap_int32_t(raw->is_scientificname);
raw->namelength = swap_int32_t(raw->namelength);
raw->classlength = swap_int32_t(raw->classlength);
raw->taxid = swap_int32_t(raw->taxid);
}
name->is_scientificname=raw->is_scientificname;
name->name = ECOMALLOC((raw->namelength+1) * sizeof(char),"Allocate name");
strncpy(name->name,raw->names,raw->namelength);
name->name[raw->namelength]=0;
name->classname = ECOMALLOC((raw->classlength+1) * sizeof(char),"Allocate classname");
strncpy(name->classname,(raw->names+raw->namelength),raw->classlength);
name->classname[raw->classlength]=0;
name->taxon = taxonomy->taxons->taxon + raw->taxid;
return name;
}

View File

@ -79,7 +79,9 @@ ecoseq_t *new_ecoseq_with_data( char *AC,
}
/**
* ?? used ??
**/
FILE *open_ecoseqdb(const char *filename,
int32_t *sequencecount)
{
@ -140,6 +142,13 @@ ecoseq_t *readnext_ecoseq(FILE *f)
return seq;
}
/**
* Open the sequences database (.sdx file)
* @param prefix name of the database (radical without extension)
* @param index integer
*
* @return file object
*/
FILE *open_seqfile(const char *prefix,int32_t index)
{
char filename_buffer[1024];
@ -153,7 +162,7 @@ FILE *open_seqfile(const char *prefix,int32_t index)
prefix,
index);
fprintf(stderr,"Coucou %s\n",filename_buffer);
fprintf(stderr,"# Coucou %s\n",filename_buffer);
if (filename_length >= 1024)
@ -164,7 +173,7 @@ FILE *open_seqfile(const char *prefix,int32_t index)
input=open_ecorecorddb(filename_buffer,&seqcount,0);
if (input)
fprintf(stderr,"Reading file %s containing %d sequences...\n",
fprintf(stderr,"# Reading file %s containing %d sequences...\n",
filename_buffer,
seqcount);

View File

@ -5,7 +5,11 @@
static ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon);
/**
* Open the taxonomy database
* @param pointer to the database (.tdx file)
* @return a ecotxidx_t structure
*/
ecotxidx_t *read_taxonomyidx(const char *filename)
{
int32_t count;
@ -19,13 +23,14 @@ ecotxidx_t *read_taxonomyidx(const char *filename)
"Allocate taxonomy");
index->count=count;
for (i=0; i < count; i++)
for (i=0; i < count; i++){
readnext_ecotaxon(f,&(index->taxon[i]));
index->taxon[i].parent=index->taxon + (int32_t)index->taxon[i].parent;
}
return index;
}
int32_t delete_taxonomy(ecotxidx_t *index)
{
int32_t i;
@ -61,6 +66,15 @@ int32_t delete_taxon(ecotx_t *taxon)
return 1;
}
/**
* Read the database for a given taxon a save the data
* into the taxon structure(if any found)
* @param *f pointer to FILE type returned by fopen
* @param *taxon pointer to the structure
*
* @return a ecotx_t structure if any taxon found else NULL
*/
ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon)
{
@ -80,7 +94,7 @@ ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon)
raw->taxid = swap_int32_t(raw->taxid);
}
taxon->parent = raw->parent;
taxon->parent = (ecotx_t*)raw->parent;
taxon->taxid = raw->taxid;
taxon->rank = raw->rank;
@ -93,7 +107,7 @@ ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon)
}
ecotaxonomy_t *read_taxonomy(const char *prefix)
ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName)
{
ecotaxonomy_t *tax;
char *filename;
@ -115,10 +129,19 @@ ecotaxonomy_t *read_taxonomy(const char *prefix)
tax->taxons = read_taxonomyidx(filename);
if (readAlternativeName)
{
snprintf(filename,buffsize,"%s.ndx",prefix);
tax->names=read_nameidx(filename,tax);
}
else
tax->names=NULL;
return tax;
}
int32_t delete_ecotaxonomy(ecotaxonomy_t *taxonomy)
{
if (taxonomy)
@ -138,20 +161,19 @@ int32_t delete_ecotaxonomy(ecotaxonomy_t *taxonomy)
}
ecotx_t *eco_findtaxonatrank(ecotx_t *taxon,
int32_t rankidx,
ecotaxonomy_t *taxonomy)
int32_t rankidx)
{
ecotx_t *current_taxon;
ecotx_t *next_taxon;
current_taxon = taxon;
next_taxon = &(taxonomy->taxons->taxon[current_taxon->parent]);
next_taxon = current_taxon->parent;
while ((current_taxon!=next_taxon) &&
while ((current_taxon!=next_taxon) && // I' am the root node
(current_taxon->rank!=rankidx))
{
current_taxon = next_taxon;
next_taxon = &(taxonomy->taxons->taxon[current_taxon->parent]);
next_taxon = current_taxon->parent;
}
if (current_taxon->rank==rankidx)
@ -160,6 +182,61 @@ ecotx_t *eco_findtaxonatrank(ecotx_t *taxon,
return NULL;
}
/**
* Get back information concerning a taxon from a taxonomic id
* @param *taxonomy the taxonomy database
* @param taxid the taxonomic id
*
* @result a ecotx_t structure containing the taxonimic information
**/
ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy,
int32_t taxid)
{
ecotx_t *current_taxon;
int32_t taxoncount;
int32_t i;
taxoncount=taxonomy->taxons->count;
for (current_taxon=taxonomy->taxons->taxon,
i=0;
i < taxoncount;
i++,
current_taxon++){
if (current_taxon->taxid==taxid){
return current_taxon;
}
}
return (ecotx_t*)NULL;
}
/**
* Find out if taxon is son of other taxon (identified by its taxid)
* @param *taxon son taxon
* @param parent_taxid taxonomic id of the other taxon
*
* @return 1 is the other taxid math a parent taxid, else 0
**/
int eco_isundertaxon(ecotx_t *taxon,
int other_taxid)
{
ecotx_t *next_parent;
next_parent = taxon->parent;
while ( (other_taxid != next_parent->taxid) &&
(strcmp(next_parent->name, "root")) )
{
next_parent = next_parent->parent;
}
if (other_taxid == next_parent->taxid)
return 1;
else
return 0;
}
ecotx_t *eco_getspecies(ecotx_t *taxon,
ecotaxonomy_t *taxonomy)
{
@ -175,7 +252,7 @@ ecotx_t *eco_getspecies(ecotx_t *taxon,
if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex,tax);
return eco_findtaxonatrank(taxon,rankindex);
}
ecotx_t *eco_getgenus(ecotx_t *taxon,
@ -193,7 +270,7 @@ ecotx_t *eco_getgenus(ecotx_t *taxon,
if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex,tax);
return eco_findtaxonatrank(taxon,rankindex);
}
@ -212,7 +289,7 @@ ecotx_t *eco_getfamily(ecotx_t *taxon,
if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex,tax);
return eco_findtaxonatrank(taxon,rankindex);
}
ecotx_t *eco_getkingdom(ecotx_t *taxon,
@ -230,7 +307,7 @@ ecotx_t *eco_getkingdom(ecotx_t *taxon,
if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex,tax);
return eco_findtaxonatrank(taxon,rankindex);
}
ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,
@ -248,5 +325,5 @@ ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,
if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex,tax);
return eco_findtaxonatrank(taxon,rankindex);
}

303
tools/ecoPCRFilter.py Executable file
View File

@ -0,0 +1,303 @@
#!/usr/bin/env python
import struct
import sys
import os
import gzip
#####
#
# Generic file function
#
#####
class Filter(object):
def __init__(self,path):
self._path = path
self._taxonFile = "%s.tdx" % self._path
self._ranksFile = "%s.rdx" % self._path
self._namesFile = "%s.ndx" % self._path
self._taxonomy, self._index, self._ranks, self._name = self.__readNodeTable()
def __universalOpen(self,file):
if isinstance(file,str):
if file[-3:] == '.gz':
rep = gzip.open(file)
else:
rep = open(file)
else:
rep = file
return rep
def __universalTell(self,file):
if isinstance(file, gzip.GzipFile):
file=file.myfileobj
return file.tell()
def __fileSize(self,file):
if isinstance(file, gzip.GzipFile):
file=file.myfileobj
pos = file.tell()
file.seek(0,2)
length = file.tell()
file.seek(pos,0)
return length
def __progressBar(self,pos,max,reset=False,delta=[]):
if reset:
del delta[:]
if not delta:
delta.append(time.time())
delta.append(time.time())
delta[1]=time.time()
elapsed = delta[1]-delta[0]
percent = float(pos)/max * 100
remain = time.strftime('%H:%M:%S',time.gmtime(elapsed / percent * (100-percent)))
bar = '#' * int(percent/2)
bar+= '|/-\\-'[pos % 5]
bar+= ' ' * (50 - int(percent/2))
sys.stderr.write('\r%5.1f %% |%s] remain : %s' %(percent,bar,remain))
#####
#
# Iterator functions
#
#####
def __ecoRecordIterator(self,file):
file = self.__universalOpen(file)
(recordCount,) = struct.unpack('> I',file.read(4))
for i in xrange(recordCount):
(recordSize,)=struct.unpack('>I',file.read(4))
record = file.read(recordSize)
yield record
def __ecoNameIterator(self):
for record in self.__ecoRecordIterator(self._namesFile):
lrecord = len(record)
lnames = lrecord - 16
(isScientificName,namelength,classLength,indextaxid,names)=struct.unpack('> I I I I %ds' % lnames, record)
name=names[:namelength]
classname=names[namelength:]
yield (name,classname,indextaxid)
def __ecoTaxonomicIterator(self):
for record in self.__ecoRecordIterator(self._taxonFile):
lrecord = len(record)
lnames = lrecord - 16
(taxid,rankid,parentidx,nameLength,name)=struct.unpack('> I I I I %ds' % lnames, record)
yield (taxid,rankid,parentidx,name)
def __ecoSequenceIterator(self,file):
for record in self.__ecoRecordIterator(file):
lrecord = len(record)
lnames = lrecord - (4*4+20)
(taxid,seqid,deflength,seqlength,cptseqlength,string)=struct.unpack('> I 20s I I I %ds' % lnames, record)
de = string[:deflength]
seq = gzip.zlib.decompress(string[deflength:])
yield (taxid,seqid,deflength,seqlength,cptseqlength,de,seq)
def __ecoRankIterator(self):
for record in self.__ecoRecordIterator(self._ranksFile):
yield record
#####
#
# Indexes
#
#####
def __ecoNameIndex(self):
indexName = [x for x in self.__ecoNameIterator()]
return indexName
def __ecoRankIndex(self):
rank = [r for r in self.__ecoRankIterator()]
return rank
def __ecoTaxonomyIndex(self):
taxonomy = []
index = {}
i = 0;
for x in self.__ecoTaxonomicIterator():
taxonomy.append(x)
index[x[0]] = i
i = i + 1
return taxonomy, index
def __readNodeTable(self):
taxonomy, index = self.__ecoTaxonomyIndex()
ranks = self.__ecoRankIndex()
name = self.__ecoNameIndex()
return taxonomy,index,ranks,name
def findTaxonByTaxid(self,taxid):
return self._taxonomy[self._index[taxid]]
#####
#
# PUBLIC METHODS
#
#####
def subTreeIterator(self, taxid):
"return subtree for given taxonomic id "
idx = self._index[taxid]
yield self._taxonomy[idx]
for t in self._taxonomy:
if t[2] == idx:
for subt in self.subTreeIterator(t[0]):
yield subt
def parentalTreeIterator(self, taxid):
"""
return parental tree for given taxonomic id starting from
first ancester to the root.
"""
taxon=self.findTaxonByTaxid(taxid)
while taxon[2]!= 0:
yield taxon
taxon = self._taxonomy[taxon[2]]
yield self._taxonomy[0]
def ecoPCRResultIterator(self, file):
"iteration on ecoPCR result file"
file = self.__universalOpen(file)
data = ColumnFile(file,
sep='|',
types=(str,int,int,
str,int,str,
int,str,int,
str,int,str,
str,str,int,
str,int,int,
str,str),skip='#')
for ac, sq_len, taxid,\
rank, sp_taxid, species,\
ge_taxid, genus, fa_taxid,\
family, sk_taxid, s_kgdom,\
strand, oligo_1, error_1,\
oligo_2, error_2, amp_len,\
sq_des, definition in data:
yield {'ac':ac, 'sq_len':sq_len, 'taxid':taxid,
'rank':rank, 'sp_taxid':sp_taxid, 'species':species,
'ge_taxid':ge_taxid, 'genus':genus, 'fa_taxid':fa_taxid,
'family':family, 'sk_taxid':sk_taxid, 's_kgdom':s_kgdom,
'strand':strand, 'oligo_1':oligo_1, 'error_1':error_1,
'oligo_2':oligo_2, 'error_2':error_2, 'amp_len':amp_len,
'sq_des':sq_des, 'definition':definition}
def rankFilter(self,rankid,filter):
return self._ranks[rankid] == filter
def lastCommonTaxon(self,taxid_1, taxid_2):
t1 = [x[0] for x in self.parentalTreeIterator(taxid_1)]
t2 = [x[0] for x in self.parentalTreeIterator(taxid_2)]
t1.reverse()
t2.reverse()
count = t1 < t2 and len(t1) or len(t2)
for i in range(count):
if t1[i] != t2[i]:
return t1[i-1]
class ColumnFile(object):
def __init__(self,stream,sep=None,strip=True,types=None,skip=None):
if isinstance(stream,str):
self._stream = open(stream)
elif hasattr(stream,'next'):
self._stream = stream
else:
raise ValueError,'stream must be string or an iterator'
self._delimiter=sep
self._strip=strip
if types:
self._types=[x for x in types]
for i in xrange(len(self._types)):
if self._types[i] is bool:
self._types[i]=ColumnFile.str2bool
else:
self._types=None
self._skip = skip
def str2bool(x):
return bool(eval(x.strip()[0].upper(),{'T':True,'V':True,'F':False}))
str2bool = staticmethod(str2bool)
def __iter__(self):
return self
def next(self):
ligne = self._stream.next()
while ligne[0] == self._skip:
ligne = self._stream.next()
data = ligne.split(self._delimiter)
if self._strip or self._types:
data = [x.strip() for x in data]
if self._types:
it = self.endLessIterator(self._types)
data = [x[1](x[0]) for x in ((y,it.next()) for y in data)]
return data
def endLessIterator(self,endedlist):
for x in endedlist:
yield x
while(1):
yield endedlist[-1]
class Table(list):
def __init__(self, headers, types):
list.__init__(self)
self.headers = headers
self.types = types
self.lines = []
def printTable(self):
for h in self.headers:
print "\t%s\t|" % h,
print "\n"
for l in self.lines:
for c in l:
print "\t%s\t|" % c
print "\n"
def getColumn(self,n):
print "\t%s\n" % self.header[n]
for i in range(len(self.lines)):
print "\t%s\n" % i[n]

View File

@ -69,7 +69,6 @@ def endLessIterator(endedlist):
while(1):
yield endedlist[-1]
class ColumnFile(object):
def __init__(self,stream,sep=None,strip=True,types=None):
@ -149,7 +148,6 @@ def readNodeTable(file):
bool,bool,bool,str))
print >>sys.stderr,"Reading taxonomy dump file..."
taxonomy=[[n[0],n[2],n[1]] for n in nodes]
print >>sys.stderr,"List all taxonomy rank..."
ranks =list(set(x[1] for x in taxonomy))
ranks.sort()
@ -171,15 +169,14 @@ def readNodeTable(file):
return taxonomy,ranks,index
def scientificNameIterator(file):
def nameIterator(file):
file = universalOpen(file)
names = ColumnFile(file,
sep='|',
types=(int,str,
str,str))
for taxid,name,unique,classname,white in names:
if classname == 'scientific name':
yield taxid,name
yield taxid,name,classname
def mergedNodeIterator(file):
file = universalOpen(file)
@ -201,7 +198,11 @@ def readTaxonomyDump(taxdir):
taxonomy,ranks,index = readNodeTable('%s/nodes.dmp' % taxdir)
print >>sys.stderr,"Adding scientific name..."
for taxid,name in scientificNameIterator('%s/names.dmp' % taxdir):
alternativeName=[]
for taxid,name,classname in nameIterator('%s/names.dmp' % taxdir):
alternativeName.append((name,classname,index[taxid]))
if classname == 'scientific name':
taxonomy[index[taxid]].append(name)
print >>sys.stderr,"Adding taxid alias..."
@ -212,7 +213,7 @@ def readTaxonomyDump(taxdir):
for taxid in deletedNodeIterator('%s/delnodes.dmp' % taxdir):
index[taxid]=None
return taxonomy,ranks,index
return taxonomy,ranks,alternativeName,index
#####
@ -267,27 +268,51 @@ def genbankEntryParser(entry):
Tx = None
return {'id':Id,'taxid':Tx,'definition':De,'sequence':Sq}
_fastaParseID = re.compile('(?<=^>)[^ ]+')
_fastaParseDE = re.compile('(?<=^>).+',)
_fastaParseSQ = re.compile('^[^>].+',re.MULTILINE+re.DOTALL)
_fastaParseTX = re.compile('(?<=[[Tt]axon:) *[0-9]+ *(?=])')
######################
def fastaEntryParser(entry):
Id = _fastaParseID.findall(entry)[0]
De = _fastaParseDE.findall(entry)[0].split(None,1)[1:]
if not De:
De=''
else:
De=De[0]
Sq = cleanSeq(_fastaParseSQ.findall(entry)[0].upper())
_cleanDef = re.compile('[\nDE]')
def cleanDef(definition):
return _cleanDef.sub('',definition)
_emblParseID = re.compile('(?<=^ID {3})[^ ]+(?=;)',re.MULTILINE)
_emblParseDE = re.compile('(?<=^DE {3}).+?\. *$(?=[^ ])',re.MULTILINE+re.DOTALL)
_emblParseSQ = re.compile('(?<=^ ).+?(?=^//$)',re.MULTILINE+re.DOTALL)
_emblParseTX = re.compile('(?<= /db_xref="taxon:)[0-9]+(?=")')
def emblEntryParser(entry):
Id = _emblParseID.findall(entry)[0]
De = ' '.join(cleanDef(_emblParseDE.findall(entry)[0]).split())
Sq = cleanSeq(_emblParseSQ.findall(entry)[0].upper())
try:
Tx = int(_fastaParseTX.findall(entry)[0])
Tx = int(_emblParseTX.findall(entry)[0])
except IndexError:
Tx = None
return {'id':Id,'taxid':Tx,'definition':De,'sequence':Sq}
######################
def parseFasta(seq):
title = seq[0].strip()[1:].split(None,1)
id=title[0]
if len(title) == 2:
field = title[1].split('; ')
else:
field=[]
info = dict(x.split('=') for x in field if '=' in x)
definition = ' '.join([x for x in field if '=' not in x])
seq=(''.join([x.strip() for x in seq[1:]])).upper()
return id,seq,definition,info
def fastaEntryParser(entry):
id,seq,definition,info = parseFasta(entry)
Tx = info.get('taxid',None)
if Tx is not None:
Tx=int(Tx)
return {'id':id,'taxid':Tx,'definition':definition,'sequence':seq}
def sequenceIteratorFactory(entryParser,entryIterator):
def sequenceIterator(file):
@ -381,6 +406,22 @@ def ecoRankPacker(rank):
return packed
def ecoNamePacker(name):
namelength = len(name[0])
classlength= len(name[1])
totalSize = namelength + classlength + 4 + 4 + 4 + 4
packed = struct.pack('> I I I I I %ds %ds' % (namelength,classlength),
totalSize,
int(name[1]=='scientific name'),
namelength,
classlength,
name[2],
name[0],
name[1])
return packed
def ecoSeqWriter(file,input,taxindex,parser):
output = open(file,'wb')
@ -439,17 +480,39 @@ def ecoRankWriter(file,ranks):
output.close()
def nameCmp(n1,n2):
name1=n1[0].upper()
name2=n2[0].upper()
if name1 < name2:
return -1
elif name1 > name2:
return 1
return 0
def ecoNameWriter(file,names):
output = open(file,'wb')
output.write(struct.pack('> I',len(names)))
names.sort(nameCmp)
for name in names:
output.write(ecoNamePacker(name))
output.close()
def ecoDBWriter(prefix,taxonomy,seqFileNames,parser):
ecoRankWriter('%s.rdx' % prefix, taxonomy[1])
ecoTaxWriter('%s.tdx' % prefix, taxonomy[0])
ecoNameWriter('%s.ndx' % prefix, taxonomy[2])
filecount = 0
for filename in seqFileNames:
filecount+=1
sk=ecoSeqWriter('%s_%03d.sdx' % (prefix,filecount),
filename,
taxonomy[2],
taxonomy[3],
parser)
if sk:
print >>sys.stderr,"Skipped entry :"
@ -464,32 +527,53 @@ def ecoParseOptions(arguments):
}
o,filenames = getopt.getopt(arguments,
'ht:n:gf',
'ht:n:gfe',
['help',
'taxonomy=',
'name=',
'genbank',
'fasta'])
'fasta',
'embl'])
for name,value in o:
if name in ('-h','--help'):
pass
printHelp()
exit()
elif name in ('-t','--taxonomy'):
opt['taxdir']=value
elif name in ('-n','--name'):
opt['prefix']=value
elif name in ('-g','--genbank'):
opt['parser']=sequenceIteratorFactory(genbankEntryParser,
entryIterator
)
entryIterator)
elif name in ('-f','--fasta'):
opt['parser']=sequenceIteratorFactory(fastaEntryParser,
fastaEntryIterator)
elif name in ('-e','--embl'):
opt['parser']=sequenceIteratorFactory(emblEntryParser,
entryIterator)
else:
raise ValueError,'Unknown option %s' % name
return opt,filenames
def printHelp():
print "-----------------------------------"
print " ecoPCRFormat.py"
print "-----------------------------------"
print "ecoPCRFormat.py [option] <argument>"
print "-----------------------------------"
print "-e --embl :[E]mbl format"
print "-f --fasta :[F]asta format"
print "-g --genbank :[G]enbank format"
print "-h --help :[H]elp - print this help"
print "-n --name :[N]ame of the new database created"
print "-t --taxonomy :[T]axonomy - path to the taxonomy database"
print " :bcp-like dump from GenBank taxonomy database."
print "-----------------------------------"
if __name__ == '__main__':
opt,filenames = ecoParseOptions(sys.argv[1:])

811
tools/ecoSort.py Executable file
View File

@ -0,0 +1,811 @@
#!/usr/bin/env python
import struct
import sys
import os
import gzip
import re
import string
from reportlab.graphics.shapes import *
from reportlab.graphics.charts.barcharts import VerticalBarChart
from reportlab.graphics.charts.piecharts import Pie
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib.units import cm
from reportlab.lib import colors
from reportlab.platypus import *
#####
#
# Generic file function
#
#####
class Filter(object):
"""
This object provides different iterators and method :
* findTaxonByTaxid
* subTreeIterator
* parentalTreeIterator
* ecoPCRResultIterator
* rankFilter
* lastCommonTaxon
see each method for more informations
"""
def __init__(self,path):
self._path = path
self._taxonFile = "%s.tdx" % self._path
self._ranksFile = "%s.rdx" % self._path
self._namesFile = "%s.ndx" % self._path
self._taxonomy, self._index, self._ranks, self._name = self.__readNodeTable()
def __universalOpen(self,file):
if isinstance(file,str):
if file[-3:] == '.gz':
rep = gzip.open(file)
else:
rep = open(file)
else:
rep = file
return rep
def __universalTell(self,file):
if isinstance(file, gzip.GzipFile):
file=file.myfileobj
return file.tell()
def __fileSize(self,file):
if isinstance(file, gzip.GzipFile):
file=file.myfileobj
pos = file.tell()
file.seek(0,2)
length = file.tell()
file.seek(pos,0)
return length
def __progressBar(self,pos,max,reset=False,delta=[]):
if reset:
del delta[:]
if not delta:
delta.append(time.time())
delta.append(time.time())
delta[1]=time.time()
elapsed = delta[1]-delta[0]
percent = float(pos)/max * 100
remain = time.strftime('%H:%M:%S',time.gmtime(elapsed / percent * (100-percent)))
bar = '#' * int(percent/2)
bar+= '|/-\\-'[pos % 5]
bar+= ' ' * (50 - int(percent/2))
sys.stderr.write('\r%5.1f %% |%s] remain : %s' %(percent,bar,remain))
#####
#
# Iterator functions
#
#####
def __ecoRecordIterator(self,file):
file = self.__universalOpen(file)
(recordCount,) = struct.unpack('> I',file.read(4))
for i in xrange(recordCount):
(recordSize,)=struct.unpack('>I',file.read(4))
record = file.read(recordSize)
yield record
def __ecoNameIterator(self):
for record in self.__ecoRecordIterator(self._namesFile):
lrecord = len(record)
lnames = lrecord - 16
(isScientificName,namelength,classLength,indextaxid,names)=struct.unpack('> I I I I %ds' % lnames, record)
name=names[:namelength]
classname=names[namelength:]
yield (name,classname,indextaxid)
def __ecoTaxonomicIterator(self):
for record in self.__ecoRecordIterator(self._taxonFile):
lrecord = len(record)
lnames = lrecord - 16
(taxid,rankid,parentidx,nameLength,name)=struct.unpack('> I I I I %ds' % lnames, record)
yield (taxid,rankid,parentidx,name)
def __ecoSequenceIterator(self,file):
for record in self.__ecoRecordIterator(file):
lrecord = len(record)
lnames = lrecord - (4*4+20)
(taxid,seqid,deflength,seqlength,cptseqlength,string)=struct.unpack('> I 20s I I I %ds' % lnames, record)
de = string[:deflength]
seq = gzip.zlib.decompress(string[deflength:])
yield (taxid,seqid,deflength,seqlength,cptseqlength,de,seq)
def __ecoRankIterator(self):
for record in self.__ecoRecordIterator(self._ranksFile):
yield record
#####
#
# Indexes
#
#####
def __ecoNameIndex(self):
indexName = [x for x in self.__ecoNameIterator()]
return indexName
def __ecoRankIndex(self):
rank = [r for r in self.__ecoRankIterator()]
return rank
def __ecoTaxonomyIndex(self):
taxonomy = []
index = {}
i = 0;
for x in self.__ecoTaxonomicIterator():
taxonomy.append(x)
index[x[0]] = i
i = i + 1
return taxonomy, index
def __readNodeTable(self):
taxonomy, index = self.__ecoTaxonomyIndex()
ranks = self.__ecoRankIndex()
name = self.__ecoNameIndex()
return taxonomy,index,ranks,name
#####
#
# PUBLIC METHODS
#
#####
def findTaxonByTaxid(self,taxid):
"""
Returns a list containing [taxid,rankid,parent_index,nameLength,name]
It takes one argument : a taxonomic id
"""
return self._taxonomy[self._index[taxid]]
def subTreeIterator(self, taxid):
"""
Returns subtree for given taxid from first child
to last child. It takes one argument : a taxonomic id
"""
idx = self._index[taxid]
yield self._taxonomy[idx]
for t in self._taxonomy:
if t[2] == idx:
for subt in self.subTreeIterator(t[0]):
yield subt
def parentalTreeIterator(self, taxid):
"""
return parental tree for given taxonomic id starting from
first ancester to the root.
"""
taxon=self.findTaxonByTaxid(taxid)
while taxon[2]!= 0:
yield taxon
taxon = self._taxonomy[taxon[2]]
yield self._taxonomy[0]
def ecoPCRResultIterator(self, file):
"""
iteration on ecoPCR result file"
It returns a dictionnary
"""
file = self.__universalOpen(file)
data = ColumnFile(file,
sep='|',
types=(str,int,int,
str,int,str,
int,str,int,
str,int,str,
str,str,int,
str,int,int,
str,str),skip='#')
for ac, sq_len, taxid,\
rank, sp_taxid, species,\
ge_taxid, genus, fa_taxid,\
family, sk_taxid, s_kgdom,\
strand, oligo_1, error_1,\
oligo_2, error_2, amp_len,\
sq_des, definition in data:
yield {'ac':ac, 'sq_len':sq_len, 'taxid':taxid,
'rank':rank, 'sp_taxid':sp_taxid, 'species':species,
'ge_taxid':ge_taxid, 'genus':genus, 'fa_taxid':fa_taxid,
'family':family, 'sk_taxid':sk_taxid, 's_kgdom':s_kgdom,
'strand':strand, 'oligo_1':oligo_1, 'error_1':error_1,
'oligo_2':oligo_2, 'error_2':error_2, 'amp_len':amp_len,
'sq_des':sq_des, 'definition':definition}
def rankFilter(self,rankid,filter):
"""
boolean telling whether rankid match filter
takes 2 arguments :
1- rankid
2- filter (i.e genus)
"""
return self._ranks[rankid] == filter
def lastCommonTaxon(self,taxid_1, taxid_2):
"""
returns a last common parent for two given taxon.
It starts from the root and goes down the tree
until their parents diverge.
It takes 2 arguments :
1- taxid 1
2- taxid 2
"""
t1 = [x[0] for x in self.parentalTreeIterator(taxid_1)]
t2 = [x[0] for x in self.parentalTreeIterator(taxid_2)]
t1.reverse()
t2.reverse()
count = t1 < t2 and len(t1) or len(t2)
for i in range(count):
if t1[i] != t2[i]:
return t1[i-1]
return t1[count-1]
class ColumnFile(object):
"""
cut an ecoPCR file into a list
"""
def __init__(self,stream,sep=None,strip=True,types=None,skip=None):
if isinstance(stream,str):
self._stream = open(stream)
elif hasattr(stream,'next'):
self._stream = stream
else:
raise ValueError,'stream must be string or an iterator'
self._delimiter=sep
self._strip=strip
if types:
self._types=[x for x in types]
for i in xrange(len(self._types)):
if self._types[i] is bool:
self._types[i]=ColumnFile.str2bool
else:
self._types=None
self._skip = skip
self._oligo = {}
def str2bool(x):
return bool(eval(x.strip()[0].upper(),{'T':True,'V':True,'F':False}))
str2bool = staticmethod(str2bool)
def __iter__(self):
return self
def next(self):
ligne = self._stream.next()
while ligne[0] == self._skip:
ligne = self._stream.next()
data = ligne.split(self._delimiter)
if self._strip or self._types:
data = [x.strip() for x in data]
if self._types:
it = self.endLessIterator(self._types)
data = [x[1](x[0]) for x in ((y,it.next()) for y in data)]
return data
def endLessIterator(self,endedlist):
for x in endedlist:
yield x
while(1):
yield endedlist[-1]
def getOligo(self,line):
if line[2:8] == 'direct':
r = re.compile('(?<=direct strand oligo1 : )[A-Z]+(?= *)')
self._oligo['o1'] = r.findall(line)
if line[2:9] == 'reverse':
r = re.compile('(?<=reverse strand oligo2 : )[A-Z]+(?= *)')
self._oligo['o2'] = r.findall(line)
return None
###########
#
# DATA STRUCTURE
#
###########
class ecoTable(list):
"""
Data object inheriting from list
"""
def __init__(self, headers, types):
list.__init__(self)
self.headers = headers
self.types = types
def __setitem__ (self,key,value):
"""
method overloaded to check data types
"""
for e in range(len(value)):
value[e] = self.types[e](value[e])
list.__setitem__(self,key,value)
def __getitem__(self,index):
newtable = ecoTable(self.headers,self.types)
if isinstance(index,slice):
newtable.extend(list.__getitem__(self,index))
else:
newtable.append(list.__getitem__(self,index))
return newtable
def getColumns(self,columnList):
newhead = [self.headers[x] for x in columnList]
newtype = [self.types[x] for x in columnList]
newtable = ecoTable(newhead,newtype)
for line in self:
newtable.append([line[x] for x in columnList])
return newtable
###########
#
# PARSE FUNCTIONS
#
###########
def _parseOligoResult(filter,file,strand):
seq = {}
if strand == 'direct':
key = 'oligo_1'
elif strand == 'reverse':
key = 'oligo_2'
for s in filter.ecoPCRResultIterator(file):
o = s[key]
taxid = s['taxid']
if not seq.has_key(o):
seq[o] = [1,taxid]
else:
seq[o][0] = seq[o][0] + 1
seq[o][1] = filter.lastCommonTaxon(seq[o][1],taxid)
return seq
def _parseTaxonomyResult(table):
tax = {}
for l in table:
taxid = l[2]
scName = l[3]
count = l[1]
if not tax.has_key(taxid):
tax[taxid] = [1,scName,count]
else:
tax[taxid][0] = tax[taxid][0] + 1
tax[taxid][2] = tax[taxid][2] + count
return tax
def _sortTable(e1,e2):
e1 = e1[1]
e2 = e2[1]
if e1 < e2:
return 1
if e1 > e2:
return -1
return 0
def _countOligoMismatch(o1,o2):
"""
define mismatch between two oligonucleotids
return number of mismatch
"""
mmatch = 0
if len(o1) < len(o2):
mmatch = int(len(o2) - len(o1))
for i in range(len(o1)):
try:
if o1[i] != o2[i]:
mmatch = mmatch + 1
except:
mmatch = mmatch + 1
return mmatch
###########
#
# TOOLS FUNCTIONS
#
###########
def customSort(table,x,y):
"""
"""
x = x-1
y = y-1
h = (table.headers[x],table.headers[y])
t = (table.types[x],table.types[y])
cTable = ecoTable(h,t)
tmp = {}
for l in table:
if tmp.has_key(l[x]):
tmp[l[x]] = tmp[l[x]] + l[y]
else:
tmp[l[x]] = l[y]
for k,v in tmp.items():
cTable.append([k,v])
return cTable
def countColumnOccurrence(table,x):
x = x-1
h = (table.headers[x],"count")
t = (table.types[x],int)
cTable = Table(h,t)
tmp = {}
for l in table:
if tmp.has_key(l[x]):
tmp[l[x]] = tmp[l[x]] + 1
else:
tmp[l[x]] = 1
for k,v in tmp.items():
cTable.append([k,v])
return cTable
def buildSpecificityTable(table):
header = ("mismatch","taxon","count")
type = (int,str,int)
speTable = ecoTable(header,type)
tmp = {}
for l in table:
if not tmp.has_key(l[5]):
tmp[l[5]] = {}
if not tmp[l[5]].has_key(l[3]):
tmp[l[5]][l[3]] = l[1]
else:
tmp[l[5]][l[3]] = tmp[l[5]][l[3]] + l[1]
for mismatch in tmp.items():
for taxon,count in mismatch[1].items():
speTable.append([mismatch[0],taxon,count])
return speTable
def buildOligoTable(table, file, filter, oligoRef, strand='direct'):
"""
Fills and sorts a Table object with ecoPCR result file
Takes 4 arguments
1- Table object
2- ecoPCR result file path
3- Filter Object
4- the oligo used in ecoPCR
5- the oligo type : direct or reverse
"""
seq = _parseOligoResult(filter, file, strand)
i = 0
for oligo, info in seq.items():
table.append(0)
count, lctTaxid = info[0], info[1]
scName = filter.findTaxonByTaxid(info[1])[3]
rank = filter._ranks[filter.findTaxonByTaxid(info[1])[1]]
mismatch = _countOligoMismatch(oligoRef,oligo)
table[i]=[oligo,count,lctTaxid,scName,rank,mismatch]
i = i + 1
table.sort(_sortTable)
def buildTaxonomicTable(table):
"""
Fill a Table object with a taxonomic synthesis
"""
taxHeaders = ("scName","numOfOligo","numOfAmpl","taxid")
taxTypes = (str, int, int, int)
taxTable = ecoTable(taxHeaders, taxTypes)
tax = _parseTaxonomyResult(table)
i = 0
for taxid, info in tax.items():
taxTable.append(0)
numOfOligo, scName, numOfAmpl = info[0], info[1], info[2]
taxTable[i]=[scName,numOfOligo,numOfAmpl,taxid]
i = i + 1
taxTable.sort(_sortTable)
return taxTable
def _parseSequenceResult(filter,file,id):
sequences = {}
idIndex = {}
if id == 'family':
key = 'fa_taxid'
elif id == 'genus':
key = 'ge_taxid'
else:
key = 'taxid'
for s in filter.ecoPCRResultIterator(file):
seq = s['sq_des']
id = s[key]
if not idIndex.has_key(id):
idIndex[id] = []
if not sequences.has_key(seq):
sequences[seq] = [id]
else:
sequences[seq].append(id)
return sequences, idIndex
def _sameValuesInList(array):
for i in range(len(array)-1):
if array[i] != array[i+1]:
return False
return True
def _sortSequences(file,filter):
sequences, idIndex = _parseSequenceResult(filter,file,'species')
for s,id in sequences.items():
if len(id) == 1 or _sameValuesInList(id):
idIndex[id[0]].append(1)
else:
for e in id:
idIndex[e].append(0)
for id,values in idIndex.items():
idIndex[id] = float(values.count(1)) / float(len(values)) * 100
identified = {}
non_identified = {}
ambiguous = {}
return sequences, idIndex
def getIntraSpeciesDiversity(table,file,filter):
intraDiv = {}
seq, idIndex = _sortSequences(file,filter)
for id,percent in idIndex.items():
if percent == 100:
intraDiv[id] = [0,[]]
for seq,idList in sequences.items():
if id in idList:
intraDiv[id][0] = intraDiv[id][0] + 1
intraDiv[id][1].append(seq)
for id, values in intraDiv.items():
table.append(id,values[0],values[1])
###########
#
# OUTPUT FUNCTIONS
#
###########
def printTable(table):
"""
Displays the content a of Table object
Take 1 arguments
1- Table object
"""
format = ("%20s | " * len(table.headers))[:-3]
print format % tuple([str(e) for e in table.headers ]) +"\n" + "-"*23*len(table.headers)
for l in table:
print format % tuple([str(e) for e in l ])
print "# %d results" % len(table)
def saveAsCSV(table,path):
"""
Creates a csv file from a Table object
Takes 2 arguments
1- Table object
2- path of the file-to-be
"""
file = open(path,'w')
file.write(','.join([str(e) for e in table.headers ]) + "\n")
for l in table:
file.write(','.join([str(e) for e in l ]) + "\n")
file.close()
def grepTable(table,col,pattern):
"""
Filters a Table object with regular expression
Takes 3 arguments :
1- Table object
2- number of column to match with
3- regular expression pattern
Returns a Table object
"""
col = col -1
p = re.compile(pattern, re.IGNORECASE)
out = ecoTable(table.headers,table.types)
for l in table:
if p.search(l[col]):
out.append(l)
return out
###########
#
# GRAPH FUNCTIONS
#
###########
class EcoGraph(object):
def __init__(self):
self._styles = getSampleStyleSheet()
self._element = []
self._element.append(self._box(Paragraph("EcoPCR report", self._styles['Title'])))
self._element.append(Spacer(0, 0.5 * cm))
def _box(self,flt, center=True):
box_style = [('BOX', (0, 0), (-1, -1), 0.5, colors.lightgrey)]
if center:
box_style += [('ALIGN', (0, 0), (-1, -1), 'CENTER')]
return Table([[flt]], style=box_style)
def _addChart(self,chart,title):
drawing = Drawing(300, 250)
drawing.add(chart)
self._element.append(self._box(Paragraph(title, self._styles['Normal'])))
self._element.append(self._box(drawing))
self._element.append(Spacer(0, 0.5 * cm))
def _formatData(self,table):
data, label = [],[]
for i in range(len(table)):
label.append(table[i][0])
data.append(table[i][1])
return data, label
def makePie(self, table, title):
data, label = self._formatData(table)
pie = Pie()
pie.x = 100
pie.y = 100
pie.data = data
pie.labels = label
self._addChart(pie, title)
def makeHistogram(self, table, title):
data, label = self._formatData(table)
data = [tuple(data)]
histo = VerticalBarChart()
histo.x = 10
histo.y = 70
histo.height = 150
histo.width = 300
histo.bars.strokeWidth = 1
histo.barSpacing = 1
histo.barLabels.dy = 5
histo.barLabelFormat = '%d'
histo.barLabels.fontSize = 9 - (len(data[0])/10)
histo.data = data
histo.categoryAxis.labels.boxAnchor = 'e'
histo.categoryAxis.labels.textAnchor = 'start'
histo.categoryAxis.labels.dx = -40
histo.categoryAxis.labels.dy = -50
histo.categoryAxis.labels.angle = 45
histo.categoryAxis.labels.width = 10
histo.categoryAxis.labels.height = 4
histo.categoryAxis.categoryNames = label
histo.categoryAxis.strokeWidth = 1
histo.categoryAxis.labels.fontSize = 8
histo.valueAxis.valueMin = min(data[0])*0.7
histo.valueAxis.valueMax = max(data[0])
step = (max(data[0]) - min(data[0])) / 10
histo.valueAxis.valueStep = step > 1 and step or 1
self._addChart(histo, title)
def makeReport(self,path):
doc = SimpleDocTemplate(path)
doc.build(self._element)
######################
def init():
file = "/Users/bessiere/Documents/workspace/ecoPCR/src/toto.tmp"
oligo = {'o1':'ATGTTTAAAA','o2':'ATGGGGGTATTG'}
filter = Filter("/ecoPCRDB/gbmam")
headers = ("oligo", "Num", "LCT Taxid", "Sc Name", "Rank", "distance")
types = (str, int, int, str, str, int)
o1Table = ecoTable(headers, types)
o2Table = ecoTable(headers, types)
buildOligoTable(o1Table, file, filter, oligo['o1'], 'direct')
buildOligoTable(o2Table, file, filter, oligo['o2'], 'reverse')
taxTable = buildTaxonomicTable(o1Table)
speTable = buildSpecificityTable(o1Table)
return o1Table, o2Table, taxTable
def start():
file = "/Users/bessiere/Documents/workspace/ecoPCR/src/toto.tmp"
filter = Filter("/ecoPCRDB/gbmam")
speHeaders = ("taxid","num of seq","list of seq")
speTypes = (int,int,list)
speTable = ecoTable(speHeaders,speTypes)
getIntraSpeciesDiversity(speTable, file, filter)