diff --git a/TODO b/TODO index 062a3fa..66e715a 100644 --- a/TODO +++ b/TODO @@ -12,5 +12,9 @@ o CDS speedup à tester o CDS ajouter tools/dbchloro et tools/compare -o $red +o concurency issue in formatdb + +o parameters in go_subdb.sh + +o models in DB_DIR diff --git a/data/cds/chlorodb/fasta/AC_000188.fst.gz b/data/cds/chlorodb/fasta/AC_000188.fst.gz index f4842c3..b52e08f 100644 Binary files a/data/cds/chlorodb/fasta/AC_000188.fst.gz and b/data/cds/chlorodb/fasta/AC_000188.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_000925.fst.gz b/data/cds/chlorodb/fasta/NC_000925.fst.gz index e44f5b5..e0321cb 100644 Binary files a/data/cds/chlorodb/fasta/NC_000925.fst.gz and b/data/cds/chlorodb/fasta/NC_000925.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_000926.fst.gz b/data/cds/chlorodb/fasta/NC_000926.fst.gz index 4b9501f..63d7a7d 100644 Binary files a/data/cds/chlorodb/fasta/NC_000926.fst.gz and b/data/cds/chlorodb/fasta/NC_000926.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_000927.fst.gz b/data/cds/chlorodb/fasta/NC_000927.fst.gz index 1bfc1c8..1056641 100644 Binary files a/data/cds/chlorodb/fasta/NC_000927.fst.gz and b/data/cds/chlorodb/fasta/NC_000927.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_000932.fst.gz b/data/cds/chlorodb/fasta/NC_000932.fst.gz index 584fead..6fbda8b 100644 Binary files a/data/cds/chlorodb/fasta/NC_000932.fst.gz and b/data/cds/chlorodb/fasta/NC_000932.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_001319.fst.gz b/data/cds/chlorodb/fasta/NC_001319.fst.gz index af37fb4..810b8db 100644 Binary files a/data/cds/chlorodb/fasta/NC_001319.fst.gz and b/data/cds/chlorodb/fasta/NC_001319.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_001320.fst.gz b/data/cds/chlorodb/fasta/NC_001320.fst.gz index feae685..4c048fd 100644 Binary files a/data/cds/chlorodb/fasta/NC_001320.fst.gz and b/data/cds/chlorodb/fasta/NC_001320.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_001568.fst.gz 
b/data/cds/chlorodb/fasta/NC_001568.fst.gz index 44e3caa..907e292 100644 Binary files a/data/cds/chlorodb/fasta/NC_001568.fst.gz and b/data/cds/chlorodb/fasta/NC_001568.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_001603.fst.gz b/data/cds/chlorodb/fasta/NC_001603.fst.gz index 6f9a8d1..d1c0b81 100644 Binary files a/data/cds/chlorodb/fasta/NC_001603.fst.gz and b/data/cds/chlorodb/fasta/NC_001603.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_001631.fst.gz b/data/cds/chlorodb/fasta/NC_001631.fst.gz index 90533e2..ac33655 100644 Binary files a/data/cds/chlorodb/fasta/NC_001631.fst.gz and b/data/cds/chlorodb/fasta/NC_001631.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_001666.fst.gz b/data/cds/chlorodb/fasta/NC_001666.fst.gz index a64bc3c..f3a9ae9 100644 Binary files a/data/cds/chlorodb/fasta/NC_001666.fst.gz and b/data/cds/chlorodb/fasta/NC_001666.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_001675.fst.gz b/data/cds/chlorodb/fasta/NC_001675.fst.gz index 76b6c7f..131d112 100644 Binary files a/data/cds/chlorodb/fasta/NC_001675.fst.gz and b/data/cds/chlorodb/fasta/NC_001675.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_001713.fst.gz b/data/cds/chlorodb/fasta/NC_001713.fst.gz index 691b8f9..2f99f50 100644 Binary files a/data/cds/chlorodb/fasta/NC_001713.fst.gz and b/data/cds/chlorodb/fasta/NC_001713.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_001840.fst.gz b/data/cds/chlorodb/fasta/NC_001840.fst.gz index 1e892a3..a17cd1c 100644 Binary files a/data/cds/chlorodb/fasta/NC_001840.fst.gz and b/data/cds/chlorodb/fasta/NC_001840.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_001865.fst.gz b/data/cds/chlorodb/fasta/NC_001865.fst.gz index 0ffbd6c..10a1bfe 100644 Binary files a/data/cds/chlorodb/fasta/NC_001865.fst.gz and b/data/cds/chlorodb/fasta/NC_001865.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_001879.fst.gz b/data/cds/chlorodb/fasta/NC_001879.fst.gz index b18fd65..e9aa342 100644 Binary files 
a/data/cds/chlorodb/fasta/NC_001879.fst.gz and b/data/cds/chlorodb/fasta/NC_001879.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_002186.fst.gz b/data/cds/chlorodb/fasta/NC_002186.fst.gz index d91bae7..8417aa3 100644 Binary files a/data/cds/chlorodb/fasta/NC_002186.fst.gz and b/data/cds/chlorodb/fasta/NC_002186.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_002202.fst.gz b/data/cds/chlorodb/fasta/NC_002202.fst.gz index c54f3ee..329942f 100644 Binary files a/data/cds/chlorodb/fasta/NC_002202.fst.gz and b/data/cds/chlorodb/fasta/NC_002202.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_002652.fst.gz b/data/cds/chlorodb/fasta/NC_002652.fst.gz index 84a5e64..35e59e6 100644 Binary files a/data/cds/chlorodb/fasta/NC_002652.fst.gz and b/data/cds/chlorodb/fasta/NC_002652.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_002693.fst.gz b/data/cds/chlorodb/fasta/NC_002693.fst.gz index a02a434..22ac1f2 100644 Binary files a/data/cds/chlorodb/fasta/NC_002693.fst.gz and b/data/cds/chlorodb/fasta/NC_002693.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_002694.fst.gz b/data/cds/chlorodb/fasta/NC_002694.fst.gz index fe10b0a..b75dc30 100644 Binary files a/data/cds/chlorodb/fasta/NC_002694.fst.gz and b/data/cds/chlorodb/fasta/NC_002694.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_002762.fst.gz b/data/cds/chlorodb/fasta/NC_002762.fst.gz index fcac93c..f391cb5 100644 Binary files a/data/cds/chlorodb/fasta/NC_002762.fst.gz and b/data/cds/chlorodb/fasta/NC_002762.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_003119.fst.gz b/data/cds/chlorodb/fasta/NC_003119.fst.gz index cd4cfb5..70cbde2 100644 Binary files a/data/cds/chlorodb/fasta/NC_003119.fst.gz and b/data/cds/chlorodb/fasta/NC_003119.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_003386.fst.gz b/data/cds/chlorodb/fasta/NC_003386.fst.gz index 1aa50f1..857a5a0 100644 Binary files a/data/cds/chlorodb/fasta/NC_003386.fst.gz and b/data/cds/chlorodb/fasta/NC_003386.fst.gz differ diff 
--git a/data/cds/chlorodb/fasta/NC_004115.fst.gz b/data/cds/chlorodb/fasta/NC_004115.fst.gz index 88d5d66..793905d 100644 Binary files a/data/cds/chlorodb/fasta/NC_004115.fst.gz and b/data/cds/chlorodb/fasta/NC_004115.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_004543.fst.gz b/data/cds/chlorodb/fasta/NC_004543.fst.gz index 74684ba..11dca98 100644 Binary files a/data/cds/chlorodb/fasta/NC_004543.fst.gz and b/data/cds/chlorodb/fasta/NC_004543.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_004561.fst.gz b/data/cds/chlorodb/fasta/NC_004561.fst.gz index 1082a52..cacd729 100644 Binary files a/data/cds/chlorodb/fasta/NC_004561.fst.gz and b/data/cds/chlorodb/fasta/NC_004561.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_004677.fst.gz b/data/cds/chlorodb/fasta/NC_004677.fst.gz index 809e6b7..20d33e0 100644 Binary files a/data/cds/chlorodb/fasta/NC_004677.fst.gz and b/data/cds/chlorodb/fasta/NC_004677.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_004766.fst.gz b/data/cds/chlorodb/fasta/NC_004766.fst.gz index c13006a..b4bc827 100644 Binary files a/data/cds/chlorodb/fasta/NC_004766.fst.gz and b/data/cds/chlorodb/fasta/NC_004766.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_004799.fst.gz b/data/cds/chlorodb/fasta/NC_004799.fst.gz index 4bdb528..6068441 100644 Binary files a/data/cds/chlorodb/fasta/NC_004799.fst.gz and b/data/cds/chlorodb/fasta/NC_004799.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_004993.fst.gz b/data/cds/chlorodb/fasta/NC_004993.fst.gz index ca5efc1..2fa9de9 100644 Binary files a/data/cds/chlorodb/fasta/NC_004993.fst.gz and b/data/cds/chlorodb/fasta/NC_004993.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_005086.fst.gz b/data/cds/chlorodb/fasta/NC_005086.fst.gz index f0ebf4f..9638ad5 100644 Binary files a/data/cds/chlorodb/fasta/NC_005086.fst.gz and b/data/cds/chlorodb/fasta/NC_005086.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_005087.fst.gz b/data/cds/chlorodb/fasta/NC_005087.fst.gz index 
c547133..db1b3bb 100644 Binary files a/data/cds/chlorodb/fasta/NC_005087.fst.gz and b/data/cds/chlorodb/fasta/NC_005087.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_005353.fst.gz b/data/cds/chlorodb/fasta/NC_005353.fst.gz index 8d70fe3..aa91114 100644 Binary files a/data/cds/chlorodb/fasta/NC_005353.fst.gz and b/data/cds/chlorodb/fasta/NC_005353.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_005878.fst.gz b/data/cds/chlorodb/fasta/NC_005878.fst.gz index 8659698..5e77ec1 100644 Binary files a/data/cds/chlorodb/fasta/NC_005878.fst.gz and b/data/cds/chlorodb/fasta/NC_005878.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_005973.fst.gz b/data/cds/chlorodb/fasta/NC_005973.fst.gz index 3bd7e1f..c0d6104 100644 Binary files a/data/cds/chlorodb/fasta/NC_005973.fst.gz and b/data/cds/chlorodb/fasta/NC_005973.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_006050.fst.gz b/data/cds/chlorodb/fasta/NC_006050.fst.gz index cf05aa0..1c2a90f 100644 Binary files a/data/cds/chlorodb/fasta/NC_006050.fst.gz and b/data/cds/chlorodb/fasta/NC_006050.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_006084.fst.gz b/data/cds/chlorodb/fasta/NC_006084.fst.gz index 825c3c7..72f6621 100644 Binary files a/data/cds/chlorodb/fasta/NC_006084.fst.gz and b/data/cds/chlorodb/fasta/NC_006084.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_006137.fst.gz b/data/cds/chlorodb/fasta/NC_006137.fst.gz index 19a088a..a77a31c 100644 Binary files a/data/cds/chlorodb/fasta/NC_006137.fst.gz and b/data/cds/chlorodb/fasta/NC_006137.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_006290.fst.gz b/data/cds/chlorodb/fasta/NC_006290.fst.gz index 059ae33..e8d74e3 100644 Binary files a/data/cds/chlorodb/fasta/NC_006290.fst.gz and b/data/cds/chlorodb/fasta/NC_006290.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_006861.fst.gz b/data/cds/chlorodb/fasta/NC_006861.fst.gz index c1194b5..ff8aa76 100644 Binary files a/data/cds/chlorodb/fasta/NC_006861.fst.gz and 
b/data/cds/chlorodb/fasta/NC_006861.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_007144.fst.gz b/data/cds/chlorodb/fasta/NC_007144.fst.gz index 74d5c50..00d8bc9 100644 Binary files a/data/cds/chlorodb/fasta/NC_007144.fst.gz and b/data/cds/chlorodb/fasta/NC_007144.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_007288.fst.gz b/data/cds/chlorodb/fasta/NC_007288.fst.gz index 20c69e5..58dd65b 100644 Binary files a/data/cds/chlorodb/fasta/NC_007288.fst.gz and b/data/cds/chlorodb/fasta/NC_007288.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_007407.fst.gz b/data/cds/chlorodb/fasta/NC_007407.fst.gz index 9ef3fc8..ff38aaa 100644 Binary files a/data/cds/chlorodb/fasta/NC_007407.fst.gz and b/data/cds/chlorodb/fasta/NC_007407.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_007499.fst.gz b/data/cds/chlorodb/fasta/NC_007499.fst.gz index 07c1410..55ccac3 100644 Binary files a/data/cds/chlorodb/fasta/NC_007499.fst.gz and b/data/cds/chlorodb/fasta/NC_007499.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_007500.fst.gz b/data/cds/chlorodb/fasta/NC_007500.fst.gz index 9faa2cf..ec550fe 100644 Binary files a/data/cds/chlorodb/fasta/NC_007500.fst.gz and b/data/cds/chlorodb/fasta/NC_007500.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_007578.fst.gz b/data/cds/chlorodb/fasta/NC_007578.fst.gz index dc35d20..18254b1 100644 Binary files a/data/cds/chlorodb/fasta/NC_007578.fst.gz and b/data/cds/chlorodb/fasta/NC_007578.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_007602.fst.gz b/data/cds/chlorodb/fasta/NC_007602.fst.gz index 318a489..febcf2b 100644 Binary files a/data/cds/chlorodb/fasta/NC_007602.fst.gz and b/data/cds/chlorodb/fasta/NC_007602.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_007898.fst.gz b/data/cds/chlorodb/fasta/NC_007898.fst.gz index dc02a6b..ff0236c 100644 Binary files a/data/cds/chlorodb/fasta/NC_007898.fst.gz and b/data/cds/chlorodb/fasta/NC_007898.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_007932.fst.gz 
b/data/cds/chlorodb/fasta/NC_007932.fst.gz index bbb0d49..76124f7 100644 Binary files a/data/cds/chlorodb/fasta/NC_007932.fst.gz and b/data/cds/chlorodb/fasta/NC_007932.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_007942.fst.gz b/data/cds/chlorodb/fasta/NC_007942.fst.gz index ba5ab22..4d36cce 100644 Binary files a/data/cds/chlorodb/fasta/NC_007942.fst.gz and b/data/cds/chlorodb/fasta/NC_007942.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_007943.fst.gz b/data/cds/chlorodb/fasta/NC_007943.fst.gz index febc8f3..0e039e2 100644 Binary files a/data/cds/chlorodb/fasta/NC_007943.fst.gz and b/data/cds/chlorodb/fasta/NC_007943.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_007944.fst.gz b/data/cds/chlorodb/fasta/NC_007944.fst.gz index 25280c2..8cab6ff 100644 Binary files a/data/cds/chlorodb/fasta/NC_007944.fst.gz and b/data/cds/chlorodb/fasta/NC_007944.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_007957.fst.gz b/data/cds/chlorodb/fasta/NC_007957.fst.gz index 3426db7..34a4a15 100644 Binary files a/data/cds/chlorodb/fasta/NC_007957.fst.gz and b/data/cds/chlorodb/fasta/NC_007957.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_007977.fst.gz b/data/cds/chlorodb/fasta/NC_007977.fst.gz index 050f99e..2df2fd9 100644 Binary files a/data/cds/chlorodb/fasta/NC_007977.fst.gz and b/data/cds/chlorodb/fasta/NC_007977.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008096.fst.gz b/data/cds/chlorodb/fasta/NC_008096.fst.gz index db86fb0..5129e35 100644 Binary files a/data/cds/chlorodb/fasta/NC_008096.fst.gz and b/data/cds/chlorodb/fasta/NC_008096.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008097.fst.gz b/data/cds/chlorodb/fasta/NC_008097.fst.gz index d55e794..9c4c037 100644 Binary files a/data/cds/chlorodb/fasta/NC_008097.fst.gz and b/data/cds/chlorodb/fasta/NC_008097.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008099.fst.gz b/data/cds/chlorodb/fasta/NC_008099.fst.gz index 79f1308..9eed1a0 100644 Binary files 
a/data/cds/chlorodb/fasta/NC_008099.fst.gz and b/data/cds/chlorodb/fasta/NC_008099.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008100.fst.gz b/data/cds/chlorodb/fasta/NC_008100.fst.gz index 702ad81..ddd5850 100644 Binary files a/data/cds/chlorodb/fasta/NC_008100.fst.gz and b/data/cds/chlorodb/fasta/NC_008100.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008101.fst.gz b/data/cds/chlorodb/fasta/NC_008101.fst.gz index 04a00df..261e816 100644 Binary files a/data/cds/chlorodb/fasta/NC_008101.fst.gz and b/data/cds/chlorodb/fasta/NC_008101.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008114.fst.gz b/data/cds/chlorodb/fasta/NC_008114.fst.gz index ea5217e..f1ac16c 100644 Binary files a/data/cds/chlorodb/fasta/NC_008114.fst.gz and b/data/cds/chlorodb/fasta/NC_008114.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008115.fst.gz b/data/cds/chlorodb/fasta/NC_008115.fst.gz index 95c3ffa..04e5cf4 100644 Binary files a/data/cds/chlorodb/fasta/NC_008115.fst.gz and b/data/cds/chlorodb/fasta/NC_008115.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008116.fst.gz b/data/cds/chlorodb/fasta/NC_008116.fst.gz index 96d29e4..32f1c98 100644 Binary files a/data/cds/chlorodb/fasta/NC_008116.fst.gz and b/data/cds/chlorodb/fasta/NC_008116.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008117.fst.gz b/data/cds/chlorodb/fasta/NC_008117.fst.gz index aacc094..153eda2 100644 Binary files a/data/cds/chlorodb/fasta/NC_008117.fst.gz and b/data/cds/chlorodb/fasta/NC_008117.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008155.fst.gz b/data/cds/chlorodb/fasta/NC_008155.fst.gz index 560ef24..a781ff8 100644 Binary files a/data/cds/chlorodb/fasta/NC_008155.fst.gz and b/data/cds/chlorodb/fasta/NC_008155.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008235.fst.gz b/data/cds/chlorodb/fasta/NC_008235.fst.gz index 8f5b5a2..d80a3f3 100644 Binary files a/data/cds/chlorodb/fasta/NC_008235.fst.gz and b/data/cds/chlorodb/fasta/NC_008235.fst.gz differ diff 
--git a/data/cds/chlorodb/fasta/NC_008289.fst.gz b/data/cds/chlorodb/fasta/NC_008289.fst.gz index 25041ee..a3253a7 100644 Binary files a/data/cds/chlorodb/fasta/NC_008289.fst.gz and b/data/cds/chlorodb/fasta/NC_008289.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008325.fst.gz b/data/cds/chlorodb/fasta/NC_008325.fst.gz index cf5bc52..849ee38 100644 Binary files a/data/cds/chlorodb/fasta/NC_008325.fst.gz and b/data/cds/chlorodb/fasta/NC_008325.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008326.fst.gz b/data/cds/chlorodb/fasta/NC_008326.fst.gz index 9ceff07..3f73760 100644 Binary files a/data/cds/chlorodb/fasta/NC_008326.fst.gz and b/data/cds/chlorodb/fasta/NC_008326.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008334.fst.gz b/data/cds/chlorodb/fasta/NC_008334.fst.gz index af729db..aa49af0 100644 Binary files a/data/cds/chlorodb/fasta/NC_008334.fst.gz and b/data/cds/chlorodb/fasta/NC_008334.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008335.fst.gz b/data/cds/chlorodb/fasta/NC_008335.fst.gz index 745f832..3e899a9 100644 Binary files a/data/cds/chlorodb/fasta/NC_008335.fst.gz and b/data/cds/chlorodb/fasta/NC_008335.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008336.fst.gz b/data/cds/chlorodb/fasta/NC_008336.fst.gz index 2dfbec1..e4c7d9a 100644 Binary files a/data/cds/chlorodb/fasta/NC_008336.fst.gz and b/data/cds/chlorodb/fasta/NC_008336.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008359.fst.gz b/data/cds/chlorodb/fasta/NC_008359.fst.gz index 98b893c..478888b 100644 Binary files a/data/cds/chlorodb/fasta/NC_008359.fst.gz and b/data/cds/chlorodb/fasta/NC_008359.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008372.fst.gz b/data/cds/chlorodb/fasta/NC_008372.fst.gz index 7824952..47c3a8a 100644 Binary files a/data/cds/chlorodb/fasta/NC_008372.fst.gz and b/data/cds/chlorodb/fasta/NC_008372.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008407.fst.gz b/data/cds/chlorodb/fasta/NC_008407.fst.gz index 
2280135..ad3fb8d 100644 Binary files a/data/cds/chlorodb/fasta/NC_008407.fst.gz and b/data/cds/chlorodb/fasta/NC_008407.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008408.fst.gz b/data/cds/chlorodb/fasta/NC_008408.fst.gz index 4b45f6b..5910a74 100644 Binary files a/data/cds/chlorodb/fasta/NC_008408.fst.gz and b/data/cds/chlorodb/fasta/NC_008408.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008454.fst.gz b/data/cds/chlorodb/fasta/NC_008454.fst.gz index 07e443c..e59d303 100644 Binary files a/data/cds/chlorodb/fasta/NC_008454.fst.gz and b/data/cds/chlorodb/fasta/NC_008454.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008456.fst.gz b/data/cds/chlorodb/fasta/NC_008456.fst.gz index bf1e179..55f00ab 100644 Binary files a/data/cds/chlorodb/fasta/NC_008456.fst.gz and b/data/cds/chlorodb/fasta/NC_008456.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008457.fst.gz b/data/cds/chlorodb/fasta/NC_008457.fst.gz index 123a143..461e47f 100644 Binary files a/data/cds/chlorodb/fasta/NC_008457.fst.gz and b/data/cds/chlorodb/fasta/NC_008457.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008535.fst.gz b/data/cds/chlorodb/fasta/NC_008535.fst.gz index 0fbd7fb..9979d79 100644 Binary files a/data/cds/chlorodb/fasta/NC_008535.fst.gz and b/data/cds/chlorodb/fasta/NC_008535.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008588.fst.gz b/data/cds/chlorodb/fasta/NC_008588.fst.gz index bc8fdca..3572a6e 100644 Binary files a/data/cds/chlorodb/fasta/NC_008588.fst.gz and b/data/cds/chlorodb/fasta/NC_008588.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008589.fst.gz b/data/cds/chlorodb/fasta/NC_008589.fst.gz index bb5bbd0..f30fb4c 100644 Binary files a/data/cds/chlorodb/fasta/NC_008589.fst.gz and b/data/cds/chlorodb/fasta/NC_008589.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008590.fst.gz b/data/cds/chlorodb/fasta/NC_008590.fst.gz index 8a4b33c..116ff31 100644 Binary files a/data/cds/chlorodb/fasta/NC_008590.fst.gz and 
b/data/cds/chlorodb/fasta/NC_008590.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008591.fst.gz b/data/cds/chlorodb/fasta/NC_008591.fst.gz index 3368d94..53d8649 100644 Binary files a/data/cds/chlorodb/fasta/NC_008591.fst.gz and b/data/cds/chlorodb/fasta/NC_008591.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008602.fst.gz b/data/cds/chlorodb/fasta/NC_008602.fst.gz index 431e434..0853868 100644 Binary files a/data/cds/chlorodb/fasta/NC_008602.fst.gz and b/data/cds/chlorodb/fasta/NC_008602.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008641.fst.gz b/data/cds/chlorodb/fasta/NC_008641.fst.gz index ddbd833..f5c440a 100644 Binary files a/data/cds/chlorodb/fasta/NC_008641.fst.gz and b/data/cds/chlorodb/fasta/NC_008641.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008788.fst.gz b/data/cds/chlorodb/fasta/NC_008788.fst.gz index 57acf42..8fdf452 100644 Binary files a/data/cds/chlorodb/fasta/NC_008788.fst.gz and b/data/cds/chlorodb/fasta/NC_008788.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008796.fst.gz b/data/cds/chlorodb/fasta/NC_008796.fst.gz index e59c87a..7035748 100644 Binary files a/data/cds/chlorodb/fasta/NC_008796.fst.gz and b/data/cds/chlorodb/fasta/NC_008796.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008822.fst.gz b/data/cds/chlorodb/fasta/NC_008822.fst.gz index b685664..afab3bd 100644 Binary files a/data/cds/chlorodb/fasta/NC_008822.fst.gz and b/data/cds/chlorodb/fasta/NC_008822.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_008829.fst.gz b/data/cds/chlorodb/fasta/NC_008829.fst.gz index 89a99c5..92d42d9 100644 Binary files a/data/cds/chlorodb/fasta/NC_008829.fst.gz and b/data/cds/chlorodb/fasta/NC_008829.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_009143.fst.gz b/data/cds/chlorodb/fasta/NC_009143.fst.gz index f0982af..39e8620 100644 Binary files a/data/cds/chlorodb/fasta/NC_009143.fst.gz and b/data/cds/chlorodb/fasta/NC_009143.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_009259.fst.gz 
b/data/cds/chlorodb/fasta/NC_009259.fst.gz index d4b3622..83932a2 100644 Binary files a/data/cds/chlorodb/fasta/NC_009259.fst.gz and b/data/cds/chlorodb/fasta/NC_009259.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_009265.fst.gz b/data/cds/chlorodb/fasta/NC_009265.fst.gz index 9216e2f..daeb476 100644 Binary files a/data/cds/chlorodb/fasta/NC_009265.fst.gz and b/data/cds/chlorodb/fasta/NC_009265.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_009266.fst.gz b/data/cds/chlorodb/fasta/NC_009266.fst.gz index d65fff3..c349bc5 100644 Binary files a/data/cds/chlorodb/fasta/NC_009266.fst.gz and b/data/cds/chlorodb/fasta/NC_009266.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_009267.fst.gz b/data/cds/chlorodb/fasta/NC_009267.fst.gz index e3235b3..093a7f8 100644 Binary files a/data/cds/chlorodb/fasta/NC_009267.fst.gz and b/data/cds/chlorodb/fasta/NC_009267.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_009268.fst.gz b/data/cds/chlorodb/fasta/NC_009268.fst.gz index ccdf246..5e4731f 100644 Binary files a/data/cds/chlorodb/fasta/NC_009268.fst.gz and b/data/cds/chlorodb/fasta/NC_009268.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_009269.fst.gz b/data/cds/chlorodb/fasta/NC_009269.fst.gz index f32b489..b2418ef 100644 Binary files a/data/cds/chlorodb/fasta/NC_009269.fst.gz and b/data/cds/chlorodb/fasta/NC_009269.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_009270.fst.gz b/data/cds/chlorodb/fasta/NC_009270.fst.gz index edfdcef..a5d1566 100644 Binary files a/data/cds/chlorodb/fasta/NC_009270.fst.gz and b/data/cds/chlorodb/fasta/NC_009270.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_009271.fst.gz b/data/cds/chlorodb/fasta/NC_009271.fst.gz index b45c20b..0e101a5 100644 Binary files a/data/cds/chlorodb/fasta/NC_009271.fst.gz and b/data/cds/chlorodb/fasta/NC_009271.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_009272.fst.gz b/data/cds/chlorodb/fasta/NC_009272.fst.gz index 39092cc..8f6b829 100644 Binary files 
a/data/cds/chlorodb/fasta/NC_009272.fst.gz and b/data/cds/chlorodb/fasta/NC_009272.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_009273.fst.gz b/data/cds/chlorodb/fasta/NC_009273.fst.gz index d3912f6..9c4690e 100644 Binary files a/data/cds/chlorodb/fasta/NC_009273.fst.gz and b/data/cds/chlorodb/fasta/NC_009273.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_009274.fst.gz b/data/cds/chlorodb/fasta/NC_009274.fst.gz index 01b4c39..ebddfc1 100644 Binary files a/data/cds/chlorodb/fasta/NC_009274.fst.gz and b/data/cds/chlorodb/fasta/NC_009274.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_009275.fst.gz b/data/cds/chlorodb/fasta/NC_009275.fst.gz index 2fd4ef6..68a63d6 100644 Binary files a/data/cds/chlorodb/fasta/NC_009275.fst.gz and b/data/cds/chlorodb/fasta/NC_009275.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_009573.fst.gz b/data/cds/chlorodb/fasta/NC_009573.fst.gz index 9643f63..bbe18f1 100644 Binary files a/data/cds/chlorodb/fasta/NC_009573.fst.gz and b/data/cds/chlorodb/fasta/NC_009573.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_009598.fst.gz b/data/cds/chlorodb/fasta/NC_009598.fst.gz index 412924e..0fa62e7 100644 Binary files a/data/cds/chlorodb/fasta/NC_009598.fst.gz and b/data/cds/chlorodb/fasta/NC_009598.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_009599.fst.gz b/data/cds/chlorodb/fasta/NC_009599.fst.gz index 15dc6d2..f1ebba7 100644 Binary files a/data/cds/chlorodb/fasta/NC_009599.fst.gz and b/data/cds/chlorodb/fasta/NC_009599.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_009600.fst.gz b/data/cds/chlorodb/fasta/NC_009600.fst.gz index 32922f0..07ce9b9 100644 Binary files a/data/cds/chlorodb/fasta/NC_009600.fst.gz and b/data/cds/chlorodb/fasta/NC_009600.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_009601.fst.gz b/data/cds/chlorodb/fasta/NC_009601.fst.gz index c9ce6d2..db45948 100644 Binary files a/data/cds/chlorodb/fasta/NC_009601.fst.gz and b/data/cds/chlorodb/fasta/NC_009601.fst.gz differ diff 
--git a/data/cds/chlorodb/fasta/NC_009618.fst.gz b/data/cds/chlorodb/fasta/NC_009618.fst.gz index 9004afe..30d2f24 100644 Binary files a/data/cds/chlorodb/fasta/NC_009618.fst.gz and b/data/cds/chlorodb/fasta/NC_009618.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_009681.fst.gz b/data/cds/chlorodb/fasta/NC_009681.fst.gz index 84c4bae..f149b20 100644 Binary files a/data/cds/chlorodb/fasta/NC_009681.fst.gz and b/data/cds/chlorodb/fasta/NC_009681.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_009765.fst.gz b/data/cds/chlorodb/fasta/NC_009765.fst.gz index a0fde90..4821325 100644 Binary files a/data/cds/chlorodb/fasta/NC_009765.fst.gz and b/data/cds/chlorodb/fasta/NC_009765.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_009766.fst.gz b/data/cds/chlorodb/fasta/NC_009766.fst.gz index ba70a06..76684e1 100644 Binary files a/data/cds/chlorodb/fasta/NC_009766.fst.gz and b/data/cds/chlorodb/fasta/NC_009766.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_009808.fst.gz b/data/cds/chlorodb/fasta/NC_009808.fst.gz index 88a3c14..df67a66 100644 Binary files a/data/cds/chlorodb/fasta/NC_009808.fst.gz and b/data/cds/chlorodb/fasta/NC_009808.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_009949.fst.gz b/data/cds/chlorodb/fasta/NC_009949.fst.gz index a97d167..e945492 100644 Binary files a/data/cds/chlorodb/fasta/NC_009949.fst.gz and b/data/cds/chlorodb/fasta/NC_009949.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_009950.fst.gz b/data/cds/chlorodb/fasta/NC_009950.fst.gz index cebb549..3ce2b00 100644 Binary files a/data/cds/chlorodb/fasta/NC_009950.fst.gz and b/data/cds/chlorodb/fasta/NC_009950.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_009962.fst.gz b/data/cds/chlorodb/fasta/NC_009962.fst.gz index f0486f3..bec8952 100644 Binary files a/data/cds/chlorodb/fasta/NC_009962.fst.gz and b/data/cds/chlorodb/fasta/NC_009962.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_009963.fst.gz b/data/cds/chlorodb/fasta/NC_009963.fst.gz index 
23abcbd..3eab3de 100644 Binary files a/data/cds/chlorodb/fasta/NC_009963.fst.gz and b/data/cds/chlorodb/fasta/NC_009963.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_010093.fst.gz b/data/cds/chlorodb/fasta/NC_010093.fst.gz index 8e27ea1..b8863ba 100644 Binary files a/data/cds/chlorodb/fasta/NC_010093.fst.gz and b/data/cds/chlorodb/fasta/NC_010093.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_010109.fst.gz b/data/cds/chlorodb/fasta/NC_010109.fst.gz index 54c03ae..ac28660 100644 Binary files a/data/cds/chlorodb/fasta/NC_010109.fst.gz and b/data/cds/chlorodb/fasta/NC_010109.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_010323.fst.gz b/data/cds/chlorodb/fasta/NC_010323.fst.gz index 02208aa..a4371a9 100644 Binary files a/data/cds/chlorodb/fasta/NC_010323.fst.gz and b/data/cds/chlorodb/fasta/NC_010323.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_010358.fst.gz b/data/cds/chlorodb/fasta/NC_010358.fst.gz index eb77bbe..25f248f 100644 Binary files a/data/cds/chlorodb/fasta/NC_010358.fst.gz and b/data/cds/chlorodb/fasta/NC_010358.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_010359.fst.gz b/data/cds/chlorodb/fasta/NC_010359.fst.gz index 213f714..030dfd8 100644 Binary files a/data/cds/chlorodb/fasta/NC_010359.fst.gz and b/data/cds/chlorodb/fasta/NC_010359.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_010360.fst.gz b/data/cds/chlorodb/fasta/NC_010360.fst.gz index a6ba632..d71a2ec 100644 Binary files a/data/cds/chlorodb/fasta/NC_010360.fst.gz and b/data/cds/chlorodb/fasta/NC_010360.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_010361.fst.gz b/data/cds/chlorodb/fasta/NC_010361.fst.gz index 5e1d051..cd12d0f 100644 Binary files a/data/cds/chlorodb/fasta/NC_010361.fst.gz and b/data/cds/chlorodb/fasta/NC_010361.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_010362.fst.gz b/data/cds/chlorodb/fasta/NC_010362.fst.gz index a122408..6b202aa 100644 Binary files a/data/cds/chlorodb/fasta/NC_010362.fst.gz and 
b/data/cds/chlorodb/fasta/NC_010362.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_010433.fst.gz b/data/cds/chlorodb/fasta/NC_010433.fst.gz index bc0cd8e..4d3045b 100644 Binary files a/data/cds/chlorodb/fasta/NC_010433.fst.gz and b/data/cds/chlorodb/fasta/NC_010433.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_010442.fst.gz b/data/cds/chlorodb/fasta/NC_010442.fst.gz index 1f7932e..06f9575 100644 Binary files a/data/cds/chlorodb/fasta/NC_010442.fst.gz and b/data/cds/chlorodb/fasta/NC_010442.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_010548.fst.gz b/data/cds/chlorodb/fasta/NC_010548.fst.gz index 38c30be..4779c4e 100644 Binary files a/data/cds/chlorodb/fasta/NC_010548.fst.gz and b/data/cds/chlorodb/fasta/NC_010548.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_010601.fst.gz b/data/cds/chlorodb/fasta/NC_010601.fst.gz index a74d0e3..95ebb70 100644 Binary files a/data/cds/chlorodb/fasta/NC_010601.fst.gz and b/data/cds/chlorodb/fasta/NC_010601.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_010654.fst.gz b/data/cds/chlorodb/fasta/NC_010654.fst.gz index 5851ea8..a87ba74 100644 Binary files a/data/cds/chlorodb/fasta/NC_010654.fst.gz and b/data/cds/chlorodb/fasta/NC_010654.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_010772.fst.gz b/data/cds/chlorodb/fasta/NC_010772.fst.gz index d18fb56..8f6da79 100644 Binary files a/data/cds/chlorodb/fasta/NC_010772.fst.gz and b/data/cds/chlorodb/fasta/NC_010772.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_010776.fst.gz b/data/cds/chlorodb/fasta/NC_010776.fst.gz index a847f1b..e8873b3 100644 Binary files a/data/cds/chlorodb/fasta/NC_010776.fst.gz and b/data/cds/chlorodb/fasta/NC_010776.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_011031.fst.gz b/data/cds/chlorodb/fasta/NC_011031.fst.gz index becd7ed..9f91a6b 100644 Binary files a/data/cds/chlorodb/fasta/NC_011031.fst.gz and b/data/cds/chlorodb/fasta/NC_011031.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_011032.fst.gz 
b/data/cds/chlorodb/fasta/NC_011032.fst.gz index e558e7c..a88aa22 100644 Binary files a/data/cds/chlorodb/fasta/NC_011032.fst.gz and b/data/cds/chlorodb/fasta/NC_011032.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_011152.fst.gz b/data/cds/chlorodb/fasta/NC_011152.fst.gz index 25c9632..8bb9952 100644 Binary files a/data/cds/chlorodb/fasta/NC_011152.fst.gz and b/data/cds/chlorodb/fasta/NC_011152.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_011153.fst.gz b/data/cds/chlorodb/fasta/NC_011153.fst.gz index 25d860a..0787f72 100644 Binary files a/data/cds/chlorodb/fasta/NC_011153.fst.gz and b/data/cds/chlorodb/fasta/NC_011153.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_011154.fst.gz b/data/cds/chlorodb/fasta/NC_011154.fst.gz index 58df6bc..f908378 100644 Binary files a/data/cds/chlorodb/fasta/NC_011154.fst.gz and b/data/cds/chlorodb/fasta/NC_011154.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_011155.fst.gz b/data/cds/chlorodb/fasta/NC_011155.fst.gz index 057c39e..9ba473b 100644 Binary files a/data/cds/chlorodb/fasta/NC_011155.fst.gz and b/data/cds/chlorodb/fasta/NC_011155.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_011156.fst.gz b/data/cds/chlorodb/fasta/NC_011156.fst.gz index f270fa9..551db98 100644 Binary files a/data/cds/chlorodb/fasta/NC_011156.fst.gz and b/data/cds/chlorodb/fasta/NC_011156.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_011158.fst.gz b/data/cds/chlorodb/fasta/NC_011158.fst.gz index bfda1f5..d102404 100644 Binary files a/data/cds/chlorodb/fasta/NC_011158.fst.gz and b/data/cds/chlorodb/fasta/NC_011158.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_011159.fst.gz b/data/cds/chlorodb/fasta/NC_011159.fst.gz index 5f2b7ea..6bd8937 100644 Binary files a/data/cds/chlorodb/fasta/NC_011159.fst.gz and b/data/cds/chlorodb/fasta/NC_011159.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_011163.fst.gz b/data/cds/chlorodb/fasta/NC_011163.fst.gz index 7c62315..d290213 100644 Binary files 
a/data/cds/chlorodb/fasta/NC_011163.fst.gz and b/data/cds/chlorodb/fasta/NC_011163.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_011600.fst.gz b/data/cds/chlorodb/fasta/NC_011600.fst.gz index 5065f55..29c7894 100644 Binary files a/data/cds/chlorodb/fasta/NC_011600.fst.gz and b/data/cds/chlorodb/fasta/NC_011600.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_011713.fst.gz b/data/cds/chlorodb/fasta/NC_011713.fst.gz index ddda3a8..bf15569 100644 Binary files a/data/cds/chlorodb/fasta/NC_011713.fst.gz and b/data/cds/chlorodb/fasta/NC_011713.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_011828.fst.gz b/data/cds/chlorodb/fasta/NC_011828.fst.gz index c13cb6c..68f2d2f 100644 Binary files a/data/cds/chlorodb/fasta/NC_011828.fst.gz and b/data/cds/chlorodb/fasta/NC_011828.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_011930.fst.gz b/data/cds/chlorodb/fasta/NC_011930.fst.gz index 6959580..a9c9e10 100644 Binary files a/data/cds/chlorodb/fasta/NC_011930.fst.gz and b/data/cds/chlorodb/fasta/NC_011930.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_011942.fst.gz b/data/cds/chlorodb/fasta/NC_011942.fst.gz index 7bb9bf4..682695a 100644 Binary files a/data/cds/chlorodb/fasta/NC_011942.fst.gz and b/data/cds/chlorodb/fasta/NC_011942.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_011954.fst.gz b/data/cds/chlorodb/fasta/NC_011954.fst.gz index c6eeabc..d615c38 100644 Binary files a/data/cds/chlorodb/fasta/NC_011954.fst.gz and b/data/cds/chlorodb/fasta/NC_011954.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_012052.fst.gz b/data/cds/chlorodb/fasta/NC_012052.fst.gz index 5f9cf17..175b80a 100644 Binary files a/data/cds/chlorodb/fasta/NC_012052.fst.gz and b/data/cds/chlorodb/fasta/NC_012052.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_012097.fst.gz b/data/cds/chlorodb/fasta/NC_012097.fst.gz index 3f05a72..d0be51f 100644 Binary files a/data/cds/chlorodb/fasta/NC_012097.fst.gz and b/data/cds/chlorodb/fasta/NC_012097.fst.gz differ diff 
--git a/data/cds/chlorodb/fasta/NC_012099.fst.gz b/data/cds/chlorodb/fasta/NC_012099.fst.gz index f942f38..b21c213 100644 Binary files a/data/cds/chlorodb/fasta/NC_012099.fst.gz and b/data/cds/chlorodb/fasta/NC_012099.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_012101.fst.gz b/data/cds/chlorodb/fasta/NC_012101.fst.gz index 6ca73f0..b79d039 100644 Binary files a/data/cds/chlorodb/fasta/NC_012101.fst.gz and b/data/cds/chlorodb/fasta/NC_012101.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_012224.fst.gz b/data/cds/chlorodb/fasta/NC_012224.fst.gz index 253a5b3..6b1792c 100644 Binary files a/data/cds/chlorodb/fasta/NC_012224.fst.gz and b/data/cds/chlorodb/fasta/NC_012224.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_012568.fst.gz b/data/cds/chlorodb/fasta/NC_012568.fst.gz index f014f71..9ec8d14 100644 Binary files a/data/cds/chlorodb/fasta/NC_012568.fst.gz and b/data/cds/chlorodb/fasta/NC_012568.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_012575.fst.gz b/data/cds/chlorodb/fasta/NC_012575.fst.gz index 541d561..e3a7fac 100644 Binary files a/data/cds/chlorodb/fasta/NC_012575.fst.gz and b/data/cds/chlorodb/fasta/NC_012575.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_012615.fst.gz b/data/cds/chlorodb/fasta/NC_012615.fst.gz index 31734e6..a1da9ea 100644 Binary files a/data/cds/chlorodb/fasta/NC_012615.fst.gz and b/data/cds/chlorodb/fasta/NC_012615.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_012818.fst.gz b/data/cds/chlorodb/fasta/NC_012818.fst.gz index 5603b9a..01a7075 100644 Binary files a/data/cds/chlorodb/fasta/NC_012818.fst.gz and b/data/cds/chlorodb/fasta/NC_012818.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_012898.fst.gz b/data/cds/chlorodb/fasta/NC_012898.fst.gz index a23aa4c..df67baa 100644 Binary files a/data/cds/chlorodb/fasta/NC_012898.fst.gz and b/data/cds/chlorodb/fasta/NC_012898.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_012903.fst.gz b/data/cds/chlorodb/fasta/NC_012903.fst.gz index 
bfc4b24..fb15812 100644 Binary files a/data/cds/chlorodb/fasta/NC_012903.fst.gz and b/data/cds/chlorodb/fasta/NC_012903.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_012927.fst.gz b/data/cds/chlorodb/fasta/NC_012927.fst.gz index f774dd9..fa3f4cd 100644 Binary files a/data/cds/chlorodb/fasta/NC_012927.fst.gz and b/data/cds/chlorodb/fasta/NC_012927.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_012978.fst.gz b/data/cds/chlorodb/fasta/NC_012978.fst.gz index bde9aa6..85aef45 100644 Binary files a/data/cds/chlorodb/fasta/NC_012978.fst.gz and b/data/cds/chlorodb/fasta/NC_012978.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_013086.fst.gz b/data/cds/chlorodb/fasta/NC_013086.fst.gz index 874ab3b..be644eb 100644 Binary files a/data/cds/chlorodb/fasta/NC_013086.fst.gz and b/data/cds/chlorodb/fasta/NC_013086.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_013088.fst.gz b/data/cds/chlorodb/fasta/NC_013088.fst.gz index 7a810f2..12c957e 100644 Binary files a/data/cds/chlorodb/fasta/NC_013088.fst.gz and b/data/cds/chlorodb/fasta/NC_013088.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_013273.fst.gz b/data/cds/chlorodb/fasta/NC_013273.fst.gz index b46cc42..bc08757 100644 Binary files a/data/cds/chlorodb/fasta/NC_013273.fst.gz and b/data/cds/chlorodb/fasta/NC_013273.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_013359.fst.gz b/data/cds/chlorodb/fasta/NC_013359.fst.gz index d3bf37b..413e37c 100644 Binary files a/data/cds/chlorodb/fasta/NC_013359.fst.gz and b/data/cds/chlorodb/fasta/NC_013359.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_013498.fst.gz b/data/cds/chlorodb/fasta/NC_013498.fst.gz index ea72b6a..450ebb2 100644 Binary files a/data/cds/chlorodb/fasta/NC_013498.fst.gz and b/data/cds/chlorodb/fasta/NC_013498.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_013553.fst.gz b/data/cds/chlorodb/fasta/NC_013553.fst.gz index 5e2628d..cb131e2 100644 Binary files a/data/cds/chlorodb/fasta/NC_013553.fst.gz and 
b/data/cds/chlorodb/fasta/NC_013553.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_013703.fst.gz b/data/cds/chlorodb/fasta/NC_013703.fst.gz index d56d0d3..2a7a9d7 100644 Binary files a/data/cds/chlorodb/fasta/NC_013703.fst.gz and b/data/cds/chlorodb/fasta/NC_013703.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_013707.fst.gz b/data/cds/chlorodb/fasta/NC_013707.fst.gz index 88229f0..f207e8e 100644 Binary files a/data/cds/chlorodb/fasta/NC_013707.fst.gz and b/data/cds/chlorodb/fasta/NC_013707.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_013823.fst.gz b/data/cds/chlorodb/fasta/NC_013823.fst.gz index 4907102..49c46a4 100644 Binary files a/data/cds/chlorodb/fasta/NC_013823.fst.gz and b/data/cds/chlorodb/fasta/NC_013823.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_013843.fst.gz b/data/cds/chlorodb/fasta/NC_013843.fst.gz index 854ad65..a1cd691 100644 Binary files a/data/cds/chlorodb/fasta/NC_013843.fst.gz and b/data/cds/chlorodb/fasta/NC_013843.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_013991.fst.gz b/data/cds/chlorodb/fasta/NC_013991.fst.gz index 9e101c1..33d29bd 100644 Binary files a/data/cds/chlorodb/fasta/NC_013991.fst.gz and b/data/cds/chlorodb/fasta/NC_013991.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_014056.fst.gz b/data/cds/chlorodb/fasta/NC_014056.fst.gz index 77ab8c2..dcdeedc 100644 Binary files a/data/cds/chlorodb/fasta/NC_014056.fst.gz and b/data/cds/chlorodb/fasta/NC_014056.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_014057.fst.gz b/data/cds/chlorodb/fasta/NC_014057.fst.gz index be137ef..c864b60 100644 Binary files a/data/cds/chlorodb/fasta/NC_014057.fst.gz and b/data/cds/chlorodb/fasta/NC_014057.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_014062.fst.gz b/data/cds/chlorodb/fasta/NC_014062.fst.gz index b8ae10e..e2a0b8f 100644 Binary files a/data/cds/chlorodb/fasta/NC_014062.fst.gz and b/data/cds/chlorodb/fasta/NC_014062.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_014063.fst.gz 
b/data/cds/chlorodb/fasta/NC_014063.fst.gz index 25911e9..0f0eeef 100644 Binary files a/data/cds/chlorodb/fasta/NC_014063.fst.gz and b/data/cds/chlorodb/fasta/NC_014063.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_014267.fst.gz b/data/cds/chlorodb/fasta/NC_014267.fst.gz index 057d20b..93b2426 100644 Binary files a/data/cds/chlorodb/fasta/NC_014267.fst.gz and b/data/cds/chlorodb/fasta/NC_014267.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_014287.fst.gz b/data/cds/chlorodb/fasta/NC_014287.fst.gz index 6756bef..638d600 100644 Binary files a/data/cds/chlorodb/fasta/NC_014287.fst.gz and b/data/cds/chlorodb/fasta/NC_014287.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_014340.fst.gz b/data/cds/chlorodb/fasta/NC_014340.fst.gz index afc3199..ff3feff 100644 Binary files a/data/cds/chlorodb/fasta/NC_014340.fst.gz and b/data/cds/chlorodb/fasta/NC_014340.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_014345.fst.gz b/data/cds/chlorodb/fasta/NC_014345.fst.gz index 92eacc6..abbd8c7 100644 Binary files a/data/cds/chlorodb/fasta/NC_014345.fst.gz and b/data/cds/chlorodb/fasta/NC_014345.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_014346.fst.gz b/data/cds/chlorodb/fasta/NC_014346.fst.gz index 3decacf..b572e58 100644 Binary files a/data/cds/chlorodb/fasta/NC_014346.fst.gz and b/data/cds/chlorodb/fasta/NC_014346.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_014348.fst.gz b/data/cds/chlorodb/fasta/NC_014348.fst.gz index 24b57c6..0cf5cd6 100644 Binary files a/data/cds/chlorodb/fasta/NC_014348.fst.gz and b/data/cds/chlorodb/fasta/NC_014348.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_014569.fst.gz b/data/cds/chlorodb/fasta/NC_014569.fst.gz index 6a24f3b..f11e824 100644 Binary files a/data/cds/chlorodb/fasta/NC_014569.fst.gz and b/data/cds/chlorodb/fasta/NC_014569.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_014570.fst.gz b/data/cds/chlorodb/fasta/NC_014570.fst.gz index f9339e1..232e0ff 100644 Binary files 
a/data/cds/chlorodb/fasta/NC_014570.fst.gz and b/data/cds/chlorodb/fasta/NC_014570.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_014573.fst.gz b/data/cds/chlorodb/fasta/NC_014573.fst.gz index c5ba218..d7152b4 100644 Binary files a/data/cds/chlorodb/fasta/NC_014573.fst.gz and b/data/cds/chlorodb/fasta/NC_014573.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_014575.fst.gz b/data/cds/chlorodb/fasta/NC_014575.fst.gz index 00d98cd..29d89cd 100644 Binary files a/data/cds/chlorodb/fasta/NC_014575.fst.gz and b/data/cds/chlorodb/fasta/NC_014575.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_014582.fst.gz b/data/cds/chlorodb/fasta/NC_014582.fst.gz index 1555ee5..bc29e6a 100644 Binary files a/data/cds/chlorodb/fasta/NC_014582.fst.gz and b/data/cds/chlorodb/fasta/NC_014582.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_014589.fst.gz b/data/cds/chlorodb/fasta/NC_014589.fst.gz index e687caf..bad4103 100644 Binary files a/data/cds/chlorodb/fasta/NC_014589.fst.gz and b/data/cds/chlorodb/fasta/NC_014589.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_014592.fst.gz b/data/cds/chlorodb/fasta/NC_014592.fst.gz index ce3d5ee..39dca79 100644 Binary files a/data/cds/chlorodb/fasta/NC_014592.fst.gz and b/data/cds/chlorodb/fasta/NC_014592.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_014674.fst.gz b/data/cds/chlorodb/fasta/NC_014674.fst.gz index 821d2d2..c7b4052 100644 Binary files a/data/cds/chlorodb/fasta/NC_014674.fst.gz and b/data/cds/chlorodb/fasta/NC_014674.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_014675.fst.gz b/data/cds/chlorodb/fasta/NC_014675.fst.gz index f554df9..058692b 100644 Binary files a/data/cds/chlorodb/fasta/NC_014675.fst.gz and b/data/cds/chlorodb/fasta/NC_014675.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_014676.fst.gz b/data/cds/chlorodb/fasta/NC_014676.fst.gz index 8666513..f77422e 100644 Binary files a/data/cds/chlorodb/fasta/NC_014676.fst.gz and b/data/cds/chlorodb/fasta/NC_014676.fst.gz differ diff 
--git a/data/cds/chlorodb/fasta/NC_014697.fst.gz b/data/cds/chlorodb/fasta/NC_014697.fst.gz index 9c7db13..531b16e 100644 Binary files a/data/cds/chlorodb/fasta/NC_014697.fst.gz and b/data/cds/chlorodb/fasta/NC_014697.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_014699.fst.gz b/data/cds/chlorodb/fasta/NC_014699.fst.gz index 5275d6e..4aeb776 100644 Binary files a/data/cds/chlorodb/fasta/NC_014699.fst.gz and b/data/cds/chlorodb/fasta/NC_014699.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_014807.fst.gz b/data/cds/chlorodb/fasta/NC_014807.fst.gz index f0b0059..af2d8fa 100644 Binary files a/data/cds/chlorodb/fasta/NC_014807.fst.gz and b/data/cds/chlorodb/fasta/NC_014807.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_014808.fst.gz b/data/cds/chlorodb/fasta/NC_014808.fst.gz index 6a4f6e4..5d350ad 100644 Binary files a/data/cds/chlorodb/fasta/NC_014808.fst.gz and b/data/cds/chlorodb/fasta/NC_014808.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_014874.fst.gz b/data/cds/chlorodb/fasta/NC_014874.fst.gz index c0df9eb..cf0f100 100644 Binary files a/data/cds/chlorodb/fasta/NC_014874.fst.gz and b/data/cds/chlorodb/fasta/NC_014874.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015083.fst.gz b/data/cds/chlorodb/fasta/NC_015083.fst.gz index 6392c93..c4b0ae7 100644 Binary files a/data/cds/chlorodb/fasta/NC_015083.fst.gz and b/data/cds/chlorodb/fasta/NC_015083.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015084.fst.gz b/data/cds/chlorodb/fasta/NC_015084.fst.gz index 595a988..35e93d9 100644 Binary files a/data/cds/chlorodb/fasta/NC_015084.fst.gz and b/data/cds/chlorodb/fasta/NC_015084.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015113.fst.gz b/data/cds/chlorodb/fasta/NC_015113.fst.gz index 2a6f635..293dc45 100644 Binary files a/data/cds/chlorodb/fasta/NC_015113.fst.gz and b/data/cds/chlorodb/fasta/NC_015113.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015204.fst.gz b/data/cds/chlorodb/fasta/NC_015204.fst.gz index 
6484209..08844fb 100644 Binary files a/data/cds/chlorodb/fasta/NC_015204.fst.gz and b/data/cds/chlorodb/fasta/NC_015204.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015206.fst.gz b/data/cds/chlorodb/fasta/NC_015206.fst.gz index e69fd9b..5abec45 100644 Binary files a/data/cds/chlorodb/fasta/NC_015206.fst.gz and b/data/cds/chlorodb/fasta/NC_015206.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015308.fst.gz b/data/cds/chlorodb/fasta/NC_015308.fst.gz index 86c54c7..3fd6558 100644 Binary files a/data/cds/chlorodb/fasta/NC_015308.fst.gz and b/data/cds/chlorodb/fasta/NC_015308.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015359.fst.gz b/data/cds/chlorodb/fasta/NC_015359.fst.gz index da8d3cf..909340d 100644 Binary files a/data/cds/chlorodb/fasta/NC_015359.fst.gz and b/data/cds/chlorodb/fasta/NC_015359.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015401.fst.gz b/data/cds/chlorodb/fasta/NC_015401.fst.gz index 8554a98..36e1b9a 100644 Binary files a/data/cds/chlorodb/fasta/NC_015401.fst.gz and b/data/cds/chlorodb/fasta/NC_015401.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015402.fst.gz b/data/cds/chlorodb/fasta/NC_015402.fst.gz index ef5209f..7f90c6b 100644 Binary files a/data/cds/chlorodb/fasta/NC_015402.fst.gz and b/data/cds/chlorodb/fasta/NC_015402.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015403.fst.gz b/data/cds/chlorodb/fasta/NC_015403.fst.gz index 5ab38bf..458e2a5 100644 Binary files a/data/cds/chlorodb/fasta/NC_015403.fst.gz and b/data/cds/chlorodb/fasta/NC_015403.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015543.fst.gz b/data/cds/chlorodb/fasta/NC_015543.fst.gz index eed26c9..5319e2c 100644 Binary files a/data/cds/chlorodb/fasta/NC_015543.fst.gz and b/data/cds/chlorodb/fasta/NC_015543.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015604.fst.gz b/data/cds/chlorodb/fasta/NC_015604.fst.gz index 058f599..795c033 100644 Binary files a/data/cds/chlorodb/fasta/NC_015604.fst.gz and 
b/data/cds/chlorodb/fasta/NC_015604.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015605.fst.gz b/data/cds/chlorodb/fasta/NC_015605.fst.gz index d2d2463..75d0f03 100644 Binary files a/data/cds/chlorodb/fasta/NC_015605.fst.gz and b/data/cds/chlorodb/fasta/NC_015605.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015608.fst.gz b/data/cds/chlorodb/fasta/NC_015608.fst.gz index c71ea5d..55db8b8 100644 Binary files a/data/cds/chlorodb/fasta/NC_015608.fst.gz and b/data/cds/chlorodb/fasta/NC_015608.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015610.fst.gz b/data/cds/chlorodb/fasta/NC_015610.fst.gz index 042e4b4..a3fc93c 100644 Binary files a/data/cds/chlorodb/fasta/NC_015610.fst.gz and b/data/cds/chlorodb/fasta/NC_015610.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015621.fst.gz b/data/cds/chlorodb/fasta/NC_015621.fst.gz index bd12cfe..ffc9309 100644 Binary files a/data/cds/chlorodb/fasta/NC_015621.fst.gz and b/data/cds/chlorodb/fasta/NC_015621.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015623.fst.gz b/data/cds/chlorodb/fasta/NC_015623.fst.gz index e60d3a1..9e0d2b8 100644 Binary files a/data/cds/chlorodb/fasta/NC_015623.fst.gz and b/data/cds/chlorodb/fasta/NC_015623.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015645.fst.gz b/data/cds/chlorodb/fasta/NC_015645.fst.gz index b7d8ba6..b825810 100644 Binary files a/data/cds/chlorodb/fasta/NC_015645.fst.gz and b/data/cds/chlorodb/fasta/NC_015645.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015803.fst.gz b/data/cds/chlorodb/fasta/NC_015803.fst.gz index 9583298..4d78922 100644 Binary files a/data/cds/chlorodb/fasta/NC_015803.fst.gz and b/data/cds/chlorodb/fasta/NC_015803.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015817.fst.gz b/data/cds/chlorodb/fasta/NC_015817.fst.gz index 6d58b44..e53e414 100644 Binary files a/data/cds/chlorodb/fasta/NC_015817.fst.gz and b/data/cds/chlorodb/fasta/NC_015817.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015820.fst.gz 
b/data/cds/chlorodb/fasta/NC_015820.fst.gz index ceb28a8..14c075b 100644 Binary files a/data/cds/chlorodb/fasta/NC_015820.fst.gz and b/data/cds/chlorodb/fasta/NC_015820.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015826.fst.gz b/data/cds/chlorodb/fasta/NC_015826.fst.gz index 9a9ba14..3371902 100644 Binary files a/data/cds/chlorodb/fasta/NC_015826.fst.gz and b/data/cds/chlorodb/fasta/NC_015826.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015830.fst.gz b/data/cds/chlorodb/fasta/NC_015830.fst.gz index d8cd996..1f2e813 100644 Binary files a/data/cds/chlorodb/fasta/NC_015830.fst.gz and b/data/cds/chlorodb/fasta/NC_015830.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015831.fst.gz b/data/cds/chlorodb/fasta/NC_015831.fst.gz index ad36b72..15927f2 100644 Binary files a/data/cds/chlorodb/fasta/NC_015831.fst.gz and b/data/cds/chlorodb/fasta/NC_015831.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015891.fst.gz b/data/cds/chlorodb/fasta/NC_015891.fst.gz index 6700d6b..787436f 100644 Binary files a/data/cds/chlorodb/fasta/NC_015891.fst.gz and b/data/cds/chlorodb/fasta/NC_015891.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015892.fst.gz b/data/cds/chlorodb/fasta/NC_015892.fst.gz index 2f0a270..7f1bfb7 100644 Binary files a/data/cds/chlorodb/fasta/NC_015892.fst.gz and b/data/cds/chlorodb/fasta/NC_015892.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015894.fst.gz b/data/cds/chlorodb/fasta/NC_015894.fst.gz index f7b8b3c..5336641 100644 Binary files a/data/cds/chlorodb/fasta/NC_015894.fst.gz and b/data/cds/chlorodb/fasta/NC_015894.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015899.fst.gz b/data/cds/chlorodb/fasta/NC_015899.fst.gz index 47ba904..1bcb292 100644 Binary files a/data/cds/chlorodb/fasta/NC_015899.fst.gz and b/data/cds/chlorodb/fasta/NC_015899.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015983.fst.gz b/data/cds/chlorodb/fasta/NC_015983.fst.gz index bbcaca1..31d19ba 100644 Binary files 
a/data/cds/chlorodb/fasta/NC_015983.fst.gz and b/data/cds/chlorodb/fasta/NC_015983.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015990.fst.gz b/data/cds/chlorodb/fasta/NC_015990.fst.gz index 3abb031..37839db 100644 Binary files a/data/cds/chlorodb/fasta/NC_015990.fst.gz and b/data/cds/chlorodb/fasta/NC_015990.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_015996.fst.gz b/data/cds/chlorodb/fasta/NC_015996.fst.gz index a590465..257bf52 100644 Binary files a/data/cds/chlorodb/fasta/NC_015996.fst.gz and b/data/cds/chlorodb/fasta/NC_015996.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016058.fst.gz b/data/cds/chlorodb/fasta/NC_016058.fst.gz index f473af7..894164a 100644 Binary files a/data/cds/chlorodb/fasta/NC_016058.fst.gz and b/data/cds/chlorodb/fasta/NC_016058.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016063.fst.gz b/data/cds/chlorodb/fasta/NC_016063.fst.gz index 14cbc1d..ec7c2fe 100644 Binary files a/data/cds/chlorodb/fasta/NC_016063.fst.gz and b/data/cds/chlorodb/fasta/NC_016063.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016064.fst.gz b/data/cds/chlorodb/fasta/NC_016064.fst.gz index b4da2d1..57200ff 100644 Binary files a/data/cds/chlorodb/fasta/NC_016064.fst.gz and b/data/cds/chlorodb/fasta/NC_016064.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016065.fst.gz b/data/cds/chlorodb/fasta/NC_016065.fst.gz index f40e371..920e16b 100644 Binary files a/data/cds/chlorodb/fasta/NC_016065.fst.gz and b/data/cds/chlorodb/fasta/NC_016065.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016068.fst.gz b/data/cds/chlorodb/fasta/NC_016068.fst.gz index 2588566..e18a717 100644 Binary files a/data/cds/chlorodb/fasta/NC_016068.fst.gz and b/data/cds/chlorodb/fasta/NC_016068.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016069.fst.gz b/data/cds/chlorodb/fasta/NC_016069.fst.gz index 5f1be6f..81dd880 100644 Binary files a/data/cds/chlorodb/fasta/NC_016069.fst.gz and b/data/cds/chlorodb/fasta/NC_016069.fst.gz differ diff 
--git a/data/cds/chlorodb/fasta/NC_016430.fst.gz b/data/cds/chlorodb/fasta/NC_016430.fst.gz index 57cb682..ff82880 100644 Binary files a/data/cds/chlorodb/fasta/NC_016430.fst.gz and b/data/cds/chlorodb/fasta/NC_016430.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016433.fst.gz b/data/cds/chlorodb/fasta/NC_016433.fst.gz index 168a7aa..3990daf 100644 Binary files a/data/cds/chlorodb/fasta/NC_016433.fst.gz and b/data/cds/chlorodb/fasta/NC_016433.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016468.fst.gz b/data/cds/chlorodb/fasta/NC_016468.fst.gz index e01c807..a600fea 100644 Binary files a/data/cds/chlorodb/fasta/NC_016468.fst.gz and b/data/cds/chlorodb/fasta/NC_016468.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016471.fst.gz b/data/cds/chlorodb/fasta/NC_016471.fst.gz index 25d7498..1414458 100644 Binary files a/data/cds/chlorodb/fasta/NC_016471.fst.gz and b/data/cds/chlorodb/fasta/NC_016471.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016668.fst.gz b/data/cds/chlorodb/fasta/NC_016668.fst.gz index 13490c0..9273a19 100644 Binary files a/data/cds/chlorodb/fasta/NC_016668.fst.gz and b/data/cds/chlorodb/fasta/NC_016668.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016670.fst.gz b/data/cds/chlorodb/fasta/NC_016670.fst.gz index dc55153..6f8f8c3 100644 Binary files a/data/cds/chlorodb/fasta/NC_016670.fst.gz and b/data/cds/chlorodb/fasta/NC_016670.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016677.fst.gz b/data/cds/chlorodb/fasta/NC_016677.fst.gz index 99ae2c3..1a260ee 100644 Binary files a/data/cds/chlorodb/fasta/NC_016677.fst.gz and b/data/cds/chlorodb/fasta/NC_016677.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016690.fst.gz b/data/cds/chlorodb/fasta/NC_016690.fst.gz index f9061bb..cc44169 100644 Binary files a/data/cds/chlorodb/fasta/NC_016690.fst.gz and b/data/cds/chlorodb/fasta/NC_016690.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016692.fst.gz b/data/cds/chlorodb/fasta/NC_016692.fst.gz index 
6418a77..5b486c9 100644 Binary files a/data/cds/chlorodb/fasta/NC_016692.fst.gz and b/data/cds/chlorodb/fasta/NC_016692.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016699.fst.gz b/data/cds/chlorodb/fasta/NC_016699.fst.gz index daecf23..01f4255 100644 Binary files a/data/cds/chlorodb/fasta/NC_016699.fst.gz and b/data/cds/chlorodb/fasta/NC_016699.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016703.fst.gz b/data/cds/chlorodb/fasta/NC_016703.fst.gz index 883d75a..6175b55 100644 Binary files a/data/cds/chlorodb/fasta/NC_016703.fst.gz and b/data/cds/chlorodb/fasta/NC_016703.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016708.fst.gz b/data/cds/chlorodb/fasta/NC_016708.fst.gz index a8739ad..d000599 100644 Binary files a/data/cds/chlorodb/fasta/NC_016708.fst.gz and b/data/cds/chlorodb/fasta/NC_016708.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016711.fst.gz b/data/cds/chlorodb/fasta/NC_016711.fst.gz index b01db8e..2d5929b 100644 Binary files a/data/cds/chlorodb/fasta/NC_016711.fst.gz and b/data/cds/chlorodb/fasta/NC_016711.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016712.fst.gz b/data/cds/chlorodb/fasta/NC_016712.fst.gz index 015548f..b9b2f0a 100644 Binary files a/data/cds/chlorodb/fasta/NC_016712.fst.gz and b/data/cds/chlorodb/fasta/NC_016712.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016718.fst.gz b/data/cds/chlorodb/fasta/NC_016718.fst.gz index d6cd0aa..38ccc13 100644 Binary files a/data/cds/chlorodb/fasta/NC_016718.fst.gz and b/data/cds/chlorodb/fasta/NC_016718.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016727.fst.gz b/data/cds/chlorodb/fasta/NC_016727.fst.gz index 5e4bee5..c2bd977 100644 Binary files a/data/cds/chlorodb/fasta/NC_016727.fst.gz and b/data/cds/chlorodb/fasta/NC_016727.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016728.fst.gz b/data/cds/chlorodb/fasta/NC_016728.fst.gz index 91a8e25..b52dbd0 100644 Binary files a/data/cds/chlorodb/fasta/NC_016728.fst.gz and 
b/data/cds/chlorodb/fasta/NC_016728.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016729.fst.gz b/data/cds/chlorodb/fasta/NC_016729.fst.gz index 9eecb63..4de853d 100644 Binary files a/data/cds/chlorodb/fasta/NC_016729.fst.gz and b/data/cds/chlorodb/fasta/NC_016729.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016730.fst.gz b/data/cds/chlorodb/fasta/NC_016730.fst.gz index fae19b5..0e8135b 100644 Binary files a/data/cds/chlorodb/fasta/NC_016730.fst.gz and b/data/cds/chlorodb/fasta/NC_016730.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016731.fst.gz b/data/cds/chlorodb/fasta/NC_016731.fst.gz index e76c854..32d17e5 100644 Binary files a/data/cds/chlorodb/fasta/NC_016731.fst.gz and b/data/cds/chlorodb/fasta/NC_016731.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016732.fst.gz b/data/cds/chlorodb/fasta/NC_016732.fst.gz index a63c06d..477ca94 100644 Binary files a/data/cds/chlorodb/fasta/NC_016732.fst.gz and b/data/cds/chlorodb/fasta/NC_016732.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016733.fst.gz b/data/cds/chlorodb/fasta/NC_016733.fst.gz index e248521..aac6f09 100644 Binary files a/data/cds/chlorodb/fasta/NC_016733.fst.gz and b/data/cds/chlorodb/fasta/NC_016733.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016734.fst.gz b/data/cds/chlorodb/fasta/NC_016734.fst.gz index 12301fc..c15a567 100644 Binary files a/data/cds/chlorodb/fasta/NC_016734.fst.gz and b/data/cds/chlorodb/fasta/NC_016734.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016735.fst.gz b/data/cds/chlorodb/fasta/NC_016735.fst.gz index 15175f5..57c6bc7 100644 Binary files a/data/cds/chlorodb/fasta/NC_016735.fst.gz and b/data/cds/chlorodb/fasta/NC_016735.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016736.fst.gz b/data/cds/chlorodb/fasta/NC_016736.fst.gz index 41868b0..6680f0f 100644 Binary files a/data/cds/chlorodb/fasta/NC_016736.fst.gz and b/data/cds/chlorodb/fasta/NC_016736.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016753.fst.gz 
b/data/cds/chlorodb/fasta/NC_016753.fst.gz index cb760fb..ae79f9f 100644 Binary files a/data/cds/chlorodb/fasta/NC_016753.fst.gz and b/data/cds/chlorodb/fasta/NC_016753.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016921.fst.gz b/data/cds/chlorodb/fasta/NC_016921.fst.gz index f211f2a..020a056 100644 Binary files a/data/cds/chlorodb/fasta/NC_016921.fst.gz and b/data/cds/chlorodb/fasta/NC_016921.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016927.fst.gz b/data/cds/chlorodb/fasta/NC_016927.fst.gz index 8d40972..10edb8a 100644 Binary files a/data/cds/chlorodb/fasta/NC_016927.fst.gz and b/data/cds/chlorodb/fasta/NC_016927.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_016986.fst.gz b/data/cds/chlorodb/fasta/NC_016986.fst.gz index 199451a..0186398 100644 Binary files a/data/cds/chlorodb/fasta/NC_016986.fst.gz and b/data/cds/chlorodb/fasta/NC_016986.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_017006.fst.gz b/data/cds/chlorodb/fasta/NC_017006.fst.gz index 09c00aa..ab7bb4b 100644 Binary files a/data/cds/chlorodb/fasta/NC_017006.fst.gz and b/data/cds/chlorodb/fasta/NC_017006.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_017602.fst.gz b/data/cds/chlorodb/fasta/NC_017602.fst.gz index b1d79fe..1861ce5 100644 Binary files a/data/cds/chlorodb/fasta/NC_017602.fst.gz and b/data/cds/chlorodb/fasta/NC_017602.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_017609.fst.gz b/data/cds/chlorodb/fasta/NC_017609.fst.gz index 1c619fe..170653e 100644 Binary files a/data/cds/chlorodb/fasta/NC_017609.fst.gz and b/data/cds/chlorodb/fasta/NC_017609.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_017754.fst.gz b/data/cds/chlorodb/fasta/NC_017754.fst.gz index e9b9e06..157f4b8 100644 Binary files a/data/cds/chlorodb/fasta/NC_017754.fst.gz and b/data/cds/chlorodb/fasta/NC_017754.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_017835.fst.gz b/data/cds/chlorodb/fasta/NC_017835.fst.gz index 76c018c..e7de783 100644 Binary files 
a/data/cds/chlorodb/fasta/NC_017835.fst.gz and b/data/cds/chlorodb/fasta/NC_017835.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_017894.fst.gz b/data/cds/chlorodb/fasta/NC_017894.fst.gz index 1e279da..3621d4f 100644 Binary files a/data/cds/chlorodb/fasta/NC_017894.fst.gz and b/data/cds/chlorodb/fasta/NC_017894.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_018051.fst.gz b/data/cds/chlorodb/fasta/NC_018051.fst.gz index 8df7f0d..b234edc 100644 Binary files a/data/cds/chlorodb/fasta/NC_018051.fst.gz and b/data/cds/chlorodb/fasta/NC_018051.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_018109.fst.gz b/data/cds/chlorodb/fasta/NC_018109.fst.gz index 2bf466c..46e18e8 100644 Binary files a/data/cds/chlorodb/fasta/NC_018109.fst.gz and b/data/cds/chlorodb/fasta/NC_018109.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_018110.fst.gz b/data/cds/chlorodb/fasta/NC_018110.fst.gz index 6df39da..a39a58d 100644 Binary files a/data/cds/chlorodb/fasta/NC_018110.fst.gz and b/data/cds/chlorodb/fasta/NC_018110.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_018111.fst.gz b/data/cds/chlorodb/fasta/NC_018111.fst.gz index e139ba9..4c89ba6 100644 Binary files a/data/cds/chlorodb/fasta/NC_018111.fst.gz and b/data/cds/chlorodb/fasta/NC_018111.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_018112.fst.gz b/data/cds/chlorodb/fasta/NC_018112.fst.gz index b00d9ab..a7e1873 100644 Binary files a/data/cds/chlorodb/fasta/NC_018112.fst.gz and b/data/cds/chlorodb/fasta/NC_018112.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_018113.fst.gz b/data/cds/chlorodb/fasta/NC_018113.fst.gz index 01d9911..f044e7b 100644 Binary files a/data/cds/chlorodb/fasta/NC_018113.fst.gz and b/data/cds/chlorodb/fasta/NC_018113.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_018114.fst.gz b/data/cds/chlorodb/fasta/NC_018114.fst.gz index 082cf51..52b5165 100644 Binary files a/data/cds/chlorodb/fasta/NC_018114.fst.gz and b/data/cds/chlorodb/fasta/NC_018114.fst.gz differ diff 
--git a/data/cds/chlorodb/fasta/NC_018117.fst.gz b/data/cds/chlorodb/fasta/NC_018117.fst.gz index 4f7e40a..594833a 100644 Binary files a/data/cds/chlorodb/fasta/NC_018117.fst.gz and b/data/cds/chlorodb/fasta/NC_018117.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_018357.fst.gz b/data/cds/chlorodb/fasta/NC_018357.fst.gz index 244459a..5999931 100644 Binary files a/data/cds/chlorodb/fasta/NC_018357.fst.gz and b/data/cds/chlorodb/fasta/NC_018357.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_018523.fst.gz b/data/cds/chlorodb/fasta/NC_018523.fst.gz index 408a708..6f27dd3 100644 Binary files a/data/cds/chlorodb/fasta/NC_018523.fst.gz and b/data/cds/chlorodb/fasta/NC_018523.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_018541.fst.gz b/data/cds/chlorodb/fasta/NC_018541.fst.gz index a1df92e..1188fad 100644 Binary files a/data/cds/chlorodb/fasta/NC_018541.fst.gz and b/data/cds/chlorodb/fasta/NC_018541.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_018552.fst.gz b/data/cds/chlorodb/fasta/NC_018552.fst.gz index fdd40e1..81a6453 100644 Binary files a/data/cds/chlorodb/fasta/NC_018552.fst.gz and b/data/cds/chlorodb/fasta/NC_018552.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_018565.fst.gz b/data/cds/chlorodb/fasta/NC_018565.fst.gz index 28cec75..3931b82 100644 Binary files a/data/cds/chlorodb/fasta/NC_018565.fst.gz and b/data/cds/chlorodb/fasta/NC_018565.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_018569.fst.gz b/data/cds/chlorodb/fasta/NC_018569.fst.gz index f05100f..e95629f 100644 Binary files a/data/cds/chlorodb/fasta/NC_018569.fst.gz and b/data/cds/chlorodb/fasta/NC_018569.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_018766.fst.gz b/data/cds/chlorodb/fasta/NC_018766.fst.gz index 9f254e6..eb49801 100644 Binary files a/data/cds/chlorodb/fasta/NC_018766.fst.gz and b/data/cds/chlorodb/fasta/NC_018766.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_018767.fst.gz b/data/cds/chlorodb/fasta/NC_018767.fst.gz index 
04d279b..fbb3083 100644 Binary files a/data/cds/chlorodb/fasta/NC_018767.fst.gz and b/data/cds/chlorodb/fasta/NC_018767.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_019601.fst.gz b/data/cds/chlorodb/fasta/NC_019601.fst.gz index b4035cb..7b2e6b3 100644 Binary files a/data/cds/chlorodb/fasta/NC_019601.fst.gz and b/data/cds/chlorodb/fasta/NC_019601.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_019602.fst.gz b/data/cds/chlorodb/fasta/NC_019602.fst.gz index 6135355..d02d851 100644 Binary files a/data/cds/chlorodb/fasta/NC_019602.fst.gz and b/data/cds/chlorodb/fasta/NC_019602.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_019616.fst.gz b/data/cds/chlorodb/fasta/NC_019616.fst.gz index 7402933..3509363 100644 Binary files a/data/cds/chlorodb/fasta/NC_019616.fst.gz and b/data/cds/chlorodb/fasta/NC_019616.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_019628.fst.gz b/data/cds/chlorodb/fasta/NC_019628.fst.gz index 443eae5..b2f7e95 100644 Binary files a/data/cds/chlorodb/fasta/NC_019628.fst.gz and b/data/cds/chlorodb/fasta/NC_019628.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_019648.fst.gz b/data/cds/chlorodb/fasta/NC_019648.fst.gz index 9520ed8..ff1fca4 100644 Binary files a/data/cds/chlorodb/fasta/NC_019648.fst.gz and b/data/cds/chlorodb/fasta/NC_019648.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_019649.fst.gz b/data/cds/chlorodb/fasta/NC_019649.fst.gz index 176ff25..bd1a074 100644 Binary files a/data/cds/chlorodb/fasta/NC_019649.fst.gz and b/data/cds/chlorodb/fasta/NC_019649.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_019650.fst.gz b/data/cds/chlorodb/fasta/NC_019650.fst.gz index 27bf799..e3b54e3 100644 Binary files a/data/cds/chlorodb/fasta/NC_019650.fst.gz and b/data/cds/chlorodb/fasta/NC_019650.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_019651.fst.gz b/data/cds/chlorodb/fasta/NC_019651.fst.gz index c270958..1a56509 100644 Binary files a/data/cds/chlorodb/fasta/NC_019651.fst.gz and 
b/data/cds/chlorodb/fasta/NC_019651.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_020014.fst.gz b/data/cds/chlorodb/fasta/NC_020014.fst.gz index 181e47a..373afd8 100644 Binary files a/data/cds/chlorodb/fasta/NC_020014.fst.gz and b/data/cds/chlorodb/fasta/NC_020014.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_020018.fst.gz b/data/cds/chlorodb/fasta/NC_020018.fst.gz index f410281..7acb44f 100644 Binary files a/data/cds/chlorodb/fasta/NC_020018.fst.gz and b/data/cds/chlorodb/fasta/NC_020018.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_020019.fst.gz b/data/cds/chlorodb/fasta/NC_020019.fst.gz index c915f8c..ace2465 100644 Binary files a/data/cds/chlorodb/fasta/NC_020019.fst.gz and b/data/cds/chlorodb/fasta/NC_020019.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_020092.fst.gz b/data/cds/chlorodb/fasta/NC_020092.fst.gz index c91af1c..6d125ff 100644 Binary files a/data/cds/chlorodb/fasta/NC_020092.fst.gz and b/data/cds/chlorodb/fasta/NC_020092.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_020098.fst.gz b/data/cds/chlorodb/fasta/NC_020098.fst.gz index 30d1677..1343004 100644 Binary files a/data/cds/chlorodb/fasta/NC_020098.fst.gz and b/data/cds/chlorodb/fasta/NC_020098.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_020146.fst.gz b/data/cds/chlorodb/fasta/NC_020146.fst.gz index 7ab271a..a7e366e 100644 Binary files a/data/cds/chlorodb/fasta/NC_020146.fst.gz and b/data/cds/chlorodb/fasta/NC_020146.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_020147.fst.gz b/data/cds/chlorodb/fasta/NC_020147.fst.gz index 672b279..d44e929 100644 Binary files a/data/cds/chlorodb/fasta/NC_020147.fst.gz and b/data/cds/chlorodb/fasta/NC_020147.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_020152.fst.gz b/data/cds/chlorodb/fasta/NC_020152.fst.gz index 9a0c67b..1416c9a 100644 Binary files a/data/cds/chlorodb/fasta/NC_020152.fst.gz and b/data/cds/chlorodb/fasta/NC_020152.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_020316.fst.gz 
b/data/cds/chlorodb/fasta/NC_020316.fst.gz index 29b7804..4ecf9fe 100644 Binary files a/data/cds/chlorodb/fasta/NC_020316.fst.gz and b/data/cds/chlorodb/fasta/NC_020316.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_020317.fst.gz b/data/cds/chlorodb/fasta/NC_020317.fst.gz index d671c27..fa9d993 100644 Binary files a/data/cds/chlorodb/fasta/NC_020317.fst.gz and b/data/cds/chlorodb/fasta/NC_020317.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_020318.fst.gz b/data/cds/chlorodb/fasta/NC_020318.fst.gz index aeddde6..472f556 100644 Binary files a/data/cds/chlorodb/fasta/NC_020318.fst.gz and b/data/cds/chlorodb/fasta/NC_020318.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_020319.fst.gz b/data/cds/chlorodb/fasta/NC_020319.fst.gz index a481655..55c075a 100644 Binary files a/data/cds/chlorodb/fasta/NC_020319.fst.gz and b/data/cds/chlorodb/fasta/NC_020319.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_020320.fst.gz b/data/cds/chlorodb/fasta/NC_020320.fst.gz index 434ec0b..c807002 100644 Binary files a/data/cds/chlorodb/fasta/NC_020320.fst.gz and b/data/cds/chlorodb/fasta/NC_020320.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_020321.fst.gz b/data/cds/chlorodb/fasta/NC_020321.fst.gz index 63a2ab4..9bb3d80 100644 Binary files a/data/cds/chlorodb/fasta/NC_020321.fst.gz and b/data/cds/chlorodb/fasta/NC_020321.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_020341.fst.gz b/data/cds/chlorodb/fasta/NC_020341.fst.gz index b8a2291..612e459 100644 Binary files a/data/cds/chlorodb/fasta/NC_020341.fst.gz and b/data/cds/chlorodb/fasta/NC_020341.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_020361.fst.gz b/data/cds/chlorodb/fasta/NC_020361.fst.gz index 74c71e5..e476959 100644 Binary files a/data/cds/chlorodb/fasta/NC_020361.fst.gz and b/data/cds/chlorodb/fasta/NC_020361.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_020362.fst.gz b/data/cds/chlorodb/fasta/NC_020362.fst.gz index 601d216..11d7d60 100644 Binary files 
a/data/cds/chlorodb/fasta/NC_020362.fst.gz and b/data/cds/chlorodb/fasta/NC_020362.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_020363.fst.gz b/data/cds/chlorodb/fasta/NC_020363.fst.gz index f08bce6..9bfb0e3 100644 Binary files a/data/cds/chlorodb/fasta/NC_020363.fst.gz and b/data/cds/chlorodb/fasta/NC_020363.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_020364.fst.gz b/data/cds/chlorodb/fasta/NC_020364.fst.gz index c2ec142..ee6a826 100644 Binary files a/data/cds/chlorodb/fasta/NC_020364.fst.gz and b/data/cds/chlorodb/fasta/NC_020364.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_020365.fst.gz b/data/cds/chlorodb/fasta/NC_020365.fst.gz index dc8b74a..4ee6d60 100644 Binary files a/data/cds/chlorodb/fasta/NC_020365.fst.gz and b/data/cds/chlorodb/fasta/NC_020365.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_020366.fst.gz b/data/cds/chlorodb/fasta/NC_020366.fst.gz index 1217587..adf5305 100644 Binary files a/data/cds/chlorodb/fasta/NC_020366.fst.gz and b/data/cds/chlorodb/fasta/NC_020366.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_020367.fst.gz b/data/cds/chlorodb/fasta/NC_020367.fst.gz index 762d388..c4a95a7 100644 Binary files a/data/cds/chlorodb/fasta/NC_020367.fst.gz and b/data/cds/chlorodb/fasta/NC_020367.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_020371.fst.gz b/data/cds/chlorodb/fasta/NC_020371.fst.gz index 7afc86d..bab2dfa 100644 Binary files a/data/cds/chlorodb/fasta/NC_020371.fst.gz and b/data/cds/chlorodb/fasta/NC_020371.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_020372.fst.gz b/data/cds/chlorodb/fasta/NC_020372.fst.gz index a12476a..5c75f91 100644 Binary files a/data/cds/chlorodb/fasta/NC_020372.fst.gz and b/data/cds/chlorodb/fasta/NC_020372.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_020431.fst.gz b/data/cds/chlorodb/fasta/NC_020431.fst.gz index c3cb213..40db220 100644 Binary files a/data/cds/chlorodb/fasta/NC_020431.fst.gz and b/data/cds/chlorodb/fasta/NC_020431.fst.gz differ diff 
--git a/data/cds/chlorodb/fasta/NC_020438.fst.gz b/data/cds/chlorodb/fasta/NC_020438.fst.gz index 25e6457..d32e067 100644 Binary files a/data/cds/chlorodb/fasta/NC_020438.fst.gz and b/data/cds/chlorodb/fasta/NC_020438.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_020460.fst.gz b/data/cds/chlorodb/fasta/NC_020460.fst.gz index f6ae086..f3a7254 100644 Binary files a/data/cds/chlorodb/fasta/NC_020460.fst.gz and b/data/cds/chlorodb/fasta/NC_020460.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_020607.fst.gz b/data/cds/chlorodb/fasta/NC_020607.fst.gz index 136bbec..a210907 100644 Binary files a/data/cds/chlorodb/fasta/NC_020607.fst.gz and b/data/cds/chlorodb/fasta/NC_020607.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_020795.fst.gz b/data/cds/chlorodb/fasta/NC_020795.fst.gz index badcd73..b40753e 100644 Binary files a/data/cds/chlorodb/fasta/NC_020795.fst.gz and b/data/cds/chlorodb/fasta/NC_020795.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021075.fst.gz b/data/cds/chlorodb/fasta/NC_021075.fst.gz index 794d988..7f7cb9f 100644 Binary files a/data/cds/chlorodb/fasta/NC_021075.fst.gz and b/data/cds/chlorodb/fasta/NC_021075.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021091.fst.gz b/data/cds/chlorodb/fasta/NC_021091.fst.gz index 2f0c1e5..51145e1 100644 Binary files a/data/cds/chlorodb/fasta/NC_021091.fst.gz and b/data/cds/chlorodb/fasta/NC_021091.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021101.fst.gz b/data/cds/chlorodb/fasta/NC_021101.fst.gz index 5a94913..f4c8506 100644 Binary files a/data/cds/chlorodb/fasta/NC_021101.fst.gz and b/data/cds/chlorodb/fasta/NC_021101.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021102.fst.gz b/data/cds/chlorodb/fasta/NC_021102.fst.gz index c3c4307..1fc949d 100644 Binary files a/data/cds/chlorodb/fasta/NC_021102.fst.gz and b/data/cds/chlorodb/fasta/NC_021102.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021109.fst.gz b/data/cds/chlorodb/fasta/NC_021109.fst.gz index 
484a54b..af6c7f7 100644 Binary files a/data/cds/chlorodb/fasta/NC_021109.fst.gz and b/data/cds/chlorodb/fasta/NC_021109.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021110.fst.gz b/data/cds/chlorodb/fasta/NC_021110.fst.gz index b7816b7..2a517de 100644 Binary files a/data/cds/chlorodb/fasta/NC_021110.fst.gz and b/data/cds/chlorodb/fasta/NC_021110.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021111.fst.gz b/data/cds/chlorodb/fasta/NC_021111.fst.gz index 843593a..9939920 100644 Binary files a/data/cds/chlorodb/fasta/NC_021111.fst.gz and b/data/cds/chlorodb/fasta/NC_021111.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021121.fst.gz b/data/cds/chlorodb/fasta/NC_021121.fst.gz index 3b80bec..742451f 100644 Binary files a/data/cds/chlorodb/fasta/NC_021121.fst.gz and b/data/cds/chlorodb/fasta/NC_021121.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021189.fst.gz b/data/cds/chlorodb/fasta/NC_021189.fst.gz index 04ed1e1..dcf2ce0 100644 Binary files a/data/cds/chlorodb/fasta/NC_021189.fst.gz and b/data/cds/chlorodb/fasta/NC_021189.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021372.fst.gz b/data/cds/chlorodb/fasta/NC_021372.fst.gz index 26b5417..056f394 100644 Binary files a/data/cds/chlorodb/fasta/NC_021372.fst.gz and b/data/cds/chlorodb/fasta/NC_021372.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021423.fst.gz b/data/cds/chlorodb/fasta/NC_021423.fst.gz index 09fb051..3c3e325 100644 Binary files a/data/cds/chlorodb/fasta/NC_021423.fst.gz and b/data/cds/chlorodb/fasta/NC_021423.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021425.fst.gz b/data/cds/chlorodb/fasta/NC_021425.fst.gz index 061be63..30ac4f4 100644 Binary files a/data/cds/chlorodb/fasta/NC_021425.fst.gz and b/data/cds/chlorodb/fasta/NC_021425.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021426.fst.gz b/data/cds/chlorodb/fasta/NC_021426.fst.gz index b130182..dd71e26 100644 Binary files a/data/cds/chlorodb/fasta/NC_021426.fst.gz and 
b/data/cds/chlorodb/fasta/NC_021426.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021429.fst.gz b/data/cds/chlorodb/fasta/NC_021429.fst.gz index c2e1f68..3fedc55 100644 Binary files a/data/cds/chlorodb/fasta/NC_021429.fst.gz and b/data/cds/chlorodb/fasta/NC_021429.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021430.fst.gz b/data/cds/chlorodb/fasta/NC_021430.fst.gz index 7ee6176..52ca2f5 100644 Binary files a/data/cds/chlorodb/fasta/NC_021430.fst.gz and b/data/cds/chlorodb/fasta/NC_021430.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021431.fst.gz b/data/cds/chlorodb/fasta/NC_021431.fst.gz index 6ac58fd..3a188e2 100644 Binary files a/data/cds/chlorodb/fasta/NC_021431.fst.gz and b/data/cds/chlorodb/fasta/NC_021431.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021432.fst.gz b/data/cds/chlorodb/fasta/NC_021432.fst.gz index d232cec..d7165fd 100644 Binary files a/data/cds/chlorodb/fasta/NC_021432.fst.gz and b/data/cds/chlorodb/fasta/NC_021432.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021433.fst.gz b/data/cds/chlorodb/fasta/NC_021433.fst.gz index 69680d0..8dd79ad 100644 Binary files a/data/cds/chlorodb/fasta/NC_021433.fst.gz and b/data/cds/chlorodb/fasta/NC_021433.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021437.fst.gz b/data/cds/chlorodb/fasta/NC_021437.fst.gz index 4526619..c29cb23 100644 Binary files a/data/cds/chlorodb/fasta/NC_021437.fst.gz and b/data/cds/chlorodb/fasta/NC_021437.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021438.fst.gz b/data/cds/chlorodb/fasta/NC_021438.fst.gz index 3fa7200..b91f274 100644 Binary files a/data/cds/chlorodb/fasta/NC_021438.fst.gz and b/data/cds/chlorodb/fasta/NC_021438.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021439.fst.gz b/data/cds/chlorodb/fasta/NC_021439.fst.gz index eee623b..46b76fb 100644 Binary files a/data/cds/chlorodb/fasta/NC_021439.fst.gz and b/data/cds/chlorodb/fasta/NC_021439.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021440.fst.gz 
b/data/cds/chlorodb/fasta/NC_021440.fst.gz index eccddd2..9a2cad7 100644 Binary files a/data/cds/chlorodb/fasta/NC_021440.fst.gz and b/data/cds/chlorodb/fasta/NC_021440.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021441.fst.gz b/data/cds/chlorodb/fasta/NC_021441.fst.gz index 6759439..1e5cb37 100644 Binary files a/data/cds/chlorodb/fasta/NC_021441.fst.gz and b/data/cds/chlorodb/fasta/NC_021441.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021449.fst.gz b/data/cds/chlorodb/fasta/NC_021449.fst.gz index e379763..f95a709 100644 Binary files a/data/cds/chlorodb/fasta/NC_021449.fst.gz and b/data/cds/chlorodb/fasta/NC_021449.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021455.fst.gz b/data/cds/chlorodb/fasta/NC_021455.fst.gz index f330be4..5746914 100644 Binary files a/data/cds/chlorodb/fasta/NC_021455.fst.gz and b/data/cds/chlorodb/fasta/NC_021455.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021456.fst.gz b/data/cds/chlorodb/fasta/NC_021456.fst.gz index 31ee501..3e8b337 100644 Binary files a/data/cds/chlorodb/fasta/NC_021456.fst.gz and b/data/cds/chlorodb/fasta/NC_021456.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021618.fst.gz b/data/cds/chlorodb/fasta/NC_021618.fst.gz index fae887c..8c91547 100644 Binary files a/data/cds/chlorodb/fasta/NC_021618.fst.gz and b/data/cds/chlorodb/fasta/NC_021618.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021636.fst.gz b/data/cds/chlorodb/fasta/NC_021636.fst.gz index 6fb42b2..22c6231 100644 Binary files a/data/cds/chlorodb/fasta/NC_021636.fst.gz and b/data/cds/chlorodb/fasta/NC_021636.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021637.fst.gz b/data/cds/chlorodb/fasta/NC_021637.fst.gz index e07ec6e..4b868ce 100644 Binary files a/data/cds/chlorodb/fasta/NC_021637.fst.gz and b/data/cds/chlorodb/fasta/NC_021637.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021645.fst.gz b/data/cds/chlorodb/fasta/NC_021645.fst.gz index e563917..d929767 100644 Binary files 
a/data/cds/chlorodb/fasta/NC_021645.fst.gz and b/data/cds/chlorodb/fasta/NC_021645.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021646.fst.gz b/data/cds/chlorodb/fasta/NC_021646.fst.gz index d227725..bb61e07 100644 Binary files a/data/cds/chlorodb/fasta/NC_021646.fst.gz and b/data/cds/chlorodb/fasta/NC_021646.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021647.fst.gz b/data/cds/chlorodb/fasta/NC_021647.fst.gz index 3942d48..c3ed366 100644 Binary files a/data/cds/chlorodb/fasta/NC_021647.fst.gz and b/data/cds/chlorodb/fasta/NC_021647.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021648.fst.gz b/data/cds/chlorodb/fasta/NC_021648.fst.gz index 62a3023..1d2faa9 100644 Binary files a/data/cds/chlorodb/fasta/NC_021648.fst.gz and b/data/cds/chlorodb/fasta/NC_021648.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021649.fst.gz b/data/cds/chlorodb/fasta/NC_021649.fst.gz index 867a887..2932cb0 100644 Binary files a/data/cds/chlorodb/fasta/NC_021649.fst.gz and b/data/cds/chlorodb/fasta/NC_021649.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021650.fst.gz b/data/cds/chlorodb/fasta/NC_021650.fst.gz index d3d38ab..b4fefb4 100644 Binary files a/data/cds/chlorodb/fasta/NC_021650.fst.gz and b/data/cds/chlorodb/fasta/NC_021650.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021760.fst.gz b/data/cds/chlorodb/fasta/NC_021760.fst.gz index e48f33c..d4147a9 100644 Binary files a/data/cds/chlorodb/fasta/NC_021760.fst.gz and b/data/cds/chlorodb/fasta/NC_021760.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021761.fst.gz b/data/cds/chlorodb/fasta/NC_021761.fst.gz index ae06c97..81525c9 100644 Binary files a/data/cds/chlorodb/fasta/NC_021761.fst.gz and b/data/cds/chlorodb/fasta/NC_021761.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_021762.fst.gz b/data/cds/chlorodb/fasta/NC_021762.fst.gz index 7bb99d3..d9fd3c7 100644 Binary files a/data/cds/chlorodb/fasta/NC_021762.fst.gz and b/data/cds/chlorodb/fasta/NC_021762.fst.gz differ diff 
--git a/data/cds/chlorodb/fasta/NC_021936.fst.gz b/data/cds/chlorodb/fasta/NC_021936.fst.gz index 03c43ab..1504926 100644 Binary files a/data/cds/chlorodb/fasta/NC_021936.fst.gz and b/data/cds/chlorodb/fasta/NC_021936.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022133.fst.gz b/data/cds/chlorodb/fasta/NC_022133.fst.gz index 196739d..86c43ac 100644 Binary files a/data/cds/chlorodb/fasta/NC_022133.fst.gz and b/data/cds/chlorodb/fasta/NC_022133.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022135.fst.gz b/data/cds/chlorodb/fasta/NC_022135.fst.gz index 9aefc19..80e3dae 100644 Binary files a/data/cds/chlorodb/fasta/NC_022135.fst.gz and b/data/cds/chlorodb/fasta/NC_022135.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022136.fst.gz b/data/cds/chlorodb/fasta/NC_022136.fst.gz index 425942d..2618b83 100644 Binary files a/data/cds/chlorodb/fasta/NC_022136.fst.gz and b/data/cds/chlorodb/fasta/NC_022136.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022137.fst.gz b/data/cds/chlorodb/fasta/NC_022137.fst.gz index 2c7e267..d15aa1b 100644 Binary files a/data/cds/chlorodb/fasta/NC_022137.fst.gz and b/data/cds/chlorodb/fasta/NC_022137.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022259.fst.gz b/data/cds/chlorodb/fasta/NC_022259.fst.gz index b958060..d6536b5 100644 Binary files a/data/cds/chlorodb/fasta/NC_022259.fst.gz and b/data/cds/chlorodb/fasta/NC_022259.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022260.fst.gz b/data/cds/chlorodb/fasta/NC_022260.fst.gz index 9bda640..f249ef1 100644 Binary files a/data/cds/chlorodb/fasta/NC_022260.fst.gz and b/data/cds/chlorodb/fasta/NC_022260.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022261.fst.gz b/data/cds/chlorodb/fasta/NC_022261.fst.gz index c585674..ee6fcdc 100644 Binary files a/data/cds/chlorodb/fasta/NC_022261.fst.gz and b/data/cds/chlorodb/fasta/NC_022261.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022262.fst.gz b/data/cds/chlorodb/fasta/NC_022262.fst.gz index 
89faf34..69ad93a 100644 Binary files a/data/cds/chlorodb/fasta/NC_022262.fst.gz and b/data/cds/chlorodb/fasta/NC_022262.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022263.fst.gz b/data/cds/chlorodb/fasta/NC_022263.fst.gz index c12ed34..a933fba 100644 Binary files a/data/cds/chlorodb/fasta/NC_022263.fst.gz and b/data/cds/chlorodb/fasta/NC_022263.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022264.fst.gz b/data/cds/chlorodb/fasta/NC_022264.fst.gz index f6386aa..1525cdc 100644 Binary files a/data/cds/chlorodb/fasta/NC_022264.fst.gz and b/data/cds/chlorodb/fasta/NC_022264.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022378.fst.gz b/data/cds/chlorodb/fasta/NC_022378.fst.gz index de48b27..e9a692f 100644 Binary files a/data/cds/chlorodb/fasta/NC_022378.fst.gz and b/data/cds/chlorodb/fasta/NC_022378.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022379.fst.gz b/data/cds/chlorodb/fasta/NC_022379.fst.gz index cfbac98..0cc2252 100644 Binary files a/data/cds/chlorodb/fasta/NC_022379.fst.gz and b/data/cds/chlorodb/fasta/NC_022379.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022380.fst.gz b/data/cds/chlorodb/fasta/NC_022380.fst.gz index 3b2301f..b654599 100644 Binary files a/data/cds/chlorodb/fasta/NC_022380.fst.gz and b/data/cds/chlorodb/fasta/NC_022380.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022381.fst.gz b/data/cds/chlorodb/fasta/NC_022381.fst.gz index f64ead7..02b528b 100644 Binary files a/data/cds/chlorodb/fasta/NC_022381.fst.gz and b/data/cds/chlorodb/fasta/NC_022381.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022382.fst.gz b/data/cds/chlorodb/fasta/NC_022382.fst.gz index 9f825aa..5902627 100644 Binary files a/data/cds/chlorodb/fasta/NC_022382.fst.gz and b/data/cds/chlorodb/fasta/NC_022382.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022383.fst.gz b/data/cds/chlorodb/fasta/NC_022383.fst.gz index 51ec8f8..63b66e4 100644 Binary files a/data/cds/chlorodb/fasta/NC_022383.fst.gz and 
b/data/cds/chlorodb/fasta/NC_022383.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022384.fst.gz b/data/cds/chlorodb/fasta/NC_022384.fst.gz index 1a78b59..0d19974 100644 Binary files a/data/cds/chlorodb/fasta/NC_022384.fst.gz and b/data/cds/chlorodb/fasta/NC_022384.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022385.fst.gz b/data/cds/chlorodb/fasta/NC_022385.fst.gz index 80a575c..ed12fbf 100644 Binary files a/data/cds/chlorodb/fasta/NC_022385.fst.gz and b/data/cds/chlorodb/fasta/NC_022385.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022386.fst.gz b/data/cds/chlorodb/fasta/NC_022386.fst.gz index 985a727..f09f245 100644 Binary files a/data/cds/chlorodb/fasta/NC_022386.fst.gz and b/data/cds/chlorodb/fasta/NC_022386.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022387.fst.gz b/data/cds/chlorodb/fasta/NC_022387.fst.gz index 5b5db02..9f060c2 100644 Binary files a/data/cds/chlorodb/fasta/NC_022387.fst.gz and b/data/cds/chlorodb/fasta/NC_022387.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022388.fst.gz b/data/cds/chlorodb/fasta/NC_022388.fst.gz index 655d129..b1949dc 100644 Binary files a/data/cds/chlorodb/fasta/NC_022388.fst.gz and b/data/cds/chlorodb/fasta/NC_022388.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022389.fst.gz b/data/cds/chlorodb/fasta/NC_022389.fst.gz index 4067297..d0c5033 100644 Binary files a/data/cds/chlorodb/fasta/NC_022389.fst.gz and b/data/cds/chlorodb/fasta/NC_022389.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022390.fst.gz b/data/cds/chlorodb/fasta/NC_022390.fst.gz index 6d4c821..909daee 100644 Binary files a/data/cds/chlorodb/fasta/NC_022390.fst.gz and b/data/cds/chlorodb/fasta/NC_022390.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022391.fst.gz b/data/cds/chlorodb/fasta/NC_022391.fst.gz index 7a8195b..7b1e393 100644 Binary files a/data/cds/chlorodb/fasta/NC_022391.fst.gz and b/data/cds/chlorodb/fasta/NC_022391.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022392.fst.gz 
b/data/cds/chlorodb/fasta/NC_022392.fst.gz index 44c0f52..794bbe0 100644 Binary files a/data/cds/chlorodb/fasta/NC_022392.fst.gz and b/data/cds/chlorodb/fasta/NC_022392.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022393.fst.gz b/data/cds/chlorodb/fasta/NC_022393.fst.gz index 4910a55..99e6500 100644 Binary files a/data/cds/chlorodb/fasta/NC_022393.fst.gz and b/data/cds/chlorodb/fasta/NC_022393.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022394.fst.gz b/data/cds/chlorodb/fasta/NC_022394.fst.gz index 185aaa2..a91c425 100644 Binary files a/data/cds/chlorodb/fasta/NC_022394.fst.gz and b/data/cds/chlorodb/fasta/NC_022394.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022395.fst.gz b/data/cds/chlorodb/fasta/NC_022395.fst.gz index 2c01f18..1931179 100644 Binary files a/data/cds/chlorodb/fasta/NC_022395.fst.gz and b/data/cds/chlorodb/fasta/NC_022395.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022396.fst.gz b/data/cds/chlorodb/fasta/NC_022396.fst.gz index b81bbef..e2a71fe 100644 Binary files a/data/cds/chlorodb/fasta/NC_022396.fst.gz and b/data/cds/chlorodb/fasta/NC_022396.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022397.fst.gz b/data/cds/chlorodb/fasta/NC_022397.fst.gz index 41ae84b..30e9d7a 100644 Binary files a/data/cds/chlorodb/fasta/NC_022397.fst.gz and b/data/cds/chlorodb/fasta/NC_022397.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022398.fst.gz b/data/cds/chlorodb/fasta/NC_022398.fst.gz index 01c449a..c63c321 100644 Binary files a/data/cds/chlorodb/fasta/NC_022398.fst.gz and b/data/cds/chlorodb/fasta/NC_022398.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022399.fst.gz b/data/cds/chlorodb/fasta/NC_022399.fst.gz index f0e1c2d..dd94d6c 100644 Binary files a/data/cds/chlorodb/fasta/NC_022399.fst.gz and b/data/cds/chlorodb/fasta/NC_022399.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022400.fst.gz b/data/cds/chlorodb/fasta/NC_022400.fst.gz index d62c740..9e4fa18 100644 Binary files 
a/data/cds/chlorodb/fasta/NC_022400.fst.gz and b/data/cds/chlorodb/fasta/NC_022400.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022401.fst.gz b/data/cds/chlorodb/fasta/NC_022401.fst.gz index 5140ee9..98d0365 100644 Binary files a/data/cds/chlorodb/fasta/NC_022401.fst.gz and b/data/cds/chlorodb/fasta/NC_022401.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022402.fst.gz b/data/cds/chlorodb/fasta/NC_022402.fst.gz index c80c326..0a7021c 100644 Binary files a/data/cds/chlorodb/fasta/NC_022402.fst.gz and b/data/cds/chlorodb/fasta/NC_022402.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022403.fst.gz b/data/cds/chlorodb/fasta/NC_022403.fst.gz index 6d05ebf..45f267d 100644 Binary files a/data/cds/chlorodb/fasta/NC_022403.fst.gz and b/data/cds/chlorodb/fasta/NC_022403.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022404.fst.gz b/data/cds/chlorodb/fasta/NC_022404.fst.gz index 9b291b0..83541ad 100644 Binary files a/data/cds/chlorodb/fasta/NC_022404.fst.gz and b/data/cds/chlorodb/fasta/NC_022404.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022405.fst.gz b/data/cds/chlorodb/fasta/NC_022405.fst.gz index 78503b1..37ce68c 100644 Binary files a/data/cds/chlorodb/fasta/NC_022405.fst.gz and b/data/cds/chlorodb/fasta/NC_022405.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022406.fst.gz b/data/cds/chlorodb/fasta/NC_022406.fst.gz index f641bf8..0afd032 100644 Binary files a/data/cds/chlorodb/fasta/NC_022406.fst.gz and b/data/cds/chlorodb/fasta/NC_022406.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022407.fst.gz b/data/cds/chlorodb/fasta/NC_022407.fst.gz index 051db74..0672aad 100644 Binary files a/data/cds/chlorodb/fasta/NC_022407.fst.gz and b/data/cds/chlorodb/fasta/NC_022407.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022408.fst.gz b/data/cds/chlorodb/fasta/NC_022408.fst.gz index 687c59f..a91c20b 100644 Binary files a/data/cds/chlorodb/fasta/NC_022408.fst.gz and b/data/cds/chlorodb/fasta/NC_022408.fst.gz differ diff 
--git a/data/cds/chlorodb/fasta/NC_022409.fst.gz b/data/cds/chlorodb/fasta/NC_022409.fst.gz index 9ffbb2a..2aa312a 100644 Binary files a/data/cds/chlorodb/fasta/NC_022409.fst.gz and b/data/cds/chlorodb/fasta/NC_022409.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022410.fst.gz b/data/cds/chlorodb/fasta/NC_022410.fst.gz index 86c58fe..515f180 100644 Binary files a/data/cds/chlorodb/fasta/NC_022410.fst.gz and b/data/cds/chlorodb/fasta/NC_022410.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022411.fst.gz b/data/cds/chlorodb/fasta/NC_022411.fst.gz index 2ab37a3..3cfaf3e 100644 Binary files a/data/cds/chlorodb/fasta/NC_022411.fst.gz and b/data/cds/chlorodb/fasta/NC_022411.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022412.fst.gz b/data/cds/chlorodb/fasta/NC_022412.fst.gz index 5b9a175..0d2470b 100644 Binary files a/data/cds/chlorodb/fasta/NC_022412.fst.gz and b/data/cds/chlorodb/fasta/NC_022412.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022413.fst.gz b/data/cds/chlorodb/fasta/NC_022413.fst.gz index f62b8a7..b6cfb22 100644 Binary files a/data/cds/chlorodb/fasta/NC_022413.fst.gz and b/data/cds/chlorodb/fasta/NC_022413.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022414.fst.gz b/data/cds/chlorodb/fasta/NC_022414.fst.gz index 6f3574d..2d9068b 100644 Binary files a/data/cds/chlorodb/fasta/NC_022414.fst.gz and b/data/cds/chlorodb/fasta/NC_022414.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022417.fst.gz b/data/cds/chlorodb/fasta/NC_022417.fst.gz index 8099d2e..e8c3b4f 100644 Binary files a/data/cds/chlorodb/fasta/NC_022417.fst.gz and b/data/cds/chlorodb/fasta/NC_022417.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022431.fst.gz b/data/cds/chlorodb/fasta/NC_022431.fst.gz index 1e6afa8..6c24357 100644 Binary files a/data/cds/chlorodb/fasta/NC_022431.fst.gz and b/data/cds/chlorodb/fasta/NC_022431.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022432.fst.gz b/data/cds/chlorodb/fasta/NC_022432.fst.gz index 
fb75bf7..eea5a6a 100644 Binary files a/data/cds/chlorodb/fasta/NC_022432.fst.gz and b/data/cds/chlorodb/fasta/NC_022432.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022451.fst.gz b/data/cds/chlorodb/fasta/NC_022451.fst.gz index 69fbac3..8737b76 100644 Binary files a/data/cds/chlorodb/fasta/NC_022451.fst.gz and b/data/cds/chlorodb/fasta/NC_022451.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022457.fst.gz b/data/cds/chlorodb/fasta/NC_022457.fst.gz index 0b410f8..bb40550 100644 Binary files a/data/cds/chlorodb/fasta/NC_022457.fst.gz and b/data/cds/chlorodb/fasta/NC_022457.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022459.fst.gz b/data/cds/chlorodb/fasta/NC_022459.fst.gz index 431553e..7601113 100644 Binary files a/data/cds/chlorodb/fasta/NC_022459.fst.gz and b/data/cds/chlorodb/fasta/NC_022459.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022460.fst.gz b/data/cds/chlorodb/fasta/NC_022460.fst.gz index 86e6816..2bbd1d3 100644 Binary files a/data/cds/chlorodb/fasta/NC_022460.fst.gz and b/data/cds/chlorodb/fasta/NC_022460.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022461.fst.gz b/data/cds/chlorodb/fasta/NC_022461.fst.gz index 05ba853..f19b6f6 100644 Binary files a/data/cds/chlorodb/fasta/NC_022461.fst.gz and b/data/cds/chlorodb/fasta/NC_022461.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022462.fst.gz b/data/cds/chlorodb/fasta/NC_022462.fst.gz index f94c449..8a899e4 100644 Binary files a/data/cds/chlorodb/fasta/NC_022462.fst.gz and b/data/cds/chlorodb/fasta/NC_022462.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022463.fst.gz b/data/cds/chlorodb/fasta/NC_022463.fst.gz index 408d054..9066dfe 100644 Binary files a/data/cds/chlorodb/fasta/NC_022463.fst.gz and b/data/cds/chlorodb/fasta/NC_022463.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022668.fst.gz b/data/cds/chlorodb/fasta/NC_022668.fst.gz index b65443d..7bc3e46 100644 Binary files a/data/cds/chlorodb/fasta/NC_022668.fst.gz and 
b/data/cds/chlorodb/fasta/NC_022668.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022715.fst.gz b/data/cds/chlorodb/fasta/NC_022715.fst.gz index 31f67af..64a4aa9 100644 Binary files a/data/cds/chlorodb/fasta/NC_022715.fst.gz and b/data/cds/chlorodb/fasta/NC_022715.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022810.fst.gz b/data/cds/chlorodb/fasta/NC_022810.fst.gz index 41fdd21..eeacbe6 100644 Binary files a/data/cds/chlorodb/fasta/NC_022810.fst.gz and b/data/cds/chlorodb/fasta/NC_022810.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022811.fst.gz b/data/cds/chlorodb/fasta/NC_022811.fst.gz index de3b202..afcb33e 100644 Binary files a/data/cds/chlorodb/fasta/NC_022811.fst.gz and b/data/cds/chlorodb/fasta/NC_022811.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022812.fst.gz b/data/cds/chlorodb/fasta/NC_022812.fst.gz index 2f56d44..5de6edd 100644 Binary files a/data/cds/chlorodb/fasta/NC_022812.fst.gz and b/data/cds/chlorodb/fasta/NC_022812.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022813.fst.gz b/data/cds/chlorodb/fasta/NC_022813.fst.gz index 8630fb9..5e0caea 100644 Binary files a/data/cds/chlorodb/fasta/NC_022813.fst.gz and b/data/cds/chlorodb/fasta/NC_022813.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022814.fst.gz b/data/cds/chlorodb/fasta/NC_022814.fst.gz index f66f26a..9bc6df7 100644 Binary files a/data/cds/chlorodb/fasta/NC_022814.fst.gz and b/data/cds/chlorodb/fasta/NC_022814.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022850.fst.gz b/data/cds/chlorodb/fasta/NC_022850.fst.gz index 9df4e3a..643add6 100644 Binary files a/data/cds/chlorodb/fasta/NC_022850.fst.gz and b/data/cds/chlorodb/fasta/NC_022850.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022859.fst.gz b/data/cds/chlorodb/fasta/NC_022859.fst.gz index 25dbf63..ed0469e 100644 Binary files a/data/cds/chlorodb/fasta/NC_022859.fst.gz and b/data/cds/chlorodb/fasta/NC_022859.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022868.fst.gz 
b/data/cds/chlorodb/fasta/NC_022868.fst.gz index ce1f670..bd06f37 100644 Binary files a/data/cds/chlorodb/fasta/NC_022868.fst.gz and b/data/cds/chlorodb/fasta/NC_022868.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022926.fst.gz b/data/cds/chlorodb/fasta/NC_022926.fst.gz index 28368eb..de87594 100644 Binary files a/data/cds/chlorodb/fasta/NC_022926.fst.gz and b/data/cds/chlorodb/fasta/NC_022926.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022927.fst.gz b/data/cds/chlorodb/fasta/NC_022927.fst.gz index 5fdf25d..e01c3b2 100644 Binary files a/data/cds/chlorodb/fasta/NC_022927.fst.gz and b/data/cds/chlorodb/fasta/NC_022927.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022928.fst.gz b/data/cds/chlorodb/fasta/NC_022928.fst.gz index 7a0352b..b518b13 100644 Binary files a/data/cds/chlorodb/fasta/NC_022928.fst.gz and b/data/cds/chlorodb/fasta/NC_022928.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_022958.fst.gz b/data/cds/chlorodb/fasta/NC_022958.fst.gz index 47295e1..5eb5d74 100644 Binary files a/data/cds/chlorodb/fasta/NC_022958.fst.gz and b/data/cds/chlorodb/fasta/NC_022958.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023084.fst.gz b/data/cds/chlorodb/fasta/NC_023084.fst.gz index be29e5d..fefc81a 100644 Binary files a/data/cds/chlorodb/fasta/NC_023084.fst.gz and b/data/cds/chlorodb/fasta/NC_023084.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023085.fst.gz b/data/cds/chlorodb/fasta/NC_023085.fst.gz index 28f13ab..cd31af6 100644 Binary files a/data/cds/chlorodb/fasta/NC_023085.fst.gz and b/data/cds/chlorodb/fasta/NC_023085.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023086.fst.gz b/data/cds/chlorodb/fasta/NC_023086.fst.gz index 737f85a..65c1b97 100644 Binary files a/data/cds/chlorodb/fasta/NC_023086.fst.gz and b/data/cds/chlorodb/fasta/NC_023086.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023090.fst.gz b/data/cds/chlorodb/fasta/NC_023090.fst.gz index fe225c1..b7de5ef 100644 Binary files 
a/data/cds/chlorodb/fasta/NC_023090.fst.gz and b/data/cds/chlorodb/fasta/NC_023090.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023092.fst.gz b/data/cds/chlorodb/fasta/NC_023092.fst.gz index 58da494..b65077e 100644 Binary files a/data/cds/chlorodb/fasta/NC_023092.fst.gz and b/data/cds/chlorodb/fasta/NC_023092.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023096.fst.gz b/data/cds/chlorodb/fasta/NC_023096.fst.gz index 26c1d02..f7d4474 100644 Binary files a/data/cds/chlorodb/fasta/NC_023096.fst.gz and b/data/cds/chlorodb/fasta/NC_023096.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023097.fst.gz b/data/cds/chlorodb/fasta/NC_023097.fst.gz index ff23caf..68d9dfb 100644 Binary files a/data/cds/chlorodb/fasta/NC_023097.fst.gz and b/data/cds/chlorodb/fasta/NC_023097.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023102.fst.gz b/data/cds/chlorodb/fasta/NC_023102.fst.gz index ec4ae59..cb0e4af 100644 Binary files a/data/cds/chlorodb/fasta/NC_023102.fst.gz and b/data/cds/chlorodb/fasta/NC_023102.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023107.fst.gz b/data/cds/chlorodb/fasta/NC_023107.fst.gz index d441ae5..9016bec 100644 Binary files a/data/cds/chlorodb/fasta/NC_023107.fst.gz and b/data/cds/chlorodb/fasta/NC_023107.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023108.fst.gz b/data/cds/chlorodb/fasta/NC_023108.fst.gz index 87cb10f..afdaa24 100644 Binary files a/data/cds/chlorodb/fasta/NC_023108.fst.gz and b/data/cds/chlorodb/fasta/NC_023108.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023109.fst.gz b/data/cds/chlorodb/fasta/NC_023109.fst.gz index 73f714a..abc1610 100644 Binary files a/data/cds/chlorodb/fasta/NC_023109.fst.gz and b/data/cds/chlorodb/fasta/NC_023109.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023110.fst.gz b/data/cds/chlorodb/fasta/NC_023110.fst.gz index a076581..4e52577 100644 Binary files a/data/cds/chlorodb/fasta/NC_023110.fst.gz and b/data/cds/chlorodb/fasta/NC_023110.fst.gz differ diff 
--git a/data/cds/chlorodb/fasta/NC_023111.fst.gz b/data/cds/chlorodb/fasta/NC_023111.fst.gz index 63fa296..55caa6b 100644 Binary files a/data/cds/chlorodb/fasta/NC_023111.fst.gz and b/data/cds/chlorodb/fasta/NC_023111.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023112.fst.gz b/data/cds/chlorodb/fasta/NC_023112.fst.gz index 8163594..12c0e71 100644 Binary files a/data/cds/chlorodb/fasta/NC_023112.fst.gz and b/data/cds/chlorodb/fasta/NC_023112.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023113.fst.gz b/data/cds/chlorodb/fasta/NC_023113.fst.gz index d55e548..c9bdfcb 100644 Binary files a/data/cds/chlorodb/fasta/NC_023113.fst.gz and b/data/cds/chlorodb/fasta/NC_023113.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023114.fst.gz b/data/cds/chlorodb/fasta/NC_023114.fst.gz index 893c60e..2f94183 100644 Binary files a/data/cds/chlorodb/fasta/NC_023114.fst.gz and b/data/cds/chlorodb/fasta/NC_023114.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023115.fst.gz b/data/cds/chlorodb/fasta/NC_023115.fst.gz index 5cd6235..a875dd1 100644 Binary files a/data/cds/chlorodb/fasta/NC_023115.fst.gz and b/data/cds/chlorodb/fasta/NC_023115.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023119.fst.gz b/data/cds/chlorodb/fasta/NC_023119.fst.gz index 578e3eb..019d357 100644 Binary files a/data/cds/chlorodb/fasta/NC_023119.fst.gz and b/data/cds/chlorodb/fasta/NC_023119.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023120.fst.gz b/data/cds/chlorodb/fasta/NC_023120.fst.gz index 91a522e..0d36ba2 100644 Binary files a/data/cds/chlorodb/fasta/NC_023120.fst.gz and b/data/cds/chlorodb/fasta/NC_023120.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023121.fst.gz b/data/cds/chlorodb/fasta/NC_023121.fst.gz index 9d991d1..e14a6ee 100644 Binary files a/data/cds/chlorodb/fasta/NC_023121.fst.gz and b/data/cds/chlorodb/fasta/NC_023121.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023130.fst.gz b/data/cds/chlorodb/fasta/NC_023130.fst.gz index 
7f4077c..5b091dc 100644 Binary files a/data/cds/chlorodb/fasta/NC_023130.fst.gz and b/data/cds/chlorodb/fasta/NC_023130.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023131.fst.gz b/data/cds/chlorodb/fasta/NC_023131.fst.gz index 04e698f..1ade3e6 100644 Binary files a/data/cds/chlorodb/fasta/NC_023131.fst.gz and b/data/cds/chlorodb/fasta/NC_023131.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023132.fst.gz b/data/cds/chlorodb/fasta/NC_023132.fst.gz index cd29a89..25e4924 100644 Binary files a/data/cds/chlorodb/fasta/NC_023132.fst.gz and b/data/cds/chlorodb/fasta/NC_023132.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023133.fst.gz b/data/cds/chlorodb/fasta/NC_023133.fst.gz index e891366..737b2cf 100644 Binary files a/data/cds/chlorodb/fasta/NC_023133.fst.gz and b/data/cds/chlorodb/fasta/NC_023133.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023213.fst.gz b/data/cds/chlorodb/fasta/NC_023213.fst.gz index 9c90761..f536ec2 100644 Binary files a/data/cds/chlorodb/fasta/NC_023213.fst.gz and b/data/cds/chlorodb/fasta/NC_023213.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023214.fst.gz b/data/cds/chlorodb/fasta/NC_023214.fst.gz index 7c4eb58..2f45e15 100644 Binary files a/data/cds/chlorodb/fasta/NC_023214.fst.gz and b/data/cds/chlorodb/fasta/NC_023214.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023215.fst.gz b/data/cds/chlorodb/fasta/NC_023215.fst.gz index ddd0e79..9e8c346 100644 Binary files a/data/cds/chlorodb/fasta/NC_023215.fst.gz and b/data/cds/chlorodb/fasta/NC_023215.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023216.fst.gz b/data/cds/chlorodb/fasta/NC_023216.fst.gz index 20c2157..7b9ce8b 100644 Binary files a/data/cds/chlorodb/fasta/NC_023216.fst.gz and b/data/cds/chlorodb/fasta/NC_023216.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023217.fst.gz b/data/cds/chlorodb/fasta/NC_023217.fst.gz index f32cebf..32d96e7 100644 Binary files a/data/cds/chlorodb/fasta/NC_023217.fst.gz and 
b/data/cds/chlorodb/fasta/NC_023217.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023218.fst.gz b/data/cds/chlorodb/fasta/NC_023218.fst.gz index aff7926..b34eda2 100644 Binary files a/data/cds/chlorodb/fasta/NC_023218.fst.gz and b/data/cds/chlorodb/fasta/NC_023218.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023245.fst.gz b/data/cds/chlorodb/fasta/NC_023245.fst.gz index af7f62d..7ea46f7 100644 Binary files a/data/cds/chlorodb/fasta/NC_023245.fst.gz and b/data/cds/chlorodb/fasta/NC_023245.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023247.fst.gz b/data/cds/chlorodb/fasta/NC_023247.fst.gz index 31854f4..eaefe27 100644 Binary files a/data/cds/chlorodb/fasta/NC_023247.fst.gz and b/data/cds/chlorodb/fasta/NC_023247.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023256.fst.gz b/data/cds/chlorodb/fasta/NC_023256.fst.gz index 51c8b6b..b6f663a 100644 Binary files a/data/cds/chlorodb/fasta/NC_023256.fst.gz and b/data/cds/chlorodb/fasta/NC_023256.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023259.fst.gz b/data/cds/chlorodb/fasta/NC_023259.fst.gz index a2f0e32..19871d7 100644 Binary files a/data/cds/chlorodb/fasta/NC_023259.fst.gz and b/data/cds/chlorodb/fasta/NC_023259.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023260.fst.gz b/data/cds/chlorodb/fasta/NC_023260.fst.gz index e21ff76..9a1046a 100644 Binary files a/data/cds/chlorodb/fasta/NC_023260.fst.gz and b/data/cds/chlorodb/fasta/NC_023260.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023261.fst.gz b/data/cds/chlorodb/fasta/NC_023261.fst.gz index 3da52d9..a8aae66 100644 Binary files a/data/cds/chlorodb/fasta/NC_023261.fst.gz and b/data/cds/chlorodb/fasta/NC_023261.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023356.fst.gz b/data/cds/chlorodb/fasta/NC_023356.fst.gz index e7e6ca5..ab0563e 100644 Binary files a/data/cds/chlorodb/fasta/NC_023356.fst.gz and b/data/cds/chlorodb/fasta/NC_023356.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023357.fst.gz 
b/data/cds/chlorodb/fasta/NC_023357.fst.gz index 945eb12..297fc4d 100644 Binary files a/data/cds/chlorodb/fasta/NC_023357.fst.gz and b/data/cds/chlorodb/fasta/NC_023357.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023358.fst.gz b/data/cds/chlorodb/fasta/NC_023358.fst.gz index 4952521..a6a026c 100644 Binary files a/data/cds/chlorodb/fasta/NC_023358.fst.gz and b/data/cds/chlorodb/fasta/NC_023358.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023359.fst.gz b/data/cds/chlorodb/fasta/NC_023359.fst.gz index 0b5340c..7082e25 100644 Binary files a/data/cds/chlorodb/fasta/NC_023359.fst.gz and b/data/cds/chlorodb/fasta/NC_023359.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023360.fst.gz b/data/cds/chlorodb/fasta/NC_023360.fst.gz index cd23b21..4131bae 100644 Binary files a/data/cds/chlorodb/fasta/NC_023360.fst.gz and b/data/cds/chlorodb/fasta/NC_023360.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023367.fst.gz b/data/cds/chlorodb/fasta/NC_023367.fst.gz index db8bdba..d9d7f24 100644 Binary files a/data/cds/chlorodb/fasta/NC_023367.fst.gz and b/data/cds/chlorodb/fasta/NC_023367.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023449.fst.gz b/data/cds/chlorodb/fasta/NC_023449.fst.gz index 4a8a0f7..ca176f2 100644 Binary files a/data/cds/chlorodb/fasta/NC_023449.fst.gz and b/data/cds/chlorodb/fasta/NC_023449.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023463.fst.gz b/data/cds/chlorodb/fasta/NC_023463.fst.gz index a42e58d..194372f 100644 Binary files a/data/cds/chlorodb/fasta/NC_023463.fst.gz and b/data/cds/chlorodb/fasta/NC_023463.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023464.fst.gz b/data/cds/chlorodb/fasta/NC_023464.fst.gz index 3012d9c..aa6a530 100644 Binary files a/data/cds/chlorodb/fasta/NC_023464.fst.gz and b/data/cds/chlorodb/fasta/NC_023464.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023465.fst.gz b/data/cds/chlorodb/fasta/NC_023465.fst.gz index 0507a8b..40f6507 100644 Binary files 
a/data/cds/chlorodb/fasta/NC_023465.fst.gz and b/data/cds/chlorodb/fasta/NC_023465.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023533.fst.gz b/data/cds/chlorodb/fasta/NC_023533.fst.gz index 2993d81..b23be5d 100644 Binary files a/data/cds/chlorodb/fasta/NC_023533.fst.gz and b/data/cds/chlorodb/fasta/NC_023533.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023544.fst.gz b/data/cds/chlorodb/fasta/NC_023544.fst.gz index e8a821f..d726e0f 100644 Binary files a/data/cds/chlorodb/fasta/NC_023544.fst.gz and b/data/cds/chlorodb/fasta/NC_023544.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023775.fst.gz b/data/cds/chlorodb/fasta/NC_023775.fst.gz index 387974e..5874dc7 100644 Binary files a/data/cds/chlorodb/fasta/NC_023775.fst.gz and b/data/cds/chlorodb/fasta/NC_023775.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023785.fst.gz b/data/cds/chlorodb/fasta/NC_023785.fst.gz index e06219b..659c894 100644 Binary files a/data/cds/chlorodb/fasta/NC_023785.fst.gz and b/data/cds/chlorodb/fasta/NC_023785.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023790.fst.gz b/data/cds/chlorodb/fasta/NC_023790.fst.gz index d272ebf..788a936 100644 Binary files a/data/cds/chlorodb/fasta/NC_023790.fst.gz and b/data/cds/chlorodb/fasta/NC_023790.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023792.fst.gz b/data/cds/chlorodb/fasta/NC_023792.fst.gz index 021b807..422d175 100644 Binary files a/data/cds/chlorodb/fasta/NC_023792.fst.gz and b/data/cds/chlorodb/fasta/NC_023792.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023798.fst.gz b/data/cds/chlorodb/fasta/NC_023798.fst.gz index ecc98fa..1914d90 100644 Binary files a/data/cds/chlorodb/fasta/NC_023798.fst.gz and b/data/cds/chlorodb/fasta/NC_023798.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023800.fst.gz b/data/cds/chlorodb/fasta/NC_023800.fst.gz index fd4c0e5..1cf62bb 100644 Binary files a/data/cds/chlorodb/fasta/NC_023800.fst.gz and b/data/cds/chlorodb/fasta/NC_023800.fst.gz differ diff 
--git a/data/cds/chlorodb/fasta/NC_023801.fst.gz b/data/cds/chlorodb/fasta/NC_023801.fst.gz index 0bdf9fd..260fc2e 100644 Binary files a/data/cds/chlorodb/fasta/NC_023801.fst.gz and b/data/cds/chlorodb/fasta/NC_023801.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023805.fst.gz b/data/cds/chlorodb/fasta/NC_023805.fst.gz index b6b30ab..ae0b617 100644 Binary files a/data/cds/chlorodb/fasta/NC_023805.fst.gz and b/data/cds/chlorodb/fasta/NC_023805.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023833.fst.gz b/data/cds/chlorodb/fasta/NC_023833.fst.gz index c78a1b5..b5f72a3 100644 Binary files a/data/cds/chlorodb/fasta/NC_023833.fst.gz and b/data/cds/chlorodb/fasta/NC_023833.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023835.fst.gz b/data/cds/chlorodb/fasta/NC_023835.fst.gz index d1aa4b8..4c81826 100644 Binary files a/data/cds/chlorodb/fasta/NC_023835.fst.gz and b/data/cds/chlorodb/fasta/NC_023835.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023934.fst.gz b/data/cds/chlorodb/fasta/NC_023934.fst.gz index 9ab6aef..75d57fe 100644 Binary files a/data/cds/chlorodb/fasta/NC_023934.fst.gz and b/data/cds/chlorodb/fasta/NC_023934.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023935.fst.gz b/data/cds/chlorodb/fasta/NC_023935.fst.gz index f4c887d..6547c1d 100644 Binary files a/data/cds/chlorodb/fasta/NC_023935.fst.gz and b/data/cds/chlorodb/fasta/NC_023935.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023956.fst.gz b/data/cds/chlorodb/fasta/NC_023956.fst.gz index 5828365..6a3b496 100644 Binary files a/data/cds/chlorodb/fasta/NC_023956.fst.gz and b/data/cds/chlorodb/fasta/NC_023956.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_023959.fst.gz b/data/cds/chlorodb/fasta/NC_023959.fst.gz index 047f823..6cca8db 100644 Binary files a/data/cds/chlorodb/fasta/NC_023959.fst.gz and b/data/cds/chlorodb/fasta/NC_023959.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_024019.fst.gz b/data/cds/chlorodb/fasta/NC_024019.fst.gz index 
ca1ab81..9ae0025 100644 Binary files a/data/cds/chlorodb/fasta/NC_024019.fst.gz and b/data/cds/chlorodb/fasta/NC_024019.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_024021.fst.gz b/data/cds/chlorodb/fasta/NC_024021.fst.gz index 47cd104..2c77720 100644 Binary files a/data/cds/chlorodb/fasta/NC_024021.fst.gz and b/data/cds/chlorodb/fasta/NC_024021.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_024022.fst.gz b/data/cds/chlorodb/fasta/NC_024022.fst.gz index 2b0d96c..a99b2a5 100644 Binary files a/data/cds/chlorodb/fasta/NC_024022.fst.gz and b/data/cds/chlorodb/fasta/NC_024022.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_024023.fst.gz b/data/cds/chlorodb/fasta/NC_024023.fst.gz index 0ed65c9..00fdfb7 100644 Binary files a/data/cds/chlorodb/fasta/NC_024023.fst.gz and b/data/cds/chlorodb/fasta/NC_024023.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_024024.fst.gz b/data/cds/chlorodb/fasta/NC_024024.fst.gz index 5b2c09f..79fa8c5 100644 Binary files a/data/cds/chlorodb/fasta/NC_024024.fst.gz and b/data/cds/chlorodb/fasta/NC_024024.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_024027.fst.gz b/data/cds/chlorodb/fasta/NC_024027.fst.gz index 42a80b0..d005acc 100644 Binary files a/data/cds/chlorodb/fasta/NC_024027.fst.gz and b/data/cds/chlorodb/fasta/NC_024027.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_024034.fst.gz b/data/cds/chlorodb/fasta/NC_024034.fst.gz index 5599873..9adaa61 100644 Binary files a/data/cds/chlorodb/fasta/NC_024034.fst.gz and b/data/cds/chlorodb/fasta/NC_024034.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_024035.fst.gz b/data/cds/chlorodb/fasta/NC_024035.fst.gz index c38e620..6cbdd9b 100644 Binary files a/data/cds/chlorodb/fasta/NC_024035.fst.gz and b/data/cds/chlorodb/fasta/NC_024035.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_024036.fst.gz b/data/cds/chlorodb/fasta/NC_024036.fst.gz index dfd60c9..ea6c57b 100644 Binary files a/data/cds/chlorodb/fasta/NC_024036.fst.gz and 
b/data/cds/chlorodb/fasta/NC_024036.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_024038.fst.gz b/data/cds/chlorodb/fasta/NC_024038.fst.gz index a9ae660..4862bd4 100644 Binary files a/data/cds/chlorodb/fasta/NC_024038.fst.gz and b/data/cds/chlorodb/fasta/NC_024038.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_024050.fst.gz b/data/cds/chlorodb/fasta/NC_024050.fst.gz index f817b27..ec11c38 100644 Binary files a/data/cds/chlorodb/fasta/NC_024050.fst.gz and b/data/cds/chlorodb/fasta/NC_024050.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_024060.fst.gz b/data/cds/chlorodb/fasta/NC_024060.fst.gz index 13f6440..064f3ca 100644 Binary files a/data/cds/chlorodb/fasta/NC_024060.fst.gz and b/data/cds/chlorodb/fasta/NC_024060.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_024061.fst.gz b/data/cds/chlorodb/fasta/NC_024061.fst.gz index e15d15f..b7ecf3c 100644 Binary files a/data/cds/chlorodb/fasta/NC_024061.fst.gz and b/data/cds/chlorodb/fasta/NC_024061.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_024062.fst.gz b/data/cds/chlorodb/fasta/NC_024062.fst.gz index 56dffff..412e1e4 100644 Binary files a/data/cds/chlorodb/fasta/NC_024062.fst.gz and b/data/cds/chlorodb/fasta/NC_024062.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_024064.fst.gz b/data/cds/chlorodb/fasta/NC_024064.fst.gz index 2560ae1..3a483f2 100644 Binary files a/data/cds/chlorodb/fasta/NC_024064.fst.gz and b/data/cds/chlorodb/fasta/NC_024064.fst.gz differ diff --git a/data/cds/chlorodb/fasta/NC_024065.fst.gz b/data/cds/chlorodb/fasta/NC_024065.fst.gz index a8bb2f6..1cc6dba 100644 Binary files a/data/cds/chlorodb/fasta/NC_024065.fst.gz and b/data/cds/chlorodb/fasta/NC_024065.fst.gz differ diff --git a/data/cds/chlorodb/models/blosum62.mat b/data/cds/chlorodb/models/blosum62.mat new file mode 100644 index 0000000..37f3f29 --- /dev/null +++ b/data/cds/chlorodb/models/blosum62.mat @@ -0,0 +1,29 @@ +# +# blosum62 substitution matrix +# with larger penalty for stops +# + A R N D 
C Q E G H I L K M F P S T W Y V B Z X * +A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -50 +R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -50 +N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -50 +D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -50 +C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -50 +Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -50 +E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -50 +G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -50 +H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -50 +I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -50 +L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -50 +K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -50 +M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -50 +F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -50 +P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -50 +S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -50 +T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -50 +W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -50 +Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -50 +V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -50 +B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -50 +Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -50 +X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -50 +* -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 1 diff --git a/data/cds/chlorodb/models/splice.none.frq b/data/cds/chlorodb/models/splice.none.frq new file mode 100644 index 0000000..3a1626c --- /dev/null +++ b/data/cds/chlorodb/models/splice.none.frq @@ -0,0 +1,6 @@ +# 3'/5' splice null model +# A C G T +25 25 25 
25 +splice +25 25 25 25 +# end of 3'/5' splice null model diff --git a/data/cds/chlorodb/models/splice3.default.frq b/data/cds/chlorodb/models/splice3.default.frq new file mode 100644 index 0000000..4ba2d4c --- /dev/null +++ b/data/cds/chlorodb/models/splice3.default.frq @@ -0,0 +1,13 @@ +# 3' splice model : default +# A C G T +19 18 27 36 +36 5 34 25 +20 7 51 23 +29 13 14 44 +splice +5 2 78 15 +3 7 15 75 +4 1 83 12 +5 51 13 31 +5 7 79 9 +53 10 27 11 diff --git a/data/cds/chlorodb/models/splice3.ndha.frq b/data/cds/chlorodb/models/splice3.ndha.frq new file mode 100644 index 0000000..3584fe3 --- /dev/null +++ b/data/cds/chlorodb/models/splice3.ndha.frq @@ -0,0 +1,13 @@ +# 3' splice model : ndha +# A C G T +1 43 1 56 +0 57 0 42 +41 1 2 56 +57 42 0 1 +splice +1 55 42 2 +0 1 57 42 +1 1 43 56 +2 11 56 31 +1 29 42 28 +41 1 57 1 diff --git a/data/cds/chlorodb/models/splice3.psba.frq b/data/cds/chlorodb/models/splice3.psba.frq new file mode 100644 index 0000000..21a6d8a --- /dev/null +++ b/data/cds/chlorodb/models/splice3.psba.frq @@ -0,0 +1,13 @@ +# 3' splice model : psba +# A C G T +26 14 40 21 +13 44 35 9 +41 3 33 23 +1 13 5 81 +splice +37 5 27 31 +62 5 9 24 +56 13 23 8 +32 28 6 33 +35 21 17 28 +53 12 10 26 diff --git a/data/cds/chlorodb/models/splice5.default.frq b/data/cds/chlorodb/models/splice5.default.frq new file mode 100644 index 0000000..b5cb067 --- /dev/null +++ b/data/cds/chlorodb/models/splice5.default.frq @@ -0,0 +1,13 @@ +# 5' splice model : default +# A C G T +15 42 3 40 +15 46 6 33 +13 32 2 53 +45 17 21 16 +70 13 6 11 +21 39 8 32 +splice +37 27 9 26 +31 35 17 17 +29 23 26 22 +30 25 28 17 diff --git a/data/cds/chlorodb/models/splice5.ndha.frq b/data/cds/chlorodb/models/splice5.ndha.frq new file mode 100644 index 0000000..fdad60d --- /dev/null +++ b/data/cds/chlorodb/models/splice5.ndha.frq @@ -0,0 +1,13 @@ +# 5' splice model : ndha +# A C G T +0 54 2 44 +43 1 1 56 +57 1 7 35 +8 1 39 52 +56 2 40 2 +55 1 0 44 +splice +1 0 2 97 +40 1 1 58 +55 0 1 44 +1 41 1 
58 diff --git a/data/cds/chlorodb/models/splice5.psba.frq b/data/cds/chlorodb/models/splice5.psba.frq new file mode 100644 index 0000000..7785836 --- /dev/null +++ b/data/cds/chlorodb/models/splice5.psba.frq @@ -0,0 +1,13 @@ +# 5' splice model : psba +# A C G T +38 15 9 37 +40 5 13 42 +41 15 6 37 +32 9 15 44 +22 8 10 60 +0 9 83 8 +splice +33 15 27 24 +31 6 26 37 +17 28 6 49 +13 15 23 49 diff --git a/data/cds/chlorodb/models/start.default.frq b/data/cds/chlorodb/models/start.default.frq new file mode 100644 index 0000000..aa16582 --- /dev/null +++ b/data/cds/chlorodb/models/start.default.frq @@ -0,0 +1,8 @@ +# start model : default +atg 0.9692592 36165 +acg 0.01173885 438 +gtg 0.007504288 280 +ata 0.003886149 145 +atc 0.00337693 126 +att 0.002840909 106 +ttg 0.001393654 52 diff --git a/data/cds/chlorodb/models/start.ndhd.frq b/data/cds/chlorodb/models/start.ndhd.frq new file mode 100644 index 0000000..893232b --- /dev/null +++ b/data/cds/chlorodb/models/start.ndhd.frq @@ -0,0 +1,7 @@ +# start model : ndhd +acg 0.5066667 190 +atg 0.3493333 131 +atc 0.05066667 19 +ata 0.03466667 13 +acc 0.02933333 11 +gtg 0.02933333 11 diff --git a/data/cds/chlorodb/models/start.rps19.frq b/data/cds/chlorodb/models/start.rps19.frq new file mode 100644 index 0000000..59a3169 --- /dev/null +++ b/data/cds/chlorodb/models/start.rps19.frq @@ -0,0 +1,3 @@ +# start model : rps19 +gtg 0.6261023 355 +atg 0.3738977 212 diff --git a/data/cds/chlorodb/models/stop.default.frq b/data/cds/chlorodb/models/stop.default.frq new file mode 100644 index 0000000..f5f76e5 --- /dev/null +++ b/data/cds/chlorodb/models/stop.default.frq @@ -0,0 +1,4 @@ +# stop model : default (freq. 
ignored) +taa 0.5742367 21968 +tag 0.2351265 8995 +tga 0.1906368 7293 diff --git a/data/cds/chlorodb/parameters.sh b/data/cds/chlorodb/parameters.sh new file mode 100644 index 0000000..75749a3 --- /dev/null +++ b/data/cds/chlorodb/parameters.sh @@ -0,0 +1,28 @@ +# sourced file + +set CORE_NCDS_CUTOFF = 200 +set CORE_START_ATG_CUTOFF = 50 +set CORE_START_DFT_CUTOFF = 50 +set CORE_START_OTH_CUTOFF = 10 +set CORE_STOP_CUTOFF = 200 +set CORE_SPLICE_CUTOFF = 50 + +set SHEL_NCDS_CUTOFF = 10 + +set CORE_DELTA = Inf +set CORE_COVMIN = 30 +set CORE_PMAX = 1e-6 +set CORE_IDMIN = 30 +set CORE_SIZMIN = 50 + +set SHEL_DELTA = 0.5 +set SHEL_COVMIN = 30 +set SHEL_PMAX = 1e-6 +set SHEL_IDMIN = 30 +set SHEL_SIZMIN = 10 + +set DUST_DELTA = 0.5 +set DUST_COVMIN = 30 +set DUST_PMAX = 1e-6 +set DUST_IDMIN = 30 +set DUST_SIZMIN = 10 diff --git a/detectors/cds/bin/go_pass1.sh b/detectors/cds/bin/do_exonerate.sh similarity index 85% rename from detectors/cds/bin/go_pass1.sh rename to detectors/cds/bin/do_exonerate.sh index 4f911a5..5fbe664 100755 --- a/detectors/cds/bin/go_pass1.sh +++ b/detectors/cds/bin/do_exonerate.sh @@ -1,21 +1,21 @@ #!/bin/csh -f # -# Annotate CDS - Pass1 +# Annotate CDS - Exonerate # #======================================================================================== # -# Annotate CDS of chlorodb/core proteins using exonerate +# Annotate CDS using exonerate # -# pass1.sh [] +# do_exonerate.sh [] # -# - : The fasta file containing the genome to annotate -# - : Name of the protein family (defined in chlorodb/core) +# - : The fasta file containing the genome to annotate +# - : The fasta file containing the protein family # -# Results are in file : `basename :r`..res +# Results are in file : `basename :r`.`basename :r`.res # #======================================================================================== # -# usage: go_pass1.sh fasta family [outdir] +# usage: do_exonerate.sh dna.fasta prot.fasta [outdir] # unsetenv ORG_SOURCED @@ -31,12 +31,14 @@ NeedArg 
2 set GenoFile = $Argv[1] set GenoName = `basename $GenoFile:r` -set ProtName = $Argv[2] -set ProtDir = $CDS_DATA_DIR/chlorodb/core -set ProtFile = $ProtDir/$ProtName.fst + +set ProtFile = $Argv[2] +set ProtDir = `dirname $ProtFile` +set ProtName = `basename $ProtFile:r` NeedFile $GenoFile NeedFile $ProtFile +NeedFile $ProtDir/Annot.lst set OutDir = . if ($#Argv >= 3) set OutDir = $3 @@ -101,7 +103,7 @@ endif if ($PASS1_SPEEDUP != 0) then - $PROG_DIR/go_filterbx.sh $GenoFile $ProtFile \ + $PROG_DIR/do_filterbx.sh $GenoFile $ProtFile \ $PASS1_BLASTX_FILTER_IDMIN \ $PASS1_BLASTX_FILTER_NBMIN \ $PASS1_BLASTX_FILTER_NBMAX > D_$$ @@ -159,8 +161,7 @@ $AwkCmd -v MAX_SPAN=$PASS1_MAX_SPAN \ # get annotations # -egrep "^$ProtName " $CDS_DATA_DIR/chlorodb/core/Annot.lst |\ - awk '{print "c annot", $0}' > T_$$ +egrep "^$ProtName " $ProtDir/Annot.lst | awk '{print "c annot", $0}' > T_$$ # # extend start/stop diff --git a/detectors/cds/bin/go_filterbx.sh b/detectors/cds/bin/do_filterbx.sh similarity index 94% rename from detectors/cds/bin/go_filterbx.sh rename to detectors/cds/bin/do_filterbx.sh index 7213fa3..ea51b63 100755 --- a/detectors/cds/bin/go_filterbx.sh +++ b/detectors/cds/bin/do_filterbx.sh @@ -5,7 +5,7 @@ # # output on stdout # -# usage: go_filterbx.sh dna_fasta prot_fasta [idmin nbmin nbmax] +# usage: do_filterbx.sh dna.fasta prot.fasta [idmin nbmin nbmax] # unsetenv ORG_SOURCED diff --git a/detectors/cds/bin/go_cds.sh b/detectors/cds/bin/go_cds.sh index 2dae666..f38e053 100755 --- a/detectors/cds/bin/go_cds.sh +++ b/detectors/cds/bin/go_cds.sh @@ -13,7 +13,7 @@ # Results are printed to the standard output # #======================================================================================== -# usage: go_cds.sh fasta +# usage: go_cds.sh fasta [db_core] # unsetenv ORG_SOURCED @@ -22,13 +22,19 @@ source $ORG_HOME/scripts/csh_init.sh NeedArg 1 -set Fasta = $Argv[1] +set Fasta = $Argv[1]; Shift NeedFile $Fasta set Genome = `basename $Fasta:r` -NeedFile 
$CDS_DATA_DIR/chlorodb/core +set DbCore = $CDS_DATA_DIR/chlorodb/core + +if ($#Argv > 0) then + set DbCore = $Argv[1]; Shift +endif + +NeedFile $DbCore/Annot.lst # # run everything into temporary place @@ -44,15 +50,15 @@ endif # pass1: run exonerate # -set fams = `ls $CDS_DATA_DIR/chlorodb/core/*.fst` +set fams = `ls $DbCore/*.fst` -Notify "running pass1: exonerate of $Genome" +Notify "running pass1: exonerate of $Genome on $DbCore" foreach f ($fams) - set prot = `basename $f:r` - $PROG_DIR/go_pass1.sh $Fasta $prot $temp + $PROG_DIR/do_exonerate.sh $Fasta $f $temp end + # # pass2: transsplicing # diff --git a/detectors/cds/test/go_test.sh b/detectors/cds/test/go_test.sh index f778c9f..9e62ae4 100755 --- a/detectors/cds/test/go_test.sh +++ b/detectors/cds/test/go_test.sh @@ -1,5 +1,7 @@ #!/bin/csh -f +setenv Verbose 1 + setenv ORG_HOME `dirname $0`/../../.. source $ORG_HOME/scripts/csh_init.sh @@ -8,9 +10,9 @@ echo "+ testing CDS" setenv TMP_CLEANUP 0 setenv PASS1_SPEEDUP 1 setenv PASS1_SLOWDOWN 0 -setenv PASS1_BLASTX_FILTER_NBMAX 10 +setenv PASS1_BLASTX_FILTER_NBMAX 5 -`dirname $0`/../bin/go_cds.sh test.fst > test.bak +`dirname $0`/../bin/go_cds.sh test.fst test.db > test.bak diff -q test.bak test.ref >& /dev/null @@ -18,7 +20,7 @@ set stat = $status if ($stat == 0) then echo "+ $VTC[3]CDS test Ok$VTC[1]" - \rm -r test.bak test.tmp + \rm -r test.bak test.tmp test.db/*.fst.p?? 
else echo "* $VTC[2]CDS test Failure$VTC[1]" endif diff --git a/detectors/cds/test/test.db/Annot.lst b/detectors/cds/test/test.db/Annot.lst new file mode 100644 index 0000000..b1d9e74 --- /dev/null +++ b/detectors/cds/test/test.db/Annot.lst @@ -0,0 +1,18 @@ +atpf atpF 483 1:80_2:401_3:1_4:1 POLYEX ATP_synthase_CF0_B_subunit +ccsa ccsA 477 1:476_2:1 POLYEX cytochrome_c_biogenesis_protein +ndha ndhA 384 1:9_2:375 POLYEX NADH_dehydrogenase_subunit_1 +ndhb ndhB 699 1:5_2:693_3:1 POLYEX NADH_dehydrogenase_subunit_2 +ndhd ndhD 383 1:383 MONEX NADH_dehydrogenase_subunit_4 +ndhe ndhE 395 1:395 MONEX NADH_dehydrogenase_subunit_4L +ndhf ndhF 384 1:384 MONEX NADH_dehydrogenase_subunit_5 +ndhg ndhG 386 1:386 MONEX NADH_dehydrogenase_subunit_6 +ndhh ndhH 406 1:400_2:6 POLYEX NADH_dehydrogenase_subunit_7 +ndhi ndhI 386 1:386 MONEX NADH_dehydrogenase_subunit_I +psac psaC 498 1:491_2:4_3:3 POLYEX photosystem_I_subunit_VII +rpl2 rpl2 800 1:101_2:698_5:1 POLYEX ribosomal_protein_L2 +rpl23 rpl23 787 1:782_2:2_3:1_4:2 POLYEX ribosomal_protein_L23 +rpl32 rpl32 474 1:474 MONEX ribosomal_protein_L32 +rps15 rps15 480 1:479_2:1 POLYEX ribosomal_protein_S15 +rps7 rps7 853 1:853 MONEX ribosomal_protein_S7 +ycf1 ycf1 408 1:406_2:2 POLYEX hypothetical_chloroplast_RF1 +ycf2 ycf2 654 1:649_2:5 POLYEX Ycf2 diff --git a/detectors/cds/test/test.db/atpf.fst b/detectors/cds/test/test.db/atpf.fst new file mode 100644 index 0000000..350bddd --- /dev/null +++ b/detectors/cds/test/test.db/atpf.fst @@ -0,0 +1,50 @@ +>AC_000188@LyesCp071@atpf@11803@13043@R@2@185 ATP_synthase_CF0_B_chain +MKNVTDSFVSLGHWPSAGSFGFNTDILATNPINLSVVLGVLIFFGKGVLS +DLLDNRKQRILNTIRNSEELRGGAIEQLEKARSRLRKVETEAEQFRVNGY +SEIEREKLNLINSTYKTLEQLENYKNETIQFEQQRAINQVRQRVFQQALR +GALGTLNSCLNNELHLRTISANIGMLGTMKEITD +>NC_000925@PopuCp059@atpf@45392@45943@R@1@184 ATP_synthase_CF0_B_subunit +MNSIVNITPIIIILSEHSSEHTFGFNSDIFEANVINILLLLFGLIYVLKQ +SLGSTLNERQLKVLAAIQESEERLEQASSRLSESEKQLAQTQIIINQIKK +EAQLTAEKVRSSILAQGQIDIERLAITGKSNIETAEKQIRRQIQQQIAFL 
+ALKKVTLQLENQMSSDIQLRIIDNNIAKLGDQL +>NC_000926@GuthCp075@atpf@65505@66053@R@1@183 ATP_synthase_CF0_B_subunit +MDIISGFYNTINLAELSNAKTFGFNPNILEANVLNIAILLSGVIYLGRNF +LTSALESRQQKVTEAIQEAEERLQQANVKLLDAEKQLTQAQTVIEQIKKE +AEKTARTVKETILAQGKLDIERLTNNGKSSIEKAELQIKKQIQQHITDLA +IKKVSAQMETFMTDNLQVKVIDTNIASLGGKI +>NC_000927@NeolCp025@atpf@19121@19651@R@1@177 ATP_synthase_CF0_B_chain +MFHFLALTPLAHSEGFGLNTNILETNILNLAAVFALLAYVGTDFVSSLLK +TRKESILKSLRDADERYQDAVNQLKQALQELETARTNAAEIRRQSEINAE +AIRQRLELLTQEEMARLEEAKETIIKLEEEKAVAEVCTKVISMALVRAEK +KIISSMDEAMHRRVMDMYLNLLREVY +>NC_000932@ArthCp008@atpf@11529@12798@R@2@185 ATP_synthase_CF0_B_subunit +MKNLTDSFVYLGHWPSAGSFGFNTDILATNPINLSVVFGVLIFFGKGVLN +DLLDNRKQRILNTIRNSEELREGAIQQLENARARLRNVETEADKFRVNGY +SEIEREKLNLINSTYKTLKQLENYKNETILFEQQRTINQVRERVFQQALQ +GAIGTLNSCLSNELHLRTINANIGMFGTMKEITD +>NC_001319@MapoCp012@atpf@18468@19609@D@2@185 ATP_synthase_CF0_B_subunit +MENGTYFIISSNFWTIAGSFGLNTNLLETNLINLGVVLGLLVYFGKGVLS +NLLNNRKLTILNTIQDAEERYKEATDKLNQARTRLQQAKQKADDIRINGL +SQMEKEKQDLINAADEDSKRLEDSKNATIRFEKQRAIEQVRQQVSRLALE +RALETLKSRLNSELHLRMIDYHIGLLRAMESTIE +>NC_001320@OrsajCp021@atpf@32741@34111@D@2@181 ATP_synthase_CF0_B_subunit +MKNVTHSFVFLAHWPSAGSFGLNTDILATNLINLTVVVGVLIYFGKGVLK +DLLDNRKQRILSTIRNSEELRRGTIEQLEKARIRLQKVELEADEYRMNGY +SEIEREKANLINATSISLEQLEKSKNETLYFEKQRAMNQVRQRVFQQAVQ +GALGTLNSCLNTELHFRTIRANISILGAME +>NC_001603@EugrCp038@atpf@64813@66970@D@4@184 ATP_synthase_CF0_B_chain +MVIDNFNIFTIISNAKTFGINTNVFETNIINLAIVVGTLFYYGKLTLSDL +LKTRKKTIIKNILDIDEKIRSSQSSLYLAELEFENAAKKASLIRSNGTTF +CLKSFDIIRSSVNEDIKRLKQSKRLILRTEDKKSVREIFKNLYSQACQKA +KATIIKRLNSKIHKKIILKKMEKMSLKKLKPKY +>NC_001631@PithCp015@atpf@11663@12972@R@2@185 ATP_synthase_CF0_B_subunit +MKNVIDPFISLSYWPSAGGFGSNTNILETNIINSSVVLSVLIYFGKGVLS +NLLDNRKQKILETIRNSEELCKGAIDQLEKARACLRNVEMIADEIQVNGN +SQIEREKEDLLNTASDNLEQLEDPKNETIYSEQQRAFDQIRQQVSRQALR +RAIGTLNSRLNTELHLRTIDHNIGLLRTMMNTND +>NC_001666@ZemaCp019@atpf@35097@36479@D@2@184 ATP_synthase_CF0_B_subunit 
+MKNVTHSFVFLAHWPFAGSFGLNTDILATNLINLTVVVGVLIFFGKGVLK +DLLDNRKQRILSTIRNSEELRKGTLEQLEKARIRLQKVELEADEYRMNGY +SEIEREKENLINATSISLEQLEKSKNETLYFEKQRAMNQVRQQGFQQAVQ +GALGTLNSCLNTELHFRTIRANIGILGAIEWKR diff --git a/detectors/cds/test/test.db/ccsa.fst b/detectors/cds/test/test.db/ccsa.fst new file mode 100644 index 0000000..3a3b5f2 --- /dev/null +++ b/detectors/cds/test/test.db/ccsa.fst @@ -0,0 +1,78 @@ +>AC_000188@LyesCp084@ccsa@115765@116706@D@1@314 cytochrome_c_biogenesis_protein +MIFSTLEHILTHISFSIVSIVITIHLITFLVDEIVKLYDSSEKGIIVTFF +CITGLLVTRWVSSGHFPLSDLYESLIFLSWSFSLIHIIPYFKKNVLILSK +ITGPSAILTQGFATSGILTEIHQSGILVPALQSEWLIMHVSMMILGYAAL +LCGSLLSVALLVITFRKNRKLFSKSNVFLNESFFLGENVVENTSFFCTKN +YYRSQLIQQLDYWSYRVISLGFTFLTIGILSGAVWANEAWGSYWNWDPKE +TWAFITWIVFAIYLHTRTNRNLRGPNSAIVASIGFLIIWICYFGVNLLGI +GLHSYGSFPSTFN +>NC_000925@PopuCp184@ccsa@160407@161366@D@1@320 cytochrome_c_biogenesis_protein +MNLEMMQNSCVNFAFGGLLTAMLVYWSSLAFPRISGLNKLAALITLLVNI +ALALTLSSRWFANGYFPLSNLYESLLFLAWGLTFVHLFIESKTKSRLIGA +VSIPVAMFVTAFASLALPIEMQKASPLVPALKSNWLMMHVSIMMISYSIL +ILGSLLSILFLIITRGQDINLKGSSVGTGSYTVKSLDSNPSFAFSNPSGI +VQEQSNMLINSTRMNLLESIDNLSYRIIGLGFPLLTIGIVAGAVWANEAW +GSYWSWDPKETWALITWLIFAAYLHCRITKSWQGKRPAILASVGFLVVWI +CYLGVNFLGKGLHSYGWLA +>NC_000926@GuthCp037@ccsa@33657@34562@R@1@302 cytochrome_c_biogenesis_protein +MFNVQFDIFNFSNNITFLTLLISLISYWLGLIFKKIKNVFYIGYGSTILA +CITITIILGTRWIESGYFPLSNLYESLMFLTWGLLFSAIYLEYKTNLYLI +GAIVSPISLFIVSFSTLSLPQDMQKAAPLVPALKSNWLMMHVSVMMLSYS +TLIIGSLLAILYLVLIKAQQKKHSLKDFAFANLEFTFPKSTNSTNFNLLE +TLDNLSYRTIGFGFPLLTIGIIAGAVWANEAWGTYWSWDPKETWALITWL +VFAAYLHARITKSWTGERPAYLAALGFVVVWICYLGVNFLGKGLHSYGWL +N +>NC_000927@NeolCp108@ccsa@128438@129259@D@1@274 cytochrome_c_biogenesis_protein +MSTFSILSLVAFATLFVTMLLYFFQRQPLARQSMWIAHTSLAGLLLLRWV +QSGHFPLSNLYESCLFLSWAVTLGHFVVEKDASRAGFLDLGIFTAPMAFF +VYAFATFSLPPTMQEAGPLVPALRSHWLMMHVTLMILSYAALLFGSVLSL +AFLVITTGPRKNSEKLQSLASTFDTLSYRTLGIGFPLLTVGILSGAVWAN +EAWGSYWSWDPKETWALITWLIFAIYLHSRLTYGWNGQKAALIASVGFFL +IWICYLGVNLLGKGLHSYGWLTS 
+>NC_000927@NeolCp129@ccsa@163667@164488@R@1@274 cytochrome_c_biogenesis_protein +MSTFSILSLVAFATLFVTMLLYFFQRQPLARQSMWIAHTSLAGLLLLRWV +QSGHFPLSNLYESCLFLSWAVTLGHFVVEKDASRAGFLDLGIFTAPMAFF +VYAFATFSLPPTMQEAGPLVPALRSHWLMMHVTLMILSYAALLFGSVLSL +AFLVITTGPRKNSEKLQSLASTFDTLSYRTLGIGFPLLTVGILSGAVWAN +EAWGSYWSWDPKETWALITWLIFAIYLHSRLTYGWNGQKAALIASVGFFL +IWICYLGVNLLGKGLHSYGWLTS +>NC_000932@ArthCp073@ccsa@114461@115447@D@1@329 cytochrome_c_biogenesis_protein +MIFSILEHILTHISFSVVSIVLTIYFLTLLVNLDEIIGFFDSSDKGIIIT +FFGITGLLLTRWIYSGHFPLSNLYESLIFLSWAFSIIHMVSYFNKKQQNK +LNTITAPSVIFIQGFATSGLLNKMPQSAILVPALQSQWLMMHVSMMILGY +GALLCGSLLSIALLVITFRKVGPTFWKKNIKKNFLLNELFSFDVLYYINE +RNSILLQQNINFSFSRNYYRYQLIQQLDFWSFRIISLGFIFLTVGILSGA +VWANETWGSYWNWDPKETWAFITWTIFAIYLHIKTNRNVRGINSAIVALI +GFILIWICYFGVNLLGIGLHSYGSFTSN +>NC_001319@MapoCp078@ccsa@95482@96444@D@1@321 cytochrome_c_biogenesis_protein +MPFITLERILAHTSFFLLFFVTFIYWGKFLYINIKPITILGEISMKIACF +FITTFLLIRWSSSGHFPLSNLYESSMFLSWSFTLIHLILENKSKNTWLGI +ITAPSAMLTHGFATLSLPKEMQESVFLVPALQSHWLMMHVTMMMLSYSTL +LCGSLLAITILIITLTKQKNLPILTSYFNFPFNSFIFKNLLQPMENEILS +YKTQKVFSFINFRKWQLIKELDNWSYRVISLGFPLLTIGILSGAVWANEA +WGSYWNWDPKETWALITWLIFAIYLHTRMIKGWQGKKPAIIASLGFFIVW +ICYLGVNLLGKGLHSYGWLI +>NC_001320@OrsajCp087@ccsa@105236@106201@D@1@322 cytochrome_c_biogenesis_protein +MLFATLEHILTHISFSTISIVITIHLITLLVRELGGLRDSSEKGMIATFF +CITGFLVSRWASSGHFPLSNLYESLIFLSWALYILHMIPKIQNSKNDLST +ITTPSTILTQGFATSGLLTEMHQSTILVPALQSQWLMMHVSMMLLSYATL +LCGSLLSAALLMIRFRKNLDFFSKKKKNVLSKTFFFNEIEYFYAKRSALK +STFFPLFPNYYKYQLIERLDSWSYRVISLGFTLLTIGILCGAVWANEAWG +SYWNWDPKETWAFITWTIFAIYLHSRTNPNWKGTKSAFVASIGFLIIWIC +YFGINLLGIGLHSYGSFTLPI +>NC_001631@PithCp147@ccsa@104925@105887@R@1@321 cytochrome_c_biogenesis_protein +MIFITLEHILAHISFSLILVVTLIYWGTLVYRIEGLSSSGGKGMIVTFLC +TTGLLINRWLYSGHLPLSNLYESFMFLSWSSSVFHILLEVRSRDDRWLGA +ITAPSAMLTHGFATLGLPEEMQRSGMLVPALQSHWSMMHVSMILFSYATL +LCGSLASIALLVIMSGVNRQVIFGAMDNLFSRAILPNENFYSHEKQKSDL +QYTVYFSSTNYRKCQLIKQLDHWSYRAIGLGFSLSTIGTLSGAIWANEAW 
+GSYWSWDPKETWALITWTIFAIYLHTRMNKGWQGEEPAIVASLGFFIVWI +RYLGVNLLGIGLHSYGWLEP +>NC_001666@ZemaCp085@ccsa@108995@109960@D@1@322 cytochrome_c_biogenesis_protein +MLFATLEHILTHISFSTISIVITIHLITLLVRELRGLRDSSEKGMIATFF +SITGFLVSRWVSSGHFPLSNLYESLIFLSWTLYILHTIPKIQNSKNDLST +ITTPSTILTQGFATSGLLTEMHQSTILVPALQSQWLMMHVSMMLLSYATL +LCGSLLSAALLIIRFRKNFDFFSLKKNVFLKTFFFSEIEYLYAKRSALKN +TSFPVFPNYYKYQLTERLDSWSYRVISLGFTLLTVGILCGAVWANEAWGS +YWNWDPKETWAFITWTIFAIYLHSRKNPNWKGTNSALVASIGFLIIWICY +FGINLLGIGLHSYGSFTLPSK diff --git a/detectors/cds/test/test.db/ndha.fst b/detectors/cds/test/test.db/ndha.fst new file mode 100644 index 0000000..16cef4f --- /dev/null +++ b/detectors/cds/test/test.db/ndha.fst @@ -0,0 +1,90 @@ +>AC_000188@LyesCp085@ndha@121113@123337@R@2@364 NADH_dehydrogenase_subunit_1 +MIIDTTEIETINSFSKLESLKEVYGIIWMLVPIVTLVLGITIGVLVIVWL +EREISAGIQQRIGPEYAGPLGILQALADGTKLLLKENLIPSTGDTRLFSI +GPSIAVISIFLSYSVIPFGDHLVLADLSIGVFFWIAISSIAPVGLLMSGY +GSNNKYSFLGGLRAAAQSISYEIPLALCVLSISLLSNSLSTVDIVEAQSK +YGFWGWNLWRQPIGFIVFLISSLAECERLPFDLPEAEEELVAGYQTEYSG +IKFGLFYIASYLNLLVSSLFVTVLYLGGWNLSIPYIFVPDIFGINKGGKV +FGTLIGIFITLAKTYLFLFIPIATRWTLPRLRMDQLLNLGWKFLLPISLG +NLLLTTSSQLLSL +>NC_000927@NeolCp116@ndha@144380@145468@D@1@363 NADH_dehydrogenase_subunit_1 +MTYVLDLKRSFLDACTWLIGDNFRDLGSVLWVPLPILSLVIVATLGVLVI +VWLERKISAGVQQRVGPEYGGALGLLQPLADGLKLVFKEDVVPAKSDTWL +FTLGPAVVVIPIFLAYLVVPFGQQLIIADLRIGIFFWIAISSIAPIGLLM +SGYGSNNKYSFLGGLRAAAQSISYELPLAICVLSVCLLADSLSTVDIVES +QSSWGILTWNIWRQPIGFVAFLIAALAECERLPFDLPEAEEELVAGYQTE +YTGMKFGLFYVGSYVNLLVSGCFVTVLYLGGWHGPFAIDGILPDSPPFQV +LDAFLGITWTLLKTFLFLFAAILTRWTLPRVRIDQLLDLGWKFLLPVSLG +NLLLTASLKLLF +>NC_000932@ArthCp079@ndha@119847@122009@R@2@361 NADH_dehydrogenase_subunit_1 +MIIYATAVQTINSFVKLESLKEVYGLIWIFVPIFSLVLGIITGVLVIVWL +EREISAGIQQRIGPEYAGPLGILQALADGTKLLFKENLRPSRGNTPLFSI +GPSIAVISILLSYSVIPFSNHLVLADLNIGIFLWIAISSIAPIGLLMSGY +GSNNKYSFLGGLRAAAQSISYEIPLTLCVLSISLLSNSLSTVDIVEAQSK +YGFWGWNLWRQPIGFIIFLISSLAECERLPFDLPEAEEELIAGYQTEYSG +IKFGLFYVASYLNLLISSLFVTVLYLGGWNISIPYISILELFQRDQIFGT 
+TIGIFITLAKTYLFLFVSIATRWTLPRLRMDQLLNLGWKFLLPISLGNLL +LTTSFQLFSL +>NC_001319@MapoCp084@ndha@100382@102200@R@2@369 NADH_dehydrogenase_subunit_1 +MISNINLEDKFFSFFFTLGFSKEFFNFLWIIFSILILMLGVTIGVLVLVW +LERKISAAIQQRIGPEYAGPLGIIQALADGIKLFLKEDIVPAQGDVWLFN +IGPILVLIPVFLSYLVIPFEYNVILANFSIGVFFWIAVSSVVPLGLLMAG +YGSNNKYSFLGGLRAAAQSISYEIPLALSVLSIALLSNSLSTVDIVEAQS +KYGFLSWNLWRQPIGFIVFFIASLAECERLPFDLPEAEEELVAGYQTEYS +GMKFAFFYLASYLNLLVSSLFVTILYLGGWHFSIPFFSLFKNFEWNLMSN +GISEVISIIIGIVITLVKSYLFLFISIMTRWTLPRIRIDQLLNLGWKFLL +PIALGNLLLTTSFQLFLL +>NC_001320@OrsajCp093@ndha@110631@112706@R@2@363 NADH_dehydrogenase_subunit_1 +MIIDRVQVEAINSFSNLELLKEVYGLIWILPILTLLLGITIEVLVIVWLE +REISASIQQRIGPEYAGPLGLLQAIADGTKLLFKEDILPSRGDIPLFSIG +PSIAVISILLSFLVIPLGYRFVLADLSIGVFLWIAISSIAPIGLLMAGYS +SNNKYSFSGGLRAAAQSISYEIPLTFCVLAISLLSNSSSTVDIVEAQSKY +GFFGWNLWRQPIGFLVFLISSLAECERLPFDLPEAEEELVAGYQTEYSGI +KYGLFYLVSYLNLLVSSLFVTVLYLGGWNLSIPYISFFGFFQMNKMVGIL +EMTMSIFITLTKAYLFLFISITIRWTLPRMRMDQLLNLGWKFLLPISLGN +LLLTTSSQLVSL +>NC_001666@ZemaCp091@ndha@114343@116454@R@2@363 NADH_dehydrogenase_subunit_1 +MIIDRVEVETINSFSKSELFKEIYGLIWILPIFALLLGITIEVLVIVWLE +REISASIQQRIGPEYAGPLGLLQAIADGTKLLLKEDILPSRGDIPLFSIG +PSIAVISILLSFLVIPLGYRFVLADLSIGVFLWIAISSIAPIGLLMAGYS +SNNKYSFSGGLRAAAQSISYEIPLTFCVLAISLLSNSSSTVDIVEAQSKY +GFFGWNLWRQPIGFLVFLISSLAECERLPFDLPEAEEELVAGYQTEYSGI +KYGLFYLVSYLNLLVSSLFVTVLYLGGWNFSIPYISFFGFFQMNKIIGIL +EMVIGIFITLTKAYLFLFISITIRWTLPRMRMDQLLNLGWKFLLPISLGN +LLLTTSSQLVSL +>NC_001879@NitaCp088@ndha@121696@123935@R@2@364 NADH_dehydrogenase_subunit_1 +MIIDTTEIETINSFSKLESLKEVYGIIWMLFPILTLVLGITIGVLVIVWL +EREISAGIQQRIGPEYAGPLGILQALADGTKLLLKENLIPSTGDTRLFSI +GPSIAVISIFLSYSVIPFGDHLVLADLSIGVFFWIAISSIAPVGLLMSGY +GSNNKYSFLGGLRAAAQSISYEIPLALCVLSISLLSNSLSTVDIVEAQSK +YGFWGWNLWRQPIGFIVFLISSLAECERLPFDLPEAEEELVAGYQTEYSG +IKFGLFYIASYLNLLVSSLFVTVLYLGGWNLSIPYIFVPELFGINKRGKV +FGTLIGIFITLAKTYLFLFIPIATRWTLPRLRMDQLLNLGWKFLLPISLG +NLLLTTSSQLLSL +>NC_002186@MeviCp102@ndha@106489@107592@R@1@368 NADH_dehydrogenase_subunit_1 
+MLLTINLKDSFLTFFSNLGFSNEFSKALWIPLPILLLIILAVVGVLVVVW +LERKISAAVQQRIGPEYAGPLGVLQPLADGLKLAFKEDIIPSKGDVLLFT +LGPAIVVIPIFLSYLIVPFGENLIVSNINLGIFFWITVSSVAPLGLLMSG +YGSNNKYSFLGGLRATAQSLSYEIPLALCVLSICLLSDSLSTIDIVQKQS +TYGILGWNIWRQPIGFIAFIIAALAECERLPFDLPEAEEELVAGYQTEYT +GMKFGLFYIGSYVNLLVSALFASVLYLGGWSLPIPIEFLLNKMSLNASDS +EVQVISAFLGIGMTLLKTYLFLFLSILTRWTMPRVRIDQLLDLGWKFLLP +ISLGNLLLTASLKIALL +>NC_002202@SpolCp090@ndha@117598@119774@R@2@366 NADH_dehydrogenase_subunit_1 +MIIDTTTTKVQAINSFSRLEFLKEVYETIWMLFPILILVLGITIGVLVIV +WLEREISASIQQRIGPEYAGPLGILQALADGTKLLFKENLLPSRGDTYLF +SIGPSIAVISILLGYLIIPFGSRLVLADLSIGVFLWIAVSSIAPIGLLMS +GYGSNNKYSFLGGLRAAAQSISYEIPLTLCVLSISLLSNSSSTVDIVEAQ +SKYGFWGWNLWRQPIGFIVFIISSLAECERLPFDLPEAEEELVAGYQTEY +SGIKFGLFYVASYLNLLISSLFVTVLYLGGWNLSIPYIFISEFFEINKID +GVFGTTIGIFITLAKTFLFLFIPITTRWTLPRLRMDQLLNLGWKFLLPIS +LGNLLLTTSSQLFSL +>NC_002693@OeelhCp102@ndha@125123@127258@R@2@364 NADH_dehydrogenase_subunit_1 +MIIDTTAVQDMNSFSRLQSLKEVSGIIWMLVPILSLVLGITLGVLVIVWL +EREISAGIQQRIGPEYAGPMGILQALADGIKLIFKENLLPSRGDTRLFSI +GPSIAVISILLSYSVIPFSSHLVLSDLNIGVFLWIAVSSIAPIGLLMSGY +GSNNKYSFLGGLRAAAQSISYEIPLTLCLLSISLLSNSSSTVDIVEAQSK +YGLWGWNLWRQPIGFLVFLISSLAECERLPFDLPEAEEELVAGYQTEYSG +IKFGLFYVASYLNLLVSSLFVTVLYLGGWNISISYIFVPGLFEITKVGRV +FGTTIGIFTTLAKTYLFLFISITTRWTLPRLRMDQLLNLGWKFLLPISLG +NLLLTTSSQLLSL diff --git a/detectors/cds/test/test.db/ndhb.fst b/detectors/cds/test/test.db/ndhb.fst new file mode 100644 index 0000000..a674c53 --- /dev/null +++ b/detectors/cds/test/test.db/ndhb.fst @@ -0,0 +1,120 @@ +>AC_000188@LyesCp066@ndhb@96224@98435@R@2@511 NADH_dehydrogenase_subunit_2 +MIWHVQNENFILDSTRIFMKAFHLLLFDGSLIFPECILIFGLILLLMIDS +TSDQKDIPWLYFISSTSLVMSITALLFRWREEPMISFSGNFQTNNFNEIF +QFLILLCSTLCIPLSVEYIECTEMAITEFLLFVLTATLGGMFLCGANDLI +TIFVAPECFSLCSYLLSGYTKKDVRSNEATMKYLLMGGASSSILVHGFSW +LYGSSGGEIELQEIVNGLINTQMYNSPGISIALIFITVGIGFKLSPAPSH +QWTPDVYEGSPTPVVAFLSVTSKVAASASATRIFNIPFYFSSNEWHLLLE +ILAILSMILGNLIAITQTSMKRMLAYSSIGQIGYVIIGIIVGDSNDGYAS +MITYMLFYISMNLGTFACIVLFGLRTGTDNIRDYAGLYTKDPFLALSLAL 
+CLLSLGGLPPLAGFFGKLYLFWCGWQAGLYFLVLIGLLTSVVSIYYYLKI +IKLLMTGRNQEITPHVRNYRRSPLRSNNSIELSMIVCVIASTIPGISMNP +IIAIAQDSLF +>AC_000188@LyesCp086@ndhb@142909@145120@D@2@511 NADH_dehydrogenase_subunit_2 +MIWHVQNENFILDSTRIFMKAFHLLLFDGSLIFPECILIFGLILLLMIDS +TSDQKDIPWLYFISSTSLVMSITALLFRWREEPMISFSGNFQTNNFNEIF +QFLILLCSTLCIPLSVEYIECTEMAITEFLLFVLTATLGGMFLCGANDLI +TIFVAPECFSLCSYLLSGYTKKDVRSNEATMKYLLMGGASSSILVHGFSW +LYGSSGGEIELQEIVNGLINTQMYNSPGISIALIFITVGIGFKLSPAPSH +QWTPDVYEGSPTPVVAFLSVTSKVAASASATRIFNIPFYFSSNEWHLLLE +ILAILSMILGNLIAITQTSMKRMLAYSSIGQIGYVIIGIIVGDSNDGYAS +MITYMLFYISMNLGTFACIVLFGLRTGTDNIRDYAGLYTKDPFLALSLAL +CLLSLGGLPPLAGFFGKLYLFWCGWQAGLYFLVLIGLLTSVVSIYYYLKI +IKLLMTGRNQEITPHVRNYRRSPLRSNNSIELSMIVCVIASTIPGISMNP +IIAIAQDSLF +>NC_000927@NeolCp033@ndhb@28430@29950@D@1@507 NADH_dehydrogenase_subunit_2 +MELSDILASFHASNLIPEGIVACTILLVLLLDLVYSRTCHAWLAWVAMAG +LSLASVLLGQQWYQLMNLPTATMTFGGSFQADSLSLVFRAIIAMSCVLCI +LLSIDYVESTGTAPSEFLVLIATASLGGMLVAGSNDLLMMFVSLETLGLA +SYLLTGYMKRDVRSNEASLKYLLVGAASSGLFLYGISWMYGISGGHMELN +SIAHAIVSLDETKTTTCALALVLMTVGVGFKVAAAPFHQWTPDVYQGSPT +PVVAFLSVGSKAAGFILAVRMCTTLFPSFNTEWHLIFTILSILSMIVGNF +IAVTQTSLKRMLGYSSVGQAGVMMIGMLTDSPDGYASLIVYLLIYLFMNL +GAFACVILFGLRTGTDQIQDYSGLLARDPFLALCLSLCLLSLGGIPPLAG +FFGKMYLFLAAWDAGQYSLVWVGLITSVVSIYYYLSVVKIMLVPATQEMS +LAVREYPRRAWSLEPIQPLEVGIFVCVLGSILVGVAGNSMVNLMTITMSQ +APSLGV +>NC_000932@ArthCp068@ndhb@94941@97164@R@2@513 NADH_dehydrogenase_subunit_2 +MIWHVQNENFILDSTRIFMKAFHLLLFDGSFIFPECILIFGLILLLMIDS +TSDQKDIPWLYFISSTSFVMSITALLFRWREEPMISFSGNFQTNNFNEIF +QFLILLCSTLCIPLSVEYIECTEMAITEFLLFILTATLGGMFLCGANDLI +TIFVAPECFSLCSYLLSGYTKKDIRSNEATMKYLLMGGASSSILVHGFSW +LYGSSGGEIELQEIVNGLINTQMYNSPGISIALIFITVGIGFKLSLAPSH +QWTPDVYEGSPTPVVAFLSVTSKVAASASATRIFDIPFYFSSNEWHLLLE +ILAILSMIFGNLIAITQTSMKRMLAYSSIGQIGYVIIGIIVGDSNGGYAS +MITYMLFYIAMNLGTFACIILFGLRTGTDNIRDYAGLYTKDPFLALSLAL +CLLSLGGLPPLAGFFGKLHLFWCGWQAGLYFLVSIGLLTSVLSIYYYLKI +IKLLMTGRNQEITPHMRNYRISPLRSNNSIELSMIVCVIASTIPGISMNP +IIAIAQDTLFSF +>NC_000932@ArthCp086@ndhb@141485@143708@D@2@513 
NADH_dehydrogenase_subunit_2 +MIWHVQNENFILDSTRIFMKAFHLLLFDGSFIFPECILIFGLILLLMIDS +TSDQKDIPWLYFISSTSFVMSITALLFRWREEPMISFSGNFQTNNFNEIF +QFLILLCSTLCIPLSVEYIECTEMAITEFLLFILTATLGGMFLCGANDLI +TIFVAPECFSLCSYLLSGYTKKDIRSNEATMKYLLMGGASSSILVHGFSW +LYGSSGGEIELQEIVNGLINTQMYNSPGISIALIFITVGIGFKLSLAPSH +QWTPDVYEGSPTPVVAFLSVTSKVAASASATRIFDIPFYFSSNEWHLLLE +ILAILSMIFGNLIAITQTSMKRMLAYSSIGQIGYVIIGIIVGDSNGGYAS +MITYMLFYIAMNLGTFACIILFGLRTGTDNIRDYAGLYTKDPFLALSLAL +CLLSLGGLPPLAGFFGKLHLFWCGWQAGLYFLVSIGLLTSVLSIYYYLKI +IKLLMTGRNQEITPHMRNYRISPLRSNNSIELSMIVCVIASTIPGISMNP +IIAIAQDTLFSF +>NC_001319@MapoCp003@ndhb@1514@3555@D@2@502 NADH_dehydrogenase_subunit_2 +MKLELDMFFLYGSTILPECILIFSLLIILIIDLTFPKKDTIWLYFISLTS +LLISIIILLFQYKTDPIISFLGSFQTDSFNRIFQSFIVFCSILCIPLSIE +YIKCAKMAIPEFLIFILTATVGGMFLCGANDLVTIFVSLECLSLCSYLLC +GYTKRDIRSNEAAIKYLLIGGTSSSILAYGFSWLYGLSGGETNIQKITNG +LLNAETYNSSGTFIAFICILVGLAFKLSLVPFHQWTPDIYEGSPTPVVAF +LSVTSKIAGLALATRILNILFSFSPNEWKIFLEILAILSMILGNLVAITQ +TSMKRMLAYSSISQIGYILIGLITGDLKGYTSMTIYVFFYIFMNLGTFAC +IILYSLRTGTDNIRDYAGLYIKDPLLSFSLTLCLLSLGGLPPLTGFFGKL +YLFWCGWQSGFYLLVFIALITSVISLYYYLKIIKLILTKKNNEINPYIQA +YIITSPTFFSKNPIEFVMIFCVLGSTFLGIIINPIFSFFQDSLSLSVFFI +K +>NC_001320@OrsajCp075@ndhb@85395@87639@R@2@511 NADH_dehydrogenase_subunit_2 +MIWHVQNENFILDSTRIFMKAFHLLLFQGSFIFPECILIFGLILLLMIDL +TSDQKDRPWFYFISSTSLVISITALLFRWREEPIISFSGNFQTNNFNEIF +QFLILLCSTLCIPLSVEYIECTEMAITEFLLFVLTATLGGMFLCGANDLI +TIFVAPECFSLCSYLLSGYTKRDLRSNEATMKYLLMGGASSSILVHGFSW +LYGSSGGEIELQEIVNGLINTQMYNSPGISIALISITVGLGFKLSPAPFH +QWTPDVYEGSPTPVVAFLSVTSKVAASASATRILDIPFYFSSNEWHLLLE +ILAILSMILGNLLAITQTSMKRMLAYSSIGQIGYVIIGIIVGDSNDGYAS +MITYMLFYISMNLGTFACIVLFGLRTGTDNIRDYAGLYTKDPFLALSLAL +CLLSLGGLPPLAGFFGKLYLFWCGWQAGLYFLVSIGLLTSVLSIYYYLKI +VKLLMTGRNQEITPYVRNYRRSPLRSNNSIELSMTVCVIASTIPGISMNP +ILAIAQDTLF +>NC_001320@OrsajCp104@ndhb@127479@129723@D@2@511 NADH_dehydrogenase_subunit_2 +MIWHVQNENFILDSTRIFMKAFHLLLFQGSFIFPECILIFGLILLLMIDL +TSDQKDRPWFYFISSTSLVISITALLFRWREEPIISFSGNFQTNNFNEIF 
+QFLILLCSTLCIPLSVEYIECTEMAITEFLLFVLTATLGGMFLCGANDLI +TIFVAPECFSLCSYLLSGYTKRDLRSNEATMKYLLMGGASSSILVHGFSW +LYGSSGGEIELQEIVNGLINTQMYNSPGISIALISITVGLGFKLSPAPFH +QWTPDVYEGSPTPVVAFLSVTSKVAASASATRILDIPFYFSSNEWHLLLE +ILAILSMILGNLLAITQTSMKRMLAYSSIGQIGYVIIGIIVGDSNDGYAS +MITYMLFYISMNLGTFACIVLFGLRTGTDNIRDYAGLYTKDPFLALSLAL +CLLSLGGLPPLAGFFGKLYLFWCGWQAGLYFLVSIGLLTSVLSIYYYLKI +VKLLMTGRNQEITPYVRNYRRSPLRSNNSIELSMTVCVIASTIPGISMNP +ILAIAQDTLF +>NC_001666@ZemaCp075@ndhb@89236@91472@R@2@511 NADH_dehydrogenase_subunit_2 +MIWHVQNENFILDSTRIFMKAFHLLLFHGSFIFPECILIFGLILLLMIDL +TSDQKDRPWFYFISSTSLVISITALLFRWREEPIISFSGNFQTNNFNEIF +QFLILLCSTLCIPLSVEYIECTEMAITEFLLFVLTATLGGMFLCGANDLI +TIFVAPECFSLCSYLLSGYTKRDLRSNEATMKYLLMGGASSSILVHGFSW +LYGSSGGEIELQEIVNGLINTQMYNSPGISIALIFITVGLGFKLSPAPFH +QWTPDVYEGSPTPVVAFLSVTSKVAASALATRILDIPFYFSSNEWHLLLE +ILAILSMILGNLLAITQTSMKRMLAYSSIGQIGYVIIGIIVGDSNDGYAS +MITYMLFYISMNLGTFACIVLFGLRTGTDNIRDYAGLYTKDPFLALSLAL +CLLSLGGLPPLAGFFGKLYLFWCGWQAGLYFLVSIGLLTSVLSIYYYLKI +IKLLMTGRNQEITPYVRNYRRSPLRSNNSIELSMTVCVIASTIPGISMNP +ILAIAQDTLF +>NC_001666@ZemaCp101@ndhb@131265@133501@D@2@511 NADH_dehydrogenase_subunit_2 +MIWHVQNENFILDSTRIFMKAFHLLLFHGSFIFPECILIFGLILLLMIDL +TSDQKDRPWFYFISSTSLVISITALLFRWREEPIISFSGNFQTNNFNEIF +QFLILLCSTLCIPLSVEYIECTEMAITEFLLFVLTATLGGMFLCGANDLI +TIFVAPECFSLCSYLLSGYTKRDLRSNEATMKYLLMGGASSSILVHGFSW +LYGSSGGEIELQEIVNGLINTQMYNSPGISIALIFITVGLGFKLSPAPFH +QWTPDVYEGSPTPVVAFLSVTSKVAASALATRILDIPFYFSSNEWHLLLE +ILAILSMILGNLLAITQTSMKRMLAYSSIGQIGYVIIGIIVGDSNDGYAS +MITYMLFYISMNLGTFACIVLFGLRTGTDNIRDYAGLYTKDPFLALSLAL +CLLSLGGLPPLAGFFGKLYLFWCGWQAGLYFLVSIGLLTSVLSIYYYLKI +IKLLMTGRNQEITPYVRNYRRSPLRSNNSIELSMTVCVIASTIPGISMNP +ILAIAQDTLF diff --git a/detectors/cds/test/test.db/ndhd.fst b/detectors/cds/test/test.db/ndhd.fst new file mode 100644 index 0000000..e4b1b3d --- /dev/null +++ b/detectors/cds/test/test.db/ndhd.fst @@ -0,0 +1,113 @@ +>AC_000188@LyesCp081@ndhd@116944@118446@R@1@501 NADH_dehydrogenase_subunit_4 +MNYFPWLTIIVVFPIFAGSLIFFLPHKGNRVIRWYTICICILELLLTTYA 
+FCYHFQSDDPLIQLVEDYKWIDFFDFHWRLGIDGLSIGPILLTGFITTLA +TLAAWPVTRDSRLFHFLMLAMYSGQIGLFSSRDLLLFFIMWELELIPVYL +LLAMWGGKKRLYSATKFILYTAGGSVFLLMGVLGVALYGSNEPTLNFETS +VNQSYPVVLEIIFYIGFFIAFAVKSPIIPLHTWLPDTHGEAHYSTCMLLA +GILLKMGAYGLIRINMELLPHAHSIFSPWLMIIGTIQIIYAASTSLGQRN +LKKRIAYSSVSHMGFIIIGISSLTDTGLNGALLQIISHGFIGAALFFLAG +TTYDRIRLVYLDEMGGIAIPMPKMFTMFSSFSMASLALPGMSGFVAELIV +FFGIITGQKYLLMPKLLITFVMAIGIILTPIYSLSMPRQMFYGYKLFNAP +KDSFFDSGPRELFLSISIFLPVIGIGIYPDFVLSLAVDKVEVILSNFFYR +>NC_000927@NeolCp113@ndhd@139299@140801@D@1@501 NADH_dehydrogenase_subunit_4 +MTHFPWLSTIVLFPLLASLAIPWLPDRKGTTVRWYALGVGLIDFSLIAYM +FGRYYDFEQTSLQFVEDITWIDRLHLHWSLGVDGLSMPLVLLTGFITTLA +TLAAWPVTKNPRLFYFLMLAMYTGQLGVFVVQDLLLFFLMWELELIPVYL +LVSCWGGKKRLYAATKFILYTALGSIFILLGALTMPFMGIQGVTFDMSTL +AYREYSLPVEILLYTGFLIAYGVKLPAIPVHTWLPDTHGEAHYSTCMLLA +GILLKMGGYALIRINMNMLPHAHALFSPWLIGVGVVNIIYAALTSFAQRN +LKRKIAYSSVSHMGFVLIGIGSLSEAGLNGAMLQMISHGLIGASLFFLAG +TTYDRTRTLVLEEMGGLATFMPKTFALFTACSLASLALPGMSGFFAELLV +FLGLVTSEAYSPTFRAIMTVFEAIGILLTPIYLLSMLRQLFYGRSIGRPK +ALIDAGPREVFVVSCLLVPILGIGIYPKLATAIYVNTTDHVVQHVLSALR +>NC_000932@ArthCp074@ndhd@115665@117167@R@1@501 NADH_dehydrogenase_subunit_4 +MNDFPWLTIIVVFPISAGSLMLFLPHRGNKVNKWYTICICILELLLTTYA +FCYNFKMDDPLIQLSEDYKWIDFFDFYWRMGIDGLSIGTILLTGFITTLA +TLAAFPVTRDSRFFHFLMLAMYSGQIGSFSSRDLLLFFIMWELELIPVYL +LLSMWGGKKRLYSATKFILYTAGSSIFLLIGVLGISLYGSNEPTLNLELL +ANKSYPVTLEILFYIGFLIAFAVKSPIIPLHTWLPDTHGEAHYSTCMLLA +GILLKMGAYGLVRINMELLPHAHSMFSPWLLVVGTIQIIYAASTSPGQRN +LKKRIAYSSVSHMGFIIIGISSITDPGLNGAILQIISHGFIGAALFFLAG +TSYDRIRLVYLDEMGGMAISIPKIFTMFTILSMASLALPGMSGFIAEFIV +FFGIITSQKYFLISKIFIIFVMAIGMILTPIYLLSMLRQMFYGYKLINIK +NFSFFDSGPRELFLSISILLPIIGIGIYPDFVLSLASDKVESILSNYFYG +>NC_001319@MapoCp079@ndhd@96665@98164@R@1@500 NADH_dehydrogenase_subunit_4 +MNHFPWLTIIVLFPISAGLVIPFLPSTGNKIIRWYTLGVCLLEFLLITYI +FCYHYQFNDHLIQLKEDYNWISFINFHWRLGIDGFSIGLILLTGFITTLA +TLAAWPVTRNPRLFYFLMLAMYSGQIGLFASQDILLFFFMWELELLPVYL +LLAMWGGKRRLYAATKFILYTAAGSLFILIGGLIMAFYNSNEFTFDFQFL +INKKYPLELEIIIYLSFLIAYAVKLPIIPFHTWLPDTHGEAHYSTCMLLA 
+GILLKMGAYGLIRINMELLPHAHSFFAPWLVIVGAIQIVYAALTSLSQRN +LKRRIAYSSVSHMGFVLIGIGSITNLGLNGAILQMISHGLIGASLFFLAG +ISYDRTRTLVLDQMGGIGNSMPKIFTLFTSCSMASLALPGMSGFIAELMI +FLGVIDNPNYSSLFKIIIIIIQGIGIILTPIYLLSMLRQMFYGYKFSNTL +EPYFMDAGPREIFILICLFFPIISIGIYPNFVLSIWNSKVNFLLSNNFF +>NC_001320@OrsajCp088@ndhd@106398@107900@R@1@501 NADH_dehydrogenase_subunit_4 +MSSFPWLTILVVLPIFAGSLIFFLPHRGNKIVRWYTMSICLLEFLLMTYA +FCYHFQLEDPLIQLKEDSKWIDVFNFHWRLGIDGLSLGSILLTGFMTTLA +TLAAWPVTRNSRLFYFLMLAMYSGQIGLFSSRDLLLFFIMWELELIPVYL +LLSMWGGKRRLYSATKFILYTAGGSIFFLIGVLGMGLYGSNEPRLDLERL +INQSYPATLEILFYFGFLIAYAVKLPIIPLHTWLPDTHGEAHYSTCMLLA +GILLKMGAYGLIRINMELLPHAHYLFSPWLVIIGAMQIIYAASTSLGQRN +FKKRIAYSSVSHMGFIIIGIGSITNIGLNGAILQILSHGFIGATLFFLAG +TACDRMRLVYLEELGGASIPMPKIFTMFSSFSMASLALPGMSGFVAELVV +FFGLITSPKFLLMPKMLITFVMAIGMILTPIYLLSMLRQMFYGYKLFHVP +NENFEDSGPRELFLLICIFLPVIGIGIYPDFVLSLSVDRVEALLSNYYPK +>NC_001666@ZemaCp086@ndhd@110138@111640@R@1@501 NADH_dehydrogenase_subunit_4 +MSYFPWLTILVVLPIFAGSLIFFLPHKGNKIVRWYTIAICLLEFLLMTYA +FCYHFQLEDPLIQLKEDSKWIDVFDFHWRLGIDPLSLGSILLTGFITTLA +TLAAWPVTRNSQLFYFLMLAMYSGQIGLFSSRDLLLFFIMWELELIPVYL +LLSMWGGKRRLYSATKFILYTAGGSIFFLIGVLGMGLYGSNEPGLDLERL +INQSYPTTWEILLYFGFLIAYAVKLPIIPLHTWLPYTHGEAHYSTCMLLA +GILLKMGAYGLIRVNMELLPHAHYLFSPWLVIIGAVQIIYAASTSLGQRN +FKKRIAYSSVSHMGFIIIGIGSITNIGLNGAILQILSHGFIGATLFFLAG +TACDRMRLVYLEELGGISIPMPKIFTMFSSFSMASLALPGMSGFVAELVV +FFGLITSPKFMLMPKMLITFVMAIGMILTPIYLLSMLRQMFYGYKLFHVP +NKNFVDSGPRELFLLICIFLPVIGIGIYPDLVLSLSVDRVEVLLSNYYTK +>NC_001879@NitaCp083@ndhd@117525@119027@R@1@501 NADH_dehydrogenase_subunit_4 +MNYFPWLTIIVVFPIFAGSLIFFLPHKGNRVIRWYTICICILELLLTTYA +FCYHFQSDDPLIQLVEDYKWINFFDFHWRLGIDGLSIGPILLTGFITTLA +TLAAWPVTRDSRLFHFLMLAMYSGQIGSFSSRDLLLFFIMWELELIPVYL +LLCMWGGKKRLYSATKFILYTAGGSVFLLMGVLGLALYGSNEPTLNFETS +VNQSYPVVLEIIFYIGFFIAFAVKSPIIPLHTWLPDTHGEAHYSTCMLLA +GILLKMGAYGLIRINMELLPHAHSIFSPWLMIIGTIQIIYAALTSLGQRN +LKKRIAYSSVSHMGFIIIGISSLTDTGLNGALLQIISHGFIGAALFFLAG +TTYDRIRLVYLDEMGGIAIPMPKMFTMFSSFSMASLALPGMSGFVAELIV 
+FFGIITGQKYLLIPKILITFVMAIGMILTPIYSLSMSRQMFYGYKLFNAP +KDSFFDSGPRELFLSISIFLPVIGIGIYPDFVLSLAVDKVEVILSNFFYR +>NC_002186@MeviCp085@ndhd@90277@91800@D@1@508 NADH_dehydrogenase_subunit_4 +MNNFPWITSIVMLPILAGLLIPFIPDENGKNVRWYALGIGLLDFLLISYI +FGYKYNIQDTSLQLIDDYEWISSINFHWRLGIDGLSIPLILLTGFITTLA +MLGAWPIQKNAKLFYFLMLAMYSGQLGVFASQDLLLFFLMWELELIPIYI +LLIIWGGKKRLYAATKFILYTALGSIFILIAAFGMAFYGENMSFDMQILG +EKEYPINLEILFYICFLIAYAVKLPAFPVHTWLPDTHGEAHYSTCMLLAG +ILLKMGGYALIRINMNMLPNAHIYFAPYLAIIGVINIIYAALTSFAQRNI +KRKIAYSSISHMGFVLIGISSFTDIGLSGAMLQMVSHGLIGASLFFLAGT +TYDRTRTLILEDMGGIAKYMPKIFAMFTTCSLASLALPGMSGFVAELMVF +LGFANSNAYSIEFRGIITFLEAIGIIVTPIYLLSMLRQVFYGSENLKLLK +VNNLIDASAREIFIISCLLVPVIGIGIYPRILTQIYDLKTNAIIEHLEII +RSNSQIM +>NC_002202@SpolCp085@ndhd@113491@114996@R@1@502 NADH_dehydrogenase_subunit_4 +MTNSFPWLTTIVVLPIFAGSLIFLFPHRGNKVIRWYTICISMIELLLMTY +VFFYHFQPDDPLIQLVEDYKWINFFDFHWRLGIDGLSIGPILLTGFITTL +ATLAAWPVTRNSQLFHFLMLAMYSAQIGLFSSRDLLLFFIMWELELIPVY +LLLSMWGGKKRLYSATKFILYTAGGSIFLLMGVLGVGLYGSNEPTLNLET +LVNQSYPVALEIIFYIGFFIAFAVKLPIIPLHTWLPDTHGEAHYSTCMLL +AGILLKMGAYGLVRINMELLPHAHSIFSPWLMIIGTMQIIYAASTSPGQR +NLKKRIAYSSVSHMGFIIIGISSITDTGLNGAILQIISHGFIGAALFFLA +GTSYDRIRLVYLDEMGGIAIPMPKIFTLFSSFSMASLALPGMSGFIAELI +VFFGLITSQKYLLIPKLLITFGMAIGMILTPIYLLSMSRQMFYGYKLFNI +SNSSFFDSGPRELFVSTSIFLPVIGIGVYPDLVLSLSVEKVEAILSNYFY +R +>NC_002693@OeelhCp097@ndhd@121124@122647@R@1@508 NADH_dehydrogenase_subunit_4 +MNSFPWLTIIVVFPILTGSLIFLLPHRGNKVMKWYTLCICILELLLTTYT +FCYHFQLDDPLTQLTENYKWIHFFDFYWRLGIDGLSIGPILLTGFITTLA +TLAAWPVTRDAQLFHFLMLAMYSGQIGSFSSRDLLLFFLMWEFELIPVYL +LLSMWGGKKRLYSATKFILYTAGGSIFLLIGVLGIGLYGSNEPTLNFETL +ANQSYPVALEVIFYVGFLIAFAVKLPIIPFHTWLPDTHGEAHYSTCMLLA +GILLKMGAYGLVRINMELLPHAHCLFSPGLIIVGAIQIIYAASTSPGQLN +LKKRIAYSSISHMGFIIIGIGSLSDTGLNGAILQIISHGFIGAALFFLAG +TSYDRIRLLYLDEMGGMAIPLPKLFTMLSILSMASLALPGLSGFVAELLV +FFGIITSQKYLLMPKILIAFLMAIGMILTPIYSLSMLRQMFYGYKLFNVP +NYYFFDSGPRELFVSISLLLPIIGIGIYPDFVLSLSVEKVEAIISHFFFS +IVFKKKE diff --git a/detectors/cds/test/test.db/ndhe.fst 
b/detectors/cds/test/test.db/ndhe.fst new file mode 100644 index 0000000..4c87a47 --- /dev/null +++ b/detectors/cds/test/test.db/ndhe.fst @@ -0,0 +1,39 @@ +>AC_000188@LyesCp080@ndhe@119061@119366@R@1@102 NADH_dehydrogenase_subunit_4 +MILEHVLVLSAYLFSIGIYGLITSRNMVRALMCLELILNAVNINFVTFSD +FFDNRQLKGDIFSIFVIAIAAAEAAIGLAIVSSIYRNRKSTRINQSNLLN +N +>NC_000927@NeolCp119@ndhe@147326@147631@D@1@102 NADH_dehydrogenase_subunit_4L +MIFQSYLLIAASMFCIGLYGLLTSRNVVRVLMSLELLLNAVNLNLLTFSN +FVDSHEMKGQVLALFVIALAAAEAAIGLAIILSIYRNQRTVDPEQFNLLK +W +>NC_000932@ArthCp076@ndhe@117804@118109@R@1@102 NADH_dehydrogenase_subunit_4L +MILEHVLVLSAYLFLIGLYGLITSRNMVRALMCLELILNAVNMNFVTFSD +FFDNSQLKGEIFCIFVIAIAAAEAAIGLAIVSSIYRNRKSIRINQSTLLN +K +>NC_001319@MapoCp081@ndhe@98757@99059@R@1@101 NADH_dehydrogenase_subunit_4L +MLEHILTLSAFLFCIGVFGLITSRNMVRALMCLELIFNAVNINLVAFSNF +LDSSQIKGEIFSIFIIAIAAAEATIGLAIVLAIYRNRKSTRIDQFNLLKW +>NC_001320@OrsajCp090@ndhe@108712@109017@R@1@102 NADH_dehydrogenase_subunit_4L +MMFEHVLFLSVYLFSIGIYGLITSRNMVRALICLELILNSINLNLVTFSD +LFDSRQLKGDIFAIFVIALAAAEAAIGLSILSSIHRNRKSTRINQSNFLN +N +>NC_001666@ZemaCp088@ndhe@112473@112778@R@1@102 NADH_dehydrogenase_subunit_4L +MMFERVLFLSVYLFSIGIYGLITSRNMVRALICLELILNSINLNLVTFSD +LFDSRQLKGDIFAIFVIALAAAEAAIGLSILSSIHRNRKSTRINQSNFLN +N +>NC_001879@NitaCp085@ndhe@119652@119957@R@1@102 NADH_dehydrogenase_subunit_4L +MILEHVLVLSAYLFSIGIYGLITSRNMVRALMCLELILNAVNINFVTFSD +FFDNRQLKGDIFSIFVIAIAAAEAAIGLAIVSSIYRNRKSTRINQSNLLN +N +>NC_002186@MeviCp099@ndhe@104916@105221@R@1@102 NADH_dehydrogenase_subunit_4L +MYIENFLLLASALFCIGIYGLLTSRNIVRVLMCLELCLNAININFIAFSN +FIDYEKINGQVIAIFIMTIAAAEAAIGLALVLTIYRNRETVDIENFDLLK +G +>NC_002202@SpolCp087@ndhe@115663@115968@R@1@102 NADH_dehydrogenase_subunit_4L +MILEHVLVLSAFLFSIGIYGLVTSRNLVRALMCLELILNAVNLNFVTFSD +FFDSRQLKGNIFSIFVIAIAAAEAAIGPAIVSSIYRNRKSIRINQSNLLN +K +>NC_002693@OeelhCp099@ndhe@123298@123603@R@1@102 NADH_dehydrogenase_subunit_4L +MILEHVLVLSAYLFSIGIYGLITSRNMVRALMCLELILNSVNLNFVTFSD 
+FFDSRQLKGDIFSIFIIAIAAAEAAIGLAIVSSIYRNRKSIRINQSNLLN +K diff --git a/detectors/cds/test/test.db/ndhf.fst b/detectors/cds/test/test.db/ndhf.fst new file mode 100644 index 0000000..9c5b2df --- /dev/null +++ b/detectors/cds/test/test.db/ndhf.fst @@ -0,0 +1,157 @@ +>AC_000188@LyesCp073@ndhf@111508@113721@R@1@738 NADH_dehydrogenase_subunit_5 +MEQTYEYAWIIPFIPLPVPMLIGAGLILFPTATKRFRRMWAFQSVLLLSI +VMIFSIYLSIQQINSSSVYQYVWSWIINNDFSLDFGYLIDPLTSIMSILI +TTVGIMVLIYSDNYMAHDQGYLRFFAYMSFFSTSMLGLVTSSNLIQIYIF +WELVGLCSYLLIGFWFTRPVAANACQKAFVTNRVGDFGLLLGILGFYWIT +GSFEFRDLFEIFNNLIYNNELNFLFVTLCAVLLFAGAVAKSAQFPLHVWL +PDAMEGPTPISALIHAATMVAAGIFLVARLLPLFRVIPYIMYLISVIGII +TVLLGATLALAQKDIKRGLAYSTMSQLGYMMLALGMGSYRSALFHLITHA +YSKALLFLGSGSIIHSMETIVGYSPAKSQNMGLMGGLRKHVPITKITFLL +GTLSLCGIPPLACFWSKDEILNDSWLYSPIFAIIAWATAGLTAFYMFRIY +LLTFEGHLNAHFQNYGGKQKIPFYSISLWGKNGVKKNSCLLTMNNNESTY +FLSKTKYPIAKNGRKMTRPFMTIAHFKHKAVSSYPYESDNTMLFPIFVLG +LFTLFVGAIGIPFNQEGVNLDILSKWLAPSINLLHPKSNNSLDWNEFLKD +AVVSVSIAYFGIFIASFLYKPIYSSLKNLEFINSFVKKGPKRILWDKILN +GIYDWSYNRAYIDAFYTRFFVGGIRGLAEFTHFVDRRVIDGMTNGVGVIS +FIVGEGIKYIGGGRISSYLFLYLAYVSVFLLVYYLLF +>NC_000927@NeolCp114@ndhf@140837@142783@R@1@649 NADH_dehydrogenase_subunit_5 +MEIFHQAIWLIPVLPLSASMLSGIGLLTFRETTSDLRRLHGALAIGAMAL +SFVVSLGVLWNQLHGIAPVRWIIEWMLTDTFRLEIGYWVDPLTSTMLVVV +TSVALLVMIYSDEYMHVDEGYVRFFVYLSIFTTSMLGLVLSPNLVQVYGF +WELVGMCSYLLVGFWFTRPTAAEASQKAFITNRVGDFGLLLGILALYWMT +GSFEFASIADRLGDLLIAIPSLRTIACIACILVFMGPIAKSAQFPLHVWL +PDAMEGPTPISALIHAATMVAAGVFLVARMFPVFDQLPLVMELIAWTGTL +TAFLGATMALTQSDIKKGLAYSTMSQLGYMIMALGTGAYSEALFHLTTHA +YSKALLFLAAGSVIHGMEPVVGFSPMQNQNMHRMGGLRKYMPLTAMTFLL +GTCSICGIPPLACFWSKDAILAEVFATHPTCWLIAWLTAGMTGFYMFRIY +FLTFEGSFRSDLGRAKPKESHLGMVAPLIILAIPTVAIGSLGTPFAPVWE +TFVHAPGQLSSLDEEFDLAEFLEMAGSSVGIGLLGISLSSLMYRNYAIDA +TRISEYFSPLNRLFASKWYIDDLYAQVIVQGTRTIAQTLLIFDQRIIDGA +VNLTAFGTLSAADTLKYWENGRVQFYILSIIFGVLFGSWLLTTHLSSL +>NC_000932@ArthCp071@ndhf@110398@112638@R@1@747 NADH_dehydrogenase_subunit_5 +MEHTYQYSWIIPFIPLPVPILLGVGLLLFPTATKNLRRMWTFLSIFLLSI 
+VMIFSIYLSIQQIFLSCIHQNVWSWTINNEFSFEFGYFIDPLTSIMSILI +TTVGILVLIYSDNYMSHDQGYLRFFAYMGFFNTSMLGLVTSSNLIQVYFF +WELVGMCSYLLIGFWFTRPIAANACQKAFVTNRVGDFGLLLGILGLYWIT +GSFEFQDLFEIFNNLILNNRVNLLFLTLCAFLLFVGPIAKSAQFPLHVWL +PDAMEGPTPISALIHAATMVAAGIFLVARLLPLFIVIPSIMYIISLIGII +TVLLGATLALAQKDIKRGLAYSTMSQLGYMMLALGMGSYRSALFHLITHA +YSKALLFLGSGSIIHSMEAIVGYSPDKSQNMILMGGLTKHVPITKTAFLI +GTLSLCGIPPLACFWSKDEILNDSLLFSPIFAIIACSTAGLTAFYMFRIY +LLTFEGHLNTYFLNYSGKKSGSFYSLSLWGKEEEKKLNKNFGLVPLLTMN +NTKRASFFCNKTYKISNNVRNQIFITVENFGLNTRTFYYPHESDNTILFP +MLILVLFTLFIGAIGIPFNQEGIDFDILSKFFTPSINLLHKNSQNFVDWY +EFLRNATFSVSIAFFGIFIAYCLYKPFYSSLLNLTLLNSFQKWNSKRIHW +EKLINFVYNWSYNRGYIDSFFKTSLIESIRRLAKQTTFFDKRIIDGITNG +VGITSFFVGEVTKYIGGSRISSYLFLYLSYVLIFLMILFFFYFEKF +>NC_001319@MapoCp074@ndhf@91101@93179@R@1@693 NADH_dehydrogenase_subunit_5 +MELIFQNVWFVPLFPFLASILLGIGLFFFPNSIKKFRRLSSFISIMFLNI +AMLLSFHFFWQQITGSPIHRYLWSWVLYKNFVLEIGYLLDPLTSIMLVLV +TTVAVMVMIYSDSYMFYDEGYIKFFCYLSLFTASMLGLVLSPNLIQVYIF +WELVGMCSYLLIGFWFTRPSAANACQKAFVTNRIGDFGLLLGILGFYWIT +GSFDFQQLSKRFFELLSYNQINLVFATLCALFLFLGPVAKSAQFPLHIWL +PDAMEGPTPISALIHAATMVAAGIFLVARMFPLFQMLPFVMSIISWTGAI +TALLGATIALAQKDLKKGLAYSTMSQLGYMMLALGIGSYKAGLFHLITHA +YSKALLFLGSGSVIHSMEPIVGYHPNKSQNMIFMGGLRQYMPITAITFLF +GTLSLCGIPPFACFWSKDEILVNSWLHFPILGSIAFFTAGLTAFYMFRIY +FLTFEGDFRGHFFDDVKKLSSISIWGSLEFNKEQFKLDKKSTLYPKEANN +IMLFPLIILTIPTVFIGFIGILFDENKMNVDSLSYWLTLSINSFNYSNSE +KFLEFLFNAIPSVSIAFFGILIAFYLYGPNFSFLKKEKKKLQLKSEIDIV +LKSFSNFIYNWSYYRAYIDGFYSSFFIKGLRFLIKIVSFIDRWIIDGIIN +GIGIFSFFGGESLKYIEGGRISSYLFFIIFCMFLFFLYSYII +>NC_001320@OrsajCp085@ndhf@101433@103637@R@1@735 NADH_dehydrogenase_subunit_5 +MEHTYQYAWVIPLLPLPVIMSMGFGLFLVPTATKNLRRIWAFPSVLLLSI +AMVFSVHLSIQQINGSSIYQYLWSWTVNNDFSLEFGYLIDPLTSIMLILI +TTVGILVLIYSDDYMSHDEGYLRFFVYISFFNTSMLGLVTSSNLIQIYFF +WELVGMCSYLLIGFWFTRPIAASACQKAFVTNRVGDFGLLLGILGFFWIT +GSLEFRDLFKIANNWIPNNEINSLLTILCAFLLFLGAVAKSAQFPLHVWL +PDAMEGPTPISALIHATTMVAAGIFLIARLLPLFISLPLIMSFISLIGTL +TLFLGATLALAQRDIKRSLAYSTMSQLGYMMLALGIGSYQAALFHLITHA 
+YSKALLFLGSGSVIHSMEPLVGYSPDKSQNMVLMGGLRKYIPITRTCFLW +GTLSLCGIPPLACFWSKDEILSNSWLYSPFFGIIASFTAGLTAFYMFRIY +LLTFDGYLRVHFQNYSSTKEDSLYSISLWGKRISKGVNRDFVLSTAKSGV +SFFSQNLSKIHVNTGNRIGSFSTSLGTKNTFVYPHEPGNTMLFPLLILLL +CTLFIGSIGIHFDNEIGELTILSKWLTPSINFFQESSNSSINSYEFITNA +ISSVSLAIFGLFIGIYVLWICLLFFSEFDLINSFVKGGPKKYFFHQLKKK +IYSWSYNRGYIDIFYTRTFTLGIRGLTELTQFFDKGVIDGITNGVGLASF +CIGEEIKYVGGGRISSYLFFFLCYVSVFLFFFLS +>NC_001666@ZemaCp083@ndhf@105072@107288@R@1@739 NADH_dehydrogenase_subunit_5 +MEHTYQYAWVIPLLPLPVIMSMGFGLFLIPTATKNLRRIWAFPSILLLSI +AMVFSLHLSIQQINGSSIYQYLWSWTINNDFSLEFGYLVDPLTSIMLILI +TTVGILVLIYSDDYMSHDEGYLRFFVYISFFNTSMLGLVTSSNLIQIYFF +WELVGMCSYLLIGFWFTRPIAASACQKAFVTNRVGDFGLLLGILGFFWIT +GSLEFRDLFKIANNWIPNNGINSLLTTLCAFLLFLGAVAKSAQFPLHVWL +PDAMEGPTPISALIHAATMVAAGIFLLARLLPLFISLPWIMSFISLIGTI +TLFLGATLALAQRDIKRSLAYSTMSQLGYMMLALGIGSYQAALFHLITHA +YSKALLFLGSGSVIHSMEPLVGYSPDKSQNMVLMGGLRKYVPITRTTFLC +GTLSLCGIPPLACFWSKDEILSNSWLYSPFFGIIASFTAGLTAFYMFRIY +LLTFDGYLRVHFQNYSSTKEGSLYSISLWGKSISKGVNRDFVLSTMKSGV +SFFSQNIPKIPANTRNKIGSFSTPFGAKNTFVYPHETGNTMLFPLLILLL +FTLFIGSIGIHFDNGVKDNRILELTILSKWLTPSINLFQENSNSSINSYE +FLTNAISSVSLAIFGLFIAYIFYGSAYSFFQNLNFQNSLVKKNPKKSFLD +EVKKKIYSWSYNRGYIDFFYTRVFILGIRKLAELTHFFDKGVIDGITNGV +GLAGFCIGEEIKYVGGGRISSYLFFFLCYVSLFLFFIP +>NC_001879@NitaCp080@ndhf@112072@114294@R@1@741 NADH_dehydrogenase_subunit_5 +MEQTYEYAWIIPFIPLPVPMLIGAGLFLFPTATKSFRRMWAFQSVLLLSI +VMVFSIYLSIQQINSSSFYQYVWSWIINNDFSLDFGYLIDPLTSIMSILI +TTVGIMVLIYSDNYMAHDQGYLRFFAYMSFFSTSMLGLVTSSNLIQIYIF +WELVGLCSYLLIGFWFTRPVAANACQKAFVTNRVGDFGLLLGILGFYWIT +GSFEFRDLFEIFNNLIYNNEVDFLFVTLCAVLLFAGAVAKSAQFPLHVWL +PDAMEGPTPISALIHAATMVAAGIFLVARLLPLFRVIPYIMYLISVIGII +TVLLGATLALAQKDIKRGLAYSTMSQLGYMMLALGMGSYRSALFHLITHA +YSKALLFLGSGSIIHSMETIVGYSPAKSQNMGLMGGLRKHVPISKITFLL +GTLSLCGIPPLACFWSKDEILNDSWLYSPIFAIIAWATAGLTAFYMFRIY +LLTFEGHLNAHFPNYGGKQKTPFYSISLWGKNGVKKNSCLLTMNNNESTY +FFAKTKYPIDKNGRKMTRPFMTIAHFEHKAVYSYPYESDNTMLFPIFVLG +LFTLFVGSIGIPFNQEGGNLDILSKWLAPSINLLHQKSNNSMDWNEFLKD +AVLSVSIAYFGIFIASFLYKPIYSSLKNFELINSFVKKGPKRILWDKIIN 
+GIYDWSYNRAYIDAFYTRFLVGGIRGLAEFTHFFDRRVIDGMTNGVGVIS +FIVGEGIKYIGGGRISSYLFLYLAYVSIFLLVYYLLFSTL +>NC_002186@MeviCp105@ndhf@109803@111761@D@1@653 NADH_dehydrogenase_subunit_5 +MESISQYAWLIPIFPLAGSLLIGIGLISFRRATNILRWRYSFLIIALLGI +SLILSCLILFSQINATPSYQWIFQWIVTNNFLLEIGYFVDPLTAVMLVIV +TTVAILVLIYTDGYMSYDEGYVRFFAYLSLFTTSMLGLVLSPNLLQIYVF +WELVGMCSYLLIGFWFTRPAAADACQKAFVTNRVGDFGLLLGILGFYWMT +GSFEFDVISMKLLQLAEYDNFNTQLAIFFGFLIFLGPVAKSAQFPLHVWL +PDAMEGPTPISALIHAATMVAAGVFLVARMFPIFSQFPFLMDLIAWTGAI +TAIIGATIAVTQVDLKKGLAYSTMSQLGYMIMAMGMGSYTASLFHLMTHA +YSKALLFLSAGSTIHGMEPIVGFNPAKNQNMSLMGGIRKYMPITGNAFLI +GTLSLCGIPPLACFWSKDAILSNAFVHSPLLWFIGWSTAGLTSFYMFRMY +FLVFEGEFRGNSVNQEKIRSNKLPKESNTKMTLPLIILTLFSITIGWIGT +PFNNQFMFLIHTINQEIEPFDINEFLFIAGSSVGIALLGCYTAYLIYIKD +KNTDKFANLLQPFYQLSFNKWYIDDIYEYIFVKGNRQLAQQTLLFDKKII +DGFVNLTGLITLVSSESLRSIENGKIQSYILMIIFTLLTILGISQTYYSL +IL +>NC_002202@SpolCp080@ndhf@107770@109998@R@1@743 NADH_dehydrogenase_subunit_5 +MEHIYQYAWIIPFLPLPVPLLIGAGLLFFPTATKNLRRIWAFSSISLLSI +VMIFSMKLAIQQINSNSIYQYLWSWTINNDFSLEFGYLMDPLTSIMSMLI +TTVAILVLIYSDNYMSHDQGYLRFFAYMSFFNTSMLGLVTSSNLIQIYIF +WELVGMCSYLLIGFWFTRPIAANACQKAFVTNRVGDFGLLLGILGLYWIT +GSFEFRDLFEIFNNLIKNNEVNSLFCILCAFLLFAGAVAKSAQFPLHVWL +PDAMEGPTPISALIHAATMVAAGIFLVARLLPLFVVIPYIMYVISFIGII +TVLLGATLALAQKDIKRSLAYYTMSQLGYMMLALGMGSYRTALFHLITHA +YSKALLFLASGSLIHSMGTIVGYSPDKSQNMVLMGGLTKHVPITKTSFLI +GTLSLCGIPPLACFWSKDEILNDSWVYSPIFAIIAYFTAGLTAFYMFRIY +LLTFEGHLNFFCKNYSGKKSSSFYSISLWGKKELKTINQKISLLNLLTMN +NKERASFFSKKPYEINVKLTKLLRSFITITYFENKNISLYPYESDNTMLF +PLIILIMFTLFVGFIGIPFNQEGMDLDILTKWLTPSINLLHSNSENFVDW +YEFVINAIFSISIAFFGIFIAFFFYKPIYSSLKNFDLINSFDKRGQKRIL +GDNIITIIYNWSANRGYIDAFYSTFLIKGIRSLSELVSFFDRRIIDGIPN +GFGVTSFFVGEGIKYVGGGRISSYLFWYLLYVSIFLFIFTFT +>NC_002693@OeelhCp094@ndhf@115809@118142@R@1@778 NADH_dehydrogenase_subunit_5 +MEYTYQYSWIIPFIPLPVPILIGMGLLLFPTATKNHRRVWSFPSILLLSM +VMLLSVYLSIQQINRSFIYQYVWSWTINNDFSLEFGHLIDPLASIMLILI +TTVGILVLFYSDNYMSHDQGYLRFFAYLSFFNTSMLGLVTSSNLIQIYIF +WELVGMCSYLLIGFWFTRPIAATACQKAFVTNRVGDFGLLLGILGLYWIT 
+GSFEFRDLFEIVNNLIDNNNQVHFLFVTLCSFLLFAGAVAKSAQFPLHVW +LPDAMEGPTPISALIHAATMVAAGIFLVARLLPLFVITPYIMNLISLIGI +ITVLLGATLALAQKDIKRSLAYSTMSQLGYMMLALGMGSYRAALFHLITH +AYSKALLFLGSGSIIHSMESIVGYSPDKSQNMVLMGGLKKHVPITKTAFL +VGTLSLCGIPPLACFWSKDEILNDSWLYSPIFAIIACSTAGFTAFYMFRV +YLLTFDGHLNVHFQNYSGQKSSSVYSISLWGKQVPKRIQNPFCLLNLLTM +NNNESTSFFWNNKCKLDGNVKKRIRPFITVTHFPNRKTFSYPHESDNTML +FSLFVLVLFTLFVAAIGIPFNQEGSDCDILSKLLNPSINLLHQNSNNFTD +WYEFVTNASFSVSIALLGIFIATFLYKPIYSSLQNFNLLNSFYKRSANRV +MWDKIQNWIYDWSYNRGYIDSFYTISLTGGIRGLAELSHFFDRRVIDGIL +NGFGLTSFFLGESLKYFGGGRISSYLLLYSIFIFIFLLMDSFFTNLPFFV +LCQFLDSSFSMSISGFLLYENFLYENF diff --git a/detectors/cds/test/test.db/ndhg.fst b/detectors/cds/test/test.db/ndhg.fst new file mode 100644 index 0000000..40dc735 --- /dev/null +++ b/detectors/cds/test/test.db/ndhg.fst @@ -0,0 +1,50 @@ +>AC_000188@LyesCp077@ndhg@119590@120120@R@1@177 NADH_degydrogenase_subunit_6 +MDLSEPIHDFLLVFLGSGLILGGLGVVLLPNPIYSAFSLGLVLVCTSLFY +ILSNAYFVAAAQLLIYVGAINVLIIFAVMFMNGSEYYKDFHLWTVGDGIT +SMVCISLFISLITTISDTSWYGIIWTTRSNQIIEQDFLSNSQQIGIHLST +DFFLPFELISIILLVALIGAIAVARQ +>NC_000927@NeolCp118@ndhg@146302@146811@D@1@170 NADH_dehydrogenase_subunit_6 +MEIVQNFSSAALTTGILLGCLGVIFLPSIVYAAFLLGAVFFCLAGIYVLL +HADFVAAAQVLVYVGAINVLILFAIMLVNPQDAPPRALDSPPLIPGIACI +GLLGVLVQMISTTSWLTPPWTPEPNSLPVLGGHLFSDCLLAFEVMSLVLL +VALVGAIVLARREPVERSS +>NC_000932@ArthCp077@ndhg@118377@118907@R@1@177 NADH_dehydrogenase_subunit_6 +MDLPGPIHDFLLVFLGSGLLVGGLGVVLLPNPIFSAFSLGFVLVCISLLY +ILSNSHFVAAAQLLIYVGAINVLIIFAVMFMNDSEYSTDFNLWTIGNGIT +SLVCTTILFLLMSTILDTSWYGVIWTTKLNQILEQDLISNSQQIGIHLST +DFFLPFELISIILLVALIGAISVARQ +>NC_001319@MapoCp082@ndhg@99113@99688@R@1@192 NADH_dehydrogenase_subunit_6 +MKLPESFYETIFLFLESGLILGSLGVILLTNIVYSALFLGFVFVCISLLY +LLLNADFVAAAQILIYVGAVNVLIIFAVMLINKKQYSNFFVYWTIGDGIT +LTLCTSIFLLLNNFISNTSWSKIFLMTKPNLVVKDIILINTVRHIGSELL +TEFLLPFELMSIILLVALIGAITLARREKKIELEKNDFFNF +>NC_001320@OrsajCp091@ndhg@109227@109757@R@1@177 NADH_dehydrogenase_subunit_6 +MDLPGPIHEILVLFGGFVLLLGGLGVVLLTNPTFSAFSLGLVLVCISLFY 
+ILLNSYFVAVAQLLIYVGAINVLIIFAVMFVNGSEWSKDKNFWTIGDGFT +SLVCITIPFSLMTTIPDTSWYGILWTTRSNQIVEQGLINNVQQIGIHLAT +DFYLPFELISIILLVSLIGAITMARQ +>NC_001666@ZemaCp089@ndhg@112993@113523@R@1@177 NADH_dehydrogenase_subunit_6 +MDLPGPIHEILVLFGGFGLLLGGLGVVLLTNPIYSAFSLGLVLVCISLFY +FLLNSYFVAVAQLLIYVGAINVLIIFAVMFVNGSEWSKDKNYWTIGDGFT +LLLCITIPFSLMTTIPDTSWYGILWTTRSNQIVEQGLINNVQQIGIHLAT +DFYLPFELISLILLVSLIGAITMARQ +>NC_001879@NitaCp086@ndhg@120181@120711@R@1@177 NADH_dehydrogenase_subunit_6 +MDLSEPIHDFLLVFLGSGLILGGLGVVLLPNPIYSAFSLGLVLVCTSLFY +ILSNSYFVAAAQLLIYVGAINVLIIFAVMFMNGSEYYKDFHLWTVGDGIT +SMVCISLFISLITTISDTSWYGIIWTTRSNQIIEQDFISNSQQIGIHLST +DFFLPFELISIILLVALIGAIAVARQ +>NC_002186@MeviCp100@ndhg@105252@105821@R@1@190 NADH_dehydrogenase_subunit_6 +MSFSEQIQNLSLLLLEIGTIIGALGVVLLPNILYSGFLLGGVLICIAGIY +LLLNAEFIAAAQVLIYVGAINVIILFAIMLVNKIENLNPSNNQMMRNGLS +SFICFSFFILLSNMIFDTQWIDTVGVSTKYSISIIGNHIFSDFLLPFEIV +SVLLLVTLVGAVFIARKEDASEIEISKISFLNLPDPSKK +>NC_002202@SpolCp088@ndhg@116164@116694@R@1@177 NADH_dehydrogenase_subunit_6 +MDLPGPIHDFLLVFLGSGLILGALGVVLFTNPIFSAFSLGLVLVCISLFY +ILANSHFVASAQLLIYVGAINVLIIFSVMFMSGPEYDKKFQLWTVGDGVT +SLVCISLFVSLISTILNTSWYGIIWTTKSNQILEQDLINASQQIGIHLST +DFFLPFELISIILLVSLIGAIAVARQ +>NC_002693@OeelhCp100@ndhg@123796@124326@R@1@177 NADH_dehydrogenase_subunit_6 +MDLPGPIHDFLLVFLGSGLIVGGLGVVLLTNPIFSAFSLGLVLVCISLFF +SLSNSYFVAAAQLLIYVGAINVLILFAVMFMNGSEYSKDLTLWTVGDGIT +SLVCTSIFISLITTILDTSWYGIIWTTKSNQIIEQDLIGNSQQIGIHLST +DFFLPFELISIILLVSLIGAIAVARQ diff --git a/detectors/cds/test/test.db/ndhh.fst b/detectors/cds/test/test.db/ndhh.fst new file mode 100644 index 0000000..c195a4a --- /dev/null +++ b/detectors/cds/test/test.db/ndhh.fst @@ -0,0 +1,90 @@ +>AC_000188@LyesCp078@ndhh@123339@124520@R@1@394 NADH_dehydrogenase_subunit_7 +MTAPTTRKDLMIVNMGPQHPSMHGVLRLIVTLDGEDVVDCEPILGYLHRG +MEKIAENRTIIQYLPYVTRWDYLATMFTEAITINGPEQLGNIQVPKRASY +IRVIMLELSRIASHLLWLGPFMADIGAQTPFFYIFRERELIYDLFEAATG +MRMMHNYFRIGGVAADLPYGWIDKCLDFCDYFLTGVAEYQKLITRNPIFL +ERVEGVGIIGRDEALNWGLSGPMLRASGIEWDLRKVDHYESYDEFDWQVQ 
+WQREGDSLARYLVRIGEMTESIKIIQQALEGIPGGPYENLEMRRFDRLKD +PEWNDFEYRFISKKPSPTFELSKQELYVRVEAPKGELGIFLIGDQSVFPW +RWKIRPPGFINLQILPQLVKRMKLADIMTILGSIDIIMGEVDR +>NC_000927@NeolCp115@ndhh@143205@144380@D@1@392 NADH_dehydrogenase_subunit_7 +MIEAKTDPMIVSMGPHHPSMHGVLRLIVTLDGENVLDCEPVVGYLHRGME +KIAENRTIVQYLPYVTRWDYLATMFTEAITVNAPERLANIEVPRRASYLR +VIMLELSRIASHLLWLGPFMADLGAQTPFFYILREREMIYDLFEAATGMR +MMHNYFRVGGVAADVPYGWIDKCLDFCEYFLPKVDEYEALITRNPIFLKR +VKGVGTISPQQAINWGLSGPMLRASGVSWDLRKVDRYECYEDFHWSVESE +ETGDCLARYLVRIREMRTSTKIVQQALKSIPGGPTENLEARQLSQGRTSP +WNEFDYQFLGKKASPTFKMPRQEHYVRVEAPKGELGVFLIGDDHVFPWRW +KIRPPGFINVQILPNLVQGMKLADIMTILGSIDIIMGEVDR +>NC_000932@ArthCp080@ndhh@122011@123192@R@1@394 NADH_dehydrogenase_subunit_7 +MKRPVTGKDLMIVNMGPHHPSMHGVLRLIVTLDGEDVVDCEPILGYLHRG +MEKIAENRAIIQYLPYVTRWDYLATMFTEAITVNGPEQLGNIQVPKRASY +IRVIMLELSRIASHLLWLGPFMADIGAQTPFFYIFREREFVYDLFEAATG +MRMMHNFFRIGGIAADLPYGWIDKCLDFCDYFLTEVVEYQKLITRNPIFL +ERVEGVGIIGGEEAINWGLSGPMLRASGIPWDLRKIDRYESYDEFEWEIQ +WQKQGDSLARYLVRLSEMTESIKIIQQALEGLPGGPYENLESRGFDRKRN +PEWNDFEYRFISKKPSPTFELSKQELYVRVEAPKGELGIFLIGDQSGFPW +RWKIRPPGFINLQILPELVKRMKLADIMTILGSIDIIMGEVDR +>NC_001319@MapoCp085@ndhh@102202@103380@R@1@393 NADH_dehydrogenase_subunit_7 +MMILTKNKPMIVSMGPHHPSMHGVLRLIVTLDGEDVLDCEPVLGYLHRGM +EKIAENRTIVQYLPYVTRWDYLATMFTEAITVNAPEKLTNIQVPKRASYI +RIIMLELSRIASHLLWLGPFMADIGAQTPFFYIFREREMIYDLFESATGM +RMMHNYFRIGGVAVDLPYGWIDKCLDFCDYFLPKINEYERLITNNPIFLK +RVEGIGTVTREEAINWGLSGPMLRASGVQWDLRKVDHYECYDELDWKIQW +QKEGDSLARYLVRIGEMKESVKIIQQALKAIPGGPFENLEARRLNQGKNS +EWNLFEYQFISKKPSPTFKLPKQEHYVRVEAPKGELGIFLIGDDSVFPWR +LKIRSPGFINLQILPQLVKGMKLADIMTILGSIDIIMGEVDR +>NC_001320@OrsajCp094@ndhh@112708@113889@R@1@394 NADH_dehydrogenase_subunit_7 +MSLPLTRKDLMIVNMGPQHPSMHGVLRLIVTLDGEDVIDCEPILGYLHRG +MEKIAENRTIIQYLPYVTRWDYLATMFTEAITVNAPEFLENIQIPQRASY +IRVIMLELSRIASHLLWLGPFMADLGAQTPFFYIFRERELIYDLFEAATG +MRMMHNYFRIGGVAADLPYGWIDKCLDFCDYFLRGVIEYQQLITQNPIFL +ERVEGVGFISGEEAVNWGLSGPMLRASGIQWDLRKVDLYESYNQFDWKVQ +WQKEGDSLARYLVRIGEMRESIKIIQQAVEKIPGGPYENLEVRRFKKAKN 
+SEWNDFEYRFLGKKPSPNFELSKQELYARVEAPKGELGIYLVGDDSLFPW +RWKIRPPGFINLQILPQLVKKMKLADIMTILGSIDIIMGEVDR +>NC_001666@ZemaCp092@ndhh@116456@117637@R@1@394 NADH_dehydrogenase_subunit_7 +MSLSLKRKDLMIVNMGPQHPSMHGVLRLIVTLDGEDVIDCEPILGYLHRG +MEKIAENRSIIQYLPYVTRWDYLATMFTEAITVNAPEFLENIQIPKRASY +IRVIMLELSRIASHLLWLGPFMADLGAQTPFFYIFRERELIYDLFEAVTG +MRMMHNYFRIGGVAADLPYGWMDKCLDFCDYFLQGVVEYQELITQNPIFL +ERVEGVGFISGEEAVNWGLSGPMLRASGIQWDLRKIDPYESYNQFDWKVQ +WQKEGDSLARYLVRVGEMRESIKIIQQAVEKIPGGPYENLEARRFKKAKN +PEWNDFEYRFLGKKPSPNFELSKQELYVRVEAPKGELGIYLVGDDSLFPW +RWKIRPPGFINLQILPQLVKKMKLADIMTILGSIDIIMGEVDR +>NC_001879@NitaCp089@ndhh@123937@125118@R@1@394 NADH_dehydrogenase_subunit_7 +MTAPTTRKDLMIVNMGPQHPSMHGVLRLIVTLDGEDVVDCEPILGYLHRG +MEKIAENRTIIQYLPYVTRWDYLATMFTEAITINGPEQLGNIQVPKRASY +IRVIMLELSRIASHLLWLGPFMADIGAQTPFFYIFRERELIYDLFEAATG +MRMMHNYFRIGGVAADLPYGWIDKCLDFCDYFLTGVAEYQKLITRNPIFL +ERVEGVGIIGGDEALNWGLSGPMLRASGIEWDLRKVDHYESYDEFDWQVQ +WQREGDSLARYLVRIGEMTESIKIIQQALEGIPGGPYENLEIRRFDRLKD +PEWNDFEYRFISKKPSPTFELSKQELYVRVEAPKGELGIFLIGDQSVFPW +RWKIRPPGFINLQILPQLVKRMKLADIMTILGSIDIIMGEVDR +>NC_002186@MeviCp103@ndhh@107611@108792@R@1@394 NADH_dehydrogenase_subunit_7 +MTMLQTKTDPMVISMGPHHPSMHGVLRLIVTLDGENVIDCEPVLGYLHRA +MEKIAENRTIVQYLPYVTRWDYLATMFTEAITVNAPEKLANIEVPKRASY +IRVIMLELSRIASHLLWLGPFMADIGAQTPFFYILREREMIYDLFEAATG +MRMMHNYFRIGGVASDLPYGWVDKCLDFSDYFLPKVDEYERLITNNPIFL +KRVRDVGFISREEAINWGLSGPMLRASGVQWDLRKVDNYECYGELDWNVQ +WQSDGDCLARYLVRLGEMRESTKIIQQALKAIPGGPYENLEARRLSKGRK +SEWNNFEYQFVGKKPSPTFKIPKQEHYVRVEAPKGELGVFLMGDDNVFPW +RWKIRSPGFINVQILPELVRGMKLADIMTILGSIDIIMGEVDR +>NC_002202@SpolCp091@ndhh@119776@120957@R@1@394 NADH_dehydrogenase_subunit_7 +MAVPTTRKDLMIVNMGPHHPSMHGVLRLIVTLDGEDVIDCEPIVGYLHRG +MEKIAENRTIIQYLPYVTRWDYLATMFTEAITVNGPEQLGNIQVPKRASY +IRVIMLELSRIASHLLWLGPFMADIGAQTPFFYILRERELIYDLFEAATG +MRMMHNYFRIGGVAADLPYGWIDKCLDFCDYFLIGLTEYQKLITRNPIFL +ERVENVGIIGGEEAINWGLSGPMLRASGIQWDLRKVDHYECYDEFDWEVQ +WQKEGDSLARYLIRIGEMAESVKIIQQALEGIPGGPYENLEIRRFNRIKY +PEWNDFEYRFISKKPSPAFELSKQELYVRVEAPKGELGIFLIGDQSVFPW 
+RWKIRPPGFINLQILPQLVKKMKLADIMTILGSIDIIMGEVDR +>NC_002693@OeelhCp103@ndhh@127260@128441@R@1@394 NADH_dehydrogenase_subunit_7 +MNVTTTRKDLMIVNMGPHHPSMHGVLRLILTLDGEDVIDCEPILGYLHRG +MEKIAENRTVIQYLPYVTRWDYLATMFTEAITINGPEQLGNIQVPKRASY +IRIIMLELSRIASHLLWLGPFMADIGAQTPFFYIFRERELVYDLFEAATG +MRMMHNYFRIGGVAADLPYGWIDKCLDFCDYFLTAVSEYQKLITRNPIFL +ERVEGVGIIGGEEAINWGLSGPMLRASGIEWDLRKVDRYECYGELDWEIR +WQKEGDSLARYLVRMSEMTESIKIIQQALEGIPGGPYENLEIRCFDREKD +PEWDGFEYRFISKKPSPTFELPKQELYVRVEAPKGELGIFLIGDQSGFPW +RWKIRPPGFINLQILPQLVKRMKLADIMTILGSIDIIMGEVDR diff --git a/detectors/cds/test/test.db/ndhi.fst b/detectors/cds/test/test.db/ndhi.fst new file mode 100644 index 0000000..a2a13ce --- /dev/null +++ b/detectors/cds/test/test.db/ndhi.fst @@ -0,0 +1,50 @@ +>AC_000188@LyesCp076@ndhi@120525@121028@R@1@168 NADH_dehydrogenase_subunit_I +MLPMITEFINYGQQTIRAARYIGQGFMITLSHANRLPVTIQYPYEKLITS +ERFRGRIHFEFDKCIACEVCVRVCPIDLPVVDWKLETDIRKKRLLNYSID +FGICIFCGNCVEYCPTNCLSMTEEYELSTYDRHELNYNQIALGRLPMSVI +DDYTIRTISNLPQINNE +>NC_000927@NeolCp117@ndhi@145747@146268@D@1@174 NADH_dehydrogenase_subunit_I +MFDFLTSLQTYRQEAAQAAQYIGQGFGVTFDHMSRRPITIHYPYEKLIPS +ERFRGRIHFEFDKCIACEVCVRVCPINLPVVDWDYQKSVKKKQLRSYSID +FGVCIFCGNCVEYCPTNCLSMTEEYELSVYDRHELNFDHVALGRVPTSVV +QDTLVTPVLGLGYLPKGELSSLP +>NC_000932@ArthCp078@ndhi@119244@119762@R@1@173 NADH_dehydrogenase_subunit_I +MLPMITGFMNYGQQTLRAARYIGQGFMITLSHTNRLPVTIQYPYEKLITS +ERFRGRIHFEFDKCIACEVCVRVCPIDLPVVDWKLETNIRKKRLLNYSID +FGICIFCGNCVEYCPTNCLSMTEEYEFSTYDRHELNYNQIALGRLPMSVI +DDYTIRTIWNSPQTKNGVNPLI +>NC_001319@MapoCp083@ndhi@99779@100330@R@1@184 NADH_dehydrogenase_subunit_I +MFSIINGLKNYNQQAIQAARYIGQGFLVTLDHMNRLPTTIQYPYEKLIPS +ERFRGRIHFEFDKCIACEVCVRVCPINLPVVDWELKKTIKKKQLKNYSID +FGVCIFCGNCVEYCPTNCLSMTEEYELSTYNRHELNYDQIALGRLPISII +EDSTIENIFNLTSLPKGKIEGHIYSRNITNIVN +>NC_001320@OrsajCp092@ndhi@110000@110536@R@1@179 NADH_dehydrogenase_subunit_I +MFPMVTGFMGQQTIRAARYIGQSFIITLSHTNRLPITIHYPYEKSITSER +FRGRIHFEFDKCIACEVCVRVCPIDLPLVDWRFEKDIKRKQLLNYSIDFG 
+VCIFCGNCVEYCPTNCLSMTEEYELSTYDRHELNYNQIALSRLPISIMGD +YTIQTIRNSTQSKIDEEKSWNSRTITDY +>NC_001666@ZemaCp090@ndhi@113707@114249@R@1@181 NADH_dehydrogenase_subunit_I +MFPMLTGFISYGQQTIRAARYIGQSFIITLSHTNRLPITIHYPYEKSITS +ERFRGRIHFEFDKCIACEVCVRVCPIDLPLVDWRFEKDIKRKQLLNYSID +FGVCIFCGNCVEYCPTNCLSMTEEYELSTYDRHELNYNQIALSRLPISIM +GDYTIQTIRNSPQSKIDEEKSWNSRTITDY +>NC_001879@NitaCp087@ndhi@121108@121611@R@1@168 NADH_dehydrogenase_subunit_I +MLPMITEFINYGQQTIRAARYIGQGFMITLSHANRLPVTIQYPYEKLITS +ERFRGRIHFEFDKCIACEVCVRVCPIDLPVVDWKLETDIRKKRLLNYSID +FGICIFCGNCVEYCPTNCLSMTEEYELSTYDRHELNYNQIALGRLPMSVI +DDYTIRTISNLPQIKNE +>NC_002186@MeviCp101@ndhi@105946@106476@R@1@177 NADH_dehydrogenase_subunit_I +MFNFIDNVQTYSKEALQAAKYIGQGFMVTFDHMNRRAITIQYPYEKLIPS +ERFRGRIHFEFDKCIACEVCVRVCPINLPVVNWEFQKEKKKKQLQTYSID +FGVCIFCGNCVEYCPTNCLSMTEEYELSVYDRHELNYDNFALGRLPTMVN +NDSMVKGIKGLGYLPKGIIEGHIDNQ +>NC_002202@SpolCp089@ndhi@116981@117493@R@1@171 NADH_dehydrogenase_subunit_I +MFPMVTGFINYGQQTIRAARYIGQSFMITLSHANRLPVTIQYPYEKLITS +ERFRGRIHFEFDKCIACEVCVRACPIDLPVVDWKLETDIRKKRLLNYSID +FGICIFCGNCVEYCPTNCLSMTEEYELSTYDRHELNYNQIALGRLPISIT +DDYTIRTILNSPQTKEKACD +>NC_002693@OeelhCp101@ndhi@124541@125038@R@1@166 NADH_dehydrogenase_subunit_I +MFPMVTGFMNYGQQTVRAARYIGQGFMITLSHANRLPVTIQYPYEKLITS +ERFRGRIHFEFDKCIACEVCVRVCPIDLPVVDWKLETGVRKKRLLNYSID +FGVCIFCGNCVEYCPTNCLSMTEEYELSTYDRHELNYNQIALGRLPMSVI +DDYTIRTILNSAQIK diff --git a/detectors/cds/test/test.db/psac.fst b/detectors/cds/test/test.db/psac.fst new file mode 100644 index 0000000..bde2496 --- /dev/null +++ b/detectors/cds/test/test.db/psac.fst @@ -0,0 +1,30 @@ +>AC_000188@LyesCp065@psac@118564@118809@R@1@82 photosystem_I_subunit_VII +MSHSVKIYDTCIGCTQCVRACPTDVLEMIPWDGCKAKQIASAPRTEDCVG +CKRCESACPTDFLSVRVYLWHETTRSMGLAY +>NC_000925@PopuCp189@psac@164452@164697@R@1@82 photosystem_I_subunit_VII +MAHSVKVYDTCIGCTQCVRACPCDVLEMVPWDGCKAKQIASAPRTEDCIG +CKRCETACPTDFLSVRVYLGAETTRSMGLAY +>NC_000926@GuthCp035@psac@33008@33253@D@1@82 photosystem_I_subunit_VII 
+MSHSVKVYDTCIGCTQCVRACPCDVLEMVAWDGCKAGQIASAPRTEDCIG +CKRCETACPTDFLSVRVYLGGETTRSMGLAY +>NC_000927@NeolCp112@psac@138977@139222@D@1@82 photosystem_I_subunit_VII +MSHSVKIYDTCIGCTQCVRACPTDVLEMVPWGGCKAAQIASAPRTEDCVG +CKRCESACPTDFLSVRVYLGAETTRSMGLAY +>NC_000932@ArthCp075@psac@117318@117563@R@1@82 photosystem_I_subunit_VII +MSHSVKIYDTCIGCTQCVRACPTDVLEMIPWDGCKAKQIASAPRTEDCVG +CKRCESACPTDFLSVRVYLWHETTRSMGLAY +>NC_001319@MapoCp080@psac@98289@98534@R@1@82 photosystem_I_subunit_VII +MAHAVKIYDTCIGCTQCVRACPTDVLEMIPWDGCKANQIASAPRTEDCVG +CKRCESRCPTDFLSVRVYLGNETTRSMGLSY +>NC_001320@OrsajCp089@psac@108020@108265@R@1@82 photosystem_I_subunit_VII +MSHSVKIYDTCIGCTQCVRACPTDVLEMIPWDGCKAKQIASAPRTEDCVG +CKRCESACPTDFLSVRVYLGPETTRSMALSY +>NC_001603@EugrCp046@psac@74937@75893@R@3@82 photosystem_I_subunit_VII +MSHSVKIYNTCIGCTQCVRACPTDVLEMVPWDGCKAGQIASSPRTEDCVG +CKRCESACPTDFLSVRVYLGSETSRSMGLAY +>NC_001631@PithCp145@psac@103521@103766@D@1@82 photosystem_I_subunit_VII +MAHSVKIYDTCIGCTQCVRACPTDVLEMIPWEGCKAKQIASAPRTEDCAG +CKRCESACPTDFLSVRVYLWHETTRSMGLAY +>NC_001666@ZemaCp087@psac@111760@112005@R@1@82 photosystem_I_subunit_VII +MSHSVKIYDTCIGCTHCVRACPTDVLEMIPWDGCKAKQIASAPRTEDCVG +CKRCESACPTDFLSVRVYLGPETTRSMALSY diff --git a/detectors/cds/test/test.db/rpl2.fst b/detectors/cds/test/test.db/rpl2.fst new file mode 100644 index 0000000..f4ac988 --- /dev/null +++ b/detectors/cds/test/test.db/rpl2.fst @@ -0,0 +1,70 @@ +>AC_000188@LyesCp030@rpl2@86038@87528@R@2@275 ribosomal_protein_L2 +MAIHLYKTSTPSTRNGTVDSQVKSNPRNNLIYGQRRCGKGRNARGIITAR +HRGGGHKRLYRKIDFRRNEKDIYGRIVTIEYDPNRNAYICLIHYGDGEKR +YILHPRGAIIGDTIVSGTEVPIKMGNALPLTDMPLGTAIHNIEITLGKGG +QLARAAGAVAKLIAKEGKSATLKLPSGEVRLISKNCSATVGQVGNVGVNQ +KSLGRAGSKRWLGKRPVVRGVVMNPVDHPHGGGEGRAPIGRKKPTTPWGY +PALGRRSRKRNKYSDNLILRRRSK +>AC_000188@LyesCp087@rpl2@153816@155306@D@2@275 ribosomal_protein_L2 +MAIHLYKTSTPSTRNGTVDSQVKSNPRNNLIYGQRRCGKGRNARGIITAR +HRGGGHKRLYRKIDFRRNEKDIYGRIVTIEYDPNRNAYICLIHYGDGEKR +YILHPRGAIIGDTIVSGTEVPIKMGNALPLTDMPLGTAIHNIEITLGKGG 
+QLARAAGAVAKLIAKEGKSATLKLPSGEVRLISKNCSATVGQVGNVGVNQ +KSLGRAGSKRWLGKRPVVRGVVMNPVDHPHGGGEGRAPIGRKKPTTPWGY +PALGRRSRKRNKYSDNLILRRRSK +>NC_000925@PopuCp126@rpl2@103379@104263@R@1@295 ribosomal_protein_L2 +MAIRLYRAYTPGTRNRTVSTFSEITTDKPEKSLINKHHFCKGRNNRGVIT +CRHKGGGHKQRYRLIDFKRNRHNIIAKVASIEYDPNRNARIALLHYLDGE +KRYILHPRSLSVGAIVVSGPMAPIEVGNALPLSTIPLGTAVHNIELRPYC +GGQIVRSAGTYAQIVAKEGNFVTVKLPSSEVRMIRKECYATIGQVGNIDA +SNITLGKAGRSRWLGKRPTVRGVVMNPVDHPHGGGGEGKSPIGRSRPVTP +WGKPALGVKTRNPNKYSNPYVLLVVNKVYLTYNLILKYNVEINT +>NC_000926@GuthCp113@rpl2@100540@101367@D@1@276 ribosomal_protein_L2 +MGIRIYKSYTPGTRNRSSSDFVEITKSKPEKSLLRKKLSCAGRNNRGLIT +VRHKGGGHKQRYRLVDFKRNKLDIPAIVASVEYDPNRNARIALLHYQDGE +KRYILHPKKLAVGDKIYSGINVPIEIGNAMPLYNVPLGTAVHNVELIPGR +GGQIVRSAGTSAQVVAKDGQVVTIKMPSNEVRMIYKNCYATIGEVGNADI +KNIRLGKAGRKRWLGIRPSVRGVVMNPCDHPHGGGEGRSPIGRAKPVTPW +GKPALGVKTRRQNKYSDFCIIRSRN +>NC_000927@NeolCp022@rpl2@15904@16731@R@1@276 ribosomal_protein_L2 +MGIRFYRAHTPGTRNRSVSDFHEITTSTPTKSLTHANHRARGRNHSGSIT +TRWRGGGHKRLYRQIDFRRDKVGVLARVATVEYDPNRSARIALLHYQDGS +KRYILHPQGLAIGAEVMSSPEAPISIGNALPLVNMPLGTEVHNIELRPYN +GGQLVRAAGAVAQLVAKEGGFGTLRMPSGEVRLVAKDCWATVGQVGHVES +INLTLGKAGRSRWLDRRPRVRGSVMNACDHPHGGGEGRCPIGHPGPLTPW +GKPALGQRTRARKKYSDALLVRRRK +>NC_000932@ArthCp064@rpl2@84337@85843@R@2@275 ribosomal_protein_L2 +MAIHLYKTSTPSTRNGAVDSQVKSNPRNNLICGQHHCGKGRNARGIITAR +HRGGGHKRLYRKIDFRRNAKDIYGRIVTIEYDPNRNAYICLIHYGDGEKR +YILHPRGAIIGDTIVSGTEVPIKMGNALPLTDMPLGTAIHNIEITLGRGG +QLARAAGAVAKLIAKEGKSATLKLPSGEVRLISKNCSATVGQVGNVGVNQ +KSLGRAGSKCWLGKRPVVRGVVMNPVDHPHGGGEGRAPIGRKKPVTPWGY +PALGRRTRKRKKYSETLILRRRSK +>NC_000932@ArthCp085@rpl2@152806@154312@D@2@275 ribosomal_protein_L2 +MAIHLYKTSTPSTRNGAVDSQVKSNPRNNLICGQHHCGKGRNARGIITAR +HRGGGHKRLYRKIDFRRNAKDIYGRIVTIEYDPNRNAYICLIHYGDGEKR +YILHPRGAIIGDTIVSGTEVPIKMGNALPLTDMPLGTAIHNIEITLGRGG +QLARAAGAVAKLIAKEGKSATLKLPSGEVRLISKNCSATVGQVGNVGVNQ +KSLGRAGSKCWLGKRPVVRGVVMNPVDHPHGGGEGRAPIGRKKPVTPWGY +PALGRRTRKRKKYSETLILRRRSK +>NC_001319@MapoCp072@rpl2@79137@80514@R@2@278 ribosomal_protein_L2 
+MAIRLYRAYTPGTRNRSVPKFDEIVKCQPQKKLTYNKHIKKGRNNRGIIT +SQHRGGGHKRLYRKIDFQRNKKYITGKIKTIEYDPNRNTYICLINYEDGE +KRYILYPRGIKLDDTIISSEEAPILIGNTLPLTNMPLGTAIHNIEITPGK +GGQLVRAAGTVAKIIAKEGQLVTLRLPSGEIRLISQKCLATIGQIGNVDV +NNLRIGKAGSKRWLGKRPKVRGVVMNPIDHPHGGGEGRAPIGRKKPLTPW +GHPALGKRSRKNNKYSDTLILRRRKNS +>NC_001320@OrsajCp069@rpl2@81180@82664@R@2@274 ribosomal_protein_L2 +MAKHLYKTPIPSTRKGTIDRQVKSNPRNNLIHGRHRCGKGRNSRGIITAR +HRGGGHKRLYRKIDFRRNQKDISGRIVTIEYDPNRNAYICLIHYGDGEKG +YILHPRGAIIGDTIVSGTKVPISMGNALPLTDMPLGTAIHNIEITRGRGG +QLARAAGAVAKLIAKEGKSATLRLPSGEVRLVSQNCLATVGQVGNVGVNQ +KSLGRAGSKCWLGKRPVVRGVVMNPVDHPHGGGEGKAPIGRKKPTTPWGY +PALGRRTRKRKKYSDSFILRRRK +>NC_001320@OrsajCp109@rpl2@132454@133938@D@2@274 ribosomal_protein_L2 +MAKHLYKTPIPSTRKGTIDRQVKSNPRNNLIHGRHRCGKGRNSRGIITAR +HRGGGHKRLYRKIDFRRNQKDISGRIVTIEYDPNRNAYICLIHYGDGEKG +YILHPRGAIIGDTIVSGTKVPISMGNALPLTDMPLGTAIHNIEITRGRGG +QLARAAGAVAKLIAKEGKSATLRLPSGEVRLVSQNCLATVGQVGNVGVNQ +KSLGRAGSKCWLGKRPVVRGVVMNPVDHPHGGGEGKAPIGRKKPTTPWGY +PALGRRTRKRKKYSDSFILRRRK diff --git a/detectors/cds/test/test.db/rpl23.fst b/detectors/cds/test/test.db/rpl23.fst new file mode 100644 index 0000000..7a6ef00 --- /dev/null +++ b/detectors/cds/test/test.db/rpl23.fst @@ -0,0 +1,30 @@ +>AC_000188@LyesCp026@rpl23@87547@87828@R@1@94 ribosomal_protein_L23 +MDGIKYAVFTDKSIRLLGKNQYTSNVESGSTRTEIKHWVELFFGVKVIAM +NSHRLPGKSRRMGPIMGHTMHYRRMIITLQPGYSIPPLRKKRT +>AC_000188@LyesCp083@rpl23@153516@153797@D@1@94 ribosomal_protein_L23 +MDGIKYAVFTDKSIRLLGKNQYTSNVESGSTRTEIKHWVELFFGVKVIAM +NSHRLPGKSRRMGPIMGHTMHYRRMIITLQPGYSIPPLRKKRT +>NC_000925@PopuCp127@rpl23@104311@104613@R@1@101 ribosomal_protein_L23 +MDSIDSRDLLDLVKYPIITDKTTKLLEENQYCFAVDPNATKINIKAAIQY +IFNVQVTGVNTCHPPKKKRSIGRFIGKRPHYKKAIITLASKDSINLFPET +>NC_000926@GuthCp112@rpl23@100237@100524@D@1@96 ribosomal_protein_L23 +MHALIDLVKYPLITDKATRLLELNQYTFLTSRVATKTDIKNAIEFLFNVK +VISINTCLLPLKRKRLGKFVGSKPRYKKAVVTLEKNNTINLFSEN +>NC_000927@NeolCp023@rpl23@16871@17149@R@1@93 ribosomal_protein_L23 
+MIIDLVKRPVITEKATRILEKNQYTFDVELSLTKPKIKALIEKAFKVEVV +SVNTHRPPRRKRRLGTTQGYLPRYKRAIITLKRGFMIPLTPF +>NC_000932@ArthCp065@rpl23@85862@86143@R@1@94 ribosomal_protein_L23 +MDGIKYAVFTDKSIRLLGKNQYTFNVESGSTRTEIKHWVELFFGVKVIAM +NSHRLPGKVKRMGPILGHTMHYRRMIITLQPGYSIPPLRKKRT +>NC_000932@ArthCp084@rpl23@152506@152787@D@1@94 ribosomal_protein_L23 +MDGIKYAVFTDKSIRLLGKNQYTFNVESGSTRTEIKHWVELFFGVKVIAM +NSHRLPGKVKRMGPILGHTMHYRRMIITLQPGYSIPPLRKKRT +>NC_001319@MapoCp073@rpl23@80550@80825@R@1@92 ribosomal_protein_L23 +MNQVKYPVLTEKTIRLLEKNQYSFDVNIDSNKTQIKKWIELFFNVKVISV +NSHRLPKKKKKIGTTTGYTVRYKRMIIKLQSGYSIPLFSNK +>NC_001320@OrsajCp071@rpl23@82683@82964@R@1@94 ribosomal_protein_L23 +MDGIKYAVFTEKSLRLLGKNQYTFNVESGFTKTEIKHWVELFFGVKVVAV +NSHRLPGKGRRMGPILGHTMHYRRMIITLQPGYSIPLLDREKN +>NC_001320@OrsajCp108@rpl23@132154@132435@D@1@94 ribosomal_protein_L23 +MDGIKYAVFTEKSLRLLGKNQYTFNVESGFTKTEIKHWVELFFGVKVVAV +NSHRLPGKGRRMGPILGHTMHYRRMIITLQPGYSIPLLDREKN diff --git a/detectors/cds/test/test.db/rpl32.fst b/detectors/cds/test/test.db/rpl32.fst new file mode 100644 index 0000000..7cf400e --- /dev/null +++ b/detectors/cds/test/test.db/rpl32.fst @@ -0,0 +1,30 @@ +>AC_000188@LyesCp036@rpl32@114504@114671@D@1@56 ribosomal_protein_L32 +MAVPKKRTSTSKKRIRKNIWKRKGYWVALKAFSLAKSLSTGNSKSFFVRQ +TKINK +>NC_000925@PopuCp022@rpl32@16239@16418@D@1@60 ribosomal_protein_L32 +MAVPKKRTSKAKKNARKANWKNQAKTEAQKALSLAKSVLTGKSNGFVYNT +LEVADAIVE +>NC_000926@GuthCp026@rpl32@24770@24931@D@1@54 ribosomal_protein_L32 +MAVPKKRTSRSKTNSRFANWLNKSNLQAQRAISKAKSITNKKNTVNDETI +ETE +>NC_000927@NeolCp122@rpl32@149220@149447@D@1@76 ribosomal_protein_L32 +MAVPKKRKSKSRANSQNHVWKREIVKQARRAVSLAKALLGGNTNFLLVSP +GPTTPIKPNPKKQTGRRPRSQRRRT +>NC_000932@ArthCp072@rpl32@113449@113607@D@1@53 ribosomal_protein_L32 +MAVPKKRTSISKKRIRKKIWKRKGYWTSLKAFSLGKSLSTGNSKSFFVQQ +NK +>NC_001319@MapoCp076@rpl32@93886@94095@D@1@70 ribosomal_protein_L32 +MAVPKKRTSKSKTRIRKAIWKNKANKSALRAFSLAKSILTNRSKSFYYTI +NDKLLNSSKSISTSKLDES +>NC_001320@OrsajCp086@rpl32@104352@104543@D@1@64 
ribosomal_protein_L32 +MAVPKKRTSMSKKRIRKNLWKKKTYFSIVQSYSLAKSRSFSGVSEHPKPK +GFSRQQTNNRVLG +>NC_001603@EugrCp047@rpl32@75928@76092@R@1@55 ribosomal_protein_L32 +MAVPKKKMSKSRRNSRKSNWKKKVLKKVLFALSLGKSFEANTNVNFSFGD +KLPQ +>NC_001631@PithCp150@rpl32@107399@107611@R@1@71 ribosomal_protein_L32 +MAVPKKRTSRSKKKIRKNVRKGKAYRAAIKAFSLAKSISTGHSKSFYCIV +NDDSSGSSESKLTAIDLDDP +>NC_001666@ZemaCp084@rpl32@108127@108306@D@1@60 ribosomal_protein_L32 +MAVPKKRTSMSKKRIRKNLWKKKTYFSIVQSYSLAKSRSFSRGNEHPKPK +GFSGQQANK diff --git a/detectors/cds/test/test.db/rps15.fst b/detectors/cds/test/test.db/rps15.fst new file mode 100644 index 0000000..cd3fe81 --- /dev/null +++ b/detectors/cds/test/test.db/rps15.fst @@ -0,0 +1,30 @@ +>AC_000188@LyesCp029@rps15@124632@124895@R@1@88 ribosomal_protein_S15 +MVKNSVISVISQEEKKGSVEFQVFNFTNKIRRLTSHLELHKKDYLSQRGL +KKILGKRQRLLAYLAKKNRVRYKELINRLDIRETKTR +>NC_000932@ArthCp081@rps15@123296@123562@R@1@89 ribosomal_protein_S15 +MIKNIVISFEEQKEESRGSVEFQVFSFTNKIRRLTSHLELHRKDYLSQRG +LRKILGKRQRLLAYLSKKNRVRYKELINQLNIRELKTR +>NC_001319@MapoCp086@rps15@103433@103699@R@1@89 ribosomal_protein_S15 +MSKNLFMDLSSISEKEKGSVEFQIFRLTNRVVKLTYHFKKHGKDYSSQRG +LWKILGKRKRLLAYLFKTNFVSYENLIIQLGIRGLKKN +>NC_001320@OrsajCp083@rps15@100818@101090@D@1@91 ribosomal_protein_S15 +MKKKGGRKIFGFMVKEEKEENWGSVEFQVFSFTNKIRRLASHLELHKKDF +SSERGLRRLLGKRQRLLAYLAKKNRVRYKKLISQLDIRER +>NC_001320@OrsajCp095@rps15@114028@114300@R@1@91 ribosomal_protein_S15 +MKKKGGRKIFGFMVKEEKEENWGSVEFQVFSFTNKIRRLASHLELHKKDF +SSERGLRRLLGKRQRLLAYLAKKNRVRYKKLISQLDIRER +>NC_001631@PithCp139@rps15@101136@101402@D@1@89 ribosomal_protein_S15 +MINNLSISSSLIPDKQRGSVESQVFYLTNRVLRLTQHLQLHGRDYSSQRG +LWKILSKRKQLLVYLSKRDKLRYDDLIGQLGIRGLKTR +>NC_001666@ZemaCp082@rps15@104729@104965@D@1@79 ribosomal_protein_S15 +MVKEEKQENRGSVEFQVFSFTNKIRRLASHLELHKKDFSSERGLRRLLGK +RQRLLAYLAKKNRVRYKKLISQLDIREK +>NC_001666@ZemaCp093@rps15@117772@118008@R@1@79 ribosomal_protein_S15 +MVKEEKQENRGSVEFQVFSFTNKIRRLASHLELHKKDFSSERGLRRLLGK +RQRLLAYLAKKNRVRYKKLISQLDIREK 
+>NC_001879@NitaCp090@rps15@125230@125493@R@1@88 ribosomal_protein_S15 +MVKNSVISVISQEEKRGSVEFQVFNFTNKIRRLTSHLELHKKDYLSQRGL +KKILGKRQRLLAYLSKKNRVRYKELINQLDIRETKTR +>NC_002186@MeviCp104@rps15@109145@109417@R@1@91 ribosomal_protein_S15 +MLKKKIIKTHANHTNDTGSTQVQVSLLSSRVAQLTKHLNNHKNDYSSQRG +LKKLLGQRKRLLKYLFVKDPLGYNNLIIQLGIRPGKSLVN diff --git a/detectors/cds/test/test.db/rps7.fst b/detectors/cds/test/test.db/rps7.fst new file mode 100644 index 0000000..0f9187e --- /dev/null +++ b/detectors/cds/test/test.db/rps7.fst @@ -0,0 +1,50 @@ +>AC_000188@LyesCp008@rps7@98721@99188@R@1@156 ribosomal_protein_S7 +MSRRGTAEKKTAKSDPIYRNRLVNMLVNRILKHGKKSLAYQIIYRAVKKI +QQKTETNPLSVLRQAIRGVTPDITVKARRVGGSTHQVPIEIGSTQGKALA +IRWLLAASRKRPGRNMAFKLSSELVDAAKGSGDAIRKKEETHRMAEANRA +FAHFR +>AC_000188@LyesCp022@rps7@142156@142623@D@1@156 ribosomal_protein_S7 +MSRRGTAEKKTAKSDPIYRNRLVNMLVNRILKHGKKSLAYQIIYRAVKKI +QQKTETNPLSVLRQAIRGVTPDITVKARRVGGSTHQVPIEIGSTQGKALA +IRWLLAASRKRPGRNMAFKLSSELVDAAKGSGDAIRKKEETHRMAEANRA +FAHFR +>NC_000925@PopuCp103@rps7@92209@92679@R@1@157 ribosomal_protein_S7 +MSRRNTAKKRFASPDPLYKSRLVSMLTVRILKSGKKTLAQRIIYQALDIV +KERTETDPLNVLEKAIRNITPLVEVKARRVGGSTYQVPIEVRAYRGTNLA +LRWITRFSRERSGKSMSMKLANEIMDAANETGNSIRKREETHRMAEANKA +FAHYRY +>NC_000926@GuthCp136@rps7@111745@112215@D@1@157 ribosomal_protein_S7 +MSRRSTTKKKLALPDPIYNSRLVNMLTVRILKEGKKHLAQRIIYNAFDII +KQRTGEDAILVFESAIKKVTPLVEVKARRIGGSTYQVPMEVRAFRGTNLA +LRWITKYARERAGKSMSMKLANEIMDAANETGSSIRKREEIHRMAEANKA +FAHYRF +>NC_000927@NeolCp044@rps7@48318@48788@D@1@157 ribosomal_protein_S7 +MSRRNTAVKRSISSDPVYNSQLIHMMISHILKEGKKALAYRLMYDAMKRI +EKTTQQDPILVVERAVRNATPTIEVKARRMGGSIYQVPLEVKPERGTALA +LRWILLAARNRTGRDMVAKLSNELMDASNRIGNAVRKRDEMHRMAEANKA +FAHIRV +>NC_000932@ArthCp069@rps7@97478@97945@R@1@156 ribosomal_protein_S7 +MSRRGTAEEKTAKSDPIYRNRLVNMLVNRILKHGKKSLAYQIIYRALKKI +QQKTETNPLSVLRQAIRGVTPDIAVKARRVGGSTHQVPIEIGSTQGKALA +IRWLLGASRKRPGRNMAFKLSSELVDAAKGSGDAIRKKEETHRMAEANRA +FAHFR +>NC_000932@ArthCp088@rps7@140704@141171@D@1@156 ribosomal_protein_S7 
+MSRRGTAEEKTAKSDPIYRNRLVNMLVNRILKHGKKSLAYQIIYRALKKI +QQKTETNPLSVLRQAIRGVTPDIAVKARRVGGSTHQVPIEIGSTQGKALA +IRWLLGASRKRPGRNMAFKLSSELVDAAKGSGDAIRKKEETHRMAEANRA +FAHFR +>NC_001319@MapoCp002@rps7@892@1359@D@1@156 ribosomal_protein_S7 +MSRKSIAEKQVAKPDPIYRNRLVNMLVNRILKNGKKSLAYRILYKAMKNI +KQKTKKNPLFVLRQAVRKVTPNVTVKARRIDGSTYQVPLEIKSTQGKALA +IRWLLGASRKRSGQNMAFKLSYELIDAARDNGIAIRKKEETHKMAEANRA +FAHFR +>NC_001320@OrsajCp076@rps7@87944@88414@R@1@157 ribosomal_protein_S7 +MSRRGTAEKRTAKSDPIFRNRLVNMVVNRIMKDGKKSLAYQILYRAVKKI +QQKTETNPLLVLRQAIRRVTPNIGVKTRRNKKGSTRKVPIEIGSKQGRAL +AIRWLLEASQKRPGRNMAFKLSSELVDAAKGGGGAIRKKEATHRMAEANR +ALAHFR +>NC_001320@OrsajCp103@rps7@126704@127174@D@1@157 ribosomal_protein_S7 +MSRRGTAEKRTAKSDPIFRNRLVNMVVNRIMKDGKKSLAYQILYRAVKKI +QQKTETNPLLVLRQAIRRVTPNIGVKTRRNKKGSTRKVPIEIGSKQGRAL +AIRWLLEASQKRPGRNMAFKLSSELVDAAKGGGGAIRKKEATHRMAEANR +ALAHFR diff --git a/detectors/cds/test/test.db/ycf1.fst b/detectors/cds/test/test.db/ycf1.fst new file mode 100644 index 0000000..cb7986a --- /dev/null +++ b/detectors/cds/test/test.db/ycf1.fst @@ -0,0 +1,353 @@ +>AC_000188@LyesCp019@ycf1@125297@130972@R@1@1892 ycf1_protein +MIFQSFLLGNLVSLCMKIINSVVVVGLYYGFLTTFSIGPSYLFLLRALVM +EEGTEKKVSATTGFITGQLMMFISIYYAPLHLALGRPHTITVLALPYLLF +HFFWNNHKHFFDYGSTTRNSMRNLSIQCVFLNNLIFQLFNHFILPSSMLA +RLVNIYLFRCNNKILFVTSGFVGWLIGHILFMKWLGLVLVWIRQNHSIRS +NKYIRSNKYLVLELRNSMARIFSILLFITCVYYLGRIPSPILTKKLKEAS +KTEERVESEEERDVEIETASEMKGTKQEQEGSTEEDPYPSPSLFSEEGWD +PDKIDETEEIRVNGKDKIKDKFHSHLTETGYNNINTSNSPIYDYQDSYLN +NNNTGNLENCKLQLLDKKNENQEQDLFWFQKPLVSLLFDYNRWNRPFRYI +KNNRFEQAVRTEMSQYFFDTCKSDGKQKISFTYPPSLSTFWKMIKRKIPL +LSLQKTLPNELDTQWVSTNKEKSNNLNKEFLNRLEILDKESLSLDILETR +TRFCNDDTKKEYVPKMYDPLLNGLYRGTIKKGVSSSIINNTLLENWEKRV +RLNRIHTIFLPNIDYQEFEQKAYTIDKKPLSTEIDEFLTLINELGNEAKS +SLNLKGLSLFSDQEQRRANSEKRTKFVKFVFNALDPNETKSGKKSIGIKE +ISKKVPRWSHKLITELDQQMGEFKDRASMDHQLRSRKAKRVVIFTDNKAT +KDAEEEVALISYSQQSDFRRGIITGSMRAQRRKTFISKLFQANVHSPLFV +DRITPLRLFSFDISELIKPILKNWTDKEGEFKILESREEQTKREEKKEKD 
+KKEDNKRKEQARIAIEEAWDTIPLAQIIRGYMLITQSILRKYILLPALII +AKNIGRMLFLQLPEWSEDLQEWNREMQIKCTYNGVQLSETEFPKNWLRDG +IQIKILFPFCLKPWHISKLYPSRRELMKKQKQKDDFCFLTVWGMEAELPF +GSPRKRPSFFEPIFKELEKKIGKFKKKYFLTLKILKGKTKLFRKVSKETT +KLFIKSIGFLKKIKKELSKVNLIVLFRFKEISESNETKKEKDYLISNQII +NESFRQIESGNWPNSSLIETKMKDLTNRTSTIKNKIERITKEKKKVTPEI +DINPNKTNNIKKFESPKKIFQILKSRNTRVIWKFHYFLKLFIQRLYINLF +LSIINIPRITTQLFLKSTNKLIEKFISNNEINQEKINNKKKIHFMFISTI +KKSLYNISKKNSHILCDLSYLSQAYVFYKLSQTQVINFSKFRSVLQYNTT +SCFLKTKIKDYFKTLGIFHSELKHKKLQSYRINQWKNWLRWHYQYDLSQI +RWSRLMPKKWRTRVNQSCMAQNKNRNLNKWNSYEKDQLLHYKKENDSELY +SLSNEKDNFKKCYGYGLLAYKSINYENKSDSFFSRLPFEVQVKKNLEISY +NSNTSKHNFVDMPGNLHINNYLRKGNILDRERNLDRKYFDWKIIHFSLRQ +KGDIEAWVKIDTNSNPNTKIGINNYQIIDKIEKKGVFYLTTHQNPEKTQK +NSKKFFFDWMGMNEKIFNRPILNLEFWFFPEFVLLYNVYKIKPWIIPSKF +LLFNLNTNKNVSQNKNQNFFLPSNKKIKIKNRSQEAKEPPSQRERGSDIE +NKGNLSPVFSKHQTDLEKDYVESDTKKGKNKKQYKSNTEAELDLFLKRYL +LFQLRWNGALNQRMFENIKVYCLLLRLINPTKITISSIQRREMSLDIMLI +QANLPLTDLMKKGVLIIEPIRLSVKDNGQFIMYQTIGISLIHKSKHQTNQ +RYREQRYVDKKNFDEFILQPQTQRINTEKTHFGLLVPENILWSRRRRELR +IRSFFNSWNWNVVDRNSVFCNETNVKNWSQFLGERKPLYKDKNELIKFKF +FFWPNYRLEDLACMNRYWFDTNNGSRFSILRIHMYPRLKIN +>NC_000932@ArthCp070@ycf1@109405@110436@D@1@344 hypothetical_protein +MMVFQSFILGNLVSLCMKIINSVVVVGLYYGFLTTFSIGPSYLFLLRARV +MDEGEEGTEKKVSATTGFIAGQLMMFISIYYAPLHLALGRPHTITVLALP +YLLFHFFWNNHKHFFDYGSTTRNEMRNLRIQCVFLNNLIFQLFNHFILPS +SMLARLVNIYMFRCNNKMLFVTSSFVGWLIGHILFMKWVGLVLVWIQQNN +SIRSNVVIRSNKYKFLVSELRNSMARIFSILLFITCVYYLGRIPSPIFTK +KLKGTSETGGTKQDQEVSTEEAPFPSLFSEEGEDLDKIDEMEEIRVNGKD +KINKDDEFHVRTYYNYKTVSENLYGNKENSNLEFFKIKKKEDH +>NC_000932@Arthcp087@ycf1@123884@129244@R@1@1787 Ycf1 +MMVFQSFILGNLVSLCMKIINSVVVVGLYYGFLTTFSIGPSYLFLLRARV +MDEGEEGTEKKVSATTGFIAGQLMMFISIYYAPLHLALGRPHTITVLALP +YLLFHFFWNNHKHFFDYGSTTRNEMRNLRIQCVFLNNLIFQLFNHFILPS +SMLARLVNIYMFRCNNKMLFVTSSFVGWLIGHILFMKWVGLVLVWIQQNN +SIRSNVVIRSNKYKFLVSELRNSMARIFSILLFITCVYYLGRIPSPIFTK +KLKGTSETGGTKQDQEVSTEEAPFPSLFSEEGEDLDKIDEMEEIRVNGKD +KINKDDEFHVRTYYNYKTVSENLYGNKENSNLEFFKIKKKEDHFLWFEKP 
+FVTLVFDYKRWNRPNRYIKNDKIENIVRNEMSQYFFYTCQSDGKERISFT +YPPNLSTFFEMIQKRIPSFTKEKKTFDQVSTYWSLIHEEKRENLKKEFLN +RIEALDKEWSVENILEKTTRFCYNEAKKEYLPKIYDPFLHGISRGRIKKL +PPFQIITETYRKNNLGGSWINKIHGLLLKINYKKFEQTIEKFNRKSLSIE +KKLSFFSEPQQEEKINSEEEIKTFKFLFDIVRTDSNDQTLIKNFMDFPEI +NKKVPRWSYKLISELEELEGENEENVPMEPGIRSRKAKRVVVFTDKEPHG +EIYTNLKDNQNSDQNDEMALIRYSQQSDFRREIIKGSMRSQRRKTVIWEF +FQAKVHSPLFFDRIDKLFFFSFDIWGLKKKIIKNFIWKKKIDKKEEEQSK +REETRRIEIAETWDSFLFAQIIRGSLLVTQSILRKYIILPLLIIIKNSVR +MLLFQFPEWSQDLKDWKREMHVKCTYNGVQLSETEFPRNWLTDGIQIKIL +FPFYLKPWHKSKFQASQKARLKKTKDKGEKNDFCFLTVWGMETELPFGSA +QRKPSFFEPISKELKKRIKKLKKKSFVVLKIFKERAPIFLKVAKETKNWI +LKNFIFIKGISKRNLIPLFGPREIYELNEPKKDSIISNQMIHELSVQNKS +LEWTNSSLSEKKIKNLIDRKKTIRNQIEEISKEKQNLTNSCTKLRYDSKI +IESSKKIWQTFKRKNTRLIRKSIFFFKFCIEQMSIAIFLGIINIPRITTQ +LFFESTKKILDKYIYKNEENGEKKKNTLYFISTIKNLISNKKKMSYDLCS +LSQAYVFYKLSQIKVSNFCKLKAVLEYNICITSFFVKNKIKVFFQEHGIF +HYELKNKTFLNSEVNQWKNWLRSQYQYNLPQISWARLVTQNWKNKINKDS +LVLNPSLTKEDSYEKKKFDNYKKQKFFEADSLLNPKHNVKKDSIYNLFCY +KSIHSTEKNFDMSIGIALDNCLVSSFLEKYNIRGMGEIRHRKYLDWRILN +FWFTKKVTIEPWVDTKSKKKYINTKVQNYQKIDKITQTDLANKKRNFFDW +MGMNEEILNQRITNFEFFFFPEFFLFSSTYKMKPWVIPIKLLLLNFNENI +NVNKKIIRKKKGFIPSNEKESLRFYNLNKEEKESAGQVELESDKETKRNP +EAARLNQEKNIEENFAESTIKKRKNKKQYKSNTEAELDLFLTRYSRFQLR +WNCFFNQKILNNVKVYCLLVRLNNPNEIAVSSIERGEMSLDILMIEKNFT +FAKLMKKGILIIEPVRLSVQNDGQLIIYRTIGISLVHKNKHKISKRYKKK +SYINKKFFEKSITKYQNKTVNKKKNNYDFFVPEKILSPKRRREFRILICF +NLKKKNARDTNSRFDKNIQNLTTVLHKKKDLDLDKDKNNLINLKSFLWPN +FKLEDLACMNRYWFNTTNGNHFSMIRIRMYTRFPIP +>NC_001879@NitaCp151@ycf1@125891@131599@R@1@1903 Ycf1 +MMIFQSFLLGNLVSLCMKIINSVVVVGLYYGFLTTFSIGPSYLFLLRALV +MEEGTEKKVSATTGFITGQLMMFISIYYAPLHLALGRPHTITVLALPYLL +FHFFWNNHKHFFDYGSTTRNSMRNLSIQCVFLNNLIFQLFNHFILPSSML +ARLVNIYLFRCNSKILFVTSGFVGWLIGHILFMKWLGLVLVWIRQNHSIR +SNKYIRSNKYLVLELRNSMARIFSILLFITCVYYLGRIPSPILTKKLKEA +SKTEERVESEEERDVEIETASEMKGTKQEQEGSTEEDPYPSPSLFSEERW +DPDKIDETEEIRVNGKDKIKDKFHSHLTETGYNNINTSNSPIYDYEDSYL +NNNNTGNTEIFKLQLLDKKNENKDLFWFQQPLVSLLFDYNRWNRPFRYIK 
+NNRFEQAIRTEMSQYFFNTCKSDGKQRISFTYPPSLSTFWKMIKRRIPLL +SLQKTLPNELDNQWISTNKEKSNNLNKEFLNRLEVLDKESFSLDILETRT +RLCNDDTKKEYVPKMYDPLLNGPYRGTIKKKFSPSIINNTSLENLKERVR +INRIHTIFLPNTDYQELEQKVDTVAKKPLSTEIDEFLTLINEFGNEPKSS +LNLKDLSLFSDQEQGRVNSEKRTKFVKFVFNAIAPNGTTSEKKSIGIKEI +SKKIPRWSHKLITELEQQSGDYQEGVPLDHQIRSRKAKRVVIFTANNQNN +DPDTKDTDTADQDQTKEVALIRYSQQPDFRRGIIKGSMRAQRRKTVIWKL +FQANVHSPLFLDRITPPFLFSFDISGLIKPIFRNWSGKEGEFKILESREE +QTKREEKKEKDKKGENKRKEKARIEIAEAWDTIPFAQIIRGYMLITQSIL +RKYIVLPSLIIAKNLGRMLVLQLPEWSEDLQEWNREMHIKCTYNGVQLSE +TEFPKNWLKDGIQIKILFPFCLKPWHISKLYSSRGELMKKKKQKDDFCFL +TVWGMEAELPFGSPRKRPSFFEPIFKELEKKIGKFKKKYFITLKVFKGKI +KLFRRISKETKKWLIKSSLFIKKMKKELSKVNPIVLFRLKEIDESNETKK +EKDSLMSNQIINESFSQIESGNWPNSSLIESKMKDLTDRTSTIKNQIERI +TKEKKKVTPEIDISPNKTNNIKKFESPKKIFQILKRRNTRLIWKFHYFLK +LFIQRLYIDLFLSIINIPRINTQLFLESTNKLIDKYISNNEINQEKINNQ +KKIHFISTIKKSLYNISKKNSHIFFDLSYLSQAYVFYKLSQPQVINLSKL +RSVLQYNRTSFFLKTKIKDYFRTLGIFHSELKHKKLQSYRINQWKNWLRR +HYQYDLSQIRWSRLMPQKWRNRVNQGCMAQNRNLNKWNSYEKDQLIHYKK +ENDSELYSLANQKDNFQKCYRYDLLAYKSINYEKKNDSFISRLPFQVNKN +LEISSNSNTSKHNLFDMLGNLHINNYLRKGNILYIERNLDRKYFDWKIIH +FSLRQKEDIEAWVKIDTNSNPNTKIGINNYQIIDKIDKKGFFYLTIHQNP +ENNQKNSKKAFFDWMGMNEKILNRPILNLEFWFFPEFVPLYNVYKIKPWI +IPSKLLLLNLNTNENVSQNKNINKNQKQNFFLRSNKKIKNRIQEAKEPAS +QGEKERGSDIENKGNLGPVLSKHQNALKKDYAESDTKKGKKKKQYKSNTE +AELDLFLKRYLLFQLRWNDALNQRMIENIKVYCLLLRLINPSKIAISSIQ +RREMSLDIMLIQKNLTLTELMKKGILIIEPIRLSVKNNGQFIMYQTIGIS +LVHKSKHQTNQRYPEQRYVDKKNFDEFILQPQTQRINTDKNHFDLLVPEN +ILWSRRRRELRIRSLFNSLNWNGIDRNSVFCNENNVKNWSQFLDERKPLY +KEKNELIKLKFFLWPNYRLEDLACMNRYWFDTNNGSRFSILRIHMYPQLK +IN +>NC_002202@SpolCp093@ycf1@121596@127097@R@1@1834 ycf1_protein +MIFQSFLLGNLVSLCMKIINSVVVVGLYYGFLTTFSIGPSYLFLLRAQVM +EEGEEGTEKKVSGTTGFIMGQLMMFISIYYTPLHLALGRPHTITVLALPY +LLFHFFWNNHKHFFDYGSTSRNSMRNLSIQCVFLNNLIFQLFNYFILPSS +MLARLVNIYMFRCNNKMLFVTSSFVGWLIGHILFMKWVGLVLVWIQQNNS +IRSNKYLVSELRNSMARIFSILFFITCVYYLGRMPSPIFTNKLKQMLETN +EIEEETNLEIEKTSETKETKQEEEGFTEEDPSPSLFSEEKEDPDKIDETE +KIRVNGKDKTKDEFHLKEACYKNSPTSYSGNQDISKLEILKKEKKILFWF 
+QKPLIFLLFDYKRWNRPMRYIKNNRFENAVRNEMSQYFFYTCQNDGKQRI +SFTYPPSLSIFWEMIQRKISLATTEKFLYDDELYNYWIYTNEQKKNSLSN +EFANRITVLDKGLFYIDVLDKKTRLCKSKNEYLQKDHDPLLNGSYRGIIK +KTLLPFINNDETTVKKLIDEIFINKIHSVLGNCNNYQEFEYKKDPFKKNP +ISSKIRHFVTLMSQFDGESTFNQKGISLLSEHKQICSEDPEIFFKFLVDT +IIADSFTQTIPKESIGIKEISKKVPHWSYQLIDESEQEEMENEKQVSWPH +QIRSRSGKEVVFFTDKQENTDNPTPNTADISEQADEVVLTRYPQESDFRR +DIIKGSMRSQRRKIVIWELFQANIHSPLFLDRTNKSSFFSITFSRLIKRI +FKNYMGKNPELDISNYKEEELKKKEKAKEHKKDKEKKQEQIRLDIAETWD +TIPGAQIIRSLILLTQSILRKYILLPLLITGKNIGRILLFQLPEWSDDFK +EWTSEMHIKCTYNGVQLSEKEFPKNWLTDGMQIKILSPFCLKPWHKSMIR +PYHQDKKKKEQNQIDAFCFLTVVGLETDIPFGPPRKRPSFFQPIFKQLDK +KIEKLIKGNFQVRKRLKEKILFFLKLQNETNNWIIEIFPFFKKIIRKMST +VNTIGVFGLKEASSEIKSEKDSRIKNHMIHESSVQIRFLNQTNSSVTEKK +MKDLANRTRIIKNKIEKISNDKLKMSPKKTRYGTKNLGQILKRRNARLIR +NSNYILKFFRERIYGDIFLYIINIPKINTQLFLESTKNGIDKSIYNNESI +TKTNKNRIQFISTINKKFLPFLSTSKNNSKIISDFSFLSQAYVFYKLSQA +KILNLYKLRLVLQYRGISLFLKNEIKDFFGTQGITNSELKTKKLPNSGMN +QWKNWLKLKNNYQYNLSQLKWSRLVPQKWRNRVTEHCEVENTNLYQNEEL +INSKKHLLLLPDQKYNFQKNYRYDVLSYKFFNYKNKNDSYRYSYGLPFQV +NKNQEFSYTYNYNINNNKFIDMWWNIPISNFSYLEKTKIMDIDKNIDRKY +LDFKILDFSLRNKIDIEDWIDISTSINENTKTEPRNYQIVEKINKKSLVY +STIYQEIKQSDQKNKLFDWMGMNEKILSRPISNLEFWFFSEFFSFYNAYK +MKPWVIPINLLFSNSNVSEKFSKNKSINRKKKTNPFIPSNEKKSFELENR +NQDEKELVSKEDLGSYVQENYEKDIEEDYISFIDIKKPIKQKQPKSVIEA +EFDLFLKRYLLFQLKWADSLNEKLMDNIQVYCLVLRLINPIEILISSIER +KELSMDIMLDRKDFNCPNWKQKRVLIIEPIRLSIRGDGQFLLYQTIGISL +VHKSKHQNNQKRYSENVDKKFLGERNKNNFDLLAPENLLSPRRRRELRIL +LCLNSRNNNGVNTNPMENRVKNCNQFFDEKKDLDRDKNTLRNLKFFLWPN +YRLEDLACMNRFWFDTNNGSRFSILRIHMYPQF +>NC_002693@OeelhCp105@ycf1@129224@136615@R@1@2464 Ycf1 +MVNLVYVCMKINNSVVMVGLYYGFISAFSIGSSYLFLLRPRFLNDDPDAI +EKKASETAGFFTGQLLIFISILYGPLHLALGRPHTILLLLAPYFFFHYLF +SNSGQWPSQRFAFPLLTKSMRNRRFQLVFLNNLLFQLFSLSLLGRPMLTR +LSYIYIFRCNNKMLFVLSSFVGWLIGHILVLKWAGLVFVWLLQVIRSKTM +KYITCNVLIPATKYIIEKWRNSFVAGLIREILAMKQVESALVRIKNSKLL +DDARWWIRGSSLISGLKINIRFYARLILRGFENVYVGAKFRQDMEHLFSI +ILFAIFLLYLDQTPLLYADPADKKLQLQRKLSNETQAARAEKKLEERLTK 
+KFEAQRRAQRAAQRQALQEFKQGVVESYLAKQVAKDANQIQAQKDEKQIQ +AEQKARRIRAEQVVQYTFWLIEAQRREMEIEAARAMQEAYKGMLAAQEGY +VEEGVQEKQEGFPEELISPSPIFHSEEREENPKLLILKEKISILKKKISI +LKKKISILKEKNDLFSFEIPIITSLFDPQKPLRPLRYIKTCAGVEKAVKN +EMSQYFFYACRSDGKQRICFTYPPSLATFWEMIQRKMASRFPRIYAKAKW +RALRWSAPGSYRQWISRNKKKKNSLSTEFQNRIKTLDKKKSLLNVLARRK +RSSLLNVLARRKRSSLQNVLETRKRLCNYKTNKTKKEYLPEKEYLPEIAD +PFLTGALRGKSDPEVDDGGRKTSDLIKVVFLKNNITMATLRNKNDDDLRE +QKNAIALLSRMKNPVNKLHLLFVNERDYPFVKTLVNRINGPAVPKKKKKI +SKSKQKNVKSKQKNVKSKQKNVKSKQKNVKSKQNEIKRKVNEIKRKVNEI +KRKQNESYPRGVKFGATPKTEINPHGIRFDAATIEKYSFATGYSYSPPSF +DDILFHAFVTEPQRNKKAVIELEEEINKKVPRWSYQLIDELEQLEGAEGE +TQFSDHEIRILPFKRVAVFTEKDSKKRKPLIDEQGNFVRHRKTYAIRFLG +HMSDFRRGLIKGSARQDRRKAYVCRTTQVNARSPLFALGPRTFLDGLVNL +AVQVKFFYETRIKGEKIVDDDDDNEKDEFKVMIPDTKSIVAETREMLKQA +GAEDGQSYEDVEDDIRIENVTEMWENIDYGQVIRTFILLLHIFLRKKVVF +PAFIIGKNIARMLLLQATEWKIDFARLKRERYAICTYNGMKVSEKIAFDQ +FPPDWADDGIQILVTNPFYLKPWYRSKTRSIQKDPKKEKDPKKEKGPKKE +PWYRFKTRFIQKDPKKEKGPKKEKGPKKEPWYRRFFFQKDPKKEKGPKKG +KAQFEGDRGVRFLTSFGILTDRPFGDLITPDWGVFFNPIRNELKKKIRQF +EKKHSIILSKRFRNVLKKTKKWFIKSFLFLKRARLKRHPIELSGGRETPE +FTRSQKDIDNLKNEQDFRMSRNPRISESLLQGPVRALKDDSLPEEKVADP +EKEPSDLDNELRAVWDEIDKVTKERKKIVFTPKPDSPDKLVQAKKNILKK +LERIKSRRHKFYFLRIRKSYYVLLFFIKRISRNIKRIYLNPLERAISIRK +IHPQRFFEFSKKMIEKSIGIGKTETNKETVYKTKKKKKKKNPFISIFKES +LYDKDIRISENDIKLGDTWNGYKYKRKKATDTSDLASMSQAYVFYKLYQT +QQTQLIHLDKLRYVLQYDGTSRFLKKELKDYFEAQELFHSKLKHKNSLNS +GKNQWKNWLKAQHQYSVSPIIWNSLSPQKWRTKVNQERMDENTDLNKRYS +NEKRKQFFEANSLDDEENVVETYLGQRAGDIKNSIKSYSYDLFSYQSINS +EDKYVCINNKQKNSYNYNRRKVNLVDSPEGIALSNQFLVQNDLLDLYTFP +DRKYVPWRLFPGSLIGGNDKDKDRFVKMWTATNSGNAVKYWTAANGNTSI +KPGVFWTFQNSQRTKKQNPLFDWRGMNTELPNRCISDLKGWFFFSELLKL +DLRYQVKPWILSKNLLFENLIFENQEENPNLIQNPIEDGRKNVIQNENEN +DPIEDGRQNVIQNENENAIQNLIDFFLEKKNSPKDTNQELHAQAKARIWD +ALVASLKQKREQKERKNKRIAQLIEKKKQKEIEKQKRKIEKQKRKKEKIE +NAKKKIENEKKKIETEEEKIEKEKRKKERKKEKLKKKVAKNIEKLKNKVA +KNVAKNIEKLKKQRAKNIARMEEEDKKARKKRKRKVQVQENKIPYTAFGS +DKWQRPIAEYPKSGDIRNFQVILPEDDDEDDEEDRLDELKLNAYELSRIQ 
+KITDEKRMKRNLLSSIKRERLKMEFSTRNNSLATIMLTHGIFSIEPLRIS +RQNQDASFLIYQLIKISLVEQLDPYDHNDSFELTEKYRARRNFFMPKTNA +ETMHKSDSDLFVPETILSTKRRRELRILISFYSRRGKRKNRIYKNPVFWK +YVKNCGEVVDNSEKKKKKLIKSFLWPNYRLEDLACMNRYWFNAQNGSRFS +MLRIRMYPRLKIR +>NC_002694@LocoCp080@ycf1@120497@125878@R@1@1794 hypothetical_protein +MIFQSFILDNLVSLCLKIINSVIVVGLYYGFMTTFSTGPSYLFLLRAHVM +EEGTEKKISATTGFITGQLVMFISIYYAPLHIALDRPHTITVITLPYLLL +YFLGNNQKNFLNYVYKNQNSIRHFSIQRIFFQNLFFQLLNPFFLPSSILM +RLANIYIFQSNNKVLFLTSSFVGWLIGHVFFMKWIGLMLVWIQEKNNSIK +STVAIRSNKGVLAKFRKSMFQIFLIFFFITCLYYLGRIPPIYFFTPKMSE +IKERGEIEKREGEIDIEINSQRAGSKQEQKITAEEKLSPYLFSKKNNNLD +KIKEENDIFGFQKPLVTILFDYNRWNRPLRYIKNDRFENVVRNEISQFFF +FTCQSDGKERISFTYPPNLSTFQKMMEMKISLFTRDIISYEELSNSWRST +NEEKKKKLTNEFLNRVEVLDKESLPVDIFENRIRLCNDEKKQKYLTKEYD +PFLNGPCRGQIQKWFSPPIQKETYKKNSLFINKIHGILFSNTNNYPKFEQ +KKNIFDRKSLLTDINFFFNLITKFSRKSVSSLNFEGLYLFPKDNKGKMSS +KKKKFLFDTIRPDLNDNKIVNLQKCIGINEIVKKLPRWSYNLIDELEQLE +GKKKVEYHQIRSRKAKRVVLLTKNSQNDDNYDETTDTDNTEKKKELALIR +YSQQPDFRRDIIKGSIRAQRRKTVTCKLFQRSVDSPLFLEKMEKTSFFCF +DILDSSKIFFMFKNWIRKKKELKNSDYTDEKAKESQKKEEEKIKKNEKEE +KRRIEIGEAWDSIIFAQVIRGCLLITQSILRKYILLPSLIITKNIVRILL +FQFPEWSEDFRDWQREMYIKCTYNGVQLSETEFPKKWLTDGIQIKILFPF +RLKPWHRSKLRFTEKKKDPLKNKKVKKKNFCFLTIFGMEVELPFSGYPRN +RFSFFDPILKELKKKMKKLKNNFFLILKIVNERTKNFITTLKETSKRIIQ +SILKKVLFLNKKIKKLYNYLFLFRFKKIDELNQNKKNFPITKNNPIIYES +TILIQAINKTNCSLTEKKIKAINAKTKKIIKKIERMTKENKGGFLISEIN +SNSKKTSSNTKGLELEKKILQILQRRNVQLTHKLYSFFKFLLNFMKKVYT +DIFLCIVSVPRINVQFFLESTKKIINQSIYNKKTNEEIIDKTNQSIIHFI +SIINKSSNTKNTNSAANSYEVSALSQAYVFFKISQIQVLNVYKYKFKYVF +DYDGRSFFIKDEIKDYFFGIQGIIHSKLRHKNSPVSLKNQWTNWLKVHYQ +YDLSQNRWSRLVQKNLKNRINKHRLDQNKDLTKCDSYKKTQLIVSKNKKQ +QVDFLVNLLIQKKIKKQSRYDLLLYKFINYAEKKELSIYGYRSPFQANKK +RAISYDYNTQKKEFFDRMDDISIKNYIAEDAIRYIEQNRDRKYFDWVVMD +VKIQNNSISNLQFSFFFKFLRFYDAYRNKPWIIPIKFLFLHFSVNQNFNK +IKNIIEKKRRIDIFKPWKKKKILEVELETPNRAKKEYTSRVDLNKPSLSN +QEKDIEEDYGESDSKKGGKDKNKKKYKNKIEAEVNLLLRKYLNFHLNWKG +SLNKRVINNVKVYCLLIRLKNIKQIAISSIQRGELSLDIMMIQNEKDSTL 
+TGFRKKKEFIEKGIFIIEPVRLSRKNNEQFFMYETARLLLIHKSKRQINQ +RNPEKSDLDKQIFYKNIPPKRDQRITQNKEKKHYALVVIENILSARRRRE +LRILICFNPRSINSMPRKTIFDNENKINNCCQVFAKNKDLDKEKKILMNL +KLILWPNYRLEDLACINRYWFDTYNGSRFSIVRIHMYPRLKMR +>NC_003119@MetrCp012@ycf1@13390@18672@R@1@1761 hypothetical_chloroplast_RF1 +MIYQLFILDRLVGLWLKILNSAIVMGLYYGFLTTFSIGPSYLFLIRARVM +DKGTETEIAATTGFITGQLMMFISIYYAPLHLALIRPHTITVLTLPYLFF +NFVYKNNKHYYSADSHFYLDLDYGYKNPNSIRKFRIYKVFFNNLFFQLSN +PLLFPSSILLRLMNIYLFRSNNKLLFLTSSFLGWLIGHIFLMKCIGLILL +VWSKQKNSIKSKLTMRFDKYILLQLRNYVGQIFVVFSFVIVVHYLGRTPV +PYLYTYTDEILEYDEKQKDEINGETEIDVEIDSEQEQNGSIEDEEDILSY +LFPKKDKTLENIEQDNNLLALEKPLVTTLFDYRKWNRPLRYIKNDHFERV +VRDENSQFFFHICQSDGKERISFTYPPDLSSFLKIMEKKMDLFTKDKISY +NDNELSNYWSSNNKEKRKKLSNEFFKRAKVLDKKYKKYKKFIPVDVFENR +IRLSNDKRKIKYLTKIYDPFLNGPFRGQSFSPSIQNETYTTNSILINKIH +GLLLINSNYPEFDNSNYPEFDNSNYPEFEQKIDQFDRKFLLTEIGFFFNL +ISQFSEKSVSSFNFDGLYLFPEHEQVKIYSEEKKRKKKFLFEAIRTDQNN +QTIFNRKKCTGINEISKQVPRWSYELIDELEQMTERLTKEFQIRSAKAER +MVIFNGNTDSLTLNIGPRNDNDAIPEVDLNHEFFLVNFLREPDFDRDIIK +GSMRPLRRKIATTKLSQGNAQPHSPIFLEMIDPLYFIFGDLFDDLSQIFK +EMFRKPGTDNSEFVEFQERLEHKYEEDAKDDAEIRRLKIEEDWESILYGL +IIRSFVLLIQSFFRKYILLPSLIITKNIIRILLFQNPEWSEDFRDWSREV +HIKCTYQGIPVSDKELPKNWFDEGIQIRILNPFVLKAWHKSKVQSTEKKK +KKRSTEKKYTENKNFWFLTGYGTLVESYLDEGFPRDPLSIFGPVLKTIRK +QLKKDLKKHFFLVLKFLNERKKWFPTMLKKIENWNIKRILKSILFRFKII +DELSESKKTSTISKNNSKIEVIEVIEESPVKMESINWTNSSFTEKRIKDL +NVKTKTIIKQIETMTEEKKEGILTSEINLNSNKTTYDAKRLELQKNNLQI +LQRRFVRLIRKSYSFFKIFIEGVYIDILLCISSIARIHRQRFLDFLESTD +KILNVKKPIYDKKKKMEEMEERFENLSVSRLISILEKSENITNMNSQNSW +DVSSLSSLSQEYVFYKLSQIQFSNGSKFKIRSILESPGRSFFLKNEIKDY +FFRMQGTYNSKLRHKKRSDSLMNPWTNWFKVLYQYDLPEKRWSRLVSQNW +RNRINEHRVAQNKDLVEYDSYEKNQLIWKELILSKKQEQEGDLLKIEIKN +KIKKQYRYDLFSYQYLNFANKKKSSIYGYRSPNKNQAISYNYNISIQNYL +EEYDILDMEKNLEKNLDRKYFNWMGMNVKRKKTSRPKDKFLIPGFWFFSK +LSKLYCAYKMNPWILPIKFFVLQLDNLELTTEEYVNTVDEDLKSVSYYYK +GSDSKYRTDLKGERDFLLSKYLGFYLHCDSSDEEIGMDNTNLFCLLLRMK +KFNKIVIMSIKKLELDIEMLVDSRTKDFCYTECRDTEDLKERLIFFIEPI +RLPRKKHEQSLLYQTIRLPLIHKSKTRKSWSWKKKKSRVDQKITENKDKN 
+LYDLFVPENLLSTRRRRELRILTCFNPRNRNTVHRKTINDNENQIKNVSQ +VLTKNKDLDSETKKLMNFKLFLWPNYRLEDLACINRYWFNTHNGSHFSIL +RIHMYPRLKD +>NC_003386@PsnuCp088@ycf1@112439@117550@R@1@1704 hypothetical_protein +MIGRLYMKKLKNLFLFLSSLCPVFPWISQISLVMPFGLYYGFLTALPIGP +SQILSIRTFFLEGNRSGIICILGSMMGQFVILLSIYCSPLYVMLVKPHLM +TLLVIPYMFYYWYRTKNPSRYYILHPIKSLTHAHTRNLLLDSFIFQLLNP +ILLPNPVLTRLLNLFLFRYSSNVFFLTSSLLGWLCGHILFINSIKLLLFR +IEHDSPIIYILMKRSISRTFSILISITFFLYLGRSPVPLITKKFADEITL +SDQKIKENLWEESLWLYRPWPTSFFDQYRWNRPIRYIPNSKSSHNGFVKK +QVSKFFYDECITDGKNAISFASQPSLSIFKKQLMNYLHNSDISISTKDSY +KGWIETKREKRDALNNEFKDRIQFVYNSSTIEEAMENKTGFSHDRNHFLV +KVNDPFLSGSSRIRIPNKKYSSSLLKLHDSKDQTMKISKKTKRKHTRNKM +RNWIFNKHKKWQHNKFPLPWEPIPTKAEKVFWRILNESENPIILEMLTTL +NSIKEKNYQFRITWEHIFQLPRIEKAIFLFRSKQEIEDSIFRYPSHLSLK +NLTLFNIFTRSKNIFYSAKIAVSPILQIEEMQKELPRYNSRLRSDRIDAV +NVDVDIRQRKIKNLGPRKGKLEDKEKEKEKAAQTQTEVKKEREKEKEERV +IKRFQNQSDFRRKLVKGSIRARRRKTGIWRLYQSGTHSPFFLRMKEIPIS +FQSSINALRLNKMKDERAILGIGKELRPFNLYKKRSKADRLTIAARFDFP +IAHAGRGVLLIIQSNIRKYVILPILIICKNIGRIMLFQSPEWKEDWAEWN +QEIHIKCTYDGIEVSHRHLPAHWFKEGLQIKILYPFHLKPWHIHRTNNIN +DLRNEAQIQKEISDFGKQRKLSFSYLTIWGYQTSSVFGSMKKRPSFWRPI +ANALKKKLQRNLFSKLTWISHFFYEIILLSRTFIISKKPNNIPEMSIQSN +ELRYDVSDYELIQKYPNSNEKNDYVVMNEISIESNNRNGKEISHESQDQY +KDNFNNIRSFNDIETLLTDISGTSVEESYRDRIETYLRLNKKNHRYAINI +RLIWNKQLVQTQQEFSRFRRIIMQFMHKGYRLAKRFLTKFYREIFRRFTF +SIQLSIQLVLRLTKNITKLSEKNKVYQNLNLLKKNEQNLKIDSSRNKPVL +SQAYVFQKLWHARTRTKIDVHYLVQSLEREIVNSIENNELKASKLKDLKW +NEHNYLNDHIKDLLEIQGLLKETQTFTEKNWKEWLHCFTRYQISSKIEYG +IVPQKWKNEVKKRWKSNTNKLDKNKEYKTLEKENKYSLYETNNMLKQRIN +NRNNYCEFYNLLYSFIDSTKASNIIKLPIQQKGKEDPIQYINDINKIHEN +IHLNSKKKYKRPQFQSISTEKGDIDSNLMLWLLPNLLDTKPESVTNSLDS +YSFEMYLSQNEDKDSLKKEIRFNAKKLNLDTKEPTSDAMKPTSDTKELIS +DTNEPTSDIKSDDQSENQNKPLKEKSIRERKHHRPIPQVKWKSKSVEKKM +QRINNLTSFLSVIEDRKNMENYIISFCMKMGIDIDLLNSFFTNTEDELSI +QLLDDSAHRLPRLLNDQTLVRKMVSILLNFEKQFEEGITSKISSQSISSI +YRTEKKYSVNSYNLEDIMLSRRYRELRILNSLILEKQYVNFDHWIDKSEK +YPFLNLPSQVQIIKRFLWPTYRLEDLACMNRFWFNTNNGSRFAMLKLRMY +CPD 
+>NC_004115@ChglCp095@ycf1@114383@118735@R@1@1451 hypothetical_chloroplast_RF1 +MITTYSTFLFNFLSQFQYLVNIPEPLILFGLYYGFLTTLPISFSHIVVIR +NRLIEGKTSSVMAFCGLITGQLCMIGTIYYTPLYKLFIKPHLILLLSIIY +SFFYWQRLRNNQNYDDLREAQSLINVRNFFSFFDSFVFQILNPILLPTPI +FFRLNNVFLFRYSNNLNFFLSFFIGSLIGNFLFFNALNWIRYRFEQDSNV +IYPVLKLLINKSIIPIVFCICLIPIAKYSHIPFCTMKQKEGQSSYSFDKN +WPNIIFDSNQPHRPIRIFSETKTDDNLNINDNLSKKQTSQFFFKECISDG +NVRISYTYPSTLANFQTDLSSSFQDFSLSEQSFDNLYSNWKLEKLSRKDN +LNNLLLTKIKLLNNKKEWFYKHFQNKFGTFIKDDNNYNKFVKKSNDVRLK +QSSKIQIKKSKLLTSDIRDISTTQSGFYDLKKNKLKSFISQKFKMNSNNS +TLPVWNHLNKQLLQNELKRIKKQLQDKTKNIKENDFNNLKLLKSNIETID +NTINDIHHNKIKQITSVDLIKIFATNNKTLLLETLAFNKKITQKDNFNFN +KLFQHKNKKFTTNSGNENTYLNLNDIFKNIKRLPKWRTFSKHVVYDEVSD +IRRRAIKSNSKLKIANKDSDIIIFEYKKSLNFRARLPKGSLRARRKNKFT +WKLFHNNLNSPFFIRSKQLLNKTDIPFLKYNENYLNFFKNFISPDKNINY +LNNDISEMRRQELLFKWDKTNVHILRSMVLVGQAFFRKYIKLPIFIFFKN +LSRQLLYQPSEWTKDWSNWMNEWYIFCYYDGTELAKDQWPEMWLQRGIQI +KLINPFYIKPWYIQKSFIKNKQNKKTRTSYLTVFGSQQELPFGKKIRMPS +FWKPVRREVSKSIKLKLYFPFLTLQKNTIILFEKVFNKKRINEDNKTIEK +SILNKKNEQLILKKDEVIPNNKSIAGKLSKLDFHNQNITKTSIKNATKQI +LIKNEYNSLLKENKNLFTKNKIVFLKIKNILNKLNLKLIKVKINFTYKIK +TVLKIISRNLLKFYSIIQFQLENLGRNNSNDLSYKNQLSYQKDFPNFNNF +CLNQANIIQNLCKNNILKHKKLNQNFQINSKNLNQTNIIDVNPENIKAQD +FKNLLENIYTFTPTINLWDKLSTNNWKISVQNNWKQKSYNNYDLTKKALV +SKNLNFISYFYQNNLINNLNKKIKHTKIFNLSKNYLSLNNLNQNQIKNFD +FQNSLNNNITYKKNIKNFTIRQNVPSQLRRWDWKNNKIKKFVNRLLQKNT +ILLKEEVFNLIPFFDRFTIQNPMIRNWSHPISSILDDEIFTYELLDTFLQ +INKNIDFLHTKQIEDNLSSNSNQAIASLPLSSTTAENFLYYLTTVEDLIS +IEDKKELKILNSLNFNKSTPNYIKTNVVEKSLNENLSKNLQSILSKETLD +SINNTQILKKFLWASYRCEDLACMNRFWFSTNNGSRFGTLRLRLYPNLKN diff --git a/detectors/cds/test/test.db/ycf2.fst b/detectors/cds/test/test.db/ycf2.fst new file mode 100644 index 0000000..729ecc3 --- /dev/null +++ b/detectors/cds/test/test.db/ycf2.fst @@ -0,0 +1,466 @@ +>AC_000188@LyesCp004@ycf2@88196@95032@D@1@2279 Ycf2_protein +MRGHQFKSWIFELREILREIKNSHHFLDSWTQFNSVGSFIHIFFHQERFL +KLFDPRIWSILLSRNSQGSPSNRYFTIKGVILFVVAVLIYRINNRNMVER 
+KNLYLIGLLPIPMNSIGPRNDTLEESVGSSNINRLIVSLLYLPKGKKISE +SCFLNPKESTWVLPITKKCSMPESNWGSRWWRNWIGKKRDSSCKISNETV +AGIEILFKEKDLKYLEFLFVYYMDDPIRKDHDWELFDRLSLRKSRNRINL +NSGPLFEILVKHWISYLMSAFREKIPIEVEGFFKQQGAGSTIQSNDIEHV +SHLFSRNKWAISLQNCAQFHMWQFRQDLFVSWGKNPPESDFLRNVSRENW +IWLDNVWLVNKDRFFSKVQNVSSNIQYDSTRSSFVQVTDSSQLKGSSDQS +RDHLDSISNEDSEYHTLINQREIQQRKERSILWDPSFLQTERKEIESGRF +PKCLSGYSSMSRLFTEREKQMINHLFPEEIEEFLGNPTRSVRSFFSDRWS +ELHLGSNPTERSTRDQKLLKKQQDLSFVPSRRSEKKEMVNIFKIITYLQN +TVSIHPISSDPGCDMVPKDEPDMDSSNKISFLNKNPFFDLFHLFHDRNRG +GYTLHYDFASEERFQEMADLFTLSITEPDLVYHKGFAFSIDSCGLDQKQF +LNEARDESKKKSLLVLPPIFYEENESFSRRIRKKWVRISCGNDLEDPKPK +IVVFASNNIMEAVTQYRLIRNLIQIQYSTYGYIRNVLNRFFLMNRSDRNF +EYGIQRDQIGKDTLNHRTIMKYTINQYLSNLKKSQKKWFEPLILISRTER +SMNRDPDAYRYKWSNGSKSFQEHLEQSVSKQKSRFQVVFDRLRINQYSID +WSEVIDKKDLSKSLRFFLSKSLLFLSKLLLFLSNSLPFFCVSFGNIPIHR +SEIYIYEELKGPNDQLCNQLLESIGLQIVHLKKLKPFLLDDHDTSQKSKF +LINGGTISPFLFNKIPKWMIDSFHTRNNRRKSFDNPDSYFSMIFHDQDNW +LNPVKPFHRSSLISSFYKANRLRFLNNPHHFCFYWNTRFPFSVEKARINN +SDFTYGQFLNILFIRNKIFSLCVGKKKHAFWGRDTISPIESQVSNIFIPN +DFPQSGDETYNLYKSFHFPSRSDPFVRRAIYSIADISGTPLTEGQIVNFE +RTYCQPLSDMNLSDSEGKNLHQYLNFNSNMGLIHTPCSEKDLSSEKRKKW +SLCLKKCVEKGQTYRTFQRDSAFSTLSKWNLFQTYMPWFLTSTGYKYLNL +IFLDTFSDLLPILSSSQKFVSIFPDIMHGSGISWRILQKKLCLPQWNLIS +EISSKCLHNLLLSEEMIHRNNESPLISTHLRSPNAREFLYSILFLLLVAG +YLVRTHLLFVSRASSELQTEFERVKSLMTPSSMIELRKLLDRYPTSEPNS +FWLKNLFLVALEQLGDSLEEIRGSASGGNMLGPAYGVKSIRSKKKDWNIN +LIEIIDLIPNPINRITFSRNTRHLSHTSKEIYSLIRKRKNVNGDWIDEKI +ESWVANSDSIDDEEREFLVQFSTLTTENRIDQILLSLTHSDHLSKNDSGY +QMIEQPGAIYLRYLVDIHKKHLMNYEFNPSCLAERRIFLAHYQTITYSQT +SCGENSFHFPSHGKPFSLRLALSPSRGILVIGSIGTGRSYLVKYLATNSY +VPFITVFLNKFLDNKSKGFLLDEIDIDDSDDIDDSDNLDASDDIDRDLDT +ELELLTRMNGLTVDMMPEIDRFYITLQFELAKAMSPCIIWIPNIHDLDVN +ESNDLSLGLLVNHLSRDCERCSTRNILVIASTHIPQKVDPALIAPNKLNT +CIKIRRLLIPQQRKHFFTLSYTRGFHLEKKMFHTNGFGSITMGSNARDLV +ALTNEVLSISITQKKSIIDTNTIRSALHRQTWDLRSQVRSVQDHGILFYQ +IGRAVAQNVLLSNCPIDPISIYMKKKSCNEGDSYLYKWYFELGTSMKRLT +ILLYLLSCSAGSVAQDLWSLSVPDEKNGITSYGLVENDSDLVHGLLEVEG 
+ALVGSSRTEKDCSQFDNDRVTLLLRPEPRNPLDMMQKGSWSILDQRFLYE +KYESEFEEGEGEGALDPQEDLFNHIVWAPRIWRPWGFLFDCIERPNELGF +PYWSRSFRGKRIIYDEEDELQENDSGFLQSGTMQYQTRDRSQGLFRISQF +IWDPADPLFFLFKDQPPGSVFSHRELFADEEMSKGLLTSQTDPPTSLYKR +WFIKNTQEKHFELLINRQRWLRTNSSLSNGSFRSNTLSESYQYLSNLFLS +NGTLLDQMPKTLLRKRWLFPDEMKIGFM +>AC_000188@LyesCp055@ycf2@146312@153148@R@1@2279 Ycf2_protein +MRGHQFKSWIFELREILREIKNSHHFLDSWTQFNSVGSFIHIFFHQERFL +KLFDPRIWSILLSRNSQGSPSNRYFTIKGVILFVVAVLIYRINNRNMVER +KNLYLIGLLPIPMNSIGPRNDTLEESVGSSNINRLIVSLLYLPKGKKISE +SCFLNPKESTWVLPITKKCSMPESNWGSRWWRNWIGKKRDSSCKISNETV +AGIEILFKEKDLKYLEFLFVYYMDDPIRKDHDWELFDRLSLRKSRNRINL +NSGPLFEILVKHWISYLMSAFREKIPIEVEGFFKQQGAGSTIQSNDIEHV +SHLFSRNKWAISLQNCAQFHMWQFRQDLFVSWGKNPPESDFLRNVSRENW +IWLDNVWLVNKDRFFSKVQNVSSNIQYDSTRSSFVQVTDSSQLKGSSDQS +RDHLDSISNEDSEYHTLINQREIQQRKERSILWDPSFLQTERKEIESGRF +PKCLSGYSSMSRLFTEREKQMINHLFPEEIEEFLGNPTRSVRSFFSDRWS +ELHLGSNPTERSTRDQKLLKKQQDLSFVPSRRSEKKEMVNIFKIITYLQN +TVSIHPISSDPGCDMVPKDEPDMDSSNKISFLNKNPFFDLFHLFHDRNRG +GYTLHYDFASEERFQEMADLFTLSITEPDLVYHKGFAFSIDSCGLDQKQF +LNEARDESKKKSLLVLPPIFYEENESFSRRIRKKWVRISCGNDLEDPKPK +IVVFASNNIMEAVTQYRLIRNLIQIQYSTYGYIRNVLNRFFLMNRSDRNF +EYGIQRDQIGKDTLNHRTIMKYTINQYLSNLKKSQKKWFEPLILISRTER +SMNRDPDAYRYKWSNGSKSFQEHLEQSVSKQKSRFQVVFDRLRINQYSID +WSEVIDKKDLSKSLRFFLSKSLLFLSKLLLFLSNSLPFFCVSFGNIPIHR +SEIYIYEELKGPNDQLCNQLLESIGLQIVHLKKLKPFLLDDHDTSQKSKF +LINGGTISPFLFNKIPKWMIDSFHTRNNRRKSFDNPDSYFSMIFHDQDNW +LNPVKPFHRSSLISSFYKANRLRFLNNPHHFCFYWNTRFPFSVEKARINN +SDFTYGQFLNILFIRNKIFSLCVGKKKHAFWGRDTISPIESQVSNIFIPN +DFPQSGDETYNLYKSFHFPSRSDPFVRRAIYSIADISGTPLTEGQIVNFE +RTYCQPLSDMNLSDSEGKNLHQYLNFNSNMGLIHTPCSEKDLSSEKRKKW +SLCLKKCVEKGQTYRTFQRDSAFSTLSKWNLFQTYMPWFLTSTGYKYLNL +IFLDTFSDLLPILSSSQKFVSIFPDIMHGSGISWRILQKKLCLPQWNLIS +EISSKCLHNLLLSEEMIHRNNESPLISTHLRSPNAREFLYSILFLLLVAG +YLVRTHLLFVSRASSELQTEFERVKSLMTPSSMIELRKLLDRYPTSEPNS +FWLKNLFLVALEQLGDSLEEIRGSASGGNMLGPAYGVKSIRSKKKDWNIN +LIEIIDLIPNPINRITFSRNTRHLSHTSKEIYSLIRKRKNVNGDWIDEKI +ESWVANSDSIDDEEREFLVQFSTLTTENRIDQILLSLTHSDHLSKNDSGY 
+QMIEQPGAIYLRYLVDIHKKHLMNYEFNPSCLAERRIFLAHYQTITYSQT +SCGENSFHFPSHGKPFSLRLALSPSRGILVIGSIGTGRSYLVKYLATNSY +VPFITVFLNKFLDNKSKGFLLDEIDIDDSDDIDDSDNLDASDDIDRDLDT +ELELLTRMNGLTVDMMPEIDRFYITLQFELAKAMSPCIIWIPNIHDLDVN +ESNDLSLGLLVNHLSRDCERCSTRNILVIASTHIPQKVDPALIAPNKLNT +CIKIRRLLIPQQRKHFFTLSYTRGFHLEKKMFHTNGFGSITMGSNARDLV +ALTNEVLSISITQKKSIIDTNTIRSALHRQTWDLRSQVRSVQDHGILFYQ +IGRAVAQNVLLSNCPIDPISIYMKKKSCNEGDSYLYKWYFELGTSMKRLT +ILLYLLSCSAGSVAQDLWSLSVPDEKNGITSYGLVENDSDLVHGLLEVEG +ALVGSSRTEKDCSQFDNDRVTLLLRPEPRNPLDMMQKGSWSILDQRFLYE +KYESEFEEGEGEGALDPQEDLFNHIVWAPRIWRPWGFLFDCIERPNELGF +PYWSRSFRGKRIIYDEEDELQENDSGFLQSGTMQYQTRDRSQGLFRISQF +IWDPADPLFFLFKDQPPGSVFSHRELFADEEMSKGLLTSQTDPPTSLYKR +WFIKNTQEKHFELLINRQRWLRTNSSLSNGSFRSNTLSESYQYLSNLFLS +NGTLLDQMPKTLLRKRWLFPDEMKIGFM +>NC_000932@ArthCp066@ycf2@86474@93358@D@1@2295 Ycf2 +MKGHQFKSWIFELREIVREIKNAHYFLDSWTQFNSVGSFIHIFFHQERFR +KLLDPRIFSILLLRNSQGSTSNRYFTIKGVVLFVVAALLYRINNRNMVES +KNLYLKGLLPIPMNSIGPRNDTSEESFGSCNINRLIVSLLYLTKGKKISE +SCFRDPKESTWVLPITQKCIMPESNWSSRWWRNWIGKKRGFCCKISNETV +AGIDISFKEKDIKYLEFLFVYYMDDPIRKGHDWELFDRLSPSKRRNIINL +NSGQLFEILVKDWICYLMFAFREKIPIEVEGFCKQQGAGSTIQSNDIEHV +SHLFSRNKWAISLQNCAQFHMWQFHQDLFVSWGKNPHESDFFRKISRENW +IWLDNVWLVNKDRFFSKVRNVSSNIQYDSTRSSFVQVTDSSQLNGSSDQF +IDPFDSISNEDSEYHYHTLINQREIQQLKERSILLDPSFIQTEGREIESD +RFPKYLSGYSSMPRLFTEREKRMNNHLLPEESEEFLGNPTRAIRSFFSDR +WSELHLGSNPTERSTRDQKLLKKEQDVSFVPSRRSENKEIVNIFKIITYL +QNTVSIHPISSDLGCDMVPKDELDMDSSNKISFLNKNPFFDLFHLFHERK +RGGYTLRHESEERFQEMADLFTLSITEPDLVYHKGFAFSIDSYGLDQRQF +LKEVFNFRDESKKKSLLVLPPIFYEENESFYRRLRKIWVRISCGNYLEDQ +KRVVFASNNIMEAVNQYRLIRNMIQIQFQYSPYGYIRNVLNRFFLMKRPD +RNFEYGIQRDLIGNDTLNHRTIMKDTINQHLSNLKKSQKKWFDPLIFLSQ +TERSINRDPNAYRYKWSNGSKNFQEHLEHFVSERKSRFQVVFDQLCINQY +SIDWSEVIDKKDLSKSLRFFLSKLLRFFLSKLLLFLSKLLLFLSNSLPFF +FVSFENIPIHRSEIHIYELKGPNDQLCNQLLESIGLQIVHLKKLKPFLLD +DHNTSQKSKFLINGGTISPFLFNKIPKWMIDSFHTRKNRRKSFDNTDSAY +FSIVSHDQDNWLNPVKPFQRSSLISSFSKANRLRFLNNPHHFCFYCNKRF +PFYVEKARLNNSDFTFTYGQFLTILFIHNKTFSSCGGKKKHAFLERDTIS 
+PSSIESQVSNIFISNDFPQSGDERYNLYKSFHFPIRSDPLVRRAIYSIAD +ISGTPLIEGQRVNFERTYCQTLSDMNLSDSEEKSLHQYLNFNSNMGLIHT +PCSEKYLQRKKRSLCLKKCVDKGQMDRTFQRDSAFSTLSKWNLFQTYMPW +FFTSTGYKYLNLIFLDTFSDLLRILSSSQKFVSIFHDIMHGLDISWRILQ +KKLCLPQRNLISEISSKSLHNLLLSEEMIHRNNESSLISTHLRSPNVREV +LYSILFLLLVAGYIVRTHLLFVSRAYSELQTEFEKIKSLMIPSYMIELRK +LLDRYPTSELNSFWLKNLFLVALEQLGDCLEEIRGSGGNMLWGGDPAYGV +KSIRSKKKDLKINFIDIIDLISIIPNPINRITFSRNTRHLSHTSKEIYSL +IRKRKNVSGDWIDDKIESWVANSDSIDDKEREFLVQFSTLRAEKRIDQIL +LSLTHSDHLSKNDSGYQMIEQPGTIYLRYLVDIHKKYLMNYEFNTSCLAE +RRIFLAHYQTITYSQTSCGANSFHFPSHGKPFSLRLALSPSRSILVIGSI +GTGRSYLVKYLATNSYVPFITVFLNKFLDNKPKGFFIDDIDIDDSDDIDA +SNDIDRELDTELELLTMMNALTMDMMLEIDRFYITLQFELAKAMSPCIIW +IPNIHDLDVNESSYLALGLLVNSLSRDCERCSTRNILVIASTHIPQKVDP +ALIAPNKLNTCIKIRRLLIPQQRKHFFTLSYTRGFHLEKKMFHTNGFESI +TMGSSARDLVALTNEALSISITQKKSIIDTNTIRSALHRQTWDLRSQVRS +VQDHGILFYQIGRAVAQNVLISNCPIDPISIYMKKKSCNEGDSYLYKWYF +ELGTSMKKFTILLYLLSCSAGSVAQDLWSLPVPDEKNRITSYGFVENDSD +LVHGLLEVQGALVGSSRTEKDCSQFDNDRVTLLFRSEPRDPLYMMQDGSC +SIVDQRFLYEKYESEFEEGEGEGVLDPQQIEEDLFNHIVWAPRIWRPRGF +LFDCIERPNELGFPYSAGSFRGKRIIYDEKYELQENDSEFLQSGTMQYQR +RDRSSKEQGFFRISQFIWDPADPLFFLFKDQPFVSVFSHREFFADEEMSK +GLLTSQTDPPTSIYKRWFIKNTQEKHFELLIQRQRWLRTNSSLSNGFFRS +NTRSESYQYLSNLFISNGTLLDRMTKTLLKKRWLFSDEMKIGFM +>NC_000932@ArthCp083@ycf2@145291@152175@R@1@2295 Ycf2 +MKGHQFKSWIFELREIVREIKNAHYFLDSWTQFNSVGSFIHIFFHQERFR +KLLDPRIFSILLLRNSQGSTSNRYFTIKGVVLFVVAALLYRINNRNMVES +KNLYLKGLLPIPMNSIGPRNDTSEESFGSCNINRLIVSLLYLTKGKKISE +SCFRDPKESTWVLPITQKCIMPESNWSSRWWRNWIGKKRGFCCKISNETV +AGIDISFKEKDIKYLEFLFVYYMDDPIRKGHDWELFDRLSPSKRRNIINL +NSGQLFEILVKDWICYLMFAFREKIPIEVEGFCKQQGAGSTIQSNDIEHV +SHLFSRNKWAISLQNCAQFHMWQFHQDLFVSWGKNPHESDFFRKISRENW +IWLDNVWLVNKDRFFSKVRNVSSNIQYDSTRSSFVQVTDSSQLNGSSDQF +IDPFDSISNEDSEYHYHTLINQREIQQLKERSILLDPSFIQTEGREIESD +RFPKYLSGYSSMPRLFTEREKRMNNHLLPEESEEFLGNPTRAIRSFFSDR +WSELHLGSNPTERSTRDQKLLKKEQDVSFVPSRRSENKEIVNIFKIITYL +QNTVSIHPISSDLGCDMVPKDELDMDSSNKISFLNKNPFFDLFHLFHERK +RGGYTLRHESEERFQEMADLFTLSITEPDLVYHKGFAFSIDSYGLDQRQF 
+LKEVFNFRDESKKKSLLVLPPIFYEENESFYRRLRKIWVRISCGNYLEDQ +KRVVFASNNIMEAVNQYRLIRNMIQIQFQYSPYGYIRNVLNRFFLMKRPD +RNFEYGIQRDLIGNDTLNHRTIMKDTINQHLSNLKKSQKKWFDPLIFLSQ +TERSINRDPNAYRYKWSNGSKNFQEHLEHFVSERKSRFQVVFDQLCINQY +SIDWSEVIDKKDLSKSLRFFLSKLLRFFLSKLLLFLSKLLLFLSNSLPFF +FVSFENIPIHRSEIHIYELKGPNDQLCNQLLESIGLQIVHLKKLKPFLLD +DHNTSQKSKFLINGGTISPFLFNKIPKWMIDSFHTRKNRRKSFDNTDSAY +FSIVSHDQDNWLNPVKPFQRSSLISSFSKANRLRFLNNPHHFCFYCNKRF +PFYVEKARLNNSDFTFTYGQFLTILFIHNKTFSSCGGKKKHAFLERDTIS +PSSIESQVSNIFISNDFPQSGDERYNLYKSFHFPIRSDPLVRRAIYSIAD +ISGTPLIEGQRVNFERTYCQTLSDMNLSDSEEKSLHQYLNFNSNMGLIHT +PCSEKYLQRKKRSLCLKKCVDKGQMDRTFQRDSAFSTLSKWNLFQTYMPW +FFTSTGYKYLNLIFLDTFSDLLRILSSSQKFVSIFHDIMHGLDISWRILQ +KKLCLPQRNLISEISSKSLHNLLLSEEMIHRNNESSLISTHLRSPNVREV +LYSILFLLLVAGYIVRTHLLFVSRAYSELQTEFEKIKSLMIPSYMIELRK +LLDRYPTSELNSFWLKNLFLVALEQLGDCLEEIRGSGGNMLWGGDPAYGV +KSIRSKKKDLKINFIDIIDLISIIPNPINRITFSRNTRHLSHTSKEIYSL +IRKRKNVSGDWIDDKIESWVANSDSIDDKEREFLVQFSTLRAEKRIDQIL +LSLTHSDHLSKNDSGYQMIEQPGTIYLRYLVDIHKKYLMNYEFNTSCLAE +RRIFLAHYQTITYSQTSCGANSFHFPSHGKPFSLRLALSPSRSILVIGSI +GTGRSYLVKYLATNSYVPFITVFLNKFLDNKPKGFFIDDIDIDDSDDIDA +SNDIDRELDTELELLTMMNALTMDMMLEIDRFYITLQFELAKAMSPCIIW +IPNIHDLDVNESSYLALGLLVNSLSRDCERCSTRNILVIASTHIPQKVDP +ALIAPNKLNTCIKIRRLLIPQQRKHFFTLSYTRGFHLEKKMFHTNGFESI +TMGSSARDLVALTNEALSISITQKKSIIDTNTIRSALHRQTWDLRSQVRS +VQDHGILFYQIGRAVAQNVLISNCPIDPISIYMKKKSCNEGDSYLYKWYF +ELGTSMKKFTILLYLLSCSAGSVAQDLWSLPVPDEKNRITSYGFVENDSD +LVHGLLEVQGALVGSSRTEKDCSQFDNDRVTLLFRSEPRDPLYMMQDGSC +SIVDQRFLYEKYESEFEEGEGEGVLDPQQIEEDLFNHIVWAPRIWRPRGF +LFDCIERPNELGFPYSAGSFRGKRIIYDEKYELQENDSEFLQSGTMQYQR +RDRSSKEQGFFRISQFIWDPADPLFFLFKDQPFVSVFSHREFFADEEMSK +GLLTSQTDPPTSIYKRWFIKNTQEKHFELLIQRQRWLRTNSSLSNGFFRS +NTRSESYQYLSNLFISNGTLLDRMTKTLLKKRWLFSDEMKIGFM +>NC_001568@EpviCp27@ycf2@22045@28695@D@1@2217 Ycf2 +MKEHPFPYKSWILELREIKNSHYFLDSWTKFNSVGSYINIFSHQERFIKL +FDPRILSILLSRNSQGSTSNRYFTIKGVILFVVAVLIYRINNRNMVEIKN +IYWRGLLPIPMNSIGPRNDTLEELVGSYNINRFIVSLLYLTKGKNISESF +FLNLKESTLVLPITKKCSMPESNWGSRWWRNWTGKNRDYSCKISNETVAG 
+IEILFKEKDKKYLEFIFFYYMDDPIRKDRDWELFDRLSPSKRLNKINFYS +GPLFEILVKRRIYYLMSAFREKIPIEVVKGFFKQQKVGSTIQSNDIEHVS +HFFSRNKRAISLKNSAQFNMWQFRQDLLVSWGENPHESDFLRNVSRANWI +WLNNVWLVNKYRFCRKVRNVSSNIKYKYDSTRSRSSFVQVTDSSQLKGSY +YKSSGHFYSVISNEDSEYHTLINQREIKPLKSIFFDPSFLQTEATEIESD +QLQKRPSGYSSTLFTEHEKQMINHMLPEEIEEFIGNPTRLVHSFLSDRWS +ELHLGSNPTERSTRDHKLLKKQQDLSFVPSRRSENKELVNILKIITYLKN +TVSIHPISSDPGCDGVLKDEPDMDSSNKISVFNKNTFIYLFHLFHDWNRV +GYTLNLHHDFELEERFQEKADLFTLSITEPDLVYHKGFSFSIYMDQKQKM +VVFASNNIMEAVNQSRFIRNMIKIQYSTYGYIRNVLHRFFLMNRSDHNLE +YEIKRDQIGKDTLNHRTIIKYMINQHLSNFKKSQNKWFNPILFFSRTERS +VNRNPDAYRYKRSNGSNNFLEHLEHFVSEQKSHFKFKIVFDLIRFNQYSI +DWSAFIDTKDLSKPLRFFLSKLLFFLSNSLPFFCVSFGNIPIHRSEIYIY +ELKDPNDQLCNQFLEPIDLKIVHLKKRKPFLLGYHGTSRKLKLLITGGRP +FLFNKIPRCMIDSFHTINNRSKSFDNTDSYLSMIFHNKDNWLNLVKPFHR +SSLISYFYKANRLQFLNNPHNFCFYCNTRLPFYVEKAHIHNYYFTYGQFL +NILFIRNKIFSLCVDKKKHAFWGGRDTISPIESQVSKIFIPKNFPQSGDE +TYNLSQPFHFPSRYDPFVRLIANIYGTPLTEGQIVNLGRTYCQPLSDMNL +SDSEGKNFHQYLNFNSNMGLIHTPCSDKYLPSEKRKKRSLCINKYKCVEK +GQMYRTFQRKVAFSTLSKWNLFQTYMPWFLTSAGYKYINLIFLDTFSELL +SILSSSKKFVSIFNNIMHGSGISWRIINKKRCLPQWNLISEISSKCLHNL +LLSEETIRQNNESPLISTHLRSPNVREFLYSILFLLLVVGYLVRTHLLFV +SRASSELQTEFKRVKSLMIPSSMIELRKLLNRYPTPASNSFWLKNLFIVA +MEQLVYSLEEIRASGGNLLGPAYGVKSICSKNKYFNINLIDLIPNPINRI +IFSRNMRHLSHTSKEIYSLIRKRKNVNGDWIDDIIESWVANSDSIDDEER +EFLVQFSALTTEKRIYQILLSLTHSDHLSKNDSGYKMIEQPGAIYLRYLV +DIHKKYLLNYECNTSCLVERRVFLAHSQTITYSQTSRGTNTLHFPSQGKP +FSISLALSPSKGILVIGSIGTGRSFLVKYLATNSYVPFITVFLNKFLDNK +PKGFLVDDNDDNDSSDDIYASDDINSDLDTELELITMMNALTMDMMLELD +RFFTTLQLELAKAMSPWIIWIPNIHDLDVNESNYLSFGLLVNHLSERCST +NNIIVIASTHIPKKVDPALLAPNKLNTCIKIRRLLIPQQRKHFCTLSYTR +GFHLENKIFHTNGFGSITMGSSARDLVALTNEALSISITQNKSILDTNTI +RSALHRQTWDLRSGVRSFQDNGILSYQIGRAITQNVLLSNCPIDPISIYM +KKKSCTCNGGDYYFYKWYFGLGTSMKKLTILLYLLSCSAGSVAQDLWSLP +GPAEKNGITSYGLVENDSDLVRGLLEVEGALVGSSRTEKDCSPFDNDRVI +FTLILRPEPGNPLDIIKKGSCSIFDHRFIYEKYESEFEEGYGEGALDPQQ +IEEDLFNHIVWAPRIWRPWGFIFYCIERPNELGFPYWSRSFRGKRIVYDK +DEEGELQENDSELLKSGTVQYQTRDRSSKEQGLLKINQFIWDPADPLFFL 
+LKDQPPGSVFSHRRFFADEEMSKGLLTSQKDPPTSIYKRWFIKNTQEQHF +ELLINRQRWLRTKSSLSKSNGSFRSNTLFESYQYLSTLFLSNGTLFDKMT +KTLLIKRWLFPDEMQM +>NC_001568@EpviCp32@ycf2@61133@67783@R@1@2217 Ycf2 +MKEHPFPYKSWILELREIKNSHYFLDSWTKFNSVGSYINIFSHQERFIKL +FDPRILSILLSRNSQGSTSNRYFTIKGVILFVVAVLIYRINNRNMVEIKN +IYWRGLLPIPMNSIGPRNDTLEELVGSYNINRFIVSLLYLTKGKNISESF +FLNLKESTLVLPITKKCSMPESNWGSRWWRNWTGKNRDYSCKISNETVAG +IEILFKEKDKKYLEFIFFYYMDDPIRKDRDWELFDRLSPSKRLNKINFYS +GPLFEILVKRRIYYLMSAFREKIPIEVVKGFFKQQKVGSTIQSNDIEHVS +HFFSRNKRAISLKNSAQFNMWQFRQDLLVSWGENPHESDFLRNVSRANWI +WLNNVWLVNKYRFCRKVRNVSSNIKYKYDSTRSRSSFVQVTDSSQLKGSY +YKSSGHFYSVISNEDSEYHTLINQREIKPLKSIFFDPSFLQTEATEIESD +QLQKRPSGYSSTLFTEHEKQMINHMLPEEIEEFIGNPTRLVHSFLSDRWS +ELHLGSNPTERSTRDHKLLKKQQDLSFVPSRRSENKELVNILKIITYLKN +TVSIHPISSDPGCDGVLKDEPDMDSSNKISVFNKNTFIYLFHLFHDWNRV +GYTLNLHHDFELEERFQEKADLFTLSITEPDLVYHKGFSFSIYMDQKQKM +VVFASNNIMEAVNQSRFIRNMIKIQYSTYGYIRNVLHRFFLMNRSDHNLE +YEIKRDQIGKDTLNHRTIIKYMINQHLSNFKKSQNKWFNPILFFSRTERS +VNRNPDAYRYKRSNGSNNFLEHLEHFVSEQKSHFKFKIVFDLIRFNQYSI +DWSAFIDTKDLSKPLRFFLSKLLFFLSNSLPFFCVSFGNIPIHRSEIYIY +ELKDPNDQLCNQFLEPIDLKIVHLKKRKPFLLGYHGTSRKLKLLITGGRP +FLFNKIPRCMIDSFHTINNRSKSFDNTDSYLSMIFHNKDNWLNLVKPFHR +SSLISYFYKANRLQFLNNPHNFCFYCNTRLPFYVEKAHIHNYYFTYGQFL +NILFIRNKIFSLCVDKKKHAFWGGRDTISPIESQVSKIFIPKNFPQSGDE +TYNLSQPFHFPSRYDPFVRLIANIYGTPLTEGQIVNLGRTYCQPLSDMNL +SDSEGKNFHQYLNFNSNMGLIHTPCSDKYLPSEKRKKRSLCINKYKCVEK +GQMYRTFQRKVAFSTLSKWNLFQTYMPWFLTSAGYKYINLIFLDTFSELL +SILSSSKKFVSIFNNIMHGSGISWRIINKKRCLPQWNLISEISSKCLHNL +LLSEETIRQNNESPLISTHLRSPNVREFLYSILFLLLVVGYLVRTHLLFV +SRASSELQTEFKRVKSLMIPSSMIELRKLLNRYPTPASNSFWLKNLFIVA +MEQLVYSLEEIRASGGNLLGPAYGVKSICSKNKYFNINLIDLIPNPINRI +IFSRNMRHLSHTSKEIYSLIRKRKNVNGDWIDDIIESWVANSDSIDDEER +EFLVQFSALTTEKRIYQILLSLTHSDHLSKNDSGYKMIEQPGAIYLRYLV +DIHKKYLLNYECNTSCLVERRVFLAHSQTITYSQTSRGTNTLHFPSQGKP +FSISLALSPSKGILVIGSIGTGRSFLVKYLATNSYVPFITVFLNKFLDNK +PKGFLVDDNDDNDSSDDIYASDDINSDLDTELELITMMNALTMDMMLELD +RFFTTLQLELAKAMSPWIIWIPNIHDLDVNESNYLSFGLLVNHLSERCST +NNIIVIASTHIPKKVDPALLAPNKLNTCIKIRRLLIPQQRKHFCTLSYTR 
+GFHLENKIFHTNGFGSITMGSSARDLVALTNEALSISITQNKSILDTNTI +RSALHRQTWDLRSGVRSFQDNGILSYQIGRAITQNVLLSNCPIDPISIYM +KKKSCTCNGGDYYFYKWYFGLGTSMKKLTILLYLLSCSAGSVAQDLWSLP +GPAEKNGITSYGLVENDSDLVRGLLEVEGALVGSSRTEKDCSPFDNDRVI +FTLILRPEPGNPLDIIKKGSCSIFDHRFIYEKYESEFEEGYGEGALDPQQ +IEEDLFNHIVWAPRIWRPWGFIFYCIERPNELGFPYWSRSFRGKRIVYDK +DEEGELQENDSELLKSGTVQYQTRDRSSKEQGLLKINQFIWDPADPLFFL +LKDQPPGSVFSHRRFFADEEMSKGLLTSQKDPPTSIYKRWFIKNTQEQHF +ELLINRQRWLRTKSSLSKSNGSFRSNTLFESYQYLSTLFLSNGTLFDKMT +KTLLIKRWLFPDEMQM +>NC_001879@NitaCp068@ycf2@88885@95727@D@1@2281 Ycf2 +MRGHQFKSWIFELREILREIKNSHHFLDSWTQFNSAGSFIHIFFHQERFL +KLFDPRIWSILLSRNSQGSTSNRYFTIKGVILFVVAVLIYRINNRNMVER +KNLYLIGLLPIPMNSIGPRNDTLEESVGSSNINRLIVSLLYLPKGKKISE +SCFLNPKESTWVLPITKKCSMPESNWGSRWWRNWIGKKRDSSCKISNETV +AGIEILFKEKDLKYLEFLFVYYMDDPIRKDHDWELFDRLSLRKSRNRINL +NSGPLFEILVKHWISYLMSAFREKIPIEVEGFFKQQGAGSTIQSNDIEHV +SHLFSRNKWAISLQNCAQFHMWQFRQDLFVSWGKNPPESDFLRNVSRENW +IWLDNVWLVNKDRFFSKVQNVSSNIQYDSTRSSFVQVTDSSQLKGSSDQS +RDHLDSISNEDSEYHTLINQREIQQRKERSILWDPSFLQTERKEIESGRF +PKCLSGYSSMSRLFTEREKQMINHLFPEEIEEFLGNPTRSVRSFFSDRWS +ELHLGSNPTERSTRDQKLLKKQQDLSFVPSKRSENKEMVNIFKIITYLQN +TVSIHPISSDPGCDMVPKDEPDMDSSNKISFLNKNPFFDLFHLFHDRNRG +GYTLHYDFESEERFQEMADLFTLSITEPDLVYHKGFAFSIDSCGLDQKQF +LNEARDESKKKSLLVLPPIFYEENESFSRRIRKKWVRISCGNDLEDPKPK +IVVFASNNIMEAVTQYRLIRNLIQIQYSTYGYIRNVLNRFFLMNRSDRNF +EYGIQRDQIGKDTLNHRTIMKYTINQYLSNLKKSQKKWFEPLILISRTER +SMNRDPDAYRYKWSNGSKNFQEHLEQSVSEQKSRFQVVFDRLRINQYSID +WSEVIDKKDLSKPLRFFLSKSLLFLSKLLFFLSNSLPFFCVSFGNIPIHR +SEIYIYELKGPNDQLCNQLLESIGLQIVHLKKWKPFLLDDHDTSQKSKFL +INGGTISPFLFNKIPKWMIDSFHTRNNRRKSFDNPDSYFSMIFHDQDNWL +NPVKPFHRSSLISSFYKANRLRFLNNPHHFCFYWNTRFPFSVEKARINNS +DFTYGQFLNILFIRNKIFSLCVGKKKHAFWGRDTISPIESQVSNIFIPND +FPQSGDETYNLYKSFHFPSRSDPFVRRAIYSIADISGTPLTEGQIVNFER +TYCQPLSDMNLSDSEGKNLHQYLNFNSNMGLIHTPCSEKDLSSEKRKKRS +LCLKKCVEKGQMYRTFQRDSAFSTLSKWNLFQTYMPWFLTSTGYKYLNLI +FLDTFSDLLPILSSSQKFVPIFHDIMHGSGISWRILQKKLCLPQWNLISE +ISSKCLHNLLLSEEMIHRNNESPLISTHLRSPNAREFLYSILFLLLVAGY +LVRTHLLFVSRASSELQTEFEKVKSLMIPSSMIELRKLLDRYPTSEPNSF 
+WLKNLFLVALEQLGDSLEEIRGSASGGNMLGPAYGVKSIRSKKKDWNINL +IEIIDLIPNPINRITFSRNTRHLSHTSKEIYSLIRKRKNVNGDWIDDKIE +SWVANSDSIDDEEREFLVQFSTLTTENRIDQILLSLTHSDRLSKNDSGYQ +MIEQPGAIYLRYLVDIHKKHLMNYEFNPSCLAERRIFLAHYQTITYSQTS +CGENSFHFPSHGKPFSLRLALSPSRGILVIGSIGTGRSYLVKYLATNSYV +PFITVFLNKFLDNKPKGFLLDEIDIDDSDDIDDSDNLDASDDIDRDLDTE +LKLLTRMNGLTMDMMPEIDRFYITLQFELAKAMSPCIIWIPNIHDLDVNE +SNDLALGLLVNHLSRDCERCSTRNILVIASTHIPQKVDPALIAPNKLNTC +IKIRRLLLPQQRKHFFTLSYTRGFHLEKKMFHTNGFGSITMGSNARDLVA +LTNEVLSISITQKKSIIDTNTIRSALHRQTWDLRSQVRSVQDHGILFYQI +GRAVAQNVLLSNCPIDPISIYMKKKSCNEGDSYLYKWYFELGTSMKRLTI +LLYLLSCSAGSVAQDLWSLSGPDEKNGITSYGLVENDSDLVHGLLEVEGA +LVGSSRTEKDCSQFDNDRVTLLLRPEPRNPLDMMQNGSCSILDQRFLYEK +YESEFEEGEGEGALDPQEDLFNHIVWAPRIWRPWGFLFDCIERPNELGFP +YWSRSFRGKRIIYDEEDELQENDSEFLQSGTMQYQTRDRSSKEQGLFRIS +QFIWDPADPLFFLFKDQPPGSVFSHRELFADEEMSKGLLTSQTDPPTSIY +KRWFIKNTQEKHFELLINRQRWLRTNSSLSNGSFRSNTLSESYQYLSNLF +LSNGTLLDQMTKTLLRKRWLFPDEMKIGFM +>NC_001879@NitaCp102@ycf2@146903@153745@R@1@2281 Ycf2 +MRGHQFKSWIFELREILREIKNSHHFLDSWTQFNSAGSFIHIFFHQERFL +KLFDPRIWSILLSRNSQGSTSNRYFTIKGVILFVVAVLIYRINNRNMVER +KNLYLIGLLPIPMNSIGPRNDTLEESVGSSNINRLIVSLLYLPKGKKISE +SCFLNPKESTWVLPITKKCSMPESNWGSRWWRNWIGKKRDSSCKISNETV +AGIEILFKEKDLKYLEFLFVYYMDDPIRKDHDWELFDRLSLRKSRNRINL +NSGPLFEILVKHWISYLMSAFREKIPIEVEGFFKQQGAGSTIQSNDIEHV +SHLFSRNKWAISLQNCAQFHMWQFRQDLFVSWGKNPPESDFLRNVSRENW +IWLDNVWLVNKDRFFSKVQNVSSNIQYDSTRSSFVQVTDSSQLKGSSDQS +RDHLDSISNEDSEYHTLINQREIQQRKERSILWDPSFLQTERKEIESGRF +PKCLSGYSSMSRLFTEREKQMINHLFPEEIEEFLGNPTRSVRSFFSDRWS +ELHLGSNPTERSTRDQKLLKKQQDLSFVPSKRSENKEMVNIFKIITYLQN +TVSIHPISSDPGCDMVPKDEPDMDSSNKISFLNKNPFFDLFHLFHDRNRG +GYTLHYDFESEERFQEMADLFTLSITEPDLVYHKGFAFSIDSCGLDQKQF +LNEARDESKKKSLLVLPPIFYEENESFSRRIRKKWVRISCGNDLEDPKPK +IVVFASNNIMEAVTQYRLIRNLIQIQYSTYGYIRNVLNRFFLMNRSDRNF +EYGIQRDQIGKDTLNHRTIMKYTINQYLSNLKKSQKKWFEPLILISRTER +SMNRDPDAYRYKWSNGSKNFQEHLEQSVSEQKSRFQVVFDRLRINQYSID +WSEVIDKKDLSKPLRFFLSKSLLFLSKLLFFLSNSLPFFCVSFGNIPIHR +SEIYIYELKGPNDQLCNQLLESIGLQIVHLKKWKPFLLDDHDTSQKSKFL 
+INGGTISPFLFNKIPKWMIDSFHTRNNRRKSFDNPDSYFSMIFHDQDNWL +NPVKPFHRSSLISSFYKANRLRFLNNPHHFCFYWNTRFPFSVEKARINNS +DFTYGQFLNILFIRNKIFSLCVGKKKHAFWGRDTISPIESQVSNIFIPND +FPQSGDETYNLYKSFHFPSRSDPFVRRAIYSIADISGTPLTEGQIVNFER +TYCQPLSDMNLSDSEGKNLHQYLNFNSNMGLIHTPCSEKDLSSEKRKKRS +LCLKKCVEKGQMYRTFQRDSAFSTLSKWNLFQTYMPWFLTSTGYKYLNLI +FLDTFSDLLPILSSSQKFVPIFHDIMHGSGISWRILQKKLCLPQWNLISE +ISSKCLHNLLLSEEMIHRNNESPLISTHLRSPNAREFLYSILFLLLVAGY +LVRTHLLFVSRASSELQTEFEKVKSLMIPSSMIELRKLLDRYPTSEPNSF +WLKNLFLVALEQLGDSLEEIRGSASGGNMLGPAYGVKSIRSKKKDWNINL +IEIIDLIPNPINRITFSRNTRHLSHTSKEIYSLIRKRKNVNGDWIDDKIE +SWVANSDSIDDEEREFLVQFSTLTTENRIDQILLSLTHSDRLSKNDSGYQ +MIEQPGAIYLRYLVDIHKKHLMNYEFNPSCLAERRIFLAHYQTITYSQTS +CGENSFHFPSHGKPFSLRLALSPSRGILVIGSIGTGRSYLVKYLATNSYV +PFITVFLNKFLDNKPKGFLLDEIDIDDSDDIDDSDNLDASDDIDRDLDTE +LKLLTRMNGLTMDMMPEIDRFYITLQFELAKAMSPCIIWIPNIHDLDVNE +SNDLALGLLVNHLSRDCERCSTRNILVIASTHIPQKVDPALIAPNKLNTC +IKIRRLLLPQQRKHFFTLSYTRGFHLEKKMFHTNGFGSITMGSNARDLVA +LTNEVLSISITQKKSIIDTNTIRSALHRQTWDLRSQVRSVQDHGILFYQI +GRAVAQNVLLSNCPIDPISIYMKKKSCNEGDSYLYKWYFELGTSMKRLTI +LLYLLSCSAGSVAQDLWSLSGPDEKNGITSYGLVENDSDLVHGLLEVEGA +LVGSSRTEKDCSQFDNDRVTLLLRPEPRNPLDMMQNGSCSILDQRFLYEK +YESEFEEGEGEGALDPQEDLFNHIVWAPRIWRPWGFLFDCIERPNELGFP +YWSRSFRGKRIIYDEEDELQENDSEFLQSGTMQYQTRDRSSKEQGLFRIS +QFIWDPADPLFFLFKDQPPGSVFSHRELFADEEMSKGLLTSQTDPPTSIY +KRWFIKNTQEKHFELLINRQRWLRTNSSLSNGSFRSNTLSESYQYLSNLF +LSNGTLLDQMTKTLLRKRWLFPDEMKIGFM +>NC_002202@SpolCp103@ycf2@142690@149085@R@1@2132 Ycf2 +MKGHQFKSWIFELREILREIKNSHYFLDSWTQFNSVGSFIHIFFYQERFL +KLFDPRIWSILLSPNSQGSTSNRYFTIKGVVLFVVVVLIYRITNRNMVER +KNLYLIGLFPIPMNSIGPRNDTLEKSFGSSNINRLIVSLLYLPKGKKISE +SYFLDPKESTWFLPITKKCIMPESNRGSRWWRNWIGKRRDSSCKISNETV +AGIEISFKEKDIQYLEFPFVYYMDDPIRKDHDWELFDCLSLFLRNVSREN +WIWLDNVRLVNKDRFFSKVRNVSSNIQYDFTRSSFVQVTDSSQLKESSDQ +SRDRSNSISNADSEYHTLINKREIQQLKERSILRDPSFLQTEGTEIESDR +FPKCLSGYSSMPRLFTAREKQMIIHLLPEEIEQLLENPTRSIRSFFSGRW +SELHLGSNPTERSTRDPQLLKKQQDVSFAPSRQSENKEMVNIFKIIKYLQ +NTVSIHPISSDPGCDMVPKDELDMDSSDKISFLNKNSFFDLFHLFHDRNR 
+GGYALHHDFESEEKFQEMADLFTLSITDPDLVYHRGFSFSIDSCGLDQKQ +FLNEVFNSRDESKKKSLLVLSPIFYEENESFYRRIRKKGVRISRNVLNRF +FLINRSDRSFEYGIQRDQIGNDTLNHRTIRKYMINQDFSNLKKSQKKWFD +PLIFLSRTERFMNRDPDAYRYKWFNGSKNFQEHLEHFVSEQKSRFQVVFD +QLRINQYSIDWSEVIDKKDLSKSLRFFLSKSLRFFLSKLLLFLSNSLPFF +FVSFGNIPINRSEIRIYELKGPNDQLYNPLVESIGLQIVHLKKWKAFLLD +DHDTFQKSKFLINGGTISPFLFNKIPKWMIDSFHTRNNSGKSFDNTDSYF +SMISHDQNNWLNPVKPFHRSSLISSFYKANQLRFLNNPHHFCFYCNKRFP +FYMEKARINNSDFTYRQFLNILFIHNKLFSLCVGKKKHAFLERDTISPIE +SQVSNIFLPNDFPIRSDLLVRRTIYSIADISGTPLTEGQLVHFERTYCQP +LSDMNLSDSEKKNLHQYLNFNSNMGFIYTPCSEKYLLSEKRKKRSLCLKK +CVEKGQMYRTFQRDSAFSTLSKWNLFQTYMPWFLTSTGYKYLNFLFLDTF +SDLLPILSSSQKFLSILHDIMHGSGISWRILQKKLCLPPWNLISEISSKC +LHNLLLPEEMIHRNNESPLIWTHLASPNVREFFYSILFLLFVAGYLVRTH +LLFVFRASSELQTEFERVKSLMIPSYMIELRKLLDRYPTSEPNSFWLKNL +FLVALEQLGDSLEEIRGSASGDNMLLGGGPGPAYGFKSIRSKKKYLNINL +IDILDLISIIPNPINRITFSRNTRHLSHTSKEIYSLIRKRKRVNGDWIDD +KIESWVASSDSIDDEEREFLVQFSTLTTEKRIDQILLSLTHSDHLSKNDS +GYQLIEQPGAIYLRYLVDIHKKYLMNYEFNTSCLAERRVFLAHYQTITYS +QTSCGANSFHFPSHGKPFSLRLALSPSRGILVIGSIGTGRSYLVKYLATN +SYVPFITVFLNKFLDNKPKGSLIDASDDIDRDLDTELELLTMMNALTMDM +MPEIDQFSITLQFELAKAMSPCIIWIPNIHDLDVNESNYLSLGLFVNYLS +RDCERGSTRNILVIASTHIPQKVDPALIAPNQLNTCIKIRRLRIPQQRKH +FFTLSYTRGFHLEKKMFHTNGFGSITMGSNVRDLVAFINEALSISITQKK +SIIDTNTIRSALHRQTWDLRSQVRSVQDHGILFYQIGRAVAQNVLLSNCP +IDPISTYMKKKSCNEGDSYLYKWYFELGTSMKKLTILLYLLSCSAGSVAQ +DLWSLPGPDEKNGITSYGLVENDSYLVHGLLEVEGALVGSSRIEKACSQN +DRVTLFLRPELRNPLDMMQNGSCSILDHRFLYEKYESELEEGEGALDPQQ +IEEDLFNHIVWAPRIWNPWGFLFDCIERPNELGFPYWARSFRGKRSIYDK +EDELQENDSEFLQSGTMQYQTRDRSSKEQGFFRISQFIWDPADPLFFLFK +DQPFVSVFSHREFFADEEISKGLLTSQMNPPISIFQRWFIKNTQEKHFEL +LINRQRWLRTNSSLSNGSFRSNTLSESYQYLSNLFLSNGTLLDQMTKTLL +RKRWLFPDEMKIGFMQEEKDFPFLSRKDMWP +>NC_002693@OeelhCp081@ycf2@92143@99165@D@1@2341 Ycf2 +MGNQRNRVNLNPFRFWVFELREILREIKNSRYPFNSVGSFIHIFVHQERF +LKLLDPRIWSVLRSQGSTGVVLFLVAVLIYRINNRNMIERKNIYLTGLLP +IPTNFAGPRNETLEESFLSSNINRLIVSLLHLPKGKRLSESCFLDPKEST +RVLPITKWRNWIGKRRDSSQLKGSSDQSRDHFDSIGTEDSEYHTLINQRE 
+IQQRKERSSLLDPSFLQTERTEIESDRFSKGLSGSSSKSRLFTEGEKEMN +NHLPPEEIEEFLGNPTRSILSFFSDEWSELHLGSNPTERSTVDQKLLKKE +QEVSFAPFRRSETKEIVNLFKTMAYLQKTVSIHPISSDPGCDMVPKDELD +SEERFQEMADLFTLSITEPDLVYHKGFAFSIDSSVLDQKQFLAEARDESK +KKSLLVLPPVFYQENESFYRRIRKRGVQISCGNDLEDPKPKIVVFASNNI +VEAVNQYRWIRNLIQIQYSTHGYIRNVLNRFFLMNRSDRNFEYGIQRDQI +GNDTLNHRTFMKYTINQHLSNLKKSQKKGSDPLILISRTERSVNRDPNAY +RYKWSKGSKNFQEHLEHFVSEQKSRFQVVFDRYRSIRNRYRSIRNRYRSR +INQYSSDRSEVSDKKDNRYRSRINQYSSDRSEVSDQKNLAKFRSFVFSKL +LLFLSNSLPFFFVSFGNTPPIQRSEIRVSELKGPNDRLCNQFLESIGLQL +VYLKKLKPFLLDDHETSQKSKLLFNKKPEGMIDSFHTRNNRGKSLDSYFS +MISHDQDNWLNPVKPFHRSSLISSFYKANRLRFLNNPHDFGFFCNKRFPF +YVDIKNLDFTYGQFLNILFIRNTKFSLCGDKKKHAFLERDTISSIESQVS +NLFKDFPQSGDERYNFYKYFHLAMRSDPLVRRAIYSIADISGTPLTEGQR +VNFERTYCQPLSDMNLSDSEGKNLYQYLNFNSNMGLIYSEKCFSSEKRKK +KKPEKRKEKKPEKRKEKKPEKRKEKKPEKRKEKKPEKRKEKKPEKRKEKK +PEKRKEKKQSLYLKQWVEKVQMDRALQGERVSLILSNWNLFKTYVMPFSL +TSTGYNLLKLMFLDTLGSYVMPLLRSSPKFVSICYAISDPCGISWRILQK +KLCLLQWNWISAISNKCFHKLLLSEESIHRNNESPSMTDLRWPNLGAFLY +SILFLLFVAGHLVFSHLLFLSQDFSELQRDFARAQSLMIPSYIVELRELL +DMYPAPRSFKKLFLAAREKLVNYLRWGGGRKSFLIHLFELLNITPNPIDR +IAFLKNTRHLSHTSKELYSLITELGDFSSLCSGQRYRYDQIIENVNGPCC +LIDDKIESWISNCDAIEDKEREFLVPFCNFTRETRIDQILLSLTHSDHLS +NNDSASQMSEEPGAFYLRHLVDIHKKGLMNYECNTSCLAERRIFLAHYQT +ITYSPCGDNRSHFPSHGKTFSLRLPLHPSRATLVIGSIGSGRSYLVKSLA +TNSYVPLITVVLNKFLKNWTPQGFDIHESGVYDEYGDDAEEANDYGASFF +DFLDNDSDDYEDRDSDDYDEPGASDDYEDRDMEDFVDSEMTEWLTKTNVP +LVYQLLDDEIDEFYITLQFELAKAMSPCILWIPNIHDLDAKESDYLSLGL +LVNHLSRDCGRRSTKNEILVIASTHIPQKVDPSLIGPDGLSTCIKTRRLL +VPQQQQCLFTLSYTRGFHLENKMFHTHTNEFESTILGPSVPDLVALTNEA +LSISITQKKSIIDTTTIRYALHRKTWDLEADRNLSPAKEHGTLFYQVGRA +FAHTVLLRNCPIDPISIYIKKNLCEAGDSSLYKWYFELGTSMKKLTILLY +LLTCSAGSIAQDLLSPPGPDEQNLITSYGLVENDSDLVHGLSDIVHGLLE +LEGALVGSSPTEEEVEGTEEEVEGTEEEVEGTEEEVEGTEEEVEGTEEEV +EGTEEEVEGTEDEEGEGTEEEVEGTEDEEGEGTEEEVEGTEDEEGEGTEE +EVEGTEDEEGEGTEDEEGEGTEEEVEGTEEEVEGTEDEEGEGTEKDSSQF +DNDRVTLLLRPKPRNPLDIQRLIYQHQKYESELEEDDDDDEDVFAPQKML +EDLFSELVWSPRIWHPWDFILDCEAEIPAEEIPEEEDPLPEEALETEVAV 
+WGEEEEGEADDEEDERLEAQQEDELLEEEDEELKEEEDELHEEEEEEEEE +EEEEEEDELHEEEEEEEEEDELQENDSEFFRSETQQPQARDGFSEEEGCF +RISQFMWVPGDPLSFLYKDTPFVEVLSYPEEATEISKELLRLLNPKTKRD +APKRARQRWWTKKKQDKHYELVLDRQRWLITKSSLSKSNGFFRSNTPSES +YQYLSNLFLSNRRLLDQITKTFFRKKWLFPDEMKIGFMEQ diff --git a/detectors/cds/test/test.ref b/detectors/cds/test/test.ref index 7449c64..4101ab6 100644 --- a/detectors/cds/test/test.ref +++ b/detectors/cds/test/test.ref @@ -45,28 +45,27 @@ FT AAAQSISYEIPLALCVLSISLRVIR" FT gene 42910..45121 FT /gene="ndhB" FT /locus_tag="" -FT CDS join(42910..43780,44367..45121) +FT CDS join(42910..43685,44365..45121) FT /codon_start=1 FT /transl_table=11 FT /gene="ndhB" FT /locus_tag="" FT /product="NADH dehydrogenase subunit 2" -FT /inference="similar to DNA sequence:NC_008535:CoarCp067" +FT /inference="similar to DNA sequence:AC_000188:LyesCp066" FT /translation="MIWHVQNENFILDSTRIFMKAFHLLLFDGSLIFPECILIFGLILL FT LMIDSTSDQKDIPWLYFISSTSLVMSITALLFRWREEPMISFSGNFQTNNFNEIFQFLI FT LLCSTLCIPLSVEYIECTEMAITEFLLFVLTATLGGMFLCGANDLITIFVAPECFSLCS FT YLLSGYTKKDVRSNEATMKYLLMGGASSSILVHGFSWLYGSSGGEIELQEIVNGLINTQ -FT MYNSPGISIALIFITVGIGFKLSPAPSHQWTPDVYEGVRFVREIPTSLSISEMFGFFKT -FT PWTCRREMLSPTPVVAFLSVTSKVAASASATRIFNIPFYFSSNEWHLLLEILAILSMIL -FT GNLIAITQTSMKRMLAYSSIGQIGYVIIGIIVGDSNDGYASMITYMLFYISMNLGTFAC -FT IVLFGLRTGTDNIRDYAGLYTKDPFLALSLALCLLSLGGLPPLAGFFGKLYLFWCGWQA -FT GLYFLVLIGLLTSVVSIYYYLKIIKLLMTGRNQEITPHVRNYRRSPLRSNNSIELSMIV -FT CVIASTIPGISMNPIIAIAQDSLF" -FT exon 42910..43780 +FT MYNSPGISIALIFITVGIGFKLSPAPSHQWTPDVYEGSPTPVVAFLSVTSKVAASASAT +FT RIFNIPFYFSSNEWHLLLEILAILSMILGNLIAITQTSMKRMLAYSSIGQIGYVIIGII +FT VGDSNDGYASMITYMLFYISMNLGTFACIVLFGLRTGTDNIRDYAGLYTKDPFLALSLA +FT LCLLSLGGLPPLAGFFGKLYLFWCGWQAGLYFLVLIGLLTSVVSIYYYLKIIKLLMTGR +FT NQEITPHVRNYRRSPLRSNNSIELSMIVCVIASTIPGISMNPIIAIAQDSLF" +FT exon 42910..43685 FT /gene="ndhB" FT /locus_tag="" FT /number=1 -FT exon 44367..45121 +FT exon 44365..45121 FT /gene="ndhB" FT /locus_tag="" FT /number=2 @@ -79,7 +78,7 @@ FT /transl_table=11 FT /gene="ndhD" FT 
/locus_tag="" FT /product="NADH dehydrogenase subunit 4" -FT /inference="similar to DNA sequence:NC_007898:LyesC2p017" +FT /inference="similar to DNA sequence:AC_000188:LyesCp081" FT /translation="MNYFPWLTIIVVFPIFAGSLIFFLPHKGNRVIRWYTICICILELL FT LTTYAFCYHFQSDDPLIQLVEDYKWIDFFDFHWRLGIDGLSIGPILLTGFITTLATLAA FT WPVTRDSRLFHFLMLAMYSGQIGLFSSRDLLLFFIMWELELIPVYLLLAMWGGKKRLYS @@ -98,7 +97,7 @@ FT /transl_table=11 FT /gene="ndhE" FT /locus_tag="" FT /product="NADH dehydrogenase subunit 4L" -FT /inference="similar to DNA sequence:NC_001879:NitaCp085" +FT /inference="similar to DNA sequence:AC_000188:LyesCp080" FT /translation="MILEHVLVLSAYLFSIGIYGLITSRNMVRALMCLELILNAVNINF FT VTFSDFFDNRQLKGDIFSIFVIAIAAAEAAIGLAIVSSIYRNRKSTRINQSNLLNN" FT gene complement(11509..13722) @@ -178,7 +177,7 @@ FT /transl_table=11 FT /gene="psaC" FT /locus_tag="" FT /product="photosystem I subunit VII" -FT /inference="similar to DNA sequence:NC_023792:CP89_p014" +FT /inference="similar to DNA sequence:AC_000188:LyesCp065" FT /translation="MSHSVKIYDTCIGCTQCVRACPTDVLEMIPWDGCKAKQIASAPRT FT EDCVGCKRCESACPTDFLSVRVYLWHETTRSMGLAY" FT gene 53817..55307 @@ -190,7 +189,7 @@ FT /transl_table=11 FT /gene="rpl2" FT /locus_tag="" FT /product="ribosomal protein L2" -FT /inference="similar to DNA sequence:NC_007898:LyesC2p002" +FT /inference="similar to DNA sequence:AC_000188:LyesCp030" FT /translation="MAIHLYKTSTPSTRNGTVDSQVKSNPRNNLIYGQRRCGKGRNARG FT IITARHRGGGHKRLYRKIDFRRNEKDIYGRIVTIEYDPNRNAYICLIHYGDGEKRYILH FT PRGAIIGDTIVSGTEVPIKMGNALPSTDMPLGTAIHNIEITLGKGGQLARAAGAVAKLI @@ -213,7 +212,7 @@ FT /transl_table=11 FT /gene="rpl23" FT /locus_tag="" FT /product="ribosomal protein L23" -FT /inference="similar to DNA sequence:NC_007898:LyesC2p003" +FT /inference="similar to DNA sequence:AC_000188:LyesCp026" FT /translation="MDGIKYAVFTDKSIRLLGKNQYTSNVESGSTRTEIKHWVELFFGV FT KVIAMNSHRLPGKSRRMGPIMGHTMHYRRMIITLQPGYSIPPLRKKRT" FT gene 14505..14672 @@ -249,7 +248,7 @@ FT /transl_table=11 FT /gene="rps7" FT /locus_tag="" FT 
/product="ribosomal protein S7" -FT /inference="similar to DNA sequence:NC_007898:LyesC2p007" +FT /inference="similar to DNA sequence:AC_000188:LyesCp008" FT /translation="MSRRGTAEKKTAKSDPIYRNRLVNMLVNRILKHGKKSLAYQIIYR FT AVKKIQQKTETNPLSVLRQAIRGVTPDITVKARRVGGSTHQVPIEIGSTQGKALAIRWL FT LAASRKRPGRNMAFKLSSELVDAAKGSGDAIRKKEETHRMAEANRAFAHFR" diff --git a/detectors/cds/tools/chlorodb/go_chlorodb.sh b/detectors/cds/tools/chlorodb/go_chlorodb.sh new file mode 100755 index 0000000..75ad7e5 --- /dev/null +++ b/detectors/cds/tools/chlorodb/go_chlorodb.sh @@ -0,0 +1,254 @@ +#!/bin/csh -f +# +# make ChloroDB's +# +# usage: copy genbank/embl files into 'DB_DIR/download' +# usage: [create a parameters.sh file in 'DB_DIR'] +# usage: go_chlorodb [DB_DIR] +# +unsetenv ORG_SOURCED + +setenv ORG_HOME `dirname $0`/../../../.. +source $ORG_HOME/scripts/csh_init.sh + +# +# which DB to process +# + +set DB_BASE = $DATA_DIR/cds/chlorodb # default location + +if ($#Argv > 0) then + set DB_BASE = $Argv[1]; Shift +endif + +set DB_BASE = `cd $DB_BASE && pwd -P` + +NeedDir $DB_BASE/download + +if (! -d $DB_BASE/info) mkdir $DB_BASE/info +if (! -d $DB_BASE/fasta) mkdir $DB_BASE/fasta + +cd $DB_BASE/info + +# +# params +# + +if (!
-e $DB_BASE/parameters.sh) then + @ n = `find $DB_BASE/download -depth 1 -type f -print | wc -l` + @ cor_cutoff = $n / 2 + @ atg_cutoff = $n / 10 + @ dbs_cutoff = $n / 4 + if ($cor_cutoff == 0) @ cor_cutoff = 1 + if ($atg_cutoff == 0) @ atg_cutoff = 1 + if ($dbs_cutoff == 0) @ dbs_cutoff = 1 + echo "# sourced file" > $DB_BASE/parameters.sh + echo "" >> $DB_BASE/parameters.sh + echo "set CORE_NCDS_CUTOFF = $cor_cutoff" >> $DB_BASE/parameters.sh + echo "set CORE_START_ATG_CUTOFF = $atg_cutoff" >> $DB_BASE/parameters.sh + echo "set CORE_START_DFT_CUTOFF = $atg_cutoff" >> $DB_BASE/parameters.sh + echo "set CORE_START_OTH_CUTOFF = 10" >> $DB_BASE/parameters.sh + echo "set CORE_STOP_CUTOFF = $cor_cutoff" >> $DB_BASE/parameters.sh + echo "set CORE_SPLICE_CUTOFF = $atg_cutoff" >> $DB_BASE/parameters.sh + echo "" >> $DB_BASE/parameters.sh + echo "set SHEL_NCDS_CUTOFF = 10" >> $DB_BASE/parameters.sh + echo "" >> $DB_BASE/parameters.sh + echo "set CORE_DELTA = Inf" >> $DB_BASE/parameters.sh + echo "set CORE_COVMIN = 30" >> $DB_BASE/parameters.sh + echo "set CORE_PMAX = 1e-6" >> $DB_BASE/parameters.sh + echo "set CORE_IDMIN = 30" >> $DB_BASE/parameters.sh + echo "set CORE_SIZMIN = $cor_cutoff" >> $DB_BASE/parameters.sh + echo "" >> $DB_BASE/parameters.sh + echo "set SHEL_DELTA = 0.5" >> $DB_BASE/parameters.sh + echo "set SHEL_COVMIN = 30" >> $DB_BASE/parameters.sh + echo "set SHEL_PMAX = 1e-6" >> $DB_BASE/parameters.sh + echo "set SHEL_IDMIN = 30" >> $DB_BASE/parameters.sh + echo "set SHEL_SIZMIN = $dbs_cutoff" >> $DB_BASE/parameters.sh + echo "" >> $DB_BASE/parameters.sh + echo "set DUST_DELTA = 0.5" >> $DB_BASE/parameters.sh + echo "set DUST_COVMIN = 30" >> $DB_BASE/parameters.sh + echo "set DUST_PMAX = 1e-6" >> $DB_BASE/parameters.sh + echo "set DUST_IDMIN = 30" >> $DB_BASE/parameters.sh + echo "set DUST_SIZMIN = 10" >> $DB_BASE/parameters.sh + +endif + +source $DB_BASE/parameters.sh + +##set CMIN_COD = 0 +##set FMIN_COD = 0.01 + +# +# temporarily uncompress +# + +set ff = 
`find $DB_BASE/download -depth 1 -name \*.gz -print` + +if ($#ff != 0) then + Notify "uncompressing $#ff entries" + foreach f ($ff) + gunzip -f $f + end +endif + +# +# convert gbk/embl to fasta +# + +set ff = `find $DB_BASE/download -depth 1 \( -name \*.gbk -or -name \*.embl \) -print` + +Notify "convert $#ff gbk/embl entries to fasta" + +foreach f ($ff) + set nom = `basename $f:r` + set typ = $f:e + $AwkCmd -f $LIB_DIR/$typ.tofasta.awk $f > $DB_BASE/fasta/$nom.fst +end + +# +# get gbk/embl info +# + +Notify "get gbk/embl info for $#ff entries" + +echo "" | awk -v HEADONLY=1 -f $LIB_DIR/gbk.info.awk > db.info.txt # just get header + +foreach f ($ff) + set nom = `basename $f:r` + set typ = $f:e + $AwkCmd -f $LIB_DIR/$typ.oneliner.awk $f |\ + $AwkCmd -f $LIB_DIR/libutil.awk -f $LIB_DIR/$typ.info.awk |\ + egrep -v '^#' >> db.info.txt +end + +# +# get cds info +# + +Notify "get gbk/embl cds for $#ff entries" + +echo "" | awk -v HEADONLY=1 -f $LIB_DIR/gbk.cds_long.awk > db.cds.txt # just get header + +foreach f ($ff) + set nom = `basename $f:r` + set typ = $f:e + $AwkCmd -f $LIB_DIR/$typ.oneliner.awk $f |\ + $AwkCmd -v FASTA=$DB_BASE/fasta/$nom.fst -f $LIB_DIR/libutil.awk \ + -f $LIB_DIR/$typ.cds_long.awk |\ + egrep -v '^#' >> db.cds.txt +end + +# +# get fasta for prots +# + +Notify "get prots" +$AwkCmd -f $LIB_DIR/libutil.awk -f $LIB_DIR/cds2fasta.awk db.cds.txt > db.prot.fst + +# +# get introns +# + +Notify "get gbk/embl introns for $#ff entries" + +echo "" | awk -v HEADONLY=1 -f $LIB_DIR/gbk.intron.awk > db.intron.txt # just get header + +foreach f ($ff) + set nom = `basename $f:r` + set typ = $f:e + $AwkCmd -f $LIB_DIR/$typ.oneliner.awk $f |\ + $AwkCmd -v FASTA=$DB_BASE/fasta/$nom.fst -f $LIB_DIR/libutil.awk \ + -f $LIB_DIR/$typ.intron.awk |\ + egrep -v '^#' >> db.intron.txt +end + +# +# make models +# + +Notify "Making models" + +echo -n "" > db.models.params.txt +echo "CORE_NCDS_CUTOFF <- $CORE_NCDS_CUTOFF" >> db.models.params.txt +echo "CORE_START_ATG_CUTOFF <- 
$CORE_START_ATG_CUTOFF" >> db.models.params.txt +echo "CORE_START_DFT_CUTOFF <- $CORE_START_DFT_CUTOFF" >> db.models.params.txt +echo "CORE_START_OTH_CUTOFF <- $CORE_START_OTH_CUTOFF" >> db.models.params.txt +echo "CORE_STOP_CUTOFF <- $CORE_STOP_CUTOFF" >> db.models.params.txt +echo "CORE_SPLICE_CUTOFF <- $CORE_SPLICE_CUTOFF" >> db.models.params.txt +echo "SHEL_NCDS_CUTOFF <- $SHEL_NCDS_CUTOFF" >> db.models.params.txt + +$LIB_DIR/make.models.r |& Cat + +GetStatus +OnError then + Error 2 "model parameter too stringent" +endif + +# +# add matrices +# + +cp -f $PROG_DIR/matrices/* models + +# +# make subDBs +# + +if (-e db.core.pat.txt) then + Notify "Making core DB (take some time... please wait)" + $PROG_DIR/subdb/go_subdb.sh db.prot.fst db.core.pat.txt \ + $CORE_DELTA $CORE_COVMIN $CORE_PMAX $CORE_IDMIN $CORE_SIZMIN +endif + +if (-e db.shell.pat.txt) then + Notify "Making shell DB (take some time... please wait)" + $PROG_DIR/subdb/go_subdb.sh db.prot.fst db.shell.pat.txt \ + $SHEL_DELTA $SHEL_COVMIN $SHEL_PMAX $SHEL_IDMIN $SHEL_SIZMIN +endif + +if (-e db.dust.pat.txt) then + Notify "Making dust DB (take some time... 
please wait)" + $PROG_DIR/subdb/go_subdb.sh db.prot.fst db.dust.pat.txt \ + $DUST_DELTA $DUST_COVMIN $DUST_PMAX $DUST_IDMIN $DUST_SIZMIN +endif + +# +# recompress entries +# + +set ff = `find $DB_BASE/download -depth 1 -type f -print` + +if ($#ff != 0) then + Notify "recompressing $#ff entries" + foreach f ($ff) + gzip -f $f + end +endif + +# compress fasta + +set ff = `find $DB_BASE/fasta -depth 1 -name \*.fst -print` + +if ($#ff != 0) then + Notify "compressing $#ff fasta entries" + foreach f ($ff) + gzip -f $f + end +endif + +# install everything in proper directory + +foreach dir ("core" "shell" "dust") + if (-e $DB_BASE/$dir) \rm -r $DB_BASE/$dir + if ((-d db.$dir.pat.db) && (-e db.$dir.pat.db/Annot.lst)) then + Notify "installing $DB_BASE/$dir" + \mv -f db.$dir.pat.db $DB_BASE/$dir + endif +end + +if (-e $DB_BASE/models) \rm -r $DB_BASE/models +if (-d models) \mv -f models $DB_BASE + +Notify "Done" +exit 0 + diff --git a/detectors/cds/tools/chlorodb/matrices/blosum62.mat b/detectors/cds/tools/chlorodb/matrices/blosum62.mat new file mode 100644 index 0000000..37f3f29 --- /dev/null +++ b/detectors/cds/tools/chlorodb/matrices/blosum62.mat @@ -0,0 +1,29 @@ +# +# blosum62 substitution matrix +# with larger penalty for stops +# + A R N D C Q E G H I L K M F P S T W Y V B Z X * +A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -50 +R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -50 +N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -50 +D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -50 +C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -50 +Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -50 +E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -50 +G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -50 +H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -50 +I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -50 +L -1 -2 -3 -4 
-1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -50 +K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -50 +M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -50 +F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -50 +P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -50 +S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -50 +T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -50 +W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -50 +Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -50 +V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -50 +B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -50 +Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -50 +X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -50 +* -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 1 diff --git a/detectors/cds/tools/chlorodb/subdb/go_subdb.sh b/detectors/cds/tools/chlorodb/subdb/go_subdb.sh new file mode 100755 index 0000000..9365fec --- /dev/null +++ b/detectors/cds/tools/chlorodb/subdb/go_subdb.sh @@ -0,0 +1,195 @@ +#!/bin/csh -f +# +# usage: go_subdb.sh prot.fst pat.txt [deltalen covmin pmax idmin sizmin] +# usage: prot.fst : proteins fasta file +# usage: pat.txt : text file containing patterns and names for families to extract +# usage: output directory containig subdbs : basename .db +# + +unsetenv ORG_SOURCED + +setenv ORG_HOME `dirname $0`/../../../../.. 
+source $ORG_HOME/scripts/csh_init.sh + +NeedArg 2 + +set ProtFile = $Argv[1]; Shift +set PatFile = $Argv[1]; Shift + +NeedFile $ProtFile +NeedFile $PatFile + +# +# parameters +# + +set Delta = 0.5 +set Covmin = 30 +set Pmax = 1e-6 +set Idmin = 30 +set Sizmin = 5 + +if ($#Argv > 0) then + set Delta = $Argv[1]; Shift +endif + +if ($#Argv > 0) then + set Covmin = $Argv[1]; Shift +endif + +if ($#Argv > 0) then + set Pmax = $Argv[1]; Shift +endif + +if ($#Argv > 0) then + set Idmin = $Argv[1]; Shift +endif + +if ($#Argv > 0) then + set Sizmin = $Argv[1]; Shift +endif + +# +# output directory +# + +set OutDir = `basename $PatFile:r`.db + +if (-d $OutDir) \rm -r $OutDir +mkdir $OutDir + +set OutLog = `basename $PatFile:r`.log + +echo -n '' > $OutLog + +alias Report 'egrep "^>" \!:1 | wc -l | awk -v P=`basename \!:1` -v H=\!:2 '"'{print H,P,"'$1}'"'"' >> $OutLog' + +# +# remove entries with bad symbols +# + +Notify "cleanup $ProtFile" + +Report $ProtFile "init_size" + +$AwkCmd -f $LIB_DIR/db.filter.sym.awk $ProtFile > P_$$ + +Report $ProtFile "cleanup_size" + +# +# select by name pattern +# + +Notify "select by patterns" + +mkdir D_$$ +mkdir E_$$ +mkdir F_$$ + +set noms = `awk '{print $1}' $PatFile` + +foreach nom ($noms) + set pat = `egrep "^$nom " $PatFile | awk '{print $2}'` + $AwkCmd -f $LIB_DIR/db.filter.pat.awk -v PAT="$pat" P_$$ > D_$$/$nom.fst + set n = `egrep '^>' D_$$/$nom.fst | wc -l` + Notify " pattern : $nom : $n" + Report D_$$/$nom.fst "pattern_filter" + if ($n <= $Sizmin) \rm -f D_$$/$nom.fst +end + +set ok = `ls D_$$ | wc -l` +if ($ok == 0) goto fin + +# +# select by length +# + +Notify "select by length" + +foreach f (D_$$/*.fst) + set nom = `basename $f:r` + $AwkCmd -f $LIB_DIR/db.getlen.awk $f > L_$$ + $LIB_DIR/db.filter.len.r L_$$ $Delta |\ + $AwkCmd '($NF == "TRUE") {print $2}' > M_$$ + $AwkCmd -v FILE=M_$$ -f $LIB_DIR/db.subdb.awk $f > E_$$/$nom.fst + Report E_$$/$nom.fst "length_filter" + set n = `egrep '^>' E_$$/$nom.fst | wc -l` + Notify " length 
filter : $nom : $n" + if ($n <= $Sizmin) \rm -f E_$$/$nom.fst +end + +set ok = `ls E_$$ | wc -l` +if ($ok == 0) goto fin + + +# +# select by similarity +# + +Notify "select by similarity" + +foreach f (E_$$/*.fst) + set nom = `basename $f:r` + + Notify " blasting $nom" + + makeblastdb -dbtype 'prot' -in $f >>& db.log + blastp -db $f -query $f -outfmt 7 > $f.blast.out + \rm -f $f.p?? + + $AwkCmd -v COVMIN=$Covmin -v PMAX=$Pmax -v IDMIN=$Idmin \ + -f $LIB_DIR/db.blastlink.awk $f.blast.out |\ + $AwkCmd -f $LIB_DIR/db.todl.awk > G_$$ + + ($LIB_DIR/db.cc.r G_$$ > $f.cc.txt) >>& db.log + + # db.reportcc.awk calls asort() (a gawk extension): run it with $AwkCmd like the other library scripts + $AwkCmd -v NAME=$nom -f $LIB_DIR/db.reportcc.awk $f.cc.txt >> $OutLog + + $AwkCmd -f $LIB_DIR/db.selcc.awk $f.cc.txt > S_$$ + $AwkCmd -v FILE=S_$$ -f $LIB_DIR/db.subdb.awk $f > F_$$/$nom.fst + + Report F_$$/$nom.fst "similarity_filter" + + set n = `egrep '^>' F_$$/$nom.fst | wc -l` + Notify " blast filter : $nom : $n" + if ($n <= $Sizmin) \rm -f F_$$/$nom.fst + +end +# the similarity stage writes to F_$$: test that directory (not D_$$) for surviving families +set ok = `ls F_$$ | wc -l` +if ($ok == 0) goto fin + +# +# annotations +# + +echo -n "" > J_$$ + +foreach f (F_$$/*.fst) + $AwkCmd -f $LIB_DIR/db.annot.awk $f >> J_$$ +end + +awk '(NF >= 3) {print $1, $NF}' $PatFile | sort > A_$$ +sort J_$$ | egrep -v '^ *$' > B_$$ +join A_$$ B_$$ > F_$$/Annot.lst + +# +# copy files +# + +set n = `ls F_$$/* | wc -l` +Notify "copy $n files to $OutDir" + +\mv -f F_$$/* $OutDir + +# +# end +# + +fin: +Notify "output directory : $OutDir" + +\rm -r ?_$$ + + +exit 0 diff --git a/detectors/cds/tools/chlorodb/subdb/lib/db.annot.awk b/detectors/cds/tools/chlorodb/subdb/lib/db.annot.awk new file mode 100644 index 0000000..f000eca --- /dev/null +++ b/detectors/cds/tools/chlorodb/subdb/lib/db.annot.awk @@ -0,0 +1,39 @@ +# + +/^>/ { + N++ + na = split($1, a, "@") + if (a[na-1] > NEXMAX) NEXMAX = a[na-1] + NEX[a[na-1]]++ + ANNOT[$NF]++ +} + +END { + na = split(FILENAME, a, "/") + na = split(a[na], a, "\\.") + printf("%s %d ", a[1], N) + s = "" + for (i = 1 ; i <= NEXMAX ; i ++) { + if (NEX[i] != 0) + s =
s "" i ":" NEX[i] "_" + } + gsub("_+$", "", s) + printf("%s ", s) + + s = (NEXMAX == 1) ? "MONEX" : "POLYEX" + printf("%s ", s) + + nmax = 0 + amax = "none" + for (e in ANNOT) { + if (ANNOT[e] > nmax) { + nmax = ANNOT[e] + amax = e + } + } + print amax + +} + + + diff --git a/detectors/cds/tools/chlorodb/subdb/lib/db.blastlink.awk b/detectors/cds/tools/chlorodb/subdb/lib/db.blastlink.awk new file mode 100644 index 0000000..0d9eb17 --- /dev/null +++ b/detectors/cds/tools/chlorodb/subdb/lib/db.blastlink.awk @@ -0,0 +1,48 @@ +# + +function min(x, y) { + return ((x < y) ? x : y) +} + +BEGIN { + if (COVMIN == "") COVMIN = 50 + if (PMAX == "") PMAX = 1e-6 + if (IDMIN == "") IDMIN = 30 +} + +/^#/ { + hitnum = 0; + next; +} + +{ + if ($1 == $2) next + + hitnum++; + + na = split($1, a, "@"); + if (na < 2) { + print "query file not properly formatted" > "/dev/stderr" + exit(1); + } + len1 = a[na]; + + na = split($2, a, "@"); + if (na < 2) { + print "bank file not properly formatted" > "/dev/stderr" + exit(1); + } + len2 = a[na]; + + id = $3 + 0.0; + ali = $4; + + covmin = ali * 100. 
/ min(len1, len2); + + proba = $11 + 0.0; + + if ((covmin > COVMIN) && ((proba < PMAX) || (proba == 0)) && (id > IDMIN)) { + print $1, $2, hitnum, id, covmin, proba, ali, len1, len2; + } +} + diff --git a/detectors/cds/tools/chlorodb/subdb/lib/db.cc.r b/detectors/cds/tools/chlorodb/subdb/lib/db.cc.r new file mode 100755 index 0000000..2a69b8e --- /dev/null +++ b/detectors/cds/tools/chlorodb/subdb/lib/db.cc.r @@ -0,0 +1,18 @@ +#!/usr/bin/env Rscript +# + +require(igraph, warn.conflicts=F) + +args <- commandArgs(T) +path <- if(length(args) > 0) args[1] else 'graph.dl' + +g <- read.graph(path, format='dl') + +cc <- clusters(g) + +res <- cbind(V(g)$name, membership(cc)) + +write.table(res, quote=FALSE, row.names=FALSE, col.names=FALSE) + +quit(save="no") + diff --git a/detectors/cds/tools/chlorodb/subdb/lib/db.filter.len.r b/detectors/cds/tools/chlorodb/subdb/lib/db.filter.len.r new file mode 100755 index 0000000..13cb5a2 --- /dev/null +++ b/detectors/cds/tools/chlorodb/subdb/lib/db.filter.len.r @@ -0,0 +1,19 @@ +#!/usr/bin/env Rscript +# + +args <- commandArgs(TRUE) +path <- if(length(args) > 0) args[1] else 'len.txt' +delta <- if(length(args) > 1) args[2] else 0.5 + +tab <- read.table(path, header=TRUE) + +lmed <- median(tab$len) + +# command-line values are character: convert before the numeric comparison below +delta <- as.numeric(delta) +tab$ok <- (abs(tab$len-lmed)/lmed) <= delta + +write.table(tab, quote=FALSE) + +quit(save='no') + diff --git a/detectors/cds/tools/chlorodb/subdb/lib/db.filter.pat.awk b/detectors/cds/tools/chlorodb/subdb/lib/db.filter.pat.awk new file mode 100644 index 0000000..c14baf4 --- /dev/null +++ b/detectors/cds/tools/chlorodb/subdb/lib/db.filter.pat.awk @@ -0,0 +1,10 @@ +# + +/^>/ { + split($1, a, "@") + ok = a[3] ~ PAT +} + +ok { + print $0 +} diff --git a/detectors/cds/tools/chlorodb/subdb/lib/db.filter.sym.awk b/detectors/cds/tools/chlorodb/subdb/lib/db.filter.sym.awk new file mode 100644 index 0000000..9ae9570 --- /dev/null +++ b/detectors/cds/tools/chlorodb/subdb/lib/db.filter.sym.awk @@ -0,0 +1,30 @@ +# +# +# +
+function Check(seq) { + if (seq == "") return 0 + gsub("[ACDEFGHIKLMNPQRSTVWXY\n]+", "", seq) + return (length(seq) == 0) +} + +/^>/ { + if (Check(Seq)) { + print Name + printf("%s", Seq) + } + Name = $0 + Seq = "" + next +} + +{ + Seq = Seq "" $0 "\n" +} + +END { + if (Check(Seq)) { + print Name + printf("%s", Seq) + } +} diff --git a/detectors/cds/tools/chlorodb/subdb/lib/db.getlen.awk b/detectors/cds/tools/chlorodb/subdb/lib/db.getlen.awk new file mode 100644 index 0000000..117c90b --- /dev/null +++ b/detectors/cds/tools/chlorodb/subdb/lib/db.getlen.awk @@ -0,0 +1,10 @@ +# +BEGIN { + print "id len" +} + +/^>/ { + na = split($1, a, "@") + print substr($1, 2), a[na] +} + diff --git a/detectors/cds/tools/chlorodb/subdb/lib/db.reportcc.awk b/detectors/cds/tools/chlorodb/subdb/lib/db.reportcc.awk new file mode 100644 index 0000000..0acb7f9 --- /dev/null +++ b/detectors/cds/tools/chlorodb/subdb/lib/db.reportcc.awk @@ -0,0 +1,15 @@ +# +# + +{ + cnt[$NF]++ +} + +END { + n = asort(cnt) + printf("cc_size %s", NAME) + for (i = n ; i >= 1 ; i--) + printf(" %d", cnt[i]) + print "" +} + diff --git a/detectors/cds/tools/chlorodb/subdb/lib/db.selcc.awk b/detectors/cds/tools/chlorodb/subdb/lib/db.selcc.awk new file mode 100644 index 0000000..a621519 --- /dev/null +++ b/detectors/cds/tools/chlorodb/subdb/lib/db.selcc.awk @@ -0,0 +1,19 @@ +# + +{ + N[$NF]++ + E[$NF, N[$NF]] = $1 +} + +END { + cmax = 1 + nmax = N[1] + for (i in N) { + if (N[i] > nmax) { + nmax = N[i] + cmax = i + } + } + for (i = 1 ; i <= nmax ; i++) + print E[cmax, i] +} diff --git a/detectors/cds/tools/chlorodb/subdb/lib/db.subdb.awk b/detectors/cds/tools/chlorodb/subdb/lib/db.subdb.awk new file mode 100644 index 0000000..472b4f3 --- /dev/null +++ b/detectors/cds/tools/chlorodb/subdb/lib/db.subdb.awk @@ -0,0 +1,17 @@ +# + +BEGIN { + if (FILE == "") FILE = "db.sel.txt" + while (getline < FILE) + INC[$1] = $1 + close(FILE) +} + +/^>/ { + name = substr($1, 2) + ok = name in INC +} + +ok { + print $0 +} diff --git 
a/detectors/cds/tools/chlorodb/subdb/lib/db.todl.awk b/detectors/cds/tools/chlorodb/subdb/lib/db.todl.awk new file mode 100644 index 0000000..65c9a07 --- /dev/null +++ b/detectors/cds/tools/chlorodb/subdb/lib/db.todl.awk @@ -0,0 +1,21 @@ +# + +{ + node[$1]++ + node[$2]++ + link[++M] = $1 " " $2 +} + + +END { + for (n in node) + N++ + print "DL n=" N + print "format = edgelist1" + print "labels embedded:" + print "data:" + for (i = 1 ; i <= M ; i++) + print link[i] +} + + diff --git a/detectors/cds/tools/compare/go_compare.sh b/detectors/cds/tools/compare/go_compare.sh index 1305598..9cca5c2 100755 --- a/detectors/cds/tools/compare/go_compare.sh +++ b/detectors/cds/tools/compare/go_compare.sh @@ -47,8 +47,8 @@ $AwkCmd -f $LIB_DIR/libutil.awk -f $LIB_DIR/$PrdType.cds_short.awk > P_$$ Notify "compare bank to predictions" -$AwkCmd -f $LIB_DIR/libnws.awk \ - -f $LIB_DIR/compareCds.awk \ +$AwkCmd -f $LIB_DIR/libnws.awk \ + -f $LIB_DIR/compare.cds.awk \ R_$$ P_$$ > S_$$ # base statistics diff --git a/detectors/cds/tools/compare/go_summarize.sh b/detectors/cds/tools/compare/go_summarize.sh index d6f3ce7..04ac7c5 100755 --- a/detectors/cds/tools/compare/go_summarize.sh +++ b/detectors/cds/tools/compare/go_summarize.sh @@ -15,6 +15,8 @@ NeedArg 1 egrep '^#|^MATCH' $* | awk -f $LIB_DIR/summary.cmp.awk > compare.txt +Notify "text file: compare.txt" + $LIB_DIR/summarize_cmp.r diff --git a/detectors/cds/tools/lib/cds2fasta.awk b/detectors/cds/tools/lib/cds2fasta.awk new file mode 100644 index 0000000..41436e2 --- /dev/null +++ b/detectors/cds/tools/lib/cds2fasta.awk @@ -0,0 +1,19 @@ +# +# get fasta sequence from cds list +# +# [-v FIELD=13] CDS sequence +# [-v FIELD=14] Prot Sequence +# + +BEGIN { + if (CHARPERLINE == "") CHARPERLINE = 50 + if (FIELD == "") FIELD = 14 +} + +/^#/ { next } + +{ + name = $1 "@" $2 "@" $3 "@" $5 "@" $6 "@" $7 "@" $8 "@" int($9/3) + comment = $NF + PrintFasta($FIELD, name " " comment) +} diff --git a/detectors/cds/tools/lib/compareCds.awk 
b/detectors/cds/tools/lib/compare.cds.awk similarity index 100% rename from detectors/cds/tools/lib/compareCds.awk rename to detectors/cds/tools/lib/compare.cds.awk diff --git a/detectors/cds/tools/lib/embl.cds_long.awk b/detectors/cds/tools/lib/embl.cds_long.awk new file mode 100644 index 0000000..48fb098 --- /dev/null +++ b/detectors/cds/tools/lib/embl.cds_long.awk @@ -0,0 +1,97 @@ +# +# get cds features from embl (long version) +# +# -v FASTA + +# @include lib.embl.awk + + +BEGIN { + print "#locus locustag genefam gene from to strand nexon length status start stop dnaseq protseq product" + + if (HEADONLY != "") exit(0) + + if (MAXSPAN == "") MAXSPAN = 10000 + + if (FASTA == "") Error("No FASTA file specified", 1) + + if (! TestPath(FASTA)) Error("Fasta file: '" FASTA "' not found", 1) + + Seq = tolower(ReadFasta(FASTA)) +} + +/^ID / { + locus = $2 + gsub(";", "", locus) + incds = 0 + next +} + +/^FT CDS/ { + revstrand = match($3, "^complement") + s = substr($0, 22) + gsub("^complement", "", s) + ok = ! match(s, "complement|order") + nexon = Nexons(s) + SpanLocation(s, sloc) + spanlen = sloc[2] - sloc[1] + 1 + len = LenLocation(s) + ok = ok && (len < MAXSPAN) + cdsseq = ok ? SeqLocation(Seq, s, revstrand) : "XXX" + cstart = substr(cdsseq, 1,3) + cstop = substr(cdsseq, length(cdsseq)-2) + + gene = "none" + locustag = "none" + product = "none" + translation = "X" + incds = 1 + next +} + +(incds && /^FT [^ ]/) { + print locus, locustag, GeneFamily(gene), gene, sloc[1], sloc[2], (revstrand ? "R" : "D"), + nexon, len, (ok ? 
"Ok" : "Error"), cstart, cstop, cdsseq, translation, product + incds = 0 + next +} + +/^FT \/gene=/ { + split($0, a, "=") + gene = a[2] + gsub("^[^a-z,A-Z]+", "", gene) + gsub("\"", "", gene) + gsub(" ", "_", gene) + next +} + +/^FT \/locus_tag=/ { + split($0, a, "=") + locustag = a[2] + gsub("\"", "", locustag) + gsub(" ", "_", locustag) + next +} + +/^FT \/product=/ { + split($0, a, "=") + product = a[2] + gsub("\"", "", product) + gsub(" ", "_", product) + next +} + +/^FT \/translation=/ { + split($0, a, "=") + translation = a[2] + gsub("\"", "", translation) + gsub(" ", "", translation) + next +} + +END { + if (incds) { + print locus, locustag, GeneFamily(gene), gene, sloc[1], sloc[2], (revstrand ? "R" : "D"), + nexon, len, (ok ? "Ok" : "Error"), cstart, cstop, cdsseq, translation, product + } +} diff --git a/detectors/cds/tools/lib/embl.info.awk b/detectors/cds/tools/lib/embl.info.awk new file mode 100644 index 0000000..dda6cc2 --- /dev/null +++ b/detectors/cds/tools/lib/embl.info.awk @@ -0,0 +1,97 @@ +# +# get feature info from embl +# + +# @include libgbk.awk + +function GC(s, _local_, i, len) { + s = toupper(s) + len = length(s) + gsub("G|C", "", s) + return ((len - length(s)) * 100 / (len ? 
len : 1)) +} + +# +# rules +# + +BEGIN { + print "#locus orga len oklen gc nbCds nbCds_int0 nbCds_int1 nbCds_intsup1 perCds_noex meanCdsSize nbtRNA nbrRNA nboRNA" +} + +/^ID/ { + locus = $2 + gsub(";", "", locus) + next +} + +/^OS/ { + orga = substr($0, 6) + gsub(" ", "_", orga) + next +} + +/^FT source/ { + GetLoc($3, loc); + len = loc[2]; + next +} + +/^FT CDS/ { + meanCds = meanCds * nbCds + LenLocation($3) + nbCds++ + meanCds /= nbCds + n = Nexons($3) + if (n > 3) n = 3 + nbCdx[n]++ + next +} + +/^FT tRNA/ { + nbTrna++ + next +} + +/^FT rRNA/ { + nbRrna++ + next +} + +/^FT mRNA/ { + next +} + +/^FT .*RNA/ { + nbOrna++ + next +} + +/^SQ / { + inseq = 1 + seq = "" + next +} + +inseq && /^ / { + s = $0 + gsub("[0-9]+", "", s) + gsub(" ", "", s) + seq = seq "" s + next +} + +/^\/\// { + oklen = (len == length(seq) ? "ok" : "wrong") + gc = GC(seq) + print locus, orga, len, oklen, gc, nbCds+0, nbCdx[1]+0, \ + nbCdx[2]+0, nbCdx[3]+0, (nbCdx[1]+0)*100/Max(1, nbCds+0), \ + meanCds+0, nbTrna+0, nbRrna+0, nbOrna+0 + nbCds = nbTrna = nbRrna = nbOrna = len = inseq = meanCds = 0 + delete nbCdx + orga = locus = "?" +} + + + + + diff --git a/detectors/cds/tools/lib/embl.intron.awk b/detectors/cds/tools/lib/embl.intron.awk new file mode 100644 index 0000000..83dabb0 --- /dev/null +++ b/detectors/cds/tools/lib/embl.intron.awk @@ -0,0 +1,97 @@ +# +# get intron features from embl +# + +# @include libembl.awk + +BEGIN { + print "#locus locustag genefam gene from to strand intron_num intron_nb acceptor-donor status" + + if (HEADONLY != "") exit(0) + + if (FASTA == "") Error("No FASTA file specified", 1) + + if (! TestPath(FASTA)) Error("Fasta file: '" FASTA "' not found", 1) + + Seq = tolower(ReadFasta(FASTA)) +} + +/^ID / { + locus = $2 + gsub(";", "", locus) + next +} + +/^FT CDS/ { + revstrand = match($3, "^complement") + s = substr($0, 22) + gsub("^complement", "", s) + ok = ! match(s, "complement|order") + if (! 
ok) next + + na = ParseLocation(s, locs) + if (na < 2) next + + delete SINfo + Ninfo = 0 + + val = locs[1][1] + for (i = 2 ; i <= na ; i++) { + if (locs[i][1] < val) ok = 0 + val = locs[i][1] + } + if (! ok) next + + val = locs[1][2] + for (i = 2 ; i <= na ; i++) { + if (locs[i][2] < val) ok = 0 + val = locs[i][2] + } + if (! ok) next + + from = locs[1][2] + 1 + for (i = 2 ; i <= na ; i++) { + to = locs[i][1] - 1 + inseq = SeqLocation(Seq, (from - 4) ".." (to + 4), revstrand) + SINfo[++Ninfo] = from " " to " " (revstrand ? "R" : "D") " "\ + (revstrand ? na-i+1 : i-1) " " na-1 " "\ + substr(inseq, 1,4) "."\ + substr(inseq, 5,6) "-"\ + substr(inseq, length(inseq)-9, 6) "."\ + substr(inseq, length(inseq)-3, 4) " "\ + "ok" + from = locs[i][2] + 1 + } + + gene = "none" + locustag = "none" + next +} + +/^FT \/gene=/ { + split($0, a, "=") + gene = a[2] + gsub("^[^a-z,A-Z]+", "", gene) + gsub("\"", "", gene) + gsub(" ", "_", gene) + next +} + +/^FT \/locus_tag=/ { + split($0, a, "=") + locustag = a[2] + gsub("\"", "", locustag) + gsub(" ", "_", locustag) + next +} + +/^FT \/translation=/ { + for (i = 1 ; i <= Ninfo ; i++) + print locus, locustag, GeneFamily(gene), gene, SINfo[i] + Ninfo = 0 + next +} + +/^\/\// { + locus = "?" 
+} diff --git a/detectors/cds/tools/lib/embl.tofasta.awk b/detectors/cds/tools/lib/embl.tofasta.awk new file mode 100644 index 0000000..0485ffb --- /dev/null +++ b/detectors/cds/tools/lib/embl.tofasta.awk @@ -0,0 +1,36 @@ +# +# get fasta sequence from embl +# + +/^ID / { + locus = $2 + gsub(";", "", locus) + next +} + +/^SQ / { + inseq = 1 + nln = 0 + delete seq + next +} + +/^\/\// { + inseq = 0 + print ">" locus + for (i = 1 ; i <= nln ; i++) + print seq[i] + next +} + +inseq { + s = $0 + gsub(" ", "", s) + gsub("[0-9]+", "", s) + seq[++nln] = s + next +} + + + + diff --git a/detectors/cds/tools/lib/gbk.cds_long.awk b/detectors/cds/tools/lib/gbk.cds_long.awk new file mode 100644 index 0000000..e3f1c7a --- /dev/null +++ b/detectors/cds/tools/lib/gbk.cds_long.awk @@ -0,0 +1,99 @@ +# +# get cds features from genbank (long version) +# +# -v FASTA + +# @include libgbk.awk + +BEGIN { + print "#locus locustag genefam gene from to strand nexon length status start stop dnaseq protseq product" + + if (HEADONLY != "") exit(0) + + if (MAXSPAN == "") MAXSPAN = 10000 + + if (FASTA == "") Error("No FASTA file specified", 1) + + if (! TestPath(FASTA)) Error("Fasta file: '" FASTA "' not found", 1) + + Seq = tolower(ReadFasta(FASTA)) +} + +/^LOCUS/ { + locus = $2 + incds = 0 + next +} + +/^ CDS/ { + revstrand = match($2, "^complement") + s = substr($0, 22) + gsub("^complement", "", s) + ok = ! match(s, "complement|order") + nexon = Nexons(s) + SpanLocation(s, sloc) + spanlen = sloc[2] - sloc[1] + 1 + len = LenLocation(s) + ok = ok && (len < MAXSPAN) + cdsseq = ok ? SeqLocation(Seq, s, revstrand) : "XXX" + cstart = substr(cdsseq, 1,3) + cstop = substr(cdsseq, length(cdsseq)-2) + + gene = "none" + locustag = "none" + product = "none" + translation = "X" + incds = 1 + next +} + +(incds && /^ [^ ]/) { + print locus, locustag, GeneFamily(gene), gene, sloc[1], sloc[2], (revstrand ? "R" : "D"), + nexon, len, (ok ? 
"Ok" : "Error"), cstart, cstop, cdsseq, translation, product + incds = 0 + next +} + +/^ \/gene=/ { + split($0, a, "=") + gene = a[2] + gsub("^[^a-z,A-Z]+", "", gene) + gsub("\"", "", gene) + gsub(" ", "_", gene) + next +} + +/^ \/locus_tag=/ { + split($0, a, "=") + locustag = a[2] + gsub("\"", "", locustag) + gsub(" ", "_", locustag) + next +} + +/^ \/product=/ { + split($0, a, "=") + product = a[2] + gsub("\"", "", product) + gsub(" ", "_", product) + next +} + +/^ \/translation=/ { + split($0, a, "=") + translation = a[2] + gsub("\"", "", translation) + gsub(" ", "", translation) + next +} + +/^\/\// { + locus = "?" +} + +END { + if (incds) { + print locus, locustag, GeneFamily(gene), gene, sloc[1], sloc[2], (revstrand ? "R" : "D"), + nexon, len, (ok ? "Ok" : "Error"), cstart, cstop, cdsseq, translation, product + } +} diff --git a/detectors/cds/tools/lib/gbk.info.awk b/detectors/cds/tools/lib/gbk.info.awk new file mode 100644 index 0000000..3fdadc3 --- /dev/null +++ b/detectors/cds/tools/lib/gbk.info.awk @@ -0,0 +1,97 @@ +# +# get feature info from genbank +# + +# @include libgbk.awk + +function GC(s, _local_, i, len) { + s = toupper(s) + len = length(s) + gsub("G|C", "", s) + return ((len - length(s)) * 100 / len) +} + +# +# rules +# + +BEGIN { + print "#locus orga len oklen gc nbCds nbCds_int0 nbCds_int1 nbCds_intsup1 perCds_noex meanCdsSize nbtRNA nbrRNA nboRNA" +} + +/^LOCUS/ { + locus = $2 + next +} + +/^ ORGANISM/ { + orga = substr($0, 13) + split(orga, a, ";") + orga = a[1] + gsub(" ", "_", orga) + next +} + +/^ source/ { + GetLoc($2, loc); + len = loc[2]; + next +} + +/^ CDS/ { + meanCds = meanCds * nbCds + LenLocation($2) + nbCds++ + meanCds /= nbCds + n = Nexons($2) + if (n > 3) n = 3 + nbCdx[n]++ + next +} + +/^ tRNA/ { + nbTrna++ + next +} + +/^ rRNA/ { + nbRrna++ + next +} + +/^ mRNA/ { + next +} + +/^ .*RNA/ { + nbOrna++ + next +} + +/^ORIGIN/ { + inseq = 1 + seq = "" + next +} + +inseq && /^ +[1-9][0-9]*/ { + s = substr($0, 11) + gsub(" ", "", s) 
+ seq = seq "" s + next +} + +/^\/\// { + oklen = (len == length(seq) ? "ok" : "wrong") + gc = GC(seq) + print locus, orga, len, oklen, gc, nbCds+0, nbCdx[1]+0, \ + nbCdx[2]+0, nbCdx[3]+0, (nbCdx[1]+0)*100/Max(1, nbCds+0), \ + meanCds+0, nbTrna+0, nbRrna+0, nbOrna+0 + nbCds = nbTrna = nbRrna = nbOrna = len = inseq = meanCds = 0 + delete nbCdx + orga = locus = "?" +} + + + + + diff --git a/detectors/cds/tools/lib/gbk.intron.awk b/detectors/cds/tools/lib/gbk.intron.awk new file mode 100644 index 0000000..32aa5be --- /dev/null +++ b/detectors/cds/tools/lib/gbk.intron.awk @@ -0,0 +1,96 @@ +# +# get intron features from genbank +# + +# @include libgbk.awk + +BEGIN { + print "#locus locustag genefam gene from to strand intron_num intron_nb acceptor-donor status" + + if (HEADONLY != "") exit(0) + + if (FASTA == "") Error("No FASTA file specified", 1) + + if (! TestPath(FASTA)) Error("Fasta file: '" FASTA "' not found", 1) + + Seq = tolower(ReadFasta(FASTA)) +} + +/^LOCUS/ { + locus = $2 + next +} + +/^ CDS/ { + revstrand = match($2, "^complement") + s = substr($0, 22) + gsub("^complement", "", s) + ok = ! match(s, "complement|order") + if (! ok) next + + na = ParseLocation(s, locs) + if (na < 2) next + + delete SINfo + Ninfo = 0 + + val = locs[1][1] + for (i = 2 ; i <= na ; i++) { + if (locs[i][1] < val) ok = 0 + val = locs[i][1] + } + if (! ok) next + + val = locs[1][2] + for (i = 2 ; i <= na ; i++) { + if (locs[i][2] < val) ok = 0 + val = locs[i][2] + } + if (! ok) next + + from = locs[1][2] + 1 + for (i = 2 ; i <= na ; i++) { + to = locs[i][1] - 1 + inseq = SeqLocation(Seq, (from - 4) ".." (to + 4), revstrand) + SINfo[++Ninfo] = from " " to " " (revstrand ? "R" : "D") " "\ + (revstrand ? 
na-i+1 : i-1) " " na-1 " "\ + substr(inseq, 1,4) "."\ + substr(inseq, 5,6) "-"\ + substr(inseq, length(inseq)-9, 6) "."\ + substr(inseq, length(inseq)-3, 4) " "\ + "ok" + from = locs[i][2] + 1 + } + + gene = "none" + locustag = "none" + next +} + +/^ \/gene=/ { + split($0, a, "=") + gene = a[2] + gsub("^[^a-z,A-Z]+", "", gene) + gsub("\"", "", gene) + gsub(" ", "_", gene) + next +} + +/^ \/locus_tag=/ { + split($0, a, "=") + locustag = a[2] + gsub("\"", "", locustag) + gsub(" ", "_", locustag) + next +} + +/^ \/translation=/ { + for (i = 1 ; i <= Ninfo ; i++) + print locus, locustag, GeneFamily(gene), gene, SINfo[i] + Ninfo = 0 + next +} + +/^\/\// { + locus = "?" +} diff --git a/detectors/cds/tools/lib/gbk.tofasta.awk b/detectors/cds/tools/lib/gbk.tofasta.awk new file mode 100644 index 0000000..195ebc5 --- /dev/null +++ b/detectors/cds/tools/lib/gbk.tofasta.awk @@ -0,0 +1,32 @@ +# +# get fasta sequence from genbank +# + +/^LOCUS/ { + locus = $2 + next +} + +/^ORIGIN/ { + inseq = 1 + nln = 0 + delete seq +} + +inseq && /^ +[1-9][0-9]*/ { + s = substr($0, 11) + gsub(" ", "", s) + seq[++nln] = s + next +} + +/^\/\// { + print ">" locus + for (i = 1 ; i <= nln ; i++) + print seq[i] +} + + + + + diff --git a/detectors/cds/tools/lib/install.rpackages.r b/detectors/cds/tools/lib/install.rpackages.r new file mode 100755 index 0000000..c6f9778 --- /dev/null +++ b/detectors/cds/tools/lib/install.rpackages.r @@ -0,0 +1,31 @@ +#!/usr/bin/env Rscript +# +# check and install required packages +# + +out <- function(...) 
{ + cat(paste0('+ ', ..., '\n'), file=stderr()) +} + +installed <- function(package) { + package %in% rownames(installed.packages()) +} + +check <- function(package, repos="http://cran.univ-lyon1.fr") { + if (installed(package)) { + out("R package ", package, " installed") + } else { + out("Installing R package ", package, " from ", repos) + install.packages(package, repos=repos) + } + invisible(installed(package)) +} + +check("grid") +check("gridExtra") +check("vcd") +check("plotrix") +check("igraph") + +quit(save='no', status=0) + diff --git a/detectors/cds/tools/lib/make.models.r b/detectors/cds/tools/lib/make.models.r new file mode 100755 index 0000000..b2e0a74 --- /dev/null +++ b/detectors/cds/tools/lib/make.models.r @@ -0,0 +1,224 @@ +#!/usr/bin/env Rscript +# +# compute start, stop, splice-junctions models for core DB +# +# source("make.models.r") +# + +LIB_DIR <- Sys.getenv("LIB_DIR") +if (LIB_DIR == "") LIB_DIR = "." + +source(paste0(LIB_DIR, "/util.base.r")) +source(paste0(LIB_DIR, "/util.cons.r")) +source(paste0(LIB_DIR, "/util.modelio.r")) + +# ------------------------------- +# parameters +# ------------------------------- + +# core cutoffs + +source("db.models.params.txt") + +# ------------------------------- +# genome infos +# ------------------------------- + +notify("loading info table") +chromo <- read.table("db.info.txt", com="", head=T, stringsAsFactors=F) + +# ------------------------------- +# CDS +# ------------------------------- + +notify("loading cds table") +cds <- read.table("db.cds.txt", com="", header=T, stringsAsFactors=F) + +cds$start <- as.factor(cds$start) +cds$stop <- as.factor(cds$stop) + +cds <- cds[cds$status=="Ok",] +cds <- cds[cds$genefam!="none",] + +cds$categ <- "dust" + +x <- sort(table(cds$genefam), dec=T) +ok <- names(x[x >= CORE_NCDS_CUTOFF]) + +cds$categ[cds$genefam %in% ok] <- "core" + +x <- x[! 
names(x) %in% ok] +ok <- names(x[x >= SHEL_NCDS_CUTOFF]) + +cds$categ[cds$genefam %in% ok] <- "shell" + +# + +cds.ori <- cds + +cds.lst <- split(cds.ori, cds.ori$categ) + +# +# write out families +# + +# patterns & names + +invisible(lapply(cds.lst, function(cds) { + + x <- sort(table(cds$genefam), decreasing=T) + tab <- paste0("^", names(x), "$") + names(tab) <- names(x) + + y <- sapply(split(cds$gene, cds$genefam), function(g) { + head(names(sort(table(g), decreasing=T)), 1) + }) + + tab <- cbind(tab, y[names(x)]) + + categ <- unique(cds$categ) + f <- paste0("db.", categ, ".pat.txt") + notify("writing patterns for", categ, ":", f) + write.table(tab, file=f, quote=F, col.names=F, row.names=T) +})) + +# ------------------------------- +# Start models (core only) +# ------------------------------- + +if (! "core" %in% names(cds.lst)) { + notify("*** no gene found in core") + notify("*** please change parameters") + quit(save='no', status=1) +} + +cds <- cds.lst[["core"]] + +# +# start by genes +# + +tab <- split(cds$start, cds$genefam) + +fatg <- sapply(tab, function(x) table(x)["atg"]/length(x)*100) +names(fatg) <- names(tab) + +start.dft <- names(which(fatg >= CORE_START_ATG_CUTOFF)) +start.spc <- names(which(fatg < CORE_START_ATG_CUTOFF)) + +tab <- cds[cds$genefam %in% start.dft,] +tab <- table(tab$start) + +# default model + +x <- sort(tab[tab>=CORE_START_DFT_CUTOFF], decreasing=T) +write.model.start(x, "default") + +# gene specific models + +invisible(sapply(start.spc, function(g) { + x <- cds[cds$genefam == g,] + tx <- table(x$start) + tx <- sort(tx[tx>=CORE_START_OTH_CUTOFF], decreasing=T) + write.model.start(tx, g) +})) + +# ------------------------------- +# Stop models (core only) +# ------------------------------- + +# write default stop model + +tab <- table(cds$stop) +x <- sort(tab[tab>=CORE_STOP_CUTOFF], decreasing=T) +write.model.stop(x, "default") + +# ------------------------------- +# splice junctions +# ------------------------------- + 
+notify("loading intron table") +intron <- read.table("db.intron.txt", com="", header=T, stringsAsFactors=F) + +# remove invalid sequences + +intron$seq <- gsub("\\.|-", "", intron$acceptor.donor) + +lseq <- nchar(gsub("[^acgt]", "", intron$seq)) + +intron <- intron[lseq == 20,] + +# remove genes out of core + +intron <- intron[intron$genefam %in% cds$genefam,] + +# acceptors / donors + +intron$acc <- substr(intron$seq, 5, 6) +intron$don <- substr(intron$seq, 15, 16) + +# consensus + +cons.px <- cons.build(intron$acceptor.donor) +cons.px <- cons.px[,! is.nan(colSums(cons.px))] + +seq.px <- sapply(intron$acceptor.donor, function(s) gsub("[^acgt]", "", s)) + +conf.px <- cons.confusion(cons.px, seq.px) + +sfam <- split(conf.px$l2scor, intron$genefam) +sfam <- sfam[order(sapply(sfam, median))] + +# extract splice exceptions + +name.bad <- names(which(sapply(sfam, median) < 0)) +name.spc <- names(which(sapply(sfam[name.bad], length) >= CORE_SPLICE_CUTOFF)) +name.ok <- setdiff(unique(intron$genefam), name.bad) +name.bad <- setdiff(name.bad, name.spc) +name.list <- c(sapply(name.spc, function(x) x), list(default=name.ok)) + +cons <- lapply(name.list, function(x) cons.build(intron[intron$genefam %in% x, "acceptor.donor"])) + +# write junction models + +invisible(sapply(names(cons), function(n) write.model.splice3(cons[[n]], n))) +invisible(sapply(names(cons), function(n) write.model.splice5(cons[[n]], n))) + +# use uniform model for bad guys + +invisible(sapply(name.bad, function(n) write.unif.splice(3, n))) +invisible(sapply(name.bad, function(n) write.unif.splice(5, n))) + +invisible(write.unif.splice('', "none")) + +# ------------------------------- +# keep data for plotting +# ------------------------------- + +DB <- list() + +params <- list() + +params$CORE_NCDS_CUTOFF <- CORE_NCDS_CUTOFF +params$CORE_START_ATG_CUTOFF <- CORE_START_ATG_CUTOFF +params$CORE_START_DFT_CUTOFF <- CORE_START_DFT_CUTOFF +params$CORE_START_OTH_CUTOFF <- CORE_START_OTH_CUTOFF 
+params$CORE_STOP_CUTOFF <- CORE_STOP_CUTOFF +params$CORE_SPLICE_CUTOFF <- CORE_SPLICE_CUTOFF + +params$SHEL_NCDS_CUTOFF <- SHEL_NCDS_CUTOFF + +DB$params <- params +DB$chromo <- chromo +DB$cds.lst <- cds.lst +DB$intron <- intron +DB$cons <- cons + +notify("saving db.data.Rdata") +save(DB, file="db.data.Rdata") + +# ------------------------------- +# end +# ------------------------------- + +quit(save='no') diff --git a/detectors/cds/tools/lib/plot.models.r b/detectors/cds/tools/lib/plot.models.r new file mode 100755 index 0000000..223b091 --- /dev/null +++ b/detectors/cds/tools/lib/plot.models.r @@ -0,0 +1,424 @@ +#!/usr/bin/env Rscript +# +# plots models previously computed by make.models.r +# +# source("plot.models.r") +# + +require(vcd) +require(plotrix) + +LIBDIR <- Sys.getenv("LIB_DIR") +if (LIBDIR == "") LIBDIR = "." + +source(paste0(LIBDIR, "/util.base.r")) +source(paste0(LIBDIR, "/util.plot.r")) +source(paste0(LIBDIR, "/util.cons.r")) +source(paste0(LIBDIR, "/util.grid.r")) + +# ------------------------------- +# setup +# ------------------------------- + +OUT.DEV <- TRUE +OUT.TYPE <- "pdf" +OUT.FILE <- "models" + +if (OUT.DEV) uplot.init.dev(OUT.FILE, OUT.TYPE) + +# ------------------------------- +# Load data +# ------------------------------- + +notify("loading DB data") +load("db.data.Rdata") + +params <- DB$params +chromo <- DB$chromo +cds.lst <- DB$cds.lst +intron <- DB$intron +cons <- DB$cons + +# ------------------------------- +# Genomes infos +# ------------------------------- + +grd.titlepage("Species") +grd.textpage(lineno=1, "# org: ", nrow(chromo)) + +# +# general stats +# + +grd.hist(chromo, "len", main="Histogram of chromosome length") +grd.hist(chromo, "gc", pos.quant=c(0.75, 0.6), main="Histogram of chromosome GC") +grd.hist(chromo, "nbCds") +grd.fplot(chromo, "len", "nbCds") + +# +# nb cds no introns +# + +chromo$nbCds_Mono <- chromo$nbCds_int0 +chromo$nbCds_Poly <- chromo$nbCds_int1 + chromo$nbCds_intsup1 +chromo$percentPoly <- 
round(chromo$nbCds_Poly * 100 / (chromo$nbCds_Poly + chromo$nbCds_Mono)) + +grd.hist(chromo, "nbCds_Mono", main="Histogram of monoexonic Cds") +grd.hist(chromo, "nbCds_Poly", main="Histogram of polyexonic Cds") +grd.hist(chromo, "percentPoly", pos.sum=c(0.23,0.6), main="Histogram of % polyexonic") + +grd.fplot(chromo, "nbCds", "nbCds_Mono", TRUE, ablin=list(a=0, b=1, col=3)) +grd.fplot(chromo, "nbCds", "nbCds_Poly") + +# +# cds size +# + +grd.hist(chromo, "meanCdsSize", pos.quant=NULL, main="Histogram of Cds size") + +# ------------------------------- +# CDS +# ------------------------------- + +cds.all <- do.call(rbind, cds.lst) + +grd.titlepage("CDS") + +grd.textpage(lineno=1, "# core cds core cutoff: ", params$CORE_NCDS_CUTOFF) +grd.textpage(lineno=2, "# core cds shell cutoff: ", params$SHEL_NCDS_CUTOFF) + +grd.textpage(lineno=4, "# total cds: ", nrow(cds.all)) +grd.textpage(lineno=5, "# core cds: ", nrow(cds.lst[["core"]])) +grd.textpage(lineno=6, "# shell cds: ", nrow(cds.lst[["shell"]])) +grd.textpage(lineno=7, "# dust cds: ", nrow(cds.lst[["dust"]])) + +grd.textpage(lineno=9, "# total org: ", length(unique(cds.all$X.locus))) +grd.textpage(lineno=10, "# core org: ", length(unique(cds.lst[["core"]]$X.locus))) +grd.textpage(lineno=11, "# shell org: ", length(unique(cds.lst[["shell"]]$X.locus))) +grd.textpage(lineno=12, "# dust org: ", length(unique(cds.lst[["dust"]]$X.locus))) + + +grd.textpage(lineno=14, "# total families: ", length(unique(cds.all$genefam))) +grd.textpage(lineno=15, "# core families: ", length(unique(cds.lst[["core"]]$genefam))) +grd.textpage(lineno=16, "# shell families: ", length(unique(cds.lst[["shell"]]$genefam))) +grd.textpage(lineno=17, "# dust families: ", length(unique(cds.lst[["dust"]]$genefam))) + +uplot.setup(mfrow=c(2,2), xpd=NA) + +x <- sapply(cds.lst, nrow) +uplot.pie(x, main="CDS", text.r=1.1, col=c(3,2,4)) + +x <- sapply(cds.lst, function(cds) length(unique(cds$X.locus))) +uplot.pie(x, main="ORG", text.r=1.1, col=c(3,2,4)) + +x 
<- sapply(cds.lst, function(cds) length(unique(cds$genefam))) +uplot.pie(x, main="FAM", text.r=1.1, col=c(3,2,4)) + +uplot.setup(xpd=F) + +# +# plot genes cutoff +# + +cds.all <- do.call(rbind, cds.lst) +cds.byfam <- split(cds.all, cds.all$genefam) + +tab <- sort(sapply(cds.byfam, nrow), decreasing=T) +cols <- rep("red", length(tab)) +cols[tab >= params$SHEL_NCDS_CUTOFF] <- "blue" +cols[tab >= params$CORE_NCDS_CUTOFF] <- "green" +barplot(tab, col=cols, border=NA, main="# genes") + +cols <- cols[tab >= 50] +tab <- tab[tab >= 50] +barplot(tab, col=cols, border=NA, las=2, cex.names=0.5, main="# genes in core") +abline(h=params$CORE_NCDS_CUTOFF, col=1) +text(50, 200, "CORE_NCDS_CUTOFF", pos=3) + +# +# cds length for core +# + +invisible(sapply(c("core", "shell", "dust"), function(what) { + + cds <- cds.lst[[what]] + + x <- split(cds$length, cds$genefam) + x <- x[order(sapply(x, mean))] + + uplot.setup(mfrow=c(2,1)) + + boxplot(x, pars = list(boxwex = 0.8, staplewex = 0.5, outwex = 0.5, outcex = 0.1), + las=2, cex.axis=0.5, main=paste0(what, " genes - length distribution")) + + boxplot(x, pars = list(boxwex = 0.8, staplewex = 0.5, outwex = 0.5, outcex = 0.1), ylim=c(0,2000), + las=2, cex.axis=0.5, main=paste0(what, " genes - length distribution zoom")) + + uplot.setup() +})) + +# ------------------------------- +# starts & stops +# ------------------------------- + +cds <- cds.lst[["core"]] + +grd.titlepage("Starts and Stops") + +tab <- sort(table(cds$start), dec=T) +tab <- tab[tab >= 100] +tab <- tab / sum(tab) * 100 + +barplot(tab, log="y", las=1, ylim=c(0.1, 100), main="start frequencies (%)") +text(0.7, 50, round(tab[1], 2)) +text(1.9, 2.3, round(tab[2], 2)) +text(3.1, 2.3, round(tab[3], 2)) + +# +# start by org and gc +# + +x <- split(cds$start, cds$X.locus) + +fatg <- sapply(x, function(x) table(x)["atg"]/length(x)*100) +names(fatg) <- names(x) +chromo$fatg <- round(fatg[chromo$X.locus], 2) + +fgtg <- sapply(x, function(x) table(x)["gtg"]/length(x)*100) 
+names(fgtg) <- names(x) +chromo$fgtg <- round(fgtg[chromo$X.locus], 2) + +facg <- sapply(x, function(x) table(x)["acg"]/length(x)*100) +names(facg) <- names(x) +chromo$facg <- round(facg[chromo$X.locus], 2) + +grd.hist(chromo, "fatg", pos.quant=c(0.5, 0.6), main="Histogram of atg freq. by org") +grd.hist(chromo, "fgtg", main="Histogram of gtg freq. by org") +grd.hist(chromo, "facg", pos.sum=c(0.3, 0.6), main="Histogram of acg freq. by org") + +grd.fplot(chromo, "gc", "fatg", main="atg freq. by org GC", pos=c(0.2, 0.3)) +grd.fplot(chromo, "gc", "fgtg", main="gtg freq. by org GC") +grd.fplot(chromo, "gc", "facg", main="acg freq. by org GC") + +ter <- cbind(fatg, fgtg, facg) +colnames(ter) <- c("ATG", "GTG", "ACG") +igc <- cut(chromo$gc, breaks=quantile(chromo$gc, seq(0, 1, 0.1)), include.lowest=T, labels=1:10) +cols <- rainbow(10)[igc] +ternaryplot(ter, col=cols, cex=0.2, main="Start by org", labels="outside") + +# +# start by common genes +# + +x <- split(cds$start, cds$genefam) + +fatg <- sapply(x, function(x) table(x)["atg"]/length(x)*100) +names(fatg) <- names(x) + +fgtg <- sapply(x, function(x) table(x)["gtg"]/length(x)*100) +names(fgtg) <- names(x) + +facg <- sapply(x, function(x) table(x)["acg"]/length(x)*100) +names(facg) <- names(x) + +barplot(sort(fatg)[1:10], las=2, main="atg freq. by gene") +barplot(sort(fgtg, dec=T)[1:10], las=2, main="gtg freq. by gene") +barplot(sort(facg, dec=T)[1:10], las=2, main="acg freq. 
by gene") + +ter <- cbind(fatg, fgtg, facg) +colnames(ter) <- c("ATG", "GTG", "ACG") +ternaryplot(ter, col=1, cex=0.5, id=rownames(ter), main="Starts by genes", labels="outside") + +# ------------------------------- +# stops +# ------------------------------- + +# +# stop by org and gc +# + +tab <- sort(table(cds$stop), dec=T) +tab <- tab[tab >= 100] +tab <- tab / sum(tab) * 100 + +barplot(tab, log="y", las=1, ylim=c(0.1, 100), main="stop frequencies (%)") +text(0.7, 80, round(tab[1], 2)) +text(1.9, 30, round(tab[2], 2)) +text(3.1, 25, round(tab[3], 2)) + +x <- split(cds$stop, cds$X.locus) + +ftaa <- sapply(x, function(x) table(x)["taa"]/length(x)*100) +names(ftaa) <- names(x) +chromo$ftaa <- round(ftaa[chromo$X.locus], 2) + +ftag <- sapply(x, function(x) table(x)["tag"]/length(x)*100) +names(ftag) <- names(x) +chromo$ftag <- round(ftag[chromo$X.locus], 2) + +ftga <- sapply(x, function(x) table(x)["tga"]/length(x)*100) +names(ftga) <- names(x) +chromo$ftga <- round(ftga[chromo$X.locus], 2) + +grd.hist(chromo, "ftaa", pos.quant=c(0.7, 0.6), main="Histogram of taa freq. by org") +grd.hist(chromo, "ftag", pos.quant=c(0.8, 0.6), main="Histogram of tag freq. by org") +grd.hist(chromo, "ftga", pos.quant=c(0.8, 0.6), main="Histogram of tga freq. by org") + +grd.fplot(chromo, "gc", "ftaa", main="taa freq. by org GC", pos=c(0.2, 0.3)) +grd.fplot(chromo, "gc", "ftag", main="tag freq. by org GC") +grd.fplot(chromo, "gc", "ftga", main="tga freq. 
by org GC") + +ter <- cbind(ftaa, ftag, ftga) +colnames(ter) <- c("TAA", "TAG", "TGA") +igc <- cut(chromo$gc, breaks=quantile(chromo$gc, seq(0, 1, 0.1)), include.lowest=T, labels=1:10) +cols <- rainbow(10)[igc] +ternaryplot(ter, col=cols, cex=0.2, main="Stops by org", labels="outside") + +# +# stop by common genes +# + +x <- split(cds$stop, cds$genefam) + +ftaa <- sapply(x, function(x) table(x)["taa"]/length(x)*100) +names(ftaa) <- names(x) + +ftag <- sapply(x, function(x) table(x)["tag"]/length(x)*100) +names(ftag) <- names(x) + +ftga <- sapply(x, function(x) table(x)["tga"]/length(x)*100) +names(ftga) <- names(x) + +barplot(sort(ftaa), las=2, cex.names=0.5, ylim=c(0,100), main="taa freq. by gene") +barplot(sort(ftag), las=2, cex.names=0.5, ylim=c(0,100), main="tag freq. by gene") +barplot(sort(ftga), las=2, cex.names=0.5, ylim=c(0,100), main="tga freq. by gene") + +ter <- cbind(ftaa, ftag, ftga) +colnames(ter) <- c("TAA", "TAG", "TGA") +ternaryplot(ter, col=1, cex=0.3, id=rownames(ter), main="Stops by genes", labels="outside") + +# ------------------------------- +# splice junctions +# ------------------------------- + +grd.titlepage("Splice Junctions") + +grd.textpage(lineno=1, "# intron in core: ", nrow(intron)) + +# +# intron size +# + +intron$size <- intron$to - intron$from + 1 + +grd.hist(intron, "size", pos.quant=NULL, main="Histogram of intron size", br=1000, xlim=c(0,2000)) + +# +# nb intron / gene +# + +x <- split(intron, intron$genefam) +x <- x[order(sapply(x, function(x) mean(x$intron_nb)), decreasing=T)] + +nmax <- max(intron$intron_nb) +lintron <- lapply(x, function(x) x$intron_nb) +mintron <- sapply(lintron, function(x) table(factor(x, levels=1:nmax))) + +lintron0 <- table(cds[cds$nexon == 1,"genefam"])[names(lintron)] +mintron <- rbind("0"=lintron0, mintron) +mintron <- t(t(mintron)/colSums(mintron)) + +mintron[mintron==0] <- NA + +nn <- nrow(mintron) +xx <- mintron[nn:1,] +ll <- lapply(1:nn, function(i) xx[i,]) +mintron <- mintron[,do.call(order, 
c(ll, decreasing=T))] + +battleship.plot(mintron, maxxspan=0.3, maxyspan=0.3, + cex.labels=0.7, + main="% intron per polyexonic gene") + +# +# acceptors / donors +# + +tab <- sort(table(intron$acc), dec=T) +tab <- tab[tab >= 100] +tab <- tab / sum(tab) * 100 + +barplot(tab, log="y", las=1, ylim=c(0.1, 100), main="acceptor frequencies (%)") +text(0.7, 50, round(tab[1], 2)) +text(1.9, 3, round(tab[2], 2)) +text(3.1, 2.3, round(tab[3], 2)) + +tab <- sort(table(intron$don), dec=T) +tab <- tab[tab >= 100] +tab <- tab / sum(tab) * 100 + +barplot(tab, log="y", las=1, ylim=c(0.1, 100), main="donor frequencies (%)") +text(0.7, 50, round(tab[1], 1)) +text(1.9, 40, round(tab[2], 1)) +text(3.1, 15, round(tab[3], 1)) +text(4.3, 12, round(tab[4], 1)) + +# +# consensus all +# + +cons$all <- cons.build(intron$acceptor.donor) + +invisible(sapply(rev(names(cons)), function(what) { + cons.plot(cons[[what]], paste0("consensus ", what)) +})) + +# +# default consensus score by consensus length +# + +cons.def <- cons[["default"]] +cons.def <- cons.def[,! 
is.nan(colSums(cons.def))] +seq.def <- sapply(intron$acceptor.donor, function(s) gsub("[^acgt]", "", s)) + +epx <- apply(cons.def, 2, function(col) -sum(col * log(col, base=4))) +opx <- order(epx) + +conf.def <- lapply(seq(2, length(opx), by=2), function(n) { + pos <- head(opx, n) + notify(n, "/", length(opx)) + cons.confusion(cons.def, seq.def, thresh=0, pos=pos) +}) + +acc <- sapply(conf.def, function(x) x$acc) +sen <- sapply(conf.def, function(x) x$sen) +sel <- sapply(conf.def, function(x) x$sel) + +plot(sel, ylim=c(0.7, 1), pch=1, type="b", main="accuracy by nb consensus positions", ylab="") +lines(sen, type="b", pch=2) +lines(acc, type="b", pch=3) +legend(1, 0.95, c("sensit.", "select.", "accur."), pch=1:3, horiz=T, bty="n") + +# +# default consensus score by genes +# + +conf.def <- cons.confusion(cons.def, seq.def, thresh=0) +cons.histconf(conf.def) + +sfam <- split(conf.def$l2scor, intron$genefam) +sfam <- sfam[order(sapply(sfam, median))] + +boxplot(sfam, pars = list(boxwex = 0.8, staplewex = 0.5, outwex = 0.5, outcex = 0.1), + las=2, cex.axis=0.7, main="default junction logr score by genes") +abline(h=0) + +# +# end +# + +if (OUT.DEV) { + cat("+ plot file:", paste0(OUT.FILE, ".", OUT.TYPE), "\n") + invisible(dev.off()) +} + +quit(save='no') diff --git a/detectors/cds/tools/lib/summarize_cmp.r b/detectors/cds/tools/lib/summarize_cmp.r new file mode 100755 index 0000000..145564a --- /dev/null +++ b/detectors/cds/tools/lib/summarize_cmp.r @@ -0,0 +1,71 @@ +#!/usr/bin/env Rscript +# +# plot summary graphics of comparisons +# +# + +LIBDIR <- Sys.getenv("LIB_DIR") +if (LIBDIR == "") LIBDIR = "." 
+ +source(paste0(LIBDIR, "/util.plot.r")) + +COLORS <- 2:10 + +# + +OUT.DEV <- TRUE +OUT.TYPE <- "pdf" +OUT.FILE <- "compare" +if (OUT.DEV) uplot.init.dev(OUT.FILE, OUT.TYPE) + +# + +tab <- read.table("compare.txt", header=T, comment.char="", stringsAsFactors=F) + +# + +par(xpd=NA) + +# + +sel <- c("cor", "alcor", "acc", "wrong", "over", "misstot") +tab$ptot <- rowSums(tab[,sel]) +for (s in sel) + tab[,paste0("p", s)] <- tab[,s] * 100 / tab$ptot + +colors <- head(COLORS, length(sel)) + +cols <- paste0("p", sel) +ord <- order(tab$pcor+tab$palcor+tab$pacc, decreasing=T) + +barplot(t(tab[ord,cols]), names.arg=tab$X.org[ord], + ylim=c(0,100), col=colors, las=2, cex.names=0.5) + +legend(0, 110, sel, fill=colors, cex=0.7, horiz=T) + +# + +sel <- c("cor", "alcor", "acc", "wrong", "over", "misschlo") +tab$rtot <- rowSums(tab[,sel]) +for (s in sel) + tab[,paste0("r", s)] <- tab[,s] * 100 / tab$rtot + +colors <- head(COLORS, length(sel)) + +cols <- paste0("r", sel) +ord <- order(tab$rcor+tab$ralcor+tab$racc, decreasing=T) + +barplot(t(tab[ord,cols]), names.arg=tab$X.org[ord], + ylim=c(0,100), col=colors, las=2, cex.names=0.5) + +legend(0, 110, sel, fill=colors, cex=0.7, horiz=T) + +# + +if (OUT.DEV) { + cat("# plot file:", paste0(OUT.FILE, ".", OUT.TYPE), "\n") + invisible(dev.off()) +} + +quit(save='no') + diff --git a/detectors/cds/tools/lib/summary.cmp.awk b/detectors/cds/tools/lib/summary.cmp.awk new file mode 100644 index 0000000..6d1898f --- /dev/null +++ b/detectors/cds/tools/lib/summary.cmp.awk @@ -0,0 +1,54 @@ +# +# + +function getOrg(s, _local_, a, na, org) { + na = split(s, a, "/") + na = split(a[na], a, "\\.") + return a[1] +} + + +BEGIN { + PROCINFO["sorted_in"] = "@ind_num_asc" + print "#org tot cor alcor acc wrong over misstot misschlo missoth" +} + +/MISSED in ChloroDB/ { + org = getOrg($1) + Org[org]++ + Cnt[org]["MISSCHLORO"] = $2 + next +} + +/MISSED not in ChloroDB/ { + org = getOrg($1) + Org[org]++ + Cnt[org]["MISSNOTCHLORO"] = $2 + next +} + +/^#/ { 
next } + +/^.*:MATCH/ { + org = getOrg($1) + Org[org]++ + split($NF, a, "\\.") + Cnt[org][a[1]]++ +} + +END { + for (org in Org) { + Cnt[org]["TOTAL"] = Cnt[org]["CORRECT"] + Cnt[org]["ALMOST_CORRECT"] \ + + Cnt[org]["ACCEPTABLE"] + Cnt[org]["WRONG"] \ + + Cnt[org]["MISSED"] + } + for (org in Org) { + print org, Cnt[org]["TOTAL"]+0, Cnt[org]["CORRECT"]+0, \ + Cnt[org]["ALMOST_CORRECT"]+0, Cnt[org]["ACCEPTABLE"]+0, \ + Cnt[org]["WRONG"]+0, Cnt[org]["OVERPRED"]+0, \ + Cnt[org]["MISSED"]+0, \ + Cnt[org]["MISSCHLORO"]+0, Cnt[org]["MISSNOTCHLORO"]+0 + + } + +} diff --git a/detectors/cds/tools/lib/util.base.r b/detectors/cds/tools/lib/util.base.r new file mode 100644 index 0000000..ea1690e --- /dev/null +++ b/detectors/cds/tools/lib/util.base.r @@ -0,0 +1,12 @@ +# +# R basic utilities +# + +# +# notify on stderr +# + +notify <- function(...) cat("+", ..., "\n") + + + diff --git a/detectors/cds/tools/lib/util.cons.r b/detectors/cds/tools/lib/util.cons.r new file mode 100644 index 0000000..707b503 --- /dev/null +++ b/detectors/cds/tools/lib/util.cons.r @@ -0,0 +1,109 @@ +# +# R consensus utilities +# + +# +# compute consensus +# + +cons.build <- function(seqs, backcount=1) { + xx <- do.call(rbind, sapply(seqs, strsplit, "", USE.NAMES=F)) + lv <- c("a", "c", "g", "t", ".", "-") + mx <- apply(xx, 2, function(x) table(factor(x, levels=lv)))[1:4,] + cx <- colSums(mx) + mx <- mx + backcount + mx[,cx==0] <- 0 + apply(mx, 2, function(x) x / sum(x)) +} + +# +# score consensus +# + +cons.score <- function(cons, seq, pos=1:ncol(cons)) { + seq <- strsplit(seq, "")[[1]] + if (length(seq) != ncol(cons)) { + warning("incompatible seq and cons size") + return(NA) + } + ppx <- sapply(pos, function(i) cons[seq[i],i]) + sum(log10(ppx+1e-6)) +} + +# +# logratio to uniform model score +# + +cons.logratio <- function(cons, seq, m0=NULL, pos=1:ncol(cons)) { + if (is.null(m0)) { + m0 <- matrix(rep(0.25, 4), nrow=4, ncol=ncol(cons)) + rownames(m0) <- c('a', 'c', 'g', 't') + } + + sc <- 
cons.score(cons, seq, pos=pos) + sc0 <- cons.score(m0, seq, pos=pos) + + 2 * (log(10^sc, base=2) - log(10^sc0, base=2)) +} + +# +# shuffle sequence +# + +seq.shuf <- function(seq) { + paste0(sample(strsplit(seq, "")[[1]], nchar(seq), replace=F), collapse="") +} + +# +# compute confusion matrix between actual and shuffled sequences +# + +cons.confusion <- function(cons, seq, m0=NULL, pos=1:ncol(cons), thresh=0) { + som <- function(x) sum(x, na.rm=T) + + res <- list() + res$l2scor <- l2scor <- sapply(seq, function(s) cons.logratio(cons, s, m0=m0, pos=pos)) + + seq <- sapply(seq, seq.shuf) + res$r2scor <- r2scor <- sapply(seq, function(s) cons.logratio(cons, s, m0=m0, pos=pos)) + + res$conf <- conf <- matrix(c(som(l2scor >= thresh), som(l2scor < thresh), + som(r2scor >= thresh), som(r2scor < thresh)), + nrow=2, byrow=T) + + res$acc <- sum(diag(conf)) / sum(conf) + res$sen <- conf[1,1] / sum(conf[1,]) + res$sel <- conf[1,1] / sum(conf[,1]) + + res +} + +# +# plot consensus +# + + +cons.plot <- function(cons, main="consensus") { + cols <- c("blue", "orange", "red", "green") + bp <- barplot(cons, col=cols, ylim=c(0,1), main=main) + plx <- apply(cons, 2, function(col) -sum(col * log(col+1e-6, base=4))) + lines(bp, plx, type="b", pch=19) + legend(0, 1.1, c("a","c","g","t"), fill=cols, horiz=T, xpd=NA, bty="n") + legend(20, 1.1, "entropy", pch=19, horiz=T, xpd=NA, bty="n") + invisible() +} + +# +# plot confusion scores histograms +# + +cons.histconf <- function(conf, main="junction logr score") { + lrh <- hist(c(conf$l2scor, conf$r2scor), br=50, plot=F) + lh <- hist(conf$l2scor, br=lrh$breaks, plot=F) + rh <- hist(conf$r2scor, br=lrh$breaks, plot=F) + xx <- rbind(lh$counts, rh$counts) / sum(lh$counts) + colnames(xx) <- lrh$mids + barplot(xx, col=c(3,2), beside=T, main=main) + legend(0, 0.1, c("true", "shuffled"), fill=c(3,2), horiz=F, xpd=NA) + invisible() +} diff --git a/detectors/cds/tools/lib/util.grid.r b/detectors/cds/tools/lib/util.grid.r new file mode 100644 index 
0000000..f8d34cd --- /dev/null +++ b/detectors/cds/tools/lib/util.grid.r @@ -0,0 +1,93 @@ +# +# R misc grid plotting +# + +require(grid) +require(gridExtra) + +# +# get line height +# + +grd.lineheight <- function(s="X") { + convertHeight(unit(1,"strheight", s), "native", valueOnly=T) +} + +# +# quantile table +# + +grd.qtab <- function(df, what, cols, n=5) { + df <- df[order(df[,what], decreasing=T),cols] + sep <- head(df,1) + sep[] <- "-" + rbind(head(df, n), sep, tail(df, n)) +} + +# +# histogram with tables +# + +grd.hist <- function(df, what, cols = c(1,2, which(colnames(df) == what)), + breaks=50, pos.sum=c(0.2,0.6), pos.quant=c(0.7,0.6), cex=0.7, + main=paste0("Histogram of ", what), ...) { + hist(df[,what], breaks=breaks, xlab=what, main=main, ...) + if (! is.null(pos.sum)) { + pushViewport(viewport(pos.sum[1], pos.sum[2], gp=gpar(cex=cex))) + grid.table(x<-summary(df[,what]), rows=names(x)) + popViewport() + } + if (! is.null(pos.quant)) { + pushViewport(viewport(pos.quant[1], pos.quant[2], gp=gpar(cex=cex))) + grid.table(grd.qtab(df, what, cols), rows=NULL) + popViewport() + } + invisible() +} + +# +# plot with fit +# + +grd.fplot <- function(df, what.x, what.y, linfit=T, pos=c(0.2, 0.8), ablin=NULL, ...) { + plot(df[,what.x], df[,what.y], xlab=what.x, ylab=what.y, ...) + if (linfit) { + fit <- lm(df[,what.y] ~ df[,what.x]) + abline(fit, col=2) + pushViewport(viewport(gp=gpar(col=2))) + a <- sprintf("%.2e", coef(fit)[2]) + b <- sprintf("%.2e", coef(fit)[1]) + grid.text(paste0(what.y, " = ", a, " * ", what.x, " + ", b), + pos[1], pos[2], just="left") + pos[2] = pos[2] - 2 * grd.lineheight() + grid.text(paste0("R2=", round(summary(fit)$r.squared, 3)), + pos[1], pos[2], just="left") + popViewport() + } + if (! 
is.null(ablin)) + do.call(abline, ablin) + invisible() +} + +# +# write text +# + +grd.textpage <- function(..., lineno=0, left=0.1, top=0.9, cex=1, fact=1.4) { + txt <- do.call(paste0, list(...)) + pushViewport(viewport(gp=gpar(cex=cex))) + grid.text(txt, left, top-lineno*grd.lineheight()*fact, just="left") + popViewport() + invisible(txt) +} + +# +# title page +# + +grd.titlepage <- function(title, x=0.5, y=0.7, cex=3, ...) { + notify("processing", title) + grid.newpage() + grid.text(title, x, y, gp=gpar(cex=cex), ...) + invisible() +} diff --git a/detectors/cds/tools/lib/util.modelio.r b/detectors/cds/tools/lib/util.modelio.r new file mode 100644 index 0000000..76c725f --- /dev/null +++ b/detectors/cds/tools/lib/util.modelio.r @@ -0,0 +1,90 @@ +# +# R models I/O utilities +# + +# +# write start model +# + +write.model.start <- function(frq, what) { + dir.create("models", showWarnings=F) + fil <- paste0("models/start.", what, ".frq") + notify("writing start model:", fil) + cat("# start model :", what, "\n", file=fil) + for (x in names(frq)) + cat(x, frq[x]/sum(frq), frq[x], "\n", file=fil, append=T) + invisible(fil) +} + +# +# write stop model +# + +write.model.stop <- function(frq, what) { + dir.create("models", showWarnings=F) + fil <- paste0("models/stop.", what, ".frq") + notify("writing stop model:", fil) + cat("# stop model :", what, "(freq. 
ignored)\n", file=fil) + for (x in names(frq)) + cat(x, frq[x]/sum(frq), frq[x], "\n", file=fil, append=T) + invisible(fil) +} + +# +# write splice3 model +# [FIXME] positions are hard-coded +# + +write.model.splice3 <- function(cons, what) { + dir.create("models", showWarnings=F) + fil <- paste0("models/splice3.", what, ".frq") + notify("writing splice3 model:", fil) + .catcons <- function(i) { + cat(round(cons[c("a","c","g","t"), i]*100, 0), "\n", + file=fil, append=T) + } + cat("# 3' splice model :", what, "\n", file=fil) + cat("# A C G T\n", file=fil, append=T) + sapply(seq.int(1, 4), .catcons) + cat("splice\n", file=fil, append=T) + sapply(seq.int(6, 11), .catcons) + invisible(fil) +} + +# +# write splice5 model +# [FIXME] positions are hard-coded +# + +write.model.splice5 <- function(cons, what) { + dir.create("models", showWarnings=F) + fil <- paste0("models/splice5.", what, ".frq") + notify("writing splice5 model:", fil) + .catcons <- function(i) { + cat(round(cons[c("a","c","g","t"), i]*100, 0), "\n", + file=fil, append=T) + } + cat("# 5' splice model :", what, "\n", file=fil) + cat("# A C G T\n", file=fil, append=T) + sapply(seq.int(13, 18), .catcons) + cat("splice\n", file=fil, append=T) + sapply(seq.int(20, 23), .catcons) + invisible(fil) +} + +# +# write splice3/5 uniform (null) model: 25% for each of A C G T on both sides of the splice marker +# + +write.unif.splice <- function(pos, what) { + dir.create("models", showWarnings=F) + fil <- paste0("models/splice", pos, ".", what, ".frq") + notify("writing uniform splice", pos, "model:", fil) + cat("# 3'/5' splice null model\n", file=fil) + cat("# A C G T\n", file=fil, append=T) + cat("25 25 25 25\n", file=fil, append=T) + cat("splice\n", file=fil, append=T) + cat("25 25 25 25\n", file=fil, append=T) + invisible(fil) +} + diff --git a/detectors/cds/tools/lib/util.plot.r b/detectors/cds/tools/lib/util.plot.r new file mode 100644 index 0000000..056e4e6 --- /dev/null +++ b/detectors/cds/tools/lib/util.plot.r @@ -0,0 +1,105 @@ +# +# R plot utilities +# + +# +# setup graphic 
device +# tiff: high resolution 600 dpi +# pdf +# + +uplot.init.dev <- function(fname, type="pdf", width=7, height=7, resol=600, ...) { + fname <- paste0(fname, ".", type) + res <- NULL + if (type == "tiff") { + res <- tiff(fname, width=width, height=height, units="in", res=resol, ...) + } + if (type == "pdf") { + res <- pdf(fname, width=width, height=height, ...) + } + invisible(res) +} + +# +# convert <fname>.pdf to <fname>.tif using ghostscript (runs the gs executable through system()) +# + +uplot.convert2tiff <- function(fname, resol=600) { + infile <- paste0(fname, ".pdf") + oufile <- paste0(fname, ".tif") + cmd <- paste0("echo quit | gs -r", resol, " -dBATCH -dNOPAUSE -sDEVICE=tiff12nc -sCompression=lzw -sOutputFile=", oufile, " ", infile) + system(cmd) +} + +# +# default plot setup +# + +uplot.setup <- function(mfrow=c(1,1), + las=1, + mgp=c(2, 0.7, 0), + oma=c(0, 0, 0, 0), + mar=c(4, 3, 3, 2), + cex.main=1, + font.main=1, + family='Helvetica', ...) { + par(mfrow=mfrow, las=las, mgp=mgp, oma=oma, mar=mar, cex.main=cex.main, font.main=font.main, family=family, ...) +} + +# +# pie plot +# + +uplot.pie <- function(tab, main="", labels=c("name", "val", "per"), text.r=0.5, text.col="black", text.cex=1, main.pos=c(0,0), main.col="black", ...) { + pie(tab, edges=2000, main="", labels="", ...) 
+ text(main.pos[1], main.pos[2], main, cex=1.5, col=main.col) + prop <- tab/sum(tab) + theta <- 2*pi * (cumsum(prop) - prop/2) + lab <- list(name=names(tab), val=tab, per=sprintf("%d%%", round(prop*100))) + lab <- apply(data.frame(lab[labels]), 1, paste, collapse="\n") + if (length(lab) > 0) + text(text.r*cos(theta), text.r*sin(theta), lab, cex=text.cex, col=text.col) + invisible(NULL) +} + +# +# plot utility : color representation of a table +# + +uplot.table <- function(tab, col=heat.colors(100), with.lines=TRUE) { + image(as.matrix(tab), xaxt="n", yaxt="n", col=col) + nli <- nrow(tab) + nco <- ncol(tab) + dx <- 0.5 / (nli-1) + dy <- 0.5 / (nco-1) + xf <- (seq_len(nli)-1)/(nli-1) - dx + yf <- (seq_len(nco)-1)/(nco-1) - dy + if (with.lines) { + segments(xf, -dy, xf, 1+dy) + segments(-dx, yf, 1+dx, yf) + } + Axis(c(0,1), at=xf+dx, side=1, labels=rownames(tab), las=2, cex.axis=0.5, padj=0) + Axis(c(0,1), at=yf+dy, side=2, labels=colnames(tab), las=2, cex.axis=0.5, padj=0) + invisible(NULL) +} + +# +# plot utility : identify points within user's rectangle +# + +rect.identify <- function(data) { + if (is.null(dim(data))) data <- cbind(seq_along(data), data) + xy <- locator(n=2, type='n') + r <- matrix(c(range(xy$x), range(xy$y)), ncol=2, byrow=TRUE) + rect(r[1,1], r[2,1], r[1,2], r[2,2], border='red') + .in.range <- function(p, r) { + .in.int <- function(i) { + (p[i] >= r[i,1]) && (p[i] <= r[i,2]) + } + .in.int(1) && .in.int(2) + } + isel <- which(apply(data, 1, .in.range, r)) + points(data[isel,], col='red', pch=19) + isel +} + diff --git a/scripts/csh_init.sh b/scripts/csh_init.sh index 3f4e1a3..7099a7d 100644 --- a/scripts/csh_init.sh +++ b/scripts/csh_init.sh @@ -18,8 +18,8 @@ if ($?ORG_SOURCED == 0) then # o Verbose : default verbosity (may be changed by -v option) # - setenv AwkCmd "gawk" - setenv Verbose 0 + if ($?AwkCmd == 0) setenv AwkCmd "gawk" + if ($?Verbose == 0) setenv Verbose 0 # -------------------------------------- # don't change hereafter (normally) 
@@ -90,11 +90,15 @@ set path = ($SCRIPT_DIR $BIN_DIR $path) # alias should be sourced each time # -------------------------------------- -alias Debug 'if ($Verbose) echo "# "\!:* >> /dev/stderr' +alias Cout 'echo `date "+%Y-%m-%d %H:%M:%S"` "[OA \!:1]" \!:2-* >> /dev/stderr' -alias Notify 'echo "# "\!:* >> /dev/stderr' +alias Debug 'if ($Verbose) echo `date "+%Y-%m-%d %H:%M:%S"` "[OA DEBUG]" \!:* >> /dev/stderr' -alias Error 'echo "# Error "\!:2-*"" >> /dev/stderr; Exit \!:1' +alias Cat 'awk -v D="`date '"'"'+%Y-%m-%d %H:%M:%S'"'"'`" '"'"'{print D " [OA FILE ] " $0}'"'"' \!:* >> /dev/stderr' + +alias Notify 'Cout "INFO " \!:*' + +alias Error 'Cout ERROR \!:2-*; Exit \!:1' alias Exit 'set Stat = \!:1; Debug "<--- $0 [$Stat]"; exit \!:1' @@ -124,8 +128,6 @@ alias NeedFile 'if (! -e \!:1) eval Error 5 "\!:1 : file not found"' alias NeedDir 'if (! -d \!:1) eval Error 5 "\!:1 : directory not found"' -alias Cat 'awk '"'"'{print "# " $0}'"'"' \!:*' - alias AssignUndef 'if ($?\!:1 == 0) set \!:1=\!:2-*' # --------------------------------------