From 9bcfa914fee3a0a31e02bc8e1822fb9026f9e671 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Thu, 5 Apr 2018 18:01:01 +0200 Subject: [PATCH] Switch to version 1.2.38 of aragorn Former-commit-id: 8fdde2e7586aa22e401bc760f8c67c24eded9d78 Former-commit-id: 7e79dc7df97373f5527b6a2d9526cecfcb2b7376 --- src/aragorn/aragorn1.2.36.c | 10494 ---------------------------------- 1 file changed, 10494 deletions(-) delete mode 100644 src/aragorn/aragorn1.2.36.c diff --git a/src/aragorn/aragorn1.2.36.c b/src/aragorn/aragorn1.2.36.c deleted file mode 100644 index dea1d7c..0000000 --- a/src/aragorn/aragorn1.2.36.c +++ /dev/null @@ -1,10494 +0,0 @@ - -/* ---------------------------------------------------------------- -ARAGORN v1.2.36 Dean Laslett ---------------------------------------------------------------- - - ARAGORN (together with ARWEN at last) - Detects tRNA, mtRNA, and tmRNA genes in nucleotide sequences - Copyright (C) 2003-2015 Dean Laslett - - Please, report bugs and suggestions of improvements to the authors - - E-mail: Björn Canbäck: bcanback@acgt.se - Dean Laslett: gaiaquark@gmail.com - - Version 1.2.36 February 15th, 2013. - Thanks to Sascha Steinbiss for fixing more bugs - - - Please reference the following papers if you use this - program as part of any published research. - - Laslett, D. and Canback, B. (2004) - ARAGORN, a program for the detection of transfer RNA and - transfer-messenger RNA genes in nucleotide sequences. - Nucleic Acids Research, 32;11-16. - - Laslett, D. and Canback, B. (2008) - ARWEN: a program to detect tRNA genes in - metazoan mitochondrial nucleotide sequences. - Bioinformatics, 24(2); 172-175. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 of the License, (see below). - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - - GNU GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc. - 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Library General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - - GNU GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The "Program", below, -refers to any such program or work, and a "work based on the Program" -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term "modification".) Each licensee is addressed as "you". - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. - - 1. You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - - 2. You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. - - b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any - part thereof, to be licensed as a whole at no charge to all third - parties under the terms of this License. - - c) If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display an - announcement including an appropriate copyright notice and a - notice that there is no warranty (or else, saying that you provide - a warranty) and that users may redistribute the program under - these conditions, and telling the user how to view a copy of this - License. (Exception: if the Program itself is interactive but - does not normally print such an announcement, your work based on - the Program is not required to print an announcement.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - - a) Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of Sections - 1 and 2 above on a medium customarily used for software interchange; or, - - b) Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a medium - customarily used for software interchange; or, - - c) Accompany it with the information you received as to the offer - to distribute corresponding source code. (This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with such - an offer, in accord with Subsection b above.) - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. - -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - - 4. You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - - 5. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - - 6. Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties to -this License. - - 7. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 8. If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - - 9. The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and "any -later version", you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. - - 10. If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - - NO WARRANTY - - 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO -LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR -THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH -DAMAGES. - - END OF TERMS AND CONDITIONS - -*/ - - - - -/* ---------------------------------------------------------------- -ARAGORN v1.2.36 Dean Laslett ---------------------------------------------------------------- - - -aragorn detects tRNA, mtRNA, and tmRNA genes. -A minimum requirement is at least a 32 bit compiler architecture -(variable types int and unsigned int are at least 4 bytes long). - -Usage: -aragorn -v -s -d -c -l -j -a -q -rn -w -ifro, -t -m -mt - -gc -tv -seq -br -fasta -fo -o - - is assumed to contain one or more sequences -in FASTA format. Results of the search are printed to -STDOUT. All switches are optional and case-insensitive. -Unless -i is specified, tRNA genes containing introns -are not detected. - - -m Search for tmRNA genes. - -t Search for tRNA genes. - By default, all are detected. If one of - -m or -t is specified, then the other - is not detected unless specified as well. - -mt Search for Metazoan mitochondrial tRNA genes. - tRNA genes with introns not detected. -i,-sr switchs - ignored. Composite Metazoan mitochondrial - genetic code used. - -mtmam Search for Mammalian mitochondrial tRNA - genes. -i,-sr switchs ignored. -tv switch set. - Mammalian mitochondrial genetic code used. - -mtx Same as -mt but low scoring tRNA genes are - not reported. - -mtd Overlapping metazoan mitochondrial tRNA genes - on opposite strands are reported. - -gc Use the GenBank transl_table = genetic code. - -gcstd Use standard genetic code. - -gcmet Use composite Metazoan mitochondrial genetic code. - -gcvert Use Vertebrate mitochondrial genetic code. - -gcinvert Use Invertebrate mitochondrial genetic code. - -gcyeast Use Yeast mitochondrial genetic code. - -gcprot Use Mold/Protozoan/Coelenterate mitochondrial genetic code. - -gcciliate Use Ciliate genetic code. - -gcflatworm Use Echinoderm/Flatworm mitochondrial genetic code - -gceuplot Use Euplotid genetic code. - -gcbact Use Bacterial/Plant Chloroplast genetic code. - -gcaltyeast Use alternative Yeast genetic code. - -gcascid Use Ascidian Mitochondrial genetic code. - -gcaltflat Use alternative Flatworm Mitochondrial genetic code. - -gcblep Use Blepharisma genetic code. - -gcchloroph Use Chlorophycean Mitochondrial genetic code. - -gctrem Use Trematode Mitochondrial genetic code. - -gcscen Use Scenedesmus obliquus Mitochondrial genetic code. - -gcthraust Use Thraustochytrium Mitochondrial genetic code. - Individual modifications can be appended using - ,BBB= B = A,C,G, or T. is the three letter - code for an amino-acid. More than one modification - can be specified. eg -gcvert,aga=Trp,agg=Trp uses - the Vertebrate Mitochondrial code and the codons - AGA and AGG changed to Tryptophan. - -tv Do not search for mitochondrial TV replacement - loop tRNA genes. Only relevant if -mt used. - -c7 Search for tRNA genes with 7 base C-loops only. - -i Search for tRNA genes with introns in - anticodon loop with maximum length 3000 - bases. Minimum intron length is 0 bases. - Ignored if -m is specified. - -i Search for tRNA genes with introns in - anticodon loop with maximum length - bases. Minimum intron length is 0 bases. - Ignored if -m is specified. - -i, Search for tRNA genes with introns in - anticodon loop with maximum length - bases, and minimum length bases. - Ignored if -m is specified. - -io Same as -i, but allow tRNA genes with long - introns to overlap shorter tRNA genes. - -if Same as -i, but fix intron between positions - 37 and 38 on C-loop (one base after anticodon). - -ifo Same as -if and -io combined. - -ir Same as -i, but report tRNA genes with minimum - length bases rather than search for - tRNA genes with minimum length bases. - With this switch, acts as an output filter, - minimum intron length for searching is still 0 bases. - -c Assume that each sequence has a circular - topology. Search wraps around each end. - Default setting. - -l Assume that each sequence has a linear - topology. Search does not wrap. - -d Double. Search both strands of each - sequence. Default setting. - -s or -s+ Single. Do not search the complementary - (antisense) strand of each sequence. - -sc or -s- Single complementary. Do not search the sense - strand of each sequence. - -ps Lower scoring thresholds to 95% of default levels. - -ps Change scoring thresholds to percent of default levels. - -rp Flag possible pseudogenes (score < 100 or tRNA anticodon - loop <> 7 bases long). Note that genes with score < 100 - will not be detected or flagged if scoring thresholds are not - also changed to below 100% (see -ps switch). - -seq Print out primary sequence. - -br Show secondary structure of tRNA gene primary sequence - using round brackets. - -fasta Print out primary sequence in fasta format. - -fo Print out primary sequence in fasta format only - (no secondary structure). - -fon Same as -fo, with sequence and gene numbering in header. - -fos Same as -fo, with no spaces in header. - -fons Same as -fo, with sequence and gene numbering, but no spaces. - -w Print out in Batch mode. - -ss Use the stricter canonical 1-2 bp spacer1 and - 1 bp spacer2. Ignored if -mt set. Default is to - allow 3 bp spacer1 and 0-2 bp spacer2, which may - degrade selectivity.\n"); - -v Verbose. Prints out information during - search to STDERR. - -a Print out tRNA domain for tmRNA genes. - -a7 Restrict tRNA astem length to a maximum of 7 bases - -aa Display message if predicted iso-acceptor species - does not match species in sequence name (if present). - -j Display 4-base sequence on 3' end of astem - regardless of predicted amino-acyl acceptor length. - -jr Allow some divergence of 3' amino-acyl acceptor - sequence from NCCA. - -jr4 Allow some divergence of 3' amino-acyl acceptor - sequence from NCCA, and display 4 bases. - -q Dont print configuration line (which switchs - and files were used). - -rn Repeat sequence name before summary information. - -O Print output to . If - already exists, it is overwritten. By default - all output goes to stdout. - - -*/ - - -#include -#include - -#ifndef SEEK_SET -#define SEEK_SET 0 -#define SEEK_CUR 1 -#define SEEK_END 2 -#endif - - -#define DLIM '\n' -#define STRLEN 4001 -#define STRLENM1 4000 -#define SHORTSTRLEN 51 -#define SHORTSTRLENM1 50 -#define KEYLEN 15 -#define INACTIVE 2.0e+35 -#define IINACTIVE 2000000001L -#define ITHRESHOLD 2000000000L -#define space(c) (c==' ')||(c=='\t')||(c=='\n')||(c=='\r') -#define sq(pos) ((pos + d->psmax - 1L) % d->psmax) + 1L -#define itmparam(x,y) fputc(x,y) - - - -#define FASTA 0 -#define GENBANK 1 - -#define noGENE -1 -#define tRNA 0 -#define tmRNA 1 -#define srpRNA 2 -#define rRNA 3 -#define CDS 4 -#define NS 6 /* should be one more than number of types of gene */ - -#define MAXGCMOD 16 -#define MAMMAL_MT 2 -#define NGENECODE 24 -#define METAZOAN_MT 0 -#define STANDARD 1 -#define VERTEBRATE_MT 2 - -#define NAMINOACID 27 -#define Phe 0 -#define Val 1 -#define Leu 2 -#define Ile 3 -#define Cys 4 -#define Gly 5 -#define Arg 6 -#define Ser 7 -#define Ala 8 -#define Pro 9 -#define Thr 10 -#define Tyr 11 -#define Asp 12 -#define His 13 -#define Asn 14 -#define Met 15 -#define Trp 16 -#define Glu 17 -#define Gln 18 -#define Lys 19 -#define Stop 20 -#define SeC 21 -#define Pyl 22 - -#define INSERT -2 -#define TERM -1 -#define Adenine 0 -#define Cytosine 1 -#define Guanine 2 -#define Thymine 3 -#define AMBIG 4 -#define NOBASE 5 - -#define tRNAthresh 132.0 -#define mtRNAdtthresh 91.5 -#define mtRNAtthresh 83.5 -#define mtRNAdthresh 85.0 -#define tmRNAthresh 325.0 -#define srpRNAthresh 175.0 -#define CDSthresh 100.0 -#define PSEUDOGENElevel 0.95 - -#define RIGHT 0 -#define UP 1 -#define LEFT 2 -#define DOWN 3 -#define UPRIGHT 4 -#define SLANTDR 5 -#define SLANTUR 6 -#define SLANTUL 7 -#define SLANTDL 8 -#define SLANT 5 - -#define MATX 42 /* 41 */ -#define MATY 34 - - - -#define ASTEM2_EXT 9 -#define ASTEM2_EXTD 4 /* <= ASTEM2_EXT */ -#define ASTEM2_EXTE 5 /* ASTEM2_EXT - ASTEM2_EXTD */ -#define MINTSTEM_DIST (17 + ASTEM2_EXT) -#define MAXTSTEM_DIST (26 + ASTEM2_EXT) -#define MAXDSTEM_DIST 9 -#define MINDSTEM_DIST 8 -#define MININTRONLEN 0 -#define MAXINTRONLEN 3000 -#define MINCTRNALEN 62 -#define MAXCTRNALEN 110 -#define MINTRNALEN (MINCTRNALEN + 1) -#define MAXTRNALEN (MAXCTRNALEN + ASTEM2_EXT) -#define MAXETRNALEN (MAXTRNALEN + MAXINTRONLEN) -#define VARMAX 26 /* 25 */ -#define VARMIN 3 -#define VARDIFF 23 /* 22 */ /* VARMAX - VARMIN */ -#define MINTPTSDIST 50 -#define MAXTPTSDIST 321 -#define TPWINDOW (MAXTPTSDIST - MINTPTSDIST + 1) -#define MINTPDIST 50 -#define MAXTPDIST 250 -#define TPDISTWINDOW (MAXTPDIST - MINTPDIST + 1) -#define MINTAGDIST 12 -#define MAXTAGDIST 102 -#define TAGWINDOW MAXTAGDIST - MINTAGDIST -#define MINRNACDIST (MINTPDIST - 5) -#define MAXRNACDIST (MAXTPDIST - 5) -#define MAXPPINTRONDIST 250 -#define TMPTRAILER 145 -#define MINPPASDIST MINTSTEM_DIST -#define MAXPPASDIST MAXTSTEM_DIST + MAXPPINTRONDIST -#define MINPPTSTPDIST MINTSTEM_DIST + MINTPDIST -#define MAXPPTSTPDIST MAXTSTEM_DIST+ASTEM2_EXT+MAXTPDIST+MAXPPINTRONDIST -#define MAXTMRNALEN (4 + MAXPPASDIST + MAXTPDIST + MAXTAGDIST + TMPTRAILER) -#define TSWEEP 1000 -#define WRAP 2*MAXETRNALEN -#define NPTAG 33 - -/* -NOTE: If MAXPPINTRONDIST is increased, then validity of MAXTMRNALEN -and MAXETRNALEN must be ensured. WRAP = 2*MAXETRNALEN determines the length -of wseq, which contains the wrap around for circular sequences. This -must remain equal to or more than 2*MAXTMRNALEN and TSWEEP. -*/ - -#define BASE 0 -#define FSTEM 1 -#define BSTEM 2 - -#define NOID 0 -#define DLOOP 1 -#define DSTEM 2 -#define CLOOP 3 -#define VAR 4 - -#define NA MAXINTRONLEN -#define ND 100 -#define NT 200 -#define NH 2000 -#define NTH 3000 -#define NC 5000 -#define NGFT 5000 /* 100 */ -#define NTAG 474 /* 367 */ -#define LSEQ 20000 -#define ATBOND 2.5 -#define mtNA 1500 -#define mtND 150 -#define mtNTH 3000 /* 750 */ -#define mtNTM 3 -#define mtNCDS 200 /* 500,20 */ -#define mtNCDSCODON 6000 -#define mtGCBOND 0.0 -#define mtATBOND -0.5 -#define mtGTBOND -1.2 -#define mtTTBOND -2.9 -#define mtGGBOND -3.0 -#define mtGABOND -3.0 -#define mtNOBOND -3.0 -#define mtBONDSTAB 1.5 -#define mtABONDSTAB 2.0 -#define mtTSTTSTAB -2.5 -#define mtTERMSTAB 0.01 -#define mtSENDSTAB 0.01 -#define mtNSTAB 0.1 -#define mt3MMSTAB 1.0 -#define mtGCPENALTY 0.8 -#define mtGCPENALTYD 2.0 -#define mt_DRLmaxlength 16 -#define mt_TVRLmaxlength 18 -#define mtNCLM 3 - -#define SRRNAMAXLEN 1500 -#define SRRNAMINLEN 600 -#define LRRNAMINLEN 1200 -#define LRRNAMAXLEN 3000 - -#define srpMAXLEN 650 -#define srpUMAXLEN 300 -#define srpUMINLEN 100 -#define srpDMAXLEN 300 -#define srpDMINLEN 100 -#define srpNH 200 -#define srpNS 500 /* 100 */ -#define srpMAXHPL 14 -#define srpMAXSP 6 -#define srpMAXSTEM 6500 /* 6000 */ -#define srpDISPMAX 4*srpMAXLEN -#define srpMAXSPACER 12 -#define srpMAXNISTEMS 10 -#define srpNESTMAX 2 /* 3 */ - -#define cdsMAXLEN 3000 -#define NCDS 200 -#define NCDSCODON 1000 - - -typedef struct { long start; - long stop; - int comp; - long antistart; - long antistop; - int genetype; - char species[SHORTSTRLEN]; } annotated_gene; - - -typedef struct { char filename[80]; - FILE *f; - char seqname[STRLEN]; - int datatype; - double gc; - long ps; - long psmax; - long seqstart; - long nextseq; - int ns; - int nagene[NS]; - annotated_gene gene[NGFT]; } data_set; - - -typedef struct { char name[80]; - int seq[MAXTRNALEN+1]; - int eseq[MAXETRNALEN+1]; - int *ps; - int nbase; - int comp; - long start; - long stop; - int astem1; - int astem2; - int aatail; - int spacer1; - int spacer2; - int dstem; - int dloop; - int cstem; - int cloop; - int intron; - int nintron; - int anticodon; - int var; - int varbp; - int tstem; - int tloop; - int genetype; - double energy; - int asst; - int tps; - int tpe; } gene; - -typedef struct { int *pos; - int stem; - int loop; - double energy; } trna_loop; - -typedef struct { int *pos; - int stem; - int loop; - unsigned int bondtype; - double energy; - double stem_energy; } mt_trna_loop; - -typedef struct { int *pos; - int *looppos; - int *end; - int stem; - int loop; - int arm; - int anticodon; - unsigned int bondtype; - double energy; - double stem_energy; } mt_trna_cloop; - -typedef struct { int *pos; - int stem; - int loop; - int *end; - unsigned int bondtype; - double energy; - double stem_energy; } mt_trna_tloop; - - -typedef struct { int *pos; - int *end; - int stem; - int loop; - double energy; } trna_dloop; - - -typedef struct { int *pos1; - int *pos2; - int stem; - double energy; } trna_astem; - -typedef struct { int *pos1; - int *pos2; - int stem; - unsigned int bondtype; - double energy; } mt_trna_astem; - -typedef struct { int *pos; - int comp; - int frame; - int codon; - int win; } mt_cds_codon; - -typedef struct { int *pos1; - int *pos2; - int comp; } mt_cds; - -typedef struct { int *pos1; - int *pos2; - int comp; } mt_rrna; - -typedef struct { int *pos1; - int *pos2; - int stem; - int loop; } rrna_hairpin; - -typedef struct { int *pos1; - int *pos2; - int stem; } rrna_stem; - -typedef struct { int *pos; - int comp; - int frame; - int codon; - int win; } cds_codon; - - - - - -typedef struct { FILE *f; - int batch; - int repeatsn; - int trna; - int tmrna; - int srprna; - int cds; - int mtrna; - int tvloop; - int cloop7; - int peptide; - int geneticcode; - int ngcmod; - int gcmod[MAXGCMOD]; - int gcfix; - int discrim; - int extastem; - int tarm; - int tagthresh; - int tarmlength; - int showconfig; - int libflag; - int verbose; - int linear; - int both; - int reportpseudogenes; - int energydisp; - int secstructdisp; - int seqdisp; - int aataildisp; - int aataildiv; - int sp1max; - int sp2min; - int sp2max; - int mtxdetect; - int mtcdsscan; - int mtcompov; - int matchacceptor; - int maxintronlen; - int minintronlen; - int minintronlenreport; - int ioverlay; - int ifixedpos; - int ireportminintronlen; - int tmstrict; - int iamismatch; - int loffset; - int roffset; - long start; - int comp; - int genespace; - int srpspace; - int ngene[NS]; - int nps; - int annotated; - int nagene[NS]; - int natfn; - int natfp; - int natfpd; - int natfptv; - int nacdsfn; - int nacdsfp; - int lacds; - int ldcds; - long nabase; - double trnathresh; - double ttscanthresh; - double ttarmthresh; - double tdarmthresh; - double tastemthresh; - double tascanthresh; - double mttthresh; - double mtdthresh; - double mtdtthresh; - double mttarmthresh; - double mtdarmthresh; - double tmrnathresh; - double tmathresh; - double tmcthresh; - double tmcathresh; - double tmrthresh; - double srpthresh; - double cdsthresh; - double eref[NS]; - int tmrna_struct[200]; - } csw; - - - -/* Basepair matching matrices */ - - int lbp[3][6][6] = - { { { 0,0,1,1,1,0 }, - { 0,0,1,0,1,0 }, - { 1,1,0,1,1,0 }, - { 1,0,1,0,1,0 }, - { 1,1,1,1,1,0 }, - { 0,0,0,0,0,0 } }, - { { 0,0,0,1,1,0 }, - { 0,0,1,0,1,0 }, - { 0,1,0,1,1,0 }, - { 1,0,1,0,1,0 }, - { 1,1,1,1,1,0 }, - { 0,0,0,0,0,0 } }, - { { 0,0,0,1,1,0 }, - { 0,0,1,0,1,0 }, - { 0,1,0,0,1,0 }, - { 1,0,0,0,1,0 }, - { 1,1,1,1,1,0 }, - { 0,0,0,0,0,0 } } }; - - - - int bp[6][6] = { { 0,0,0,1,1,0 }, - { 0,0,1,0,1,0 }, - { 0,1,0,1,1,0 }, - { 1,0,1,0,1,0 }, - { 1,1,1,1,1,0 }, - { 0,0,0,0,0,0 } }; - - int wbp[6][6] = - { { 0,0,0,2,2,0 }, - { 0,0,2,0,2,0 }, - { 0,2,0,1,2,0 }, - { 2,0,1,0,2,0 }, - { 2,2,2,2,2,0 }, - { 0,0,0,0,0,0 } }; - - int wcbp[6][6] = { { 0,0,0,1,1,0 }, - { 0,0,1,0,1,0 }, - { 0,1,0,0,1,0 }, - { 1,0,0,0,1,0 }, - { 1,1,1,1,1,0 }, - { 0,0,0,0,0,0 } }; - - - int gc[6][6] = { { 0,0,0,0,0,0 }, - { 0,0,1,0,1,0 }, - { 0,1,0,0,1,0 }, - { 0,0,0,0,0,0 }, - { 0,1,1,0,1,0 }, - { 0,0,0,0,0,0 } }; - - int gt[6][6] = { { 0,0,0,0,0,0 }, - { 0,0,0,0,0,0 }, - { 0,0,0,1,1,0 }, - { 0,0,1,0,1,0 }, - { 0,0,1,1,1,0 }, - { 0,0,0,0,0,0 } }; - - int at[6][6] = { { 0,0,0,1,1,0 }, - { 0,0,0,0,0,0 }, - { 0,0,0,0,0,0 }, - { 1,0,0,0,0,0 }, - { 1,0,0,0,1,0 }, - { 0,0,0,0,0,0 } }; - - int tt[6][6] = { { 0,0,0,0,0,0 }, - { 0,0,0,0,0,0 }, - { 0,0,0,0,0,0 }, - { 0,0,0,1,1,0 }, - { 0,0,0,1,1,0 }, - { 0,0,0,0,0,0 } }; - - int stemterm[6][6] = { { 0,0,1,0,1,0 }, - { 0,0,0,0,0,0 }, - { 1,0,0,0,1,0 }, - { 0,0,0,1,1,0 }, - { 1,0,1,1,1,0 }, - { 0,0,0,0,0,0 } }; - - int aastemterm[6][6] = - { { 1,0,1,0,1,0 }, - { 0,0,0,0,0,0 }, - { 1,0,0,0,1,0 }, - { 0,0,0,1,1,0 }, - { 1,0,1,1,1,0 }, - { 0,0,0,0,0,0 } }; - - int ggstemterm[6][6] = - { { 0,0,1,0,1,0 }, - { 0,0,0,0,0,0 }, - { 1,0,1,0,1,0 }, - { 0,0,0,1,1,0 }, - { 1,0,1,1,1,0 }, - { 0,0,0,0,0,0 } }; - - int assymst[6][6] = { { 0,0,0,0,0,0 }, - { 0,0,0,0,0,0 }, - { 1,0,0,0,1,0 }, - { 0,0,0,1,1,0 }, - { 1,0,0,1,1,0 }, - { 0,0,0,0,0,0 } }; - - int assymat[6][6] = { { 0,0,0,1,1,0 }, - { 0,0,0,0,0,0 }, - { 0,0,0,0,0,0 }, - { 0,0,0,0,0,0 }, - { 0,0,0,1,1,0 }, - { 0,0,0,0,0,0 } }; - - - int stackbp[6][6] = { { 0,0,0,1,1,0 }, - { 0,0,1,0,1,0 }, - { 0,1,0,1,1,0 }, - { 1,0,1,1,1,0 }, - { 1,1,1,1,1,0 }, - { 0,0,0,0,0,0 } }; - - int ggstackbp[6][6] = - { { 0,0,0,1,1,0 }, - { 0,0,1,0,1,0 }, - { 0,1,1,1,1,0 }, - { 1,0,1,1,1,0 }, - { 1,1,1,1,1,0 }, - { 0,0,0,0,0,0 } }; - - - int ggbp[6][6] = - { { 0,0,0,1,1,0 }, - { 0,0,1,0,1,0 }, - { 0,1,1,1,1,0 }, - { 1,0,1,0,1,0 }, - { 1,1,1,1,1,0 }, - { 0,0,0,0,0,0 } }; - - int gabp[6][6] = - { { 0,0,1,1,1,0 }, - { 0,0,1,0,1,0 }, - { 1,1,0,1,1,0 }, - { 1,0,1,0,1,0 }, - { 1,1,1,1,1,0 }, - { 0,0,0,0,0,0 } }; - - int assymagbp[6][6] = - { { 0,0,1,1,1,0 }, - { 0,0,1,0,1,0 }, - { 0,1,0,1,1,0 }, - { 1,0,1,0,1,0 }, - { 1,1,1,1,1,0 }, - { 0,0,0,0,0,0 } }; - - int stembp[6][6] = - { { 0,0,1,1,1,0 }, - { 0,0,1,0,1,0 }, - { 1,1,0,1,1,0 }, - { 1,0,1,1,1,0 }, - { 1,1,1,1,1,0 }, - { 0,0,0,0,0,0 } }; - - int ggstembp[6][6] = - { { 0,0,1,1,1,0 }, - { 0,0,1,0,1,0 }, - { 1,1,1,1,1,0 }, - { 1,0,1,1,1,0 }, - { 1,1,1,1,1,0 }, - { 0,0,0,0,0,0 } }; - - int gastembp[6][6] = - { { 1,0,1,1,1,0 }, - { 0,0,1,0,1,0 }, - { 1,1,1,1,1,0 }, - { 1,0,1,1,1,0 }, - { 1,1,1,1,1,0 }, - { 0,0,0,0,0,0 } }; - - int vbp[6][6] = - { { 0,0,1,4,4,0 }, - { 0,0,4,0,4,0 }, - { 1,4,0,2,4,0 }, - { 4,0,2,0,4,0 }, - { 4,4,4,4,4,0 }, - { 0,0,0,0,0,0 } }; - - int tandemid[mtNTM][4] = - { { 3,2,2,3 }, - { 2,3,3,2 }, - { 3,3,3,3 } }; - - double tandem_em[mtNTM] = { -0.5,-0.5,2.0 }; - - - double send_em[6][6] = - { { 0.0,0.0,0.0,0.0,0.0,0.0 }, - { 0.0,0.0,0.5*mtSENDSTAB,0.0,0.5*mtSENDSTAB,0.0 }, - { 0.0,0.5*mtSENDSTAB,0.0,mtSENDSTAB,mtSENDSTAB,0.0 }, - { 0.0,0.0,mtSENDSTAB,0.0,mtSENDSTAB,0.0 }, - { 0.0,0.5*mtSENDSTAB,mtSENDSTAB,mtSENDSTAB,mtSENDSTAB,0.0 }, - { 0.0,0.0,0.0,0.0,0.0,0.0 } }; - - - double ssend_em[6][6] = - { { 0.0,0.0,0.0,0.0,0.0,0.0 }, - { 0.0,0.0,mtSENDSTAB,0.0,mtSENDSTAB,0.0 }, - { 0.0,mtSENDSTAB,0.0,mtSENDSTAB,mtSENDSTAB,0.0 }, - { 0.0,0.0,mtSENDSTAB,0.0,mtSENDSTAB,0.0 }, - { 0.0,mtSENDSTAB,mtSENDSTAB,mtSENDSTAB,mtSENDSTAB,0.0 }, - { 0.0,0.0,0.0,0.0,0.0,0.0 } }; - - - - int neighbour_map[6][6] = - { { 0,0,1,0,1,0 }, - { 0,0,0,0,0,0 }, - { 1,0,0,0,1,0 }, - { 0,0,0,1,1,0 }, - { 1,0,1,1,1,0 }, - { 0,0,0,0,0,0 } }; - - double neighbour_em[2][6][6] = { - { { 0.0,0.0,0.0,0.0,0.0,0.0 }, - { 0.0,0.0,0.0,0.0,0.0,0.0 }, - { 0.0,0.0,0.0,0.0,0.0,0.0 }, - { 0.0,0.0,0.0,0.0,0.0,0.0 }, - { 0.0,0.0,0.0,0.0,0.0,0.0 }, - { 0.0,0.0,0.0,0.0,0.0,0.0 } }, - - { { 0.0,0.0,0.0,0.0,0.0,0.0 }, - { 0.0,0.0,mtNSTAB,0.0,mtNSTAB,0.0 }, - { 0.0,mtNSTAB,0.0,0.0,mtNSTAB,0.0 }, - { 0.0,0.0,0.0,0.0,0.0,0.0 }, - { 0.0,mtNSTAB,mtNSTAB,0.0,mtNSTAB,0.0 }, - { 0.0,0.0,0.0,0.0,0.0,0.0 } } }; - - unsigned int btmap[6][6] = - { { 0x10000,0x10000,0x1000,0x10,0x00000,0x10000 }, - { 0x10000,0x10000,0x1,0x10000,0x00000,0x10000 }, - { 0x1000,0x1,0x10000,0x100,0x00000,0x10000 }, - { 0x10,0x10000,0x100,0x1000,0x00000,0x10000 }, - { 0x00000,0x00000,0x00000,0x00000,0x00000,0x10000 }, - { 0x10000,0x10000,0x10000,0x10000,0x10000,0x10000 } }; - - - double bem[6][6] = - { { -2.144,-0.428,-2.144, ATBOND, 0.000, 0.000 }, - { -0.428,-2.144, 3.000,-2.144, 0.000, 0.000 }, - { -2.144, 3.000,-2.144, 1.286, 0.000, 0.000 }, - { ATBOND,-2.144, 1.286,-0.428, 0.000, 0.000 }, - { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 }, - { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 } }; - - - - int mt_discrim[3][64][6] = - /* metazoan mt */ - {{{ 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - - { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - { 0,0,0,0,0,0 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - - { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - - { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - { 0,0,0,0,0,0 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }}, - /* standard */ - {{ 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - - { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - - { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - - { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, - { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }, { 1,1,1,1,1,1 }}, - /* mammal mt */ - {{ 1,0,0,0,1,1 }, { 1,0,0,0,1,1 }, { 1,0,0,0,1,1 }, { 1,0,0,0,1,1 }, - { 0,0,0,1,1,1 }, { 1,0,0,0,1,1 }, { 1,0,1,1,1,1 }, { 1,1,1,1,1,1 }, - { 1,1,1,1,1,1 }, { 1,0,1,0,1,1 }, { 1,0,0,0,1,1 }, { 1,1,1,1,1,1 }, - { 1,0,0,0,1,1 }, { 1,1,1,1,1,1 }, { 0,1,0,0,1,1 }, { 0,0,1,0,1,1 }, - - { 1,0,0,0,1,1 }, { 1,0,0,0,1,1 }, { 1,0,0,0,1,1 }, { 1,0,1,0,1,1 }, - { 0,0,1,0,1,1 }, { 1,0,0,0,1,1 }, { 1,0,1,1,1,1 }, { 1,0,1,1,1,1 }, - { 1,1,1,1,1,1 }, { 1,0,1,0,1,1 }, { 1,0,0,0,1,1 }, { 1,1,1,1,1,1 }, - { 0,0,0,0,0,0 }, { 1,0,1,1,1,1 }, { 0,0,1,0,1,1 }, { 1,1,1,1,1,1 }, - - { 1,0,0,0,1,1 }, { 1,0,0,0,1,1 }, { 1,0,0,0,1,1 }, { 1,0,1,0,1,1 }, - { 0,1,0,1,1,1 }, { 1,0,0,0,1,1 }, { 1,0,1,1,1,1 }, { 1,1,1,1,1,1 }, - { 1,1,1,1,1,1 }, { 1,0,1,0,1,1 }, { 1,0,0,0,1,1 }, { 1,1,1,1,1,1 }, - { 1,1,0,0,1,1 }, { 1,1,1,1,1,1 }, { 0,1,0,0,1,1 }, { 0,0,1,0,1,1 }, - - { 1,1,0,1,1,1 }, { 1,0,0,0,1,1 }, { 1,0,1,1,1,1 }, { 1,0,0,0,1,1 }, - { 1,0,1,0,1,1 }, { 1,0,0,0,1,1 }, { 1,1,1,1,1,1 }, { 0,0,0,0,0,0 }, - { 1,1,1,1,1,1 }, { 1,0,1,0,1,1 }, { 1,0,1,0,1,1 }, { 1,1,1,1,1,1 }, - { 1,0,0,0,1,1 }, { 1,1,1,1,1,1 }, { 1,0,1,0,1,1 }, { 1,1,1,1,1,1 }}}; - -/* GENETIC CODES (INDEXED BY ANTICODON) */ - - char aapolarity[NAMINOACID+1] = "NNNNPNPPNNPNPPPNNPPP***????"; - char aaletter[NAMINOACID+1] = "FVLICGRSAPTYDHNMWEQK***????"; - char aaname[NAMINOACID][20] = - { "Phe","Val","Leu","Ile","Cys", - "Gly","Arg","Ser","Ala","Pro", - "Thr","Tyr","Asp","His","Asn", - "Met","Trp","Glu","Gln","Lys", - "Stop", - "seC", - "Pyl", - "(Arg|Stop|Ser|Gly)", - "(Ile|Met)", - "(Stop|Trp)", - "(Lys|Asn)" }; - - char ambig_aaname[4] = "???"; - - int aamap[NGENECODE][64] = { - /* composite metazoan mt */ - { Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Met, - Trp,Gly,Arg,23, - Ser,Ala,Pro,Thr, - Pyl,Glu,Gln,Lys, - Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,24, - 25,Gly,Arg,23, - Ser,Ala,Pro,Thr, - Stop,Glu,Gln,26 }, - /* standard */ - { Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Met, - Trp,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Pyl,Glu,Gln,Lys, - Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Ile, - SeC,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Stop,Glu,Gln,Lys }, - /* vertebrate mt */ - { Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Met, - Trp,Gly,Arg,Stop, - Ser,Ala,Pro,Thr, - Pyl,Glu,Gln,Lys, - Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Met, - Trp,Gly,Arg,Stop, - Ser,Ala,Pro,Thr, - Stop,Glu,Gln,Lys }, - /* yeast mt */ - { Phe,Val,Thr,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Thr,Met, - Trp,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Pyl,Glu,Gln,Lys, - Phe,Val,Thr,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Thr,Met, - Trp,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Stop,Glu,Gln,Lys }, - /* mold, protozoan, and coelenterate mt */ - { Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Met, - Trp,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Pyl,Glu,Gln,Lys, - Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Ile, - Trp,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Stop,Glu,Gln,Lys }, - /* invertebrate mt */ - { Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Met, - Trp,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Pyl,Glu,Gln,Lys, - Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Met, - Trp,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Stop,Glu,Gln,Lys }, - /* ciliate */ - { Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Met, - Trp,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Gln,Glu,Gln,Lys, - Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Ile, - SeC,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Gln,Glu,Gln,Lys }, - /* deleted -> standard */ - { Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Met, - Trp,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Pyl,Glu,Gln,Lys, - Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Ile, - SeC,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Stop,Glu,Gln,Lys }, - /* deleted -> standard */ - { Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Met, - Trp,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Pyl,Glu,Gln,Lys, - Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Ile, - SeC,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Stop,Glu,Gln,Lys }, - /* echinoderm and flatworm mt */ - { Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Met, - Trp,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Pyl,Glu,Gln,Lys, - Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Ile, - Trp,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Stop,Glu,Gln,Asn }, - /* euplotid */ - { Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Met, - Trp,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Pyl,Glu,Gln,Lys, - Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Ile, - Cys,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Stop,Glu,Gln,Lys }, - /* bacterial and plant chloroplast */ - { Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Ser,Met, - Trp,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Pyl,Glu,Gln,Lys, - Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Ile, - SeC,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Stop,Glu,Gln,Lys }, - /* alternate yeast */ - { Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Ser,Met, - Trp,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Pyl,Glu,Gln,Lys, - Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Ile, - SeC,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Stop,Glu,Gln,Lys }, - /* ascidian mt */ - { Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Met, - Trp,Gly,Arg,Gly, - Ser,Ala,Pro,Thr, - Pyl,Glu,Gln,Lys, - Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Met, - Trp,Gly,Arg,Gly, - Ser,Ala,Pro,Thr, - Stop,Glu,Gln,Lys }, - /* alternate flatworm mt */ - { Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Met, - Trp,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Pyl,Glu,Gln,Lys, - Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Ile, - Trp,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Glu,Gln,Asn }, - /* blepharisma */ - { Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Met, - Trp,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Gln,Glu,Gln,Lys, - Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Ile, - SeC,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Stop,Glu,Gln,Lys }, - /* chlorophycean mt */ - { Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Met, - Trp,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Leu,Glu,Gln,Lys, - Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Ile, - SeC,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Stop,Glu,Gln,Lys }, - /* deleted -> standard */ - { Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Met, - Trp,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Pyl,Glu,Gln,Lys, - Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Ile, - SeC,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Stop,Glu,Gln,Lys }, - /* deleted -> standard */ - { Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Met, - Trp,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Pyl,Glu,Gln,Lys, - Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Ile, - SeC,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Stop,Glu,Gln,Lys }, - /* deleted -> standard */ - { Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Met, - Trp,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Pyl,Glu,Gln,Lys, - Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Ile, - SeC,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Stop,Glu,Gln,Lys }, - /* deleted -> standard */ - { Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Met, - Trp,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Pyl,Glu,Gln,Lys, - Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Ile, - SeC,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Stop,Glu,Gln,Lys }, - /* trematode mt */ - { Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Met, - Trp,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Pyl,Glu,Gln,Lys, - Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Met, - Trp,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Stop,Glu,Gln,Lys }, - /* scenedesmus obliquus mt*/ - { Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Met, - Trp,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Leu,Glu,Gln,Lys, - Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Leu,Ile, - SeC,Gly,Arg,Arg, - Stop,Ala,Pro,Thr, - Stop,Glu,Gln,Lys }, - /* thraustochytrium mt */ - { Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Leu,Val,Ser,Met, - Trp,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Pyl,Glu,Gln,Lys, - Phe,Val,Leu,Ile, - Cys,Gly,Arg,Ser, - Ser,Ala,Pro,Thr, - Tyr,Asp,His,Asn, - Stop,Val,Leu,Ile, - SeC,Gly,Arg,Arg, - Ser,Ala,Pro,Thr, - Stop,Glu,Gln,Lys } }; - - - - -/* POINTERS TO DETECTED GENES */ - - gene *ts; - - -/* TOOLS */ - -char upcasec(char c) -{ return((c >= 'a')?c-32:c); } - - -int length(char *s) -{ int i=0; - while (*s++) i++; - return(i); } - -char *strpos(char *s, char *k) -{ char c,d; - int i; - d = *k; - while (c = *s) - { if (c == d) - { i = 0; - do if (!k[++i]) return(s); while (s[i] == k[i]); } - s++; } - return(NULL); } - - -char *softstrpos(char *s, char *k) -{ char c,d; - int i; - d = upcasec(*k); - while (c = *s) - { if (upcasec(c) == d) - { i = 0; - do if (!k[++i]) return(s); - while (upcasec(s[i]) == upcasec(k[i])); } - s++; } - return(NULL); } - - -char *marginstring(char *s, char *k, int margin) -{ char c,d; - int i,j; - j = 0; - d = *k; - while (c = *s) - { if (c == d) - { i = 0; - do if (!k[++i]) return(s); while (s[i] == k[i]); } - s++; - if (++j >= margin) break; } - return(NULL); } - - -int margindetect(char *line, int margin) -{ int i; - char c,*s; - i = 0; - s = line; - while (c = *s++) - { if (!space(c)) break; - if (c == '\t') i += 7; - if (++i >= margin) return(0); } - if (c == '\n') return(0); - if (c == '\r') return(0); - if (c == '\0') return(0); - return(1); } - - -char *dconvert(char *s, double *r) -{ static char zero='0',nine='9'; - int shift,expshift,sgn,expsgn,exponent; - char c,limit; - double result; - shift = 0; - expshift = 0; - sgn = 1; - expsgn = 1; - limit = 0; - exponent = 0; - result = 0.0; - if ((c = *s) == '-') - { sgn = -1; - c = *++s; } - else if (c == '+') c= *++s; - if (c >= zero) - if (c <= nine) - { result = (double)(c - zero); - while ((c = *++s) >= zero) - { if (c > nine) break; - if (++limit < 15) result = result*10.0 + (double)(c - zero); }} - if (c == '.') - while ((c = *++s) >= zero) - { if (c > nine) break; - if (++limit < 15) - { result = result*10.0 + (double)(c - zero); - shift++; }} - if ((c == 'E')||(c == 'e')||(c == 'D')||(c == 'd')) - { if ((c = *++s) == '-') - { expsgn = -1; - c = *++s; } - else - if (c == '+') c = *++s; - if (c >= zero) - if (c <= nine) - { exponent = c - zero; - while ((c = *++s) >= zero) - { if (c > nine) break; - exponent = exponent*10 + c - zero; - if (++expshift > 3) break; }}} - result *= (double)sgn; - exponent = exponent*expsgn - shift; - if (exponent >= 0) - while (exponent--) result *= 10.0; - else - while (exponent++) result /= 10.0; - (*r) *= 0.01*result; - return(s); } - -char *lconvert(char *s, long *r) -{ static char zero='0',nine='9'; - long sgn; - long result; - char c; - sgn = 1L; - result = 0L; - if ((c = *s) == '-') - { sgn = -1L; - c = *++s; } - else if (c == '+') c= *++s; - if (c >= zero) - if (c <= nine) - { result = (long)(c - zero); - while ((c = *++s) >= zero) - { if (c > nine) break; - result = result*10L + (long)(c - zero); }} - *r = result * sgn; - return(s); } - - -char *getlong(char *line, long *l) -{ static char zero='0',nine='9'; - char c1,c2,*s; - s = line; - while (c1 = *s) - { if (c1 >= zero) - { if (c1 <= nine) return(lconvert(s,l)); } - else - if ((c1 == '-') || (c1 == '+')) - { c2 = s[1]; - if (c2 >= zero) - if (c2 <= nine) - return(lconvert(s,l)); } - s++; } - return(NULL); } - - - -char *copy(char *from, char *to) -{ while (*to++ = *from++); - return(--to); } - - -char *copy3cr(char *from, char *to, int n) -{ while (*to = *from++) - { if (*to == DLIM) - { *to = '\0'; - break; } - if (--n <= 0) - { *++to = '\0'; - break; } - to++; } - return(to); } - -char *quotestring(char *line, char *a, int n) -{ char ch; - while (ch = *line++) - if (ch == '"') - { while (ch = *line++) - if (ch != '"') - { *a++ = ch; - if (--n <= 0) break; } - else break; - break; } - *a = '\0'; - return(a); } - -/* LIBRARY */ - -long process_sequence_heading(data_set *d, csw *sw) -{ int i,ic,nagene; - long l,realstart; - char line[STRLEN],c,*s,*sq; - annotated_gene *agene; - FILE *f; - f = d->f; - d->datatype = FASTA; - fseek(f,d->seqstart,SEEK_SET); - do { if ((ic = getc(f)) == EOF) return(-1L); - c = (char)ic; } - while (space(c)); - if (!fgets(d->seqname,STRLENM1,f)) return(-1L); - if (c != '>') - { if (upcasec(c) != 'L') goto FNSN; - if (!(s = softstrpos(d->seqname,"OCUS"))) goto FNSN; - s += 4; - while (space(*s)) s++; - sq = d->seqname; - while (!space(*s)) *sq++ = *s++; - *sq++ = ' '; - if (!fgets(line,STRLENM1,f)) return(-1L); - if (!(s = softstrpos(line,"DEFINITION"))) return(-1L); - s += 10; - while (space(*s)) s++; - copy(s,sq); - if (!fgets(line,STRLENM1,f)) return(-1L); - for (i = 0; i < NS; i++) d->nagene[i] = 0; - nagene = 0; - while (!marginstring(line,"ORIGIN",10)) - { if (nagene >= NGFT) goto GBNL; - if (!(s = marginstring(line,"tRNA",10))) goto CDSEQ; - agene = &(d->gene[nagene]); - agene->genetype = tRNA; - if (softstrpos(s,"complement")) agene->comp = 1; - else agene->comp = 0; - if (!(s = getlong(s,&l))) l = -1L; - agene->start = l; - if (!(s = getlong(s,&l))) l = -1L; - agene->stop = l; - copy("tRNA-???",agene->species); - agene->antistart = -1L; - agene->antistop = -1L; - if (!fgets(line,STRLENM1,f)) return(-1L); - while (!margindetect(line,10)) - { if (s = softstrpos(line,"product=")) - if (s = softstrpos(s,"tRNA-")) - { s += 5; - while (space(*s)) s++; - copy3cr(s,agene->species+5,3); } - if (s = softstrpos(line,"anticodon=")) - { s += 10; - if (!(s = getlong(s,&l))) l = -1L; - agene->antistart = l; - if (!(s = getlong(s,&l))) l = -1L; - agene->antistop = l; } - if (!fgets(line,STRLENM1,f)) return(-1L); } - d->nagene[tRNA]++; - nagene++; - continue; - CDSEQ: - if (!(s = marginstring(line,"CDS",10))) - if (!(s = marginstring(line,"mRNA",10))) - goto RRNA; - agene = &(d->gene[nagene]); - agene->genetype = CDS; - if (softstrpos(s,"complement")) agene->comp = 1; - else agene->comp = 0; - if (!(s = getlong(s,&l))) l = -1L; - agene->start = l; - if (!(s = getlong(s,&l))) l = -1L; - agene->stop = l; - copy("???",agene->species); - if (!fgets(line,STRLENM1,f)) return(-1L); - while (!margindetect(line,10)) - { if (s = softstrpos(line,"gene=")) - { s += 5; - quotestring(s,agene->species,SHORTSTRLENM1); } - else if (s = softstrpos(line,"product=")) - { s += 8; - quotestring(s,agene->species,SHORTSTRLENM1); } - if (!fgets(line,STRLENM1,f)) return(-1L); } - d->nagene[CDS]++; - nagene++; - continue; - RRNA: - if (!(s = marginstring(line,"rRNA",10))) goto GBNL; - agene = &(d->gene[nagene]); - agene->genetype = rRNA; - if (softstrpos(s,"complement")) agene->comp = 1; - else agene->comp = 0; - if (!(s = getlong(s,&l))) l = -1L; - agene->start = l; - if (!(s = getlong(s,&l))) l = -1L; - agene->stop = l; - copy("???",agene->species); - if (!fgets(line,STRLENM1,f)) return(-1L); - while (!margindetect(line,10)) - { if (s = softstrpos(line,"gene=")) - { s += 5; - quotestring(s,agene->species,SHORTSTRLENM1); } - else if (s = softstrpos(line,"product=")) - { s += 8; - quotestring(s,agene->species,SHORTSTRLENM1); } - if (!fgets(line,STRLENM1,f)) return(-1L); } - d->nagene[rRNA]++; - nagene++; - continue; - GBNL: - if (!fgets(line,STRLENM1,f)) return(-1L); } - d->datatype = GENBANK; - d->nagene[NS-1] = nagene; - sw->annotated = 1; - realstart = ftell(f); } - else - { MH: - realstart = ftell(f); - do { if ((ic = getc(f)) == EOF) return(-1L); - c = (char)ic; } - while (space(c)); - if (c == '>') - { if (!fgets(line,STRLENM1,f)) return(-1L); - goto MH; } - fseek(f,realstart,SEEK_SET); } - s = d->seqname; - i = 0; - while ((c = *s) != '\0') - { if (c == '\n') break; - if (c == '\r') break; - if (++i >= STRLEN) break; - s++; } - *s = '\0'; - return(realstart); - FNSN: - realstart = d->seqstart; - s = copy("Unnamed sequence ",d->seqname); - fseek(f,realstart,SEEK_SET); - if (fgets(line,STRLENM1,f)) copy3cr(line,s,50); - fseek(f,realstart,SEEK_SET); - return(realstart); } - - -int move_forward(data_set *d) -{ int ic; - long nextbase; - static int map[256] = - { -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, - -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, - -4,-4,-4,-4,-4,-4,NOBASE,-3,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, - -4,-4,-2,-4,-4,Adenine,AMBIG,Cytosine,AMBIG,-4,-4,Guanine,AMBIG, - -4,-4,AMBIG,-5,AMBIG,AMBIG,-4,-4,-4, - AMBIG,AMBIG,Thymine,Thymine,AMBIG,AMBIG,-4, - AMBIG,-4,-4,-4,-4,INSERT,NOBASE,-4,Adenine,AMBIG,Cytosine,AMBIG, - -4,-4,Guanine,AMBIG,-4,-4,AMBIG,-5,AMBIG,AMBIG,-4,-4,-4, - AMBIG,AMBIG,Thymine,Thymine,AMBIG,AMBIG,-4, - AMBIG,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, - -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, - -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, - -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, - -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, - -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, - -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4 }; - if (d->ps >= d->psmax) - if (d->psmax > 0L) - { fseek(d->f,d->seqstart,SEEK_SET); - d->ps = 0L; } - NL: - if ((ic = getc(d->f)) == EOF) goto FAIL; - SC: - ic = map[ic]; - BS: - if (ic >= Adenine) - { d->ps++; - return(ic); } - if (ic == -2) - { d->nextseq = ftell(d->f) - 1L; - return(TERM); } - if (ic == -3) - if (d->datatype == GENBANK) - { if ((ic = getc(d->f)) == EOF) goto FAIL; - if ((ic = map[ic]) != -3) goto BS; - do if ((ic = getc(d->f)) == EOF) goto FAIL; - while (space(ic)); - d->nextseq = ftell(d->f) - 1L; - return(TERM); } - if (ic == -5) - { nextbase = ftell(d->f); - if ((ic = getc(d->f)) == EOF) goto FAIL; - if (upcasec(ic) == 'O') - { if ((ic = getc(d->f)) == EOF) goto FAIL; - if (upcasec(ic) == 'C') - { if ((ic = getc(d->f)) == EOF) goto FAIL; - if (upcasec(ic) == 'U') - { if ((ic = getc(d->f)) == EOF) goto FAIL; - if (upcasec(ic) == 'S') - { d->nextseq = nextbase - 1L; - return(TERM); }}}} - fseek(d->f,nextbase,SEEK_SET); } - goto NL; - FAIL: - d->nextseq = -1L; - if (d->psmax > 0L) - { d->ps = d->psmax; - return(NOBASE); } - else return(TERM); } - - -int seq_init(data_set *d, csw *sw) -{ long ngc; - int ic; - if ((d->seqstart = process_sequence_heading(d,sw)) < 0L) return(0); - d->ps = 0L; - d->psmax = -1L; - ngc = 0L; - while ((ic = move_forward(d)) >= Adenine) - if (ic >= Cytosine) - if (ic <= Guanine) - ngc++; - if ((d->psmax = d->ps) <= 0L) return(0); - d->gc = (double)ngc/(double)d->psmax; - fseek(d->f,d->seqstart,SEEK_SET); - d->ps = 0L; - return(1); } - - - -char cbase(int c) -{ static char base[6] = "acgt.."; - if (c < Adenine) return('#'); - if (c > NOBASE) return((char)c); - return(base[c]); } - - -char cpbase(int c) -{ static char base[6] = "ACGT.."; - if (c < Adenine) return('#'); - if (c > NOBASE) return((char)c); - return(base[c]); } - - - -char *aa(int *anticodon, csw *sw) -{ int p1,p2,p3; - if ((p1 = *anticodon) >= AMBIG) return(ambig_aaname); - if ((p2 = anticodon[1]) >= AMBIG) return(ambig_aaname); - if ((p3 = anticodon[2]) >= AMBIG) return(ambig_aaname); - return(aaname[aamap[sw->geneticcode][(p1<<4) + (p2<<2) + p3]]); } - - -char *translate(int *codon, csw *sw) -{ int p1,p2,p3,aa; - if ((p1 = *codon) >= AMBIG) return(ambig_aaname); - if ((p2 = codon[1]) >= AMBIG) return(ambig_aaname); - if ((p3 = codon[2]) >= AMBIG) return(ambig_aaname); - aa = aamap[sw->geneticcode][((3-p3)<<4)+((3-p2)<<2)+(3-p1)]; - if ((aa == SeC) || (aa == Pyl)) aa = Stop; - return(aaname[aa]); } - -char ltranslate(int *codon, gene *t, csw *sw) -{ int code,p1,p2,p3; - if (t->genetype == CDS) code = t->asst; - else code = sw->geneticcode; - if ((p1 = *codon) >= AMBIG) return(ambig_aaname[0]); - if ((p2 = codon[1]) >= AMBIG) return(ambig_aaname[0]); - if ((p3 = codon[2]) >= AMBIG) return(ambig_aaname[0]); - return(aaletter[aamap[code][((3-p3)<<4)+((3-p2)<<2)+(3-p1)]]); } - - -char ptranslate(int *codon, csw *sw) -{ int p1,p2,p3; - if ((p1 = *codon) >= AMBIG) return(ambig_aaname[0]); - if ((p2 = codon[1]) >= AMBIG) return(ambig_aaname[0]); - if ((p3 = codon[2]) >= AMBIG) return(ambig_aaname[0]); - return(aapolarity[aamap[sw->geneticcode][((3-p3)<<4)+((3-p2)<<2)+(3-p1)]]); } - - -double gc_content(gene *t) -{ int *s,*se; - double ngc; - static double score[6] = { 0.0,1.0,1.0,0.0,0.0,0.0 }; - ngc = 0.0; - if ((t->nintron > 0) && (t->asst == 0)) - { s = t->eseq; - se = s + t->intron; - while (s < se) ngc += score[*s++]; - s = se + t->nintron; - se = t->eseq + t->nbase + t->nintron; - while (s < se) ngc += score[*s++]; } - else - { s = t->seq; - se = s + t->nbase; - while (s < se) ngc += score[*s++]; } - return(ngc/(double)t->nbase); } - - -void write_seq(FILE *f, int *seq, int newline) -{ int i,c; - i = 0; - while ((c = *seq++) >= Adenine) - { fputc(cbase(c),f); - if (newline) - if (++i >= 50) - { fputc('\n',f); - i = 0; }} - if (i > 0) fputc('\n',f); } - - -int find_var_hairpin(gene *t) -{ int e,stem,vstem,loop,*sn,*sen,*pos1,*pos2,*sb,*se,*sc,*sd,*sf,*s; - unsigned int c,cn,m; - static unsigned int A[6] = { 0,0,0x100,0x400,0,0 }; - static unsigned int C[6] = { 0,0,0x400,0,0,0 }; - static unsigned int G[6] = { 0x100,0x400,0,0x200,0,0 }; - static unsigned int T[6] = { 0x400,0,0x200,0,0,0 }; - static unsigned int te[6] = { 0,0,0,0,0,0 }; - if (t->genetype != tRNA) return(0); - if (t->var < 13) return(0); - e = 0; - sb = t->seq + t->astem1 + t->spacer1 + 2*t->dstem + t->dloop + - t->spacer2 + 2*t->cstem + t->cloop + t->nintron; - sc = sb + 3; /* 4 */ - se = sb + t->var - 2; /* 3 */ - sf = se - 2; - te[0] = A[*se]; - te[1] = C[*se]; - te[2] = G[*se]; - te[3] = T[*se]; - while (--se > sf) - { te[0] = (te[0] >> 4) | A[*se]; - te[1] = (te[1] >> 4) | C[*se]; - te[2] = (te[2] >> 4) | G[*se]; - te[3] = (te[3] >> 4) | T[*se]; } - while (se >= sc) - { te[0] = ((te[0] >> 4) | A[*se]); - te[1] = ((te[1] >> 4) | C[*se]); - te[2] = ((te[2] >> 4) | G[*se]); - te[3] = ((te[3] >> 4) | T[*se]); - s = se - 5; - sd = se - 7; - m = te[*s]; - while (--s > sd) m = (m >> 4) + te[*s]; - while (s >= sb) - { m = (m >> 4) + te[*s]; - c = m & 0xf; - if (c >= 9) - { stem = 3; - loop = (int)(se - s) - 3; - sen = se; - sn = s + 2; - while (loop >= 6) - { if ((cn = vbp[sen[-1]][sn[1]]) <= 0) break; - c += cn; - stem++; - loop -= 2; - sen--; - sn++; } - if (c > e) - { e = c; - pos1 = s; - pos2 = sen; - vstem = stem; }} - s--; } - se--; } - if (e > 0) - return((((int)(pos1 - sb)) << 10) + (((int)(pos2 - sb)) << 5) + vstem); - else - return(0); } - - - -void write_to_library(FILE *f, gene *t, csw *sw) -{ int *s; - static char trnatype[2][6] = { "tRNA","mtRNA" }; - s = t->seq + t->anticodon; - fprintf(f,">%s",t->name); - if (!softstrpos(t->name,"RNA")) - switch (t->genetype) - { case CDS: - fprintf(f," CDS"); - break; - case srpRNA: - fprintf(f," srpRNA"); - break; - case tmRNA: - if (t->asst > 0) fprintf(f," Permuted"); - fprintf(f," tmRNA"); - break; - case tRNA: - default: - t->varbp = find_var_hairpin(t); - if (t->tstem == 0) fprintf(f," TV-loop"); - else if (t->dstem == 0) fprintf(f," D-loop"); - switch(t->cloop) - { case 6: - fprintf(f," %s-?""?""?(%c%c)",trnatype[sw->mtrna], - cbase(*s),cbase(*(s+1))); - break; - case 8: - fprintf(f," %s-?""?""?(%c%c%c%c)",trnatype[sw->mtrna], - cbase(*s),cbase(s[1]),cbase(s[2]),cbase(s[3])); - break; - case 7: - default: - fprintf(f," %s-%s(%c%c%c)",trnatype[sw->mtrna], - aa(s,sw),cbase(*s),cbase(*(s+1)),cbase(*(s+2))); - break; } - break; } - if (strpos(t->name,"bases)")) - fprintf(f,"\n"); - else - fprintf(f," (%d bases)\n",t->nbase); - fprintf(f,"sequence =\n"); - write_seq(f,t->seq,1); - if (*t->eseq >= Adenine) - { fprintf(f,"extended sequence =\n"); - write_seq(f,t->eseq,1); } - fprintf(f,"nbase = %d\n",t->nbase); - fprintf(f,"sense = %d\n",t->comp); - fprintf(f,"start = %ld\n",t->start); - fprintf(f,"stop = %ld\n",t->stop); - fprintf(f,"astem1 = %d\n",t->astem1); - fprintf(f,"astem2 = %d\n",t->astem2); - fprintf(f,"atail = %d\n",t->aatail); - fprintf(f,"spacer1 = %d\n",t->spacer1); - fprintf(f,"spacer2 = %d\n",t->spacer2); - fprintf(f,"dstem = %d\n",t->dstem); - fprintf(f,"dloop = %d\n",t->dloop); - fprintf(f,"cstem = %d\n",t->cstem); - fprintf(f,"cloop = %d\n",t->cloop); - fprintf(f,"anticodon = %d\n",t->anticodon); - fprintf(f,"nintron = %d\n",t->nintron); - fprintf(f,"intron = %d\n",t->intron); - fprintf(f,"asst = %d",t->asst); - if (t->genetype == tmRNA) - if (t->asst > 0) fprintf(f," permuted"); - fprintf(f,"\ntps = %d\n",t->tps); - fprintf(f,"tpe = %d\n",t->tpe); - fprintf(f,"var = %d\n",t->var); - fprintf(f,"varbp = %d,%d,%d\n",((t->varbp >> 10)&0x1f), - ((t->varbp >> 5)&0x1f),(t->varbp&0x1f)); - fprintf(f,"tstem = %d\n",t->tstem); - fprintf(f,"tloop = %d\n",t->tloop); - fprintf(f,"gc = %g\n\n",gc_content(t)); } - - - -void init_tmrna(FILE *f, csw *sw) -{ int c,*s; - s = sw->tmrna_struct; - while ((c = *s++) != TERM) itmparam(cbase(c),f); } - - - - - -int *make_tv(int *seq, char matrix[][MATY], - int *x, int *y, int orient, int tv) -{ int i,px,py,stem; - static int ux[4] = { 1,0,-1,0 }; - static int uy[4] = { 0,1,0,-1 }; - static int vx[4] = { 0,-1,0,1 }; - static int vy[4] = { 1,0,-1,0 }; - static int loopu[26][26] = - { { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, - { 1,1,1,0,0,-1,-1 }, - { 1,1,1,1,0,-1,-1,-1 }, - { 1,1,1,1,0,0,-1,-1,-1 }, - { 1,1,1,1,1,0,-1,-1,-1,-1 }, - { 1,1,1,1,1,0,0,-1,-1,-1,-1 }, - { 1,1,1,1,1,1,0,-1,-1,-1,-1,-1 }, - { 1,1,1,1,1,1,0,0,-1,-1,-1,-1,-1 }, - { 1,1,1,1,1,1,0,0,0,-1,-1,-1,-1,-1 }, - { 1,1,1,1,1,1,1,0,0,-1,-1,-1,-1,-1,-1 }, - { 1,1,1,1,1,1,1,0,0,0,-1,-1,-1,-1,-1,-1 }, - { 1,1,1,1,1,1,1,1,0,0,-1,-1,-1,-1,-1,-1,-1 }, - { 1,1,1,1,1,1,1,1,0,0,0,-1,-1,-1,-1,-1,-1,-1 }, - { 1,1,1,1,1,1,1,1,1,0,0,-1,-1,-1,-1,-1,-1,-1,-1 }, - { 1,1,1,1,1,1,1,1,1,0,0,0,-1,-1,-1,-1,-1,-1,-1,-1 }, - { 1,1,1,1,1,1,1,1,1,1,0,0,-1,-1,-1,-1,-1,-1,-1,-1,-1 }, - { 1,1,1,1,1,1,1,1,1,1,0,0,0,-1,-1,-1,-1,-1,-1,-1,-1,-1 }, - { 1,1,1,1,1,1,1,1,1,1,1,0,0,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 }, - { 1,1,1,1,1,1,1,1,1,1,1,0,0,0,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 }, - { 1,1,1,1,1,1,1,1,1,1,1,1,0,0,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 }, - { 1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 } }; - static int loopv[26][26] = - { { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, - { 0,1,1,1,1,1,1 }, - { -1,1,1,1,1,1,1,1 }, - { -1,1,1,1,1,1,1,1,0 }, - { -1,0,1,1,1,1,1,1,1,0 }, - { -1,0,1,1,1,1,1,1,1,0,0 }, - { -1,0,0,1,1,1,1,1,1,1,0,0 }, - { -1,0,0,1,1,1,1,1,1,1,0,0,0 }, - { -1,0,0,1,1,1,1,1,1,1,0,0,0,0 }, - { -1,0,0,1,1,1,1,1,1,1,0,0,0,0,0 }, - { -1,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0 }, - { -1,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,0 }, - { -1,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,0 }, - { -1,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,0,0 }, - { -1,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,0,0 }, - { -1,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0 }, - { -1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0 }, - { -1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0 }, - { -1,0,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0 }, - { -1,0,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0 }, - { -1,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0 } }; - px = *x; - py = *y; - stem = 0; - if (tv < 6) - { px += ux[orient]; - py += uy[orient]; - i = 0; - while (i < tv) - { px += vx[orient]; - py += vy[orient]; - matrix[px][py] = cbase(*seq++); - i++; } - py += (6-i)*vy[orient]; - goto FN; } - if (tv > 25) - { if (tv % 2) - stem = (tv - 25)/2; - else - stem = (tv - 24)/2; - tv = tv - 2*stem; } - i = 0; - while (i < stem) - { px += ux[orient]; - py += uy[orient]; - matrix[px][py] = cbase(*seq++); - i++; } - i = 0; - while (i < tv) - { px += ux[orient]*loopu[tv][i] + vx[orient]*loopv[tv][i]; - py += uy[orient]*loopu[tv][i] + vy[orient]*loopv[tv][i]; - matrix[px][py] = cbase(*seq++); - i++; } - px += ux[orient]*loopu[tv][i] + vx[orient]*loopv[tv][i]; - py += uy[orient]*loopu[tv][i] + vy[orient]*loopv[tv][i]; - i = 0; - while (i < stem) - { matrix[px][py] = cbase(*seq++); - px -= (ux[orient]); - py -= (uy[orient]); - i++; } - FN: - *x = px; - *y = py; - return(seq); } - - -int base_match(char b1, char b2) -{ int i,s; - static char base1[11] = "acgtgtagtg"; - static char base2[11] = "tgcatggatg"; - static int score[11] = { 2,2,2,2,1,1,3,3,3,3 }; - s = 0; - for (i = 0; i < 10; i++) - if (b1 == base1[i]) - if (b2 == base2[i]) - { s = score[i]; - break; } - return(s); } - - -int *make_clover(int *seq, int b, int e, int stemlength, - char matrix[][MATY], int *x, int *y, int orient) -{ int i,px,py,pxb,pyb,pxe,pye,l,xlg,xlgd,ylgh,ylg; - int *s,*se; - static int ux[9] = { 1,0,-1,0,0,1,1,-1,-1 }; - static int uy[9] = { 0,1,0,-1,1,-1,1,1,-1 }; - static int vx[9] = { 0,-1,0,1,1,1,1,-1,-1 }; - static int vy[9] = { 1,0,-1,0,0,0,0,0,0 }; - static int loopu[18][18] = - { { -1 }, { 0,-1 }, { 0,0,-1 }, { 0,1,-1,-1 }, { 0,1,0,-1,-1 }, - { 0,1,0,0,-1,-1 }, { 0,1,1,0,-1,-1,-1 }, { 0,1,1,0,0,-1,-1,-1 }, - { 0,1,1,1,0,-1,-1,-1,-1 }, { 0,1,1,1,0,0,-1,-1,-1,-1 }, - { 0,1,1,1,0,0,0,-1,-1,-1,-1 }, - { 0,1,1,1,1,0,0,-1,-1,-1,-1,-1 }, - { 0,1,1,1,1,0,0,0,-1,-1,-1,-1,-1 }, - { 0,1,1,1,1,1,0,0,-1,-1,-1,-1,-1,-1 }, - { 0,1,1,1,1,1,0,0,0,-1,-1,-1,-1,-1,-1 }, - { 0,1,1,1,1,1,1,0,0,-1,-1,-1,-1,-1,-1,-1 }, - { 0,1,1,1,1,1,1,0,0,0,-1,-1,-1,-1,-1,-1,-1 }, - { 0,1,1,1,1,1,1,0,0,0,0,-1,-1,-1,-1,-1,-1,-1 } }; - static int loopv[18][18] = - { { 2 }, { 1,1 }, { 0,1,1 }, { -1,2,2,-1 }, { -1,1,1,2,-1 }, - { -1,1,1,1,1,-1 }, { -1,0,1,1,1,1,-1 }, { -1,0,1,1,1,1,0,-1 }, - { -1,0,1,1,1,1,0,0,-1 }, { -1,0,0,1,1,1,1,0,0,-1 }, - { -1,0,0,0,1,1,1,1,0,0,-1 }, - { -1,0,0,0,1,1,1,1,0,0,0,-1 }, - { -1,0,0,0,1,1,1,1,0,0,0,0,-1 }, - { -1,0,0,0,0,1,1,1,1,0,0,0,0,-1 }, - { -1,0,0,0,0,1,1,1,1,0,0,0,0,0,-1 }, - { -1,0,0,0,0,0,1,1,1,1,0,0,0,0,0,-1 }, - { -1,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,-1 }, - { -1,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,-1 } }; - static int dloopu[18][18] = - { { -1 }, { 0,-1 }, { 0,0,-1 }, { 0,1,-1,-1 }, { 0,1,0,-1,-1 }, - { 0,1,0,0,-1,-1 }, { 0,1,1,0,-1,-1,-1 }, { 0,1,1,0,0,-1,-1,-1 }, - { 0,1,1,0,0,0,-1,-1,-1 }, { 0,1,1,1,0,0,-1,-1,-1,-1 }, - { 0,1,1,1,0,0,0,-1,-1,-1,-1 }, - { 0,1,1,1,1,0,0,-1,-1,-1,-1,-1 }, - { 0,1,1,1,1,0,0,0,-1,-1,-1,-1,-1 }, - { 0,1,1,1,1,1,0,0,-1,-1,-1,-1,-1,-1 }, - { 0,1,1,1,1,1,0,0,0,-1,-1,-1,-1,-1,-1 }, - { 0,1,1,1,1,1,1,0,0,-1,-1,-1,-1,-1,-1,-1 }, - { 0,1,1,1,1,1,1,0,0,0,-1,-1,-1,-1,-1,-1,-1 }, - { 0,1,1,1,1,1,1,0,0,0,0,-1,-1,-1,-1,-1,-1,-1 } }; - static int dloopv[18][18] = - { { 2 }, { 1,1 }, { 0,1,1 }, { -1,2,2,-1 }, { -1,1,1,2,-1 }, - { -1,1,1,1,1,-1 }, { -1,0,1,1,1,1,-1 }, { -1,0,1,1,1,1,0,-1 }, - { -1,0,1,1,1,1,1,-1,-1 }, { -1,0,0,1,1,1,1,0,0,-1 }, - { -1,0,0,0,1,1,1,1,0,0,-1 }, - { -1,0,0,0,1,1,1,1,0,0,0,-1 }, - { -1,0,0,0,1,1,1,1,0,0,0,0,-1 }, - { -1,0,0,0,0,1,1,1,1,0,0,0,0,-1 }, - { -1,0,0,0,0,1,1,1,1,0,0,0,0,0,-1 }, - { -1,0,0,0,0,0,1,1,1,1,0,0,0,0,0,-1 }, - { -1,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,-1 }, - { -1,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,-1 } }; - static char bond1[5] = " +!:"; - static char bond2[5] = " +-."; - px = *x; - py = *y; - s = seq + b; - se = s + stemlength; - while (s < se) - { matrix[px][py] = cbase(*s++); - px += ux[orient]; - py += uy[orient]; } - l = e - b - 2*stemlength; - if (l < 0) l = 0; - if (l < 18) - { i = 0; - if (orient == DOWN) - { while (i < l) - { px += ux[orient]*dloopu[l][i] + vx[orient]*dloopv[l][i]; - py += uy[orient]*dloopu[l][i] + vy[orient]*dloopv[l][i]; - matrix[px][py] = cbase(*s++); - i++; } - px += ux[orient]*dloopu[l][i] + vx[orient]*dloopv[l][i]; - py += uy[orient]*dloopu[l][i] + vy[orient]*dloopv[l][i]; } - else - { while (i < l) - { px += ux[orient]*loopu[l][i] + vx[orient]*loopv[l][i]; - py += uy[orient]*loopu[l][i] + vy[orient]*loopv[l][i]; - matrix[px][py] = cbase(*s++); - i++; } - px += ux[orient]*loopu[l][i] + vx[orient]*loopv[l][i]; - py += uy[orient]*loopu[l][i] + vy[orient]*loopv[l][i]; }} - else - { ylgh = ((l >> 2) - 2) >> 1; - ylg = (ylgh << 1) + 2; - xlgd = l - ylg - 2*ylgh + 1; - xlg = (xlgd + 1) >> 1; - pxb = px - ylgh*vx[orient]; - if ((pxb < 0) || (pxb >= MATX)) goto NOLOOP; - pyb = py - ylgh*vy[orient]; - if ((pyb < 0) || (pyb >= MATY)) goto NOLOOP; - pxe = px + xlg*ux[orient] + (ylg - ylgh + 1)*vx[orient]; - if ((pxe < 0) || (pxe >= MATX)) goto NOLOOP; - pye = py + xlg*uy[orient] + (ylg - ylgh + 1)*vy[orient]; - if ((pye < 0) || (pye >= MATY)) goto NOLOOP; - for (i = 0; i < ylgh; i++) - { px -= vx[orient]; - py -= vy[orient]; - matrix[px][py] = cbase(*s++); } - for (i = 0; i < xlg; i++) - { px += ux[orient]; - py += uy[orient]; - matrix[px][py] = cbase(*s++); } - for (i = 1; i < ylg; i++) - { px += vx[orient]; - py += vy[orient]; - matrix[px][py] = cbase(*s++); } - px += vx[orient]; - py += vy[orient]; - if (!(xlgd & 1)) matrix[px][py] = cbase(*s++); - for (i = 0; i < xlg; i++) - { px -= ux[orient]; - py -= uy[orient]; - matrix[px][py] = cbase(*s++); } - for (i = 1; i < ylgh; i++) - { px -= vx[orient]; - py -= vy[orient]; - matrix[px][py] = cbase(*s++); } - px -= (ux[orient] + vx[orient]); - py -= (uy[orient] + vy[orient]); } - goto STEMBOND; - NOLOOP: - px += ux[orient]*loopu[0][0] + vx[orient]*loopv[0][0]; - py += uy[orient]*loopu[0][0] + vy[orient]*loopv[0][0]; - STEMBOND: - se = seq + e; - s = se - stemlength; - while (s < se) - { matrix[px][py] = cbase(*s++); - i = base_match(matrix[px][py], - matrix[px - 2*vx[orient]][py - 2*vy[orient]]); - switch(orient) - { case RIGHT: - case LEFT: matrix[px - vx[orient]][py - vy[orient]] = bond1[i]; - break; - case SLANTDR: - case SLANTUR: - case SLANTUL: - case SLANTDL: - case UPRIGHT: - case UP: - case DOWN: matrix[px - vx[orient]][py - vy[orient]] = bond2[i]; - break; } - px -= ux[orient]; - py -= uy[orient]; } - *x = px; - *y = py; - return(se); } - - - - - -int *make_dv(int *seq, char matrix[][MATY], int dloop, - int orient, int *xp, int *yp) -{ int i,x,y; - static int ux[5] = { 1,0,-1,0,0 }; - static int uy[5] = { 0,1,0,-1,1 }; - static int vx[5] = { 0,-1,0,1,1 }; - static int vy[5] = { 1,0,-1,0,0 }; - static int loopu[22][22] = - { { -1 }, { -1,0 }, - { -1,-1,1 }, - { -1,-1,0,1 }, - { -1,-1,0,0,1 }, - { -1,-1,-1,0,1,1 }, - { -1,-1,-1,0,0,1,1 }, - { -1,-1,-1,-1,0,1,1,1 }, - { -1,-1,-1,-1,0,0,1,1,1 }, - { -1,-1,-1,-1,-1,0,1,1,1,1 }, - { -1,-1,-1,-1,-1,0,0,1,1,1,1 }, - { -1,-1,-1,-1,-1,-1,0,1,1,1,1,1 }, - { -1,-1,-1,-1,-1,-1,0,0,1,1,1,1,1 }, - { -1,-1,-1,-1,-1,-1,-1,0,1,1,1,1,1,1 }, - { -1,-1,-1,-1,-1,-1,0,-1,0,1,1,1,1,1,1 }, - { -1,-1,-1,-1,-1,-1,-1,-1,0,1,1,1,1,1,1,1 }, - { -1,-1,-1,-1,-1,-1,-1,-1,0,0,1,1,1,1,1,1,1 }, - { -1,-1,-1,-1,-1,-1,-1,-1,-1,0,1,1,1,1,1,1,1,1 }, - { -1,-1,-1,-1,-1,-1,-1,-1,-1,0,0,1,1,1,1,1,1,1,1 }, - { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0,1,1,1,1,1,1,1,1,1 }, - { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0,0,1,1,1,1,1,1,1,1,1 }, - { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0,1,1,1,1,1,1,1,1,1,1 } }; - static int loopv[22][22] = - { { -6 }, { -3,-3 }, - { -2,-2,-2 }, - { -2,-1,-1,-2 }, - { -1,-1,-2,-1,-1 }, - { -1,-1,-1,-1,-1,-1 }, - { 0,-1,-1,-1,-1,-1,-1 }, - { 0,-1,0,-1,-1,-1,-1,-1 }, - { 0,-1,0,-1,-1,-1,0,-1,-1 }, - { 0,0,-1,0,-1,-1,-1,0,-1,-1 }, - { 0,0,-1,0,-1,-1,-1,0,-1,0,-1 }, - { 0,0,0,-1,0,-1,-1,-1,0,-1,0,-1 }, - { 0,0,0,-1,0,-1,-1,-1,0,-1,0,0,-1 }, - { 0,0,0,0,-1,0,-1,-1,-1,0,-1,0,0,-1 }, - { 0,0,0,0,-1,0,-1,-1,-1,0,-1,0,0,0,-1 }, - { 0,0,0,0,0,-1,0,-1,-1,-1,0,-1,0,0,0,-1 }, - { 0,0,0,0,0,-1,0,-1,-1,-1,-1,0,0,0,0,0,-1 }, - { 0,0,0,0,0,0,-1,0,-1,-1,-1,0,-1,0,0,0,0,-1 }, - { 0,0,0,0,0,0,-1,0,-1,-1,-1,-1,0,0,0,0,0,0,-1 }, - { 0,0,0,0,0,0,0,-1,0,-1,-1,-1,0,-1,0,0,0,0,0,-1 }, - { 0,0,0,0,0,0,0,-1,0,-1,-1,-1,-1,0,0,0,0,0,0,0,-1 }, - { 0,0,0,0,0,0,0,0,-1,0,-1,-1,-1,0,-1,0,0,0,0,0,0,-1 } }; - x = *xp; - y = *yp; - if ((dloop < 2) || (dloop > 21)) - { x--; - y-= 6; - seq += dloop; - goto FN; } - i = 0; - while (i < dloop) - { x += ux[orient]*loopu[dloop][i] + vx[orient]*loopv[dloop][i]; - y += uy[orient]*loopu[dloop][i] + vy[orient]*loopv[dloop][i]; - matrix[x][y] = cbase(*seq++); - i++; } - x += ux[orient]*loopu[dloop][i] + vx[orient]*loopv[dloop][i]; - y += uy[orient]*loopu[dloop][i] + vy[orient]*loopv[dloop][i]; - FN: - *xp = x; - *yp = y; - return(seq); } - - -int *make_var(int *seq, char matrix[][MATY], - int *x, int *y, int orient, int var, int varbp) -{ int i,b,e,p,px,py,pxf,pyf,l,stem; - static int ux[4] = { 1,0,-1,0 }; - static int uy[4] = { 0,1,0,-1 }; - static int vx[4] = { 0,-1,0,1 }; - static int vy[4] = { 1,0,-1,0 }; - static int preu[4][5][4] = - { { {0},{1},{1},{1},{1} }, - { {0},{1,1},{1,1},{1,1},{1,1} }, - { {0},{0,1,1},{0,1,1},{1,1,1},{1,1,1} }, - { {0},{0,0,1,1},{0,0,1,1},{0,1,1,1},{0,1,1,1} } }; - static int prev[4][5][4] = - { { {0},{0},{0},{0},{-1} }, - { {0},{0,1},{0,0},{-1,0},{-1,0} }, - { {0},{0,1,0},{0,0,0},{-1,0,0},{-1,0,-1} }, - { {0},{0,1,0,0},{0,1,0,-1},{0,0,0,-1},{0,0,-1,-1} } }; - static int postu[4][5][4] = - { { {0},{0},{1,-1},{0,-1,0},{1,-1,-1,0} }, - { {0},{0},{0,-1},{0,-1,0},{0,-1,-1,0} }, - { {0},{0},{0,-1},{0,-1,-1},{1,-1,-1,-1} }, - { {0},{0},{0,-1},{0,-1,-1},{1,-1,-1,-1} } }; - static int postv[4][5][4] = - { { {0},{0},{0,1},{0,1,1},{0,1,1,1} }, - { {0},{0},{0,1},{0,1,1},{0,1,1,1} }, - { {0},{0},{0,1},{0,1,1},{0,1,1,1} }, - { {0},{0},{0,1},{0,1,1},{0,1,1,1} } }; - static int loopu[10][10] = - { { 2 }, { 1,1 }, { 1,1,0 }, { 1,0,1,0 }, { 1,1,0,0,0 }, - { 1,1,1,-1,-1,1 }, { 1,1,1,0,0,-1,0 }, { 1,1,1,1,0,-1,-1,0 }, - { 1,1,1,1,0,0,-1,-1,0 }, { 1,1,1,1,1,0,-1,-1,-1,0 } }; - static int loopv[10][10] = - { { 3 }, { 1,2 }, { 0,1,2 }, { 0,1,1,1 }, { -1,1,1,1,1 }, - { -1,0,1,1,1,1 }, { -1,-1,1,1,1,1,1 }, { -1,-1,0,1,1,1,1,1 }, - { -1,-1,-1,1,1,1,1,1,1 }, { -1,-1,-1,0,1,1,1,1,1,1 } }; - px = *x; - py = *y; - if (var < 0) var = 0; - if (var > 30) var = 30; - if (varbp > 0) - { b = (varbp >> 10) & 0x1f; - if (b > 3) goto NBP; - stem = varbp & 0x1f; - e = stem + ((varbp >> 5) & 0x1f); - p = var - e; - if (p < 1) goto NBP; /* 2 */ - if (p > 4) goto NBP; - pxf = px + 2*ux[orient] + 3*vx[orient]; - pyf = py + 2*uy[orient] + 3*vy[orient]; - i = 0; - while (i < b) - { px += ux[orient]*preu[b][p][i] + vx[orient]*prev[b][p][i]; - py += uy[orient]*preu[b][p][i] + vy[orient]*prev[b][p][i]; - matrix[px][py] = cbase(*seq++); - i++; } - px += ux[orient]*preu[b][p][b] + vx[orient]*prev[b][p][b]; - py += uy[orient]*preu[b][p][b] + vy[orient]*prev[b][p][b]; - seq = make_clover(seq,0,e-b,stem,matrix,&px,&py,orient+SLANT); - i = 0; - while (i < p) - { px += ux[orient]*postu[b][p][i] + vx[orient]*postv[b][p][i]; - py += uy[orient]*postu[b][p][i] + vy[orient]*postv[b][p][i]; - matrix[px][py] = cbase(*seq++); - i++; } - *x = pxf; - *y = pyf; - goto FIN; } - NBP: - if (var > 9) - { if (var % 2) stem = (var - 7)/2; - else stem = (var - 6)/2; } - else stem = 0; - l = var - 2*stem; - i = 0; - while (i < stem) - { px += ux[orient] - vx[orient]; - py += uy[orient] - vy[orient]; - matrix[px][py] = cbase(*seq++); - i++; } - i = 0; - while (i < l) - { px += ux[orient]*loopu[l][i] + vx[orient]*loopv[l][i]; - py += uy[orient]*loopu[l][i] + vy[orient]*loopv[l][i]; - matrix[px][py] = cbase(*seq++); - i++; } - px += ux[orient]*loopu[l][i] + vx[orient]*loopv[l][i]; - py += uy[orient]*loopu[l][i] + vy[orient]*loopv[l][i]; - i = 0; - while (i < stem) - { matrix[px][py] = cbase(*seq++); - px -= (ux[orient] - vx[orient]); - py -= (uy[orient] - vy[orient]); - i++; } - *x = px; - *y = py; - FIN: - return(seq); } - - - - - -void remove_inserts(int *s1, int *s2) -{ int flag,c; - flag = 0; - while ((c = *s1++) != TERM) - { if (c == INSERT) - { flag = 1 - flag; - continue; } - if (flag) continue; - *s2++ = c; } - *s2 = TERM; } - - - -void build_trna(gene *t, char matrix[][MATY], int x, int y, csw *sw) -{ int i,j,e,c,*seq; - int rseq[150]; - static char bond2[5] = " +-."; - t->varbp = find_var_hairpin(t); - remove_inserts(t->seq,rseq); - seq = rseq; - i = 0; - while (i < t->astem1) - { matrix[x][y] = cbase(*seq++); - y--; - i++; } - if (t->spacer1 > 0) - { x--; - if (t->spacer1 >= 3) matrix[x][y+1] = cbase(*seq++); - matrix[x][y] = cbase(*seq++); - y--; - x--; - if (t->spacer1 >= 2) matrix[x][y] = cbase(*seq++); - if ((t->spacer2 < 2) || (t->spacer1 > 1)) - { x--; - y--; }} - if (t->dstem > 0) - { e = 2*t->dstem + t->dloop; - seq = make_clover(seq,0,e,t->dstem,matrix,&x,&y,LEFT); - if (t->spacer2 > 1) x--; - y--; - if (t->spacer2 > 0) matrix[x][y] = cbase(*seq++); - y--; - if (t->spacer2 > 1) - { if (t->spacer1 > 1) x++; - matrix[x][y] = cbase(*seq++); - if (t->spacer1 < 2) y--; } - x++; } - else - seq = make_dv(seq,matrix,t->dloop,RIGHT,&x,&y); - e = 2*t->cstem + t->cloop; - seq = make_clover(seq,0,e,t->cstem,matrix,&x,&y,DOWN); - if (t->tstem > 0) - { seq = make_var(seq,matrix,&x,&y,RIGHT,t->var,t->varbp); - e = 2*t->tstem + t->tloop; - seq = make_clover(seq,0,e,t->tstem,matrix,&x,&y,RIGHT); - y++; } - else - seq = make_tv(seq,matrix,&x,&y,RIGHT,t->tloop); - e = t->astem2; - i = 0; - while (i < e) - { if ((c = *seq++) < Adenine) break; - matrix[x][y] = cbase(c); - j = base_match(matrix[x][y],matrix[x - 2][y]); - matrix[x - 1][y] = bond2[j]; - y++; - i++; } - i = 0; - e = (sw->aataildisp)?ASTEM2_EXTD:t->aatail; - j = (e < 2)?e:2; - while (i < j) - { if ((c = *seq++) < Adenine) break; - matrix[x][y] = cbase(c); - x++; - y++; - i++; } - e -= j; - i = 0; - while (i < e) - { if ((c = *seq++) < Adenine) break; - matrix[x][y] = cbase(c); - x++; - i++; }} - - - - - -void build_tmrna(gene *t, char matrix[][MATY], int x, int y, csw *sw) -{ int i,j,e,c,tarm,*seq; - int rseq[2*MAXTMRNALEN+1]; - static char bond2[5] = " +-."; - remove_inserts(t->eseq,rseq); - seq = rseq + t->asst; - i = 0; - while (i < t->astem1) - { matrix[x][y] = cbase(*seq++); - y--; - i++; } - seq = make_dv(seq,matrix,t->dloop,RIGHT,&x,&y); - tarm = 2*t->tstem + t->tloop; - e = (t->asst > 0)? - (t->cstem - t->dloop - t->astem1 - t->asst + 54): - (2*t->cstem + t->cloop + t->nintron); - seq = make_clover(seq,0,e,t->cstem,matrix,&x,&y,DOWN); - seq = make_var(seq,matrix,&x,&y,RIGHT,t->var,t->varbp); - seq = make_clover(seq,0,tarm,t->tstem,matrix,&x,&y,RIGHT); - y++; - e = t->astem2; - i = 0; - while (i < e) - { if ((c = *seq++) == TERM) break; - matrix[x][y] = cbase(c); - j = base_match(matrix[x][y],matrix[x - 2][y]); - matrix[x - 1][y] = bond2[j]; - y++; - i++; } - e = (sw->aataildisp)?ASTEM2_EXTD:t->aatail; - j = (e < 2)?e:2; - i = 0; - while (i < j) - { if ((c = *seq++) == TERM) break; - matrix[x][y] = cbase(c); - x++; - y++; - i++; } - e -= j; - i = 0; - while (i < e) - { if ((c = *seq++) == TERM) break; - matrix[x][y] = cbase(c); - x++; - i++; } } - - -void init_matrix(char matrix[][MATY]) -{ int i,j; - for (i =0; i < MATY; i++) - for (j = 0; j < MATX; j++) matrix[j][i] = ' '; } - - -void disp_matrix(FILE *f, char matrix[][MATY], int ylines) -{ int i,j,k; - i = ylines; - while (--i >= 0) - { k = MATX; - while (--k > 0) if (matrix[k][i] != ' ') break; - for (j = 0; j <= k; j++) fputc(matrix[j][i],f); - fputc('\n',f); } - fputc('\n',f); } - - -void xcopy(char m[][MATY], int x, int y, char *s, int l) -{ int i; - char c; - i = 0; - while (i < l) - { if (x >= MATX) break; - if (!(c = *s++)) break; - m[x++][y] = c; - i++; }} - - - - -int identify_tag(char tag[], int len, char (*thit)[50], int nt) -{ int i,n; - char *s,*st,*sb,*sd; - static struct { char name[50]; char tag[50]; } tagdatabase[NTAG] = - { { "Cyanidioschyzon merolae Chloroplast","ANQILPFSIPVKHLAV" }, - { "Mesostigma viride chloroplast","ANNILPFNRKTAVAV" }, - { "Nephroselmis olivacea chloroplast","TTYHSCLEGHLS" }, - { "Pirellula sp.","AEENFALAA" }, - { "Rhodopirellula baltica","AEENFALAA" }, - { "Desulfotalea psychrophila","ADDYNYAVAA" }, - { "Desulfuromonas acetoxidans","ADTDVSYALAA" }, - { "Exiguobacterium sp.","GKTNTQLAAA" }, - { "Mycoplasma gallisepticum","DKTSKELADENFVLNQLASNNYALNF" }, - { "Aquifex aeolicus","APEAELALAA" }, - { "Thermotoga maritima ","ANEPVAVAA" }, - { "Thermotoga neapolitana","ANEPVAVAA" }, - { "Chloroflexus aurantiacus","ANTNTRAQARLALAA" }, - { "Thermus thermophilus","ANTNYALAA" }, - { "Deinococcus radiodurans","GNQNYALAA" }, - { "Deinococcus geothermalis","GNQNYALAA" }, - { "Cytophaga hutchinsonii","GEESYAMAA" }, - { "Bacteroides fragilis","GETNYALAA" }, - { "Tannerella forsythensis","GENNYALAA" }, - { "Porphyromonas gingivalis","GENNYALAA" }, - { "Prevotella intermedia","GENNYALAA" }, - { "Chlorobium tepidum","ADDYSYAMAA" }, - { "Chlorobium chlorochromatii","ADDYSYAMAA" }, - { "Salinibacter ruber","ADDYSYAMAA" }, - { "Gemmata obscuriglobus","AEPQYSLAA" }, - { "Chlammydophila pneumoniae","AEPKAECEIISLFDSVEERLAA" }, - { "Chlammydophila caviae","AEPKAECEIISFSDLTEERLAA" }, - { "Chlammydophila abortus","AEPKAKCEIISFSELSEQRLAA" }, - { "Chlammydia trachomatis","AEPKAECEIISFADLEDLRVAA" }, - { "Chlammydia muridarum","AEPKAECEIISFADLNDLRVAA" }, - { "Nostoc PCC7120","ANNIVKFARKDALVAA" }, - { "Nostoc punctiforme","ANNIVNFARKDALVAA" }, - { "Fremyella diplosiphon","ANNIVKFARKEALVAA" }, - { "Plectonema boryanum","ANNIVPFARKTAPVAA" }, - { "Trichodesmium erythraeum","ANNIVPFARKQVAALA" }, - { "Oscillatoria 6304","ANNIVPFARKAAPVAA" }, - { "Chroococcidiopsis PCC6712","ANNIVKFERQAVFA" }, - { "Synechocystis PCC6803","ANNIVSFKRVAIAA" }, - { "Thermosynechococcus elongatus","ANNIVPFARKAAAVA" }, - { "Synechococcus PCC6301","ANNIVPFARKAAPVAA" }, - { "Synechococcus elongatus","ANNIVPFARKAAPVAA" }, - { "Synechococcus WH8102","ANNIVRFSRHAAPVAA" }, - { "Synechococcus PCC6307","ANNIVRFSRQAAPVAA" }, - { "Synechococcus PCC7002","ANNIVPFARKAAAVA" }, - { "Synechococcus PCC7009","ANNIVRFSRQAAPVAA" }, - { "Synechococcus PCC6904","ANNIVRFSRQAAPVAA" }, - { "Synechococcus CC9311","ANNIVRFSRQAAPVAA" }, - { "Synechococcus CC9902","ANNIVRFSRQAAPVAA" }, - { "Synechococcus CC9605","ANNIVRFSRQAAPVAA" }, - { "Prochlorococcus marinus 1","ANKIVSFSRQTAPVAA" }, - { "Prochlorococcus marinus 2","ANNIVRFSRQPALVAA" }, - { "Prochlorococcus marinus 3","ANKIVSFSRQTAPVAA" }, - { "Cyanophora paradoxa chloroplast","ATNIVRFNRKAAFAV" }, - { "Thalassiosira weissflogii chloroplast","ANNIIPFIFKAVKTKKEAMALNFAV" }, - { "Odontella sinensis chloroplast","ANNLISSVFKSLSTKQNSLNLSFAV" }, - { "Bolidomonas pacifica chloroplast","ANNILAFNRKSLSFA" }, - { "Pavlova lutheri chloroplast","ANNILSFNRVAVA" }, - { "Porphyra purpurea chloroplast","AENNIIAFSRKLAVA" }, - { "Guillardia theta chloroplast","ASNIVSFSSKRLVSFA" }, - { "Fibrobacter succinogenes","ADENYALAA" }, - { "Treponema pallidum","ANSDSFDYALAA" }, - { "Treponema denticola","AENNDSFDYALAA" }, - { "Leptospira interrogans","ANNELALAA" }, - { "Borrelia burgdorferi","AKNNNFTSSNLVMAA" }, - { "Borrelia garinii","AKNNNFTSSNLVMAA" }, - { "Caulobacter crescentus","ANDNFAEEFAVAA" }, - { "Rhodobacter sphaeroides","ANDNRAPVALAA" }, - { "Silicibacter pomeroyi","ANDNRAPVALAA" }, - { "Silicibacter TM1040","ANDNRAPVALAA" }, - { "Paracoccus denitrificans","ANDNRAPVALAA" }, - { "Nitrobacter hamburgensis","ANDNYAPVAQAA" }, - { "Nitrobacter winogradskyi","ANDNYAPVAQAA" }, - { "Nitrobacter Nb-311A","ANDNYAPVAQAA" }, - { "Rhodopseudomonas palustris","ANDNYAPVAQAA" }, - { "Rhodopseudomonas palustris 4","ANDNVRMNEVRLAA" }, - { "Bradyrhizobium japonicum","ANDNFAPVAQAA" }, - { "Agrobacterium tumefaciens 1","ANDNNAKEYALAA" }, - { "Agrobacterium tumefaciens 2","ANDNNAKECALAA" }, - { "Rhizobium leguminosarum","ANDNYAEARLAA" }, - { "Sinorhizobium meliloti","ANDNYAEARLAA" }, - { "Mesorhizobium loti","ANDNYAEARLAA" }, - { "Mesorhizobium sp.","ANDNYAEARLAA" }, - { "Bartonella henselae","ANDNYAEARLAA" }, - { "Bartonella quintana","ANDNYAEARLAA" }, - { "Brucella melitensis","ANDNNAQGYALAA" }, - { "Brucella abortus","ANDNNAQGYALAA" }, - { "Brucella suis","ANDNNAQGYALAA" }, - { "Methylobacterium extorquens","ANDNFAPVAVAA" }, - { "Magnetospirillum magnetotacticum 1","ANDNFAPVAVAA" }, - { "Magnetospirillum magnetotacticum 2","ANDNVELAAAA" }, - { "Rhodospirillum rubrum","ANDNVELAAAA" }, - { "Novosphingobium aromaticivorans","ANDNEALALAA" }, - { "Sphingopyxis alaskensis","ANDNEALALAA" }, - { "Erythrobacter litoralis","ANDNEALALAA" }, - { "Ehrlichia chaffeensis","ANDNFVFANDNNSSANLVAA" }, - { "Anaplasma phagocytophilum","ANDDFVAANDNVETAFVAAA" }, - { "Wolbachi.sp","ANDNFAAEDNVDAIAA" }, - { "Rickettsia conorii","ANDNNRSVGHLALAA" }, - { "Rickettsia sibirica","ANDNNRSVGHLALAA" }, - { "Rickettsia typhi","ANDNKRYVGVAALAAA" }, - { "Rickettsia prowazekii","ANDNRYVGVPALAAA" }, - { "Neisseria gonorrhoeae","ANDETYALAA" }, - { "Neisseria meningitidis","ANDETYALAA" }, - { "Neisseria lactamica","ANDETYALAA" }, - { "Chromobacterium violaceum","ANDETYALAA" }, - { "Uncultured U02","ANDEQFALAA" }, - { "Nitrosomonas europaea","ANDENYALAA" }, - { "Nitrosomonas cryotolerans","ANDENYALAA" }, - { "Methylobacillus glycogenes","ANDETYALAA" }, - { "Methylobacillus flagellatus","ANDETYALAA" }, - { "Moraxella catarrhalis","ANDETYALAA" }, - { "Uncultured U04","ANDETYALAA" }, - { "Ralstonia pickettii","ANDERYALAA" }, - { "Ralstonia solanacearum","ANDNRYQLAA" }, - { "Ralstonia eutropha","ANDERYALAA" }, - { "Ralstonia metallidurans","ANDERYALAA" }, - { "Alcaligenes faecalis","ANDERFALAA" }, - { "Comamonas testosteroni","ANDERFALAA" }, - { "Variovorax paradoxus","ANDERFALAA" }, - { "Hydrogenophaga palleronii","ANDERFALAA" }, - { "Burkholderia pseudomallei","ANDDTFALAA" }, - { "Burkholderia mallei","ANDDTFALAA" }, - { "Burkholderia fungorum","ANDDTFALAA" }, - { "Burkholderia cepacia","ANDDTFALAA" }, - { "Burkholderia cenocepacia","ANDDTFALAA" }, - { "Burkholderia thailandensis","ANDDTFALAA" }, - { "Burkholderia vietnamiensis","ANDDTFALAA" }, - { "Burkholderia sp. 383","ANDDTFALAA" }, - { "Bordetella avium","ANDERFALAA" }, - { "Bordetella pertussis","ANDERFALAA" }, - { "Bordetella parapertussis","ANDERFALAA" }, - { "Bordetella bronchiseptica","ANDERFALAA" }, - { "Polaromonas JS666","ANDERFALAA" }, - { "Rubrivivax gelatinosus","ANDERFALAA" }, - { "Uncultured stronglyoides1","ANDERFALAA" }, - { "Azoarcus BH72","ANDERFALAA" }, - { "Xylella fastidiosa 1","ANEDNFAVAA" }, - { "Xylella fastidiosa 2","ANEDNFALAA" }, - { "Xylella fastidiosa 3","ANEDNFAIAA" }, - { "Xylella fastidiosa 4","ANEDNFALAA" }, - { "Xanthomonas campestris 1","ANDDNYGSDFAIAA" }, - { "Xanthomonas campestris 2","ANDDNYGSDSAIAA" }, - { "Xanthomonas axonopodis","ANDDNYGSDFAIAA" }, - { "Xanthomonas oryzae","ANDDNYGSDFAIAA" }, - { "Legionella pneumophila","ANDENFAGGEAIAA" }, - { "Coxiella burnetii","ANDSNYLQEAYA" }, - { "Methylococcus capsulatus","ANDDVYALAA" }, - { "Uncultured U01a","ANDSNYALAA" }, - { "Dichelobacter nodosus","ANDDNYALAA" }, - { "Francisella tularensis 1","GNKKANRVAANDSNFAAVAKAA" }, - { "Francisella tularensis 2","ANDSNFAAVAKAA" }, - { "Acidithiobacillus ferrooxidans","ANDSNYALAA" }, - { "Acinetobacter ADP1","ANDETYALAA" }, - { "Psychrobacter 2734","ANDENYALAA" }, - { "Psychrobacter cryohalolentis","ANDENYALAA" }, - { "Psychrobacter arcticus","ANDENYALAA" }, - { "Azotobacter vinelandii","ANDDNYALAA" }, - { "Pseudomonas aeruginosa","ANDDNYALAA" }, - { "Pseudomonas syringae 1","ANDENYGAQLAA" }, - { "Pseudomonas syringae 2","ANDETYGEYALAA" }, - { "Pseudomonas syringae 3","ANDENYGAQLAA" }, - { "Pseudomonas fluorescens 1","ANDDQYGAALAA" }, - { "Pseudomonas fluorescens 2","ANDENYGQEFALAA" }, - { "Pseudomonas putida 1","ANDENYGAEYKLAA" }, - { "Marinobacter hydrocarbonoclasticus","ANDENYALAA" }, - { "Marinobacter aquaeolei","ANDENYALAA" }, - { "Pseudoalteromonas haloplanktis","ANDDNYSLAA" }, - { "Pseudoalteromonas atlantica","ANDENYALAA" }, - { "Uncultured WW11","ANDDNYALAA" }, - { "Shewanella oneidensis","ANDDNYALAA" }, - { "Shewanella putrefaciens","ANDDNYALAA" }, - { "Shewanella PV-4","ANDDNYALAA" }, - { "Shewanella amazonensis","ANDDNYALAA" }, - { "Shewanella SAR-1","ANDDNYALAA" }, - { "Shewanella ANA-3","ANDDNYALAA" }, - { "Idiomarina loihiensis","ANDDNYALAA" }, - { "Photorhabdus asymbiotica","ANDNEYALVA" }, - { "Microbulbifer degradans","ANDDNYGAQLAA" }, - { "Saccharophagus degradans","ANDDNYGAQLAA" }, - { "Colwellia sp","ANDDTFALAA" }, - { "Colwellia psychrerythraea","ANDDTFALAA" }, - { "Photobacterium phosphoreum","ANDENYALAA" }, - { "Vibrio cholerae","ANDENYALAA" }, - { "Vibrio vulnificus","ANDENYALAA" }, - { "Vibrio Ex25","ANDENYALAA" }, - { "Vibrio parahemolyticus","ANDENYALAA" }, - { "Aeromonas salmonicida","ANDENYALAA" }, - { "Aeromonas hydrophila 1","ANDENYALAA" }, - { "Aeromonas hydrophila 2","ANDENYALAA" }, - { "Uncultured VLW3","ANDENYALAA" }, - { "Uncultured VLS13","ANDENYALAA" }, - { "Uncultured WW9","ANDENYALAA" }, - { "Uncultured WW10","ANDENYALAV" }, - { "Uncultured VLW5","ANDENYALAA" }, - { "Uncultured RCA4","ANDETYALAA" }, - { "Uncultured LEM1","ANDETYALAA" }, - { "Uncultured LEM2","ANDETHALAA" }, - { "Wigglesworthia brevipalpis","AKHKYNEPALLAA" }, - { "Wigglesworthia glossinidia","AKHKYNEPALLAA" }, - { "Buchnera aphidicola 1","ANNKQNYALAA" }, - { "Buchnera aphidicola 2","ANNKQNYALAA" }, - { "Buchnera aphidicola 3","AKQNQYALAA" }, - { "Shigella dysenteriae 1","ANDENYALAA" }, - { "Shigella dysenteriae 2","ANDENYALAA" }, - { "Shigella flexneri","ANDENYALAA" }, - { "Shigella boydii","ANDENYALAA" }, - { "Shigella sonnei","ANDENYALAA" }, - { "Escherichia coli","ANDENYALAA" }, - { "Providencia rettgeri","ANDENYALAA" }, - { "Serratia marcescens","ANDENYALAA" }, - { "Klebsiella pneumoniae","ANDENYALAA" }, - { "Pectobacterium carotovora","ANDENYALAA" }, - { "Erwinia chrysanthemi","ANDENFAPAALAA" }, - { "Erwinia amylovora","ANDENFAPAALAA" }, - { "Erwinia carotovora","ANDENYALAA" }, - { "Salmonella bongori","ANDENYALAA" }, - { "Salmonella typhimurium","ANDETYALAA" }, - { "Salmonella typhi","ANDETYALAA" }, - { "Salmonella paratyphi","ANDENYALAA" }, - { "Salmonella enterica 1","ANDETYALAA" }, - { "Salmonella enterica 2","ANDENYALAA" }, - { "Salmonella enterica 3","ANDETYALAA" }, - { "Salmonella enterica 5","ANDETYALAA" }, - { "Salmonella enterica 6","ANDENYALAA" }, - { "Uncultured RCA1","ANDENYALAA" }, - { "Uncultured VLS1","ANDENYALAA" }, - { "Uncultured WW1","ANDENYALAA" }, - { "Uncultured RCA2","SNDENYALAA" }, - { "Uncultured WW2","ANDENYALAA" }, - { "Uncultured QL1","ANVENYALAA" }, - { "Uncultured WW4","ANDGNYALAA" }, - { "Uncultured VLS5","ANDETYALAA" }, - { "Uncultured FS1","ANDETYALAA" }, - { "Uncultured VLS6","ANDENYALAA" }, - { "Uncultured FS2","ANDENYALAA" }, - { "Uncultured WW5","ANDENYALAA" }, - { "Uncultured VLW1","ANDENYALAA" }, - { "Uncultured VLS7","ANDENYALAA" }, - { "Uncultured VLS9","ANDENYALAA" }, - { "Uncultured VLW2","ANDENYALAA" }, - { "Uncultured WW7","ANDENCALAA" }, - { "Uncultured WW8","ANDENYALAA" }, - { "Yersinia enterocolitica","ANDSQYESAALAA" }, - { "Yersinia intermedia","ANDSQYESAALAA" }, - { "Yersinia mollaretii","ANDSQYESAALAA" }, - { "Yersinia bercovieri","ANDSQYESAALAA" }, - { "Yersinia pestis","ANDENYALAA" }, - { "Yersinia frederiksenii","ANDENYALAA" }, - { "Yersinia pseudotuberculosis","ANDENYALAA" }, - { "Mannheimia haemolytica","ANDEQYALAA" }, - { "Mannheimia succiniciproducens","ANDEQYALAA" }, - { "Haemophilus ducreyi","ANDEQYALAA" }, - { "Haemophilus influenzae","ANDEQYALAA" }, - { "Haemophilus somnus","ANDEQYALAA" }, - { "Pasteurella multocida","ANDEQYALAA" }, - { "Actinobacillus actinomycetemcomitans","ANDEQYALAA" }, - { "Actinobacillus pleuropneumoniae","ANDEQYALAA" }, - { "Lawsonia intracellularis","ANNNYDYALAA" }, - { "Desulfovibrio desulfuricans","ANNDYDYAYAA" }, - { "Desulfovibrio vulgaris","ANNYDYALAA" }, - { "Desulfovibrio yellowstonii","ANNELALAA" }, - { "Geobacter sulfurreducens","ADNYDYAVAA" }, - { "Geobacter metallireducens","ADNYDYAVAA" }, - { "Helicobacter pylori 1","VNNTDYAPAYAKAA" }, - { "Helicobacter pylori 2","VNNTDYAPAYAKAA" }, - { "Helicobacter pylori 3","VNNADYAPAYAKAA" }, - { "Campylobacter jejuni","ANNVKFAPAYAKAA" }, - { "Campylobacter lari","ANNVKFAPAYAKAA" }, - { "Campylobacter fetus 2","ANNVKFAPAYAKAA" }, - { "Campylobacter coli","ANNVKFAPAYAKAA" }, - { "Fusobacterium nucleatum 1","GNKDYALAA" }, - { "Fusobacterium nucleatum 2","GNKEYALAA" }, - { "Dehalococcoides ethenogenes","GERELVLAG" }, - { "Mycobacterium leprae","ADSYQRDYALAA" }, - { "Mycobacterium avium","ADSHQRDYALAA" }, - { "Mycobacterium bovis","ADSHQRDYALAA" }, - { "Mycobacterium tuberculosis","ADSHQRDYALAA" }, - { "Mycobacterium marinum","ADSHQRDYALAA" }, - { "Mycobacterium microti","ADSHQRDYALAA" }, - { "Mycobacterium africanum","ADSHQRDYALAA" }, - { "Mycobacterium smegmatis","ADSNQRDYALAA" }, - { "Corynebacterium diphtheriae","AENTQRDYALAA" }, - { "Corynebacterium glutamicum","AEKSQRDYALAA" }, - { "Thermobifida fusca","ANSKRTEFALAA" }, - { "Streptomyces coelicolor","ANTKRDSSQQAFALAA" }, - { "Streptomyces lividans","ANTKRDSSQQAFALAA" }, - { "Tropheryma whipplei","ANLKRTDLSLAA" }, - { "Clavibacter michiganensis","ANNKQSSFVLAA" }, - { "Bifidobacterium longum","AKSNRTEFALAA" }, - { "Bifidobacterium longum","AKSNRTEFALAA" }, - { "Bacillus anthracis","GKQNNLSLAA" }, - { "Bacillus thuringiensis","GKQNNLSLAA" }, - { "Bacillus cereus","GKQNNLSLAA" }, - { "Bacillus megaterium","GKSNNNFALAA" }, - { "Bacillus halodurans","GKENNNFALAA" }, - { "Bacillus clausii","GKENNNFALAA" }, - { "Bacillus subtilis","GKTNSFNQNVALAA" }, - { "Bacillus stearothermophilus","GKQNYALAA" }, - { "Geobacillus kaustophilus","GKQNYALAA" }, - { "Staphylococcus aureus","GKSNNNFAVAA" }, - { "Staphylococcus saprophyticus","GKENNNFAVAA" }, - { "Staphylococcus xylosus","GKENNNFAVAA" }, - { "Staphylococcus epidermidis","DKSNNNFAVAA" }, - { "Oceanobacillus iheyensis","GKETNQPVLAAA" }, - { "Listeria monocytogenes","GKEKQNLAFAA" }, - { "Listeria innocua","GKEKQNLAFAA" }, - { "Listeria welshimeri","GKEKQNLAFAA" }, - { "Listeria seeligeri","GKEKQNLAFAA" }, - { "Listeria grayi 1","GKEKQNLAFAA" }, - { "Listeria grayi 2","GKQNNNLAFAA" }, - { "Listeria ivanovii","GKEKQNLAFAA" }, - { "Lactobacillus gasseri","ANNENSYAVAA" }, - { "Lactobacillus johnsonii","ANNENSYAVAA" }, - { "Lactobacillus sakei","ANNNNSYAVAA" }, - { "Lactobacillus helveticus","ANNKNSYALAA" }, - { "Lactobacillus gallinarum","ANNKNSYALAA" }, - { "Lactobacillus acidophilus","ANNKNSYALAA" }, - { "Lactobacillus plantarum","AKNNNNSYALAA" }, - { "Pediococcus pentosaceus","AKNNNNSYALAA" }, - { "Leuconostoc mesenteroides","AKNENSFAIAA" }, - { "Leuconostoc lactis","AKNENSFAIAA" }, - { "Leuconostoc pseudomesenteroides","AKNENSYAIAA" }, - { "Enterococcus durans","AKNENNSYALAA" }, - { "Oenococcus oeni","AKNNEPSYALAA" }, - { "Enterococcus faecium","AKNENNSYALAA" }, - { "Enterococcus faecalis","AKNENNSFALAA" }, - { "Streptococcus equi","AKNNTTYALAA" }, - { "Streptococcus zooepidemicus","AKNNTTYALAA" }, - { "Streptococcus suis","AKNTNTYALAA" }, - { "Streptococcus uberis","AKNTNSYALAA" }, - { "Streptococcus pyogenes","AKNTNSYALAA" }, - { "Streptococcus agalactiae","AKNTNSYALAA" }, - { "Streptococcus mutans","AKNTNSYAVAA" }, - { "Streptococcus sobrinus","AKNTNSYAVAA" }, - { "Streptococcus gordonii","AKNNTSYALAA" }, - { "Streptococcus pneumoniae","AKNNTSYALAA" }, - { "Streptococcus mitis","AKNNTSYALAA" }, - { "Streptococcus thermophilus","AKNTNSYAVAA" }, - { "Lactococcus raffinolactis","AKNTQTYAVAA" }, - { "Lactococcus plantarum","AKNTQTYALAA" }, - { "Lactococcus garvieae","AKNNTSYALAA" }, - { "Lactococcus lactis","AKNNTQTYAMAA" }, - { "Mycoplasma capricolum","ANKNEETFEMPAFMMNNASAGANFMFA" }, - { "Mesoplasma florum","ANKNEENTNEVPTFMLNAGQANYAFA" }, - { "Spiroplasma kunkelii","ASKKQKEDKIEMPAFMMNNQLAVSMLAA" }, - { "Ureaplasma urealyticum","AENKKSSEVELNPAFMASATNANYAFAY" }, - { "Ureaplasma parvum","AENKKSSEVELNPAFMASATNANYAFAY" }, - { "Mycoplasma pulmonis","GTKKQENDYQDLMISQNLNQNLAFASV" }, - { "Mycoplasma penetrans","AKNNKNEAVEVELNDFEINALSQNANLALYA" }, - { "Mycoplasma genitalium 1","DKENNEVLVEPNLIINQQASVNFAFA" }, - { "Mycoplasma genitalium 2","DKENNEVLVDPNLIINQQASVNFAFA" }, - { "Mycoplasma pneumoniae","DKNNDEVLVDPMLIANQQASINYAFA" }, - { "Thermoanaerobacter tengcongensis","ADRELAYAA" }, - { "Heliobacillus mobilis","AEDNYALAA" }, - { "Desulfitobacterium hafniense","ANDDNYALAA" }, - { "Nitrosococcus oceani","ANDDNYALAA" }, - { "Thiomicrospira crunogena","ANDDNYALAA" }, - { "Stenotrophomonas maltophilia","ANDDNYALAA" }, - { "Carboxydothermus hydrogenoformans","ANENYALAA" }, - { "Ruminococcus albus","GHGYFAKAS" }, - { "Clostridium acetobutylicum","DNENNLALAA" }, - { "Clostridium perfringens","AEDNFALAA" }, - { "Clostridium thermocellum","ANEDNYALAAA" }, - { "Clostridium botulinum","ANDNFALAA" }, - { "Clostridium tetani","ADDNFVLAA" }, - { "Clostridium difficile","ADDNFAIAA" }, - { "Hyphomonas neptunium","ANDNFAEGELLAA" }, - { "Vibrio fischeri","ANDENYALAA" }, - { "Corynebacterium efficiens","AEKTQRDYALAA" }, - { "Streptomyces avermitilus","ANTKSDSQSFALAA" }, - { "Brevibacterium linens","AKSNNRTDFALAA" }, - { "Lactobacillus delbrueckii 1","AKNENNSYALAA" }, - { "Lactobacillus delbrueckii 2","ANENSYAVAA" }, - { "Lactobacillus casei","AKNENSYALAA" }, - { "Lactobacillus brevis","AKNNNNSYALAA" }, - { "Streptomyces thermophilus","AKNTNSYAVAA" }, - { "Bacillusphage G","AKLNITNNELQVA" }, - { "Thermodesulfobacterium commune","ANEYAYALAA" }, - { "Thermomicrobium roseum","GERELALAA" }, - { "Leptospirillum groupII","ANEELALAA" }, - { "Leptospirillum groupIII","ANEELALAA" }, - { "Gloeobacter violaceus","ATNNVVPFARARATVAA" }, - { "Crocosphaera watsonii","ANNIVSFKRVAVAA" }, - { "Thalassiosira pseudonana chloroplast","ANNIMPFMFNVVKTNRSLTTLNFAV" }, - { "Emiliania huxleyi chloroplast","ANNILNFNSKLAIA" }, - { "Cyanidium caldarium chloroplast","ANNIIEISNIRKPALVV" }, - { "Gracilaria tenuistipitata chloroplast","AKNNILTLSRRLIYA" }, - { "Prevotella ruminicola","GNNEYALAA" }, - { "Jannaschia sp. CCS1","ANDNRAPAMALAA" }, - { "Agrobacterium vitis","ANDNNAQGYAVAA" }, - { "Alphaproteobacteria SAR-1","ANDELALAA" }, - { "Gluconobacter oxydans","ANDNSEVLAVAA" }, - { "Sphingomonas elodea","ANDNEALAIAA" }, - { "Ehrlichia ruminantium 1","ANDNFVSANDNNSTANLVAA" }, - { "Ehrlichia ruminantium 2","ANDNFVSANDNNSTANLVAA" }, - { "Ehrlichia canis","ANDNFVFANDNNSSVAGLVAA" }, - { "Anaplasma marginale","ANDDFVAANDNMETAFVAAA" }, - { "Wolbachia sp. 2 (Brugi)","ANDNFAAEGDVAVAA" }, - { "Wolbachia sp. 3 (Culex)","ANDNFAAEDNVALAA" }, - { "Wolbachia sp. 4 (Dros.)","ANDNFAAEEYRVAA" }, - { "Rickettsia rickettsii","ANDNNRSVGRLALAA" }, - { "Tremblaya princeps 1 (Dysmicoccus)", - "APSNRFTIVANDCIDALVRRAVV" }, - { "Azoarcus EbN1","ANDERFAVAA" }, - { "Dechloromonas aromatica","ANDEQFAIAA" }, - { "Dechloromonas agitata","ANDEQFAIAA" }, - { "Thiobacillus denitrificans","AKSKAARRNPACSAGVMELKA" }, - { "Shewanella SAR-2, version 2","ADYGYMAAA" }, - { "Shewanella SAR-1, version 2","ANNDNYALAA" }, - { "Uncultured marineEBAC20E09","ANNDNYALAA" }, - { "Pseudomonas fluorescens 3 (Pf-5)","ANDETYGDYALAA" }, - { "Uncultured remanei","ANDESYALAA" }, - { "Chromohalobacter salexigens","ANDDNYAQGALAA" }, - { "Gammaproteobacteria SAR-1","ANNYNYSLAA" }, - { "Shewanella denitrificans","ANDSNYSLAA" }, - { "Shewanella frigidimarina","ANDSNYSLAA" }, - { "Shewanella baltica","ANDSNYSLAA" }, - { "Photobacterium profundum","ANDENFALAA" }, - { "Blochmannia floridanus","AKNKYNEPVALAA" }, - { "Blochmannia pennsylvanicus","ANNTTYRESVALAA" }, - { "Photorhabdus luminescens","ANDEKYALAA" }, - { "Proteus mirabilis","ANDNQYKALAA" }, - { "Magnetococcus sp.","ANDEHYAPAFAAA" }, - { "Proteobacteria SAR-1, version 1","GENADYALAA" }, - { "Proteobacteria SAR-1, version 2","ANNYNYSLAA" }, - { "Proteobacteria SAR-1, version 3","ADNGYMAAA" }, - { "Desulfovibrio desulfuricans 2 (G20)","ANNDYEYAMAA" }, - { "Uncultured ciona","ANDEFFDARLRA" }, - { "Bacteriovorax marinus","AESNFAPAMAA" }, - { "Bdellovibrio bacteriovorus","GNDYALAA" }, - { "Myxococcus xanthus","ANDNVELALAA" }, - { "Wolinella succinogenes","ALSSHPKRGKRLGLPITSALGA" }, - { "Campylobacter upsaliensis","ANNAKFAPAYAKVA" }, - { "Helicobacter mustelae","ANNKNYAPAYAKVA" }, - { "Helicobacter hepaticus","ANNANYAPAYAKVA" }, - { "Ruminococcus albus","DNDNFAMAA" }, - { "Coprothermobacter proteolyticus","AEPEFALAA" }, - { "Moorella thermoacetica","ADDNLALAA" }, - { "Mycoplasma mycoides","ADKNEENFEMPAFMINNASAGANYMFA" }, - { "Mycoplasma mobile","GKEKQLEVSPLLMSSSQSNLVFA" }, - { "Mycoplasma arthritidis","GNLETSEDKKLDLQFVMNSQTQQNLLFA" }, - { "Paenibacillus larvae","GKQQNNYALAA" }, - { "Bacillus licheniformis","GKSNQNLALAA" }, - { "Actinomyces naeslundii","ADNTRTDFALAA" }, - { "Arthrobacter FB24","AKQTRTDFALAA" }, - { "Leifsonia xyli","ANSKSTVSAKADFALAA" }, - { "Nocardia farcinica","ADSHQREYALAA" }, - { "Propionibacterium acnes 1","AENTRTDFALAA" }, - { "Propionibacterium acnes 2","AENTRTDFALAA" }, - { "Streptomyces collinus","ANTKRDSSSFALAA" }, - { "Streptomyces aureofaciens","ANSKRDSQQFALAA" }, - { "Kineococcus radiotolerans","ADSKRTEFALAA" }, - { "Frankia sp. CcI3","ANKTQPTTPTYALAA" }, - { "Frankia sp. EAN1pec","ATKTQPASSTFALAA" }, - { "Rubrobacter xylanophilus","ANDREMALAA" }, - { "Parachlamydia UWE25","ANNSNKIAKVDFQEGTFARAA" }, - { "Verrucomicrobium spinosum","ANSNELALAA" }, - { "Acidobacterium capsulatum","ANNNLALAA" }, - { "Acidobacterium Ellin6076","ANTQFAYAA" }, - { "Solibacter usitatus","ANTQFAYAA" }, - { "Dictyoglomus thermophilum","ANTNLALAA" }, - { "Mycobacteriophage Bxz1 virion","ATDTDATVTDAEIEAFFAEEAAALV" }, - { "Catera virion","ATDTDATVTDAEIEAFFAEEAAALV" }, - { "Cyanobium gracile","ANNIVRFSRQAAPVAA" }, - { "Anabaena variabilis","ANNIVKFARKDALVAA" }, - { "Nitrosospira multiformis","ANDENYALAA" }, - { "Enterobacter sakazakii","ANDENYALAA" }, - { "Pantoea stewartii","ANDENYALAA" }, - { "Citrobacter rodentium","ANDENYALAA" }, - { "Prochlorococcus marinus","ANNIVSFSRQTAPVAA" }, - { "Azospira oryzae","ANDERFAIAA" }, - { "Uncultured phakopsora","ANDNSYALAA" }, - { "Syntrophus aciditrophicus","ANDYEYALAA" }, - { "Alkaliphilus metalliredigenes","ANDNYSLAAA" }, - { "Caldicellulosiruptor saccharolyticus","ADKAELALAA" } }; - n = 0; - st = tag + len; - while (*--st == '*'); - for (i = 0; i < NTAG; i++) - { s = st; - sb = tagdatabase[i].tag; - sd = sb; - while (*++sd); - while (*s-- == *--sd) - { if (s < tag) - { if (sd > sb) goto PAR; - if (n >= nt) goto MANY; - copy(tagdatabase[i].name,thit[n]); - n++; - break; } - if (sd > sb) continue; - PAR: - if (n >= nt) goto MANY; - s = copy(tagdatabase[i].name,thit[n]); - copy(" (partial match)",s); - n++; - break; }} - return(n); - MANY: - return(-1); } - - -void disp_peptide_tag(FILE *f, gene *t, csw *sw) -{ int i,lx,nm,nmh,c1,c2,c3,*s,*se; - char tag[50],thit[21][50]; - fprintf(f,"Tag peptide (at %d)\nTag sequence: ",t->tps+1); - se = t->eseq + t->tps; - lx = (t->tpe - t->tps + 1); - if (ltranslate(se+lx,t,sw) == '*') - { lx += 3; - if (ltranslate(se+lx,t,sw) == '*') lx += 3; } - lx /= 3; - s = se; - for (i = 0; i < lx; i++) - { if (i > 0) fputc('-',f); - if ((c1 = *s++) >= AMBIG) continue; - if ((c2 = *s++) >= AMBIG) continue; - if ((c3 = *s++) >= AMBIG) continue; - fputc(cbase(c1),f); - fputc(cbase(c2),f); - fputc(cbase(c3),f); } - s = se; - fprintf(f,"\nTag peptide: "); - for (i = 0; i < lx; i++) - { fprintf(f,"%s",translate(s,sw)); - s += 3; - if (i < (lx-1)) fputc('-',f); } - s = se; - fprintf(f,"\nTag peptide: "); - for (i = 0; i < lx; i++) - { tag[i] = ltranslate(s,t,sw); - fprintf(f,"%c",tag[i]); - s += 3; } - tag[lx] = '\0'; - if (sw->energydisp) - { s = se; - fprintf(f,"\nTag Polarity: "); - for (i = 0; i < lx; i++) - { fprintf(f,"%c",ptranslate(s,sw)); - s += 3; }} - fputc('\n',f); - nmh = identify_tag(tag,lx,thit,21); - if (nmh > 0) - { if (nmh > 1) - { fprintf(f,"Match with tmRNA tags from:\n"); - i = 0; - for (nm = 0; nm < nmh; nm++) - { if (++i > 3) - { fputc('\n',f); - i = 1; } - if (i > 1) fprintf(f,", "); - fprintf(f,"%s",thit[nm]); } - fputc('\n',f); } - else - fprintf(f,"Match with %s tmRNA tag\n",thit[0]); } - else - if (nmh == -1) - fprintf(f,"Match with many tmRNA tags\n"); - else - fprintf(f,"Tag not identified\n"); - fputc('\n',f); } - - -void sense_switch(int *seq1, int *seq2, int lseq) -{ int i,b; - int *sseq,*cseq; - sseq = seq1; - cseq = seq2 + lseq; - while (--cseq >= seq2) - { b = *sseq++; - if (b >= Adenine) - { if (b <= Thymine) - *cseq = Thymine - b; - else - { if (b <= NOBASE) *cseq = b; - else *cseq = NOBASE; }} - else *cseq = NOBASE; }} - - -double nenergy(gene *t, csw *sw) -{ double eref; - if (t->genetype != tRNA) eref = sw->eref[t->genetype]; - else - if (sw->mtrna) - { if (t->dstem == 0) eref = mtRNAtthresh; - else - if (t->tstem == 0) eref = mtRNAdthresh; - else eref = mtRNAdtthresh; } - else eref = sw->eref[tRNA]; - return(100.0*t->energy/eref); } - - -char *position(char *s, gene *t, csw *sw) -{ long start; - start = t->start; - if (sw->linear) if (start <= 0) start--; - if (t->comp) - sprintf(s,"c[%ld,%ld]",start,t->stop); - else - sprintf(s,"[%ld,%ld]",start,t->stop); - return(s); } - - -void location(char *s, gene *t, csw *sw, char *m) -{ char sp[80]; - sprintf(s,"%s %s",m,position(sp,t,sw)); } - -void disp_location(gene *t, csw *sw, char *m) -{ char sp[80]; - fprintf(sw->f,"%s %s\n",m,position(sp,t,sw)); } - - - -char *name(gene *t, char *si, int proc, csw *sw) -{ int s[5],*ss,*sin,*sm,*s0,*s1,*s2,*s3,nintron; - char *sb,*st; - static char trnatype[2][6] = { "tRNA","mtRNA" }; - switch (t->genetype) - { case CDS: - sprintf(si,"CDS"); - break; - case srpRNA: - sprintf(si,"srpRNA"); - break; - case tmRNA: - if (t->asst > 0) - sprintf(si,"tmRNA (Permuted)"); - else - sprintf(si,"tmRNA"); - break; - case tRNA: - ss = (proc?t->seq:t->ps); - sm = ss + t->anticodon - 1; - s0 = sm + 1; - s1 = s0 + 1; - s2 = s1 + 1; - s3 = s2 + 1; - nintron = t->nintron; - if ((proc == 0) && (nintron > 0)) - { sin = ss + t->intron; - if (sm >= sin) sm += nintron; - if (s0 >= sin) s0 += nintron; - if (s1 >= sin) s1 += nintron; - if (s2 >= sin) s2 += nintron; - if (s3 >= sin) s3 += nintron; } - s[0] = *sm; - s[1] = *s0; - s[2] = *s1; - s[3] = *s2; - s[4] = *s3; - st = trnatype[sw->mtrna]; - sb = si; - if (t->dstem == 0) - { sprintf(sb,"D-loop "); - sb += 7; } - if (t->tstem == 0) - { sprintf(sb,"TV-loop "); - sb += 8; } - if (t->cloop == 8) - sprintf(sb,"%s-?(%s|%s)(%c%c%c%c)",st, - aa(s+1,sw),aa(s+2,sw), - cbase(s[1]),cbase(s[2]),cbase(s[3]), - cbase(s[4])); - else if (t->cloop == 6) - sprintf(sb,"%s-?(%s|%s)(%c%c)",st, - aa(s,sw),aa(s+1,sw), - cbase(s[1]),cbase(s[2])); - else - sprintf(sb,"%s-%s(%c%c%c)",st, - aa(s+1,sw),cbase(s[1]),cbase(s[2]),cbase(s[3])); - break; - default: *si = '\0'; - break; } - return(si); } - - -void disp_intron(FILE *f, gene *t, csw *sw) -{ int i,c,*s,*sb,*se; - char genename[100]; - if (t->nintron <= 0) return; - name(t,genename,1,sw); - fprintf(f,"Intron from %s\n",genename); - fprintf(f,"1 . 10 . 20 . 30 . 40 . 50\n"); - sb = t->eseq + t->intron; - s = sb; - se = sb + t->nintron; - i = 0; - while (s < se) - { if ((c = *s++) < Adenine) break; - fputc(cbase(c),f); - if (++i >= 50) - { fputc('\n',f); - i = 0; }} - if (i > 0) fputc('\n',f); - fputc('\n',f); - fprintf(f,"Intron Length: %d\n",t->nintron); - fprintf(f,"Intron Insertion Position(%d-%d): ",t->intron,t->intron+1); - s = sb - 5; - for (i = 0; i < 5; i++) fputc(cbase(*s++),f); - fprintf(f,"-Intron-"); - s = se; - for (i = 0; i < 5; i++) fputc(cbase(*s++),f); - fputc('\n',f); - fputc('\n',f); } - -void disp_fasta_seq(FILE *f, gene *t, int ns, int n, int nsp, int c, csw *sw) -{ int i,*s,*se; - char genename[100],genepos[100]; - if (t->nintron > 0) - { s = t->eseq; - se = s + t->nbase + t->nintron; } - else - { s = t->seq; - se = s + t->nbase; } - name(t,genename,1,sw); - position(genepos,t,sw); - if (nsp > 0) - { if (ns > 0) fprintf(f,">%d-%d%s%s\n",ns,n,genename,genepos); - else fprintf(f,">%s%s\n",genename,genepos); } - else - { if (ns > 0) fprintf(f,">%d-%d %s %s\n",ns,n,genename,genepos); - else fprintf(f,">%s %s\n",genename,genepos); } - i = 0; - while (s < se) - { if (c) fputc(cpbase(*s++),f); - else fputc(cbase(*s++),f); - if (++i >= 50) - { fputc('\n',f); - i = 0; }} - if (i > 0) fputc('\n',f); } - - -void disp_seq(FILE *f, gene *t, csw *sw) -{ int i,j,k,varbp,stem,ab,ae,hl,*s,*se,*sl,*sb,*sr; - char genename[100]; - static int bplb[2] = { '.','(' }; - static int bprb[2] = { '.',')' }; - if (t->nintron > 0) - { s = t->eseq; - se = s + t->nbase + t->nintron; } - else - { s = t->seq; - se = s + t->nbase; } - if (sw->seqdisp >= 3) - { if (!sw->batch) fputc('\n',f); - if (sw->seqdisp == 3) disp_fasta_seq(f,t,0,0,0,0,sw); - else disp_fasta_seq(f,t,0,0,0,1,sw); } - else - { if (!sw->batch) - { name(t,genename,1,sw); - fprintf(f,"\nPrimary sequence for %s\n",genename); } - if ((sw->seqdisp == 2) && (t->genetype == tRNA)) - { sl = s; - while (sl < se) fputc(cbase(*sl++),f); - fputc('\n',f); - sl = s; - sr = se - t->aatail - 1; - for (i = 0; i < t->astem1; i++,sl++,sr--) fputc(bplb[bp[*sl][*sr]],f); - for (i = 0; i < t->spacer1; i++) fputc(' ',f); - sl += t->spacer1; - sb = sl + t->dstem - 1; - sr = sb + t->dstem + t->dloop; - for (i = 0; i < t->dstem; i++,sl++,sr--) fputc(bplb[bp[*sl][*sr]],f); - for (i = 0; i < t->dloop; i++) fputc('d',f); - sl += t->dloop; - for (i = 0; i < t->dstem; i++,sl++,sb--) fputc(bprb[bp[*sl][*sb]],f); - for (i = 0; i < t->spacer2; i++) fputc(' ',f); - sl += t->spacer2; - sb = sl + t->cstem - 1; - sr = sb + t->cstem + t->cloop + t->nintron; - for (i = 0; i < t->cstem; i++,sl++,sr--) fputc(bplb[bp[*sl][*sr]],f); - hl = t->astem1 + t->spacer1 + 2*t->dstem + t->dloop + - t->spacer2 + t->cstem; - if (t->nintron > 0) - { j = t->intron - hl; - ab = t->anticodon - hl; - ae = ab + t->cloop - 5; - for (i = 0; i < j; i++) - if (i <= ae) - if (i >= ab) fputc('A',f); - else fputc(' ',f); - else fputc(' ',f); - for (i = 0; i < t->nintron; i++) fputc('i',f); - for (i = j; i < t->cloop; i++) - if (i <= ae) - if (i >= ab) fputc('A',f); - else fputc(' ',f); - else fputc(' ',f); } - else - { j = t->cloop - 4; - ab = t->anticodon - hl; - ae = t->cloop - ab - j; - for (i = 0; i < ab; i++) fputc(' ',f); - for (i = 0; i < j; i++) fputc('A',f); - for (i = 0; i < ae; i++) fputc(' ',f); } - sl += (t->cloop + t->nintron); - for (i = 0; i < t->cstem; i++,sl++,sb--) fputc(bprb[bp[*sl][*sb]],f); - varbp = find_var_hairpin(t); - if (varbp > 0) - { j = (varbp >> 10); - k = (varbp >> 5) & 0x1f; - stem = (varbp & 0x1f); - sr = sl + k + stem - 1; - sl += j; - sb = sl + stem - 1; - for (i = 0; i < j; i++) fputc(' ',f); - for (i = 0; i < stem; i++,sl++,sr--) fputc(bplb[bp[*sl][*sr]],f); - for (i = j+stem; i < k; i++,sl++) fputc('v',f); - for (i = 0; i < stem; i++,sl++,sb--) fputc(bprb[bp[*sl][*sb]],f); - for (i = k+stem; i < t->var; i++,sl++) fputc(' ',f); } - else - { for (i = 0; i < t->var; i++) fputc(' ',f); - sl += t->var; } - sb = sl + t->tstem - 1; - sr = sb + t->tstem + t->tloop; - for (i = 0; i < t->tstem; i++,sl++,sr--) fputc(bplb[bp[*sl][*sr]],f); - for (i = 0; i < t->tloop; i++) fputc('t',f); - sl += t->tloop; - for (i = 0; i < t->tstem; i++,sl++,sb--) fputc(bprb[bp[*sl][*sb]],f); - sb = s + t->astem1 - 1; - for (i = 0; i < t->astem2; i++,sl++,sb--) fputc(bprb[bp[*sl][*sb]],f); - fputc('\n',f); } - else - { if (!sw->batch) - fprintf(f,"1 . 10 . 20 . 30 . 40 . 50\n"); - i = 0; - while (s < se) - { fputc(cbase(*s++),f); - if (++i >= 50) - { fputc('\n',f); - i = 0; }} - if (i > 0) fputc('\n',f); }} - if (!sw->batch) - { fputc('\n',f); - fputc('\n',f); }} - - - - -void disp_tmrna_seq(FILE *f, gene *t, csw *sw) -{ int i,*s,*sb,*se; - if (t->nintron <= 0) return; - if (*(t->name) == '\0') fprintf(f,"tmRNA Sequence\n\n"); - else fprintf(f,"tmRNA Sequence in %s\n\n",t->name); - fprintf(f,"1 . 10 . 20 . 30 . 40 . 50\n"); - sb = t->eseq; - s = sb; - se = sb + t->intron; - i = 0; - while (s < se) - { fputc(cbase(*s++),f); - if (++i >= 50) - { fputc('\n',f); - i = 0; }} - se = sb + t->tps; - while (s < se) - { fputc(cpbase(*s++),f); - if (++i >= 50) - { fputc('\n',f); - i = 0; }} - se = sb + t->tpe + 1; - while (ltranslate(se,t,sw) == '*') se += 3; - while (s < se) - { fputc(cbase(*s++),f); - if (++i >= 50) - { fputc('\n',f); - i = 0; }} - se = sb + t->intron + t->nintron; - while (s < se) - { fputc(cpbase(*s++),f); - if (++i >= 50) - { fputc('\n',f); - i = 0; }} - se = sb + t->nbase + t->nintron; - while (s < se) - { fputc(cbase(*s++),f); - if (++i >= 50) - { fputc('\n',f); - i = 0; }} - if (i > 0) fputc('\n',f); - fputc('\n',f); - fprintf(f,"Resume consensus sequence (at %d): ",t->tps - 6); - s = t->eseq + t->tps - 7; - for (i = 0; i < 18; i++) fputc(cbase(*s++),f); - fputc('\n',f); - fputc('\n',f); - disp_peptide_tag(f,t,sw); } - - - -void disp_tmrna_perm_seq(FILE *f, gene *t, csw *sw) -{ int i,*s,*sb,*se; - if (t->nintron <= 0) return; - if (*(t->name) == '\0') fprintf(f,"tmRNA Sequence\n\n"); - else fprintf(f,"tmRNA Sequence in %s\n\n",t->name); - fprintf(f,"Permuted\n"); - fprintf(f,"1 . 10 . 20 . 30 . 40 . 50\n"); - sb = t->eseq; - s = sb; - se = sb + 54; - i = 0; - while (s < se) - { fputc(cpbase(*s++),f); - if (++i >= 50) - { fputc('\n',f); - i = 0; }} - se = sb + t->intron; - while (s < se) - { fputc(cbase(*s++),f); - if (++i >= 50) - { fputc('\n',f); - i = 0; }} - se = sb + t->asst; - while (s < se) - { fputc(cpbase(*s++),f); - if (++i >= 50) - { fputc('\n',f); - i = 0; }} - se = sb + t->asst + t->astem1 + t->dloop + t->cstem; - while (s < se) - { fputc(cbase(*s++),f); - if (++i >= 50) - { fputc('\n',f); - i = 0; }} - se = sb + t->tps; - while (s < se) - { fputc(cpbase(*s++),f); - if (++i >= 50) - { fputc('\n',f); - i = 0; }} - se = sb + t->tpe + 1; - while (ltranslate(se,t,sw) == '*') se += 3; - while (s < se) - { fputc(cbase(*s++),f); - if (++i >= 50) - { fputc('\n',f); - i = 0; }} - se = sb + t->tpe + TMPTRAILER - 54; - while (s <= se) - { fputc(cpbase(*s++),f); - if (++i >= 50) - { fputc('\n',f); - i = 0; }} - if (i > 0) fputc('\n',f); - fprintf(f,"\nResume consensus sequence (at %d): ",t->tps - 6); - s = t->eseq + t->tps - 7; - for (i = 0; i < 18; i++) fputc(cbase(*s++),f); - fputc('\n',f); - fputc('\n',f); - disp_peptide_tag(f,t,sw); } - - -void disp_cds(FILE *f, gene *t, csw *sw) -{ int i,ncodon,*s,*se; - char c; - ncodon = t->nbase/3; - if (!t->tps) ncodon--; - fprintf(f,"\n%d codons, start = %c%c%c, stop = ",ncodon, - cbase(t->seq[0]),cbase(t->seq[1]),cbase(t->seq[2])); - s = t->seq + 3; - while ((i = *s++) != TERM) fputc(cbase(i),f); - if (t->tps) fprintf(f," incomplete"); - fprintf(f,"\n1 . 10 . 20 . 30 . 40 . 50\n"); - s = t->eseq; - se = s; - while (*se != TERM) se++; - if (t->tps) se -= 3; - i = 0; - while (s < se) - { c = ltranslate(s,t,sw); - fputc(c,f); - if (++i >= 50) - { fputc('\n',f); - i = 0; } - s += 3; } - if (i > 0) fputc('\n',f); - if (sw->energydisp) - fprintf(f,"Score = %lg\n",t->energy); - fputc('\n',f); - fputc('\n',f); } - - -int pseudogene(gene *t) -{ -if (t->energy < 100.0) return(1); -if (t->genetype == tRNA) - if (t->cloop != 7) - return(1); -return(0); -} - - -void disp_gene(gene *t, char m[][MATY], csw *sw) -{ double gc; - char stat[80]; - switch(t->genetype) - { case tmRNA: - build_tmrna(t,m,13,27,sw); - xcopy(m,4,3,"tmRNA (tRNA domain)",19); - break; - case tRNA: - build_trna(t,m,13,27,sw); - name(t,stat,1,sw); - xcopy(m,4,3,stat,length(stat)); - break; } - location(stat,t,sw,"Sequence"); - xcopy(m,4,1,stat,length(stat)); - gc = gc_content(t); - sprintf(stat,"%d bases, %%GC = %2.1f",t->nbase,100.0*gc); - xcopy(m,4,2,stat,length(stat)); - if (sw->reportpseudogenes) - if (pseudogene(t)) - xcopy(m,4,4,"Possible Pseudogene",19); - if (sw->energydisp) - { sprintf(stat,"Score = %g\n",t->energy); - xcopy(m,4,0,stat,length(stat)); }} - - -void disp_batch_trna(FILE *f, gene *t, csw *sw) -{ int ls,ps,*s,anticodon; - char pos[50],species[50]; - static char type[2][6] = { "tRNA","mtRNA" }; - static char asterisk[2] = { ' ','*'}; - anticodon = 1 + t->anticodon; - if (t->nintron > 0) - if (t->intron <= t->anticodon) - anticodon += t->nintron; - s = t->seq + t->anticodon; - ps = sw->reportpseudogenes?(pseudogene(t)?1:0):0; - switch(t->cloop) - { case 6: - case 8: - sprintf(species,"%s-???%c",type[sw->mtrna],asterisk[ps]); - break; - case 7: - default: - sprintf(species,"%s-%s%c",type[sw->mtrna],aa(s,sw),asterisk[ps]); - break; } - position(pos,t,sw); - ls = length(species); - if (ls <= 10) fprintf(f,"%-10s%28s",species,pos); - else if (ls <= 17) fprintf(f,"%-17s%21s",species,pos); - else fprintf(f,"%-25s%13s",species,pos); - if (sw->energydisp) - { fprintf(f,"\t%5.1f",t->energy); } - fprintf(f,"\t%-4d",anticodon); - switch(t->cloop) - { case 6: - fprintf(f,"\t(%c%c) ",cbase(*s),cbase(s[1])); - break; - case 8: - fprintf(f,"\t(%c%c%c%c) ", - cbase(*s),cbase(s[1]),cbase(s[2]),cbase(s[3])); - break; - case 7: - default: - fprintf(f,"\t(%c%c%c)",cbase(*s),cbase(s[1]),cbase(s[2])); - break; } - if (t->nintron > 0) - fprintf(f,"i(%d,%d)",t->intron+1,t->nintron); - fputc('\n',f); - if (sw->seqdisp) disp_seq(f,t,sw); } - - -void disp_batch_tmrna(FILE *f, gene *t, csw *sw) -{ int ps,tpe,*sb,*se; - char pos[50]; - static char permask[2][2][3] = - { {" ","p "},{"* ","p*"} }; - ps = (t->energy < 100.0)?1:0; - position(pos,t,sw); - fprintf(f,"tmRNA%2s%31s",permask[(t->asst == 0)?0:1][ps],pos); - if (sw->energydisp) - { fprintf(f,"\t%5.1f\t",t->energy); } - tpe = t->tpe; - sb = t->eseq + t->tps; - se = t->eseq + tpe + 1; - while (ltranslate(se,t,sw) == '*') - { se += 3; - tpe += 3; } - fprintf(f,"\t%d,%d\t",t->tps+1,tpe+1); - while (sb < se) - { fputc(ltranslate(sb,t,sw),f); - sb += 3; } - fputc('\n',f); - if (sw->seqdisp) disp_seq(f,t,sw); } - - -void disp_batch_srprna(FILE *f, gene *t, csw *sw) -{ int ps,tpe,*sb,*se; - char pos[50]; - static char asterisk[2] = { ' ','*'}; - ps = (t->energy < 100.0)?1:0; - position(pos,t,sw); - fprintf(f,"srpRNA%c %25s",asterisk[ps],pos); - if (sw->energydisp) - { fprintf(f,"\t%5.1f",t->energy); } - fputc('\n',f); - if (sw->seqdisp) disp_seq(f,t,sw); } - -void disp_batch_cds(FILE *f, gene *t, csw *sw) -{ int ps,tpe,*sb,*se; - char pos[50]; - static char asterisk[2] = { ' ','*'}; - ps = (t->energy < 100.0)?1:0; - position(pos,t,sw); - fprintf(f,"CDS%c %25s",asterisk[ps],pos); - if (sw->energydisp) - { fprintf(f,"\t%5.1f",t->energy); } - fputc('\n',f); - if (sw->seqdisp) disp_seq(f,t,sw); } - -double vloop_stability(int *sb, int var, int *varbp) -{ int e,stem,vstem,loop,*sn,*sen,*pos1,*pos2,*se,*sc,*sd,*sf,*s; - unsigned int c,cn,m; - static unsigned int A[6] = { 0,0,0x100,0x400,0,0 }; - static unsigned int C[6] = { 0,0,0x400,0,0,0 }; - static unsigned int G[6] = { 0x100,0x400,0,0x200,0,0 }; - static unsigned int T[6] = { 0x400,0,0x200,0,0,0 }; - static unsigned int te[6] = { 0,0,0,0,0,0 }; - e = 0; - sc = sb + 3; - se = sb + var - 2; - sf = se - 2; - te[0] = A[*se]; - te[1] = C[*se]; - te[2] = G[*se]; - te[3] = T[*se]; - while (--se > sf) - { te[0] = (te[0] >> 4) | A[*se]; - te[1] = (te[1] >> 4) | C[*se]; - te[2] = (te[2] >> 4) | G[*se]; - te[3] = (te[3] >> 4) | T[*se]; } - while (se >= sc) - { te[0] = ((te[0] >> 4) | A[*se]); - te[1] = ((te[1] >> 4) | C[*se]); - te[2] = ((te[2] >> 4) | G[*se]); - te[3] = ((te[3] >> 4) | T[*se]); - s = se - 5; - sd = se - 7; - m = te[*s]; - while (--s > sd) m = (m >> 4) + te[*s]; - while (s >= sb) - { m = (m >> 4) + te[*s]; - c = m & 0xf; - if (c >= 9) - { stem = 3; - loop = (int)(se - s) - 3; - sen = se; - sn = s + 2; - while (loop >= 6) - { if ((cn = vbp[sen[-1]][sn[1]]) <= 0) break; - c += cn; - stem++; - loop -= 2; - sen--; - sn++; } - if (c > e) - { e = c; - pos1 = s; - pos2 = sen; - vstem = stem; }} - s--; } - se--; } - if (e > 0) - { *varbp = (((int)(pos1-sb))<<10) + (((int)(pos2-sb))<<5) + vstem; - return((double)(3*(vstem - 4))); } - else - { *varbp = 0; - return(-12.0); }} - - - -double find_tag_upstream_hairpin(int *se) -{ int *sb,*sd,*sf,*sh,*s; - unsigned int c,m,mx; - static unsigned int A[6] = { 0,0,0,0x10000,0,0 }; - static unsigned int C[6] = { 0,0,0x10000,0,0,0 }; - static unsigned int G[6] = { 0,0x10000,0,0x10000,0,0 }; - static unsigned int T[6] = { 0x10000,0,0x10000,0,0,0 }; - static unsigned int t[6] = { 0,0,0,0,0,0 }; - mx = 0; - sf = se - 4; - sb = se - 20; - t[0] = A[*se]; - t[1] = C[*se]; - t[2] = G[*se]; - t[3] = T[*se]; - while (--se > sf) - { t[0] = (t[0] >> 4) | A[*se]; - t[1] = (t[1] >> 4) | C[*se]; - t[2] = (t[2] >> 4) | G[*se]; - t[3] = (t[3] >> 4) | T[*se]; } - sh = se - 4; - sd = se - 30; - while (se > sb) - { t[0] = ((t[0] >> 4) | A[*se]); - t[1] = ((t[1] >> 4) | C[*se]); - t[2] = ((t[2] >> 4) | G[*se]); - t[3] = ((t[3] >> 4) | T[*se]); - s = sh; - m = t[*s]; - while (--s > sd) - { m = (m >> 4) + t[*s]; - c = m & 0xf; - if (c > mx) mx = c; - if (mx == 5) goto FND; } - sd--; - sh--; - se--; } - return(0.0); - FND: - return(15.0); } - - - -double find_taghairpin(int *seq) -{ int i,*s,*sb,*se,*sf; - unsigned int c,m,mx; - static unsigned int A[6] = { 0,0,0,1,0,0 }; - static unsigned int C[6] = { 0,0,1,0,0,0 }; - static unsigned int G[6] = { 0,1,0,1,0,0 }; - static unsigned int T[6] = { 1,0,1,0,0,0 }; - static unsigned int t[6] = { 0,0,0,0,0,0 }; - mx = 0; - sb = seq - 20; - se = seq - 13; - sf = seq - 4; - t[0] = A[*sb]; - t[1] = C[*sb]; - t[2] = G[*sb]; - t[3] = T[*sb]; - while (++sb < se) - { t[0] = (t[0] << 4) | A[*sb]; - t[1] = (t[1] << 4) | C[*sb]; - t[2] = (t[2] << 4) | G[*sb]; - t[3] = (t[3] << 4) | T[*sb]; } - while (sb < sf) - { t[0] = ((t[0] << 4) | A[*sb]) & 0xffffffff; - t[1] = ((t[1] << 4) | C[*sb]) & 0xffffffff; - t[2] = ((t[2] << 4) | G[*sb]) & 0xffffffff; - t[3] = ((t[3] << 4) | T[*sb]) & 0xffffffff; - sb++; - s = seq + 20; - se = seq + 2; - m = t[*s--]; - while (s > se) - { m = (m >> 4) + t[*s--]; - c = m & 0xf; - if (c > mx) mx = c; } - i = 7 - (int)mx; - while (i-- > 0) - { m = m >> 4; - c = m & 0xf; - if (c > mx) mx = c; }} - return((double)(mx << 1)); } - -double stem_energy(int *s1, int *s2, int stem) -{ int *se; - double energy; - static double bem[6][6] = - { { -1.072,-0.214,-1.072, ATBOND, 0.000, 0.000 }, - { -0.214,-1.072, 3.000,-1.072, 0.000, 0.000 }, - { -1.072, 3.000,-1.072, 1.286, 0.000, 0.000 }, - { ATBOND,-1.072, 1.286,-0.214, 0.000, 0.000 }, - { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 }, - { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 } }; - se = s1 + stem; - energy = bem[*s1++][*--s2]; - while (s1 < se) - energy += bem[*s1++][*--s2]; - return(energy); } - -double astem_energy(int *s1, int *s2, int stem) -{ int *se; - double energy; - static double abem[6][6] = - { { -2.144,-0.428,-2.144, ATBOND, 0.000, 0.000 }, - { -0.428,-2.144, 3.000,-2.144, 0.000, 0.000 }, - { -2.144, 3.000,-2.144, 1.286, 0.000, 0.000 }, - { ATBOND,-2.144, 1.286,-0.428, 0.000, 0.000 }, - { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 }, - { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 } }; - se = s1 + stem; - energy = abem[*s1++][*--s2]; - while (s1 < se) - energy += abem[*s1++][*--s2]; - return(energy); } - - -void trna_score(FILE *f, gene *t) -{ int *s,*tpos,tarm,varbp; - double ea,eta,evls; - static double bem[6][6] = - { { -2.144,-0.428,-2.144, ATBOND, 0.000, 0.000 }, - { -0.428,-2.144, 3.000,-2.144, 0.000, 0.000 }, - { -2.144, 3.000,-2.144, 1.286, 0.000, 0.000 }, - { ATBOND,-2.144, 1.286,-0.428, 0.000, 0.000 }, - { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 }, - { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 } }; - static double A[6] = { 1.0,0.0,0.0,0.0,0.0,0.0 }; - static double C[6] = { 0.0,1.0,0.0,0.0,0.0,0.0 }; - static double G[6] = { 0.0,0.0,1.0,0.0,0.0,0.0 }; - static double T[6] = { 0.0,0.0,0.0,1.0,0.0,0.0 }; - if (t->genetype != tRNA) return; - tarm = 2*t->tstem + t->tloop; - tpos = t->seq + t->astem1 + t->spacer1 + t->dloop + 2*t->dstem + 1 + - 2*t->cstem + t->cloop + t->var; - s = tpos + t->tstem - 1; - eta = 6.0*(G[s[0]] + T[s[1]] + T[s[2]] + C[s[3]]) + 3.0*A[s[1]]; - s += t->tloop - 3; - eta += 2.0*(G[*s] + A[s[1]] + T[s[3]] + C[s[4]] + C[s[5]]); - eta += astem_energy(tpos,tpos+tarm,t->tstem); - eta += bem[tpos[t->tstem]][tpos[t->tstem + 4]]; - eta -= 3.0*(double)(5 - t->tstem); - if (t->tloop > 7) eta -= 3.0*(double)(t->tloop - 7); - else eta -= 3.0*(double)(7 - t->tloop); - s = t->seq; - if (t->astem1 > 7) s++; - ea = astem_energy(s,tpos+tarm+7,7); - if (t->var > 17) evls = vloop_stability(tpos-t->var,t->var,&varbp); - else evls = 0.0; - fprintf(f,"\n"); - fprintf(f," T-arm score: %g\n",eta); - fprintf(f," A-stem score: %g\n",ea); - fprintf(f," V-loop stability: %g\n",evls); - fprintf(f,"\n"); } - - -void tmrna_score(FILE *f, gene *t, csw *sw) -{ int r,j,te,*s,*sb,*se,*tpos,tarm; - double e,er,et,eal,esp,ed,ec,ea,egga,etcca,egg,eta,edgg; - double ehairpin,euhairpin; - static int gtemplate[6] = { 0x00,0x00,0x11,0x00,0x00,0x00 }; - static double tagend_score[4] = { 36.0, 66.0, 62.0, 72.0 }; - static int nps[126] = - { 0,0,0,0, - 0,0,0,0, - 0,0,0,0, - 1,1,1,1, - 0,0,0,0, - 1,1,1,1, - 0,0,0,0, - 1,1,1,1, - 0,0,0,0, - 1,1,1,1, - 1,1,1,1, - 1,1,1,1, - 2,1,2,1, - 0,0,0,0, - 2,1,1,1, - 1,1,1,1, - 0,0,0,0, - 0,0,0,0, - 0,0,0,0, - 0,0,0,0, - 0,0,0,0, - 0,0,0,0 }; - static double bem[6][6] = - { { -2.144,-0.428,-2.144, ATBOND, 0.000, 0.000 }, - { -0.428,-2.144, 3.000,-2.144, 0.000, 0.000 }, - { -2.144, 3.000,-2.144, 1.286, 0.000, 0.000 }, - { ATBOND,-2.144, 1.286,-0.428, 0.000, 0.000 }, - { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 }, - { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 } }; - static double A[6] = { 1.0,0.0,0.0,0.0,0.0,0.0 }; - static double C[6] = { 0.0,1.0,0.0,0.0,0.0,0.0 }; - static double G[6] = { 0.0,0.0,1.0,0.0,0.0,0.0 }; - static double K[6] = { 0.0,0.0,1.0,1.0,0.0,0.0 }; - static double R[6] = { 1.0,0.0,1.0,0.0,0.0,0.0 }; - static double T[6] = { 0.0,0.0,0.0,1.0,0.0,0.0 }; - static double Y[6] = { 0.0,1.0,0.0,1.0,0.0,0.0 }; - static double nA[6] = { 0,1.0,1.0,1.0,1.0,1.0 }; - static double nV[6] = { 0,0,0,1.0,1.0,1.0 }; - static double nM[6] = { 0,0,1.0,1.0,1.0,1.0 }; - if (t->genetype != tmRNA) return; - tarm = 2*t->tstem + t->tloop; - s = t->eseq + t->tps - 7; - er = A[s[1]]+2.0*T[s[2]]+C[s[2]]+3.0*A[s[3]]+R[s[4]]+Y[s[6]]+ - 3.0*G[s[7]]+C[s[8]]; - if (sw->tmstrict) er -= (nV[s[10]] + nV[s[11]] + nM[s[14]] + nA[s[17]]); - er *= 4.0; - s = t->eseq + t->tpe - 8; - te = ((nps[(s[0]<<4) + (s[1]<<2) + s[2]] & 1) << 1) - | (nps[(s[3]<<4) + (s[4]<<2) + s[5]] & 1); - et = tagend_score[te]; - if (sw->tmstrict) - { eal = 0.0; - j = -3; - while (j < 6) - { te = s[j++]; - te = (te << 2) | s[j++]; - if (te == 9) eal = (double)(11 + 2*((j + 1)/3)); - j++; } - ehairpin = find_taghairpin(s + 8); - euhairpin = find_tag_upstream_hairpin(t->eseq + t->tps - 10); } - else - { eal = 15.0; - ehairpin = 16.0; - euhairpin = 15.0; } - tpos = t->eseq; - if (t->asst > 0) - { tpos += t->cstem + t->var + 54; - ed = 0.0; } - else - { tpos += t->astem1 + t->dloop + 2*t->cstem + t->nintron + t->var; - ed = 0.001*(double)(t->tps - (long)(tpos - t->eseq)); } - s = tpos + t->tstem - 10; - e = K[s[0]] + G[s[1]] + A[s[2]]; - egga = K[s[1]] + G[s[2]] + A[s[3]]; - if (e > egga) egga = e; - egga *= 6.0; - if (egga < 18.0) egga = 0.0; - s = tpos + tarm + 4; - etcca = 10.0*(T[s[0]] + C[s[1]] + C[s[2]] + A[s[3]]); - s = t->eseq + t->asst; - egg = 7.0*(G[s[1]] + G[s[2]]); - edgg = 0.0; - s = t->eseq + t->asst + t->astem1; - sb = s + 3; - se = s + 7; - r = gtemplate[*sb++]; - while (sb < se) - { r = (r >> 4) + gtemplate[*sb++]; - if ((r & 3) == 2) - { edgg = 14.0; - break; }} - s = tpos + t->tstem - 1; - if (sw->tmstrict && (t->asst == 0)) - eta = 6.0*(G[s[0]] + T[s[1]] + T[s[2]] + C[s[3]]) + 3.0*A[s[1]]; - else - eta = 6.0*(G[s[0]] + (G[s[1]] + T[s[1]]) + - (G[s[2]] + T[s[2]]) + C[s[3]]) + 3.0*A[s[1]]; - s += t->tloop - 3; - eta += 2.0*(G[*s] + A[s[1]] + T[s[3]] + C[s[4]] + C[s[5]]); - eta += astem_energy(tpos,tpos+tarm,t->tstem); - eta += bem[tpos[t->tstem]][tpos[t->tstem + 4]]; - eta -= 3.0*(double)(5 - t->tstem); - if (t->tloop > 7) eta -= 3.0*(double)(t->tloop - 7); - else eta -= 3.0*(double)(7 - t->tloop); - eta *= 1.59; - s = t->eseq + t->asst + t->astem1 + t->dloop; - ec = stem_energy(s,tpos-t->var,t->cstem); - s = t->eseq + t->asst; - ea = astem_energy(s,tpos+tarm+t->astem1,t->astem1); - esp = ((t->tpe - t->tps) < 24)?-15.0:0.0; - e = er + et + ed + eal + esp + egga + egg + etcca + eta + ec + ea + - edgg + ehairpin + euhairpin; - fprintf(f,"\n"); - fprintf(f," Resume sequence score: %g\n",er); - fprintf(f,"Resume-Tarm distance score: %g\n",ed); - fprintf(f," Tag peptide score: %g\n",et); - fprintf(f," Tag end alanine score: %g\n",eal); - fprintf(f," Short tag penalty: %g\n",esp); - fprintf(f," Tag hairpin score: %g\n",ehairpin); - fprintf(f,"Tag upstream hairpin score: %g\n",euhairpin); - fprintf(f," V-loop GGA score: %g\n",egga); - fprintf(f," A-stem GG score: %g\n",egg); - fprintf(f," A-stem TCCA score: %g\n",etcca); - fprintf(f," D-loop GG score: %g\n",edgg); - fprintf(f," T-arm score: %g\n",eta); - fprintf(f," C-stem score: %g\n",ec); - fprintf(f," A-stem score: %g\n",ea); - fprintf(f," C-stem + A-stem score: %g\n",ea + ec); - fprintf(f," Total score: %g\n",e); - fprintf(f," Normalised score: %g\n",nenergy(t,sw)); - fprintf(f,"\n"); } - - - -int find_tstems(int *s, int ls, trna_loop hit[], int nh, csw *sw) -{ int i,r,c,tstem,tloop,ithresh1; - int *s1,*s2,*se,*ss,*si,*sb,*sc,*sf,*sl,*sx,*template; - double ec,energy,penalty,thresh2; - static double bem[6][6] = - { { -2.144,-0.428,-2.144, ATBOND, 0.000, 0.000 }, - { -0.428,-2.144, 3.000,-2.144, 0.000, 0.000 }, - { -2.144, 3.000,-2.144, 1.286, 0.000, 0.000 }, - { ATBOND,-2.144, 1.286,-0.428, 0.000, 0.000 }, - { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 }, - { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 } }; - static double A[6] = { 2.0,0.0,0.0,0.0,0.0,0.0 }; - static double C[6] = { 0.0,2.0,0.0,0.0,0.0,0.0 }; - static double G[6] = { 0.0,0.0,2.0,0.0,0.0,0.0 }; - static double T[6] = { 0.0,0.0,0.0,2.0,0.0,0.0 }; - static int template_trna[6] = - { 0x0100, 0x0002, 0x2000, 0x0220, 0x0000, 0x0000 }; - static int template_tmrna[6] = - { 0x0100, 0x0002, 0x2220, 0x0220, 0x0000, 0x0000 }; - i = 0; - template = (sw->tmrna)?template_tmrna:template_trna; - ithresh1 = (int)sw->ttscanthresh; - thresh2 = sw->ttarmthresh; - ss = s + sw->loffset; - si = ss + 4 - 1; - sl = s + ls - sw->roffset + 5 + 3; - r = template[*si++]; - r = (r >> 4) + template[*si++]; - r = (r >> 4) + template[*si++]; - while (si < sl) - { r = (r >> 4) + template[*si++]; - if ((c = (r & 0xF)) < ithresh1) continue; - sb = si - 7; - sf = sb + 13; - ec = (double)(3*c); - for (tstem = 4; tstem <= 5; tstem++) - { if (sb >= (sl-8)) goto NX; - sc = sf; - sx = si - 2; - for (tloop = 5; tloop <= 9; tloop++) - { if (tloop > 7) - penalty = 3.0*(double)(tloop - tstem - 2); - else - penalty = 3.0*(double)(12 - tloop - tstem); - s1 = sb; - s2 = sc; - se = s1 + tstem; - energy = ec + bem[*se][se[4]] + bem[*s1++][*--s2] - penalty; - while (s1 < se) energy += bem[*s1++][*--s2]; - energy += G[*sx] + A[sx[1]] + T[sx[3]] + C[sx[4]] + C[sx[5]]; - if (energy >= thresh2) - { if (i >= nh) - { fprintf(stderr,"Too many tstem hits\n"); - goto FN; } - hit[i].pos = sb; - hit[i].loop = tloop; - hit[i].stem = tstem; - hit[i].energy = energy; - i++; } - sx++; - sc++; } - NX: - if (--sb < ss) break; - sf++; }} - FN: - return(i); } - - - - -int find_astem5(int *si, int *sl, int *astem3, int n3, - trna_loop hit[], int nh, csw *sw) -{ int i,k; - int *s1,*s2,*se; - unsigned int r,tascanthresh; - double tastemthresh,energy; - static unsigned int template[6] = { 0,0,0,0,0,0 }; - static unsigned int A[6] = { 0,0,0,2,0,0 }; - static unsigned int C[6] = { 0,0,2,0,0,0 }; - static unsigned int G[6] = { 0,2,0,1,0,0 }; - static unsigned int T[6] = { 2,0,1,0,0,0 }; - static double abem[6][6] = - { { -2.144,-0.428,-2.144, ATBOND, 0.000, 0.000 }, - { -0.428,-2.144, 3.000,-2.144, 0.000, 0.000 }, - { -2.144, 3.000,-2.144, 1.286, 0.000, 0.000 }, - { ATBOND,-2.144, 1.286,-0.428, 0.000, 0.000 }, - { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 }, - { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 } }; - tascanthresh = (unsigned int)sw->tascanthresh; - tastemthresh = sw->tastemthresh; - i = 0; - sl += n3; - se = astem3 + n3 - 1; - template[0] = A[*se]; - template[1] = C[*se]; - template[2] = G[*se]; - template[3] = T[*se]; - while (--se >= astem3) - { template[0] = (template[0] << 4) + A[*se]; - template[1] = (template[1] << 4) + C[*se]; - template[2] = (template[2] << 4) + G[*se]; - template[3] = (template[3] << 4) + T[*se]; } - r = template[*si++]; - k = 1; - while (++k < n3) r = (r >> 4) + template[*si++]; - while (si < sl) - { r = (r >> 4) + template[*si++]; - if ((r & 15) >= tascanthresh) - { s1 = astem3; - s2 = si; - se = s1 + n3; - energy = abem[*s1++][*--s2]; - while (s1 < se) - energy += abem[*s1++][*--s2]; - if (energy >= tastemthresh) - { if (i >= nh) - { fprintf(stderr,"Too many astem5 hits\n"); - goto FN; } - hit[i].pos = si - n3; - hit[i].energy = energy; - i++; }}} - FN: - return(i); } - - - -/* -Resume consensus sequence is: WAUARNYGCNAANNANNA -Williams, K. P., Martindale, K. A. & Bartel, D. P. (1999) -EMBO J. 18, 5423-5433 -A more general consensus sequence is NATARNYGCNRVNNMNNH -aragorn strict search uses NATARNYGCNRVNNMNNA -aragorn relaxed search uses NATARNYGC -R = A or G -Y = C or T -W = A or T -V = A or C or G -M = A or C -H = A or C or T -K = G or T - -*/ - -int find_resume_seq(int *s, int ls, trna_loop hit[], int nh, csw *sw) -{ int e,i,j,k,a,aa[3],*si,*sb,*sf,*st,*sl; - double al; - unsigned int r,c,thresh; - static int nps[105] = - { 0,0,0,0, 0,0,0,0, - 0,0,0,0, 1,1,1,1, - 0,0,0,0, 1,1,1,1, - 0,0,0,0, 1,1,1,1, - 0,0,0,0, 1,1,1,1, - 1,1,1,1, 1,1,1,1, - 0,1,0,1, 0,0,0,0, - 0,1,1,1, 1,1,1,1, - 0,0,0,0, 0,0,0,0, - 0,0,0,0, 0,0,0,0, - 0,0,0,0, 0,0,0,0, - 0,0,0,0, 0,0,0,0, - 0,0,0,0, 0,0,0,0,0 }; - static double score[4] = { 36.0, 66.0, 62.0, 72.0 }; - static unsigned int template[6] = - { 0x10310000, 0x01000101, 0x00010030, - 0x02000100, 0x00000000, 0x00000000 }; - static int A[6] = { 0,1,1,1,1,1 }; - static int V[6] = { 0,0,0,1,1,1 }; - static int M[6] = { 0,0,1,1,1,1 }; - thresh = (unsigned int)sw->tmrthresh; - i = 0; - sl = s + ls; - r = template[*s++]; - r = (r >> 4) + template[*s++]; - r = (r >> 4) + template[*s++]; - r = (r >> 4) + template[*s++]; - r = (r >> 4) + template[*s++]; - r = (r >> 4) + template[*s++]; - r = (r >> 4) + template[*s++]; - if (sw->tmstrict) - while (s < sl) - { r = (r >> 4) + template[*s++]; - if ((c = (r & 0xF)) < thresh) continue; - c -= (V[s[1]] + V[s[2]] + M[s[5]] + A[s[8]]); - if (c < thresh) continue; - if (i >= nh) goto FL; - st = s - 2; - si = st; - sb = st + MINTAGDIST + 2; - sf = st + MAXTAGDIST; - while (si < sf) - { if (*si++ != Thymine) - si++; - else - if (*si == Adenine) - { if (!(*++si & 5)) goto ST1; } - else - if (*si == Guanine) - { if (*++si == Adenine) goto ST1; } - else si++; - si++; } - continue; - ST1: - if (si < sb) continue; - al = 0.0; - k = 0; - j = -11; - while (j < -2) - { a = si[j++]; - a = (a << 2) | si[j++]; - if (a == 9) al = (double)(11 + 2*((j + 9)/3)); - a = (a << 2) | si[j++]; - aa[k++] = a; } - hit[i].pos = st; - hit[i].stem = (int)(si - st); - e = (nps[aa[1]] << 1) | (nps[aa[2]]); - hit[i].energy = (double)(c << 2) + score[e] + al + - find_taghairpin(si) + - find_tag_upstream_hairpin(st-10); - i++; } - else - while (s < sl) - { r = (r >> 4) + template[*s++]; - if ((c = (r & 0xF)) < thresh) continue; - if (i >= nh) goto FL; - st = s - 2; - si = st + MINTAGDIST; - sf = st + MAXTAGDIST; - while (si < sf) - { if (*si++ != Thymine) - si++; - else - if (*si == Adenine) - { if (!(*++si & 5)) goto ST2; } - else - if (*si == Guanine) - { if (*++si == Adenine) goto ST2; } - else si++; - si++; } - continue; - ST2: - hit[i].pos = st; - hit[i].stem = (int)(si - st); - e = (nps[(si[-8] << 4) | (si[-7] << 2) | si[-6]] << 1) | - (nps[(si[-5] << 4) | (si[-4] << 2) | si[-3]]); - hit[i].energy = 46.0 + (double)(c << 2) + score[e]; - i++; } - FN: - return(i); - FL: - fprintf(stderr,"Too many resume sequence hits\n"); - goto FN; } - - -int *base_copy3(int *from, int *to, int n) -{ while (n-- > 0) *to++ = *from++; - *to = TERM; - return(to); } - - - -void remove_intron(int *s1, int *s2, int nbase, int intron, int nintron) -{ int *s1e; - s1e = s1 + intron; - nbase -= intron; - while (s1 < s1e) *s2++ = *s1++; - s1 += nintron; - s1e = s1 + nbase; - while (s1 < s1e) *s2++ = *s1++; - *s2 = TERM; } - - - - -gene *nearest_trna_gene(data_set *d, int nt, gene *t, csw *sw) -{ int n,i,comp,mtrna,mtcompov,maxintronlen,ilength; - long a,b,c,e,score,thresh,psmax; - static long proximity = 7*MINCTRNALEN/10; - double energy; - psmax = d->psmax; - comp = t->comp; - mtrna = sw->mtrna; - mtcompov = sw->mtcompov; - maxintronlen = sw->maxintronlen; - n = -1; - energy = INACTIVE; - a = t->start; - b = t->stop; - thresh = b-a; - if (b < a) - { b += psmax; - thresh += psmax; - for (i = 0; i < nt; i++) - { c = ts[i].start; - e = ts[i].stop; - if (e < c) - { e += psmax; - if (a > e) goto NXTW; - if (b < c) goto NXTW; - if (ts[i].genetype != tRNA) continue; - if (ts[i].comp != comp) - { if (!mtrna) continue; - if (mtcompov) continue; } - if (maxintronlen > 0) - { ilength = e - c; - if ((2*thresh) > (5*ilength)) continue; - if ((2*ilength) > (5*thresh)) continue; } - score = (a >= c)?((b >= e)?e-a:thresh):((b >= e)?thresh:b-c); - if (score >= proximity) - if (ts[i].energy < energy) - { n = i; - energy = ts[i].energy; } - NXTW: - c -= psmax; - e -= psmax; } - if (a > e) continue; - if (b < c) continue; - if (ts[i].genetype != tRNA) continue; - if (ts[i].comp != comp) - { if (!mtrna) continue; - if (mtcompov) continue; } - if (maxintronlen > 0) - { ilength = e - c; - if ((2*thresh) > (5*ilength)) continue; - if ((2*ilength) > (5*thresh)) continue; } - score = (a >= c)?((b >= e)?e-a:thresh):((b >= e)?thresh:b-c); - if (score >= proximity) - if (ts[i].energy < energy) - { n = i; - energy = ts[i].energy; } } - a -= psmax; - b -= psmax; } - for (i = 0; i < nt; i++) - { c = ts[i].start; - e = ts[i].stop; - if (e < c) - { e += psmax; - if (a > e) goto NXTN; - if (b < c) goto NXTN; - if (ts[i].genetype != tRNA) continue; - if (ts[i].comp != comp) - { if (!mtrna) continue; - if (mtcompov) continue; } - if (maxintronlen > 0) - { ilength = e - c; - if ((2*thresh) > (5*ilength)) continue; - if ((2*ilength) > (5*thresh)) continue; } - score = (a >= c)?((b >= e)?e-a:thresh):((b >= e)?thresh:b-c); - if (score >= proximity) - if (ts[i].energy < energy) - { n = i; - energy = ts[i].energy; } - NXTN: - c -= psmax; - e -= psmax; } - if (a > e) continue; - if (b < c) continue; - if (ts[i].genetype != tRNA) continue; - if (ts[i].comp != comp) - { if (!mtrna) continue; - if (mtcompov) continue; } - if (maxintronlen > 0) - { ilength = e - c; - if ((2*thresh) > (5*ilength)) continue; - if ((2*ilength) > (5*thresh)) continue; } - score = (a >= c)?((b >= e)?e-a:thresh):((b >= e)?thresh:b-c); - if (score >= proximity) - if (ts[i].energy < energy) - { n = i; - energy = ts[i].energy; } } - if (n >= 0) return(ts + n); - return(NULL); } - - -gene *nearest_tmrna_gene(data_set *d, int nt, gene *t) -{ int n,i,comp; - long a,b,c,e,score,smax,thresh,psmax; - psmax = d->psmax; - comp = t->comp; - smax = -1; - n = -1; - a = t->start; - b = t->stop; - thresh = b-a; - if (b < a) - { b += psmax; - thresh += psmax; - for (i = 0; i < nt; i++) - { c = ts[i].start; - e = ts[i].stop; - if (e < c) - { e += psmax; - if (a > e) goto NXTW; - if (b < c) goto NXTW; - if (ts[i].genetype != tmRNA) continue; - if (ts[i].comp != comp) continue; - score = (a >= c)?((b >= e)?e-a:thresh):((b >= e)?thresh:b-c); - if (score >= smax) - if (score > smax) - { n = i; - smax = score; } - else - if (ts[i].energy < ts[n].energy) - n = i; - NXTW: - c -= psmax; - e -= psmax; } - if (a > e) continue; - if (b < c) continue; - if (ts[i].genetype != tmRNA) continue; - if (ts[i].comp != comp) continue; - score = (a >= c)?((b >= e)?e-a:thresh):((b >= e)?thresh:b-c); - if (score >= smax) - if (score > smax) - { n = i; - smax = score; } - else - if (ts[i].energy < ts[n].energy) - n = i; } - a -= psmax; - b -= psmax; } - for (i = 0; i < nt; i++) - { c = ts[i].start; - e = ts[i].stop; - if (e < c) - { e += psmax; - if (a > e) goto NXTN; - if (b < c) goto NXTN; - if (ts[i].genetype != tmRNA) continue; - if (ts[i].comp != comp) continue; - score = (a >= c)?((b >= e)?e-a:thresh):((b >= e)?thresh:b-c); - if (score >= smax) - if (score > smax) - { n = i; - smax = score; } - else - if (ts[i].energy < ts[n].energy) - n = i; - NXTN: - c -= psmax; - e -= psmax; } - if (a > e) continue; - if (b < c) continue; - if (ts[i].genetype != tmRNA) continue; - if (ts[i].comp != comp) continue; - score = (a >= c)?((b >= e)?e-a:thresh):((b >= e)?thresh:b-c); - if (score >= smax) - if (score > smax) - { n = i; - smax = score; } - else - if (ts[i].energy < ts[n].energy) - n = i; } - if ((10*smax) > (9*thresh)) return(ts + n); - return(NULL); } - - -void overlap(data_set *d, int sort[], int n, int it, csw *sw) -{ int i,j,flag,cross,crosstoo; - long a,b,e,f,a2,b2,e2,f2,psmax; - char sname[100],s[100]; - flag = 0; - cross = 0; - psmax = d->psmax; - a = ts[it].start; - b = ts[it].stop; - if (b < a) - { a2 = a - psmax; - b2 = b; - b += psmax; - cross = 1; } - j = -1; - while (++j < n) - { i = sort[j]; - if (i == it) continue; - e = ts[i].start; - f = ts[i].stop; - crosstoo = 0; - if (f < e) - { e2 = e - psmax; - f2 = f; - f += psmax; - crosstoo = 1; } - if (a <= f) - if (b >= e) - goto OV; - if (crosstoo) - if (a <= f2) - if (b >= e2) - goto OV; - if (cross) - { if (a2 <= f) - if (b2 >= e) - goto OV; - if (crosstoo) - if (a2 <= f2) - if (b2 >= e2) - goto OV; } - continue; - OV: - if (!flag) fputc('\n',sw->f); - name(ts+i,sname,1,sw); - location(s,ts+i,sw,sname); - fprintf(sw->f,"Overlap with %d: %s\n", j+1,s); - flag = 1; } - if (flag) fputc('\n',sw->f); } - - - -void init_gene(int nstart, int nstop) -{ int i; - for (i = nstart; i < nstop; i++) - { ts[i].energy = -1.0; - ts[i].genetype = noGENE; - ts[i].tps = 0; - *(ts[i].name) = '\0'; }} - - - -gene *find_slot(data_set *d, gene *t, int *nts, csw *sw) -{ int i,newspace; - char s1[80],s2[80],s3[80],s4[80]; - gene *tn,*tsn; - if (sw->comp) - { t->stop = sw->start - t->start - 1; - t->start = t->stop - t->nbase - t->nintron + 1; - t->comp = 1; } - else - { t->start += sw->start; - t->stop = t->start + t->nbase + t->nintron - 1; - t->comp = 0; } - if (!sw->linear) - { t->start = sq(t->start); - t->stop = sq(t->stop); } - if (t->genetype == tRNA) - tn = nearest_trna_gene(d,*nts,t,sw); - else - if (t->genetype == tmRNA) - tn = nearest_tmrna_gene(d,*nts,t); - else tn = NULL; - if (tn) - { if (t->energy <= tn->energy) return(NULL); - copy(tn->name,t->name); - if (sw->verbose) - { fprintf(stderr,"%s %s ",name(t,s1,0,sw),position(s3,t,sw)); - if (sw->energydisp) fprintf(stderr,"(%g) ",nenergy(t,sw)); - fprintf(stderr,"replacing %s %s",name(tn,s2,1,sw), - position(s4,tn,sw)); - if (sw->energydisp) fprintf(stderr," (%g)",nenergy(tn,sw)); - fprintf(stderr,"\n"); }} - else - { if (*nts >= sw->genespace) - { newspace = (d->ps > 0)?(sw->genespace*(1 + d->psmax/d->ps)): - (sw->genespace + NT); - tsn = (gene *)realloc((void *)ts,newspace*sizeof(gene)); - if (tsn == NULL) - { fprintf(stderr,"No more memory to store detected genes\n"); - fprintf(stderr,"Gene lost\n"); - return(NULL); } - if (sw->verbose) - fprintf(stderr, - "Expanding detected gene store from %d genes to %d genes\n", - sw->genespace,newspace); - ts = tsn; - init_gene(sw->genespace,newspace); - sw->genespace = newspace; } - copy3cr(d->seqname,t->name,79); - tn = ts + (*nts); - *nts = (*nts) + 1; - if (sw->verbose) - { fprintf(stderr,"%s at %s",name(t,s1,0,sw),position(s2,t,sw)); - if (sw->energydisp) fprintf(stderr," (%g)",nenergy(t,sw)); - fprintf(stderr,"\n"); }} - return(tn); } - - -int aatail(int *s, int *ext, csw *sw) -{ int score,e; - static int A[6] = { 1,0,0,0,0,0 }; - static int C[6] = { 0,1,0,0,0,0 }; - if (sw->aataildiv) - { score = 0; - e = 0; - if (A[s[3]]) - { score++; - e = 3; } - if (C[s[2]]) - { score++; - if (!e) e = 2; } - if (C[s[1]]) - { score++; - if (!e) e = 1; } - if (score < 2) - if (A[*s]) score++; - *ext = ++e; - return(score); } - else - { score = 1; - e = 1; - if (C[s[1]]) - { score++; - e = 2; - if (C[s[2]]) - { score++; - e = 3; - if (A[s[3]]) - { score++; - e = 4; }}} - *ext = e; - return(score); }} - - - - - - - - - -int find_mt_trna(data_set *d, int *seq, int lseq, int nts, csw *sw) -{ int nah,ndh,nch,nth,ncdsh,h,i,j,k,n,p,y,av,gcc,cgcc,catc,athresh; - int igc,nbase,b8,b9,b48,b57,nc,na,nt,nti,nd,ndi,dposmap[32]; - int dl,tl,extastem,astem8,astem8d,ti,di,ser,tastem,tastem8,tastem8d; - int astem,asteme,as,as8,aext,aext8,nbasefext,cloop,dloop,tloop,tc; - int carm,cstem,darm,dstem,tarm,tstem,var,varbp,spacer1,spacer2,anticodon; - int ds,dstemmotif,cloop7,mtxdetect,incds; - int *s,*sl,*s1,*s2,*s4,*sa,*sb,*sc,*se,*sf,*sg,*si; - int *slm,*slm1,*sle,*slb,*sld,*sge; - int *dpos,*cpos,*cend,*tpos,*tend,*apos1,*apos2,*aend1,*aend2; - int *clooppos,*cloopend; - unsigned int bondtype,abondtype,mabondtype,acbondtype,cbondtype; - unsigned int agcat,cgcat,tgcat,dbondtype,dtbondtype,tbondtype; - unsigned int r,ct[6],cm,cv,q,tendmap[63]; - double gcv,e,ec,ea,eas,ed,et,ev,energy,stem_energy; - double darmthresh,tarmthresh,tthresh,dthresh,dtthresh,thresh; - mt_trna_cloop chit[6]; - static mt_trna_loop dhit[mtND+1]; - static mt_trna_tloop thit[mtNTH+1]; - static mt_trna_astem ahit[mtNA+1]; - static mt_cds cdshit[mtNCDS]; - gene *tn; - static gene te = - { "",{TERM},{TERM},NULL,0,0,0L,0L,7,7,1,2,1,4,7,5,7,0,0,0,5,0,5,7, - tRNA,0.0,0,0,0 }; - static int cAI[6] = { 8,0,0,0,8,0 }; - static int cfCI[6] = { 0,16,0,0,16,0 }; - static int cRI[6] = { 8,0,4,0,8,0 }; - static int cTI[6] = { 0,0,0,16,16,0 }; - static int cYI[6] = { 0,8,0,4,8,0 }; - static int AI[6] = { 1,0,0,0,1,0 }; - static int CI[6] = { 0,1,0,0,1,0 }; - static int GI[6] = { 0,0,1,0,1,0 }; - static int TI[6] = { 0,0,0,1,1,0 }; - static int RI[6] = { 1,0,1,0,1,0 }; - static int YI[6] = { 0,1,0,1,1,0 }; - static int WI[6] = { 1,0,0,1,1,0 }; - static unsigned int template[6] = { 0,0,0,0,0,0 }; - static unsigned int At[6] = { 0,0,0,1,1,0 }; - static unsigned int Ct[6] = { 0,0,1,0,1,0 }; - static unsigned int Gt[6] = { 0,1,0,1,1,0 }; - static unsigned int Tt[6] = { 1,0,1,0,1,0 }; - static unsigned int cAt[6] = { 0,0,0,2,2,0 }; - static unsigned int cCt[6] = { 0,0,2,0,2,0 }; - static unsigned int cGt[6] = { 0,2,0,1,2,0 }; - static unsigned int cTt[6] = { 2,0,1,0,2,0 }; - static unsigned int aAt[6] = { 0,0,1,2,2,0 }; - static unsigned int aCt[6] = { 0,0,2,0,2,0 }; - static unsigned int aGt[6] = { 1,2,0,1,2,0 }; - static unsigned int aTt[6] = { 2,0,1,1,2,0 }; - static unsigned int dAt[6] = { 0,0,1,2,2,0 }; - static unsigned int dCt[6] = { 0,0,2,0,2,0 }; - static unsigned int dGt[6] = { 1,2,0,2,2,0 }; - static unsigned int dTt[6] = { 2,0,2,1,2,0 }; - static unsigned int clmotif[mtNCLM] = - { 0x1321300,0x3321300,0x1323002 }; - static int dloopi[mt_DRLmaxlength+1][4] = - { { -1 }, { -1 }, { -1 }, { -1 }, { -1 }, { -1 }, { -1 }, - { 0,2,-1 }, { 0,2,-1 }, { 0,2,3,-1 }, { 0,3,-1 }, { 0,3,-1 }, - { 0,3,4,-1 }, { 0,4,-1 }, { 0,5,-1 }, { 0,5,6,-1 }, { 0,5,6,-1 } }; - static int tloopa[12][4] = - { { -1 }, { -1 }, { -1 }, { 0,1,-1 }, { 0,2,1,-1 }, { 4,3,2,-1 }, - { 4,3,-1 }, { 4,3,-1 }, { 4,3,-1 }, { 5,4,3,-1 }, { 5,4,-1 }, { 5,-1 } }; - static double dA[6] = { 1.0,0.0,0.0,0.0,1.0,0.0 }; - static double dT[6] = { 0.0,0.0,0.0,1.0,1.0,0.0 }; - static double C[6] = { 0.0,1.0,0.0,0.0,1.0,0.0 }; - static double G[6] = { 0.0,0.0,1.0,0.0,1.0,0.0 }; - static double T[6] = { 0.0,0.0,0.0,1.0,1.0,0.0 }; - static double AX[6] = { 0.0,-1.0,-1.0,-1.0,0.0,-1.0 }; - static double AX37[6] = { 0.0,-4.0,-1.0,-4.0,0.0,-4.0 }; - static double AXX[6] = { 0.0,-3.0,-1.5,-3.0,0.0,-3.0 }; - static double AXX37[6] = { 0.0,-4.0,-4.0,-4.0,0.0,-4.0 }; - static double AX7[6] = { 0.0,-0.7,-0.7,-0.7,0.0,-0.7 }; - static double CX[6] = { -2.0,0.0,-2.0,-1.0,0.0,-2.0 }; - static double CXX[6] = { -4.0,0.0,-4.0,-2.0,0.0,-4.0 }; - static double CX7[6] = { -0.7,0.0,-0.7,-0.7,0.0,-0.7 }; - static double TX[6] = { -1.0,-1.0,-1.0,0.0,0.0,-1.0 }; - static double TXX[6] = { -2.0,-2.0,-2.0,0.0,0.0,-2.0 }; - static double YX[6] = { -1.0,0.0,-1.0,0.0,0.0,-1.0 }; - static double tC[6] = { 0.0,0.01,0.0,0.0,0.01,0.0 }; - static double tG[6] = { 0.0,0.0,0.01,0.0,0.01,0.0 }; - static double tT[6] = { 0.0,0.0,0.0,0.01,0.01,0.0 }; - static double cA[6] = { 0.8,0.0,0.0,0.0,0.8,0.0 }; - static double cfC[6] = { 0.0,2.6,0.0,0.0,2.6,0.0 }; - static double cR[6] = { 0.8,-2.0,0.8,-0.8,0.8,-0.8 }; - static double cT[6] = { -0.8,0.0,-0.8,2.6,2.6,-0.8 }; - static double cY[6] = { -0.8,0.8,-0.8,0.8,0.8,-0.8 }; - static double loop_stab[41] = - { 10.0,2.0,1.0,0.4,0.3,0.2,0.1,0.0,0.1,0.2,0.3,0.4,0.5,1.6,1.7,1.8, - 1.9,2.0,2.1,2.2,2.3,3.9,4.0,4.1,4.2,4.3,4.4,4.5,4.6,4.7,4.8,4.9, - 5.0,5.1,5.2,5.3,5.4,5.5,5.6,5.7 }; - static double bem[6][6] = - { { mtNOBOND, mtNOBOND, mtGABOND, mtATBOND, mtATBOND, mtNOBOND }, - { mtNOBOND, mtNOBOND, mtGCBOND, mtNOBOND, mtGCBOND, mtNOBOND }, - { mtGABOND, mtGCBOND, mtGGBOND, mtGTBOND, mtGCBOND, mtNOBOND }, - { mtATBOND, mtNOBOND, mtGTBOND, mtTTBOND, mtATBOND, mtNOBOND }, - { mtATBOND, mtGCBOND, mtGCBOND, mtATBOND, mtGCBOND, mtNOBOND }, - { mtNOBOND, mtNOBOND, mtNOBOND, mtNOBOND, mtNOBOND, mtNOBOND } }; - static double hbem[5][5] = - { { 0.0,0.0,0.0,mtBONDSTAB+0.5*mtATBOND,mtBONDSTAB+0.5*mtATBOND }, - { 0.0,0.0,mtBONDSTAB+0.5*mtGCBOND,0.0,mtBONDSTAB+0.5*mtGCBOND }, - { 0.0,mtBONDSTAB+0.5*mtGCBOND,0.0,mtBONDSTAB+0.5*mtGTBOND, - mtBONDSTAB+0.5*mtGCBOND }, - { mtBONDSTAB+0.5*mtATBOND,0.0,mtBONDSTAB+0.5*mtGTBOND,0.0, - mtBONDSTAB+0.5*mtATBOND }, - { mtBONDSTAB+0.5*mtATBOND,mtBONDSTAB+0.5*mtGCBOND, - mtBONDSTAB+0.5*mtGCBOND, - mtBONDSTAB+0.5*mtATBOND, - mtBONDSTAB+0.5*mtGCBOND } }; - tarmthresh = sw->mttarmthresh; - tthresh = sw->mttthresh; - dthresh = sw->mtdthresh; - dtthresh = sw->mtdtthresh; - ds = sw->discrim; - extastem = sw->extastem; - cloop7 = sw->cloop7; - mtxdetect = sw->mtxdetect; - - /* find coding sequences */ - - ncdsh = 0; - - /* find cstems */ - - sc = seq + sw->loffset; - sl = seq + lseq - sw->roffset; - h = sc[16]; - p = sc[15]; - j = sc[14]; - k = sc[13]; - n = sc[12]; - y = sc[11]; - ct[0] = cAt[h]|(cAt[p]<<4)|(cAt[j]<<8)|(cAt[k]<<12)| - (cAt[n]<<16)|(cAt[y]<<20); - ct[1] = cCt[h]|(cCt[p]<<4)|(cCt[j]<<8)|(cCt[k]<<12)| - (cCt[n]<<16)|(cCt[y]<<20); - ct[2] = cGt[h]|(cGt[p]<<4)|(cGt[j]<<8)|(cGt[k]<<12)| - (cGt[n]<<16)|(cGt[y]<<20); - ct[3] = cTt[h]|(cTt[p]<<4)|(cTt[j]<<8)|(cTt[k]<<12)| - (cTt[n]<<16)|(cTt[y]<<20); - ct[4] = 0; - ct[5] = 0; - for (; sc < sl; sc++) - { p = sc[17]; - ct[0] = (ct[0] << 4) | cAt[p]; - ct[1] = (ct[1] << 4) | cCt[p]; - ct[2] = (ct[2] << 4) | cGt[p]; - ct[3] = (ct[3] << 4) | cTt[p]; - cm = (ct[sc[4]] >> 16) + (ct[sc[3]] >> 12) + (ct[sc[2]] >> 8) + - (ct[sc[1]] >> 4) + ct[*sc]; - - /* 7 base cloop */ - - cv = (cm & 0xf0); - athresh = 12; - nch = 0; - - /* exclude the following cloops */ - /* RRnnnNN, NRnnnYN */ - /* NRnnnNN with cstem < 3 Watson-Crick basepairs or equivalent */ - /* RYnnnYN */ - /* NYnnnNN with cstem < 1 Watson-Crick basepair or equivalent */ - /* NYnnnNN with cstem < 2 Watson-Crick basepairs or equivalent */ - /* unless cloop = CTnnnAN */ - - if (RI[sc[6]]) - { if (RI[sc[5]]) goto CLOOP6; - if (YI[sc[10]]) goto CLOOP6; - if (cv < 0x60) goto CLOOP6; } - else - { if (YI[sc[10]]) - if (RI[sc[5]]) goto CLOOP6; - if (cv < 0x40) - { if (cv < 0x20) goto CLOOP6; - if (sc[5] != Cytosine) goto CLOOP6; - if (sc[6] != Thymine) goto CLOOP6; - if (sc[10] != Adenine) goto CLOOP6; - athresh = 11; } - else if (cv < 0x70) - { athresh = 11; - k = cYI[sc[5]] + cTI[sc[6]] + cRI[sc[10]] + cAI[sc[11]]; - if (sc[6] == Cytosine) - if (sc[5] == Cytosine) - k += 16; - else - if (sc[5] == Thymine) - if (sc[11] == Adenine) - k += 16; - if (cv == 0x40) - { if (k < 40) goto CLOOP6; } - else - if (cv == 0x50) - { if (k < 28) goto CLOOP6; } - else - { if (k < 20) goto CLOOP6; - athresh = 9; }} - else - athresh = (cv < 10)?9:8; } - chit[0].pos = sc; - chit[0].stem = 5; - chit[0].loop = 7; - chit[0].looppos = sc + 5; - chit[0].arm = 17; - chit[0].end = sc + 17; - chit[0].anticodon = (sc[7] << 4) + (sc[8] << 2) + sc[9]; - if (bp[sc[-1]][sc[17]]) - { chit[1].pos = sc-1; - chit[1].stem = 6; - chit[1].loop = 7; - chit[1].looppos = sc + 5; - chit[1].arm = 19; - chit[1].end = sc + 18; - chit[1].anticodon = chit[0].anticodon; - nch = 2; } - else nch = 1; - - /* 6 base cloop */ - /* exclude cstem < 4 Watson-Crick basepairs or equivalent */ - /* exclude cloop = RRnnNN */ - /* exclude cloop = NNnnYY */ - - CLOOP6: - if (cloop7) goto CLOOPE; - if ((cm & 0xf00) >= 0x800) - { if (!YI[sc[6]]) - if (!YI[sc[5]]) - goto CLOOP8; - if (!RI[sc[9]]) - if (!RI[sc[10]]) - goto CLOOP8; - se = sc + 20; - sg = sc; - while (sg < se) - { sf = sg + 5; - while (sf < (sg + 11)) - { if (*sf == *sg) - if (sf[1] == sg[1]) - if (sf[2] == sg[2]) - if (sf[3] == sg[3]) - if (sf[4] == sg[4]) - { sb = sg + 5; - s = sf + 5; - i = 0; - while (sb < sf) - if (*sb++ != *s++) - if (++i > 1) goto NXSEG6; - goto CLOOPE; } - NXSEG6: - sf++; } - sg++; } - chit[nch].pos = sc; - chit[nch].stem = 5; - chit[nch].loop = 6; - chit[nch].looppos = sc + 5; - chit[nch].arm = 16; - chit[nch].end = sc + 16; - chit[nch++].anticodon = 0; - if (athresh > 10) athresh = 10; - if (bp[sc[-1]][sc[16]]) - { chit[nch].pos = sc-1; - chit[nch].stem = 6; - chit[nch].loop = 6; - chit[nch].looppos = sc + 5; - chit[nch].arm = 18; - chit[nch].end = sc + 17; - chit[nch++].anticodon = 0; }} - - /* 8 base cloop */ - /* exclude cstem < 4 Watson-Crick basepairs or equivalent */ - /* exclude cloop = RRnnnnNN */ - /* exclude cloop = NNnnnnYY */ - - CLOOP8: - if ((cm & 0xf) >= 0x8) - { if (!YI[sc[5]]) - if (!YI[sc[6]]) - goto CLOOPE; - if (!RI[sc[12]]) - if (!RI[sc[11]]) - goto CLOOPE; - se = sc + 20; - sg = sc; - while (sg < se) - { sf = sg + 5; - while (sf < (sg + 11)) - { if (*sf == *sg) - if (sf[1] == sg[1]) - if (sf[2] == sg[2]) - if (sf[3] == sg[3]) - if (sf[4] == sg[4]) - { sb = sg + 5; - s = sf + 5; - i = 0; - while (sb < sf) - if (*sb++ != *s++) - if (++i > 1) goto NXSEG8; - goto CLOOPE; } - NXSEG8: - sf++; } - sg++; } - chit[nch].pos = sc; - chit[nch].stem = 5; - chit[nch].loop = 8; - chit[nch].looppos = sc + 5; - chit[nch].arm = 18; - chit[nch].end = sc + 18; - chit[nch++].anticodon = 0; - if (athresh > 10) athresh = 10; - if (bp[sc[-1]][sc[18]]) - { chit[nch].pos = sc-1; - chit[nch].stem = 6; - chit[nch].loop = 8; - chit[nch].looppos = sc + 5; - chit[nch].arm = 20; - chit[nch].end = sc + 19; - chit[nch++].anticodon = 0; }} - - /* calculate carm energy */ - - CLOOPE: - if (nch < 1) continue; - for (nc = 0; nc < nch; nc++) - { s1 = chit[nc].pos; - cstem = chit[nc].stem; - cloop = chit[nc].loop; - s4 = s1 + cstem; - s2 = s4 + cloop; - energy = (cloop == 7)?0.0:-4.0; - energy += cY[*s4] + cT[s4[1]] + cR[s2[-2]] + cA[s2[-1]]; - if (s4[1] == Cytosine) - if (*s4 == Cytosine) - energy += 2.6; - else - if (*s4 == Thymine) - if (s2[-1] == Adenine) - energy += 2.6; - s2 += cstem; - stem_energy = bem[*s1][*--s2]; - k = neighbour_map[*s1][*s2]; - stem_energy += neighbour_em[k][s1[1]][s2[-1]]; - bondtype = btmap[*s1][*s2]; - if (bp[*s1][*s2]) - { if (assymst[s2[1]][s1[-1]]) stem_energy += mtTERMSTAB; - else stem_energy += send_em[*s2][*s1]; } - else - { if (assymst[*s2][*s1]) stem_energy += mtTERMSTAB; - else stem_energy += send_em[s2[-1]][s1[1]]; } - while (++s1 < s4) - { if (!wcbp[*s1][*--s2]) - { if (!wcbp[s1[-1]][s2[1]]) - { for (j = 0; j < mtNTM; j++) - if (*s1 == tandemid[j][1]) - if (*s2 == tandemid[j][3]) - if (s1[-1] == tandemid[j][0]) - if (s2[1] == tandemid[j][2]) - { stem_energy += tandem_em[j]; - break; } - if (s1 < (s4-1)) - if (!bp[s1[1]][s2[-1]]) stem_energy -= mt3MMSTAB; } - k = neighbour_map[*s1][*s2]; - stem_energy += (neighbour_em[k][s1[-1]][s2[1]] + - neighbour_em[k][s1[1]][s2[-1]]); } - bondtype += btmap[*s1][*s2]; - stem_energy += bem[*s1][*s2]; } - if (!bp[*--s1][*s2]) - { s1--; - s2++; } - if (assymst[s1[1]][s2[-1]]) stem_energy += mtTERMSTAB; - else stem_energy += send_em[*s1][*s2]; - cgcc = bondtype & 0xf; - if (cgcc <= 0) - { catc = (bondtype & 0xf0) >> 4; - if (catc < cstem) energy -= mtGCPENALTY; } - if (cstem == 6) energy += 1.0; - chit[nc].bondtype = bondtype; - chit[nc].stem_energy = stem_energy; - chit[nc].energy = energy + stem_energy; } - - /* find tarms */ - - nth = 0; - slm = sc + 61; - sle = sc + 57; - sb = sc + 21; - sg = sc + 16; - sge = sg + 30; - slb = sg + 32; - template[0] = At[*slm]; - template[1] = Ct[*slm]; - template[2] = Gt[*slm]; - template[3] = Tt[*slm]; - while (--slm > sle) - { template[0] = (template[0] << 4) | At[*slm]; - template[1] = (template[1] << 4) | Ct[*slm]; - template[2] = (template[2] << 4) | Gt[*slm]; - template[3] = (template[3] << 4) | Tt[*slm]; } - while (slm >= sb) - { template[0] = ((template[0] << 4) | At[*slm]) & 0xfffff; - template[1] = ((template[1] << 4) | Ct[*slm]) & 0xfffff; - template[2] = ((template[2] << 4) | Gt[*slm]) & 0xfffff; - template[3] = ((template[3] << 4) | Tt[*slm]) & 0xfffff; - sf = slm + 3; - if (sf > sge) sf = sge; - apos2 = slm + 5; - si = sg; - s = si + 4; - r = template[*si]; - while (++si < s) r = (r >> 4) + template[*si]; - while (si <= sf) - { if (si < slm) - r = (r >> 4) + template[*si++]; - else - { si++; - r = r >> 4; } - q = r & 0xf; - if (slm > slb) - { if (q < 5) continue; - tloop = (int)(slm - si); } - else - { if (q < 2) continue; - if (q < 3) - { if (!wcbp[si[-5]][apos2[-1]]) continue; - if (!wcbp[si[-4]][apos2[-2]]) continue; - tloop = (int)(slm - si); - if (tloop > 5) continue; } - else - { tloop = (int)(slm - si); - if (q < 4) - if (!bp[si[-4]][apos2[-2]]) - if (!bp[si[-2]][apos2[-4]]) - { if (tloop < 4) continue; - if (si[-1] != Guanine) continue; - if (*si != Thymine) continue; - if (si[1] != Thymine) continue; }}} - if (tloop < 7) - { if (tloop < 2) - if (tloop <= 0) - { if (tloop <= -2) - { if (!wcbp[si[-5]][apos2[-1]]) continue; - if (!wcbp[si[-4]][apos2[-2]]) continue; - tstem = 2; - tloop += 6; } - else - if (bp[si[-3]][apos2[-3]]) - { tstem = 3; - tloop += 4; } - else - { if (!wcbp[si[-5]][apos2[-1]]) continue; - if (!wcbp[si[-4]][apos2[-2]]) continue; - tstem = 2; - tloop += 6; }} - else - { if (bp[si[-2]][apos2[-4]]) - { tstem = 4; - tloop += 2; } - else - if (bp[si[-3]][apos2[-3]]) - { tstem = 3; - tloop += 4; } - else - { if (!wcbp[si[-5]][apos2[-1]]) continue; - if (!wcbp[si[-4]][apos2[-2]]) continue; - tstem = 2; - tloop += 6; }} - else - { if (bp[si[-1]][apos2[-5]]) - { if (q != 4) tstem = 5; - else - { if (bp[si[-2]][apos2[-4]]) tstem = 5; - else - { k = GI[si[-3]] + TI[si[-2]] + TI[si[-1]] + CI[*si]; - if (k >= 2) - { tstem = 3; - tloop += 4; } - else tstem = 5; }}} - else - { if (bp[si[-2]][apos2[-4]]) - { tstem = 4; - tloop += 2; } - else - if (bp[si[-3]][apos2[-3]]) - { tstem = 3; - tloop += 4; } - else - { if (!wcbp[si[-5]][apos2[-1]]) continue; - if (!wcbp[si[-4]][apos2[-2]]) continue; - tstem = 2; - tloop += 6; } - }} - if (tloop < 3) - if (tstem > 3) - { tstem--; - tloop += 2; }} - else - { if (!bp[si[-1]][apos2[-5]]) - if (!bp[si[-2]][apos2[-4]]) - { tstem = 3; - tloop += 4; } - else - { tstem = 4; - tloop += 2; } - else tstem = 5; } - if (tloop > 17) - if (tstem < 5) - continue; - - /* calculate tarm energy */ - - s1 = si - 5; - tpos = s1; - s4 = s1 + tstem; - s2 = apos2; - if (tt[*s1][*--s2]) - { energy = mtTSTTSTAB; - if (tt[*++s1][*--s2]) - { energy += mtTSTTSTAB; - bondtype = btmap[*s1++][*s2--]; } - else bondtype = 0; } - else - { energy = 0.0; - bondtype = 0; } - - /* calculate tstem energy */ - - stem_energy = bem[*s1][*s2]; - k = neighbour_map[*s1][*s2]; - stem_energy += neighbour_em[k][s1[1]][s2[-1]]; - bondtype += btmap[*s1][*s2]; - while (++s1 < s4) - { if (!wcbp[*s1][*--s2]) - { if (!wcbp[s1[-1]][s2[1]]) - { for (j = 0; j < mtNTM; j++) - if (*s1 == tandemid[j][1]) - if (*s2 == tandemid[j][3]) - if (s1[-1] == tandemid[j][0]) - if (s2[1] == tandemid[j][2]) - { stem_energy += tandem_em[j]; - break; } - if (s1 < (s4-1)) - if (!bp[s1[1]][s2[-1]]) stem_energy -= mt3MMSTAB; } - k = neighbour_map[*s1][*s2]; - stem_energy += (neighbour_em[k][s1[-1]][s2[1]] + - neighbour_em[k][s1[1]][s2[-1]]); } - bondtype += btmap[*s1][*s2]; - stem_energy += bem[*s1][*s2]; } - s1--; - if (tloop < 4) stem_energy += ssend_em[*s1][*s2]; - else - if (assymst[s1[1]][s2[-1]]) stem_energy += mtTERMSTAB; - else stem_energy += send_em[*s1][*s2]; - - /* compile possible tarms */ - - energy += (stem_energy - mtBONDSTAB*(double)(5-tstem)); - if (energy >= tarmthresh) - { thit[nth].pos = tpos; - s1 = tpos + tstem; - s2 = apos2 - tstem; - thit[nth].energy = energy - loop_stab[tloop] + - tG[s1[-1]] + tT[*s1] + tT[s1[1]] + tC[s1[2]]; - thit[nth].stem_energy = stem_energy; - thit[nth].bondtype = bondtype; - thit[nth].stem = tstem; - thit[nth].loop = tloop; - thit[nth].end = tpos + 2*tstem + tloop; - if (++nth >= mtNTH) - { fprintf(stderr,"Too many mt-tstem hits\n"); - break; } - if (tstem > 2) - if (tloop < 10) - if (gt[s1[-1]][*s2]) - { thit[nth].pos = tpos; - thit[nth].energy = energy - mtBONDSTAB - mtGTBOND - - loop_stab[tloop+2] + - tG[s1[-2]] + tT[s1[-1]] + tT[*s1] + tC[s1[1]]; - thit[nth].stem_energy = stem_energy - mtGTBOND; - thit[nth].bondtype = bondtype - 0x100; - thit[nth].stem = tstem - 1; - thit[nth].loop = tloop + 2; - thit[nth].end = thit[nth-1].end; - if (++nth >= mtNTH) - { fprintf(stderr,"Too many mt-tstem hits\n"); - break; } - if (tstem > 3) - if (tloop < 8) - if (gt[s1[-2]][s2[1]]) - { thit[nth].pos = tpos; - thit[nth].energy = energy - 2.0*mtBONDSTAB - 2.0*mtGTBOND - - loop_stab[tloop+4] + tG[s1[-3]] + - tT[s1[-2]] + tT[s1[-1]] + tC[*s1]; - thit[nth].stem_energy = stem_energy - 2.0*mtGTBOND; - thit[nth].bondtype = bondtype - 0x200; - thit[nth].stem = tstem - 2; - thit[nth].loop = tloop + 4; - thit[nth].end = thit[nth-1].end; - if (++nth >= mtNTH) - { fprintf(stderr,"Too many mt-tstem hits\n"); - break; }}} - if (tstem < 5) - { if (tloop < 11) continue; - if (tloop > 16) continue; - if (!wcbp[s1[1]][s2[-2]]) continue; - bondtype += btmap[*s1][s2[-1]] + btmap[s1[1]][s2[-2]]; - tstem += 2; - tloop -= 4; } - else - { if (tloop < 9) continue; - if (wcbp[*s1][s2[-1]]) - { if (tloop > 14) continue; - tstem++; - tloop -= 2; - bondtype += btmap[*s1][s2[-1]]; } - else - { if (tloop < 11) continue; - if (tloop > 16) continue; - if (!wcbp[s1[1]][s2[-2]]) continue; - bondtype += btmap[*s1][s2[-1]] + btmap[s1[1]][s2[-2]]; - tstem += 2; - tloop -= 4; }} - thit[nth].pos = tpos; - s1 = tpos + tstem; - thit[nth].energy = energy - loop_stab[tloop] + - tG[s1[-1]] + tT[*s1] + tT[s1[1]] + tC[s1[2]]; - thit[nth].stem_energy = stem_energy; - thit[nth].bondtype = bondtype; - thit[nth].stem = tstem; - thit[nth].loop = tloop; - thit[nth].end = thit[nth-1].end; - if (++nth >= mtNTH) - { fprintf(stderr,"Too many mt-tstem hits\n"); - break; } - if (tloop < 9) continue; - if (!wcbp[*s1][apos2[-tstem-1]]) continue; - if (++tstem > 7) continue; - if (tloop > 14) continue; - tloop -= 2; - thit[nth].pos = tpos; - s1 = tpos + tstem; - thit[nth].energy = energy - loop_stab[tloop] + - tG[s1[-1]] + tT[*s1] + tT[s1[1]] + tC[s1[2]]; - thit[nth].stem_energy = stem_energy; - thit[nth].bondtype = bondtype; - thit[nth].stem = tstem; - thit[nth].loop = tloop; - thit[nth].end = thit[nth-1].end; - if (++nth >= mtNTH) - { fprintf(stderr,"Too many mt-tstem hits\n"); - break; }}} - slm--; } - - /* find darms */ - - ndh = 0; - sle = sc - 4; - slb = sc - 8; - slm = sc - 1; - template[0] = dAt[*slm]; - template[1] = dCt[*slm]; - template[2] = dGt[*slm]; - template[3] = dTt[*slm]; - while (--slm > sle) - { template[0] = (template[0] << 4) | dAt[*slm]; - template[1] = (template[1] << 4) | dCt[*slm]; - template[2] = (template[2] << 4) | dGt[*slm]; - template[3] = (template[3] << 4) | dTt[*slm]; } - slm1 = slm; - while (slm > slb) - { template[0] = ((template[0] << 4) | dAt[*slm]) & 0xffff; - template[1] = ((template[1] << 4) | dCt[*slm]) & 0xffff; - template[2] = ((template[2] << 4) | dGt[*slm]) & 0xffff; - template[3] = ((template[3] << 4) | dTt[*slm]) & 0xffff; - slm--; - si = slm - 18; - s = si + 3; - r = template[*si]; - while (++si < s) r = (r >> 4) + template[*si]; - while (si <= slm1) - { if (si < slm) r = (r >> 4) + template[*si++]; - else - { r = r >> 4; - si++; } - if ((q = (r & 0xf)) < 6) - { q += (unsigned int)(TI[si[-6]] + RI[si[-5]]); - if (q < 6) continue; } - - /* calculate darm energy */ - - s1 = si - 4; - dhit[ndh].pos = s1; - energy = dT[s1[-2]] + dA[s1[-1]]; - dloop = (int)(slm1 - si); - if (dloop > 2) - if (bp[si[-1]][*slm1]) - { dstem = 4; - goto EC; } - if (dloop > 0) - if ((ggstembp[si[-2]][slm[2]]) || (gabp[si[-1]][*slm1])) - { dstem = 3; - dloop += 2; - energy += mtNOBOND; - goto EC; } - if (!wcbp[si[-3]][slm[3]]) continue; - if (!gc[si[-4]][slm[4]]) continue; - dstem = 2; - dloop += 4; - if (dloop > 5) energy += mtNOBOND; - energy += mtNOBOND; - - EC: - s2 = slm + 4; - s4 = s1 + dstem; - if (!wcbp[s1[1]][s2[-1]]) - if (stemterm[s1[1]][s2[-1]]) energy -= 1.0; - else - if (bp[s1[1]][s2[-1]]) energy -= 1.5; - else energy -= 2.0; - - /* calculate dstem energy */ - - stem_energy = bem[*s1][*s2]; - k = neighbour_map[*s1][*s2]; - stem_energy += neighbour_em[k][s1[1]][s2[-1]]; - bondtype = btmap[*s1][*s2]; - if (bp[*s1][*s2]) - { if (assymst[s2[1]][s1[-1]]) stem_energy += mtTERMSTAB; - else stem_energy += send_em[*s2][*s1]; - s1++; - s2--; } - else - { s1++; - s2--; - if (assymst[s2[1]][s1[-1]]) stem_energy += mtTERMSTAB; - else stem_energy += send_em[*s2][*s1]; } - stem_energy += bem[*s1][*s2]; - k = neighbour_map[*s1][*s2]; - stem_energy += (neighbour_em[k][s1[-1]][s2[1]] + - neighbour_em[k][s1[1]][s2[-1]]); - bondtype += btmap[*s1][*s2]; - while (++s1 < s4) - { if (!wcbp[*s1][*--s2]) - { if (!wcbp[s1[-1]][s2[1]]) - { for (j = 0; j < mtNTM; j++) - if (*s1 == tandemid[j][1]) - if (*s2 == tandemid[j][3]) - if (s1[-1] == tandemid[j][0]) - if (s2[1] == tandemid[j][2]) - { stem_energy += tandem_em[j]; - break; } - if (s1 < (s4-1)) - if (!bp[s1[1]][s2[-1]]) stem_energy -= mt3MMSTAB; } - k = neighbour_map[*s1][*s2]; - stem_energy += (neighbour_em[k][s1[-1]][s2[1]] + - neighbour_em[k][s1[1]][s2[-1]]); } - bondtype += btmap[*s1][*s2]; - stem_energy += bem[*s1][*s2]; } - if (!bp[*--s1][*s2]) - { s1--; - s2++; } - if (dloop < 4) stem_energy += ssend_em[*s1][*s2]; - else - if (assymst[s1[1]][s2[-1]]) stem_energy += mtTERMSTAB; - else stem_energy += send_em[*s1][*s2]; - - /* compile possible darms */ - - energy += stem_energy; - dhit[ndh].energy = energy; - dhit[ndh].stem_energy = stem_energy; - dhit[ndh].bondtype = bondtype; - dhit[ndh].stem = dstem; - dhit[ndh].loop = dloop; - if (++ndh >= mtND) - { fprintf(stderr,"Too many mt-dstem hits\n"); - break; } - if (dstem == 4) - { if (dloop >= 6) - if (bondtype < 0x1000) - { s1 = si - 5; - s2 = slm + 5; - if (bp[*s1][*s2]) - { dhit[ndh].pos = s1; - e = 0.5 + bem[*s1][*s2]; - dhit[ndh].energy = energy + e; - if (wcbp[*s1][*s2]) - dhit[ndh].energy += (dT[s1[-2]] + dA[s1[-1]] - - dT[s1[-1]] - dA[*s1]); - dhit[ndh].stem_energy = stem_energy + e; - dhit[ndh].bondtype = bondtype + btmap[*s1][*s2]; - dhit[ndh].stem = 5; - dhit[ndh].loop = dloop; - if (++ndh >= mtND) - { fprintf(stderr,"Too many mt-dstem hits\n"); - break; }}}} - else - if (dloop >= 6) - { s1 = si - 1; - s2 = slm1; - if (stemterm[*s1][*s2]) - { dhit[ndh].pos = si - 4; - dhit[ndh].energy = energy; - dhit[ndh].stem_energy = stem_energy; - dhit[ndh].bondtype = bondtype; - dhit[ndh].stem = 4; - dhit[ndh].loop = dloop - 2; - if (++ndh >= mtND) - { fprintf(stderr,"Too many mt-dstem hits\n"); - break; }}} - if (dloop >= 4) continue; - s1 = si - 4 + dstem - 1; - s2 = s1 + dloop + 1; - if (bp[*s1][*s2]) continue; - dhit[ndh].pos = si - 4; - dhit[ndh].energy = energy + 0.001; - dhit[ndh].stem_energy = stem_energy; - dhit[ndh].bondtype = bondtype; - dhit[ndh].stem = dstem - 1; - dhit[ndh].loop = dloop + 2; - if (++ndh >= mtND) - { fprintf(stderr,"Too many mt-dstem hits\n"); - break; } } - slm1--; } - - /* build darm exclusion map */ - /* 5' astems further from carm than */ - /* mt_DRLmaxlength must match a darm */ - - for (i = 3; i <= 30; i++) dposmap[i] = 0; - sf = sc - mt_DRLmaxlength - 1; - sld = sf; - if (ndh > 0) - { s = dhit[0].pos; - for (nd = 0; nd < ndh; nd++) - { se = dhit[nd].pos; - if (se < s) s = se; - i = (int)(sc - se); - if (dposmap[++i] < 1) dposmap[i] = 1; - dposmap[++i] = 2; - if (dposmap[++i] < 1) dposmap[i] = 1; } - s -= 4; - if (s < sf) sf = s; } - - /* build tarm exclusion map */ - /* 3' astems further from carm than */ - /* mt_TVRLmaxlength must match a tarm */ - - for (i = 17; i <= 62; i++) tendmap[i] = 0; - s2 = sc + mt_TVRLmaxlength + 17; - sle = s2; - if (nth > 0) - { s = thit[0].end; - for (nt = 0; nt < nth; nt++) - { se = thit[nt].end; - if (se > s) s = se; - i = (int)(se - sc); - bondtype = thit[nt].bondtype; - if (tendmap[i]) - { if (bondtype < tendmap[i]) tendmap[i] = bondtype; } - else tendmap[i] = bondtype; } - if (s > s2) s2 = s; } - - /* find astems in 3 categories: */ - /* high energy astems close to carm */ - /* high energy astems matching a high energy tarm far from carm */ - /* low energy astem matching a darm and tarm */ - - nah = 0; - sa = sc - 3; - sg = sf - 6; - sb = sc + 17; - se = s2 + 6; - template[0] = aAt[*se]; - template[1] = aCt[*se]; - template[2] = aGt[*se]; - template[3] = aTt[*se]; - while (--se > s2) - { template[0] = (template[0] << 4) | aAt[*se]; - template[1] = (template[1] << 4) | aCt[*se]; - template[2] = (template[2] << 4) | aGt[*se]; - template[3] = (template[3] << 4) | aTt[*se]; } - ti = (int)(se - sc); - while (se >= sb) - { template[0] = ((template[0] << 4) | aAt[*se]) & 0xfffffff; - template[1] = ((template[1] << 4) | aCt[*se]) & 0xfffffff; - template[2] = ((template[2] << 4) | aGt[*se]) & 0xfffffff; - template[3] = ((template[3] << 4) | aTt[*se]) & 0xfffffff; - if (tendmap[ti]) - { nti = (tendmap[ti] < 0x2000)?1:0; } - else - { if (se > sle) goto ANX; - nti = -1; } - si = sg; - r = template[*si]; - while (++si < sf) r = (r >> 4) + template[*si]; - di = (int)(sc - si); - while (si < sa) - { r = (r >> 4) + template[*si++]; - if (dposmap[--di]) - { if (nti <= 0) - { if (nti < 0) - if (dposmap[di] < 2) continue; - if ((av = (r & 0xf)) < athresh) continue; }} - else - { if (si < sld) continue; - if (nti < 0) continue; - if ((av = (r & 0xf)) < athresh) continue; } - if (nah >= mtNA) - { fprintf(stderr,"Too many mt-astem hits\n"); - break; } - - /* predict astem length and calculate astem energy */ - - s1 = si - 7; - s2 = se + 6; - if (bp[*s1][*s2]) - { astem = 7; - energy = 0.0; - ahit[nah].pos1 = s1; - ahit[nah].pos2 = se; } - else - if (ggstemterm[*s1][*s2]) - { astem = 7; - ahit[nah].pos1 = s1; - ahit[nah].pos2 = se; - energy = bem[*s1++][*s2--]; } - else - { energy = bem[*s1++][*s2--]; - if (bp[*s1][*s2]) - { astem = 6; - ahit[nah].pos1 = s1; - ahit[nah].pos2 = se; } - else - if (ggstemterm[*s1][*s2]) - { astem = 6; - ahit[nah].pos1 = s1; - ahit[nah].pos2 = se; - energy += bem[*s1++][*s2--]; } - else - { astem = 5; - energy += bem[*s1++][*s2--]; - ahit[nah].pos1 = s1; - ahit[nah].pos2 = se; }} - ahit[nah].stem = astem; - bondtype = btmap[*s1][*s2]; - energy += bem[*s1][*s2]; - k = neighbour_map[*s1][*s2]; - energy += neighbour_em[k][s1[1]][s2[-1]]; - energy += bem[*++s1][*--s2]; - k = neighbour_map[*s1][*s2]; - energy += (neighbour_em[k][s1[-1]][s2[1]] + - neighbour_em[k][s1[1]][s2[-1]]); - bondtype += btmap[*s1][*s2]; - while (++s1 < si) - { if (!wcbp[*s1][*--s2]) - { if (!wcbp[s1[-1]][s2[1]]) - { for (j = 0; j < mtNTM; j++) - if (*s1 == tandemid[j][1]) - if (*s2 == tandemid[j][3]) - if (s1[-1] == tandemid[j][0]) - if (s2[1] == tandemid[j][2]) - { energy += tandem_em[j]; - break; } - if (s1 < (si-1)) - if (!bp[s1[1]][s2[-1]]) energy -= mt3MMSTAB; } - k = neighbour_map[*s1][*s2]; - energy += (neighbour_em[k][s1[-1]][s2[1]] + - neighbour_em[k][s1[1]][s2[-1]]); } - bondtype += btmap[*s1][*s2]; - energy += bem[*s1][*s2]; } - if (!bp[*--s1][*s2]) - if (!bp[*--s1][*++s2]) - if (!bp[*--s1][*++s2]) - if (!bp[*--s1][*++s2]) - goto NOST; - if (assymst[s1[1]][s2[-1]]) energy += mtTERMSTAB; - NOST: - ahit[nah].energy = energy; - ahit[nah].bondtype = bondtype; - nah++; } - ANX: - se--; - ti--; } - if (nah <= 0) continue; - - - /* build mttrna genes */ - /* cycle through astems first so that */ - /* GC content is only calculated once per astem */ - - thresh = -INACTIVE; - te.ps = NULL; - for (na = 0; na < nah; na++) - { apos2 = ahit[na].pos2; - apos1 = ahit[na].pos1; - astem = ahit[na].stem; - aend1 = apos1 + astem; - astem8 = (astem == 7)?(wcbp[apos1[-1]][apos2[7]]):0; - asteme = 0; - ea = ahit[na].energy; - abondtype = ahit[na].bondtype; - agcat = ((abondtype >> 4) + abondtype) & 0xf; - - /* GC content */ - - s = apos1; - aend2 = apos2 + astem; - nbase = (int)(aend2 - apos1) + 1; - igc = 0; - while (s <= aend2) - { k = *s++; - if (k >= Cytosine) if (k <= Guanine) igc++; } - gcv = 10.0*(double)igc/(double)nbase; - if (gcv < 1.0) - { if (gcv < 0.55) continue; - ea -= 0.5; } - if (nbase > 60) - { if (gcv > 6.0) ea -= 2.0*(gcv - 6.0); } - else - { if (gcv > 5.0) ea -= 2.0*(gcv - 5.0); } - if (gcv > 6.6) - { ea -= 6.0; - if (gcv > 7.0) ea -= 6.0; } - - /* findout if inside a coding sequence */ - - - - incds = 0; - i = -1; - while (++i < ncdsh) - if (apos1 > cdshit[i].pos1) - if (aend2 <= cdshit[i].pos2) - { incds = 1; - ea -= 2.0; - break; } - -/* if (incds) continue; */ - - - -/* - s = apos1 + nbase/2; - if (incodon(s-75,s+75) > 30.0) /@ 3.5,3.0,2.5 @/ - { incds = 1; - ea -= 2.0; } - else - incds = 0; -*/ - -/* - s = apos1 + nbase/2; - if (incodon(s-150,s+150) > 0.0) - { incds = 1; - ea -= 2.0; } - else - incds = 0; -*/ - - - /* cycle through carms that fall between astem */ - - nc = -1; - while (++nc < nch) - { cpos = chit[nc].pos; - dloop = (int)(cpos - aend1); - if (dloop < 3) continue; - if (dloop > 26) continue; - cend = chit[nc].end; - tloop = (int)(apos2 - cend); - if (tloop < 5) continue; - cloop = chit[nc].loop; - cstem = chit[nc].stem; - clooppos = chit[nc].looppos; - cloopend = clooppos + cloop; - carm = chit[nc].arm; - anticodon = chit[nc].anticodon; - cbondtype = chit[nc].bondtype; - acbondtype = abondtype + cbondtype; - cgcat = ((cbondtype >> 4) + cbondtype) & 0xf; - ec = ea + chit[nc].energy; - - /* astem,cstem stability (GC bond count) */ - - if ((abondtype & 0xf) <= 0) - if ((cbondtype & 0xf) <= 0) - { ec -= mtGCPENALTYD; - if (((cbondtype & 0xf0) >> 4) >= 5) ec += 0.5; } - - - /* anticodon to astem discriminator base match */ - - astem8d = 0; - if (cloop == 7) - { if (!mt_discrim[ds][anticodon][apos2[astem]]) - if (astem8) - if (mt_discrim[ds][anticodon][apos2[8]]) astem8d = 1; - else ec -= 3.0; - else - if (astem <= 6) - { if (!mt_discrim[ds][anticodon][apos2[7]]) - if (astem == 5) - { if (!mt_discrim[ds][anticodon][apos2[6]]) ec -= 3.0; } - else - ec -= 3.0; } - else - ec -= 3.0; } - - - /* build TV-replacement loop mttrna genes */ - - if (tloop <= mt_TVRLmaxlength) - { if (!sw->tvloop) goto TVN; - - /* astem termination */ - /* (only need to calculate once per astem) */ - - if (!asteme) - { asteme = 1; - s = aend1 - 1; - se = apos2; - while (!bp[*s][*se]) - { if (--s <= apos1) - { eas = 0.0; - goto NOST2; } - se++; } - if (!aastemterm[s[1]][se[-1]]) eas = -0.5; - else - { eas = 0.0; - while (se >= apos2) - { s++; - se--; - if (aastemterm[*s][*se]) eas += 1.0; }}} - - /* choose darm */ - - NOST2: - energy = 94.0 + ec + eas; - nd = -1; - ndi = -1; - ed = -INACTIVE; - while (++nd < ndh) - { dpos = dhit[nd].pos; - spacer1 = (int)(dpos - aend1); - if (spacer1 != 2) continue; - dl = dhit[nd].loop; - dstem = dhit[nd].stem; - if (dstem > 4) continue; - darm = 2*dstem + dl; - spacer2 = (int)(cpos - dpos) - darm; - - /* astem,darm,cstem interspacing */ - - if (spacer2 < 1) continue; - e = dhit[nd].energy; - if (spacer2 > 1) - { if (spacer2 > 2) continue; - if (!stembp[*cpos][cend[-1]]) continue; - if (tloop > 12) e -= 2.0; - if ((dhit[nd].bondtype & 0xf) < 1) - if ((agcat + cgcat + 1) < (cstem + astem)) e -= 3.0; } - else - if (dl > 11) - { if (!RI[cpos[-1]]) e -= 2.0; } - else - { if (cpos[-1] == Cytosine) e -= 2.0; } - - /* small,large dloop, dstem R motif */ - - if (dl < 3) e -= 2.0; - if (dl > 12) e -= 2.0; - if (!RI[*dpos]) e -= 1.0; - - /* darm,tloop tertiary interaction */ - - k = 0; - di = ((dl >= 12)?3:((dl >= 9)?2:1)); - tl = (tloop >= 14)?5:((dl >= 9)?((tloop >= 10)?4:3):3); - if (!ggstackbp[dpos[dstem+di]][cend[tl]]) - { if (tl > 3) - { if (!ggstackbp[dpos[dstem+di]][cend[tl-1]]) e -= 1.5; - else k++; } - else - if (di > 1) - { if (!ggstackbp[dpos[dstem+di-1]][cend[tl]]) e -= 1.5; - else k++; } - else - e -= 1.5; } - else k++; - if (stemterm[dpos[dstem-1]][dpos[darm-dstem]]) - { e -= 0.5; - if (cend[2] == dpos[dstem-2]) - { if (bp[cend[2]][dpos[darm-dstem+1]]) k++; } - else - { if (cend[2] == dpos[darm-dstem+1]) - if (bp[cend[2]][dpos[dstem-2]]) k++; }} - else - { if (cend[2] == dpos[dstem-1]) - { if (!bp[cend[2]][dpos[darm-dstem]]) e -= 0.5; - else k++; } - else - { if (cend[2] != dpos[darm-dstem]) e -= 0.5; - else - if (!bp[cend[2]][dpos[dstem-1]]) e -= 0.5; - else k++; }} - if (cend[1] == *dpos) - { if (!stackbp[cend[1]][dpos[darm-1]]) e -= 0.5; - else k++; } - else - { if (cend[1] != dpos[darm-1]) e -= 0.5; - else - if (!bp[cend[1]][*dpos]) e -= 0.5; - else k++; } - - /* darm stability */ - - dstemmotif = wcbp[dpos[1]][dpos[darm-2]]; - if (spacer2 == 2) - if ((k < 3) || (dhit[nd].bondtype > 0x200) || (!dstemmotif)) - { if (abondtype >= 0x10000) e -= 2.0; - if (dstem > 3) e -= 1.0; - e -= 0.5; } - - /* darm tertiary interactions */ - - j = 0; - b8 = dpos[-2]; - b9 = dpos[-1]; - if (!bp[b8][dpos[dstem]]) e -= 1.0; - else if (wcbp[b8][dpos[dstem]]) j++; - if (!bp[b8][dpos[darm-dstem-1]]) e-= 1.0; - else if (wcbp[b8][dpos[darm-dstem-1]]) j++; - if (!wcbp[dpos[2]][dpos[darm-3]]) - { if (!gastembp[b8][dpos[dstem]]) e -= 2.0; - else if (!gastembp[b8][dpos[darm-dstem-1]]) e -= 2.0; - if (!ggstembp[dpos[2]][dpos[darm-3]]) e -= 1.0; } - else j++; - if (!bp[b9][dpos[2]]) - { if (!bp[b9][dpos[darm-3]]) e -= 1.0; - else j++; } - else j++; - -/* more extensive tertiary interaction between darm,tloop */ - - if (dstemmotif) - { if (k >= 3) - if (bp[dpos[2]][dpos[darm-3]]) - { if (b8 != Thymine) e += 0.5; - if (dl > 3) - if (bp[dpos[dstem+2]][cend[tl+1]]) e += 0.7; - else - if (gabp[dpos[dstem+2]][cend[tl+1]]) e += 0.5; - if (tloop >= 6) - if (spacer2 < 2) - if (dl >= 3) - { di = (dl > 11)?2:1; - if (bp[dpos[dstem+di]][cend[tl]]) - { if (chit[nc].stem_energy > -4.8) e += 0.5; - if (wcbp[dpos[dstem+di]][cend[tl]]) - if (gcv > 1.2) - if (clooppos[1] == Thymine) - if (cbondtype < 0x200) - if ((cbondtype & 0xf) > 0) - if (abondtype < 0x2000) - { e += 1.5; - if (dl > 3) - if (wcbp[dpos[dstem+di+1]][cend[tl+1]]) - e += 1.0; }}}} - if (j >= 4) e += 0.25; } - if (e > ed) - { ed = e; - ndi = nd; - ti = k; }} - if (ndi < 0) goto TVN; - energy += ed; - dpos = dhit[ndi].pos; - dstem = dhit[ndi].stem; - dl = dhit[ndi].loop; - darm = 2*dstem + dl; - dbondtype = dhit[ndi].bondtype; - spacer2 = (int)(cpos - dpos) - darm; - spacer1 = (int)(dpos - aend1); - b8 = *aend1; - b9 = aend1[1]; - - /* false positive suppression */ - - if (dloop < 15) energy -= 2.0; - if (cstem > 5) energy -= 1.0; - if (tloop < 6) energy -= 1.0; - if (tloop > 12) - { energy -= 1.0; - if (agcat < 6) energy -= 2.0; - if (tloop > 15) energy -= 2.5; } - if (!stackbp[*dpos][dpos[darm-1]]) energy -= 1.0; - if (dstem < 4) - if (gcv > 1.2) - if ((dbondtype & 0xf0f) == 0) energy -= 1.5; - if (b8 != Thymine) - { if (dl < 4) - if (abondtype > 0x10000) - energy -= 1.5; - if (b8 == Adenine) - if (YI[cloopend[-2]]) - energy -= 1.0; } - if (dl > 10) - { if (tloop < 7) energy -= 2.0; - if (spacer2 > 1) energy -= 2.0; - if (dhit[ndi].stem_energy < -3.4) energy -= 2.0; } - if (gcv < 2.0) - if (dbondtype > 0x10000) energy -= 2.0; - if ((cbondtype & 0xf) < 1) - if (abondtype > 0x100) - { if (cgcat < 4) - energy -= 1.5; - if (!wcbp[dpos[2]][dpos[darm-3]]) energy -= 1.0; } - if (b8 != Thymine) - if ((clooppos[1] != Thymine) || - (*clooppos != Cytosine)) - if (dl > 3) - if (dbondtype > 0x10000) - energy -= 1.0; - if (!RI[cend[1]]) - if (b9 != Guanine) energy -= 1.0; - else energy -= 0.5; - if (b9 == Guanine) - { if (!RI[*cend]) energy -= 1.0; - if (spacer2 != 1) energy -= 3.0; - else - { tl = (tloop >= 14)?5:((dl >= 9)?((tloop >= 7)?4:3):3); - s = dpos + dstem; - if (!wcbp[s[1]][cend[tl]]) - { energy -= 2.5; - if (dl >= 5) - if (chit[nc].energy > 2.0) - if (wcbp[s[2]][cend[tl]]) - if (wcbp[s[3]][cend[tl+1]]) - energy += 6.0; } - else - if (b8 == Thymine) - if (dl >= 5) - if (chit[nc].energy > 2.0) - if (wcbp[s[2]][cend[tl+1]]) - energy += 3.5; }} - else if (b9 != Adenine) energy -= 3.0; - if (b8 != Thymine) - if (b8 == Guanine) - { if (!RI[dpos[dstem]]) energy -= 1.0; - else - if (RI[dpos[darm-dstem-1]]) energy += 2.0; } - else energy -= 1.0; - - /* carm termination */ - - if (assymst[cend[-1]][*cpos]) energy += 1.0; - - /* CTnnnAA cloop motif */ - - energy += CX7[*clooppos] + AX7[cloopend[-2]]; - if (clooppos[1] == Cytosine) energy -= 2.0; - - /* NNnnnAA cloop motif */ - - if (cloopend[-2] == Adenine) - if (cloopend[-1] == Adenine) - if (spacer1 == 2) - if (dbondtype < 0x1000) - { if (abondtype < 0x100) energy += 1.0; - else - if (cbondtype < 0x100) energy += 1.0; } - - /* global stem damage level */ - - bondtype = acbondtype + dbondtype; - i = (int)((bondtype >> 16) & 0xf); - j = (int)((bondtype >> 12) & 0xf); - k = (int)((bondtype >> 8) & 0xf); - if (k > 0) - if (i > 0) - { k += (i + j); - if (k > 5) energy -= 1.0*(double)(k - 5); } - - /* global stem stability (GC bond count) */ - - gcc = bondtype & 0xf; - if (gcc < 2) - { if (ti >= 2) - { if (cbondtype < 0x100) - if ((cbondtype & 0xf) > 0) goto NGCC1; - if (ti >= 3) - if (cgcat >= 4) - { if ((cbondtype & 0xf) > 0) goto NGCC1; - if (cbondtype < 0x100) goto NGCC2; }} - energy -= (double)(3 - gcc); - NGCC2: - if (gcc < 1) - { if (agcat < 5) energy -= 2.0; - if (bondtype > 0x10000) energy -= 1.5; }} - NGCC1: - - - /* global stability */ - /* (stem stability,dloop-tloop tertiary interaction,dloop size) */ - - if (abondtype > 0x1000) - if (ti < 3) - { if (chit[nc].stem_energy < -6.0) - energy -= 1.5; - if (dl > 9) - if (((dbondtype + cbondtype) & 0xf) < 1) - energy -= 1.0; } - - /* tloop,dloop tertiary interaction */ - /* (alternative dloop position) */ - - if (bondtype < 0x1000) - if (b8 == Thymine) - if (RI[b9]) - if (dl > 4) - if (!bp[cend[3]][dpos[dstem+1]]) - if (bp[cend[3]][dpos[dstem+2]]) - energy += 0.5; - - - /* "near perfect" TV-loop mttRNA: */ - /* darm-tloop tertiary interaction,low global stem damage, */ - /* TR motif at b8-9, good astem,darm,carm interspacing */ - - if (ti >= 2) - if (agcat >= 6) - if (cbondtype < 0x100) - if (dbondtype < 0x100) - if (RI[b9]) - if (b8 == Thymine) - if ((abondtype & 0xf) > 0) - if ((dbondtype & 0xf) > 0) - if (spacer1 == 2) - if (spacer2 == 1) - energy += 1.5; - - /* find exceptions */ - - if (energy < dthresh) - { if (!mtxdetect) goto TVN; - if (incds) goto TVN; - if (energy < (thresh - 7.0)) goto TVN; - if (energy < (dthresh - 7.0)) goto TVN; - if (nbase > 68) goto TVN; - if (abondtype > 0x20100) goto TVN; - if (dl > 9) - { if (dl > 10) goto TVN; - if (dstem < 4) goto TVN; - if (dbondtype > 0x100) goto TVN; } - if (dstem > 4) goto TVN; - if (b9 != Adenine) - { if (b9 != Guanine) goto TVN; - if (cbondtype > 0x100) goto TVN; - if (dbondtype > 0x200) goto TVN; } - if (cloop != 7) goto TVN; - if (YI[cloopend[-2]]) goto TVN; - if (b8 == Thymine) - { if (apos2[-1] == Thymine) - if (apos2[-2] == Thymine) - if (tloop < 8) - if (tt[aend1[-1]][*apos2]) - if (wcbp[dpos[2]][dpos[darm-3]]) - if (((dbondtype + cbondtype) & 0xf) > 0) - energy += 3.0; } - else - if (b8 == Adenine) - { if (apos2[-1] == Adenine) - if (apos2[-2] == Adenine) - { if (assymat[aend1[-1]][*apos2]) - if (assymat[apos2[1]][aend1[-2]]) energy += 2.0; - if (agcat >= 5) - if (cgcat >= 4) - if (dbondtype < 0x100) - if (at[aend1[-1]][*apos2]) - if (at[apos2[1]][aend1[-2]]) - energy += 1.0; } - if (ti >= 3) - if (cgcat >= 4) - if (agcat >= 4) - if ((cbondtype & 0xf) > 0) - if ((abondtype & 0xf) > 1) - if (dbondtype < 0x200) - if (wcbp[dpos[1]][dpos[darm-2]]) - if (clooppos[1] == Thymine) - if (YI[*clooppos]) - if (RI[cloopend[-2]]) - if (RI[cloopend[-1]]) - energy += 5.0; } - if (bondtype < 0x100) - { if (spacer2 == 1) - if (*clooppos == Cytosine) - if (clooppos[1] == Thymine) - if (cloopend[-2] == Adenine) - if (cloopend[-1] == Adenine) - energy += 2.0; } - else - { if (spacer2 == 1) - { if (b8 == Thymine) - if (dl > 3) - if (dbondtype < 0x200) - { if (cbondtype < 0x100) - { if (!bp[dpos[dstem+1]][cend[3]]) - if (bp[dpos[dstem+1]][cend[4]]) - energy += 2.0; - if (dbondtype < 0x100) - if (abondtype < 0x20000) - if (ti >= 2) - if (dstem >= 3) - if (tloop < 13) - if ((cbondtype & 0xf) > 0) - energy += 4.0; }} - else - if (dstem > 3) - if (dbondtype < 0x300) - { if (bondtype < 0x10000) - if (ti >= 3) - if ((acbondtype & 0xf) > 0) - if (wcbp[dpos[2]][dpos[darm-3]]) - energy += 4.0; } - if (tloop < 8) - { if (dbondtype < 0x200) - { if (cbondtype < 0x100) - if (ti >= 2) - { - if (wcbp[dpos[dstem+1]][cend[3]]) - { - if (b8 == Thymine) - if (abondtype < 0x3000) - energy += 5.0; - if (agcat >= 5) - if (gcv > 1.2) - if (RI[cloopend[-1]]) - energy += 7.0; - } - if (dbondtype < 0x100) - if (agcat >= 6) - if (YI[*clooppos]) - if (clooppos[1] == Thymine) - if (RI[cloopend[-2]]) - if (RI[cloopend[-1]]) - energy += 2.0; - } - if (cbondtype < 0x300) - if (ti >= 3) - if (abondtype < 0x2000) - if ((dbondtype & 0xf) > 0) - if ((acbondtype & 0xf) > 0) - if (ahit[na].energy >= -7.0) - if (dstem >= 4) - energy += 3.0; } - if (dbondtype < 0x300) - if (cgcat >= 4) - if (abondtype < 0x2000) - if (ahit[na].energy >= -7.0) - if (cbondtype < 0x10000) - if ((cbondtype & 0xf) > 0) - if (cstem < 6) - if (ti >= 3) - energy += 4.0; }} - if (tloop > 8) - if (agcat >= 6) - if (cbondtype < 0x100) - if ((cbondtype & 0xf) > 0) - if (b8 == Thymine) - if (wcbp[dpos[dstem+1]][cend[3]]) - if (wcbp[dpos[1]][dpos[darm-2]]) - energy += 7.0; } - if (dbondtype < 0x100) - if (cgcat >= 4) - if (agcat >= 5) - if (wcbp[dpos[1]][dpos[darm-2]]) - if ((cbondtype & 0xf) > 0) - if ((abondtype & 0xf) > 0) - if ((dbondtype & 0xf) > 0) - energy += 0.5; - if (cbondtype < 0x100) - if (dbondtype < 0x200) - if (agcat >= 5) - if (b8 == Thymine) - if (tloop < 8) - if (wcbp[dpos[1]][dpos[darm-2]]) - if (wcbp[dpos[2]][dpos[darm-3]]) - if ((cbondtype & 0xf) > 0) - if ((abondtype & 0xf) > 0) - if ((dbondtype & 0xf) > 0) - if (clooppos[1] == Thymine) - if (YI[*clooppos]) - if (RI[cloopend[-2]]) - energy += 3.0; - if (energy < dthresh) goto TVN; - energy -= (0.9*(energy - dthresh) + 5.0); } - - /* remember fully formed TV-loop replacement mttRNA gene */ - /* if threshold reached */ - - if (energy < thresh) goto TVN; - te.energy = energy; - thresh = energy; - te.ps = apos1; - te.dstem = dstem; - te.dloop = dl; - te.spacer1 = spacer1; - te.spacer2 = spacer2; - te.cstem = cstem; - te.cloop = cloop; - k = astem + spacer1 + darm + spacer2; - te.anticodon = k + cstem + 2; - te.nintron = 0; - te.intron = 0; - te.var = 0; - te.varbp = 0; - te.tstem = 0; - te.tloop = tloop; - te.nbase = k + carm + tloop; - tastem = astem; - tastem8 = astem8; - tastem8d = astem8d; - - /* build D-replacement loop mttrna genes */ - - TVN: - if (tloop < 10) continue; } - if (dloop > mt_DRLmaxlength) goto DN; - if (gcv < 1.2) goto DN; - energy = 91.0 + ec; - - /* CCnnnAA cloop */ - - if (clooppos[1] == Cytosine) - { if (*clooppos != Cytosine) goto DN; - if (cloopend[-2] != Adenine) goto DN; - if (cloopend[-1] != Adenine) goto DN; - energy -= 1.0; } - - /* choose tarm */ - - nt = -1; - nti = -1; - et = -INACTIVE; - while (++nt < nth) - { tl = thit[nt].loop; - if (tl > 11) continue; - if (thit[nt].end != apos2) continue; - tpos = thit[nt].pos; - tstem = thit[nt].stem; - - /* var loop (3-7 bases long) */ - - var = (int)(tpos - cend); - if (var < 3) continue; - e = thit[nt].energy; - if (var > 5) - { if (var > 7) continue; - if (tl < 7) continue; - e -= 1.0; - if ((dloop < 10) || (tstem < 4)) - e -= 2.0*(double)(var - 5); } - - /* tloop RA or RG motif */ - - s = tpos + tstem; - k = 0; - n = 0; - i = 0; - while ((j = tloopa[tl][i++]) >= 0) - if (s[j] == Adenine) - { k = 1; - if (dloop >= 3) - if (tl > 3) - { b57 = s[j-1]; - if (RI[b57] || (tl < 5)) - { if (bp[b57][aend1[0]]) - { e += 1.5; - n = 1; - break; } - if (bp[b57][aend1[1]]) - { e += 1.5; - n = 1; - break; } - if (dloop > 10) - if (bp[b57][aend1[2]]) - { e += 1.5; - n = 1; - break; }}}} - if (!k) - { i = 0; - while ((j = tloopa[tl][i++]) >= 0) - if (s[j] == Guanine) - if (RI[s[j-1]]) - { k = 1; - break; } - if ( j < 0) e -= ((tl > 5)?2.0:1.0); } - - /* tertiary interaction between tloop and start of dloop */ - - ti = (tl > 5)?1:((dloop > 5)?1:0); - di = (dloop > 5)?2:1; - if (stackbp[aend1[di]][s[ti]]) e += 1.0; - - /* tloop GTTC motif */ - - i = (s[-1] == Guanine)?1:0; - if (tl >= 5) - { ti = i + TI[*s] + TI[s[1]] + CI[s[2]]; - if (n) - if (!i) - if (TI[*s]) - if (TI[s[1]]) - if (AI[s[2]]) - if (tl >= 7) - ti++; - if ((i > 0) || (ti >= 3)) e += (double)ti; } - else - { ti = i + TI[*s] + TI[s[1]]; - if ((i > 0) || (ti >= 2)) e += (double)ti; } - if (e > et) - { et = e; - nti = nt; - tc = k; }} - - if (nti < 0) goto DN; - energy += et; - tpos = thit[nti].pos; - tstem = thit[nti].stem; - tl = thit[nti].loop; - tbondtype = thit[nti].bondtype; - var = (int)(tpos - cend); - - /* tertiary interaction between b48(=tpos[-1]) and dloop */ - - b48 = tpos[-1]; - if (dloop <= 7) - { if (YI[b48]) tc++; - else energy -= 1.0; } - else - { i = 0; - while ((j = dloopi[dloop][i++]) >= 0) - if (assymagbp[b48][aend1[j]]) - { tc++; - break; } - if (j < 0) energy -= 1.0; } - - /* large dloop, large tloop */ - - if (dloop > 7) - { if (tl >= 6) - if (tc < 2) - energy -= 2.0; - if (tstem < 3) - energy -= 1.0; } - - /* carm termination */ - - s = cpos - 1; - se = cend; - if (cstem > 5) - { s++; - se--; } - if (!stackbp[*s][*se]) energy -= 1.0; - se = cpos - 3; - if (!bp[cend[-1]][*cpos]) - { if (assymst[cend[-1]][*cpos]) - { if (dloop < 5) se++; - energy += 1.5; } - else if (dloop < 13) se++; } - else - { if (cstem > 5) { if (dloop < 13) se++; } - else if (dloop < 5) se++; } - - /* tertiary interaction between tloop and dloop near carm */ - - s = tpos + tstem; - if (tl >= 5) - { ti = (tl >= 10)?4:((tl >= 7)?3:2); - b57 = s[ti]; - if (!gabp[*se][b57]) energy -= 2.0; - else - { k = (var > 3)?2:((var > 1)?1:0); - if (bp[cend[k]][b57]) energy += 1.0; }} - - /* R motif at end of tstem */ - - if (!RI[s[-1]]) energy -= 2.0; - - /* large tloop */ - - if (tl > 9) - if (tbondtype > 0x200) energy -= 2.0; - - /* dloop,var,tloop T repeat motif */ - /* present in some nematode D-loop replacement tRNA-Ser genes */ - - if (dloop >= 4) - { k = 1; - se = aend1; - while (se < cpos) if (*se++ == Thymine) k++; - if (k >= dloop) - { if (var >= 3) - { se = cend; - while (se < tpos) if (*se++ == Thymine) k++; - if (k >= (var + dloop)) - { energy += 3.0; - se = s + ((tl > 5)?5:tl); - while (s < se) if (*s++ != Thymine) break; - if (s >= se) energy += 5.5; }}}} - - /* astem stability */ - - if (ea < -6.1) - if (tl > 4) - { if (*s == Thymine) - if (s[-1] == Guanine) - if (s[1] == Thymine) - goto NASI; - if (ea > -8.3) - if (*clooppos == Cytosine) - if (clooppos[1] == Thymine) - if (cloopend[-2] == Adenine) - if (cloopend[-1] == Adenine) - goto NASI; - energy -= 3.0; } - NASI: - - /* cstem stability (GC bond count) */ - - bondtype = acbondtype + tbondtype; - if ((cbondtype & 0xf) < 1) - if ((bondtype & 0xf) < 3) energy -= 1.0; - - /* cloop CTnnnAA motif */ - - if (bondtype >= 0x400) - energy += CX[*clooppos] + TX[clooppos[1]] + - AXX[cloopend[-1]] + AXX37[cloopend[-2]]; - else - energy += CX[*clooppos] + TX[clooppos[1]] + - AX[cloopend[-1]] + AX37[cloopend[-2]]; - - /* large dloop */ - - if (dloop >= 9) - { k = tloop - dloop - 4; - if (k < 0) - if (bondtype >= 0x1000) energy += (double)k; - if (dloop >= 12) - { if (dloop >= 14) energy -= 2.0; - else - if (tstem < 6) energy -= ((dloop >= 13)?2.0:1.0); }} - - /* small dloop, small tarm */ - - if (dloop <= 10) - if (tstem < 3) - if (ea > -2.6) - if (tl <= 7) - if (cgcat >= 4) - if (gc[*tpos][apos2[-1]]) - if (gc[tpos[1]][apos2[-2]]) - if (gcv > 1.2) - if ((abondtype & 0xf) > 0) - if ((cbondtype & 0xf) > 0) - energy += (4.5 + (mtBONDSTAB - 0.5)*(double)(5 - tstem)); - - /* global stem damage level */ - - i = (int)((bondtype >> 16) & 0xf); - j = (int)((bondtype >> 12) & 0xf) + i; - k = (int)((bondtype >> 8) & 0xf); - if (tstem > 3) - { if ((k > 0) || (tl > 9)) - if ((j > 0) || (k > 5)) - { n = j + k; - if ((s[-1] != Guanine) || (*s != Thymine) || - (s[1] != Thymine) || (tstem < 5)) - if (n > 4) energy -= 2.0*(double)(n - 4); }} - else - { n = j + k; - if (n > 3) energy -= 2.0*(double)(n - 3); } - - /* long tstem with tloop GTT motif */ - - if (s[-1] == Guanine) - if (*s == Thymine) - if (s[1] == Thymine) - if (tstem >= 6) - if (tbondtype < 0x100) - energy += 1.5; - - /* find exceptions */ - - if (energy < tthresh) - { if (!mtxdetect) goto DN; - if (incds) goto DN; - if (energy < (thresh - 13.5)) goto DN; - if (energy < (tthresh - 13.5)) goto DN; - if (k > 1) - { if (i > 2) goto DN; - if (k > 4) - if (i > 1) goto DN; } - if (nbase > 70) goto DN; - if (var > 4) - { if (var > 5) goto DN; - if (var > tl) goto DN; } - if (tstem < 4) - if ((agcat + cgcat + 2) < (astem + cstem)) - goto DN; - if (tl > 9) goto DN; - if (dloop > 13) goto DN; - if (!YI[*clooppos]) goto DN; - if ((abondtype & 0xf) < 2) - { if ((abondtype & 0xf) < 1) goto DN; - if (cbondtype > 0x200) - if (tbondtype > 0x100) - if (abondtype > 0x200) - goto DN; } - if ((tbondtype & 0xf) < 1) - { if ((acbondtype & 0xf) < 1) goto DN; - if (acbondtype > 0x200) goto DN; } - if ((dloop + 19) < tloop) goto DN; - if (gcv > 5.5) goto DN; - tgcat = ((tbondtype >> 4) + tbondtype) & 0xf; - if ((tgcat + 2) < tstem) goto DN; - if (cloop != 7) goto DN; - if (bp[*cpos][cend[-1]]) - if (bp[cpos[-1]][*cend]) - if (bp[cpos[-2]][cend[1]]) - energy += 2.0; - if (bondtype < 0x20000) - if (thit[nti].stem_energy > -4.6) - if (tstem >= 4) - if ((tstem >= 5) || (s[-1] == Guanine)) - if (stackbp[cpos[1]][cend[-2]]) - if (stackbp[*cpos][cend[-1]]) - if (stackbp[cpos[-1]][*cend]) - { energy += 1.5; - if (s[-1] == Guanine) - if (*s == Thymine) - if (s[1] == Thymine) - energy += 1.0; - if (agcat >= 6) energy += 0.5; } - if (tc > 0) - if (tstem >= 5) - if (var < 6) - if (var > 2) - { if (acbondtype < 0x100) energy += 5.0; - else - if ((abondtype + tbondtype) < 0x100) - energy += 3.0; - else - if (cloopend[-2] == Thymine) - if (cloopend[-1] == Thymine) - if (dloop > 7) - if (tbondtype < 0x100) - if (!tt[*tpos][apos2[-1]]) - if ((agcat+cgcat) >= 10) - energy += 13.5; } - if (s[-1] == Guanine) - if (*s == Thymine) - if (s[1] == Thymine) - if ((tstem >= 5) || (s[2] == Cytosine)) - { energy += 1.5; - if (tstem >= 5) - if (tbondtype < 0x1000) - if (s[2] == Cytosine) - { if (abondtype < 0x10000) - { if (*clooppos == Cytosine) - if (clooppos[1] == Thymine) - if (cloopend[-2] == Adenine) - if (cloopend[-1] == Adenine) - energy += 3.0; - if (tbondtype < 0x200) - if (bondtype < 0x10000) - if (tl == 7) - if (s[4] == Adenine) - energy += 4.0; }} - else - if (tbondtype < 0x200) - if ((tbondtype & 0xf) >= 2) - if (*clooppos == Cytosine) - if (clooppos[1] == Thymine) - if (cloopend[-2] == Adenine) - if (cloopend[-1] == Adenine) - energy += 1.0; } - if (tstem >= 4) - if (tbondtype < 0x100) - if (cbondtype < 0x200) - if (agcat >= 5) energy += 1.5; - if (energy > tthresh) energy = tthresh; - if (ea > -1.8) energy += 3.0; - else - if (abondtype < 0x60) energy += 1.5; - else - if (acbondtype < 0x200) energy += 0.75; - if (*clooppos == Cytosine) - if (cloopend[-2] == Adenine) - if (cloopend[-1] == Adenine) - { if (tstem >= 5) - if (tbondtype < 0x100) - if (clooppos[1] == Thymine) - { energy += 3.0; - if (tstem >= 6) energy += 1.0; } - else - if (clooppos[1] == Cytosine) - energy += 1.0; - if (tc >= 2) - if (clooppos[1] == Thymine) - if (bondtype < 0x1000) - if (tstem >= 4) - if (var < 6) - if (var > 2) - energy += 3.0; } - if (cbondtype < 0x100) - if (agcat >= 5) - if (tc > 0) - if (clooppos[1] == Thymine) - if (YI[*clooppos]) - if (RI[cloopend[-2]]) - if (RI[cloopend[-1]]) - if (tbondtype < 0x100) - energy += 4.0; - else - if (agcat >= 6) - if ((tgcat + 1) >= tstem) - if (tstem >= 4) - energy += 4.0; - if (bondtype < 0x1000) - { energy += 0.5; - if (bondtype < 0x200) energy += 0.75; } - if (energy < tthresh) goto DN; - energy -= (3.0 + 0.9*(energy - tthresh)); } - - /* mammalian cloop motif constraint */ - - if (ds == MAMMAL_MT) - { s1 = clooppos; - s2 = s1 + cloop; - r = *s1++; - while (s1 < s2) r = (r << 4) + *s1++; - if (r != clmotif[0]) - if (r != clmotif[1]) - if (r != clmotif[2]) - energy -= 5.0; } - - /* remember fully formed D-loop replacement mttRNA gene */ - /* if threshold reached */ - - if (energy < thresh) goto DN; - te.energy = energy; - thresh = energy; - te.ps = apos1; - te.spacer1 = 0; - te.spacer2 = 0; - te.dstem = 0; - te.dloop = dloop; - te.cstem = cstem; - te.cloop = cloop; - te.anticodon = astem + dloop + cstem + 2; - te.nintron = 0; - te.intron = 0; - te.var = var; - te.varbp = 0; - te.tstem = tstem; - te.tloop = tl; - te.nbase = astem + dloop + carm + var + - 2*tstem + tl; - tastem = astem; - tastem8 = astem8; - tastem8d = astem8d; - - /* build fully formed cloverleaf mttRNA genes */ - - DN: - if (dloop < 10) continue; - - /* choose tarm */ - - nt = -1; - nti = -1; - et = -INACTIVE; - while (++nt < nth) - { tend = thit[nt].end; - if (tend != apos2) continue; - e = thit[nt].energy; - tpos = thit[nt].pos; - tstem = thit[nt].stem; - - /* GT motif on tloop */ - - s = tpos + tstem; - if (*s == Thymine) - if (s[-1] == Guanine) - if (tstem >= 5) - if (!stackbp[*tpos][tend[-1]]) - { e += 0.5; - if (!bp[tpos[1]][tend[-2]]) e += 0.5; } - - /* large var loop */ - - var = (int)(tpos - cend); - if (var > 5) - { ev = (double)(var - 5); - if (tstem < 5) e -= 3.0*ev; - else e -= (0.5 + 0.5*ev); - - /* allow large var loop if tarm looks nuclear */ - /* (GTTC motif, very large var loop base-pairing) */ - - if (var > 9) - { if ((thit[nt].bondtype & 0xf) < 1) e -= 1.0; - e -= (0.25*(double)(var - 8)); - if (*s == Thymine) - if (s[-1] == Guanine) - if (s[1] == Thymine) - if (s[2] == Cytosine) - e += 4.0; - if (var > 17) - { if (var > 25) continue; - e += 0.5*vloop_stability(cend,var,&varbp); }}} - - /* small var loop */ - - if (var < 3) - { if (tstem > 5) - if (s[-1] != Guanine) - e -= 0.5; - if (var < 2) - { if (var < 1) - { if (var < 0) continue; - if (tstem < 4) - if (thit[nt].stem_energy < -4.0) - continue; } - e -= 3.0; }} - if (e > et) - { et = e; - nti = nt; }} - - if (nti < 0) continue; - tpos = thit[nti].pos; - tstem = thit[nti].stem; - tl = thit[nti].loop; - tarm = 2*tstem + tl; - var = (int)(tpos - cend); - b48 = tpos[-1]; - tbondtype = thit[nti].bondtype; - bondtype = acbondtype + tbondtype; - ti = (int)(((bondtype >> 16) & 0xf) + ((bondtype >> 12) & 0xf) + - ((bondtype >> 8) & 0xf)); - - /* choose darm */ - - nd = -1; - ndi = -1; - ed = -INACTIVE; - while (++nd < ndh) - { dl = dhit[nd].loop; - dstem = dhit[nd].stem; - darm = 2*dstem + dl; - dpos = dhit[nd].pos; - e = dhit[nd].energy; - - /* spacing between astem,darm,carm */ - - spacer1 = (int)(dpos - aend1); - spacer2 = (int)(cpos - dpos) - darm; - if (spacer1 < 2) - { if (spacer1 < 1) continue; - if (dstem < 3) continue; - if (dl > 12) e -= 2.0; - if (astem < 7) e -= 1.0; - if (spacer2 != 2) - { if (spacer2 < 1) continue; - if (spacer2 > 2) continue; - if ((abondtype & 0xf) < 1) - if ((dhit[nd].bondtype & 0xf) < 1) - e -= 0.5; - if (var > 7) e -= 1.0; - if (dl > 12) e -= 1.0; - if (cloop != 7) e-= 2.0; - if (cstem < 6) e -= 3.6; - else e -= 0.5; } - else - { if (cstem > 5) continue; - s = cpos; - se = cend-1; - while (!bp[*s][*se]) - { s++; - se--; } - if (!stemterm[s[-1]][se[1]]) e -= 0.5; - e -= 0.8; }} - else - { if (spacer1 > 2) - { if (spacer1 > 3) continue; - if (dstem > 4) continue; - if (dstem < 3) continue; - if (tl > 15) continue; - if (astem < 7) e -= 1.0; - if (ti > 4) e -= 1.0; - if (cloop != 7) e-= 2.0; - if (tbondtype > 0x2000) - if (!RI[tpos[tstem-1]]) e -= 2.0; - e -= 1.0; - if (spacer2 != 1) e -= 0.5; - else - if (dhit[nd].bondtype < 0x100) - if (var >= 3) - if (var <= 5) - if (tstem >= 3) - { e += 1.0; - if (agcat >= 5) - if (wcbp[*aend1][*apos2]) - if (!bp[aend1[-1]][*apos2]) - if (bp[b48][dpos[dstem+1]]) - e += 0.5; } - } - if (spacer2 > 1) - { if (spacer2 > 2) continue; - if (astem < 7) - if (spacer1 == 2) - e -= 1.0; - if (cloop != 7) e -= 2.0; - if (ea < -5.8) e -= 2.0; - e -= 2.5; - if (bp[b48][dpos[dstem+1]]) - { if (dhit[nd].bondtype < 0x1000) - if (wcbp[dpos[1]][dpos[darm-2]]) - if (wcbp[dpos[2]][dpos[darm-3]]) - if (var < 6) - if (dl > 3) - e += 2.0; } - else e -= 1.0; - } - else - if (spacer2 < 1) - { if (spacer2 < 0) continue; - if (var > 6) continue; - if (dstem > 4) continue; - if (dhit[nd].stem_energy < -4.3) continue; - if (astem < 7) - if (spacer1 == 2) - e -= 1.0; - if (cloop != 7) e-= 2.0; - e -= mtBONDSTAB; } - if (cstem > 5) - if ((!gt[*cpos][cend[-1]]) || astem8) e-= mtBONDSTAB; } - - /* very large or very small dloop */ - - if (dl < 3) e -= 2.0; - if (dl > 11) - { if (dl > 14) e -= 2.0; - else - if (dl > 13) - { if (dhit[nd].bondtype >= 0x100) e -= 2.0; - else e -= 1.0; } - else - if (dl > 12) - { if (dhit[nd].bondtype >= 0x1000) e -= 2.0; - else e -= 1.0; } - else - if (dhit[nd].bondtype >= 0x10000) e -= 2.0; } - - /* tertiary interactions in darm */ - - b8 = dpos[-2]; - b9 = dpos[-1]; - if (dl > 2) - { if (dl > 5) - if (!stackbp[dpos[dstem+1]][b48]) e -= 1.0; - if (!stackbp[b8][dpos[dstem]]) e-= 0.25; - if (!stackbp[b8][dpos[dstem+dl-1]]) e -= 0.25; } - if (!bp[b9][dpos[2]]) - if (!bp[b9][dpos[darm-3]]) - e -= 1.0; - - /* TR motif at b8-9 */ - - if (RI[b9]) - { if (b8 == Thymine) - if (spacer1 == 2) - if (ti < 6) - if (((bondtype & 0xf) > 2) || (bondtype < 0x1000) || - ((tbondtype < 0x100) && (tstem > 3))) - if ((cbondtype & 0xf) < 5) - if (stembp[dpos[1]][dpos[darm-2]]) - if (var < 6) - if (var > 2) e += 1.5; - else - if (tstem > 3) - if (cloopend[-2] == Adenine) - e += 1.5; } - else - { e -= 1.0; - if (b9 == Thymine) - if (spacer1 == 2) e -= 2.0; } - if (e > ed) - { ed = e; - ndi = nd; }} - - if (ndi < 0) continue; - energy = 100.0 + ec + ed + et; - dl = dhit[ndi].loop; - dstem = dhit[ndi].stem; - darm = 2*dstem + dl; - dpos = dhit[ndi].pos; - dbondtype = dhit[ndi].bondtype; - spacer1 = (int)(dpos - aend1); - spacer2 = (int)(cpos - dpos) - darm; - b8 = dpos[-2]; - - /* tertiary structure interaction between tloop and dloop */ - - if (tl >= 3) - if (dl >= 4) - { di = (dl < 7)?(darm-dstem-2):(darm-dstem-3); - ti = (tl < 9)?(tstem+2):((tl < 13)?(tstem+3):(tstem+5)); - if (ggbp[dpos[di]][tpos[ti]]) - if (ggbp[dpos[di-1]][tpos[ti-1]]) - { energy += 2.0; - if (spacer1 != 2) - if (spacer2 != 2) - if (dstem < 4) - if (tl > 7) - if (bp[dpos[di+1]][tpos[ti+1]]) - energy += 4.0; - if (ea > -2.5) - if (wcbp[dpos[1]][dpos[darm-2]]) - if (wcbp[dpos[2]][dpos[darm-3]]) - energy += 3.0; } - if (tl > 10) - if (dl > 10) - energy -= 1.0; } - else - if (dl == 3) - if (wcbp[dpos[dstem+1]][b48]) energy += 1.0; - - /* small darm and tarm */ - - if (tloop <= 18) - if (tarm <= 13) - if (dl <= 8) - if (spacer1 == 2) - if (spacer2 == 1) - if (abondtype < 0x1000) - if (tbondtype < 0x100) - if (dbondtype < 0x200) - { et = (mtBONDSTAB - 0.5)*(double)(5 - tstem) + - 0.1*(double)(7-tl); - ed = mtBONDSTAB*(double)(4 - dstem); - energy += (0.8*(et + ed)); } - - /* GTTC motif on tloop */ - - s = tpos + tstem; - if (tl < 5) - if (tl < 2) energy += G[s[-1]]; - else - { et = (G[s[-1]] + T[*s] + T[s[1]]); - if (tl > 3) - if (bp[*s][s[tl-1]]) - { e = (G[*s] + T[s[1]] + T[s[2]]); - if (e > et) et = e; } - if (tstem < 5) - { e = (G[s[-2]] + T[s[-1]] + T[*s] + C[s[1]]); - if (e > et) et = e; } - energy += et; } - else energy += (G[s[-1]] + T[*s] + T[s[1]] + C[s[2]]); - - /* long astem */ - - if (astem8) - if (bp[apos1[0]][apos2[6]]) - if (bp[apos1[1]][apos2[5]]) - if (bp[apos1[2]][apos2[4]]) - if (bp[apos1[3]][apos2[3]]) - energy += hbem[apos1[-1]][apos2[7]]; - - /* false positive supression */ - - if (!RI[cend[0]]) energy -= 1.0; - if (!RI[cpos[-1]]) energy -= 1.0; - if (tarm < (var + 3)) energy -= 2.0; - if (gcv < 1.5) - if (dbondtype > 0x10000) energy -= 2.0; - if (tarm > 27) - { energy -= 1.0; - if (spacer2 != 1) energy -= 1.0; } - if (dstem < 3) - { if (var > 5) energy -= 1.0; - if (tloop > (dloop + 8)) energy -= 0.5; } - if (b8 != Thymine) - if (dl > 3) - if (dbondtype > 0x100) - if ((b8 == Cytosine) || - (dbondtype > 0x10000)) - if (*clooppos != Cytosine) - if (!wcbp[dpos[dstem+1]][b48]) - energy -= 1.0; - - - /* high GC false positive suppression */ - - if (gcv >= 5.1) - { if ((abondtype & 0xf) >= 4) - { s1 = apos1; - s2 = apos2 + astem; - n = 0; - while (--s2 >= apos2) - if (gc[*s1++][*s2]) - { if (++n >= 4) - { energy -= 2.0; - break; }} - else n = 0; } - if ((dbondtype & 0xf) >= 4) energy -= 3.0; - if ((cbondtype & 0xf) >= 5) energy -= 3.5; - if ((tbondtype & 0xf) >= tstem) energy -= 4.0; } - - /* global stem damage level */ - - tc = tstem + dstem; - dtbondtype = dbondtype + tbondtype; - mabondtype = dtbondtype + cbondtype; - bondtype = acbondtype + dtbondtype; - if (bondtype < 0x100) energy += 0.5; - if ((dtbondtype & 0xf) < 1) - { energy -= 1.0; - if (tc >= 10) energy -= 2.0; - if ((bondtype & 0xf) < 3) - if (nbase > 75) energy -= 1.0; } - i = (int)((bondtype >> 16) & 0xf); - j = (int)((bondtype >> 12) & 0xf) + i; - k = (int)((bondtype >> 8) & 0xf) + j; - ti = (tc > 6)?5:((tc > 5)?4:3); - if (k > ti) - { ev = (double)(k - ti); - energy -= 0.5*ev; - if (cbondtype > 0x10000) - if (tstem < 5) - energy -= ev; - if (i > 0) - if (k > 8) - energy -= 1.5*(double)(k - 8); } - - /* low GC false positive supression */ - - if (gcv < 3.5) - if ((bondtype & 0xf) < 2) - { if ((bondtype & 0xf) < 1) energy -= 1.0; - if (dl > 3) - if (var > 2) - if (!wcbp[dpos[dstem+1]][b48]) energy -= 1.0; } - - /* small variable loop */ - - if (var < 3) - { if (dloop > 18) - { if (dloop > (tloop + 2)) energy -= 1.0; - if (tloop > 20) - if ((((dtbondtype >> 4) + dtbondtype) & 0xf) < 6) - energy -= 2.0; } - if (astem < 7) - { energy -= 1.0; - if (agcat >= 5) - if (bondtype < 0x300) - if (gcv > 1.2) - if (gcv < 5.0) - energy += 2.0; }} - else - - /* NNNNNAA cloop */ - - if (cloopend[-2] == Adenine) - if (cloopend[-1] == Adenine) - if (spacer1 > 1) - if ((dbondtype < 0x2000) || (dloop > mt_DRLmaxlength)) - { if (abondtype < 0x100) energy += 1.0; - else - if (cbondtype < 0x100) energy += 1.0; - else - if (tstem >= 5) - if (tbondtype < 0x100) - { energy += 1.0; - if (*clooppos == Cytosine) - if (clooppos[1] == Thymine) - if (dbondtype < 0x100) - energy += 0.5; - - if (cgcat >= 3) - if ((tbondtype & 0xf) > 0) - if (ggbp[dpos[dstem+1]][b48]) - if (wcbp[dpos[1]][dpos[darm-2]]) - if (tl < 10) - if (spacer1 == 2) - if (spacer2 == 1) - if (dl > 2) - if (var >= 2) - if (var < 6) - { if (agcat >= 6) energy += 3.0; - else - if (agcat >= 5) - if (cgcat >= 4) - if (dbondtype < 0x100) - if (*s == Thymine) - if (s[-1] == Guanine) - if (s[1] == Thymine) - energy += 3.0; }}} - - /* large tloop */ - - if (tl > 12) - { if (tbondtype > 0x10000) energy -= 2.0; - if (agcat < 5) - if (spacer1 != 2) - if (spacer2 != 1) - energy -= 1.0; } - - /* find exceptions */ - - if (energy < dtthresh) - { if (!mtxdetect) continue; - if (incds) continue; - if (energy < (thresh - 12.0)) continue; - if (energy < (dtthresh - 12.0)) continue; - if (nbase > 75) continue; - if (dstem > 4) continue; - if (dstem < 3) continue; - if (astem < 7) - if (acbondtype > 0x21000) - continue; - if (var > 5) - { if (var > 6) continue; - if (tarm < 12) continue; } - if (gcv <= 1.2) - { if (gcv < 0.9) continue; - if ((mabondtype & 0xf) < 1) continue; } - if (tl > 9) - { if (tl > 13) continue; - if (!wcbp[dpos[1]][dpos[darm-2]]) - continue; } - if (dl > 7) - { if (bondtype > 0x20000) - if (dloop > (tloop + 4)) continue; - if (dl > 10) - { if (dl > 12) - if (abondtype > 0x1000) - continue; - if (tbondtype > 0x200) continue; - if (tt[*tpos][apos2[-1]]) continue; - if (var > 5) continue; - if (dloop > (tloop + 8)) - if (bondtype > 0x10000) continue; - if (astem < 7) continue; }} - if (RI[clooppos[1]]) continue; - b9 = dpos[-1]; - if (cstem >= 6) - { if (cbondtype > 0x200) continue; - if (var < 3) continue; - if (YI[b9]) continue; } - if (cloop != 7) continue; - if (ds == MAMMAL_MT) continue; - if (mabondtype < 0x400) - { if ((b8 == Thymine) || (mabondtype < 0x300)) - if (ea < -5.45) - if (chit[nc].stem_energy > -3.2) - if (dbondtype < 0x200) - if (spacer1 > 1) - if ((spacer2 == 1) || (mabondtype < 0x100)) - if ((spacer1 < 3) || (tstem > 3) || - (tbondtype < 0x100)) - if ((spacer1 < 3) || - ((var > 2) && (var < 6) && (tbondtype < 0x2000) && - (tl < 10))) - if (dstem < 5) - if (var >= 2) - if (dl > 2) - if (tl < 15) - if ((b8 != Cytosine) || - (*clooppos == Cytosine)) - if (RI[b9]) - if (*clooppos != Adenine) - if (clooppos[1] == Thymine) - if (RI[cloopend[-2]]) - { s1 = apos1; - s2 = apos2 + astem; - n = 0; - while (--s2 >= apos2) - if (wcbp[*s1++][*s2]) - { if (++n >= 3) break; } - else n = 0; - if (n >= 3) - { energy += 3.0; - if ((abondtype & 0xf) > 0) energy += 2.0; - if (bp[dpos[dstem+1]][b48]) - if (wcbp[dpos[1]][dpos[darm-2]]) - if (var <= 5) - energy += 1.0; } - if (dtbondtype < 0x200) - if (agcat < 2) - if (wcbp[dpos[dstem+1]][b48]) - if (wcbp[dpos[1]][dpos[darm-2]]) - if (wcbp[dpos[2]][dpos[darm-3]]) - if (gcv > 1.2) - if (var <= 5) - if (tstem >= 3) - if (dstem >= 3) - if (tl > 3) - if (tl < 9) - if (dl < 9) - if (spacer1 == 2) - energy += 10.0; } - if ((tbondtype & 0xf) > 0) - if (mabondtype < 0x300) - { if (mabondtype < 0x100) - { if ((spacer1 < 3) || (tstem > 2)) - if (var > 0) - if (YI[*clooppos]) - if ((spacer2 > 0) || (clooppos[1] == Thymine)) - energy += 2.5; } - else - if ((dbondtype & 0xf) > 0) - if (b9 != Cytosine) - if (var <= 7) - if (spacer2 == 1) - if (tarm < 22) - if (gcv > 1.2) - if (dstem >= 4) - { if (tstem >= 5) - energy += 5.0; - else - if (tstem >= 3) - if (tbondtype < 0x100) - energy += 1.0; } - else - if (tstem >= 5) - energy += 1.0; } - else - if ((dbondtype & 0xf) > 0) - { if (tstem >= 5) - if (s[-1] == Guanine) - if (*s == Thymine) - if (s[1] == Thymine) - if (*clooppos == Cytosine) - if (clooppos[1] == Thymine) - if (cloopend[-2] == Adenine) - if (cloopend[-1] == Adenine) - - energy += 1.0; - if (bondtype < 0x1000) - if (cbondtype < 0x100) - energy += 1.0; }} - - if (tstem >= 5) - if (*clooppos == Cytosine) - { - if (dl > 3) - if (dtbondtype < 0x200) - if ((tbondtype & 0xf) > 0) - if (clooppos[1] == Thymine) - { - if (clooppos[2] == Thymine) - if (clooppos[3] == Adenine) - if (clooppos[4] == Cytosine) - if (clooppos[5] == Adenine) - if (cloop == 7) - energy += 0.5; - - if (cgcat >= 4) - if (wcbp[dpos[1]][dpos[darm-2]]) - if (bp[dpos[dstem+1]][b48]) - if (tl < 10) - if (var < 6) - if (spacer1 == 2) - if (spacer2 == 1) - if (dstem >= 3) - energy += 3.0; - - } - - - if (clooppos[1] == Cytosine) - if (clooppos[2] == Cytosine) - if (clooppos[3] == Adenine) - if (clooppos[4] == Thymine) - if (s[-1] == Guanine) - if (*s == Thymine) - if (s[1] == Thymine) - energy += 1.0; - } - - - if (RI[b9]) - { - if (b8 == Thymine) - { - if (clooppos[1] == Thymine) - { - if (cloopend[-2] == Adenine) - { - - if (wcbp[dpos[1]][dpos[darm-2]]) - { - if (*clooppos == Cytosine) - { - if (abondtype < 0x200) - energy += 1.0; - - if (bondtype < 0x10000) - if (dtbondtype < 0x200) - if (agcat >= 3) - if (cgcat >= 4) - if (tl < 10) - if (var < 6) - if (spacer1 == 2) - if (spacer2 == 1) - if (tstem >= 3) - energy += 3.0; - } - - - if (tstem >= 5) - if (s[-1] == Guanine) - if (*s == Thymine) - if (s[1] == Thymine) - { energy += 1.0; - if (tl >= 5) - if (spacer1 == 2) - if (spacer2 == 1) - if (tbondtype < 0x100) - if (wcbp[dpos[dstem+1]][b48]) - energy += 3.0; } - - if (tstem >= 3) - if (tl < 10) - if (spacer1 == 2) - if (spacer2 == 1) - if (RI[cloopend[-1]]) - if (dl > 2) - if (var >= 2) - if (var < 6) - if (ggbp[dpos[dstem+1]][b48]) - { - if (dtbondtype < 0x100) - { energy += 3.5; - if ((bondtype & 0xf00) == 0) - if (*clooppos == Cytosine) - energy += 1.5; } - - - if (bondtype < 0x10000) - if (tstem > 2) - if (tbondtype < 0x200) energy += 2.5; - - if (abondtype < 0x100) - if (wcbp[dpos[2]][dpos[darm-3]]) - energy += 3.0; - - if (tbondtype < 0x100) - if (agcat >= 6) - if (tstem >= 5) - if ((tbondtype & 0xf) > 0) - if (RI[cloopend[-1]]) - if (cgcat >= 4) - energy += 2.0; - } - else - if (!ggstembp[*tpos][apos2[-1]]) - if (wcbp[dpos[dstem+1]][*tpos]) - energy += 1.5; - } - - if ((abondtype & 0xf) < 1) - if (abondtype < 0x100) - if (gcv > 1.2) - if (dl > 3) - if (bp[dpos[dstem+1]][b48]) - if (spacer1 == 2) - if (spacer2 == 1) - if (*clooppos == Cytosine) - energy += 5.0; - - if (cbondtype < 0x100) - if (tbondtype < 0x100) - if (tstem >= 3) - if (dl > 3) - if (var < 6) - if (bp[dpos[dstem+1]][b48]) - if (spacer1 == 2) - if (spacer2 == 1) - energy += 2.5; - - } - - if (stembp[dpos[dstem+1]][b48]) - { - - if (*clooppos == Thymine) - if (cloopend[-2] == Guanine) - if (clooppos[2] == Guanine) - if (clooppos[3] == Thymine) - if (clooppos[4] == Guanine) - if (dl > 2) - energy += 1.0; - - if (cbondtype < 0x100) - if (dbondtype < 0x10000) - if (wcbp[dpos[1]][dpos[darm-2]]) - if (var < 6) - if (tstem >= 3) - if (gcv >= 1.2) - if (dl > 3) - energy += 1.0; - - if (tstem >= 5) - if (dtbondtype < 0x200) - if (*clooppos == Cytosine) - if (spacer1 == 2) - if (spacer2 == 1) - if (RI[cloopend[-2]]) - energy += 0.5; - } - - } - - - if (tstem > 2) - if (tarm < 28) - if (spacer1 == 2) - if (spacer2 == 1) - if (dl > 3) - if (j < 1) - if (k > ti) - if (ggstembp[dpos[dstem+1]][b48]) - energy += 2.5; - - if (dtbondtype < 0x100) - if ((tbondtype & 0xf) > 0) - if (bp[dpos[dstem+1]][b48]) - if (b9 == Adenine) - if ((dbondtype & 0xf) > 0) energy += 2.0; - else - if (spacer2 == 1) - energy += 0.5; - if (cloopend[-2] == Adenine) - if (cloopend[-1] == Adenine) - if (cbondtype < 0x2000) - if (spacer1 > 1) - if (dl > 2) - if (var < 6) - energy += 0.75; - - - } - - if (var > 2) - if (dl > 2) - { - if (cbondtype < 0x200) - if (((mabondtype & 0xf) > 3) || (bondtype < 0x1000)) - { - if (bp[dpos[dstem+1]][b48]) - energy += 1.0; - - if (cbondtype < 0x100) - if (dbondtype < 0x100) - if (bp[b8][dpos[dstem]]) - if (bp[b8][dpos[darm-dstem-1]]) - if (var < 6) - if (tstem >= 3) - if (tl < 10) - if (spacer1 == 2) - if (spacer2 == 1) - if (clooppos[1] == Thymine) - if (cloopend[-2] == Adenine) - energy += 3.0; - } - - if (clooppos[1] == Thymine) - if (RI[cloopend[-2]]) - { - if (*clooppos == Cytosine) - { - - if (dtbondtype < 0x200) - if (agcat >= 3) - if (cgcat >= 4) - if (var < 6) - if (tstem >= 3) - if (tl < 10) - if (spacer1 == 2) - if (spacer2 == 1) - { if (abondtype > 0x20000) - if (bp[dpos[dstem+1]][b48]) - energy += 7.0; - if (agcat >= 6) energy += 2.0; } - - if ((bondtype & 0xf00) == 0) - if (gcv > 5.0) - if (s[-1] == Guanine) - if (*s == Thymine) - if (tstem >= 5) - if (var < 6) - if (tl < 10) - if (spacer1 == 2) - if (spacer2 == 1) - energy += 2.0; - - if (abondtype < 0x100) - if (cbondtype < 0x10000) - if (bp[dpos[dstem+1]][b48]) - if (cgcat >= 4) - if (tstem >= 3) - if (var < 6) - if (tstem >= 3) - if (tl < 10) - if (spacer1 == 2) - if (spacer2 == 1) - energy += 1.5; - } - - - if (dtbondtype < 0x100) - if (agcat >= 4) - if (cgcat >= 4) - if (var < 6) - if (tstem >= 3) - if (tl < 10) - { if (spacer1 == 2) - { if (abondtype < 0x3000) - if (stackbp[dpos[dstem+1]][b48]) - energy += 3.0; - if (b8 == Thymine) - if (s[-1] == Guanine) - if (*s == Thymine) - if (s[1] == Thymine) - energy += 3.5; } - if (agcat >= 6) - if (YI[*clooppos]) - if (s[-1] == Guanine) - if (*s == Thymine) - if ((dtbondtype & 0xf) > 0) - energy += 3.0; } - - if (mabondtype < 0x10000) - if (dtbondtype < 0x400) - if (agcat >= 5) - if (cgcat >= 3) - if (tl < 10) - if (var < 6) - if (spacer1 == 2) - if (spacer2 == 1) - { - - if (dtbondtype < 0x200) - if (cbondtype < 0x300) - if (bondtype < 0x10000) - if (tstem >= 3) - energy += 1.0; - - if (tstem >= 5) - if (s[-1] == Guanine) - energy += 4.0; - } - } - } - - } - else - if (bondtype < 0x10000) - if (mabondtype < 0x500) - if (dbondtype < 0x100) - if (b8 == Thymine) - if (agcat >= 4) - if (clooppos[1] == Thymine) - if (cloopend[-2] == Adenine) - if (cloopend[-1] == Adenine) - if (spacer1 == 2) - if (spacer2 == 1) - if (dstem >= 3) - if (tstem >= 5) - if (dl > 2) - if (tl < 10) - if (var < 6) - energy += 7.0; - - if (agcat >= 5) - if (cgcat >= 4) - if ((acbondtype & 0xf) >= 3) - { if (tbondtype < 0x100) - if ((dbondtype & 0xf) > 0) - if ((((dbondtype >> 4) + dbondtype) & 0xf) >= 3) - if (wcbp[dpos[dstem+1]][b48]) - if (b8 == Thymine) - if (RI[b9]) - if (clooppos[1] == Thymine) - if (YI[*clooppos]) - if (RI[cloopend[-2]]) - if (RI[cloopend[-1]]) - if (spacer1 == 2) - if (spacer2 == 1) - if (dl > 2) - if (tl < 10) - if (var < 6) - if (var > 2) - energy += 6.0; - if (cgcat >= 5) - if (abondtype < 0x10000) - if (bp[dpos[dstem+1]][b48]) - if (clooppos[1] == Thymine) - if (YI[*clooppos]) - if (RI[cloopend[-2]]) - if (dl > 2) - if (tl < 10) - if (var < 6) - if (var > 2) - energy += 6.0; } - - if (energy >= dtthresh) - energy -= (0.9*(energy - dtthresh) + 5.0); - else continue; } - - /* remember fully formed mttRNA gene if threshold reached */ - - if (energy < thresh) continue; - te.energy = energy; - thresh = energy; - te.ps = apos1; - te.spacer1 = spacer1; - te.dstem = dstem; - te.dloop = dl; - te.spacer2 = spacer2; - te.cstem = cstem; - te.cloop = cloop; - te.var = var; - te.varbp = (var > 17)?varbp:0; - te.tstem = tstem; - te.tloop = tl; - k = astem + spacer1 + darm + spacer2; - te.anticodon = k + cstem + 2; - te.nintron = 0; - te.intron = 0; - te.nbase = k + carm + var + 2*tstem + tl; - tastem = astem; - tastem8 = astem8; - tastem8d = astem8d; - } } - - /* for highest energy mttRNA gene */ - /* decide astem length, look for NCCA acceptor tail */ - /* and calculate total length */ - - if (te.ps) - { apos2 = te.ps + te.nbase; - if (extastem) - if (tastem8d) - { te.astem1 = 8; - te.astem2 = 8; - te.ps--; - te.nbase++; - te.anticodon++; - as = aatail(apos2+8,&aext,sw); } - else - { te.astem1 = tastem; - te.astem2 = tastem; - as = aatail(apos2+tastem,&aext,sw); - if (tastem8) - { as8 = aatail(apos2+8,&aext8,sw); - if (as8 >= as) - { te.ps--; - te.nbase++; - te.anticodon++; - te.astem1 = 8; - te.astem2 = 8; - as = as8; - aext = aext8; }}} - else - { te.astem1 = tastem; - te.astem2 = tastem; - as = aatail(apos2+tastem,&aext,sw); } - if (as < 2) aext = 1; - te.nbase += te.astem2; - nbasefext = te.nbase + ASTEM2_EXT; - te.nbase += aext; - - /* store mttRNA gene if there are no */ - /* higher energy overlapping mttRNA genes */ - - te.start = (long)(te.ps - seq); - if (tn = find_slot(d,&te,&nts,sw)) - { base_copy3(te.ps,te.seq,nbasefext); - base_copy3(te.ps,te.eseq,nbasefext); - te.aatail = aext; - *tn = te; }} - } - return(nts); } - - - -int tmopt(data_set *d, - trna_loop *th, int tarm, double the, - trna_loop *ahit, int nah, - int nts,int *seq, csw *sw) -{ int r,na,nr,nrh,ibase,flag,as,aext,nbasefext; - int *s,*v,*s1,*s2,*sa,*sb,*se,*sf,*ps,*tpos,pseq[MAXETRNALEN+1]; - static int gtemplate[6] = { 0x00,0x00,0x11,0x00,0x00,0x00 }; - static double A[6] = { 6.0,0.0,0.0,0.0,0.0,0.0 }; - static double Ar[6] = { 10.0,0.0,0.0,0.0,0.0,0.0 }; - static double Cr[6] = { 0.0,10.0,0.0,0.0,0.0,0.0 }; - static double G[6] = { 0.0,0.0,6.0,0.0,0.0,0.0 }; - static double Ga[6] = { 0.0,0.0,7.0,0.0,0.0,0.0 }; - static double K[6] = { 0.0,0.0,6.0,6.0,0.0,0.0 }; - static double Tr[6] = { 0.0,0.0,0.0,10.0,0.0,0.0 }; - double e,energy,penergy,tenergy,aenergy,athresh,cthresh,cathresh; - static double bem[6][6] = - { { -1.072,-0.214,-1.072, ATBOND, 0.000, 0.000 }, - { -0.214,-1.072, 3.000,-1.072, 0.000, 0.000 }, - { -1.072, 3.000,-1.072, 1.286, 0.000, 0.000 }, - { ATBOND,-1.072, 1.286,-0.214, 0.000, 0.000 }, - { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 }, - { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 } }; - static trna_loop rhit[NH]; - gene te,*tn; - static gene t = - { "",{TERM},{TERM},NULL,0,0,0L,0L,7,7,1,0,0,0,13,8,0,28,0,0,3,0,5,7, - tmRNA,0.0,0,0,0 }; - tpos = th->pos; - flag = 0; - te.energy = sw->tmrnathresh; - athresh = sw->tmathresh; - cthresh = sw->tmcthresh; - cathresh = sw->tmcathresh; - s = tpos + tarm + 4; - v = tpos + th->stem - 10; - energy = K[*v] + G[v[1]] + A[v[2]]; - e = K[v[1]] + G[v[2]] + A[v[3]]; - if (e > energy) energy = e; - if (energy < 18.0) energy = 0.0; - tenergy = Tr[*s]+Cr[s[1]]+Cr[s[2]]+Ar[s[3]] + energy + 1.59*the; - nrh = find_resume_seq(tpos-MAXTPTSDIST,TPWINDOW,rhit,NH,sw); - nr = -1; - while (++nr < nrh) - { ps = rhit[nr].pos; - penergy = tenergy + rhit[nr].energy - 0.001*((double)(tpos - ps)); - if (rhit[nr].stem < 24) penergy -= 15.0; - na = -1; - while (++na < nah) - { aenergy = ahit[na].energy; - if (aenergy < athresh) continue; - t.ps = ahit[na].pos; - if (t.ps < (ps - MAXTPDIST)) continue; - if (t.ps > (ps - MINTPDIST)) break; - energy = -INACTIVE; - sa = t.ps + t.astem1; - for (sb=sa+9, se=sb+t.cstem; sb <= (sa+16); sb++,se++) - for (sf = tpos-3; sf >= (tpos-7); sf--) - { s1 = sb; - s2 = sf; - e = bem[*s1++][*--s2]; - while (s1 < se) e += bem[*s1++][*--s2]; - if (e > energy) - { energy = e; - t.var = (int)(tpos - sf); - t.dloop = (int)(sb - sa); }} - if (energy < cthresh) continue; - energy += aenergy; - if (energy < cathresh) continue; - sb = sa + 3; - sf = sa + 7; - r = gtemplate[*sb++]; - while (sb < sf) - { r = (r >> 4) + gtemplate[*sb++]; - if ((r & 3) == 2) - { energy += 14.0; - break; }} - t.energy = penergy + Ga[t.ps[1]] + Ga[t.ps[2]] + energy; - if (t.energy > te.energy) - { flag = 1; - t.tstem = th->stem; - t.tloop = th->loop; - t.tps = (int)(ps - t.ps); - t.tpe = t.tps + rhit[nr].stem; - ibase = (int)(tpos - t.ps); - t.nintron = ibase - t.var - 2*t.cstem - - t.dloop - t.astem1; - t.nbase = ibase + tarm + t.astem2 - t.nintron; - te = t; }}} - if (flag) - { te.start = (long)(te.ps - seq); - s = te.ps + te.nbase + te.nintron; - as = aatail(s,&aext,sw); - nbasefext = te.nbase + ASTEM2_EXT; - te.nbase += aext; - tn = find_slot(d,&te,&nts,sw); - if (tn) - { te.intron = te.astem1 + te.dloop + te.cstem; - te.asst = 0; - base_copy3(te.ps,te.eseq,nbasefext+te.nintron); - remove_intron(te.ps,pseq,nbasefext, - te.intron,te.nintron); - base_copy3(pseq,te.seq,te.nbase); - te.aatail = aext; - *tn = te; }} - return(nts); } - - -int tmopt_perm(data_set *d, - trna_loop *th, int tarm, double the, - trna_loop *ahit, int nah, - int nts, int *seq, csw *sw) -{ int r,na,nr,nrh,flag,as,aext; - int *s,*v,*s1,*s2,*sa,*sb,*se,*sf,*ps,*apos,*tpos; - static int gtemplate[6] = { 0x00,0x00,0x11,0x00,0x00,0x00 }; - double e,energy,penergy,tenergy,aenergy,athresh,cthresh,cathresh; - static double A[6] = { 6.0,0.0,0.0,0.0,0.0,0.0 }; - static double Ar[6] = { 10.0,0.0,0.0,0.0,0.0,0.0 }; - static double Cr[6] = { 0.0,10.0,0.0,0.0,0.0,0.0 }; - static double G[6] = { 0.0,0.0,6.0,0.0,0.0,0.0 }; - static double Ga[6] = { 0.0,0.0,7.0,0.0,0.0,0.0 }; - static double K[6] = { 0.0,0.0,6.0,6.0,0.0,0.0 }; - static double Tr[6] = { 0.0,0.0,0.0,10.0,0.0,0.0 }; - static double bem[6][6] = - { { -1.072,-0.214,-1.072, ATBOND, 0.000, 0.000 }, - { -0.214,-1.072, 3.000,-1.072, 0.000, 0.000 }, - { -1.072, 3.000,-1.072, 1.286, 0.000, 0.000 }, - { ATBOND,-1.072, 1.286,-0.214, 0.000, 0.000 }, - { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 }, - { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 } }; - static trna_loop rhit[NH]; - gene te,*tn; - static gene t = - { "",{TERM},{TERM},NULL,0,0,0L,0L,7,7,1,0,0,0,13,8,0,28,0,0,3,0,5,7, - tmRNA,0.0,0,0,0 }; - tpos = th->pos; - flag = 0; - te.energy = sw->tmrnathresh; - athresh = sw->tmathresh; - cthresh = sw->tmcthresh; - cathresh = sw->tmcathresh; - s = tpos + tarm + 4; - v = tpos + th->stem - 10; - energy = K[*v] + G[v[1]] + A[v[2]]; - e = K[v[1]] + G[v[2]] + A[v[3]]; - if (e > energy) energy = e; - if (energy < 18.0) energy = 0.0; - tenergy = Tr[*s]+Cr[s[1]]+Cr[s[2]]+Ar[s[3]]+ energy + 1.59*the; - na = -1; - while (++na < nah) - { aenergy = ahit[na].energy; - if (aenergy < athresh) continue; - apos = ahit[na].pos; - if (apos < (tpos + MINTSTEM_DIST)) continue; - if (apos > (tpos + MAXTSTEM_DIST + MAXPPINTRONDIST)) break; - energy = -INACTIVE; - sa = apos + t.astem1; - for (sb=sa+9, se=sb+t.cstem; sb <= (sa+16); sb++,se++) - for (sf = tpos-3; sf >= (tpos-7); sf--) - { s1 = sb; - s2 = sf; - e = bem[*s1++][*--s2]; - while (s1 < se) e += bem[*s1++][*--s2]; - if (e > energy) - { energy = e; - t.var = (int)(tpos - sf); - t.dloop = (int)(sb - sa); }} - if (energy < cthresh) continue; - energy += aenergy; - if (energy < cathresh) continue; - sb = sa + 3; - sf = sa + 7; - r = gtemplate[*sb++]; - while (sb < sf) - { r = (r >> 4) + gtemplate[*sb++]; - if ((r & 3) == 2) - { energy += 14.0; - break; }} - penergy = tenergy + Ga[apos[1]] + Ga[apos[2]] + energy; - nrh = find_resume_seq(apos+MINTPDIST,TPWINDOW,rhit,NH,sw); - nr = -1; - while (++nr < nrh) - { ps = rhit[nr].pos; - t.energy = penergy + rhit[nr].energy; - if (rhit[nr].stem < 24) t.energy -= 15.0; - if (t.energy > te.energy) - { flag = 1; - t.tstem = th->stem; - t.tloop = th->loop; - t.asst = (long)(apos - tpos) + t.var + t.cstem; - t.ps = tpos - t.var - t.cstem; - t.tps = (int)(ps - t.ps); - t.tpe = t.tps + rhit[nr].stem; - te = t; }}} - if (flag) - { te.start = (long)(te.ps - seq) - 54; - te.intron = te.cstem + te.var + 2*te.tstem + te.tloop + - te.astem2; - as = aatail(te.ps + te.intron,&aext,sw); - te.aatail = aext; - base_copy3(te.ps-54,te.eseq,te.tpe+1+TMPTRAILER); - te.nbase = te.astem1 + te.dloop + te.cstem; - base_copy3(te.ps+te.asst,te.seq,te.nbase); - base_copy3(te.ps,te.seq+te.nbase,te.intron + ASTEM2_EXT); - te.intron += aext; - te.nbase += te.intron; - te.nintron = te.tpe - te.nbase + 1 + TMPTRAILER; - te.intron += 54; - te.tps += 54; - te.tpe += 54; - te.asst += 54; - tn = find_slot(d,&te,&nts,sw); - if (tn) *tn = te; } - return(nts); } - - - - - -int ti_genedetected(data_set *d, int nts, int *seq, gene *te, csw *sw) -{ int as,aext,as8,aext8,nbasefext,*s; - int pseq[2*MAXETRNALEN+1]; - gene *tn; - te->nbase = te->astem1 + te->spacer1 + te->spacer2 + 2*te->dstem + - te->dloop + 2*te->cstem + te->cloop + - te->var + 2*te->tstem + te->tloop + te->astem2; - s = te->ps + te->nbase + te->nintron; - as = aatail(s,&aext,sw); - if (sw->extastem) - if (te->astem1 == 7) - if (bp[te->ps[-1]][*s]) - { as8 = aatail(s+1,&aext8,sw); - if (as8 >= as) - { te->ps--; - te->nbase += 2; - te->anticodon++; - if (te->nintron > 0) te->intron++; - te->astem1 = 8; - te->astem2 = 8; - as = as8; - aext = aext8; }} - nbasefext = te->nbase + ASTEM2_EXT; - te->nbase += aext; - te->start = (long)(te->ps - seq); - tn = find_slot(d,te,&nts,sw); - if (tn) - { if (te->nintron == 0) - base_copy3(te->ps,te->seq,nbasefext); - else - { base_copy3(te->ps,te->eseq,nbasefext + te->nintron); - remove_intron(te->ps,pseq,nbasefext, - te->intron,te->nintron); - base_copy3(pseq,te->seq,nbasefext); } - te->aatail = aext; - *tn = *te; } - return(nts); } - - -int tmioptimise(data_set *d, int *seq, int lseq, int nts, csw *sw) -{ int i,j,k,intron,nt,nth,nd1,nd2,ndx,ndh,na,nah,nppah,nc,nch,tfold,tarm; - int dstem,dloop,flag,mindist,maxdist,tmindist,tmaxdist,tmmindist,tmmaxdist; - int tarmthresh,tmstrict,sp2min,sp2max,ige[7]; - int *se,*sc,*sb,*si,*tpos,*tend,*apos,*dpos,*tloopfold,*tmv,*cend; - int *s1,*s2,*sd,*sf,*sl,*sg1,*sg2,*cposmin,*cposmax,*cpos; - unsigned int r,q,c; - double e,ec,he,the,thet,ethresh,energy,cenergy,denergy,ienergy; - double tdarmthresh,genergy,energy2,energyf,energyf6; - static unsigned int TT[6] = - { 0x00, 0x00, 0x00, 0x11, 0x00, 0x00 }; - static unsigned int GG[6] = - { 0x00, 0x00, 0x11, 0x00, 0x00, 0x00 }; - static unsigned int ct[6] = { 0,0,0,0,0,0 }; - static unsigned int cA[6] = { 0,0,0,2,0,0 }; - static unsigned int cC[6] = { 0,0,2,0,0,0 }; - static unsigned int cG[6] = { 0,2,0,1,0,0 }; - static unsigned int cT[6] = { 2,0,1,0,0,0 }; - static int yic[9] = { 1,0,0,0,0,0,0,0,0 }; - static int tic[9] = { 1,1,0,0,0,0,0,0,0 }; - static int a1ic[9] = { 1,1,1,0,0,0,0,0,0 }; - static int a2ic[9] = { 1,1,1,1,0,0,0,0,0 }; - static int a3ic[9] = { 1,1,1,1,1,0,0,0,0 }; - static int ric[9] = { 1,1,1,1,1,1,0,0,0 }; - static int goffb[13] = { 0,0,0,0,1,2,2,2,2,2,2,2,2 }; - static int goffe[13] = { 0,0,0,0,2,3,4,4,5,6,6,6,6 }; - static int cY[6] = { 0,1,0,1,0,0 }; - static int cR[6] = { 1,0,1,0,0,0 }; - static double ilw = 0.002; - static double G[6] = { 0.0,0.0,6.0,0.0,0.0,0.0 }; - static double T[6] = { 0.0,0.0,0.0,7.0,0.0,0.0 }; - static double Y[6] = { 0.0,3.0,0.0,3.0,0.0,0.0 }; - static double R[6] = { 2.0,0.0,2.0,0.0,0.0,0.0 }; - static double YP[6] = { 0.0,3.0,0.0,3.0,0.0,0.0 }; - static double RP[6] = { 2.0,0.0,2.0,0.0,0.0,0.0 }; - static double RI[6] = { 0.1,0.0,0.05,0.0,0.0,0.0 }; - static double GI[6] = { 0.0,0.0,0.1,0.0,0.0,0.0 }; - static double YI[6] = { 0.0,0.1,0.0,0.1,0.0,0.0 }; - static double AI[6] = { 1.0,0.0,0.0,0.0,0.0,0.0 }; - static double GC[6] = { 0.0,1.5,6.0,0.0,0.0,0.0 }; - static double G3[6] = { 0.0,6.0,12.0,12.0,0.0,0.0 }; - static double dR[6] = { 6.0,0.0,6.0,0.0,0.0,0.0 }; - static double RH[6] = { 3.0,0.0,3.0,0.0,0.0,0.0 }; - static double AGT[6] = { 6.0,0.0,6.0,6.0,0.0,0.0 }; - static double dT[6] = { 0.0,0.0,0.0,6.0,0.0,0.0 }; - static double dbem[6][6] = - { { -2.144,-0.428,-2.144, ATBOND, 0.000, 0.000 }, - { -0.428,-2.144, 3.000,-2.144, 0.000, 0.000 }, - { -2.144, 3.000,-2.144, 1.286, 0.000, 0.000 }, - { ATBOND,-2.144, 1.286,-0.428, 0.000, 0.000 }, - { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 }, - { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 } }; - static double dfem[6][6] = - { { -4.000,-4.000,-4.000, ATBOND, 0.000, 0.000 }, - { -4.000,-4.000, 3.000,-4.000, 0.000, 0.000 }, - { -4.000, 3.000,-4.000, 1.286, 0.000, 0.000 }, - { ATBOND,-4.000, 1.286,-4.000, 0.000, 0.000 }, - { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 }, - { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 } }; - static double cbem[6][6] = - { { -1.072,-0.214,-1.072,2.0*ATBOND, 0.000, 0.000 }, - { -0.214,-1.072, 6.000,-1.072, 0.000, 0.000 }, - { -1.072, 6.000,-1.072, 3.400, 0.000, 0.000 }, - { 2.0*ATBOND,-1.072, 3.400,-0.214, 0.000, 0.000 }, - { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 }, - { 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 } }; - static trna_loop thit[NTH],chit[NC],ahit[NA]; - static trna_dloop dhit[ND]; - gene te; - static gene t = - { "",{TERM},{TERM},NULL,0,0,0L,0L,7,7,1,2,1,3,9,5,7,0,0,0,15,0,5,7, - tRNA,0.0,0,0,0 }; - if (sw->mtrna) - { nts = find_mt_trna(d,seq,lseq,nts,sw); - if (!sw->tmrna) return(nts); } - ethresh = sw->trnathresh; - tmmindist = MINTPTSDIST + MINTPDIST; - tmmaxdist = MAXTPTSDIST + MAXTPDIST; - tmindist = (MINTRNALEN + sw->minintronlen - MAXTSTEM_DIST); - tmaxdist = (MAXTRNALEN + sw->maxintronlen - MINTSTEM_DIST); - if (sw->trna) - { if (sw->tmrna) - { mindist = (tmindist < tmmindist)?tmindist:tmmindist; - maxdist = (tmaxdist > tmmaxdist)?tmaxdist:tmmaxdist; } - else - { mindist = tmindist; - maxdist = tmaxdist; }} - else - { mindist = tmmindist; - maxdist = tmmaxdist; } - tarmthresh = sw->ttarmthresh; - tdarmthresh = sw->tdarmthresh; - tmstrict = sw->tmstrict; - sp2min = sw->sp2min; - sp2max = sw->sp2max; - nth = find_tstems(seq,lseq,thit,NTH,sw); - nt = -1; - while (++nt < nth) - { tpos = thit[nt].pos; - t.tloop = thit[nt].loop; - t.tstem = thit[nt].stem; - tfold = tpos[-1]; - tloopfold = tpos + t.tstem + 1; - tarm = 2*t.tstem + t.tloop; - tend = tpos + tarm; - tmv = tpos - VARMIN; - flag = 0; - te.energy = ethresh; - the = thit[nt].energy; - nah = find_astem5(tpos-maxdist,tpos-mindist,tend,7,ahit,NA,sw); - if (sw->tmrna) - { thet = the - G[tpos[t.tstem]] - G[tpos[t.tstem+1]]; - if (tmstrict) - { if (thet >= tarmthresh) - nts = tmopt(d,thit+nt,tarm,thet,ahit,nah,nts,seq,sw); } - else - nts = tmopt(d,thit+nt,tarm,the,ahit,nah,nts,seq,sw); - nppah = find_astem5(tpos+MINPPASDIST,tpos+MAXPPASDIST, - tend,7,ahit+nah,NA-nah,sw); - nts = tmopt_perm(d,thit+nt,tarm,the,ahit+nah,nppah,nts,seq,sw); - if (thet < tarmthresh) continue; - the = thet; } - if (!sw->trna) continue; - na = -1; - while (++na < nah) - { apos = ahit[na].pos; - if (apos < (tpos - tmaxdist)) continue; - if (apos > (tpos - tmindist)) break; - he = the + ahit[na].energy; - - /* find dstems */ - - ndh = 0; - sc = apos + 8; - energyf = dfem[sc[5]][tfold]; - sl = sc + sw->sp1max; - while (sc < sl) - { energy2 = dT[sc[-2]] + RH[*(sc-1)] + GC[*sc] + dfem[sc[-2]][sc[4]]; - energyf6 = dfem[sc[6]][tfold]; - for (dstem = 3; dstem <= 4; dstem++) - { sd = sc + dstem; - dloop = 3; - se = sd + dloop; - energy = energy2 + 6.0 + dR[*(se-1)] + energyf; - if (dstem == 3) - if (energyf < 0.0) energyf = energyf6; - se += dstem; - s1 = sc; - s2 = se; - sf = s1 + dstem; - while (s1 < sf) energy += dbem[*s1++][*--s2]; - if (energy >= tdarmthresh) - { if (ndh >= ND) goto DFL; - dhit[ndh].pos = sc; - dhit[ndh].end = se; - dhit[ndh].loop = dloop; - dhit[ndh].stem = dstem; - dhit[ndh].energy = energy; - ndh++; } - sg1 = sd + 1; - sg2 = sd + 6; - q = GG[*sg1++]; - ige[1] = q & 3; - j = 2; - while (sg1 <= sg2) - { q = (q >> 4) + GG[*sg1++]; - ige[j++] = q & 3; } - for (dloop = 4; dloop <= 11; dloop++) - { j = goffb[dloop]; - k = goffe[dloop]; - c = ige[j++]; - while (j <= k) c = c | ige[j++]; - genergy = G3[c]; - se = sd + dloop; - energy = energy2 + genergy + dR[*(se-1)] + energyf; - se += dstem; - s1 = sc; - s2 = se; - sf = s1 + dstem; - while (s1 < sf) energy += dbem[*s1++][*--s2]; - if (energy >= tdarmthresh) - { if (ndh >= ND) goto DFL; - dhit[ndh].pos = sc; - dhit[ndh].end = se; - dhit[ndh].loop = dloop; - dhit[ndh].stem = dstem; - dhit[ndh].energy = energy; - ndh++; }}} - s1 = sc; - s2 = sc + 16; - sd = sc + 6; - j = bp[*s1][*--s2]; - while (++s1 < sd) j += bp[*s1][*--s2]; - if (j >= 6) - { energy = dT[sc[-1]] + RH[*sc] + GC[*(sc+1)] + energyf6; - energy += G[*++sd]; - energy += G[*++sd]; - energy += AGT[*++sd] + dfem[sc[-1]][sc[4]]; - sd += 7; - s1 = sc; - s2 = sd; - sf = s1 + 6; - while (s1 < sf) energy += dbem[*s1++][*--s2]; - if (energy >= tdarmthresh) - { if (ndh >= ND) goto DFL; - dhit[ndh].pos = sc; - dhit[ndh].end = sd; - dhit[ndh].loop = 4; - dhit[ndh].stem = 6; - dhit[ndh].energy = energy; - ndh++; }} - s1 = sc; - s2 = sc + 18; - sd = sc + 7; - j = bp[*s1][*--s2]; - while (++s1 < sd) j += bp[*s1][*--s2]; - if (j >= 7) - { energy = energy2 + dfem[sc[7]][tfold]; - energy += G[*++sd]; - energy += G[*++sd]; - energy += AGT[*++sd]; - sd += 8; - s1 = sc; - s2 = sd; - sf = s1 + 7; - while (s1 < sf) energy += dbem[*s1++][*--s2]; - if (energy >= tdarmthresh) - { if (ndh >= ND) goto DFL; - dhit[ndh].pos = sc; - dhit[ndh].end = sd; - dhit[ndh].loop = 4; - dhit[ndh].stem = 7; - dhit[ndh].energy = energy; - ndh++; }} - energyf = energyf6; - sc++; } - goto DFN; - DFL: - fprintf(stderr,"Too many D-stem hits\n"); - DFN: - - /* End of find dstems routine */ - - nd1 = ndh; - while (--nd1 >= 0) - { dstem = dhit[nd1].stem; - dpos = dhit[nd1].pos; - if ((int)(dpos - apos) < 9) - dhit[nd1].energy -= 3.0; - if (*tloopfold == Guanine) - { sb = dpos + dstem + 2; - sc = sb; - se = sb + t.dloop - 3; - r = TT[*sb++]; - while (sb < se) - { r = (r >> 4) + TT[*sb++]; - if (r & 2) - { dhit[nd1].energy += 10.0; - break; }} - r = GG[*sc++]; - while (sc < se) - { r = (r >> 4) + GG[*sc++]; - if (r & 2) - { dhit[nd1].energy -= 12.0; - break; }}}} - nd1 = ndh; - while (--nd1 >= 0) - { if (!dhit[nd1].end) continue; - cpos = dhit[nd1].end; - denergy = dhit[nd1].energy; - ndx = nd1; - nd2 = nd1; - while (--nd2 >= 0) - { if (dhit[nd2].end != cpos) continue; - e = dhit[nd2].energy; - if (e > denergy) - { denergy = e; - dhit[ndx].end = NULL; - ndx = nd2; }}} - nd1 = ndh; - while (--nd1 >= 0) - { if (!dhit[nd1].end) continue; - cposmin = dhit[nd1].end; - cposmax = cposmin; - break; } - nd2 = nd1; - while (--nd2 >= 0) - { if (!(cpos = dhit[nd2].end)) continue; - if (cpos < cposmin) cposmin = cpos; - if (cpos > cposmax) cposmax = cpos; } - for (cpos = cposmin + sp2min; cpos <= (cposmax + sp2max); cpos++) - { denergy = -INACTIVE; - ndx = -1; - nd1 = ndh; - while (--nd1 >= 0) - { if (!dhit[nd1].end) continue; - if ((dhit[nd1].end + sp2max) < cpos) continue; - if ((dhit[nd1].end + sp2min) > cpos) continue; - e = dhit[nd1].energy; - if (e > denergy) - { denergy = e; - ndx = nd1; }} - if (ndx < 0) continue; - denergy += he; - if (denergy < (te.energy - 49.0)) continue; - - /* find cstems */ - - nch = 0; - si = cpos; - sc = cpos + 5; - se = cpos + 4; - ct[0] = cA[*se]; - ct[1] = cC[*se]; - ct[2] = cG[*se]; - ct[3] = cT[*se]; - while (--se >= cpos) - { ct[0] = (ct[0] << 4) + cA[*se]; - ct[1] = (ct[1] << 4) + cC[*se]; - ct[2] = (ct[2] << 4) + cG[*se]; - ct[3] = (ct[3] << 4) + cT[*se]; } - si += 11; - se = tmv - VARDIFF - 5; - if (si < se) si = se; - r = ct[*si++]; - r = (r >> 4) + ct[*si++]; - r = (r >> 4) + ct[*si++]; - r = (r >> 4) + ct[*si++]; - while (si < tmv) - { r = (r >> 4) + ct[*si++]; - if ((r & 0xf) >= 5) - { if (nch >= NC) - { fprintf(stderr,"Too many cstem hits\n"); - goto FN; } - chit[nch].pos = si; - chit[nch].stem = 5; - chit[nch].loop = (int)(si - sc - 5); - if (chit[nch].loop == 9) - if (bp[*sc][si[-6]]) - if (cY[sc[2]]) - if (cR[sc[6]]) - if (cY[sc[1]]) - { chit[nch].stem = 6; - chit[nch].loop = 7; } - s1 = cpos; - s2 = si; - se = s1 + chit[nch].stem; - chit[nch].energy = cbem[*s1++][*--s2]; - while (s1 < se) - chit[nch].energy += cbem[*s1++][*--s2]; - nch++; }} - FN: - - /* end of find cstems routine */ - - nc = -1; - while (++nc < nch) - { energy = denergy + chit[nc].energy; - if (energy < (te.energy - 19.0)) continue; - cend = chit[nc].pos; - t.var = (int)(tpos - cend); - t.cloop = chit[nc].loop; - t.cstem = chit[nc].stem; - intron = 0; - if (t.cloop < 9) - { if (sw->minintronlen > 0) continue; - if (sw->cloop7) - if (t.cloop != 7) continue; - t.nintron = 0; - if (t.var > 17) energy += vloop_stability(cend,t.var,&t.varbp); - sb = cpos + t.cstem; - energy += T[*(sb + 1)] + Y[*(sb)] + R[*(sb + 5)] - - 0.05*t.var - ((t.cloop == 7)?0.0:6.0); } - else - { t.nintron = t.cloop - 7; - if (t.nintron > sw->maxintronlen) continue; - if (t.nintron < sw->minintronlen) continue; - if (t.var > 17) energy += vloop_stability(cend,t.var,&t.varbp); - if (energy < (te.energy - 9.0)) continue; - t.cloop = 7; - sb = cpos + t.cstem; - se = sb + t.nintron; - if (sw->ifixedpos) - { intron = 6; - cenergy = YP[*sb] + T[sb[1]] + RP[sb[5]]; } - else - { cenergy = YP[*se] + T[*(se+1)] + RP[*(se+5)]; - ienergy = cenergy + RI[*sb] + GI[*(se-1)] + - AI[se[-2]]*YI[se[-1]]; - for (j = 1; j <= 7; j++) - { si = se + j - 1; - ec = YP[*(sb + yic[j]*t.nintron)] + - T[*(sb + tic[j]*t.nintron + 1)] + - RP[*(sb + ric[j]*t.nintron + 5)]; - e = ec + RI[*(sb + j)] + GI[*si] + AI[si[-1]]*YI[*si]; - if (j == 6) e += 0.01; - if (e > ienergy) - { ienergy = e; - cenergy = ec; - intron = j; }}} - energy += cenergy - 10.0 - ilw*(t.nintron + 1.1*t.var); - if (t.nintron >= 130) - { si = se + intron; - j = si[-1]; - if (j != Guanine) - { if (si[-2] != Adenine) energy -= 4.0; - if (j != Cytosine) - if (j != Thymine) - energy -= 8.0; }}} - dstem = dhit[ndx].stem; - dpos = dhit[ndx].pos; - if (dstem >= 6) - { if (sb[2 + a1ic[intron]*t.nintron] != Thymine) continue; - if (sb[3 + a2ic[intron]*t.nintron] != Cytosine) continue; - if (sb[4 + a3ic[intron]*t.nintron] != Adenine) continue; - energy += 3.0; } - else - if (!(dpos[-1] & 5)) - { i = 0; - si = cend; - se = cend + 4; - while (si < se) - { if (!(*si++ & 5)) - { if (++i >= 2) - { energy += 3.0; - break; }} - else - i = 0; }} - if (t.cstem >= 6) - { if (sb[2 + a1ic[intron]*t.nintron] == Cytosine) - if (sb[3 + a2ic[intron]*t.nintron] == Thymine) - if (sb[4 + a3ic[intron]*t.nintron] == Adenine) - energy += 4.0; } - if (energy < ethresh) continue; - t.energy = energy; - t.astem1 = (t.dstem < 6)?7:((t.tstem < 5)?9:8); - t.astem2 = t.astem1; - t.ps = apos + 7 - t.astem1; - t.nbase = (int)(tend - t.ps) + t.astem2; - t.dstem = dstem; - t.dloop = dhit[ndx].loop; - t.spacer1 = (int)(dpos - apos) - 7; - t.spacer2 = (int)(cpos - dhit[ndx].end); - j = (int)(cpos - t.ps) + t.cstem; - t.anticodon = j + 2; - if (t.nintron > 0) - { t.intron = j + intron; - if ((t.nbase + t.nintron) > MAXTRNALEN) - { nts = ti_genedetected(d,nts,seq,&t,sw); - continue; }} - if (energy < te.energy) continue; - flag = 1; - te = t; } }} - if (flag) nts = ti_genedetected(d,nts,seq,&te,sw); } - return(nts); } - - -void disp_ftable_entry(FILE *f, int n[], int i, int m, csw *sw) - { if (m > 0) - switch(sw->geneticcode) - { case METAZOAN_MT: - if (i < 2) fprintf(f," %-4s %-4d",aa(n,sw),m); - else fprintf(f," %-18s %-4d",aa(n,sw),m); - break; - case STANDARD: - case VERTEBRATE_MT: - default: - fprintf(f," %-4s %-5d",aa(n,sw),m); - break; } - else - switch(sw->geneticcode) - { case METAZOAN_MT: - if (i < 2) fprintf(f," %-4s ",aa(n,sw)); - else fprintf(f," %-18s ",aa(n,sw)); - break; - case STANDARD: - case VERTEBRATE_MT: - default: - fprintf(f," %-4s ",aa(n,sw)); - break; }} - - -void disp_freq_table(int nt, csw *sw) -{ int i,j,k,m,ambig,*s,c1,c2,c3,c[3],n[3],table[4][4][4]; - static int cgflip[4] = { 0,2,1,3 }; - FILE *f = sw->f; - ambig = 0; - for (i = 0; i < 4; i++) - for (j = 0; j < 4; j++) - for (k = 0; k < 4; k++) - table[i][j][k] = 0; - for (i = 0; i < nt; i++) - if (ts[i].energy >= 0.0) - if (ts[i].genetype == tRNA) - if (ts[i].cloop == 7) - { s = ts[i].seq + ts[i].anticodon; - c1 = *s; - c2 = s[1]; - c3 = s[2]; - if ((c1 >= Adenine) && (c1 <= Thymine)) - if ((c2 >= Adenine) && (c2 <= Thymine)) - if ((c3 >= Adenine) && (c3 <= Thymine)) - table[*s][s[1]][s[2]]++; - else ambig++; - else ambig++; - else ambig++; } - else ambig++; - fprintf(f,"tRNA Anticodon Frequency\n"); - for (j = 0; j < 4; j++) - { n[2] = cgflip[j]; - for (k = 0; k < 4; k++) - { n[1] = cgflip[k]; - for (i = 0; i < 4; i++) - { n[0] = cgflip[i]; - fprintf(f,"%c%c%c",cpbase(n[0]),cpbase(n[1]),cpbase(n[2])); - m = table[n[0]][n[1]][n[2]]; - disp_ftable_entry(f,n,i,m,sw); } - fputc('\n',f); }} - if (ambig > 0) fprintf(f,"Ambiguous: %d\n",ambig); - fprintf(f,"\ntRNA Codon Frequency\n"); - for (i = 0; i < 4; i++) - { n[0] = 3 - cgflip[i]; - for (j = 0; j < 4; j++) - { n[1] = 3 - cgflip[j]; - for (k = 0; k < 4; k++) - { n[2] = 3 - cgflip[k]; - fprintf(f,"%c%c%c",cpbase(n[0]),cpbase(n[1]),cpbase(n[2])); - c[0] = 3 - n[2]; - c[1] = 3 - n[1]; - c[2] = 3 - n[0]; - m = table[c[0]][c[1]][c[2]]; - disp_ftable_entry(f,c,k,m,sw); } - fputc('\n',f); }} - if (ambig > 0) fprintf(f,"Ambiguous: %d\n",ambig); - fputc('\n',f); } - -void disp_energy_stats(data_set *d, int nt, csw *sw) -{ int i,n[NS],genetype,introns,nintron,trna,mtrna,ntv,nd,nps; - double gc,gcmin[NS],gcmax[NS]; - static char genetype_name[NS][30] = - { "tRNA genes","tmRNA genes","srpRNA genes","rRNA genes","CDS genes","Overall" }; - FILE *f = sw->f; - mtrna = sw->mtrna; - trna = sw->trna | mtrna; - nps = 0; - if (mtrna) - { ntv = 0; - nd = 0; } - if ((sw->trna) && (sw->maxintronlen > 0)) - { introns = 1; - nintron = 0; } - else - introns = 0; - for (i = 0; i < NS; i++) - { n[i] = 0; - gcmin[i] = 1.0; - gcmax[i] = 0.0; } - for (i = 0; i < nt; i++) - if (ts[i].energy >= 0.0) - { n[NS-1]++; - genetype = ts[i].genetype; - n[genetype]++; - if (pseudogene(ts + i)) nps++; - if (genetype == tRNA) - { if (mtrna) - { if (ts[i].tstem == 0) ntv++; - if (ts[i].dstem == 0) nd++; } - if (introns) if (ts[i].nintron > 0) nintron++; - gc = gc_content(ts+i); - if (gc < gcmin[genetype]) gcmin[genetype] = gc; - if (gc > gcmax[genetype]) gcmax[genetype] = gc; }} - fputc('\n',f); - fputc('\n',f); - if (sw->repeatsn) - if ((n[tRNA] + n[tmRNA]) > 0) - fprintf(f,"%s\n\n",d->seqname); - if (trna) - { sw->ngene[tRNA] += n[tRNA]; - if (n[tRNA] > 3) disp_freq_table(nt,sw); - if ((n[tRNA] > 1) || ((sw->tmrna) && (n[tmRNA] > 0))) - { if (introns) - { if (sw->minintronlen == 0) - fprintf(f,"Number of tRNA genes with no introns = %d\n", - n[0]-nintron); - fprintf(f,"Number of tRNA genes with C-loop introns = %d\n", - nintron); } - else - fprintf(f,"Number of %s = %d\n",genetype_name[tRNA],n[tRNA]); - if (mtrna) - { if (sw->tvloop) - fprintf(f,"Number of TV replacement loop tRNA genes = %d\n", - ntv); - fprintf(f,"Number of D replacement loop tRNA genes = %d\n", - nd); } - if (n[tRNA] > 1) - fprintf(f,"tRNA GC range = %2.1f%% to %2.1f%%\n", - gcmin[0]*100.0,gcmax[0]*100.0); }} - if (sw->tmrna) - { sw->ngene[tmRNA] += n[tmRNA]; - if ((n[tmRNA] > 1) || (trna && (n[tRNA] > 0))) - fprintf(f,"Number of %s = %d\n",genetype_name[tmRNA],n[tmRNA]); } - sw->nps += nps; - if (sw->reportpseudogenes) - if (nps > 0) - if (n[NS-1] > 1) - fprintf(f,"Number of possible pseudogenes = %d\n",nps); - fputc('\n',f); - fputc('\n',f); } - -void batch_energy_stats(data_set *d, int nt, csw *sw) -{ int i,n[NS],genetype,introns,nintron,trna,mtrna,ntv,nd,nps; - double gc,gcmin[NS],gcmax[NS]; - FILE *f = sw->f; - mtrna = sw->mtrna; - trna = sw->trna | mtrna; - nps = 0; - if (mtrna) - { ntv = 0; - nd = 0; } - if ((sw->trna) && (sw->maxintronlen > 0)) - { introns = 1; - nintron = 0; } - else - introns = 0; - for (i = 0; i < NS; i++) - { n[i] = 0; - gcmin[i] = 1.0; - gcmax[i] = 0.0; } - for (i = 0; i < nt; i++) - if (ts[i].energy >= 0.0) - { n[NS-1]++; - genetype = ts[i].genetype; - n[genetype]++; - if (ts[i].energy < 100.0) nps++; - if (genetype == tRNA) - { if (mtrna) - { if (ts[i].tstem == 0) ntv++; - if (ts[i].dstem == 0) nd++; } - if (introns) if (ts[i].nintron > 0) nintron++; - gc = gc_content(ts+i); - if (gc < gcmin[genetype]) gcmin[genetype] = gc; - if (gc > gcmax[genetype]) gcmax[genetype] = gc; }} - if (trna) sw->ngene[tRNA] += n[tRNA]; - if (sw->tmrna) sw->ngene[tmRNA] += n[tmRNA]; - sw->nps += nps; } - - -int gene_sort(data_set *d, int nt, int sort[], csw *sw) -{ int i,n,j,k; - long starti,startj,stopi,stopj,psmax; - psmax = d->psmax; - n = 0; - for (i = 0; i < nt; i++) - if (ts[i].energy >= 0.0) - { if (sw->ireportminintronlen == 1) - if (ts[i].genetype == tRNA) - if (ts[i].nintron < sw->minintronlenreport) - continue; - sort[n++] = i; } - i = -1; - while (++i < (n-1)) - { j = i; - while (++j < n) - { starti = ts[sort[i]].start; - startj = ts[sort[j]].start; - stopi = ts[sort[i]].stop; - stopj = ts[sort[j]].stop; - if (stopi < starti) - if ((psmax - starti) < stopi) starti -= psmax; - else stopi += psmax; - if (stopj < startj) - if ((psmax - startj) < stopj) startj -= psmax; - else stopj += psmax; - if (starti > startj) - { k = sort[i]; - sort[i] = sort[j]; - sort[j] = k; } - else - if (starti == startj) - if (stopi < stopj) - { k = sort[i]; - sort[i] = sort[j]; - sort[j] = k; }}} - return(n); } - - -int iamatch(data_set *d, gene *t, csw *sw) -{ char key[5],*k,s[100]; - if (!(k = softstrpos(d->seqname,"TRNA-"))) return(-1); - copy3cr(k+5,key,3); - name(t,s,1,sw); - if (softstrpos(s,key)) return(1); - return(0); } - - -int nearest_annotated_gene(data_set *d, gene *t, int matchgenetype) -{ int n,i,nagene,max; - long a,b,c,e,score,thresh,psmax,proximity; - annotated_gene *ta; - psmax = d->psmax; - nagene = d->nagene[NS-1]; - ta = d->gene; - n = -1; - max = 0; - proximity = matchgenetype?40:1; - a = t->start; - b = t->stop; - thresh = b-a; - if (b < a) - { b += psmax; - thresh += psmax; - for (i = 0; i < nagene; i++) - { c = ta[i].start; - e = ta[i].stop; - if (e < c) - { e += psmax; - if (a > e) goto NXTW; - if (b < c) goto NXTW; - if (matchgenetype) - if (ta[i].genetype != t->genetype) continue; - score = (a >= c)?((b >= e)?e-a:thresh):((b >= e)?thresh:b-c); - if (score >= proximity) - if (score > max) - { n = i; - max = score; } - NXTW: - c -= psmax; - e -= psmax; } - if (a > e) continue; - if (b < c) continue; - if (matchgenetype) - if (ta[i].genetype != t->genetype) continue; - score = (a >= c)?((b >= e)?e-a:thresh):((b >= e)?thresh:b-c); - if (score >= proximity) - if (score > max) - { n = i; - max = score; } } - a -= psmax; - b -= psmax; } - for (i = 0; i < nagene; i++) - { c = ta[i].start; - e = ta[i].stop; - if (e < c) - { e += psmax; - if (a > e) goto NXTN; - if (b < c) goto NXTN; - if (matchgenetype) - if (ta[i].genetype != t->genetype) continue; - score = (a >= c)?((b >= e)?e-a:thresh):((b >= e)?thresh:b-c); - if (score >= proximity) - if (score > max) - { n = i; - max = score; } - NXTN: - c -= psmax; - e -= psmax; } - if (a > e) continue; - if (b < c) continue; - if (matchgenetype) - if (ta[i].genetype != t->genetype) continue; - score = (a >= c)?((b >= e)?e-a:thresh):((b >= e)?thresh:b-c); - if (score >= proximity) - if (score > max) - { n = i; - max = score; } } - return(n); } - - -int nearest_detected_gene(data_set *d, int *sort, int ns, - int proxtype, int *overlap, - annotated_gene *t) -{ int n,i,is; - long a,b,c,e,score,thresh,scoremax,psmax; - long proximity; - double energy; - psmax = d->psmax; - n = -1; - energy = -INACTIVE; - scoremax = -1; - a = t->start; - b = t->stop; - thresh = b-a; - proximity = thresh; - if (proximity < 0) proximity = -proximity; - proximity = 1 + proximity/2; - if (proximity > 40) proximity = 40; - if (b < a) - { b += psmax; - thresh += psmax; - for (i = 0; i < ns; i++) - { is = sort[i]; - c = ts[is].start; - e = ts[is].stop; - if (e < c) - { e += psmax; - if (a > e) goto NXTW; - if (b < c) goto NXTW; - if (ts[is].genetype != t->genetype) continue; - score = (a >= c)?((b >= e)?e-a:thresh):((b >= e)?thresh:b-c); - if (score >= proximity) - if (proxtype) - { if (score > scoremax) - { n = i; - scoremax = score; }} - else - if (ts[is].energy > energy) - { n = i; - scoremax = score; - energy = ts[is].energy; } - NXTW: - c -= psmax; - e -= psmax; } - if (a > e) continue; - if (b < c) continue; - if (ts[is].genetype != t->genetype) continue; - score = (a >= c)?((b >= e)?e-a:thresh):((b >= e)?thresh:b-c); - if (score >= proximity) - if (proxtype) - { if (score > scoremax) - { n = i; - scoremax = score; }} - else - if (ts[is].energy > energy) - { n = i; - scoremax = score; - energy = ts[is].energy; } } - a -= psmax; - b -= psmax; } - for (i = 0; i < ns; i++) - { is = sort[i]; - c = ts[is].start; - e = ts[is].stop; - if (e < c) - { e += psmax; - if (a > e) goto NXTN; - if (b < c) goto NXTN; - if (ts[is].genetype != t->genetype) continue; - score = (a >= c)?((b >= e)?e-a:thresh):((b >= e)?thresh:b-c); - if (score >= proximity) - if (proxtype) - { if (score > scoremax) - { n = i; - scoremax = score; }} - else - if (ts[is].energy > energy) - { n = i; - scoremax = score; - energy = ts[is].energy; } - NXTN: - c -= psmax; - e -= psmax; } - if (a > e) continue; - if (b < c) continue; - if (ts[is].genetype != t->genetype) continue; - score = (a >= c)?((b >= e)?e-a:thresh):((b >= e)?thresh:b-c); - if (score >= proximity) - if (proxtype) - { if (score > scoremax) - { n = i; - scoremax = score; }} - else - if (ts[is].energy > energy) - { n = i; - scoremax = score; - energy = ts[is].energy; } } - *overlap = (scoremax + 1); - return(n); } - - -void disp_match(data_set *d, int *sort, int nd, csw *sw) -{ int i,ld,fn,fp,fpd,fptv,w,alen,overlap,length,detect[NGFT],n[NS]; - char nm[100],anm[100],ps[100],*s; - FILE *f = sw->f; - gene *t; - annotated_gene *agene; - static char comp[3] = " c"; - for (i = 0; i < NS; i++) n[i] = 0; - for (i = 0; i < nd; i++) - { w = sort[i]; - if (ts[w].energy >= 0.0) - { n[NS-1]++; - n[ts[w].genetype]++; }} - fprintf(f,"\n%s\n",d->seqname); - fprintf(f,"%ld nucleotides in sequence\n",d->psmax); - fprintf(f,"Mean G+C content = %2.1f%%\n",100.0*d->gc); - fprintf(f,"GenBank to Aragorn Comparison\n"); - if (sw->trna | sw->mtrna) - { fn = 0; - fp = 0; - fpd = 0; - fptv = 0; - fprintf(f,"\n%d annotated tRNA genes\n",d->nagene[tRNA]); - fprintf(f,"%d detected tRNA genes\n\n",n[tRNA]); - fprintf(f," GenBank\t\t\t\tAragorn\n"); - ld = 0; - for (i = 0; i < d->nagene[NS-1]; i++) - { agene = d->gene + i; - if (agene->genetype != tRNA) continue; - detect[i] = nearest_detected_gene(d,sort,nd,0,&overlap,agene); - while (ld < nd) - { t = ts + sort[ld]; - if (detect[i] >= 0) - if (ld >= detect[i]) break; - if (t->start < t->stop) - if (t->start > agene->start) break; - fprintf(f,"* Not annotated %s ",name(t,nm,1,sw)); - fprintf(f,"%s",position(ps,t,sw)); - if (sw->reportpseudogenes) - if (pseudogene(t)) - fprintf(f," PS"); - fputc('\n',f); - fp++; - if (t->genetype == tRNA) - { if (t->dstem == 0) fpd++; - if (t->tstem == 0) fptv++; } - ld++; } - if (detect[i] >= 0) - { ld = detect[i] + 1; - w = 0; - t = ts + sort[detect[i]]; - s = aa(t->seq + t->anticodon,sw); - if (!softstrpos(s,agene->species+5)) w += 1; - if (agene->comp != t->comp) w += 2; - alen = agene->stop - agene->start; - if (alen < 0) alen = -alen; - if (alen < (t->nbase - 10)) w += 4; - else if (alen > (t->nbase + 10)) w += 4; - if (w > 0) fputc('*',f); - else fputc(' ',f); } - else - fputc('*',f); - sprintf(anm," %s %c(%ld,%ld)", - agene->species,comp[agene->comp],agene->start,agene->stop); - fprintf(f,"%-30s ",anm); - if (detect[i] >= 0) - { fprintf(f,"%s ",name(t,nm,1,sw)); - fprintf(f,"%s",position(ps,t,sw)); - if (sw->reportpseudogenes) - if (pseudogene(t)) - fprintf(f," PS"); - if (w & 1) fprintf(f," AAM"); - if (w & 2) fprintf(f," SM"); - if (w & 4) fprintf(f," LM"); - fputc('\n',f); } - else - { fprintf(f,"Not detected\n"); - fn++; }} - while (ld < nd) - { fprintf(f,"* Not annotated\t\t\t%s ",name(ts + sort[ld],nm,1,sw)); - fprintf(f,"%s\n",position(ps,ts + sort[ld],sw)); - fp++; - if (t->genetype == tRNA) - { if (t->dstem == 0) fpd++; - if (t->tstem == 0) fptv++; } - ld++; } - fprintf(f,"\nNumber of false negative genes = %d\n",fn); - fprintf(f,"Number of false positive genes = %d\n",fp); - fprintf(f,"Number of false positive D-replacement tRNA genes = %d\n",fpd); - fprintf(f,"Number of false positive TV-replacement tRNA genes = %d\n",fptv); - fprintf(f,"\n\n"); - sw->nagene[tRNA] += d->nagene[tRNA]; - sw->natfn += fn; - sw->natfp += fp; - sw->natfpd += fpd; - sw->natfptv += fptv; } - if (sw->cds) - { fn = 0; - fp = 0; - fprintf(f,"\n%d annotated CDS genes\n",d->nagene[CDS]); - fprintf(f,"%d detected CDS genes\n\n",n[CDS]); - fprintf(f," GenBank\t\t\t\t Aragorn\n"); - ld = 0; - for (i = 0; i < d->nagene[NS-1]; i++) - { agene = d->gene + i; - if (agene->genetype != CDS) continue; - length = (int)(agene->stop - agene->start) + 1; - sw->lacds += length; - detect[i] = nearest_detected_gene(d,sort,nd,1,&overlap,agene); - while (ld < nd) - { t = ts + sort[ld]; - if (detect[i] >= 0) - if (ld >= detect[i]) break; - if (t->start < t->stop) - if (t->start > agene->start) break; - fprintf(f,"* Not annotated "); - sprintf(anm,"%s %s", - name(t,nm,1,sw),position(ps,t,sw)); - fprintf(f,"%-18s",anm); - if (sw->energydisp) fprintf(f," %lg",t->energy); - if (sw->reportpseudogenes) - if (pseudogene(t)) - fprintf(f," PS"); - fputc('\n',f); - fp++; - ld++; } - if (detect[i] >= 0) - { ld = detect[i] + 1; - t = ts + sort[detect[i]]; - fputc(' ',f); } - else - fputc('*',f); - fprintf(f," %-33s",agene->species); - sprintf(anm,"%c(%ld,%ld)",comp[agene->comp],agene->start,agene->stop); - fprintf(f,"%14s ",anm); - if (detect[i] >= 0) - { sprintf(anm,"%s %s",name(t,nm,1,sw),position(ps,t,sw)); - fprintf(f,"%-18s",anm); - if (sw->energydisp) fprintf(f," %lg",t->energy); - if (sw->reportpseudogenes) - if (pseudogene(t)) - fprintf(f," PS"); - fputc('\n',f); - length = (int)(t->stop - t->start) + 1; - sw->ldcds += length; } - else - { fprintf(f,"Not detected\n"); - fn++; }} - while (ld < nd) - { t = ts + sort[ld]; - fprintf(f,"* Not annotated "); - sprintf(anm,"%s %s",name(t,nm,1,sw),position(ps,t,sw)); - fprintf(f,"%-18s",anm); - if (sw->energydisp) fprintf(f," %lg",t->energy); - if (sw->reportpseudogenes) - if (pseudogene(t)) - fprintf(f," PS"); - fputc('\n',f); - fp++; - ld++; } - fprintf(f,"\nNumber of false negative CDS genes = %d\n",fn); - fprintf(f,"Number of false positive CDS genes = %d\n",fp); - fprintf(f,"\n\n"); - sw->nagene[CDS] += d->nagene[CDS]; - sw->nacdsfn += fn; - sw->nacdsfp += fp; } - sw->nabase += d->psmax; } - - -void disp_gene_set(data_set *d, int nt, csw *sw) -{ int i,j,n,a,vsort[NT],*sort; - char m[MATX][MATY],s[20]; - static char comp[3] = " c"; - gene *t; - FILE *f = sw->f; - if (nt <= NT) - sort = vsort; - else - { sort = (int *)malloc(nt*sizeof(int)); - if (sort == NULL) - { fprintf(stderr,"Not enough memory to sort genes\n"); - exit(1); }} - n = gene_sort(d,nt,sort,sw); - j = sw->tmrna_struct[54]; - for (i = 55; i <= 60; i++) j += sw->tmrna_struct[i]; - if (j != ((sw->tmrna_struct[0] << 4) + 9)) return; - if (sw->libflag < 2) - { if (n > 0) - for (j = 0; j < n;) - { i = sort[j++]; - t = ts + i; - t->energy = nenergy(t,sw); - switch(t->genetype) - { case tRNA: - init_matrix(m); - disp_gene(t,m,sw); - sprintf(s,"%d.",j); - xcopy(m,0,32,s,length(s)); - disp_matrix(f,m,MATY); - if (sw->matchacceptor) - if (iamatch(d,t,sw) == 0) - { fprintf(f," Iso-acceptor mismatch\n"); - sw->iamismatch++; } - if (sw->annotated) - if ((a = nearest_annotated_gene(d,t,1)) < 0) - { fprintf(f," Annotation false positive\n"); - if ((a = nearest_annotated_gene(d,t,0)) >= 0) - fprintf(f," Overlap with %s %c(%ld,%ld)\n", - d->gene[a].species,comp[d->gene[a].comp], - d->gene[a].start,d->gene[a].stop); - fputc('\n',f); } - overlap(d,sort,n,i,sw); - if (sw->seqdisp) disp_seq(f,t,sw); - if (t->nintron > 0) disp_intron(f,t,sw); - if (sw->energydisp > 1) trna_score(f,t); - break; - case tmRNA: - if (sw->secstructdisp == 1) - { init_matrix(m); - disp_gene(t,m,sw); - sprintf(s,"%d.",j); - xcopy(m,0,32,s,length(s)); - disp_matrix(f,m,MATY); } - else - { fprintf(f,"\n%d.\n",j); - disp_location(t,sw,"Location"); - if (sw->energydisp) - fprintf(f,"Score = %g\n",t->energy); } - overlap(d,sort,n,i,sw); - if (t->asst == 0) disp_tmrna_seq(f,t,sw); - else disp_tmrna_perm_seq(f,t,sw); - if (sw->energydisp > 1) tmrna_score(f,t,sw); - break; - case CDS: - fprintf(f,"\n%d.\nCDS gene\n",j); - disp_location(t,sw,"Location"); - overlap(d,sort,n,i,sw); - disp_cds(f,t,sw); - break; - } - if (sw->libflag > 0) write_to_library(f,t,sw); } - else - if (*(d->seqname) != '\0') - fprintf(f,"\nNothing found in %s\n\n\n",d->seqname); - else - fprintf(f,"\nNothing found\n\n\n"); } - else - { if (n > 0) - for (i = 0; i < n; i++) - write_to_library(f,ts + sort[i],sw); } - disp_energy_stats(d,nt,sw); - if (d->datatype == GENBANK) disp_match(d,sort,n,sw); - if (nt > NT) free((void *)sort); } - - -void batch_gene_set(data_set *d, int nt, csw *sw) -{ int i,j,n,vsort[NT],nspaces,caps,*sort; - gene *t; - FILE *f = sw->f; - if (nt <= NT) - sort = vsort; - else - { sort = (int *)malloc(nt*sizeof(int)); - if (sort == NULL) - { fprintf(stderr,"Not enough memory to sort genes\n"); - exit(1); }} - n = gene_sort(d,nt,sort,sw); - j = sw->tmrna_struct[54]; - for (i = 55; i <= 60; i++) j += sw->tmrna_struct[i]; - if (j != ((sw->tmrna_struct[0] << 4) + 9)) return; - if (sw->libflag < 2) - if (sw->batch >= 2) - { nspaces = (sw->batch & 0x4); - caps = (sw->batch & 0x10); - if (sw->batch & 0x8) - for (i = 0; i < n; i++) - disp_fasta_seq(f,ts + sort[i],d->ns+1,i+1,nspaces,caps,sw); - else - for (i = 0; i < n; i++) - disp_fasta_seq(f,ts + sort[i],0,0,nspaces,caps,sw); } - else - { fprintf(f,"%d genes found\n",n); - for (j = 0; j < n; j++) - { fprintf(f,"%-3d ",j+1); - t = ts + sort[j]; - t->energy = nenergy(t,sw); - switch(t->genetype) - { case tRNA: disp_batch_trna(f,t,sw); - break; - case tmRNA: disp_batch_tmrna(f,t,sw); - break; - case srpRNA:disp_batch_srprna(f,t,sw); - break; - case CDS: disp_batch_cds(f,t,sw); - break; - default: break; }}} - if (sw->libflag > 0) - { for (i = 0; i < n; i++) - write_to_library(f,ts + sort[i],sw); } - batch_energy_stats(d,nt,sw); - if (nt > NT) free((void *)sort); } - - -void remove_overlapping_trna(data_set *d, int nt, csw *sw) -{ int i,n,ioverlay; - long a,b,c,e,len,leni,overlap,psmax; - char s1[80],s2[80]; - gene *t,*ti; - static long proximity = 7*MINCTRNALEN/10; - psmax = d->psmax; - ioverlay = sw->ioverlay; - for (n = 0; n < nt; n++) - { t = ts + n; - if (t->genetype != tRNA) continue; - if (t->energy < 0.0) continue; - if (t->nintron <= 0) continue; - a = t->start; - b = t->stop; - if (b < a) b += psmax; - len = b - a; - for (i = 0; i < nt; i++) - { if (i == n) continue; - ti = ts + i; - if (ti->genetype != tRNA) continue; - if (ti->comp != t->comp) continue; - if (ti->energy < 0.0) continue; - c = ti->start; - e = ti->stop; - if (e < c) e += psmax; - leni = e - c; - if (ioverlay) - { if ((2*len) > (5*leni)) continue; - if ((2*leni) > (5*len)) continue; } - overlap = (a >= c)?((b >= e)?e-a:len):((b >= e)?len:b-c); - if (overlap >= proximity) - if (t->energy < ti->energy) - { if (sw->verbose) - { fprintf(stderr,"Removing %s at %s",name(t,s1,0,sw),position(s2,t,sw)); - if (sw->energydisp) fprintf(stderr," (%g)",nenergy(t,sw)); - fprintf(stderr,"\n"); } - t->energy = -1.0; - break; }}} - for (n = 0; n < (nt-1); n++) - { t = ts + n; - if (t->genetype != tRNA) continue; - if (t->energy < 0.0) continue; - a = t->start; - b = t->stop; - if (b < a) b += psmax; - len = b - a; - for (i = n + 1; i < nt; i++) - { ti = ts + i; - if (ti->genetype != tRNA) continue; - if (ti->comp != t->comp) continue; - if (ti->energy < 0.0) continue; - c = ti->start; - e = ti->stop; - if (e < c) e += psmax; - leni = e - c; - if (ioverlay) - { if ((2*len) > (5*leni)) continue; - if ((2*leni) > (5*len)) continue; } - overlap = (a >= c)?((b >= e)?e-a:len):((b >= e)?len:b-c); - if (overlap >= proximity) - if (t->energy < ti->energy) - { if (sw->verbose) - { fprintf(stderr,"Removing %s at %s",name(t,s1,0,sw),position(s2,t,sw)); - if (sw->energydisp) fprintf(stderr," (%g)",nenergy(t,sw)); - fprintf(stderr,"\n"); } - t->energy = -1.0; - break; } - else if (ti->energy < t->energy) - { if (sw->verbose) - { fprintf(stderr,"Removing %s at %s",name(ti,s1,0,sw),position(s2,ti,sw)); - if (sw->energydisp) fprintf(stderr," (%g)",nenergy(ti,sw)); - fprintf(stderr,"\n"); } - ti->energy = -1.0; }}}} - - - -void iopt_fastafile(data_set *d, csw *sw) -{ int i,nt,flag,len,aragorn,anticodon; - int *s,*sf,*se,*sc,*swrap; - int seq[2*LSEQ+WRAP+1],cseq[2*LSEQ+WRAP+1],wseq[2*WRAP+1]; - long gap,start,rewind,drewind,psmax,tmaxlen,vstart,vstop; - double sensitivity,sel1,sel2; - char c1,c2,c3; - static char trnatypename[3][25] = - { "Metazoan mitochondrial","Cytosolic","Mammalian mitochondrial" }; - static char genecodename[NGENECODE][50] = - { "composite Metazoan Mitochondrial", - "standard", - "Vertebrate Mitochondrial", - "Yeast Mitochondrial", - "Mold/Protozoan/Coelenterate Mitochondrial", - "Invertebrate Mitochondrial", - "Ciliate", - "deleted -> standard", - "deleted -> standard", - "Echinoderm/Flatworm Mitochondrial", - "Euplotid", - "Bacterial/Plant Chloroplast", - "Alternative Yeast", - "Ascidian Mitochondrial", - "Alternative Flatworm Mitochondrial", - "Blepharisma", - "Chlorophycean Mitochondrial", - "deleted -> standard", - "deleted -> standard", - "deleted -> standard", - "deleted -> standard", - "Trematode Mitochondrial", - "Scenedesmus obliquus Mitochondrial", - "Thraustochytrium Mitochondrial" }; - FILE *f = sw->f; - init_tmrna(f,sw); - aragorn = (sw->trna || sw->tmrna || sw->cds || sw->srprna); - fprintf(f,"\nPlease reference the following paper"); - if (aragorn && sw->mtrna) fputc('s',f); - fprintf(f," if you use this\n"); - fprintf(f,"program as part of any published research.\n\n"); - if (aragorn) - { fprintf(f,"Laslett, D. and Canback, B. (2004) ARAGORN, a\n"); - fprintf(f,"program for the detection of transfer RNA and\n"); - fprintf(f,"transfer-messenger RNA genes in nucleotide sequences.\n"); - fprintf(f,"Nucleic Acids Research, 32;11-16.\n\n"); } - if (sw->mtrna) - { fprintf(f,"Laslett, D. and Canback, B. (2008) ARWEN: a\n"); - fprintf(f,"program to detect tRNA genes in metazoan mitochondrial\n"); - fprintf(f,"nucleotide sequences\n"); - fprintf(f,"Bioinformatics, 24(2); 172-175.\n\n\n"); } - fputc('\n',f); - if (sw->mtrna) - { fprintf(f,"Searching for %s tRNA genes\n",trnatypename[sw->discrim]); - if (!sw->tvloop) - fprintf(f,"TV replacement loop tRNA genes not detected\n"); } - else - if (sw->trna) - { fprintf(f,"Searching for tRNA genes"); - if (sw->maxintronlen > 0) fprintf(f," with introns in anticodon loop"); - else fprintf(f," with no introns"); - fputc('\n',f); - if (sw->maxintronlen > 0) - { fprintf(f,"Intron length from %d to %d bases\n", - sw->minintronlen,sw->maxintronlen); - if (sw->ifixedpos) - { fprintf(f,"Intron position fixed between positions 37 and 38\n"); - fprintf(f,"on C-loop (one base after anticodon)\n"); } - if (sw->ioverlay) - fprintf(f,"Allowing overlay of long tRNA genes\n"); }} - if (sw->tmrna) - fprintf(f,"Searching for tmRNA genes\n"); - if (sw->linear) - fprintf(f,"Assuming linear topology, search will not wrap around ends\n"); - else - fprintf(f,"Assuming circular topology, search wraps around ends\n"); - if (sw->both == 2) - fprintf(f,"Searching both strands\n"); - else - if (sw->both == 1) - fprintf(f,"Searching complementary (antisense) strand only\n"); - else - fprintf(f,"Searching single (sense) strand only\n"); - if (sw->mtrna) - if (sw->mtcompov) - fprintf(f,"Reporting overlapping candidates on opposite strands\n"); - if ((sw->mtrna) || (sw->trna) || (sw->tmrna)) - { fprintf(f,"Using %s genetic code\n",genecodename[sw->geneticcode]); - if (sw->ngcmod > 0) - { fprintf(f,"Specified modifications:\n"); - for (i = 0; i < sw->ngcmod; i++) - { anticodon = sw->gcmod[i]; - c1 = cpbase(Thymine - (anticodon & 0x3)); - c2 = cpbase(Thymine - ((anticodon >> 2) & 0x3)); - c3 = cpbase(Thymine - ((anticodon >> 4) & 0x3)); - fprintf(f,"%c%c%c = %s\n",c1,c2,c3, - aaname[aamap[sw->geneticcode][anticodon]]); }}} - fputc('\n',f); - fputc('\n',f); - rewind = MAXTAGDIST + 20; - if (sw->trna | sw->mtrna) - { tmaxlen = MAXTRNALEN + sw->maxintronlen; - if (rewind < tmaxlen) rewind = tmaxlen; } - if (sw->tmrna) - if (rewind < MAXTMRNALEN) rewind = MAXTMRNALEN; - if (sw->peptide) - if (sw->tagthresh >= 5) - if (rewind < TSWEEP) rewind = TSWEEP; - sw->loffset = rewind; - sw->roffset = rewind; - drewind = 2*rewind; - d->ns = 0; - d->nextseq = 0L; - while (d->nextseq >= 0L) - { d->seqstart = d->nextseq; - if (!seq_init(d,sw)) break; - psmax = d->psmax; - if (sw->verbose) - { fprintf(stderr,"%s\n",d->seqname); - fprintf(stderr,"%ld nucleotides in sequence\n",psmax); - fprintf(stderr,"Mean G+C content = %2.1f%%\n",100.0*d->gc); - if ((sw->mtrna) || (sw->trna) || (sw->tmrna)) - { fprintf(stderr,"Using %s genetic code\n",genecodename[sw->geneticcode]); - if (sw->ngcmod > 0) - { fprintf(stderr,"Specified modifications:\n"); - for (i = 0; i < sw->ngcmod; i++) - { anticodon = sw->gcmod[i]; - c1 = cpbase(Thymine - (anticodon & 0x3)); - c2 = cpbase(Thymine - ((anticodon >> 2) & 0x3)); - c3 = cpbase(Thymine - ((anticodon >> 4) & 0x3)); - fprintf(stderr,"%c%c%c = %s\n",c1,c2,c3, - aaname[aamap[sw->geneticcode][anticodon]]); }}}} - fprintf(f,"%s\n",d->seqname); - fprintf(f,"%ld nucleotides in sequence\n",psmax); - fprintf(f,"Mean G+C content = %2.1f%%\n",100.0*d->gc); - init_gene(0,NT); - nt = 0; - flag = 0; - start = 1L; - se = seq; - if (sw->linear) - { for (i = 0; i < rewind; i++) *se++ = NOBASE; - start -= rewind; } - else - { if (psmax <= drewind) - { gap = drewind - psmax; - sc = se + gap; - while (se < sc) *se++ = NOBASE; - swrap = wseq; - sc = se + psmax; - while (se < sc) - { *se = move_forward(d); - *swrap++ = *se++; } - sc = swrap + gap; - while (swrap < sc) *swrap++ = NOBASE; - swrap = wseq; - sc = swrap + psmax; - while (swrap < sc) *se++ = *swrap++; - swrap = wseq; - sc = swrap + drewind; - while (swrap < sc) *se++ = *swrap++; - sw->loffset = drewind; - sw->roffset = drewind; - start -= drewind; - flag = 1; - goto SH; } - else - { swrap = wseq; - sc = seq + drewind; - while (se < sc) - { *se = move_forward(d); - *swrap++ = *se++; }}} - sc = seq + LSEQ; - NX: - while (se < sc) - { if (d->ps >= psmax) - { if (sw->linear) - for (i = 0; i < rewind; i++) *se++ = NOBASE; - else - { sc = wseq + drewind; - swrap = wseq; - while (swrap < sc) *se++ = *swrap++; } - flag = 1; - break; } - else *se++ = move_forward(d); } - SH: - len = (int)(se - seq); - if (sw->verbose) - { vstart = sq(start + sw->loffset); - vstop = sq(start + len - sw->roffset - 1); - if (vstop < vstart) - { fprintf(stderr,"Searching from %ld to %ld\n",vstart,psmax); - fprintf(stderr,"Searching from 1 to %ld\n",vstop); } - else - fprintf(stderr,"Searching from %ld to %ld\n",vstart,vstop); } - if (sw->both != 1) - { sw->start = start; - sw->comp = 0; - nt = tmioptimise(d,seq,len,nt,sw); } - if (sw->both > 0) - { sense_switch(seq,cseq,len); - sw->start = start+len; - sw->comp = 1; - nt = tmioptimise(d,cseq,len,nt,sw); } - if (!flag) - { s = seq; - sf = se - drewind; - se = seq + drewind; - while (s < se) *s++ = *sf++; - start += len - drewind; - goto NX; } - if (sw->maxintronlen > 0) remove_overlapping_trna(d,nt,sw); - disp_gene_set(d,nt,sw); - if (sw->verbose) fprintf(stderr,"%s\nSearch Finished\n\n",d->seqname); - d->ns++; } - if (d->ns > 1) - { fprintf(f,"\n\n%d sequences searched\n",d->ns); - if (sw->trna | sw->mtrna) - { fprintf(f,"Total tRNA genes = %d\n",sw->ngene[tRNA]); - if (sw->matchacceptor) - fprintf(f,"Total iso-acceptor mismatches = %d\n",sw->iamismatch); } - if (sw->tmrna) fprintf(f,"Total tmRNA genes = %d\n",sw->ngene[tmRNA]); - if (sw->reportpseudogenes) - if (sw->nps > 0) - fprintf(f,"Total number of possible pseudogenes = %d\n",sw->nps); - if (sw->annotated) - { if (sw->trna | sw->mtrna) - { fprintf(f,"\nTotal number of annotated tRNA genes = %d\n", - sw->nagene[tRNA]); - fprintf(f,"Total number of annotated false negatives = %d\n",sw->natfn); - fprintf(f,"Total number of annotated false positives = %d\n",sw->natfp); - fprintf(f,"Total number of annotated DRL false positives = %d\n", - sw->natfpd); - fprintf(f,"Total number of annotated TVRL false positives = %d\n", - sw->natfptv); - fprintf(f,"Total annotated sequence length = %ld bases\n",sw->nabase); - sensitivity = (sw->nagene[tRNA] > 0)? - 100.0*(double)(sw->nagene[tRNA] - sw->natfn)/ - (double)sw->nagene[tRNA]:0.0; - sel1 = (sw->nagene[tRNA] > 0)? - 100.0*(double)(sw->natfp)/ - (double)sw->nagene[tRNA]:0.0; - sel2 = (sw->nabase > 0)? - 1000000.0*(double)(sw->natfp)/ - (double)sw->nabase:0.0; - fprintf(f,"Sensitivity = %lg%%\n",sensitivity); - fprintf(f,"Selectivity = %lg%% or %lg per Megabase\n\n",sel1,sel2); } - if (sw->cds) - { fprintf(f,"\nTotal number of annotated CDS genes = %d\n", - sw->nagene[CDS]); - fprintf(f,"Total number of annotated false negatives = %d\n",sw->nacdsfn); - fprintf(f,"Total number of annotated false positives = %d\n",sw->nacdsfp); - fprintf(f,"Total annotated sequence length = %ld bases\n",sw->nabase); - sensitivity = (sw->nagene[CDS] > 0)? - 100.0*(double)(sw->nagene[CDS] - sw->nacdsfn)/ - (double)sw->nagene[CDS]:0.0; - sel1 = (sw->nagene[CDS] > 0)? - 100.0*(double)(sw->nacdsfp)/ - (double)sw->nagene[CDS]:0.0; - sel2 = (sw->nabase > 0)? - 1000000.0*(double)(sw->nacdsfp)/ - (double)sw->nabase:0.0; - fprintf(f,"Sensitivity = %lg%%\n",sensitivity); - fprintf(f,"Selectivity = %lg%% or %lg per Megabase\n",sel1,sel2); - sensitivity = (sw->lacds > 0)? - 100.0*(double)sw->ldcds/(double)sw->lacds:0.0; - fprintf(f,"Length sensitivity = %lg%%\n\n",sensitivity); } - } } - } - - -void bopt_fastafile(data_set *d, csw *sw) -{ int i,nt,flag,len; - int *s,*sf,*se,*sc,*swrap; - int seq[2*LSEQ+WRAP+1],cseq[2*LSEQ+WRAP+1],wseq[2*WRAP+1]; - long gap,start,rewind,drewind,psmax,tmaxlen,vstart,vstop; - FILE *f = sw->f; - rewind = MAXTAGDIST + 20; - if (sw->trna | sw->mtrna) - { tmaxlen = MAXTRNALEN + sw->maxintronlen; - if (rewind < tmaxlen) rewind = tmaxlen; } - if (sw->tmrna) - if (rewind < MAXTMRNALEN) rewind = MAXTMRNALEN; - if (sw->peptide) - if (sw->tagthresh >= 5) - if (rewind < TSWEEP) rewind = TSWEEP; - sw->loffset = rewind; - sw->roffset = rewind; - drewind = 2*rewind; - d->ns = 0; - d->nextseq = 0L; - while (d->nextseq >= 0L) - { d->seqstart = d->nextseq; - if (!seq_init(d,sw)) break; - psmax = d->psmax; - if (sw->verbose) - { fprintf(stderr,"%s\n",d->seqname); - fprintf(stderr,"%ld nucleotides in sequence\n",psmax); - fprintf(stderr,"Mean G+C content = %2.1f%%\n",100.0*d->gc); } - if (sw->batch < 2) fprintf(f,">%s\n",d->seqname); - init_gene(0,NT); - nt = 0; - flag = 0; - start = 1L; - se = seq; - if (sw->linear) - { for (i = 0; i < rewind; i++) *se++ = NOBASE; - start -= rewind; } - else - { if (psmax <= drewind) - { gap = drewind - psmax; - sc = se + gap; - while (se < sc) *se++ = NOBASE; - swrap = wseq; - sc = se + psmax; - while (se < sc) - { *se = move_forward(d); - *swrap++ = *se++; } - sc = swrap + gap; - while (swrap < sc) *swrap++ = NOBASE; - swrap = wseq; - sc = swrap + psmax; - while (swrap < sc) *se++ = *swrap++; - swrap = wseq; - sc = swrap + drewind; - while (swrap < sc) *se++ = *swrap++; - sw->loffset = drewind; - sw->roffset = drewind; - start -= drewind; - flag = 1; - goto SH; } - else - { swrap = wseq; - sc = seq + drewind; - while (se < sc) - { *se = move_forward(d); - *swrap++ = *se++; }}} - sc = seq + LSEQ; - NX: - while (se < sc) - { *se++ = move_forward(d); - if (d->ps >= psmax) - { if (sw->linear) - for (i = 0; i < rewind; i++) *se++ = NOBASE; - else - { sc = wseq + drewind; - swrap = wseq; - while (swrap < sc) *se++ = *swrap++; } - flag = 1; - break; }} - SH: - len = (int)(se - seq); - if (sw->verbose) - { vstart = sq(start + sw->loffset); - vstop = sq(start + len - sw->roffset - 1); - if (vstop < vstart) - { fprintf(stderr,"Searching from %ld to %ld\n",vstart,psmax); - fprintf(stderr,"Searching from 1 to %ld\n",vstop); } - else - fprintf(stderr,"Searching from %ld to %ld\n",vstart,vstop); } - if (sw->both != 1) - { sw->start = start; - sw->comp = 0; - nt = tmioptimise(d,seq,len,nt,sw); } - if (sw->both > 0) - { sense_switch(seq,cseq,len); - sw->start = start+len; - sw->comp = 1; - nt = tmioptimise(d,cseq,len,nt,sw); } - if (!flag) - { s = seq; - sf = se - drewind; - se = seq + drewind; - while (s < se) *s++ = *sf++; - start += len - drewind; - goto NX; } - if (sw->maxintronlen > 0) remove_overlapping_trna(d,nt,sw); - batch_gene_set(d,nt,sw); - if (sw->verbose) fprintf(stderr,"%s\nSearch Finished\n\n",d->seqname); - d->ns++; } - if ((d->ns > 1) && (sw->batch < 2)) - { fprintf(f,">end \t%d sequences",d->ns); - if (sw->trna || sw->mtrna) fprintf(f," %d tRNA genes",sw->ngene[tRNA]); - if (sw->tmrna) fprintf(f," %d tmRNA genes",sw->ngene[tmRNA]); - fputc('\n',f); } } - - -void aragorn_help_menu() -{ printf("\n"); - printf("----------------------------\n"); - printf("ARAGORN v1.2.36 Dean Laslett\n"); - printf("----------------------------\n"); - printf("\n"); - printf("Please reference the following papers if you use this\n"); - printf("program as part of any published research.\n\n"); - printf("Laslett, D. and Canback, B. (2004) ARAGORN, a\n"); - printf("program for the detection of transfer RNA and transfer-messenger\n"); - printf("RNA genes in nucleotide sequences\n"); - printf("Nucleic Acids Research, 32;11-16\n\n"); - printf("Laslett, D. and Canback, B. (2008) ARWEN: a\n"); - printf("program to detect tRNA genes in metazoan mitochondrial\n"); - printf("nucleotide sequences\n"); - printf("Bioinformatics, 24(2); 172-175.\n\n\n"); - printf("ARAGORN detects tRNA, mtRNA, and tmRNA genes.\n"); - printf("\n"); - printf("Usage:\n"); - printf("aragorn -v -s -d -c -l -a -w -j -ifro, -t -mt -m"); - printf(" -tv -gc -seq -br -fasta -fo -o \n\n"); - printf(" is assumed to contain one or more sequences\n"); - printf("in FASTA format. Results of the search are printed to\n"); - printf("STDOUT. All switches are optional and case-insensitive.\n"); - printf("Unless -i is specified, tRNA genes containing introns\n"); - printf("are not detected. \n"); - printf("\n"); - printf(" -m Search for tmRNA genes.\n"); - printf(" -t Search for tRNA genes.\n"); - printf(" By default, both are detected. If one of\n"); - printf(" -m or -t is specified, then the other\n"); - printf(" is not detected unless specified as well.\n"); - printf(" -mt Search for Metazoan mitochondrial tRNA\n"); - printf(" genes. -i switch ignored. Composite\n"); - printf(" Metazoan mitochondrial genetic code used.\n"); - printf(" -mtmam Search for Mammalian mitochondrial tRNA\n"); - printf(" genes. -i switch ignored. -tv switch set.\n"); - printf(" Mammalian mitochondrial genetic code used.\n"); - printf(" -mtx Same as -mt but low scoring tRNA genes are\n"); - printf(" not reported.\n"); - printf(" -gc Use the GenBank transl_table = genetic code.\n"); - printf(" -gcstd Use standard genetic code.\n"); - printf(" -gcmet Use composite Metazoan mitochondrial genetic code.\n"); - printf(" -gcvert Use Vertebrate mitochondrial genetic code.\n"); - printf(" -gcinvert Use Invertebrate mitochondrial genetic code.\n"); - printf(" -gcyeast Use Yeast mitochondrial genetic code.\n"); - printf(" -gcprot Use Mold/Protozoan/Coelenterate"); - printf(" mitochondrial genetic code.\n"); - printf(" -gcciliate Use Ciliate genetic code.\n"); - printf(" -gcflatworm Use Echinoderm/Flatworm mitochondrial genetic code.\n"); - printf(" -gceuplot Use Euplotid genetic code.\n"); - printf(" -gcbact Use Bacterial/Plant Chloroplast genetic code.\n"); - printf(" -gcaltyeast Use alternative Yeast genetic code.\n"); - printf(" -gcascid Use Ascidian Mitochondrial genetic code.\n"); - printf(" -gcaltflat Use alternative Flatworm Mitochondrial genetic code.\n"); - printf(" -gcblep Use Blepharisma genetic code.\n"); - printf(" -gcchloroph Use Chlorophycean Mitochondrial genetic code.\n"); - printf(" -gctrem Use Trematode Mitochondrial genetic code.\n"); - printf(" -gcscen Use Scenedesmus obliquus Mitochondrial genetic code.\n"); - printf(" -gcthraust Use Thraustochytrium Mitochondrial genetic code.\n"); - printf(" Individual modifications can be appended using\n"); - printf(" ,BBB= B = A,C,G, or T. is the three letter\n"); - printf(" code for an amino-acid. More than one modification\n"); - printf(" can be specified. eg -gcvert,aga=Trp,agg=Trp uses\n"); - printf(" the Vertebrate Mitochondrial code and the codons\n"); - printf(" AGA and AGG changed to Tryptophan.\n"); - printf(" -tv Do not search for mitochondrial "); - printf("TV replacement\n"); - printf(" loop tRNA genes. Only relevant if -mt used. \n"); - printf(" -i Search for tRNA genes with introns in\n"); - printf(" anticodon loop with maximum length %d\n", - MAXINTRONLEN); - printf(" bases. Minimum intron length is 0 bases.\n"); - printf(" Ignored if -m is specified.\n"); - printf(" -i Search for tRNA genes with introns in\n"); - printf(" anticodon loop with maximum length \n"); - printf(" bases. Minimum intron length is 0 bases.\n"); - printf(" Ignored if -m is specified.\n"); - printf(" -i, Search for tRNA genes with introns in\n"); - printf(" anticodon loop with maximum length \n"); - printf(" bases, and minimum length bases.\n"); - printf(" Ignored if -m is specified.\n"); - printf(" -io Same as -i, but allow tRNA genes with long\n"); - printf(" introns to overlap shorter tRNA genes.\n"); - printf(" -if Same as -i, but fix intron between positions\n"); - printf(" 37 and 38 on C-loop (one base after anticodon).\n"); - printf(" -ifo Same as -if and -io combined.\n"); - printf(" -ir Same as -i, but search for tRNA genes with minimum intron\n"); - printf(" length 0 bases, and only report tRNA genes with minimum\n"); - printf(" intron length bases.\n"); - printf(" -c Assume that each sequence has a circular\n"); - printf(" topology. Search wraps around each end.\n"); - printf(" Default setting.\n"); - printf(" -l Assume that each sequence has a linear\n"); - printf(" topology. Search does not wrap.\n"); - printf(" -d Double. Search both strands of each\n"); - printf(" sequence. Default setting.\n"); - printf(" -s or -s+ Single. Do not search the complementary\n"); - printf(" (antisense) strand of each sequence.\n"); - printf(" -sc or -s- Single complementary. Do not search the sense\n"); - printf(" strand of each sequence.\n"); - printf(" -ss Use the stricter canonical 1-2 bp spacer1 and\n"); - printf(" 1 bp spacer2. Ignored if -mt set. Default is to\n"); - printf(" allow 3 bp spacer1 and 0-2 bp spacer2, which may\n"); - printf(" degrade selectivity.\n"); - printf(" -ps Lower scoring thresholds to 95%% of default levels.\n"); - printf(" -ps Change scoring thresholds to percent of default levels.\n"); - printf(" -rp Flag possible pseudogenes (score < 100 or tRNA anticodon\n"); - printf(" loop <> 7 bases long). Note that genes with score < 100\n"); - printf(" will not be detected or flagged if scoring thresholds are not\n"); - printf(" also changed to below 100%% (see -ps switch).\n"); - printf(" -seq Print out primary sequence.\n"); - printf(" -br Show secondary structure of tRNA gene primary\n"); - printf(" sequence with round brackets.\n"); - printf(" -fasta Print out primary sequence in fasta format.\n"); - printf(" -fo Print out primary sequence in fasta format only\n"); - printf(" (no secondary structure).\n"); - printf(" -fon Same as -fo, with sequence and gene numbering in header.\n"); - printf(" -fos Same as -fo, with no spaces in header.\n"); - printf(" -fons Same as -fo, with sequence and gene numbering, but no spaces.\n"); - printf(" -j Display 4-base sequence on 3' end of astem\n"); - printf(" regardless of predicted amino-acyl acceptor\n"); - printf(" length.\n"); - printf(" -jr Allow some divergence of 3' "); - printf("amino-acyl acceptor\n"); - printf(" sequence from NCCA.\n"); - printf(" -jr4 Allow some divergence of 3' "); - printf("amino-acyl acceptor\n"); - printf(" sequence from NCCA, and display 4 bases.\n"); - printf(" -v Verbose. Prints out search progress\n"); - printf(" to STDERR.\n"); - printf(" -a Print out tRNA domain for tmRNA genes\n"); - printf(" -o print output into . If \n"); - printf(" exists, it is overwritten.\n"); - printf(" By default, output goes to STDOUT.\n"); - printf(" -w Print out genes in batch mode.\n"); - printf(" For tRNA genes, output is in the form:\n\n"); - printf(" Sequence name\n"); - printf(" N genes found\n"); - printf(" 1 tRNA- [locus 1]"); - printf(" (nnn)\n"); - printf(" i(,)\n"); - printf(" . \n"); - printf(" . \n"); - printf(" N tRNA- [Locus N]"); - printf(" (nnn)\n"); - printf(" i(,)\n"); - printf("\n N is the number of genes found\n"); - printf(" is the tRNA iso-acceptor species\n"); - printf(" is the tRNA anticodon "); - printf("relative position\n"); - printf(" (nnn) is the tRNA anticodon base triplet\n"); - printf(" i means the tRNA gene has a C-loop intron\n"); - printf("\n For tmRNA genes, output is in the form:\n"); - printf("\n n tmRNA(p) [Locus n] ,"); - printf("\n"); - printf(" \n\n"); - printf(" p means the tmRNA gene is permuted\n"); - printf("\n\n"); } - -void error_report(int n, char *s) -{ switch(n) - { case 0: fprintf(stderr, - "-%s not recognised, type aragorn -h for help\n",s); - break; - case 1: fprintf(stderr, - "-%s not understood, type aragorn -h for help\n",s); - break; - case 2: fprintf(stderr,"Could not open %s\n",s); - break; - case 3: fprintf(stderr, - "No sequence file specified, type aragorn -h for help\n"); - break; - case 4: fprintf(stderr,"Don't know genetic code %s\n",s); - break; - case 5: fprintf(stderr,"Too many genetic code modifications (max=%d)\n", - MAXGCMOD); - break; - default: break; } - exit(0); } - - -void process_genecode_switch(char *s, csw *sw) -{ int i,m,lmax,len[NGENECODE],anticodon,b[3]; - long l; - char c,*ss,*se; - static char genecodetag[NGENECODE][10] = - { "MET", - "STD","VERT","YEAST","PROT","INVERT", - "CILIATE","DELETED","DELETED","FLATWORM","EUPLOT", - "BACT","ALTYEAST","ASCID","ALTFLAT","BLEP", - "CHLOROPH","DELETED","DELETED","DELETED","DELETED", - "TREM","SCEN","THRAUST" }; - sw->geneticcode = STANDARD; - sw->gcfix = 1; - c = *s; - if (c >= '0') - if (c <= '9') - { lconvert(s,&l); - i = (int)l; - if ((i >= 0) && (i < NGENECODE)) sw->geneticcode = i; - goto MOD; } - for (i = 0; i < NGENECODE; i++) - { len[i] = 0; - ss = s; - se = genecodetag[i]; - while (c == *ss++) - { if (upcasec(c) != *se++) break; - len[i]++; }} - m = -1; - lmax = 0; - i = -1; - while (++i < NGENECODE) - if (len[i] > lmax) - { m = i; - lmax = len[i]; } - if (m >= 0) sw->geneticcode = m; - else error_report(4,s); - MOD: - sw->ngcmod = 0; - ss = s; - while (ss = strpos(ss,",")) - { if (sw->ngcmod >= MAXGCMOD) error_report(5,NULL); - ss++; - for (i = 0; i < 3; i++) - { b[i] = Adenine; - c = upcasec(ss[i]); - if (c == 'C') b[i] = Cytosine; - if (c == 'G') b[i] = Guanine; - if (c == 'T') b[i] = Thymine; - if (c == 'U') b[i] = Thymine; } - anticodon = ((Thymine - b[2])<<4) + ((Thymine - b[1])<<2) + (Thymine - b[0]); - if (!(se = strpos(ss,"="))) break; - se++; - for (i = 0; i < NAMINOACID; i++) - if (upcasec(se[0]) == upcasec(aaname[i][0])) - if (upcasec(se[1]) == upcasec(aaname[i][1])) - if (upcasec(se[2]) == upcasec(aaname[i][2])) - { aamap[sw->geneticcode][anticodon] = i; - sw->gcmod[sw->ngcmod] = anticodon; - sw->ngcmod++; - break; } }} - - - -void change_thresholds(csw *sw, double psthresh) -{ -sw->cdsthresh *= psthresh; -sw->srpthresh *= psthresh; -sw->tmrnathresh *= psthresh; -sw->mtdtthresh *= psthresh; -sw->mttthresh *= psthresh; -sw->mtdthresh *= psthresh; -sw->trnathresh *= psthresh; -} - - -int main(int z, char *v[]) -{ int i,lv,filecounter; - long l; - double psthresh; - char c1,c2,c3,c4,*s; - data_set d; - static csw sw = - { NULL,0,0,0,0,0,0,0,1,0,0, - STANDARD,0,{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},0,METAZOAN_MT, - 1,0,5,5,1,0,0,0,2,0,0,0,0,0,0,3,0,2,1,1,0,0,0,0,0,0,0,0,1, - 0,0,0,0,0,0,0,{0,0,0,0,0},0,0,{0,0,0,0,0},0,0,0,0,0,0,0,0,0L, - tRNAthresh,4.0,29.0,26.0,7.5,8.0, - mtRNAtthresh,mtRNAdthresh,mtRNAdtthresh,-7.9,-6.0, - tmRNAthresh,14.0,10.0,25.0,9.0,srpRNAthresh,CDSthresh, - {tRNAthresh,tmRNAthresh,srpRNAthresh,0.0,CDSthresh }, - { 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, - 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, - 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, - 10, 65, 82, 65, 71, 79, 82, 78, 32, - 118, 49, 46, 50, 46, 51, 54, 32, 32, 32, - 68, 101, 97,110, 32, 76, 97, 115, 108, - 101, 116, 116, 10, - 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, - 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, - 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, - 10, TERM }}; - sw.f = stdout; - filecounter = 0; - i = 0; - while (++i < z) - if (*(v[i]) == '-') - { lv = length(v[i]); - if (lv < 2) continue; - s = v[i] + 1; - c1 = upcasec(*s); - c2 = (lv > 2)?upcasec(s[1]):' '; - c3 = (lv > 3)?upcasec(s[2]):' '; - c4 = (lv > 4)?upcasec(s[3]):' '; - switch(c1) - { case 'E': sw.energydisp = (c2 == 'S')?2:1; - break; - case 'A': if (c2 == '7') sw.extastem = 0; - else - if (c2 == 'A') sw.matchacceptor = 1; - else sw.secstructdisp = 1; - break; - case 'B': if (c2 == 'R') sw.seqdisp = 2; - else sw.libflag = 1; - break; - case 'X': sw.libflag = 2; - break; - case 'W': if (sw.batch < 1) sw.batch = 1; - break; - case 'V': sw.verbose = 1; - break; - case 'S': if (c2 == 'S') - { sw.sp1max = 2; - sw.sp2min = 1; - sw.sp2max = 1; - break; } - if (c2 == 'E') - { if (sw.seqdisp < 1) sw.seqdisp = 1; - break; } - if ((c2 == 'C') || (c2 == '-')) - { sw.both = 1; - break; } - sw.both = 0; - break; - case 'F': if (softstrpos(s,"O")) - { sw.batch = 2; - if (softstrpos(s,"S")) sw.batch |= 0x4; - if (softstrpos(s,"N")) sw.batch |= 0x8; - if (softstrpos(s,"C")) sw.batch |= 0x10; } - else - { if (softstrpos(s,"C")) sw.seqdisp = 4; - else sw.seqdisp = 3; } - break; - case 'D': sw.both = 2; - break; - case 'L': sw.linear = 1; - break; - case 'C': if (c2 == '7') sw.cloop7 = 1; - else sw.linear = 0; - break; - case 'J': if (lv > 2) - { if (c2 == 'R') sw.aataildiv = 1; - if (c3 == '4') sw.aataildisp = 1; } - else sw.aataildisp = 1; - break; - case '1': sw.minintronlen = 10; - break; - case 'I': if (c2 == 'O') { sw.ioverlay = 1; s++; lv--; } - else if (c2 == 'F') { sw.ifixedpos = 1; s++; lv--; } - else if (c2 == 'R') { sw.ireportminintronlen = 1; s++; lv--; } - if (c3 == 'O') { sw.ioverlay = 1; s++; lv--; } - else if (c3 == 'F') { sw.ifixedpos = 1; s++; lv--; } - else if (c3 == 'R') { sw.ireportminintronlen = 1; s++; lv--; } - if (c4 == 'O') { sw.ioverlay = 1; s++; lv--; } - else if (c4 == 'F') { sw.ifixedpos = 1; s++; lv--; } - else if (c4 == 'R') { sw.ireportminintronlen = 1; s++; lv--; } - if (lv > 2) s = lconvert(s+1,&l); - else goto IMAX; - if (*s == ',') - { if (sw.ireportminintronlen == 1) - sw.minintronlenreport = (int)l; - else - sw.minintronlen = (int)l; - lconvert(s+1,&l); - sw.maxintronlen = (int)l; } - else sw.maxintronlen = (int)l; - if (sw.maxintronlen > (LSEQ - MAXTRNALEN)) - sw.maxintronlen = (LSEQ - MAXTRNALEN); - if (sw.maxintronlen > MAXINTRONLEN) - sw.maxintronlen = MAXINTRONLEN; - if ((sw.minintronlen < 0) || - (sw.maxintronlen < sw.minintronlen)) - error_report(1,v[i]); - if ((sw.minintronlenreport < 0) || - (sw.maxintronlen < sw.minintronlenreport)) - error_report(1,v[i]); - break; - IMAX: - sw.maxintronlen = MAXINTRONLEN; - break; - case 'T': if (c2 == 'V') - { sw.tvloop = 0; - break; } - sw.trna = 1; - if (lv > 2) - { s = dconvert(s+1,&sw.trnathresh); - if (*s == ',') dconvert(s+1,&sw.ttarmthresh); } - break; - case 'M': if (c2 == 'T') - { sw.mtrna = 1; - if (!sw.gcfix) sw.geneticcode = METAZOAN_MT; - if (lv > 3) - { s += 2; - c3 = upcasec(*s); - if (c3 == 'M') - { do c3 = upcasec(*++s); - while ((c3 == 'A') || (c3 == 'M') - || (c3 == 'L')); - sw.tvloop = 0; - sw.geneticcode = VERTEBRATE_MT; - sw.discrim = MAMMAL_MT; } - MTNXTC: - if (c3 == 'X') - { c3 = upcasec(*++s); - sw.mtxdetect = 0; - goto MTNXTC; } - if (c3 == 'C') - { c3 = upcasec(*++s); - sw.mtcdsscan = 0; - goto MTNXTC; } - if (c3 == 'D') - { c3 = upcasec(*++s); - sw.mtcompov = 1; - goto MTNXTC; } - if (c3 != '-') - if (c3 != '.') - if ((c3 < '0') || (c3 > '9')) - break; - s = dconvert(s,&sw.mtdtthresh); - if (*s == ',') s = dconvert(s+1,&sw.mttthresh); - if (*s == ',') s = dconvert(s+1,&sw.mtdthresh); - if (*s == ',') s = dconvert(s+1,&sw.mttarmthresh); - if (*s == ',') dconvert(s+1,&sw.mtdarmthresh); }} - else - { sw.tmrna = 1; - if (lv > 2) - dconvert(s+1,&sw.tmrnathresh); } - break; - case 'P': if (c2 == 'S') - { if (c3 != '-') - if (c3 != '.') - if ((c3 < '0') || (c3 > '9')) - { change_thresholds(&sw,PSEUDOGENElevel); - break; } - psthresh = 1.0; - dconvert(s+2,&psthresh); - change_thresholds(&sw,psthresh); - break; } - break; - case 'G': if (c2 != 'C') break; - process_genecode_switch(s+2,&sw); - break; - case 'R': if (c2 == 'N') sw.repeatsn = 1; - else - if (c2 == 'P') sw.reportpseudogenes = 1; - else sw.tmstrict = 0; - break; - case 'Q': sw.showconfig = 0; - break; - case 'H': aragorn_help_menu(); - exit(0); - case 'O': if (lv > 2) - s++; - else - { if (++i >= z) break; - s = v[i]; } - sw.f = fopen(s,"w"); - if (!sw.f) error_report(2,s); - break; - default: error_report(0,s); }} - else - if (filecounter < 1) - { d.f = fopen(v[i],"r"); - if (d.f) - filecounter++; - else - error_report(2,v[i]); } - else - if (filecounter < 2) - { sw.f = fopen(v[i],"w"); - if (!sw.f) error_report(2,v[i]); - filecounter++; } - else - error_report(0,v[i]); - if (filecounter < 1) - error_report(3,NULL); - if ((!sw.trna) & (!sw.tmrna)) - { sw.trna = 1; - sw.tmrna = 1; } - if (sw.mtrna) sw.trna = 0; - ts = (gene *)malloc(NT*sizeof(gene)); - if (ts == NULL) - { fprintf(stderr,"Not enough memory available to store detected genes\n"); - exit(1); } - sw.genespace = NT; - if (sw.libflag) fprintf(sw.f,"Library\n"); - if (sw.batch) bopt_fastafile(&d,&sw); - else iopt_fastafile(&d,&sw); - free((void *)ts); - fclose(d.f); - if (!sw.batch && sw.showconfig) - { fprintf(sw.f,"Configuration: "); - i = -1; - while (++i < z) fprintf(sw.f,"%s ",v[i]); - fputc('\n',sw.f); } - if (sw.f != stdout) fclose(sw.f); - return(0); } -