cds/tools/chlorodb added
Former-commit-id: 0579e878a69b7c285ca71870e9ca5730649a2fda Former-commit-id: 7cced5b488441d87bf070a9a444317db0e048880
This commit is contained in:
@ -1,21 +1,21 @@
|
||||
#!/bin/csh -f
|
||||
#
|
||||
# Annotate CDS - Pass1
|
||||
# Annotate CDS - Exonerate
|
||||
#
|
||||
#========================================================================================
|
||||
#
|
||||
# Annotate CDS of chlorodb/core proteins using exonerate
|
||||
# Annotate CDS using exonerate
|
||||
#
|
||||
# pass1.sh <FASTAFILE> <FAMILY> [<OUTDIR>]
|
||||
# do_exonerate.sh <FASTAGENOM> <FASTAPROT> [<OUTDIR>]
|
||||
#
|
||||
# - <FASTAFILE> : The fasta file containing the genome to annotate
|
||||
# - <FAMILY> : Name of the protein family (defined in chlorodb/core)
|
||||
# - <FASTAGENOM> : The fasta file containing the genome to annotate
|
||||
# - <FASTAPROT> : The fasta file containing the protein family
|
||||
#
|
||||
# Results are in file : `basename <FASTAFILE>:r`.<FAMILY>.res
|
||||
# Results are in file : `basename <FASTAGENOM>:r`.`basename <FASTAPROT>:r`.res
|
||||
#
|
||||
#========================================================================================
|
||||
#
|
||||
# usage: go_pass1.sh fasta family [outdir]
|
||||
# usage: do_exonerate.sh dna.fasta prot.fasta [outdir]
|
||||
#
|
||||
unsetenv ORG_SOURCED
|
||||
|
||||
@ -31,12 +31,14 @@ NeedArg 2
|
||||
|
||||
set GenoFile = $Argv[1]
|
||||
set GenoName = `basename $GenoFile:r`
|
||||
set ProtName = $Argv[2]
|
||||
set ProtDir = $CDS_DATA_DIR/chlorodb/core
|
||||
set ProtFile = $ProtDir/$ProtName.fst
|
||||
|
||||
set ProtFile = $Argv[2]
|
||||
set ProtDir = `dirname $ProtFile`
|
||||
set ProtName = `basename $ProtFile:r`
|
||||
|
||||
NeedFile $GenoFile
|
||||
NeedFile $ProtFile
|
||||
NeedFile $ProtDir/Annot.lst
|
||||
|
||||
set OutDir = .
|
||||
if ($#Argv >= 3) set OutDir = $3
|
||||
@ -101,7 +103,7 @@ endif
|
||||
|
||||
if ($PASS1_SPEEDUP != 0) then
|
||||
|
||||
$PROG_DIR/go_filterbx.sh $GenoFile $ProtFile \
|
||||
$PROG_DIR/do_filterbx.sh $GenoFile $ProtFile \
|
||||
$PASS1_BLASTX_FILTER_IDMIN \
|
||||
$PASS1_BLASTX_FILTER_NBMIN \
|
||||
$PASS1_BLASTX_FILTER_NBMAX > D_$$
|
||||
@ -159,8 +161,7 @@ $AwkCmd -v MAX_SPAN=$PASS1_MAX_SPAN \
|
||||
# get annotations
|
||||
#
|
||||
|
||||
egrep "^$ProtName " $CDS_DATA_DIR/chlorodb/core/Annot.lst |\
|
||||
awk '{print "c annot", $0}' > T_$$
|
||||
egrep "^$ProtName " $ProtDir/Annot.lst | awk '{print "c annot", $0}' > T_$$
|
||||
|
||||
#
|
||||
# extend start/stop
|
@ -5,7 +5,7 @@
|
||||
#
|
||||
# output on stdout
|
||||
#
|
||||
# usage: go_filterbx.sh dna_fasta prot_fasta [idmin nbmin nbmax]
|
||||
# usage: do_filterbx.sh dna.fasta prot.fasta [idmin nbmin nbmax]
|
||||
#
|
||||
unsetenv ORG_SOURCED
|
||||
|
@ -13,7 +13,7 @@
|
||||
# Results are printed to the standard output
|
||||
#
|
||||
#========================================================================================
|
||||
# usage: go_cds.sh fasta
|
||||
# usage: go_cds.sh fasta [db_core]
|
||||
#
|
||||
unsetenv ORG_SOURCED
|
||||
|
||||
@ -22,13 +22,19 @@ source $ORG_HOME/scripts/csh_init.sh
|
||||
|
||||
NeedArg 1
|
||||
|
||||
set Fasta = $Argv[1]
|
||||
set Fasta = $Argv[1]; Shift
|
||||
|
||||
NeedFile $Fasta
|
||||
|
||||
set Genome = `basename $Fasta:r`
|
||||
|
||||
NeedFile $CDS_DATA_DIR/chlorodb/core
|
||||
set DbCore = $CDS_DATA_DIR/chlorodb/core
|
||||
|
||||
if ($#Argv > 0) then
|
||||
set DbCore = $Argv[1]; Shift
|
||||
endif
|
||||
|
||||
NeedFile $DbCore/Annot.lst
|
||||
|
||||
#
|
||||
# run everything into temporary place
|
||||
@ -44,15 +50,15 @@ endif
|
||||
# pass1: run exonerate
|
||||
#
|
||||
|
||||
set fams = `ls $CDS_DATA_DIR/chlorodb/core/*.fst`
|
||||
set fams = `ls $DbCore/*.fst`
|
||||
|
||||
Notify "running pass1: exonerate of $Genome"
|
||||
Notify "running pass1: exonerate of $Genome on $DbCore"
|
||||
|
||||
foreach f ($fams)
|
||||
set prot = `basename $f:r`
|
||||
$PROG_DIR/go_pass1.sh $Fasta $prot $temp
|
||||
$PROG_DIR/do_exonerate.sh $Fasta $f $temp
|
||||
end
|
||||
|
||||
|
||||
#
|
||||
# pass2: transsplicing
|
||||
#
|
||||
|
@ -1,5 +1,7 @@
|
||||
#!/bin/csh -f
|
||||
|
||||
setenv Verbose 1
|
||||
|
||||
setenv ORG_HOME `dirname $0`/../../..
|
||||
source $ORG_HOME/scripts/csh_init.sh
|
||||
|
||||
@ -8,9 +10,9 @@ echo "+ testing CDS"
|
||||
setenv TMP_CLEANUP 0
|
||||
setenv PASS1_SPEEDUP 1
|
||||
setenv PASS1_SLOWDOWN 0
|
||||
setenv PASS1_BLASTX_FILTER_NBMAX 10
|
||||
setenv PASS1_BLASTX_FILTER_NBMAX 5
|
||||
|
||||
`dirname $0`/../bin/go_cds.sh test.fst > test.bak
|
||||
`dirname $0`/../bin/go_cds.sh test.fst test.db > test.bak
|
||||
|
||||
diff -q test.bak test.ref >& /dev/null
|
||||
|
||||
@ -18,7 +20,7 @@ set stat = $status
|
||||
|
||||
if ($stat == 0) then
|
||||
echo "+ $VTC[3]CDS test Ok$VTC[1]"
|
||||
\rm -r test.bak test.tmp
|
||||
\rm -r test.bak test.tmp test.db/*.fst.p??
|
||||
else
|
||||
echo "* $VTC[2]CDS test Failure$VTC[1]"
|
||||
endif
|
||||
|
18
detectors/cds/test/test.db/Annot.lst
Normal file
18
detectors/cds/test/test.db/Annot.lst
Normal file
@ -0,0 +1,18 @@
|
||||
atpf atpF 483 1:80_2:401_3:1_4:1 POLYEX ATP_synthase_CF0_B_subunit
|
||||
ccsa ccsA 477 1:476_2:1 POLYEX cytochrome_c_biogenesis_protein
|
||||
ndha ndhA 384 1:9_2:375 POLYEX NADH_dehydrogenase_subunit_1
|
||||
ndhb ndhB 699 1:5_2:693_3:1 POLYEX NADH_dehydrogenase_subunit_2
|
||||
ndhd ndhD 383 1:383 MONEX NADH_dehydrogenase_subunit_4
|
||||
ndhe ndhE 395 1:395 MONEX NADH_dehydrogenase_subunit_4L
|
||||
ndhf ndhF 384 1:384 MONEX NADH_dehydrogenase_subunit_5
|
||||
ndhg ndhG 386 1:386 MONEX NADH_dehydrogenase_subunit_6
|
||||
ndhh ndhH 406 1:400_2:6 POLYEX NADH_dehydrogenase_subunit_7
|
||||
ndhi ndhI 386 1:386 MONEX NADH_dehydrogenase_subunit_I
|
||||
psac psaC 498 1:491_2:4_3:3 POLYEX photosystem_I_subunit_VII
|
||||
rpl2 rpl2 800 1:101_2:698_5:1 POLYEX ribosomal_protein_L2
|
||||
rpl23 rpl23 787 1:782_2:2_3:1_4:2 POLYEX ribosomal_protein_L23
|
||||
rpl32 rpl32 474 1:474 MONEX ribosomal_protein_L32
|
||||
rps15 rps15 480 1:479_2:1 POLYEX ribosomal_protein_S15
|
||||
rps7 rps7 853 1:853 MONEX ribosomal_protein_S7
|
||||
ycf1 ycf1 408 1:406_2:2 POLYEX hypothetical_chloroplast_RF1
|
||||
ycf2 ycf2 654 1:649_2:5 POLYEX Ycf2
|
50
detectors/cds/test/test.db/atpf.fst
Normal file
50
detectors/cds/test/test.db/atpf.fst
Normal file
@ -0,0 +1,50 @@
|
||||
>AC_000188@LyesCp071@atpf@11803@13043@R@2@185 ATP_synthase_CF0_B_chain
|
||||
MKNVTDSFVSLGHWPSAGSFGFNTDILATNPINLSVVLGVLIFFGKGVLS
|
||||
DLLDNRKQRILNTIRNSEELRGGAIEQLEKARSRLRKVETEAEQFRVNGY
|
||||
SEIEREKLNLINSTYKTLEQLENYKNETIQFEQQRAINQVRQRVFQQALR
|
||||
GALGTLNSCLNNELHLRTISANIGMLGTMKEITD
|
||||
>NC_000925@PopuCp059@atpf@45392@45943@R@1@184 ATP_synthase_CF0_B_subunit
|
||||
MNSIVNITPIIIILSEHSSEHTFGFNSDIFEANVINILLLLFGLIYVLKQ
|
||||
SLGSTLNERQLKVLAAIQESEERLEQASSRLSESEKQLAQTQIIINQIKK
|
||||
EAQLTAEKVRSSILAQGQIDIERLAITGKSNIETAEKQIRRQIQQQIAFL
|
||||
ALKKVTLQLENQMSSDIQLRIIDNNIAKLGDQL
|
||||
>NC_000926@GuthCp075@atpf@65505@66053@R@1@183 ATP_synthase_CF0_B_subunit
|
||||
MDIISGFYNTINLAELSNAKTFGFNPNILEANVLNIAILLSGVIYLGRNF
|
||||
LTSALESRQQKVTEAIQEAEERLQQANVKLLDAEKQLTQAQTVIEQIKKE
|
||||
AEKTARTVKETILAQGKLDIERLTNNGKSSIEKAELQIKKQIQQHITDLA
|
||||
IKKVSAQMETFMTDNLQVKVIDTNIASLGGKI
|
||||
>NC_000927@NeolCp025@atpf@19121@19651@R@1@177 ATP_synthase_CF0_B_chain
|
||||
MFHFLALTPLAHSEGFGLNTNILETNILNLAAVFALLAYVGTDFVSSLLK
|
||||
TRKESILKSLRDADERYQDAVNQLKQALQELETARTNAAEIRRQSEINAE
|
||||
AIRQRLELLTQEEMARLEEAKETIIKLEEEKAVAEVCTKVISMALVRAEK
|
||||
KIISSMDEAMHRRVMDMYLNLLREVY
|
||||
>NC_000932@ArthCp008@atpf@11529@12798@R@2@185 ATP_synthase_CF0_B_subunit
|
||||
MKNLTDSFVYLGHWPSAGSFGFNTDILATNPINLSVVFGVLIFFGKGVLN
|
||||
DLLDNRKQRILNTIRNSEELREGAIQQLENARARLRNVETEADKFRVNGY
|
||||
SEIEREKLNLINSTYKTLKQLENYKNETILFEQQRTINQVRERVFQQALQ
|
||||
GAIGTLNSCLSNELHLRTINANIGMFGTMKEITD
|
||||
>NC_001319@MapoCp012@atpf@18468@19609@D@2@185 ATP_synthase_CF0_B_subunit
|
||||
MENGTYFIISSNFWTIAGSFGLNTNLLETNLINLGVVLGLLVYFGKGVLS
|
||||
NLLNNRKLTILNTIQDAEERYKEATDKLNQARTRLQQAKQKADDIRINGL
|
||||
SQMEKEKQDLINAADEDSKRLEDSKNATIRFEKQRAIEQVRQQVSRLALE
|
||||
RALETLKSRLNSELHLRMIDYHIGLLRAMESTIE
|
||||
>NC_001320@OrsajCp021@atpf@32741@34111@D@2@181 ATP_synthase_CF0_B_subunit
|
||||
MKNVTHSFVFLAHWPSAGSFGLNTDILATNLINLTVVVGVLIYFGKGVLK
|
||||
DLLDNRKQRILSTIRNSEELRRGTIEQLEKARIRLQKVELEADEYRMNGY
|
||||
SEIEREKANLINATSISLEQLEKSKNETLYFEKQRAMNQVRQRVFQQAVQ
|
||||
GALGTLNSCLNTELHFRTIRANISILGAME
|
||||
>NC_001603@EugrCp038@atpf@64813@66970@D@4@184 ATP_synthase_CF0_B_chain
|
||||
MVIDNFNIFTIISNAKTFGINTNVFETNIINLAIVVGTLFYYGKLTLSDL
|
||||
LKTRKKTIIKNILDIDEKIRSSQSSLYLAELEFENAAKKASLIRSNGTTF
|
||||
CLKSFDIIRSSVNEDIKRLKQSKRLILRTEDKKSVREIFKNLYSQACQKA
|
||||
KATIIKRLNSKIHKKIILKKMEKMSLKKLKPKY
|
||||
>NC_001631@PithCp015@atpf@11663@12972@R@2@185 ATP_synthase_CF0_B_subunit
|
||||
MKNVIDPFISLSYWPSAGGFGSNTNILETNIINSSVVLSVLIYFGKGVLS
|
||||
NLLDNRKQKILETIRNSEELCKGAIDQLEKARACLRNVEMIADEIQVNGN
|
||||
SQIEREKEDLLNTASDNLEQLEDPKNETIYSEQQRAFDQIRQQVSRQALR
|
||||
RAIGTLNSRLNTELHLRTIDHNIGLLRTMMNTND
|
||||
>NC_001666@ZemaCp019@atpf@35097@36479@D@2@184 ATP_synthase_CF0_B_subunit
|
||||
MKNVTHSFVFLAHWPFAGSFGLNTDILATNLINLTVVVGVLIFFGKGVLK
|
||||
DLLDNRKQRILSTIRNSEELRKGTLEQLEKARIRLQKVELEADEYRMNGY
|
||||
SEIEREKENLINATSISLEQLEKSKNETLYFEKQRAMNQVRQQGFQQAVQ
|
||||
GALGTLNSCLNTELHFRTIRANIGILGAIEWKR
|
78
detectors/cds/test/test.db/ccsa.fst
Normal file
78
detectors/cds/test/test.db/ccsa.fst
Normal file
@ -0,0 +1,78 @@
|
||||
>AC_000188@LyesCp084@ccsa@115765@116706@D@1@314 cytochrome_c_biogenesis_protein
|
||||
MIFSTLEHILTHISFSIVSIVITIHLITFLVDEIVKLYDSSEKGIIVTFF
|
||||
CITGLLVTRWVSSGHFPLSDLYESLIFLSWSFSLIHIIPYFKKNVLILSK
|
||||
ITGPSAILTQGFATSGILTEIHQSGILVPALQSEWLIMHVSMMILGYAAL
|
||||
LCGSLLSVALLVITFRKNRKLFSKSNVFLNESFFLGENVVENTSFFCTKN
|
||||
YYRSQLIQQLDYWSYRVISLGFTFLTIGILSGAVWANEAWGSYWNWDPKE
|
||||
TWAFITWIVFAIYLHTRTNRNLRGPNSAIVASIGFLIIWICYFGVNLLGI
|
||||
GLHSYGSFPSTFN
|
||||
>NC_000925@PopuCp184@ccsa@160407@161366@D@1@320 cytochrome_c_biogenesis_protein
|
||||
MNLEMMQNSCVNFAFGGLLTAMLVYWSSLAFPRISGLNKLAALITLLVNI
|
||||
ALALTLSSRWFANGYFPLSNLYESLLFLAWGLTFVHLFIESKTKSRLIGA
|
||||
VSIPVAMFVTAFASLALPIEMQKASPLVPALKSNWLMMHVSIMMISYSIL
|
||||
ILGSLLSILFLIITRGQDINLKGSSVGTGSYTVKSLDSNPSFAFSNPSGI
|
||||
VQEQSNMLINSTRMNLLESIDNLSYRIIGLGFPLLTIGIVAGAVWANEAW
|
||||
GSYWSWDPKETWALITWLIFAAYLHCRITKSWQGKRPAILASVGFLVVWI
|
||||
CYLGVNFLGKGLHSYGWLA
|
||||
>NC_000926@GuthCp037@ccsa@33657@34562@R@1@302 cytochrome_c_biogenesis_protein
|
||||
MFNVQFDIFNFSNNITFLTLLISLISYWLGLIFKKIKNVFYIGYGSTILA
|
||||
CITITIILGTRWIESGYFPLSNLYESLMFLTWGLLFSAIYLEYKTNLYLI
|
||||
GAIVSPISLFIVSFSTLSLPQDMQKAAPLVPALKSNWLMMHVSVMMLSYS
|
||||
TLIIGSLLAILYLVLIKAQQKKHSLKDFAFANLEFTFPKSTNSTNFNLLE
|
||||
TLDNLSYRTIGFGFPLLTIGIIAGAVWANEAWGTYWSWDPKETWALITWL
|
||||
VFAAYLHARITKSWTGERPAYLAALGFVVVWICYLGVNFLGKGLHSYGWL
|
||||
N
|
||||
>NC_000927@NeolCp108@ccsa@128438@129259@D@1@274 cytochrome_c_biogenesis_protein
|
||||
MSTFSILSLVAFATLFVTMLLYFFQRQPLARQSMWIAHTSLAGLLLLRWV
|
||||
QSGHFPLSNLYESCLFLSWAVTLGHFVVEKDASRAGFLDLGIFTAPMAFF
|
||||
VYAFATFSLPPTMQEAGPLVPALRSHWLMMHVTLMILSYAALLFGSVLSL
|
||||
AFLVITTGPRKNSEKLQSLASTFDTLSYRTLGIGFPLLTVGILSGAVWAN
|
||||
EAWGSYWSWDPKETWALITWLIFAIYLHSRLTYGWNGQKAALIASVGFFL
|
||||
IWICYLGVNLLGKGLHSYGWLTS
|
||||
>NC_000927@NeolCp129@ccsa@163667@164488@R@1@274 cytochrome_c_biogenesis_protein
|
||||
MSTFSILSLVAFATLFVTMLLYFFQRQPLARQSMWIAHTSLAGLLLLRWV
|
||||
QSGHFPLSNLYESCLFLSWAVTLGHFVVEKDASRAGFLDLGIFTAPMAFF
|
||||
VYAFATFSLPPTMQEAGPLVPALRSHWLMMHVTLMILSYAALLFGSVLSL
|
||||
AFLVITTGPRKNSEKLQSLASTFDTLSYRTLGIGFPLLTVGILSGAVWAN
|
||||
EAWGSYWSWDPKETWALITWLIFAIYLHSRLTYGWNGQKAALIASVGFFL
|
||||
IWICYLGVNLLGKGLHSYGWLTS
|
||||
>NC_000932@ArthCp073@ccsa@114461@115447@D@1@329 cytochrome_c_biogenesis_protein
|
||||
MIFSILEHILTHISFSVVSIVLTIYFLTLLVNLDEIIGFFDSSDKGIIIT
|
||||
FFGITGLLLTRWIYSGHFPLSNLYESLIFLSWAFSIIHMVSYFNKKQQNK
|
||||
LNTITAPSVIFIQGFATSGLLNKMPQSAILVPALQSQWLMMHVSMMILGY
|
||||
GALLCGSLLSIALLVITFRKVGPTFWKKNIKKNFLLNELFSFDVLYYINE
|
||||
RNSILLQQNINFSFSRNYYRYQLIQQLDFWSFRIISLGFIFLTVGILSGA
|
||||
VWANETWGSYWNWDPKETWAFITWTIFAIYLHIKTNRNVRGINSAIVALI
|
||||
GFILIWICYFGVNLLGIGLHSYGSFTSN
|
||||
>NC_001319@MapoCp078@ccsa@95482@96444@D@1@321 cytochrome_c_biogenesis_protein
|
||||
MPFITLERILAHTSFFLLFFVTFIYWGKFLYINIKPITILGEISMKIACF
|
||||
FITTFLLIRWSSSGHFPLSNLYESSMFLSWSFTLIHLILENKSKNTWLGI
|
||||
ITAPSAMLTHGFATLSLPKEMQESVFLVPALQSHWLMMHVTMMMLSYSTL
|
||||
LCGSLLAITILIITLTKQKNLPILTSYFNFPFNSFIFKNLLQPMENEILS
|
||||
YKTQKVFSFINFRKWQLIKELDNWSYRVISLGFPLLTIGILSGAVWANEA
|
||||
WGSYWNWDPKETWALITWLIFAIYLHTRMIKGWQGKKPAIIASLGFFIVW
|
||||
ICYLGVNLLGKGLHSYGWLI
|
||||
>NC_001320@OrsajCp087@ccsa@105236@106201@D@1@322 cytochrome_c_biogenesis_protein
|
||||
MLFATLEHILTHISFSTISIVITIHLITLLVRELGGLRDSSEKGMIATFF
|
||||
CITGFLVSRWASSGHFPLSNLYESLIFLSWALYILHMIPKIQNSKNDLST
|
||||
ITTPSTILTQGFATSGLLTEMHQSTILVPALQSQWLMMHVSMMLLSYATL
|
||||
LCGSLLSAALLMIRFRKNLDFFSKKKKNVLSKTFFFNEIEYFYAKRSALK
|
||||
STFFPLFPNYYKYQLIERLDSWSYRVISLGFTLLTIGILCGAVWANEAWG
|
||||
SYWNWDPKETWAFITWTIFAIYLHSRTNPNWKGTKSAFVASIGFLIIWIC
|
||||
YFGINLLGIGLHSYGSFTLPI
|
||||
>NC_001631@PithCp147@ccsa@104925@105887@R@1@321 cytochrome_c_biogenesis_protein
|
||||
MIFITLEHILAHISFSLILVVTLIYWGTLVYRIEGLSSSGGKGMIVTFLC
|
||||
TTGLLINRWLYSGHLPLSNLYESFMFLSWSSSVFHILLEVRSRDDRWLGA
|
||||
ITAPSAMLTHGFATLGLPEEMQRSGMLVPALQSHWSMMHVSMILFSYATL
|
||||
LCGSLASIALLVIMSGVNRQVIFGAMDNLFSRAILPNENFYSHEKQKSDL
|
||||
QYTVYFSSTNYRKCQLIKQLDHWSYRAIGLGFSLSTIGTLSGAIWANEAW
|
||||
GSYWSWDPKETWALITWTIFAIYLHTRMNKGWQGEEPAIVASLGFFIVWI
|
||||
RYLGVNLLGIGLHSYGWLEP
|
||||
>NC_001666@ZemaCp085@ccsa@108995@109960@D@1@322 cytochrome_c_biogenesis_protein
|
||||
MLFATLEHILTHISFSTISIVITIHLITLLVRELRGLRDSSEKGMIATFF
|
||||
SITGFLVSRWVSSGHFPLSNLYESLIFLSWTLYILHTIPKIQNSKNDLST
|
||||
ITTPSTILTQGFATSGLLTEMHQSTILVPALQSQWLMMHVSMMLLSYATL
|
||||
LCGSLLSAALLIIRFRKNFDFFSLKKNVFLKTFFFSEIEYLYAKRSALKN
|
||||
TSFPVFPNYYKYQLTERLDSWSYRVISLGFTLLTVGILCGAVWANEAWGS
|
||||
YWNWDPKETWAFITWTIFAIYLHSRKNPNWKGTNSALVASIGFLIIWICY
|
||||
FGINLLGIGLHSYGSFTLPSK
|
90
detectors/cds/test/test.db/ndha.fst
Normal file
90
detectors/cds/test/test.db/ndha.fst
Normal file
@ -0,0 +1,90 @@
|
||||
>AC_000188@LyesCp085@ndha@121113@123337@R@2@364 NADH_dehydrogenase_subunit_1
|
||||
MIIDTTEIETINSFSKLESLKEVYGIIWMLVPIVTLVLGITIGVLVIVWL
|
||||
EREISAGIQQRIGPEYAGPLGILQALADGTKLLLKENLIPSTGDTRLFSI
|
||||
GPSIAVISIFLSYSVIPFGDHLVLADLSIGVFFWIAISSIAPVGLLMSGY
|
||||
GSNNKYSFLGGLRAAAQSISYEIPLALCVLSISLLSNSLSTVDIVEAQSK
|
||||
YGFWGWNLWRQPIGFIVFLISSLAECERLPFDLPEAEEELVAGYQTEYSG
|
||||
IKFGLFYIASYLNLLVSSLFVTVLYLGGWNLSIPYIFVPDIFGINKGGKV
|
||||
FGTLIGIFITLAKTYLFLFIPIATRWTLPRLRMDQLLNLGWKFLLPISLG
|
||||
NLLLTTSSQLLSL
|
||||
>NC_000927@NeolCp116@ndha@144380@145468@D@1@363 NADH_dehydrogenase_subunit_1
|
||||
MTYVLDLKRSFLDACTWLIGDNFRDLGSVLWVPLPILSLVIVATLGVLVI
|
||||
VWLERKISAGVQQRVGPEYGGALGLLQPLADGLKLVFKEDVVPAKSDTWL
|
||||
FTLGPAVVVIPIFLAYLVVPFGQQLIIADLRIGIFFWIAISSIAPIGLLM
|
||||
SGYGSNNKYSFLGGLRAAAQSISYELPLAICVLSVCLLADSLSTVDIVES
|
||||
QSSWGILTWNIWRQPIGFVAFLIAALAECERLPFDLPEAEEELVAGYQTE
|
||||
YTGMKFGLFYVGSYVNLLVSGCFVTVLYLGGWHGPFAIDGILPDSPPFQV
|
||||
LDAFLGITWTLLKTFLFLFAAILTRWTLPRVRIDQLLDLGWKFLLPVSLG
|
||||
NLLLTASLKLLF
|
||||
>NC_000932@ArthCp079@ndha@119847@122009@R@2@361 NADH_dehydrogenase_subunit_1
|
||||
MIIYATAVQTINSFVKLESLKEVYGLIWIFVPIFSLVLGIITGVLVIVWL
|
||||
EREISAGIQQRIGPEYAGPLGILQALADGTKLLFKENLRPSRGNTPLFSI
|
||||
GPSIAVISILLSYSVIPFSNHLVLADLNIGIFLWIAISSIAPIGLLMSGY
|
||||
GSNNKYSFLGGLRAAAQSISYEIPLTLCVLSISLLSNSLSTVDIVEAQSK
|
||||
YGFWGWNLWRQPIGFIIFLISSLAECERLPFDLPEAEEELIAGYQTEYSG
|
||||
IKFGLFYVASYLNLLISSLFVTVLYLGGWNISIPYISILELFQRDQIFGT
|
||||
TIGIFITLAKTYLFLFVSIATRWTLPRLRMDQLLNLGWKFLLPISLGNLL
|
||||
LTTSFQLFSL
|
||||
>NC_001319@MapoCp084@ndha@100382@102200@R@2@369 NADH_dehydrogenase_subunit_1
|
||||
MISNINLEDKFFSFFFTLGFSKEFFNFLWIIFSILILMLGVTIGVLVLVW
|
||||
LERKISAAIQQRIGPEYAGPLGIIQALADGIKLFLKEDIVPAQGDVWLFN
|
||||
IGPILVLIPVFLSYLVIPFEYNVILANFSIGVFFWIAVSSVVPLGLLMAG
|
||||
YGSNNKYSFLGGLRAAAQSISYEIPLALSVLSIALLSNSLSTVDIVEAQS
|
||||
KYGFLSWNLWRQPIGFIVFFIASLAECERLPFDLPEAEEELVAGYQTEYS
|
||||
GMKFAFFYLASYLNLLVSSLFVTILYLGGWHFSIPFFSLFKNFEWNLMSN
|
||||
GISEVISIIIGIVITLVKSYLFLFISIMTRWTLPRIRIDQLLNLGWKFLL
|
||||
PIALGNLLLTTSFQLFLL
|
||||
>NC_001320@OrsajCp093@ndha@110631@112706@R@2@363 NADH_dehydrogenase_subunit_1
|
||||
MIIDRVQVEAINSFSNLELLKEVYGLIWILPILTLLLGITIEVLVIVWLE
|
||||
REISASIQQRIGPEYAGPLGLLQAIADGTKLLFKEDILPSRGDIPLFSIG
|
||||
PSIAVISILLSFLVIPLGYRFVLADLSIGVFLWIAISSIAPIGLLMAGYS
|
||||
SNNKYSFSGGLRAAAQSISYEIPLTFCVLAISLLSNSSSTVDIVEAQSKY
|
||||
GFFGWNLWRQPIGFLVFLISSLAECERLPFDLPEAEEELVAGYQTEYSGI
|
||||
KYGLFYLVSYLNLLVSSLFVTVLYLGGWNLSIPYISFFGFFQMNKMVGIL
|
||||
EMTMSIFITLTKAYLFLFISITIRWTLPRMRMDQLLNLGWKFLLPISLGN
|
||||
LLLTTSSQLVSL
|
||||
>NC_001666@ZemaCp091@ndha@114343@116454@R@2@363 NADH_dehydrogenase_subunit_1
|
||||
MIIDRVEVETINSFSKSELFKEIYGLIWILPIFALLLGITIEVLVIVWLE
|
||||
REISASIQQRIGPEYAGPLGLLQAIADGTKLLLKEDILPSRGDIPLFSIG
|
||||
PSIAVISILLSFLVIPLGYRFVLADLSIGVFLWIAISSIAPIGLLMAGYS
|
||||
SNNKYSFSGGLRAAAQSISYEIPLTFCVLAISLLSNSSSTVDIVEAQSKY
|
||||
GFFGWNLWRQPIGFLVFLISSLAECERLPFDLPEAEEELVAGYQTEYSGI
|
||||
KYGLFYLVSYLNLLVSSLFVTVLYLGGWNFSIPYISFFGFFQMNKIIGIL
|
||||
EMVIGIFITLTKAYLFLFISITIRWTLPRMRMDQLLNLGWKFLLPISLGN
|
||||
LLLTTSSQLVSL
|
||||
>NC_001879@NitaCp088@ndha@121696@123935@R@2@364 NADH_dehydrogenase_subunit_1
|
||||
MIIDTTEIETINSFSKLESLKEVYGIIWMLFPILTLVLGITIGVLVIVWL
|
||||
EREISAGIQQRIGPEYAGPLGILQALADGTKLLLKENLIPSTGDTRLFSI
|
||||
GPSIAVISIFLSYSVIPFGDHLVLADLSIGVFFWIAISSIAPVGLLMSGY
|
||||
GSNNKYSFLGGLRAAAQSISYEIPLALCVLSISLLSNSLSTVDIVEAQSK
|
||||
YGFWGWNLWRQPIGFIVFLISSLAECERLPFDLPEAEEELVAGYQTEYSG
|
||||
IKFGLFYIASYLNLLVSSLFVTVLYLGGWNLSIPYIFVPELFGINKRGKV
|
||||
FGTLIGIFITLAKTYLFLFIPIATRWTLPRLRMDQLLNLGWKFLLPISLG
|
||||
NLLLTTSSQLLSL
|
||||
>NC_002186@MeviCp102@ndha@106489@107592@R@1@368 NADH_dehydrogenase_subunit_1
|
||||
MLLTINLKDSFLTFFSNLGFSNEFSKALWIPLPILLLIILAVVGVLVVVW
|
||||
LERKISAAVQQRIGPEYAGPLGVLQPLADGLKLAFKEDIIPSKGDVLLFT
|
||||
LGPAIVVIPIFLSYLIVPFGENLIVSNINLGIFFWITVSSVAPLGLLMSG
|
||||
YGSNNKYSFLGGLRATAQSLSYEIPLALCVLSICLLSDSLSTIDIVQKQS
|
||||
TYGILGWNIWRQPIGFIAFIIAALAECERLPFDLPEAEEELVAGYQTEYT
|
||||
GMKFGLFYIGSYVNLLVSALFASVLYLGGWSLPIPIEFLLNKMSLNASDS
|
||||
EVQVISAFLGIGMTLLKTYLFLFLSILTRWTMPRVRIDQLLDLGWKFLLP
|
||||
ISLGNLLLTASLKIALL
|
||||
>NC_002202@SpolCp090@ndha@117598@119774@R@2@366 NADH_dehydrogenase_subunit_1
|
||||
MIIDTTTTKVQAINSFSRLEFLKEVYETIWMLFPILILVLGITIGVLVIV
|
||||
WLEREISASIQQRIGPEYAGPLGILQALADGTKLLFKENLLPSRGDTYLF
|
||||
SIGPSIAVISILLGYLIIPFGSRLVLADLSIGVFLWIAVSSIAPIGLLMS
|
||||
GYGSNNKYSFLGGLRAAAQSISYEIPLTLCVLSISLLSNSSSTVDIVEAQ
|
||||
SKYGFWGWNLWRQPIGFIVFIISSLAECERLPFDLPEAEEELVAGYQTEY
|
||||
SGIKFGLFYVASYLNLLISSLFVTVLYLGGWNLSIPYIFISEFFEINKID
|
||||
GVFGTTIGIFITLAKTFLFLFIPITTRWTLPRLRMDQLLNLGWKFLLPIS
|
||||
LGNLLLTTSSQLFSL
|
||||
>NC_002693@OeelhCp102@ndha@125123@127258@R@2@364 NADH_dehydrogenase_subunit_1
|
||||
MIIDTTAVQDMNSFSRLQSLKEVSGIIWMLVPILSLVLGITLGVLVIVWL
|
||||
EREISAGIQQRIGPEYAGPMGILQALADGIKLIFKENLLPSRGDTRLFSI
|
||||
GPSIAVISILLSYSVIPFSSHLVLSDLNIGVFLWIAVSSIAPIGLLMSGY
|
||||
GSNNKYSFLGGLRAAAQSISYEIPLTLCLLSISLLSNSSSTVDIVEAQSK
|
||||
YGLWGWNLWRQPIGFLVFLISSLAECERLPFDLPEAEEELVAGYQTEYSG
|
||||
IKFGLFYVASYLNLLVSSLFVTVLYLGGWNISISYIFVPGLFEITKVGRV
|
||||
FGTTIGIFTTLAKTYLFLFISITTRWTLPRLRMDQLLNLGWKFLLPISLG
|
||||
NLLLTTSSQLLSL
|
120
detectors/cds/test/test.db/ndhb.fst
Normal file
120
detectors/cds/test/test.db/ndhb.fst
Normal file
@ -0,0 +1,120 @@
|
||||
>AC_000188@LyesCp066@ndhb@96224@98435@R@2@511 NADH_dehydrogenase_subunit_2
|
||||
MIWHVQNENFILDSTRIFMKAFHLLLFDGSLIFPECILIFGLILLLMIDS
|
||||
TSDQKDIPWLYFISSTSLVMSITALLFRWREEPMISFSGNFQTNNFNEIF
|
||||
QFLILLCSTLCIPLSVEYIECTEMAITEFLLFVLTATLGGMFLCGANDLI
|
||||
TIFVAPECFSLCSYLLSGYTKKDVRSNEATMKYLLMGGASSSILVHGFSW
|
||||
LYGSSGGEIELQEIVNGLINTQMYNSPGISIALIFITVGIGFKLSPAPSH
|
||||
QWTPDVYEGSPTPVVAFLSVTSKVAASASATRIFNIPFYFSSNEWHLLLE
|
||||
ILAILSMILGNLIAITQTSMKRMLAYSSIGQIGYVIIGIIVGDSNDGYAS
|
||||
MITYMLFYISMNLGTFACIVLFGLRTGTDNIRDYAGLYTKDPFLALSLAL
|
||||
CLLSLGGLPPLAGFFGKLYLFWCGWQAGLYFLVLIGLLTSVVSIYYYLKI
|
||||
IKLLMTGRNQEITPHVRNYRRSPLRSNNSIELSMIVCVIASTIPGISMNP
|
||||
IIAIAQDSLF
|
||||
>AC_000188@LyesCp086@ndhb@142909@145120@D@2@511 NADH_dehydrogenase_subunit_2
|
||||
MIWHVQNENFILDSTRIFMKAFHLLLFDGSLIFPECILIFGLILLLMIDS
|
||||
TSDQKDIPWLYFISSTSLVMSITALLFRWREEPMISFSGNFQTNNFNEIF
|
||||
QFLILLCSTLCIPLSVEYIECTEMAITEFLLFVLTATLGGMFLCGANDLI
|
||||
TIFVAPECFSLCSYLLSGYTKKDVRSNEATMKYLLMGGASSSILVHGFSW
|
||||
LYGSSGGEIELQEIVNGLINTQMYNSPGISIALIFITVGIGFKLSPAPSH
|
||||
QWTPDVYEGSPTPVVAFLSVTSKVAASASATRIFNIPFYFSSNEWHLLLE
|
||||
ILAILSMILGNLIAITQTSMKRMLAYSSIGQIGYVIIGIIVGDSNDGYAS
|
||||
MITYMLFYISMNLGTFACIVLFGLRTGTDNIRDYAGLYTKDPFLALSLAL
|
||||
CLLSLGGLPPLAGFFGKLYLFWCGWQAGLYFLVLIGLLTSVVSIYYYLKI
|
||||
IKLLMTGRNQEITPHVRNYRRSPLRSNNSIELSMIVCVIASTIPGISMNP
|
||||
IIAIAQDSLF
|
||||
>NC_000927@NeolCp033@ndhb@28430@29950@D@1@507 NADH_dehydrogenase_subunit_2
|
||||
MELSDILASFHASNLIPEGIVACTILLVLLLDLVYSRTCHAWLAWVAMAG
|
||||
LSLASVLLGQQWYQLMNLPTATMTFGGSFQADSLSLVFRAIIAMSCVLCI
|
||||
LLSIDYVESTGTAPSEFLVLIATASLGGMLVAGSNDLLMMFVSLETLGLA
|
||||
SYLLTGYMKRDVRSNEASLKYLLVGAASSGLFLYGISWMYGISGGHMELN
|
||||
SIAHAIVSLDETKTTTCALALVLMTVGVGFKVAAAPFHQWTPDVYQGSPT
|
||||
PVVAFLSVGSKAAGFILAVRMCTTLFPSFNTEWHLIFTILSILSMIVGNF
|
||||
IAVTQTSLKRMLGYSSVGQAGVMMIGMLTDSPDGYASLIVYLLIYLFMNL
|
||||
GAFACVILFGLRTGTDQIQDYSGLLARDPFLALCLSLCLLSLGGIPPLAG
|
||||
FFGKMYLFLAAWDAGQYSLVWVGLITSVVSIYYYLSVVKIMLVPATQEMS
|
||||
LAVREYPRRAWSLEPIQPLEVGIFVCVLGSILVGVAGNSMVNLMTITMSQ
|
||||
APSLGV
|
||||
>NC_000932@ArthCp068@ndhb@94941@97164@R@2@513 NADH_dehydrogenase_subunit_2
|
||||
MIWHVQNENFILDSTRIFMKAFHLLLFDGSFIFPECILIFGLILLLMIDS
|
||||
TSDQKDIPWLYFISSTSFVMSITALLFRWREEPMISFSGNFQTNNFNEIF
|
||||
QFLILLCSTLCIPLSVEYIECTEMAITEFLLFILTATLGGMFLCGANDLI
|
||||
TIFVAPECFSLCSYLLSGYTKKDIRSNEATMKYLLMGGASSSILVHGFSW
|
||||
LYGSSGGEIELQEIVNGLINTQMYNSPGISIALIFITVGIGFKLSLAPSH
|
||||
QWTPDVYEGSPTPVVAFLSVTSKVAASASATRIFDIPFYFSSNEWHLLLE
|
||||
ILAILSMIFGNLIAITQTSMKRMLAYSSIGQIGYVIIGIIVGDSNGGYAS
|
||||
MITYMLFYIAMNLGTFACIILFGLRTGTDNIRDYAGLYTKDPFLALSLAL
|
||||
CLLSLGGLPPLAGFFGKLHLFWCGWQAGLYFLVSIGLLTSVLSIYYYLKI
|
||||
IKLLMTGRNQEITPHMRNYRISPLRSNNSIELSMIVCVIASTIPGISMNP
|
||||
IIAIAQDTLFSF
|
||||
>NC_000932@ArthCp086@ndhb@141485@143708@D@2@513 NADH_dehydrogenase_subunit_2
|
||||
MIWHVQNENFILDSTRIFMKAFHLLLFDGSFIFPECILIFGLILLLMIDS
|
||||
TSDQKDIPWLYFISSTSFVMSITALLFRWREEPMISFSGNFQTNNFNEIF
|
||||
QFLILLCSTLCIPLSVEYIECTEMAITEFLLFILTATLGGMFLCGANDLI
|
||||
TIFVAPECFSLCSYLLSGYTKKDIRSNEATMKYLLMGGASSSILVHGFSW
|
||||
LYGSSGGEIELQEIVNGLINTQMYNSPGISIALIFITVGIGFKLSLAPSH
|
||||
QWTPDVYEGSPTPVVAFLSVTSKVAASASATRIFDIPFYFSSNEWHLLLE
|
||||
ILAILSMIFGNLIAITQTSMKRMLAYSSIGQIGYVIIGIIVGDSNGGYAS
|
||||
MITYMLFYIAMNLGTFACIILFGLRTGTDNIRDYAGLYTKDPFLALSLAL
|
||||
CLLSLGGLPPLAGFFGKLHLFWCGWQAGLYFLVSIGLLTSVLSIYYYLKI
|
||||
IKLLMTGRNQEITPHMRNYRISPLRSNNSIELSMIVCVIASTIPGISMNP
|
||||
IIAIAQDTLFSF
|
||||
>NC_001319@MapoCp003@ndhb@1514@3555@D@2@502 NADH_dehydrogenase_subunit_2
|
||||
MKLELDMFFLYGSTILPECILIFSLLIILIIDLTFPKKDTIWLYFISLTS
|
||||
LLISIIILLFQYKTDPIISFLGSFQTDSFNRIFQSFIVFCSILCIPLSIE
|
||||
YIKCAKMAIPEFLIFILTATVGGMFLCGANDLVTIFVSLECLSLCSYLLC
|
||||
GYTKRDIRSNEAAIKYLLIGGTSSSILAYGFSWLYGLSGGETNIQKITNG
|
||||
LLNAETYNSSGTFIAFICILVGLAFKLSLVPFHQWTPDIYEGSPTPVVAF
|
||||
LSVTSKIAGLALATRILNILFSFSPNEWKIFLEILAILSMILGNLVAITQ
|
||||
TSMKRMLAYSSISQIGYILIGLITGDLKGYTSMTIYVFFYIFMNLGTFAC
|
||||
IILYSLRTGTDNIRDYAGLYIKDPLLSFSLTLCLLSLGGLPPLTGFFGKL
|
||||
YLFWCGWQSGFYLLVFIALITSVISLYYYLKIIKLILTKKNNEINPYIQA
|
||||
YIITSPTFFSKNPIEFVMIFCVLGSTFLGIIINPIFSFFQDSLSLSVFFI
|
||||
K
|
||||
>NC_001320@OrsajCp075@ndhb@85395@87639@R@2@511 NADH_dehydrogenase_subunit_2
|
||||
MIWHVQNENFILDSTRIFMKAFHLLLFQGSFIFPECILIFGLILLLMIDL
|
||||
TSDQKDRPWFYFISSTSLVISITALLFRWREEPIISFSGNFQTNNFNEIF
|
||||
QFLILLCSTLCIPLSVEYIECTEMAITEFLLFVLTATLGGMFLCGANDLI
|
||||
TIFVAPECFSLCSYLLSGYTKRDLRSNEATMKYLLMGGASSSILVHGFSW
|
||||
LYGSSGGEIELQEIVNGLINTQMYNSPGISIALISITVGLGFKLSPAPFH
|
||||
QWTPDVYEGSPTPVVAFLSVTSKVAASASATRILDIPFYFSSNEWHLLLE
|
||||
ILAILSMILGNLLAITQTSMKRMLAYSSIGQIGYVIIGIIVGDSNDGYAS
|
||||
MITYMLFYISMNLGTFACIVLFGLRTGTDNIRDYAGLYTKDPFLALSLAL
|
||||
CLLSLGGLPPLAGFFGKLYLFWCGWQAGLYFLVSIGLLTSVLSIYYYLKI
|
||||
VKLLMTGRNQEITPYVRNYRRSPLRSNNSIELSMTVCVIASTIPGISMNP
|
||||
ILAIAQDTLF
|
||||
>NC_001320@OrsajCp104@ndhb@127479@129723@D@2@511 NADH_dehydrogenase_subunit_2
|
||||
MIWHVQNENFILDSTRIFMKAFHLLLFQGSFIFPECILIFGLILLLMIDL
|
||||
TSDQKDRPWFYFISSTSLVISITALLFRWREEPIISFSGNFQTNNFNEIF
|
||||
QFLILLCSTLCIPLSVEYIECTEMAITEFLLFVLTATLGGMFLCGANDLI
|
||||
TIFVAPECFSLCSYLLSGYTKRDLRSNEATMKYLLMGGASSSILVHGFSW
|
||||
LYGSSGGEIELQEIVNGLINTQMYNSPGISIALISITVGLGFKLSPAPFH
|
||||
QWTPDVYEGSPTPVVAFLSVTSKVAASASATRILDIPFYFSSNEWHLLLE
|
||||
ILAILSMILGNLLAITQTSMKRMLAYSSIGQIGYVIIGIIVGDSNDGYAS
|
||||
MITYMLFYISMNLGTFACIVLFGLRTGTDNIRDYAGLYTKDPFLALSLAL
|
||||
CLLSLGGLPPLAGFFGKLYLFWCGWQAGLYFLVSIGLLTSVLSIYYYLKI
|
||||
VKLLMTGRNQEITPYVRNYRRSPLRSNNSIELSMTVCVIASTIPGISMNP
|
||||
ILAIAQDTLF
|
||||
>NC_001666@ZemaCp075@ndhb@89236@91472@R@2@511 NADH_dehydrogenase_subunit_2
|
||||
MIWHVQNENFILDSTRIFMKAFHLLLFHGSFIFPECILIFGLILLLMIDL
|
||||
TSDQKDRPWFYFISSTSLVISITALLFRWREEPIISFSGNFQTNNFNEIF
|
||||
QFLILLCSTLCIPLSVEYIECTEMAITEFLLFVLTATLGGMFLCGANDLI
|
||||
TIFVAPECFSLCSYLLSGYTKRDLRSNEATMKYLLMGGASSSILVHGFSW
|
||||
LYGSSGGEIELQEIVNGLINTQMYNSPGISIALIFITVGLGFKLSPAPFH
|
||||
QWTPDVYEGSPTPVVAFLSVTSKVAASALATRILDIPFYFSSNEWHLLLE
|
||||
ILAILSMILGNLLAITQTSMKRMLAYSSIGQIGYVIIGIIVGDSNDGYAS
|
||||
MITYMLFYISMNLGTFACIVLFGLRTGTDNIRDYAGLYTKDPFLALSLAL
|
||||
CLLSLGGLPPLAGFFGKLYLFWCGWQAGLYFLVSIGLLTSVLSIYYYLKI
|
||||
IKLLMTGRNQEITPYVRNYRRSPLRSNNSIELSMTVCVIASTIPGISMNP
|
||||
ILAIAQDTLF
|
||||
>NC_001666@ZemaCp101@ndhb@131265@133501@D@2@511 NADH_dehydrogenase_subunit_2
|
||||
MIWHVQNENFILDSTRIFMKAFHLLLFHGSFIFPECILIFGLILLLMIDL
|
||||
TSDQKDRPWFYFISSTSLVISITALLFRWREEPIISFSGNFQTNNFNEIF
|
||||
QFLILLCSTLCIPLSVEYIECTEMAITEFLLFVLTATLGGMFLCGANDLI
|
||||
TIFVAPECFSLCSYLLSGYTKRDLRSNEATMKYLLMGGASSSILVHGFSW
|
||||
LYGSSGGEIELQEIVNGLINTQMYNSPGISIALIFITVGLGFKLSPAPFH
|
||||
QWTPDVYEGSPTPVVAFLSVTSKVAASALATRILDIPFYFSSNEWHLLLE
|
||||
ILAILSMILGNLLAITQTSMKRMLAYSSIGQIGYVIIGIIVGDSNDGYAS
|
||||
MITYMLFYISMNLGTFACIVLFGLRTGTDNIRDYAGLYTKDPFLALSLAL
|
||||
CLLSLGGLPPLAGFFGKLYLFWCGWQAGLYFLVSIGLLTSVLSIYYYLKI
|
||||
IKLLMTGRNQEITPYVRNYRRSPLRSNNSIELSMTVCVIASTIPGISMNP
|
||||
ILAIAQDTLF
|
113
detectors/cds/test/test.db/ndhd.fst
Normal file
113
detectors/cds/test/test.db/ndhd.fst
Normal file
@ -0,0 +1,113 @@
|
||||
>AC_000188@LyesCp081@ndhd@116944@118446@R@1@501 NADH_dehydrogenase_subunit_4
|
||||
MNYFPWLTIIVVFPIFAGSLIFFLPHKGNRVIRWYTICICILELLLTTYA
|
||||
FCYHFQSDDPLIQLVEDYKWIDFFDFHWRLGIDGLSIGPILLTGFITTLA
|
||||
TLAAWPVTRDSRLFHFLMLAMYSGQIGLFSSRDLLLFFIMWELELIPVYL
|
||||
LLAMWGGKKRLYSATKFILYTAGGSVFLLMGVLGVALYGSNEPTLNFETS
|
||||
VNQSYPVVLEIIFYIGFFIAFAVKSPIIPLHTWLPDTHGEAHYSTCMLLA
|
||||
GILLKMGAYGLIRINMELLPHAHSIFSPWLMIIGTIQIIYAASTSLGQRN
|
||||
LKKRIAYSSVSHMGFIIIGISSLTDTGLNGALLQIISHGFIGAALFFLAG
|
||||
TTYDRIRLVYLDEMGGIAIPMPKMFTMFSSFSMASLALPGMSGFVAELIV
|
||||
FFGIITGQKYLLMPKLLITFVMAIGIILTPIYSLSMPRQMFYGYKLFNAP
|
||||
KDSFFDSGPRELFLSISIFLPVIGIGIYPDFVLSLAVDKVEVILSNFFYR
|
||||
>NC_000927@NeolCp113@ndhd@139299@140801@D@1@501 NADH_dehydrogenase_subunit_4
|
||||
MTHFPWLSTIVLFPLLASLAIPWLPDRKGTTVRWYALGVGLIDFSLIAYM
|
||||
FGRYYDFEQTSLQFVEDITWIDRLHLHWSLGVDGLSMPLVLLTGFITTLA
|
||||
TLAAWPVTKNPRLFYFLMLAMYTGQLGVFVVQDLLLFFLMWELELIPVYL
|
||||
LVSCWGGKKRLYAATKFILYTALGSIFILLGALTMPFMGIQGVTFDMSTL
|
||||
AYREYSLPVEILLYTGFLIAYGVKLPAIPVHTWLPDTHGEAHYSTCMLLA
|
||||
GILLKMGGYALIRINMNMLPHAHALFSPWLIGVGVVNIIYAALTSFAQRN
|
||||
LKRKIAYSSVSHMGFVLIGIGSLSEAGLNGAMLQMISHGLIGASLFFLAG
|
||||
TTYDRTRTLVLEEMGGLATFMPKTFALFTACSLASLALPGMSGFFAELLV
|
||||
FLGLVTSEAYSPTFRAIMTVFEAIGILLTPIYLLSMLRQLFYGRSIGRPK
|
||||
ALIDAGPREVFVVSCLLVPILGIGIYPKLATAIYVNTTDHVVQHVLSALR
|
||||
>NC_000932@ArthCp074@ndhd@115665@117167@R@1@501 NADH_dehydrogenase_subunit_4
|
||||
MNDFPWLTIIVVFPISAGSLMLFLPHRGNKVNKWYTICICILELLLTTYA
|
||||
FCYNFKMDDPLIQLSEDYKWIDFFDFYWRMGIDGLSIGTILLTGFITTLA
|
||||
TLAAFPVTRDSRFFHFLMLAMYSGQIGSFSSRDLLLFFIMWELELIPVYL
|
||||
LLSMWGGKKRLYSATKFILYTAGSSIFLLIGVLGISLYGSNEPTLNLELL
|
||||
ANKSYPVTLEILFYIGFLIAFAVKSPIIPLHTWLPDTHGEAHYSTCMLLA
|
||||
GILLKMGAYGLVRINMELLPHAHSMFSPWLLVVGTIQIIYAASTSPGQRN
|
||||
LKKRIAYSSVSHMGFIIIGISSITDPGLNGAILQIISHGFIGAALFFLAG
|
||||
TSYDRIRLVYLDEMGGMAISIPKIFTMFTILSMASLALPGMSGFIAEFIV
|
||||
FFGIITSQKYFLISKIFIIFVMAIGMILTPIYLLSMLRQMFYGYKLINIK
|
||||
NFSFFDSGPRELFLSISILLPIIGIGIYPDFVLSLASDKVESILSNYFYG
|
||||
>NC_001319@MapoCp079@ndhd@96665@98164@R@1@500 NADH_dehydrogenase_subunit_4
|
||||
MNHFPWLTIIVLFPISAGLVIPFLPSTGNKIIRWYTLGVCLLEFLLITYI
|
||||
FCYHYQFNDHLIQLKEDYNWISFINFHWRLGIDGFSIGLILLTGFITTLA
|
||||
TLAAWPVTRNPRLFYFLMLAMYSGQIGLFASQDILLFFFMWELELLPVYL
|
||||
LLAMWGGKRRLYAATKFILYTAAGSLFILIGGLIMAFYNSNEFTFDFQFL
|
||||
INKKYPLELEIIIYLSFLIAYAVKLPIIPFHTWLPDTHGEAHYSTCMLLA
|
||||
GILLKMGAYGLIRINMELLPHAHSFFAPWLVIVGAIQIVYAALTSLSQRN
|
||||
LKRRIAYSSVSHMGFVLIGIGSITNLGLNGAILQMISHGLIGASLFFLAG
|
||||
ISYDRTRTLVLDQMGGIGNSMPKIFTLFTSCSMASLALPGMSGFIAELMI
|
||||
FLGVIDNPNYSSLFKIIIIIIQGIGIILTPIYLLSMLRQMFYGYKFSNTL
|
||||
EPYFMDAGPREIFILICLFFPIISIGIYPNFVLSIWNSKVNFLLSNNFF
|
||||
>NC_001320@OrsajCp088@ndhd@106398@107900@R@1@501 NADH_dehydrogenase_subunit_4
|
||||
MSSFPWLTILVVLPIFAGSLIFFLPHRGNKIVRWYTMSICLLEFLLMTYA
|
||||
FCYHFQLEDPLIQLKEDSKWIDVFNFHWRLGIDGLSLGSILLTGFMTTLA
|
||||
TLAAWPVTRNSRLFYFLMLAMYSGQIGLFSSRDLLLFFIMWELELIPVYL
|
||||
LLSMWGGKRRLYSATKFILYTAGGSIFFLIGVLGMGLYGSNEPRLDLERL
|
||||
INQSYPATLEILFYFGFLIAYAVKLPIIPLHTWLPDTHGEAHYSTCMLLA
|
||||
GILLKMGAYGLIRINMELLPHAHYLFSPWLVIIGAMQIIYAASTSLGQRN
|
||||
FKKRIAYSSVSHMGFIIIGIGSITNIGLNGAILQILSHGFIGATLFFLAG
|
||||
TACDRMRLVYLEELGGASIPMPKIFTMFSSFSMASLALPGMSGFVAELVV
|
||||
FFGLITSPKFLLMPKMLITFVMAIGMILTPIYLLSMLRQMFYGYKLFHVP
|
||||
NENFEDSGPRELFLLICIFLPVIGIGIYPDFVLSLSVDRVEALLSNYYPK
|
||||
>NC_001666@ZemaCp086@ndhd@110138@111640@R@1@501 NADH_dehydrogenase_subunit_4
|
||||
MSYFPWLTILVVLPIFAGSLIFFLPHKGNKIVRWYTIAICLLEFLLMTYA
|
||||
FCYHFQLEDPLIQLKEDSKWIDVFDFHWRLGIDPLSLGSILLTGFITTLA
|
||||
TLAAWPVTRNSQLFYFLMLAMYSGQIGLFSSRDLLLFFIMWELELIPVYL
|
||||
LLSMWGGKRRLYSATKFILYTAGGSIFFLIGVLGMGLYGSNEPGLDLERL
|
||||
INQSYPTTWEILLYFGFLIAYAVKLPIIPLHTWLPYTHGEAHYSTCMLLA
|
||||
GILLKMGAYGLIRVNMELLPHAHYLFSPWLVIIGAVQIIYAASTSLGQRN
|
||||
FKKRIAYSSVSHMGFIIIGIGSITNIGLNGAILQILSHGFIGATLFFLAG
|
||||
TACDRMRLVYLEELGGISIPMPKIFTMFSSFSMASLALPGMSGFVAELVV
|
||||
FFGLITSPKFMLMPKMLITFVMAIGMILTPIYLLSMLRQMFYGYKLFHVP
|
||||
NKNFVDSGPRELFLLICIFLPVIGIGIYPDLVLSLSVDRVEVLLSNYYTK
|
||||
>NC_001879@NitaCp083@ndhd@117525@119027@R@1@501 NADH_dehydrogenase_subunit_4
|
||||
MNYFPWLTIIVVFPIFAGSLIFFLPHKGNRVIRWYTICICILELLLTTYA
|
||||
FCYHFQSDDPLIQLVEDYKWINFFDFHWRLGIDGLSIGPILLTGFITTLA
|
||||
TLAAWPVTRDSRLFHFLMLAMYSGQIGSFSSRDLLLFFIMWELELIPVYL
|
||||
LLCMWGGKKRLYSATKFILYTAGGSVFLLMGVLGLALYGSNEPTLNFETS
|
||||
VNQSYPVVLEIIFYIGFFIAFAVKSPIIPLHTWLPDTHGEAHYSTCMLLA
|
||||
GILLKMGAYGLIRINMELLPHAHSIFSPWLMIIGTIQIIYAALTSLGQRN
|
||||
LKKRIAYSSVSHMGFIIIGISSLTDTGLNGALLQIISHGFIGAALFFLAG
|
||||
TTYDRIRLVYLDEMGGIAIPMPKMFTMFSSFSMASLALPGMSGFVAELIV
|
||||
FFGIITGQKYLLIPKILITFVMAIGMILTPIYSLSMSRQMFYGYKLFNAP
|
||||
KDSFFDSGPRELFLSISIFLPVIGIGIYPDFVLSLAVDKVEVILSNFFYR
|
||||
>NC_002186@MeviCp085@ndhd@90277@91800@D@1@508 NADH_dehydrogenase_subunit_4
|
||||
MNNFPWITSIVMLPILAGLLIPFIPDENGKNVRWYALGIGLLDFLLISYI
|
||||
FGYKYNIQDTSLQLIDDYEWISSINFHWRLGIDGLSIPLILLTGFITTLA
|
||||
MLGAWPIQKNAKLFYFLMLAMYSGQLGVFASQDLLLFFLMWELELIPIYI
|
||||
LLIIWGGKKRLYAATKFILYTALGSIFILIAAFGMAFYGENMSFDMQILG
|
||||
EKEYPINLEILFYICFLIAYAVKLPAFPVHTWLPDTHGEAHYSTCMLLAG
|
||||
ILLKMGGYALIRINMNMLPNAHIYFAPYLAIIGVINIIYAALTSFAQRNI
|
||||
KRKIAYSSISHMGFVLIGISSFTDIGLSGAMLQMVSHGLIGASLFFLAGT
|
||||
TYDRTRTLILEDMGGIAKYMPKIFAMFTTCSLASLALPGMSGFVAELMVF
|
||||
LGFANSNAYSIEFRGIITFLEAIGIIVTPIYLLSMLRQVFYGSENLKLLK
|
||||
VNNLIDASAREIFIISCLLVPVIGIGIYPRILTQIYDLKTNAIIEHLEII
|
||||
RSNSQIM
|
||||
>NC_002202@SpolCp085@ndhd@113491@114996@R@1@502 NADH_dehydrogenase_subunit_4
|
||||
MTNSFPWLTTIVVLPIFAGSLIFLFPHRGNKVIRWYTICISMIELLLMTY
|
||||
VFFYHFQPDDPLIQLVEDYKWINFFDFHWRLGIDGLSIGPILLTGFITTL
|
||||
ATLAAWPVTRNSQLFHFLMLAMYSAQIGLFSSRDLLLFFIMWELELIPVY
|
||||
LLLSMWGGKKRLYSATKFILYTAGGSIFLLMGVLGVGLYGSNEPTLNLET
|
||||
LVNQSYPVALEIIFYIGFFIAFAVKLPIIPLHTWLPDTHGEAHYSTCMLL
|
||||
AGILLKMGAYGLVRINMELLPHAHSIFSPWLMIIGTMQIIYAASTSPGQR
|
||||
NLKKRIAYSSVSHMGFIIIGISSITDTGLNGAILQIISHGFIGAALFFLA
|
||||
GTSYDRIRLVYLDEMGGIAIPMPKIFTLFSSFSMASLALPGMSGFIAELI
|
||||
VFFGLITSQKYLLIPKLLITFGMAIGMILTPIYLLSMSRQMFYGYKLFNI
|
||||
SNSSFFDSGPRELFVSTSIFLPVIGIGVYPDLVLSLSVEKVEAILSNYFY
|
||||
R
|
||||
>NC_002693@OeelhCp097@ndhd@121124@122647@R@1@508 NADH_dehydrogenase_subunit_4
|
||||
MNSFPWLTIIVVFPILTGSLIFLLPHRGNKVMKWYTLCICILELLLTTYT
|
||||
FCYHFQLDDPLTQLTENYKWIHFFDFYWRLGIDGLSIGPILLTGFITTLA
|
||||
TLAAWPVTRDAQLFHFLMLAMYSGQIGSFSSRDLLLFFLMWEFELIPVYL
|
||||
LLSMWGGKKRLYSATKFILYTAGGSIFLLIGVLGIGLYGSNEPTLNFETL
|
||||
ANQSYPVALEVIFYVGFLIAFAVKLPIIPFHTWLPDTHGEAHYSTCMLLA
|
||||
GILLKMGAYGLVRINMELLPHAHCLFSPGLIIVGAIQIIYAASTSPGQLN
|
||||
LKKRIAYSSISHMGFIIIGIGSLSDTGLNGAILQIISHGFIGAALFFLAG
|
||||
TSYDRIRLLYLDEMGGMAIPLPKLFTMLSILSMASLALPGLSGFVAELLV
|
||||
FFGIITSQKYLLMPKILIAFLMAIGMILTPIYSLSMLRQMFYGYKLFNVP
|
||||
NYYFFDSGPRELFVSISLLLPIIGIGIYPDFVLSLSVEKVEAIISHFFFS
|
||||
IVFKKKE
|
39
detectors/cds/test/test.db/ndhe.fst
Normal file
39
detectors/cds/test/test.db/ndhe.fst
Normal file
@ -0,0 +1,39 @@
|
||||
>AC_000188@LyesCp080@ndhe@119061@119366@R@1@102 NADH_dehydrogenase_subunit_4
|
||||
MILEHVLVLSAYLFSIGIYGLITSRNMVRALMCLELILNAVNINFVTFSD
|
||||
FFDNRQLKGDIFSIFVIAIAAAEAAIGLAIVSSIYRNRKSTRINQSNLLN
|
||||
N
|
||||
>NC_000927@NeolCp119@ndhe@147326@147631@D@1@102 NADH_dehydrogenase_subunit_4L
|
||||
MIFQSYLLIAASMFCIGLYGLLTSRNVVRVLMSLELLLNAVNLNLLTFSN
|
||||
FVDSHEMKGQVLALFVIALAAAEAAIGLAIILSIYRNQRTVDPEQFNLLK
|
||||
W
|
||||
>NC_000932@ArthCp076@ndhe@117804@118109@R@1@102 NADH_dehydrogenase_subunit_4L
|
||||
MILEHVLVLSAYLFLIGLYGLITSRNMVRALMCLELILNAVNMNFVTFSD
|
||||
FFDNSQLKGEIFCIFVIAIAAAEAAIGLAIVSSIYRNRKSIRINQSTLLN
|
||||
K
|
||||
>NC_001319@MapoCp081@ndhe@98757@99059@R@1@101 NADH_dehydrogenase_subunit_4L
|
||||
MLEHILTLSAFLFCIGVFGLITSRNMVRALMCLELIFNAVNINLVAFSNF
|
||||
LDSSQIKGEIFSIFIIAIAAAEATIGLAIVLAIYRNRKSTRIDQFNLLKW
|
||||
>NC_001320@OrsajCp090@ndhe@108712@109017@R@1@102 NADH_dehydrogenase_subunit_4L
|
||||
MMFEHVLFLSVYLFSIGIYGLITSRNMVRALICLELILNSINLNLVTFSD
|
||||
LFDSRQLKGDIFAIFVIALAAAEAAIGLSILSSIHRNRKSTRINQSNFLN
|
||||
N
|
||||
>NC_001666@ZemaCp088@ndhe@112473@112778@R@1@102 NADH_dehydrogenase_subunit_4L
|
||||
MMFERVLFLSVYLFSIGIYGLITSRNMVRALICLELILNSINLNLVTFSD
|
||||
LFDSRQLKGDIFAIFVIALAAAEAAIGLSILSSIHRNRKSTRINQSNFLN
|
||||
N
|
||||
>NC_001879@NitaCp085@ndhe@119652@119957@R@1@102 NADH_dehydrogenase_subunit_4L
|
||||
MILEHVLVLSAYLFSIGIYGLITSRNMVRALMCLELILNAVNINFVTFSD
|
||||
FFDNRQLKGDIFSIFVIAIAAAEAAIGLAIVSSIYRNRKSTRINQSNLLN
|
||||
N
|
||||
>NC_002186@MeviCp099@ndhe@104916@105221@R@1@102 NADH_dehydrogenase_subunit_4L
|
||||
MYIENFLLLASALFCIGIYGLLTSRNIVRVLMCLELCLNAININFIAFSN
|
||||
FIDYEKINGQVIAIFIMTIAAAEAAIGLALVLTIYRNRETVDIENFDLLK
|
||||
G
|
||||
>NC_002202@SpolCp087@ndhe@115663@115968@R@1@102 NADH_dehydrogenase_subunit_4L
|
||||
MILEHVLVLSAFLFSIGIYGLVTSRNLVRALMCLELILNAVNLNFVTFSD
|
||||
FFDSRQLKGNIFSIFVIAIAAAEAAIGPAIVSSIYRNRKSIRINQSNLLN
|
||||
K
|
||||
>NC_002693@OeelhCp099@ndhe@123298@123603@R@1@102 NADH_dehydrogenase_subunit_4L
|
||||
MILEHVLVLSAYLFSIGIYGLITSRNMVRALMCLELILNSVNLNFVTFSD
|
||||
FFDSRQLKGDIFSIFIIAIAAAEAAIGLAIVSSIYRNRKSIRINQSNLLN
|
||||
K
|
157
detectors/cds/test/test.db/ndhf.fst
Normal file
157
detectors/cds/test/test.db/ndhf.fst
Normal file
@ -0,0 +1,157 @@
|
||||
>AC_000188@LyesCp073@ndhf@111508@113721@R@1@738 NADH_dehydrogenase_subunit_5
|
||||
MEQTYEYAWIIPFIPLPVPMLIGAGLILFPTATKRFRRMWAFQSVLLLSI
|
||||
VMIFSIYLSIQQINSSSVYQYVWSWIINNDFSLDFGYLIDPLTSIMSILI
|
||||
TTVGIMVLIYSDNYMAHDQGYLRFFAYMSFFSTSMLGLVTSSNLIQIYIF
|
||||
WELVGLCSYLLIGFWFTRPVAANACQKAFVTNRVGDFGLLLGILGFYWIT
|
||||
GSFEFRDLFEIFNNLIYNNELNFLFVTLCAVLLFAGAVAKSAQFPLHVWL
|
||||
PDAMEGPTPISALIHAATMVAAGIFLVARLLPLFRVIPYIMYLISVIGII
|
||||
TVLLGATLALAQKDIKRGLAYSTMSQLGYMMLALGMGSYRSALFHLITHA
|
||||
YSKALLFLGSGSIIHSMETIVGYSPAKSQNMGLMGGLRKHVPITKITFLL
|
||||
GTLSLCGIPPLACFWSKDEILNDSWLYSPIFAIIAWATAGLTAFYMFRIY
|
||||
LLTFEGHLNAHFQNYGGKQKIPFYSISLWGKNGVKKNSCLLTMNNNESTY
|
||||
FLSKTKYPIAKNGRKMTRPFMTIAHFKHKAVSSYPYESDNTMLFPIFVLG
|
||||
LFTLFVGAIGIPFNQEGVNLDILSKWLAPSINLLHPKSNNSLDWNEFLKD
|
||||
AVVSVSIAYFGIFIASFLYKPIYSSLKNLEFINSFVKKGPKRILWDKILN
|
||||
GIYDWSYNRAYIDAFYTRFFVGGIRGLAEFTHFVDRRVIDGMTNGVGVIS
|
||||
FIVGEGIKYIGGGRISSYLFLYLAYVSVFLLVYYLLF
|
||||
>NC_000927@NeolCp114@ndhf@140837@142783@R@1@649 NADH_dehydrogenase_subunit_5
|
||||
MEIFHQAIWLIPVLPLSASMLSGIGLLTFRETTSDLRRLHGALAIGAMAL
|
||||
SFVVSLGVLWNQLHGIAPVRWIIEWMLTDTFRLEIGYWVDPLTSTMLVVV
|
||||
TSVALLVMIYSDEYMHVDEGYVRFFVYLSIFTTSMLGLVLSPNLVQVYGF
|
||||
WELVGMCSYLLVGFWFTRPTAAEASQKAFITNRVGDFGLLLGILALYWMT
|
||||
GSFEFASIADRLGDLLIAIPSLRTIACIACILVFMGPIAKSAQFPLHVWL
|
||||
PDAMEGPTPISALIHAATMVAAGVFLVARMFPVFDQLPLVMELIAWTGTL
|
||||
TAFLGATMALTQSDIKKGLAYSTMSQLGYMIMALGTGAYSEALFHLTTHA
|
||||
YSKALLFLAAGSVIHGMEPVVGFSPMQNQNMHRMGGLRKYMPLTAMTFLL
|
||||
GTCSICGIPPLACFWSKDAILAEVFATHPTCWLIAWLTAGMTGFYMFRIY
|
||||
FLTFEGSFRSDLGRAKPKESHLGMVAPLIILAIPTVAIGSLGTPFAPVWE
|
||||
TFVHAPGQLSSLDEEFDLAEFLEMAGSSVGIGLLGISLSSLMYRNYAIDA
|
||||
TRISEYFSPLNRLFASKWYIDDLYAQVIVQGTRTIAQTLLIFDQRIIDGA
|
||||
VNLTAFGTLSAADTLKYWENGRVQFYILSIIFGVLFGSWLLTTHLSSL
|
||||
>NC_000932@ArthCp071@ndhf@110398@112638@R@1@747 NADH_dehydrogenase_subunit_5
|
||||
MEHTYQYSWIIPFIPLPVPILLGVGLLLFPTATKNLRRMWTFLSIFLLSI
|
||||
VMIFSIYLSIQQIFLSCIHQNVWSWTINNEFSFEFGYFIDPLTSIMSILI
|
||||
TTVGILVLIYSDNYMSHDQGYLRFFAYMGFFNTSMLGLVTSSNLIQVYFF
|
||||
WELVGMCSYLLIGFWFTRPIAANACQKAFVTNRVGDFGLLLGILGLYWIT
|
||||
GSFEFQDLFEIFNNLILNNRVNLLFLTLCAFLLFVGPIAKSAQFPLHVWL
|
||||
PDAMEGPTPISALIHAATMVAAGIFLVARLLPLFIVIPSIMYIISLIGII
|
||||
TVLLGATLALAQKDIKRGLAYSTMSQLGYMMLALGMGSYRSALFHLITHA
|
||||
YSKALLFLGSGSIIHSMEAIVGYSPDKSQNMILMGGLTKHVPITKTAFLI
|
||||
GTLSLCGIPPLACFWSKDEILNDSLLFSPIFAIIACSTAGLTAFYMFRIY
|
||||
LLTFEGHLNTYFLNYSGKKSGSFYSLSLWGKEEEKKLNKNFGLVPLLTMN
|
||||
NTKRASFFCNKTYKISNNVRNQIFITVENFGLNTRTFYYPHESDNTILFP
|
||||
MLILVLFTLFIGAIGIPFNQEGIDFDILSKFFTPSINLLHKNSQNFVDWY
|
||||
EFLRNATFSVSIAFFGIFIAYCLYKPFYSSLLNLTLLNSFQKWNSKRIHW
|
||||
EKLINFVYNWSYNRGYIDSFFKTSLIESIRRLAKQTTFFDKRIIDGITNG
|
||||
VGITSFFVGEVTKYIGGSRISSYLFLYLSYVLIFLMILFFFYFEKF
|
||||
>NC_001319@MapoCp074@ndhf@91101@93179@R@1@693 NADH_dehydrogenase_subunit_5
|
||||
MELIFQNVWFVPLFPFLASILLGIGLFFFPNSIKKFRRLSSFISIMFLNI
|
||||
AMLLSFHFFWQQITGSPIHRYLWSWVLYKNFVLEIGYLLDPLTSIMLVLV
|
||||
TTVAVMVMIYSDSYMFYDEGYIKFFCYLSLFTASMLGLVLSPNLIQVYIF
|
||||
WELVGMCSYLLIGFWFTRPSAANACQKAFVTNRIGDFGLLLGILGFYWIT
|
||||
GSFDFQQLSKRFFELLSYNQINLVFATLCALFLFLGPVAKSAQFPLHIWL
|
||||
PDAMEGPTPISALIHAATMVAAGIFLVARMFPLFQMLPFVMSIISWTGAI
|
||||
TALLGATIALAQKDLKKGLAYSTMSQLGYMMLALGIGSYKAGLFHLITHA
|
||||
YSKALLFLGSGSVIHSMEPIVGYHPNKSQNMIFMGGLRQYMPITAITFLF
|
||||
GTLSLCGIPPFACFWSKDEILVNSWLHFPILGSIAFFTAGLTAFYMFRIY
|
||||
FLTFEGDFRGHFFDDVKKLSSISIWGSLEFNKEQFKLDKKSTLYPKEANN
|
||||
IMLFPLIILTIPTVFIGFIGILFDENKMNVDSLSYWLTLSINSFNYSNSE
|
||||
KFLEFLFNAIPSVSIAFFGILIAFYLYGPNFSFLKKEKKKLQLKSEIDIV
|
||||
LKSFSNFIYNWSYYRAYIDGFYSSFFIKGLRFLIKIVSFIDRWIIDGIIN
|
||||
GIGIFSFFGGESLKYIEGGRISSYLFFIIFCMFLFFLYSYII
|
||||
>NC_001320@OrsajCp085@ndhf@101433@103637@R@1@735 NADH_dehydrogenase_subunit_5
|
||||
MEHTYQYAWVIPLLPLPVIMSMGFGLFLVPTATKNLRRIWAFPSVLLLSI
|
||||
AMVFSVHLSIQQINGSSIYQYLWSWTVNNDFSLEFGYLIDPLTSIMLILI
|
||||
TTVGILVLIYSDDYMSHDEGYLRFFVYISFFNTSMLGLVTSSNLIQIYFF
|
||||
WELVGMCSYLLIGFWFTRPIAASACQKAFVTNRVGDFGLLLGILGFFWIT
|
||||
GSLEFRDLFKIANNWIPNNEINSLLTILCAFLLFLGAVAKSAQFPLHVWL
|
||||
PDAMEGPTPISALIHATTMVAAGIFLIARLLPLFISLPLIMSFISLIGTL
|
||||
TLFLGATLALAQRDIKRSLAYSTMSQLGYMMLALGIGSYQAALFHLITHA
|
||||
YSKALLFLGSGSVIHSMEPLVGYSPDKSQNMVLMGGLRKYIPITRTCFLW
|
||||
GTLSLCGIPPLACFWSKDEILSNSWLYSPFFGIIASFTAGLTAFYMFRIY
|
||||
LLTFDGYLRVHFQNYSSTKEDSLYSISLWGKRISKGVNRDFVLSTAKSGV
|
||||
SFFSQNLSKIHVNTGNRIGSFSTSLGTKNTFVYPHEPGNTMLFPLLILLL
|
||||
CTLFIGSIGIHFDNEIGELTILSKWLTPSINFFQESSNSSINSYEFITNA
|
||||
ISSVSLAIFGLFIGIYVLWICLLFFSEFDLINSFVKGGPKKYFFHQLKKK
|
||||
IYSWSYNRGYIDIFYTRTFTLGIRGLTELTQFFDKGVIDGITNGVGLASF
|
||||
CIGEEIKYVGGGRISSYLFFFLCYVSVFLFFFLS
|
||||
>NC_001666@ZemaCp083@ndhf@105072@107288@R@1@739 NADH_dehydrogenase_subunit_5
|
||||
MEHTYQYAWVIPLLPLPVIMSMGFGLFLIPTATKNLRRIWAFPSILLLSI
|
||||
AMVFSLHLSIQQINGSSIYQYLWSWTINNDFSLEFGYLVDPLTSIMLILI
|
||||
TTVGILVLIYSDDYMSHDEGYLRFFVYISFFNTSMLGLVTSSNLIQIYFF
|
||||
WELVGMCSYLLIGFWFTRPIAASACQKAFVTNRVGDFGLLLGILGFFWIT
|
||||
GSLEFRDLFKIANNWIPNNGINSLLTTLCAFLLFLGAVAKSAQFPLHVWL
|
||||
PDAMEGPTPISALIHAATMVAAGIFLLARLLPLFISLPWIMSFISLIGTI
|
||||
TLFLGATLALAQRDIKRSLAYSTMSQLGYMMLALGIGSYQAALFHLITHA
|
||||
YSKALLFLGSGSVIHSMEPLVGYSPDKSQNMVLMGGLRKYVPITRTTFLC
|
||||
GTLSLCGIPPLACFWSKDEILSNSWLYSPFFGIIASFTAGLTAFYMFRIY
|
||||
LLTFDGYLRVHFQNYSSTKEGSLYSISLWGKSISKGVNRDFVLSTMKSGV
|
||||
SFFSQNIPKIPANTRNKIGSFSTPFGAKNTFVYPHETGNTMLFPLLILLL
|
||||
FTLFIGSIGIHFDNGVKDNRILELTILSKWLTPSINLFQENSNSSINSYE
|
||||
FLTNAISSVSLAIFGLFIAYIFYGSAYSFFQNLNFQNSLVKKNPKKSFLD
|
||||
EVKKKIYSWSYNRGYIDFFYTRVFILGIRKLAELTHFFDKGVIDGITNGV
|
||||
GLAGFCIGEEIKYVGGGRISSYLFFFLCYVSLFLFFIP
|
||||
>NC_001879@NitaCp080@ndhf@112072@114294@R@1@741 NADH_dehydrogenase_subunit_5
|
||||
MEQTYEYAWIIPFIPLPVPMLIGAGLFLFPTATKSFRRMWAFQSVLLLSI
|
||||
VMVFSIYLSIQQINSSSFYQYVWSWIINNDFSLDFGYLIDPLTSIMSILI
|
||||
TTVGIMVLIYSDNYMAHDQGYLRFFAYMSFFSTSMLGLVTSSNLIQIYIF
|
||||
WELVGLCSYLLIGFWFTRPVAANACQKAFVTNRVGDFGLLLGILGFYWIT
|
||||
GSFEFRDLFEIFNNLIYNNEVDFLFVTLCAVLLFAGAVAKSAQFPLHVWL
|
||||
PDAMEGPTPISALIHAATMVAAGIFLVARLLPLFRVIPYIMYLISVIGII
|
||||
TVLLGATLALAQKDIKRGLAYSTMSQLGYMMLALGMGSYRSALFHLITHA
|
||||
YSKALLFLGSGSIIHSMETIVGYSPAKSQNMGLMGGLRKHVPISKITFLL
|
||||
GTLSLCGIPPLACFWSKDEILNDSWLYSPIFAIIAWATAGLTAFYMFRIY
|
||||
LLTFEGHLNAHFPNYGGKQKTPFYSISLWGKNGVKKNSCLLTMNNNESTY
|
||||
FFAKTKYPIDKNGRKMTRPFMTIAHFEHKAVYSYPYESDNTMLFPIFVLG
|
||||
LFTLFVGSIGIPFNQEGGNLDILSKWLAPSINLLHQKSNNSMDWNEFLKD
|
||||
AVLSVSIAYFGIFIASFLYKPIYSSLKNFELINSFVKKGPKRILWDKIIN
|
||||
GIYDWSYNRAYIDAFYTRFLVGGIRGLAEFTHFFDRRVIDGMTNGVGVIS
|
||||
FIVGEGIKYIGGGRISSYLFLYLAYVSIFLLVYYLLFSTL
|
||||
>NC_002186@MeviCp105@ndhf@109803@111761@D@1@653 NADH_dehydrogenase_subunit_5
|
||||
MESISQYAWLIPIFPLAGSLLIGIGLISFRRATNILRWRYSFLIIALLGI
|
||||
SLILSCLILFSQINATPSYQWIFQWIVTNNFLLEIGYFVDPLTAVMLVIV
|
||||
TTVAILVLIYTDGYMSYDEGYVRFFAYLSLFTTSMLGLVLSPNLLQIYVF
|
||||
WELVGMCSYLLIGFWFTRPAAADACQKAFVTNRVGDFGLLLGILGFYWMT
|
||||
GSFEFDVISMKLLQLAEYDNFNTQLAIFFGFLIFLGPVAKSAQFPLHVWL
|
||||
PDAMEGPTPISALIHAATMVAAGVFLVARMFPIFSQFPFLMDLIAWTGAI
|
||||
TAIIGATIAVTQVDLKKGLAYSTMSQLGYMIMAMGMGSYTASLFHLMTHA
|
||||
YSKALLFLSAGSTIHGMEPIVGFNPAKNQNMSLMGGIRKYMPITGNAFLI
|
||||
GTLSLCGIPPLACFWSKDAILSNAFVHSPLLWFIGWSTAGLTSFYMFRMY
|
||||
FLVFEGEFRGNSVNQEKIRSNKLPKESNTKMTLPLIILTLFSITIGWIGT
|
||||
PFNNQFMFLIHTINQEIEPFDINEFLFIAGSSVGIALLGCYTAYLIYIKD
|
||||
KNTDKFANLLQPFYQLSFNKWYIDDIYEYIFVKGNRQLAQQTLLFDKKII
|
||||
DGFVNLTGLITLVSSESLRSIENGKIQSYILMIIFTLLTILGISQTYYSL
|
||||
IL
|
||||
>NC_002202@SpolCp080@ndhf@107770@109998@R@1@743 NADH_dehydrogenase_subunit_5
|
||||
MEHIYQYAWIIPFLPLPVPLLIGAGLLFFPTATKNLRRIWAFSSISLLSI
|
||||
VMIFSMKLAIQQINSNSIYQYLWSWTINNDFSLEFGYLMDPLTSIMSMLI
|
||||
TTVAILVLIYSDNYMSHDQGYLRFFAYMSFFNTSMLGLVTSSNLIQIYIF
|
||||
WELVGMCSYLLIGFWFTRPIAANACQKAFVTNRVGDFGLLLGILGLYWIT
|
||||
GSFEFRDLFEIFNNLIKNNEVNSLFCILCAFLLFAGAVAKSAQFPLHVWL
|
||||
PDAMEGPTPISALIHAATMVAAGIFLVARLLPLFVVIPYIMYVISFIGII
|
||||
TVLLGATLALAQKDIKRSLAYYTMSQLGYMMLALGMGSYRTALFHLITHA
|
||||
YSKALLFLASGSLIHSMGTIVGYSPDKSQNMVLMGGLTKHVPITKTSFLI
|
||||
GTLSLCGIPPLACFWSKDEILNDSWVYSPIFAIIAYFTAGLTAFYMFRIY
|
||||
LLTFEGHLNFFCKNYSGKKSSSFYSISLWGKKELKTINQKISLLNLLTMN
|
||||
NKERASFFSKKPYEINVKLTKLLRSFITITYFENKNISLYPYESDNTMLF
|
||||
PLIILIMFTLFVGFIGIPFNQEGMDLDILTKWLTPSINLLHSNSENFVDW
|
||||
YEFVINAIFSISIAFFGIFIAFFFYKPIYSSLKNFDLINSFDKRGQKRIL
|
||||
GDNIITIIYNWSANRGYIDAFYSTFLIKGIRSLSELVSFFDRRIIDGIPN
|
||||
GFGVTSFFVGEGIKYVGGGRISSYLFWYLLYVSIFLFIFTFT
|
||||
>NC_002693@OeelhCp094@ndhf@115809@118142@R@1@778 NADH_dehydrogenase_subunit_5
|
||||
MEYTYQYSWIIPFIPLPVPILIGMGLLLFPTATKNHRRVWSFPSILLLSM
|
||||
VMLLSVYLSIQQINRSFIYQYVWSWTINNDFSLEFGHLIDPLASIMLILI
|
||||
TTVGILVLFYSDNYMSHDQGYLRFFAYLSFFNTSMLGLVTSSNLIQIYIF
|
||||
WELVGMCSYLLIGFWFTRPIAATACQKAFVTNRVGDFGLLLGILGLYWIT
|
||||
GSFEFRDLFEIVNNLIDNNNQVHFLFVTLCSFLLFAGAVAKSAQFPLHVW
|
||||
LPDAMEGPTPISALIHAATMVAAGIFLVARLLPLFVITPYIMNLISLIGI
|
||||
ITVLLGATLALAQKDIKRSLAYSTMSQLGYMMLALGMGSYRAALFHLITH
|
||||
AYSKALLFLGSGSIIHSMESIVGYSPDKSQNMVLMGGLKKHVPITKTAFL
|
||||
VGTLSLCGIPPLACFWSKDEILNDSWLYSPIFAIIACSTAGFTAFYMFRV
|
||||
YLLTFDGHLNVHFQNYSGQKSSSVYSISLWGKQVPKRIQNPFCLLNLLTM
|
||||
NNNESTSFFWNNKCKLDGNVKKRIRPFITVTHFPNRKTFSYPHESDNTML
|
||||
FSLFVLVLFTLFVAAIGIPFNQEGSDCDILSKLLNPSINLLHQNSNNFTD
|
||||
WYEFVTNASFSVSIALLGIFIATFLYKPIYSSLQNFNLLNSFYKRSANRV
|
||||
MWDKIQNWIYDWSYNRGYIDSFYTISLTGGIRGLAELSHFFDRRVIDGIL
|
||||
NGFGLTSFFLGESLKYFGGGRISSYLLLYSIFIFIFLLMDSFFTNLPFFV
|
||||
LCQFLDSSFSMSISGFLLYENFLYENF
|
50
detectors/cds/test/test.db/ndhg.fst
Normal file
50
detectors/cds/test/test.db/ndhg.fst
Normal file
@ -0,0 +1,50 @@
|
||||
>AC_000188@LyesCp077@ndhg@119590@120120@R@1@177 NADH_degydrogenase_subunit_6
|
||||
MDLSEPIHDFLLVFLGSGLILGGLGVVLLPNPIYSAFSLGLVLVCTSLFY
|
||||
ILSNAYFVAAAQLLIYVGAINVLIIFAVMFMNGSEYYKDFHLWTVGDGIT
|
||||
SMVCISLFISLITTISDTSWYGIIWTTRSNQIIEQDFLSNSQQIGIHLST
|
||||
DFFLPFELISIILLVALIGAIAVARQ
|
||||
>NC_000927@NeolCp118@ndhg@146302@146811@D@1@170 NADH_dehydrogenase_subunit_6
|
||||
MEIVQNFSSAALTTGILLGCLGVIFLPSIVYAAFLLGAVFFCLAGIYVLL
|
||||
HADFVAAAQVLVYVGAINVLILFAIMLVNPQDAPPRALDSPPLIPGIACI
|
||||
GLLGVLVQMISTTSWLTPPWTPEPNSLPVLGGHLFSDCLLAFEVMSLVLL
|
||||
VALVGAIVLARREPVERSS
|
||||
>NC_000932@ArthCp077@ndhg@118377@118907@R@1@177 NADH_dehydrogenase_subunit_6
|
||||
MDLPGPIHDFLLVFLGSGLLVGGLGVVLLPNPIFSAFSLGFVLVCISLLY
|
||||
ILSNSHFVAAAQLLIYVGAINVLIIFAVMFMNDSEYSTDFNLWTIGNGIT
|
||||
SLVCTTILFLLMSTILDTSWYGVIWTTKLNQILEQDLISNSQQIGIHLST
|
||||
DFFLPFELISIILLVALIGAISVARQ
|
||||
>NC_001319@MapoCp082@ndhg@99113@99688@R@1@192 NADH_dehydrogenase_subunit_6
|
||||
MKLPESFYETIFLFLESGLILGSLGVILLTNIVYSALFLGFVFVCISLLY
|
||||
LLLNADFVAAAQILIYVGAVNVLIIFAVMLINKKQYSNFFVYWTIGDGIT
|
||||
LTLCTSIFLLLNNFISNTSWSKIFLMTKPNLVVKDIILINTVRHIGSELL
|
||||
TEFLLPFELMSIILLVALIGAITLARREKKIELEKNDFFNF
|
||||
>NC_001320@OrsajCp091@ndhg@109227@109757@R@1@177 NADH_dehydrogenase_subunit_6
|
||||
MDLPGPIHEILVLFGGFVLLLGGLGVVLLTNPTFSAFSLGLVLVCISLFY
|
||||
ILLNSYFVAVAQLLIYVGAINVLIIFAVMFVNGSEWSKDKNFWTIGDGFT
|
||||
SLVCITIPFSLMTTIPDTSWYGILWTTRSNQIVEQGLINNVQQIGIHLAT
|
||||
DFYLPFELISIILLVSLIGAITMARQ
|
||||
>NC_001666@ZemaCp089@ndhg@112993@113523@R@1@177 NADH_dehydrogenase_subunit_6
|
||||
MDLPGPIHEILVLFGGFGLLLGGLGVVLLTNPIYSAFSLGLVLVCISLFY
|
||||
FLLNSYFVAVAQLLIYVGAINVLIIFAVMFVNGSEWSKDKNYWTIGDGFT
|
||||
LLLCITIPFSLMTTIPDTSWYGILWTTRSNQIVEQGLINNVQQIGIHLAT
|
||||
DFYLPFELISLILLVSLIGAITMARQ
|
||||
>NC_001879@NitaCp086@ndhg@120181@120711@R@1@177 NADH_dehydrogenase_subunit_6
|
||||
MDLSEPIHDFLLVFLGSGLILGGLGVVLLPNPIYSAFSLGLVLVCTSLFY
|
||||
ILSNSYFVAAAQLLIYVGAINVLIIFAVMFMNGSEYYKDFHLWTVGDGIT
|
||||
SMVCISLFISLITTISDTSWYGIIWTTRSNQIIEQDFISNSQQIGIHLST
|
||||
DFFLPFELISIILLVALIGAIAVARQ
|
||||
>NC_002186@MeviCp100@ndhg@105252@105821@R@1@190 NADH_dehydrogenase_subunit_6
|
||||
MSFSEQIQNLSLLLLEIGTIIGALGVVLLPNILYSGFLLGGVLICIAGIY
|
||||
LLLNAEFIAAAQVLIYVGAINVIILFAIMLVNKIENLNPSNNQMMRNGLS
|
||||
SFICFSFFILLSNMIFDTQWIDTVGVSTKYSISIIGNHIFSDFLLPFEIV
|
||||
SVLLLVTLVGAVFIARKEDASEIEISKISFLNLPDPSKK
|
||||
>NC_002202@SpolCp088@ndhg@116164@116694@R@1@177 NADH_dehydrogenase_subunit_6
|
||||
MDLPGPIHDFLLVFLGSGLILGALGVVLFTNPIFSAFSLGLVLVCISLFY
|
||||
ILANSHFVASAQLLIYVGAINVLIIFSVMFMSGPEYDKKFQLWTVGDGVT
|
||||
SLVCISLFVSLISTILNTSWYGIIWTTKSNQILEQDLINASQQIGIHLST
|
||||
DFFLPFELISIILLVSLIGAIAVARQ
|
||||
>NC_002693@OeelhCp100@ndhg@123796@124326@R@1@177 NADH_dehydrogenase_subunit_6
|
||||
MDLPGPIHDFLLVFLGSGLIVGGLGVVLLTNPIFSAFSLGLVLVCISLFF
|
||||
SLSNSYFVAAAQLLIYVGAINVLILFAVMFMNGSEYSKDLTLWTVGDGIT
|
||||
SLVCTSIFISLITTILDTSWYGIIWTTKSNQIIEQDLIGNSQQIGIHLST
|
||||
DFFLPFELISIILLVSLIGAIAVARQ
|
90
detectors/cds/test/test.db/ndhh.fst
Normal file
90
detectors/cds/test/test.db/ndhh.fst
Normal file
@ -0,0 +1,90 @@
|
||||
>AC_000188@LyesCp078@ndhh@123339@124520@R@1@394 NADH_dehydrogenase_subunit_7
|
||||
MTAPTTRKDLMIVNMGPQHPSMHGVLRLIVTLDGEDVVDCEPILGYLHRG
|
||||
MEKIAENRTIIQYLPYVTRWDYLATMFTEAITINGPEQLGNIQVPKRASY
|
||||
IRVIMLELSRIASHLLWLGPFMADIGAQTPFFYIFRERELIYDLFEAATG
|
||||
MRMMHNYFRIGGVAADLPYGWIDKCLDFCDYFLTGVAEYQKLITRNPIFL
|
||||
ERVEGVGIIGRDEALNWGLSGPMLRASGIEWDLRKVDHYESYDEFDWQVQ
|
||||
WQREGDSLARYLVRIGEMTESIKIIQQALEGIPGGPYENLEMRRFDRLKD
|
||||
PEWNDFEYRFISKKPSPTFELSKQELYVRVEAPKGELGIFLIGDQSVFPW
|
||||
RWKIRPPGFINLQILPQLVKRMKLADIMTILGSIDIIMGEVDR
|
||||
>NC_000927@NeolCp115@ndhh@143205@144380@D@1@392 NADH_dehydrogenase_subunit_7
|
||||
MIEAKTDPMIVSMGPHHPSMHGVLRLIVTLDGENVLDCEPVVGYLHRGME
|
||||
KIAENRTIVQYLPYVTRWDYLATMFTEAITVNAPERLANIEVPRRASYLR
|
||||
VIMLELSRIASHLLWLGPFMADLGAQTPFFYILREREMIYDLFEAATGMR
|
||||
MMHNYFRVGGVAADVPYGWIDKCLDFCEYFLPKVDEYEALITRNPIFLKR
|
||||
VKGVGTISPQQAINWGLSGPMLRASGVSWDLRKVDRYECYEDFHWSVESE
|
||||
ETGDCLARYLVRIREMRTSTKIVQQALKSIPGGPTENLEARQLSQGRTSP
|
||||
WNEFDYQFLGKKASPTFKMPRQEHYVRVEAPKGELGVFLIGDDHVFPWRW
|
||||
KIRPPGFINVQILPNLVQGMKLADIMTILGSIDIIMGEVDR
|
||||
>NC_000932@ArthCp080@ndhh@122011@123192@R@1@394 NADH_dehydrogenase_subunit_7
|
||||
MKRPVTGKDLMIVNMGPHHPSMHGVLRLIVTLDGEDVVDCEPILGYLHRG
|
||||
MEKIAENRAIIQYLPYVTRWDYLATMFTEAITVNGPEQLGNIQVPKRASY
|
||||
IRVIMLELSRIASHLLWLGPFMADIGAQTPFFYIFREREFVYDLFEAATG
|
||||
MRMMHNFFRIGGIAADLPYGWIDKCLDFCDYFLTEVVEYQKLITRNPIFL
|
||||
ERVEGVGIIGGEEAINWGLSGPMLRASGIPWDLRKIDRYESYDEFEWEIQ
|
||||
WQKQGDSLARYLVRLSEMTESIKIIQQALEGLPGGPYENLESRGFDRKRN
|
||||
PEWNDFEYRFISKKPSPTFELSKQELYVRVEAPKGELGIFLIGDQSGFPW
|
||||
RWKIRPPGFINLQILPELVKRMKLADIMTILGSIDIIMGEVDR
|
||||
>NC_001319@MapoCp085@ndhh@102202@103380@R@1@393 NADH_dehydrogenase_subunit_7
|
||||
MMILTKNKPMIVSMGPHHPSMHGVLRLIVTLDGEDVLDCEPVLGYLHRGM
|
||||
EKIAENRTIVQYLPYVTRWDYLATMFTEAITVNAPEKLTNIQVPKRASYI
|
||||
RIIMLELSRIASHLLWLGPFMADIGAQTPFFYIFREREMIYDLFESATGM
|
||||
RMMHNYFRIGGVAVDLPYGWIDKCLDFCDYFLPKINEYERLITNNPIFLK
|
||||
RVEGIGTVTREEAINWGLSGPMLRASGVQWDLRKVDHYECYDELDWKIQW
|
||||
QKEGDSLARYLVRIGEMKESVKIIQQALKAIPGGPFENLEARRLNQGKNS
|
||||
EWNLFEYQFISKKPSPTFKLPKQEHYVRVEAPKGELGIFLIGDDSVFPWR
|
||||
LKIRSPGFINLQILPQLVKGMKLADIMTILGSIDIIMGEVDR
|
||||
>NC_001320@OrsajCp094@ndhh@112708@113889@R@1@394 NADH_dehydrogenase_subunit_7
|
||||
MSLPLTRKDLMIVNMGPQHPSMHGVLRLIVTLDGEDVIDCEPILGYLHRG
|
||||
MEKIAENRTIIQYLPYVTRWDYLATMFTEAITVNAPEFLENIQIPQRASY
|
||||
IRVIMLELSRIASHLLWLGPFMADLGAQTPFFYIFRERELIYDLFEAATG
|
||||
MRMMHNYFRIGGVAADLPYGWIDKCLDFCDYFLRGVIEYQQLITQNPIFL
|
||||
ERVEGVGFISGEEAVNWGLSGPMLRASGIQWDLRKVDLYESYNQFDWKVQ
|
||||
WQKEGDSLARYLVRIGEMRESIKIIQQAVEKIPGGPYENLEVRRFKKAKN
|
||||
SEWNDFEYRFLGKKPSPNFELSKQELYARVEAPKGELGIYLVGDDSLFPW
|
||||
RWKIRPPGFINLQILPQLVKKMKLADIMTILGSIDIIMGEVDR
|
||||
>NC_001666@ZemaCp092@ndhh@116456@117637@R@1@394 NADH_dehydrogenase_subunit_7
|
||||
MSLSLKRKDLMIVNMGPQHPSMHGVLRLIVTLDGEDVIDCEPILGYLHRG
|
||||
MEKIAENRSIIQYLPYVTRWDYLATMFTEAITVNAPEFLENIQIPKRASY
|
||||
IRVIMLELSRIASHLLWLGPFMADLGAQTPFFYIFRERELIYDLFEAVTG
|
||||
MRMMHNYFRIGGVAADLPYGWMDKCLDFCDYFLQGVVEYQELITQNPIFL
|
||||
ERVEGVGFISGEEAVNWGLSGPMLRASGIQWDLRKIDPYESYNQFDWKVQ
|
||||
WQKEGDSLARYLVRVGEMRESIKIIQQAVEKIPGGPYENLEARRFKKAKN
|
||||
PEWNDFEYRFLGKKPSPNFELSKQELYVRVEAPKGELGIYLVGDDSLFPW
|
||||
RWKIRPPGFINLQILPQLVKKMKLADIMTILGSIDIIMGEVDR
|
||||
>NC_001879@NitaCp089@ndhh@123937@125118@R@1@394 NADH_dehydrogenase_subunit_7
|
||||
MTAPTTRKDLMIVNMGPQHPSMHGVLRLIVTLDGEDVVDCEPILGYLHRG
|
||||
MEKIAENRTIIQYLPYVTRWDYLATMFTEAITINGPEQLGNIQVPKRASY
|
||||
IRVIMLELSRIASHLLWLGPFMADIGAQTPFFYIFRERELIYDLFEAATG
|
||||
MRMMHNYFRIGGVAADLPYGWIDKCLDFCDYFLTGVAEYQKLITRNPIFL
|
||||
ERVEGVGIIGGDEALNWGLSGPMLRASGIEWDLRKVDHYESYDEFDWQVQ
|
||||
WQREGDSLARYLVRIGEMTESIKIIQQALEGIPGGPYENLEIRRFDRLKD
|
||||
PEWNDFEYRFISKKPSPTFELSKQELYVRVEAPKGELGIFLIGDQSVFPW
|
||||
RWKIRPPGFINLQILPQLVKRMKLADIMTILGSIDIIMGEVDR
|
||||
>NC_002186@MeviCp103@ndhh@107611@108792@R@1@394 NADH_dehydrogenase_subunit_7
|
||||
MTMLQTKTDPMVISMGPHHPSMHGVLRLIVTLDGENVIDCEPVLGYLHRA
|
||||
MEKIAENRTIVQYLPYVTRWDYLATMFTEAITVNAPEKLANIEVPKRASY
|
||||
IRVIMLELSRIASHLLWLGPFMADIGAQTPFFYILREREMIYDLFEAATG
|
||||
MRMMHNYFRIGGVASDLPYGWVDKCLDFSDYFLPKVDEYERLITNNPIFL
|
||||
KRVRDVGFISREEAINWGLSGPMLRASGVQWDLRKVDNYECYGELDWNVQ
|
||||
WQSDGDCLARYLVRLGEMRESTKIIQQALKAIPGGPYENLEARRLSKGRK
|
||||
SEWNNFEYQFVGKKPSPTFKIPKQEHYVRVEAPKGELGVFLMGDDNVFPW
|
||||
RWKIRSPGFINVQILPELVRGMKLADIMTILGSIDIIMGEVDR
|
||||
>NC_002202@SpolCp091@ndhh@119776@120957@R@1@394 NADH_dehydrogenase_subunit_7
|
||||
MAVPTTRKDLMIVNMGPHHPSMHGVLRLIVTLDGEDVIDCEPIVGYLHRG
|
||||
MEKIAENRTIIQYLPYVTRWDYLATMFTEAITVNGPEQLGNIQVPKRASY
|
||||
IRVIMLELSRIASHLLWLGPFMADIGAQTPFFYILRERELIYDLFEAATG
|
||||
MRMMHNYFRIGGVAADLPYGWIDKCLDFCDYFLIGLTEYQKLITRNPIFL
|
||||
ERVENVGIIGGEEAINWGLSGPMLRASGIQWDLRKVDHYECYDEFDWEVQ
|
||||
WQKEGDSLARYLIRIGEMAESVKIIQQALEGIPGGPYENLEIRRFNRIKY
|
||||
PEWNDFEYRFISKKPSPAFELSKQELYVRVEAPKGELGIFLIGDQSVFPW
|
||||
RWKIRPPGFINLQILPQLVKKMKLADIMTILGSIDIIMGEVDR
|
||||
>NC_002693@OeelhCp103@ndhh@127260@128441@R@1@394 NADH_dehydrogenase_subunit_7
|
||||
MNVTTTRKDLMIVNMGPHHPSMHGVLRLILTLDGEDVIDCEPILGYLHRG
|
||||
MEKIAENRTVIQYLPYVTRWDYLATMFTEAITINGPEQLGNIQVPKRASY
|
||||
IRIIMLELSRIASHLLWLGPFMADIGAQTPFFYIFRERELVYDLFEAATG
|
||||
MRMMHNYFRIGGVAADLPYGWIDKCLDFCDYFLTAVSEYQKLITRNPIFL
|
||||
ERVEGVGIIGGEEAINWGLSGPMLRASGIEWDLRKVDRYECYGELDWEIR
|
||||
WQKEGDSLARYLVRMSEMTESIKIIQQALEGIPGGPYENLEIRCFDREKD
|
||||
PEWDGFEYRFISKKPSPTFELPKQELYVRVEAPKGELGIFLIGDQSGFPW
|
||||
RWKIRPPGFINLQILPQLVKRMKLADIMTILGSIDIIMGEVDR
|
50
detectors/cds/test/test.db/ndhi.fst
Normal file
50
detectors/cds/test/test.db/ndhi.fst
Normal file
@ -0,0 +1,50 @@
|
||||
>AC_000188@LyesCp076@ndhi@120525@121028@R@1@168 NADH_dehydrogenase_subunit_I
|
||||
MLPMITEFINYGQQTIRAARYIGQGFMITLSHANRLPVTIQYPYEKLITS
|
||||
ERFRGRIHFEFDKCIACEVCVRVCPIDLPVVDWKLETDIRKKRLLNYSID
|
||||
FGICIFCGNCVEYCPTNCLSMTEEYELSTYDRHELNYNQIALGRLPMSVI
|
||||
DDYTIRTISNLPQINNE
|
||||
>NC_000927@NeolCp117@ndhi@145747@146268@D@1@174 NADH_dehydrogenase_subunit_I
|
||||
MFDFLTSLQTYRQEAAQAAQYIGQGFGVTFDHMSRRPITIHYPYEKLIPS
|
||||
ERFRGRIHFEFDKCIACEVCVRVCPINLPVVDWDYQKSVKKKQLRSYSID
|
||||
FGVCIFCGNCVEYCPTNCLSMTEEYELSVYDRHELNFDHVALGRVPTSVV
|
||||
QDTLVTPVLGLGYLPKGELSSLP
|
||||
>NC_000932@ArthCp078@ndhi@119244@119762@R@1@173 NADH_dehydrogenase_subunit_I
|
||||
MLPMITGFMNYGQQTLRAARYIGQGFMITLSHTNRLPVTIQYPYEKLITS
|
||||
ERFRGRIHFEFDKCIACEVCVRVCPIDLPVVDWKLETNIRKKRLLNYSID
|
||||
FGICIFCGNCVEYCPTNCLSMTEEYEFSTYDRHELNYNQIALGRLPMSVI
|
||||
DDYTIRTIWNSPQTKNGVNPLI
|
||||
>NC_001319@MapoCp083@ndhi@99779@100330@R@1@184 NADH_dehydrogenase_subunit_I
|
||||
MFSIINGLKNYNQQAIQAARYIGQGFLVTLDHMNRLPTTIQYPYEKLIPS
|
||||
ERFRGRIHFEFDKCIACEVCVRVCPINLPVVDWELKKTIKKKQLKNYSID
|
||||
FGVCIFCGNCVEYCPTNCLSMTEEYELSTYNRHELNYDQIALGRLPISII
|
||||
EDSTIENIFNLTSLPKGKIEGHIYSRNITNIVN
|
||||
>NC_001320@OrsajCp092@ndhi@110000@110536@R@1@179 NADH_dehydrogenase_subunit_I
|
||||
MFPMVTGFMGQQTIRAARYIGQSFIITLSHTNRLPITIHYPYEKSITSER
|
||||
FRGRIHFEFDKCIACEVCVRVCPIDLPLVDWRFEKDIKRKQLLNYSIDFG
|
||||
VCIFCGNCVEYCPTNCLSMTEEYELSTYDRHELNYNQIALSRLPISIMGD
|
||||
YTIQTIRNSTQSKIDEEKSWNSRTITDY
|
||||
>NC_001666@ZemaCp090@ndhi@113707@114249@R@1@181 NADH_dehydrogenase_subunit_I
|
||||
MFPMLTGFISYGQQTIRAARYIGQSFIITLSHTNRLPITIHYPYEKSITS
|
||||
ERFRGRIHFEFDKCIACEVCVRVCPIDLPLVDWRFEKDIKRKQLLNYSID
|
||||
FGVCIFCGNCVEYCPTNCLSMTEEYELSTYDRHELNYNQIALSRLPISIM
|
||||
GDYTIQTIRNSPQSKIDEEKSWNSRTITDY
|
||||
>NC_001879@NitaCp087@ndhi@121108@121611@R@1@168 NADH_dehydrogenase_subunit_I
|
||||
MLPMITEFINYGQQTIRAARYIGQGFMITLSHANRLPVTIQYPYEKLITS
|
||||
ERFRGRIHFEFDKCIACEVCVRVCPIDLPVVDWKLETDIRKKRLLNYSID
|
||||
FGICIFCGNCVEYCPTNCLSMTEEYELSTYDRHELNYNQIALGRLPMSVI
|
||||
DDYTIRTISNLPQIKNE
|
||||
>NC_002186@MeviCp101@ndhi@105946@106476@R@1@177 NADH_dehydrogenase_subunit_I
|
||||
MFNFIDNVQTYSKEALQAAKYIGQGFMVTFDHMNRRAITIQYPYEKLIPS
|
||||
ERFRGRIHFEFDKCIACEVCVRVCPINLPVVNWEFQKEKKKKQLQTYSID
|
||||
FGVCIFCGNCVEYCPTNCLSMTEEYELSVYDRHELNYDNFALGRLPTMVN
|
||||
NDSMVKGIKGLGYLPKGIIEGHIDNQ
|
||||
>NC_002202@SpolCp089@ndhi@116981@117493@R@1@171 NADH_dehydrogenase_subunit_I
|
||||
MFPMVTGFINYGQQTIRAARYIGQSFMITLSHANRLPVTIQYPYEKLITS
|
||||
ERFRGRIHFEFDKCIACEVCVRACPIDLPVVDWKLETDIRKKRLLNYSID
|
||||
FGICIFCGNCVEYCPTNCLSMTEEYELSTYDRHELNYNQIALGRLPISIT
|
||||
DDYTIRTILNSPQTKEKACD
|
||||
>NC_002693@OeelhCp101@ndhi@124541@125038@R@1@166 NADH_dehydrogenase_subunit_I
|
||||
MFPMVTGFMNYGQQTVRAARYIGQGFMITLSHANRLPVTIQYPYEKLITS
|
||||
ERFRGRIHFEFDKCIACEVCVRVCPIDLPVVDWKLETGVRKKRLLNYSID
|
||||
FGVCIFCGNCVEYCPTNCLSMTEEYELSTYDRHELNYNQIALGRLPMSVI
|
||||
DDYTIRTILNSAQIK
|
30
detectors/cds/test/test.db/psac.fst
Normal file
30
detectors/cds/test/test.db/psac.fst
Normal file
@ -0,0 +1,30 @@
|
||||
>AC_000188@LyesCp065@psac@118564@118809@R@1@82 photosystem_I_subunit_VII
|
||||
MSHSVKIYDTCIGCTQCVRACPTDVLEMIPWDGCKAKQIASAPRTEDCVG
|
||||
CKRCESACPTDFLSVRVYLWHETTRSMGLAY
|
||||
>NC_000925@PopuCp189@psac@164452@164697@R@1@82 photosystem_I_subunit_VII
|
||||
MAHSVKVYDTCIGCTQCVRACPCDVLEMVPWDGCKAKQIASAPRTEDCIG
|
||||
CKRCETACPTDFLSVRVYLGAETTRSMGLAY
|
||||
>NC_000926@GuthCp035@psac@33008@33253@D@1@82 photosystem_I_subunit_VII
|
||||
MSHSVKVYDTCIGCTQCVRACPCDVLEMVAWDGCKAGQIASAPRTEDCIG
|
||||
CKRCETACPTDFLSVRVYLGGETTRSMGLAY
|
||||
>NC_000927@NeolCp112@psac@138977@139222@D@1@82 photosystem_I_subunit_VII
|
||||
MSHSVKIYDTCIGCTQCVRACPTDVLEMVPWGGCKAAQIASAPRTEDCVG
|
||||
CKRCESACPTDFLSVRVYLGAETTRSMGLAY
|
||||
>NC_000932@ArthCp075@psac@117318@117563@R@1@82 photosystem_I_subunit_VII
|
||||
MSHSVKIYDTCIGCTQCVRACPTDVLEMIPWDGCKAKQIASAPRTEDCVG
|
||||
CKRCESACPTDFLSVRVYLWHETTRSMGLAY
|
||||
>NC_001319@MapoCp080@psac@98289@98534@R@1@82 photosystem_I_subunit_VII
|
||||
MAHAVKIYDTCIGCTQCVRACPTDVLEMIPWDGCKANQIASAPRTEDCVG
|
||||
CKRCESRCPTDFLSVRVYLGNETTRSMGLSY
|
||||
>NC_001320@OrsajCp089@psac@108020@108265@R@1@82 photosystem_I_subunit_VII
|
||||
MSHSVKIYDTCIGCTQCVRACPTDVLEMIPWDGCKAKQIASAPRTEDCVG
|
||||
CKRCESACPTDFLSVRVYLGPETTRSMALSY
|
||||
>NC_001603@EugrCp046@psac@74937@75893@R@3@82 photosystem_I_subunit_VII
|
||||
MSHSVKIYNTCIGCTQCVRACPTDVLEMVPWDGCKAGQIASSPRTEDCVG
|
||||
CKRCESACPTDFLSVRVYLGSETSRSMGLAY
|
||||
>NC_001631@PithCp145@psac@103521@103766@D@1@82 photosystem_I_subunit_VII
|
||||
MAHSVKIYDTCIGCTQCVRACPTDVLEMIPWEGCKAKQIASAPRTEDCAG
|
||||
CKRCESACPTDFLSVRVYLWHETTRSMGLAY
|
||||
>NC_001666@ZemaCp087@psac@111760@112005@R@1@82 photosystem_I_subunit_VII
|
||||
MSHSVKIYDTCIGCTHCVRACPTDVLEMIPWDGCKAKQIASAPRTEDCVG
|
||||
CKRCESACPTDFLSVRVYLGPETTRSMALSY
|
70
detectors/cds/test/test.db/rpl2.fst
Normal file
70
detectors/cds/test/test.db/rpl2.fst
Normal file
@ -0,0 +1,70 @@
|
||||
>AC_000188@LyesCp030@rpl2@86038@87528@R@2@275 ribosomal_protein_L2
|
||||
MAIHLYKTSTPSTRNGTVDSQVKSNPRNNLIYGQRRCGKGRNARGIITAR
|
||||
HRGGGHKRLYRKIDFRRNEKDIYGRIVTIEYDPNRNAYICLIHYGDGEKR
|
||||
YILHPRGAIIGDTIVSGTEVPIKMGNALPLTDMPLGTAIHNIEITLGKGG
|
||||
QLARAAGAVAKLIAKEGKSATLKLPSGEVRLISKNCSATVGQVGNVGVNQ
|
||||
KSLGRAGSKRWLGKRPVVRGVVMNPVDHPHGGGEGRAPIGRKKPTTPWGY
|
||||
PALGRRSRKRNKYSDNLILRRRSK
|
||||
>AC_000188@LyesCp087@rpl2@153816@155306@D@2@275 ribosomal_protein_L2
|
||||
MAIHLYKTSTPSTRNGTVDSQVKSNPRNNLIYGQRRCGKGRNARGIITAR
|
||||
HRGGGHKRLYRKIDFRRNEKDIYGRIVTIEYDPNRNAYICLIHYGDGEKR
|
||||
YILHPRGAIIGDTIVSGTEVPIKMGNALPLTDMPLGTAIHNIEITLGKGG
|
||||
QLARAAGAVAKLIAKEGKSATLKLPSGEVRLISKNCSATVGQVGNVGVNQ
|
||||
KSLGRAGSKRWLGKRPVVRGVVMNPVDHPHGGGEGRAPIGRKKPTTPWGY
|
||||
PALGRRSRKRNKYSDNLILRRRSK
|
||||
>NC_000925@PopuCp126@rpl2@103379@104263@R@1@295 ribosomal_protein_L2
|
||||
MAIRLYRAYTPGTRNRTVSTFSEITTDKPEKSLINKHHFCKGRNNRGVIT
|
||||
CRHKGGGHKQRYRLIDFKRNRHNIIAKVASIEYDPNRNARIALLHYLDGE
|
||||
KRYILHPRSLSVGAIVVSGPMAPIEVGNALPLSTIPLGTAVHNIELRPYC
|
||||
GGQIVRSAGTYAQIVAKEGNFVTVKLPSSEVRMIRKECYATIGQVGNIDA
|
||||
SNITLGKAGRSRWLGKRPTVRGVVMNPVDHPHGGGGEGKSPIGRSRPVTP
|
||||
WGKPALGVKTRNPNKYSNPYVLLVVNKVYLTYNLILKYNVEINT
|
||||
>NC_000926@GuthCp113@rpl2@100540@101367@D@1@276 ribosomal_protein_L2
|
||||
MGIRIYKSYTPGTRNRSSSDFVEITKSKPEKSLLRKKLSCAGRNNRGLIT
|
||||
VRHKGGGHKQRYRLVDFKRNKLDIPAIVASVEYDPNRNARIALLHYQDGE
|
||||
KRYILHPKKLAVGDKIYSGINVPIEIGNAMPLYNVPLGTAVHNVELIPGR
|
||||
GGQIVRSAGTSAQVVAKDGQVVTIKMPSNEVRMIYKNCYATIGEVGNADI
|
||||
KNIRLGKAGRKRWLGIRPSVRGVVMNPCDHPHGGGEGRSPIGRAKPVTPW
|
||||
GKPALGVKTRRQNKYSDFCIIRSRN
|
||||
>NC_000927@NeolCp022@rpl2@15904@16731@R@1@276 ribosomal_protein_L2
|
||||
MGIRFYRAHTPGTRNRSVSDFHEITTSTPTKSLTHANHRARGRNHSGSIT
|
||||
TRWRGGGHKRLYRQIDFRRDKVGVLARVATVEYDPNRSARIALLHYQDGS
|
||||
KRYILHPQGLAIGAEVMSSPEAPISIGNALPLVNMPLGTEVHNIELRPYN
|
||||
GGQLVRAAGAVAQLVAKEGGFGTLRMPSGEVRLVAKDCWATVGQVGHVES
|
||||
INLTLGKAGRSRWLDRRPRVRGSVMNACDHPHGGGEGRCPIGHPGPLTPW
|
||||
GKPALGQRTRARKKYSDALLVRRRK
|
||||
>NC_000932@ArthCp064@rpl2@84337@85843@R@2@275 ribosomal_protein_L2
|
||||
MAIHLYKTSTPSTRNGAVDSQVKSNPRNNLICGQHHCGKGRNARGIITAR
|
||||
HRGGGHKRLYRKIDFRRNAKDIYGRIVTIEYDPNRNAYICLIHYGDGEKR
|
||||
YILHPRGAIIGDTIVSGTEVPIKMGNALPLTDMPLGTAIHNIEITLGRGG
|
||||
QLARAAGAVAKLIAKEGKSATLKLPSGEVRLISKNCSATVGQVGNVGVNQ
|
||||
KSLGRAGSKCWLGKRPVVRGVVMNPVDHPHGGGEGRAPIGRKKPVTPWGY
|
||||
PALGRRTRKRKKYSETLILRRRSK
|
||||
>NC_000932@ArthCp085@rpl2@152806@154312@D@2@275 ribosomal_protein_L2
|
||||
MAIHLYKTSTPSTRNGAVDSQVKSNPRNNLICGQHHCGKGRNARGIITAR
|
||||
HRGGGHKRLYRKIDFRRNAKDIYGRIVTIEYDPNRNAYICLIHYGDGEKR
|
||||
YILHPRGAIIGDTIVSGTEVPIKMGNALPLTDMPLGTAIHNIEITLGRGG
|
||||
QLARAAGAVAKLIAKEGKSATLKLPSGEVRLISKNCSATVGQVGNVGVNQ
|
||||
KSLGRAGSKCWLGKRPVVRGVVMNPVDHPHGGGEGRAPIGRKKPVTPWGY
|
||||
PALGRRTRKRKKYSETLILRRRSK
|
||||
>NC_001319@MapoCp072@rpl2@79137@80514@R@2@278 ribosomal_protein_L2
|
||||
MAIRLYRAYTPGTRNRSVPKFDEIVKCQPQKKLTYNKHIKKGRNNRGIIT
|
||||
SQHRGGGHKRLYRKIDFQRNKKYITGKIKTIEYDPNRNTYICLINYEDGE
|
||||
KRYILYPRGIKLDDTIISSEEAPILIGNTLPLTNMPLGTAIHNIEITPGK
|
||||
GGQLVRAAGTVAKIIAKEGQLVTLRLPSGEIRLISQKCLATIGQIGNVDV
|
||||
NNLRIGKAGSKRWLGKRPKVRGVVMNPIDHPHGGGEGRAPIGRKKPLTPW
|
||||
GHPALGKRSRKNNKYSDTLILRRRKNS
|
||||
>NC_001320@OrsajCp069@rpl2@81180@82664@R@2@274 ribosomal_protein_L2
|
||||
MAKHLYKTPIPSTRKGTIDRQVKSNPRNNLIHGRHRCGKGRNSRGIITAR
|
||||
HRGGGHKRLYRKIDFRRNQKDISGRIVTIEYDPNRNAYICLIHYGDGEKG
|
||||
YILHPRGAIIGDTIVSGTKVPISMGNALPLTDMPLGTAIHNIEITRGRGG
|
||||
QLARAAGAVAKLIAKEGKSATLRLPSGEVRLVSQNCLATVGQVGNVGVNQ
|
||||
KSLGRAGSKCWLGKRPVVRGVVMNPVDHPHGGGEGKAPIGRKKPTTPWGY
|
||||
PALGRRTRKRKKYSDSFILRRRK
|
||||
>NC_001320@OrsajCp109@rpl2@132454@133938@D@2@274 ribosomal_protein_L2
|
||||
MAKHLYKTPIPSTRKGTIDRQVKSNPRNNLIHGRHRCGKGRNSRGIITAR
|
||||
HRGGGHKRLYRKIDFRRNQKDISGRIVTIEYDPNRNAYICLIHYGDGEKG
|
||||
YILHPRGAIIGDTIVSGTKVPISMGNALPLTDMPLGTAIHNIEITRGRGG
|
||||
QLARAAGAVAKLIAKEGKSATLRLPSGEVRLVSQNCLATVGQVGNVGVNQ
|
||||
KSLGRAGSKCWLGKRPVVRGVVMNPVDHPHGGGEGKAPIGRKKPTTPWGY
|
||||
PALGRRTRKRKKYSDSFILRRRK
|
30
detectors/cds/test/test.db/rpl23.fst
Normal file
30
detectors/cds/test/test.db/rpl23.fst
Normal file
@ -0,0 +1,30 @@
|
||||
>AC_000188@LyesCp026@rpl23@87547@87828@R@1@94 ribosomal_protein_L23
|
||||
MDGIKYAVFTDKSIRLLGKNQYTSNVESGSTRTEIKHWVELFFGVKVIAM
|
||||
NSHRLPGKSRRMGPIMGHTMHYRRMIITLQPGYSIPPLRKKRT
|
||||
>AC_000188@LyesCp083@rpl23@153516@153797@D@1@94 ribosomal_protein_L23
|
||||
MDGIKYAVFTDKSIRLLGKNQYTSNVESGSTRTEIKHWVELFFGVKVIAM
|
||||
NSHRLPGKSRRMGPIMGHTMHYRRMIITLQPGYSIPPLRKKRT
|
||||
>NC_000925@PopuCp127@rpl23@104311@104613@R@1@101 ribosomal_protein_L23
|
||||
MDSIDSRDLLDLVKYPIITDKTTKLLEENQYCFAVDPNATKINIKAAIQY
|
||||
IFNVQVTGVNTCHPPKKKRSIGRFIGKRPHYKKAIITLASKDSINLFPET
|
||||
>NC_000926@GuthCp112@rpl23@100237@100524@D@1@96 ribosomal_protein_L23
|
||||
MHALIDLVKYPLITDKATRLLELNQYTFLTSRVATKTDIKNAIEFLFNVK
|
||||
VISINTCLLPLKRKRLGKFVGSKPRYKKAVVTLEKNNTINLFSEN
|
||||
>NC_000927@NeolCp023@rpl23@16871@17149@R@1@93 ribosomal_protein_L23
|
||||
MIIDLVKRPVITEKATRILEKNQYTFDVELSLTKPKIKALIEKAFKVEVV
|
||||
SVNTHRPPRRKRRLGTTQGYLPRYKRAIITLKRGFMIPLTPF
|
||||
>NC_000932@ArthCp065@rpl23@85862@86143@R@1@94 ribosomal_protein_L23
|
||||
MDGIKYAVFTDKSIRLLGKNQYTFNVESGSTRTEIKHWVELFFGVKVIAM
|
||||
NSHRLPGKVKRMGPILGHTMHYRRMIITLQPGYSIPPLRKKRT
|
||||
>NC_000932@ArthCp084@rpl23@152506@152787@D@1@94 ribosomal_protein_L23
|
||||
MDGIKYAVFTDKSIRLLGKNQYTFNVESGSTRTEIKHWVELFFGVKVIAM
|
||||
NSHRLPGKVKRMGPILGHTMHYRRMIITLQPGYSIPPLRKKRT
|
||||
>NC_001319@MapoCp073@rpl23@80550@80825@R@1@92 ribosomal_protein_L23
|
||||
MNQVKYPVLTEKTIRLLEKNQYSFDVNIDSNKTQIKKWIELFFNVKVISV
|
||||
NSHRLPKKKKKIGTTTGYTVRYKRMIIKLQSGYSIPLFSNK
|
||||
>NC_001320@OrsajCp071@rpl23@82683@82964@R@1@94 ribosomal_protein_L23
|
||||
MDGIKYAVFTEKSLRLLGKNQYTFNVESGFTKTEIKHWVELFFGVKVVAV
|
||||
NSHRLPGKGRRMGPILGHTMHYRRMIITLQPGYSIPLLDREKN
|
||||
>NC_001320@OrsajCp108@rpl23@132154@132435@D@1@94 ribosomal_protein_L23
|
||||
MDGIKYAVFTEKSLRLLGKNQYTFNVESGFTKTEIKHWVELFFGVKVVAV
|
||||
NSHRLPGKGRRMGPILGHTMHYRRMIITLQPGYSIPLLDREKN
|
30
detectors/cds/test/test.db/rpl32.fst
Normal file
30
detectors/cds/test/test.db/rpl32.fst
Normal file
@ -0,0 +1,30 @@
|
||||
>AC_000188@LyesCp036@rpl32@114504@114671@D@1@56 ribosomal_protein_L32
|
||||
MAVPKKRTSTSKKRIRKNIWKRKGYWVALKAFSLAKSLSTGNSKSFFVRQ
|
||||
TKINK
|
||||
>NC_000925@PopuCp022@rpl32@16239@16418@D@1@60 ribosomal_protein_L32
|
||||
MAVPKKRTSKAKKNARKANWKNQAKTEAQKALSLAKSVLTGKSNGFVYNT
|
||||
LEVADAIVE
|
||||
>NC_000926@GuthCp026@rpl32@24770@24931@D@1@54 ribosomal_protein_L32
|
||||
MAVPKKRTSRSKTNSRFANWLNKSNLQAQRAISKAKSITNKKNTVNDETI
|
||||
ETE
|
||||
>NC_000927@NeolCp122@rpl32@149220@149447@D@1@76 ribosomal_protein_L32
|
||||
MAVPKKRKSKSRANSQNHVWKREIVKQARRAVSLAKALLGGNTNFLLVSP
|
||||
GPTTPIKPNPKKQTGRRPRSQRRRT
|
||||
>NC_000932@ArthCp072@rpl32@113449@113607@D@1@53 ribosomal_protein_L32
|
||||
MAVPKKRTSISKKRIRKKIWKRKGYWTSLKAFSLGKSLSTGNSKSFFVQQ
|
||||
NK
|
||||
>NC_001319@MapoCp076@rpl32@93886@94095@D@1@70 ribosomal_protein_L32
|
||||
MAVPKKRTSKSKTRIRKAIWKNKANKSALRAFSLAKSILTNRSKSFYYTI
|
||||
NDKLLNSSKSISTSKLDES
|
||||
>NC_001320@OrsajCp086@rpl32@104352@104543@D@1@64 ribosomal_protein_L32
|
||||
MAVPKKRTSMSKKRIRKNLWKKKTYFSIVQSYSLAKSRSFSGVSEHPKPK
|
||||
GFSRQQTNNRVLG
|
||||
>NC_001603@EugrCp047@rpl32@75928@76092@R@1@55 ribosomal_protein_L32
|
||||
MAVPKKKMSKSRRNSRKSNWKKKVLKKVLFALSLGKSFEANTNVNFSFGD
|
||||
KLPQ
|
||||
>NC_001631@PithCp150@rpl32@107399@107611@R@1@71 ribosomal_protein_L32
|
||||
MAVPKKRTSRSKKKIRKNVRKGKAYRAAIKAFSLAKSISTGHSKSFYCIV
|
||||
NDDSSGSSESKLTAIDLDDP
|
||||
>NC_001666@ZemaCp084@rpl32@108127@108306@D@1@60 ribosomal_protein_L32
|
||||
MAVPKKRTSMSKKRIRKNLWKKKTYFSIVQSYSLAKSRSFSRGNEHPKPK
|
||||
GFSGQQANK
|
30
detectors/cds/test/test.db/rps15.fst
Normal file
30
detectors/cds/test/test.db/rps15.fst
Normal file
@ -0,0 +1,30 @@
|
||||
>AC_000188@LyesCp029@rps15@124632@124895@R@1@88 ribosomal_protein_S15
|
||||
MVKNSVISVISQEEKKGSVEFQVFNFTNKIRRLTSHLELHKKDYLSQRGL
|
||||
KKILGKRQRLLAYLAKKNRVRYKELINRLDIRETKTR
|
||||
>NC_000932@ArthCp081@rps15@123296@123562@R@1@89 ribosomal_protein_S15
|
||||
MIKNIVISFEEQKEESRGSVEFQVFSFTNKIRRLTSHLELHRKDYLSQRG
|
||||
LRKILGKRQRLLAYLSKKNRVRYKELINQLNIRELKTR
|
||||
>NC_001319@MapoCp086@rps15@103433@103699@R@1@89 ribosomal_protein_S15
|
||||
MSKNLFMDLSSISEKEKGSVEFQIFRLTNRVVKLTYHFKKHGKDYSSQRG
|
||||
LWKILGKRKRLLAYLFKTNFVSYENLIIQLGIRGLKKN
|
||||
>NC_001320@OrsajCp083@rps15@100818@101090@D@1@91 ribosomal_protein_S15
|
||||
MKKKGGRKIFGFMVKEEKEENWGSVEFQVFSFTNKIRRLASHLELHKKDF
|
||||
SSERGLRRLLGKRQRLLAYLAKKNRVRYKKLISQLDIRER
|
||||
>NC_001320@OrsajCp095@rps15@114028@114300@R@1@91 ribosomal_protein_S15
|
||||
MKKKGGRKIFGFMVKEEKEENWGSVEFQVFSFTNKIRRLASHLELHKKDF
|
||||
SSERGLRRLLGKRQRLLAYLAKKNRVRYKKLISQLDIRER
|
||||
>NC_001631@PithCp139@rps15@101136@101402@D@1@89 ribosomal_protein_S15
|
||||
MINNLSISSSLIPDKQRGSVESQVFYLTNRVLRLTQHLQLHGRDYSSQRG
|
||||
LWKILSKRKQLLVYLSKRDKLRYDDLIGQLGIRGLKTR
|
||||
>NC_001666@ZemaCp082@rps15@104729@104965@D@1@79 ribosomal_protein_S15
|
||||
MVKEEKQENRGSVEFQVFSFTNKIRRLASHLELHKKDFSSERGLRRLLGK
|
||||
RQRLLAYLAKKNRVRYKKLISQLDIREK
|
||||
>NC_001666@ZemaCp093@rps15@117772@118008@R@1@79 ribosomal_protein_S15
|
||||
MVKEEKQENRGSVEFQVFSFTNKIRRLASHLELHKKDFSSERGLRRLLGK
|
||||
RQRLLAYLAKKNRVRYKKLISQLDIREK
|
||||
>NC_001879@NitaCp090@rps15@125230@125493@R@1@88 ribosomal_protein_S15
|
||||
MVKNSVISVISQEEKRGSVEFQVFNFTNKIRRLTSHLELHKKDYLSQRGL
|
||||
KKILGKRQRLLAYLSKKNRVRYKELINQLDIRETKTR
|
||||
>NC_002186@MeviCp104@rps15@109145@109417@R@1@91 ribosomal_protein_S15
|
||||
MLKKKIIKTHANHTNDTGSTQVQVSLLSSRVAQLTKHLNNHKNDYSSQRG
|
||||
LKKLLGQRKRLLKYLFVKDPLGYNNLIIQLGIRPGKSLVN
|
50
detectors/cds/test/test.db/rps7.fst
Normal file
50
detectors/cds/test/test.db/rps7.fst
Normal file
@ -0,0 +1,50 @@
|
||||
>AC_000188@LyesCp008@rps7@98721@99188@R@1@156 ribosomal_protein_S7
|
||||
MSRRGTAEKKTAKSDPIYRNRLVNMLVNRILKHGKKSLAYQIIYRAVKKI
|
||||
QQKTETNPLSVLRQAIRGVTPDITVKARRVGGSTHQVPIEIGSTQGKALA
|
||||
IRWLLAASRKRPGRNMAFKLSSELVDAAKGSGDAIRKKEETHRMAEANRA
|
||||
FAHFR
|
||||
>AC_000188@LyesCp022@rps7@142156@142623@D@1@156 ribosomal_protein_S7
|
||||
MSRRGTAEKKTAKSDPIYRNRLVNMLVNRILKHGKKSLAYQIIYRAVKKI
|
||||
QQKTETNPLSVLRQAIRGVTPDITVKARRVGGSTHQVPIEIGSTQGKALA
|
||||
IRWLLAASRKRPGRNMAFKLSSELVDAAKGSGDAIRKKEETHRMAEANRA
|
||||
FAHFR
|
||||
>NC_000925@PopuCp103@rps7@92209@92679@R@1@157 ribosomal_protein_S7
|
||||
MSRRNTAKKRFASPDPLYKSRLVSMLTVRILKSGKKTLAQRIIYQALDIV
|
||||
KERTETDPLNVLEKAIRNITPLVEVKARRVGGSTYQVPIEVRAYRGTNLA
|
||||
LRWITRFSRERSGKSMSMKLANEIMDAANETGNSIRKREETHRMAEANKA
|
||||
FAHYRY
|
||||
>NC_000926@GuthCp136@rps7@111745@112215@D@1@157 ribosomal_protein_S7
|
||||
MSRRSTTKKKLALPDPIYNSRLVNMLTVRILKEGKKHLAQRIIYNAFDII
|
||||
KQRTGEDAILVFESAIKKVTPLVEVKARRIGGSTYQVPMEVRAFRGTNLA
|
||||
LRWITKYARERAGKSMSMKLANEIMDAANETGSSIRKREEIHRMAEANKA
|
||||
FAHYRF
|
||||
>NC_000927@NeolCp044@rps7@48318@48788@D@1@157 ribosomal_protein_S7
|
||||
MSRRNTAVKRSISSDPVYNSQLIHMMISHILKEGKKALAYRLMYDAMKRI
|
||||
EKTTQQDPILVVERAVRNATPTIEVKARRMGGSIYQVPLEVKPERGTALA
|
||||
LRWILLAARNRTGRDMVAKLSNELMDASNRIGNAVRKRDEMHRMAEANKA
|
||||
FAHIRV
|
||||
>NC_000932@ArthCp069@rps7@97478@97945@R@1@156 ribosomal_protein_S7
|
||||
MSRRGTAEEKTAKSDPIYRNRLVNMLVNRILKHGKKSLAYQIIYRALKKI
|
||||
QQKTETNPLSVLRQAIRGVTPDIAVKARRVGGSTHQVPIEIGSTQGKALA
|
||||
IRWLLGASRKRPGRNMAFKLSSELVDAAKGSGDAIRKKEETHRMAEANRA
|
||||
FAHFR
|
||||
>NC_000932@ArthCp088@rps7@140704@141171@D@1@156 ribosomal_protein_S7
|
||||
MSRRGTAEEKTAKSDPIYRNRLVNMLVNRILKHGKKSLAYQIIYRALKKI
|
||||
QQKTETNPLSVLRQAIRGVTPDIAVKARRVGGSTHQVPIEIGSTQGKALA
|
||||
IRWLLGASRKRPGRNMAFKLSSELVDAAKGSGDAIRKKEETHRMAEANRA
|
||||
FAHFR
|
||||
>NC_001319@MapoCp002@rps7@892@1359@D@1@156 ribosomal_protein_S7
|
||||
MSRKSIAEKQVAKPDPIYRNRLVNMLVNRILKNGKKSLAYRILYKAMKNI
|
||||
KQKTKKNPLFVLRQAVRKVTPNVTVKARRIDGSTYQVPLEIKSTQGKALA
|
||||
IRWLLGASRKRSGQNMAFKLSYELIDAARDNGIAIRKKEETHKMAEANRA
|
||||
FAHFR
|
||||
>NC_001320@OrsajCp076@rps7@87944@88414@R@1@157 ribosomal_protein_S7
|
||||
MSRRGTAEKRTAKSDPIFRNRLVNMVVNRIMKDGKKSLAYQILYRAVKKI
|
||||
QQKTETNPLLVLRQAIRRVTPNIGVKTRRNKKGSTRKVPIEIGSKQGRAL
|
||||
AIRWLLEASQKRPGRNMAFKLSSELVDAAKGGGGAIRKKEATHRMAEANR
|
||||
ALAHFR
|
||||
>NC_001320@OrsajCp103@rps7@126704@127174@D@1@157 ribosomal_protein_S7
|
||||
MSRRGTAEKRTAKSDPIFRNRLVNMVVNRIMKDGKKSLAYQILYRAVKKI
|
||||
QQKTETNPLLVLRQAIRRVTPNIGVKTRRNKKGSTRKVPIEIGSKQGRAL
|
||||
AIRWLLEASQKRPGRNMAFKLSSELVDAAKGGGGAIRKKEATHRMAEANR
|
||||
ALAHFR
|
353
detectors/cds/test/test.db/ycf1.fst
Normal file
353
detectors/cds/test/test.db/ycf1.fst
Normal file
@ -0,0 +1,353 @@
|
||||
>AC_000188@LyesCp019@ycf1@125297@130972@R@1@1892 ycf1_protein
|
||||
MIFQSFLLGNLVSLCMKIINSVVVVGLYYGFLTTFSIGPSYLFLLRALVM
|
||||
EEGTEKKVSATTGFITGQLMMFISIYYAPLHLALGRPHTITVLALPYLLF
|
||||
HFFWNNHKHFFDYGSTTRNSMRNLSIQCVFLNNLIFQLFNHFILPSSMLA
|
||||
RLVNIYLFRCNNKILFVTSGFVGWLIGHILFMKWLGLVLVWIRQNHSIRS
|
||||
NKYIRSNKYLVLELRNSMARIFSILLFITCVYYLGRIPSPILTKKLKEAS
|
||||
KTEERVESEEERDVEIETASEMKGTKQEQEGSTEEDPYPSPSLFSEEGWD
|
||||
PDKIDETEEIRVNGKDKIKDKFHSHLTETGYNNINTSNSPIYDYQDSYLN
|
||||
NNNTGNLENCKLQLLDKKNENQEQDLFWFQKPLVSLLFDYNRWNRPFRYI
|
||||
KNNRFEQAVRTEMSQYFFDTCKSDGKQKISFTYPPSLSTFWKMIKRKIPL
|
||||
LSLQKTLPNELDTQWVSTNKEKSNNLNKEFLNRLEILDKESLSLDILETR
|
||||
TRFCNDDTKKEYVPKMYDPLLNGLYRGTIKKGVSSSIINNTLLENWEKRV
|
||||
RLNRIHTIFLPNIDYQEFEQKAYTIDKKPLSTEIDEFLTLINELGNEAKS
|
||||
SLNLKGLSLFSDQEQRRANSEKRTKFVKFVFNALDPNETKSGKKSIGIKE
|
||||
ISKKVPRWSHKLITELDQQMGEFKDRASMDHQLRSRKAKRVVIFTDNKAT
|
||||
KDAEEEVALISYSQQSDFRRGIITGSMRAQRRKTFISKLFQANVHSPLFV
|
||||
DRITPLRLFSFDISELIKPILKNWTDKEGEFKILESREEQTKREEKKEKD
|
||||
KKEDNKRKEQARIAIEEAWDTIPLAQIIRGYMLITQSILRKYILLPALII
|
||||
AKNIGRMLFLQLPEWSEDLQEWNREMQIKCTYNGVQLSETEFPKNWLRDG
|
||||
IQIKILFPFCLKPWHISKLYPSRRELMKKQKQKDDFCFLTVWGMEAELPF
|
||||
GSPRKRPSFFEPIFKELEKKIGKFKKKYFLTLKILKGKTKLFRKVSKETT
|
||||
KLFIKSIGFLKKIKKELSKVNLIVLFRFKEISESNETKKEKDYLISNQII
|
||||
NESFRQIESGNWPNSSLIETKMKDLTNRTSTIKNKIERITKEKKKVTPEI
|
||||
DINPNKTNNIKKFESPKKIFQILKSRNTRVIWKFHYFLKLFIQRLYINLF
|
||||
LSIINIPRITTQLFLKSTNKLIEKFISNNEINQEKINNKKKIHFMFISTI
|
||||
KKSLYNISKKNSHILCDLSYLSQAYVFYKLSQTQVINFSKFRSVLQYNTT
|
||||
SCFLKTKIKDYFKTLGIFHSELKHKKLQSYRINQWKNWLRWHYQYDLSQI
|
||||
RWSRLMPKKWRTRVNQSCMAQNKNRNLNKWNSYEKDQLLHYKKENDSELY
|
||||
SLSNEKDNFKKCYGYGLLAYKSINYENKSDSFFSRLPFEVQVKKNLEISY
|
||||
NSNTSKHNFVDMPGNLHINNYLRKGNILDRERNLDRKYFDWKIIHFSLRQ
|
||||
KGDIEAWVKIDTNSNPNTKIGINNYQIIDKIEKKGVFYLTTHQNPEKTQK
|
||||
NSKKFFFDWMGMNEKIFNRPILNLEFWFFPEFVLLYNVYKIKPWIIPSKF
|
||||
LLFNLNTNKNVSQNKNQNFFLPSNKKIKIKNRSQEAKEPPSQRERGSDIE
|
||||
NKGNLSPVFSKHQTDLEKDYVESDTKKGKNKKQYKSNTEAELDLFLKRYL
|
||||
LFQLRWNGALNQRMFENIKVYCLLLRLINPTKITISSIQRREMSLDIMLI
|
||||
QANLPLTDLMKKGVLIIEPIRLSVKDNGQFIMYQTIGISLIHKSKHQTNQ
|
||||
RYREQRYVDKKNFDEFILQPQTQRINTEKTHFGLLVPENILWSRRRRELR
|
||||
IRSFFNSWNWNVVDRNSVFCNETNVKNWSQFLGERKPLYKDKNELIKFKF
|
||||
FFWPNYRLEDLACMNRYWFDTNNGSRFSILRIHMYPRLKIN
|
||||
>NC_000932@ArthCp070@ycf1@109405@110436@D@1@344 hypothetical_protein
|
||||
MMVFQSFILGNLVSLCMKIINSVVVVGLYYGFLTTFSIGPSYLFLLRARV
|
||||
MDEGEEGTEKKVSATTGFIAGQLMMFISIYYAPLHLALGRPHTITVLALP
|
||||
YLLFHFFWNNHKHFFDYGSTTRNEMRNLRIQCVFLNNLIFQLFNHFILPS
|
||||
SMLARLVNIYMFRCNNKMLFVTSSFVGWLIGHILFMKWVGLVLVWIQQNN
|
||||
SIRSNVVIRSNKYKFLVSELRNSMARIFSILLFITCVYYLGRIPSPIFTK
|
||||
KLKGTSETGGTKQDQEVSTEEAPFPSLFSEEGEDLDKIDEMEEIRVNGKD
|
||||
KINKDDEFHVRTYYNYKTVSENLYGNKENSNLEFFKIKKKEDH
|
||||
>NC_000932@Arthcp087@ycf1@123884@129244@R@1@1787 Ycf1
|
||||
MMVFQSFILGNLVSLCMKIINSVVVVGLYYGFLTTFSIGPSYLFLLRARV
|
||||
MDEGEEGTEKKVSATTGFIAGQLMMFISIYYAPLHLALGRPHTITVLALP
|
||||
YLLFHFFWNNHKHFFDYGSTTRNEMRNLRIQCVFLNNLIFQLFNHFILPS
|
||||
SMLARLVNIYMFRCNNKMLFVTSSFVGWLIGHILFMKWVGLVLVWIQQNN
|
||||
SIRSNVVIRSNKYKFLVSELRNSMARIFSILLFITCVYYLGRIPSPIFTK
|
||||
KLKGTSETGGTKQDQEVSTEEAPFPSLFSEEGEDLDKIDEMEEIRVNGKD
|
||||
KINKDDEFHVRTYYNYKTVSENLYGNKENSNLEFFKIKKKEDHFLWFEKP
|
||||
FVTLVFDYKRWNRPNRYIKNDKIENIVRNEMSQYFFYTCQSDGKERISFT
|
||||
YPPNLSTFFEMIQKRIPSFTKEKKTFDQVSTYWSLIHEEKRENLKKEFLN
|
||||
RIEALDKEWSVENILEKTTRFCYNEAKKEYLPKIYDPFLHGISRGRIKKL
|
||||
PPFQIITETYRKNNLGGSWINKIHGLLLKINYKKFEQTIEKFNRKSLSIE
|
||||
KKLSFFSEPQQEEKINSEEEIKTFKFLFDIVRTDSNDQTLIKNFMDFPEI
|
||||
NKKVPRWSYKLISELEELEGENEENVPMEPGIRSRKAKRVVVFTDKEPHG
|
||||
EIYTNLKDNQNSDQNDEMALIRYSQQSDFRREIIKGSMRSQRRKTVIWEF
|
||||
FQAKVHSPLFFDRIDKLFFFSFDIWGLKKKIIKNFIWKKKIDKKEEEQSK
|
||||
REETRRIEIAETWDSFLFAQIIRGSLLVTQSILRKYIILPLLIIIKNSVR
|
||||
MLLFQFPEWSQDLKDWKREMHVKCTYNGVQLSETEFPRNWLTDGIQIKIL
|
||||
FPFYLKPWHKSKFQASQKARLKKTKDKGEKNDFCFLTVWGMETELPFGSA
|
||||
QRKPSFFEPISKELKKRIKKLKKKSFVVLKIFKERAPIFLKVAKETKNWI
|
||||
LKNFIFIKGISKRNLIPLFGPREIYELNEPKKDSIISNQMIHELSVQNKS
|
||||
LEWTNSSLSEKKIKNLIDRKKTIRNQIEEISKEKQNLTNSCTKLRYDSKI
|
||||
IESSKKIWQTFKRKNTRLIRKSIFFFKFCIEQMSIAIFLGIINIPRITTQ
|
||||
LFFESTKKILDKYIYKNEENGEKKKNTLYFISTIKNLISNKKKMSYDLCS
|
||||
LSQAYVFYKLSQIKVSNFCKLKAVLEYNICITSFFVKNKIKVFFQEHGIF
|
||||
HYELKNKTFLNSEVNQWKNWLRSQYQYNLPQISWARLVTQNWKNKINKDS
|
||||
LVLNPSLTKEDSYEKKKFDNYKKQKFFEADSLLNPKHNVKKDSIYNLFCY
|
||||
KSIHSTEKNFDMSIGIALDNCLVSSFLEKYNIRGMGEIRHRKYLDWRILN
|
||||
FWFTKKVTIEPWVDTKSKKKYINTKVQNYQKIDKITQTDLANKKRNFFDW
|
||||
MGMNEEILNQRITNFEFFFFPEFFLFSSTYKMKPWVIPIKLLLLNFNENI
|
||||
NVNKKIIRKKKGFIPSNEKESLRFYNLNKEEKESAGQVELESDKETKRNP
|
||||
EAARLNQEKNIEENFAESTIKKRKNKKQYKSNTEAELDLFLTRYSRFQLR
|
||||
WNCFFNQKILNNVKVYCLLVRLNNPNEIAVSSIERGEMSLDILMIEKNFT
|
||||
FAKLMKKGILIIEPVRLSVQNDGQLIIYRTIGISLVHKNKHKISKRYKKK
|
||||
SYINKKFFEKSITKYQNKTVNKKKNNYDFFVPEKILSPKRRREFRILICF
|
||||
NLKKKNARDTNSRFDKNIQNLTTVLHKKKDLDLDKDKNNLINLKSFLWPN
|
||||
FKLEDLACMNRYWFNTTNGNHFSMIRIRMYTRFPIP
|
||||
>NC_001879@NitaCp151@ycf1@125891@131599@R@1@1903 Ycf1
|
||||
MMIFQSFLLGNLVSLCMKIINSVVVVGLYYGFLTTFSIGPSYLFLLRALV
|
||||
MEEGTEKKVSATTGFITGQLMMFISIYYAPLHLALGRPHTITVLALPYLL
|
||||
FHFFWNNHKHFFDYGSTTRNSMRNLSIQCVFLNNLIFQLFNHFILPSSML
|
||||
ARLVNIYLFRCNSKILFVTSGFVGWLIGHILFMKWLGLVLVWIRQNHSIR
|
||||
SNKYIRSNKYLVLELRNSMARIFSILLFITCVYYLGRIPSPILTKKLKEA
|
||||
SKTEERVESEEERDVEIETASEMKGTKQEQEGSTEEDPYPSPSLFSEERW
|
||||
DPDKIDETEEIRVNGKDKIKDKFHSHLTETGYNNINTSNSPIYDYEDSYL
|
||||
NNNNTGNTEIFKLQLLDKKNENKDLFWFQQPLVSLLFDYNRWNRPFRYIK
|
||||
NNRFEQAIRTEMSQYFFNTCKSDGKQRISFTYPPSLSTFWKMIKRRIPLL
|
||||
SLQKTLPNELDNQWISTNKEKSNNLNKEFLNRLEVLDKESFSLDILETRT
|
||||
RLCNDDTKKEYVPKMYDPLLNGPYRGTIKKKFSPSIINNTSLENLKERVR
|
||||
INRIHTIFLPNTDYQELEQKVDTVAKKPLSTEIDEFLTLINEFGNEPKSS
|
||||
LNLKDLSLFSDQEQGRVNSEKRTKFVKFVFNAIAPNGTTSEKKSIGIKEI
|
||||
SKKIPRWSHKLITELEQQSGDYQEGVPLDHQIRSRKAKRVVIFTANNQNN
|
||||
DPDTKDTDTADQDQTKEVALIRYSQQPDFRRGIIKGSMRAQRRKTVIWKL
|
||||
FQANVHSPLFLDRITPPFLFSFDISGLIKPIFRNWSGKEGEFKILESREE
|
||||
QTKREEKKEKDKKGENKRKEKARIEIAEAWDTIPFAQIIRGYMLITQSIL
|
||||
RKYIVLPSLIIAKNLGRMLVLQLPEWSEDLQEWNREMHIKCTYNGVQLSE
|
||||
TEFPKNWLKDGIQIKILFPFCLKPWHISKLYSSRGELMKKKKQKDDFCFL
|
||||
TVWGMEAELPFGSPRKRPSFFEPIFKELEKKIGKFKKKYFITLKVFKGKI
|
||||
KLFRRISKETKKWLIKSSLFIKKMKKELSKVNPIVLFRLKEIDESNETKK
|
||||
EKDSLMSNQIINESFSQIESGNWPNSSLIESKMKDLTDRTSTIKNQIERI
|
||||
TKEKKKVTPEIDISPNKTNNIKKFESPKKIFQILKRRNTRLIWKFHYFLK
|
||||
LFIQRLYIDLFLSIINIPRINTQLFLESTNKLIDKYISNNEINQEKINNQ
|
||||
KKIHFISTIKKSLYNISKKNSHIFFDLSYLSQAYVFYKLSQPQVINLSKL
|
||||
RSVLQYNRTSFFLKTKIKDYFRTLGIFHSELKHKKLQSYRINQWKNWLRR
|
||||
HYQYDLSQIRWSRLMPQKWRNRVNQGCMAQNRNLNKWNSYEKDQLIHYKK
|
||||
ENDSELYSLANQKDNFQKCYRYDLLAYKSINYEKKNDSFISRLPFQVNKN
|
||||
LEISSNSNTSKHNLFDMLGNLHINNYLRKGNILYIERNLDRKYFDWKIIH
|
||||
FSLRQKEDIEAWVKIDTNSNPNTKIGINNYQIIDKIDKKGFFYLTIHQNP
|
||||
ENNQKNSKKAFFDWMGMNEKILNRPILNLEFWFFPEFVPLYNVYKIKPWI
|
||||
IPSKLLLLNLNTNENVSQNKNINKNQKQNFFLRSNKKIKNRIQEAKEPAS
|
||||
QGEKERGSDIENKGNLGPVLSKHQNALKKDYAESDTKKGKKKKQYKSNTE
|
||||
AELDLFLKRYLLFQLRWNDALNQRMIENIKVYCLLLRLINPSKIAISSIQ
|
||||
RREMSLDIMLIQKNLTLTELMKKGILIIEPIRLSVKNNGQFIMYQTIGIS
|
||||
LVHKSKHQTNQRYPEQRYVDKKNFDEFILQPQTQRINTDKNHFDLLVPEN
|
||||
ILWSRRRRELRIRSLFNSLNWNGIDRNSVFCNENNVKNWSQFLDERKPLY
|
||||
KEKNELIKLKFFLWPNYRLEDLACMNRYWFDTNNGSRFSILRIHMYPQLK
|
||||
IN
|
||||
>NC_002202@SpolCp093@ycf1@121596@127097@R@1@1834 ycf1_protein
|
||||
MIFQSFLLGNLVSLCMKIINSVVVVGLYYGFLTTFSIGPSYLFLLRAQVM
|
||||
EEGEEGTEKKVSGTTGFIMGQLMMFISIYYTPLHLALGRPHTITVLALPY
|
||||
LLFHFFWNNHKHFFDYGSTSRNSMRNLSIQCVFLNNLIFQLFNYFILPSS
|
||||
MLARLVNIYMFRCNNKMLFVTSSFVGWLIGHILFMKWVGLVLVWIQQNNS
|
||||
IRSNKYLVSELRNSMARIFSILFFITCVYYLGRMPSPIFTNKLKQMLETN
|
||||
EIEEETNLEIEKTSETKETKQEEEGFTEEDPSPSLFSEEKEDPDKIDETE
|
||||
KIRVNGKDKTKDEFHLKEACYKNSPTSYSGNQDISKLEILKKEKKILFWF
|
||||
QKPLIFLLFDYKRWNRPMRYIKNNRFENAVRNEMSQYFFYTCQNDGKQRI
|
||||
SFTYPPSLSIFWEMIQRKISLATTEKFLYDDELYNYWIYTNEQKKNSLSN
|
||||
EFANRITVLDKGLFYIDVLDKKTRLCKSKNEYLQKDHDPLLNGSYRGIIK
|
||||
KTLLPFINNDETTVKKLIDEIFINKIHSVLGNCNNYQEFEYKKDPFKKNP
|
||||
ISSKIRHFVTLMSQFDGESTFNQKGISLLSEHKQICSEDPEIFFKFLVDT
|
||||
IIADSFTQTIPKESIGIKEISKKVPHWSYQLIDESEQEEMENEKQVSWPH
|
||||
QIRSRSGKEVVFFTDKQENTDNPTPNTADISEQADEVVLTRYPQESDFRR
|
||||
DIIKGSMRSQRRKIVIWELFQANIHSPLFLDRTNKSSFFSITFSRLIKRI
|
||||
FKNYMGKNPELDISNYKEEELKKKEKAKEHKKDKEKKQEQIRLDIAETWD
|
||||
TIPGAQIIRSLILLTQSILRKYILLPLLITGKNIGRILLFQLPEWSDDFK
|
||||
EWTSEMHIKCTYNGVQLSEKEFPKNWLTDGMQIKILSPFCLKPWHKSMIR
|
||||
PYHQDKKKKEQNQIDAFCFLTVVGLETDIPFGPPRKRPSFFQPIFKQLDK
|
||||
KIEKLIKGNFQVRKRLKEKILFFLKLQNETNNWIIEIFPFFKKIIRKMST
|
||||
VNTIGVFGLKEASSEIKSEKDSRIKNHMIHESSVQIRFLNQTNSSVTEKK
|
||||
MKDLANRTRIIKNKIEKISNDKLKMSPKKTRYGTKNLGQILKRRNARLIR
|
||||
NSNYILKFFRERIYGDIFLYIINIPKINTQLFLESTKNGIDKSIYNNESI
|
||||
TKTNKNRIQFISTINKKFLPFLSTSKNNSKIISDFSFLSQAYVFYKLSQA
|
||||
KILNLYKLRLVLQYRGISLFLKNEIKDFFGTQGITNSELKTKKLPNSGMN
|
||||
QWKNWLKLKNNYQYNLSQLKWSRLVPQKWRNRVTEHCEVENTNLYQNEEL
|
||||
INSKKHLLLLPDQKYNFQKNYRYDVLSYKFFNYKNKNDSYRYSYGLPFQV
|
||||
NKNQEFSYTYNYNINNNKFIDMWWNIPISNFSYLEKTKIMDIDKNIDRKY
|
||||
LDFKILDFSLRNKIDIEDWIDISTSINENTKTEPRNYQIVEKINKKSLVY
|
||||
STIYQEIKQSDQKNKLFDWMGMNEKILSRPISNLEFWFFSEFFSFYNAYK
|
||||
MKPWVIPINLLFSNSNVSEKFSKNKSINRKKKTNPFIPSNEKKSFELENR
|
||||
NQDEKELVSKEDLGSYVQENYEKDIEEDYISFIDIKKPIKQKQPKSVIEA
|
||||
EFDLFLKRYLLFQLKWADSLNEKLMDNIQVYCLVLRLINPIEILISSIER
|
||||
KELSMDIMLDRKDFNCPNWKQKRVLIIEPIRLSIRGDGQFLLYQTIGISL
|
||||
VHKSKHQNNQKRYSENVDKKFLGERNKNNFDLLAPENLLSPRRRRELRIL
|
||||
LCLNSRNNNGVNTNPMENRVKNCNQFFDEKKDLDRDKNTLRNLKFFLWPN
|
||||
YRLEDLACMNRFWFDTNNGSRFSILRIHMYPQF
|
||||
>NC_002693@OeelhCp105@ycf1@129224@136615@R@1@2464 Ycf1
|
||||
MVNLVYVCMKINNSVVMVGLYYGFISAFSIGSSYLFLLRPRFLNDDPDAI
|
||||
EKKASETAGFFTGQLLIFISILYGPLHLALGRPHTILLLLAPYFFFHYLF
|
||||
SNSGQWPSQRFAFPLLTKSMRNRRFQLVFLNNLLFQLFSLSLLGRPMLTR
|
||||
LSYIYIFRCNNKMLFVLSSFVGWLIGHILVLKWAGLVFVWLLQVIRSKTM
|
||||
KYITCNVLIPATKYIIEKWRNSFVAGLIREILAMKQVESALVRIKNSKLL
|
||||
DDARWWIRGSSLISGLKINIRFYARLILRGFENVYVGAKFRQDMEHLFSI
|
||||
ILFAIFLLYLDQTPLLYADPADKKLQLQRKLSNETQAARAEKKLEERLTK
|
||||
KFEAQRRAQRAAQRQALQEFKQGVVESYLAKQVAKDANQIQAQKDEKQIQ
|
||||
AEQKARRIRAEQVVQYTFWLIEAQRREMEIEAARAMQEAYKGMLAAQEGY
|
||||
VEEGVQEKQEGFPEELISPSPIFHSEEREENPKLLILKEKISILKKKISI
|
||||
LKKKISILKEKNDLFSFEIPIITSLFDPQKPLRPLRYIKTCAGVEKAVKN
|
||||
EMSQYFFYACRSDGKQRICFTYPPSLATFWEMIQRKMASRFPRIYAKAKW
|
||||
RALRWSAPGSYRQWISRNKKKKNSLSTEFQNRIKTLDKKKSLLNVLARRK
|
||||
RSSLLNVLARRKRSSLQNVLETRKRLCNYKTNKTKKEYLPEKEYLPEIAD
|
||||
PFLTGALRGKSDPEVDDGGRKTSDLIKVVFLKNNITMATLRNKNDDDLRE
|
||||
QKNAIALLSRMKNPVNKLHLLFVNERDYPFVKTLVNRINGPAVPKKKKKI
|
||||
SKSKQKNVKSKQKNVKSKQKNVKSKQKNVKSKQNEIKRKVNEIKRKVNEI
|
||||
KRKQNESYPRGVKFGATPKTEINPHGIRFDAATIEKYSFATGYSYSPPSF
|
||||
DDILFHAFVTEPQRNKKAVIELEEEINKKVPRWSYQLIDELEQLEGAEGE
|
||||
TQFSDHEIRILPFKRVAVFTEKDSKKRKPLIDEQGNFVRHRKTYAIRFLG
|
||||
HMSDFRRGLIKGSARQDRRKAYVCRTTQVNARSPLFALGPRTFLDGLVNL
|
||||
AVQVKFFYETRIKGEKIVDDDDDNEKDEFKVMIPDTKSIVAETREMLKQA
|
||||
GAEDGQSYEDVEDDIRIENVTEMWENIDYGQVIRTFILLLHIFLRKKVVF
|
||||
PAFIIGKNIARMLLLQATEWKIDFARLKRERYAICTYNGMKVSEKIAFDQ
|
||||
FPPDWADDGIQILVTNPFYLKPWYRSKTRSIQKDPKKEKDPKKEKGPKKE
|
||||
PWYRFKTRFIQKDPKKEKGPKKEKGPKKEPWYRRFFFQKDPKKEKGPKKG
|
||||
KAQFEGDRGVRFLTSFGILTDRPFGDLITPDWGVFFNPIRNELKKKIRQF
|
||||
EKKHSIILSKRFRNVLKKTKKWFIKSFLFLKRARLKRHPIELSGGRETPE
|
||||
FTRSQKDIDNLKNEQDFRMSRNPRISESLLQGPVRALKDDSLPEEKVADP
|
||||
EKEPSDLDNELRAVWDEIDKVTKERKKIVFTPKPDSPDKLVQAKKNILKK
|
||||
LERIKSRRHKFYFLRIRKSYYVLLFFIKRISRNIKRIYLNPLERAISIRK
|
||||
IHPQRFFEFSKKMIEKSIGIGKTETNKETVYKTKKKKKKKNPFISIFKES
|
||||
LYDKDIRISENDIKLGDTWNGYKYKRKKATDTSDLASMSQAYVFYKLYQT
|
||||
QQTQLIHLDKLRYVLQYDGTSRFLKKELKDYFEAQELFHSKLKHKNSLNS
|
||||
GKNQWKNWLKAQHQYSVSPIIWNSLSPQKWRTKVNQERMDENTDLNKRYS
|
||||
NEKRKQFFEANSLDDEENVVETYLGQRAGDIKNSIKSYSYDLFSYQSINS
|
||||
EDKYVCINNKQKNSYNYNRRKVNLVDSPEGIALSNQFLVQNDLLDLYTFP
|
||||
DRKYVPWRLFPGSLIGGNDKDKDRFVKMWTATNSGNAVKYWTAANGNTSI
|
||||
KPGVFWTFQNSQRTKKQNPLFDWRGMNTELPNRCISDLKGWFFFSELLKL
|
||||
DLRYQVKPWILSKNLLFENLIFENQEENPNLIQNPIEDGRKNVIQNENEN
|
||||
DPIEDGRQNVIQNENENAIQNLIDFFLEKKNSPKDTNQELHAQAKARIWD
|
||||
ALVASLKQKREQKERKNKRIAQLIEKKKQKEIEKQKRKIEKQKRKKEKIE
|
||||
NAKKKIENEKKKIETEEEKIEKEKRKKERKKEKLKKKVAKNIEKLKNKVA
|
||||
KNVAKNIEKLKKQRAKNIARMEEEDKKARKKRKRKVQVQENKIPYTAFGS
|
||||
DKWQRPIAEYPKSGDIRNFQVILPEDDDEDDEEDRLDELKLNAYELSRIQ
|
||||
KITDEKRMKRNLLSSIKRERLKMEFSTRNNSLATIMLTHGIFSIEPLRIS
|
||||
RQNQDASFLIYQLIKISLVEQLDPYDHNDSFELTEKYRARRNFFMPKTNA
|
||||
ETMHKSDSDLFVPETILSTKRRRELRILISFYSRRGKRKNRIYKNPVFWK
|
||||
YVKNCGEVVDNSEKKKKKLIKSFLWPNYRLEDLACMNRYWFNAQNGSRFS
|
||||
MLRIRMYPRLKIR
|
||||
>NC_002694@LocoCp080@ycf1@120497@125878@R@1@1794 hypothetical_protein
|
||||
MIFQSFILDNLVSLCLKIINSVIVVGLYYGFMTTFSTGPSYLFLLRAHVM
|
||||
EEGTEKKISATTGFITGQLVMFISIYYAPLHIALDRPHTITVITLPYLLL
|
||||
YFLGNNQKNFLNYVYKNQNSIRHFSIQRIFFQNLFFQLLNPFFLPSSILM
|
||||
RLANIYIFQSNNKVLFLTSSFVGWLIGHVFFMKWIGLMLVWIQEKNNSIK
|
||||
STVAIRSNKGVLAKFRKSMFQIFLIFFFITCLYYLGRIPPIYFFTPKMSE
|
||||
IKERGEIEKREGEIDIEINSQRAGSKQEQKITAEEKLSPYLFSKKNNNLD
|
||||
KIKEENDIFGFQKPLVTILFDYNRWNRPLRYIKNDRFENVVRNEISQFFF
|
||||
FTCQSDGKERISFTYPPNLSTFQKMMEMKISLFTRDIISYEELSNSWRST
|
||||
NEEKKKKLTNEFLNRVEVLDKESLPVDIFENRIRLCNDEKKQKYLTKEYD
|
||||
PFLNGPCRGQIQKWFSPPIQKETYKKNSLFINKIHGILFSNTNNYPKFEQ
|
||||
KKNIFDRKSLLTDINFFFNLITKFSRKSVSSLNFEGLYLFPKDNKGKMSS
|
||||
KKKKFLFDTIRPDLNDNKIVNLQKCIGINEIVKKLPRWSYNLIDELEQLE
|
||||
GKKKVEYHQIRSRKAKRVVLLTKNSQNDDNYDETTDTDNTEKKKELALIR
|
||||
YSQQPDFRRDIIKGSIRAQRRKTVTCKLFQRSVDSPLFLEKMEKTSFFCF
|
||||
DILDSSKIFFMFKNWIRKKKELKNSDYTDEKAKESQKKEEEKIKKNEKEE
|
||||
KRRIEIGEAWDSIIFAQVIRGCLLITQSILRKYILLPSLIITKNIVRILL
|
||||
FQFPEWSEDFRDWQREMYIKCTYNGVQLSETEFPKKWLTDGIQIKILFPF
|
||||
RLKPWHRSKLRFTEKKKDPLKNKKVKKKNFCFLTIFGMEVELPFSGYPRN
|
||||
RFSFFDPILKELKKKMKKLKNNFFLILKIVNERTKNFITTLKETSKRIIQ
|
||||
SILKKVLFLNKKIKKLYNYLFLFRFKKIDELNQNKKNFPITKNNPIIYES
|
||||
TILIQAINKTNCSLTEKKIKAINAKTKKIIKKIERMTKENKGGFLISEIN
|
||||
SNSKKTSSNTKGLELEKKILQILQRRNVQLTHKLYSFFKFLLNFMKKVYT
|
||||
DIFLCIVSVPRINVQFFLESTKKIINQSIYNKKTNEEIIDKTNQSIIHFI
|
||||
SIINKSSNTKNTNSAANSYEVSALSQAYVFFKISQIQVLNVYKYKFKYVF
|
||||
DYDGRSFFIKDEIKDYFFGIQGIIHSKLRHKNSPVSLKNQWTNWLKVHYQ
|
||||
YDLSQNRWSRLVQKNLKNRINKHRLDQNKDLTKCDSYKKTQLIVSKNKKQ
|
||||
QVDFLVNLLIQKKIKKQSRYDLLLYKFINYAEKKELSIYGYRSPFQANKK
|
||||
RAISYDYNTQKKEFFDRMDDISIKNYIAEDAIRYIEQNRDRKYFDWVVMD
|
||||
VKIQNNSISNLQFSFFFKFLRFYDAYRNKPWIIPIKFLFLHFSVNQNFNK
|
||||
IKNIIEKKRRIDIFKPWKKKKILEVELETPNRAKKEYTSRVDLNKPSLSN
|
||||
QEKDIEEDYGESDSKKGGKDKNKKKYKNKIEAEVNLLLRKYLNFHLNWKG
|
||||
SLNKRVINNVKVYCLLIRLKNIKQIAISSIQRGELSLDIMMIQNEKDSTL
|
||||
TGFRKKKEFIEKGIFIIEPVRLSRKNNEQFFMYETARLLLIHKSKRQINQ
|
||||
RNPEKSDLDKQIFYKNIPPKRDQRITQNKEKKHYALVVIENILSARRRRE
|
||||
LRILICFNPRSINSMPRKTIFDNENKINNCCQVFAKNKDLDKEKKILMNL
|
||||
KLILWPNYRLEDLACINRYWFDTYNGSRFSIVRIHMYPRLKMR
|
||||
>NC_003119@MetrCp012@ycf1@13390@18672@R@1@1761 hypothetical_chloroplast_RF1
|
||||
MIYQLFILDRLVGLWLKILNSAIVMGLYYGFLTTFSIGPSYLFLIRARVM
|
||||
DKGTETEIAATTGFITGQLMMFISIYYAPLHLALIRPHTITVLTLPYLFF
|
||||
NFVYKNNKHYYSADSHFYLDLDYGYKNPNSIRKFRIYKVFFNNLFFQLSN
|
||||
PLLFPSSILLRLMNIYLFRSNNKLLFLTSSFLGWLIGHIFLMKCIGLILL
|
||||
VWSKQKNSIKSKLTMRFDKYILLQLRNYVGQIFVVFSFVIVVHYLGRTPV
|
||||
PYLYTYTDEILEYDEKQKDEINGETEIDVEIDSEQEQNGSIEDEEDILSY
|
||||
LFPKKDKTLENIEQDNNLLALEKPLVTTLFDYRKWNRPLRYIKNDHFERV
|
||||
VRDENSQFFFHICQSDGKERISFTYPPDLSSFLKIMEKKMDLFTKDKISY
|
||||
NDNELSNYWSSNNKEKRKKLSNEFFKRAKVLDKKYKKYKKFIPVDVFENR
|
||||
IRLSNDKRKIKYLTKIYDPFLNGPFRGQSFSPSIQNETYTTNSILINKIH
|
||||
GLLLINSNYPEFDNSNYPEFDNSNYPEFEQKIDQFDRKFLLTEIGFFFNL
|
||||
ISQFSEKSVSSFNFDGLYLFPEHEQVKIYSEEKKRKKKFLFEAIRTDQNN
|
||||
QTIFNRKKCTGINEISKQVPRWSYELIDELEQMTERLTKEFQIRSAKAER
|
||||
MVIFNGNTDSLTLNIGPRNDNDAIPEVDLNHEFFLVNFLREPDFDRDIIK
|
||||
GSMRPLRRKIATTKLSQGNAQPHSPIFLEMIDPLYFIFGDLFDDLSQIFK
|
||||
EMFRKPGTDNSEFVEFQERLEHKYEEDAKDDAEIRRLKIEEDWESILYGL
|
||||
IIRSFVLLIQSFFRKYILLPSLIITKNIIRILLFQNPEWSEDFRDWSREV
|
||||
HIKCTYQGIPVSDKELPKNWFDEGIQIRILNPFVLKAWHKSKVQSTEKKK
|
||||
KKRSTEKKYTENKNFWFLTGYGTLVESYLDEGFPRDPLSIFGPVLKTIRK
|
||||
QLKKDLKKHFFLVLKFLNERKKWFPTMLKKIENWNIKRILKSILFRFKII
|
||||
DELSESKKTSTISKNNSKIEVIEVIEESPVKMESINWTNSSFTEKRIKDL
|
||||
NVKTKTIIKQIETMTEEKKEGILTSEINLNSNKTTYDAKRLELQKNNLQI
|
||||
LQRRFVRLIRKSYSFFKIFIEGVYIDILLCISSIARIHRQRFLDFLESTD
|
||||
KILNVKKPIYDKKKKMEEMEERFENLSVSRLISILEKSENITNMNSQNSW
|
||||
DVSSLSSLSQEYVFYKLSQIQFSNGSKFKIRSILESPGRSFFLKNEIKDY
|
||||
FFRMQGTYNSKLRHKKRSDSLMNPWTNWFKVLYQYDLPEKRWSRLVSQNW
|
||||
RNRINEHRVAQNKDLVEYDSYEKNQLIWKELILSKKQEQEGDLLKIEIKN
|
||||
KIKKQYRYDLFSYQYLNFANKKKSSIYGYRSPNKNQAISYNYNISIQNYL
|
||||
EEYDILDMEKNLEKNLDRKYFNWMGMNVKRKKTSRPKDKFLIPGFWFFSK
|
||||
LSKLYCAYKMNPWILPIKFFVLQLDNLELTTEEYVNTVDEDLKSVSYYYK
|
||||
GSDSKYRTDLKGERDFLLSKYLGFYLHCDSSDEEIGMDNTNLFCLLLRMK
|
||||
KFNKIVIMSIKKLELDIEMLVDSRTKDFCYTECRDTEDLKERLIFFIEPI
|
||||
RLPRKKHEQSLLYQTIRLPLIHKSKTRKSWSWKKKKSRVDQKITENKDKN
|
||||
LYDLFVPENLLSTRRRRELRILTCFNPRNRNTVHRKTINDNENQIKNVSQ
|
||||
VLTKNKDLDSETKKLMNFKLFLWPNYRLEDLACINRYWFNTHNGSHFSIL
|
||||
RIHMYPRLKD
|
||||
>NC_003386@PsnuCp088@ycf1@112439@117550@R@1@1704 hypothetical_protein
|
||||
MIGRLYMKKLKNLFLFLSSLCPVFPWISQISLVMPFGLYYGFLTALPIGP
|
||||
SQILSIRTFFLEGNRSGIICILGSMMGQFVILLSIYCSPLYVMLVKPHLM
|
||||
TLLVIPYMFYYWYRTKNPSRYYILHPIKSLTHAHTRNLLLDSFIFQLLNP
|
||||
ILLPNPVLTRLLNLFLFRYSSNVFFLTSSLLGWLCGHILFINSIKLLLFR
|
||||
IEHDSPIIYILMKRSISRTFSILISITFFLYLGRSPVPLITKKFADEITL
|
||||
SDQKIKENLWEESLWLYRPWPTSFFDQYRWNRPIRYIPNSKSSHNGFVKK
|
||||
QVSKFFYDECITDGKNAISFASQPSLSIFKKQLMNYLHNSDISISTKDSY
|
||||
KGWIETKREKRDALNNEFKDRIQFVYNSSTIEEAMENKTGFSHDRNHFLV
|
||||
KVNDPFLSGSSRIRIPNKKYSSSLLKLHDSKDQTMKISKKTKRKHTRNKM
|
||||
RNWIFNKHKKWQHNKFPLPWEPIPTKAEKVFWRILNESENPIILEMLTTL
|
||||
NSIKEKNYQFRITWEHIFQLPRIEKAIFLFRSKQEIEDSIFRYPSHLSLK
|
||||
NLTLFNIFTRSKNIFYSAKIAVSPILQIEEMQKELPRYNSRLRSDRIDAV
|
||||
NVDVDIRQRKIKNLGPRKGKLEDKEKEKEKAAQTQTEVKKEREKEKEERV
|
||||
IKRFQNQSDFRRKLVKGSIRARRRKTGIWRLYQSGTHSPFFLRMKEIPIS
|
||||
FQSSINALRLNKMKDERAILGIGKELRPFNLYKKRSKADRLTIAARFDFP
|
||||
IAHAGRGVLLIIQSNIRKYVILPILIICKNIGRIMLFQSPEWKEDWAEWN
|
||||
QEIHIKCTYDGIEVSHRHLPAHWFKEGLQIKILYPFHLKPWHIHRTNNIN
|
||||
DLRNEAQIQKEISDFGKQRKLSFSYLTIWGYQTSSVFGSMKKRPSFWRPI
|
||||
ANALKKKLQRNLFSKLTWISHFFYEIILLSRTFIISKKPNNIPEMSIQSN
|
||||
ELRYDVSDYELIQKYPNSNEKNDYVVMNEISIESNNRNGKEISHESQDQY
|
||||
KDNFNNIRSFNDIETLLTDISGTSVEESYRDRIETYLRLNKKNHRYAINI
|
||||
RLIWNKQLVQTQQEFSRFRRIIMQFMHKGYRLAKRFLTKFYREIFRRFTF
|
||||
SIQLSIQLVLRLTKNITKLSEKNKVYQNLNLLKKNEQNLKIDSSRNKPVL
|
||||
SQAYVFQKLWHARTRTKIDVHYLVQSLEREIVNSIENNELKASKLKDLKW
|
||||
NEHNYLNDHIKDLLEIQGLLKETQTFTEKNWKEWLHCFTRYQISSKIEYG
|
||||
IVPQKWKNEVKKRWKSNTNKLDKNKEYKTLEKENKYSLYETNNMLKQRIN
|
||||
NRNNYCEFYNLLYSFIDSTKASNIIKLPIQQKGKEDPIQYINDINKIHEN
|
||||
IHLNSKKKYKRPQFQSISTEKGDIDSNLMLWLLPNLLDTKPESVTNSLDS
|
||||
YSFEMYLSQNEDKDSLKKEIRFNAKKLNLDTKEPTSDAMKPTSDTKELIS
|
||||
DTNEPTSDIKSDDQSENQNKPLKEKSIRERKHHRPIPQVKWKSKSVEKKM
|
||||
QRINNLTSFLSVIEDRKNMENYIISFCMKMGIDIDLLNSFFTNTEDELSI
|
||||
QLLDDSAHRLPRLLNDQTLVRKMVSILLNFEKQFEEGITSKISSQSISSI
|
||||
YRTEKKYSVNSYNLEDIMLSRRYRELRILNSLILEKQYVNFDHWIDKSEK
|
||||
YPFLNLPSQVQIIKRFLWPTYRLEDLACMNRFWFNTNNGSRFAMLKLRMY
|
||||
CPD
|
||||
>NC_004115@ChglCp095@ycf1@114383@118735@R@1@1451 hypothetical_chloroplast_RF1
|
||||
MITTYSTFLFNFLSQFQYLVNIPEPLILFGLYYGFLTTLPISFSHIVVIR
|
||||
NRLIEGKTSSVMAFCGLITGQLCMIGTIYYTPLYKLFIKPHLILLLSIIY
|
||||
SFFYWQRLRNNQNYDDLREAQSLINVRNFFSFFDSFVFQILNPILLPTPI
|
||||
FFRLNNVFLFRYSNNLNFFLSFFIGSLIGNFLFFNALNWIRYRFEQDSNV
|
||||
IYPVLKLLINKSIIPIVFCICLIPIAKYSHIPFCTMKQKEGQSSYSFDKN
|
||||
WPNIIFDSNQPHRPIRIFSETKTDDNLNINDNLSKKQTSQFFFKECISDG
|
||||
NVRISYTYPSTLANFQTDLSSSFQDFSLSEQSFDNLYSNWKLEKLSRKDN
|
||||
LNNLLLTKIKLLNNKKEWFYKHFQNKFGTFIKDDNNYNKFVKKSNDVRLK
|
||||
QSSKIQIKKSKLLTSDIRDISTTQSGFYDLKKNKLKSFISQKFKMNSNNS
|
||||
TLPVWNHLNKQLLQNELKRIKKQLQDKTKNIKENDFNNLKLLKSNIETID
|
||||
NTINDIHHNKIKQITSVDLIKIFATNNKTLLLETLAFNKKITQKDNFNFN
|
||||
KLFQHKNKKFTTNSGNENTYLNLNDIFKNIKRLPKWRTFSKHVVYDEVSD
|
||||
IRRRAIKSNSKLKIANKDSDIIIFEYKKSLNFRARLPKGSLRARRKNKFT
|
||||
WKLFHNNLNSPFFIRSKQLLNKTDIPFLKYNENYLNFFKNFISPDKNINY
|
||||
LNNDISEMRRQELLFKWDKTNVHILRSMVLVGQAFFRKYIKLPIFIFFKN
|
||||
LSRQLLYQPSEWTKDWSNWMNEWYIFCYYDGTELAKDQWPEMWLQRGIQI
|
||||
KLINPFYIKPWYIQKSFIKNKQNKKTRTSYLTVFGSQQELPFGKKIRMPS
|
||||
FWKPVRREVSKSIKLKLYFPFLTLQKNTIILFEKVFNKKRINEDNKTIEK
|
||||
SILNKKNEQLILKKDEVIPNNKSIAGKLSKLDFHNQNITKTSIKNATKQI
|
||||
LIKNEYNSLLKENKNLFTKNKIVFLKIKNILNKLNLKLIKVKINFTYKIK
|
||||
TVLKIISRNLLKFYSIIQFQLENLGRNNSNDLSYKNQLSYQKDFPNFNNF
|
||||
CLNQANIIQNLCKNNILKHKKLNQNFQINSKNLNQTNIIDVNPENIKAQD
|
||||
FKNLLENIYTFTPTINLWDKLSTNNWKISVQNNWKQKSYNNYDLTKKALV
|
||||
SKNLNFISYFYQNNLINNLNKKIKHTKIFNLSKNYLSLNNLNQNQIKNFD
|
||||
FQNSLNNNITYKKNIKNFTIRQNVPSQLRRWDWKNNKIKKFVNRLLQKNT
|
||||
ILLKEEVFNLIPFFDRFTIQNPMIRNWSHPISSILDDEIFTYELLDTFLQ
|
||||
INKNIDFLHTKQIEDNLSSNSNQAIASLPLSSTTAENFLYYLTTVEDLIS
|
||||
IEDKKELKILNSLNFNKSTPNYIKTNVVEKSLNENLSKNLQSILSKETLD
|
||||
SINNTQILKKFLWASYRCEDLACMNRFWFSTNNGSRFGTLRLRLYPNLKN
|
466
detectors/cds/test/test.db/ycf2.fst
Normal file
466
detectors/cds/test/test.db/ycf2.fst
Normal file
@ -0,0 +1,466 @@
|
||||
>AC_000188@LyesCp004@ycf2@88196@95032@D@1@2279 Ycf2_protein
|
||||
MRGHQFKSWIFELREILREIKNSHHFLDSWTQFNSVGSFIHIFFHQERFL
|
||||
KLFDPRIWSILLSRNSQGSPSNRYFTIKGVILFVVAVLIYRINNRNMVER
|
||||
KNLYLIGLLPIPMNSIGPRNDTLEESVGSSNINRLIVSLLYLPKGKKISE
|
||||
SCFLNPKESTWVLPITKKCSMPESNWGSRWWRNWIGKKRDSSCKISNETV
|
||||
AGIEILFKEKDLKYLEFLFVYYMDDPIRKDHDWELFDRLSLRKSRNRINL
|
||||
NSGPLFEILVKHWISYLMSAFREKIPIEVEGFFKQQGAGSTIQSNDIEHV
|
||||
SHLFSRNKWAISLQNCAQFHMWQFRQDLFVSWGKNPPESDFLRNVSRENW
|
||||
IWLDNVWLVNKDRFFSKVQNVSSNIQYDSTRSSFVQVTDSSQLKGSSDQS
|
||||
RDHLDSISNEDSEYHTLINQREIQQRKERSILWDPSFLQTERKEIESGRF
|
||||
PKCLSGYSSMSRLFTEREKQMINHLFPEEIEEFLGNPTRSVRSFFSDRWS
|
||||
ELHLGSNPTERSTRDQKLLKKQQDLSFVPSRRSEKKEMVNIFKIITYLQN
|
||||
TVSIHPISSDPGCDMVPKDEPDMDSSNKISFLNKNPFFDLFHLFHDRNRG
|
||||
GYTLHYDFASEERFQEMADLFTLSITEPDLVYHKGFAFSIDSCGLDQKQF
|
||||
LNEARDESKKKSLLVLPPIFYEENESFSRRIRKKWVRISCGNDLEDPKPK
|
||||
IVVFASNNIMEAVTQYRLIRNLIQIQYSTYGYIRNVLNRFFLMNRSDRNF
|
||||
EYGIQRDQIGKDTLNHRTIMKYTINQYLSNLKKSQKKWFEPLILISRTER
|
||||
SMNRDPDAYRYKWSNGSKSFQEHLEQSVSKQKSRFQVVFDRLRINQYSID
|
||||
WSEVIDKKDLSKSLRFFLSKSLLFLSKLLLFLSNSLPFFCVSFGNIPIHR
|
||||
SEIYIYEELKGPNDQLCNQLLESIGLQIVHLKKLKPFLLDDHDTSQKSKF
|
||||
LINGGTISPFLFNKIPKWMIDSFHTRNNRRKSFDNPDSYFSMIFHDQDNW
|
||||
LNPVKPFHRSSLISSFYKANRLRFLNNPHHFCFYWNTRFPFSVEKARINN
|
||||
SDFTYGQFLNILFIRNKIFSLCVGKKKHAFWGRDTISPIESQVSNIFIPN
|
||||
DFPQSGDETYNLYKSFHFPSRSDPFVRRAIYSIADISGTPLTEGQIVNFE
|
||||
RTYCQPLSDMNLSDSEGKNLHQYLNFNSNMGLIHTPCSEKDLSSEKRKKW
|
||||
SLCLKKCVEKGQTYRTFQRDSAFSTLSKWNLFQTYMPWFLTSTGYKYLNL
|
||||
IFLDTFSDLLPILSSSQKFVSIFPDIMHGSGISWRILQKKLCLPQWNLIS
|
||||
EISSKCLHNLLLSEEMIHRNNESPLISTHLRSPNAREFLYSILFLLLVAG
|
||||
YLVRTHLLFVSRASSELQTEFERVKSLMTPSSMIELRKLLDRYPTSEPNS
|
||||
FWLKNLFLVALEQLGDSLEEIRGSASGGNMLGPAYGVKSIRSKKKDWNIN
|
||||
LIEIIDLIPNPINRITFSRNTRHLSHTSKEIYSLIRKRKNVNGDWIDEKI
|
||||
ESWVANSDSIDDEEREFLVQFSTLTTENRIDQILLSLTHSDHLSKNDSGY
|
||||
QMIEQPGAIYLRYLVDIHKKHLMNYEFNPSCLAERRIFLAHYQTITYSQT
|
||||
SCGENSFHFPSHGKPFSLRLALSPSRGILVIGSIGTGRSYLVKYLATNSY
|
||||
VPFITVFLNKFLDNKSKGFLLDEIDIDDSDDIDDSDNLDASDDIDRDLDT
|
||||
ELELLTRMNGLTVDMMPEIDRFYITLQFELAKAMSPCIIWIPNIHDLDVN
|
||||
ESNDLSLGLLVNHLSRDCERCSTRNILVIASTHIPQKVDPALIAPNKLNT
|
||||
CIKIRRLLIPQQRKHFFTLSYTRGFHLEKKMFHTNGFGSITMGSNARDLV
|
||||
ALTNEVLSISITQKKSIIDTNTIRSALHRQTWDLRSQVRSVQDHGILFYQ
|
||||
IGRAVAQNVLLSNCPIDPISIYMKKKSCNEGDSYLYKWYFELGTSMKRLT
|
||||
ILLYLLSCSAGSVAQDLWSLSVPDEKNGITSYGLVENDSDLVHGLLEVEG
|
||||
ALVGSSRTEKDCSQFDNDRVTLLLRPEPRNPLDMMQKGSWSILDQRFLYE
|
||||
KYESEFEEGEGEGALDPQEDLFNHIVWAPRIWRPWGFLFDCIERPNELGF
|
||||
PYWSRSFRGKRIIYDEEDELQENDSGFLQSGTMQYQTRDRSQGLFRISQF
|
||||
IWDPADPLFFLFKDQPPGSVFSHRELFADEEMSKGLLTSQTDPPTSLYKR
|
||||
WFIKNTQEKHFELLINRQRWLRTNSSLSNGSFRSNTLSESYQYLSNLFLS
|
||||
NGTLLDQMPKTLLRKRWLFPDEMKIGFM
|
||||
>AC_000188@LyesCp055@ycf2@146312@153148@R@1@2279 Ycf2_protein
|
||||
MRGHQFKSWIFELREILREIKNSHHFLDSWTQFNSVGSFIHIFFHQERFL
|
||||
KLFDPRIWSILLSRNSQGSPSNRYFTIKGVILFVVAVLIYRINNRNMVER
|
||||
KNLYLIGLLPIPMNSIGPRNDTLEESVGSSNINRLIVSLLYLPKGKKISE
|
||||
SCFLNPKESTWVLPITKKCSMPESNWGSRWWRNWIGKKRDSSCKISNETV
|
||||
AGIEILFKEKDLKYLEFLFVYYMDDPIRKDHDWELFDRLSLRKSRNRINL
|
||||
NSGPLFEILVKHWISYLMSAFREKIPIEVEGFFKQQGAGSTIQSNDIEHV
|
||||
SHLFSRNKWAISLQNCAQFHMWQFRQDLFVSWGKNPPESDFLRNVSRENW
|
||||
IWLDNVWLVNKDRFFSKVQNVSSNIQYDSTRSSFVQVTDSSQLKGSSDQS
|
||||
RDHLDSISNEDSEYHTLINQREIQQRKERSILWDPSFLQTERKEIESGRF
|
||||
PKCLSGYSSMSRLFTEREKQMINHLFPEEIEEFLGNPTRSVRSFFSDRWS
|
||||
ELHLGSNPTERSTRDQKLLKKQQDLSFVPSRRSEKKEMVNIFKIITYLQN
|
||||
TVSIHPISSDPGCDMVPKDEPDMDSSNKISFLNKNPFFDLFHLFHDRNRG
|
||||
GYTLHYDFASEERFQEMADLFTLSITEPDLVYHKGFAFSIDSCGLDQKQF
|
||||
LNEARDESKKKSLLVLPPIFYEENESFSRRIRKKWVRISCGNDLEDPKPK
|
||||
IVVFASNNIMEAVTQYRLIRNLIQIQYSTYGYIRNVLNRFFLMNRSDRNF
|
||||
EYGIQRDQIGKDTLNHRTIMKYTINQYLSNLKKSQKKWFEPLILISRTER
|
||||
SMNRDPDAYRYKWSNGSKSFQEHLEQSVSKQKSRFQVVFDRLRINQYSID
|
||||
WSEVIDKKDLSKSLRFFLSKSLLFLSKLLLFLSNSLPFFCVSFGNIPIHR
|
||||
SEIYIYEELKGPNDQLCNQLLESIGLQIVHLKKLKPFLLDDHDTSQKSKF
|
||||
LINGGTISPFLFNKIPKWMIDSFHTRNNRRKSFDNPDSYFSMIFHDQDNW
|
||||
LNPVKPFHRSSLISSFYKANRLRFLNNPHHFCFYWNTRFPFSVEKARINN
|
||||
SDFTYGQFLNILFIRNKIFSLCVGKKKHAFWGRDTISPIESQVSNIFIPN
|
||||
DFPQSGDETYNLYKSFHFPSRSDPFVRRAIYSIADISGTPLTEGQIVNFE
|
||||
RTYCQPLSDMNLSDSEGKNLHQYLNFNSNMGLIHTPCSEKDLSSEKRKKW
|
||||
SLCLKKCVEKGQTYRTFQRDSAFSTLSKWNLFQTYMPWFLTSTGYKYLNL
|
||||
IFLDTFSDLLPILSSSQKFVSIFPDIMHGSGISWRILQKKLCLPQWNLIS
|
||||
EISSKCLHNLLLSEEMIHRNNESPLISTHLRSPNAREFLYSILFLLLVAG
|
||||
YLVRTHLLFVSRASSELQTEFERVKSLMTPSSMIELRKLLDRYPTSEPNS
|
||||
FWLKNLFLVALEQLGDSLEEIRGSASGGNMLGPAYGVKSIRSKKKDWNIN
|
||||
LIEIIDLIPNPINRITFSRNTRHLSHTSKEIYSLIRKRKNVNGDWIDEKI
|
||||
ESWVANSDSIDDEEREFLVQFSTLTTENRIDQILLSLTHSDHLSKNDSGY
|
||||
QMIEQPGAIYLRYLVDIHKKHLMNYEFNPSCLAERRIFLAHYQTITYSQT
|
||||
SCGENSFHFPSHGKPFSLRLALSPSRGILVIGSIGTGRSYLVKYLATNSY
|
||||
VPFITVFLNKFLDNKSKGFLLDEIDIDDSDDIDDSDNLDASDDIDRDLDT
|
||||
ELELLTRMNGLTVDMMPEIDRFYITLQFELAKAMSPCIIWIPNIHDLDVN
|
||||
ESNDLSLGLLVNHLSRDCERCSTRNILVIASTHIPQKVDPALIAPNKLNT
|
||||
CIKIRRLLIPQQRKHFFTLSYTRGFHLEKKMFHTNGFGSITMGSNARDLV
|
||||
ALTNEVLSISITQKKSIIDTNTIRSALHRQTWDLRSQVRSVQDHGILFYQ
|
||||
IGRAVAQNVLLSNCPIDPISIYMKKKSCNEGDSYLYKWYFELGTSMKRLT
|
||||
ILLYLLSCSAGSVAQDLWSLSVPDEKNGITSYGLVENDSDLVHGLLEVEG
|
||||
ALVGSSRTEKDCSQFDNDRVTLLLRPEPRNPLDMMQKGSWSILDQRFLYE
|
||||
KYESEFEEGEGEGALDPQEDLFNHIVWAPRIWRPWGFLFDCIERPNELGF
|
||||
PYWSRSFRGKRIIYDEEDELQENDSGFLQSGTMQYQTRDRSQGLFRISQF
|
||||
IWDPADPLFFLFKDQPPGSVFSHRELFADEEMSKGLLTSQTDPPTSLYKR
|
||||
WFIKNTQEKHFELLINRQRWLRTNSSLSNGSFRSNTLSESYQYLSNLFLS
|
||||
NGTLLDQMPKTLLRKRWLFPDEMKIGFM
|
||||
>NC_000932@ArthCp066@ycf2@86474@93358@D@1@2295 Ycf2
|
||||
MKGHQFKSWIFELREIVREIKNAHYFLDSWTQFNSVGSFIHIFFHQERFR
|
||||
KLLDPRIFSILLLRNSQGSTSNRYFTIKGVVLFVVAALLYRINNRNMVES
|
||||
KNLYLKGLLPIPMNSIGPRNDTSEESFGSCNINRLIVSLLYLTKGKKISE
|
||||
SCFRDPKESTWVLPITQKCIMPESNWSSRWWRNWIGKKRGFCCKISNETV
|
||||
AGIDISFKEKDIKYLEFLFVYYMDDPIRKGHDWELFDRLSPSKRRNIINL
|
||||
NSGQLFEILVKDWICYLMFAFREKIPIEVEGFCKQQGAGSTIQSNDIEHV
|
||||
SHLFSRNKWAISLQNCAQFHMWQFHQDLFVSWGKNPHESDFFRKISRENW
|
||||
IWLDNVWLVNKDRFFSKVRNVSSNIQYDSTRSSFVQVTDSSQLNGSSDQF
|
||||
IDPFDSISNEDSEYHYHTLINQREIQQLKERSILLDPSFIQTEGREIESD
|
||||
RFPKYLSGYSSMPRLFTEREKRMNNHLLPEESEEFLGNPTRAIRSFFSDR
|
||||
WSELHLGSNPTERSTRDQKLLKKEQDVSFVPSRRSENKEIVNIFKIITYL
|
||||
QNTVSIHPISSDLGCDMVPKDELDMDSSNKISFLNKNPFFDLFHLFHERK
|
||||
RGGYTLRHESEERFQEMADLFTLSITEPDLVYHKGFAFSIDSYGLDQRQF
|
||||
LKEVFNFRDESKKKSLLVLPPIFYEENESFYRRLRKIWVRISCGNYLEDQ
|
||||
KRVVFASNNIMEAVNQYRLIRNMIQIQFQYSPYGYIRNVLNRFFLMKRPD
|
||||
RNFEYGIQRDLIGNDTLNHRTIMKDTINQHLSNLKKSQKKWFDPLIFLSQ
|
||||
TERSINRDPNAYRYKWSNGSKNFQEHLEHFVSERKSRFQVVFDQLCINQY
|
||||
SIDWSEVIDKKDLSKSLRFFLSKLLRFFLSKLLLFLSKLLLFLSNSLPFF
|
||||
FVSFENIPIHRSEIHIYELKGPNDQLCNQLLESIGLQIVHLKKLKPFLLD
|
||||
DHNTSQKSKFLINGGTISPFLFNKIPKWMIDSFHTRKNRRKSFDNTDSAY
|
||||
FSIVSHDQDNWLNPVKPFQRSSLISSFSKANRLRFLNNPHHFCFYCNKRF
|
||||
PFYVEKARLNNSDFTFTYGQFLTILFIHNKTFSSCGGKKKHAFLERDTIS
|
||||
PSSIESQVSNIFISNDFPQSGDERYNLYKSFHFPIRSDPLVRRAIYSIAD
|
||||
ISGTPLIEGQRVNFERTYCQTLSDMNLSDSEEKSLHQYLNFNSNMGLIHT
|
||||
PCSEKYLQRKKRSLCLKKCVDKGQMDRTFQRDSAFSTLSKWNLFQTYMPW
|
||||
FFTSTGYKYLNLIFLDTFSDLLRILSSSQKFVSIFHDIMHGLDISWRILQ
|
||||
KKLCLPQRNLISEISSKSLHNLLLSEEMIHRNNESSLISTHLRSPNVREV
|
||||
LYSILFLLLVAGYIVRTHLLFVSRAYSELQTEFEKIKSLMIPSYMIELRK
|
||||
LLDRYPTSELNSFWLKNLFLVALEQLGDCLEEIRGSGGNMLWGGDPAYGV
|
||||
KSIRSKKKDLKINFIDIIDLISIIPNPINRITFSRNTRHLSHTSKEIYSL
|
||||
IRKRKNVSGDWIDDKIESWVANSDSIDDKEREFLVQFSTLRAEKRIDQIL
|
||||
LSLTHSDHLSKNDSGYQMIEQPGTIYLRYLVDIHKKYLMNYEFNTSCLAE
|
||||
RRIFLAHYQTITYSQTSCGANSFHFPSHGKPFSLRLALSPSRSILVIGSI
|
||||
GTGRSYLVKYLATNSYVPFITVFLNKFLDNKPKGFFIDDIDIDDSDDIDA
|
||||
SNDIDRELDTELELLTMMNALTMDMMLEIDRFYITLQFELAKAMSPCIIW
|
||||
IPNIHDLDVNESSYLALGLLVNSLSRDCERCSTRNILVIASTHIPQKVDP
|
||||
ALIAPNKLNTCIKIRRLLIPQQRKHFFTLSYTRGFHLEKKMFHTNGFESI
|
||||
TMGSSARDLVALTNEALSISITQKKSIIDTNTIRSALHRQTWDLRSQVRS
|
||||
VQDHGILFYQIGRAVAQNVLISNCPIDPISIYMKKKSCNEGDSYLYKWYF
|
||||
ELGTSMKKFTILLYLLSCSAGSVAQDLWSLPVPDEKNRITSYGFVENDSD
|
||||
LVHGLLEVQGALVGSSRTEKDCSQFDNDRVTLLFRSEPRDPLYMMQDGSC
|
||||
SIVDQRFLYEKYESEFEEGEGEGVLDPQQIEEDLFNHIVWAPRIWRPRGF
|
||||
LFDCIERPNELGFPYSAGSFRGKRIIYDEKYELQENDSEFLQSGTMQYQR
|
||||
RDRSSKEQGFFRISQFIWDPADPLFFLFKDQPFVSVFSHREFFADEEMSK
|
||||
GLLTSQTDPPTSIYKRWFIKNTQEKHFELLIQRQRWLRTNSSLSNGFFRS
|
||||
NTRSESYQYLSNLFISNGTLLDRMTKTLLKKRWLFSDEMKIGFM
|
||||
>NC_000932@ArthCp083@ycf2@145291@152175@R@1@2295 Ycf2
|
||||
MKGHQFKSWIFELREIVREIKNAHYFLDSWTQFNSVGSFIHIFFHQERFR
|
||||
KLLDPRIFSILLLRNSQGSTSNRYFTIKGVVLFVVAALLYRINNRNMVES
|
||||
KNLYLKGLLPIPMNSIGPRNDTSEESFGSCNINRLIVSLLYLTKGKKISE
|
||||
SCFRDPKESTWVLPITQKCIMPESNWSSRWWRNWIGKKRGFCCKISNETV
|
||||
AGIDISFKEKDIKYLEFLFVYYMDDPIRKGHDWELFDRLSPSKRRNIINL
|
||||
NSGQLFEILVKDWICYLMFAFREKIPIEVEGFCKQQGAGSTIQSNDIEHV
|
||||
SHLFSRNKWAISLQNCAQFHMWQFHQDLFVSWGKNPHESDFFRKISRENW
|
||||
IWLDNVWLVNKDRFFSKVRNVSSNIQYDSTRSSFVQVTDSSQLNGSSDQF
|
||||
IDPFDSISNEDSEYHYHTLINQREIQQLKERSILLDPSFIQTEGREIESD
|
||||
RFPKYLSGYSSMPRLFTEREKRMNNHLLPEESEEFLGNPTRAIRSFFSDR
|
||||
WSELHLGSNPTERSTRDQKLLKKEQDVSFVPSRRSENKEIVNIFKIITYL
|
||||
QNTVSIHPISSDLGCDMVPKDELDMDSSNKISFLNKNPFFDLFHLFHERK
|
||||
RGGYTLRHESEERFQEMADLFTLSITEPDLVYHKGFAFSIDSYGLDQRQF
|
||||
LKEVFNFRDESKKKSLLVLPPIFYEENESFYRRLRKIWVRISCGNYLEDQ
|
||||
KRVVFASNNIMEAVNQYRLIRNMIQIQFQYSPYGYIRNVLNRFFLMKRPD
|
||||
RNFEYGIQRDLIGNDTLNHRTIMKDTINQHLSNLKKSQKKWFDPLIFLSQ
|
||||
TERSINRDPNAYRYKWSNGSKNFQEHLEHFVSERKSRFQVVFDQLCINQY
|
||||
SIDWSEVIDKKDLSKSLRFFLSKLLRFFLSKLLLFLSKLLLFLSNSLPFF
|
||||
FVSFENIPIHRSEIHIYELKGPNDQLCNQLLESIGLQIVHLKKLKPFLLD
|
||||
DHNTSQKSKFLINGGTISPFLFNKIPKWMIDSFHTRKNRRKSFDNTDSAY
|
||||
FSIVSHDQDNWLNPVKPFQRSSLISSFSKANRLRFLNNPHHFCFYCNKRF
|
||||
PFYVEKARLNNSDFTFTYGQFLTILFIHNKTFSSCGGKKKHAFLERDTIS
|
||||
PSSIESQVSNIFISNDFPQSGDERYNLYKSFHFPIRSDPLVRRAIYSIAD
|
||||
ISGTPLIEGQRVNFERTYCQTLSDMNLSDSEEKSLHQYLNFNSNMGLIHT
|
||||
PCSEKYLQRKKRSLCLKKCVDKGQMDRTFQRDSAFSTLSKWNLFQTYMPW
|
||||
FFTSTGYKYLNLIFLDTFSDLLRILSSSQKFVSIFHDIMHGLDISWRILQ
|
||||
KKLCLPQRNLISEISSKSLHNLLLSEEMIHRNNESSLISTHLRSPNVREV
|
||||
LYSILFLLLVAGYIVRTHLLFVSRAYSELQTEFEKIKSLMIPSYMIELRK
|
||||
LLDRYPTSELNSFWLKNLFLVALEQLGDCLEEIRGSGGNMLWGGDPAYGV
|
||||
KSIRSKKKDLKINFIDIIDLISIIPNPINRITFSRNTRHLSHTSKEIYSL
|
||||
IRKRKNVSGDWIDDKIESWVANSDSIDDKEREFLVQFSTLRAEKRIDQIL
|
||||
LSLTHSDHLSKNDSGYQMIEQPGTIYLRYLVDIHKKYLMNYEFNTSCLAE
|
||||
RRIFLAHYQTITYSQTSCGANSFHFPSHGKPFSLRLALSPSRSILVIGSI
|
||||
GTGRSYLVKYLATNSYVPFITVFLNKFLDNKPKGFFIDDIDIDDSDDIDA
|
||||
SNDIDRELDTELELLTMMNALTMDMMLEIDRFYITLQFELAKAMSPCIIW
|
||||
IPNIHDLDVNESSYLALGLLVNSLSRDCERCSTRNILVIASTHIPQKVDP
|
||||
ALIAPNKLNTCIKIRRLLIPQQRKHFFTLSYTRGFHLEKKMFHTNGFESI
|
||||
TMGSSARDLVALTNEALSISITQKKSIIDTNTIRSALHRQTWDLRSQVRS
|
||||
VQDHGILFYQIGRAVAQNVLISNCPIDPISIYMKKKSCNEGDSYLYKWYF
|
||||
ELGTSMKKFTILLYLLSCSAGSVAQDLWSLPVPDEKNRITSYGFVENDSD
|
||||
LVHGLLEVQGALVGSSRTEKDCSQFDNDRVTLLFRSEPRDPLYMMQDGSC
|
||||
SIVDQRFLYEKYESEFEEGEGEGVLDPQQIEEDLFNHIVWAPRIWRPRGF
|
||||
LFDCIERPNELGFPYSAGSFRGKRIIYDEKYELQENDSEFLQSGTMQYQR
|
||||
RDRSSKEQGFFRISQFIWDPADPLFFLFKDQPFVSVFSHREFFADEEMSK
|
||||
GLLTSQTDPPTSIYKRWFIKNTQEKHFELLIQRQRWLRTNSSLSNGFFRS
|
||||
NTRSESYQYLSNLFISNGTLLDRMTKTLLKKRWLFSDEMKIGFM
|
||||
>NC_001568@EpviCp27@ycf2@22045@28695@D@1@2217 Ycf2
|
||||
MKEHPFPYKSWILELREIKNSHYFLDSWTKFNSVGSYINIFSHQERFIKL
|
||||
FDPRILSILLSRNSQGSTSNRYFTIKGVILFVVAVLIYRINNRNMVEIKN
|
||||
IYWRGLLPIPMNSIGPRNDTLEELVGSYNINRFIVSLLYLTKGKNISESF
|
||||
FLNLKESTLVLPITKKCSMPESNWGSRWWRNWTGKNRDYSCKISNETVAG
|
||||
IEILFKEKDKKYLEFIFFYYMDDPIRKDRDWELFDRLSPSKRLNKINFYS
|
||||
GPLFEILVKRRIYYLMSAFREKIPIEVVKGFFKQQKVGSTIQSNDIEHVS
|
||||
HFFSRNKRAISLKNSAQFNMWQFRQDLLVSWGENPHESDFLRNVSRANWI
|
||||
WLNNVWLVNKYRFCRKVRNVSSNIKYKYDSTRSRSSFVQVTDSSQLKGSY
|
||||
YKSSGHFYSVISNEDSEYHTLINQREIKPLKSIFFDPSFLQTEATEIESD
|
||||
QLQKRPSGYSSTLFTEHEKQMINHMLPEEIEEFIGNPTRLVHSFLSDRWS
|
||||
ELHLGSNPTERSTRDHKLLKKQQDLSFVPSRRSENKELVNILKIITYLKN
|
||||
TVSIHPISSDPGCDGVLKDEPDMDSSNKISVFNKNTFIYLFHLFHDWNRV
|
||||
GYTLNLHHDFELEERFQEKADLFTLSITEPDLVYHKGFSFSIYMDQKQKM
|
||||
VVFASNNIMEAVNQSRFIRNMIKIQYSTYGYIRNVLHRFFLMNRSDHNLE
|
||||
YEIKRDQIGKDTLNHRTIIKYMINQHLSNFKKSQNKWFNPILFFSRTERS
|
||||
VNRNPDAYRYKRSNGSNNFLEHLEHFVSEQKSHFKFKIVFDLIRFNQYSI
|
||||
DWSAFIDTKDLSKPLRFFLSKLLFFLSNSLPFFCVSFGNIPIHRSEIYIY
|
||||
ELKDPNDQLCNQFLEPIDLKIVHLKKRKPFLLGYHGTSRKLKLLITGGRP
|
||||
FLFNKIPRCMIDSFHTINNRSKSFDNTDSYLSMIFHNKDNWLNLVKPFHR
|
||||
SSLISYFYKANRLQFLNNPHNFCFYCNTRLPFYVEKAHIHNYYFTYGQFL
|
||||
NILFIRNKIFSLCVDKKKHAFWGGRDTISPIESQVSKIFIPKNFPQSGDE
|
||||
TYNLSQPFHFPSRYDPFVRLIANIYGTPLTEGQIVNLGRTYCQPLSDMNL
|
||||
SDSEGKNFHQYLNFNSNMGLIHTPCSDKYLPSEKRKKRSLCINKYKCVEK
|
||||
GQMYRTFQRKVAFSTLSKWNLFQTYMPWFLTSAGYKYINLIFLDTFSELL
|
||||
SILSSSKKFVSIFNNIMHGSGISWRIINKKRCLPQWNLISEISSKCLHNL
|
||||
LLSEETIRQNNESPLISTHLRSPNVREFLYSILFLLLVVGYLVRTHLLFV
|
||||
SRASSELQTEFKRVKSLMIPSSMIELRKLLNRYPTPASNSFWLKNLFIVA
|
||||
MEQLVYSLEEIRASGGNLLGPAYGVKSICSKNKYFNINLIDLIPNPINRI
|
||||
IFSRNMRHLSHTSKEIYSLIRKRKNVNGDWIDDIIESWVANSDSIDDEER
|
||||
EFLVQFSALTTEKRIYQILLSLTHSDHLSKNDSGYKMIEQPGAIYLRYLV
|
||||
DIHKKYLLNYECNTSCLVERRVFLAHSQTITYSQTSRGTNTLHFPSQGKP
|
||||
FSISLALSPSKGILVIGSIGTGRSFLVKYLATNSYVPFITVFLNKFLDNK
|
||||
PKGFLVDDNDDNDSSDDIYASDDINSDLDTELELITMMNALTMDMMLELD
|
||||
RFFTTLQLELAKAMSPWIIWIPNIHDLDVNESNYLSFGLLVNHLSERCST
|
||||
NNIIVIASTHIPKKVDPALLAPNKLNTCIKIRRLLIPQQRKHFCTLSYTR
|
||||
GFHLENKIFHTNGFGSITMGSSARDLVALTNEALSISITQNKSILDTNTI
|
||||
RSALHRQTWDLRSGVRSFQDNGILSYQIGRAITQNVLLSNCPIDPISIYM
|
||||
KKKSCTCNGGDYYFYKWYFGLGTSMKKLTILLYLLSCSAGSVAQDLWSLP
|
||||
GPAEKNGITSYGLVENDSDLVRGLLEVEGALVGSSRTEKDCSPFDNDRVI
|
||||
FTLILRPEPGNPLDIIKKGSCSIFDHRFIYEKYESEFEEGYGEGALDPQQ
|
||||
IEEDLFNHIVWAPRIWRPWGFIFYCIERPNELGFPYWSRSFRGKRIVYDK
|
||||
DEEGELQENDSELLKSGTVQYQTRDRSSKEQGLLKINQFIWDPADPLFFL
|
||||
LKDQPPGSVFSHRRFFADEEMSKGLLTSQKDPPTSIYKRWFIKNTQEQHF
|
||||
ELLINRQRWLRTKSSLSKSNGSFRSNTLFESYQYLSTLFLSNGTLFDKMT
|
||||
KTLLIKRWLFPDEMQM
|
||||
>NC_001568@EpviCp32@ycf2@61133@67783@R@1@2217 Ycf2
|
||||
MKEHPFPYKSWILELREIKNSHYFLDSWTKFNSVGSYINIFSHQERFIKL
|
||||
FDPRILSILLSRNSQGSTSNRYFTIKGVILFVVAVLIYRINNRNMVEIKN
|
||||
IYWRGLLPIPMNSIGPRNDTLEELVGSYNINRFIVSLLYLTKGKNISESF
|
||||
FLNLKESTLVLPITKKCSMPESNWGSRWWRNWTGKNRDYSCKISNETVAG
|
||||
IEILFKEKDKKYLEFIFFYYMDDPIRKDRDWELFDRLSPSKRLNKINFYS
|
||||
GPLFEILVKRRIYYLMSAFREKIPIEVVKGFFKQQKVGSTIQSNDIEHVS
|
||||
HFFSRNKRAISLKNSAQFNMWQFRQDLLVSWGENPHESDFLRNVSRANWI
|
||||
WLNNVWLVNKYRFCRKVRNVSSNIKYKYDSTRSRSSFVQVTDSSQLKGSY
|
||||
YKSSGHFYSVISNEDSEYHTLINQREIKPLKSIFFDPSFLQTEATEIESD
|
||||
QLQKRPSGYSSTLFTEHEKQMINHMLPEEIEEFIGNPTRLVHSFLSDRWS
|
||||
ELHLGSNPTERSTRDHKLLKKQQDLSFVPSRRSENKELVNILKIITYLKN
|
||||
TVSIHPISSDPGCDGVLKDEPDMDSSNKISVFNKNTFIYLFHLFHDWNRV
|
||||
GYTLNLHHDFELEERFQEKADLFTLSITEPDLVYHKGFSFSIYMDQKQKM
|
||||
VVFASNNIMEAVNQSRFIRNMIKIQYSTYGYIRNVLHRFFLMNRSDHNLE
|
||||
YEIKRDQIGKDTLNHRTIIKYMINQHLSNFKKSQNKWFNPILFFSRTERS
|
||||
VNRNPDAYRYKRSNGSNNFLEHLEHFVSEQKSHFKFKIVFDLIRFNQYSI
|
||||
DWSAFIDTKDLSKPLRFFLSKLLFFLSNSLPFFCVSFGNIPIHRSEIYIY
|
||||
ELKDPNDQLCNQFLEPIDLKIVHLKKRKPFLLGYHGTSRKLKLLITGGRP
|
||||
FLFNKIPRCMIDSFHTINNRSKSFDNTDSYLSMIFHNKDNWLNLVKPFHR
|
||||
SSLISYFYKANRLQFLNNPHNFCFYCNTRLPFYVEKAHIHNYYFTYGQFL
|
||||
NILFIRNKIFSLCVDKKKHAFWGGRDTISPIESQVSKIFIPKNFPQSGDE
|
||||
TYNLSQPFHFPSRYDPFVRLIANIYGTPLTEGQIVNLGRTYCQPLSDMNL
|
||||
SDSEGKNFHQYLNFNSNMGLIHTPCSDKYLPSEKRKKRSLCINKYKCVEK
|
||||
GQMYRTFQRKVAFSTLSKWNLFQTYMPWFLTSAGYKYINLIFLDTFSELL
|
||||
SILSSSKKFVSIFNNIMHGSGISWRIINKKRCLPQWNLISEISSKCLHNL
|
||||
LLSEETIRQNNESPLISTHLRSPNVREFLYSILFLLLVVGYLVRTHLLFV
|
||||
SRASSELQTEFKRVKSLMIPSSMIELRKLLNRYPTPASNSFWLKNLFIVA
|
||||
MEQLVYSLEEIRASGGNLLGPAYGVKSICSKNKYFNINLIDLIPNPINRI
|
||||
IFSRNMRHLSHTSKEIYSLIRKRKNVNGDWIDDIIESWVANSDSIDDEER
|
||||
EFLVQFSALTTEKRIYQILLSLTHSDHLSKNDSGYKMIEQPGAIYLRYLV
|
||||
DIHKKYLLNYECNTSCLVERRVFLAHSQTITYSQTSRGTNTLHFPSQGKP
|
||||
FSISLALSPSKGILVIGSIGTGRSFLVKYLATNSYVPFITVFLNKFLDNK
|
||||
PKGFLVDDNDDNDSSDDIYASDDINSDLDTELELITMMNALTMDMMLELD
|
||||
RFFTTLQLELAKAMSPWIIWIPNIHDLDVNESNYLSFGLLVNHLSERCST
|
||||
NNIIVIASTHIPKKVDPALLAPNKLNTCIKIRRLLIPQQRKHFCTLSYTR
|
||||
GFHLENKIFHTNGFGSITMGSSARDLVALTNEALSISITQNKSILDTNTI
|
||||
RSALHRQTWDLRSGVRSFQDNGILSYQIGRAITQNVLLSNCPIDPISIYM
|
||||
KKKSCTCNGGDYYFYKWYFGLGTSMKKLTILLYLLSCSAGSVAQDLWSLP
|
||||
GPAEKNGITSYGLVENDSDLVRGLLEVEGALVGSSRTEKDCSPFDNDRVI
|
||||
FTLILRPEPGNPLDIIKKGSCSIFDHRFIYEKYESEFEEGYGEGALDPQQ
|
||||
IEEDLFNHIVWAPRIWRPWGFIFYCIERPNELGFPYWSRSFRGKRIVYDK
|
||||
DEEGELQENDSELLKSGTVQYQTRDRSSKEQGLLKINQFIWDPADPLFFL
|
||||
LKDQPPGSVFSHRRFFADEEMSKGLLTSQKDPPTSIYKRWFIKNTQEQHF
|
||||
ELLINRQRWLRTKSSLSKSNGSFRSNTLFESYQYLSTLFLSNGTLFDKMT
|
||||
KTLLIKRWLFPDEMQM
|
||||
>NC_001879@NitaCp068@ycf2@88885@95727@D@1@2281 Ycf2
|
||||
MRGHQFKSWIFELREILREIKNSHHFLDSWTQFNSAGSFIHIFFHQERFL
|
||||
KLFDPRIWSILLSRNSQGSTSNRYFTIKGVILFVVAVLIYRINNRNMVER
|
||||
KNLYLIGLLPIPMNSIGPRNDTLEESVGSSNINRLIVSLLYLPKGKKISE
|
||||
SCFLNPKESTWVLPITKKCSMPESNWGSRWWRNWIGKKRDSSCKISNETV
|
||||
AGIEILFKEKDLKYLEFLFVYYMDDPIRKDHDWELFDRLSLRKSRNRINL
|
||||
NSGPLFEILVKHWISYLMSAFREKIPIEVEGFFKQQGAGSTIQSNDIEHV
|
||||
SHLFSRNKWAISLQNCAQFHMWQFRQDLFVSWGKNPPESDFLRNVSRENW
|
||||
IWLDNVWLVNKDRFFSKVQNVSSNIQYDSTRSSFVQVTDSSQLKGSSDQS
|
||||
RDHLDSISNEDSEYHTLINQREIQQRKERSILWDPSFLQTERKEIESGRF
|
||||
PKCLSGYSSMSRLFTEREKQMINHLFPEEIEEFLGNPTRSVRSFFSDRWS
|
||||
ELHLGSNPTERSTRDQKLLKKQQDLSFVPSKRSENKEMVNIFKIITYLQN
|
||||
TVSIHPISSDPGCDMVPKDEPDMDSSNKISFLNKNPFFDLFHLFHDRNRG
|
||||
GYTLHYDFESEERFQEMADLFTLSITEPDLVYHKGFAFSIDSCGLDQKQF
|
||||
LNEARDESKKKSLLVLPPIFYEENESFSRRIRKKWVRISCGNDLEDPKPK
|
||||
IVVFASNNIMEAVTQYRLIRNLIQIQYSTYGYIRNVLNRFFLMNRSDRNF
|
||||
EYGIQRDQIGKDTLNHRTIMKYTINQYLSNLKKSQKKWFEPLILISRTER
|
||||
SMNRDPDAYRYKWSNGSKNFQEHLEQSVSEQKSRFQVVFDRLRINQYSID
|
||||
WSEVIDKKDLSKPLRFFLSKSLLFLSKLLFFLSNSLPFFCVSFGNIPIHR
|
||||
SEIYIYELKGPNDQLCNQLLESIGLQIVHLKKWKPFLLDDHDTSQKSKFL
|
||||
INGGTISPFLFNKIPKWMIDSFHTRNNRRKSFDNPDSYFSMIFHDQDNWL
|
||||
NPVKPFHRSSLISSFYKANRLRFLNNPHHFCFYWNTRFPFSVEKARINNS
|
||||
DFTYGQFLNILFIRNKIFSLCVGKKKHAFWGRDTISPIESQVSNIFIPND
|
||||
FPQSGDETYNLYKSFHFPSRSDPFVRRAIYSIADISGTPLTEGQIVNFER
|
||||
TYCQPLSDMNLSDSEGKNLHQYLNFNSNMGLIHTPCSEKDLSSEKRKKRS
|
||||
LCLKKCVEKGQMYRTFQRDSAFSTLSKWNLFQTYMPWFLTSTGYKYLNLI
|
||||
FLDTFSDLLPILSSSQKFVPIFHDIMHGSGISWRILQKKLCLPQWNLISE
|
||||
ISSKCLHNLLLSEEMIHRNNESPLISTHLRSPNAREFLYSILFLLLVAGY
|
||||
LVRTHLLFVSRASSELQTEFEKVKSLMIPSSMIELRKLLDRYPTSEPNSF
|
||||
WLKNLFLVALEQLGDSLEEIRGSASGGNMLGPAYGVKSIRSKKKDWNINL
|
||||
IEIIDLIPNPINRITFSRNTRHLSHTSKEIYSLIRKRKNVNGDWIDDKIE
|
||||
SWVANSDSIDDEEREFLVQFSTLTTENRIDQILLSLTHSDRLSKNDSGYQ
|
||||
MIEQPGAIYLRYLVDIHKKHLMNYEFNPSCLAERRIFLAHYQTITYSQTS
|
||||
CGENSFHFPSHGKPFSLRLALSPSRGILVIGSIGTGRSYLVKYLATNSYV
|
||||
PFITVFLNKFLDNKPKGFLLDEIDIDDSDDIDDSDNLDASDDIDRDLDTE
|
||||
LKLLTRMNGLTMDMMPEIDRFYITLQFELAKAMSPCIIWIPNIHDLDVNE
|
||||
SNDLALGLLVNHLSRDCERCSTRNILVIASTHIPQKVDPALIAPNKLNTC
|
||||
IKIRRLLLPQQRKHFFTLSYTRGFHLEKKMFHTNGFGSITMGSNARDLVA
|
||||
LTNEVLSISITQKKSIIDTNTIRSALHRQTWDLRSQVRSVQDHGILFYQI
|
||||
GRAVAQNVLLSNCPIDPISIYMKKKSCNEGDSYLYKWYFELGTSMKRLTI
|
||||
LLYLLSCSAGSVAQDLWSLSGPDEKNGITSYGLVENDSDLVHGLLEVEGA
|
||||
LVGSSRTEKDCSQFDNDRVTLLLRPEPRNPLDMMQNGSCSILDQRFLYEK
|
||||
YESEFEEGEGEGALDPQEDLFNHIVWAPRIWRPWGFLFDCIERPNELGFP
|
||||
YWSRSFRGKRIIYDEEDELQENDSEFLQSGTMQYQTRDRSSKEQGLFRIS
|
||||
QFIWDPADPLFFLFKDQPPGSVFSHRELFADEEMSKGLLTSQTDPPTSIY
|
||||
KRWFIKNTQEKHFELLINRQRWLRTNSSLSNGSFRSNTLSESYQYLSNLF
|
||||
LSNGTLLDQMTKTLLRKRWLFPDEMKIGFM
|
||||
>NC_001879@NitaCp102@ycf2@146903@153745@R@1@2281 Ycf2
|
||||
MRGHQFKSWIFELREILREIKNSHHFLDSWTQFNSAGSFIHIFFHQERFL
|
||||
KLFDPRIWSILLSRNSQGSTSNRYFTIKGVILFVVAVLIYRINNRNMVER
|
||||
KNLYLIGLLPIPMNSIGPRNDTLEESVGSSNINRLIVSLLYLPKGKKISE
|
||||
SCFLNPKESTWVLPITKKCSMPESNWGSRWWRNWIGKKRDSSCKISNETV
|
||||
AGIEILFKEKDLKYLEFLFVYYMDDPIRKDHDWELFDRLSLRKSRNRINL
|
||||
NSGPLFEILVKHWISYLMSAFREKIPIEVEGFFKQQGAGSTIQSNDIEHV
|
||||
SHLFSRNKWAISLQNCAQFHMWQFRQDLFVSWGKNPPESDFLRNVSRENW
|
||||
IWLDNVWLVNKDRFFSKVQNVSSNIQYDSTRSSFVQVTDSSQLKGSSDQS
|
||||
RDHLDSISNEDSEYHTLINQREIQQRKERSILWDPSFLQTERKEIESGRF
|
||||
PKCLSGYSSMSRLFTEREKQMINHLFPEEIEEFLGNPTRSVRSFFSDRWS
|
||||
ELHLGSNPTERSTRDQKLLKKQQDLSFVPSKRSENKEMVNIFKIITYLQN
|
||||
TVSIHPISSDPGCDMVPKDEPDMDSSNKISFLNKNPFFDLFHLFHDRNRG
|
||||
GYTLHYDFESEERFQEMADLFTLSITEPDLVYHKGFAFSIDSCGLDQKQF
|
||||
LNEARDESKKKSLLVLPPIFYEENESFSRRIRKKWVRISCGNDLEDPKPK
|
||||
IVVFASNNIMEAVTQYRLIRNLIQIQYSTYGYIRNVLNRFFLMNRSDRNF
|
||||
EYGIQRDQIGKDTLNHRTIMKYTINQYLSNLKKSQKKWFEPLILISRTER
|
||||
SMNRDPDAYRYKWSNGSKNFQEHLEQSVSEQKSRFQVVFDRLRINQYSID
|
||||
WSEVIDKKDLSKPLRFFLSKSLLFLSKLLFFLSNSLPFFCVSFGNIPIHR
|
||||
SEIYIYELKGPNDQLCNQLLESIGLQIVHLKKWKPFLLDDHDTSQKSKFL
|
||||
INGGTISPFLFNKIPKWMIDSFHTRNNRRKSFDNPDSYFSMIFHDQDNWL
|
||||
NPVKPFHRSSLISSFYKANRLRFLNNPHHFCFYWNTRFPFSVEKARINNS
|
||||
DFTYGQFLNILFIRNKIFSLCVGKKKHAFWGRDTISPIESQVSNIFIPND
|
||||
FPQSGDETYNLYKSFHFPSRSDPFVRRAIYSIADISGTPLTEGQIVNFER
|
||||
TYCQPLSDMNLSDSEGKNLHQYLNFNSNMGLIHTPCSEKDLSSEKRKKRS
|
||||
LCLKKCVEKGQMYRTFQRDSAFSTLSKWNLFQTYMPWFLTSTGYKYLNLI
|
||||
FLDTFSDLLPILSSSQKFVPIFHDIMHGSGISWRILQKKLCLPQWNLISE
|
||||
ISSKCLHNLLLSEEMIHRNNESPLISTHLRSPNAREFLYSILFLLLVAGY
|
||||
LVRTHLLFVSRASSELQTEFEKVKSLMIPSSMIELRKLLDRYPTSEPNSF
|
||||
WLKNLFLVALEQLGDSLEEIRGSASGGNMLGPAYGVKSIRSKKKDWNINL
|
||||
IEIIDLIPNPINRITFSRNTRHLSHTSKEIYSLIRKRKNVNGDWIDDKIE
|
||||
SWVANSDSIDDEEREFLVQFSTLTTENRIDQILLSLTHSDRLSKNDSGYQ
|
||||
MIEQPGAIYLRYLVDIHKKHLMNYEFNPSCLAERRIFLAHYQTITYSQTS
|
||||
CGENSFHFPSHGKPFSLRLALSPSRGILVIGSIGTGRSYLVKYLATNSYV
|
||||
PFITVFLNKFLDNKPKGFLLDEIDIDDSDDIDDSDNLDASDDIDRDLDTE
|
||||
LKLLTRMNGLTMDMMPEIDRFYITLQFELAKAMSPCIIWIPNIHDLDVNE
|
||||
SNDLALGLLVNHLSRDCERCSTRNILVIASTHIPQKVDPALIAPNKLNTC
|
||||
IKIRRLLLPQQRKHFFTLSYTRGFHLEKKMFHTNGFGSITMGSNARDLVA
|
||||
LTNEVLSISITQKKSIIDTNTIRSALHRQTWDLRSQVRSVQDHGILFYQI
|
||||
GRAVAQNVLLSNCPIDPISIYMKKKSCNEGDSYLYKWYFELGTSMKRLTI
|
||||
LLYLLSCSAGSVAQDLWSLSGPDEKNGITSYGLVENDSDLVHGLLEVEGA
|
||||
LVGSSRTEKDCSQFDNDRVTLLLRPEPRNPLDMMQNGSCSILDQRFLYEK
|
||||
YESEFEEGEGEGALDPQEDLFNHIVWAPRIWRPWGFLFDCIERPNELGFP
|
||||
YWSRSFRGKRIIYDEEDELQENDSEFLQSGTMQYQTRDRSSKEQGLFRIS
|
||||
QFIWDPADPLFFLFKDQPPGSVFSHRELFADEEMSKGLLTSQTDPPTSIY
|
||||
KRWFIKNTQEKHFELLINRQRWLRTNSSLSNGSFRSNTLSESYQYLSNLF
|
||||
LSNGTLLDQMTKTLLRKRWLFPDEMKIGFM
|
||||
>NC_002202@SpolCp103@ycf2@142690@149085@R@1@2132 Ycf2
|
||||
MKGHQFKSWIFELREILREIKNSHYFLDSWTQFNSVGSFIHIFFYQERFL
|
||||
KLFDPRIWSILLSPNSQGSTSNRYFTIKGVVLFVVVVLIYRITNRNMVER
|
||||
KNLYLIGLFPIPMNSIGPRNDTLEKSFGSSNINRLIVSLLYLPKGKKISE
|
||||
SYFLDPKESTWFLPITKKCIMPESNRGSRWWRNWIGKRRDSSCKISNETV
|
||||
AGIEISFKEKDIQYLEFPFVYYMDDPIRKDHDWELFDCLSLFLRNVSREN
|
||||
WIWLDNVRLVNKDRFFSKVRNVSSNIQYDFTRSSFVQVTDSSQLKESSDQ
|
||||
SRDRSNSISNADSEYHTLINKREIQQLKERSILRDPSFLQTEGTEIESDR
|
||||
FPKCLSGYSSMPRLFTAREKQMIIHLLPEEIEQLLENPTRSIRSFFSGRW
|
||||
SELHLGSNPTERSTRDPQLLKKQQDVSFAPSRQSENKEMVNIFKIIKYLQ
|
||||
NTVSIHPISSDPGCDMVPKDELDMDSSDKISFLNKNSFFDLFHLFHDRNR
|
||||
GGYALHHDFESEEKFQEMADLFTLSITDPDLVYHRGFSFSIDSCGLDQKQ
|
||||
FLNEVFNSRDESKKKSLLVLSPIFYEENESFYRRIRKKGVRISRNVLNRF
|
||||
FLINRSDRSFEYGIQRDQIGNDTLNHRTIRKYMINQDFSNLKKSQKKWFD
|
||||
PLIFLSRTERFMNRDPDAYRYKWFNGSKNFQEHLEHFVSEQKSRFQVVFD
|
||||
QLRINQYSIDWSEVIDKKDLSKSLRFFLSKSLRFFLSKLLLFLSNSLPFF
|
||||
FVSFGNIPINRSEIRIYELKGPNDQLYNPLVESIGLQIVHLKKWKAFLLD
|
||||
DHDTFQKSKFLINGGTISPFLFNKIPKWMIDSFHTRNNSGKSFDNTDSYF
|
||||
SMISHDQNNWLNPVKPFHRSSLISSFYKANQLRFLNNPHHFCFYCNKRFP
|
||||
FYMEKARINNSDFTYRQFLNILFIHNKLFSLCVGKKKHAFLERDTISPIE
|
||||
SQVSNIFLPNDFPIRSDLLVRRTIYSIADISGTPLTEGQLVHFERTYCQP
|
||||
LSDMNLSDSEKKNLHQYLNFNSNMGFIYTPCSEKYLLSEKRKKRSLCLKK
|
||||
CVEKGQMYRTFQRDSAFSTLSKWNLFQTYMPWFLTSTGYKYLNFLFLDTF
|
||||
SDLLPILSSSQKFLSILHDIMHGSGISWRILQKKLCLPPWNLISEISSKC
|
||||
LHNLLLPEEMIHRNNESPLIWTHLASPNVREFFYSILFLLFVAGYLVRTH
|
||||
LLFVFRASSELQTEFERVKSLMIPSYMIELRKLLDRYPTSEPNSFWLKNL
|
||||
FLVALEQLGDSLEEIRGSASGDNMLLGGGPGPAYGFKSIRSKKKYLNINL
|
||||
IDILDLISIIPNPINRITFSRNTRHLSHTSKEIYSLIRKRKRVNGDWIDD
|
||||
KIESWVASSDSIDDEEREFLVQFSTLTTEKRIDQILLSLTHSDHLSKNDS
|
||||
GYQLIEQPGAIYLRYLVDIHKKYLMNYEFNTSCLAERRVFLAHYQTITYS
|
||||
QTSCGANSFHFPSHGKPFSLRLALSPSRGILVIGSIGTGRSYLVKYLATN
|
||||
SYVPFITVFLNKFLDNKPKGSLIDASDDIDRDLDTELELLTMMNALTMDM
|
||||
MPEIDQFSITLQFELAKAMSPCIIWIPNIHDLDVNESNYLSLGLFVNYLS
|
||||
RDCERGSTRNILVIASTHIPQKVDPALIAPNQLNTCIKIRRLRIPQQRKH
|
||||
FFTLSYTRGFHLEKKMFHTNGFGSITMGSNVRDLVAFINEALSISITQKK
|
||||
SIIDTNTIRSALHRQTWDLRSQVRSVQDHGILFYQIGRAVAQNVLLSNCP
|
||||
IDPISTYMKKKSCNEGDSYLYKWYFELGTSMKKLTILLYLLSCSAGSVAQ
|
||||
DLWSLPGPDEKNGITSYGLVENDSYLVHGLLEVEGALVGSSRIEKACSQN
|
||||
DRVTLFLRPELRNPLDMMQNGSCSILDHRFLYEKYESELEEGEGALDPQQ
|
||||
IEEDLFNHIVWAPRIWNPWGFLFDCIERPNELGFPYWARSFRGKRSIYDK
|
||||
EDELQENDSEFLQSGTMQYQTRDRSSKEQGFFRISQFIWDPADPLFFLFK
|
||||
DQPFVSVFSHREFFADEEISKGLLTSQMNPPISIFQRWFIKNTQEKHFEL
|
||||
LINRQRWLRTNSSLSNGSFRSNTLSESYQYLSNLFLSNGTLLDQMTKTLL
|
||||
RKRWLFPDEMKIGFMQEEKDFPFLSRKDMWP
|
||||
>NC_002693@OeelhCp081@ycf2@92143@99165@D@1@2341 Ycf2
|
||||
MGNQRNRVNLNPFRFWVFELREILREIKNSRYPFNSVGSFIHIFVHQERF
|
||||
LKLLDPRIWSVLRSQGSTGVVLFLVAVLIYRINNRNMIERKNIYLTGLLP
|
||||
IPTNFAGPRNETLEESFLSSNINRLIVSLLHLPKGKRLSESCFLDPKEST
|
||||
RVLPITKWRNWIGKRRDSSQLKGSSDQSRDHFDSIGTEDSEYHTLINQRE
|
||||
IQQRKERSSLLDPSFLQTERTEIESDRFSKGLSGSSSKSRLFTEGEKEMN
|
||||
NHLPPEEIEEFLGNPTRSILSFFSDEWSELHLGSNPTERSTVDQKLLKKE
|
||||
QEVSFAPFRRSETKEIVNLFKTMAYLQKTVSIHPISSDPGCDMVPKDELD
|
||||
SEERFQEMADLFTLSITEPDLVYHKGFAFSIDSSVLDQKQFLAEARDESK
|
||||
KKSLLVLPPVFYQENESFYRRIRKRGVQISCGNDLEDPKPKIVVFASNNI
|
||||
VEAVNQYRWIRNLIQIQYSTHGYIRNVLNRFFLMNRSDRNFEYGIQRDQI
|
||||
GNDTLNHRTFMKYTINQHLSNLKKSQKKGSDPLILISRTERSVNRDPNAY
|
||||
RYKWSKGSKNFQEHLEHFVSEQKSRFQVVFDRYRSIRNRYRSIRNRYRSR
|
||||
INQYSSDRSEVSDKKDNRYRSRINQYSSDRSEVSDQKNLAKFRSFVFSKL
|
||||
LLFLSNSLPFFFVSFGNTPPIQRSEIRVSELKGPNDRLCNQFLESIGLQL
|
||||
VYLKKLKPFLLDDHETSQKSKLLFNKKPEGMIDSFHTRNNRGKSLDSYFS
|
||||
MISHDQDNWLNPVKPFHRSSLISSFYKANRLRFLNNPHDFGFFCNKRFPF
|
||||
YVDIKNLDFTYGQFLNILFIRNTKFSLCGDKKKHAFLERDTISSIESQVS
|
||||
NLFKDFPQSGDERYNFYKYFHLAMRSDPLVRRAIYSIADISGTPLTEGQR
|
||||
VNFERTYCQPLSDMNLSDSEGKNLYQYLNFNSNMGLIYSEKCFSSEKRKK
|
||||
KKPEKRKEKKPEKRKEKKPEKRKEKKPEKRKEKKPEKRKEKKPEKRKEKK
|
||||
PEKRKEKKQSLYLKQWVEKVQMDRALQGERVSLILSNWNLFKTYVMPFSL
|
||||
TSTGYNLLKLMFLDTLGSYVMPLLRSSPKFVSICYAISDPCGISWRILQK
|
||||
KLCLLQWNWISAISNKCFHKLLLSEESIHRNNESPSMTDLRWPNLGAFLY
|
||||
SILFLLFVAGHLVFSHLLFLSQDFSELQRDFARAQSLMIPSYIVELRELL
|
||||
DMYPAPRSFKKLFLAAREKLVNYLRWGGGRKSFLIHLFELLNITPNPIDR
|
||||
IAFLKNTRHLSHTSKELYSLITELGDFSSLCSGQRYRYDQIIENVNGPCC
|
||||
LIDDKIESWISNCDAIEDKEREFLVPFCNFTRETRIDQILLSLTHSDHLS
|
||||
NNDSASQMSEEPGAFYLRHLVDIHKKGLMNYECNTSCLAERRIFLAHYQT
|
||||
ITYSPCGDNRSHFPSHGKTFSLRLPLHPSRATLVIGSIGSGRSYLVKSLA
|
||||
TNSYVPLITVVLNKFLKNWTPQGFDIHESGVYDEYGDDAEEANDYGASFF
|
||||
DFLDNDSDDYEDRDSDDYDEPGASDDYEDRDMEDFVDSEMTEWLTKTNVP
|
||||
LVYQLLDDEIDEFYITLQFELAKAMSPCILWIPNIHDLDAKESDYLSLGL
|
||||
LVNHLSRDCGRRSTKNEILVIASTHIPQKVDPSLIGPDGLSTCIKTRRLL
|
||||
VPQQQQCLFTLSYTRGFHLENKMFHTHTNEFESTILGPSVPDLVALTNEA
|
||||
LSISITQKKSIIDTTTIRYALHRKTWDLEADRNLSPAKEHGTLFYQVGRA
|
||||
FAHTVLLRNCPIDPISIYIKKNLCEAGDSSLYKWYFELGTSMKKLTILLY
|
||||
LLTCSAGSIAQDLLSPPGPDEQNLITSYGLVENDSDLVHGLSDIVHGLLE
|
||||
LEGALVGSSPTEEEVEGTEEEVEGTEEEVEGTEEEVEGTEEEVEGTEEEV
|
||||
EGTEEEVEGTEDEEGEGTEEEVEGTEDEEGEGTEEEVEGTEDEEGEGTEE
|
||||
EVEGTEDEEGEGTEDEEGEGTEEEVEGTEEEVEGTEDEEGEGTEKDSSQF
|
||||
DNDRVTLLLRPKPRNPLDIQRLIYQHQKYESELEEDDDDDEDVFAPQKML
|
||||
EDLFSELVWSPRIWHPWDFILDCEAEIPAEEIPEEEDPLPEEALETEVAV
|
||||
WGEEEEGEADDEEDERLEAQQEDELLEEEDEELKEEEDELHEEEEEEEEE
|
||||
EEEEEEDELHEEEEEEEEEDELQENDSEFFRSETQQPQARDGFSEEEGCF
|
||||
RISQFMWVPGDPLSFLYKDTPFVEVLSYPEEATEISKELLRLLNPKTKRD
|
||||
APKRARQRWWTKKKQDKHYELVLDRQRWLITKSSLSKSNGFFRSNTPSES
|
||||
YQYLSNLFLSNRRLLDQITKTFFRKKWLFPDEMKIGFMEQ
|
@ -45,28 +45,27 @@ FT AAAQSISYEIPLALCVLSISLRVIR"
|
||||
FT gene 42910..45121
|
||||
FT /gene="ndhB"
|
||||
FT /locus_tag=""
|
||||
FT CDS join(42910..43780,44367..45121)
|
||||
FT CDS join(42910..43685,44365..45121)
|
||||
FT /codon_start=1
|
||||
FT /transl_table=11
|
||||
FT /gene="ndhB"
|
||||
FT /locus_tag=""
|
||||
FT /product="NADH dehydrogenase subunit 2"
|
||||
FT /inference="similar to DNA sequence:NC_008535:CoarCp067"
|
||||
FT /inference="similar to DNA sequence:AC_000188:LyesCp066"
|
||||
FT /translation="MIWHVQNENFILDSTRIFMKAFHLLLFDGSLIFPECILIFGLILL
|
||||
FT LMIDSTSDQKDIPWLYFISSTSLVMSITALLFRWREEPMISFSGNFQTNNFNEIFQFLI
|
||||
FT LLCSTLCIPLSVEYIECTEMAITEFLLFVLTATLGGMFLCGANDLITIFVAPECFSLCS
|
||||
FT YLLSGYTKKDVRSNEATMKYLLMGGASSSILVHGFSWLYGSSGGEIELQEIVNGLINTQ
|
||||
FT MYNSPGISIALIFITVGIGFKLSPAPSHQWTPDVYEGVRFVREIPTSLSISEMFGFFKT
|
||||
FT PWTCRREMLSPTPVVAFLSVTSKVAASASATRIFNIPFYFSSNEWHLLLEILAILSMIL
|
||||
FT GNLIAITQTSMKRMLAYSSIGQIGYVIIGIIVGDSNDGYASMITYMLFYISMNLGTFAC
|
||||
FT IVLFGLRTGTDNIRDYAGLYTKDPFLALSLALCLLSLGGLPPLAGFFGKLYLFWCGWQA
|
||||
FT GLYFLVLIGLLTSVVSIYYYLKIIKLLMTGRNQEITPHVRNYRRSPLRSNNSIELSMIV
|
||||
FT CVIASTIPGISMNPIIAIAQDSLF"
|
||||
FT exon 42910..43780
|
||||
FT MYNSPGISIALIFITVGIGFKLSPAPSHQWTPDVYEGSPTPVVAFLSVTSKVAASASAT
|
||||
FT RIFNIPFYFSSNEWHLLLEILAILSMILGNLIAITQTSMKRMLAYSSIGQIGYVIIGII
|
||||
FT VGDSNDGYASMITYMLFYISMNLGTFACIVLFGLRTGTDNIRDYAGLYTKDPFLALSLA
|
||||
FT LCLLSLGGLPPLAGFFGKLYLFWCGWQAGLYFLVLIGLLTSVVSIYYYLKIIKLLMTGR
|
||||
FT NQEITPHVRNYRRSPLRSNNSIELSMIVCVIASTIPGISMNPIIAIAQDSLF"
|
||||
FT exon 42910..43685
|
||||
FT /gene="ndhB"
|
||||
FT /locus_tag=""
|
||||
FT /number=1
|
||||
FT exon 44367..45121
|
||||
FT exon 44365..45121
|
||||
FT /gene="ndhB"
|
||||
FT /locus_tag=""
|
||||
FT /number=2
|
||||
@ -79,7 +78,7 @@ FT /transl_table=11
|
||||
FT /gene="ndhD"
|
||||
FT /locus_tag=""
|
||||
FT /product="NADH dehydrogenase subunit 4"
|
||||
FT /inference="similar to DNA sequence:NC_007898:LyesC2p017"
|
||||
FT /inference="similar to DNA sequence:AC_000188:LyesCp081"
|
||||
FT /translation="MNYFPWLTIIVVFPIFAGSLIFFLPHKGNRVIRWYTICICILELL
|
||||
FT LTTYAFCYHFQSDDPLIQLVEDYKWIDFFDFHWRLGIDGLSIGPILLTGFITTLATLAA
|
||||
FT WPVTRDSRLFHFLMLAMYSGQIGLFSSRDLLLFFIMWELELIPVYLLLAMWGGKKRLYS
|
||||
@ -98,7 +97,7 @@ FT /transl_table=11
|
||||
FT /gene="ndhE"
|
||||
FT /locus_tag=""
|
||||
FT /product="NADH dehydrogenase subunit 4L"
|
||||
FT /inference="similar to DNA sequence:NC_001879:NitaCp085"
|
||||
FT /inference="similar to DNA sequence:AC_000188:LyesCp080"
|
||||
FT /translation="MILEHVLVLSAYLFSIGIYGLITSRNMVRALMCLELILNAVNINF
|
||||
FT VTFSDFFDNRQLKGDIFSIFVIAIAAAEAAIGLAIVSSIYRNRKSTRINQSNLLNN"
|
||||
FT gene complement(11509..13722)
|
||||
@ -178,7 +177,7 @@ FT /transl_table=11
|
||||
FT /gene="psaC"
|
||||
FT /locus_tag=""
|
||||
FT /product="photosystem I subunit VII"
|
||||
FT /inference="similar to DNA sequence:NC_023792:CP89_p014"
|
||||
FT /inference="similar to DNA sequence:AC_000188:LyesCp065"
|
||||
FT /translation="MSHSVKIYDTCIGCTQCVRACPTDVLEMIPWDGCKAKQIASAPRT
|
||||
FT EDCVGCKRCESACPTDFLSVRVYLWHETTRSMGLAY"
|
||||
FT gene 53817..55307
|
||||
@ -190,7 +189,7 @@ FT /transl_table=11
|
||||
FT /gene="rpl2"
|
||||
FT /locus_tag=""
|
||||
FT /product="ribosomal protein L2"
|
||||
FT /inference="similar to DNA sequence:NC_007898:LyesC2p002"
|
||||
FT /inference="similar to DNA sequence:AC_000188:LyesCp030"
|
||||
FT /translation="MAIHLYKTSTPSTRNGTVDSQVKSNPRNNLIYGQRRCGKGRNARG
|
||||
FT IITARHRGGGHKRLYRKIDFRRNEKDIYGRIVTIEYDPNRNAYICLIHYGDGEKRYILH
|
||||
FT PRGAIIGDTIVSGTEVPIKMGNALPSTDMPLGTAIHNIEITLGKGGQLARAAGAVAKLI
|
||||
@ -213,7 +212,7 @@ FT /transl_table=11
|
||||
FT /gene="rpl23"
|
||||
FT /locus_tag=""
|
||||
FT /product="ribosomal protein L23"
|
||||
FT /inference="similar to DNA sequence:NC_007898:LyesC2p003"
|
||||
FT /inference="similar to DNA sequence:AC_000188:LyesCp026"
|
||||
FT /translation="MDGIKYAVFTDKSIRLLGKNQYTSNVESGSTRTEIKHWVELFFGV
|
||||
FT KVIAMNSHRLPGKSRRMGPIMGHTMHYRRMIITLQPGYSIPPLRKKRT"
|
||||
FT gene 14505..14672
|
||||
@ -249,7 +248,7 @@ FT /transl_table=11
|
||||
FT /gene="rps7"
|
||||
FT /locus_tag=""
|
||||
FT /product="ribosomal protein S7"
|
||||
FT /inference="similar to DNA sequence:NC_007898:LyesC2p007"
|
||||
FT /inference="similar to DNA sequence:AC_000188:LyesCp008"
|
||||
FT /translation="MSRRGTAEKKTAKSDPIYRNRLVNMLVNRILKHGKKSLAYQIIYR
|
||||
FT AVKKIQQKTETNPLSVLRQAIRGVTPDITVKARRVGGSTHQVPIEIGSTQGKALAIRWL
|
||||
FT LAASRKRPGRNMAFKLSSELVDAAKGSGDAIRKKEETHRMAEANRAFAHFR"
|
||||
|
254
detectors/cds/tools/chlorodb/go_chlorodb.sh
Executable file
254
detectors/cds/tools/chlorodb/go_chlorodb.sh
Executable file
@ -0,0 +1,254 @@
|
||||
#!/bin/csh -f
|
||||
#
|
||||
# make ChloroDB's
|
||||
#
|
||||
# usage: copy genbank/embl files into 'DB_DIR/download'
|
||||
# usage: [create a parameters.sh file in 'DB_DIR']
|
||||
# usage: go_chlorodb [DB_DIR]
|
||||
#
|
||||
unsetenv ORG_SOURCED
|
||||
|
||||
setenv ORG_HOME `dirname $0`/../../../..
|
||||
source $ORG_HOME/scripts/csh_init.sh
|
||||
|
||||
#
|
||||
# which DB to process
|
||||
#
|
||||
|
||||
set DB_BASE = $DATA_DIR/cds/chlorodb # default location
|
||||
|
||||
if ($#Argv > 0) then
|
||||
set DB_BASE = $Argv[1]; Shift
|
||||
endif
|
||||
|
||||
set DB_BASE = `cd $DB_BASE && pwd -P`
|
||||
|
||||
NeedDir $DB_BASE/download
|
||||
|
||||
if (! -d $DB_BASE/info) mkdir $DB_BASE/info
|
||||
if (! -d $DB_BASE/fasta) mkdir $DB_BASE/fasta
|
||||
|
||||
cd $DB_BASE/info
|
||||
|
||||
#
|
||||
# params
|
||||
#
|
||||
|
||||
if (! -e $DB_BASE/parameters.sh) then
|
||||
# number of entries (regular files directly inside download/, no recursion).
# '-mindepth 1 -maxdepth 1' is accepted by both GNU and BSD find, whereas
# the '-depth 1' primary is BSD-only and makes GNU find abort.
@ n = `find $DB_BASE/download -mindepth 1 -maxdepth 1 -type f -print | wc -l`
# default cutoffs are integer fractions of the number of entries
@ cor_cutoff = $n / 2
@ atg_cutoff = $n / 10
@ dbs_cutoff = $n / 4
# integer division yields 0 for small DBs: keep every cutoff >= 1
if ($cor_cutoff == 0) @ cor_cutoff = 1
if ($atg_cutoff == 0) @ atg_cutoff = 1
if ($dbs_cutoff == 0) @ dbs_cutoff = 1
|
||||
echo "# sourced file" > $DB_BASE/parameters.sh
|
||||
echo "" >> $DB_BASE/parameters.sh
|
||||
echo "set CORE_NCDS_CUTOFF = $cor_cutoff" >> $DB_BASE/parameters.sh
|
||||
echo "set CORE_START_ATG_CUTOFF = $atg_cutoff" >> $DB_BASE/parameters.sh
|
||||
echo "set CORE_START_DFT_CUTOFF = $atg_cutoff" >> $DB_BASE/parameters.sh
|
||||
echo "set CORE_START_OTH_CUTOFF = 10" >> $DB_BASE/parameters.sh
|
||||
echo "set CORE_STOP_CUTOFF = $cor_cutoff" >> $DB_BASE/parameters.sh
|
||||
echo "set CORE_SPLICE_CUTOFF = $atg_cutoff" >> $DB_BASE/parameters.sh
|
||||
echo "" >> $DB_BASE/parameters.sh
|
||||
echo "set SHEL_NCDS_CUTOFF = 10" >> $DB_BASE/parameters.sh
|
||||
echo "" >> $DB_BASE/parameters.sh
|
||||
echo "set CORE_DELTA = Inf" >> $DB_BASE/parameters.sh
|
||||
echo "set CORE_COVMIN = 30" >> $DB_BASE/parameters.sh
|
||||
echo "set CORE_PMAX = 1e-6" >> $DB_BASE/parameters.sh
|
||||
echo "set CORE_IDMIN = 30" >> $DB_BASE/parameters.sh
|
||||
echo "set CORE_SIZMIN = $cor_cutoff" >> $DB_BASE/parameters.sh
|
||||
echo "" >> $DB_BASE/parameters.sh
|
||||
echo "set SHEL_DELTA = 0.5" >> $DB_BASE/parameters.sh
|
||||
echo "set SHEL_COVMIN = 30" >> $DB_BASE/parameters.sh
|
||||
echo "set SHEL_PMAX = 1e-6" >> $DB_BASE/parameters.sh
|
||||
echo "set SHEL_IDMIN = 30" >> $DB_BASE/parameters.sh
|
||||
echo "set SHEL_SIZMIN = $dbs_cutoff" >> $DB_BASE/parameters.sh
|
||||
echo "" >> $DB_BASE/parameters.sh
|
||||
echo "set DUST_DELTA = 0.5" >> $DB_BASE/parameters.sh
|
||||
echo "set DUST_COVMIN = 30" >> $DB_BASE/parameters.sh
|
||||
echo "set DUST_PMAX = 1e-6" >> $DB_BASE/parameters.sh
|
||||
echo "set DUST_IDMIN = 30" >> $DB_BASE/parameters.sh
|
||||
echo "set DUST_SIZMIN = 10" >> $DB_BASE/parameters.sh
|
||||
|
||||
endif
|
||||
|
||||
source $DB_BASE/parameters.sh
|
||||
|
||||
##set CMIN_COD = 0
|
||||
##set FMIN_COD = 0.01
|
||||
|
||||
#
|
||||
# temporarily uncompress
|
||||
#
|
||||
|
||||
# list the gzipped entries directly inside download/ (no recursion);
# '-mindepth 1 -maxdepth 1' works with GNU and BSD find alike, unlike
# the BSD-only '-depth 1'
set ff = `find $DB_BASE/download -mindepth 1 -maxdepth 1 -name \*.gz -print`

if ($#ff != 0) then
  Notify "uncompressing $#ff entries"
  foreach f ($ff)
    gunzip -f $f
  end
endif
|
||||
|
||||
#
|
||||
# convert gbk/embl to fasta
|
||||
#
|
||||
|
||||
# gbk/embl entries directly inside download/ (no recursion);
# '-mindepth 1 -maxdepth 1' and '-o' are understood by GNU and BSD find,
# whereas '-depth 1' is BSD-only
set ff = `find $DB_BASE/download -mindepth 1 -maxdepth 1 \( -name \*.gbk -o -name \*.embl \) -print`

Notify "convert $#ff gbk/embl entries to fasta"

foreach f ($ff)
  # nom: file name without directory and extension
  set nom = `basename $f:r`
  # typ: file extension (gbk or embl), selects the converter script
  set typ = $f:e
  $AwkCmd -f $LIB_DIR/$typ.tofasta.awk $f > $DB_BASE/fasta/$nom.fst
end
|
||||
|
||||
#
|
||||
# get gbk/embl info
|
||||
#
|
||||
|
||||
Notify "get gbk/embl info for $#ff entries"
|
||||
|
||||
echo "" | awk -v HEADONLY=1 -f $LIB_DIR/gbk.info.awk > db.info.txt # just get header
|
||||
|
||||
foreach f ($ff)
|
||||
set nom = `basename $f:r`
|
||||
set typ = $f:e
|
||||
$AwkCmd -f $LIB_DIR/$typ.oneliner.awk $f |\
|
||||
$AwkCmd -f $LIB_DIR/libutil.awk -f $LIB_DIR/$typ.info.awk |\
|
||||
egrep -v '^#' >> db.info.txt
|
||||
end
|
||||
|
||||
#
|
||||
# get cds info
|
||||
#
|
||||
|
||||
Notify "get gbk/embl cds for $#ff entries"
|
||||
|
||||
echo "" | awk -v HEADONLY=1 -f $LIB_DIR/gbk.cds_long.awk > db.cds.txt # just get header
|
||||
|
||||
foreach f ($ff)
|
||||
set nom = `basename $f:r`
|
||||
set typ = $f:e
|
||||
$AwkCmd -f $LIB_DIR/$typ.oneliner.awk $f |\
|
||||
$AwkCmd -v FASTA=$DB_BASE/fasta/$nom.fst -f $LIB_DIR/libutil.awk \
|
||||
-f $LIB_DIR/$typ.cds_long.awk |\
|
||||
egrep -v '^#' >> db.cds.txt
|
||||
end
|
||||
|
||||
#
|
||||
# get fasta for prots
|
||||
#
|
||||
|
||||
Notify "get prots"
|
||||
$AwkCmd -f $LIB_DIR/libutil.awk -f $LIB_DIR/cds2fasta.awk db.cds.txt > db.prot.fst
|
||||
|
||||
#
|
||||
# get introns
|
||||
#
|
||||
|
||||
Notify "get gbk/embl introns for $#ff entries"
|
||||
|
||||
echo "" | awk -v HEADONLY=1 -f $LIB_DIR/gbk.intron.awk > db.intron.txt # just get header
|
||||
|
||||
foreach f ($ff)
|
||||
set nom = `basename $f:r`
|
||||
set typ = $f:e
|
||||
$AwkCmd -f $LIB_DIR/$typ.oneliner.awk $f |\
|
||||
$AwkCmd -v FASTA=$DB_BASE/fasta/$nom.fst -f $LIB_DIR/libutil.awk \
|
||||
-f $LIB_DIR/$typ.intron.awk |\
|
||||
egrep -v '^#' >> db.intron.txt
|
||||
end
|
||||
|
||||
#
|
||||
# make models
|
||||
#
|
||||
|
||||
Notify "Making models"
|
||||
|
||||
echo -n "" > db.models.params.txt
|
||||
echo "CORE_NCDS_CUTOFF <- $CORE_NCDS_CUTOFF" >> db.models.params.txt
|
||||
echo "CORE_START_ATG_CUTOFF <- $CORE_START_ATG_CUTOFF" >> db.models.params.txt
|
||||
echo "CORE_START_DFT_CUTOFF <- $CORE_START_DFT_CUTOFF" >> db.models.params.txt
|
||||
echo "CORE_START_OTH_CUTOFF <- $CORE_START_OTH_CUTOFF" >> db.models.params.txt
|
||||
echo "CORE_STOP_CUTOFF <- $CORE_STOP_CUTOFF" >> db.models.params.txt
|
||||
echo "CORE_SPLICE_CUTOFF <- $CORE_SPLICE_CUTOFF" >> db.models.params.txt
|
||||
echo "SHEL_NCDS_CUTOFF <- $SHEL_NCDS_CUTOFF" >> db.models.params.txt
|
||||
|
||||
$LIB_DIR/make.models.r |& Cat
|
||||
|
||||
GetStatus
|
||||
OnError then
|
||||
Error 2 "model parameter too stringent"
|
||||
endif
|
||||
|
||||
#
|
||||
# add matrices
|
||||
#
|
||||
|
||||
cp -f $PROG_DIR/matrices/* models
|
||||
|
||||
#
|
||||
# make subDBs
|
||||
#
|
||||
|
||||
if (-e db.core.pat.txt) then
|
||||
Notify "Making core DB (take some time... please wait)"
|
||||
$PROG_DIR/subdb/go_subdb.sh db.prot.fst db.core.pat.txt \
|
||||
$CORE_DELTA $CORE_COVMIN $CORE_PMAX $CORE_IDMIN $CORE_SIZMIN
|
||||
endif
|
||||
|
||||
if (-e db.shell.pat.txt) then
|
||||
Notify "Making shell DB (take some time... please wait)"
|
||||
$PROG_DIR/subdb/go_subdb.sh db.prot.fst db.shell.pat.txt \
|
||||
$SHEL_DELTA $SHEL_COVMIN $SHEL_PMAX $SHEL_IDMIN $SHEL_SIZMIN
|
||||
endif
|
||||
|
||||
if (-e db.dust.pat.txt) then
|
||||
Notify "Making dust DB (take some time... please wait)"
|
||||
$PROG_DIR/subdb/go_subdb.sh db.prot.fst db.dust.pat.txt \
|
||||
$DUST_DELTA $DUST_COVMIN $DUST_PMAX $DUST_IDMIN $DUST_SIZMIN
|
||||
endif
|
||||
|
||||
#
|
||||
# recompress entries
|
||||
#
|
||||
|
||||
# every regular file directly inside download/ (no recursion);
# '-mindepth 1 -maxdepth 1' replaces the BSD-only '-depth 1' so that
# the script also runs with GNU find
set ff = `find $DB_BASE/download -mindepth 1 -maxdepth 1 -type f -print`

if ($#ff != 0) then
  Notify "recompressing $#ff entries"
  foreach f ($ff)
    gzip -f $f
  end
endif

# compress fasta

set ff = `find $DB_BASE/fasta -mindepth 1 -maxdepth 1 -name \*.fst -print`

if ($#ff != 0) then
  Notify "compressing $#ff fasta entries"
  foreach f ($ff)
    gzip -f $f
  end
endif
|
||||
|
||||
# install everything in proper directory
|
||||
|
||||
foreach dir ("core" "shell" "dust")
|
||||
if (-e $DB_BASE/$dir) \rm -r $DB_BASE/$dir
|
||||
if ((-d db.$dir.pat.db) && (-e db.$dir.pat.db/Annot.lst)) then
|
||||
Notify "installing $DB_BASE/$dir"
|
||||
\mv -f db.$dir.pat.db $DB_BASE/$dir
|
||||
endif
|
||||
end
|
||||
|
||||
if (-e $DB_BASE/models) \rm -r $DB_BASE/models
|
||||
if (-d models) \mv -f models $DB_BASE
|
||||
|
||||
Notify "Done"
|
||||
exit 0
|
||||
|
29
detectors/cds/tools/chlorodb/matrices/blosum62.mat
Normal file
29
detectors/cds/tools/chlorodb/matrices/blosum62.mat
Normal file
@ -0,0 +1,29 @@
|
||||
#
|
||||
# blosum62 substitution matrix
|
||||
# with larger penalty for stops
|
||||
#
|
||||
A R N D C Q E G H I L K M F P S T W Y V B Z X *
|
||||
A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -50
|
||||
R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -50
|
||||
N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -50
|
||||
D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -50
|
||||
C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -50
|
||||
Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -50
|
||||
E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -50
|
||||
G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -50
|
||||
H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -50
|
||||
I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -50
|
||||
L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -50
|
||||
K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -50
|
||||
M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -50
|
||||
F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -50
|
||||
P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -50
|
||||
S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -50
|
||||
T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -50
|
||||
W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -50
|
||||
Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -50
|
||||
V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -50
|
||||
B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -50
|
||||
Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -50
|
||||
X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -50
|
||||
* -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 1
|
195
detectors/cds/tools/chlorodb/subdb/go_subdb.sh
Executable file
195
detectors/cds/tools/chlorodb/subdb/go_subdb.sh
Executable file
@ -0,0 +1,195 @@
|
||||
#!/bin/csh -f
|
||||
#
|
||||
# usage: go_subdb.sh prot.fst pat.txt [deltalen covmin pmax idmin sizmin]
|
||||
# usage: prot.fst : proteins fasta file
|
||||
# usage: pat.txt : text file containing patterns and names for families to extract
|
||||
# usage: output directory containing subdbs : basename <pat:r>.db
|
||||
#
|
||||
|
||||
unsetenv ORG_SOURCED
|
||||
|
||||
setenv ORG_HOME `dirname $0`/../../../../..
|
||||
source $ORG_HOME/scripts/csh_init.sh
|
||||
|
||||
NeedArg 2
|
||||
|
||||
set ProtFile = $Argv[1]; Shift
|
||||
set PatFile = $Argv[1]; Shift
|
||||
|
||||
NeedFile $ProtFile
|
||||
NeedFile $PatFile
|
||||
|
||||
#
|
||||
# parameters
|
||||
#
|
||||
|
||||
set Delta = 0.5
|
||||
set Covmin = 30
|
||||
set Pmax = 1e-6
|
||||
set Idmin = 30
|
||||
set Sizmin = 5
|
||||
|
||||
if ($#Argv > 0) then
|
||||
set Delta = $Argv[1]; Shift
|
||||
endif
|
||||
|
||||
if ($#Argv > 0) then
|
||||
set Covmin = $Argv[1]; Shift
|
||||
endif
|
||||
|
||||
if ($#Argv > 0) then
|
||||
set Pmax = $Argv[1]; Shift
|
||||
endif
|
||||
|
||||
if ($#Argv > 0) then
|
||||
set Idmin = $Argv[1]; Shift
|
||||
endif
|
||||
|
||||
if ($#Argv > 0) then
|
||||
set Sizmin = $Argv[1]; Shift
|
||||
endif
|
||||
|
||||
#
|
||||
# output directory
|
||||
#
|
||||
|
||||
set OutDir = `basename $PatFile:r`.db
|
||||
|
||||
if (-d $OutDir) \rm -r $OutDir
|
||||
mkdir $OutDir
|
||||
|
||||
set OutLog = `basename $PatFile:r`.log
|
||||
|
||||
echo -n '' > $OutLog
|
||||
|
||||
alias Report 'egrep "^>" \!:1 | wc -l | awk -v P=`basename \!:1` -v H=\!:2 '"'{print H,P,"'$1}'"'"' >> $OutLog'
|
||||
|
||||
#
|
||||
# remove entries with bad symbols
|
||||
#
|
||||
|
||||
Notify "cleanup $ProtFile"
|
||||
|
||||
Report $ProtFile "init_size"
|
||||
|
||||
# keep only the entries accepted by db.filter.sym.awk; result goes to P_$$
$AwkCmd -f $LIB_DIR/db.filter.sym.awk $ProtFile > P_$$

# log the number of entries left after cleanup: count the filtered file
# (P_$$), not the input file whose size was already logged as init_size
Report P_$$ "cleanup_size"
|
||||
|
||||
#
|
||||
# select by name pattern
|
||||
#
|
||||
|
||||
Notify "select by patterns"
|
||||
|
||||
mkdir D_$$
|
||||
mkdir E_$$
|
||||
mkdir F_$$
|
||||
|
||||
set noms = `awk '{print $1}' $PatFile`
|
||||
|
||||
foreach nom ($noms)
|
||||
set pat = `egrep "^$nom " $PatFile | awk '{print $2}'`
|
||||
$AwkCmd -f $LIB_DIR/db.filter.pat.awk -v PAT="$pat" P_$$ > D_$$/$nom.fst
|
||||
set n = `egrep '^>' D_$$/$nom.fst | wc -l`
|
||||
Notify " pattern : $nom : $n"
|
||||
Report D_$$/$nom.fst "pattern_filter"
|
||||
if ($n <= $Sizmin) \rm -f D_$$/$nom.fst
|
||||
end
|
||||
|
||||
set ok = `ls D_$$ | wc -l`
|
||||
if ($ok == 0) goto fin
|
||||
|
||||
#
|
||||
# select by length
|
||||
#
|
||||
|
||||
Notify "select by length"
|
||||
|
||||
foreach f (D_$$/*.fst)
|
||||
set nom = `basename $f:r`
|
||||
$AwkCmd -f $LIB_DIR/db.getlen.awk $f > L_$$
|
||||
$LIB_DIR/db.filter.len.r L_$$ $Delta |\
|
||||
$AwkCmd '($NF == "TRUE") {print $2}' > M_$$
|
||||
$AwkCmd -v FILE=M_$$ -f $LIB_DIR/db.subdb.awk $f > E_$$/$nom.fst
|
||||
Report E_$$/$nom.fst "length_filter"
|
||||
set n = `egrep '^>' E_$$/$nom.fst | wc -l`
|
||||
Notify " length filter : $nom : $n"
|
||||
if ($n <= $Sizmin) \rm -f E_$$/$nom.fst
|
||||
end
|
||||
|
||||
set ok = `ls E_$$ | wc -l`
|
||||
if ($ok == 0) goto fin
|
||||
|
||||
|
||||
#
|
||||
# select by similarity
|
||||
#
|
||||
|
||||
Notify "select by similarity"
|
||||
|
||||
foreach f (E_$$/*.fst)
|
||||
set nom = `basename $f:r`
|
||||
|
||||
Notify " blasting $nom"
|
||||
|
||||
makeblastdb -dbtype 'prot' -in $f >>& db.log
|
||||
blastp -db $f -query $f -outfmt 7 > $f.blast.out
|
||||
\rm -f $f.p??
|
||||
|
||||
$AwkCmd -v COVMIN=$Covmin -v PMAX=$Pmax -v IDMIN=$Idmin \
|
||||
-f $LIB_DIR/db.blastlink.awk $f.blast.out |\
|
||||
$AwkCmd -f $LIB_DIR/db.todl.awk > G_$$
|
||||
|
||||
($LIB_DIR/db.cc.r G_$$ > $f.cc.txt) >>& db.log
|
||||
|
||||
# db.reportcc.awk calls asort(), which plain awk may not provide:
# run it with $AwkCmd like the other lib scripts
$AwkCmd -v NAME=$nom -f $LIB_DIR/db.reportcc.awk $f.cc.txt >> $OutLog
|
||||
|
||||
$AwkCmd -f $LIB_DIR/db.selcc.awk $f.cc.txt > S_$$
|
||||
$AwkCmd -v FILE=S_$$ -f $LIB_DIR/db.subdb.awk $f > F_$$/$nom.fst
|
||||
|
||||
Report F_$$/$nom.fst "similarity_filter"
|
||||
|
||||
set n = `egrep '^>' F_$$/$nom.fst | wc -l`
|
||||
Notify " blast filter : $nom : $n"
|
||||
if ($n <= $Sizmin) \rm -f F_$$/$nom.fst
|
||||
|
||||
end
|
||||
|
||||
# stop here when no family survived the similarity filter: the results of
# this step are in F_$$ (D_$$ only holds the pattern-filter output)
set ok = `ls F_$$ | wc -l`
if ($ok == 0) goto fin
|
||||
|
||||
#
|
||||
# annotations
|
||||
#
|
||||
|
||||
echo -n "" > J_$$
|
||||
|
||||
foreach f (F_$$/*.fst)
|
||||
$AwkCmd -f $LIB_DIR/db.annot.awk $f >> J_$$
|
||||
end
|
||||
|
||||
awk '(NF >= 3) {print $1, $NF}' $PatFile | sort > A_$$
|
||||
sort J_$$ | egrep -v '^ *$' > B_$$
|
||||
join A_$$ B_$$ > F_$$/Annot.lst
|
||||
|
||||
#
|
||||
# copy files
|
||||
#
|
||||
|
||||
set n = `ls F_$$/* | wc -l`
|
||||
Notify "copy $n files to $OutDir"
|
||||
|
||||
\mv -f F_$$/* $OutDir
|
||||
|
||||
#
|
||||
# end
|
||||
#
|
||||
|
||||
fin:
|
||||
Notify "output directory : $OutDir"
|
||||
|
||||
\rm -r ?_$$
|
||||
|
||||
|
||||
exit 0
|
39
detectors/cds/tools/chlorodb/subdb/lib/db.annot.awk
Normal file
39
detectors/cds/tools/chlorodb/subdb/lib/db.annot.awk
Normal file
@ -0,0 +1,39 @@
|
||||
#
|
||||
|
||||
/^>/ {
|
||||
N++
|
||||
na = split($1, a, "@")
|
||||
if (a[na-1] > NEXMAX) NEXMAX = a[na-1]
|
||||
NEX[a[na-1]]++
|
||||
ANNOT[$NF]++
|
||||
}
|
||||
|
||||
END {
|
||||
na = split(FILENAME, a, "/")
|
||||
na = split(a[na], a, "\\.")
|
||||
printf("%s %d ", a[1], N)
|
||||
s = ""
|
||||
for (i = 1 ; i <= NEXMAX ; i ++) {
|
||||
if (NEX[i] != 0)
|
||||
s = s "" i ":" NEX[i] "_"
|
||||
}
|
||||
gsub("_+$", "", s)
|
||||
printf("%s ", s)
|
||||
|
||||
s = (NEXMAX == 1) ? "MONEX" : "POLYEX"
|
||||
printf("%s ", s)
|
||||
|
||||
nmax = 0
|
||||
amax = "none"
|
||||
for (e in ANNOT) {
|
||||
if (ANNOT[e] > nmax) {
|
||||
nmax = ANNOT[e]
|
||||
amax = e
|
||||
}
|
||||
}
|
||||
print amax
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
48
detectors/cds/tools/chlorodb/subdb/lib/db.blastlink.awk
Normal file
48
detectors/cds/tools/chlorodb/subdb/lib/db.blastlink.awk
Normal file
@ -0,0 +1,48 @@
|
||||
#
|
||||
|
||||
# smallest of two values (y is returned when x is not strictly smaller)
function min(x, y) {
    if (x < y)
        return x
    return y
}
|
||||
|
||||
BEGIN {
|
||||
if (COVMIN == "") COVMIN = 50
|
||||
if (PMAX == "") PMAX = 1e-6
|
||||
if (IDMIN == "") IDMIN = 30
|
||||
}
|
||||
|
||||
/^#/ {
|
||||
hitnum = 0;
|
||||
next;
|
||||
}
|
||||
|
||||
{
|
||||
if ($1 == $2) next
|
||||
|
||||
hitnum++;
|
||||
|
||||
na = split($1, a, "@");
|
||||
if (na < 2) {
|
||||
print "query file not properly formatted" > "/dev/stderr"
|
||||
exit(1);
|
||||
}
|
||||
len1 = a[na];
|
||||
|
||||
na = split($2, a, "@");
|
||||
if (na < 2) {
|
||||
print "bank file not properly formatted" > "/dev/stderr"
|
||||
exit(1);
|
||||
}
|
||||
len2 = a[na];
|
||||
|
||||
id = $3 + 0.0;
|
||||
ali = $4;
|
||||
|
||||
covmin = ali * 100. / min(len1, len2);
|
||||
|
||||
proba = $11 + 0.0;
|
||||
|
||||
if ((covmin > COVMIN) && ((proba < PMAX) || (proba == 0)) && (id > IDMIN)) {
|
||||
print $1, $2, hitnum, id, covmin, proba, ali, len1, len2;
|
||||
}
|
||||
}
|
||||
|
18
detectors/cds/tools/chlorodb/subdb/lib/db.cc.r
Executable file
18
detectors/cds/tools/chlorodb/subdb/lib/db.cc.r
Executable file
@ -0,0 +1,18 @@
|
||||
#!/usr/bin/env Rscript
|
||||
#
|
||||
|
||||
# Compute the connected components of a graph read in 'dl' format and
# write one "<vertex name> <component id>" line per vertex on stdout.
#
# usage: db.cc.r [graph.dl]

# library() stops immediately when igraph is missing, whereas require()
# only returns FALSE and lets the script fail later on an unknown function
library(igraph, warn.conflicts = FALSE)

args <- commandArgs(trailingOnly = TRUE)
path <- if (length(args) > 0) args[1] else "graph.dl"

g <- read.graph(path, format = "dl")

cc <- clusters(g)

# two columns: vertex name, id of the component it belongs to
res <- cbind(V(g)$name, membership(cc))

write.table(res, quote = FALSE, row.names = FALSE, col.names = FALSE)

quit(save = "no")
|
||||
|
19
detectors/cds/tools/chlorodb/subdb/lib/db.filter.len.r
Executable file
19
detectors/cds/tools/chlorodb/subdb/lib/db.filter.len.r
Executable file
@ -0,0 +1,19 @@
|
||||
#!/usr/bin/env Rscript
|
||||
#
|
||||
|
||||
# Flag sequences whose length is close enough to the median length.
#
# usage: db.filter.len.r [len.txt [delta]]
#   len.txt : table with a header line and (at least) a 'len' column
#   delta   : maximal relative deviation from the median length
#             (default 0.5; 'Inf' accepts every sequence)
#
# The input table is written on stdout with an additional logical column
# 'ok' (TRUE when |len - median| / median <= delta).

args <- commandArgs(trailingOnly = TRUE)
path <- if (length(args) > 0) args[1] else "len.txt"

# command-line arguments are character strings: convert delta once, so the
# comparison below is numeric and not a string comparison
delta <- if (length(args) > 1) as.numeric(args[2]) else 0.5
if (is.na(delta)) {
  stop("delta must be a number", call. = FALSE)
}

tab <- read.table(path, header = TRUE)

lmed <- median(tab$len)

tab$ok <- (abs(tab$len - lmed) / lmed) <= delta

write.table(tab, quote = FALSE)

quit(save = "no")
|
||||
|
10
detectors/cds/tools/chlorodb/subdb/lib/db.filter.pat.awk
Normal file
10
detectors/cds/tools/chlorodb/subdb/lib/db.filter.pat.awk
Normal file
@ -0,0 +1,10 @@
|
||||
#
|
||||
|
||||
/^>/ {
|
||||
split($1, a, "@")
|
||||
ok = a[3] ~ PAT
|
||||
}
|
||||
|
||||
ok {
|
||||
print $0
|
||||
}
|
30
detectors/cds/tools/chlorodb/subdb/lib/db.filter.sym.awk
Normal file
30
detectors/cds/tools/chlorodb/subdb/lib/db.filter.sym.awk
Normal file
@ -0,0 +1,30 @@
|
||||
#
|
||||
#
|
||||
#
|
||||
|
||||
function Check(seq) {
|
||||
if (seq == "") return 0
|
||||
gsub("[ACDEFGHIKLMNPQRSTVWXY\n]+", "", seq)
|
||||
return (length(seq) == 0)
|
||||
}
|
||||
|
||||
/^>/ {
|
||||
if (Check(Seq)) {
|
||||
print Name
|
||||
printf("%s", Seq)
|
||||
}
|
||||
Name = $0
|
||||
Seq = ""
|
||||
next
|
||||
}
|
||||
|
||||
{
|
||||
Seq = Seq "" $0 "\n"
|
||||
}
|
||||
|
||||
END {
|
||||
if (Check(Seq)) {
|
||||
print Name
|
||||
printf("%s", Seq)
|
||||
}
|
||||
}
|
10
detectors/cds/tools/chlorodb/subdb/lib/db.getlen.awk
Normal file
10
detectors/cds/tools/chlorodb/subdb/lib/db.getlen.awk
Normal file
@ -0,0 +1,10 @@
|
||||
#
|
||||
BEGIN {
|
||||
print "id len"
|
||||
}
|
||||
|
||||
/^>/ {
|
||||
na = split($1, a, "@")
|
||||
print substr($1, 2), a[na]
|
||||
}
|
||||
|
15
detectors/cds/tools/chlorodb/subdb/lib/db.reportcc.awk
Normal file
15
detectors/cds/tools/chlorodb/subdb/lib/db.reportcc.awk
Normal file
@ -0,0 +1,15 @@
|
||||
#
|
||||
#
|
||||
|
||||
{
|
||||
cnt[$NF]++
|
||||
}
|
||||
|
||||
END {
|
||||
n = asort(cnt)
|
||||
printf("cc_size %s", NAME)
|
||||
for (i = n ; i >= 1 ; i--)
|
||||
printf(" %d", cnt[i])
|
||||
print ""
|
||||
}
|
||||
|
19
detectors/cds/tools/chlorodb/subdb/lib/db.selcc.awk
Normal file
19
detectors/cds/tools/chlorodb/subdb/lib/db.selcc.awk
Normal file
@ -0,0 +1,19 @@
|
||||
#
|
||||
|
||||
# input lines: <name> ... <component id> (component id is the last field)
# record each name under its component, in order of appearance
{
    cc = $NF
    rank = ++Count[cc]
    Member[cc, rank] = $1
}

# print the names belonging to the largest component, one per line
END {
    best = 1
    bestSize = Count[1]
    for (cc in Count) {
        if (Count[cc] > bestSize) {
            bestSize = Count[cc]
            best = cc
        }
    }
    for (k = 1; k <= bestSize; k++)
        print Member[best, k]
}
|
17
detectors/cds/tools/chlorodb/subdb/lib/db.subdb.awk
Normal file
17
detectors/cds/tools/chlorodb/subdb/lib/db.subdb.awk
Normal file
@ -0,0 +1,17 @@
|
||||
#
|
||||
|
||||
# load the names to keep (first field of each line of FILE) into INC
BEGIN {
    if (FILE == "") FILE = "db.sel.txt"
    # getline returns -1 when FILE cannot be read: test "> 0" explicitly,
    # a bare "while (getline < FILE)" would then loop forever
    while ((getline < FILE) > 0)
        INC[$1] = $1
    close(FILE)
}
|
||||
|
||||
/^>/ {
|
||||
name = substr($1, 2)
|
||||
ok = name in INC
|
||||
}
|
||||
|
||||
ok {
|
||||
print $0
|
||||
}
|
21
detectors/cds/tools/chlorodb/subdb/lib/db.todl.awk
Normal file
21
detectors/cds/tools/chlorodb/subdb/lib/db.todl.awk
Normal file
@ -0,0 +1,21 @@
|
||||
#
|
||||
|
||||
{
|
||||
node[$1]++
|
||||
node[$2]++
|
||||
link[++M] = $1 " " $2
|
||||
}
|
||||
|
||||
|
||||
END {
|
||||
for (n in node)
|
||||
N++
|
||||
print "DL n=" N
|
||||
print "format = edgelist1"
|
||||
print "labels embedded:"
|
||||
print "data:"
|
||||
for (i = 1 ; i <= M ; i++)
|
||||
print link[i]
|
||||
}
|
||||
|
||||
|
@ -47,8 +47,8 @@ $AwkCmd -f $LIB_DIR/libutil.awk -f $LIB_DIR/$PrdType.cds_short.awk > P_$$
|
||||
|
||||
Notify "compare bank to predictions"
|
||||
|
||||
$AwkCmd -f $LIB_DIR/libnws.awk \
|
||||
-f $LIB_DIR/compareCds.awk \
|
||||
$AwkCmd -f $LIB_DIR/libnws.awk \
|
||||
-f $LIB_DIR/compare.cds.awk \
|
||||
R_$$ P_$$ > S_$$
|
||||
|
||||
# base statistics
|
||||
|
@ -15,6 +15,8 @@ NeedArg 1
|
||||
|
||||
egrep '^#|^MATCH' $* | awk -f $LIB_DIR/summary.cmp.awk > compare.txt
|
||||
|
||||
Notify "text file: compare.txt"
|
||||
|
||||
$LIB_DIR/summarize_cmp.r
|
||||
|
||||
|
||||
|
19
detectors/cds/tools/lib/cds2fasta.awk
Normal file
19
detectors/cds/tools/lib/cds2fasta.awk
Normal file
@ -0,0 +1,19 @@
|
||||
#
|
||||
# get fasta sequence from cds list
|
||||
#
|
||||
# [-v FIELD=13] CDS sequence
|
||||
# [-v FIELD=14] Prot Sequence
|
||||
#
|
||||
|
||||
BEGIN {
|
||||
if (CHARPERLINE == "") CHARPERLINE = 50
|
||||
if (FIELD == "") FIELD = 14
|
||||
}
|
||||
|
||||
/^#/ { next }
|
||||
|
||||
{
|
||||
name = $1 "@" $2 "@" $3 "@" $5 "@" $6 "@" $7 "@" $8 "@" int($9/3)
|
||||
comment = $NF
|
||||
PrintFasta($FIELD, name " " comment)
|
||||
}
|
97
detectors/cds/tools/lib/embl.cds_long.awk
Normal file
97
detectors/cds/tools/lib/embl.cds_long.awk
Normal file
@ -0,0 +1,97 @@
|
||||
#
|
||||
# get cds features from embl (long version)
|
||||
#
|
||||
# -v FASTA
|
||||
|
||||
# @include lib.embl.awk
|
||||
|
||||
|
||||
BEGIN {
|
||||
print "#locus locustag genefam gene from to strand nexon length status start stop dnaseq protseq product"
|
||||
|
||||
if (HEADONLY != "") exit(0)
|
||||
|
||||
if (MAXSPAN == "") MAXSPAN = 10000
|
||||
|
||||
if (FASTA == "") Error("No FASTA file specified", 1)
|
||||
|
||||
if (! TestPath(FASTA)) Error("Fasta file: '" FASTA "' not found", 1)
|
||||
|
||||
Seq = tolower(ReadFasta(FASTA))
|
||||
}
|
||||
|
||||
/^ID / {
|
||||
locus = $2
|
||||
gsub(";", "", locus)
|
||||
incds = 0
|
||||
next
|
||||
}
|
||||
|
||||
/^FT CDS/ {
|
||||
revstrand = match($3, "^complement")
|
||||
s = substr($0, 22)
|
||||
gsub("^complement", "", s)
|
||||
ok = ! match(s, "complement|order")
|
||||
nexon = Nexons(s)
|
||||
SpanLocation(s, sloc)
|
||||
spanlen = sloc[2] - sloc[1] + 1
|
||||
len = LenLocation(s)
|
||||
ok = ok && (len < MAXSPAN)
|
||||
cdsseq = ok ? SeqLocation(Seq, s, revstrand) : "XXX"
|
||||
cstart = substr(cdsseq, 1,3)
|
||||
cstop = substr(cdsseq, length(cdsseq)-2)
|
||||
|
||||
gene = "none"
|
||||
locustag = "none"
|
||||
product = "none"
|
||||
translation = "X"
|
||||
incds = 1
|
||||
next
|
||||
}
|
||||
|
||||
(incds && /^FT [^ ]/) {
|
||||
print locus, locustag, GeneFamily(gene), gene, sloc[1], sloc[2], (revstrand ? "R" : "D"),
|
||||
nexon, len, (ok ? "Ok" : "Error"), cstart, cstop, cdsseq, translation, product
|
||||
incds = 0
|
||||
next
|
||||
}
|
||||
|
||||
/^FT \/gene=/ {
|
||||
split($0, a, "=")
|
||||
gene = a[2]
|
||||
gsub("^[^a-z,A-Z]+", "", gene)
|
||||
gsub("\"", "", gene)
|
||||
gsub(" ", "_", gene)
|
||||
next
|
||||
}
|
||||
|
||||
/^FT \/locus_tag=/ {
|
||||
split($0, a, "=")
|
||||
locustag = a[2]
|
||||
gsub("\"", "", locustag)
|
||||
gsub(" ", "_", locustag)
|
||||
next
|
||||
}
|
||||
|
||||
/^FT \/product=/ {
|
||||
split($0, a, "=")
|
||||
product = a[2]
|
||||
gsub("\"", "", product)
|
||||
gsub(" ", "_", product)
|
||||
next
|
||||
}
|
||||
|
||||
/^FT \/translation=/ {
|
||||
split($0, a, "=")
|
||||
translation = a[2]
|
||||
gsub("\"", "", translation)
|
||||
gsub(" ", "", translation)
|
||||
next
|
||||
}
|
||||
|
||||
END {
|
||||
if (incds) {
|
||||
print locus, locustag, GeneFamily(gene), gene, sloc[1], sloc[2], (revstrand ? "R" : "D"),
|
||||
nexon, len, (ok ? "Ok" : "Error"), cstart, cstop, cdsseq, translation, product
|
||||
}
|
||||
}
|
97
detectors/cds/tools/lib/embl.info.awk
Normal file
97
detectors/cds/tools/lib/embl.info.awk
Normal file
@ -0,0 +1,97 @@
|
||||
#
|
||||
# get feature info from embl
|
||||
#
|
||||
|
||||
# @include libgbk.awk
|
||||
|
||||
function GC(s, _local_, i, len) {
|
||||
s = toupper(s)
|
||||
len = length(s)
|
||||
gsub("G|C", "", s)
|
||||
return ((len - length(s)) * 100 / (len ? len : 1))
|
||||
}
|
||||
|
||||
#
|
||||
# rules
|
||||
#
|
||||
|
||||
BEGIN {
|
||||
print "#locus orga len oklen gc nbCds nbCds_int0 nbCds_int1 nbCds_intsup1 perCds_noex meanCdsSize nbtRNA nbrRNA nboRNA"
|
||||
}
|
||||
|
||||
/^ID/ {
|
||||
locus = $2
|
||||
gsub(";", "", locus)
|
||||
next
|
||||
}
|
||||
|
||||
/^OS/ {
|
||||
orga = substr($0, 6)
|
||||
gsub(" ", "_", orga)
|
||||
next
|
||||
}
|
||||
|
||||
/^FT source/ {
|
||||
GetLoc($3, loc);
|
||||
len = loc[2];
|
||||
next
|
||||
}
|
||||
|
||||
/^FT CDS/ {
|
||||
meanCds = meanCds * nbCds + LenLocation($3)
|
||||
nbCds++
|
||||
meanCds /= nbCds
|
||||
n = Nexons($3)
|
||||
if (n > 3) n = 3
|
||||
nbCdx[n]++
|
||||
next
|
||||
}
|
||||
|
||||
/^FT tRNA/ {
|
||||
nbTrna++
|
||||
next
|
||||
}
|
||||
|
||||
/^FT rRNA/ {
|
||||
nbRrna++
|
||||
next
|
||||
}
|
||||
|
||||
/^FT mRNA/ {
|
||||
next
|
||||
}
|
||||
|
||||
/^FT .*RNA/ {
|
||||
nbOrna++
|
||||
next
|
||||
}
|
||||
|
||||
/^SQ / {
|
||||
inseq = 1
|
||||
seq = ""
|
||||
next
|
||||
}
|
||||
|
||||
inseq && /^ / {
|
||||
s = $0
|
||||
gsub("[0-9]+", "", s)
|
||||
gsub(" ", "", s)
|
||||
seq = seq "" s
|
||||
next
|
||||
}
|
||||
|
||||
/^\/\// {
|
||||
oklen = (len == length(seq) ? "ok" : "wrong")
|
||||
gc = GC(seq)
|
||||
print locus, orga, len, oklen, gc, nbCds+0, nbCdx[1]+0, \
|
||||
nbCdx[2]+0, nbCdx[3]+0, (nbCdx[1]+0)*100/Max(1, nbCds+0), \
|
||||
meanCds+0, nbTrna+0, nbRrna+0, nbOrna+0
|
||||
nbCds = nbTrna = nbRrna = nbOrna = len = inseq = meanCds = 0
|
||||
delete nbCdx
|
||||
orga = locus = "?"
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
97
detectors/cds/tools/lib/embl.intron.awk
Normal file
97
detectors/cds/tools/lib/embl.intron.awk
Normal file
@ -0,0 +1,97 @@
|
||||
#
|
||||
# get intron features from embl
|
||||
#
|
||||
|
||||
# @include libembl.awk
|
||||
|
||||
BEGIN {
|
||||
print "#locus locustag genefam gene from to strand intron_num intron_nb acceptor-donor status"
|
||||
|
||||
if (HEADONLY != "") exit(0)
|
||||
|
||||
if (FASTA == "") Error("No FASTA file specified", 1)
|
||||
|
||||
if (! TestPath(FASTA)) Error("Fasta file: '" FASTA "' not found", 1)
|
||||
|
||||
Seq = tolower(ReadFasta(FASTA))
|
||||
}
|
||||
|
||||
/^ID / {
|
||||
locus = $2
|
||||
gsub(";", "", locus)
|
||||
next
|
||||
}
|
||||
|
||||
/^FT CDS/ {
|
||||
revstrand = match($3, "^complement")
|
||||
s = substr($0, 22)
|
||||
gsub("^complement", "", s)
|
||||
ok = ! match(s, "complement|order")
|
||||
if (! ok) next
|
||||
|
||||
na = ParseLocation(s, locs)
|
||||
if (na < 2) next
|
||||
|
||||
delete SINfo
|
||||
Ninfo = 0
|
||||
|
||||
val = locs[1][1]
|
||||
for (i = 2 ; i <= na ; i++) {
|
||||
if (locs[i][1] < val) ok = 0
|
||||
val = locs[i][1]
|
||||
}
|
||||
if (! ok) next
|
||||
|
||||
val = locs[1][2]
|
||||
for (i = 2 ; i <= na ; i++) {
|
||||
if (locs[i][2] < val) ok = 0
|
||||
val = locs[i][2]
|
||||
}
|
||||
if (! ok) next
|
||||
|
||||
from = locs[1][2] + 1
|
||||
for (i = 2 ; i <= na ; i++) {
|
||||
to = locs[i][1] - 1
|
||||
inseq = SeqLocation(Seq, (from - 4) ".." (to + 4), revstrand)
|
||||
SINfo[++Ninfo] = from " " to " " (revstrand ? "R" : "D") " "\
|
||||
(revstrand ? na-i+1 : i-1) " " na-1 " "\
|
||||
substr(inseq, 1,4) "."\
|
||||
substr(inseq, 5,6) "-"\
|
||||
substr(inseq, length(inseq)-9, 6) "."\
|
||||
substr(inseq, length(inseq)-3, 4) " "\
|
||||
"ok"
|
||||
from = locs[i][2] + 1
|
||||
}
|
||||
|
||||
gene = "none"
|
||||
locustag = "none"
|
||||
next
|
||||
}
|
||||
|
||||
/^FT \/gene=/ {
|
||||
split($0, a, "=")
|
||||
gene = a[2]
|
||||
gsub("^[^a-z,A-Z]+", "", gene)
|
||||
gsub("\"", "", gene)
|
||||
gsub(" ", "_", gene)
|
||||
next
|
||||
}
|
||||
|
||||
/^FT \/locus_tag=/ {
|
||||
split($0, a, "=")
|
||||
locustag = a[2]
|
||||
gsub("\"", "", locustag)
|
||||
gsub(" ", "_", locustag)
|
||||
next
|
||||
}
|
||||
|
||||
/^FT \/translation=/ {
|
||||
for (i = 1 ; i <= Ninfo ; i++)
|
||||
print locus, locustag, GeneFamily(gene), gene, SINfo[i]
|
||||
Ninfo = 0
|
||||
next
|
||||
}
|
||||
|
||||
/^\/\// {
|
||||
locus = "?"
|
||||
}
|
36
detectors/cds/tools/lib/embl.tofasta.awk
Normal file
36
detectors/cds/tools/lib/embl.tofasta.awk
Normal file
@ -0,0 +1,36 @@
|
||||
#
|
||||
# get fasta sequence from embl
|
||||
#
|
||||
|
||||
/^ID / {
|
||||
locus = $2
|
||||
gsub(";", "", locus)
|
||||
next
|
||||
}
|
||||
|
||||
/^SQ / {
|
||||
inseq = 1
|
||||
nln = 0
|
||||
delete seq
|
||||
next
|
||||
}
|
||||
|
||||
/^\/\// {
|
||||
inseq = 0
|
||||
print ">" locus
|
||||
for (i = 1 ; i <= nln ; i++)
|
||||
print seq[i]
|
||||
next
|
||||
}
|
||||
|
||||
inseq {
|
||||
s = $0
|
||||
gsub(" ", "", s)
|
||||
gsub("[0-9]+", "", s)
|
||||
seq[++nln] = s
|
||||
next
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
99
detectors/cds/tools/lib/gbk.cds_long.awk
Normal file
99
detectors/cds/tools/lib/gbk.cds_long.awk
Normal file
@ -0,0 +1,99 @@
|
||||
#
|
||||
# get cds features from genbank (long version)
|
||||
#
|
||||
# -v FASTA
|
||||
|
||||
# @include libgbk.awk
|
||||
|
||||
BEGIN {
|
||||
print "#locus locustag genefam gene from to strand nexon length status start stop dnaseq protseq product"
|
||||
|
||||
if (HEADONLY != "") exit(0)
|
||||
|
||||
if (MAXSPAN == "") MAXSPAN = 10000
|
||||
|
||||
if (FASTA == "") Error("No FASTA file specified", 1)
|
||||
|
||||
if (! TestPath(FASTA)) Error("Fasta file: '" FASTA "' not found", 1)
|
||||
|
||||
Seq = tolower(ReadFasta(FASTA))
|
||||
}
|
||||
|
||||
/^LOCUS/ {
|
||||
locus = $2
|
||||
incds = 0
|
||||
next
|
||||
}
|
||||
|
||||
/^ CDS/ {
|
||||
revstrand = match($2, "^complement")
|
||||
s = substr($0, 22)
|
||||
gsub("^complement", "", s)
|
||||
ok = ! match(s, "complement|order")
|
||||
nexon = Nexons(s)
|
||||
SpanLocation(s, sloc)
|
||||
spanlen = sloc[2] - sloc[1] + 1
|
||||
len = LenLocation(s)
|
||||
ok = ok && (len < MAXSPAN)
|
||||
cdsseq = ok ? SeqLocation(Seq, s, revstrand) : "XXX"
|
||||
cstart = substr(cdsseq, 1,3)
|
||||
cstop = substr(cdsseq, length(cdsseq)-2)
|
||||
|
||||
gene = "none"
|
||||
locustag = "none"
|
||||
product = "none"
|
||||
translation = "X"
|
||||
incds = 1
|
||||
next
|
||||
}
|
||||
|
||||
(incds && /^ [^ ]/) {
|
||||
print locus, locustag, GeneFamily(gene), gene, sloc[1], sloc[2], (revstrand ? "R" : "D"),
|
||||
nexon, len, (ok ? "Ok" : "Error"), cstart, cstop, cdsseq, translation, product
|
||||
incds = 0
|
||||
next
|
||||
}
|
||||
|
||||
/^ \/gene=/ {
|
||||
split($0, a, "=")
|
||||
gene = a[2]
|
||||
gsub("^[^a-z,A-Z]+", "", gene)
|
||||
gsub("\"", "", gene)
|
||||
gsub(" ", "_", gene)
|
||||
next
|
||||
}
|
||||
|
||||
/^ \/locus_tag=/ {
|
||||
split($0, a, "=")
|
||||
locustag = a[2]
|
||||
gsub("\"", "", locustag)
|
||||
gsub(" ", "_", locustag)
|
||||
next
|
||||
}
|
||||
|
||||
/^ \/product=/ {
|
||||
split($0, a, "=")
|
||||
product = a[2]
|
||||
gsub("\"", "", product)
|
||||
gsub(" ", "_", product)
|
||||
next
|
||||
}
|
||||
|
||||
/^ \/translation=/ {
|
||||
split($0, a, "=")
|
||||
translation = a[2]
|
||||
gsub("\"", "", translation)
|
||||
gsub(" ", "", translation)
|
||||
next
|
||||
}
|
||||
|
||||
/^\/\// {
|
||||
locus = "?"
|
||||
}
|
||||
|
||||
END {
|
||||
if (incds) {
|
||||
print locus, locustag, GeneFamily(gene), gene, sloc[1], sloc[2], (revstrand ? "R" : "D"),
|
||||
nexon, len, (ok ? "Ok" : "Error"), cstart, cstop, cdsseq, translation, product
|
||||
}
|
||||
}
|
97
detectors/cds/tools/lib/gbk.info.awk
Normal file
97
detectors/cds/tools/lib/gbk.info.awk
Normal file
@ -0,0 +1,97 @@
|
||||
#
|
||||
# get feature info from genbank
|
||||
#
|
||||
|
||||
# @include libgbk.awk
|
||||
|
||||
#
# GC content of sequence s, in percent (case-insensitive).
# Returns 0 for an empty sequence instead of dividing by zero.
# Parameters after _local_ are local variables (awk convention).
#
function GC(s, _local_, i, len) {
    s = toupper(s)
    len = length(s)
    # an empty sequence has no defined GC content; avoid a fatal
    # division by zero and report 0
    if (len == 0) return 0
    # removing every G and C leaves only the non-GC symbols
    gsub("G|C", "", s)
    return ((len - length(s)) * 100 / len)
}
|
||||
|
||||
#
|
||||
# rules
|
||||
#
|
||||
|
||||
BEGIN {
|
||||
print "#locus orga len oklen gc nbCds nbCds_int0 nbCds_int1 nbCds_intsup1 perCds_noex meanCdsSize nbtRNA nbrRNA nboRNA"
|
||||
}
|
||||
|
||||
/^LOCUS/ {
|
||||
locus = $2
|
||||
next
|
||||
}
|
||||
|
||||
/^ ORGANISM/ {
|
||||
orga = substr($0, 13)
|
||||
split(orga, a, ";")
|
||||
orga = a[1]
|
||||
gsub(" ", "_", orga)
|
||||
next
|
||||
}
|
||||
|
||||
/^ source/ {
|
||||
GetLoc($2, loc);
|
||||
len = loc[2];
|
||||
next
|
||||
}
|
||||
|
||||
/^ CDS/ {
|
||||
meanCds = meanCds * nbCds + LenLocation($2)
|
||||
nbCds++
|
||||
meanCds /= nbCds
|
||||
n = Nexons($2)
|
||||
if (n > 3) n = 3
|
||||
nbCdx[n]++
|
||||
next
|
||||
}
|
||||
|
||||
/^ tRNA/ {
|
||||
nbTrna++
|
||||
next
|
||||
}
|
||||
|
||||
/^ rRNA/ {
|
||||
nbRrna++
|
||||
next
|
||||
}
|
||||
|
||||
/^ mRNA/ {
|
||||
next
|
||||
}
|
||||
|
||||
/^ .*RNA/ {
|
||||
nbOrna++
|
||||
next
|
||||
}
|
||||
|
||||
/^ORIGIN/ {
|
||||
inseq = 1
|
||||
seq = ""
|
||||
next
|
||||
}
|
||||
|
||||
inseq && /^ +[1-9][0-9]*/ {
|
||||
s = substr($0, 11)
|
||||
gsub(" ", "", s)
|
||||
seq = seq "" s
|
||||
next
|
||||
}
|
||||
|
||||
#
# end of record: print the summary line, then reset per-record state
#
/^\/\// {
    oklen = (len == length(seq) ? "ok" : "wrong")
    # GC() divides by the sequence length: skip the call when no
    # sequence was collected for this record
    gc = (length(seq) > 0 ? GC(seq) : 0)
    print locus, orga, len, oklen, gc, nbCds+0, nbCdx[1]+0, \
          nbCdx[2]+0, nbCdx[3]+0, (nbCdx[1]+0)*100/Max(1, nbCds+0), \
          meanCds+0, nbTrna+0, nbRrna+0, nbOrna+0
    nbCds = nbTrna = nbRrna = nbOrna = len = inseq = meanCds = 0
    delete nbCdx
    # seq is otherwise only cleared by the ORIGIN rule: clear it here too
    # so a following record without ORIGIN does not reuse this sequence
    seq = ""
    orga = locus = "?"
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
96
detectors/cds/tools/lib/gbk.intron.awk
Normal file
96
detectors/cds/tools/lib/gbk.intron.awk
Normal file
@ -0,0 +1,96 @@
|
||||
#
|
||||
# get intron features from genbank
|
||||
#
|
||||
|
||||
# @include libgbk.awk
|
||||
|
||||
BEGIN {
|
||||
print "#locus locustag genefam gene from to strand intron_num intron_nb acceptor-donor status"
|
||||
|
||||
if (HEADONLY != "") exit(0)
|
||||
|
||||
if (FASTA == "") Error("No FASTA file specified", 1)
|
||||
|
||||
if (! TestPath(FASTA)) Error("Fasta file: '" FASTA "' not found", 1)
|
||||
|
||||
Seq = tolower(ReadFasta(FASTA))
|
||||
}
|
||||
|
||||
/^LOCUS/ {
|
||||
locus = $2
|
||||
next
|
||||
}
|
||||
|
||||
/^ CDS/ {
|
||||
revstrand = match($2, "^complement")
|
||||
s = substr($0, 22)
|
||||
gsub("^complement", "", s)
|
||||
ok = ! match(s, "complement|order")
|
||||
if (! ok) next
|
||||
|
||||
na = ParseLocation(s, locs)
|
||||
if (na < 2) next
|
||||
|
||||
delete SINfo
|
||||
Ninfo = 0
|
||||
|
||||
val = locs[1][1]
|
||||
for (i = 2 ; i <= na ; i++) {
|
||||
if (locs[i][1] < val) ok = 0
|
||||
val = locs[i][1]
|
||||
}
|
||||
if (! ok) next
|
||||
|
||||
val = locs[1][2]
|
||||
for (i = 2 ; i <= na ; i++) {
|
||||
if (locs[i][2] < val) ok = 0
|
||||
val = locs[i][2]
|
||||
}
|
||||
if (! ok) next
|
||||
|
||||
from = locs[1][2] + 1
|
||||
for (i = 2 ; i <= na ; i++) {
|
||||
to = locs[i][1] - 1
|
||||
inseq = SeqLocation(Seq, (from - 4) ".." (to + 4), revstrand)
|
||||
SINfo[++Ninfo] = from " " to " " (revstrand ? "R" : "D") " "\
|
||||
(revstrand ? na-i+1 : i-1) " " na-1 " "\
|
||||
substr(inseq, 1,4) "."\
|
||||
substr(inseq, 5,6) "-"\
|
||||
substr(inseq, length(inseq)-9, 6) "."\
|
||||
substr(inseq, length(inseq)-3, 4) " "\
|
||||
"ok"
|
||||
from = locs[i][2] + 1
|
||||
}
|
||||
|
||||
gene = "none"
|
||||
locustag = "none"
|
||||
next
|
||||
}
|
||||
|
||||
/^ \/gene=/ {
|
||||
split($0, a, "=")
|
||||
gene = a[2]
|
||||
gsub("^[^a-z,A-Z]+", "", gene)
|
||||
gsub("\"", "", gene)
|
||||
gsub(" ", "_", gene)
|
||||
next
|
||||
}
|
||||
|
||||
/^ \/locus_tag=/ {
|
||||
split($0, a, "=")
|
||||
locustag = a[2]
|
||||
gsub("\"", "", locustag)
|
||||
gsub(" ", "_", locustag)
|
||||
next
|
||||
}
|
||||
|
||||
/^ \/translation=/ {
|
||||
for (i = 1 ; i <= Ninfo ; i++)
|
||||
print locus, locustag, GeneFamily(gene), gene, SINfo[i]
|
||||
Ninfo = 0
|
||||
next
|
||||
}
|
||||
|
||||
/^\/\// {
|
||||
locus = "?"
|
||||
}
|
32
detectors/cds/tools/lib/gbk.tofasta.awk
Normal file
32
detectors/cds/tools/lib/gbk.tofasta.awk
Normal file
@ -0,0 +1,32 @@
|
||||
#
|
||||
# get fasta sequence from genbank
|
||||
#
|
||||
|
||||
/^LOCUS/ {
|
||||
locus = $2
|
||||
next
|
||||
}
|
||||
|
||||
/^ORIGIN/ {
|
||||
inseq = 1
|
||||
nln = 0
|
||||
delete seq
|
||||
}
|
||||
|
||||
inseq && /^ +[1-9][0-9]*/ {
|
||||
s = substr($0, 11)
|
||||
gsub(" ", "", s)
|
||||
seq[++nln] = s
|
||||
next
|
||||
}
|
||||
|
||||
#
# end of record: emit the fasta entry, then reset the sequence state
#
/^\/\// {
    print ">" locus
    for (i = 1 ; i <= nln ; i++)
        print seq[i]
    # the state is otherwise only reset by the ORIGIN rule: clear it here
    # so a following record without ORIGIN does not print this sequence again
    inseq = 0
    nln = 0
    delete seq
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
31
detectors/cds/tools/lib/install.rpackages.r
Executable file
31
detectors/cds/tools/lib/install.rpackages.r
Executable file
@ -0,0 +1,31 @@
|
||||
#!/usr/bin/env Rscript
|
||||
#
|
||||
# check and install required packages
|
||||
#
|
||||
|
||||
# Write a progress line to stderr.
# The arguments are pasted together (no separator), prefixed with '+ '
# and terminated by a newline.
out <- function(...) {
  line <- paste0("+ ", ..., "\n")
  cat(line, file = stderr())
}
|
||||
|
||||
# Tell whether `package` is listed by installed.packages().
# Returns one logical per element of `package`.
installed <- function(package) {
  known <- rownames(installed.packages())
  is.element(package, known)
}
|
||||
|
||||
# Make sure `package` is available, installing it from `repos` when it
# is not already installed. Progress is reported through out().
# Returns (invisibly) whether the package is installed afterwards.
check <- function(package, repos="http://cran.univ-lyon1.fr") {
  if (!installed(package)) {
    out("Installing R package ", package, " from ", repos)
    install.packages(package, repos = repos)
  } else {
    out("R package ", package, " installed")
  }
  # re-query rather than assume the installation succeeded
  invisible(installed(package))
}
|
||||
|
||||
check("grid")
|
||||
check("gridExtra")
|
||||
check("vcd")
|
||||
check("plotrix")
|
||||
check("igraph")
|
||||
|
||||
quit(save='no', status=0)
|
||||
|
224
detectors/cds/tools/lib/make.models.r
Executable file
224
detectors/cds/tools/lib/make.models.r
Executable file
@ -0,0 +1,224 @@
|
||||
#!/usr/bin/env Rscript
|
||||
#
|
||||
# compute start, stop, splice-junctions models for core DB
|
||||
#
|
||||
# source("make.models.r")
|
||||
#
|
||||
|
||||
LIB_DIR <- Sys.getenv("LIB_DIR")
|
||||
if (LIB_DIR == "") LIB_DIR = "."
|
||||
|
||||
source(paste0(LIB_DIR, "/util.base.r"))
|
||||
source(paste0(LIB_DIR, "/util.cons.r"))
|
||||
source(paste0(LIB_DIR, "/util.modelio.r"))
|
||||
|
||||
# -------------------------------
|
||||
# parameters
|
||||
# -------------------------------
|
||||
|
||||
# core cutoffs
|
||||
|
||||
source("db.models.params.txt")
|
||||
|
||||
# -------------------------------
|
||||
# genome infos
|
||||
# -------------------------------
|
||||
|
||||
notify("loading info table")
|
||||
chromo <- read.table("db.info.txt", com="", head=T, stringsAsFactors=F)
|
||||
|
||||
# -------------------------------
|
||||
# CDS
|
||||
# -------------------------------
|
||||
|
||||
notify("loading cds table")
|
||||
cds <- read.table("db.cds.txt", com="", header=T, stringsAsFactors=F)
|
||||
|
||||
cds$start <- as.factor(cds$start)
|
||||
cds$stop <- as.factor(cds$stop)
|
||||
|
||||
cds <- cds[cds$status=="Ok",]
|
||||
cds <- cds[cds$genefam!="none",]
|
||||
|
||||
cds$categ <- "dust"
|
||||
|
||||
x <- sort(table(cds$genefam), dec=T)
|
||||
ok <- names(x[x >= CORE_NCDS_CUTOFF])
|
||||
|
||||
cds$categ[cds$genefam %in% ok] <- "core"
|
||||
|
||||
x <- x[! names(x) %in% ok]
|
||||
ok <- names(x[x >= SHEL_NCDS_CUTOFF])
|
||||
|
||||
cds$categ[cds$genefam %in% ok] <- "shell"
|
||||
|
||||
#
|
||||
|
||||
cds.ori <- cds
|
||||
|
||||
cds.lst <- split(cds.ori, cds.ori$categ)
|
||||
|
||||
#
|
||||
# write out families
|
||||
#
|
||||
|
||||
# patterns & names
|
||||
|
||||
invisible(lapply(cds.lst, function(cds) {
|
||||
|
||||
x <- sort(table(cds$genefam), decreasing=T)
|
||||
tab <- paste0("^", names(x), "$")
|
||||
names(tab) <- names(x)
|
||||
|
||||
y <- sapply(split(cds$gene, cds$genefam), function(g) {
|
||||
head(names(sort(table(g), decreasing=T)), 1)
|
||||
})
|
||||
|
||||
tab <- cbind(tab, y[names(x)])
|
||||
|
||||
categ <- unique(cds$categ)
|
||||
f <- paste0("db.", categ, ".pat.txt")
|
||||
notify("writing patterns for", categ, ":", f)
|
||||
write.table(tab, file=f, quote=F, col.names=F, row.names=T)
|
||||
}))
|
||||
|
||||
# -------------------------------
|
||||
# Start models (core only)
|
||||
# -------------------------------
|
||||
|
||||
if (! "core" %in% names(cds.lst)) {
|
||||
notify("*** no gene found in core")
|
||||
notify("*** please change parameters")
|
||||
quit(save='no', status=1)
|
||||
}
|
||||
|
||||
cds <- cds.lst[["core"]]
|
||||
|
||||
#
|
||||
# start by genes
|
||||
#
|
||||
|
||||
tab <- split(cds$start, cds$genefam)
|
||||
|
||||
fatg <- sapply(tab, function(x) table(x)["atg"]/length(x)*100)
|
||||
names(fatg) <- names(tab)
|
||||
|
||||
start.dft <- names(which(fatg >= CORE_START_ATG_CUTOFF))
|
||||
start.spc <- names(which(fatg < CORE_START_ATG_CUTOFF))
|
||||
|
||||
tab <- cds[cds$genefam %in% start.dft,]
|
||||
tab <- table(tab$start)
|
||||
|
||||
# default model
|
||||
|
||||
x <- sort(tab[tab>=CORE_START_DFT_CUTOFF], decreasing=T)
|
||||
write.model.start(x, "default")
|
||||
|
||||
# gene specific models
|
||||
|
||||
invisible(sapply(start.spc, function(g) {
|
||||
x <- cds[cds$genefam == g,]
|
||||
tx <- table(x$start)
|
||||
tx <- sort(tx[tx>=CORE_START_OTH_CUTOFF], decreasing=T)
|
||||
write.model.start(tx, g)
|
||||
}))
|
||||
|
||||
# -------------------------------
|
||||
# Stop models (core only)
|
||||
# -------------------------------
|
||||
|
||||
# write default stop model
|
||||
|
||||
tab <- table(cds$stop)
|
||||
x <- sort(tab[tab>=CORE_STOP_CUTOFF], decreasing=T)
|
||||
write.model.stop(x, "default")
|
||||
|
||||
# -------------------------------
|
||||
# splice junctions
|
||||
# -------------------------------
|
||||
|
||||
notify("loading intron table")
|
||||
intron <- read.table("db.intron.txt", com="", header=T, stringsAsFactors=F)
|
||||
|
||||
# remove invalid sequences
|
||||
|
||||
intron$seq <- gsub("\\.|-", "", intron$acceptor.donor)
|
||||
|
||||
lseq <- nchar(gsub("[^acgt]", "", intron$seq))
|
||||
|
||||
intron <- intron[lseq == 20,]
|
||||
|
||||
# remove genes out of core
|
||||
|
||||
intron <- intron[intron$genefam %in% cds$genefam,]
|
||||
|
||||
# acceptors / donors
|
||||
|
||||
intron$acc <- substr(intron$seq, 5, 6)
|
||||
intron$don <- substr(intron$seq, 15, 16)
|
||||
|
||||
# consensus
|
||||
|
||||
cons.px <- cons.build(intron$acceptor.donor)
|
||||
cons.px <- cons.px[,! is.nan(colSums(cons.px))]
|
||||
|
||||
seq.px <- sapply(intron$acceptor.donor, function(s) gsub("[^acgt]", "", s))
|
||||
|
||||
conf.px <- cons.confusion(cons.px, seq.px)
|
||||
|
||||
sfam <- split(conf.px$l2scor, intron$genefam)
|
||||
sfam <- sfam[order(sapply(sfam, median))]
|
||||
|
||||
# extract splice exceptions
|
||||
|
||||
name.bad <- names(which(sapply(sfam, median) < 0))
|
||||
name.spc <- names(which(sapply(sfam[name.bad], length) >= CORE_SPLICE_CUTOFF))
|
||||
name.ok <- setdiff(unique(intron$genefam), name.bad)
|
||||
name.bad <- setdiff(name.bad, name.spc)
|
||||
name.list <- c(sapply(name.spc, function(x) x), list(default=name.ok))
|
||||
|
||||
cons <- lapply(name.list, function(x) cons.build(intron[intron$genefam %in% x, "acceptor.donor"]))
|
||||
|
||||
# write junction models
|
||||
|
||||
invisible(sapply(names(cons), function(n) write.model.splice3(cons[[n]], n)))
|
||||
invisible(sapply(names(cons), function(n) write.model.splice5(cons[[n]], n)))
|
||||
|
||||
# use uniform model for bad guys
|
||||
|
||||
invisible(sapply(name.bad, function(n) write.unif.splice(3, n)))
|
||||
invisible(sapply(name.bad, function(n) write.unif.splice(5, n)))
|
||||
|
||||
invisible(write.unif.splice('', "none"))
|
||||
|
||||
# -------------------------------
|
||||
# keep data for plotting
|
||||
# -------------------------------
|
||||
|
||||
DB <- list()
|
||||
|
||||
params <- list()
|
||||
|
||||
params$CORE_NCDS_CUTOFF <- CORE_NCDS_CUTOFF
|
||||
params$CORE_START_ATG_CUTOFF <- CORE_START_ATG_CUTOFF
|
||||
params$CORE_START_DFT_CUTOFF <- CORE_START_DFT_CUTOFF
|
||||
params$CORE_START_OTH_CUTOFF <- CORE_START_OTH_CUTOFF
|
||||
params$CORE_STOP_CUTOFF <- CORE_STOP_CUTOFF
|
||||
params$CORE_SPLICE_CUTOFF <- CORE_SPLICE_CUTOFF
|
||||
|
||||
params$SHEL_NCDS_CUTOFF <- SHEL_NCDS_CUTOFF
|
||||
|
||||
DB$params <- params
|
||||
DB$chromo <- chromo
|
||||
DB$cds.lst <- cds.lst
|
||||
DB$intron <- intron
|
||||
DB$cons <- cons
|
||||
|
||||
notify("saving db.data.Rdata")
|
||||
save(DB, file="db.data.Rdata")
|
||||
|
||||
# -------------------------------
|
||||
# end
|
||||
# -------------------------------
|
||||
|
||||
quit(save='no')
|
424
detectors/cds/tools/lib/plot.models.r
Executable file
424
detectors/cds/tools/lib/plot.models.r
Executable file
@ -0,0 +1,424 @@
|
||||
#!/usr/bin/env Rscript
|
||||
#
|
||||
# plots models previously computed by make.models.r
|
||||
#
|
||||
# source("plot.models.r")
|
||||
#
|
||||
|
||||
# plotting dependencies (ternaryplot comes from vcd, battleship.plot from
# plotrix); library() stops immediately when a package is missing, whereas
# require() only returns FALSE and lets the script fail later
library(vcd)
library(plotrix)
|
||||
|
||||
LIBDIR <- Sys.getenv("LIB_DIR")
|
||||
if (LIBDIR == "") LIBDIR = "."
|
||||
|
||||
source(paste0(LIBDIR, "/util.base.r"))
|
||||
source(paste0(LIBDIR, "/util.plot.r"))
|
||||
source(paste0(LIBDIR, "/util.cons.r"))
|
||||
source(paste0(LIBDIR, "/util.grid.r"))
|
||||
|
||||
# -------------------------------
|
||||
# setup
|
||||
# -------------------------------
|
||||
|
||||
OUT.DEV <- TRUE
|
||||
OUT.TYPE <- "pdf"
|
||||
OUT.FILE <- "models"
|
||||
|
||||
if (OUT.DEV) uplot.init.dev(OUT.FILE, OUT.TYPE)
|
||||
|
||||
# -------------------------------
|
||||
# Load data
|
||||
# -------------------------------
|
||||
|
||||
notify("loading DB data")
|
||||
load("db.data.Rdata")
|
||||
|
||||
params <- DB$params
|
||||
chromo <- DB$chromo
|
||||
cds.lst <- DB$cds.lst
|
||||
intron <- DB$intron
|
||||
cons <- DB$cons
|
||||
|
||||
# -------------------------------
|
||||
# Genomes infos
|
||||
# -------------------------------
|
||||
|
||||
grd.titlepage("Species")
|
||||
grd.textpage(lineno=1, "# org: ", nrow(chromo))
|
||||
|
||||
#
|
||||
# general stats
|
||||
#
|
||||
|
||||
grd.hist(chromo, "len", main="Histogram of chromosome length")
|
||||
grd.hist(chromo, "gc", pos.quant=c(0.75, 0.6), main="Histogram of chromosome GC")
|
||||
grd.hist(chromo, "nbCds")
|
||||
grd.fplot(chromo, "len", "nbCds")
|
||||
|
||||
#
|
||||
# nb cds no introns
|
||||
#
|
||||
|
||||
chromo$nbCds_Mono <- chromo$nbCds_int0
|
||||
chromo$nbCds_Poly <- chromo$nbCds_int1 + chromo$nbCds_intsup1
|
||||
chromo$percentPoly <- round(chromo$nbCds_Poly * 100 / (chromo$nbCds_Poly + chromo$nbCds_Mono))
|
||||
|
||||
grd.hist(chromo, "nbCds_Mono", main="Histogram of monoexonic Cds")
|
||||
grd.hist(chromo, "nbCds_Poly", main="Histogram of polyexonic Cds")
|
||||
grd.hist(chromo, "percentPoly", pos.sum=c(0.23,0.6), main="Histogram of % polyexonic")
|
||||
|
||||
grd.fplot(chromo, "nbCds", "nbCds_Mono", TRUE, ablin=list(a=0, b=1, col=3))
|
||||
grd.fplot(chromo, "nbCds", "nbCds_Poly")
|
||||
|
||||
#
|
||||
# cds size
|
||||
#
|
||||
|
||||
grd.hist(chromo, "meanCdsSize", pos.quant=NULL, main="Histogram of Cds size")
|
||||
|
||||
# -------------------------------
|
||||
# CDS
|
||||
# -------------------------------
|
||||
|
||||
cds.all <- do.call(rbind, cds.lst)
|
||||
|
||||
grd.titlepage("CDS")
|
||||
|
||||
grd.textpage(lineno=1, "# core cds core cutoff: ", params$CORE_NCDS_CUTOFF)
|
||||
grd.textpage(lineno=2, "# core cds shell cutoff: ", params$SHEL_NCDS_CUTOFF)
|
||||
|
||||
grd.textpage(lineno=4, "# total cds: ", nrow(cds.all))
|
||||
grd.textpage(lineno=5, "# core cds: ", nrow(cds.lst[["core"]]))
|
||||
grd.textpage(lineno=6, "# shell cds: ", nrow(cds.lst[["shell"]]))
|
||||
grd.textpage(lineno=7, "# dust cds: ", nrow(cds.lst[["dust"]]))
|
||||
|
||||
grd.textpage(lineno=9, "# total org: ", length(unique(cds.all$X.locus)))
|
||||
grd.textpage(lineno=10, "# core org: ", length(unique(cds.lst[["core"]]$X.locus)))
|
||||
grd.textpage(lineno=11, "# shell org: ", length(unique(cds.lst[["shell"]]$X.locus)))
|
||||
grd.textpage(lineno=12, "# dust org: ", length(unique(cds.lst[["dust"]]$X.locus)))
|
||||
|
||||
|
||||
grd.textpage(lineno=14, "# total families: ", length(unique(cds.all$genefam)))
|
||||
grd.textpage(lineno=15, "# core families: ", length(unique(cds.lst[["core"]]$genefam)))
|
||||
grd.textpage(lineno=16, "# shell families: ", length(unique(cds.lst[["shell"]]$genefam)))
|
||||
grd.textpage(lineno=17, "# dust families: ", length(unique(cds.lst[["dust"]]$genefam)))
|
||||
|
||||
uplot.setup(mfrow=c(2,2), xpd=NA)
|
||||
|
||||
x <- sapply(cds.lst, nrow)
|
||||
uplot.pie(x, main="CDS", text.r=1.1, col=c(3,2,4))
|
||||
|
||||
x <- sapply(cds.lst, function(cds) length(unique(cds$X.locus)))
|
||||
uplot.pie(x, main="ORG", text.r=1.1, col=c(3,2,4))
|
||||
|
||||
x <- sapply(cds.lst, function(cds) length(unique(cds$genefam)))
|
||||
uplot.pie(x, main="FAM", text.r=1.1, col=c(3,2,4))
|
||||
|
||||
uplot.setup(xpd=F)
|
||||
|
||||
#
|
||||
# plot genes cutoff
|
||||
#
|
||||
|
||||
cds.all <- do.call(rbind, cds.lst)
|
||||
cds.byfam <- split(cds.all, cds.all$genefam)
|
||||
|
||||
tab <- sort(sapply(cds.byfam, nrow), decreasing=T)
|
||||
cols <- rep("red", length(tab))
|
||||
cols[tab >= params$SHEL_NCDS_CUTOFF] <- "blue"
|
||||
cols[tab >= params$CORE_NCDS_CUTOFF] <- "green"
|
||||
barplot(tab, col=cols, border=NA, main="# genes")
|
||||
|
||||
cols <- cols[tab >= 50]
|
||||
tab <- tab[tab >= 50]
|
||||
barplot(tab, col=cols, border=NA, las=2, cex.names=0.5, main="# genes in core")
|
||||
# mark the core cutoff and anchor its label on the line itself
# (the label height was hard-coded to 200, whatever the cutoff value)
abline(h=params$CORE_NCDS_CUTOFF, col=1)
text(50, params$CORE_NCDS_CUTOFF, "CORE_NCDS_CUTOFF", pos=3)
|
||||
|
||||
#
|
||||
# cds length by category (core, shell, dust)
|
||||
#
|
||||
|
||||
invisible(sapply(c("core", "shell", "dust"), function(what) {
|
||||
|
||||
cds <- cds.lst[[what]]
|
||||
|
||||
x <- split(cds$length, cds$genefam)
|
||||
x <- x[order(sapply(x, mean))]
|
||||
|
||||
uplot.setup(mfrow=c(2,1))
|
||||
|
||||
boxplot(x, pars = list(boxwex = 0.8, staplewex = 0.5, outwex = 0.5, outcex = 0.1),
|
||||
las=2, cex.axis=0.5, main=paste0(what, " genes - length distribution"))
|
||||
|
||||
boxplot(x, pars = list(boxwex = 0.8, staplewex = 0.5, outwex = 0.5, outcex = 0.1), ylim=c(0,2000),
|
||||
las=2, cex.axis=0.5, main=paste0(what, " genes - length distribution zoom"))
|
||||
|
||||
uplot.setup()
|
||||
}))
|
||||
|
||||
# -------------------------------
|
||||
# starts & stops
|
||||
# -------------------------------
|
||||
|
||||
cds <- cds.lst[["core"]]
|
||||
|
||||
grd.titlepage("Starts and Stops")
|
||||
|
||||
tab <- sort(table(cds$start), dec=T)
|
||||
tab <- tab[tab >= 100]
|
||||
tab <- tab / sum(tab) * 100
|
||||
|
||||
barplot(tab, log="y", las=1, ylim=c(0.1, 100), main="start frequencies (%)")
|
||||
text(0.7, 50, round(tab[1], 2))
|
||||
text(1.9, 2.3, round(tab[2], 2))
|
||||
text(3.1, 2.3, round(tab[3], 2))
|
||||
|
||||
#
|
||||
# start by org and gc
|
||||
#
|
||||
|
||||
x <- split(cds$start, cds$X.locus)
|
||||
|
||||
fatg <- sapply(x, function(x) table(x)["atg"]/length(x)*100)
|
||||
names(fatg) <- names(x)
|
||||
chromo$fatg <- round(fatg[chromo$X.locus], 2)
|
||||
|
||||
fgtg <- sapply(x, function(x) table(x)["gtg"]/length(x)*100)
|
||||
names(fgtg) <- names(x)
|
||||
chromo$fgtg <- round(fgtg[chromo$X.locus], 2)
|
||||
|
||||
facg <- sapply(x, function(x) table(x)["acg"]/length(x)*100)
|
||||
names(facg) <- names(x)
|
||||
chromo$facg <- round(facg[chromo$X.locus], 2)
|
||||
|
||||
grd.hist(chromo, "fatg", pos.quant=c(0.5, 0.6), main="Histogram of atg freq. by org")
|
||||
grd.hist(chromo, "fgtg", main="Histogram of gtg freq. by org")
|
||||
grd.hist(chromo, "facg", pos.sum=c(0.3, 0.6), main="Histogram of acg freq. by org")
|
||||
|
||||
grd.fplot(chromo, "gc", "fatg", main="atg freq. by org GC", pos=c(0.2, 0.3))
|
||||
grd.fplot(chromo, "gc", "fgtg", main="gtg freq. by org GC")
|
||||
grd.fplot(chromo, "gc", "facg", main="acg freq. by org GC")
|
||||
|
||||
# ternary plot of start codon usage by organism, colored by GC decile
ter <- cbind(fatg, fgtg, facg)
colnames(ter) <- c("ATG", "GTG", "ACG")
# rows of ter are the loci found in cds (names of the split), which need
# not match the row order of chromo: look up each GC value by locus so
# that colors line up with the plotted points
gc.org <- chromo$gc[match(rownames(ter), chromo$X.locus)]
igc <- cut(gc.org, breaks=quantile(chromo$gc, seq(0, 1, 0.1)), include.lowest=TRUE, labels=1:10)
cols <- rainbow(10)[igc]
ternaryplot(ter, col=cols, cex=0.2, main="Start by org", labels="outside")
|
||||
|
||||
#
|
||||
# start by common genes
|
||||
#
|
||||
|
||||
x <- split(cds$start, cds$genefam)
|
||||
|
||||
fatg <- sapply(x, function(x) table(x)["atg"]/length(x)*100)
|
||||
names(fatg) <- names(x)
|
||||
|
||||
fgtg <- sapply(x, function(x) table(x)["gtg"]/length(x)*100)
|
||||
names(fgtg) <- names(x)
|
||||
|
||||
facg <- sapply(x, function(x) table(x)["acg"]/length(x)*100)
|
||||
names(facg) <- names(x)
|
||||
|
||||
barplot(sort(fatg)[1:10], las=2, main="atg freq. by gene")
|
||||
barplot(sort(fgtg, dec=T)[1:10], las=2, main="gtg freq. by gene")
|
||||
barplot(sort(facg, dec=T)[1:10], las=2, main="acg freq. by gene")
|
||||
|
||||
ter <- cbind(fatg, fgtg, facg)
|
||||
colnames(ter) <- c("ATG", "GTG", "ACG")
|
||||
ternaryplot(ter, col=1, cex=0.5, id=rownames(ter), main="Starts by genes", labels="outside")
|
||||
|
||||
# -------------------------------
|
||||
# stops
|
||||
# -------------------------------
|
||||
|
||||
#
|
||||
# stop by org and gc
|
||||
#
|
||||
|
||||
tab <- sort(table(cds$stop), dec=T)
|
||||
tab <- tab[tab >= 100]
|
||||
tab <- tab / sum(tab) * 100
|
||||
|
||||
barplot(tab, log="y", las=1, ylim=c(0.1, 100), main="stop frequencies (%)")
|
||||
text(0.7, 80, round(tab[1], 2))
|
||||
text(1.9, 30, round(tab[2], 2))
|
||||
text(3.1, 25, round(tab[3], 2))
|
||||
|
||||
x <- split(cds$stop, cds$X.locus)
|
||||
|
||||
ftaa <- sapply(x, function(x) table(x)["taa"]/length(x)*100)
|
||||
names(ftaa) <- names(x)
|
||||
chromo$ftaa <- round(ftaa[chromo$X.locus], 2)
|
||||
|
||||
ftag <- sapply(x, function(x) table(x)["tag"]/length(x)*100)
|
||||
names(ftag) <- names(x)
|
||||
chromo$ftag <- round(ftag[chromo$X.locus], 2)
|
||||
|
||||
ftga <- sapply(x, function(x) table(x)["tga"]/length(x)*100)
|
||||
names(ftga) <- names(x)
|
||||
chromo$ftga <- round(ftga[chromo$X.locus], 2)
|
||||
|
||||
grd.hist(chromo, "ftaa", pos.quant=c(0.7, 0.6), main="Histogram of taa freq. by org")
|
||||
grd.hist(chromo, "ftag", pos.quant=c(0.8, 0.6), main="Histogram of tag freq. by org")
|
||||
grd.hist(chromo, "ftga", pos.quant=c(0.8, 0.6), main="Histogram of tga freq. by org")
|
||||
|
||||
grd.fplot(chromo, "gc", "ftaa", main="taa freq. by org GC", pos=c(0.2, 0.3))
|
||||
grd.fplot(chromo, "gc", "ftag", main="tag freq. by org GC")
|
||||
grd.fplot(chromo, "gc", "ftga", main="tga freq. by org GC")
|
||||
|
||||
# ternary plot of stop codon usage by organism, colored by GC decile
ter <- cbind(ftaa, ftag, ftga)
colnames(ter) <- c("TAA", "TAG", "TGA")
# rows of ter are the loci found in cds (names of the split), which need
# not match the row order of chromo: look up each GC value by locus so
# that colors line up with the plotted points
gc.org <- chromo$gc[match(rownames(ter), chromo$X.locus)]
igc <- cut(gc.org, breaks=quantile(chromo$gc, seq(0, 1, 0.1)), include.lowest=TRUE, labels=1:10)
cols <- rainbow(10)[igc]
ternaryplot(ter, col=cols, cex=0.2, main="Stops by org", labels="outside")
|
||||
|
||||
#
|
||||
# stop by common genes
|
||||
#
|
||||
|
||||
x <- split(cds$stop, cds$genefam)
|
||||
|
||||
ftaa <- sapply(x, function(x) table(x)["taa"]/length(x)*100)
|
||||
names(ftaa) <- names(x)
|
||||
|
||||
ftag <- sapply(x, function(x) table(x)["tag"]/length(x)*100)
|
||||
names(ftag) <- names(x)
|
||||
|
||||
ftga <- sapply(x, function(x) table(x)["tga"]/length(x)*100)
|
||||
names(ftga) <- names(x)
|
||||
|
||||
barplot(sort(ftaa), las=2, cex.names=0.5, ylim=c(0,100), main="taa freq. by gene")
|
||||
barplot(sort(ftag), las=2, cex.names=0.5, ylim=c(0,100), main="tag freq. by gene")
|
||||
barplot(sort(ftga), las=2, cex.names=0.5, ylim=c(0,100), main="tga freq. by gene")
|
||||
|
||||
ter <- cbind(ftaa, ftag, ftga)
|
||||
colnames(ter) <- c("TAA", "TAG", "TGA")
|
||||
ternaryplot(ter, col=1, cex=0.3, id=rownames(ter), main="Stops by genes", labels="outside")
|
||||
|
||||
# -------------------------------
|
||||
# splice junctions
|
||||
# -------------------------------
|
||||
|
||||
grd.titlepage("Splice Junctions")
|
||||
|
||||
grd.textpage(lineno=1, "# intron in core: ", nrow(intron))
|
||||
|
||||
#
|
||||
# intron size
|
||||
#
|
||||
|
||||
intron$size <- intron$to - intron$from + 1
|
||||
|
||||
grd.hist(intron, "size", pos.quant=NULL, main="Histogram of intron size", br=1000, xlim=c(0,2000))
|
||||
|
||||
#
|
||||
# nb intron / gene
|
||||
#
|
||||
|
||||
x <- split(intron, intron$genefam)
|
||||
x <- x[order(sapply(x, function(x) mean(x$intron_nb)), decreasing=T)]
|
||||
|
||||
nmax <- max(intron$intron_nb)
|
||||
lintron <- lapply(x, function(x) x$intron_nb)
|
||||
mintron <- sapply(lintron, function(x) table(factor(x, levels=1:nmax)))
|
||||
|
||||
# number of monoexonic cds (no intron) per family, in the same family
# order as lintron. Counting over a factor with explicit levels yields 0
# for families without any monoexonic cds; indexing the table by name
# gave NA there, which turned the whole column into NA once normalized.
lintron0 <- table(factor(cds[cds$nexon == 1, "genefam"], levels=names(lintron)))
lintron0 <- setNames(as.vector(lintron0), names(lintron))
mintron <- rbind("0"=lintron0, mintron)
# convert counts to per-family proportions (columns sum to 1)
mintron <- t(t(mintron)/colSums(mintron))
|
||||
|
||||
mintron[mintron==0] <- NA
|
||||
|
||||
nn <- nrow(mintron)
|
||||
xx <- mintron[nn:1,]
|
||||
ll <- lapply(1:nn, function(i) xx[i,])
|
||||
mintron <- mintron[,do.call(order, c(ll, decreasing=T))]
|
||||
|
||||
battleship.plot(mintron, maxxspan=0.3, maxyspan=0.3,
|
||||
cex.labels=0.7,
|
||||
main="% intron per polyexonic gene")
|
||||
|
||||
#
|
||||
# acceptors / donors
|
||||
#
|
||||
|
||||
tab <- sort(table(intron$acc), dec=T)
|
||||
tab <- tab[tab >= 100]
|
||||
tab <- tab / sum(tab) * 100
|
||||
|
||||
barplot(tab, log="y", las=1, ylim=c(0.1, 100), main="acceptor frequencies (%)")
|
||||
text(0.7, 50, round(tab[1], 2))
|
||||
text(1.9, 3, round(tab[2], 2))
|
||||
text(3.1, 2.3, round(tab[3], 2))
|
||||
|
||||
tab <- sort(table(intron$don), dec=T)
|
||||
tab <- tab[tab >= 100]
|
||||
tab <- tab / sum(tab) * 100
|
||||
|
||||
barplot(tab, log="y", las=1, ylim=c(0.1, 100), main="donor frequencies (%)")
|
||||
text(0.7, 50, round(tab[1], 1))
|
||||
text(1.9, 40, round(tab[2], 1))
|
||||
text(3.1, 15, round(tab[3], 1))
|
||||
text(4.3, 12, round(tab[4], 1))
|
||||
|
||||
#
|
||||
# consensus all
|
||||
#
|
||||
|
||||
cons$all <- cons.build(intron$acceptor.donor)
|
||||
|
||||
invisible(sapply(rev(names(cons)), function(what) {
|
||||
cons.plot(cons[[what]], paste0("consensus ", what))
|
||||
}))
|
||||
|
||||
#
|
||||
# default consensus score by consensus length
|
||||
#
|
||||
|
||||
cons.def <- cons[["default"]]
|
||||
cons.def <- cons.def[,! is.nan(colSums(cons.def))]
|
||||
seq.def <- sapply(intron$acceptor.donor, function(s) gsub("[^acgt]", "", s))
|
||||
|
||||
epx <- apply(cons.def, 2, function(col) -sum(col * log(col, base=4)))
|
||||
opx <- order(epx)
|
||||
|
||||
conf.def <- lapply(seq(2, length(opx), by=2), function(n) {
|
||||
pos <- head(opx, n)
|
||||
notify(n, "/", length(opx))
|
||||
cons.confusion(cons.def, seq.def, thresh=0, pos=pos)
|
||||
})
|
||||
|
||||
acc <- sapply(conf.def, function(x) x$acc)
|
||||
sen <- sapply(conf.def, function(x) x$sen)
|
||||
sel <- sapply(conf.def, function(x) x$sel)
|
||||
|
||||
# selectivity (pch 1), sensitivity (pch 2) and accuracy (pch 3) against
# the number of consensus positions used
plot(sel, ylim=c(0.7, 1), pch=1, type="b", main="accuracy by nb consensus positions", ylab="")
lines(sen, type="b", pch=2)
lines(acc, type="b", pch=3)
# legend labels follow the plotting order above: sel, sen, acc
# (the first two labels were swapped)
legend(1, 0.95, c("select.", "sensit.", "accur."), pch=1:3, horiz=TRUE, bty="n")
|
||||
|
||||
#
|
||||
# default consensus score by genes
|
||||
#
|
||||
|
||||
conf.def <- cons.confusion(cons.def, seq.def, thresh=0)
|
||||
cons.histconf(conf.def)
|
||||
|
||||
sfam <- split(conf.def$l2scor, intron$genefam)
|
||||
sfam <- sfam[order(sapply(sfam, median))]
|
||||
|
||||
boxplot(sfam, pars = list(boxwex = 0.8, staplewex = 0.5, outwex = 0.5, outcex = 0.1),
|
||||
las=2, cex.axis=0.7, main="default junction logr score by genes")
|
||||
abline(h=0)
|
||||
|
||||
#
|
||||
# end
|
||||
#
|
||||
|
||||
if (OUT.DEV) {
|
||||
cat("+ plot file:", paste0(OUT.FILE, ".", OUT.TYPE), "\n")
|
||||
invisible(dev.off())
|
||||
}
|
||||
|
||||
quit(save='no')
|
71
detectors/cds/tools/lib/summarize_cmp.r
Executable file
71
detectors/cds/tools/lib/summarize_cmp.r
Executable file
@ -0,0 +1,71 @@
|
||||
#!/usr/bin/env Rscript
|
||||
#
|
||||
# plot summary graphics of comparisons
|
||||
#
|
||||
#
|
||||
|
||||
LIBDIR <- Sys.getenv("LIB_DIR")
|
||||
if (LIBDIR == "") LIBDIR = "."
|
||||
|
||||
source(paste0(LIBDIR, "/util.plot.r"))
|
||||
|
||||
COLORS <- 2:10
|
||||
|
||||
#
|
||||
|
||||
OUT.DEV <- TRUE
|
||||
OUT.TYPE <- "pdf"
|
||||
OUT.FILE <- "compare"
|
||||
if (OUT.DEV) uplot.init.dev(OUT.FILE, OUT.TYPE)
|
||||
|
||||
#
|
||||
|
||||
tab <- read.table("compare.txt", header=T, comment.char="", stringsAsFactors=F)
|
||||
|
||||
#
|
||||
|
||||
par(xpd=NA)
|
||||
|
||||
#
|
||||
|
||||
sel <- c("cor", "alcor", "acc", "wrong", "over", "misstot")
|
||||
tab$ptot <- rowSums(tab[,sel])
|
||||
for (s in sel)
|
||||
tab[,paste0("p", s)] <- tab[,s] * 100 / tab$ptot
|
||||
|
||||
colors <- head(COLORS, length(sel))
|
||||
|
||||
cols <- paste0("p", sel)
|
||||
ord <- order(tab$pcor+tab$palcor+tab$pacc, decreasing=T)
|
||||
|
||||
barplot(t(tab[ord,cols]), names.arg=tab$X.org[ord],
|
||||
ylim=c(0,100), col=colors, las=2, cex.names=0.5)
|
||||
|
||||
legend(0, 110, sel, fill=colors, cex=0.7, horiz=T)
|
||||
|
||||
#
|
||||
|
||||
sel <- c("cor", "alcor", "acc", "wrong", "over", "misschlo")
|
||||
tab$rtot <- rowSums(tab[,sel])
|
||||
for (s in sel)
|
||||
tab[,paste0("r", s)] <- tab[,s] * 100 / tab$rtot
|
||||
|
||||
colors <- head(COLORS, length(sel))
|
||||
|
||||
cols <- paste0("r", sel)
|
||||
ord <- order(tab$rcor+tab$ralcor+tab$racc, decreasing=T)
|
||||
|
||||
barplot(t(tab[ord,cols]), names.arg=tab$X.org[ord],
|
||||
ylim=c(0,100), col=colors, las=2, cex.names=0.5)
|
||||
|
||||
legend(0, 110, sel, fill=colors, cex=0.7, horiz=T)
|
||||
|
||||
#
|
||||
|
||||
if (OUT.DEV) {
|
||||
cat("# plot file:", paste0(OUT.FILE, ".", OUT.TYPE), "\n")
|
||||
invisible(dev.off())
|
||||
}
|
||||
|
||||
quit(save='no')
|
||||
|
54
detectors/cds/tools/lib/summary.cmp.awk
Normal file
54
detectors/cds/tools/lib/summary.cmp.awk
Normal file
@ -0,0 +1,54 @@
|
||||
#
|
||||
#
|
||||
|
||||
#
# organism name from a path-like string: keep the last "/"-separated
# component, then the part of it before the first "."
# Parameters after _local_ are local variables (awk convention).
#
function getOrg(s, _local_, a, na, org) {
    # a[na] is the last path component
    na = split(s, a, "/")
    # split the component on dots into a second array
    split(a[na], org, "\\.")
    return org[1]
}
|
||||
|
||||
|
||||
BEGIN {
|
||||
PROCINFO["sorted_in"] = "@ind_num_asc"
|
||||
print "#org tot cor alcor acc wrong over misstot misschlo missoth"
|
||||
}
|
||||
|
||||
/MISSED in ChloroDB/ {
|
||||
org = getOrg($1)
|
||||
Org[org]++
|
||||
Cnt[org]["MISSCHLORO"] = $2
|
||||
next
|
||||
}
|
||||
|
||||
/MISSED not in ChloroDB/ {
|
||||
org = getOrg($1)
|
||||
Org[org]++
|
||||
Cnt[org]["MISSNOTCHLORO"] = $2
|
||||
next
|
||||
}
|
||||
|
||||
/^#/ { next }
|
||||
|
||||
/^.*:MATCH/ {
|
||||
org = getOrg($1)
|
||||
Org[org]++
|
||||
split($NF, a, "\\.")
|
||||
Cnt[org][a[1]]++
|
||||
}
|
||||
|
||||
END {
|
||||
for (org in Org) {
|
||||
Cnt[org]["TOTAL"] = Cnt[org]["CORRECT"] + Cnt[org]["ALMOST_CORRECT"] \
|
||||
+ Cnt[org]["ACCEPTABLE"] + Cnt[org]["WRONG"] \
|
||||
+ Cnt[org]["MISSED"]
|
||||
}
|
||||
for (org in Org) {
|
||||
print org, Cnt[org]["TOTAL"]+0, Cnt[org]["CORRECT"]+0, \
|
||||
Cnt[org]["ALMOST_CORRECT"]+0, Cnt[org]["ACCEPTABLE"]+0, \
|
||||
Cnt[org]["WRONG"]+0, Cnt[org]["OVERPRED"]+0, \
|
||||
Cnt[org]["MISSED"]+0, \
|
||||
Cnt[org]["MISSCHLORO"]+0, Cnt[org]["MISSNOTCHLORO"]+0
|
||||
|
||||
}
|
||||
|
||||
}
|
12
detectors/cds/tools/lib/util.base.r
Normal file
12
detectors/cds/tools/lib/util.base.r
Normal file
@ -0,0 +1,12 @@
|
||||
#
|
||||
# R basic utilities
|
||||
#
|
||||
|
||||
#
|
||||
# notify on stderr
|
||||
#
|
||||
|
||||
# Print a '+'-prefixed, space-separated message on stderr, so that
# progress messages do not mix with data written on stdout.
notify <- function(...) cat("+", ..., "\n", file = stderr())
|
||||
|
||||
|
||||
|
109
detectors/cds/tools/lib/util.cons.r
Normal file
109
detectors/cds/tools/lib/util.cons.r
Normal file
@ -0,0 +1,109 @@
|
||||
#
|
||||
# R consensus utilities
|
||||
#
|
||||
|
||||
#
|
||||
# compute consensus
|
||||
#
|
||||
|
||||
# Build a position frequency matrix from aligned sequences.
#   seqs      : character vector of lower-case sequences of equal length
#   backcount : pseudo-count added to every a/c/g/t cell of a covered column
# Returns a 4 x L matrix (rows "a", "c", "g", "t") whose columns sum to 1.
# Characters other than a/c/g/t (e.g. "." or "-") are not counted; a column
# holding no a/c/g/t at all is zeroed before normalisation and therefore
# comes out as NaN (0/0).
cons.build <- function(seqs, backcount=1) {
  nuc <- c("a", "c", "g", "t")
  # one row per sequence, one column per position
  chars <- do.call(rbind, lapply(seqs, function(s) strsplit(s, "")[[1]]))
  # vapply always yields a 4 x L matrix, even when L == 1
  counts <- vapply(seq_len(ncol(chars)),
                   function(j) tabulate(match(chars[, j], nuc), nbins=4L),
                   integer(4))
  rownames(counts) <- nuc
  empty <- colSums(counts) == 0
  counts <- counts + backcount
  counts[, empty] <- 0
  sweep(counts, 2, colSums(counts), "/")
}
|
||||
|
||||
#
|
||||
# score consensus
|
||||
#
|
||||
|
||||
# Score a sequence against a consensus matrix.
#   cons : matrix with one row per symbol (row names) and one column per position
#   seq  : a single string with exactly ncol(cons) characters
#   pos  : positions (columns) taken into account
# Returns the sum over `pos` of log10(p + 1e-6), where p is the matrix entry
# for the symbol observed at that position; 0 when `pos` is empty.
# Warns and returns NA when the sequence length differs from ncol(cons).
cons.score <- function(cons, seq, pos=1:ncol(cons)) {
  symbols <- strsplit(seq, "")[[1]]
  if (length(symbols) != ncol(cons)) {
    warning("incompatible seq and cons size")
    return(NA)
  }
  # vapply keeps the result numeric even for an empty `pos`
  probs <- vapply(pos, function(i) cons[symbols[i], i], numeric(1))
  sum(log10(probs + 1e-6))
}
|
||||
|
||||
#
|
||||
# logratio to uniform model score
|
||||
#
|
||||
|
||||
# Log-ratio score of a sequence: consensus model versus null model.
#   cons : consensus matrix (see cons.score)
#   seq  : a single string with ncol(cons) characters
#   m0   : null model matrix; NULL means uniform 0.25 for a/c/g/t
#   pos  : positions taken into account
# Returns 2 * (log2 likelihood under cons - log2 likelihood under m0).
cons.logratio <- function(cons, seq, m0=NULL, pos=1:ncol(cons)) {
  if (is.null(m0)) {
    m0 <- matrix(0.25, nrow=4, ncol=ncol(cons),
                 dimnames=list(c("a", "c", "g", "t"), NULL))
  }

  sc  <- cons.score(cons, seq, pos=pos)
  sc0 <- cons.score(m0, seq, pos=pos)

  # log2(10^x) == x * log2(10): stay in log space, since 10^sc underflows
  # to 0 (giving -Inf/NaN) for long or very unlikely sequences
  2 * log2(10) * (sc - sc0)
}
|
||||
|
||||
#
|
||||
# shuffle sequence
|
||||
#
|
||||
|
||||
# Return a random permutation of the characters of the string `seq`.
seq.shuf <- function(seq) {
  chars <- strsplit(seq, "")[[1]]
  shuffled <- sample(chars, nchar(seq), replace=FALSE)
  paste0(shuffled, collapse="")
}
|
||||
|
||||
#
|
||||
# compute confusion matrix between actual and shuffled sequences
|
||||
#
|
||||
|
||||
# Compare log-ratio scores of actual sequences with those of their shuffled
# versions.
#   cons, m0, pos : passed on to cons.logratio
#   seq           : vector of sequences
#   thresh        : a score >= thresh counts as a positive call
# Returns a list with
#   l2scor, r2scor : scores of the actual / shuffled sequences
#   conf           : 2 x 2 matrix, row 1 = actual, row 2 = shuffled,
#                    column 1 = score >= thresh, column 2 = score < thresh
#   acc            : sum(diag(conf)) / sum(conf)
#   sen            : conf[1,1] / sum of row 1
#   sel            : conf[1,1] / sum of column 1
cons.confusion <- function(cons, seq, m0=NULL, pos=1:ncol(cons), thresh=0) {
  count <- function(flags) sum(flags, na.rm=TRUE)
  score <- function(s) cons.logratio(cons, s, m0=m0, pos=pos)

  actual   <- sapply(seq, score)
  shuffled <- sapply(sapply(seq, seq.shuf), score)

  conf <- matrix(c(count(actual >= thresh),   count(actual < thresh),
                   count(shuffled >= thresh), count(shuffled < thresh)),
                 nrow=2, byrow=TRUE)

  res <- list()
  res$l2scor <- actual
  res$r2scor <- shuffled
  res$conf   <- conf
  res$acc    <- sum(diag(conf)) / sum(conf)
  res$sen    <- conf[1,1] / sum(conf[1,])
  res$sel    <- conf[1,1] / sum(conf[,1])
  res
}
|
||||
|
||||
#
|
||||
# plot consensus
|
||||
#
|
||||
|
||||
|
||||
# Stacked barplot of a consensus matrix (one bar per column), overlaid with
# a per-column entropy curve (log base 4). Returns NULL invisibly.
cons.plot <- function(cons, main="consensus") {
  nuc.cols <- c("blue", "orange", "red", "green")
  entropy <- function(p) -sum(p * log(p+1e-6, base=4))

  mids <- barplot(cons, col=nuc.cols, ylim=c(0,1), main=main)
  lines(mids, apply(cons, 2, entropy), type="b", pch=19)

  legend(0, 1.1, c("a","c","g","t"), fill=nuc.cols, horiz=TRUE, xpd=NA, bty="n")
  legend(20, 1.1, "entropy", pch=19, horiz=TRUE, xpd=NA, bty="n")
  invisible()
}
|
||||
|
||||
#
|
||||
# plot confusion scores histograms
|
||||
#
|
||||
|
||||
# Side-by-side histogram of actual (conf$l2scor) and shuffled (conf$r2scor)
# scores on common breaks; both series are divided by the number of actual
# scores. Returns NULL invisibly.
cons.histconf <- function(conf, main="junction logr score") {
  common <- hist(c(conf$l2scor, conf$r2scor), breaks=50, plot=FALSE)
  binned <- function(x) hist(x, breaks=common$breaks, plot=FALSE)$counts

  freq <- rbind(binned(conf$l2scor), binned(conf$r2scor))
  freq <- freq / sum(freq[1,])
  colnames(freq) <- common$mids

  barplot(freq, col=c(3,2), beside=TRUE, main=main)
  legend(0, 0.1, c("true", "shuffled"), fill=c(3,2), horiz=FALSE, xpd=NA)
  invisible()
}
|
93
detectors/cds/tools/lib/util.grid.r
Normal file
93
detectors/cds/tools/lib/util.grid.r
Normal file
@ -0,0 +1,93 @@
|
||||
#
|
||||
# R misc grid plotting
|
||||
#
|
||||
|
||||
require(grid)
|
||||
require(gridExtra)
|
||||
|
||||
#
|
||||
# get line height
|
||||
#
|
||||
|
||||
# Height of the string `s`, converted to "native" units of the current grid
# viewport and returned as a plain number.
grd.lineheight <- function(s="X") {
  height <- unit(1, "strheight", s)
  convertHeight(height, "native", valueOnly=TRUE)
}
|
||||
|
||||
#
|
||||
# quantile table
|
||||
#
|
||||
|
||||
# Table of the extreme rows of a data frame.
#   df   : data frame
#   what : column used to sort the rows (decreasing)
#   cols : columns kept in the result
#   n    : number of rows taken at each end
# Returns a data frame: the n top rows, a separator row filled with "-",
# then the n bottom rows.
grd.qtab <- function(df, what, cols, n=5) {
  # drop=FALSE keeps a data frame even when `cols` selects a single column
  ranked <- df[order(df[,what], decreasing=TRUE), cols, drop=FALSE]
  sep <- head(ranked, 1)
  sep[] <- "-"
  rbind(head(ranked, n), sep, tail(ranked, n))
}
|
||||
|
||||
#
|
||||
# histogram with tables
|
||||
#
|
||||
|
||||
# Histogram of one column, decorated with grid tables.
#   df        : data frame
#   what      : column to plot
#   cols      : columns shown in the quantile table (see grd.qtab)
#   breaks    : passed to hist()
#   pos.sum   : viewport (x, y) of the summary() table, NULL to omit it
#   pos.quant : viewport (x, y) of the grd.qtab() table, NULL to omit it
#   cex       : character expansion used for both tables
#   main, ... : passed to hist()
# Returns NULL invisibly.
grd.hist <- function(df, what, cols = c(1,2, which(colnames(df) == what)),
                     breaks=50, pos.sum=c(0.2,0.6), pos.quant=c(0.7,0.6), cex=0.7,
                     main=paste0("Histogram of ", what), ...) {
  hist(df[,what], breaks=breaks, xlab=what, main=main, ...)
  if (! is.null(pos.sum)) {
    # computed up front: do not rely on the order in which grid.table
    # happens to evaluate its arguments
    stats <- summary(df[,what])
    pushViewport(viewport(pos.sum[1], pos.sum[2], gp=gpar(cex=cex)))
    grid.table(stats, rows=names(stats))
    popViewport()
  }
  if (! is.null(pos.quant)) {
    pushViewport(viewport(pos.quant[1], pos.quant[2], gp=gpar(cex=cex)))
    grid.table(grd.qtab(df, what, cols), rows=NULL)
    popViewport()
  }
  invisible()
}
|
||||
|
||||
#
|
||||
# plot with fit
|
||||
#
|
||||
|
||||
# Scatter plot of two columns with an optional linear fit.
#   df             : data frame
#   what.x, what.y : columns plotted on the x / y axis
#   linfit         : if true, draw the lm() fit and print its equation and R2
#   pos            : grid position (x, y) of the equation text
#   ablin          : optional list of arguments for an extra abline() call
#   ...            : passed to plot()
# Returns NULL invisibly.
grd.fplot <- function(df, what.x, what.y, linfit=T, pos=c(0.2, 0.8), ablin=NULL, ...) {
  plot(df[,what.x], df[,what.y], xlab=what.x, ylab=what.y, ...)

  if (linfit) {
    fit <- lm(df[,what.y] ~ df[,what.x])
    abline(fit, col=2)

    # coefficients: [1] intercept, [2] slope
    coefs <- sprintf("%.2e", coef(fit))
    equation <- paste0(what.y, " = ", coefs[2], " * ", what.x, " + ", coefs[1])
    rsquared <- paste0("R2=", round(summary(fit)$r.squared, 3))

    pushViewport(viewport(gp=gpar(col=2)))
    grid.text(equation, pos[1], pos[2], just="left")
    # second line, two line heights below the first
    grid.text(rsquared, pos[1], pos[2] - 2 * grd.lineheight(), just="left")
    popViewport()
  }

  if (! is.null(ablin)) {
    do.call(abline, ablin)
  }
  invisible()
}
|
||||
|
||||
#
|
||||
# write text
|
||||
#
|
||||
|
||||
# Write one line of text on the current grid page.
#   ...    : pieces pasted together (paste0) to form the text
#   lineno : line index; each line is shifted down by
#            grd.lineheight() * fact from `top`
#   left   : x position, text is left-justified
#   cex    : character expansion
# Returns the text invisibly.
grd.textpage <- function(..., lineno=0, left=0.1, top=0.9, cex=1, fact=1.4) {
  txt <- paste0(...)
  pushViewport(viewport(gp=gpar(cex=cex)))
  # line height is measured inside the viewport, i.e. with `cex` applied
  ypos <- top - lineno * grd.lineheight() * fact
  grid.text(txt, left, ypos, just="left")
  popViewport()
  invisible(txt)
}
|
||||
|
||||
#
|
||||
# title page
|
||||
#
|
||||
|
||||
# Start a new grid page showing `title` at (x, y); also reports the title
# through notify(). Extra arguments go to grid.text(). Returns NULL invisibly.
grd.titlepage <- function(title, x=0.5, y=0.7, cex=3, ...) {
  notify("processing", title)
  grid.newpage()
  style <- gpar(cex=cex)
  grid.text(title, x, y, gp=style, ...)
  invisible()
}
|
90
detectors/cds/tools/lib/util.modelio.r
Normal file
90
detectors/cds/tools/lib/util.modelio.r
Normal file
@ -0,0 +1,90 @@
|
||||
#
|
||||
# R models I/O utilities
|
||||
#
|
||||
|
||||
#
|
||||
# write start model
|
||||
#
|
||||
|
||||
# Write a start model to models/start.<what>.frq.
#   frq  : named vector of counts
#   what : model name, used in the file name and in the header line
# After a header comment, one line per name: name, relative frequency, count.
# Returns the file name invisibly.
write.model.start <- function(frq, what) {
  dir.create("models", showWarnings=FALSE)
  fil <- file.path("models", paste0("start.", what, ".frq"))
  notify("writing start model:", fil)
  cat("# start model :", what, "\n", file=fil)
  total <- sum(frq)
  for (key in names(frq)) {
    cat(key, frq[key]/total, frq[key], "\n", file=fil, append=TRUE)
  }
  invisible(fil)
}
|
||||
|
||||
#
|
||||
# write stop model
|
||||
#
|
||||
|
||||
# Write a stop model to models/stop.<what>.frq.
#   frq  : named vector of counts
#   what : model name, used in the file name and in the header line
# After a header comment, one line per name: name, relative frequency, count.
# Returns the file name invisibly.
write.model.stop <- function(frq, what) {
  dir.create("models", showWarnings=FALSE)
  fil <- file.path("models", paste0("stop.", what, ".frq"))
  notify("writing stop model:", fil)
  cat("# stop model :", what, "(freq. ignored)\n", file=fil)
  total <- sum(frq)
  for (key in names(frq)) {
    cat(key, frq[key]/total, frq[key], "\n", file=fil, append=TRUE)
  }
  invisible(fil)
}
|
||||
|
||||
#
|
||||
# write splice3 model
|
||||
# [FIXME] positions are hard-coded
|
||||
#
|
||||
|
||||
# Write a 3' splice model to models/splice3.<what>.frq.
#   cons : consensus matrix with rows "a", "c", "g", "t"
#   what : model name
# Columns 1..4 are written before the "splice" marker line and columns 6..11
# after it, as percentages rounded to integers (positions are hard-coded).
# Returns the file name invisibly.
write.model.splice3 <- function(cons, what) {
  dir.create("models", showWarnings=FALSE)
  fil <- file.path("models", paste0("splice3.", what, ".frq"))
  notify("writing splice3 model:", fil)

  add <- function(...) cat(..., file=fil, append=TRUE)
  add.columns <- function(idx) {
    for (i in idx) add(round(cons[c("a","c","g","t"), i]*100, 0), "\n")
  }

  cat("# 3' splice model :", what, "\n", file=fil)
  add("# A C G T\n")
  add.columns(seq.int(1, 4))
  add("splice\n")
  add.columns(seq.int(6, 11))
  invisible(fil)
}
|
||||
|
||||
#
|
||||
# write splice5 model
|
||||
# [FIXME] positions are hard-coded
|
||||
#
|
||||
|
||||
# Write a 5' splice model to models/splice5.<what>.frq.
#   cons : consensus matrix with rows "a", "c", "g", "t"
#   what : model name
# Columns 13..18 are written before the "splice" marker line and columns
# 20..23 after it, as percentages rounded to integers (positions are
# hard-coded). Returns the file name invisibly.
write.model.splice5 <- function(cons, what) {
  dir.create("models", showWarnings=FALSE)
  fil <- file.path("models", paste0("splice5.", what, ".frq"))
  notify("writing splice5 model:", fil)

  add <- function(...) cat(..., file=fil, append=TRUE)
  add.columns <- function(idx) {
    for (i in idx) add(round(cons[c("a","c","g","t"), i]*100, 0), "\n")
  }

  cat("# 5' splice model :", what, "\n", file=fil)
  add("# A C G T\n")
  add.columns(seq.int(13, 18))
  add("splice\n")
  add.columns(seq.int(20, 23))
  invisible(fil)
}
|
||||
|
||||
#
|
||||
# write splice3/5 uniform model
|
||||
#
|
||||
|
||||
# Write a uniform (null) splice model to models/splice<pos>.<what>.frq.
#   pos  : splice side, used in the file name (e.g. 3 or 5)
#   what : model name
# The file holds two header comment lines, one "25 25 25 25" line, the
# "splice" marker and a second "25 25 25 25" line.
# Returns the file name invisibly.
write.unif.splice <- function(pos, what) {
  dir.create("models", showWarnings=FALSE)
  fil <- paste0("models/splice", pos, ".", what, ".frq")
  notify("writing uniform splice", pos, "model:", fil)
  # the first header line needs its own newline, otherwise the column
  # header below ends up on the same line
  cat("# 3'/5' splice null model\n", file=fil)
  cat("# A C G T\n", file=fil, append=TRUE)
  cat("25 25 25 25\n", file=fil, append=TRUE)
  cat("splice\n", file=fil, append=TRUE)
  cat("25 25 25 25\n", file=fil, append=TRUE)
  invisible(fil)
}
|
||||
|
105
detectors/cds/tools/lib/util.plot.r
Normal file
105
detectors/cds/tools/lib/util.plot.r
Normal file
@ -0,0 +1,105 @@
|
||||
#
|
||||
# R plot utilities
|
||||
#
|
||||
|
||||
#
|
||||
# setup graphic device
|
||||
# tiff: high resolution 600 dpi
|
||||
# pdf
|
||||
#
|
||||
|
||||
# Open a graphic device writing to <fname>.<type>.
#   fname         : file name without extension
#   type          : "pdf" or "tiff"; any other value opens no device
#   width, height : size in inches
#   resol         : resolution in dpi (tiff only)
#   ...           : passed to tiff() / pdf()
# Returns invisibly the value of the device function (NULL if none opened).
uplot.init.dev <- function(fname, type="pdf", width=7, height=7, resol=600, ...) {
  fname <- paste0(fname, ".", type)
  res <- if (type == "tiff") {
    tiff(fname, width=width, height=height, units="in", res=resol, ...)
  } else if (type == "pdf") {
    pdf(fname, width=width, height=height, ...)
  } else {
    NULL
  }
  invisible(res)
}
|
||||
|
||||
#
|
||||
# convert pdf to tiff using ghostscript
|
||||
#
|
||||
|
||||
# Convert <fname>.pdf to <fname>.tif with ghostscript (gs).
#   fname : file name without extension
#   resol : output resolution in dpi
# Returns the value of system() for the gs command.
uplot.convert2tiff <- function(fname, resol=600) {
  infile <- paste0(fname, ".pdf")
  oufile <- paste0(fname, ".tif")
  # each option is a separate word (the resolution used to be glued to
  # -dBATCH); file names are quoted so that spaces survive the shell
  cmd <- paste("echo quit | gs",
               paste0("-r", resol),
               "-dBATCH -dNOPAUSE -sDEVICE=tiff12nc -sCompression=lzw",
               paste0("-sOutputFile=", shQuote(oufile)),
               shQuote(infile))
  system(cmd)
}
|
||||
|
||||
#
|
||||
# default plot setup
|
||||
#
|
||||
|
||||
# Apply the default graphical parameters through par(); every default can
# be overridden and further par() settings given through `...`.
# Returns invisibly the previous values, as par() does.
uplot.setup <- function(mfrow=c(1,1),
                        las=1,
                        mgp=c(2, 0.7, 0),
                        oma=c(0, 0, 0, 0),
                        mar=c(4, 3, 3, 2),
                        cex.main=1,
                        font.main=1,
                        family='Helvetica', ...) {
  previous <- par(mfrow=mfrow,
                  las=las,
                  mgp=mgp,
                  oma=oma,
                  mar=mar,
                  cex.main=cex.main,
                  font.main=font.main,
                  family=family,
                  ...)
  invisible(previous)
}
|
||||
|
||||
#
|
||||
# pie plot
|
||||
#
|
||||
|
||||
# Pie chart with the labels written inside the slices.
#   tab      : named vector of values
#   main     : title text, drawn at main.pos with colour main.col
#   labels   : label parts, among "name", "val" and "per" (percentage),
#              stacked on separate lines
#   text.r   : radius at which the labels are placed
#   text.col, text.cex : label colour and size
#   ...      : passed to pie()
# Returns NULL invisibly.
uplot.pie <- function(tab, main="", labels=c("name", "val", "per"), text.r=0.5, text.col="black", text.cex=1, main.pos=c(0,0), main.col="black", ...) {
  pie(tab, edges=2000, main="", labels="", ...)
  text(main.pos[1], main.pos[2], main, cex=1.5, col=main.col)

  share <- tab/sum(tab)
  # angle of the middle of each slice
  angle <- 2*pi * (cumsum(share) - share/2)

  parts <- list(name=names(tab), val=tab, per=sprintf("%d%%", round(share*100)))
  txt <- apply(data.frame(parts[labels]), 1, paste, collapse="\n")

  if (length(txt) > 0) {
    text(text.r*cos(angle), text.r*sin(angle), txt, cex=text.cex, col=text.col)
  }
  invisible(NULL)
}
|
||||
|
||||
#
|
||||
# plot utility : color representation of a table
|
||||
#
|
||||
|
||||
# Colour image of a table: rows along the x axis, columns along the y axis,
# labelled with the row / column names.
#   tab        : matrix-like table
#   col        : colour scale passed to image()
#   with.lines : draw separator lines between cells
# Returns NULL invisibly.
uplot.table <- function(tab, col=heat.colors(100), with.lines=TRUE) {
  image(as.matrix(tab), xaxt="n", yaxt="n", col=col)

  nli <- nrow(tab)
  nco <- ncol(tab)

  # image() centres the cells on a regular grid over [0, 1]:
  # half a cell width, then the lower edge of every cell
  half.x <- 0.5 / (nli-1)
  half.y <- 0.5 / (nco-1)
  edge.x <- (seq_len(nli)-1)/(nli-1) - half.x
  edge.y <- (seq_len(nco)-1)/(nco-1) - half.y

  if (with.lines) {
    segments(edge.x, -half.y, edge.x, 1+half.y)
    segments(-half.x, edge.y, 1+half.x, edge.y)
  }

  Axis(c(0,1), at=edge.x+half.x, side=1, labels=rownames(tab), las=2, cex.axis=0.5, padj=0)
  Axis(c(0,1), at=edge.y+half.y, side=2, labels=colnames(tab), las=2, cex.axis=0.5, padj=0)
  invisible(NULL)
}
|
||||
|
||||
#
|
||||
# plot utility : identify points within user's rectangle
|
||||
#
|
||||
|
||||
# Let the user click two corners of a rectangle on the current plot, then
# highlight and return the points lying inside it.
#   data : two-column (x, y) matrix-like object, or a plain vector, which is
#          plotted against its index
# Returns the indices of the selected rows.
rect.identify <- function(data) {
  if (is.null(dim(data))) data <- cbind(seq_along(data), data)

  corners <- locator(n=2, type='n')
  xlim <- range(corners$x)
  ylim <- range(corners$y)
  rect(xlim[1], ylim[1], xlim[2], ylim[2], border='red')

  # TRUE when point p = (x, y) lies within the rectangle, borders included
  .inside <- function(p) {
    (p[1] >= xlim[1]) && (p[1] <= xlim[2]) &&
      (p[2] >= ylim[1]) && (p[2] <= ylim[2])
  }

  isel <- which(apply(data, 1, .inside))
  points(data[isel,], col='red', pch=19)
  isel
}
|
||||
|
Reference in New Issue
Block a user