added cds/tools/compare

Former-commit-id: 07fb256bf17db3f0ffc1730b0383f8255fbb9129
Former-commit-id: fd6a1fe72a39c5633c2f9fb6de09af979c2a48f3
This commit is contained in:
alain viari
2015-11-09 01:15:14 +01:00
parent c3c2be254f
commit d77a0dfe67
16 changed files with 12503 additions and 16 deletions

View File

@ -0,0 +1,89 @@
#!/bin/csh -f
#
# Compare the CDS annotation of a reference file to that of a predicted file.
# Annotation files are in Genbank/Embl format; the file extension selects
# the parser pair ($LIB_DIR/<ext>.oneliner.awk and $LIB_DIR/<ext>.cds.awk).
#
# usage: go_compare reference predicted
#
# output on stdout:
#   - "# <count> <status>" summary lines (one per first-level status)
#   - optional ChloroDB-Core statistics about MISSED gene families
#   - the full FILE1 / FILE2 / MATCH report produced by compareCds.awk
#
# NOTE: scratch files (R_ P_ S_ U_ C_ D_ E_ suffixed with the shell pid)
# are created in the current directory, which must therefore be writable.
# NeedArg, NeedFile, Notify, Exit, $Argv, $AwkCmd, $LIB_DIR and $DATA_DIR
# are expected to be provided by csh_init.sh.
#

unsetenv ORG_SOURCED

setenv ORG_HOME `dirname $0`/../../../..
source $ORG_HOME/scripts/csh_init.sh

NeedArg 2

set RefFile = $Argv[1]
set PrdFile = $Argv[2]

NeedFile $RefFile
NeedFile $PrdFile

set RefType = $RefFile:e
set PrdType = $PrdFile:e

#
# parse ref and prediction
#

Notify "get genome info from $RefFile"

$AwkCmd -f $LIB_DIR/$RefType.oneliner.awk $RefFile | \
  $AwkCmd -f $LIB_DIR/libutil.awk -f $LIB_DIR/$RefType.cds.awk > R_$$

Notify "get prediction info from $PrdFile"

$AwkCmd -f $LIB_DIR/$PrdType.oneliner.awk $PrdFile | \
  $AwkCmd -f $LIB_DIR/libutil.awk -f $LIB_DIR/$PrdType.cds.awk > P_$$

#
# compare
#

Notify "compare bank to predictions"

$AwkCmd -f $LIB_DIR/libnws.awk \
        -f $LIB_DIR/compareCds.awk \
        R_$$ P_$$ > S_$$

# base statistics : count MATCH lines per first-level status
# (the '.' -> ' ' translation drops the .BAD_xxx / .WRONG_STOP qualifiers)

grep "^MATCH" S_$$ | tr '.' ' ' | awk '{print $5}' | \
  sort | uniq -c | sort -nr | awk '{print "#",$0}' > U_$$

# add chlorodb/core statistics

if (-d $DATA_DIR/cds/chlorodb/core) then

  # core family names = basenames of the *.fst files.
  # Listing the directory (instead of globbing *.fst) keeps csh from
  # aborting with "No match" when the directory holds no .fst file.
  ls $DATA_DIR/cds/chlorodb/core | grep '\.fst$' | \
    sed -e 's/\.fst$//' | sort > C_$$

  # distinct gene families reported as MISSED
  grep "^MATCH" S_$$ | grep "MISSED" | awk '{print $2}' | sort -u > D_$$

  # MISSED families that belong to the core set
  join D_$$ C_$$ > E_$$

  @ mt = `wc -l < D_$$`
  @ mc = `wc -l < E_$$`
  @ mn = $mt - $mc

  set LC = `cat E_$$`

  echo "#" >> U_$$
  echo "# $mc MISSED in ChloroDB-Core ($LC)" >> U_$$
  echo "# $mn MISSED not in ChloroDB-Core" >> U_$$
  echo "#" >> U_$$
  echo "" >> U_$$
endif

cat S_$$ >> U_$$

cat U_$$

#
# end
#

# remove exactly the scratch files created above (rm -f ignores the ones
# that were never created); no glob, so no unrelated file can be matched
\rm -f R_$$ P_$$ S_$$ U_$$ C_$$ D_$$ E_$$

Exit 0

View File

@ -0,0 +1,98 @@
#
#
#
# Return the smaller of a and b (b when neither is smaller).
function Min(a, b) {
    if (a < b)
        return a
    return b
}
# Return the larger of a and b (b when neither is larger).
function Max(a, b) {
    if (a > b)
        return a
    return b
}
# Percent identity (integer, truncated) between two sequences.
#   s1, s2 : sequences to compare
# Identical strings score 100 without being aligned. Otherwise the
# AlignNWS cost d obtained with the global Identity matrix is converted
# to int((l - d) * 100 / l), l being the length of the longer sequence.
function Align(s1, s2, _local_, d, l) {
    if (s1 != s2) {
        d = AlignNWS(s1, s2, Identity)
        l = Max(length(s1), length(s2))
        return int((l - d) * 100 / l)
    }
    return 100
}
BEGIN {
    # gawk: traverse arrays by ascending numeric index (used by the END report)
    PROCINFO["sorted_in"] = "@ind_num_asc"
    # substitution matrix shared by all Align() calls
    IdentityMatrix("ABCDEFGHIJKLMNOPQRSTUVWXYZ*", Identity)
}

# gawk: number the input files in command-line order
BEGINFILE {
    File[++NFile] = FILENAME
}

# skip comment / header lines
/^#/ { next }

# Store one CDS record; records are indexed by their stop coordinate
# ("to" on strand D, "from" otherwise). A later record of the same file
# with the same stop replaces the earlier one in Indx1 / Indx2.
{
    strand = $5
    if (strand == "D")
        stop = $4
    else
        stop = $3
    Stop[stop]++

    i = ++NRec[NFile]

    Rec[NFile][i]["genefam"] = $1
    Rec[NFile][i]["gene"]    = $2
    Rec[NFile][i]["from"]    = $3
    Rec[NFile][i]["to"]      = $4
    Rec[NFile][i]["strand"]  = $5
    Rec[NFile][i]["nexon"]   = $6
    Rec[NFile][i]["length"]  = $7
    Rec[NFile][i]["protseq"] = $9
    Rec[NFile][i]["record"]  = $0

    if (NFile != 1)
        Indx2[stop] = i
    else
        Indx1[stop] = i
}
# Report, for every stop coordinate seen in either file:
#   FILE1 <record>|NONE
#   FILE2 <record>|NONE
#   MATCH <genefam> ID <percent> <status>
# followed by an empty line.
END {
    for (st in Stop) {
        i1 = Indx1[st]
        i2 = Indx2[st]

        print (i1 ? "FILE1 " Rec[1][i1]["record"] : "FILE1 NONE")
        print (i2 ? "FILE2 " Rec[2][i2]["record"] : "FILE2 NONE")

        if (i1 && i2) {
            # same stop in both files : grade the prediction on protein identity
            fm = Rec[1][i1]["genefam"]
            id = Align(Rec[1][i1]["protseq"], Rec[2][i2]["protseq"])
            printf("MATCH %s ID %d ", fm, id)

            if (id == 100) {
                status = "CORRECT"
            }
            else {
                status = (id >= 90) ? "ALMOST_CORRECT" : (id >= 80) ? "ACCEPTABLE" : "WRONG"

                # qualify the discrepancy : exon count, then start vs junction
                if (Rec[1][i1]["nexon"] != Rec[2][i2]["nexon"])
                    status = status ".BAD_NBEXON"

                start1 = Rec[1][i1]["strand"] == "D" ? Rec[1][i1]["from"] : Rec[1][i1]["to"]
                start2 = Rec[2][i2]["strand"] == "D" ? Rec[2][i2]["from"] : Rec[2][i2]["to"]

                status = status ((start1 != start2) ? ".BAD_START" : ".BAD_JUNCTION")
            }
            print status
        }
        else if (i1) {
            # stop only present in the first file
            fm = Rec[1][i1]["genefam"]
            print "MATCH " fm " ID 0 MISSED.WRONG_STOP"
        }
        else if (i2) {
            # stop only present in the second file
            fm = Rec[2][i2]["genefam"]
            print "MATCH " fm " ID 0 OVERPRED.WRONG_STOP"
        }

        print ""
    }
}

View File

@ -0,0 +1,80 @@
#
# get cds features from embl (short version)
#
# @include lib.embl.awk
# Extract one line per CDS feature from a one-lined EMBL entry.
# Output columns (space separated):
#   genefam gene from to strand nexon length status protseq product
#
# NOTE(review): the patterns below are column-anchored; confirm that the
# run of blanks in each regex matches the EMBL flat-file layout.

# Print the CDS record currently being collected.
function _PrintCdsRecord() {
    print GeneFamily(gene), gene, sloc[1], sloc[2], (revstrand ? "R" : "D"),
          nexon, len, (ok ? "Ok" : "Error"), translation, product
}

BEGIN {
    if (MAXSPAN == "") MAXSPAN = 10000
    print "#genefam gene from to strand nexon length status protseq product"
}

# start of a CDS feature : parse its location and reset the qualifiers
/^FT CDS/ {
    # a CDS directly following another CDS never reaches the generic
    # feature rule below (this rule ends with next), so flush it here
    if (incds) _PrintCdsRecord()

    revstrand = match($3, "^complement")
    s = substr($0, 22)
    gsub("^complement", "", s)
    # a nested complement or an order() location is flagged as an error
    ok = ! match(s, "complement|order")
    nexon = Nexons(s)
    SpanLocation(s, sloc)
    spanlen = sloc[2] - sloc[1] + 1
    len = LenLocation(s)
    ok = ok && (len < MAXSPAN)
    # NOTE(review): seq is not assigned anywhere in this file and
    # spanlen / cdsseq / cstart / cstop are not read here - presumably
    # leftovers of the long version; confirm before removing
    cdsseq = ok ? SeqLocation(seq, s, revstrand) : "XXX"
    cstart = substr(cdsseq, 1,3)
    cstop = substr(cdsseq, length(cdsseq)-2)
    gene = "none"
    locustag = "none"
    product = "none"
    translation = "X"
    incds = 1
    next
}

# any other feature key closes the pending CDS
(incds && /^FT [^ ]/) {
    _PrintCdsRecord()
    incds = 0
    next
}

# qualifiers : the value is everything after the first "=", so that
# values which themselves contain "=" are kept whole
/^FT \/gene=/ {
    gene = substr($0, index($0, "=") + 1)
    gsub("^[^a-z,A-Z]+", "", gene)
    gsub("\"", "", gene)
    gsub(" ", "_", gene)
    next
}

/^FT \/locus_tag=/ {
    locustag = substr($0, index($0, "=") + 1)
    gsub("\"", "", locustag)
    gsub(" ", "_", locustag)
    next
}

/^FT \/product=/ {
    product = substr($0, index($0, "=") + 1)
    gsub("\"", "", product)
    gsub(" ", "_", product)
    next
}

/^FT \/translation=/ {
    translation = substr($0, index($0, "=") + 1)
    gsub("\"", "", translation)
    gsub(" ", "", translation)
    next
}

# flush the last CDS when no other feature followed it
END {
    if (incds) _PrintCdsRecord()
}

View File

@ -0,0 +1,28 @@
#
# embl oneLiner
#
# Join the continuation lines of an EMBL entry so that every header field
# and every feature / qualifier ends up on a single output line.
#
# NOTE(review): the patterns are column-anchored; confirm that the run of
# blanks in each regex matches the EMBL flat-file layout.
{
    # entering the feature table
    if ($0 ~ /^FT /)
        InFeat = 1

    # header section : same line code as the previous line (except CC)
    # -> append the text found from column 6, separated by one blank
    if ((InFeat == 0) && ($1 != "CC") && ($1 == pkey) && ($0 ~ /^.. [^ ]/)) {
        line = line " " substr($0, 6)
        next
    }

    # feature table : continuation line -> append the text found from
    # column 22, with no separator
    if ((InFeat == 1) && ($0 ~ /^FT [^\/]/)) {
        line = line substr($0, 22)
        next
    }

    # any other line starts a new logical line : emit the pending one
    if (line != "") print line
    line = $0
    pkey = $1
}

END {
    if (line != "") print line
}

View File

@ -0,0 +1,84 @@
#
# get cds features from genbank (short version)
#
# @include lib.gbk.awk
# Extract one line per CDS feature from a one-lined Genbank entry.
# Output columns (space separated):
#   genefam gene from to strand nexon length status protseq product
#
# NOTE(review): the patterns below are column-anchored; confirm that the
# run of blanks in each regex matches the Genbank flat-file layout.

# Print the CDS record currently being collected.
function _PrintCdsRecord() {
    print GeneFamily(gene), gene, sloc[1], sloc[2], (revstrand ? "R" : "D"),
          nexon, len, (ok ? "Ok" : "Error"), translation, product
}

BEGIN {
    if (MAXSPAN == "") MAXSPAN = 10000
    print "#genefam gene from to strand nexon length status protseq product"
}

# start of a CDS feature : parse its location and reset the qualifiers
/^ CDS/ {
    # a CDS directly following another CDS never reaches the generic
    # feature rule below (this rule ends with next), so flush it here
    if (incds) _PrintCdsRecord()

    revstrand = match($2, "^complement")
    s = substr($0, 22)
    gsub("^complement", "", s)
    # a nested complement or an order() location is flagged as an error
    ok = ! match(s, "complement|order")
    nexon = Nexons(s)
    SpanLocation(s, sloc)
    spanlen = sloc[2] - sloc[1] + 1
    len = LenLocation(s)
    ok = ok && (len < MAXSPAN)
    # NOTE(review): seq is not assigned anywhere in this file and
    # spanlen / cdsseq / cstart / cstop are not read here - presumably
    # leftovers of the long version; confirm before removing
    cdsseq = ok ? SeqLocation(seq, s, revstrand) : "XXX"
    cstart = substr(cdsseq, 1,3)
    cstop = substr(cdsseq, length(cdsseq)-2)
    gene = "none"
    locustag = "none"
    product = "none"
    translation = "X"
    incds = 1
    next
}

# any other feature key closes the pending CDS
(incds && /^ [^ ]/) {
    _PrintCdsRecord()
    incds = 0
    next
}

# qualifiers : the value is everything after the first "=", so that
# values which themselves contain "=" are kept whole
/^ \/gene=/ {
    gene = substr($0, index($0, "=") + 1)
    gsub("^[^a-z,A-Z]+", "", gene)
    gsub("\"", "", gene)
    gsub(" ", "_", gene)
    next
}

/^ \/locus_tag=/ {
    locustag = substr($0, index($0, "=") + 1)
    gsub("\"", "", locustag)
    gsub(" ", "_", locustag)
    next
}

/^ \/product=/ {
    product = substr($0, index($0, "=") + 1)
    gsub("\"", "", product)
    gsub(" ", "_", product)
    next
}

/^ \/translation=/ {
    translation = substr($0, index($0, "=") + 1)
    gsub("\"", "", translation)
    gsub(" ", "", translation)
    next
}

# end of entry
/^\/\// {
    locus = "?"
}

# flush the last CDS when no other feature followed it
END {
    if (incds) _PrintCdsRecord()
}

View File

@ -0,0 +1,29 @@
#
# genbank oneLiner
#
# Join the continuation lines of a Genbank entry so that every header
# field and every feature / qualifier ends up on a single output line.
#
# NOTE(review): the patterns are column-anchored; confirm that the run of
# blanks in each regex matches the Genbank flat-file layout.

# header continuation : append the text found from column 12
/^ [^ ]/ {
    line = line "" substr($0, 12)
    next
}

# feature continuation (not a new /qualifier) : append from column 21
/^ [^\/]/ {
    line = line "" substr($0, 21)
    next
}

# ORGANISM starts a new logical line; ";" separates the organism name
# from the continuation text appended afterwards
/^ ORGANISM/ {
    if (line != "") print line
    line = $0 ";"
    next
}

# any other line starts a new logical line : emit the pending one
{
    if (line != "") print line
    line = $0
    next
}

# emit the last pending line, if any (nothing is printed on empty input,
# as in the EMBL one-liner)
END {
    if (line != "") print line
}

View File

@ -0,0 +1,146 @@
#
# NWS alignment
#
# -----------------------
# Build an identity substitution matrix for a given alphabet.
#   alpha : string of symbols (converted to upper case)
#   mat   : output, mat[x][y] for every pair of symbols of alpha
#   diag  : value stored when x and y are the same position (default 0)
#   ndiag : value stored otherwise (default 1)
# Any previous content of mat is discarded.
#
function IdentityMatrix(alpha, mat, diag, ndiag, _local_, n, i, j, ci, cj) {
    if (diag == "") diag = 0
    if (ndiag == "") ndiag = 1

    delete mat

    alpha = toupper(alpha)
    n = length(alpha)

    for (i = 1 ; i <= n ; i++) {
        ci = substr(alpha, i, 1)
        for (j = 1 ; j <= n ; j++) {
            cj = substr(alpha, j, 1)
            if (i == j)
                mat[ci][cj] = diag
            else
                mat[ci][cj] = ndiag
        }
    }
}
# -----------------------
# internal utility : return string s with its characters in reverse order
#
function _Reverse(s, _local_, i, n, rs) {
    rs = ""
    i = length(s)
    while (i >= 1) {
        rs = rs substr(s, i, 1)
        i--
    }
    return rs
}
# -----------------------
# internal utility : alignment traceback for NWS
#   dir    : direction matrix ("s", "i", "d"; 0 marks the origin)
#   s1, s2 : the aligned sequences
#   n1, n2 : cell of dir where the walk starts
#   align  : output
#            align[1] = s1 with gaps, align[2] = s2 with gaps,
#            align[3] = lower-cased symbol on identity, "*" on
#                       mismatch, "-" on gap
#
function _Traceback(dir, s1, s2, n1, n2, align, _local_, i, c1, c2, c3) {
    delete align

    # walk back to the origin; the rows are built right-to-left
    while (dir[n1][n2] != 0) {
        if (dir[n1][n2] == "s") {
            # substitution : consume one symbol of each sequence
            c1 = substr(s1, n1, 1)
            c2 = substr(s2, n2, 1)
            c3 = (c1 == c2) ? tolower(c1) : "*"
            n1--
            n2--
        }
        else if (dir[n1][n2] == "i") {
            # insertion : gap in s1, consume one symbol of s2
            c1 = "-"
            c2 = substr(s2, n2, 1)
            c3 = "-"
            n2--
        }
        else {
            # deletion : consume one symbol of s1, gap in s2
            c1 = substr(s1, n1, 1)
            c2 = "-"
            c3 = "-"
            n1--
        }
        align[1] = align[1] c1
        align[2] = align[2] c2
        align[3] = align[3] c3
    }

    # put the three rows back in left-to-right order
    for (i = 1 ; i <= 3 ; i++)
        align[i] = _Reverse(align[i])
}
# -----------------------
# internal utility : smaller of a and b (b when neither is smaller)
#
function _Min(a, b) {
    if (a < b)
        return a
    return b
}
# -----------------------
# sequence alignment NWS
#
# Align s1 and s2 by dynamic programming, minimizing a cost, and return
# the cost stored in the last cell of the matrix (lower is better).
#
#   s1, s2 : sequences (converted to upper case)
#   subst  : substitution costs, subst[x][y]
#   indel  : cost of one insertion / deletion (default 1)
#   local  : when true, s2 symbols left of and right of the aligned s1
#            are not charged; the shorter sequence is used as s1
#   align  : optional output, filled by _Traceback
#            (align[1], align[2] : gapped sequences, align[3] : match row)
#
# todo : check alphabet
# NOTE(review): a symbol absent from subst yields an empty value, which
# counts as cost 0 in the sum below - confirm this is acceptable.
#
# --> i
# |
# v d
#
function AlignNWS(s1, s2, subst, indel, local, align,
_local_, rev, n1, n2, i, j, c1, c2, m2,
ws, wi, wd, w,
mat, dir) {
s1 = toupper(s1) ; s2 = toupper(s2)
n1 = length(s1) ; n2 = length(s2)
# local mode : make s1 the shorter sequence. rev is first used as swap
# buffer and finally holds the former n1, a non-zero value that later
# signals that the output rows have to be swapped back.
if (local && (n2 < n1)) {
rev = s1 ; s1 = s2 ; s2 = rev
rev = n1 ; n1 = n2 ; n2 = rev
}
if (indel == "") indel = 1
#
# nws alignment
#
# mat[i][j] : cost of aligning the first i symbols of s1 with the first
#             j symbols of s2
# dir[i][j] : move that produced it
#             "s" substitution, "i" insertion (gap in s1),
#             "d" deletion (gap in s2), 0 at the origin
for (i = 0 ; i <= n1 ; i++) {
c1 = substr(s1, i, 1)
for (j = 0 ; j <= n2 ; j++) {
c2 = substr(s2, j, 1)
if (i && j) {
ws = mat[i-1][j-1] + subst[c1][c2]
wd = mat[i-1][j] + indel
wi = mat[i][j-1] + indel
w = _Min(ws, _Min(wi, wd))
mat[i][j] = w
# on ties : substitution first, then insertion, then deletion
dir[i][j] = (w == ws) ? "s" : (w == wi) ? "i" : "d"
} else if (i) {
# first column : only deletions
mat[i][j] = mat[i-1][j] + indel
dir[i][j] = "d"
} else if (j) {
# first row : only insertions, free of charge in local mode
mat[i][j] = mat[i][j-1] + (local ? 0 : indel)
dir[i][j] = "i"
} else {
mat[i][j] = 0
dir[i][j] = 0
}
}
# delete mat[i-1]
}
#
# adjust last line in local mode
#
# m2 = column of the last row with the lowest cost (the rightmost one on
# ties); its cost becomes the final cost and the cells to its right are
# turned into insertions so that the traceback reaches it from [n1][n2]
if (local) {
m2 = n2
for (j = m2 ; j >= 0 ; j--) {
if (mat[n1][j] < mat[n1][m2])
m2 = j
}
mat[n1][n2] = mat[n1][m2]
for (j = m2 + 1 ; j <= n2 ; j++) {
dir[n1][j] = "i"
}
}
#
# traceback
#
_Traceback(dir, s1, s2, n1, n2, align)
# sequences were swapped above : restore the caller's order
if (rev) {
rev = align[1] ; align[1] = align[2]; align[2] = rev
}
return mat[n1][n2]
}

View File

@ -0,0 +1,123 @@
#
# utilities library
#
# Return the smaller of a and b (b when neither is smaller).
function Min(a, b) {
    if (a < b)
        return a
    return b
}
# Return the larger of a and b (b when neither is larger).
function Max(a, b) {
    if (a > b)
        return a
    return b
}
# Reduce a feature location to its bare "from..to,from..to" form :
# removes the words complement / join / order, parentheses, the < and >
# markers and blanks, all in a single substitution pass.
# Returns the stripped string.
function Strip(s) {
gsub("complement|join|order|\\)|\\(|<|>| ", "", s)
return s
}
# Parse one "from..to" span into loc[1] <= loc[2].
#   s   : a single stripped span, e.g. "100..250" or "467"
#   loc : output array
# A single position "467" gives loc[1] = loc[2] = 467 and an empty
# string gives loc[1] = loc[2] = 0. (split() empties its target array,
# so these defaults have to be applied after the split, not before.)
function GetLoc(s, loc, _local_, n, tmp) {
    delete loc
    n = split(s, loc, "\\.\\.")
    if (n == 0)
        loc[1] = loc[2] = 0
    else if (n == 1)
        loc[2] = loc[1]
    # keep the bounds in ascending order
    if (loc[1] > loc[2]) {
        tmp = loc[1]
        loc[1] = loc[2]
        loc[2] = tmp
    }
}
# Split a feature location into its spans.
#   s    : location string (stripped with Strip before parsing)
#   locs : output, locs[i][1] / locs[i][2] = bounds of the i-th span
# Returns the number of spans.
function ParseLocation(s, locs, _local_, i, na, a, loc) {
    delete locs
    na = split(Strip(s), a, ",")
    i = 0
    while (++i <= na) {
        GetLoc(a[i], loc)
        locs[i][1] = loc[1]
        locs[i][2] = loc[2]
    }
    return na
}
# Compute the overall extent of a feature location.
#   s    : location string
#   sloc : output, sloc[1] = lowest bound, sloc[2] = highest bound over
#          all spans (both 0 when the location has no span)
function SpanLocation(s, sloc, _local_, i, na, locs) {
    delete sloc
    na = ParseLocation(s, locs)

    if (na > 0) {
        sloc[1] = locs[1][1]
        sloc[2] = locs[1][2]
    }
    else {
        sloc[1] = 0
        sloc[2] = 0
    }

    # widen with the remaining spans
    for (i = 2 ; i <= na ; i++) {
        sloc[1] = Min(sloc[1], locs[i][1])
        sloc[2] = Max(sloc[2], locs[i][2])
    }
}
# Return the cumulated length of all the spans of location s
# (each span counts to - from + 1).
function LenLocation(s, _local_, i, na, locs, len) {
    na = ParseLocation(s, locs)
    len = 0
    i = 1
    while (i <= na) {
        len += locs[i][2] - locs[i][1] + 1
        i++
    }
    return len
}
# Return the number of comma-separated spans of location s.
function Nexons(s, _local_, a) {
    return split(Strip(s), a, ",")
}
# Return string s with its characters in reverse order.
function Reverse(s, _local_, i, n, rs) {
    rs = ""
    i = length(s)
    while (i >= 1) {
        rs = rs substr(s, i, 1)
        i--
    }
    return rs
}
# Return the reverse complement of a DNA sequence.
# Symbols are complemented through the _DnaC table; a symbol without
# entry in the table is replaced by "X".
function RevComplement(seq, _local_, n, i, c, rs) {
    rs = ""
    i = length(seq)
    while (i >= 1) {
        c = _DnaC[substr(seq, i, 1)]
        rs = rs (c ? c : "X")
        i--
    }
    return rs
}
# Return seq[from..to] (1-based, inclusive), reverse-complemented when
# revstrand is true.
function SubSeq(seq, from, to, revstrand) {
    seq = substr(seq, from, to - from + 1)
    return (revstrand ? RevComplement(seq) : seq)
}
# Return the sequence covered by location s.
#   seq       : the full sequence
#   s         : location string
#   revstrand : when true, the result is reverse-complemented
# The spans are concatenated in the order given by the location, then
# the whole is reverse-complemented if requested.
function SeqLocation(seq, s, revstrand, _local_, sloc, i, na, locs) {
    na = ParseLocation(s, locs)
    sloc = ""
    i = 1
    while (i <= na) {
        sloc = sloc SubSeq(seq, locs[i][1], locs[i][2], 0)
        i++
    }
    if (revstrand)
        return RevComplement(sloc)
    return sloc
}
# Derive a gene family name from a gene name.
# The name is lower-cased, then trailing decorations are removed in
# this order : "_<digits>" / "-<digits>", "_" or "-" followed by letters
# among a, b, c, i, quotes, and everything from the first "/".
# Returns "none" when the result still holds a character outside
# a-z, 0-9 and ",".
function GeneFamily(s) {
    s = tolower(s)
    # every pattern is anchored at the end : at most one match each
    sub("(_|-)[0-9]+$", "", s)
    sub("(_|-)(a|b|c|i)+$", "", s)
    sub("'+$", "", s)
    sub("/.+$", "", s)
    if (match(s, "[^a-z,0-9]"))
        return "none"
    return s
}
#
# constants
#
BEGIN {
    # complementary of _IupacDna : _DnaC[symbol] = complement symbol,
    # given for upper and lower case
    _DnaC["A"] = "T"; _DnaC["a"] = "t"
    _DnaC["C"] = "G"; _DnaC["c"] = "g"
    _DnaC["G"] = "C"; _DnaC["g"] = "c"
    _DnaC["T"] = "A"; _DnaC["t"] = "a"
    _DnaC["R"] = "Y"; _DnaC["r"] = "y"
    _DnaC["Y"] = "R"; _DnaC["y"] = "r"
    _DnaC["M"] = "K"; _DnaC["m"] = "k"
    _DnaC["K"] = "M"; _DnaC["k"] = "m"
    _DnaC["W"] = "W"; _DnaC["w"] = "w"
    _DnaC["S"] = "S"; _DnaC["s"] = "s"
    _DnaC["B"] = "V"; _DnaC["b"] = "v"
    _DnaC["V"] = "B"; _DnaC["v"] = "b"
    _DnaC["D"] = "H"; _DnaC["d"] = "h"
    _DnaC["H"] = "D"; _DnaC["h"] = "d"
    _DnaC["N"] = "N"; _DnaC["n"] = "n"
    _DnaC["X"] = "X"; _DnaC["x"] = "x"
}

View File

@ -0,0 +1,21 @@
#!/bin/csh -f
#
# Regression test for go_compare.sh.
# Runs go_compare.sh on ref.gbk and pred.embl and compares its output
# (test.bak) to the expected output (test.ref). The data file names
# are relative, so the test is to be run from the directory holding them.
# Exit status : the status of diff (0 when the outputs are identical).
#
setenv ORG_HOME `dirname $0`/../../../../..
source $ORG_HOME/scripts/csh_init.sh
echo "+ testing go_compare.sh"
`dirname $0`/../go_compare.sh ref.gbk pred.embl > test.bak
# only the exit status of diff is used; its output is discarded
diff -q test.bak test.ref >& /dev/null
set stat = $status
# NOTE(review): $VTC presumably holds terminal color sequences set up
# by csh_init.sh - confirm
if ($stat == 0) then
echo "+ $VTC[3]test Ok$VTC[1]"
# the output is removed on success only; it is kept on failure
\rm -r test.bak
else
echo "* $VTC[2]test Failure$VTC[1]"
endif
exit $stat

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,367 @@
# 65 CORRECT
# 13 ALMOST_CORRECT
# 5 MISSED
# 3 WRONG
# 2 OVERPRED
# 1 ACCEPTABLE
#
# 0 MISSED in ChloroDB-Core ()
# 3 MISSED not in ChloroDB-Core
#
FILE1 psba psbA 540 1601 R 1 1062 Ok MTAILERRESESLWGRFCNWITSTENRLYIGWFGVLMIPTLLTATSVFIIAFIAAPPVDIDGIREPVSGSLLYGNNIISGAIIPTSAAIGLHFYPIWEAASVDEWLYNGGPYELIVLHFLLGVACYMGREWELSFRLGMRPWIAVAYSAPVAAATAVFLIYPIGQGSFSDGMPLGISGTFNFMIVFQAEHNILMHPFHMLGVAGVFGGSLFSAMHGSLVTSSLIRETTENESANEGYRFGQEEETYNIVAAHGYFGRLIFQYASFNNSRSLHFFLAAWPVVGIWFTALGISTMAFNLNGFNFNQSVVDSQGRVINTWADIINRANLGMEVMHERNAHNFPLDLAAIEAPSTNG photosystem_II_protein_D1
FILE2 psba psbA 540 1601 R 1 1062 Ok MTAILERRESESLWGRFCNWITSTENRLYIGWFGVLMIPTLLTATSVFIIAFIAAPPVDIDGIREPVSGSLLYGNNIISGAIIPTSAAIGLHFYPIWEAASVDEWLYNGGPYELIVLHFLLGVACYMGREWELSFRLGMRPWIAVAYSAPVAAATAVFLIYPIGQGSFSDGMPLGISGTFNFMIVFQAEHNILMHPFHMLGVAGVFGGSLFSAMHGSLVTSSLIRETTENESANEGYRFGQEEETYNIVAAHGYFGRLIFQYASFNNSRSLHFFLAAWPVVGIWFTALGISTMAFNLNGFNFNQSVVDSQGRVINTWADIINRANLGMEVMHERNAHNFPLDLAAIEAPSTNG photosystem_II_protein_D1
MATCH psba ID 100 CORRECT
FILE1 matk matK 2127 3656 R 1 1530 Ok MEEIHRYLQPDSSQQHNFLYPLIFQEYIYALAQDHGLNRNRSILLENSGYNNKFSFLIVKRLITRMDQQNHLIISTNDSNKNPFLGCNKSLYSQMISEGFACIVEIPFSIRLISSLSSFEGKKIFKSHNLRSIHSTFPFLEDNFSHLNYVLDILIPYPVHLEILVQTLRYWVKDASSLHLLRFFLHEYCNLNSLITSKKPGYSFSKKNQRFFFFLYNSYVYECESTFVFLRNQSSHLRSTSFGALLERIYFYGKIERLVEAFAKDFQVTLWLFKDPVMHYVRYEGKSILASKGTFPWMNKWKFYLVNFWQCHFSMYFNTGRIHINQLSNHSRDFMGYLSSVRLNHSMVRSQMLENSFLINNPIKKFDTLVPIIPLIGSLAKAHFCTGLGHPISKPVWSDLSDSDIIDRFGRICRNLFHYYSGSSKKKTLYRIKYILRLSCARTLARKHKSTVRTFLKRSGSELLEEFLTSEEEVLSLTFPRASSSLWGVYRSRIWYLDIFCINDLANSQ maturase
FILE2 matk matK 2127 3656 R 1 1530 Ok MEEIHRYLQPDSSQQHNFLYPLIFQEYIYALAQDHGLNRNRSILLENSGYNNKFSFLIVKRLITRMDQQNHLIISTNDSNKNPFLGCNKSLYSQMISEGFACIVEIPFSIRLISSLSSFEGKKIFKSHNLRSIHSTFPFLEDNFSHLNYVLDILIPYPVHLEILVQTLRYWVKDASSLHLLRFFLHEYCNLNSLITSKKPGYSFSKKNQRFFFFLYNSYVYECESTFVFLRNQSSHLRSTSFGALLERIYFYGKIERLVEAFAKDFQVTLWLFKDPVMHYVRYEGKSILASKGTFPWMNKWKFYLVNFWQCHFSMYFNTGRIHINQLSNHSRDFMGYLSSVRLNHSMVRSQMLENSFLINNPIKKFDTLVPIIPLIGSLAKAHFCTGLGHPISKPVWSDLSDSDIIDRFGRICRNLFHYYSGSSKKKTLYRIKYILRLSCARTLARKHKSTVRTFLKRSGSELLEEFLTSEEEVLSLTFPRASSSLWGVYRSRIWYLDIFCINDLANSQ maturase_K
MATCH matk ID 100 CORRECT
FILE1 rps16 rps16 4937 6067 R 2 267 Ok MVKLRLKRCGRKQRAVYRIVAIDVRSRREGKDLQKVGFYDPIKNQTYLNVPAILYFLEKGAQPTETVQDILKKAEVFKELRLNQPKFN ribosomal_protein_S16
FILE2 rps16 rps16 4937 6067 R 2 267 Ok MVKLRLKRCGRKQRAVYRIVAIDVRSRREGKDLQKVGFYDPIKNQTYLNVPAILYFLEKGAQPTETVQDILKKAEVFKELRLNQPKFN ribosomal_protein_S16
MATCH rps16 ID 100 CORRECT
FILE1 psbk psbK 7587 7772 D 1 186 Ok MLNTFSLIGICLNSTLYSSSFFFGKLPEAYAFLNPIVDIMPVIPLFFFLLAFVWQAAVSFR photosystem_II_protein_K
FILE2 psbk psbK 7587 7772 D 1 186 Ok MLNTFSLIGICLNSTLYSSSFFFGKLPEAYAFLNPIVDIMPVIPLFFFLLAFVWQAAVSFR photosystem_II_protein_K
MATCH psbk ID 100 CORRECT
FILE1 psbi psbI 8134 8244 D 1 111 Ok MLTLKLFVYTVVIFFVSLFIFGFLSNDPGRNPGREE photosystem_II_protein_I
FILE2 psbi psbI 8083 8244 D 1 162 Ok MIYSLFFFQKNHLGDCVMLTLKLFVYTVVIFFVSLFIFGFLSNDPGRNPGREE photosystem_II_protein_I
MATCH psbi ID 67 WRONG.BAD_START
FILE1 atpa atpA 10224 11747 R 1 1524 Ok MVTIRADEISNIIRERIEQYNREVKIVNTGTVLQVGDGIARIHGLDEVMAGELVEFEEGTIGIALNLESNNVGVVLMGDGLLIQEGSSVKATGRIAQIPVSEAYLGRVVNALAKPIDGRGEISASEFRLIESAAPGIISRRSVYEPLQTGLIAIDSMIPIGRGQRELIIGDRQTGKTAVATDTILNQQGQNVICVYVAIGQKASSVAQVVTTLQERGAMEYTIVVAETADSPATLQYLAPYTGAALAEYFMYRERHTLIIYDDLSKQAQAYRQMSLLLRRPPGREAYPGDVFYLHSRLLERAAKLSSSLGEGSMTALPIVETQSGDVSAYIPTNVISITDGQIFLSADLFNSGIRPAINVGISVSRVGSAAQIKAMKQVAGKLKLELAQFAELEAFAQFASDLDKATQNQLARGQRLRELLKQSQSAPLTVEEQIMTIYTGTNGYLDSLEVGQVRKFLVELRTYLKTTKPQFQEIISSTKTFTEEAEALLKEAIQEQMDRFILQEQA ATP_synthase_CF1_alpha_chain
FILE2 atpa atpA 10224 11747 R 1 1524 Ok MVTIRADEISNIIRERIEQYNREVKIVNTGTVLQVGDGIARIHGLDEVMAGELVEFEEGTIGIALNLESNNVGVVLMGDGLLIQEGSSVKATGRIAQIPVSEAYLGRVVNALAKPIDGRGEISASEFRLIESAAPGIISRRSVYEPLQTGLIAIDSMIPIGRGQRELIIGDRQTGKTAVATDTILNQQGQNVICVYVAIGQKASSVAQVVTTLQERGAMEYTIVVAETADSPATLQYLAPYTGAALAEYFMYRERHTLIIYDDLSKQAQAYRQMSLLLRRPPGREAYPGDVFYLHSRLLERAAKLSSSLGEGSMTALPIVETQSGDVSAYIPTNVISITDGQIFLSADLFNSGIRPAINVGISVSRVGSAAQIKAMKQVAGKLKLELAQFAELEAFAQFASDLDKATQNQLARGQRLRELLKQSQSAPLTVEEQIMTIYTGTNGYLDSLEVGQVRKFLVELRTYLKTTKPQFQEIISSTKTFTEEAEALLKEAIQEQMDRFILQEQA ATP_synthase_CF1_alpha_subunit
MATCH atpa ID 100 CORRECT
FILE1 atpf atpF 11803 13043 R 2 555 Ok MKNVTDSFVSLGHWPSAGSFGFNTDILATNPINLSVVLGVLIFFGKGVLSDLLDNRKQRILNTIRNSEELRGGAIEQLEKARSRLRKVETEAEQFRVNGYSEIEREKLNLINSTYKTLEQLENYKNETIQFEQQRAINQVRQRVFQQALRGALGTLNSCLNNELHLRTISANIGMLGTMKEITD ATP_synthase_CF0_B_chain
FILE2 atpf atpF 11803 13043 R 2 555 Ok MKNVTDSFVSLGHWPSAGSFGFNTDILATNPINLSVVLGVLIFFGKGVCGDLLDNRKQRILNTIRNSEELRGGAIEQLEKARSRLRKVETEAEQFRVNGYSEIEREKLNLINSTYKTLEQLENYKNETIQFEQQRAINQVRQRVFQQALRGALGTLNSCLNNELHLRTISANIGMLGTMKEITD ATP_synthase_CF0_B_subunit
MATCH atpf ID 98 ALMOST_CORRECT.BAD_JUNCTION
FILE1 atph atpH 13442 13687 R 1 246 Ok MNPLISAASVIAAGLAVGLASIGPGVGQGTAAGQAVEGIARQPEAEGKIRGTLLLSLAFMEALTIYGLVVALALLFANPFV ATP_synthase_CF0_C_chain
FILE2 atph atpH 13442 13687 R 1 246 Ok MNPLISAASVIAAGLAVGLASIGPGVGQGTAAGQAVEGIARQPEAEGKIRGTLLLSLAFMEALTIYGLVVALALLFANPFV ATP_synthase_CF0_C_subunit
MATCH atph ID 100 CORRECT
FILE1 atpi atpI 14845 15588 R 1 744 Ok MNVLSCSINTLKGLYDISGVEVGQHFYWQIGGFQVHGQVLITSWVVIAILLGSATIAVRNPQTIPTGGQNFFEYVLEFIRDVSKTQIGEEYGPWVPFIGTMFLFIFVSNWSGALLPWKIIQLPHGELAAPTNDINTTVALALLTSVAYFYAGLTKRGLGYFGKYIQPTPILLPINILEDFTKPLSLSFRLFGNILADELVVVVLVSLVPLVVPIPVMLLGLFTSGIQALIFATLAAAYIGESMEGHH ATP_synthase_CF0_A_chain
FILE2 atpi atpI 14845 15588 R 1 744 Ok MNVLSCSINTLKGLYDISGVEVGQHFYWQIGGFQVHGQVLITSWVVIAILLGSATIAVRNPQTIPTGGQNFFEYVLEFIRDVSKTQIGEEYGPWVPFIGTMFLFIFVSNWSGALLPWKIIQLPHGELAAPTNDINTTVALALLTSVAYFYAGLTKRGLGYFGKYIQPTPILLPINILEDFTKPLSLSFRLFGNILADELVVVVLVSLVPLVVPIPVMLLGLFTSGIQALIFATLAAAYIGESMEGHH ATP_synthase_CF0_A_subunit
MATCH atpi ID 100 CORRECT
FILE1 rps2 rps2 15825 16535 R 1 711 Ok MTRRYWNINLEEMMEAGVHFGHGTRKWNPKMAPYISAKRKGIHITNLTRTARFLSEACDLVFDAASRGKQFLIVGTKNKAADSVEWAAIRARCHYVNKKWLGGMLTNWSTTETRLHKFRDLRMEQKTGRLNRLPKRDAAMLKRQLSRLQTYLGGIKYMTGVPDIVIIVDQHEEYTALRECITLGIPTICLTDTNCDPDLADISIPANDDAISSIRLILNKLVFAICEGRSSYIRNP ribosomal_protein_S2
FILE2 rps2 rps2 15825 16535 R 1 711 Ok MTRRYWNINLEEMMEAGVHFGHGTRKWNPKMAPYISAKRKGIHITNLTRTARFLSEACDLVFDAASRGKQFLIVGTKNKAADSVEWAAIRARCHYVNKKWLGGMLTNWSTTETRLHKFRDLRMEQKTGRLNRLPKRDAAMLKRQLSRLQTYLGGIKYMTGVPDIVIIVDQHEEYTALRECITLGIPTICLTDTNCDPDLADISIPANDDAISSIRLILNKLVFAICEGRSSYIRNP ribosomal_protein_S2
MATCH rps2 ID 100 CORRECT
FILE1 rpoc2 rpoC2 16761 20939 R 1 4179 Ok MEVLMAERANLVFHNKAIDGTAMKRLISRLIEHFGMAYTSHILDQVKTLGFQQATATSISLGIDDLLTIPSKGWLVQDAEQQSLILEKHHQYGNVHAVEKLRQSIEIWYATSEYLRQEMNPNFRMTDPFNPVHIMSFSGARGNASQVHQLVGMRGLMSDPQGQMIDLPIQSNLREGLSLTEYIISCYGARKGVVDTAVRTSDAGYLTRRLVEVVQHIVVRRTDCGTARGISVSPRNGIMPERIFSQTLIGRVLADDIYMGSRCIATRNQAIGIGLVNRFITFRAQPISIRTPFTCRSTSWICRLCYGRSPTHGDLVELGEAVGIIAGQSIGEPGTQLTLRTFHTGGVFTGGTAEHVRAPSNGKIKFNEDLVHPTRTRHGHPAFLCSIDLYVTIESEDILHNVNIPPKSLLLVQNDQYVESEQVIAEIRAGISTLNFKEKVRKHIYSDSDGEMHWSTDVYHAPEFTYGNVHLLPKTSHLWILLGGPCRSSLVYLSIHKDQDQMNAHSLSGKRRYTSNLSVTNDQARQKLFSSDFYGQKEDRIPDYSDLNRIICTGQYNLVYSPILHGNSALLSKRRRNKFIIPLHSIQELENELMPCSGISIEIPVNGIFRRNSILAYFDDPRYRRKSSGIIKYGTIETHSVIKKEDLIEYRGVKEFRPKYQMKVDRFFFIPEEVHILPGSSSLMVRNNSIVGVDTQITLNLRSRVGGLVRVERKKKRIELKIFSGDIHFPGETDKISRHTGVLIPPGTGKRNSKEYKKVQNWIYVQRITPSKKRFFVLVRPVVTYEITDGINLGTLFPPDPLQERDNVQLRIVNYILYGNGKPIRGISDTSIQLVRTCLVLNWNQDKKSSSCEEARASFVEIRTNGLIRHFLKINLVKSPISYIGKRNDPSGSGLLSDNGSDCTNINPFSAIYSYSKAKIQQSLNQPQGTIHTLLNRNKECQSLIILSAANCSRMEPFKDVKYHSVIKESIKKDPLIPIRNSLGPLGTCLPIENFYSSYHLITHNQILVTKYLQLDNLKQTFQVIKLKYYLMDENGKIFNPDPCRNIILNPFNLNWSFLHHYYCAETSKIISLGQFICENVCIAKNGPPLKSGQVILVQVDSIVIRSAKPYLATPGATVHGHYGETLYEGDTLVTFIYEKSRSGDITQGLPKVEQVLEVRSIDSISMNLEKRVEGWNKCIPRILGIPWGFLIGAELTIAQSRISLVNKIQQVYRSQGVQIHNRHIEIIVRQITSKVLISEDGMSNVFSPGELIGLLRAERMGRALEEAICYRVVLLGITRASLNTQSFISEASFQETARVLAKAALRGRIDWLKGLKENVVLGGVIPVGTGFKGLVHPSKQHNNIPLETKKTNLFEGEMRDILFHHRKLFDSCLSKKFHDIPEQSFIGFNDS RNA_polymerase_beta''_chain
FILE2 rpoc2 rpoC2 16761 20939 R 1 4179 Ok MEVLMAERANLVFHNKAIDGTAMKRLISRLIEHFGMAYTSHILDQVKTLGFQQATATSISLGIDDLLTIPSKGWLVQDAEQQSLILEKHHQYGNVHAVEKLRQSIEIWYATSEYLRQEMNPNFRMTDPFNPVHIMSFSGARGNASQVHQLVGMRGLMSDPQGQMIDLPIQSNLREGLSLTEYIISCYGARKGVVDTAVRTSDAGYLTRRLVEVVQHIVVRRTDCGTARGISVSPRNGIMPERIFSQTLIGRVLADDIYMGSRCIATRNQAIGIGLVNRFITFRAQPISIRTPFTCRSTSWICRLCYGRSPTHGDLVELGEAVGIIAGQSIGEPGTQLTLRTFHTGGVFTGGTAEHVRAPSNGKIKFNEDLVHPTRTRHGHPAFLCSIDLYVTIESEDILHNVNIPPKSLLLVQNDQYVESEQVIAEIRAGISTLNFKEKVRKHIYSDSDGEMHWSTDVYHAPEFTYGNVHLLPKTSHLWILLGGPCRSSLVYLSIHKDQDQMNAHSLSGKRRYTSNLSVTNDQARQKLFSSDFYGQKEDRIPDYSDLNRIICTGQYNLVYSPILHGNSALLSKRRRNKFIIPLHSIQELENELMPCSGISIEIPVNGIFRRNSILAYFDDPRYRRKSSGIIKYGTIETHSVIKKEDLIEYRGVKEFRPKYQMKVDRFFFIPEEVHILPGSSSLMVRNNSIVGVDTQITLNLRSRVGGLVRVERKKKRIELKIFSGDIHFPGETDKISRHTGVLIPPGTGKRNSKEYKKVQNWIYVQRITPSKKRFFVLVRPVVTYEITDGINLGTLFPPDPLQERDNVQLRIVNYILYGNGKPIRGISDTSIQLVRTCLVLNWNQDKKSSSCEEARASFVEIRTNGLIRHFLKINLVKSPISYIGKRNDPSGSGLLSDNGSDCTNINPFSAIYSYSKAKIQQSLNQPQGTIHTLLNRNKECQSLIILSAANCSRMEPFKDVKYHSVIKESIKKDPLIPIRNSLGPLGTCLPIENFYSSYHLITHNQILVTKYLQLDNLKQTFQVIKLKYYLMDENGKIFNPDPCRNIILNPFNLNWSFLHHYYCAETSKIISLGQFICENVCIAKNGPPLKSGQVILVQVDSIVIRSAKPYLATPGATVHGHYGETLYEGDTLVTFIYEKSRSGDITQGLPKVEQVLEVRSIDSISMNLEKRVEGWNKCIPRILGIPWGFLIGAELTIAQSRISLVNKIQQVYRSQGVQIHNRHIEIIVRQITSKVLISEDGMSNVFSPGELIGLLRAERMGRALEEAICYRVVLLGITRASLNTQSFISEASFQETARVLAKAALRGRIDWLKGLKENVVLGGVIPVGTGFKGLVHPSKQHNNIPLETKKTNLFEGEMRDILFHHRKLFDSCLSKKFHDIPEQSFIGFNDS RNA_polymerase_beta''_subunit
MATCH rpoc2 ID 100 CORRECT
FILE1 rpoc1 rpoC1 21080 23883 R 2 2067 Ok MNNNFSSMIDRYKHQQLRIGSVSPQQISAWATKILPNGEIVGEVTKPYTFHYKTNKPEKDGLFCERIFGPIKSGICACGNYRVIGDEKEDPKFCEQCGVEFVDSRIRRYQMGYIKLACPVTHVWYLKRLPSYIANLLDKPLKELEGLVYCDFSFARPITKKPTFLRLRGLFEYEIQSWKYSIPLFFTTQGFDTFRNREISTGAGAIREQLADLDLRIIIENSLVEWEELGEEGHTGNEWEDRKVGRRKDFLVRRVELAKHFIRTNIEPEWMVLCLLPVLPPELRPIIQIDGGKLMSSDINELYRRVIYRNNTLTDLLTTSRSTPGELVMCQEKLVQEAVDTLLDNGIRGQPMRDGHNKVYKSFSDVIEGKEGRFRETLLGKRVDYSGRSVIVVGPSLSLHRCGLPREIAIELFQTFVIRGLIRQHLASNIGVAKSKIREKEPIVWEILQEVMQGHPVLLNRAPTLHRLGIQAFQPVLVEGRAICLHPLVCKGFNADFDGDQMAVHVPLSLEAQVEARLLMFSHMNLLSPAIGDPISVPTQDMLIGLYVLTSGNHRGICVNRYNPCNRRNYQNQKRSDNSYYKYTKEPFFSNSYDAIGAYRQKRINLDSPLWLRWRLDQRVIASRETPIEVHYESLGTFYEIYGHYLIVRSLKKKILFIYIRTTVGHIALYREIEEAIQGFSRAYSYAT RNA_polymerase_beta'_chain
FILE2 rpoc1 rpoC1 21080 23883 R 2 2067 Ok MNNNFSSMIDRYKHQQLRIGSVSPQQISAWATKILPNGEIVGEVTKPYTFHYKTNKPEKDGLFCERIFGPIKSGICACGNYRVIGDEKEDPKFCEQCGVEFVDSRIRRYQMGYIKLACPVTHVWYLKRLPSYIANLLDKPLKELEGLVYCDFSFARPITKKPTFLRLRGLFEYEIQSWKYSIPLFFTTQGFDTFRNREISTGAGAIREQLADLDLRIIIENSLVEWEELGEEGHTGNEWEDRKVGRRKDFLVRRVELAKHFIRTNIEPEWMVLCLLPVLPPELRPIIQIDGGKLMSSDINELYRRVIYRNNTLTDLLTTSRSTPGELVMCQEKLVQEAVDTLLDNGIRGQPMRDGHNKVYKSFSDVIEGKEGRFRETLLGKRVDYSGRSVIVVGPSLSLHRCGLPREIAIELFQTFVIRGLIRQHLASNIGVAKSKIREKEPIVWEILQEVMQGHPVLLNRAPTLHRLGIQAFQPVLVEGRAICLHPLVCKGFNADFDGDQMAVHVPLSLEAQVEARLLMFSHMNLLSPAIGDPISVPTQDMLIGLYVLTSGNHRGICVNRYNPCNRRNYQNQKRSDNSYYKYTKEPFFSNSYDAIGAYRQKRINLDSPLWLRWRLDQRVIASRETPIEVHYESLGTFYEIYGHYLIVRSLKKKILFIYIRTTVGHIALYREIEEAIQGFSRAYSYAT RNA_polymerase_beta'_subunit
MATCH rpoc1 ID 100 CORRECT
FILE1 rpob rpoB 23889 27101 R 1 3213 Ok MLGDGNEGISTIPGFNQIQFEGFCRFIDQGLTEELYKFPKIEDTDQEIEFQLFVETYQLVEPLIKERDAVYESLTYSSELYVSAGLIWKNSRDMQEQTIFIGNIPLMNSLGTSIVNGIYRIVINQILQSPGIYYRSELDHNGISVYTGTIISDWGGRSELEIDRKARIWARVSRKQKISILVLSSAMGLNLREILENVCYPEIFLSFLNDKERKKIGSKENSILEFYQQFACVGGDPVFSESLCKELQKKFFQQRCELGRIGRRNMNRKLNLDIPQNNTFLLPRDILAAADHLIGLKFGMGALDDMNHLKNKRIRSVADLLQDQFGLALVRLENVVRGTICGAIRHKLIPTPQNLVTSPPLTTTYESFFGLHPLSQVLDRTNPLTQIVHGRKLSYLGPGGLTGRTASFRIRDIHPSHYGRICPIDTSEGINVGLIGSLSIHARIGHWGSLESPFYEISERSTGVRMLYLSPGSDEYYMVAAGNSLALNRDIQEEQVVPARYRQEFLTIAWEQVHLRSIFPFQYFSIGASLIPFIEHNDANRALMSSNMQRQAVPLSRSEKCIVGTGLERQAALDSGALAIAEREGRIVYTNTHKILLAGNGDILSIPLVIYQRSNKNTCMHQKFRVPRGKCIKKGQILADGAATVGGELALGKNVLVAYMPWEGYNSEDAVLISERLVYEDIYTSFHIRKYEIHTHVTSQGPEKVTNEIPHLEAHLLRNLDKKGIVMLGSWVETGDILVGKLTPQVVKESSYAPEDRLLRAILGIQVSTSKETCLKLPIGGRGRVIDVRWIQKRGGSSYNPETIRVYISQKREIKVGDKVAGRHGNKGIISKILPRQDMPYLQDGRSVDMVFNPLGVPSRMNVGQIFECSLGLAGSLLDRHYRIAPFDERYEQEASRKLVFSELYEASKQTANPWVFEPEYPGKSRIFDGRTGNPFEQPVIIGKPYILKLIHQVDDKIHGRSSGHYALVTQQPLRGRAKQGGQRVGEMEVWALEGFGVAHILQEMLTYKSDHIRARQEVLGTTIIGGTIPNPEDAPESFRLLVRELRSLALELNHFLVSEKNFQINRKEA RNA_polymerase_beta_chain
FILE2 rpob rpoB 23889 27101 R 1 3213 Ok MLGDGNEGISTIPGFNQIQFEGFCRFIDQGLTEELYKFPKIEDTDQEIEFQLFVETYQLVEPLIKERDAVYESLTYSSELYVSAGLIWKNSRDMQEQTIFIGNIPLMNSLGTSIVNGIYRIVINQILQSPGIYYRSELDHNGISVYTGTIISDWGGRSELEIDRKARIWARVSRKQKISILVLSSAMGLNLREILENVCYPEIFLSFLNDKERKKIGSKENSILEFYQQFACVGGDPVFSESLCKELQKKFFQQRCELGRIGRRNMNRKLNLDIPQNNTFLLPRDILAAADHLIGLKFGMGALDDMNHLKNKRIRSVADLLQDQFGLALVRLENVVRGTICGAIRHKLIPTPQNLVTSPPLTTTYESFFGLHPLSQVLDRTNPLTQIVHGRKLSYLGPGGLTGRTASFRIRDIHPSHYGRICPIDTSEGINVGLIGSLSIHARIGHWGSLESPFYEISERSTGVRMLYLSPGSDEYYMVAAGNSLALNRDIQEEQVVPARYRQEFLTIAWEQVHLRSIFPFQYFSIGASLIPFIEHNDANRALMSSNMQRQAVPLSRSEKCIVGTGLERQAALDSGALAIAEREGRIVYTNTHKILLAGNGDILSIPLVIYQRSNKNTCMHQKFRVPRGKCIKKGQILADGAATVGGELALGKNVLVAYMPWEGYNSEDAVLISERLVYEDIYTSFHIRKYEIHTHVTSQGPEKVTNEIPHLEAHLLRNLDKKGIVMLGSWVETGDILVGKLTPQVVKESSYAPEDRLLRAILGIQVSTSKETCLKLPIGGRGRVIDVRWIQKRGGSSYNPETIRVYISQKREIKVGDKVAGRHGNKGIISKILPRQDMPYLQDGRSVDMVFNPLGVPSRMNVGQIFECSLGLAGSLLDRHYRIAPFDERYEQEASRKLVFSELYEASKQTANPWVFEPEYPGKSRIFDGRTGNPFEQPVIIGKPYILKLIHQVDDKIHGRSSGHYALVTQQPLRGRAKQGGQRVGEMEVWALEGFGVAHILQEMLTYKSDHIRARQEVLGTTIIGGTIPNPEDAPESFRLLVRELRSLALELNHFLVSEKNFQINRKEA RNA_polymerase_beta_subunit
MATCH rpob ID 100 CORRECT
FILE1 petn petN 29144 29233 D 1 90 Ok MDIVSLAWAALMVVFTFSLSLVVWGRSGL cytochrome_b6_/f_complex_subunit_VIII
FILE2 petn petN 29135 29233 D 1 99 Ok MIHMDIVSLAWAALMVVFTFSLSLVVWGRSGL cytochrome_b6/f_complex_subunit_VIII
MATCH petn ID 90 ALMOST_CORRECT.BAD_START
FILE1 psbm psbM 30341 30445 R 1 105 Ok MEVNILAFIATALFILVPTAFLLIIYVKTVSQND photosystem_II_protein_M
FILE2 psbm psbM 30341 30451 R 1 111 Ok EIMEVNILAFIATALFILVPTAFLLIIYVKTVSQND photosystem_II_protein_M
MATCH psbm ID 94 ALMOST_CORRECT.BAD_START
FILE1 psbd psbD 33585 34646 D 1 1062 Ok MTIAIGKFTKDENDLFDIMDDWLRRDRFVFVGWSGLLLFPCAYFAVGGWFTGTTFVTSWYTHGLASSYLEGCNFLTAAVSTPANSLAHSLLLLWGPEAQGDFTRWCQLGGLWTFVALHGAFGLIGFMLRQFELARSVQLRPYNAIAFSGPIAVFVSVFLIYPLGQSGWFFAPSFGVAAIFRFILFFQGFHNWTLNPFHMMGVAGVLGAALLCAIHGATVENTLFEDGDGANTFRAFNPTQAEETYSMVTANRFWSQIFGVAFSNKRWLHFFMLFVPVTGLWMSALGVVGLALNLRAYDFVSQEIRAAEDPEFETFYTKNILLNEGIRAWMAAQDQPHENLIFPEEVLPRGNAL photosystem_II_protein_D2
FILE2 psbd psbD 33585 34646 D 1 1062 Ok MTIAIGKFTKDENDLFDIMDDWLRRDRFVFVGWSGLLLFPCAYFAVGGWFTGTTFVTSWYTHGLASSYLEGCNFLTAAVSTPANSLAHSLLLLWGPEAQGDFTRWCQLGGLWTFVALHGAFGLIGFMLRQFELARSVQLRPYNAIAFSGPIAVFVSVFLIYPLGQSGWFFAPSFGVAAIFRFILFFQGFHNWTLNPFHMMGVAGVLGAALLCAIHGATVENTLFEDGDGANTFRAFNPTQAEETYSMVTANRFWSQIFGVAFSNKRWLHFFMLFVPVTGLWMSALGVVGLALNLRAYDFVSQEIRAAEDPEFETFYTKNILLNEGIRAWMAAQDQPHENLIFPEEVLPRGNAL photosystem_II_protein_D2
MATCH psbd ID 100 CORRECT
FILE1 psbc psbC 34630 36015 D 1 1386 Ok METLFNGTLALAGRDQETTGFAWWAGNARLINLSGKLLGAHVAHAGLIVFWAGAMNLFEVAHFVPEKPMYEQGLILLPHLATLGWGVGPGGEVIDTFPYFVSGVLHLISSAVLGFGGIYHALLGPETLEESFPFFGYVWKDRNKMTTILGIHLILLGIGAFLLVFKALYFGGVYDTWAPGGGDVRKITNLTLSPSIIFGYLLKSPFGGEGWIVSVDDLEDIIGGHVWLGSICILGGIWHILTKPFAWARRALVWSGEAYLSYSLGALAVFGFIACCFVWFNNTAYPSEFYGPTGPEASQAQAFTFLVRDQRLGANVGSAQGPTGLGKYLMRSPTGEVIFGGETMRFWDLRAPWLEPLRGPNGLDLSRLKKDIQPWQERRSAEYMTHAPLGSLNSVGGVATEINAVNYVSPRSWLATSHFVLGFFFFVGHLWHAGRARAAAAGFEKGIDRDFEPVLSMTPLN photosystem_II_44_kDa_protein
FILE2 psbc psbC 34552 36015 D 1 1464 Ok MKVFALGWRLKISLMKTLYSLRRFYHVETLFNGTLALAGRDQETTGFAWWAGNARLINLSGKLLGAHVAHAGLIVFWAGAMNLFEVAHFVPEKPMYEQGLILLPHLATLGWGVGPGGEVIDTFPYFVSGVLHLISSAVLGFGGIYHALLGPETLEESFPFFGYVWKDRNKMTTILGIHLILLGIGAFLLVFKALYFGGVYDTWAPGGGDVRKITNLTLSPSIIFGYLLKSPFGGEGWIVSVDDLEDIIGGHVWLGSICILGGIWHILTKPFAWARRALVWSGEAYLSYSLGALAVFGFIACCFVWFNNTAYPSEFYGPTGPEASQAQAFTFLVRDQRLGANVGSAQGPTGLGKYLMRSPTGEVIFGGETMRFWDLRAPWLEPLRGPNGLDLSRLKKDIQPWQERRSAEYMTHAPLGSLNSVGGVATEINAVNYVSPRSWLATSHFVLGFFFFVGHLWHAGRARAAAAGFEKGIDRDFEPVLSMTPLN photosystem_II_44_kDa_protein
MATCH psbc ID 94 ALMOST_CORRECT.BAD_START
FILE1 ycf9 ycf9 36709 36897 D 1 189 Ok MTLAFQLAVFALIATSLILLISVPVVFASPDGWSSNKNVVFSGTSLWIGLVFLVGILNSLIS Ycf9_protein
FILE2 psbz psbZ 36709 36897 D 1 189 Ok MTLAFQLAVFALIATSLILLISVPVVFASPDGWSSNKNVVFSGTSLWIGLVFLVGILNSLIS photosystem_II_protein_Z
MATCH ycf9 ID 100 CORRECT
FILE1 rps14 rps14 37671 37973 R 1 303 Ok MARKSLIQREKKRQKLEQKYHSIRRSSKKEISKVPSLSDKWEIYGKLQSLPRNSAPTRLHRRCFLTGRPRANYRDFGLSGHILREMVHACLLPGATRSSW ribosomal_protein_S14
FILE2 rps14 rps14 37671 37973 R 1 303 Ok MARKSLIQREKKRQKLEQKYHSIRRSSKKEISKVPSLSDKWEIYGKLQSLPRNSAPTRLHRRCFLTGRPRANYRDFGLSGHILREMVHACLLPGATRSSW ribosomal_protein_S14
MATCH rps14 ID 100 CORRECT
FILE1 psab psaB 38092 40296 R 1 2205 Ok MALRFPRFSQGLAQDPTTRRIWFGIATAHDFESHDDITEERLYQNIFASHFGQLAIIFLWTSGNLFHVAWQGNFESWVQDPLHVRPIAHAIWDPHFGQPAVEAFTRGGALGPVNIAYSGVYQWWYTIGLRTNEDLYTGALFLLFLSAISLIAGWLHLQPKWKPSVSWFKNAESRLNHHLSGLFGVSSLAWTGHLVHVAIPASRGEYVRWNNFLDVLPHPQGLGPLFTGQWNLYAQNPDSSSHLFGTAEGAGTAILTLLGGFHPQTQSLWLTDIAHHHLAIAFIFLVAGHMYRTNFGIGHSMKDLLDAHIPPGGRLGRGHKGLYDTINNSLHFQLGLALASLGVITSLVAQHMYSLPAYAFIAQDFTTQAALYTHHQYIAGFIMTGAFAHGAIFFIRDYNPEQNEDNVLARMLDHKEAIISHLSWASLFLGFHTLGLYVHNDVMLAFGTPEKQILIEPIFAQWIQSAHGKTSYGFDVLLSSTTGPAFNAGRSIWLPGWLNAVNENSNSLFLTIGPGDFLVHHAIALGLHTTTLILVKGALDARGSKLMPDKKDFGYSFPCDGPGRGGTCDISAWDAFYLAVFWMLNTIGWVTFYWHWKHITLWQGNVSQFNESSTYLMGWLRDYLWLNSSQLINGYNPFGMNSLSVWAWMFLFGHLVWATGFMFLISWRGYWQELIETLAWAHERTPLANLIRWRDKPVALSIVQARLVGLAHFSVGYIFTYAAFLIASTSGKFG photosystem_I_P700_apoprotein_A2
FILE2 psab psaB_1 38092 40296 R 1 2205 Ok MALRFPRFSQGLAQDPTTRRIWFGIATAHDFESHDDITEERLYQNIFASHFGQLAIIFLWTSGNLFHVAWQGNFESWVQDPLHVRPIAHAIWDPHFGQPAVEAFTRGGALGPVNIAYSGVYQWWYTIGLRTNEDLYTGALFLLFLSAISLIAGWLHLQPKWKPSVSWFKNAESRLNHHLSGLFGVSSLAWTGHLVHVAIPASRGEYVRWNNFLDVLPHPQGLGPLFTGQWNLYAQNPDSSSHLFGTAEGAGTAILTLLGGFHPQTQSLWLTDIAHHHLAIAFIFLVAGHMYRTNFGIGHSMKDLLDAHIPPGGRLGRGHKGLYDTINNSLHFQLGLALASLGVITSLVAQHMYSLPAYAFIAQDFTTQAALYTHHQYIAGFIMTGAFAHGAIFFIRDYNPEQNEDNVLARMLDHKEAIISHLSWASLFLGFHTLGLYVHNDVMLAFGTPEKQILIEPIFAQWIQSAHGKTSYGFDVLLSSTTGPAFNAGRSIWLPGWLNAVNENSNSLFLTIGPGDFLVHHAIALGLHTTTLILVKGALDARGSKLMPDKKDFGYSFPCDGPGRGGTCDISAWDAFYLAVFWMLNTIGWVTFYWHWKHITLWQGNVSQFNESSTYLMGWLRDYLWLNSSQLINGYNPFGMNSLSVWAWMFLFGHLVWATGFMFLISWRGYWQELIETLAWAHERTPLANLIRWRDKPVALSIVQARLVGLAHFSVGYIFTYAAFLIASTSGKFG photosystem_I_P700_chlorophyll_a_apoprotein_A2
MATCH psab ID 100 CORRECT
FILE1 psaa psaA 40322 42574 R 1 2253 Ok MIIRSPEPEVKILVDRDPVKTSFEEWARPGHFSRTIAKGPDTTTWIWNLHADAHDFDSHTSDLEEISRKVFSAHFGQLSIIFLWLSGMYFHGARFSNYEAWLSDPTHIGPSAQVVWPIVGQEILNGDVGGGFRGIQITSGFFQLWRASGITSELQLYCTAIGALVFAALMLFAGWFHYHKAAPKLAWFQDVESMLNHHLAGLLGLGSLSWAGHQVHVSLPINQFLNAGVDPKEIPLPHEFILNRDLLAQLYPSFAEGATPFFTLNWSKYADFLTFRGGLDPVTGGLWLTDIAHHHLAIAILFLIAGHMYRTNWGIGHGLKDILEAHKGPFTGQGHKGLYEILTTSWHAQLSLNLAMLGSLTIVVAHHMYSMPPYPYLATDYGTQLSLFTHHMWIGGFLIVGAAAHAAIFMVRDYDPTTRYNDLLDRVLRHRDAIISHLNWACIFLGFHSFGLYIHNDTMSALGRPQDMFSDTAIQLQPVFAQWIQNTHALAPGATAPGATASTSLTWGGGDLVAVGGKVALLPIPLGTADFLVHHIHAFTIHVTVLILLKGVLFARSSRLIPDKANLGFRFPCDGPGRGGTCQVSAWDHVFLGLFWMYNSISVVIFHFSWKMQSDVWGSVSDQGVVTHITGGNFAQSSITINGWLRDFLWAQASQVIQSYGSSLSAYGLFFLGAHFVWAFSLMFLFSGRGYWQELIESIVWAHNKLKVAPATQPRALSIIQGRAVGVTHYLLGGIATTWAFFLARIIAVG photosystem_I_P700_apoprotein_A1
FILE2 psab psaB_2 40322 42574 R 2 2220 Ok MIIRSPEPEVKILVDRDPVKTSFEEWARPGHFSRTIAKGPDTTTWIWNLHADAHDFDSHTSDLEEISRKVFSAHFGQLSIIFLWLSGMYFHGARFSNYEAWLSDIWNLHADAHDFDSHTSDLEEISRKVFSAHFGQLSIIFLWLSGMYFHGARFSNYEAWLSDPTHIGPSAQVVWPIVGQEILNGDVGGGFRGIQITSGFFQLWRASGITSELQLYCTAIGAPTHIGPSAQVVWPIVGQEILNGDVGGGFRGIQITSGFFQLWRASGITSELQLYCTAIGALVFAALMLFAGWFHYHKAAPKLAWFQDVESMLNHHLAGLLGLGSLSWAGHQVHVSLPINLVFAALMLFAGWFHYHKAAPKLAWFQDVESMLNHHLAGLLGLGSLSWAGHQVHVSLPINQFLNAGVDPKEIPLPHEFILNRDLLAQLYPSFAEGATPFFTLNWSKYADFLTFRGGLDPQFLNAGVDPKEIPLPHEFILNRDLLAQLYPSFAEGATPFFTLNWSKYADFLTFRGGLDPVTGGLWLTDIAHHHLAIAILFLIAGHMYRTNWGIGHGLKDILEAHKGPFTGQGHKGLYEVTGGLWLTDIAHHHLAIAILFLIAGHMYRTNWGIGHGLKDILEAHKGPFTGQGHKGLYEILTTSWHAQLSLNLAMLGSLTIVVAHHMYSMPPYPYLATDYGTQLSLFTHHMWIGGFLIILTTSWHAQLSLNLAMLGSLTIVVAHHMYSMPPYPYLATDYGTQLSLFTHHMWIGGFLIVGAAAHAAIFMVRDYDPTTRYNDLLDRVLRHRDAIISHLNWACIFLGFHSFGLYIHNDTVGAAAHAAIFMVRDYDPTTRYNDLLDRVLRHRDAIISHLNWACIFLGFHSFGLYIHNDTMSALGRPQDMFSDTAIQLQPVFAQWIQNTHALAPGATAPGATASTSLTWGGGDLVAVGGMSALGRPQDMFSDTAIQLQPVFAQWIQNTHALAPGATAPGATASTSLTWGGGDLVAVGGKVALLPIPLGTADFLVHHIHAFTIHVTVLILLKGVLFARSSRLIPDKANLGFRFPCDGPKVALLPIPLGTADFLVHHIHAFTIHVTVLILLKGVLFARSSRLIPDKANLGFRFPCDGPGRGGTCQVSAWDHVFLGLFWMYNSISVVIFHFSWKMQSDVWGSVSDFAQSSITINGWLRGRGGTCQVSAWDHVFLGLFWMYNSISVVIFHFSWKMQSDVWGSVSDQGVVTHITGGNFADFLWAQASQVIQSYGSSLSAYGLFFLGAHFVWAFSLMFLFSGRGYWQELIESIVWAHNKQSSITINGWLRDFLWAQASQVIQSYGSSLSAYGLFFLGAHFVWAFSLMFLFSGRGYWQELIESIVWAHNKLKVAPATQPRALSIIQGRAVGVTHYLLGGIATTWAFFLARIIAVGLKVAPATQPRALSIIQGRAVGVTHYLLGGIATTWAFFLARIIAVG photosystem_I_P700_chlorophyll_a_apoprotein_A2
MATCH psaa ID 51 WRONG.BAD_NBEXON.BAD_JUNCTION
FILE1 ycf3 ycf3 43333 45318 R 3 507 Ok MPRSRINGNFIDKTFSIVADILLRVIPTTSGEKEAFTYYRDGMSAQSEGNYAEALQNYYEAMRLEIDPYDRSYILYNIGLIHTSNGEHTKALEYYFRALERNPFLPQAFNNMAVICHYRGEQAIQQGDSEIAEAWFDQAAEYWKQAIALTPGNYIEARNWLKITRRFE photosystem_I_assembly_protein_Ycf3
FILE2 ycf3 ycf3 43333 45318 R 3 516 Ok MPRSRINGNFIDKTFSIVADILLRVIPTTSGEKEAFTYYRDGAILSAQSEGNYAEALQNYYEAMRLEIDPYDRSYILYNIGLIHTSNGEHTKALEYYFRALERNPFLPQAFNNMAVICHYVRGEQAIQQGDSEIAEAWFDQAAEYWKQAIALTPGNYIEARNWLKITRRFE photosystem_I_assembly_protein_Ycf3
MATCH ycf3 ID 97 ALMOST_CORRECT.BAD_JUNCTION
FILE1 rps4 rps4 46609 47214 R 1 606 Ok MSRYRGPRFKKIRRLGALPGLTNKKPRTGSDLRNQSRSGKKSQYRIRLEEKQKLRFHYGLTERQLLKYVRIARKAKGSTGQVLLQLLEMRLDNILFRLGMASTIPAARQLVNHRHILVNGHIVDIPSYRCKPRDIITAKDEQKSRALIQISLDSSPHEELPNHLTLQPFQYKGLVNQIIDSKWVGLKINELLVVEYYSRQT ribosomal_protein_S4
FILE2 rps4 rps4 46609 47214 R 1 606 Ok MSRYRGPRFKKIRRLGALPGLTNKKPRTGSDLRNQSRSGKKSQYRIRLEEKQKLRFHYGLTERQLLKYVRIARKAKGSTGQVLLQLLEMRLDNILFRLGMASTIPAARQLVNHRHILVNGHIVDIPSYRCKPRDIITAKDEQKSRALIQISLDSSPHEELPNHLTLQPFQYKGLVNQIIDSKWVGLKINELLVVEYYSRQT ribosomal_protein_S4
MATCH rps4 ID 100 CORRECT
FILE1 ndhj ndhJ 50074 50550 R 1 477 Ok MQGRLSAWLVKHGLIHRSLGFDYQGIETLQIKPEDWHSIAVIFYVYGYNYLRSQCAYDVAPGGLLASVYHLTRIEDGVAQPEELCIKVFASRRNPRIPSVFWVWKSVDFQERESYDMLGISYDNHPRLKRILMPESWIGWPLRKDYIAPNFYEIQDAH NADH_dehydrogenase_subunit_J
FILE2 ndhj ndhJ 50074 50550 R 1 477 Ok MQGRLSAWLVKHGLIHRSLGFDYQGIETLQIKPEDWHSIAVIFYVYGYNYLRSQCAYDVAPGGLLASVYHLTRIEDGVAQPEELCIKVFASRRNPRIPSVFWVWKSVDFQERESYDMLGISYDNHPRLKRILMPESWIGWPLRKDYIAPNFYEIQDAH NADH_dehydrogenase_subunit_J
MATCH ndhj ID 100 CORRECT
FILE1 ndhk ndhK 50656 51510 R 1 855 Ok MGNEFRRIGCICIYRSFHFRAYLNYWFSLCMAKGGIGMVLAPEYSDNKKKNGKNKIETVMNSIQFPLLDRTAPNSVISTTLNDLSNWSRLSSLWPLLYGTSCCFIEFASLIGSRFDFDRYGLVPRSSPRQSDLILTAGTVTMKMAPSLVRLYEQMPEPKYVIAMGACTITGGMFSTDSYSTVRGVDKLIPVDVYLPGCPPKPEAVIDAITKLRKKISRELYEDRIRSQRANRCFTTNHKFHVRRSIHTGNYDQRVLYQPPSTSEIPTEIFFKYKNSVSSAELVN NADH_dehydrogenase_subunit_K
FILE2 ndhk ndhK 50656 51510 R 1 855 Ok MGNEFRRIGCICIYRSFHFRAYLNYWFSLCMAKGGIGMVLAPEYSDNKKKNGKNKIETVMNSIQFPLLDRTAPNSVISTTLNDLSNWSRLSSLWPLLYGTSCCFIEFASLIGSRFDFDRYGLVPRSSPRQSDLILTAGTVTMKMAPSLVRLYEQMPEPKYVIAMGACTITGGMFSTDSYSTVRGVDKLIPVDVYLPGCPPKPEAVIDAITKLRKKISRELYEDRIRSQRANRCFTTNHKFHVRRSIHTGNYDQRVLYQPPSTSEIPTEIFFKYKNSVSSAELVN NADH_dehydrogenase_subunit_K
MATCH ndhk ID 100 CORRECT
FILE1 ndhc ndhC 51390 51752 R 1 363 Ok MFLLYEYDFFWAFLIISILVPILAFFISGVLAPISKGPEKLSTYESGIEPMGDAWLQFRIRYYMFALVFVVFDVETVFLYPWAMSFDVLGVSVFIEAFIFVLILIIGLVYAWRKGALEWS NADH_dehydrogenase_subunit_3
FILE2 ndhc ndhC 51390 51752 R 1 363 Ok MFLLYEYDFFWAFLIISILVPILAFFISGVLAPISKGPEKLSTYESGIEPMGDAWLQFRIRYYMFALVFVVFDVETVFLYPWAMSFDVLGVSVFIEAFIFVLILIIGLVYAWRKGALEWS NADH_dehydrogenase_subunit_3
MATCH ndhc ID 100 CORRECT
FILE1 atpe atpE 53977 54378 R 1 402 Ok MTLNLSVLTPNRIVWDSEVEEIVLSTNSGQIGILPNHAPIATAVDIGILRIRLNDQWLTMALMGGFARIGNNEITVLVNDAEKGSDINPQEAQQTLEIAEANVKKAEGRRQKIEANLALRRARTRVEASNPIS ATP_synthase_CF1_epsilon_chain
FILE2 atpe atpE 53977 54378 R 1 402 Ok MTLNLSVLTPNRIVWDSEVEEIVLSTNSGQIGILPNHAPIATAVDIGILRIRLNDQWLTMALMGGFARIGNNEITVLVNDAEKGSDINPQEAQQTLEIAEANVKKAEGRRQKIEANLALRRARTRVEASNPIS ATP_synthase_CF1_epsilon_subunit
MATCH atpe ID 100 CORRECT
FILE1 atpb atpB 54375 55871 R 1 1497 Ok MRINPTTSGSGVSTLEKKNPGRVVQIIGPVLDVAFPPGKMPNIYNALVVQGRDSVGQPINVACEVQQLLGNNRVRAVAMSATEGLTRGMAVIDTGAPISVPVGGATLGRIFNVLGEPVDNLGPVDTSTTSPIHRSAPAFIQLDTKLSIFETGIKVVDLLAPYRRGGKIGLFGGAGVGKTVLIMELINNIAKAHGGVSVFGGVGERTREGNDLYMEMKESGVINKENIAESKVALVYGQMNEPPGARMRVGLTALTMAEYFRDVNEQDVLLFIDNIFRFVQAGSEVSALLGRMPSAVGYQPTLSTEMGSLQERITSTKEGSITSIQAVYVPADDLTDPAPATTFAHLDATTVLSRGLAAKGIYPAVDPLDSTSTMLQPRIVGEEHYETAQRVKQTLQRYKELQDIIAILGLDELSEEDRLLVARARKIERFLSQPFFVAEVFTGSPGKYVGLAETIRGFQLILSGELDGLPEQAFYLVGTIDEATAKAMNLEMESNLKK ATP_synthase_CF1_beta_chain
FILE2 atpb atpB 54375 55871 R 1 1497 Ok MRINPTTSGSGVSTLEKKNPGRVVQIIGPVLDVAFPPGKMPNIYNALVVQGRDSVGQPINVACEVQQLLGNNRVRAVAMSATEGLTRGMAVIDTGAPISVPVGGATLGRIFNVLGEPVDNLGPVDTSTTSPIHRSAPAFIQLDTKLSIFETGIKVVDLLAPYRRGGKIGLFGGAGVGKTVLIMELINNIAKAHGGVSVFGGVGERTREGNDLYMEMKESGVINKENIAESKVALVYGQMNEPPGARMRVGLTALTMAEYFRDVNEQDVLLFIDNIFRFVQAGSEVSALLGRMPSAVGYQPTLSTEMGSLQERITSTKEGSITSIQAVYVPADDLTDPAPATTFAHLDATTVLSRGLAAKGIYPAVDPLDSTSTMLQPRIVGEEHYETAQRVKQTLQRYKELQDIIAILGLDELSEEDRLLVARARKIERFLSQPFFVAEVFTGSPGKYVGLAETIRGFQLILSGELDGLPEQAFYLVGTIDEATAKAMNLEMESNLKK ATP_synthase_CF1_beta_subunit
MATCH atpb ID 100 CORRECT
FILE1 rbcl rbcL 56686 58119 D 1 1434 Ok MSPQTETKASVGFKAGVKEYKLTYYTPEYQTKDTDILAAFRVTPQPGVPPEEAGAAVAAESSTGTWTTVWTDGLTSLDRYKGRCYRIERVVGEKDQYIAYVAYPLDLFEEGSVTNMFTSIVGNVFGFKALRALRLEDLRIPPAYVKTFQGPPHGIQVERDKLNKYGRPLLGCTIKPKLGLSAKNYGRAVYECLRGGLDFTKDDENVNSQPFMRWRDRFLFCAEALFKAQTETGEIKGHYLNATAGTCEEMIKRAVFARELGVPIVMHDYLTGGFTANTTLAHYCRDNGLLLHIHRAMHAVIDRQKNHGIHFRVLAKALRMSGGDHIHSGTVVGKLEGERDITLGFVDLLRDDFVEQDRSRGIYFTQDWVSLPGVLPVASGGIHVWHMPALTEIFGDDSVLQFGGGTLGHPWGNAPGAVANRVALEACVKARNEGRDLAREGNEIIREACKWSPELAAACEVWKEIVFNFAAVDVLDK ribulose_1,5-bisphosphate_carboxylase_/oxygenase_large_chain
FILE2 rbcl rbcL 56686 58119 D 1 1434 Ok MSPQTETKASVGFKAGVKEYKLTYYTPEYQTKDTDILAAFRVTPQPGVPPEEAGAAVAAESSTGTWTTVWTDGLTSLDRYKGRCYRIERVVGEKDQYIAYVAYPLDLFEEGSVTNMFTSIVGNVFGFKALRALRLEDLRIPPAYVKTFQGPPHGIQVERDKLNKYGRPLLGCTIKPKLGLSAKNYGRAVYECLRGGLDFTKDDENVNSQPFMRWRDRFLFCAEALFKAQTETGEIKGHYLNATAGTCEEMIKRAVFARELGVPIVMHDYLTGGFTANTTLAHYCRDNGLLLHIHRAMHAVIDRQKNHGIHFRVLAKALRMSGGDHIHSGTVVGKLEGERDITLGFVDLLRDDFVEQDRSRGIYFTQDWVSLPGVLPVASGGIHVWHMPALTEIFGDDSVLQFGGGTLGHPWGNAPGAVANRVALEACVKARNEGRDLAREGNEIIREACKWSPELAAACEVWKEIVFNFAAVDVLDK ribulose-1,5-bisphosphate_carboxylase/oxygenase_large_subunit
MATCH rbcl ID 100 CORRECT
FILE1 accd accD 58879 60402 D 1 1524 Ok MTIHLLYFHANRGQENSMERWWFNSMLFKKEFERRCGLNKSMGSLGPIENTSEDPNLKVKNIHSCSNVDYLFGVKDIWNFISNDTFLVSDRNGDSYSIYFDIENHIFEVDNDHSFLSELESSFYSYRNSSYLNNGFRGEDPYYNSYMSYMYDTQYSWNNHINSCIDNYLQSQICIDTSIISGSESNGDSYIYRAICSGQSLNSSENEGSSRRTRTKDSDLTIRESSNDLEVTQKYKHLWVQCENCYGLNYKKFLKSKMNICEQCGYHLKMSSSDRIELLIDPGTWDPMDEDMVSLDPIEFHSEEEPYKDRIDSYQRKTGLTEAVQTGIGQLNGIPVAIGVMDFQFMGGSMGSVVGEKITRLIEHAANQNLPLMIVCASGGARMQEGSLSLMQMAKISSALYDYQLNKKLFYVSILTSPTTGGVTASFGMLGDIIIAEPNAYIAFAGKRVIEQTLNKTVPEGSQAAEYLFQKGLFDLIVPRNLLKSVLSELFKLHAFFPLNQKSSKIK acetyl-CoA_carboxylase_beta_subunit
FILE2 accd accD 58879 60402 D 1 1524 Ok MTIHLLYFHANRGQENSMERWWFNSMLFKKEFERRCGLNKSMGSLGPIENTSEDPNLKVKNIHSCSNVDYLFGVKDIWNFISNDTFLVSDRNGDSYSIYFDIENHIFEVDNDHSFLSELESSFYSYRNSSYLNNGFRGEDPYYNSYMSYMYDTQYSWNNHINSCIDNYLQSQICIDTSIISGSESNGDSYIYRAICSGQSLNSSENEGSSRRTRTKDSDLTIRESSNDLEVTQKYKHLWVQCENCYGLNYKKFLKSKMNICEQCGYHLKMSSSDRIELLIDPGTWDPMDEDMVSLDPIEFHSEEEPYKDRIDSYQRKTGLTEAVQTGIGQLNGIPVAIGVMDFQFMGGSMGSVVGEKITRLIEHAANQNLPLMIVCASGGARMQEGSLSLMQMAKISSALYDYQLNKKLFYVSILTSPTTGGVTASFGMLGDIIIAEPNAYIAFAGKRVIEQTLNKTVPEGSQAAEYLFQKGLFDLIVPRNLLKSVLSELFKLHAFFPLNQKSSKIK acetyl-CoA_carboxylase_beta_subunit
MATCH accd ID 100 CORRECT
FILE1 psai psaI 61149 61259 D 1 111 Ok MTNLNLPSIFVPLVGLVFPAIAMASLFLHVQKNKIV photosystem_I_subunit_VIII
FILE2 psai psaI 61149 61259 D 1 111 Ok MTNLNLPSIFVPLVGLVFPAIAMASLFLHVQKNKIV photosystem_I_subunit_VIII
MATCH psai ID 100 CORRECT
FILE1 ycf4 ycf4 61704 62258 D 1 555 Ok MTWRSDDIWIELITGSRKISNFCWALILFLGSLGFLLVGTSSYLGRNLLSFFPPQQIIFFPQGIVMSFYGIAGLFISSYLWCTISWNVGSGYDRFDRKEGIVCIFRWGFPGKNRRIFLRFLIKDIQSVRIEVKEGIYARRVLYMDIRGQGSIPLTRTDENLTPREIEQKAAELAYFLRVPIEVF photosystem_I_assembly_protein_Ycf4
FILE2 ycf4 ycf4 61704 62258 D 1 555 Ok MTWRSDDIWIELITGSRKISNFCWALILFLGSLGFLLVGTSSYLGRNLLSFFPPQQIIFFPQGIVMSFYGIAGLFISSYLWCTISWNVGSGYDRFDRKEGIVCIFRWGFPGKNRRIFLRFLIKDIQSVRIEVKEGIYARRVLYMDIRGQGSIPLTRTDENLTPREIEQKAAELAYFLRVPIEVF photosystem_I_assembly_protein_Ycf4
MATCH ycf4 ID 100 CORRECT
FILE1 ycf10 ycf10 62988 63677 D 1 690 Ok MAKKKAFTPLFYLASIVFLPWWISFSVNKWLESWVTNWWNTGQSQIVLNNIQEKSLLEKFRELEELLFLDEMIKEYSETHLEEFGIGIHKETIQLITIQNENRMDTILHFSTNIIWFGILSGYSILGKEKLVILNSWAQEFLYNLSDTAKALCILLVSEFFLGYHSPPGWEFVIRSIYNEVGVVANEQTITILVCILPVIFDTCFKYWLFRYLTSLSPSILLLYDSITE potential_heme-binding_protein
FILE2 cema cemA 62988 63677 D 1 690 Ok MAKKKAFTPLFYLASIVFLPWWISFSVNKWLESWVTNWWNTGQSQIVLNNIQEKSLLEKFRELEELLFLDEMIKEYSETHLEEFGIGIHKETIQLITIQNENRMDTILHFSTNIIWFGILSGYSILGKEKLVILNSWAQEFLYNLSDTAKALCILLVSEFFLGYHSPPGWEFVIRSIYNEVGVVANEQTITILVCILPVIFDTCFKYWLFRYLTSLSPSILLLYDSITE envelope_membrane_protein
MATCH ycf10 ID 100 CORRECT
FILE1 peta petA 63897 64859 D 1 963 Ok MQTRNAFSWLKKQITRSISVSLMIYILTRTSISSAYPIFAQQGYENPREATGRIVCANCHLANKPVEIEVPQAVLPDTVFEAVVRIPYDMQLKQVLANGKKGGLNVGAVLILPEGFELAPPDRISPEMKEKIGNLSFQSYRPNKTNILVVGPVPGKKYSEITFPILSPDPATKKDVHFLKYPIYVGGNRGRGQIYPDGNKSNNTVYNATAAGIVSKIIRKEKGGYEITITDASEGRQVVDIIPPGPELLVSEGESIKFDQPLTSNPNVGGFGQGDAEIVLQDPLRVQGLLFFLASVILAQIFLVLKKKQFEKVQLAEMNF cytochrome_f
FILE2 peta petA 63897 64859 D 1 963 Ok MQTRNAFSWLKKQITRSISVSLMIYILTRTSISSAYPIFAQQGYENPREATGRIVCANCHLANKPVEIEVPQAVLPDTVFEAVVRIPYDMQLKQVLANGKKGGLNVGAVLILPEGFELAPPDRISPEMKEKIGNLSFQSYRPNKTNILVVGPVPGKKYSEITFPILSPDPATKKDVHFLKYPIYVGGNRGRGQIYPDGNKSNNTVYNATAAGIVSKIIRKEKGGYEITITDASEGRQVVDIIPPGPELLVSEGESIKFDQPLTSNPNVGGFGQGDAEIVLQDPLRVQGLLFFLASVILAQIFLVLKKKQFEKVQLAEMNF cytochrome_f
MATCH peta ID 100 CORRECT
FILE1 psbj psbJ 65928 66050 R 1 123 Ok MADTTGRIPLWIIGTVAGILVIGLIGIFFYGSYSGLGSSL PSII_reaction_center_subunit_X
FILE2 psbj psbJ 65928 66050 R 1 123 Ok MADTTGRIPLWIIGTVAGILVIGLIGIFFYGSYSGLGSSL photosystem_II_protein_J
MATCH psbj ID 100 CORRECT
FILE1 psbl psbL 66175 66291 R 1 117 Ok MTQSNPNEQNVELNRTSLYWGLLLIFVLAVLFSNYFFN photosystem_II_protein_L
FILE2 psbl psbL 66175 66291 R 1 117 Ok MTQSNPNEQNVELNRTSLYWGLLLIFVLAVLFSNYFFN photosystem_II_protein_L
MATCH psbl ID 100 CORRECT
FILE1 psbf psbF 66314 66433 R 1 120 Ok MTIDRTYPIFTVRWLAVHGLAVPTVFFLGSISAMQFIQR cytochrome_b559_beta_chain
FILE2 psbf psbF 66314 66433 R 1 120 Ok MTIDRTYPIFTVRWLAVHGLAVPTVFFLGSISAMQFIQR photosystem_II_protein_VI
MATCH psbf ID 100 CORRECT
FILE1 psbe psbE 66443 66694 R 1 252 Ok MSGSTGERSFADIITSIRYWVIHSITIPSLFIAGWLFVSTGLAYDVFGSPRPNEYFTESRQGIPLITGRFDPLEQLDEFSRSF cytochrome_b559_alpha_chain
FILE2 psbe psbE 66443 66724 R 1 282 Ok MTVQEYVELSMSGSTGERSFADIITSIRYWVIHSITIPSLFIAGWLFVSTGLAYDVFGSPRPNEYFTESRQGIPLITGRFDPLEQLDEFSRSF photosystem_II_protein_V
MATCH psbe ID 89 ACCEPTABLE.BAD_START
FILE1 petl petL 67692 67787 D 1 96 Ok MLTITSYFGFLLAALTITSALFIGLSKIRLI cytochrome_b6_/f_complex_subunit_VI
FILE2 petl petL 67692 67787 D 1 96 Ok MLTITSYFGFLLAALTITSALFIGLSKIRLI cytochrome_b6/f_complex_subunit_VI
MATCH petl ID 100 CORRECT
FILE1 petg petG 67971 68084 D 1 114 Ok MIEVFLFGIVLGLIPITLAGLFVTAYLQYRRGDQLDL cytochrome_b6_/f_complex_subunit_V
FILE2 petg petG 67971 68084 D 1 114 Ok MIEVFLFGIVLGLIPITLAGLFVTAYLQYRRGDQLDL cytochrome_b6/f_complex_subunit_V
MATCH petg ID 100 CORRECT
FILE1 psaj psaJ 68960 69094 D 1 135 Ok MRDLKTYLSVAPVLSTLWFGALAGLLIEINRFFPDALTFPFFSF photosystem_I_subunit_IX
FILE2 psaj psaJ 68960 69094 D 1 135 Ok MRDLKTYLSVAPVLSTLWFGALAGLLIEINRFFPDALTFPFFSF photosystem_I_subunit_IX
MATCH psaj ID 100 CORRECT
FILE1 rpl33 rpl33 69532 69732 D 1 201 Ok MAKGKDVRVTVILECTSCVRNSVDKVSRGISRYITQKNRHNTPNRFELKKFCPYCYKHTIHGEIKK ribosomal_protein_L33
FILE2 rpl33 rpl33 69532 69732 D 1 201 Ok MAKGKDVRVTVILECTSCVRNSVDKVSRGISRYITQKNRHNTPNRFELKKFCPYCYKHTIHGEIKK ribosomal_protein_L33
MATCH rpl33 ID 100 CORRECT
FILE1 rps18 rps18 69923 70228 D 1 306 Ok MDKSKRPFLKFKRSFRRRLPPIQSGDRIDYRNMSLISRFISEQGKILSRRVNRLTLKQQRLITLAIKQARILSLLPFLNNEKQFERTESTARTTGFKARNK ribosomal_protein_S18
FILE2 rps18 rps18 69923 70228 D 1 306 Ok MDKSKRPFLKFKRSFRRRLPPIQSGDRIDYRNMSLISRFISEQGKILSRRVNRLTLKQQRLITLAIKQARILSLLPFLNNEKQFERTESTARTTGFKARNK ribosomal_protein_S18
MATCH rps18 ID 100 CORRECT
FILE1 rpl20 rpl20 70443 70829 R 1 387 Ok MTRIKRGYIARRRRTKIRLFASSFRGAHSRLTRTITQQKIRALVSAHRDRDRKKRDFRRLWITRINAVIRERGVSYSYSRLIHDLYKRQLLLNRKILAQIAISNRNCLYMISNEIIKEVDWKESTRII ribosomal_protein_L20
FILE2 rpl20 rpl20 70443 70829 R 1 387 Ok MTRIKRGYIARRRRTKIRLFASSFRGAHSRLTRTITQQKIRALVSAHRDRDRKKRDFRRLWITRINAVIRERGVSYSYSRLIHDLYKRQLLLNRKILAQIAISNRNCLYMISNEIIKEVDWKESTRII ribosomal_protein_L20
MATCH rpl20 ID 100 CORRECT
FILE1 rps12 rps12 71639 100035 R 3 372 Ok MPTIKQLIRNTRQPIRNVTKSPALRGCPQRRGTCTRVYTITPKKPNSALRKVARVRLTSGFEITAYIPGIGHNSQEHSVVLVRGGRVKDLPGVRYHIVRGTLDAVGVKDRQQGRSKYGVKKPK ribosomal_protein_S12
FILE2 NONE
MATCH rps12 ID 0 MISSED.WRONG_STOP
FILE1 clpp clpP 71861 73896 R 3 621 Ok MPIGVPRVVFRNPGDPISSWVDIYNRLYRERLLFLGQGIGTELSNQLIGLMLYLSMEDENKDLYLFVNSPGGWVIPGIAIYDTMQFVRPDIHTICLGLAASMGSFILAGGQLTKRIAFPHARVMIHEPYSGFYMAQVGEFVLEAIEMAKLRETLTRVYAEKTGQPVWVIHEDMERDIFMSATEAQAYGIVDFVAVQGKEHGFHADL ATP-dependent_Clp_protease_proteolytic_subunit
FILE2 clpp clpP 71861 73896 R 3 621 Ok MPIGVPRVVFRNPGDPISSWVDIYNRLYRERLLFLGQGIGTELSNQLIGLMLYLSMEDENKDLYLFVNSPGGWVIPGIAIYDTMQFVRPDIHTICLGLAASMGSFILAGGQLTKRIAFPHARVMIHEPYSGFYMAQVGEFVLEAIEMAKLRETLTRVYAEKTGQPVWVIHEDMERDIFMSATEAQAYGIVDFVAVQGKEHGFHADL ATP-dependent_Clp_protease_proteolytic_subunit
MATCH clpp ID 100 CORRECT
FILE1 psbb psbB 74341 75867 D 1 1527 Ok MGLPWYRVHTVVLNDPGRLLSVHIMHTALVAGWAGSMALYELAVFDPSDPVLDPMWRQGMFVIPFMTRLGITNSWGGWSITGGTVTNPGIWSYEGVAGAHIVFSGLCFLAAIWHWVYWDLEIFCDERTGKPSLDLPKIFGIHLFLSGVACFGFGAFHVTGLYGPGIWVSDPYGLTGKVQPVNPAWGVEGFDPFVPGGIASHHIAAGTLGILAGLFHLSVRPPQRLYKGLRMGNIETVLSSSIAAVFFAAFVVAGTMWYGSATTPIELFGPTRYQWDQGYFQQEIYRRVSAGLAENQSLSEAWSKIPEKLAFYDYIGNNPAKGGLFRAGSMDNGDGIAVGWLGHPIFRDKEGRELFVRRMPTFFETFPVVLVDGDGIVRADVPFRRAESKYSVEQVGVTVEFYGGELNGVSYSDPATVKKYARRAQLGEIFELDRATLKSDGVFRSSPRGWFTFGHASFALLFFFGHIWHGARTLFRDVFAGIDPDLDAQVEFGAFQKLGDPTTKRQAA photosystem_II_47_kDa_protein
FILE2 psbb psbB 74341 75867 D 1 1527 Ok MGLPWYRVHTVVLNDPGRLLSVHIMHTALVAGWAGSMALYELAVFDPSDPVLDPMWRQGMFVIPFMTRLGITNSWGGWSITGGTVTNPGIWSYEGVAGAHIVFSGLCFLAAIWHWVYWDLEIFCDERTGKPSLDLPKIFGIHLFLSGVACFGFGAFHVTGLYGPGIWVSDPYGLTGKVQPVNPAWGVEGFDPFVPGGIASHHIAAGTLGILAGLFHLSVRPPQRLYKGLRMGNIETVLSSSIAAVFFAAFVVAGTMWYGSATTPIELFGPTRYQWDQGYFQQEIYRRVSAGLAENQSLSEAWSKIPEKLAFYDYIGNNPAKGGLFRAGSMDNGDGIAVGWLGHPIFRDKEGRELFVRRMPTFFETFPVVLVDGDGIVRADVPFRRAESKYSVEQVGVTVEFYGGELNGVSYSDPATVKKYARRAQLGEIFELDRATLKSDGVFRSSPRGWFTFGHASFALLFFFGHIWHGARTLFRDVFAGIDPDLDAQVEFGAFQKLGDPTTKRQAA photosystem_II_47_kDa_protein
MATCH psbb ID 100 CORRECT
FILE1 psbt psbT 76069 76173 D 1 105 Ok MEALVYTFLLVSTLGIIFFAIFFREPPTIRTKKN photosystem_II_protein_T
FILE2 psbt psbT 76069 76173 D 1 105 Ok MEALVYTFLLVSTLGIIFFAIFFREPPTIRTKKN photosystem_II_protein_T
MATCH psbt ID 100 CORRECT
FILE1 psbn psbN 76251 76382 R 1 132 Ok METATLVAIFISGLLVSFTGYALYTAFGQPSQQLRDPFEEHGD photosystem_II_protein_N
FILE2 psbn psbN 76251 76382 R 1 132 Ok METATLVAIFISGLLVSFTGYALYTAFGQPSQQLRDPFEEHGD photosystem_II_protein_N
MATCH psbn ID 100 CORRECT
FILE1 psbh psbH 76494 76715 D 1 222 Ok MATQTVENSSRSGPRRTAVGDLLKPLNSEYGKVAPGWGTTPLMGVAMALFAVFLSIILEIYNSSVLLDGISMN photosystem_II_phosphoprotein
FILE2 psbh psbH 76476 76715 D 1 240 Ok MNTIGFMATQTVENSSRSGPRRTAVGDLLKPLNSEYGKVAPGWGTTPLMGVAMALFAVFLSIILEIYNSSVLLDGISMN photosystem_II_protein_H
MATCH psbh ID 92 ALMOST_CORRECT.BAD_START
FILE1 petb petB 76845 78239 D 2 648 Ok MSKVYDWFEERLEIQAIADDITSKYVPPHVNIFYCLGGITLTCFLVQVATGFAMTFYYRPTVTEAFASVQYIMTEANFGWLIRSVHRWSASMMVLMMILHVFRVYLTGGFKKPRELTWVTGVVLAVLTASFGVTGYSLPWDQIGYWAVKIVTGVPDAIPVIGSPLVELLRGSASVGQSTLTRFYSLHTFVLPLLTAVFMLMHFPMIRKQGISGPL cytochrome_b6
FILE2 petb petB 77559 78239 D 1 681 Ok MYGSQRGSSAYLNKVYDWFEERLEIQAIADDITSKYVPPHVNIFYCLGGITLTCFLVQVATGFAMTFYYRPTVTEAFASVQYIMTEANFGWLIRSVHRWSASMMVLMMILHVFRVYLTGGFKKPRELTWVTGVVLAVLTASFGVTGYSLPWDQIGYWAVKIVTGVPDAIPVIGSPLVELLRGSASVGQSTLTRFYSLHTFVLPLLTAVFMLMHFPMIRKQGISGPL cytochrome_b6
MATCH petb ID 95 ALMOST_CORRECT.BAD_NBEXON.BAD_START
FILE1 petd petD 78434 79654 D 2 483 Ok MGVTKKPDLNDPVLRAKLAKGMGHNYYGEPAWPNDLLYIFPVVILGTIACNVGLAVLEPSMIGEPADPFATPLEILPEWYFFPVFQILRTVPNKLLGVLLMVSVPAGLLTVPFLENVNKFQNPFRRPVATTVFLIGTAVALWLGIGATLPIDKSLTLGLF cytochrome_b6_/f_complex_subunit_IV
FILE2 petd petD 79127 79654 D 1 528 Ok MMSSSLGGWIYKNSPIPITKKPDLNDPVLRAKLAKGMGHNYYGEPAWPNDLLYIFPVVILGTIACNVGLAVLEPSMIGEPADPFATPLEILPEWYFFPVFQILRTVPNKLLGVLLMVSVPAGLLTVPFLENVNKFQNPFRRPVATTVFLIGTAVALWLGIGATLPIDKSLTLGLF cytochrome_b6/f_complex_subunit_IV
MATCH petd ID 90 ALMOST_CORRECT.BAD_NBEXON.BAD_START
FILE1 rpoa rpoA 79846 80859 R 1 1014 Ok MVREKVTVSTRTLQWKCVESRTDSKRLYYGRFILSPLMKGQADTIGIAMRRALLGEIEGTCITRVKSEKVPHEYSTITGIQESVHEILMNLKEIVLRSNLYGTSEASICVKGPGYVTAQDIILPPYVEIVDNTQHIASLTEPIDFCIGLQIERNRGYLIKTPHNFQDGSYPIDAVFMPVRNANHSIHSYGNGNEKQEILFIEIWTNGSLTPKEALHDASRNLIDLFIPFLHMEEDNLYLQDNQHTVPLSPFTFHDKLAKLIKNKKKIALKSIFIDQSELSSRIYNCLKMSNIYTLLDLLNNSQEDLMKIEHFRSEDIKQILDILEKYFVIDLAKNKF RNA_polymerase_alpha_chain
FILE2 rpoa rpoA 79846 80859 R 1 1014 Ok MVREKVTVSTRTLQWKCVESRTDSKRLYYGRFILSPLMKGQADTIGIAMRRALLGEIEGTCITRVKSEKVPHEYSTITGIQESVHEILMNLKEIVLRSNLYGTSEASICVKGPGYVTAQDIILPPYVEIVDNTQHIASLTEPIDFCIGLQIERNRGYLIKTPHNFQDGSYPIDAVFMPVRNANHSIHSYGNGNEKQEILFIEIWTNGSLTPKEALHDASRNLIDLFIPFLHMEEDNLYLQDNQHTVPLSPFTFHDKLAKLIKNKKKIALKSIFIDQSELSSRIYNCLKMSNIYTLLDLLNNSQEDLMKIEHFRSEDIKQILDILEKYFVIDLAKNKF RNA_polymerase_alpha_subunit
MATCH rpoa ID 100 CORRECT
FILE1 rps11 rps11 80925 81341 R 1 417 Ok MAKAIPKISSRRNGRISSRKGARRIPKGVIHVQASFNNTIVTVTDVRGRVVSWSSAGTSGFKGTRRGTPFAAQTAAANAIRTVVDQGMQRAEVMIKGPGLGRDAALRAIRRSGILLTFVRDVTPMPHNGCRPPKKRRV ribosomal_protein_S11
FILE2 rps11 rps11 80925 81341 R 1 417 Ok MAKAIPKISSRRNGRISSRKGARRIPKGVIHVQASFNNTIVTVTDVRGRVVSWSSAGTSGFKGTRRGTPFAAQTAAANAIRTVVDQGMQRAEVMIKGPGLGRDAALRAIRRSGILLTFVRDVTPMPHNGCRPPKKRRV ribosomal_protein_S11
MATCH rps11 ID 100 CORRECT
FILE1 rpl36 rpl36 81443 81556 R 1 114 Ok MKIRASVRKICEKCRLIRRRGRIIVICSNPRHKQRQG ribosomal_protein_L36
FILE2 rpl36 rpl36 81443 81556 R 1 114 Ok MKIRASVRKICEKCRLIRRRGRIIVICSNPRHKQRQG ribosomal_protein_L36
MATCH rpl36 ID 100 CORRECT
FILE1 NONE
FILE2 infa infA 81668 81772 R 1 105 Ok MQILPGDRVKIEVSPYDSTKGHIIYRLHNKDLKD translation_initiation_factor_1
MATCH infa ID 0 OVERPRED.WRONG_STOP
FILE1 rps8 rps8 81881 82285 R 1 405 Ok MGRDTIAEIITSIRNADMDRKRVVRIASTNITENIVQILLREGFIENVRKHRENNKYFLVLTLRHRRNRKRPYRNILNLKRISRPGLRIYSNYQRIPRILGGMGIVILSTSRGIMTDREARLEGIGGEILCYIW ribosomal_protein_S8
FILE2 rps8 rps8 81881 82285 R 1 405 Ok MGRDTIAEIITSIRNADMDRKRVVRIASTNITENIVQILLREGFIENVRKHRENNKYFLVLTLRHRRNRKRPYRNILNLKRISRPGLRIYSNYQRIPRILGGMGIVILSTSRGIMTDREARLEGIGGEILCYIW ribosomal_protein_S8
MATCH rps8 ID 100 CORRECT
FILE1 rpl14 rpl14 82453 82821 R 1 369 Ok MIQPQTHLNVADNSGARELMCIRIIGASNRRYAHIGDVIVAVIKEAVPNMPLERSEVVRAVIVRTCKELKRDNGMIIRYDDNAAVVIDQEGNPKGTRIFGAIARELRELNFTKIVSLAPEVL ribosomal_protein_L14
FILE2 rpl14 rpl14 82453 82821 R 1 369 Ok MIQPQTHLNVADNSGARELMCIRIIGASNRRYAHIGDVIVAVIKEAVPNMPLERSEVVRAVIVRTCKELKRDNGMIIRYDDNAAVVIDQEGNPKGTRIFGAIARELRELNFTKIVSLAPEVL ribosomal_protein_L14
MATCH rpl14 ID 100 CORRECT
FILE1 rpl16 rpl16 82947 84369 R 2 405 Ok MLSPKRTRFRKQHRGRMKGISYRGNRISFGKYALQALEPAWITSRQIEAGRRAMTRNARRGGKIWVRIFPDKPVTLRPAETRMGSGKGSPEYWVAVVKPGRILYEMGGVTENIARRAISLAASKMPIRTQFIIS ribosomal_protein_L16
FILE2 rpl16 rpl16 82947 83354 R 1 408 Ok MNYNPKRTRFRKQHRGRMKGISYRGNRISFGKYALQALEPAWITSRQIEAGRRAMTRNARRGGKIWVRIFPDKPVTLRPAETRMGSGKGSPEYWVAVVKPGRILYEMGGVTENIARRAISLAASKMPIRTQFIIS ribosomal_protein_L16
MATCH rpl16 ID 97 ALMOST_CORRECT.BAD_NBEXON.BAD_START
FILE1 rps3 rps3 84531 85187 R 1 657 Ok MGQKINPLGFRLGTTQSHHSLWFSQPKNYSEGLQEDKKIRDCIKNYVQKNMRTSSGIEGIARIEIQKRIDLIQVIIFMGFPKLLIESRPRGIEELQMTLQKEFNCVNRKLNIAVTRIAKPYGNPNILAEFIAGQLKNRVSFRKAMKKAIELTEQADTKGIQIQIAGRIDGKEIARVEWIREGRVPLQTIRAKIDYCSYTVRTIYGILGIKIWIFLDEE ribosomal_protein_S3
FILE2 rps3 rps3 84531 85187 R 1 657 Ok MGQKINPLGFRLGTTQSHHSLWFSQPKNYSEGLQEDKKIRDCIKNYVQKNMRTSSGIEGIARIEIQKRIDLIQVIIFMGFPKLLIESRPRGIEELQMTLQKEFNCVNRKLNIAVTRIAKPYGNPNILAEFIAGQLKNRVSFRKAMKKAIELTEQADTKGIQIQIAGRIDGKEIARVEWIREGRVPLQTIRAKIDYCSYTVRTIYGILGIKIWIFLDEE ribosomal_protein_S3
MATCH rps3 ID 100 CORRECT
FILE1 rpl22 rpl22 85172 85639 R 1 468 Ok MLKKKKTEVYALGEHISMSADKARRVIDQIRGRSYEETLMILELMPYRACYPILKLVYSAAANASYNMGSSETNLVISKAEVNEGTTVKKLKPRARGRSFPIKRSTCHITIVMKDISLDDEYGEMSSLKKTRWKKKSTAMTYRDMYNSGGLWDKK ribosomal_protein_L22
FILE2 rpl22 rpl22 85172 85639 R 1 468 Ok MLKKKKTEVYALGEHISMSADKARRVIDQIRGRSYEETLMILELMPYRACYPILKLVYSAAANASYNMGSSETNLVISKAEVNEGTTVKKLKPRARGRSFPIKRSTCHITIVMKDISLDDEYGEMSSLKKTRWKKKSTAMTYRDMYNSGGLWDKK ribosomal_protein_L22
MATCH rpl22 ID 100 CORRECT
FILE1 rps19 rps19 85692 85970 R 1 279 Ok MTRSLKKNPFVANHLLKKIDKLNTKAEKEIIVTWSRASTIIPTMIGHTIAIHNGKEHLPIYITDSMVGHKLGEFAPTLNFRGHAKSDNRSRR ribosomal_protein_S19
FILE2 rps19 rps19 85692 85970 R 1 279 Ok MTRSLKKNPFVANHLLKKIDKLNTKAEKEIIVTWSRASTIIPTMIGHTIAIHNGKEHLPIYITDSMVGHKLGEFAPTLNFRGHAKSDNRSRR ribosomal_protein_S19
MATCH rps19 ID 100 CORRECT
FILE1 rpl2 rpl2 86038 87528 R 2 825 Ok MAIHLYKTSTPSTRNGTVDSQVKSNPRNNLIYGQRRCGKGRNARGIITARHRGGGHKRLYRKIDFRRNEKDIYGRIVTIEYDPNRNAYICLIHYGDGEKRYILHPRGAIIGDTIVSGTEVPIKMGNALPLTDMPLGTAIHNIEITLGKGGQLARAAGAVAKLIAKEGKSATLKLPSGEVRLISKNCSATVGQVGNVGVNQKSLGRAGSKRWLGKRPVVRGVVMNPVDHPHGGGEGRAPIGRKKPTTPWGYPALGRRSRKRNKYSDNLILRRRSK ribosomal_protein_L2
FILE2 rpl2 rpl2_1 86038 87528 R 2 825 Ok MAIHLYKTSTPSTRNGTVDSQVKSNPRNNLIYGQRRCGKGRNARGIITARHRGGGHKRLYRKIDFRRNEKDIYGRIVTIEYDPNRNAYICLIHYGDGEKRYILHPRGAIIGDTIVSGTEVPIKMGNALPSTDMPLGTAIHNIEITLGKGGQLARAAGAVAKLIAKEGKSATLKLPSGEVRLISKNCSATVGQVGNVGVNQKSLGRAGSKRWLGKRPVVRGVVMNPVDHPHGGGEGRAPIGRKKPTTPWGYPALGRRSRKRNKYSDNLILRRRSK ribosomal_protein_L2
MATCH rpl2 ID 99 ALMOST_CORRECT.BAD_JUNCTION
FILE1 rpl23 rpl23 87547 87828 R 1 282 Ok MDGIKYAVFTDKSIRLLGKNQYTSNVESGSTRTEIKHWVELFFGVKVIAMNSHRLPGKSRRMGPIMGHTMHYRRMIITLQPGYSIPPLRKKRT ribosomal_protein_L23
FILE2 rpl23 rpl23_1 87547 87828 R 1 282 Ok MDGIKYAVFTDKSIRLLGKNQYTSNVESGSTRTEIKHWVELFFGVKVIAMNSHRLPGKSRRMGPIMGHTMHYRRMIITLQPGYSIPPLRKKRT ribosomal_protein_L23
MATCH rpl23 ID 100 CORRECT
FILE1 ycf2 ycf2 88196 95032 D 1 6837 Ok MRGHQFKSWIFELREILREIKNSHHFLDSWTQFNSVGSFIHIFFHQERFLKLFDPRIWSILLSRNSQGSPSNRYFTIKGVILFVVAVLIYRINNRNMVERKNLYLIGLLPIPMNSIGPRNDTLEESVGSSNINRLIVSLLYLPKGKKISESCFLNPKESTWVLPITKKCSMPESNWGSRWWRNWIGKKRDSSCKISNETVAGIEILFKEKDLKYLEFLFVYYMDDPIRKDHDWELFDRLSLRKSRNRINLNSGPLFEILVKHWISYLMSAFREKIPIEVEGFFKQQGAGSTIQSNDIEHVSHLFSRNKWAISLQNCAQFHMWQFRQDLFVSWGKNPPESDFLRNVSRENWIWLDNVWLVNKDRFFSKVQNVSSNIQYDSTRSSFVQVTDSSQLKGSSDQSRDHLDSISNEDSEYHTLINQREIQQRKERSILWDPSFLQTERKEIESGRFPKCLSGYSSMSRLFTEREKQMINHLFPEEIEEFLGNPTRSVRSFFSDRWSELHLGSNPTERSTRDQKLLKKQQDLSFVPSRRSEKKEMVNIFKIITYLQNTVSIHPISSDPGCDMVPKDEPDMDSSNKISFLNKNPFFDLFHLFHDRNRGGYTLHYDFASEERFQEMADLFTLSITEPDLVYHKGFAFSIDSCGLDQKQFLNEARDESKKKSLLVLPPIFYEENESFSRRIRKKWVRISCGNDLEDPKPKIVVFASNNIMEAVTQYRLIRNLIQIQYSTYGYIRNVLNRFFLMNRSDRNFEYGIQRDQIGKDTLNHRTIMKYTINQYLSNLKKSQKKWFEPLILISRTERSMNRDPDAYRYKWSNGSKSFQEHLEQSVSKQKSRFQVVFDRLRINQYSIDWSEVIDKKDLSKSLRFFLSKSLLFLSKLLLFLSNSLPFFCVSFGNIPIHRSEIYIYEELKGPNDQLCNQLLESIGLQIVHLKKLKPFLLDDHDTSQKSKFLINGGTISPFLFNKIPKWMIDSFHTRNNRRKSFDNPDSYFSMIFHDQDNWLNPVKPFHRSSLISSFYKANRLRFLNNPHHFCFYWNTRFPFSVEKARINNSDFTYGQFLNILFIRNKIFSLCVGKKKHAFWGRDTISPIESQVSNIFIPNDFPQSGDETYNLYKSFHFPSRSDPFVRRAIYSIADISGTPLTEGQIVNFERTYCQPLSDMNLSDSEGKNLHQYLNFNSNMGLIHTPCSEKDLSSEKRKKWSLCLKKCVEKGQTYRTFQRDSAFSTLSKWNLFQTYMPWFLTSTGYKYLNLIFLDTFSDLLPILSSSQKFVSIFPDIMHGSGISWRILQKKLCLPQWNLISEISSKCLHNLLLSEEMIHRNNESPLISTHLRSPNAREFLYSILFLLLVAGYLVRTHLLFVSRASSELQTEFERVKSLMTPSSMIELRKLLDRYPTSEPNSFWLKNLFLVALEQLGDSLEEIRGSASGGNMLGPAYGVKSIRSKKKDWNINLIEIIDLIPNPINRITFSRNTRHLSHTSKEIYSLIRKRKNVNGDWIDEKIESWVANSDSIDDEEREFLVQFSTLTTENRIDQILLSLTHSDHLSKNDSGYQMIEQPGAIYLRYLVDIHKKHLMNYEFNPSCLAERRIFLAHYQTITYSQTSCGENSFHFPSHGKPFSLRLALSPSRGILVIGSIGTGRSYLVKYLATNSYVPFITVFLNKFLDNKSKGFLLDEIDIDDSDDIDDSDNLDASDDIDRDLDTELELLTRMNGLTVDMMPEIDRFYITLQFELAKAMSPCIIWIPNIHDLDVNESNDLSLGLLVNHLSRDCERCSTRNILVIASTHIPQKVDPALIAPNKLNTCIKIRRLLIPQQRKHFFTLSYTRGFHLEKKMFHTNGFGSITMGSNARDLVALTNEVLSISITQKKSIIDTNTIRSALHRQTWDLRSQVRSVQDHGILFYQIGRAVAQNVLLSNCPIDPISIYMKKKSCNEGDSYLYKWYFELGTSMKRLTILLYLLSCSA
GSVAQDLWSLSVPDEKNGITSYGLVENDSDLVHGLLEVEGALVGSSRTEKDCSQFDNDRVTLLLRPEPRNPLDMMQKGSWSILDQRFLYEKYESEFEEGEGEGALDPQEDLFNHIVWAPRIWRPWGFLFDCIERPNELGFPYWSRSFRGKRIIYDEEDELQENDSGFLQSGTMQYQTRDRSQGLFRISQFIWDPADPLFFLFKDQPPGSVFSHRELFADEEMSKGLLTSQTDPPTSLYKRWFIKNTQEKHFELLINRQRWLRTNSSLSNGSFRSNTLSESYQYLSNLFLSNGTLLDQMPKTLLRKRWLFPDEMKIGFM Ycf2_protein
FILE2 ycf2 ycf2_1 88196 95032 D 1 6837 Ok MRGHQFKSWIFELREILREIKNSHHFLDSWTQFNSVGSFIHIFFHQERFLKLFDPRIWSILLSRNSQGSPSNRYFTIKGVILFVVAVLIYRINNRNMVERKNLYLIGLLPIPMNSIGPRNDTLEESVGSSNINRLIVSLLYLPKGKKISESCFLNPKESTWVLPITKKCSMPESNWGSRWWRNWIGKKRDSSCKISNETVAGIEILFKEKDLKYLEFLFVYYMDDPIRKDHDWELFDRLSLRKSRNRINLNSGPLFEILVKHWISYLMSAFREKIPIEVEGFFKQQGAGSTIQSNDIEHVSHLFSRNKWAISLQNCAQFHMWQFRQDLFVSWGKNPPESDFLRNVSRENWIWLDNVWLVNKDRFFSKVQNVSSNIQYDSTRSSFVQVTDSSQLKGSSDQSRDHLDSISNEDSEYHTLINQREIQQRKERSILWDPSFLQTERKEIESGRFPKCLSGYSSMSRLFTEREKQMINHLFPEEIEEFLGNPTRSVRSFFSDRWSELHLGSNPTERSTRDQKLLKKQQDLSFVPSRRSEKKEMVNIFKIITYLQNTVSIHPISSDPGCDMVPKDEPDMDSSNKISFLNKNPFFDLFHLFHDRNRGGYTLHYDFASEERFQEMADLFTLSITEPDLVYHKGFAFSIDSCGLDQKQFLNEARDESKKKSLLVLPPIFYEENESFSRRIRKKWVRISCGNDLEDPKPKIVVFASNNIMEAVTQYRLIRNLIQIQYSTYGYIRNVLNRFFLMNRSDRNFEYGIQRDQIGKDTLNHRTIMKYTINQYLSNLKKSQKKWFEPLILISRTERSMNRDPDAYRYKWSNGSKSFQEHLEQSVSKQKSRFQVVFDRLRINQYSIDWSEVIDKKDLSKSLRFFLSKSLLFLSKLLLFLSNSLPFFCVSFGNIPIHRSEIYIYEELKGPNDQLCNQLLESIGLQIVHLKKLKPFLLDDHDTSQKSKFLINGGTISPFLFNKIPKWMIDSFHTRNNRRKSFDNPDSYFSMIFHDQDNWLNPVKPFHRSSLISSFYKANRLRFLNNPHHFCFYWNTRFPFSVEKARINNSDFTYGQFLNILFIRNKIFSLCVGKKKHAFWGRDTISPIESQVSNIFIPNDFPQSGDETYNLYKSFHFPSRSDPFVRRAIYSIADISGTPLTEGQIVNFERTYCQPLSDMNLSDSEGKNLHQYLNFNSNMGLIHTPCSEKDLSSEKRKKWSLCLKKCVEKGQTYRTFQRDSAFSTLSKWNLFQTYMPWFLTSTGYKYLNLIFLDTFSDLLPILSSSQKFVSIFPDIMHGSGISWRILQKKLCLPQWNLISEISSKCLHNLLLSEEMIHRNNESPLISTHLRSPNAREFLYSILFLLLVAGYLVRTHLLFVSRASSELQTEFERVKSLMTPSSMIELRKLLDRYPTSEPNSFWLKNLFLVALEQLGDSLEEIRGSASGGNMLGPAYGVKSIRSKKKDWNINLIEIIDLIPNPINRITFSRNTRHLSHTSKEIYSLIRKRKNVNGDWIDEKIESWVANSDSIDDEEREFLVQFSTLTTENRIDQILLSLTHSDHLSKNDSGYQMIEQPGAIYLRYLVDIHKKHLMNYEFNPSCLAERRIFLAHYQTITYSQTSCGENSFHFPSHGKPFSLRLALSPSRGILVIGSIGTGRSYLVKYLATNSYVPFITVFLNKFLDNKSKGFLLDEIDIDDSDDIDDSDNLDASDDIDRDLDTELELLTRMNGLTVDMMPEIDRFYITLQFELAKAMSPCIIWIPNIHDLDVNESNDLSLGLLVNHLSRDCERCSTRNILVIASTHIPQKVDPALIAPNKLNTCIKIRRLLIPQQRKHFFTLSYTRGFHLEKKMFHTNGFGSITMGSNARDLVALTNEVLSISITQKKSIIDTNTIRSALHRQTWDLRSQVRSVQDHGILFYQIGRAVAQNVLLSNCPIDPISIYMKKKSCNEGDSYLYKWYFELGTSMKRLTILLYLLSC
SAGSVAQDLWSLSVPDEKNGITSYGLVENDSDLVHGLLEVEGALVGSSRTEKDCSQFDNDRVTLLLRPEPRNPLDMMQKGSWSILDQRFLYEKYESEFEEGEGEGALDPQEDLFNHIVWAPRIWRPWGFLFDCIERPNELGFPYWSRSFRGKRIIYDEEDELQENDSGFLQSGTMQYQTRDRSQGLFRISQFIWDPADPLFFLFKDQPPGSVFSHRELFADEEMSKGLLTSQTDPPTSLYKRWFIKNTQEKHFELLINRQRWLRTNSSLSNGSFRSNTLSESYQYLSNLFLSNGTLLDQMPKTLLRKRWLFPDEMKIGFM Ycf2
MATCH ycf2 ID 100 CORRECT
FILE1 ycf15 ycf15 95123 95386 D 1 264 Ok METLVSSIFWTLAPWKNMLLLKHGRIEILDQNTMYGWYELPKQEFLNSKQPVQIFTTKKYWILFRIGPERRRKAGMPIGVYYIEFTR Ycf15_protein
FILE2 NONE
MATCH ycf15 ID 0 MISSED.WRONG_STOP
FILE1 ndhb ndhB 96224 98435 R 2 1533 Ok MIWHVQNENFILDSTRIFMKAFHLLLFDGSLIFPECILIFGLILLLMIDSTSDQKDIPWLYFISSTSLVMSITALLFRWREEPMISFSGNFQTNNFNEIFQFLILLCSTLCIPLSVEYIECTEMAITEFLLFVLTATLGGMFLCGANDLITIFVAPECFSLCSYLLSGYTKKDVRSNEATMKYLLMGGASSSILVHGFSWLYGSSGGEIELQEIVNGLINTQMYNSPGISIALIFITVGIGFKLSPAPSHQWTPDVYEGSPTPVVAFLSVTSKVAASASATRIFNIPFYFSSNEWHLLLEILAILSMILGNLIAITQTSMKRMLAYSSIGQIGYVIIGIIVGDSNDGYASMITYMLFYISMNLGTFACIVLFGLRTGTDNIRDYAGLYTKDPFLALSLALCLLSLGGLPPLAGFFGKLYLFWCGWQAGLYFLVLIGLLTSVVSIYYYLKIIKLLMTGRNQEITPHVRNYRRSPLRSNNSIELSMIVCVIASTIPGISMNPIIAIAQDSLF NADH_dehydrogenase_subunit_2
FILE2 ndhb ndhB_1 96224 98435 R 2 1626 Ok MIWHVQNENFILDSTRIFMKAFHLLLFDGSLIFPECILIFGLILLLMIDSTSDQKDIPWLYFISSTSLVMSITALLFRWREEPMISFSGNFQTNNFNEIFQFLILLCSTLCIPLSVEYIECTEMAITEFLLFVLTATLGGMFLCGANDLITIFVAPECFSLCSYLLSGYTKKDVRSNEATMKYLLMGGASSSILVHGFSWLYGSSGGEIELQEIVNGLINTQMYNSPGISIALIFITVGIGFKLSPAPSHQWTPDVYEGVRFVREIPTSLSISEMFGFFKTPWTCRREMLSPTPVVAFLSVTSKVAASASATRIFNIPFYFSSNEWHLLLEILAILSMILGNLIAITQTSMKRMLAYSSIGQIGYVIIGIIVGDSNDGYASMITYMLFYISMNLGTFACIVLFGLRTGTDNIRDYAGLYTKDPFLALSLALCLLSLGGLPPLAGFFGKLYLFWCGWQAGLYFLVLIGLLTSVVSIYYYLKIIKLLMTGRNQEITPHVRNYRRSPLRSNNSIELSMIVCVIASTIPGISMNPIIAIAQDSLF NADH_dehydrogenase_subunit_2
MATCH ndhb ID 94 ALMOST_CORRECT.BAD_JUNCTION
FILE1 rps7 rps7 98721 99188 R 1 468 Ok MSRRGTAEKKTAKSDPIYRNRLVNMLVNRILKHGKKSLAYQIIYRAVKKIQQKTETNPLSVLRQAIRGVTPDITVKARRVGGSTHQVPIEIGSTQGKALAIRWLLAASRKRPGRNMAFKLSSELVDAAKGSGDAIRKKEETHRMAEANRAFAHFR ribosomal_protein_S7
FILE2 rps7 rps7_1 98721 99188 R 1 468 Ok MSRRGTAEKKTAKSDPIYRNRLVNMLVNRILKHGKKSLAYQIIYRAVKKIQQKTETNPLSVLRQAIRGVTPDITVKARRVGGSTHQVPIEIGSTQGKALAIRWLLAASRKRPGRNMAFKLSSELVDAAKGSGDAIRKKEETHRMAEANRAFAHFR ribosomal_protein_S7
MATCH rps7 ID 100 CORRECT
FILE1 ndhf ndhF 111508 113721 R 1 2214 Ok MEQTYEYAWIIPFIPLPVPMLIGAGLILFPTATKRFRRMWAFQSVLLLSIVMIFSIYLSIQQINSSSVYQYVWSWIINNDFSLDFGYLIDPLTSIMSILITTVGIMVLIYSDNYMAHDQGYLRFFAYMSFFSTSMLGLVTSSNLIQIYIFWELVGLCSYLLIGFWFTRPVAANACQKAFVTNRVGDFGLLLGILGFYWITGSFEFRDLFEIFNNLIYNNELNFLFVTLCAVLLFAGAVAKSAQFPLHVWLPDAMEGPTPISALIHAATMVAAGIFLVARLLPLFRVIPYIMYLISVIGIITVLLGATLALAQKDIKRGLAYSTMSQLGYMMLALGMGSYRSALFHLITHAYSKALLFLGSGSIIHSMETIVGYSPAKSQNMGLMGGLRKHVPITKITFLLGTLSLCGIPPLACFWSKDEILNDSWLYSPIFAIIAWATAGLTAFYMFRIYLLTFEGHLNAHFQNYGGKQKIPFYSISLWGKNGVKKNSCLLTMNNNESTYFLSKTKYPIAKNGRKMTRPFMTIAHFKHKAVSSYPYESDNTMLFPIFVLGLFTLFVGAIGIPFNQEGVNLDILSKWLAPSINLLHPKSNNSLDWNEFLKDAVVSVSIAYFGIFIASFLYKPIYSSLKNLEFINSFVKKGPKRILWDKILNGIYDWSYNRAYIDAFYTRFFVGGIRGLAEFTHFVDRRVIDGMTNGVGVISFIVGEGIKYIGGGRISSYLFLYLAYVSVFLLVYYLLF NADH_dehydrogenase_subunit_5
FILE2 ndhf ndhF 111508 113721 R 1 2214 Ok MEQTYEYAWIIPFIPLPVPMLIGAGLILFPTATKRFRRMWAFQSVLLLSIVMIFSIYLSIQQINSSSVYQYVWSWIINNDFSLDFGYLIDPLTSIMSILITTVGIMVLIYSDNYMAHDQGYLRFFAYMSFFSTSMLGLVTSSNLIQIYIFWELVGLCSYLLIGFWFTRPVAANACQKAFVTNRVGDFGLLLGILGFYWITGSFEFRDLFEIFNNLIYNNELNFLFVTLCAVLLFAGAVAKSAQFPLHVWLPDAMEGPTPISALIHAATMVAAGIFLVARLLPLFRVIPYIMYLISVIGIITVLLGATLALAQKDIKRGLAYSTMSQLGYMMLALGMGSYRSALFHLITHAYSKALLFLGSGSIIHSMETIVGYSPAKSQNMGLMGGLRKHVPITKITFLLGTLSLCGIPPLACFWSKDEILNDSWLYSPIFAIIAWATAGLTAFYMFRIYLLTFEGHLNAHFQNYGGKQKIPFYSISLWGKNGVKKNSCLLTMNNNESTYFLSKTKYPIAKNGRKMTRPFMTIAHFKHKAVSSYPYESDNTMLFPIFVLGLFTLFVGAIGIPFNQEGVNLDILSKWLAPSINLLHPKSNNSLDWNEFLKDAVVSVSIAYFGIFIASFLYKPIYSSLKNLEFINSFVKKGPKRILWDKILNGIYDWSYNRAYIDAFYTRFFVGGIRGLAEFTHFVDRRVIDGMTNGVGVISFIVGEGIKYIGGGRISSYLFLYLAYVSVFLLVYYLLF NADH_dehydrogenase_subunit_5
MATCH ndhf ID 100 CORRECT
FILE1 none none 110372 111511 D 1 1140 Ok MIFQSFLLGNLVSLCMKIINSVVVVGLYYGFLTTFSIGPSYLFLLRALVMEEGTEKKVSATTGFITGQLMMFISIYYAPLHLALGRPHTITVLALPYLLFHFFWNNHKHFFDYGSTTRNSMRNLSIQCVFLNNLIFQLFNHFILPSSMLARLVNIYLFRCNNKILFVTSGFVGWLIGHILFMKWLGLVLVWIRQNHSIRSNKYIRSNKYLVLELRNSMARIFSILLFITCVYYLGRIPSPILTKKLKEASKTEERVESEEERDVEIETASEMKGTKQEQEGSTEEDPYPSPSLFSEEGWDPDKIDETEEIRVNGKDKIKDKFHSHLTETGYNNINTSNSPIYDYQDSYLNNNNTGNLENCKLQLLDKKNENQEFLIQKV hypothetical_protein
FILE2 NONE
MATCH none ID 0 MISSED.WRONG_STOP
FILE1 rpl32 rpl32 114504 114671 D 1 168 Ok MAVPKKRTSTSKKRIRKNIWKRKGYWVALKAFSLAKSLSTGNSKSFFVRQTKINK ribosomal_protein_L32
FILE2 rpl32 rpl32 114504 114671 D 1 168 Ok MAVPKKRTSTSKKRIRKNIWKRKGYWVALKAFSLAKSLSTGNSKSFFVRQTKINK ribosomal_protein_L32
MATCH rpl32 ID 100 CORRECT
FILE1 ccsa ccsA 115765 116706 D 1 942 Ok MIFSTLEHILTHISFSIVSIVITIHLITFLVDEIVKLYDSSEKGIIVTFFCITGLLVTRWVSSGHFPLSDLYESLIFLSWSFSLIHIIPYFKKNVLILSKITGPSAILTQGFATSGILTEIHQSGILVPALQSEWLIMHVSMMILGYAALLCGSLLSVALLVITFRKNRKLFSKSNVFLNESFFLGENVVENTSFFCTKNYYRSQLIQQLDYWSYRVISLGFTFLTIGILSGAVWANEAWGSYWNWDPKETWAFITWIVFAIYLHTRTNRNLRGPNSAIVASIGFLIIWICYFGVNLLGIGLHSYGSFPSTFN cytochrome_c_biogenesis_protein
FILE2 ccsa ccsA 115765 116706 D 1 942 Ok MIFSTLEHILTHISFSIVSIVITIHLITFLVDEIVKLYDSSEKGIIVTFFCITGLLVTRWVSSGHFPLSDLYESLIFLSWSFSLIHIIPYFKKNVLILSKITGPSAILTQGFATSGILTEIHQSGILVPALQSEWLIMHVSMMILGYAALLCGSLLSVALLVITFRKNRKLFSKSNVFLNESFFLGENVVENTSFFCTKNYYRSQLIQQLDYWSYRVISLGFTFLTIGILSGAVWANEAWGSYWNWDPKETWAFITWIVFAIYLHTRTNRNLRGPNSAIVASIGFLIIWICYFGVNLLGIGLHSYGSFPSTFN cytochrome_c_biogenesis_protein
MATCH ccsa ID 100 CORRECT
FILE1 ndhd ndhD 116944 118446 R 1 1503 Ok MNYFPWLTIIVVFPIFAGSLIFFLPHKGNRVIRWYTICICILELLLTTYAFCYHFQSDDPLIQLVEDYKWIDFFDFHWRLGIDGLSIGPILLTGFITTLATLAAWPVTRDSRLFHFLMLAMYSGQIGLFSSRDLLLFFIMWELELIPVYLLLAMWGGKKRLYSATKFILYTAGGSVFLLMGVLGVALYGSNEPTLNFETSVNQSYPVVLEIIFYIGFFIAFAVKSPIIPLHTWLPDTHGEAHYSTCMLLAGILLKMGAYGLIRINMELLPHAHSIFSPWLMIIGTIQIIYAASTSLGQRNLKKRIAYSSVSHMGFIIIGISSLTDTGLNGALLQIISHGFIGAALFFLAGTTYDRIRLVYLDEMGGIAIPMPKMFTMFSSFSMASLALPGMSGFVAELIVFFGIITGQKYLLMPKLLITFVMAIGIILTPIYSLSMPRQMFYGYKLFNAPKDSFFDSGPRELFLSISIFLPVIGIGIYPDFVLSLAVDKVEVILSNFFYR NADH_dehydrogenase_subunit_4
FILE2 ndhd ndhD 116944 118446 R 1 1503 Ok MNYFPWLTIIVVFPIFAGSLIFFLPHKGNRVIRWYTICICILELLLTTYAFCYHFQSDDPLIQLVEDYKWIDFFDFHWRLGIDGLSIGPILLTGFITTLATLAAWPVTRDSRLFHFLMLAMYSGQIGLFSSRDLLLFFIMWELELIPVYLLLAMWGGKKRLYSATKFILYTAGGSVFLLMGVLGVALYGSNEPTLNFETSVNQSYPVVLEIIFYIGFFIAFAVKSPIIPLHTWLPDTHGEAHYSTCMLLAGILLKMGAYGLIRINMELLPHAHSIFSPWLMIIGTIQIIYAASTSLGQRNLKKRIAYSSVSHMGFIIIGISSLTDTGLNGALLQIISHGFIGAALFFLAGTTYDRIRLVYLDEMGGIAIPMPKMFTMFSSFSMASLALPGMSGFVAELIVFFGIITGQKYLLMPKLLITFVMAIGIILTPIYSLSMPRQMFYGYKLFNAPKDSFFDSGPRELFLSISIFLPVIGIGIYPDFVLSLAVDKVEVILSNFFYR NADH_dehydrogenase_subunit_4
MATCH ndhd ID 100 CORRECT
FILE1 psac psaC 118564 118809 R 1 246 Ok MSHSVKIYDTCIGCTQCVRACPTDVLEMIPWDGCKAKQIASAPRTEDCVGCKRCESACPTDFLSVRVYLWHETTRSMGLAY photosystem_I_subunit_VII
FILE2 psac psaC 118564 118809 R 1 246 Ok MSHSVKIYDTCIGCTQCVRACPTDVLEMIPWDGCKAKQIASAPRTEDCVGCKRCESACPTDFLSVRVYLWHETTRSMGLAY photosystem_I_subunit_VII
MATCH psac ID 100 CORRECT
FILE1 ndhe ndhE 119061 119366 R 1 306 Ok MILEHVLVLSAYLFSIGIYGLITSRNMVRALMCLELILNAVNINFVTFSDFFDNRQLKGDIFSIFVIAIAAAEAAIGLAIVSSIYRNRKSTRINQSNLLNN NADH_dehydrogenase_subunit_4
FILE2 ndhe ndhE 119061 119366 R 1 306 Ok MILEHVLVLSAYLFSIGIYGLITSRNMVRALMCLELILNAVNINFVTFSDFFDNRQLKGDIFSIFVIAIAAAEAAIGLAIVSSIYRNRKSTRINQSNLLNN NADH_dehydrogenase_subunit_4L
MATCH ndhe ID 100 CORRECT
FILE1 ndhg ndhG 119590 120120 R 1 531 Ok MDLSEPIHDFLLVFLGSGLILGGLGVVLLPNPIYSAFSLGLVLVCTSLFYILSNAYFVAAAQLLIYVGAINVLIIFAVMFMNGSEYYKDFHLWTVGDGITSMVCISLFISLITTISDTSWYGIIWTTRSNQIIEQDFLSNSQQIGIHLSTDFFLPFELISIILLVALIGAIAVARQ NADH_degydrogenase_subunit_6
FILE2 ndhg ndhG 119590 120120 R 1 531 Ok MDLSEPIHDFLLVFLGSGLILGGLGVVLLPNPIYSAFSLGLVLVCTSLFYILSNAYFVAAAQLLIYVGAINVLIIFAVMFMNGSEYYKDFHLWTVGDGITSMVCISLFISLITTISDTSWYGIIWTTRSNQIIEQDFLSNSQQIGIHLSTDFFLPFELISIILLVALIGAIAVARQ NADH_dehydrogenase_subunit_6
MATCH ndhg ID 100 CORRECT
FILE1 ndhi ndhI 120525 121028 R 1 504 Ok MLPMITEFINYGQQTIRAARYIGQGFMITLSHANRLPVTIQYPYEKLITSERFRGRIHFEFDKCIACEVCVRVCPIDLPVVDWKLETDIRKKRLLNYSIDFGICIFCGNCVEYCPTNCLSMTEEYELSTYDRHELNYNQIALGRLPMSVIDDYTIRTISNLPQINNE NADH_dehydrogenase_subunit_I
FILE2 ndhi ndhI 120525 121028 R 1 504 Ok MLPMITEFINYGQQTIRAARYIGQGFMITLSHANRLPVTIQYPYEKLITSERFRGRIHFEFDKCIACEVCVRVCPIDLPVVDWKLETDIRKKRLLNYSIDFGICIFCGNCVEYCPTNCLSMTEEYELSTYDRHELNYNQIALGRLPMSVIDDYTIRTISNLPQINNE NADH_dehydrogenase_subunit_I
MATCH ndhi ID 100 CORRECT
FILE1 ndha ndhA 121113 123337 R 2 1092 Ok MIIDTTEIETINSFSKLESLKEVYGIIWMLVPIVTLVLGITIGVLVIVWLEREISAGIQQRIGPEYAGPLGILQALADGTKLLLKENLIPSTGDTRLFSIGPSIAVISIFLSYSVIPFGDHLVLADLSIGVFFWIAISSIAPVGLLMSGYGSNNKYSFLGGLRAAAQSISYEIPLALCVLSISLLSNSLSTVDIVEAQSKYGFWGWNLWRQPIGFIVFLISSLAECERLPFDLPEAEEELVAGYQTEYSGIKFGLFYIASYLNLLVSSLFVTVLYLGGWNLSIPYIFVPDIFGINKGGKVFGTLIGIFITLAKTYLFLFIPIATRWTLPRLRMDQLLNLGWKFLLPISLGNLLLTTSSQLLSL NADH_dehydrogenase_subunit_1
FILE2 ndha ndhA_1 121113 121652 R 1 540 Ok LSNSLSTVDIVEAQSKYGFWGWNLWRQPIGFIVFLISSLAECERLPFDLPEAEEELVAGYQTEYSGIKFGLFYIASYLNLLVSSLFVTVLYLGGWNLSIPYIFVPDIFGINKGGKVFGTLIGIFITLAKTYLFLFIPIATRWTLPRLRMDQLLNLGWKFLLPISLGNLLLTTSSQLLSL NADH_dehydrogenase_subunit_1
MATCH ndha ID 49 WRONG.BAD_NBEXON.BAD_START
FILE1 NONE
FILE2 ndha ndhA_2 122771 123337 R 1 567 Ok MIIDTTEIETINSFSKLESLKEVYGIIWMLVPIVTLVLGITIGVLVIVWLEREISAGIQQRIGPEYAGPLGILQALADGTKLLLKENLIPSTGDTRLFSIGPSIAVISIFLSYSVIPFGDHLVLADLSIGVFFWIAISSIAPVGLLMSGYGSNNKYSFLGGLRAAAQSISYEIPLALCVLSISLRVIR NADH_dehydrogenase_subunit_1
MATCH ndha ID 0 OVERPRED.WRONG_STOP
FILE1 ndhh ndhH 123339 124520 R 1 1182 Ok MTAPTTRKDLMIVNMGPQHPSMHGVLRLIVTLDGEDVVDCEPILGYLHRGMEKIAENRTIIQYLPYVTRWDYLATMFTEAITINGPEQLGNIQVPKRASYIRVIMLELSRIASHLLWLGPFMADIGAQTPFFYIFRERELIYDLFEAATGMRMMHNYFRIGGVAADLPYGWIDKCLDFCDYFLTGVAEYQKLITRNPIFLERVEGVGIIGRDEALNWGLSGPMLRASGIEWDLRKVDHYESYDEFDWQVQWQREGDSLARYLVRIGEMTESIKIIQQALEGIPGGPYENLEMRRFDRLKDPEWNDFEYRFISKKPSPTFELSKQELYVRVEAPKGELGIFLIGDQSVFPWRWKIRPPGFINLQILPQLVKRMKLADIMTILGSIDIIMGEVDR NADH_dehydrogenase_subunit_7
FILE2 ndhh ndhH 123339 124520 R 1 1182 Ok MTAPTTRKDLMIVNMGPQHPSMHGVLRLIVTLDGEDVVDCEPILGYLHRGMEKIAENRTIIQYLPYVTRWDYLATMFTEAITINGPEQLGNIQVPKRASYIRVIMLELSRIASHLLWLGPFMADIGAQTPFFYIFRERELIYDLFEAATGMRMMHNYFRIGGVAADLPYGWIDKCLDFCDYFLTGVAEYQKLITRNPIFLERVEGVGIIGRDEALNWGLSGPMLRASGIEWDLRKVDHYESYDEFDWQVQWQREGDSLARYLVRIGEMTESIKIIQQALEGIPGGPYENLEMRRFDRLKDPEWNDFEYRFISKKPSPTFELSKQELYVRVEAPKGELGIFLIGDQSVFPWRWKIRPPGFINLQILPQLVKRMKLADIMTILGSIDIIMGEVDR NADH_dehydrogenase_subunit_7
MATCH ndhh ID 100 CORRECT
FILE1 rps15 rps15 124632 124895 R 1 264 Ok MVKNSVISVISQEEKKGSVEFQVFNFTNKIRRLTSHLELHKKDYLSQRGLKKILGKRQRLLAYLAKKNRVRYKELINRLDIRETKTR ribosomal_protein_S15
FILE2 rps15 rps15 124632 124895 R 1 264 Ok MVKNSVISVISQEEKKGSVEFQVFNFTNKIRRLTSHLELHKKDYLSQRGLKKILGKRQRLLAYLAKKNRVRYKELINRLDIRETKTR ribosomal_protein_S15
MATCH rps15 ID 100 CORRECT
FILE1 ycf1 ycf1 125297 130972 R 1 5676 Ok MIFQSFLLGNLVSLCMKIINSVVVVGLYYGFLTTFSIGPSYLFLLRALVMEEGTEKKVSATTGFITGQLMMFISIYYAPLHLALGRPHTITVLALPYLLFHFFWNNHKHFFDYGSTTRNSMRNLSIQCVFLNNLIFQLFNHFILPSSMLARLVNIYLFRCNNKILFVTSGFVGWLIGHILFMKWLGLVLVWIRQNHSIRSNKYIRSNKYLVLELRNSMARIFSILLFITCVYYLGRIPSPILTKKLKEASKTEERVESEEERDVEIETASEMKGTKQEQEGSTEEDPYPSPSLFSEEGWDPDKIDETEEIRVNGKDKIKDKFHSHLTETGYNNINTSNSPIYDYQDSYLNNNNTGNLENCKLQLLDKKNENQEQDLFWFQKPLVSLLFDYNRWNRPFRYIKNNRFEQAVRTEMSQYFFDTCKSDGKQKISFTYPPSLSTFWKMIKRKIPLLSLQKTLPNELDTQWVSTNKEKSNNLNKEFLNRLEILDKESLSLDILETRTRFCNDDTKKEYVPKMYDPLLNGLYRGTIKKGVSSSIINNTLLENWEKRVRLNRIHTIFLPNIDYQEFEQKAYTIDKKPLSTEIDEFLTLINELGNEAKSSLNLKGLSLFSDQEQRRANSEKRTKFVKFVFNALDPNETKSGKKSIGIKEISKKVPRWSHKLITELDQQMGEFKDRASMDHQLRSRKAKRVVIFTDNKATKDAEEEVALISYSQQSDFRRGIITGSMRAQRRKTFISKLFQANVHSPLFVDRITPLRLFSFDISELIKPILKNWTDKEGEFKILESREEQTKREEKKEKDKKEDNKRKEQARIAIEEAWDTIPLAQIIRGYMLITQSILRKYILLPALIIAKNIGRMLFLQLPEWSEDLQEWNREMQIKCTYNGVQLSETEFPKNWLRDGIQIKILFPFCLKPWHISKLYPSRRELMKKQKQKDDFCFLTVWGMEAELPFGSPRKRPSFFEPIFKELEKKIGKFKKKYFLTLKILKGKTKLFRKVSKETTKLFIKSIGFLKKIKKELSKVNLIVLFRFKEISESNETKKEKDYLISNQIINESFRQIESGNWPNSSLIETKMKDLTNRTSTIKNKIERITKEKKKVTPEIDINPNKTNNIKKFESPKKIFQILKSRNTRVIWKFHYFLKLFIQRLYINLFLSIINIPRITTQLFLKSTNKLIEKFISNNEINQEKINNKKKIHFMFISTIKKSLYNISKKNSHILCDLSYLSQAYVFYKLSQTQVINFSKFRSVLQYNTTSCFLKTKIKDYFKTLGIFHSELKHKKLQSYRINQWKNWLRWHYQYDLSQIRWSRLMPKKWRTRVNQSCMAQNKNRNLNKWNSYEKDQLLHYKKENDSELYSLSNEKDNFKKCYGYGLLAYKSINYENKSDSFFSRLPFEVQVKKNLEISYNSNTSKHNFVDMPGNLHINNYLRKGNILDRERNLDRKYFDWKIIHFSLRQKGDIEAWVKIDTNSNPNTKIGINNYQIIDKIEKKGVFYLTTHQNPEKTQKNSKKFFFDWMGMNEKIFNRPILNLEFWFFPEFVLLYNVYKIKPWIIPSKFLLFNLNTNKNVSQNKNQNFFLPSNKKIKIKNRSQEAKEPPSQRERGSDIENKGNLSPVFSKHQTDLEKDYVESDTKKGKNKKQYKSNTEAELDLFLKRYLLFQLRWNGALNQRMFENIKVYCLLLRLINPTKITISSIQRREMSLDIMLIQANLPLTDLMKKGVLIIEPIRLSVKDNGQFIMYQTIGISLIHKSKHQTNQRYREQRYVDKKNFDEFILQPQTQRINTEKTHFGLLVPENILWSRRRRELRIRSFFNSWNWNVVDRNSVFCNETNVKNWSQFLGERKPLYKDKNELIKFKFFFWPNYRLEDLACMNRYWFDTNNGSRFSILRIHMYPRLKIN ycf1_protein
FILE2 ycf1 ycf1 125297 130972 R 1 5676 Ok MIFQSFLLGNLVSLCMKIINSVVVVGLYYGFLTTFSIGPSYLFLLRALVMEEGTEKKVSATTGFITGQLMMFISIYYAPLHLALGRPHTITVLALPYLLFHFFWNNHKHFFDYGSTTRNSMRNLSIQCVFLNNLIFQLFNHFILPSSMLARLVNIYLFRCNNKILFVTSGFVGWLIGHILFMKWLGLVLVWIRQNHSIRSNKYIRSNKYLVLELRNSMARIFSILLFITCVYYLGRIPSPILTKKLKEASKTEERVESEEERDVEIETASEMKGTKQEQEGSTEEDPYPSPSLFSEEGWDPDKIDETEEIRVNGKDKIKDKFHSHLTETGYNNINTSNSPIYDYQDSYLNNNNTGNLENCKLQLLDKKNENQEQDLFWFQKPLVSLLFDYNRWNRPFRYIKNNRFEQAVRTEMSQYFFDTCKSDGKQKISFTYPPSLSTFWKMIKRKIPLLSLQKTLPNELDTQWVSTNKEKSNNLNKEFLNRLEILDKESLSLDILETRTRFCNDDTKKEYVPKMYDPLLNGLYRGTIKKGVSSSIINNTLLENWEKRVRLNRIHTIFLPNIDYQEFEQKAYTIDKKPLSTEIDEFLTLINELGNEAKSSLNLKGLSLFSDQEQRRANSEKRTKFVKFVFNALDPNETKSGKKSIGIKEISKKVPRWSHKLITELDQQMGEFKDRASMDHQLRSRKAKRVVIFTDNKATKDAEEEVALISYSQQSDFRRGIITGSMRAQRRKTFISKLFQANVHSPLFVDRITPLRLFSFDISELIKPILKNWTDKEGEFKILESREEQTKREEKKEKDKKEDNKRKEQARIAIEEAWDTIPLAQIIRGYMLITQSILRKYILLPALIIAKNIGRMLFLQLPEWSEDLQEWNREMQIKCTYNGVQLSETEFPKNWLRDGIQIKILFPFCLKPWHISKLYPSRRELMKKQKQKDDFCFLTVWGMEAELPFGSPRKRPSFFEPIFKELEKKIGKFKKKYFLTLKILKGKTKLFRKVSKETTKLFIKSIGFLKKIKKELSKVNLIVLFRFKEISESNETKKEKDYLISNQIINESFRQIESGNWPNSSLIETKMKDLTNRTSTIKNKIERITKEKKKVTPEIDINPNKTNNIKKFESPKKIFQILKSRNTRVIWKFHYFLKLFIQRLYINLFLSIINIPRITTQLFLKSTNKLIEKFISNNEINQEKINNKKKIHFMFISTIKKSLYNISKKNSHILCDLSYLSQAYVFYKLSQTQVINFSKFRSVLQYNTTSCFLKTKIKDYFKTLGIFHSELKHKKLQSYRINQWKNWLRWHYQYDLSQIRWSRLMPKKWRTRVNQSCMAQNKNRNLNKWNSYEKDQLLHYKKENDSELYSLSNEKDNFKKCYGYGLLAYKSINYENKSDSFFSRLPFEVQVKKNLEISYNSNTSKHNFVDMPGNLHINNYLRKGNILDRERNLDRKYFDWKIIHFSLRQKGDIEAWVKIDTNSNPNTKIGINNYQIIDKIEKKGVFYLTTHQNPEKTQKNSKKFFFDWMGMNEKIFNRPILNLEFWFFPEFVLLYNVYKIKPWIIPSKFLLFNLNTNKNVSQNKNQNFFLPSNKKIKIKNRSQEAKEPPSQRERGSDIENKGNLSPVFSKHQTDLEKDYVESDTKKGKNKKQYKSNTEAELDLFLKRYLLFQLRWNGALNQRMFENIKVYCLLLRLINPTKITISSIQRREMSLDIMLIQANLPLTDLMKKGVLIIEPIRLSVKDNGQFIMYQTIGISLIHKSKHQTNQRYREQRYVDKKNFDEFILQPQTQRINTEKTHFGLLVPENILWSRRRRELRIRSFFNSWNWNVVDRNSVFCNETNVKNWSQFLGERKPLYKDKNELIKFKFFFWPNYRLEDLACMNRYWFDTNNGSRFSILRIHMYPRLKIN hypothetical_chloroplast_RF1
MATCH ycf1 ID 100 CORRECT
FILE1 rps12 rps12 71639 142102 D 3 372 Error MPTIKQLIRNTRQPIRNVTKSPALRGCPQRRGTCTRVYTITPKKPNSALRKVARVRLTSGFEITAYIPGIGHNSQEHSVVLVRGGRVKDLPGVRYHIVRGTLDAVGVKDRQQGRSKYGVKKPK ribosomal_protein_S12
FILE2 NONE
MATCH rps12 ID 0 MISSED.WRONG_STOP
FILE1 rps7 rps7 142156 142623 D 1 468 Ok MSRRGTAEKKTAKSDPIYRNRLVNMLVNRILKHGKKSLAYQIIYRAVKKIQQKTETNPLSVLRQAIRGVTPDITVKARRVGGSTHQVPIEIGSTQGKALAIRWLLAASRKRPGRNMAFKLSSELVDAAKGSGDAIRKKEETHRMAEANRAFAHFR ribosomal_protein_S7
FILE2 rps7 rps7_2 142156 142623 D 1 468 Ok MSRRGTAEKKTAKSDPIYRNRLVNMLVNRILKHGKKSLAYQIIYRAVKKIQQKTETNPLSVLRQAIRGVTPDITVKARRVGGSTHQVPIEIGSTQGKALAIRWLLAASRKRPGRNMAFKLSSELVDAAKGSGDAIRKKEETHRMAEANRAFAHFR ribosomal_protein_S7
MATCH rps7 ID 100 CORRECT
FILE1 ndhb ndhB 142909 145120 D 2 1533 Ok MIWHVQNENFILDSTRIFMKAFHLLLFDGSLIFPECILIFGLILLLMIDSTSDQKDIPWLYFISSTSLVMSITALLFRWREEPMISFSGNFQTNNFNEIFQFLILLCSTLCIPLSVEYIECTEMAITEFLLFVLTATLGGMFLCGANDLITIFVAPECFSLCSYLLSGYTKKDVRSNEATMKYLLMGGASSSILVHGFSWLYGSSGGEIELQEIVNGLINTQMYNSPGISIALIFITVGIGFKLSPAPSHQWTPDVYEGSPTPVVAFLSVTSKVAASASATRIFNIPFYFSSNEWHLLLEILAILSMILGNLIAITQTSMKRMLAYSSIGQIGYVIIGIIVGDSNDGYASMITYMLFYISMNLGTFACIVLFGLRTGTDNIRDYAGLYTKDPFLALSLALCLLSLGGLPPLAGFFGKLYLFWCGWQAGLYFLVLIGLLTSVVSIYYYLKIIKLLMTGRNQEITPHVRNYRRSPLRSNNSIELSMIVCVIASTIPGISMNPIIAIAQDSLF NADH_dehydrogenase_subunit_2
FILE2 ndhb ndhB_2 142909 145120 D 2 1626 Ok MIWHVQNENFILDSTRIFMKAFHLLLFDGSLIFPECILIFGLILLLMIDSTSDQKDIPWLYFISSTSLVMSITALLFRWREEPMISFSGNFQTNNFNEIFQFLILLCSTLCIPLSVEYIECTEMAITEFLLFVLTATLGGMFLCGANDLITIFVAPECFSLCSYLLSGYTKKDVRSNEATMKYLLMGGASSSILVHGFSWLYGSSGGEIELQEIVNGLINTQMYNSPGISIALIFITVGIGFKLSPAPSHQWTPDVYEGVRFVREIPTSLSISEMFGFFKTPWTCRREMLSPTPVVAFLSVTSKVAASASATRIFNIPFYFSSNEWHLLLEILAILSMILGNLIAITQTSMKRMLAYSSIGQIGYVIIGIIVGDSNDGYASMITYMLFYISMNLGTFACIVLFGLRTGTDNIRDYAGLYTKDPFLALSLALCLLSLGGLPPLAGFFGKLYLFWCGWQAGLYFLVLIGLLTSVVSIYYYLKIIKLLMTGRNQEITPHVRNYRRSPLRSNNSIELSMIVCVIASTIPGISMNPIIAIAQDSLF NADH_dehydrogenase_subunit_2
MATCH ndhb ID 94 ALMOST_CORRECT.BAD_JUNCTION
FILE1 ycf15 ycf15 145958 146221 R 1 264 Ok METLVSSIFWTLAPWKNMLLLKHGRIEILDQNTMYGWYELPKQEFLNSKQPVQIFTTKKYWILFRIGPERRRKAGMPIGVYYIEFTR ycf15_protein
FILE2 NONE
MATCH ycf15 ID 0 MISSED.WRONG_STOP
FILE1 ycf2 ycf2 146312 153148 R 1 6837 Ok MRGHQFKSWIFELREILREIKNSHHFLDSWTQFNSVGSFIHIFFHQERFLKLFDPRIWSILLSRNSQGSPSNRYFTIKGVILFVVAVLIYRINNRNMVERKNLYLIGLLPIPMNSIGPRNDTLEESVGSSNINRLIVSLLYLPKGKKISESCFLNPKESTWVLPITKKCSMPESNWGSRWWRNWIGKKRDSSCKISNETVAGIEILFKEKDLKYLEFLFVYYMDDPIRKDHDWELFDRLSLRKSRNRINLNSGPLFEILVKHWISYLMSAFREKIPIEVEGFFKQQGAGSTIQSNDIEHVSHLFSRNKWAISLQNCAQFHMWQFRQDLFVSWGKNPPESDFLRNVSRENWIWLDNVWLVNKDRFFSKVQNVSSNIQYDSTRSSFVQVTDSSQLKGSSDQSRDHLDSISNEDSEYHTLINQREIQQRKERSILWDPSFLQTERKEIESGRFPKCLSGYSSMSRLFTEREKQMINHLFPEEIEEFLGNPTRSVRSFFSDRWSELHLGSNPTERSTRDQKLLKKQQDLSFVPSRRSEKKEMVNIFKIITYLQNTVSIHPISSDPGCDMVPKDEPDMDSSNKISFLNKNPFFDLFHLFHDRNRGGYTLHYDFASEERFQEMADLFTLSITEPDLVYHKGFAFSIDSCGLDQKQFLNEARDESKKKSLLVLPPIFYEENESFSRRIRKKWVRISCGNDLEDPKPKIVVFASNNIMEAVTQYRLIRNLIQIQYSTYGYIRNVLNRFFLMNRSDRNFEYGIQRDQIGKDTLNHRTIMKYTINQYLSNLKKSQKKWFEPLILISRTERSMNRDPDAYRYKWSNGSKSFQEHLEQSVSKQKSRFQVVFDRLRINQYSIDWSEVIDKKDLSKSLRFFLSKSLLFLSKLLLFLSNSLPFFCVSFGNIPIHRSEIYIYEELKGPNDQLCNQLLESIGLQIVHLKKLKPFLLDDHDTSQKSKFLINGGTISPFLFNKIPKWMIDSFHTRNNRRKSFDNPDSYFSMIFHDQDNWLNPVKPFHRSSLISSFYKANRLRFLNNPHHFCFYWNTRFPFSVEKARINNSDFTYGQFLNILFIRNKIFSLCVGKKKHAFWGRDTISPIESQVSNIFIPNDFPQSGDETYNLYKSFHFPSRSDPFVRRAIYSIADISGTPLTEGQIVNFERTYCQPLSDMNLSDSEGKNLHQYLNFNSNMGLIHTPCSEKDLSSEKRKKWSLCLKKCVEKGQTYRTFQRDSAFSTLSKWNLFQTYMPWFLTSTGYKYLNLIFLDTFSDLLPILSSSQKFVSIFPDIMHGSGISWRILQKKLCLPQWNLISEISSKCLHNLLLSEEMIHRNNESPLISTHLRSPNAREFLYSILFLLLVAGYLVRTHLLFVSRASSELQTEFERVKSLMTPSSMIELRKLLDRYPTSEPNSFWLKNLFLVALEQLGDSLEEIRGSASGGNMLGPAYGVKSIRSKKKDWNINLIEIIDLIPNPINRITFSRNTRHLSHTSKEIYSLIRKRKNVNGDWIDEKIESWVANSDSIDDEEREFLVQFSTLTTENRIDQILLSLTHSDHLSKNDSGYQMIEQPGAIYLRYLVDIHKKHLMNYEFNPSCLAERRIFLAHYQTITYSQTSCGENSFHFPSHGKPFSLRLALSPSRGILVIGSIGTGRSYLVKYLATNSYVPFITVFLNKFLDNKSKGFLLDEIDIDDSDDIDDSDNLDASDDIDRDLDTELELLTRMNGLTVDMMPEIDRFYITLQFELAKAMSPCIIWIPNIHDLDVNESNDLSLGLLVNHLSRDCERCSTRNILVIASTHIPQKVDPALIAPNKLNTCIKIRRLLIPQQRKHFFTLSYTRGFHLEKKMFHTNGFGSITMGSNARDLVALTNEVLSISITQKKSIIDTNTIRSALHRQTWDLRSQVRSVQDHGILFYQIGRAVAQNVLLSNCPIDPISIYMKKKSCNEGDSYLYKWYFELGTSMKRLTILLYLLSC
SAGSVAQDLWSLSVPDEKNGITSYGLVENDSDLVHGLLEVEGALVGSSRTEKDCSQFDNDRVTLLLRPEPRNPLDMMQKGSWSILDQRFLYEKYESEFEEGEGEGALDPQEDLFNHIVWAPRIWRPWGFLFDCIERPNELGFPYWSRSFRGKRIIYDEEDELQENDSGFLQSGTMQYQTRDRSQGLFRISQFIWDPADPLFFLFKDQPPGSVFSHRELFADEEMSKGLLTSQTDPPTSLYKRWFIKNTQEKHFELLINRQRWLRTNSSLSNGSFRSNTLSESYQYLSNLFLSNGTLLDQMPKTLLRKRWLFPDEMKIGFM Ycf2_protein
FILE2 ycf2 ycf2_2 146312 153148 R 1 6837 Ok MRGHQFKSWIFELREILREIKNSHHFLDSWTQFNSVGSFIHIFFHQERFLKLFDPRIWSILLSRNSQGSPSNRYFTIKGVILFVVAVLIYRINNRNMVERKNLYLIGLLPIPMNSIGPRNDTLEESVGSSNINRLIVSLLYLPKGKKISESCFLNPKESTWVLPITKKCSMPESNWGSRWWRNWIGKKRDSSCKISNETVAGIEILFKEKDLKYLEFLFVYYMDDPIRKDHDWELFDRLSLRKSRNRINLNSGPLFEILVKHWISYLMSAFREKIPIEVEGFFKQQGAGSTIQSNDIEHVSHLFSRNKWAISLQNCAQFHMWQFRQDLFVSWGKNPPESDFLRNVSRENWIWLDNVWLVNKDRFFSKVQNVSSNIQYDSTRSSFVQVTDSSQLKGSSDQSRDHLDSISNEDSEYHTLINQREIQQRKERSILWDPSFLQTERKEIESGRFPKCLSGYSSMSRLFTEREKQMINHLFPEEIEEFLGNPTRSVRSFFSDRWSELHLGSNPTERSTRDQKLLKKQQDLSFVPSRRSEKKEMVNIFKIITYLQNTVSIHPISSDPGCDMVPKDEPDMDSSNKISFLNKNPFFDLFHLFHDRNRGGYTLHYDFASEERFQEMADLFTLSITEPDLVYHKGFAFSIDSCGLDQKQFLNEARDESKKKSLLVLPPIFYEENESFSRRIRKKWVRISCGNDLEDPKPKIVVFASNNIMEAVTQYRLIRNLIQIQYSTYGYIRNVLNRFFLMNRSDRNFEYGIQRDQIGKDTLNHRTIMKYTINQYLSNLKKSQKKWFEPLILISRTERSMNRDPDAYRYKWSNGSKSFQEHLEQSVSKQKSRFQVVFDRLRINQYSIDWSEVIDKKDLSKSLRFFLSKSLLFLSKLLLFLSNSLPFFCVSFGNIPIHRSEIYIYEELKGPNDQLCNQLLESIGLQIVHLKKLKPFLLDDHDTSQKSKFLINGGTISPFLFNKIPKWMIDSFHTRNNRRKSFDNPDSYFSMIFHDQDNWLNPVKPFHRSSLISSFYKANRLRFLNNPHHFCFYWNTRFPFSVEKARINNSDFTYGQFLNILFIRNKIFSLCVGKKKHAFWGRDTISPIESQVSNIFIPNDFPQSGDETYNLYKSFHFPSRSDPFVRRAIYSIADISGTPLTEGQIVNFERTYCQPLSDMNLSDSEGKNLHQYLNFNSNMGLIHTPCSEKDLSSEKRKKWSLCLKKCVEKGQTYRTFQRDSAFSTLSKWNLFQTYMPWFLTSTGYKYLNLIFLDTFSDLLPILSSSQKFVSIFPDIMHGSGISWRILQKKLCLPQWNLISEISSKCLHNLLLSEEMIHRNNESPLISTHLRSPNAREFLYSILFLLLVAGYLVRTHLLFVSRASSELQTEFERVKSLMTPSSMIELRKLLDRYPTSEPNSFWLKNLFLVALEQLGDSLEEIRGSASGGNMLGPAYGVKSIRSKKKDWNINLIEIIDLIPNPINRITFSRNTRHLSHTSKEIYSLIRKRKNVNGDWIDEKIESWVANSDSIDDEEREFLVQFSTLTTENRIDQILLSLTHSDHLSKNDSGYQMIEQPGAIYLRYLVDIHKKHLMNYEFNPSCLAERRIFLAHYQTITYSQTSCGENSFHFPSHGKPFSLRLALSPSRGILVIGSIGTGRSYLVKYLATNSYVPFITVFLNKFLDNKSKGFLLDEIDIDDSDDIDDSDNLDASDDIDRDLDTELELLTRMNGLTVDMMPEIDRFYITLQFELAKAMSPCIIWIPNIHDLDVNESNDLSLGLLVNHLSRDCERCSTRNILVIASTHIPQKVDPALIAPNKLNTCIKIRRLLIPQQRKHFFTLSYTRGFHLEKKMFHTNGFGSITMGSNARDLVALTNEVLSISITQKKSIIDTNTIRSALHRQTWDLRSQVRSVQDHGILFYQIGRAVAQNVLLSNCPIDPISIYMKKKSCNEGDSYLYKWYFELGTSMKRLTILLYLL
SCSAGSVAQDLWSLSVPDEKNGITSYGLVENDSDLVHGLLEVEGALVGSSRTEKDCSQFDNDRVTLLLRPEPRNPLDMMQKGSWSILDQRFLYEKYESEFEEGEGEGALDPQEDLFNHIVWAPRIWRPWGFLFDCIERPNELGFPYWSRSFRGKRIIYDEEDELQENDSGFLQSGTMQYQTRDRSQGLFRISQFIWDPADPLFFLFKDQPPGSVFSHRELFADEEMSKGLLTSQTDPPTSLYKRWFIKNTQEKHFELLINRQRWLRTNSSLSNGSFRSNTLSESYQYLSNLFLSNGTLLDQMPKTLLRKRWLFPDEMKIGFM Ycf2
MATCH ycf2 ID 100 CORRECT
FILE1 rpl23 rpl23 153516 153797 D 1 282 Ok MDGIKYAVFTDKSIRLLGKNQYTSNVESGSTRTEIKHWVELFFGVKVIAMNSHRLPGKSRRMGPIMGHTMHYRRMIITLQPGYSIPPLRKKRT ribosomal_protein_L23
FILE2 rpl23 rpl23_2 153516 153797 D 1 282 Ok MDGIKYAVFTDKSIRLLGKNQYTSNVESGSTRTEIKHWVELFFGVKVIAMNSHRLPGKSRRMGPIMGHTMHYRRMIITLQPGYSIPPLRKKRT ribosomal_protein_L23
MATCH rpl23 ID 100 CORRECT
FILE1 rpl2 rpl2 153816 155306 D 2 825 Ok MAIHLYKTSTPSTRNGTVDSQVKSNPRNNLIYGQRRCGKGRNARGIITARHRGGGHKRLYRKIDFRRNEKDIYGRIVTIEYDPNRNAYICLIHYGDGEKRYILHPRGAIIGDTIVSGTEVPIKMGNALPLTDMPLGTAIHNIEITLGKGGQLARAAGAVAKLIAKEGKSATLKLPSGEVRLISKNCSATVGQVGNVGVNQKSLGRAGSKRWLGKRPVVRGVVMNPVDHPHGGGEGRAPIGRKKPTTPWGYPALGRRSRKRNKYSDNLILRRRSK ribosomal_protein_L2
FILE2 rpl2 rpl2_2 153816 155306 D 2 825 Ok MAIHLYKTSTPSTRNGTVDSQVKSNPRNNLIYGQRRCGKGRNARGIITARHRGGGHKRLYRKIDFRRNEKDIYGRIVTIEYDPNRNAYICLIHYGDGEKRYILHPRGAIIGDTIVSGTEVPIKMGNALPSTDMPLGTAIHNIEITLGKGGQLARAAGAVAKLIAKEGKSATLKLPSGEVRLISKNCSATVGQVGNVGVNQKSLGRAGSKRWLGKRPVVRGVVMNPVDHPHGGGEGRAPIGRKKPTTPWGYPALGRRSRKRNKYSDNLILRRRSK ribosomal_protein_L2
MATCH rpl2 ID 99 ALMOST_CORRECT.BAD_JUNCTION

View File

@ -1,18 +1,21 @@
#!/bin/csh -f
#
# IR regression test: runs go_ir.sh on test.fst, writes the result to
# test.bak, compares it with test.ref and exits with the status of diff.
#
# NOTE(review): this span looks like a rendered diff hunk in which removed
# and added lines are interleaved (two banner echoes, two go_ir.sh calls,
# two messages per branch) -- confirm which lines belong to the live script.
#
echo "+ [testing IR]"
# ORG_HOME is derived from the location of this script (three levels up)
setenv ORG_HOME `dirname $0`/../../..
source $ORG_HOME/scripts/csh_init.sh
../bin/go_ir.sh test.fst > test.bak
echo "+ testing IR"
# go_ir.sh is located relative to this script; test.fst and test.bak are
# still resolved relative to the current working directory
`dirname $0`/../bin/go_ir.sh test.fst > test.bak
# only the exit status of diff is used; its stdout and stderr are discarded
diff -q test.bak test.ref >& /dev/null
# save $status right away, before any later command overwrites it
set stat = $status
if ($stat == 0) then
echo '+ IR test Ok'
# presumably VTC holds terminal color codes set up by csh_init.sh -- TODO confirm
echo "+ $VTC[3]IR test Ok$VTC[1]"
# the output file is removed on success only; on failure it is left in place
\rm -r test.bak
else
echo '+ IR test Failure'
echo "* $VTC[2]IR test Failure$VTC[1]"
endif
# propagate the diff status to the caller
exit $stat

View File

@ -1,18 +1,21 @@
#!/bin/csh -f
#
# Normalize regression test: runs go_normalize.sh on test.fst, writes the
# result to test.bak, compares it with test.ref and exits with the status
# of diff.
#
# NOTE(review): this span looks like a rendered diff hunk in which removed
# and added lines are interleaved (two banner echoes, two go_normalize.sh
# calls, two messages per branch) -- confirm which lines belong to the live
# script.
#
echo "+ [testing Normalize]"
# ORG_HOME is derived from the location of this script (three levels up)
setenv ORG_HOME `dirname $0`/../../..
source $ORG_HOME/scripts/csh_init.sh
../bin/go_normalize.sh test.fst > test.bak
echo "+ testing Normalize"
# go_normalize.sh is located relative to this script; test.fst and test.bak
# are still resolved relative to the current working directory
`dirname $0`/../bin/go_normalize.sh test.fst > test.bak
# only the exit status of diff is used; its stdout and stderr are discarded
diff -q test.bak test.ref >& /dev/null
# save $status right away, before any later command overwrites it
set stat = $status
if ($stat == 0) then
echo '+ Normalize test Ok'
# presumably VTC holds terminal color codes set up by csh_init.sh -- TODO confirm
echo "+ $VTC[3]Normalize test Ok$VTC[1]"
# the output file is removed on success only; on failure it is left in place
\rm -r test.bak
else
echo '+ Normalize test Failure'
echo "* $VTC[2]Normalize test Failure$VTC[1]"
endif
# propagate the diff status to the caller
exit $stat

View File

@ -1,18 +1,21 @@
#!/bin/csh -f
#
# rRNA regression test: runs go_rrna.sh on test.fst, writes the result to
# test.bak, compares it with test.ref and exits with the status of diff.
#
# NOTE(review): this span looks like a rendered diff hunk in which removed
# and added lines are interleaved (two banner echoes, two go_rrna.sh calls,
# two messages per branch) -- confirm which lines belong to the live script.
#
echo "+ [testing rRNA]"
# ORG_HOME is derived from the location of this script (three levels up)
setenv ORG_HOME `dirname $0`/../../..
source $ORG_HOME/scripts/csh_init.sh
../bin/go_rrna.sh test.fst > test.bak
echo "+ testing rRNA"
# go_rrna.sh is located relative to this script; test.fst and test.bak are
# still resolved relative to the current working directory
`dirname $0`/../bin/go_rrna.sh test.fst > test.bak
# only the exit status of diff is used; its stdout and stderr are discarded
diff -q test.bak test.ref >& /dev/null
# save $status right away, before any later command overwrites it
set stat = $status
if ($stat == 0) then
echo '+ rRNA test Ok'
# presumably VTC holds terminal color codes set up by csh_init.sh -- TODO confirm
echo "+ $VTC[3]rRNA test Ok$VTC[1]"
# the output file is removed on success only; on failure it is left in place
\rm -r test.bak
else
echo '+ rRNA test Failure'
echo "* $VTC[2]rRNA test Failure$VTC[1]"
endif
# propagate the diff status to the caller
exit $stat

View File

@ -1,18 +1,21 @@
#!/bin/csh -f
#
# tRNA regression test: runs go_trna.sh on test.fst, writes the result to
# test.bak, compares it with test.ref and exits with the status of diff.
#
# NOTE(review): this span looks like a rendered diff hunk in which removed
# and added lines are interleaved (two banner echoes, two go_trna.sh calls,
# two messages per branch) -- confirm which lines belong to the live script.
#
echo "+ [testing tRNA]"
# ORG_HOME is derived from the location of this script (three levels up)
setenv ORG_HOME `dirname $0`/../../..
source $ORG_HOME/scripts/csh_init.sh
../bin/go_trna.sh test.fst > test.bak
echo "+ testing tRNA"
# go_trna.sh is located relative to this script; test.fst and test.bak are
# still resolved relative to the current working directory
`dirname $0`/../bin/go_trna.sh test.fst > test.bak
# only the exit status of diff is used; its stdout and stderr are discarded
diff -q test.bak test.ref >& /dev/null
# save $status right away, before any later command overwrites it
set stat = $status
if ($stat == 0) then
echo '+ tRNA test Ok'
# presumably VTC holds terminal color codes set up by csh_init.sh -- TODO confirm
echo "+ $VTC[3]tRNA test Ok$VTC[1]"
# the output file is removed on success only; on failure it is left in place
\rm -r test.bak
else
echo '+ tRNA test Failure'
echo "* $VTC[2]tRNA test Failure$VTC[1]"
endif
# propagate the diff status to the caller
exit $stat