CDS detector added
Former-commit-id: 93fac7a70052d06c2a12bf8af59820c653edd31b Former-commit-id: 0869fdad0f550941a0f78f1e4c57f4fcdb3f6076
This commit is contained in:
78
detectors/cds/bin/go_cds.sh
Executable file
78
detectors/cds/bin/go_cds.sh
Executable file
@ -0,0 +1,78 @@
|
||||
#!/bin/csh -f
|
||||
#
|
||||
# Annotate CDS
|
||||
#
|
||||
#========================================================================================
|
||||
#
|
||||
# Annotate CDS
|
||||
#
|
||||
# go_cds.sh <FASTAFILE>
|
||||
#
|
||||
# - <FASTAFILE> : The fasta file containing the genome to annotate
|
||||
#
|
||||
# Results are printed to the standard output
|
||||
#
|
||||
#========================================================================================
|
||||
# usage: go_cds.sh fasta
|
||||
#
|
||||
|
||||
#
# locate the ORG tree from the script location and load the shared
# csh helpers (NeedArg, NeedFile, Notify, AssignUndef, Exit ... are not
# defined in this script - presumably they come from csh_init.sh)
#

setenv ORG_HOME `dirname $0`/../../..
source $ORG_HOME/scripts/csh_init.sh

#
# single argument : the genome fasta file
#

NeedArg 1

set Fasta = $Argv[1]

NeedFile $Fasta

set Genome = `basename $Fasta:r`

NeedFile $CDS_DATA_DIR/chlorodb/core

#
# all intermediate files go to <Genome>.tmp (created when missing)
#

set temp = $Genome.tmp
if (-d $temp) goto pass1

Notify "making directory $temp"
mkdir $temp

#
# pass1: run exonerate, once per protein family (*.fst) of chlorodb/core
#

pass1:

set fams = `ls $CDS_DATA_DIR/chlorodb/core/*.fst`

Notify "running pass1: exonerate of $Genome"

foreach f ($fams)
    $LIB_DIR/go_pass1.sh $Fasta `basename $f:r` $temp
end

#
# pass2: transsplicing
#

#
# pass3: prokov
#

#
# end : dump every per-family result on stdout
#

cat $temp/*.res

#
# remove the temporary directory unless TMP_CLEANUP is 0
#

AssignUndef TMP_CLEANUP 1

if ($TMP_CLEANUP == 0) goto finish

Notify " cleanup $temp"
(\rm -r $temp) >& /dev/null

finish:

Exit 0
|
131
detectors/cds/lib/bestclust.awk
Normal file
131
detectors/cds/lib/bestclust.awk
Normal file
@ -0,0 +1,131 @@
|
||||
#
|
||||
# select best cluster(s)
|
||||
#
|
||||
# -v MAX_SPAN ALLOW_STOP EXCLUDE
|
||||
#
|
||||
|
||||
BEGIN {
    # gawk: walk "for (k in array)" loops in ascending numeric index order
    PROCINFO["sorted_in"] = "@ind_num_asc"

    # defaults for the options not given with -v
    if (MAX_SPAN == "")   MAX_SPAN = 10000
    if (ALLOW_STOP == "") ALLOW_STOP = 0
    if (EXCLUDE == "")    EXCLUDE = 0
}

# GFF dump delimiters : each dump opens a new entry

/^\# --- START OF GFF DUMP ---/ { NbEntry++; State = "gff"; next }

/^\# --- END OF GFF DUMP ---/ { State = 0; next }

# alignment header : skip the 8 following lines, then collect the alignment

/^C4 Alignment:/ {
    for (skip = 8 ; skip > 0 ; skip--)
        getline
    Align = ""
    State = "align"
    next
}

# remaining comment lines, and anything outside a gff / align section

/^\#/ { next }

(State == 0) { next }

# lines of the GFF dump

(State == "gff") {

    if ($3 == "gene") {
        # a gene is rejected when it spans MAX_SPAN or more, when the
        # collected alignment contains '*' (unless ALLOW_STOP), or when
        # the hit organism matches EXCLUDE
        span = $5 - $4 + 1
        valid = 1
        if (span >= MAX_SPAN)
            valid = 0
        else if ((! ALLOW_STOP) && (Align ~ "\\*"))
            valid = 0
        else if ((EXCLUDE != 0) && (Organism ~ EXCLUDE))
            valid = 0

        from = $4+0
        to = $5+0
        Entry[NbEntry]["valid"] = valid
        Entry[NbEntry]["from"] = from
        Entry[NbEntry]["to"] = to
        Entry[NbEntry]["score"] = $6+0
        Entry[NbEntry]["strand"] = $7

        # positions of valid genes are what END clusters on
        if (valid) {
            for (p = from ; p <= to ; p++)
                Cover[p] = 1
        }
    }

    if ($3 == "exon")
        Entry[NbEntry]["nbexon"]++

    # keep the line itself, first two columns blanked
    nl = ++Entry[NbEntry]["nbline"]
    $1 = ""
    $2 = ""
    Entry[NbEntry][nl] = $0
}

# lines of the alignment section

(State == "align") {
    if ($0 ~ /^vulgar/) {
        # vulgar line closes the alignment; the text before '@' in
        # field 2 is kept as the organism
        State = 0
        split($2, part, "@")
        Organism = part[1]
    }
    else {
        # alignment lines come in groups of 5 : only the third is kept
        getline; getline
        Align = Align "" $0
        getline; getline
    }
    next
}
|
||||
|
||||
END {

    # make clusters : every maximal run of consecutive covered positions
    # is one cluster. Cover is walked in ascending numeric order (see the
    # PROCINFO["sorted_in"] setting in BEGIN).
    #
    # Array subscripts are strings, so the position is forced to a number
    # before it is stored: the cluster bounds are compared to the numeric
    # entry bounds below and must not fall back to string comparison
    # (where "1000" < "999").

    prev = -1
    for (idx in Cover) {
        pos = idx + 0
        if ((NbClust == 0) || (pos > prev + 1))
            Clust[++NbClust]["from"] = pos
        prev = Clust[NbClust]["to"] = pos
    }

    # get highest scoring entry of each cluster

    for (i = 1 ; i <= NbEntry ; i++) {
        if (! Entry[i]["valid"]) continue

        # clusters are disjoint : at most one of them contains the entry
        clusno = 0
        for (j = 1 ; j <= NbClust ; j++) {
            if ((Entry[i]["from"] >= Clust[j]["from"]) && (Entry[i]["to"] <= Clust[j]["to"])) {
                clusno = j
                break
            }
        }
        if (clusno == 0) continue

        score = Entry[i]["score"]
        if (score > Clust[clusno]["score"]+0) {
            Clust[clusno]["score"] = score
            Clust[clusno]["strand"] = Entry[i]["strand"]
            Clust[clusno]["entry"] = i
        }
    }

    # print cluster info

    print "c nclust", NbClust+0
    for (i = 1 ; i <= NbClust ; i++) {
        print "c cluster", i, "from", Clust[i]["from"], "to", Clust[i]["to"],\
              "strand", Clust[i]["strand"], "score", Clust[i]["score"]
    }

    # print best entry of each cluster (clusters without a score are skipped)

    for (i = 1 ; i <= NbClust ; i++) {
        if (Clust[i]["score"] == 0) continue
        j = Clust[i]["entry"]
        s = Clust[i]["strand"]
        n = Entry[j]["nbline"]
        ne = Entry[j]["nbexon"]
        print "c begin_entry", j, "cluster", i, "strand", s, "nbexon", ne
        for (k = 1 ; k <= n ; k++) {
            entry = Entry[j][k]
            gsub("^ +", "", entry)   # blanks left by the emptied columns
            print "e", entry
        }
        print "c end_entry", j, "cluster", i
    }
}
|
||||
|
||||
|
18
detectors/cds/lib/cutline.awk
Normal file
18
detectors/cds/lib/cutline.awk
Normal file
@ -0,0 +1,18 @@
|
||||
#
|
||||
|
||||
# print at most 80 characters on the line itself; whatever remains is
# emitted as continuation lines carrying 59 characters each
{
    print substr($0, 1, 80)
    for (tail = substr($0, 81) ; length(tail) > 0 ; tail = substr(tail, 60))
        print "FT " substr(tail, 1, 59)
}
|
135
detectors/cds/lib/extend.awk
Normal file
135
detectors/cds/lib/extend.awk
Normal file
@ -0,0 +1,135 @@
|
||||
#
|
||||
# extend start/stop
|
||||
#
|
||||
# -v FASTA START_MODEL STOP_MODEL START_WALK STOP_WALK
|
||||
#
|
||||
|
||||
#
# walk upstream of 'pos', codon by codon, over at most START_WALK
# positions and return the position of the StartModel codon with the
# highest model value, or 0 if none. The walk ends at the first codon
# found in StopModel.
# On the "-" strand the walk is done on RevSeq and positions are mirrored.
#
function UpStart(pos, strand, _local_, seq, i, imax, smax, s, lowest) {

    if (strand == "-") {
        seq = RevSeq
        pos = LenSeq - pos + 1
    }
    else
        seq = Seq

    imax = smax = 0
    lowest = Max(1, pos-START_WALK)

    i = pos
    while (i >= lowest) {
        s = substr(seq, i, 3)
        if (s in StopModel)
            break
        if (s in StartModel) {
            if (StartModel[s] > smax) {
                smax = StartModel[s]
                imax = i
            }
        }
        i -= 3
    }

    # back to forward strand coordinates
    if ((strand == "-") && (imax > 0))
        imax = LenSeq - imax + 1

    return imax
}
|
||||
|
||||
#
|
||||
|
||||
#
# walk downstream of 'pos', codon by codon, over at most STOP_WALK
# positions and return the position of the first codon found in
# StopModel, or 0 if none.
# On the "-" strand the walk is done on RevSeq and positions are mirrored.
#
function DownStop(pos, strand, _local_, seq, i, imax, s, limit) {

    seq = (strand == "-") ? RevSeq : Seq
    if (strand == "-")
        pos = LenSeq - pos + 1

    limit = Min(LenSeq, pos+STOP_WALK)

    for (i = pos ; i < limit ; i += 3) {
        s = substr(seq, i, 3)
        if (s in StopModel) {
            # back to forward strand coordinates
            return ((strand == "-") && (i > 0)) ? LenSeq - i + 1 : i
        }
    }

    return 0
}
|
||||
|
||||
#
|
||||
# rules
|
||||
#
|
||||
|
||||
BEGIN {

    # defaults for the settings not given with -v
    # (a negative walk length means : whole sequence, see below)

    if (START_MODEL == "") START_MODEL="Models/start.default.frq"
    if (STOP_MODEL == "")  STOP_MODEL="Models/stop.default.frq"
    if (START_WALK == "")  START_WALK=120
    if (STOP_WALK == "")   STOP_WALK=-1

    # genome sequence and its reverse complement

    if (TestPath(FASTA) == 0)
        Error("Fasta file: '" FASTA "' not found", 1)

    Seq = ReadFasta(FASTA)
    RevSeq = RevComplement(Seq)
    LenSeq = length(Seq)

    if (START_WALK < 0) START_WALK = LenSeq
    if (STOP_WALK < 0)  STOP_WALK = LenSeq

    # start / stop codon models

    if (TestPath(START_MODEL) == 0)
        Error("model file: '" START_MODEL "' not found", 2)
    if (TestPath(STOP_MODEL) == 0)
        Error("model file: '" STOP_MODEL "' not found", 2)

    ReadModel(START_MODEL, StartModel)
    ReadModel(STOP_MODEL, StopModel)
}
|
||||
|
||||
#
|
||||
|
||||
# 'c' lines are copied as is; a begin_entry line also resets the
# per-entry state

/^c/ {
    if ($0 ~ /^c begin_entry/) {
        Strand = $7
        StartExon = 1       # always first (even on - strand)
        StopExon = $9+0     # always last (even on - strand)
        NbExon = 0
    }
    print $0
    next
}

# exon lines : the exon carrying the start is extended upstream to the
# best start codon, the exon carrying the stop is extended downstream to
# (and including) the first stop codon. A modifier is appended: '=' for a
# bound that was kept or resolved, '<' / '>' for a bound left open.

/^e exon/ {
    NbExon++
    fwd = (Strand == "+")
    step = fwd ? 1 : -1

    mod_start = "="
    if (NbExon == StartExon) {
        col = fwd ? 3 : 4
        pos = $col + 0
        start = UpStart(pos, Strand)
        if (start == 0) {               # no start found : keep exon bound
            mod_start = fwd ? "<" : ">"
            start = pos
        }
        $col = start
    }

    mod_stop = "="
    if (NbExon == StopExon) {
        col = fwd ? 4 : 3
        pos = $col + step               # first position after the exon
        stop = DownStop(pos, Strand)
        if (stop == 0) {                # no stop found : keep exon bound
            mod_stop = fwd ? ">" : "<"
            stop = pos - step
        }
        else                            # include the 3 bases of the stop
            stop += 2 * step
        $col = stop
    }

    # modifier characters are given in from/to order
    modif = fwd ? mod_start "" mod_stop : mod_stop "" mod_start
    print $0, "; modifier \"" modif "\""
    next
}

# these 'e' lines are copied as is

/^e (intron|splice|similarity)/ {
    print $0
    next
}
|
27
detectors/cds/lib/filterbx.awk
Normal file
27
detectors/cds/lib/filterbx.awk
Normal file
@ -0,0 +1,27 @@
|
||||
#
|
||||
#
|
||||
# filter BlastX results for speedup
|
||||
#
|
||||
|
||||
BEGIN {
    # defaults for the thresholds not given with -v
    if (IDMIN == "") IDMIN = 80
    if (NBMIN == "") NBMIN = 50
    if (NBMAX == "") NBMAX = 200
}

/^#/ { next }

# record each subject (column 2) once, in order of first appearance.
# A subject is accepted when column 3 reaches IDMIN, or regardless of
# column 3 as long as no more than NBMIN subjects were recorded so far.
{
    subject = $2
    if (($3+0 < IDMIN) && (HitNum > NBMIN)) next
    if (subject in HitIndex) next
    HitIndex[subject] = ++HitNum
    IndexHit[HitNum] = subject
}

# print the recorded subjects, NBMAX at most
END {
    for (rank = 1 ; (rank <= HitNum) && (rank <= NBMAX) ; rank++)
        print IndexHit[rank]
}
|
||||
|
||||
|
62
detectors/cds/lib/go_filterbx.sh
Executable file
62
detectors/cds/lib/go_filterbx.sh
Executable file
@ -0,0 +1,62 @@
|
||||
#!/bin/csh -f
|
||||
#
|
||||
# filter a DB thru BlastX
|
||||
# usually to speedup further DB search
|
||||
#
|
||||
# output on stdout
|
||||
#
|
||||
# usage: go_filterbx.sh dna_fasta prot_fasta [idmin nbmin nbmax]
|
||||
#
|
||||
|
||||
#
# locate the ORG tree from the script location and load the shared
# csh helpers (NeedArg, NeedFile, Shift, Notify, CheckAbort, Exit ... are
# not defined in this script - presumably they come from csh_init.sh)
#

setenv ORG_HOME `dirname $0`/../../..
source $ORG_HOME/scripts/csh_init.sh

#
# mandatory arguments : dna fasta and protein fasta
#

NeedArg 2

set GenoFile = $Argv[1]; Shift
set ProtFile = $Argv[1]; Shift

NeedFile $GenoFile
NeedFile $ProtFile

#
# optional arguments : idmin nbmin nbmax
#
# The block form of 'if' is required here: with the one-line form
# "if (cond) set X = $Argv[n]" csh substitutes $Argv[n] before the
# condition is evaluated, so a missing optional argument aborts the
# script with "Subscript out of range".
#

set IDMIN = 70
set NBMIN = 50
set NBMAX = 200

if ($#Argv >= 1) then
    set IDMIN = $Argv[1]
endif

if ($#Argv >= 2) then
    set NBMIN = $Argv[2]
endif

if ($#Argv >= 3) then
    set NBMAX = $Argv[3]
endif

#
# format ProtFile (skipped when $ProtFile.phr already exists)
#

if (! -e $ProtFile.phr) then
    Notify " formatting $ProtFile"
    (makeblastdb -dbtype prot -in $ProtFile) >& /dev/null
    CheckAbort 10 "makeblastdb failure"
endif

#
# blastx : the selected protein names are written to T_$$
#

Notify " blasting $ProtFile"
blastx -query $GenoFile -db $ProtFile -outfmt 7 |\
    $AwkCmd -v IDMIN=$IDMIN \
            -v NBMIN=$NBMIN \
            -v NBMAX=$NBMAX \
            -f $LIB_DIR/filterbx.awk > T_$$

#
# extract subdb (on stdout)
#

$AwkCmd -v FILE=T_$$ -f $LIB_DIR/subdb.awk $ProtFile

#
# end : remove the temporary file(s) of this process
#

(\rm -f ?_$$) >> /dev/null

Exit 0
|
197
detectors/cds/lib/go_pass1.sh
Executable file
197
detectors/cds/lib/go_pass1.sh
Executable file
@ -0,0 +1,197 @@
|
||||
#!/bin/csh -f
|
||||
#
|
||||
# Annotate CDS - Pass1
|
||||
#
|
||||
#========================================================================================
|
||||
#
|
||||
# Annotate CDS of chlorodb/core proteins using exonerate
|
||||
#
|
||||
# go_pass1.sh <FASTAFILE> <FAMILY> [<OUTDIR>]
|
||||
#
|
||||
# - <FASTAFILE> : The fasta file containing the genome to annotate
|
||||
# - <FAMILY> : Name of the protein family (defined in chlorodb/core)
|
||||
#
|
||||
# Results are in file : `basename <FASTAFILE>:r`.<FAMILY>.res
|
||||
#
|
||||
#========================================================================================
|
||||
#
|
||||
# usage: go_pass1.sh fasta family [outdir]
|
||||
#
|
||||
|
||||
#
# locate the ORG tree from the script location and load the shared
# csh helpers (NeedArg, NeedFile, Notify, CheckAbort, Exit ... are not
# defined in this script - presumably they come from csh_init.sh)
#

setenv ORG_HOME `dirname $0`/../../..
source $ORG_HOME/scripts/csh_init.sh

set PARAMS_DIR = $LIB_DIR/../params
set MODELS_DIR = $LIB_DIR/../models

# Override <var> <file> : set <var> to <file> if <file> exists
alias Override 'if (-e \!:2) set \!:1 = \!:2'

#
# arguments : fasta family [outdir]
#

NeedArg 2

set GenoFile = $Argv[1]
set GenoName = `basename $GenoFile:r`
set ProtName = $Argv[2]
set ProtDir = $CDS_DATA_DIR/chlorodb/core
set ProtFile = $ProtDir/$ProtName.fst

NeedFile $GenoFile
NeedFile $ProtFile

set OutDir = .
if ($#Argv >= 3) then
    set OutDir = $3
endif
if (! -d $OutDir) then
    mkdir $OutDir
endif

#
# general parameters, then family specific parameters (if any)
#

source $PARAMS_DIR/default

if (-e $PARAMS_DIR/$ProtName) source $PARAMS_DIR/$ProtName

#
# start/stop/splice models : unless the variable is already set, use
# the default model, overridden by the family model when it exists
#

if (! $?STARTMODEL) then
    set STARTMODEL = $MODELS_DIR/start.default.frq
    Override STARTMODEL $MODELS_DIR/start.$ProtName.frq
endif

if (! $?STOPMODEL) then
    set STOPMODEL = $MODELS_DIR/stop.default.frq
    Override STOPMODEL $MODELS_DIR/stop.$ProtName.frq
endif

if (! $?SPLICE3MODEL) then
    set SPLICE3MODEL = $MODELS_DIR/splice3.default.frq
    Override SPLICE3MODEL $MODELS_DIR/splice3.$ProtName.frq
endif

if (! $?SPLICE5MODEL) then
    set SPLICE5MODEL = $MODELS_DIR/splice5.default.frq
    Override SPLICE5MODEL $MODELS_DIR/splice5.$ProtName.frq
endif

#
# out files prefix
#

set base = $OutDir/$GenoName.$ProtName

#
# skip exonerate calculations if already done
#

if (-e $base.exo.raw) then
    Notify " file $base.exo.raw found <exonerate skipped>"
    goto parse
endif

#
# speedup exonerate : restrict the protein DB to the sequences selected
# by go_filterbx.sh
#

if ($PASS1_SPEEDUP == 0) then
    set DbFile = $ProtFile
else
    $LIB_DIR/go_filterbx.sh $GenoFile $ProtFile $PASS1_BLASTX_FILTER_IDMIN \
        $PASS1_BLASTX_FILTER_NBMIN $PASS1_BLASTX_FILTER_NBMAX > D_$$

    set n = `egrep "^>" D_$$ | wc -l`
    if ($n <= 0) then
        Notify " no sequence match"
        if ($PASS1_SLOWDOWN == 0) then
            # nothing to search : leave an (almost) empty raw file
            echo "" > $base.exo.raw
            goto parse
        endif
        Notify " reverting to original $ProtName"
        set DbFile = $ProtFile
    else
        Notify " $n sequences kept"
        set DbFile = D_$$
    endif
endif

#
# run exonerate
#

Notify " running exonerate of $GenoName on $ProtName"
exonerate --model protein2genome --percent $PASS1_PERCENT \
          --showalignment TRUE --showvulgar TRUE --showtargetgff TRUE \
          --geneticcode $PASS1_GENETIC_CODE \
          --minintron $PASS1_MIN_INTRON --maxintron $PASS1_MAX_INTRON \
          --bestn $PASS1_BESTN --frameshift $PASS1_FRAMESHIFT \
          --proteinsubmat $PASS1_SUBMAT \
          --splice3 $SPLICE3MODEL --splice5 $SPLICE5MODEL \
          $DbFile $GenoFile > $base.exo.raw
CheckAbort 20 "exonerate failure"

#
# extract best clusters
#

parse:

$AwkCmd -v MAX_SPAN=$PASS1_MAX_SPAN -v ALLOW_STOP=$PASS1_ALLOW_STOP \
        -v EXCLUDE=$GenoName \
        -f $LIB_DIR/bestclust.awk $base.exo.raw > $base.exo.best

#
# get annotations
#

egrep "^$ProtName " $ProtDir/Annot.lst | awk '{print "c annot", $0}' > T_$$

#
# extend start/stop
#

$AwkCmd -f $LIB_DIR/libutil.awk -f $LIB_DIR/extend.awk \
        -v FASTA=$GenoFile \
        -v START_MODEL=$STARTMODEL -v STOP_MODEL=$STOPMODEL \
        -v START_WALK=$PASS1_START_WALK -v STOP_WALK=$PASS1_STOP_WALK \
        $base.exo.best >> T_$$

#
# translate
#

$AwkCmd -v FASTA=$GenoFile -f $LIB_DIR/libutil.awk -f $LIB_DIR/translate.awk \
        T_$$ > $base.iff

#
# convert to embl
#

$AwkCmd -f $LIB_DIR/toEmbl.awk $base.iff | $AwkCmd -f $LIB_DIR/cutline.awk > $base.res

#
# end : remove the temporary files of this process
#

Notify " output file: $base.res"

(\rm -f ?_$$) >> /dev/null

Exit 0
|
179
detectors/cds/lib/libutil.awk
Normal file
179
detectors/cds/lib/libutil.awk
Normal file
@ -0,0 +1,179 @@
|
||||
#
|
||||
# utilities library
|
||||
#
|
||||
|
||||
# re-issue the exit status requested through Exit() : an 'exit' outside
# of END still runs the END rules

END {
    if (_ForceExit_)
        exit(_ForceExit_)
}

# print "# <key> <msg>" on stderr

function Notify(key, msg, _local_, text) {
    text = "# " key " " msg
    print text >> "/dev/stderr"
}

# notify an 'info' message; returns 1

function Info(msg) {
    Notify("info", msg)
    return 1
}

# notify a 'warning' message; returns 0

function Warning(msg) {
    Notify("warning", msg)
    return 0
}

# exit with 'status' (recorded in _ForceExit_ for the END rule above)

function Exit(status) {
    _ForceExit_ = status
    exit(status)
}

# notify an 'error' message and exit with 'status' (1 when not given)

function Error(msg, status) {
    Notify("error", msg)
    if (! status)
        status = 1
    Exit(status)
}

# when 'condition' is false : error (and exit) if 'status' is given,
# warning otherwise. The message is prefixed by the input file and line.

function Assert(condition, msg, status) {
    if (condition)
        return
    msg = FILENAME ":" FNR ": " msg
    if (status)
        return Error(msg, status)
    return Warning(msg)
}
|
||||
|
||||
#
|
||||
|
||||
# smallest of a and b (b when they compare equal)

function Min(a, b) {
    if (a < b)
        return a
    return b
}

# largest of a and b (b when they compare equal)

function Max(a, b) {
    if (a > b)
        return a
    return b
}
|
||||
|
||||
#
|
||||
|
||||
# remove what matches 'regexp' at both ends of s
# (default regexp : runs of blanks and tabs)

function Trim(s, regexp, _local_, edges) {
    if (regexp == 0)
        regexp = "[ \t]+"
    edges = "^" regexp "|" regexp "$"
    gsub(edges, "", s)
    return s
}

# put a backslash in front of every blank of path

function ShieldPath(path) {
    gsub(/ /, "\\ ", path)
    return path
}
|
||||
|
||||
#
# run the shell command 'test <test> <path>' and return 1 on success,
# 0 otherwise
#
# - path : the path to check (an empty or blank path gives 0)
# - test : the option given to 'test' (default "-f")
#
# The path is passed to the shell inside single quotes (an embedded '
# is written '\''), so blanks and shell metacharacters in the path are
# taken literally instead of being interpreted by the shell.
#
function TestPath(path, test, _local_, stat, quoted) {
    if (test == 0) test = "-f"
    if (Trim(path) == "")
        return 0 # because of a bug in 'test'
    quoted = path
    gsub(/\047/, "\047\\\\\047\047", quoted)
    stat = system("test " test " \047" quoted "\047")
    return stat ? 0 : 1
}
|
||||
|
||||
#
|
||||
|
||||
# s with its characters in reverse order

function Reverse(s, _local_, i, n, rs) {
    rs = ""
    n = length(s)
    # prepend each character in turn
    for (i = 1 ; i <= n ; i++)
        rs = substr(s, i, 1) "" rs
    return rs
}

# reverse complement of seq, using the _DnaC table
# (symbols without a complement in _DnaC give "X")

function RevComplement(seq, _local_, n, i, c, rs) {
    rs = ""
    n = length(seq)
    # prepend the complement of each symbol in turn
    for (i = 1 ; i <= n ; i++) {
        c = substr(seq, i, 1)
        if (_DnaC[c])
            rs = _DnaC[c] "" rs
        else
            rs = "X" rs
    }
    return rs
}
|
||||
|
||||
#
|
||||
|
||||
# build (once) the codon -> amino acid table _GenCod[name] from the
# 64 letters of _Cod2Aa[name], codons being enumerated in _NucOrder order
# (first base varying slowest). Always returns 1.

function _AssertCode(name, _local_, n, b1, b2, b3) {
    if (_InitCod[name] != 0) return 1
    for (n = 0 ; n < 64 ; n++) {
        b1 = substr(_NucOrder, int(n / 16) + 1, 1)
        b2 = substr(_NucOrder, int(n / 4) % 4 + 1, 1)
        b3 = substr(_NucOrder, n % 4 + 1, 1)
        _GenCod[name][b1 "" b2 "" b3] = substr(_Cod2Aa[name], n + 1, 1)
    }
    _InitCod[name] = 1
    return 1
}

# translate seq (any case) codon by codon from its first base, with the
# genetic code 'codname' (default "universal"). Codons absent from the
# table - including a trailing incomplete one - give "X".

function Translate(seq, codname, _local_, n, i, c, p) {
    if (codname == 0)
        codname = "universal"
    _AssertCode(codname)

    seq = toupper(seq)
    n = length(seq)
    p = ""
    i = 1
    while (i <= n) {
        c = substr(seq, i, 3)
        if (c in _GenCod[codname])
            p = p "" _GenCod[codname][c]
        else
            p = p "X"
        i += 3
    }
    return p
}
|
||||
|
||||
#
|
||||
|
||||
# the part of seq going from position 'from' to position 'to' (inclusive),
# reverse complemented when 'revstrand' is true

function SubSeq(seq, from, to, revstrand, _local_, part) {
    part = substr(seq, from, to-from+1)
    return revstrand ? RevComplement(part) : part
}
|
||||
|
||||
#
|
||||
|
||||
#
# read a fasta file and return its sequence as a single string
#
# - file : path of the fasta file
#
# The first line (header) is discarded and all following lines are
# concatenated as is. An unreadable or empty file gives "".
#
# getline returns -1 on error : the read loop must test '> 0', a bare
# 'while (getline < file)' would never end on an unreadable file.
# Lines are read into a local variable, so $0 is left untouched, and the
# file is closed so that it can be read again later.
#
function ReadFasta(file, _local_, seq, line) {
    seq = ""
    if ((getline line < file) > 0) {        # header line, ignored
        while ((getline line < file) > 0)
            seq = seq "" line
    }
    close(file)
    return seq
}
|
||||
|
||||
#
|
||||
|
||||
#
# read a model file into array a : a[<column 1>] = <column 2>
#
# - file : path of the model file
# - a    : array to fill (emptied first)
#
# Lines starting with '#' and blank lines are skipped (a blank line must
# not create an empty key). An unreadable file leaves a empty.
#
# getline returns -1 on error : the read loop must test '> 0', a bare
# 'while (getline < file)' would never end on an unreadable file.
# The current record ($0) is saved and restored, and the file is closed
# so that it can be read again later.
#
function ReadModel(file, a, _local_, line, context) {
    context = $0
    delete a
    while ((getline < file) > 0) {
        if ((NF == 0) || ($0 ~ "^#")) continue
        a[$1] = $2
    }
    close(file)
    $0 = context
}
|
||||
|
||||
#
|
||||
# constants
|
||||
#
|
||||
|
||||
BEGIN {
    # _DnaC : complement of each IUPAC DNA symbol, upper and lower case
    # (_compl holds, at the same position, the complement of _iupac)
    _iupac = "ACGTRYMKWSBVDHNX"
    _compl = "TGCAYRKMWSVBHDNX"
    for (_k = 1 ; _k <= length(_iupac) ; _k++) {
        _DnaC[substr(_iupac, _k, 1)] = substr(_compl, _k, 1)
        _DnaC[tolower(substr(_iupac, _k, 1))] = tolower(substr(_compl, _k, 1))
    }

    # genetic codes : one amino acid letter per codon, codons being
    # enumerated in _NucOrder order as shown by the three guide lines
    _NucOrder = "ACGT"
    #1                           AAAAAAAAAAAAAAAACCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTT
    #2                           AAAACCCCGGGGTTTTAAAACCCCGGGGTTTTAAAACCCCGGGGTTTTAAAACCCCGGGGTTTT
    #3                           ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
    _Cod2Aa["universal"]        = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSS*CWCLFLF"
    _Cod2Aa["mito-yeast"]       = "KNKNTTTTRSRSMIMIQHQHPPPPRRRRTTTTEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF"
    _Cod2Aa["mito-vertebrates"] = "KNKNTTTT*S*SMIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF"
    _Cod2Aa["mito-insects"]     = "KNKNTTTTSSSSMIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF"
    _Cod2Aa["mito-echinoderms"] = "NNKNTTTTSSSSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF"
    _Cod2Aa["mycoplasma"]       = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF"
    _Cod2Aa["ciliata"]          = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVQYQYSSSS*CWCLFLF"
    _Cod2Aa["euplotes"]         = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSCCWCLFLF"
    _Cod2Aa["candida"]          = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRXLSLEDEDAAAAGGGGVVVV*Y*YSSSS*CWCLFLF"
}
|
20
detectors/cds/lib/subdb.awk
Normal file
20
detectors/cds/lib/subdb.awk
Normal file
@ -0,0 +1,20 @@
|
||||
#
|
||||
# select subDB from fasta DB
|
||||
#
|
||||
# -v FILE
|
||||
|
||||
# load the names to select : first column of each line of FILE
# (default "dbsel.txt").
# getline returns -1 on error : the read loop must test '> 0', a bare
# 'while (getline < FILE)' would never end on an unreadable file.

BEGIN {
    if (FILE == "") FILE = "dbsel.txt"
    while ((getline < FILE) > 0)
        INC[$1] = $1
    close(FILE)
}

# fasta header : the entry is selected when its name (first word of the
# header, '>' removed) is one of the loaded names

/^>/ {
    name = substr($1, 2)
    ok = name in INC
}

# print every line (header included) of a selected entry

ok {
    print $0
}
|
141
detectors/cds/lib/toEmbl.awk
Normal file
141
detectors/cds/lib/toEmbl.awk
Normal file
@ -0,0 +1,141 @@
|
||||
#
|
||||
# iff -> embl
|
||||
#
|
||||
|
||||
# 'from' bound of exon i, prefixed by the first character of its
# modifier; every '=' is removed from the result

function FromLoc(i, _local_, s) {
    s = Exon[i]["from"]
    s = substr(Exon[i]["modif"], 1, 1) "" s
    gsub(/=/, "", s)
    return s
}

# 'to' bound of exon i, prefixed by the second character of its
# modifier; every '=' is removed from the result

function ToLoc(i, _local_, s) {
    s = Exon[i]["to"]
    s = substr(Exon[i]["modif"], 2, 1) "" s
    gsub(/=/, "", s)
    return s
}
|
||||
|
||||
# location of the whole gene : from the first bound of exon 1 to the
# last bound of exon Nexon on the "+" strand, the other way round
# otherwise; wrapped in complement() on the "-" strand

function GeneLocation(_local_, d, s) {
    d = Exon[1]["strand"]
    if (d == "+")
        return FromLoc(1) ".." ToLoc(Nexon)
    s = FromLoc(Nexon) ".." ToLoc(1)
    return (d == "-") ? "complement(" s ")" : s
}
|
||||
|
||||
# location of the CDS : comma separated exon ranges, exons being listed
# from 1 to Nexon on the "+" strand and from Nexon to 1 otherwise;
# wrapped in join() for several exons and in complement() on the "-" strand

function CdsLocation(_local_, d, i, s, sep, step) {
    d = Exon[1]["strand"]
    step = (d == "+") ? 1 : -1
    s = sep = ""
    for (i = (step > 0 ? 1 : Nexon) ; (i >= 1) && (i <= Nexon) ; i += step) {
        s = s sep FromLoc(i) ".." ToLoc(i)
        sep = ","
    }
    if (Nexon > 1)
        s = "join(" s ")"
    if (d == "-")
        s = "complement(" s ")"
    return s
}
|
||||
|
||||
# location of exon i, wrapped in complement() on the "-" strand

function ExonLocation(i, _local_, s) {
    s = FromLoc(i) ".." ToLoc(i)
    return (Exon[i]["strand"] == "-") ? "complement(" s ")" : s
}
|
||||
|
||||
# s padded with blanks on the right up to 'len' characters
# (s is returned unchanged when already that long)

function Pad(s, len) {
    return sprintf("%-" len "s", s)
}

# print a feature line : key padded to column 21, then the location

function Feature(feat, loc, _local_, s) {
    s = "FT " feat
    print Pad(s, 21) "" loc
}

# print a qualifier line, value written as is : /qual=val

function SQualifier(qual, val, _local_, s) {
    s = "/" qual "=" val
    print Pad("FT", 21) s
}

# print a qualifier line, value written between double quotes : /qual="val"

function QQualifier(qual, val) {
    SQualifier(qual, "\"" val "\"")
}
|
||||
|
||||
#
|
||||
# rules
|
||||
#
|
||||
|
||||
# 'c' lines : annotation, number of clusters, start of an entry
# (other 'c' lines are left to the following rules)

/^c / {
    if ($0 ~ /^c annot/) {
        # gene name is column 4, product is the last column ('_' -> ' ')
        GeneName = $4
        Product = $NF
        gsub("_", " ", Product)
        next
    }
    if ($0 ~ /^c nclust/) {
        Ngene = $3
        next
    }
    if ($0 ~ /^c begin_entry/) {
        delete Exon
        Nexon = 0
        next
    }
}

# 'e' lines : exons, similarity and translation of the current entry

/^e exon/ {
    Nexon++
    modif = $15
    gsub("\"", "", modif)           # modifier comes between double quotes
    Exon[Nexon]["modif"] = modif
    Exon[Nexon]["indels"] = $9 "+" $12
    Exon[Nexon]["strand"] = $6
    Exon[Nexon]["to"] = $4
    Exon[Nexon]["from"] = $3
    next
}

/^e similarity/ {
    # column 12 is split on '@' and its two first parts joined by ':'
    split($12, a, "@")
    Simil = a[1] ":" a[2]
    next
}

/^e translate/ {
    Translat = $3
    next
}
|
||||
|
||||
# end of an entry : print its gene, CDS and (for several exons) exon
# features

/^c end_entry/ {

    # with several genes, the gene name gets a running number suffix
    if (Ngene == 1)
        gname = GeneName
    else {
        Igene++
        gname = GeneName "_" Igene
    }
    locus = ""

    Feature("gene", GeneLocation())
    QQualifier("gene", gname)
    QQualifier("locus_tag", locus)

    Feature("CDS", CdsLocation())
    SQualifier("codon_start", 1)
    SQualifier("transl_table", 11)
    QQualifier("gene", gname)
    QQualifier("locus_tag", locus)
    # NOTE(review): product is written without quotes, unlike the other
    # text qualifiers - confirm whether the value already carries them
    SQualifier("product", Product)
    QQualifier("inference", "similar to DNA sequence:" Simil)
    QQualifier("translation", Translat)

    # exons are numbered from 1 on the "+" strand, from Nexon down otherwise
    if (Nexon > 1) {
        fwd = (Exon[1]["strand"] == "+")
        for (i = 1 ; i <= Nexon ; i++) {
            Feature("exon", ExonLocation(i))
            QQualifier("gene", gname)
            QQualifier("locus_tag", locus)
            SQualifier("number", fwd ? i : Nexon-i+1)
        }
    }
}
|
||||
|
||||
|
39
detectors/cds/lib/translate.awk
Normal file
39
detectors/cds/lib/translate.awk
Normal file
@ -0,0 +1,39 @@
|
||||
#
|
||||
# translate CDSs from iff file
|
||||
#
|
||||
|
||||
BEGIN {
    Seq = ReadFasta(FASTA)
}

# end of an entry : print the translation of the assembled CDS, last
# three bases excluded, just before the end_entry line itself

/^c end_entry/ {
    if (RevStrand)
        Cds = RevComplement(Cds)
    Prot = Translate(substr(Cds, 1, length(Cds)-3))
    # first residue is written "M" when the start modifier is "="
    if (Modif == "=")
        Prot = "M" substr(Prot, 2)
    print "e translate " Prot
}

# every line is copied; begin_entry and exon lines also update the CDS
# being assembled

{
    print $0

    if ($0 ~ /^c begin_entry/) {
        Iexon = 0
        Cds = ""
        next
    }

    if ($0 ~ /^e exon/) {
        RevStrand = ($6 == "-")
        Iexon++
        if (Iexon == 1) { # first is exon with start (even on - strand)
            Modif = $15
            gsub("\"", "", Modif)
            Modif = substr(Modif, (RevStrand ? 2 : 1), 1)
        }
        # exon sequences are put in increasing coordinate order
        if (RevStrand)
            Cds = SubSeq(Seq, $3, $4) "" Cds
        else
            Cds = Cds "" SubSeq(Seq, $3, $4)
        next
    }
}
|
29
detectors/cds/models/blosum62.mat
Normal file
29
detectors/cds/models/blosum62.mat
Normal file
@ -0,0 +1,29 @@
|
||||
#
|
||||
# blosum62 substitution matrix
|
||||
# with larger penalty for stops
|
||||
#
|
||||
A R N D C Q E G H I L K M F P S T W Y V B Z X *
|
||||
A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -50
|
||||
R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -50
|
||||
N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -50
|
||||
D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -50
|
||||
C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -50
|
||||
Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -50
|
||||
E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -50
|
||||
G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -50
|
||||
H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -50
|
||||
I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -50
|
||||
L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -50
|
||||
K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -50
|
||||
M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -50
|
||||
F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -50
|
||||
P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -50
|
||||
S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -50
|
||||
T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -50
|
||||
W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -50
|
||||
Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -50
|
||||
V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -50
|
||||
B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -50
|
||||
Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -50
|
||||
X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -50
|
||||
* -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 -50 1
|
6
detectors/cds/models/splice.none.frq
Normal file
6
detectors/cds/models/splice.none.frq
Normal file
@ -0,0 +1,6 @@
|
||||
# 3'/5' splice null model
|
||||
# A C G T
|
||||
25 25 25 25
|
||||
splice
|
||||
25 25 25 25
|
||||
# end of 3'/5' splice null model
|
13
detectors/cds/models/splice3.default.frq
Normal file
13
detectors/cds/models/splice3.default.frq
Normal file
@ -0,0 +1,13 @@
|
||||
# 3' splice model : default
|
||||
# A C G T
|
||||
19 18 27 36
|
||||
36 5 34 25
|
||||
20 7 51 23
|
||||
29 13 14 44
|
||||
splice
|
||||
5 2 78 15
|
||||
3 7 15 75
|
||||
4 1 83 12
|
||||
5 51 13 31
|
||||
5 7 79 9
|
||||
53 10 27 11
|
13
detectors/cds/models/splice3.ndha.frq
Normal file
13
detectors/cds/models/splice3.ndha.frq
Normal file
@ -0,0 +1,13 @@
|
||||
# 3' splice model : ndha
|
||||
# A C G T
|
||||
1 43 1 56
|
||||
0 57 0 42
|
||||
41 1 2 56
|
||||
57 42 0 1
|
||||
splice
|
||||
1 55 42 2
|
||||
0 1 57 42
|
||||
1 1 43 56
|
||||
2 11 56 31
|
||||
1 29 42 28
|
||||
41 1 57 1
|
13
detectors/cds/models/splice3.psba.frq
Normal file
13
detectors/cds/models/splice3.psba.frq
Normal file
@ -0,0 +1,13 @@
|
||||
# 3' splice model : psba
|
||||
# A C G T
|
||||
26 14 40 21
|
||||
13 44 35 9
|
||||
41 3 33 23
|
||||
1 13 5 81
|
||||
splice
|
||||
37 5 27 31
|
||||
62 5 9 24
|
||||
56 13 23 8
|
||||
32 28 6 33
|
||||
35 21 17 28
|
||||
53 12 10 26
|
13
detectors/cds/models/splice5.default.frq
Normal file
13
detectors/cds/models/splice5.default.frq
Normal file
@ -0,0 +1,13 @@
|
||||
# 5' splice model : default
|
||||
# A C G T
|
||||
15 42 3 40
|
||||
15 46 6 33
|
||||
13 32 2 53
|
||||
45 17 21 16
|
||||
70 13 6 11
|
||||
21 39 8 32
|
||||
splice
|
||||
37 27 9 26
|
||||
31 35 17 17
|
||||
29 23 26 22
|
||||
30 25 28 17
|
13
detectors/cds/models/splice5.ndha.frq
Normal file
13
detectors/cds/models/splice5.ndha.frq
Normal file
@ -0,0 +1,13 @@
|
||||
# 5' splice model : ndha
|
||||
# A C G T
|
||||
0 54 2 44
|
||||
43 1 1 56
|
||||
57 1 7 35
|
||||
8 1 39 52
|
||||
56 2 40 2
|
||||
55 1 0 44
|
||||
splice
|
||||
1 0 2 97
|
||||
40 1 1 58
|
||||
55 0 1 44
|
||||
1 41 1 58
|
13
detectors/cds/models/splice5.psba.frq
Normal file
13
detectors/cds/models/splice5.psba.frq
Normal file
@ -0,0 +1,13 @@
|
||||
# 5' splice model : psba
|
||||
# A C G T
|
||||
38 15 9 37
|
||||
40 5 13 42
|
||||
41 15 6 37
|
||||
32 9 15 44
|
||||
22 8 10 60
|
||||
0 9 83 8
|
||||
splice
|
||||
33 15 27 24
|
||||
31 6 26 37
|
||||
17 28 6 49
|
||||
13 15 23 49
|
8
detectors/cds/models/start.default.frq
Normal file
8
detectors/cds/models/start.default.frq
Normal file
@ -0,0 +1,8 @@
|
||||
# start model : default
|
||||
atg 0.9692592 36165
|
||||
acg 0.01173885 438
|
||||
gtg 0.007504288 280
|
||||
ata 0.003886149 145
|
||||
atc 0.00337693 126
|
||||
att 0.002840909 106
|
||||
ttg 0.001393654 52
|
7
detectors/cds/models/start.ndhd.frq
Normal file
7
detectors/cds/models/start.ndhd.frq
Normal file
@ -0,0 +1,7 @@
|
||||
# start model : ndhd
|
||||
acg 0.5066667 190
|
||||
atg 0.3493333 131
|
||||
atc 0.05066667 19
|
||||
ata 0.03466667 13
|
||||
acc 0.02933333 11
|
||||
gtg 0.02933333 11
|
3
detectors/cds/models/start.rps19.frq
Normal file
3
detectors/cds/models/start.rps19.frq
Normal file
@ -0,0 +1,3 @@
|
||||
# start model : rps19
|
||||
gtg 0.6261023 355
|
||||
atg 0.3738977 212
|
4
detectors/cds/models/stop.default.frq
Normal file
4
detectors/cds/models/stop.default.frq
Normal file
@ -0,0 +1,4 @@
|
||||
# stop model : default (freq. ignored)
|
||||
taa 0.5742367 21968
|
||||
tag 0.2351265 8995
|
||||
tga 0.1906368 7293
|
50
detectors/cds/params/default
Normal file
50
detectors/cds/params/default
Normal file
@ -0,0 +1,50 @@
|
||||
#
|
||||
# CDS Searching parameters
|
||||
#
|
||||
|
||||
#
|
||||
# general parameters
|
||||
# (may be overridden by environment variables)
|
||||
#
|
||||
|
||||
AssignUndef TMP_CLEANUP 1
|
||||
|
||||
#
|
||||
# pass1: exonerate speedup
|
||||
#
|
||||
AssignUndef PASS1_SPEEDUP 1
|
||||
AssignUndef PASS1_SLOWDOWN 0
|
||||
|
||||
AssignUndef PASS1_BLASTX_FILTER_IDMIN 70
|
||||
AssignUndef PASS1_BLASTX_FILTER_NBMIN 50
|
||||
AssignUndef PASS1_BLASTX_FILTER_NBMAX 200
|
||||
|
||||
#
|
||||
# pass1: exonerate parameters
|
||||
#
|
||||
AssignUndef PASS1_PERCENT 30
|
||||
AssignUndef PASS1_MIN_INTRON 30
|
||||
AssignUndef PASS1_MAX_INTRON 6000
|
||||
AssignUndef PASS1_GENETIC_CODE 11
|
||||
AssignUndef PASS1_BESTN 10
|
||||
AssignUndef PASS1_FRAMESHIFT -100
|
||||
|
||||
# pass1: exonerate uses Blosum62 with larger penalty for Stops
|
||||
AssignUndef PASS1_SUBMAT $MODELS_DIR/blosum62.mat
|
||||
|
||||
# comment next lines to use default or specific splice model
|
||||
# AssignUndef SPLICE3MODEL $MODELS_DIR/splice.none.frq
|
||||
# AssignUndef SPLICE5MODEL $MODELS_DIR/splice.none.frq
|
||||
|
||||
#
|
||||
# pass1: cluster selection parameters
|
||||
#
|
||||
AssignUndef PASS1_MAX_SPAN 10000
|
||||
AssignUndef PASS1_ALLOW_STOP 0
|
||||
|
||||
#
|
||||
# extension parameters
|
||||
#
|
||||
AssignUndef PASS1_START_WALK 120
|
||||
AssignUndef PASS1_STOP_WALK -1
|
||||
|
22
detectors/cds/test/go_test.sh
Executable file
22
detectors/cds/test/go_test.sh
Executable file
@ -0,0 +1,22 @@
|
||||
#!/bin/csh -f
#
# Regression test for the CDS detector.
#
# Runs ../bin/go_cds.sh on test.fst, stores its standard output in test.bak
# and compares it with the reference output test.ref.
#
# Exit status: the status of the diff (0 when both files are identical).
#
# NOTE(review): the colour sequences below are expected to start with a
# literal ESC (0x1b) byte before '[' -- confirm it is present in the file.
#

echo "+ [testing CDS]"

# Pin the pass1 settings through the environment so the run is reproducible.
# (presumably these take precedence over the detector's parameter defaults
#  -- confirm against params/default)
setenv PASS1_SPEEDUP 1
setenv PASS1_SLOWDOWN 0
setenv PASS1_BLASTX_FILTER_NBMAX 50

../bin/go_cds.sh test.fst > test.bak

# Only the exit status matters here: any diff output is discarded.
diff -q test.bak test.ref >& /dev/null

# Save the status now: the next command would overwrite $status.
set stat = $status

if ($stat == 0) then
  echo '+[0;32m CDS test Ok[m'
  # keep test.bak only on failure, so it can be inspected
  \rm -r test.bak
else
  # red (31) for a failure, as in the other detector tests
  echo '*[0;31m CDS test Failure[m'
endif

exit $stat
|
1111
detectors/cds/test/test.fst
Normal file
1111
detectors/cds/test/test.fst
Normal file
File diff suppressed because it is too large
Load Diff
347
detectors/cds/test/test.ref
Normal file
347
detectors/cds/test/test.ref
Normal file
@ -0,0 +1,347 @@
|
||||
FT gene 15766..16707
|
||||
FT /gene="ccsA"
|
||||
FT /locus_tag=""
|
||||
FT CDS 15766..16707
|
||||
FT /codon_start=1
|
||||
FT /transl_table=11
|
||||
FT /gene="ccsA"
|
||||
FT /locus_tag=""
|
||||
FT /product=cytochrome c biogenesis protein
|
||||
FT /inference="similar to DNA sequence:AC_000188:LyesCp084"
|
||||
FT /translation="MIFSTLEHILTHISFSIVSIVITIHLITFLVDEIVKLYDSSEKGI
|
||||
FT IVTFFCITGLLVTRWVSSGHFPLSDLYESLIFLSWSFSLIHIIPYFKKNVLILSKITGP
|
||||
FT SAILTQGFATSGILTEIHQSGILVPALQSEWLIMHVSMMILGYAALLCGSLLSVALLVI
|
||||
FT TFRKNRKLFSKSNVFLNESFFLGENVVENTSFFCTKNYYRSQLIQQLDYWSYRVISLGF
|
||||
FT TFLTIGILSGAVWANEAWGSYWNWDPKETWAFITWIVFAIYLHTRTNRNLRGPNSAIVA
|
||||
FT SIGFLIIWICYFGVNLLGIGLHSYGSFPSTFN"
|
||||
FT gene complement(21114..>21653)
|
||||
FT /gene="ndhA_1"
|
||||
FT /locus_tag=""
|
||||
FT CDS complement(21114..>21653)
|
||||
FT /codon_start=1
|
||||
FT /transl_table=11
|
||||
FT /gene="ndhA_1"
|
||||
FT /locus_tag=""
|
||||
FT /product=NADH dehydrogenase subunit 1
|
||||
FT /inference="similar to DNA sequence:AC_000188:LyesCp085"
|
||||
FT /translation="LSNSLSTVDIVEAQSKYGFWGWNLWRQPIGFIVFLISSLAECERL
|
||||
FT PFDLPEAEEELVAGYQTEYSGIKFGLFYIASYLNLLVSSLFVTVLYLGGWNLSIPYIFV
|
||||
FT PDIFGINKGGKVFGTLIGIFITLAKTYLFLFIPIATRWTLPRLRMDQLLNLGWKFLLPI
|
||||
FT SLGNLLLTTSSQLLSL"
|
||||
FT gene complement(22772..23338)
|
||||
FT /gene="ndhA_2"
|
||||
FT /locus_tag=""
|
||||
FT CDS complement(22772..23338)
|
||||
FT /codon_start=1
|
||||
FT /transl_table=11
|
||||
FT /gene="ndhA_2"
|
||||
FT /locus_tag=""
|
||||
FT /product=NADH dehydrogenase subunit 1
|
||||
FT /inference="similar to DNA sequence:AC_000188:LyesCp085"
|
||||
FT /translation="MIIDTTEIETINSFSKLESLKEVYGIIWMLVPIVTLVLGITIGVL
|
||||
FT VIVWLEREISAGIQQRIGPEYAGPLGILQALADGTKLLLKENLIPSTGDTRLFSIGPSI
|
||||
FT AVISIFLSYSVIPFGDHLVLADLSIGVFFWIAISSIAPVGLLMSGYGSNNKYSFLGGLR
|
||||
FT AAAQSISYEIPLALCVLSISLRVIR"
|
||||
FT gene 42910..45121
|
||||
FT /gene="ndhB"
|
||||
FT /locus_tag=""
|
||||
FT CDS join(42910..43780,44367..45121)
|
||||
FT /codon_start=1
|
||||
FT /transl_table=11
|
||||
FT /gene="ndhB"
|
||||
FT /locus_tag=""
|
||||
FT /product=NADH dehydrogenase subunit 2
|
||||
FT /inference="similar to DNA sequence:NC_008535:CoarCp067"
|
||||
FT /translation="MIWHVQNENFILDSTRIFMKAFHLLLFDGSLIFPECILIFGLILL
|
||||
FT LMIDSTSDQKDIPWLYFISSTSLVMSITALLFRWREEPMISFSGNFQTNNFNEIFQFLI
|
||||
FT LLCSTLCIPLSVEYIECTEMAITEFLLFVLTATLGGMFLCGANDLITIFVAPECFSLCS
|
||||
FT YLLSGYTKKDVRSNEATMKYLLMGGASSSILVHGFSWLYGSSGGEIELQEIVNGLINTQ
|
||||
FT MYNSPGISIALIFITVGIGFKLSPAPSHQWTPDVYEGVRFVREIPTSLSISEMFGFFKT
|
||||
FT PWTCRREMLSPTPVVAFLSVTSKVAASASATRIFNIPFYFSSNEWHLLLEILAILSMIL
|
||||
FT GNLIAITQTSMKRMLAYSSIGQIGYVIIGIIVGDSNDGYASMITYMLFYISMNLGTFAC
|
||||
FT IVLFGLRTGTDNIRDYAGLYTKDPFLALSLALCLLSLGGLPPLAGFFGKLYLFWCGWQA
|
||||
FT GLYFLVLIGLLTSVVSIYYYLKIIKLLMTGRNQEITPHVRNYRRSPLRSNNSIELSMIV
|
||||
FT CVIASTIPGISMNPIIAIAQDSLF"
|
||||
FT exon 42910..43780
|
||||
FT /gene="ndhB"
|
||||
FT /locus_tag=""
|
||||
FT /number=1
|
||||
FT exon 44367..45121
|
||||
FT /gene="ndhB"
|
||||
FT /locus_tag=""
|
||||
FT /number=2
|
||||
FT gene complement(16945..18447)
|
||||
FT /gene="ndhD"
|
||||
FT /locus_tag=""
|
||||
FT CDS complement(16945..18447)
|
||||
FT /codon_start=1
|
||||
FT /transl_table=11
|
||||
FT /gene="ndhD"
|
||||
FT /locus_tag=""
|
||||
FT /product=NADH dehydrogenase subunit 4
|
||||
FT /inference="similar to DNA sequence:NC_007898:LyesC2p017"
|
||||
FT /translation="MNYFPWLTIIVVFPIFAGSLIFFLPHKGNRVIRWYTICICILELL
|
||||
FT LTTYAFCYHFQSDDPLIQLVEDYKWIDFFDFHWRLGIDGLSIGPILLTGFITTLATLAA
|
||||
FT WPVTRDSRLFHFLMLAMYSGQIGLFSSRDLLLFFIMWELELIPVYLLLAMWGGKKRLYS
|
||||
FT ATKFILYTAGGSVFLLMGVLGVALYGSNEPTLNFETSVNQSYPVVLEIIFYIGFFIAFA
|
||||
FT VKSPIIPLHTWLPDTHGEAHYSTCMLLAGILLKMGAYGLIRINMELLPHAHSIFSPWLM
|
||||
FT IIGTIQIIYAASTSLGQRNLKKRIAYSSVSHMGFIIIGISSLTDTGLNGALLQIISHGF
|
||||
FT IGAALFFLAGTTYDRIRLVYLDEMGGIAIPMPKMFTMFSSFSMASLALPGMSGFVAELI
|
||||
FT VFFGIITGQKYLLMPKLLITFVMAIGIILTPIYSLSMPRQMFYGYKLFNAPKDSFFDSG
|
||||
FT PRELFLSISIFLPVIGIGIYPDFVLSLAVDKVEVILSNFFYR"
|
||||
FT gene complement(19062..19367)
|
||||
FT /gene="ndhE"
|
||||
FT /locus_tag=""
|
||||
FT CDS complement(19062..19367)
|
||||
FT /codon_start=1
|
||||
FT /transl_table=11
|
||||
FT /gene="ndhE"
|
||||
FT /locus_tag=""
|
||||
FT /product=NADH dehydrogenase subunit 4L
|
||||
FT /inference="similar to DNA sequence:AC_000188:LyesCp080"
|
||||
FT /translation="MILEHVLVLSAYLFSIGIYGLITSRNMVRALMCLELILNAVNINF
|
||||
FT VTFSDFFDNRQLKGDIFSIFVIAIAAAEAAIGLAIVSSIYRNRKSTRINQSNLLNN"
|
||||
FT gene complement(11509..13722)
|
||||
FT /gene="ndhF"
|
||||
FT /locus_tag=""
|
||||
FT CDS complement(11509..13722)
|
||||
FT /codon_start=1
|
||||
FT /transl_table=11
|
||||
FT /gene="ndhF"
|
||||
FT /locus_tag=""
|
||||
FT /product=NADH dehydrogenase subunit 5
|
||||
FT /inference="similar to DNA sequence:AC_000188:LyesCp073"
|
||||
FT /translation="MEQTYEYAWIIPFIPLPVPMLIGAGLILFPTATKRFRRMWAFQSV
|
||||
FT LLLSIVMIFSIYLSIQQINSSSVYQYVWSWIINNDFSLDFGYLIDPLTSIMSILITTVG
|
||||
FT IMVLIYSDNYMAHDQGYLRFFAYMSFFSTSMLGLVTSSNLIQIYIFWELVGLCSYLLIG
|
||||
FT FWFTRPVAANACQKAFVTNRVGDFGLLLGILGFYWITGSFEFRDLFEIFNNLIYNNELN
|
||||
FT FLFVTLCAVLLFAGAVAKSAQFPLHVWLPDAMEGPTPISALIHAATMVAAGIFLVARLL
|
||||
FT PLFRVIPYIMYLISVIGIITVLLGATLALAQKDIKRGLAYSTMSQLGYMMLALGMGSYR
|
||||
FT SALFHLITHAYSKALLFLGSGSIIHSMETIVGYSPAKSQNMGLMGGLRKHVPITKITFL
|
||||
FT LGTLSLCGIPPLACFWSKDEILNDSWLYSPIFAIIAWATAGLTAFYMFRIYLLTFEGHL
|
||||
FT NAHFQNYGGKQKIPFYSISLWGKNGVKKNSCLLTMNNNESTYFLSKTKYPIAKNGRKMT
|
||||
FT RPFMTIAHFKHKAVSSYPYESDNTMLFPIFVLGLFTLFVGAIGIPFNQEGVNLDILSKW
|
||||
FT LAPSINLLHPKSNNSLDWNEFLKDAVVSVSIAYFGIFIASFLYKPIYSSLKNLEFINSF
|
||||
FT VKKGPKRILWDKILNGIYDWSYNRAYIDAFYTRFFVGGIRGLAEFTHFVDRRVIDGMTN
|
||||
FT GVGVISFIVGEGIKYIGGGRISSYLFLYLAYVSVFLLVYYLLF"
|
||||
FT gene complement(19591..20121)
|
||||
FT /gene="ndhG"
|
||||
FT /locus_tag=""
|
||||
FT CDS complement(19591..20121)
|
||||
FT /codon_start=1
|
||||
FT /transl_table=11
|
||||
FT /gene="ndhG"
|
||||
FT /locus_tag=""
|
||||
FT /product=NADH dehydrogenase subunit 6
|
||||
FT /inference="similar to DNA sequence:AC_000188:LyesCp077"
|
||||
FT /translation="MDLSEPIHDFLLVFLGSGLILGGLGVVLLPNPIYSAFSLGLVLVC
|
||||
FT TSLFYILSNAYFVAAAQLLIYVGAINVLIIFAVMFMNGSEYYKDFHLWTVGDGITSMVC
|
||||
FT ISLFISLITTISDTSWYGIIWTTRSNQIIEQDFLSNSQQIGIHLSTDFFLPFELISIIL
|
||||
FT LVALIGAIAVARQ"
|
||||
FT gene complement(23340..24521)
|
||||
FT /gene="ndhH"
|
||||
FT /locus_tag=""
|
||||
FT CDS complement(23340..24521)
|
||||
FT /codon_start=1
|
||||
FT /transl_table=11
|
||||
FT /gene="ndhH"
|
||||
FT /locus_tag=""
|
||||
FT /product=NADH dehydrogenase subunit 7
|
||||
FT /inference="similar to DNA sequence:AC_000188:LyesCp078"
|
||||
FT /translation="MTAPTTRKDLMIVNMGPQHPSMHGVLRLIVTLDGEDVVDCEPILG
|
||||
FT YLHRGMEKIAENRTIIQYLPYVTRWDYLATMFTEAITINGPEQLGNIQVPKRASYIRVI
|
||||
FT MLELSRIASHLLWLGPFMADIGAQTPFFYIFRERELIYDLFEAATGMRMMHNYFRIGGV
|
||||
FT AADLPYGWIDKCLDFCDYFLTGVAEYQKLITRNPIFLERVEGVGIIGRDEALNWGLSGP
|
||||
FT MLRASGIEWDLRKVDHYESYDEFDWQVQWQREGDSLARYLVRIGEMTESIKIIQQALEG
|
||||
FT IPGGPYENLEMRRFDRLKDPEWNDFEYRFISKKPSPTFELSKQELYVRVEAPKGELGIF
|
||||
FT LIGDQSVFPWRWKIRPPGFINLQILPQLVKRMKLADIMTILGSIDIIMGEVDR"
|
||||
FT gene complement(20526..21029)
|
||||
FT /gene="ndhI"
|
||||
FT /locus_tag=""
|
||||
FT CDS complement(20526..21029)
|
||||
FT /codon_start=1
|
||||
FT /transl_table=11
|
||||
FT /gene="ndhI"
|
||||
FT /locus_tag=""
|
||||
FT /product=NADH dehydrogenase subunit I
|
||||
FT /inference="similar to DNA sequence:AC_000188:LyesCp076"
|
||||
FT /translation="MLPMITEFINYGQQTIRAARYIGQGFMITLSHANRLPVTIQYPYE
|
||||
FT KLITSERFRGRIHFEFDKCIACEVCVRVCPIDLPVVDWKLETDIRKKRLLNYSIDFGIC
|
||||
FT IFCGNCVEYCPTNCLSMTEEYELSTYDRHELNYNQIALGRLPMSVIDDYTIRTISNLPQ
|
||||
FT INNE"
|
||||
FT gene complement(18565..18810)
|
||||
FT /gene="psaC"
|
||||
FT /locus_tag=""
|
||||
FT CDS complement(18565..18810)
|
||||
FT /codon_start=1
|
||||
FT /transl_table=11
|
||||
FT /gene="psaC"
|
||||
FT /locus_tag=""
|
||||
FT /product=photosystem I subunit VII
|
||||
FT /inference="similar to DNA sequence:NC_022409:CEXI_CP_p076"
|
||||
FT /translation="MSHSVKIYDTCIGCTQCVRACPTDVLEMIPWDGCKAKQIASAPRT
|
||||
FT EDCVGCKRCESACPTDFLSVRVYLWHETTRSMGLAY"
|
||||
FT gene 53817..55307
|
||||
FT /gene="rpl2"
|
||||
FT /locus_tag=""
|
||||
FT CDS join(53817..54201,54868..55307)
|
||||
FT /codon_start=1
|
||||
FT /transl_table=11
|
||||
FT /gene="rpl2"
|
||||
FT /locus_tag=""
|
||||
FT /product=ribosomal protein L2
|
||||
FT /inference="similar to DNA sequence:AC_000188:LyesCp030"
|
||||
FT /translation="MAIHLYKTSTPSTRNGTVDSQVKSNPRNNLIYGQRRCGKGRNARG
|
||||
FT IITARHRGGGHKRLYRKIDFRRNEKDIYGRIVTIEYDPNRNAYICLIHYGDGEKRYILH
|
||||
FT PRGAIIGDTIVSGTEVPIKMGNALPSTDMPLGTAIHNIEITLGKGGQLARAAGAVAKLI
|
||||
FT AKEGKSATLKLPSGEVRLISKNCSATVGQVGNVGVNQKSLGRAGSKRWLGKRPVVRGVV
|
||||
FT MNPVDHPHGGGEGRAPIGRKKPTTPWGYPALGRRSRKRNKYSDNLILRRRSK"
|
||||
FT exon 53817..54201
|
||||
FT /gene="rpl2"
|
||||
FT /locus_tag=""
|
||||
FT /number=1
|
||||
FT exon 54868..55307
|
||||
FT /gene="rpl2"
|
||||
FT /locus_tag=""
|
||||
FT /number=2
|
||||
FT gene 53517..53798
|
||||
FT /gene="rpl23"
|
||||
FT /locus_tag=""
|
||||
FT CDS 53517..53798
|
||||
FT /codon_start=1
|
||||
FT /transl_table=11
|
||||
FT /gene="rpl23"
|
||||
FT /locus_tag=""
|
||||
FT /product=ribosomal protein L23
|
||||
FT /inference="similar to DNA sequence:AC_000188:LyesCp026"
|
||||
FT /translation="MDGIKYAVFTDKSIRLLGKNQYTSNVESGSTRTEIKHWVELFFGV
|
||||
FT KVIAMNSHRLPGKSRRMGPIMGHTMHYRRMIITLQPGYSIPPLRKKRT"
|
||||
FT gene 14505..14672
|
||||
FT /gene="rpl32"
|
||||
FT /locus_tag=""
|
||||
FT CDS 14505..14672
|
||||
FT /codon_start=1
|
||||
FT /transl_table=11
|
||||
FT /gene="rpl32"
|
||||
FT /locus_tag=""
|
||||
FT /product=ribosomal protein L32
|
||||
FT /inference="similar to DNA sequence:AC_000188:LyesCp036"
|
||||
FT /translation="MAVPKKRTSTSKKRIRKNIWKRKGYWVALKAFSLAKSLSTGNSKS
|
||||
FT FFVRQTKINK"
|
||||
FT gene complement(24633..24896)
|
||||
FT /gene="rps15"
|
||||
FT /locus_tag=""
|
||||
FT CDS complement(24633..24896)
|
||||
FT /codon_start=1
|
||||
FT /transl_table=11
|
||||
FT /gene="rps15"
|
||||
FT /locus_tag=""
|
||||
FT /product=ribosomal protein S15
|
||||
FT /inference="similar to DNA sequence:AC_000188:LyesCp029"
|
||||
FT /translation="MVKNSVISVISQEEKKGSVEFQVFNFTNKIRRLTSHLELHKKDYL
|
||||
FT SQRGLKKILGKRQRLLAYLAKKNRVRYKELINRLDIRETKTR"
|
||||
FT gene 42157..42624
|
||||
FT /gene="rps7"
|
||||
FT /locus_tag=""
|
||||
FT CDS 42157..42624
|
||||
FT /codon_start=1
|
||||
FT /transl_table=11
|
||||
FT /gene="rps7"
|
||||
FT /locus_tag=""
|
||||
FT /product=ribosomal protein S7
|
||||
FT /inference="similar to DNA sequence:AC_000188:LyesCp008"
|
||||
FT /translation="MSRRGTAEKKTAKSDPIYRNRLVNMLVNRILKHGKKSLAYQIIYR
|
||||
FT AVKKIQQKTETNPLSVLRQAIRGVTPDITVKARRVGGSTHQVPIEIGSTQGKALAIRWL
|
||||
FT LAASRKRPGRNMAFKLSSELVDAAKGSGDAIRKKEETHRMAEANRAFAHFR"
|
||||
FT gene complement(25298..30973)
|
||||
FT /gene="ycf1"
|
||||
FT /locus_tag=""
|
||||
FT CDS complement(25298..30973)
|
||||
FT /codon_start=1
|
||||
FT /transl_table=11
|
||||
FT /gene="ycf1"
|
||||
FT /locus_tag=""
|
||||
FT /product=hypothetical chloroplast RF1
|
||||
FT /inference="similar to DNA sequence:AC_000188:LyesCp019"
|
||||
FT /translation="MIFQSFLLGNLVSLCMKIINSVVVVGLYYGFLTTFSIGPSYLFLL
|
||||
FT RALVMEEGTEKKVSATTGFITGQLMMFISIYYAPLHLALGRPHTITVLALPYLLFHFFW
|
||||
FT NNHKHFFDYGSTTRNSMRNLSIQCVFLNNLIFQLFNHFILPSSMLARLVNIYLFRCNNK
|
||||
FT ILFVTSGFVGWLIGHILFMKWLGLVLVWIRQNHSIRSNKYIRSNKYLVLELRNSMARIF
|
||||
FT SILLFITCVYYLGRIPSPILTKKLKEASKTEERVESEEERDVEIETASEMKGTKQEQEG
|
||||
FT STEEDPYPSPSLFSEEGWDPDKIDETEEIRVNGKDKIKDKFHSHLTETGYNNINTSNSP
|
||||
FT IYDYQDSYLNNNNTGNLENCKLQLLDKKNENQEQDLFWFQKPLVSLLFDYNRWNRPFRY
|
||||
FT IKNNRFEQAVRTEMSQYFFDTCKSDGKQKISFTYPPSLSTFWKMIKRKIPLLSLQKTLP
|
||||
FT NELDTQWVSTNKEKSNNLNKEFLNRLEILDKESLSLDILETRTRFCNDDTKKEYVPKMY
|
||||
FT DPLLNGLYRGTIKKGVSSSIINNTLLENWEKRVRLNRIHTIFLPNIDYQEFEQKAYTID
|
||||
FT KKPLSTEIDEFLTLINELGNEAKSSLNLKGLSLFSDQEQRRANSEKRTKFVKFVFNALD
|
||||
FT PNETKSGKKSIGIKEISKKVPRWSHKLITELDQQMGEFKDRASMDHQLRSRKAKRVVIF
|
||||
FT TDNKATKDAEEEVALISYSQQSDFRRGIITGSMRAQRRKTFISKLFQANVHSPLFVDRI
|
||||
FT TPLRLFSFDISELIKPILKNWTDKEGEFKILESREEQTKREEKKEKDKKEDNKRKEQAR
|
||||
FT IAIEEAWDTIPLAQIIRGYMLITQSILRKYILLPALIIAKNIGRMLFLQLPEWSEDLQE
|
||||
FT WNREMQIKCTYNGVQLSETEFPKNWLRDGIQIKILFPFCLKPWHISKLYPSRRELMKKQ
|
||||
FT KQKDDFCFLTVWGMEAELPFGSPRKRPSFFEPIFKELEKKIGKFKKKYFLTLKILKGKT
|
||||
FT KLFRKVSKETTKLFIKSIGFLKKIKKELSKVNLIVLFRFKEISESNETKKEKDYLISNQ
|
||||
FT IINESFRQIESGNWPNSSLIETKMKDLTNRTSTIKNKIERITKEKKKVTPEIDINPNKT
|
||||
FT NNIKKFESPKKIFQILKSRNTRVIWKFHYFLKLFIQRLYINLFLSIINIPRITTQLFLK
|
||||
FT STNKLIEKFISNNEINQEKINNKKKIHFMFISTIKKSLYNISKKNSHILCDLSYLSQAY
|
||||
FT VFYKLSQTQVINFSKFRSVLQYNTTSCFLKTKIKDYFKTLGIFHSELKHKKLQSYRINQ
|
||||
FT WKNWLRWHYQYDLSQIRWSRLMPKKWRTRVNQSCMAQNKNRNLNKWNSYEKDQLLHYKK
|
||||
FT ENDSELYSLSNEKDNFKKCYGYGLLAYKSINYENKSDSFFSRLPFEVQVKKNLEISYNS
|
||||
FT NTSKHNFVDMPGNLHINNYLRKGNILDRERNLDRKYFDWKIIHFSLRQKGDIEAWVKID
|
||||
FT TNSNPNTKIGINNYQIIDKIEKKGVFYLTTHQNPEKTQKNSKKFFFDWMGMNEKIFNRP
|
||||
FT ILNLEFWFFPEFVLLYNVYKIKPWIIPSKFLLFNLNTNKNVSQNKNQNFFLPSNKKIKI
|
||||
FT KNRSQEAKEPPSQRERGSDIENKGNLSPVFSKHQTDLEKDYVESDTKKGKNKKQYKSNT
|
||||
FT EAELDLFLKRYLLFQLRWNGALNQRMFENIKVYCLLLRLINPTKITISSIQRREMSLDI
|
||||
FT MLIQANLPLTDLMKKGVLIIEPIRLSVKDNGQFIMYQTIGISLIHKSKHQTNQRYREQR
|
||||
FT YVDKKNFDEFILQPQTQRINTEKTHFGLLVPENILWSRRRRELRIRSFFNSWNWNVVDR
|
||||
FT NSVFCNETNVKNWSQFLGERKPLYKDKNELIKFKFFFWPNYRLEDLACMNRYWFDTNNG
|
||||
FT SRFSILRIHMYPRLKIN"
|
||||
FT gene complement(46313..53149)
|
||||
FT /gene="ycf2"
|
||||
FT /locus_tag=""
|
||||
FT CDS complement(46313..53149)
|
||||
FT /codon_start=1
|
||||
FT /transl_table=11
|
||||
FT /gene="ycf2"
|
||||
FT /locus_tag=""
|
||||
FT /product=Ycf2
|
||||
FT /inference="similar to DNA sequence:AC_000188:LyesCp004"
|
||||
FT /translation="MRGHQFKSWIFELREILREIKNSHHFLDSWTQFNSVGSFIHIFFH
|
||||
FT QERFLKLFDPRIWSILLSRNSQGSPSNRYFTIKGVILFVVAVLIYRINNRNMVERKNLY
|
||||
FT LIGLLPIPMNSIGPRNDTLEESVGSSNINRLIVSLLYLPKGKKISESCFLNPKESTWVL
|
||||
FT PITKKCSMPESNWGSRWWRNWIGKKRDSSCKISNETVAGIEILFKEKDLKYLEFLFVYY
|
||||
FT MDDPIRKDHDWELFDRLSLRKSRNRINLNSGPLFEILVKHWISYLMSAFREKIPIEVEG
|
||||
FT FFKQQGAGSTIQSNDIEHVSHLFSRNKWAISLQNCAQFHMWQFRQDLFVSWGKNPPESD
|
||||
FT FLRNVSRENWIWLDNVWLVNKDRFFSKVQNVSSNIQYDSTRSSFVQVTDSSQLKGSSDQ
|
||||
FT SRDHLDSISNEDSEYHTLINQREIQQRKERSILWDPSFLQTERKEIESGRFPKCLSGYS
|
||||
FT SMSRLFTEREKQMINHLFPEEIEEFLGNPTRSVRSFFSDRWSELHLGSNPTERSTRDQK
|
||||
FT LLKKQQDLSFVPSRRSEKKEMVNIFKIITYLQNTVSIHPISSDPGCDMVPKDEPDMDSS
|
||||
FT NKISFLNKNPFFDLFHLFHDRNRGGYTLHYDFASEERFQEMADLFTLSITEPDLVYHKG
|
||||
FT FAFSIDSCGLDQKQFLNEARDESKKKSLLVLPPIFYEENESFSRRIRKKWVRISCGNDL
|
||||
FT EDPKPKIVVFASNNIMEAVTQYRLIRNLIQIQYSTYGYIRNVLNRFFLMNRSDRNFEYG
|
||||
FT IQRDQIGKDTLNHRTIMKYTINQYLSNLKKSQKKWFEPLILISRTERSMNRDPDAYRYK
|
||||
FT WSNGSKSFQEHLEQSVSKQKSRFQVVFDRLRINQYSIDWSEVIDKKDLSKSLRFFLSKS
|
||||
FT LLFLSKLLLFLSNSLPFFCVSFGNIPIHRSEIYIYEELKGPNDQLCNQLLESIGLQIVH
|
||||
FT LKKLKPFLLDDHDTSQKSKFLINGGTISPFLFNKIPKWMIDSFHTRNNRRKSFDNPDSY
|
||||
FT FSMIFHDQDNWLNPVKPFHRSSLISSFYKANRLRFLNNPHHFCFYWNTRFPFSVEKARI
|
||||
FT NNSDFTYGQFLNILFIRNKIFSLCVGKKKHAFWGRDTISPIESQVSNIFIPNDFPQSGD
|
||||
FT ETYNLYKSFHFPSRSDPFVRRAIYSIADISGTPLTEGQIVNFERTYCQPLSDMNLSDSE
|
||||
FT GKNLHQYLNFNSNMGLIHTPCSEKDLSSEKRKKWSLCLKKCVEKGQTYRTFQRDSAFST
|
||||
FT LSKWNLFQTYMPWFLTSTGYKYLNLIFLDTFSDLLPILSSSQKFVSIFPDIMHGSGISW
|
||||
FT RILQKKLCLPQWNLISEISSKCLHNLLLSEEMIHRNNESPLISTHLRSPNAREFLYSIL
|
||||
FT FLLLVAGYLVRTHLLFVSRASSELQTEFERVKSLMTPSSMIELRKLLDRYPTSEPNSFW
|
||||
FT LKNLFLVALEQLGDSLEEIRGSASGGNMLGPAYGVKSIRSKKKDWNINLIEIIDLIPNP
|
||||
FT INRITFSRNTRHLSHTSKEIYSLIRKRKNVNGDWIDEKIESWVANSDSIDDEEREFLVQ
|
||||
FT FSTLTTENRIDQILLSLTHSDHLSKNDSGYQMIEQPGAIYLRYLVDIHKKHLMNYEFNP
|
||||
FT SCLAERRIFLAHYQTITYSQTSCGENSFHFPSHGKPFSLRLALSPSRGILVIGSIGTGR
|
||||
FT SYLVKYLATNSYVPFITVFLNKFLDNKSKGFLLDEIDIDDSDDIDDSDNLDASDDIDRD
|
||||
FT LDTELELLTRMNGLTVDMMPEIDRFYITLQFELAKAMSPCIIWIPNIHDLDVNESNDLS
|
||||
FT LGLLVNHLSRDCERCSTRNILVIASTHIPQKVDPALIAPNKLNTCIKIRRLLIPQQRKH
|
||||
FT FFTLSYTRGFHLEKKMFHTNGFGSITMGSNARDLVALTNEVLSISITQKKSIIDTNTIR
|
||||
FT SALHRQTWDLRSQVRSVQDHGILFYQIGRAVAQNVLLSNCPIDPISIYMKKKSCNEGDS
|
||||
FT YLYKWYFELGTSMKRLTILLYLLSCSAGSVAQDLWSLSVPDEKNGITSYGLVENDSDLV
|
||||
FT HGLLEVEGALVGSSRTEKDCSQFDNDRVTLLLRPEPRNPLDMMQKGSWSILDQRFLYEK
|
||||
FT YESEFEEGEGEGALDPQEDLFNHIVWAPRIWRPWGFLFDCIERPNELGFPYWSRSFRGK
|
||||
FT RIIYDEEDELQENDSGFLQSGTMQYQTRDRSQGLFRISQFIWDPADPLFFLFKDQPPGS
|
||||
FT VFSHRELFADEEMSKGLLTSQTDPPTSLYKRWFIKNTQEKHFELLINRQRWLRTNSSLS
|
||||
FT NGSFRSNTLSESYQYLSNLFLSNGTLLDQMPKTLLRKRWLFPDEMKIGFM"
|
17
detectors/go_test.sh
Executable file
17
detectors/go_test.sh
Executable file
@ -0,0 +1,17 @@
|
||||
#!/bin/csh -f
#
# Run the test script of every detector listed in $dirs.
#
# The complete output of each test is appended to test.log; only the lines
# starting with '+' or containing '*' are echoed to the terminal.
#
# Exit status: the accumulated pipeline statuses (0 when nothing failed).
#

set dirs = ("normalize" "ir" "rrna" "trna" "cds")

# start from an empty log file
echo -n "" > test.log

@ nerr = 0

foreach d ($dirs)
  # Call the script as ./go_test.sh so the run does not depend on '.'
  # being in the command search path.
  (cd $d/test && ./go_test.sh) |& tee -a test.log |& egrep '^\+|\*'
  # NOTE(review): this counts a failing test only if the shell propagates
  # the failure of any pipeline element into $status (tcsh 'anyerror');
  # otherwise $status is the one of egrep -- confirm on the target shell.
  @ nerr += $status
end

exit $nerr
|
||||
|
||||
|
18
detectors/ir/test/go_test.sh
Executable file
18
detectors/ir/test/go_test.sh
Executable file
@ -0,0 +1,18 @@
|
||||
#!/bin/csh -f
#
# Regression test for the IR detector: the standard output of
# ../bin/go_ir.sh on test.fst must be identical to test.ref.
# Exits with the status of the comparison (0 on success).
#

echo "+ [testing IR]"

# produce a fresh result, then compare it silently with the reference
../bin/go_ir.sh test.fst > test.bak
diff -q test.bak test.ref >& /dev/null
set stat = $status

if ($stat != 0) then
  echo '+[0;31m IR test Failure[m'
else
  echo '+[0;32m IR test Ok[m'
  # the result file is only kept when the test fails
  \rm -r test.bak
endif

exit $stat
|
2593
detectors/ir/test/test.fst
Normal file
2593
detectors/ir/test/test.fst
Normal file
File diff suppressed because it is too large
Load Diff
10
detectors/ir/test/test.ref
Normal file
10
detectors/ir/test/test.ref
Normal file
@ -0,0 +1,10 @@
|
||||
FT misc_feature 1..85882
|
||||
FT /note="large single copy region (LSC)"
|
||||
FT repeat_region 85883..111490
|
||||
FT /rpt_type=INVERTED
|
||||
FT /note="left inverted repeat B; IRB"
|
||||
FT misc_feature 111491..129853
|
||||
FT /note="small single copy region (SSC)"
|
||||
FT repeat_region 129854..155461
|
||||
FT /rpt_type=INVERTED
|
||||
FT /note="left inverted repeat A; IRA"
|
18
detectors/normalize/test/go_test.sh
Executable file
18
detectors/normalize/test/go_test.sh
Executable file
@ -0,0 +1,18 @@
|
||||
#!/bin/csh -f
#
# Regression test for the Normalize step: the standard output of
# ../bin/go_normalize.sh on test.fst must be identical to test.ref.
# Exits with the status of the comparison (0 on success).
#

echo "+ [testing Normalize]"

# produce a fresh result, then compare it silently with the reference
../bin/go_normalize.sh test.fst > test.bak
diff -q test.bak test.ref >& /dev/null
set stat = $status

if ($stat != 0) then
  echo '+[0;31m Normalize test Failure[m'
else
  echo '+[0;32m Normalize test Ok[m'
  # the result file is only kept when the test fails
  \rm -r test.bak
endif

exit $stat
|
2593
detectors/normalize/test/test.fst
Normal file
2593
detectors/normalize/test/test.fst
Normal file
File diff suppressed because it is too large
Load Diff
2593
detectors/normalize/test/test.ref
Normal file
2593
detectors/normalize/test/test.ref
Normal file
File diff suppressed because it is too large
Load Diff
18
detectors/rrna/test/go_test.sh
Executable file
18
detectors/rrna/test/go_test.sh
Executable file
@ -0,0 +1,18 @@
|
||||
#!/bin/csh -f
#
# Regression test for the rRNA detector: the standard output of
# ../bin/go_rrna.sh on test.fst must be identical to test.ref.
# Exits with the status of the comparison (0 on success).
#

echo "+ [testing rRNA]"

# produce a fresh result, then compare it silently with the reference
../bin/go_rrna.sh test.fst > test.bak
diff -q test.bak test.ref >& /dev/null
set stat = $status

if ($stat != 0) then
  echo '+[0;31m rRNA test Failure[m'
else
  echo '+[0;32m rRNA test Ok[m'
  # the result file is only kept when the test fails
  \rm -r test.bak
endif

exit $stat
|
1111
detectors/rrna/test/test.fst
Normal file
1111
detectors/rrna/test/test.fst
Normal file
File diff suppressed because it is too large
Load Diff
24
detectors/rrna/test/test.ref
Normal file
24
detectors/rrna/test/test.ref
Normal file
@ -0,0 +1,24 @@
|
||||
FT rRNA 1969..3468
|
||||
FT /gene="rrn16S"
|
||||
FT /product="16S ribosomal RNA"
|
||||
FT rRNA complement(37878..39377)
|
||||
FT /gene="rrn16S"
|
||||
FT /product="16S ribosomal RNA"
|
||||
FT rRNA 5664..8473
|
||||
FT /gene="rrn23S"
|
||||
FT /product="23S ribosomal RNA"
|
||||
FT rRNA complement(32873..35682)
|
||||
FT /gene="rrn23S"
|
||||
FT /product="23S ribosomal RNA"
|
||||
FT rRNA 8575..8677
|
||||
FT /gene="rrn4.5S"
|
||||
FT /product="4.5S ribosomal RNA"
|
||||
FT rRNA complement(32669..32771)
|
||||
FT /gene="rrn4.5S"
|
||||
FT /product="4.5S ribosomal RNA"
|
||||
FT rRNA 8934..9054
|
||||
FT /gene="rrn5S"
|
||||
FT /product="5S ribosomal RNA"
|
||||
FT rRNA complement(32292..32412)
|
||||
FT /gene="rrn5S"
|
||||
FT /product="5S ribosomal RNA"
|
18
detectors/trna/test/go_test.sh
Executable file
18
detectors/trna/test/go_test.sh
Executable file
@ -0,0 +1,18 @@
|
||||
#!/bin/csh -f
#
# Regression test for the tRNA detector: the standard output of
# ../bin/go_trna.sh on test.fst must be identical to test.ref.
# Exits with the status of the comparison (0 on success).
#

echo "+ [testing tRNA]"

# produce a fresh result, then compare it silently with the reference
../bin/go_trna.sh test.fst > test.bak
diff -q test.bak test.ref >& /dev/null
set stat = $status

if ($stat != 0) then
  echo '+[0;31m tRNA test Failure[m'
else
  echo '+[0;32m tRNA test Ok[m'
  # the result file is only kept when the test fails
  \rm -r test.bak
endif

exit $stat
|
1111
detectors/trna/test/test.fst
Normal file
1111
detectors/trna/test/test.fst
Normal file
File diff suppressed because it is too large
Load Diff
52
detectors/trna/test/test.ref
Normal file
52
detectors/trna/test/test.ref
Normal file
@ -0,0 +1,52 @@
|
||||
FT tRNA 1670..1741
|
||||
FT /gene="trnV"
|
||||
FT /anticodon="GAC"
|
||||
FT /product="tRNA-Val(GAC)"
|
||||
FT tRNA join(3769..3767,3802..4562)
|
||||
FT /gene="trnE"
|
||||
FT /anticodon="UUC"
|
||||
FT /product="tRNA-Glu(UUC)"
|
||||
FT tRNA join(4627..4625,4665..5510)
|
||||
FT /gene="trnA"
|
||||
FT /anticodon="UGC"
|
||||
FT /product="tRNA-Ala(UGC)"
|
||||
FT tRNA 9317..9391
|
||||
FT /gene="trnR"
|
||||
FT /anticodon="ACG"
|
||||
FT /product="tRNA-Arg(ACG)"
|
||||
FT tRNA complement(9972..10045)
|
||||
FT /gene="trnN"
|
||||
FT /anticodon="GUU"
|
||||
FT /product="tRNA-Asn(GUU)"
|
||||
FT tRNA 15583..15662
|
||||
FT /gene="trnL"
|
||||
FT /anticodon="UAG"
|
||||
FT /product="tRNA-Leu(UAG)"
|
||||
FT tRNA 31301..31374
|
||||
FT /gene="trnN"
|
||||
FT /anticodon="GUU"
|
||||
FT /product="tRNA-Asn(GUU)"
|
||||
FT tRNA complement(31955..32029)
|
||||
FT /gene="trnR"
|
||||
FT /anticodon="ACG"
|
||||
FT /product="tRNA-Arg(ACG)"
|
||||
FT tRNA complement(join(35836..35834,35874..36719))
|
||||
FT /gene="trnA"
|
||||
FT /anticodon="UGC"
|
||||
FT /product="tRNA-Ala(UGC)"
|
||||
FT tRNA complement(join(36784..36782,36817..37577))
|
||||
FT /gene="trnE"
|
||||
FT /anticodon="UUC"
|
||||
FT /product="tRNA-Glu(UUC)"
|
||||
FT tRNA complement(39605..39676)
|
||||
FT /gene="trnV"
|
||||
FT /anticodon="GAC"
|
||||
FT /product="tRNA-Val(GAC)"
|
||||
FT tRNA 45660..45742
|
||||
FT /gene="trnL"
|
||||
FT /anticodon="CAA"
|
||||
FT /product="tRNA-Leu(CAA)"
|
||||
FT tRNA 53278..53352
|
||||
FT /gene="trnI"
|
||||
FT /anticodon="CAU"
|
||||
FT /product="tRNA-Ile(CAU)"
|
Reference in New Issue
Block a user