First version of the tRNA detector and of the global organnot.sh script

Former-commit-id: f2a75cf99b24875c90c426c2afb22a75b972bf60
Former-commit-id: 65e3dfb35df06ca69bb29b690c9a40e8940ac6bf
This commit is contained in:
2015-10-11 10:39:59 -03:00
parent 6015339839
commit c32f7cdde6
9 changed files with 175 additions and 133 deletions

View File

@ -61,4 +61,4 @@ pushTmpDir ORG.ir
popTmpDir
exit 0
exit 0

View File

@ -27,12 +27,14 @@
SCRIPT_DIR="$(dirname ${BASH_SOURCE[0]})"
source ${SCRIPT_DIR}/../lib/lookforIR.lib.sh
ORG_DEBUG=1
pushTmpDir ORG.normalize
tmpfasta1="tmp_$$_1.fasta"
tmpfasta2="tmp_$$_2.fasta"
logdebug "Running on : $QUERY"
loginfo "Computing the genome size..."
genome_length=$(seqlength $QUERY)

View File

@ -47,7 +47,9 @@ function lookForIR {
}
SCDB="${IR_DATA_DIR}/SC_RefDB"
QUERY="${CALL_DIR}/$1"
openLogFile "${QUERY/.*/}.log"
if [[ ! "$1" =~ ^/ ]]; then
QUERY="${CALL_DIR}/$1"
else
QUERY="$1"
fi

View File

@ -31,7 +31,7 @@ pushTmpDir ORG.buildSCDB
loginfo "Building LSC coorientation graph..."
${PROG_DIR}/coorienteSC.sh LSC.fasta 20000 ${LOGFILE} > LSC.tgf
${PROG_DIR}/coorienteSC.sh LSC.fasta 20000 ${ORG_LOGFILE} > LSC.tgf
${PROG_DIR}/cc.py LSC.tgf > LSC.cc
loginfo " --> $(awk '{print $1}' LSC.cc | uniq | wc -l) connected componants"
loginfo "Done"
@ -73,7 +73,7 @@ pushTmpDir ORG.buildSCDB
loginfo "Checking LCS homogeneity..."
${PROG_DIR}/coorienteSC.sh LSC.direct.fasta 20000 ${LOGFILE} > LSC_RefDB.tgf
${PROG_DIR}/coorienteSC.sh LSC.direct.fasta 20000 ${ORG_LOGFILE} > LSC_RefDB.tgf
${PROG_DIR}/cc.py LSC_RefDB.tgf > LSC_RefDB.cc
NCC=$(awk '{print $1}' LSC_RefDB.cc | uniq | wc -l)
if (( $NCC == 1 )); then
@ -103,7 +103,7 @@ pushTmpDir ORG.buildSCDB
loginfo "Building SSC coorientation graph..."
${PROG_DIR}/coorienteSC.sh SSC.fasta 5000 ${LOGFILE} > SSC.tgf
${PROG_DIR}/coorienteSC.sh SSC.fasta 5000 ${ORG_LOGFILE} > SSC.tgf
${PROG_DIR}/cc.py SSC.tgf > SSC.cc
loginfo " --> $(awk '{print $1}' SSC.cc | uniq | wc -l) connected componants"
loginfo "Done"
@ -146,7 +146,7 @@ pushTmpDir ORG.buildSCDB
loginfo "Checking SSC homogeneity..."
${PROG_DIR}/coorienteSC.sh SSC.direct.fasta 5000 ${LOGFILE} > SSC_RefDB.tgf
${PROG_DIR}/coorienteSC.sh SSC.direct.fasta 5000 ${ORG_LOGFILE} > SSC_RefDB.tgf
${PROG_DIR}/cc.py SSC_RefDB.tgf > SSC_RefDB.cc
NCC=$(awk '{print $1}' SSC_RefDB.cc | uniq | wc -l)
if (( $NCC == 1 )); then

42
detectors/trna/bin/go_trna.sh Executable file
View File

@ -0,0 +1,42 @@
#!/bin/bash
#
# Annotate tRNA
#
#========================================================================================
#
# Annotate tRNA based on the Aragorn software predictions.
# go_trna.sh <FASTAFILE>
#
# - <FASTAFILE> : The fasta file containing the genome to annotate
#
# Results are printed to the standart output
#
#========================================================================================
# -- CAUTION -- Works as long than the script
# is not called through a symlink
SCRIPT_DIR="$(dirname ${BASH_SOURCE[0]})"
source "${SCRIPT_DIR}/../../../scripts/bash_init.sh"
pushTmpDir ORG.trna
CAUTRNADB="${TRNA_DATA_DIR}/CAU_tRNA_DB.fasta"
export CAUTRNADB
if [[ ! "$1" =~ ^/ ]]; then
QUERY="${CALL_DIR}/$1"
else
QUERY="$1"
fi
TRNA=$(basename ${QUERY})
aragorn -i -w -seq ${QUERY} | \
${PROG_DIR}/../lib/aragorn_wrapper.awk
popTmpDir
exit 0

View File

@ -7,17 +7,9 @@ function genomeid() {
return gid;
}
function home() {
"echo $ORGANNOT_HOME" | getline homedir;
return homedir;
}
function prog(program) {
return home() "/" program;
}
function trnalib(prognam) {
return home() "/lib/trnaCAU.ref.fasta";
function trnalib() {
"echo $CAUTRNADB" | getline ref;
return ref
}
function awkPID() {
@ -65,19 +57,20 @@ function patchtRNA(anticodon,trna,seq) {
if (anticodon == "cat") {
file=printfasta(trna "_" anticodon,seq,"");
command= prog("sumatra") " -d -n " file " " trnalib();
command= "sumatra -t 0.9 -x -n " file " " trnalib() " 2>> /dev/null";
while ((command | getline output) > 0) {
split(output,field," ");
n[field[2]]++;
d[field[2]]=field[3];
match(field[2],"trn.M?");
trna=substr(field[2],RSTART,RLENGTH);
n[trna]+=field[5];
}
close(command)
dmin=1;
nmax=0;
for (i in n) {
dist=d[i]/n[i];
if (dist < dmin) {
dmin=dist;
dist=n[i];
if (n[i] > nmax) {
nmax=n[i];
trna=i;
}
}
@ -94,6 +87,42 @@ function gene2product(gene) {
return "tRNA-" AA3[substr(gene,4,1)];
}
function emblTRNA(geneid,trna,loc,anti,intron,seq) {
if (loc ~ /^c/) {
sub("c\\[","complement(",loc);
sub("\\]",")",loc);
sub(",","..",loc)}
else {
sub("\\[","",loc);
sub("\\]","",loc);
sub(",","..",loc)}
anti=toupper(anti);
gsub("T","U",anti);
product=gene2product(trna);
if (intron!="") {
l=length(intron);
intron=substr(intron,2,l-2);
split(intron,intronpos,",");
ib=intronpos[0];
ie=intronpos[1];
match(loc,"[0-9][0-9]*");
gb=substr(loc,RSTART,RLENGTH);
sub("\\.\\.",".." (gb + ib -2) "," (gb + ie) "..",loc); \
sub("complement","complement(join",loc);\
if (substr(loc,1,1) ~ /[0-9]/) {
loc="join("loc}
loc=loc")";
}
print "FT tRNA " loc;
print "FT /gene=\""trna"\"";
print "FT /anticodon=\""anti"\"";
print "FT /product=\""product"("anti")\"";
}
function gffTRNA(geneid,trna,loc,anti,intron,seq) {
if (loc ~ /^c/) {
complement="-";
@ -139,7 +168,6 @@ function gffTRNA(geneid,trna,loc,anti,intron,seq) {
}
BEGIN {
print ARGV[1];
AA1["Ala"]="A";
AA1["Cys"]="C";
AA1["Asp"]="D";
@ -201,7 +229,7 @@ BEGIN {
{ seq=epissage(intron,seq);
trna=patchtRNA(anti,trna,seq);
# print geneid,trna,loc,anti,"'"intron"'",seq;
gffTRNA(geneid,trna,loc,anti,intron,seq);
emblTRNA(geneid,trna,loc,anti,intron,seq);
seq=""
}
@ -225,5 +253,5 @@ BEGIN {
END { seq=epissage(intron,seq);
trna=patchtRNA(anti,trna,seq);
# print geneid,trna,loc,anti,"'"intron"'",seq;
gffTRNA(geneid,trna,loc,anti,intron,seq);
emblTRNA(geneid,trna,loc,anti,intron,seq);
}