First version of the tRNA detector and of the global organnot.sh script
Former-commit-id: f2a75cf99b24875c90c426c2afb22a75b972bf60 Former-commit-id: 65e3dfb35df06ca69bb29b690c9a40e8940ac6bf
This commit is contained in:
@ -61,4 +61,4 @@ pushTmpDir ORG.ir
|
||||
|
||||
popTmpDir
|
||||
|
||||
exit 0
|
||||
exit 0
|
@ -27,12 +27,14 @@
|
||||
SCRIPT_DIR="$(dirname ${BASH_SOURCE[0]})"
|
||||
source ${SCRIPT_DIR}/../lib/lookforIR.lib.sh
|
||||
|
||||
ORG_DEBUG=1
|
||||
|
||||
pushTmpDir ORG.normalize
|
||||
|
||||
tmpfasta1="tmp_$$_1.fasta"
|
||||
tmpfasta2="tmp_$$_2.fasta"
|
||||
|
||||
logdebug "Running on : $QUERY"
|
||||
|
||||
loginfo "Computing the genome size..."
|
||||
genome_length=$(seqlength $QUERY)
|
||||
|
@ -47,7 +47,9 @@ function lookForIR {
|
||||
}
|
||||
|
||||
SCDB="${IR_DATA_DIR}/SC_RefDB"
|
||||
QUERY="${CALL_DIR}/$1"
|
||||
|
||||
|
||||
openLogFile "${QUERY/.*/}.log"
|
||||
if [[ ! "$1" =~ ^/ ]]; then
|
||||
QUERY="${CALL_DIR}/$1"
|
||||
else
|
||||
QUERY="$1"
|
||||
fi
|
||||
|
@ -31,7 +31,7 @@ pushTmpDir ORG.buildSCDB
|
||||
|
||||
|
||||
loginfo "Building LSC coorientation graph..."
|
||||
${PROG_DIR}/coorienteSC.sh LSC.fasta 20000 ${LOGFILE} > LSC.tgf
|
||||
${PROG_DIR}/coorienteSC.sh LSC.fasta 20000 ${ORG_LOGFILE} > LSC.tgf
|
||||
${PROG_DIR}/cc.py LSC.tgf > LSC.cc
|
||||
loginfo " --> $(awk '{print $1}' LSC.cc | uniq | wc -l) connected componants"
|
||||
loginfo "Done"
|
||||
@ -73,7 +73,7 @@ pushTmpDir ORG.buildSCDB
|
||||
|
||||
|
||||
loginfo "Checking LCS homogeneity..."
|
||||
${PROG_DIR}/coorienteSC.sh LSC.direct.fasta 20000 ${LOGFILE} > LSC_RefDB.tgf
|
||||
${PROG_DIR}/coorienteSC.sh LSC.direct.fasta 20000 ${ORG_LOGFILE} > LSC_RefDB.tgf
|
||||
${PROG_DIR}/cc.py LSC_RefDB.tgf > LSC_RefDB.cc
|
||||
NCC=$(awk '{print $1}' LSC_RefDB.cc | uniq | wc -l)
|
||||
if (( $NCC == 1 )); then
|
||||
@ -103,7 +103,7 @@ pushTmpDir ORG.buildSCDB
|
||||
|
||||
|
||||
loginfo "Building SSC coorientation graph..."
|
||||
${PROG_DIR}/coorienteSC.sh SSC.fasta 5000 ${LOGFILE} > SSC.tgf
|
||||
${PROG_DIR}/coorienteSC.sh SSC.fasta 5000 ${ORG_LOGFILE} > SSC.tgf
|
||||
${PROG_DIR}/cc.py SSC.tgf > SSC.cc
|
||||
loginfo " --> $(awk '{print $1}' SSC.cc | uniq | wc -l) connected componants"
|
||||
loginfo "Done"
|
||||
@ -146,7 +146,7 @@ pushTmpDir ORG.buildSCDB
|
||||
|
||||
|
||||
loginfo "Checking SSC homogeneity..."
|
||||
${PROG_DIR}/coorienteSC.sh SSC.direct.fasta 5000 ${LOGFILE} > SSC_RefDB.tgf
|
||||
${PROG_DIR}/coorienteSC.sh SSC.direct.fasta 5000 ${ORG_LOGFILE} > SSC_RefDB.tgf
|
||||
${PROG_DIR}/cc.py SSC_RefDB.tgf > SSC_RefDB.cc
|
||||
NCC=$(awk '{print $1}' SSC_RefDB.cc | uniq | wc -l)
|
||||
if (( $NCC == 1 )); then
|
||||
|
42
detectors/trna/bin/go_trna.sh
Executable file
42
detectors/trna/bin/go_trna.sh
Executable file
@ -0,0 +1,42 @@
|
||||
#!/bin/bash
#
#                           Annotate tRNA
#
#========================================================================================
#
#  Annotate tRNA based on the Aragorn software predictions.
#
#  go_trna.sh <FASTAFILE>
#
#      - <FASTAFILE> : The fasta file containing the genome to annotate.
#                      A path that does not start with "/" is resolved
#                      against ${CALL_DIR}.
#
#  Results are printed to the standard output
#
#========================================================================================

# -- CAUTION -- Works as long as the script
#               is not called through a symlink
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"

# NOTE(review): bash_init.sh is presumably what provides pushTmpDir,
# popTmpDir, CALL_DIR, PROG_DIR and TRNA_DATA_DIR -- none of them is
# defined in this script.
source "${SCRIPT_DIR}/../../../scripts/bash_init.sh"

# Stop early when no genome file is given; otherwise QUERY would end up
# being "${CALL_DIR}/" and aragorn would be run on a directory. The check
# is done before pushTmpDir so that there is no temporary directory to pop.
if [[ -z "${1:-}" ]]; then
  echo "Usage: $(basename "${BASH_SOURCE[0]}") <FASTAFILE>" >&2
  exit 1
fi

pushTmpDir ORG.trna

  # Exported so that child processes can read it from their environment
  # (presumably the awk wrapper run below -- confirm).
  CAUTRNADB="${TRNA_DATA_DIR}/CAU_tRNA_DB.fasta"
  export CAUTRNADB

  # Turn a relative query path into an absolute one: the working directory
  # may have been changed by pushTmpDir (assumption -- confirm).
  if [[ ! "$1" =~ ^/ ]]; then
    QUERY="${CALL_DIR}/$1"
  else
    QUERY="$1"
  fi

  # Not read anywhere in this script; kept in case sourced code relies on it.
  TRNA=$(basename "${QUERY}")

  # Every expansion is quoted so that paths containing spaces or glob
  # characters reach aragorn and the wrapper as a single argument.
  aragorn -i -w -seq "${QUERY}" | \
    "${PROG_DIR}/../lib/aragorn_wrapper.awk"

popTmpDir

exit 0
|
||||
|
@ -7,17 +7,9 @@ function genomeid() {
|
||||
return gid;
|
||||
}
|
||||
|
||||
function home() {
|
||||
"echo $ORGANNOT_HOME" | getline homedir;
|
||||
return homedir;
|
||||
}
|
||||
|
||||
function prog(program) {
|
||||
return home() "/" program;
|
||||
}
|
||||
|
||||
function trnalib(prognam) {
|
||||
return home() "/lib/trnaCAU.ref.fasta";
|
||||
# Return the value of the CAUTRNADB environment variable ("" when the
# variable is not set).
#
# The value is read from ENVIRON instead of through an `echo $CAUTRNADB`
# pipe: that pipe was never close()d, so only the first call actually read
# something and later calls depended on the stale global `ref`; the shell
# would also word-split and glob-expand the value.
function trnalib() {
  return ENVIRON["CAUTRNADB"];
}
|
||||
|
||||
function awkPID() {
|
||||
@ -65,19 +57,20 @@ function patchtRNA(anticodon,trna,seq) {
|
||||
if (anticodon == "cat") {
|
||||
file=printfasta(trna "_" anticodon,seq,"");
|
||||
|
||||
command= prog("sumatra") " -d -n " file " " trnalib();
|
||||
command= "sumatra -t 0.9 -x -n " file " " trnalib() " 2>> /dev/null";
|
||||
while ((command | getline output) > 0) {
|
||||
split(output,field," ");
|
||||
n[field[2]]++;
|
||||
d[field[2]]=field[3];
|
||||
match(field[2],"trn.M?");
|
||||
trna=substr(field[2],RSTART,RLENGTH);
|
||||
n[trna]+=field[5];
|
||||
}
|
||||
close(command)
|
||||
|
||||
dmin=1;
|
||||
nmax=0;
|
||||
for (i in n) {
|
||||
dist=d[i]/n[i];
|
||||
if (dist < dmin) {
|
||||
dmin=dist;
|
||||
dist=n[i];
|
||||
if (n[i] > nmax) {
|
||||
nmax=n[i];
|
||||
trna=i;
|
||||
}
|
||||
}
|
||||
@ -94,6 +87,42 @@ function gene2product(gene) {
|
||||
return "tRNA-" AA3[substr(gene,4,1)];
|
||||
}
|
||||
|
||||
# Print one tRNA as four EMBL feature-table lines on standard output.
#
#   geneid : not used here (same parameter list as gffTRNA)
#   trna   : gene name; printed in /gene and passed to gene2product()
#   loc    : location written "[b,e]", or "c[b,e]" when it starts with "c"
#   anti   : anticodon; upper-cased, T replaced by U before printing
#   intron : "" when there is no intron, otherwise two comma-separated
#            numbers enclosed in one delimiter character on each side
#   seq    : not used here
#
# The names after the extra spaces in the parameter list are local
# variables (awk idiom), so the function no longer overwrites globals.
function emblTRNA(geneid,trna,loc,anti,intron,seq,    product,l,intronpos,ib,ie,gb) {

  # Rewrite the bracketed location: "c[b,e]" -> "complement(b..e)",
  # "[b,e]" -> "b..e".
  if (loc ~ /^c/) {
    sub("c\\[","complement(",loc);
    sub("\\]",")",loc);
    sub(",","..",loc);
  }
  else {
    sub("\\[","",loc);
    sub("\\]","",loc);
    sub(",","..",loc);
  }

  anti=toupper(anti);
  gsub("T","U",anti);
  product=gene2product(trna);

  if (intron!="") {
    # Drop the first and the last character (the enclosing delimiters).
    l=length(intron);
    intron=substr(intron,2,l-2);

    # split() numbers the elements from 1: the two fields are [1] and [2].
    # Reading [0] and [1] left ib empty and put the first field in ie.
    split(intron,intronpos,",");
    ib=intronpos[1];
    ie=intronpos[2];

    # The first run of digits in loc is its first coordinate.
    match(loc,"[0-9][0-9]*");
    gb=substr(loc,RSTART,RLENGTH);

    # NOTE(review): both intron fields are used as offsets from gb, also
    # for complement locations -- confirm against the aragorn output (the
    # second field may be a length) and against gffTRNA.
    sub("\\.\\.",".." (gb + ib -2) "," (gb + ie) "..",loc);
    sub("complement","complement(join",loc);

    # A location still starting with a digit is on the direct strand and
    # has no "join(" yet; the closing parenthesis is needed in both cases.
    if (substr(loc,1,1) ~ /[0-9]/) {
      loc="join("loc;
    }
    loc=loc")";
  }

  print "FT tRNA " loc;
  print "FT /gene=\""trna"\"";
  print "FT /anticodon=\""anti"\"";
  print "FT /product=\""product"("anti")\"";

}
|
||||
|
||||
function gffTRNA(geneid,trna,loc,anti,intron,seq) {
|
||||
if (loc ~ /^c/) {
|
||||
complement="-";
|
||||
@ -139,7 +168,6 @@ function gffTRNA(geneid,trna,loc,anti,intron,seq) {
|
||||
}
|
||||
|
||||
BEGIN {
|
||||
print ARGV[1];
|
||||
AA1["Ala"]="A";
|
||||
AA1["Cys"]="C";
|
||||
AA1["Asp"]="D";
|
||||
@ -201,7 +229,7 @@ BEGIN {
|
||||
{ seq=epissage(intron,seq);
|
||||
trna=patchtRNA(anti,trna,seq);
|
||||
# print geneid,trna,loc,anti,"'"intron"'",seq;
|
||||
gffTRNA(geneid,trna,loc,anti,intron,seq);
|
||||
emblTRNA(geneid,trna,loc,anti,intron,seq);
|
||||
seq=""
|
||||
}
|
||||
|
||||
@ -225,5 +253,5 @@ BEGIN {
|
||||
END { seq=epissage(intron,seq);
|
||||
trna=patchtRNA(anti,trna,seq);
|
||||
# print geneid,trna,loc,anti,"'"intron"'",seq;
|
||||
gffTRNA(geneid,trna,loc,anti,intron,seq);
|
||||
emblTRNA(geneid,trna,loc,anti,intron,seq);
|
||||
}
|
Reference in New Issue
Block a user