Files
annotate/detectors/trna/tools/buildCAURefDB.sh
Eric Coissac ee634cc779 Simplify CAU tRNA reference database building to keep only CAU tRNA
from plastomes where the three categories of CAU tRNA (Met/Ile/fMet)
are annotated

Former-commit-id: 67dc445698e22fe8a503c6700977c79e4817d302
Former-commit-id: 6e84303543b0752a7946bdde6e5114cfe6eef8da
2018-04-05 17:55:31 +02:00

73 lines
2.7 KiB
Bash
Executable File

#!/bin/bash
#
# BUILD THE REFERENCE CAU TRNA LIBRARY
#
#========================================================================================
# -- CAUTION -- Works as long as the script
# is not called through a symlink
# Directory containing this script. The inner expansion is quoted so that a
# path containing whitespace reaches dirname as a single argument.
THIS_DIR="$(dirname "${BASH_SOURCE[0]}")"
# Pull in bash_init.sh from the scripts directory three levels up; the helper
# commands and variables used further down are not defined in this file.
source "${THIS_DIR}/../../../scripts/bash_init.sh"
# fasta1li [FILE]
#
# Rewrite a FASTA stream so that every sequence sits on a single line:
# header lines (starting with ">") are echoed unchanged and all following
# non-header lines are concatenated until the next header.
#
# Globals:   AwkCmd (read) - awk command used to run the program
# Arguments: $1 - optional input file; standard input is read when omitted
# Outputs:   the reformatted FASTA on stdout
function fasta1li {
  # ${1:+"$1"} hands the file name to awk as one word even when it contains
  # whitespace, and passes nothing at all when no file was given.
  $AwkCmd '
    /^>/  { if (sequence != "") print sequence
            print $0
            sequence = "" }
    !/^>/ { sequence = sequence $0 }
    # Flush the last record only if it holds residues, so that an empty
    # input or a trailing bare header does not produce a blank line.
    END   { if (sequence != "") print sequence }
  ' ${1:+"$1"}
}
# filtertrna [FILE]
#
# Keep only the groups of FASTA records in which trnfM, trnM and trnI have
# all been seen. Records are split on ">" and fields on "_": field 1 is the
# tRNA name and fields 2 and 3, joined by "_", form the group key
# (presumably the accession of the source entry - confirm against the
# header format produced upstream). Records of one group must be
# consecutive, since a group is closed as soon as the key changes.
#
# Globals:   AwkCmd (read) - awk command used to run the program
# Arguments: $1 - optional input file; standard input is read when omitted
# Outputs:   the retained records on stdout, blank lines removed
# Returns:   the status of the final grep (non-zero when nothing is kept)
function filtertrna() {
  # grep -E replaces the obsolescent egrep alias; ${1:+"$1"} keeps a file
  # name containing whitespace in one word and vanishes when no file is given.
  $AwkCmd -F '_' '
    BEGIN { RS = ">" }

    # Non-empty record: remember its tRNA name and its group key.
    !/^$/ { trna = $1
            ac   = $2 "_" $3 }

    # Group key changed: emit the finished group if it was complete ...
    ac != oldac &&
    trnas["trnfM"] == 1 &&
    trnas["trnM"]  == 1 &&
    trnas["trnI"]  == 1 { print seqs }

    # ... then start a fresh group.
    ac != oldac { trnas["trnfM"] = 0
                  trnas["trnM"]  = 0
                  trnas["trnI"]  = 0
                  seqs  = ""
                  oldac = ac }

    # Append the record (with its ">" restored) to the current group.
    !/^$/ { seqs = seqs "\n>" $0
            trnas[trna] = 1 }

    # Flush the last group, which no key change will ever close.
    END { if (trnas["trnfM"] == 1 &&
              trnas["trnM"]  == 1 &&
              trnas["trnI"]  == 1)
            print seqs }
  ' ${1:+"$1"} |
    grep -E -v '^ *$'
}
# Main sequence. pushTmpDir, popTmpDir, openLogFile, loginfo, PROG_DIR and
# TRNA_DATA_DIR are not defined in this file; presumably they come from the
# sourced bash_init.sh (pushTmpDir/popTmpDir look like they enter and leave
# a temporary working directory - confirm there).
pushTmpDir ORG.buildSCDB

CAUFILE=CAU.fasta

openLogFile "${TRNA_DATA_DIR}/CAU_tRNA_DB.log"

loginfo "Selecting Viridiplantae genebank entries..."
# "$@" forwards every command-line argument as its own word, so arguments
# containing whitespace or glob characters are passed through unchanged.
VIRIDIPLANTAE=$("${PROG_DIR}/../../normalize/tools/selectViridiplantae.sh" "$@")
# VIRIDIPLANTAE is deliberately left unquoted here and below: it is used as
# a whitespace-separated list, counted with wc -w and expanded to one
# argument per entry.
loginfo " --> $(echo ${VIRIDIPLANTAE} | wc -w) entries selected"
loginfo "Done"

loginfo "Extracting the CAU tRNA from the plants entries..."
# Keep each trnI/trnM/trnfM header together with the line that follows it.
# grep -A prints "--" between non-adjacent matches; only lines that are
# exactly "--" are dropped, so a header or sequence that merely contains
# two dashes survives.
"${PROG_DIR}/extract_refCAUtRNA.sh" ${VIRIDIPLANTAE} |
  fasta1li |
  grep -E -A 1 '^>trn(I|M|fM)' |
  grep -v '^--$' |
  filtertrna > "${CAUFILE}"
loginfo "Done"

loginfo "Installing the CAU tRNA database..."
cp "${CAUFILE}" "${TRNA_DATA_DIR}/CAU_tRNA_DB.fasta"
loginfo "Done"

popTmpDir