2015-10-10 19:12:02 -03:00
|
|
|
#!/bin/bash
|
|
|
|
#
|
|
|
|
# BUILD THE REFERENCE CAU tRNA LIBRARY
|
|
|
|
#
|
|
|
|
#========================================================================================
|
|
|
|
|
|
|
|
# -- CAUTION -- Works as long as the script
|
|
|
|
# is not called through a symlink
|
2015-11-08 14:28:57 +01:00
|
|
|
# Directory holding this script, derived from BASH_SOURCE. The inner
# expansion is quoted so that a checkout path containing whitespace is
# handed to dirname as a single argument. Symlinks are not resolved.
THIS_DIR="$(dirname "${BASH_SOURCE[0]}")"

# Load the shared shell initialisation file, located relative to this script.
# NOTE(review): presumably this is what defines AwkCmd, PROG_DIR,
# TRNA_DATA_DIR and the logging / temp-dir helpers used below -- confirm.
source "${THIS_DIR}/../../../scripts/bash_init.sh"
|
2015-10-10 19:12:02 -03:00
|
|
|
|
|
|
|
#######################################
# Rewrite a FASTA stream so that each sequence occupies exactly one line.
#
# Header lines (starting with ">") are copied through unchanged; all the
# lines following a header are concatenated and emitted as a single line.
#
# Globals:
#   AwkCmd (read) - awk command to run; falls back to plain "awk" if unset
#                   or empty.
# Arguments:
#   $1 - optional path of the FASTA file; when absent, stdin is read.
# Outputs:
#   The single-line FASTA on stdout. Empty input produces no output.
#######################################
function fasta1li {
  # AwkCmd is intentionally left unquoted so that a value carrying extra
  # options still splits into command + arguments.
  # ${1:+"$1"} passes the file name as one word (safe with spaces) and
  # expands to nothing at all when no file was given, so awk reads stdin.
  ${AwkCmd:-awk} '
    /^>/ {
      if (sequence != "") print sequence
      print $0
      sequence = ""
    }
    !/^>/ { sequence = sequence $0 }
    END   { if (sequence != "") print sequence }
  ' ${1:+"$1"}
}
|
|
|
|
|
|
|
|
|
2018-04-05 17:55:31 +02:00
|
|
|
#######################################
# Keep only the FASTA entries of accessions for which all three tRNA
# names trnfM, trnM and trnI have been seen.
#
# Records are split on ">" and fields on "_". Field 1 is taken as the tRNA
# name and fields 2 and 3 (re-joined with "_") as the accession key.
# Consecutive records sharing the same key form one group; a group is
# printed only if it contains trnfM, trnM and trnI.
#
# NOTE(review): field 3 runs up to the next "_" or the end of the record,
# so headers with fewer than four "_"-separated fields would pull the
# sequence into the key -- confirm the header layout produced upstream.
#
# Globals:
#   AwkCmd (read) - awk command to run; falls back to plain "awk" if unset
#                   or empty.
# Arguments:
#   $1 - optional path of the FASTA file; when absent, stdin is read.
# Outputs:
#   The retained FASTA entries on stdout, blank lines removed.
# Returns:
#   Exit status of the final grep (non-zero when nothing is retained).
#######################################
function filtertrna() {
  # AwkCmd is intentionally unquoted (it may carry options); ${1:+"$1"}
  # keeps a file name with spaces intact and vanishes when no file is given.
  ${AwkCmd:-awk} -F '_' '
    BEGIN { RS = ">" }

    # Non-empty record: pick up the tRNA name and the accession key.
    (! /^$/) { trna = $1; ac = $2 "_" $3 }

    # Key changed: flush the previous group if it was complete ...
    (ac != oldac &&
     trnas["trnfM"] == 1 &&
     trnas["trnM"] == 1 &&
     trnas["trnI"] == 1) { print seqs }

    # ... then start a fresh group.
    (ac != oldac) {
      trnas["trnfM"] = 0
      trnas["trnM"] = 0
      trnas["trnI"] = 0
      seqs = ""
      oldac = ac
    }

    # Accumulate the record (restoring its ">") and mark its tRNA as seen.
    (! /^$/) { seqs = seqs "\n>" $0; trnas[trna] = 1 }

    # Flush the last group if it was complete.
    END {
      if (trnas["trnfM"] == 1 &&
          trnas["trnM"] == 1 &&
          trnas["trnI"] == 1) { print seqs }
    }
  ' ${1:+"$1"} \
    | grep -E -v "^ *$"   # drop the blank lines introduced by the record joins
}
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Main driver: build CAU.fasta from the input entries given on the command
# line and copy it to ${TRNA_DATA_DIR}/CAU_tRNA_DB.fasta.
#
# pushTmpDir / popTmpDir / openLogFile / loginfo, as well as PROG_DIR and
# TRNA_DATA_DIR, are not defined in this file.
# NOTE(review): presumably they come from the sourced bash_init.sh and
# pushTmpDir/popTmpDir enter/leave a scratch directory -- confirm.
# ---------------------------------------------------------------------------
pushTmpDir ORG.buildSCDB

CAUFILE=CAU.fasta

openLogFile "${TRNA_DATA_DIR}/CAU_tRNA_DB.log"

loginfo "Selecting Viridiplantae genebank entries..."
# "$@" forwards every command-line argument as its own word, so input paths
# containing whitespace reach the selector intact.
VIRIDIPLANTAE=$("${PROG_DIR}/../../normalize/tools/selectViridiplantae.sh" "$@")
# VIRIDIPLANTAE is a whitespace-separated list: it is left unquoted here and
# below on purpose so that it splits into one word per entry.
loginfo " --> $(echo ${VIRIDIPLANTAE} | wc -w) entries selected"
loginfo "Done"

loginfo "Extracting the CAU tRNA from the plants entries..."
# 1. extract the tRNA sequences from the selected entries
# 2. put every sequence on a single line
# 3. keep the trnI / trnM / trnfM headers together with their sequence line
# 4. remove the "--" group separators that the -A option inserts; the pattern
#    is anchored so that headers merely containing "--" are not discarded
# 5. keep only the accessions that provide all three tRNAs
"${PROG_DIR}/extract_refCAUtRNA.sh" ${VIRIDIPLANTAE} \
  | fasta1li \
  | grep -E -A 1 '^>trn(I|M|fM)' \
  | grep -v -- '^--$' \
  | filtertrna > "${CAUFILE}"
loginfo "Done"

loginfo "Installing the CAU tRNA database..."
cp "${CAUFILE}" "${TRNA_DATA_DIR}/CAU_tRNA_DB.fasta"
loginfo "Done"

popTmpDir
|