Files
annotate/detectors/nucrrna/bin/go_nucrrna.sh
2025-05-27 15:20:55 +02:00

96 lines
2.8 KiB
Bash
Executable File

#!/bin/bash
#
# Annotate the Intergenic Spacer (ITS) of nuclear rDNA cluster
#
#========================================================================================
#
# This script is based on ITSx
#
#
# go_its.sh <FASTAFILE>
#
# - <FASTAFILE> : The fasta file containing the cluster to annotate
#
# Results are printed to the standart output
#
#========================================================================================
# -- CAUTION -- Works as long than the script
# is not called through a symlink
THIS_DIR="$(dirname ${BASH_SOURCE[0]})"
source "${THIS_DIR}/../../../scripts/bash_init.sh"
pushTmpDir ORG.its
loginfo "Annotating ITS and TSU..."
RRNADB="${NUCRRNA_DATA_DIR}/plants/nuc_RRNA.hmm"
if [[ ! "$1" =~ ^/ ]]; then
QUERY="${CALL_DIR}/$1"
else
QUERY="$1"
fi
ITSx -p "${ITS_DATA_DIR}/ITSx_db/HMMs" -i "${QUERY}" -o "output.itsx"
ITS1=( $(sed -E 's/.*ITS1: *([0-9]+)-([0-9]+).*/\1 \2/' "output.itsx.positions.txt" ) )
ITS2=( $(sed -E 's/.*ITS2: *([0-9]+)-([0-9]+).*/\1 \2/' "output.itsx.positions.txt" ) )
TSU=( $(sed -E 's/.*5\.8S: *([0-9]+)-([0-9]+).*/\1 \2/' "output.itsx.positions.txt" ) )
if [[ ${#ITS1[@]} == "2" ]] ; then
echo "FT misc_RNA ${ITS1[0]}..${ITS1[1]}"
echo 'FT /gene="ITS1"'
echo 'FT /note="internal transcribed spacer 1, ITS1"'
echo 'FT /locus_tag=""'
fi
if [[ ${#TSU[@]} == "2" ]] ; then
echo "FT rRNA ${TSU[0]}..${TSU[1]}"
echo 'FT /gene="5.8S rRNA"'
echo 'FT /product="5.8S ribosomal RNA"'
echo 'FT /locus_tag=""'
fi
if [[ ${#ITS2[@]} == "2" ]] ; then
echo "FT misc_RNA ${ITS2[0]}..${ITS2[1]}"
echo 'FT /gene="ITS2"'
echo 'FT /note="internal transcribed spacer 2, ITS2"'
echo 'FT /locus_tag=""'
fi
hmmsearch --max ${RRNADB} ${QUERY} | \
$AwkCmd '/Query: / {
profil=$2
match($3,"[0-9][0-9]*");
lprof=substr($3,RSTART,RLENGTH)}
/ [0-9][0-9]* ! / {
print profil,lprof,$7,$8,$10,$11}' | \
$AwkCmd '($3 <=5) && (($2-$4) <=5) {
full=1;$5=$5-$3+1;$6=$6+($2-$4)
}
{ loc=$5".."$6 }
($1 ~ /_RC$/) {
loc="complement("loc")"
}
(full==1) {
match($1,"_..*S")
rrna=substr($1,RSTART+1,RLENGTH-1)
print "FT rRNA " loc
print "FT /gene=\""rrna" rRNA\""
print "FT /product=\""rrna" ribosomal RNA\""
print "FT /locus_tag=\"\""
full=0
}'
loginfo "Done."
popTmpDir
exit 0