Files
annotate/detectors/normalizerdna/bin/go_normalizerdna.sh
Eric Coissac 8113b80d47 Add annotation of nuclear rDNA cistron
Former-commit-id: ee54019ddddbea4d17956622968f6ce673b609e1
Former-commit-id: 5e5381cf59409ca3dc01098b0e3f330efe0a6a32
2016-05-02 10:56:40 +02:00

71 lines
1.8 KiB
Bash
Executable File

#!/bin/bash
#
# Annotate the Intergenic Spacer (ITS) of nuclear rDNA cluster
#
#========================================================================================
#
# This script is based on ITSx
#
#
# go_its.sh <FASTAFILE>
#
# - <FASTAFILE> : The fasta file containing the cluster to annotate
#
# Results are printed to the standart output
#
#========================================================================================
# -- CAUTION -- Works as long than the script
# is not called through a symlink
THIS_DIR="$(dirname ${BASH_SOURCE[0]})"
source "${THIS_DIR}/../../../scripts/bash_init.sh"
pushTmpDir ORG.its
loginfo "Normalizing nuclear rDNA cistron..."
RRNADB="${NUCRRNA_DATA_DIR}/plants/nuc_RRNA.hmm"
if [[ ! "$1" =~ ^/ ]]; then
QUERY="${CALL_DIR}/$1"
else
QUERY="$1"
fi
strand=( $(hmmsearch --max ${RRNADB} ${QUERY} | \
$AwkCmd '/Query: / { \
profil=$2; \
match($3,"[0-9][0-9]*");\
lprof=substr($3,RSTART,RLENGTH)} \
/ [0-9][0-9]* ! / { \
print profil,lprof,$7,$8,$10,$11}' | \
$AwkCmd '($3 <=5) && (($2-$4) <=5) { \
full=1;$5=$5-$3+1;$6=$6+($2-$4)} \
{loc="Forward"} \
($1 ~ /_RC$/) { \
loc="Reverse"} \
(full==1) {match($1,"_..*S");\
rrna=substr($1,RSTART+1,RLENGTH-1);\
print loc;\
full=0
}' | sort | uniq) )
if [[ "${#strand[@]}" == 1 ]] ; then
if [[ "${strand[0]}" == "Forward" ]] ; then
cat ${QUERY}
else
fastarevcomp -f ${QUERY}
fi
else
logerror "Cannot determine the Cistron orientation"
exit 1
fi
loginfo "Done."
popTmpDir
exit 0