71 lines
1.8 KiB
Bash
71 lines
1.8 KiB
Bash
|
#!/bin/bash
|
||
|
#
|
||
|
# Annotate the Intergenic Spacer (ITS) of nuclear rDNA cluster
|
||
|
#
|
||
|
#========================================================================================
|
||
|
#
|
||
|
# This script is based on ITSx
|
||
|
#
|
||
|
#
|
||
|
# go_its.sh <FASTAFILE>
|
||
|
#
|
||
|
# - <FASTAFILE> : The fasta file containing the cluster to annotate
|
||
|
#
|
||
|
# Results are printed to the standart output
|
||
|
#
|
||
|
#========================================================================================
|
||
|
|
||
|
# -- CAUTION -- Works as long than the script
|
||
|
# is not called through a symlink
|
||
|
THIS_DIR="$(dirname ${BASH_SOURCE[0]})"
|
||
|
source "${THIS_DIR}/../../../scripts/bash_init.sh"
|
||
|
|
||
|
pushTmpDir ORG.its
|
||
|
|
||
|
loginfo "Normalizing nuclear rDNA cistron..."
|
||
|
|
||
|
RRNADB="${NUCRRNA_DATA_DIR}/plants/nuc_RRNA.hmm"
|
||
|
|
||
|
if [[ ! "$1" =~ ^/ ]]; then
|
||
|
QUERY="${CALL_DIR}/$1"
|
||
|
else
|
||
|
QUERY="$1"
|
||
|
fi
|
||
|
|
||
|
|
||
|
strand=( $(hmmsearch --max ${RRNADB} ${QUERY} | \
|
||
|
$AwkCmd '/Query: / { \
|
||
|
profil=$2; \
|
||
|
match($3,"[0-9][0-9]*");\
|
||
|
lprof=substr($3,RSTART,RLENGTH)} \
|
||
|
/ [0-9][0-9]* ! / { \
|
||
|
print profil,lprof,$7,$8,$10,$11}' | \
|
||
|
$AwkCmd '($3 <=5) && (($2-$4) <=5) { \
|
||
|
full=1;$5=$5-$3+1;$6=$6+($2-$4)} \
|
||
|
{loc="Forward"} \
|
||
|
($1 ~ /_RC$/) { \
|
||
|
loc="Reverse"} \
|
||
|
(full==1) {match($1,"_..*S");\
|
||
|
rrna=substr($1,RSTART+1,RLENGTH-1);\
|
||
|
print loc;\
|
||
|
full=0
|
||
|
}' | sort | uniq) )
|
||
|
|
||
|
if [[ "${#strand[@]}" == 1 ]] ; then
|
||
|
if [[ "${strand[0]}" == "Forward" ]] ; then
|
||
|
cat ${QUERY}
|
||
|
else
|
||
|
fastarevcomp -f ${QUERY}
|
||
|
fi
|
||
|
else
|
||
|
logerror "Cannot determine the Cistron orientation"
|
||
|
exit 1
|
||
|
fi
|
||
|
|
||
|
loginfo "Done."
|
||
|
|
||
|
popTmpDir
|
||
|
|
||
|
exit 0
|
||
|
|