Files
annotate/detectors/normalizerdna/bin/go_normalizerdna.sh
Eric Coissac a25ab81b38 Add logs to print the sequence length and if the sequence is reverse
complemented

Former-commit-id: ba55f354ea7a51119fe44bcb36aa5927194293e2
Former-commit-id: dd7715be54ac92c9625f0a2c30e572b7aee76dc7
2018-01-18 22:00:07 +01:00

72 lines
1.9 KiB
Bash
Executable File

#!/bin/bash
#
# Annotate the Intergenic Spacer (ITS) of nuclear rDNA cluster
#
#========================================================================================
#
# This script is based on ITSx
#
#
# go_its.sh <FASTAFILE>
#
# - <FASTAFILE> : The fasta file containing the cluster to annotate
#
# Results are printed to the standart output
#
#========================================================================================
# -- CAUTION -- Works as long than the script
# is not called through a symlink
THIS_DIR="$(dirname ${BASH_SOURCE[0]})"
source "${THIS_DIR}/../../../scripts/bash_init.sh"
pushTmpDir ORG.its
loginfo "Normalizing nuclear rDNA cistron..."
RRNADB="${NUCRRNA_DATA_DIR}/plants/nuc_RRNA.hmm"
if [[ ! "$1" =~ ^/ ]]; then
QUERY="${CALL_DIR}/$1"
else
QUERY="$1"
fi
loginfo "Sequence length $(seqlength ${QUERY})"
strand=( $(hmmsearch --max ${RRNADB} ${QUERY} | \
$AwkCmd '/Query: / { \
profil=$2; \
match($3,"[0-9][0-9]*");\
lprof=substr($3,RSTART,RLENGTH)} \
/ [0-9][0-9]* ! / { \
print profil,lprof,$7,$8,$10,$11}' | \
$AwkCmd '($3 <=5) && (($2-$4) <=5) { \
full=1;$5=$5-$3+1;$6=$6+($2-$4)} \
{loc="Forward"} \
($1 ~ /_RC$/) { \
loc="Reverse"} \
(full==1) {match($1,"_..*S");\
rrna=substr($1,RSTART+1,RLENGTH-1);\
print loc;\
full=0
}' | sort | uniq) )
if [[ "${#strand[@]}" == 1 ]] ; then
if [[ "${strand[0]}" == "Forward" ]] ; then
cat ${QUERY}
else
loginfo "Revert complement rDNA cluster cistron"
fastarevcomp ${QUERY}
fi
else
logerror "Cannot determine the Cistron orientation"
exit 1
fi
loginfo "Done."
popTmpDir
exit 0