2021-11-04 13:36:28 +01:00
|
|
|
#!/bin/bash
|
|
|
|
#
|
|
|
|
# Annotate CDS
|
|
|
|
#
|
|
|
|
#========================================================================================
|
|
|
|
#
|
|
|
|
# Annotate CDS
|
|
|
|
#
|
|
|
|
# go_cds.sh <FASTAFILE> [DBROOT]
|
|
|
|
#
|
|
|
|
# - <FASTAFILE> : The fasta file containing the genome to annotate
|
|
|
|
#     - [DBROOT] : optional argument specifying the database directory
|
|
|
|
#
|
|
|
|
# Results are printed to the standard output
|
|
|
|
#
|
|
|
|
#========================================================================================
|
|
|
|
# usage: go_cds.sh fasta [db_root]
|
|
|
|
#
|
|
|
|
|
|
|
|
# -- CAUTION -- Works as long as the script
|
|
|
|
# is not called through a symlink
|
|
|
|
# Directory holding this script. The expansion is quoted so that an
# installation path containing blanks is not split into several words.
THIS_DIR="$(dirname -- "${BASH_SOURCE[0]}")"

# Common initialisation file; presumably provides the helpers used below
# (needarg, needfile, loginfo, ...) -- TODO confirm
source "${THIS_DIR}/../../../scripts/bash_init.sh"
|
|
|
|
|
2025-05-24 08:46:13 +02:00
|
|
|
# Number of jobs handed to `parallel -j`.
#   - ANNOT_MAXCPU, when set and not empty, is used as is.
#   - Otherwise every available core is used. `nproc` is tried first, then
#     `getconf`, and finally 1 so that Threads is never left empty.
if [[ -n "$ANNOT_MAXCPU" ]] ; then
    Threads=$ANNOT_MAXCPU
else
    Threads=$(nproc 2> /dev/null \
              || getconf _NPROCESSORS_ONLN 2> /dev/null \
              || echo 1)
fi
|
|
|
|
|
2021-11-04 13:36:28 +01:00
|
|
|
# First positional argument: the genome fasta file.
# needarg / needfile presumably abort the script when the argument or the
# file is missing -- TODO confirm against their definition.
needarg 1

Fasta="$1"
shift

needfile "$Fasta"
|
2021-11-04 13:36:28 +01:00
|
|
|
|
|
|
|
# The genome name is the base name of the genome file without its extension.
# The directory part is removed first, so that a dot appearing in a directory
# name (e.g. /data/v1.2/genome) is never taken for the extension separator.
Genome=$(basename -- "$Fasta")
Genome=${Genome%.*}
|
|
|
|
|
|
|
|
# DbRoot is set to its default value unless
# a second argument specifies another DbRoot
DbRoot="$CDS_DATA_DIR/sp_chlorodb"

if (( $# > 0 )) ; then
    DbRoot="$1"
    # bash is case sensitive: the former `Shift` was reported as
    # "command not found" and the argument was never consumed
    shift
fi
|
|
|
|
|
2022-02-16 22:50:17 +01:00
|
|
|
# Annotation list shipped with the database
AnnotFile="$DbRoot/Annot.lst"

# Check the database layout before starting any work. Every path is quoted
# so that a DbRoot containing blanks reaches the helpers as a single word.
needdir  "$DbRoot"
needdir  "$DbRoot/core"
needfile "$AnnotFile"
needdir  "$DbRoot/models"
|
|
|
|
|
|
|
|
# Default value "yes" for the three pass switches tested further down
# (assignundef presumably leaves an already defined variable untouched
# -- TODO confirm against its definition)
for _pass in 1 2 3 ; do
    assignundef "cdsdetection_pass${_pass}" yes
done
unset _pass
|
2021-11-04 13:36:28 +01:00
|
|
|
|
|
|
|
# Working directory of this run (created by the mktempdir helper)
temp=$(mktempdir "$(hostname)")

# Make the genome visible in the working directory under a fixed name.
# The link is created through its full path instead of pushd/popd, so a
# failure to enter $temp can no longer leave the link in the current
# directory. Assumes getAbsolutePath returns an absolute path -- TODO confirm.
AbsGenoFile=$(getAbsolutePath "$Fasta")
ln -s "$AbsGenoFile" "$temp/genome.fasta"

# From now on every pass works on the linked copy
Fasta="$temp/genome.fasta"
|
|
|
|
|
|
|
|
#
# pass1: run exonerate
#
# One do_exonerate.csh command line is written per *.fst family file of the
# core directory; the lines are piped to `parallel`, which runs $Threads of
# them at a time. The resulting genome.cds.fasta is then moved next to the
# caller as <Genome>.cds_pass1.fasta.
#
if [[ "$cdsdetection_pass1" == "yes" ]] ; then
    # nullglob is only set inside the subshell: a directory without any
    # *.fst file simply produces no command line.
    ( shopt -s nullglob
      for dir in "core" ; do
        if [[ -d "$DbRoot/$dir" ]] ; then
            # NOTE(review): stdout of this subshell feeds parallel, so
            # loginfo must not write to stdout -- confirm.
            loginfo "running pass1:$dir exonerate of $Genome on $DbRoot"
            for f in "$DbRoot/$dir"/*.fst ; do
                # %q shell-quotes every word, so that paths containing
                # blanks survive the re-parsing of the line
                printf '%q ' tcsh -f "$PROG_DIR/do_exonerate.csh" Pass1 \
                       "$Fasta" "$f" "$AnnotFile" "$DbRoot/models" "$temp"
                printf '\n'
            done
        fi
      done ) | parallel -j "$Threads"

    mv "$temp/genome.cds.fasta" "$Genome.cds_pass1.fasta"
fi
|
|
|
|
|
|
|
|
|
|
|
|
#
# pass2: RPS12 gene with transsplicing
#
# Delegated to do_rps12.sh, which receives the genome file and the working
# directory. Program path and arguments are quoted so that blanks in any of
# them do not split the command line.
#
if [[ "$cdsdetection_pass2" == "yes" ]] ; then
    loginfo "running pass2:rps12 exonerate of $Genome on $DbRoot"
    "$PROG_DIR/do_rps12.sh" "$Fasta" "$temp"
fi
|
|
|
|
|
|
|
|
#
# pass3: run exonerate on shell and dust
#
# Same scheme as pass1, applied to the *.fst family files of the directories
# listed in the loop below (only "shell" at present): one do_exonerate.csh
# command line per family is piped to `parallel`.
#
if [[ "$cdsdetection_pass3" == "yes" ]] ; then
    # nullglob is only set inside the subshell: a directory without any
    # *.fst file yields an empty list and therefore no command line.
    ( shopt -s nullglob
      for dir in "shell" ; do
        if [[ -d "$DbRoot/$dir" ]] ; then
            fams=( "$DbRoot/$dir"/*.fst )
            # NOTE(review): stdout of this subshell feeds parallel, so
            # loginfo must not write to stdout -- confirm.
            loginfo "${fams[@]}"
            loginfo "running pass3:$dir exonerate of $Genome on $DbRoot"
            for f in "${fams[@]}" ; do
                # %q shell-quotes every word, so that paths containing
                # blanks survive the re-parsing of the line
                printf '%q ' tcsh -f "$PROG_DIR/do_exonerate.csh" Pass3 \
                       "$Fasta" "$f" "$AnnotFile" "$DbRoot/models" "$temp"
                printf '\n'
            done
        fi
      done ) | parallel -j "$Threads"

    # NOTE(review): the output of pass3 is still named "cds_pass2"; this
    # looks like a leftover from before the rps12 pass was inserted. The name
    # is kept because other scripts may rely on it -- confirm before renaming.
    mv "$temp/genome.cds.fasta" "$Genome.cds_pass2.fasta"
fi
|
|
|
|
|
2021-11-04 13:36:28 +01:00
|
|
|
# $PROG_DIR/do_prokov.sh $Fasta $Genome.cds.fasta $temp

#
# end : output on stdout
#
# Every *.res file left in the working directory is printed.
cat "$temp"/*.res

#
# cleanup everything
#
# The working directory is removed unless TMP_CLEANUP is 0 (default: 1).
assignundef TMP_CLEANUP 1

if (( TMP_CLEANUP != 0 )) ; then
    loginfo " cleanup $temp"
    # - \rm bypasses a possible alias of rm
    # - ${temp:?} aborts the subshell instead of running rm on an empty path
    # - the subshell and the redirection keep the removal best-effort and silent
    (\rm -r -- "${temp:?}") >& /dev/null
fi

exit 0
|