Change setup for the blast filtering before exonerate

Former-commit-id: 139685eca58c1fb2272854dee31de3821c54af80
Former-commit-id: dc5c345ce72e9895cbdcc3321499b869040a24da
This commit is contained in:
2022-02-17 18:41:27 +01:00
parent 3b584dbebd
commit 9d93a68b3a
3 changed files with 26 additions and 18 deletions

View File

@ -127,7 +127,10 @@ endif
# speedup exonerate # speedup exonerate
# #
if ($PASS1_SPEEDUP != 0) then set AvgLengh = `$AwkCmd '/>/ {nseq++} \\!/>/ {lseq+=length($0)} END {print int(lseq/nseq)}' $ProtFile`
Notify " Protein $ProtName has an average length of $AvgLengh"
if (($PASS1_SPEEDUP != 0) && ($AvgLengh > $PASS1_BLASTX_FILTER_MINLEN)) then
tcsh -f $PROG_DIR/do_filterbx.csh $GenoFile $ProtFile \ tcsh -f $PROG_DIR/do_filterbx.csh $GenoFile $ProtFile \
$PASS1_BLASTX_FILTER_IDMIN \ $PASS1_BLASTX_FILTER_IDMIN \
@ -135,20 +138,24 @@ if ($PASS1_SPEEDUP != 0) then
$PASS1_BLASTX_FILTER_NBMAX > D_$$ $PASS1_BLASTX_FILTER_NBMAX > D_$$
set n = `egrep "^>" D_$$ | wc -l` set n = `egrep "^>" D_$$ | wc -l`
if ($n > 0) then if ($n >= $PASS1_SLOWDOWN) then
Notify " $n sequences kept" Notify " $n sequences kept"
set DbFile = D_$$ set DbFile = D_$$
else else
Notify " no sequence match" if ($PASS1_SLOWDOWN > 0) then
if ($PASS1_SLOWDOWN != 0) then Notify " not enought sequence match (only $n)"
Notify " reverting to original $ProtName" Notify " reverting to complete $ProtName db"
set DbFile = $ProtFile set DbFile = $ProtFile
else else
Notify " no blast match for $ProtName stop annotation"
echo "" > $base.exo.raw echo "" > $base.exo.raw
goto parse goto parse
endif endif
endif endif
else else
if ($AvgLengh <= $PASS1_BLASTX_FILTER_MINLEN) then
Notify " too short protein, reverting to original $ProtName"
endif
set DbFile = $ProtFile set DbFile = $ProtFile
endif endif
@ -207,7 +214,7 @@ set sp_match=`awk '/^e similarity/ {print $12}' $base.exo.best | head -1`
if ( ${%sp_match} > 0 ) then if ( ${%sp_match} > 0 ) then
set sp_ac=`awk -v sp="$sp_match" '($1 ~ sp) {sub(/SP_AC=/,"",$2); sub(/;$/,"",$2); print $2} ' $DbFile` set sp_ac=`awk -v sp="$sp_match" '($1 ~ sp) {sub(/SP_AC=/,"",$2); sub(/;$/,"",$2); print $2} ' $DbFile`
echo " " patch ac $sp_match to $sp_ac > /dev/stderr Notify " for $ProtName patches swiss ID $sp_match to AC $sp_ac"
sed "s/$sp_match/$sp_ac/g" $base.exo.best > T_$$ sed "s/$sp_match/$sp_ac/g" $base.exo.best > T_$$
mv T_$$ $base.exo.best mv T_$$ $base.exo.best
endif endif

View File

@ -66,15 +66,15 @@ Fasta="$temp/genome.fasta"
# #
if [[ "$cdsdetection_pass1" == "yes" ]] ; then if [[ "$cdsdetection_pass1" == "yes" ]] ; then
for dir in "core" ; do ( for dir in "core" ; do
if [[ -d $DbRoot/$dir ]] ; then if [[ -d $DbRoot/$dir ]] ; then
fams=$(ls $DbRoot/$dir/*.fst) fams=$(ls $DbRoot/$dir/*.fst)
loginfo "running pass1:$dir exonerate of $Genome on $DbRoot" loginfo "running pass1:$dir exonerate of $Genome on $DbRoot"
for f in $fams ; do for f in $fams ; do
tcsh -f $PROG_DIR/do_exonerate.csh Pass1 $Fasta $f $AnnotFile $DbRoot/models $temp echo tcsh -f $PROG_DIR/do_exonerate.csh Pass1 $Fasta $f $AnnotFile $DbRoot/models $temp
done done
fi fi
done done ) | parallel -j 8
mv $temp/genome.cds.fasta $Genome.cds_pass1.fasta mv $temp/genome.cds.fasta $Genome.cds_pass1.fasta
fi fi
@ -94,16 +94,16 @@ fi
# #
if [[ "$cdsdetection_pass3" == "yes" ]] ; then if [[ "$cdsdetection_pass3" == "yes" ]] ; then
for dir in "shell" ; do (for dir in "shell" ; do
if [[ -d $DbRoot/$dir ]] ; then if [[ -d $DbRoot/$dir ]] ; then
fams=$(ls $DbRoot/$dir/*.fst) fams=$(ls $DbRoot/$dir/*.fst)
loginfo $fams loginfo $fams
loginfo "running pass3:$dir exonerate of $Genome on $DbRoot" loginfo "running pass3:$dir exonerate of $Genome on $DbRoot"
for f in $fams ; do for f in $fams ; do
tcsh -f $PROG_DIR/do_exonerate.csh Pass3 $Fasta $f $AnnotFile $DbRoot/models $temp echo tcsh -f $PROG_DIR/do_exonerate.csh Pass3 $Fasta $f $AnnotFile $DbRoot/models $temp
done done
fi fi
done done) | parallel -j 8
mv $temp/genome.cds.fasta $Genome.cds_pass2.fasta mv $temp/genome.cds.fasta $Genome.cds_pass2.fasta
fi fi

View File

@ -15,13 +15,14 @@ AssignUndef TMP_CLEANUP 1
# pass1: exonerate speedup # pass1: exonerate speedup
# #
AssignUndef PASS1_SPEEDUP 1 AssignUndef PASS1_SPEEDUP 1
AssignUndef PASS1_SLOWDOWN 0 AssignUndef PASS1_SLOWDOWN 5
AssignUndef PASS1_BLASTX_FILTER_IDMIN 80 AssignUndef PASS1_BLASTX_FILTER_MINLEN 100
AssignUndef PASS1_BLASTX_FILTER_NBMIN 20 AssignUndef PASS1_BLASTX_FILTER_IDMIN 50
AssignUndef PASS1_BLASTX_FILTER_NBMAX 50 AssignUndef PASS1_BLASTX_FILTER_NBMIN 20
AssignUndef PASS1_BLASTX_FILTER_COMIN 0.8 AssignUndef PASS1_BLASTX_FILTER_NBMAX 20
AssignUndef PASS1_BLASTX_FILTER_GCODE 11 AssignUndef PASS1_BLASTX_FILTER_COMIN 0.8
AssignUndef PASS1_BLASTX_FILTER_GCODE 11
# #
# pass1: exonerate parameters # pass1: exonerate parameters