diff --git a/detectors/cds/bin/do_exonerate.csh b/detectors/cds/bin/do_exonerate.csh index 98b409e..1da136f 100755 --- a/detectors/cds/bin/do_exonerate.csh +++ b/detectors/cds/bin/do_exonerate.csh @@ -127,7 +127,10 @@ endif # speedup exonerate # -if ($PASS1_SPEEDUP != 0) then +set AvgLengh = `$AwkCmd '/>/ {nseq++} \\!/>/ {lseq+=length($0)} END {print int(lseq/nseq)}' $ProtFile` +Notify " Protein $ProtName has an average length of $AvgLengh" + +if (($PASS1_SPEEDUP != 0) && ($AvgLengh > $PASS1_BLASTX_FILTER_MINLEN)) then tcsh -f $PROG_DIR/do_filterbx.csh $GenoFile $ProtFile \ $PASS1_BLASTX_FILTER_IDMIN \ @@ -135,20 +138,24 @@ if ($PASS1_SPEEDUP != 0) then $PASS1_BLASTX_FILTER_NBMAX > D_$$ set n = `egrep "^>" D_$$ | wc -l` - if ($n > 0) then + if ($n >= $PASS1_SLOWDOWN) then Notify " $n sequences kept" set DbFile = D_$$ else - Notify " no sequence match" - if ($PASS1_SLOWDOWN != 0) then - Notify " reverting to original $ProtName" + if ($PASS1_SLOWDOWN > 0) then + Notify " not enought sequence match (only $n)" + Notify " reverting to complete $ProtName db" set DbFile = $ProtFile else + Notify " no blast match for $ProtName stop annotation" echo "" > $base.exo.raw goto parse endif endif else + if ($AvgLengh <= $PASS1_BLASTX_FILTER_MINLEN) then + Notify " too short protein, reverting to original $ProtName" + endif set DbFile = $ProtFile endif @@ -207,7 +214,7 @@ set sp_match=`awk '/^e similarity/ {print $12}' $base.exo.best | head -1` if ( ${%sp_match} > 0 ) then set sp_ac=`awk -v sp="$sp_match" '($1 ~ sp) {sub(/SP_AC=/,"",$2); sub(/;$/,"",$2); print $2} ' $DbFile` - echo " " patch ac $sp_match to $sp_ac > /dev/stderr + Notify " for $ProtName patches swiss ID $sp_match to AC $sp_ac" sed "s/$sp_match/$sp_ac/g" $base.exo.best > T_$$ mv T_$$ $base.exo.best endif diff --git a/detectors/cds/bin/go_cds.sh b/detectors/cds/bin/go_cds.sh index 455e1cf..1a6813e 100755 --- a/detectors/cds/bin/go_cds.sh +++ b/detectors/cds/bin/go_cds.sh @@ -66,15 +66,15 @@ Fasta="$temp/genome.fasta" # if [[ "$cdsdetection_pass1" == "yes" ]] ; then - for dir in "core" ; do + ( for dir in "core" ; do if [[ -d $DbRoot/$dir ]] ; then fams=$(ls $DbRoot/$dir/*.fst) loginfo "running pass1:$dir exonerate of $Genome on $DbRoot" for f in $fams ; do - tcsh -f $PROG_DIR/do_exonerate.csh Pass1 $Fasta $f $AnnotFile $DbRoot/models $temp + echo tcsh -f $PROG_DIR/do_exonerate.csh Pass1 $Fasta $f $AnnotFile $DbRoot/models $temp done fi - done + done ) | parallel -j 8 mv $temp/genome.cds.fasta $Genome.cds_pass1.fasta fi @@ -94,16 +94,16 @@ fi # if [[ "$cdsdetection_pass3" == "yes" ]] ; then - for dir in "shell" ; do + (for dir in "shell" ; do if [[ -d $DbRoot/$dir ]] ; then fams=$(ls $DbRoot/$dir/*.fst) loginfo $fams loginfo "running pass3:$dir exonerate of $Genome on $DbRoot" for f in $fams ; do - tcsh -f $PROG_DIR/do_exonerate.csh Pass3 $Fasta $f $AnnotFile $DbRoot/models $temp + echo tcsh -f $PROG_DIR/do_exonerate.csh Pass3 $Fasta $f $AnnotFile $DbRoot/models $temp done fi - done + done) | parallel -j 8 mv $temp/genome.cds.fasta $Genome.cds_pass2.fasta fi diff --git a/detectors/cds/params/default b/detectors/cds/params/default index aff6734..1a955b5 100644 --- a/detectors/cds/params/default +++ b/detectors/cds/params/default @@ -15,13 +15,14 @@ AssignUndef TMP_CLEANUP 1 # pass1: exonerate speedup # AssignUndef PASS1_SPEEDUP 1 -AssignUndef PASS1_SLOWDOWN 0 +AssignUndef PASS1_SLOWDOWN 5 -AssignUndef PASS1_BLASTX_FILTER_IDMIN 80 -AssignUndef PASS1_BLASTX_FILTER_NBMIN 20 -AssignUndef PASS1_BLASTX_FILTER_NBMAX 50 -AssignUndef PASS1_BLASTX_FILTER_COMIN 0.8 -AssignUndef PASS1_BLASTX_FILTER_GCODE 11 +AssignUndef PASS1_BLASTX_FILTER_MINLEN 100 +AssignUndef PASS1_BLASTX_FILTER_IDMIN 50 +AssignUndef PASS1_BLASTX_FILTER_NBMIN 20 +AssignUndef PASS1_BLASTX_FILTER_NBMAX 20 +AssignUndef PASS1_BLASTX_FILTER_COMIN 0.8 +AssignUndef PASS1_BLASTX_FILTER_GCODE 11 # # pass1: exonerate parameters