d4da1d01fdda6240bbd44f2b89e008cb95780cb9

@ -31,7 +31,7 @@ set GenoName = `basename $GenoFile:r`

# Protein file: read from the first remaining entry of $Argv.
# NOTE(review): Shift is defined outside this view; presumably it drops that
# entry from $Argv - confirm.
set ProtFile = $Argv[1]; Shift
set ProtDir  = `dirname $ProtFile`
# ProtName is the part of the file name that precedes its first '.', so every
# dotted suffix is dropped (the ':r' modifier would strip only the last one).
# The earlier ':r' based assignment was overwritten right away and is removed.
set ProtName = `basename $ProtFile | $AwkCmd -F'.' '{print $1}'`
# ProtType is the name of the directory that contains the protein file.
set ProtType = `basename $ProtDir`

NeedFile $GenoFile
@ -122,7 +122,7 @@ endif

if ($PASS1_SPEEDUP != 0) then

  # Run the do_filterbx script once, with the genome file, the protein file and
  # the three PASS1_BLASTX_FILTER_* settings; its stdout goes to the scratch
  # file D_$$ ($$ = pid of this shell).
  # The previous text chained two invocations through a trailing backslash, so
  # the first script received the second command line as extra arguments.
  # NOTE(review): the .csh variant is kept because the interpreter is tcsh -
  # confirm that do_filterbx.csh is the script shipped in $PROG_DIR.
  tcsh -f $PROG_DIR/do_filterbx.csh $GenoFile $ProtFile  \
            $PASS1_BLASTX_FILTER_IDMIN          \
            $PASS1_BLASTX_FILTER_NBMIN          \
            $PASS1_BLASTX_FILTER_NBMAX > D_$$
@ -55,10 +55,10 @@ endif

# Pass 1: for each of the core/shell/dust sub-directories that exists under
# $DbRoot, run the exonerate scripts on every *.clean.fst file it contains.
foreach dir ("core" "shell" "dust")
  if (-d $DbRoot/$dir) then
    # Only the *.clean.fst files are processed. A preceding listing of all
    # *.fst files was overwritten by this assignment and has been removed.
    set fams = `ls $DbRoot/$dir/*.clean.fst`
    Notify "running pass1:$dir exonerate of $Genome on $DbRoot"
    foreach f ($fams)
      # NOTE(review): both the .sh and the .csh script are run on each file,
      # with identical arguments; this looks like an old/new pair of the same
      # call - confirm whether the .sh line should be dropped.
      tcsh -f $PROG_DIR/do_exonerate.sh $Fasta $f $DbRoot/models $temp
      tcsh -f $PROG_DIR/do_exonerate.csh $Fasta $f $DbRoot/models $temp
    end
  endif
end
@ -3,6 +3,8 @@
#
# -v MAX_SPAN ALLOW_STOP EXCLUDE
#
# 
#

BEGIN {
  PROCINFO["sorted_in"] = "@ind_num_asc"

@ -6,6 +6,9 @@ source "${THIS_DIR}/../../../scripts/bash_init.sh"

# Directory whose *.fst files are clusterized by this script.
CORELIB="${CDS_DATA_DIR}"/chlorodb/core

# Numeric thresholds handed to cd-hit inside clusterize().
CDHIT_DELTA='0.8'
CDHIT_ID='0.7'


function clusterize() {
 
@ -17,12 +20,15 @@ function clusterize() {
 	rm -rf "${prot}"
 	mkdir -p "${prot}"
 	
 	# Cluster the input sequences with cd-hit (single run; an earlier run with
 	# hard-coded thresholds wrote to the same output and was overwritten).
 	#   -c  sequence identity threshold      -> CDHIT_ID
 	#   -s  length difference cutoff         -> CDHIT_DELTA
 	#   -aL alignment coverage of the longer sequence
 	#   -G 1 global identity, -g 1 assign to the most similar cluster
 	#   -b  alignment band width, -p 1 print overlap in the .clstr file
 	#   -d 0 keep sequence names up to the first blank, -n word length
 	# The two variables were previously passed to the opposite options
 	# (identity to -s, delta to -c).
 	cd-hit -i "${fastain}" \
 	       -o "${prot}/${cdhitout}" \
 	       -c "${CDHIT_ID}" \
 	       -G 1 \
 	       -g 1 \
 	       -aL 0.95 \
 	       -s "${CDHIT_DELTA}" \
 	       -b 350 -p 1 \
 	       -d 0 -n 3
 	       
 	fasta1line "${fastain}" > "${prot}/${fasta1}"
 	
@ -35,6 +41,8 @@ function clusterize() {
 	                 filename=prot".cluster."cluster".ids"; \
 	                 print $3 >> filename ; \
 	                 close(filename) }' "${cdhitout}.clstr"
 	                
 	rm -f "../$prot.clean.fst"
 	                 
 	for ids in *.cluster.*.ids ; do
 		cluster=$(printf "%03d" $(echo "${ids}" | $AwkCmd -F'.' '{print $3}'))
@ -52,10 +60,10 @@ function clusterize() {
	 	    egrep -v -- '^--$' > "$alignment"
	    fi
	 		
 	    if (( size >= 10 )) ; then
 	    if (( size >= 5 )) ; then
 	    	egrep -f "$ids" -A 1 "${fasta1}" | \
	 	    egrep -v -- '^--$' | \
	 	    formatfasta >> "$prot.clean.fst"
	 	    formatfasta >> "../$prot.clean.fst"
 	    fi
 	    
 	    rm -f "$ids"
@ -73,6 +81,8 @@ function clusterize() {

# Work inside the core library directory. Abort when CORELIB is empty or the
# directory cannot be entered: otherwise the cleanup below would delete the
# *.clean.fst entries of whatever directory the script was started from.
pushd "${CORELIB:?CORELIB is not set}" || exit 1

# Remove the *.clean.fst entries left by previous runs, so that the *.fst glob
# used afterwards only matches the original family files.
rm -rf -- *.clean.fst

for prot in *.fst ; do
	prot="${prot/.fst/}"
	clusterize "$prot"