cds/tools/chlorodb added

Former-commit-id: 0579e878a69b7c285ca71870e9ca5730649a2fda
Former-commit-id: 7cced5b488441d87bf070a9a444317db0e048880
This commit is contained in:
alain viari
2015-11-13 17:41:18 +01:00
parent 0d5f0c1f20
commit e4d6a8484d
585 changed files with 4750 additions and 50 deletions

View File

@ -1,21 +1,21 @@
#!/bin/csh -f
#
# Annotate CDS - Pass1
# Annotate CDS - Exonerate
#
#========================================================================================
#
# Annotate CDS of chlorodb/core proteins using exonerate
# Annotate CDS using exonerate
#
# pass1.sh <FASTAFILE> <FAMILY> [<OUTDIR>]
# do_exonerate.sh <FASTAGENOM> <FASTAPROT> [<OUTDIR>]
#
# - <FASTAFILE> : The fasta file containing the genome to annotate
# - <FAMILY> : Name of the protein family (defined in chlorodb/core)
# - <FASTAGENOM> : The fasta file containing the genome to annotate
# - <FASTAPROT> : The fasta file containing the protein family
#
# Results are in file : `basename <FASTAFILE>:r`.<FAMILY>.res
# Results are in file : `basename <FASTAGENOM>:r`.`basename <FASTAPROT>:r`.res
#
#========================================================================================
#
# usage: go_pass1.sh fasta family [outdir]
# usage: do_exonerate.sh dna.fasta prot.fasta [outdir]
#
unsetenv ORG_SOURCED
@ -31,12 +31,14 @@ NeedArg 2
set GenoFile = $Argv[1]
set GenoName = `basename $GenoFile:r`
set ProtName = $Argv[2]
set ProtDir = $CDS_DATA_DIR/chlorodb/core
set ProtFile = $ProtDir/$ProtName.fst
set ProtFile = $Argv[2]
set ProtDir = `dirname $ProtFile`
set ProtName = `basename $ProtFile:r`
NeedFile $GenoFile
NeedFile $ProtFile
NeedFile $ProtDir/Annot.lst
set OutDir = .
if ($#Argv >= 3) set OutDir = $3
@ -101,7 +103,7 @@ endif
if ($PASS1_SPEEDUP != 0) then
$PROG_DIR/go_filterbx.sh $GenoFile $ProtFile \
$PROG_DIR/do_filterbx.sh $GenoFile $ProtFile \
$PASS1_BLASTX_FILTER_IDMIN \
$PASS1_BLASTX_FILTER_NBMIN \
$PASS1_BLASTX_FILTER_NBMAX > D_$$
@ -159,8 +161,7 @@ $AwkCmd -v MAX_SPAN=$PASS1_MAX_SPAN \
# get annotations
#
egrep "^$ProtName " $CDS_DATA_DIR/chlorodb/core/Annot.lst |\
awk '{print "c annot", $0}' > T_$$
egrep "^$ProtName " $ProtDir/Annot.lst | awk '{print "c annot", $0}' > T_$$
#
# extend start/stop

View File

@ -5,7 +5,7 @@
#
# output on stdout
#
# usage: go_filterbx.sh dna_fasta prot_fasta [idmin nbmin nbmax]
# usage: do_filterbx.sh dna.fasta prot.fasta [idmin nbmin nbmax]
#
unsetenv ORG_SOURCED

View File

@ -13,7 +13,7 @@
# Results are printed to the standard output
#
#========================================================================================
# usage: go_cds.sh fasta
# usage: go_cds.sh fasta [db_core]
#
unsetenv ORG_SOURCED
@ -22,13 +22,19 @@ source $ORG_HOME/scripts/csh_init.sh
NeedArg 1
set Fasta = $Argv[1]
set Fasta = $Argv[1]; Shift
NeedFile $Fasta
set Genome = `basename $Fasta:r`
NeedFile $CDS_DATA_DIR/chlorodb/core
set DbCore = $CDS_DATA_DIR/chlorodb/core
if ($#Argv > 0) then
set DbCore = $Argv[1]; Shift
endif
NeedFile $DbCore/Annot.lst
#
# run everything in a temporary place
@ -44,15 +50,15 @@ endif
# pass1: run exonerate
#
set fams = `ls $CDS_DATA_DIR/chlorodb/core/*.fst`
set fams = `ls $DbCore/*.fst`
Notify "running pass1: exonerate of $Genome"
Notify "running pass1: exonerate of $Genome on $DbCore"
foreach f ($fams)
set prot = `basename $f:r`
$PROG_DIR/go_pass1.sh $Fasta $prot $temp
$PROG_DIR/do_exonerate.sh $Fasta $f $temp
end
#
# pass2: transsplicing
#