Files
annotate/detectors/cds/tools/compare/go_compare.sh
alain viari 9108ce75f1 fixed too many partial CDS bug
Former-commit-id: d733a46f4e92f755f38e452f03a28062de6739f1
Former-commit-id: 36bdc324d2b9a0491d07d40a7e68a4cf7ea73984
2015-11-10 22:15:01 +01:00

94 lines
2.0 KiB
Bash
Executable File

#!/bin/csh -f
#
# compare CDS annotations in a reference file to those in a predicted file
# annotation files are in Genbank/Embl format
#
# usage: go_compare reference predicted
#
# output on stdout
#
# scratch files named with a single letter, an underscore and the shell
# process id are written in the current directory and removed at the end
#
# NOTE(review): NeedArg, NeedFile, Notify, Error, Exit, Argv, AwkCmd,
# LIB_DIR and DATA_DIR are not defined in this file - presumably they
# are provided by csh_init.sh sourced below - to be confirmed
#

unsetenv ORG_SOURCED

setenv ORG_HOME `dirname $0`/../../../..
source $ORG_HOME/scripts/csh_init.sh

NeedArg 2

set RefFile = $Argv[1]
set PrdFile = $Argv[2]

NeedFile $RefFile
NeedFile $PrdFile

# the file extension selects the awk parsers used for each file

set RefType = $RefFile:e
set PrdType = $PrdFile:e

if ((! -e $LIB_DIR/$RefType.oneliner.awk) || (! -e $LIB_DIR/$PrdType.oneliner.awk)) then
  Error 1 "file extension should be 'gbk' or 'embl'"
endif

#
# parse ref and prediction
#
# each file goes through its oneliner parser then its cds_short parser
# reference result goes to scratch file R, prediction result to P
#

Notify "get genome info from $RefFile"

$AwkCmd -f $LIB_DIR/$RefType.oneliner.awk $RefFile |\
$AwkCmd -f $LIB_DIR/libutil.awk -f $LIB_DIR/$RefType.cds_short.awk > R_$$

Notify "get prediction info from $PrdFile"

$AwkCmd -f $LIB_DIR/$PrdType.oneliner.awk $PrdFile |\
$AwkCmd -f $LIB_DIR/libutil.awk -f $LIB_DIR/$PrdType.cds_short.awk > P_$$

#
# compare
#
# comparison output goes to scratch file S
#

Notify "compare bank to predictions"

$AwkCmd -f $LIB_DIR/libnws.awk \
-f $LIB_DIR/compareCds.awk \
R_$$ P_$$ > S_$$

# base statistics
# on MATCH lines, dots are turned into spaces and the fifth field is
# tallied - counts are written most frequent first, each line prefixed
# by a hash sign, into scratch file U which holds the final report

grep -E "^MATCH" S_$$ | tr '.' ' ' | awk '{print $5}' |\
sort | uniq -c | sort -nr | awk '{print "#",$0}' > U_$$

# add chlorodb/core statistics
# this section is skipped when the chlorodb core directory is absent

if (-d $DATA_DIR/cds/chlorodb/core) then

  # C : sorted names of the core fst files, directory and suffix removed

  ls $DATA_DIR/cds/chlorodb/core/*.fst |\
  sed -e 's@^.*core/@@' -e 's/\.fst$//' |\
  sort > C_$$

  # D : sorted unique second field of the MATCH lines that contain MISSED

  grep -E "^MATCH" S_$$ | grep "MISSED" | awk '{print $2}' | sort -u > D_$$

  # E : entries of D that are also listed in C

  join D_$$ C_$$ > E_$$

  # mt : all missed - mc : missed and in core - mn : missed and not in core

  @ mt = `wc -l < D_$$`
  @ mc = `wc -l < E_$$`
  @ mn = $mt - $mc

  set LC = `cat E_$$`

  echo "#" >> U_$$
  echo "# $mc MISSED in ChloroDB-Core ($LC)" >> U_$$
  echo "# $mn MISSED not in ChloroDB-Core" >> U_$$
  echo "#" >> U_$$
  echo "" >> U_$$

endif

# statistics header first, then the full comparison output

cat S_$$ >> U_$$

cat U_$$

#
# end
#
# remove all scratch files of this run
#

(\rm -f ?_$$) >> /dev/null

Exit 0