added cds/tools/compare
Former-commit-id: 07fb256bf17db3f0ffc1730b0383f8255fbb9129 Former-commit-id: fd6a1fe72a39c5633c2f9fb6de09af979c2a48f3
This commit is contained in:
89
detectors/cds/tools/compare/go_compare.sh
Executable file
89
detectors/cds/tools/compare/go_compare.sh
Executable file
@@ -0,0 +1,89 @@
|
||||
#!/bin/csh -f
#
# go_compare.sh
#
# Compare the CDS annotation of a reference file to the CDS annotation
# of a predicted file.
#
# Both annotation files are in Genbank/Embl format. The extension of each
# file name selects the awk scripts used to read it, so it has to match
# the <extension>.oneliner.awk and <extension>.cds.awk scripts of LIB_DIR.
#
# usage: go_compare.sh reference predicted
#
# output on stdout
#
# Temporary files named with one letter, an underscore and the process id
# are written to the current directory and removed at the end.
#

# NOTE(review): presumably forces csh_init.sh to run its initialisation
# again even when the caller already sourced it - confirm in csh_init.sh.
unsetenv ORG_SOURCED

# the root of the installation is four levels above the directory
# holding this script
setenv ORG_HOME `dirname $0`/../../../..

# NOTE(review): NeedArg, NeedFile, Notify, Exit, Argv, AwkCmd, LIB_DIR and
# DATA_DIR are not defined in this file - presumably csh_init.sh provides
# them.
source $ORG_HOME/scripts/csh_init.sh

NeedArg 2

set RefFile = $Argv[1]
set PrdFile = $Argv[2]

NeedFile $RefFile
NeedFile $PrdFile

# the file name extension selects the format specific awk scripts
set RefType = $RefFile:e
set PrdType = $PrdFile:e

#
# parse ref and prediction
#
# Each input goes through two awk passes: the oneliner script of its
# format first, then libutil.awk together with the cds script of the
# same format. The reference result is stored in R_<pid> and the
# prediction result in P_<pid>.
#

Notify "get genome info from $RefFile"

$AwkCmd -f $LIB_DIR/${RefType}.oneliner.awk $RefFile |\
  $AwkCmd -f $LIB_DIR/libutil.awk -f $LIB_DIR/${RefType}.cds.awk > R_$$

Notify "get prediction info from $PrdFile"

$AwkCmd -f $LIB_DIR/${PrdType}.oneliner.awk $PrdFile |\
  $AwkCmd -f $LIB_DIR/libutil.awk -f $LIB_DIR/${PrdType}.cds.awk > P_$$

#
# compare
#
# compareCds.awk, loaded together with libnws.awk, reads the reference
# table R_<pid> then the prediction table P_<pid>. Its output is kept
# in S_<pid>.
#

Notify "compare bank to predictions"

$AwkCmd -f $LIB_DIR/libnws.awk     \
        -f $LIB_DIR/compareCds.awk \
        R_$$ P_$$ > S_$$

# base statistics
#
# Take the MATCH lines of the comparison, turn dots into blanks and count
# how often each value of the fifth field occurs, most frequent first.
# Every summary line is prefixed by a hash sign and stored in U_<pid>.
# NOTE(review): the fifth field is presumably the match status - confirm
# against the output format of compareCds.awk.

grep "^MATCH" S_$$ | tr '.' ' ' | awk '{print $5}' |\
  sort | uniq -c | sort -nr | awk '{print "#",$0}' > U_$$

# add chlorodb/core statistics
#
# Only done when the chlorodb core directory exists.

if (-d $DATA_DIR/cds/chlorodb/core) then

  # C: sorted names of the core entries, i.e. the fst files of the
  # directory without their extension. The directory is listed as a
  # whole and filtered by sed, so a directory holding no fst file gives
  # an empty list instead of a csh No match error.
  ls $DATA_DIR/cds/chlorodb/core | sed -n -e 's/\.fst$//p' |\
    sort > C_$$

  # D: sorted unique second field of the MATCH lines flagged MISSED
  grep "^MATCH" S_$$ | grep "MISSED" | awk '{print $2}' | sort -u > D_$$

  # E: the missed names that are also core names
  # (join needs both inputs sorted, which they are)
  join D_$$ C_$$ > E_$$

  @ mt = `cat D_$$ | wc -l`
  @ mc = `cat E_$$ | wc -l`
  @ mn = $mt - $mc
  set LC = `cat E_$$`

  echo "#" >> U_$$
  echo "# $mc MISSED in ChloroDB-Core ($LC)" >> U_$$
  echo "# $mn MISSED not in ChloroDB-Core" >> U_$$
  echo "#" >> U_$$
  echo "" >> U_$$
endif

# the summary comes first, the detailed comparison follows

cat S_$$ >> U_$$

cat U_$$

#
# end
#
# Remove the temporary files of this run: every file of the current
# directory whose name is one character, an underscore and the process id.
# The backslash keeps an alias on rm, if any, from being used.
#

(\rm -f ?_$$) >> /dev/null

Exit 0
Reference in New Issue
Block a user