From 2d404b5b246c8b8693d2b7e633c3efbd2bdf3d00 Mon Sep 17 00:00:00 2001 From: alain viari Date: Sat, 14 Nov 2015 22:13:55 +0100 Subject: [PATCH] removed need of R igraph from chlorodb/subdb Former-commit-id: 574aace9be5804d728a877110f5f475d61644f75 Former-commit-id: 2e7ea63447643830a62f18a364327d7b396ec140 --- TODO | 5 +++ detectors/cds/tools/chlorodb/README.txt | 20 +++++------ .../cds/tools/chlorodb/subdb/go_subdb.sh | 10 ++---- .../cds/tools/chlorodb/subdb/lib/db.cc.awk | 31 +++++++++++++++++ .../cds/tools/chlorodb/subdb/lib/db.cc.r | 18 ---------- .../chlorodb/subdb/lib/db.filter.len.awk | 33 +++++++++++++++++++ .../tools/chlorodb/subdb/lib/db.filter.len.r | 19 ----------- .../tools/chlorodb/subdb/lib/db.getlen.awk | 10 ------ detectors/cds/tools/lib/install.rpackages.r | 1 - 9 files changed, 81 insertions(+), 66 deletions(-) create mode 100644 detectors/cds/tools/chlorodb/subdb/lib/db.cc.awk delete mode 100755 detectors/cds/tools/chlorodb/subdb/lib/db.cc.r create mode 100644 detectors/cds/tools/chlorodb/subdb/lib/db.filter.len.awk delete mode 100755 detectors/cds/tools/chlorodb/subdb/lib/db.filter.len.r delete mode 100644 detectors/cds/tools/chlorodb/subdb/lib/db.getlen.awk diff --git a/TODO b/TODO index 66e715a..af736c1 100644 --- a/TODO +++ b/TODO @@ -18,3 +18,8 @@ o parameters in go_subdb.sh o models in DB_DIR +o check R package igraph installation + +o replace R igraph for cc + + diff --git a/detectors/cds/tools/chlorodb/README.txt b/detectors/cds/tools/chlorodb/README.txt index c58508c..9adf986 100644 --- a/detectors/cds/tools/chlorodb/README.txt +++ b/detectors/cds/tools/chlorodb/README.txt @@ -23,17 +23,15 @@ then (after checking) # notes # -this requires an installed R > 3.0.1 +calculation of models currently requires R +(without any specific package) this will be +replaced in the future... -with the following packages +in addition, optional graphics output (plot.models.r) +requires the following graphic packages : - igraph # <- mandatory + grid + gridExtra + vcd + plotrix - grid # <- the following are not needed - gridExtra # by scripts, but just to - vcd # produce graphics for models - plotrix # by lib/plot.models.r - -you can check and install them by running : - - lib/install.rpackages.r diff --git a/detectors/cds/tools/chlorodb/subdb/go_subdb.sh b/detectors/cds/tools/chlorodb/subdb/go_subdb.sh index 0ea30fb..0d7a5b2 100755 --- a/detectors/cds/tools/chlorodb/subdb/go_subdb.sh +++ b/detectors/cds/tools/chlorodb/subdb/go_subdb.sh @@ -111,9 +111,7 @@ Notify "select by length" foreach f (D_$$/*.fst) set nom = `basename $f:r` - $AwkCmd -f $LIB_DIR/db.getlen.awk $f > L_$$ - $LIB_DIR/db.filter.len.r L_$$ $Delta |\ - $AwkCmd '($NF == "TRUE") {print $2}' > M_$$ + $AwkCmd -v DELTA=$Delta -f $LIB_DIR/db.filter.len.awk $f > M_$$ $AwkCmd -v FILE=M_$$ -f $LIB_DIR/db.subdb.awk $f > E_$$/$nom.fst Report E_$$/$nom.fst "length_filter" set n = `egrep '^>' E_$$/$nom.fst | wc -l` @@ -144,10 +142,8 @@ foreach f (E_$$/*.fst) $AwkCmd -v COVMIN=$Covmin -v PMAX=$Pmax -v IDMIN=$Idmin \ -f $LIB_DIR/db.blastlink.awk $f.blast.out |\ - $AwkCmd -f $LIB_DIR/db.todl.awk > G_$$ - - ($LIB_DIR/db.cc.r G_$$ > $f.cc.txt) >>& db.log - + $AwkCmd -f $LIB_DIR/db.cc.awk > $f.cc.txt + awk -v NAME=$nom -f $LIB_DIR/db.reportcc.awk $f.cc.txt >> $OutLog $AwkCmd -f $LIB_DIR/db.selcc.awk $f.cc.txt > S_$$ diff --git a/detectors/cds/tools/chlorodb/subdb/lib/db.cc.awk b/detectors/cds/tools/chlorodb/subdb/lib/db.cc.awk new file mode 100644 index 0000000..cb64a1e --- /dev/null +++ b/detectors/cds/tools/chlorodb/subdb/lib/db.cc.awk @@ -0,0 +1,31 @@ +# +# + +function visit(u, i, _local_, v) { + if (Visited[u]) return + Visited[u] = i + for (v in Edge[u]) { + visit(v, i) + } +} + +/^#/ { next } + +{ + Node[$1]++ + Node[$2]++ + Edge[$1][$2]++ + Edge[$2][$1]++ +} + +END { + + for (u in Node) { + if (Visited[u]) continue + visit(u, ++NbComp) + } + + for (u in Node) + print u, Visited[u] + +} \ No newline at end of file diff --git a/detectors/cds/tools/chlorodb/subdb/lib/db.cc.r b/detectors/cds/tools/chlorodb/subdb/lib/db.cc.r deleted file mode 100755 index 2a69b8e..0000000 --- a/detectors/cds/tools/chlorodb/subdb/lib/db.cc.r +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env Rscript -# - -require(igraph, warn.conflicts=F) - -args <- commandArgs(T) -path <- if(length(args) > 0) args[1] else 'graph.dl' - -g <- read.graph(path, format='dl') - -cc <- clusters(g) - -res <- cbind(V(g)$name, membership(cc)) - -write.table(res, quote=FALSE, row.names=FALSE, col.names=FALSE) - -quit(save="no") - diff --git a/detectors/cds/tools/chlorodb/subdb/lib/db.filter.len.awk b/detectors/cds/tools/chlorodb/subdb/lib/db.filter.len.awk new file mode 100644 index 0000000..1e059a4 --- /dev/null +++ b/detectors/cds/tools/chlorodb/subdb/lib/db.filter.len.awk @@ -0,0 +1,33 @@ +# + +function Abs(x) { + return (x+0 >= 0) ? x+0 : 0-x +} + +function Median(array, _local_, tmp, size, lhs, rhs) { + size = asort(array, tmp) + lhs = int((size - 1) / 2) + 1 + rhs = int(size / 2) + 1 + return ((tmp[lhs] + tmp[rhs]) / 2.0) +} + +# + +BEGIN { + if (DELTA=="") DELTA=0.5 +} + +/^>/ { + N++ + Id[N] = substr($1, 2) + na = split($1, a, "@") + Len[N] = a[na] +} + +END { + med = Median(Len) + for (i = 1 ; i <= N ; i++) { + if (Abs(Len[i]-med)/med <= DELTA) + print Id[i] + } +} diff --git a/detectors/cds/tools/chlorodb/subdb/lib/db.filter.len.r b/detectors/cds/tools/chlorodb/subdb/lib/db.filter.len.r deleted file mode 100755 index 13cb5a2..0000000 --- a/detectors/cds/tools/chlorodb/subdb/lib/db.filter.len.r +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env Rscript -# - -args <- commandArgs(T) -path <- if(length(args) > 0) args[1] else 'len.txt' -delta <- if(length(args) > 1) args[2] else 0.5 - -tab <- read.table(path, header=T) - -lmed <- median(tab$len) - -dlen <- lmed * as.numeric(delta) - -tab$ok <- (abs(tab$len-lmed)/lmed) <= delta - -write.table(tab, quote=F) - -quit(save='no') - diff --git a/detectors/cds/tools/chlorodb/subdb/lib/db.getlen.awk b/detectors/cds/tools/chlorodb/subdb/lib/db.getlen.awk deleted file mode 100644 index 117c90b..0000000 --- a/detectors/cds/tools/chlorodb/subdb/lib/db.getlen.awk +++ /dev/null @@ -1,10 +0,0 @@ -# -BEGIN { - print "id len" -} - -/^>/ { - na = split($1, a, "@") - print substr($1, 2), a[na] -} - diff --git a/detectors/cds/tools/lib/install.rpackages.r b/detectors/cds/tools/lib/install.rpackages.r index c6f9778..9bc9175 100755 --- a/detectors/cds/tools/lib/install.rpackages.r +++ b/detectors/cds/tools/lib/install.rpackages.r @@ -25,7 +25,6 @@ check("grid") check("gridExtra") check("vcd") check("plotrix") -check("igraph") quit(save='no', status=0)