removed need of R igraph from chlorodb/subdb

Former-commit-id: 574aace9be5804d728a877110f5f475d61644f75
Former-commit-id: 2e7ea63447643830a62f18a364327d7b396ec140
This commit is contained in:
alain viari
2015-11-14 22:13:55 +01:00
parent d83201fd2f
commit 2d404b5b24
9 changed files with 81 additions and 66 deletions

5
TODO
View File

@ -18,3 +18,8 @@ o parameters in go_subdb.sh
o models in DB_DIR
o check R package igraph installation
o replace R igraph for cc

View File

@ -23,17 +23,15 @@ then (after checking)
# notes
#
this requires an installed R > 3.0.1
calculation of models currently requires R
(without any specific package); this will be
replaced in the future.
with the following packages
in addition, optional graphics output (plot.models.r)
requires the following graphic packages :
igraph # <- mandatory
grid
gridExtra
vcd
plotrix
grid # <- the following are not needed
gridExtra # by scripts, but just to
vcd # produce graphics for models
plotrix # by lib/plot.models.r
you can check and install them by running :
lib/install.rpackages.r

View File

@ -111,9 +111,7 @@ Notify "select by length"
foreach f (D_$$/*.fst)
set nom = `basename $f:r`
$AwkCmd -f $LIB_DIR/db.getlen.awk $f > L_$$
$LIB_DIR/db.filter.len.r L_$$ $Delta |\
$AwkCmd '($NF == "TRUE") {print $2}' > M_$$
$AwkCmd -v DELTA=$Delta -f $LIB_DIR/db.filter.len.awk $f > M_$$
$AwkCmd -v FILE=M_$$ -f $LIB_DIR/db.subdb.awk $f > E_$$/$nom.fst
Report E_$$/$nom.fst "length_filter"
set n = `egrep '^>' E_$$/$nom.fst | wc -l`
@ -144,10 +142,8 @@ foreach f (E_$$/*.fst)
$AwkCmd -v COVMIN=$Covmin -v PMAX=$Pmax -v IDMIN=$Idmin \
-f $LIB_DIR/db.blastlink.awk $f.blast.out |\
$AwkCmd -f $LIB_DIR/db.todl.awk > G_$$
($LIB_DIR/db.cc.r G_$$ > $f.cc.txt) >>& db.log
# connected components of the link graph written to G_$$ just above;
# the file operand is required, otherwise awk reads standard input
$AwkCmd -f $LIB_DIR/db.cc.awk G_$$ > $f.cc.txt
awk -v NAME=$nom -f $LIB_DIR/db.reportcc.awk $f.cc.txt >> $OutLog
$AwkCmd -f $LIB_DIR/db.selcc.awk $f.cc.txt > S_$$

View File

@ -0,0 +1,31 @@
#
#
# visit(u, i) : depth-first labelling of a connected component.
#   u : node to start from
#   i : component number recorded in Visited[] for every node reached
# Reads the global adjacency table Edge[][] and writes the global Visited[].
# Arguments after _local_ are local variables, not to be passed by callers.
function visit(u, i, _local_, v) {
    if (! Visited[u]) {
        # label the node before recursing so that cycles terminate
        Visited[u] = i
        for (v in Edge[u])
            visit(v, i)
    }
}
# skip comment lines
/^#/ { next }

# skip blank lines : with no field, $1 and $2 are both "" and the
# rule below would register a phantom node named ""
NF == 0 { next }

# every other line names a node in $1 and, when present, a neighbour
# in $2 ; the edge is stored in both directions (undirected graph).
# a line with a single field declares a node without any edge.
{
    Node[$1]++
    if (NF >= 2) {
        Node[$2]++
        Edge[$1][$2]++
        Edge[$2][$1]++
    }
}

# label each connected component with a number (1..NbComp) then
# print one "node component" line per node
END {
    for (u in Node) {
        # visit() would return at once on a visited node, but the
        # test is needed here to avoid incrementing NbComp for it
        if (Visited[u]) continue
        visit(u, ++NbComp)
    }
    for (u in Node)
        print u, Visited[u]
}

View File

@ -1,18 +0,0 @@
#!/usr/bin/env Rscript
#
# Print the connected component of each vertex of a graph.
#
# usage: db.cc.r [graph.dl]
#   graph.dl : graph file read with format 'dl' (default: 'graph.dl')
# output (stdout): one line per vertex, "<name> <component>"

# library() stops with an explicit error when igraph is missing, whereas
# require() only returns FALSE and lets the script fail later on
library(igraph, warn.conflicts = FALSE)

args <- commandArgs(trailingOnly = TRUE)
path <- if (length(args) > 0) args[1] else "graph.dl"

g <- read.graph(path, format = "dl")
cc <- clusters(g)

# two columns: vertex name, component membership
res <- cbind(V(g)$name, membership(cc))
write.table(res, quote = FALSE, row.names = FALSE, col.names = FALSE)

quit(save = "no")

View File

@ -0,0 +1,33 @@
#
# Abs(x) : absolute value of x, x being first forced to a number
function Abs(x) {
    x += 0
    if (x >= 0)
        return x
    return 0 - x
}
# Median(array) : median of the values held in array.
# The values are sorted into the local copy tmp (asort is a gawk
# extension), so array itself is left untouched.
# Arguments after _local_ are local variables, not to be passed by callers.
function Median(array, _local_, tmp, size, lhs, rhs) {
    size = asort(array, tmp)
    # ranks of the lower and upper middle values ; both are the
    # same rank when size is odd
    rhs = 1 + int(size / 2)
    lhs = 1 + int((size - 1) / 2)
    return ((tmp[lhs] + tmp[rhs]) / 2.0)
}
#
# DELTA : maximal relative deviation from the median length
#         (set with -v DELTA=value, default 0.5)
BEGIN {
    if (DELTA=="") DELTA=0.5
}

# header line : the identifier is the first word without its leading
# '>', the length is the last '@'-separated part of that word
/^>/ {
    N++
    Id[N] = substr($1, 2)
    na = split($1, a, "@")
    Len[N] = a[na]
}

# print the identifiers whose length deviates from the median length
# by at most DELTA (relative to the median)
END {
    # no header seen : nothing to select
    if (N == 0) exit

    med = Median(Len)

    # a null median (e.g. headers without a numeric length) would make
    # the division below a fatal error ; select nothing instead
    if (med == 0) {
        print "db.filter.len.awk: null median length, no sequence selected" > "/dev/stderr"
        exit
    }

    for (i = 1 ; i <= N ; i++) {
        if (Abs(Len[i]-med)/med <= DELTA)
            print Id[i]
    }
}

View File

@ -1,19 +0,0 @@
#!/usr/bin/env Rscript
#
# Flag the sequences whose length is close to the median length.
#
# usage: db.filter.len.r [len.txt [delta]]
#   len.txt : table with a header line and a 'len' column (default: 'len.txt')
#   delta   : maximal relative deviation from the median length (default: 0.5)
# output (stdout): the input table with an additional logical column 'ok'

args <- commandArgs(trailingOnly = TRUE)
path <- if (length(args) > 0) args[1] else "len.txt"

# command-line arguments are character strings: convert delta once, otherwise
# the '<=' below compares character strings instead of numbers
delta <- if (length(args) > 1) as.numeric(args[2]) else 0.5
if (is.na(delta)) {
  stop("delta should be a number: ", args[2], call. = FALSE)
}

tab <- read.table(path, header = TRUE)
lmed <- median(tab$len)

# relative deviation from the median length
tab$ok <- (abs(tab$len - lmed) / lmed) <= delta

write.table(tab, quote = FALSE)
quit(save = "no")

View File

@ -1,10 +0,0 @@
#
# column names of the table printed below
BEGIN { print "id len" }

# header line : print the first word without its leading '>' followed
# by the last '@'-separated part of that word
$0 ~ /^>/ {
    nparts = split($1, parts, "@")
    print substr($1, 2), parts[nparts]
}

View File

@ -25,7 +25,6 @@ check("grid")
check("gridExtra")
check("vcd")
check("plotrix")
check("igraph")
quit(save='no', status=0)