removed need of R igraph from chlorodb/subdb
Former-commit-id: 574aace9be5804d728a877110f5f475d61644f75 Former-commit-id: 2e7ea63447643830a62f18a364327d7b396ec140
This commit is contained in:
5
TODO
5
TODO
@ -18,3 +18,8 @@ o parameters in go_subdb.sh
|
||||
|
||||
o models in DB_DIR
|
||||
|
||||
o check R package igraph installation
|
||||
|
||||
o replace R igraph for cc
|
||||
|
||||
|
||||
|
@ -23,17 +23,15 @@ then (after checking)
|
||||
# notes
|
||||
#
|
||||
|
||||
this requires an installed R > 3.0.1
|
||||
calculation of models currently requires R
|
||||
(without any specific package); this will be
|
||||
replaced in the future...
|
||||
|
||||
with the following packages
|
||||
in addition, optional graphics output (plot.models.r)
|
||||
requires the following graphic packages :
|
||||
|
||||
igraph # <- mandatory
|
||||
grid
|
||||
gridExtra
|
||||
vcd
|
||||
plotrix
|
||||
|
||||
grid # <- the following are not needed
|
||||
gridExtra # by scripts, but just to
|
||||
vcd # produce graphics for models
|
||||
plotrix # by lib/plot.models.r
|
||||
|
||||
you can check and install them by running :
|
||||
|
||||
lib/install.rpackages.r
|
||||
|
@ -111,9 +111,7 @@ Notify "select by length"
|
||||
|
||||
foreach f (D_$$/*.fst)
|
||||
set nom = `basename $f:r`
|
||||
$AwkCmd -f $LIB_DIR/db.getlen.awk $f > L_$$
|
||||
$LIB_DIR/db.filter.len.r L_$$ $Delta |\
|
||||
$AwkCmd '($NF == "TRUE") {print $2}' > M_$$
|
||||
$AwkCmd -v DELTA=$Delta -f $LIB_DIR/db.filter.len.awk $f > M_$$
|
||||
$AwkCmd -v FILE=M_$$ -f $LIB_DIR/db.subdb.awk $f > E_$$/$nom.fst
|
||||
Report E_$$/$nom.fst "length_filter"
|
||||
set n = `egrep '^>' E_$$/$nom.fst | wc -l`
|
||||
@ -144,10 +142,8 @@ foreach f (E_$$/*.fst)
|
||||
|
||||
$AwkCmd -v COVMIN=$Covmin -v PMAX=$Pmax -v IDMIN=$Idmin \
|
||||
-f $LIB_DIR/db.blastlink.awk $f.blast.out |\
|
||||
$AwkCmd -f $LIB_DIR/db.todl.awk > G_$$
|
||||
|
||||
($LIB_DIR/db.cc.r G_$$ > $f.cc.txt) >>& db.log
|
||||
|
||||
$AwkCmd -f $LIB_DIR/db.cc.awk > $f.cc.txt
|
||||
|
||||
awk -v NAME=$nom -f $LIB_DIR/db.reportcc.awk $f.cc.txt >> $OutLog
|
||||
|
||||
$AwkCmd -f $LIB_DIR/db.selcc.awk $f.cc.txt > S_$$
|
||||
|
31
detectors/cds/tools/chlorodb/subdb/lib/db.cc.awk
Normal file
31
detectors/cds/tools/chlorodb/subdb/lib/db.cc.awk
Normal file
@ -0,0 +1,31 @@
|
||||
#
|
||||
#
|
||||
|
||||
#
# visit(u, i)
#   Label node u, and recursively every node reachable from it through
#   the global adjacency table Edge, with the value i in the global
#   table Visited. A node whose Visited entry is already set (non-zero)
#   is left untouched, which also stops the recursion on cycles.
#   'nbr' is a local variable (declared after the _local_ marker).
#
function visit(u, i, _local_, nbr) {
    if (! Visited[u]) {
        Visited[u] = i
        for (nbr in Edge[u])
            visit(nbr, i)
    }
}
|
||||
|
||||
# lines starting with '#' are ignored
/^#/ { next }

# every other record is read as one undirected edge between $1 and $2:
# the link is stored in both directions in Edge and both endpoints
# are registered in Node
{
    Edge[$1][$2]++
    Edge[$2][$1]++
    Node[$1]++
    Node[$2]++
}

# label each connected component with a number starting at 1 (NbComp
# counts the components found so far) and print one "node label" line
# per node
END {
    for (u in Node) {
        # a node not yet labelled starts a new component
        if (! Visited[u])
            visit(u, ++NbComp)
        print u, Visited[u]
    }
}
|
@ -1,18 +0,0 @@
|
||||
#!/usr/bin/env Rscript
#
# Print the connected component of every vertex of a graph.
#
# usage: db.cc.r [path]
#   path  graph file in 'dl' format (default: 'graph.dl')
#
# Output (stdout): one line per vertex, "<vertex name> <component number>".
#

# library() stops immediately when igraph is missing; require() would only
# return FALSE and let the script fail later with a less helpful message.
library(igraph, warn.conflicts = FALSE)

args <- commandArgs(trailingOnly = TRUE)
path <- if (length(args) > 0) args[1] else "graph.dl"

g <- read.graph(path, format = "dl")

cc <- clusters(g)

# one row per vertex: name, component membership
res <- cbind(V(g)$name, membership(cc))

write.table(res, quote = FALSE, row.names = FALSE, col.names = FALSE)

quit(save = "no")
|
||||
|
33
detectors/cds/tools/chlorodb/subdb/lib/db.filter.len.awk
Normal file
33
detectors/cds/tools/chlorodb/subdb/lib/db.filter.len.awk
Normal file
@ -0,0 +1,33 @@
|
||||
#
|
||||
|
||||
# Abs(x): absolute value of x; x is first coerced to a number.
function Abs(x) {
    x += 0
    if (x >= 0)
        return x
    return 0 - x
}
|
||||
|
||||
# Median(array): median of the values of array (gawk only: uses asort).
# The values are sorted into a local copy, so array itself is not
# modified. With an even number of values the result is the mean of the
# two middle ones. 'sorted', 'n', 'lo' and 'hi' are local variables.
function Median(array, _local_, sorted, n, lo, hi) {
    n = asort(array, sorted)
    # 1-based ranks of the upper and lower middle elements
    # (identical when n is odd)
    hi = int(n / 2) + 1
    lo = n + 1 - hi
    return (sorted[lo] + sorted[hi]) / 2
}
|
||||
|
||||
#
|
||||
|
||||
#
# Print the identifiers of the sequences whose length is close to the
# median length.
#
# DELTA (usually set with -v DELTA=...) is the largest accepted relative
# deviation |len - median| / median; it defaults to 0.5.
#

BEGIN {
    if (DELTA == "") DELTA = 0.5
}

# header line: the identifier is the first word without its leading '>',
# the length is the part of that word following its last '@'
/^>/ {
    N++
    Id[N] = substr($1, 2)
    na = split($1, a, "@")
    Len[N] = a[na]
}

END {
    # no header line seen: nothing to select
    if (N == 0) exit 0

    med = Median(Len)

    # A null median (all lengths zero, or headers without a numeric
    # '@length' suffix) would make the relative deviation below a
    # division by zero, which is a fatal error in gawk.
    # Nothing is selected in that case.
    if (med == 0) exit 0

    for (i = 1 ; i <= N ; i++) {
        if (Abs(Len[i] - med) / med <= DELTA)
            print Id[i]
    }
}
|
@ -1,19 +0,0 @@
|
||||
#!/usr/bin/env Rscript
#
# Flag the rows of a length table whose length is close to the median.
#
# usage: db.filter.len.r [path [delta]]
#   path   table with a header line and a 'len' column (default: 'len.txt')
#   delta  largest accepted relative deviation |len - median| / median
#          (default: 0.5)
#
# Output (stdout): the input table with an additional logical column 'ok'.
#

args <- commandArgs(trailingOnly = TRUE)
path <- if (length(args) > 0) args[1] else "len.txt"

# Command-line arguments are character strings. delta must be converted to
# a number, otherwise the `<=` test below would compare the deviations with
# it as strings (e.g. "1e-04" <= "0.5" is FALSE).
delta <- if (length(args) > 1) as.numeric(args[2]) else 0.5
if (is.na(delta)) {
  stop("delta must be a number, got: ", args[2], call. = FALSE)
}

tab <- read.table(path, header = TRUE)

lmed <- median(tab$len)

# relative deviation from the median, compared numerically with delta
tab$ok <- (abs(tab$len - lmed) / lmed) <= delta

write.table(tab, quote = FALSE)

quit(save = "no")
|
||||
|
@ -1,10 +0,0 @@
|
||||
#
|
||||
# column titles of the table written below
BEGIN {
    print "id len"
}

# header line: print the identifier (first word without its leading '>')
# followed by the part of that word located after its last '@'
/^>/ {
    ident = substr($1, 2)
    nparts = split($1, parts, "@")
    print ident, parts[nparts]
}
|
||||
|
@ -25,7 +25,6 @@ check("grid")
|
||||
check("gridExtra")
|
||||
check("vcd")
|
||||
check("plotrix")
|
||||
check("igraph")
|
||||
|
||||
quit(save='no', status=0)
|
||||
|
||||
|
Reference in New Issue
Block a user