cds/tools/chlorodb added
Former-commit-id: 0579e878a69b7c285ca71870e9ca5730649a2fda Former-commit-id: 7cced5b488441d87bf070a9a444317db0e048880
This commit is contained in:
39
detectors/cds/tools/chlorodb/subdb/lib/db.annot.awk
Normal file
39
detectors/cds/tools/chlorodb/subdb/lib/db.annot.awk
Normal file
@@ -0,0 +1,39 @@
|
||||
#
|
||||
|
||||
# Header lines (starting with ">"): count the entry and tally two of its
# attributes.
#   N       number of header lines seen
#   NEX     histogram of the second-to-last "@"-separated item of the first
#           word (presumably an exon count, judging by the MONEX/POLYEX
#           labels printed at END -- to be confirmed)
#   NEXMAX  largest such item seen so far
#   ANNOT   histogram of the last whitespace-separated field of the line
/^>/ {
    N += 1
    nitem = split($1, item, "@")
    nex = item[nitem - 1]
    if (nex > NEXMAX) {
        NEXMAX = nex
    }
    NEX[nex] += 1
    ANNOT[$NF] += 1
}
|
||||
|
||||
# Print a one-line summary of the file:
#   <name> <N> <value:count_value:count...> <MONEX|POLYEX> <top annotation>
END {
    # <name>: last path component of FILENAME, cut at its first dot.
    npath = split(FILENAME, path, "/")
    split(path[npath], stem, "\\.")
    printf("%s %d ", stem[1], N)

    # Histogram of NEX for 1..NEXMAX, skipping empty bins, joined by "_".
    hist = ""
    sep = ""
    for (k = 1; k <= NEXMAX; k++) {
        if (NEX[k] != 0) {
            hist = hist sep k ":" NEX[k]
            sep = "_"
        }
    }
    printf("%s ", hist)

    # Label depends only on whether the largest value seen is exactly 1.
    if (NEXMAX == 1) {
        printf("%s ", "MONEX")
    } else {
        printf("%s ", "POLYEX")
    }

    # Most frequent last-field value; "none" when no header was seen.
    # Only a strictly greater count replaces the current best.
    topcount = 0
    top = "none"
    for (label in ANNOT) {
        if (ANNOT[label] > topcount) {
            topcount = ANNOT[label]
            top = label
        }
    }
    print top
}
|
||||
|
||||
|
||||
|
48
detectors/cds/tools/chlorodb/subdb/lib/db.blastlink.awk
Normal file
48
detectors/cds/tools/chlorodb/subdb/lib/db.blastlink.awk
Normal file
@@ -0,0 +1,48 @@
|
||||
#
|
||||
|
||||
# Return the smaller of x and y; y is returned when neither is smaller.
function min(x, y) {
    if (x < y) {
        return x
    }
    return y
}
|
||||
|
||||
# Default thresholds, used only when the caller did not set them
# (e.g. with -v on the command line):
#   IDMIN   lower bound (exclusive) on the identity column
#   COVMIN  lower bound (exclusive) on the computed coverage
#   PMAX    upper bound (exclusive) on the probability column
BEGIN {
    if (length(IDMIN) == 0) {
        IDMIN = 30
    }
    if (length(COVMIN) == 0) {
        COVMIN = 50
    }
    if (length(PMAX) == 0) {
        PMAX = 1e-6
    }
}
|
||||
|
||||
# A line starting with "#" restarts the hit numbering and is otherwise ignored.
$0 ~ /^#/ {
    hitnum = 0
    next
}
|
||||
|
||||
# Filter one hit line (presumably tabular BLAST output -- to be confirmed).
# Columns read: $1 and $2 identifiers, $3 identity, $4 alignment length,
# $11 probability. Each identifier must contain at least one "@", and its last
# "@"-separated item is used as the sequence length.
# Prints: $1 $2 hitnum id coverage proba ali len1 len2
{
    # Ignore a sequence matched against itself (it is not numbered either).
    if ($1 == $2) {
        next
    }

    hitnum += 1

    nq = split($1, qitem, "@")
    if (nq < 2) {
        print "query file not properly formatted" > "/dev/stderr"
        exit(1)
    }
    len1 = qitem[nq]

    nb = split($2, bitem, "@")
    if (nb < 2) {
        print "bank file not properly formatted" > "/dev/stderr"
        exit(1)
    }
    len2 = bitem[nb]

    id = $3 + 0.0
    ali = $4
    proba = $11 + 0.0

    # Alignment length as a percentage of the shorter of the two sequences.
    covmin = ali * 100. / min(len1, len2)

    covered = (covmin > COVMIN)
    # A probability of exactly 0 always passes, whatever PMAX is.
    significant = ((proba < PMAX) || (proba == 0))
    similar = (id > IDMIN)

    if (covered && significant && similar) {
        print $1, $2, hitnum, id, covmin, proba, ali, len1, len2
    }
}
|
||||
|
18
detectors/cds/tools/chlorodb/subdb/lib/db.cc.r
Executable file
18
detectors/cds/tools/chlorodb/subdb/lib/db.cc.r
Executable file
@@ -0,0 +1,18 @@
|
||||
#!/usr/bin/env Rscript
#
# Read a graph in 'dl' format and write, for every vertex, its name and the
# id of the cluster (as returned by igraph's clusters()) it belongs to.
#
# usage: db.cc.r [graph_file]      (graph_file defaults to 'graph.dl')

require(igraph, warn.conflicts = FALSE)

cmd_args <- commandArgs(trailingOnly = TRUE)

graph_file <- 'graph.dl'
if (length(cmd_args) > 0) {
  graph_file <- cmd_args[1]
}

graph <- read.graph(graph_file, format = 'dl')
comps <- clusters(graph)

# Two columns: vertex name, cluster membership.
vertex_cc <- cbind(V(graph)$name, membership(comps))

write.table(vertex_cc, quote = FALSE, row.names = FALSE, col.names = FALSE)

quit(save = "no")
|
||||
|
19
detectors/cds/tools/chlorodb/subdb/lib/db.filter.len.r
Executable file
19
detectors/cds/tools/chlorodb/subdb/lib/db.filter.len.r
Executable file
@@ -0,0 +1,19 @@
|
||||
#!/usr/bin/env Rscript
#
# Flag entries whose length deviates too much from the median length.
#
# usage: db.filter.len.r [len_file [delta]]
#   len_file  table with a header line and a 'len' column (default 'len.txt')
#   delta     maximal accepted relative deviation from the median (default 0.5)
#
# Writes the input table to stdout with an extra logical column 'ok', TRUE
# when |len - median| / median <= delta.

args <- commandArgs(trailingOnly = TRUE)

path <- if (length(args) > 0) args[1] else 'len.txt'

# Command-line arguments are character strings. Convert delta once, here:
# comparing a numeric vector with a character value makes R compare them as
# strings, which gives wrong answers (e.g. "5e-05" <= "0.5" is FALSE).
delta <- if (length(args) > 1) suppressWarnings(as.numeric(args[2])) else 0.5
if (is.na(delta)) {
  stop("delta must be a number, got '", args[2], "'")
}

tab <- read.table(path, header = TRUE)

lmed <- median(tab$len)

tab$ok <- (abs(tab$len - lmed) / lmed) <= delta

write.table(tab, quote = FALSE)

quit(save = 'no')
|
||||
|
10
detectors/cds/tools/chlorodb/subdb/lib/db.filter.pat.awk
Normal file
10
detectors/cds/tools/chlorodb/subdb/lib/db.filter.pat.awk
Normal file
@@ -0,0 +1,10 @@
|
||||
#
|
||||
|
||||
# Header line: select the entry when the third "@"-separated item of its
# first word matches the regular expression held in PAT.
$0 ~ /^>/ {
    split($1, item, "@")
    ok = (item[3] ~ PAT) ? 1 : 0
}

# Copy every line (header included) of a selected entry.
ok {
    print
}
|
30
detectors/cds/tools/chlorodb/subdb/lib/db.filter.sym.awk
Normal file
30
detectors/cds/tools/chlorodb/subdb/lib/db.filter.sym.awk
Normal file
@@ -0,0 +1,30 @@
|
||||
#
|
||||
#
|
||||
#
|
||||
|
||||
# Return 1 when seq is non-empty and made only of the letters
# ACDEFGHIKLMNPQRSTVWXY and newlines, 0 otherwise.
# 'residue' is a local work variable, not an argument.
function Check(seq,    residue) {
    if (seq == "") {
        return 0
    }
    # Strip every accepted character; whatever is left is invalid.
    residue = seq
    gsub("[ACDEFGHIKLMNPQRSTVWXY\n]+", "", residue)
    return (residue == "") ? 1 : 0
}
|
||||
|
||||
# A new header ends the previous entry: emit that entry if its sequence
# passes Check(), then start collecting the new one.
$0 ~ /^>/ {
    if (Check(Seq) != 0) {
        # header line, record separator, then the sequence lines (each
        # already ending with a newline)
        printf("%s%s%s", Name, ORS, Seq)
    }
    Name = $0
    Seq = ""
    next
}
|
||||
|
||||
# Any other line belongs to the current entry: append it, newline-terminated.
{
    Seq = Seq $0 "\n"
}
|
||||
|
||||
# The last entry has no following header to trigger its output: emit it here
# if its sequence passes Check().
END {
    if (Check(Seq) != 0) {
        printf("%s%s%s", Name, ORS, Seq)
    }
}
|
10
detectors/cds/tools/chlorodb/subdb/lib/db.getlen.awk
Normal file
10
detectors/cds/tools/chlorodb/subdb/lib/db.getlen.awk
Normal file
@@ -0,0 +1,10 @@
|
||||
#
|
||||
# Column titles of the output table.
BEGIN { print "id len" }

# Header line: print the first word without its leading ">" followed by its
# last "@"-separated item.
$0 ~ /^>/ {
    nitem = split($1, item, "@")
    ident = substr($1, 2)
    print ident, item[nitem]
}
|
||||
|
15
detectors/cds/tools/chlorodb/subdb/lib/db.reportcc.awk
Normal file
15
detectors/cds/tools/chlorodb/subdb/lib/db.reportcc.awk
Normal file
@@ -0,0 +1,15 @@
|
||||
#
|
||||
#
|
||||
|
||||
# Count how many lines carry each value of the last field.
{
    cnt[$NF] += 1
}

# Print "cc_size <NAME>" followed by the counts in decreasing order.
# asort() is a gawk extension.
END {
    rank = asort(cnt, size)
    printf("cc_size %s", NAME)
    while (rank >= 1) {
        printf(" %d", size[rank])
        rank--
    }
    print ""
}
|
||||
|
19
detectors/cds/tools/chlorodb/subdb/lib/db.selcc.awk
Normal file
19
detectors/cds/tools/chlorodb/subdb/lib/db.selcc.awk
Normal file
@@ -0,0 +1,19 @@
|
||||
#
|
||||
|
||||
# Group first fields by the value of the last field:
#   N[g]     number of lines seen with last field g
#   E[g, k]  first field of the k-th such line
{
    group = $NF
    rank = ++N[group]
    E[group, rank] = $1
}
|
||||
|
||||
# Print the members of the largest group, in input order.
# Group 1 is the starting candidate and is only replaced by a strictly larger
# group.
END {
    best = 1
    bestsize = N[1]
    for (group in N) {
        if (N[group] > bestsize) {
            bestsize = N[group]
            best = group
        }
    }
    rank = 1
    while (rank <= bestsize) {
        print E[best, rank]
        rank++
    }
}
|
17
detectors/cds/tools/chlorodb/subdb/lib/db.subdb.awk
Normal file
17
detectors/cds/tools/chlorodb/subdb/lib/db.subdb.awk
Normal file
@@ -0,0 +1,17 @@
|
||||
#
|
||||
|
||||
# Load the list of entries to keep: the first word of every line of FILE
# (default "db.sel.txt") becomes a key of INC.
# Exits with status 1 if FILE cannot be read.
BEGIN {
    if (FILE == "") FILE = "db.sel.txt"

    # getline returns 1 on success, 0 at end of file and -1 on error.
    # Testing "> 0" is required: a bare "while (getline < FILE)" never ends
    # when FILE is unreadable, since -1 is true.
    while ((status = (getline < FILE)) > 0)
        INC[$1] = $1

    if (status < 0) {
        msg = "cannot read selection file " FILE
        print msg > "/dev/stderr"
        exit(1)
    }
    close(FILE)
}
|
||||
|
||||
# Header line: the entry is selected when its first word, without the leading
# ">", is a key of INC.
$0 ~ /^>/ {
    ident = substr($1, 2)
    ok = (ident in INC) ? 1 : 0
}

# Copy every line (header included) of a selected entry.
ok {
    print
}
|
21
detectors/cds/tools/chlorodb/subdb/lib/db.todl.awk
Normal file
21
detectors/cds/tools/chlorodb/subdb/lib/db.todl.awk
Normal file
@@ -0,0 +1,21 @@
|
||||
#
|
||||
|
||||
# Each input line is an edge between its first two fields: remember both end
# points in node[] and store the edge, in input order, in link[1..M].
{
    node[$1] += 1
    node[$2] += 1
    M += 1
    link[M] = $1 " " $2
}
|
||||
|
||||
|
||||
# Write the graph: a DL header giving the number of distinct nodes, then one
# "from to" line per stored edge.
END {
    # N ends up as the number of distinct keys in node[].
    for (label in node) {
        N += 1
    }

    print "DL n=" N
    print "format = edgelist1"
    print "labels embedded:"
    print "data:"

    edge = 0
    while (edge < M) {
        edge += 1
        print link[edge]
    }
}
|
||||
|
||||
|
Reference in New Issue
Block a user