Files
annotate/detectors/cds/tools/compare/lib/libnws.awk
alain viari d77a0dfe67 added cds/tools/compare
Former-commit-id: 07fb256bf17db3f0ffc1730b0383f8255fbb9129
Former-commit-id: fd6a1fe72a39c5633c2f9fb6de09af979c2a48f3
2015-11-09 01:15:14 +01:00

147 lines
3.2 KiB
Awk

#
# NWS alignment
#
# -----------------------
# make identity substitution matrix for given alphabet
#
function IdentityMatrix(alpha, mat, diag, ndiag, _local_, n, i, j, ci, cj) {
if (diag == "") diag = 0
if (ndiag == "") ndiag = 1
alpha = toupper(alpha)
delete mat
n = length(alpha)
for (i = 1 ; i <= n ; i++) {
ci = substr(alpha, i, 1)
for (j = 1 ; j <= n ; j++) {
cj = substr(alpha, j, 1)
mat[ci][cj] = (i == j) ? diag : ndiag
}
}
}
# -----------------------
# internal utility : reverse string
#
function _Reverse(s, _local_, i, n, rs) {
rs = "";
n = length(s);
for (i = n ; i >= 1 ; i--)
rs = rs "" substr(s, i, 1)
return rs;
}
# -----------------------
# internal utility : alignment traceback for NWS
#
function _Traceback(dir, s1, s2, n1, n2, align, _local_, i, c1, c2, c3) {
delete align
while (dir[n1][n2] != 0) {
if (dir[n1][n2] == "s") {
c1 = substr(s1, n1--, 1)
c2 = substr(s2, n2--, 1)
c3 = (c1 == c2) ? tolower(c1) : "*"
}
else if (dir[n1][n2] == "i") {
c1 = "-"
c2 = substr(s2, n2--, 1)
c3 = "-"
}
else {
c1 = substr(s1, n1--, 1)
c2 = "-"
c3 = "-"
}
align[1] = align[1] "" c1
align[2] = align[2] "" c2
align[3] = align[3] "" c3
}
for (i = 1 ; i <= 3 ; i++)
align[i] = _Reverse(align[i])
}
# -----------------------
# internal utility : min
#
function _Min(a, b) {
return (a < b ? a : b)
}
# -----------------------
# sequence alignment NWS
#
# todo : check alphabet
#
# --> i
# |
# v d
#
function AlignNWS(s1, s2, subst, indel, local, align,
_local_, rev, n1, n2, i, j, c1, c2, m2,
ws, wi, wd, w,
mat, dir) {
s1 = toupper(s1) ; s2 = toupper(s2)
n1 = length(s1) ; n2 = length(s2)
if (local && (n2 < n1)) {
rev = s1 ; s1 = s2 ; s2 = rev
rev = n1 ; n1 = n2 ; n2 = rev
}
if (indel == "") indel = 1
#
# nws alignment
#
for (i = 0 ; i <= n1 ; i++) {
c1 = substr(s1, i, 1)
for (j = 0 ; j <= n2 ; j++) {
c2 = substr(s2, j, 1)
if (i && j) {
ws = mat[i-1][j-1] + subst[c1][c2]
wd = mat[i-1][j] + indel
wi = mat[i][j-1] + indel
w = _Min(ws, _Min(wi, wd))
mat[i][j] = w
dir[i][j] = (w == ws) ? "s" : (w == wi) ? "i" : "d"
} else if (i) {
mat[i][j] = mat[i-1][j] + indel
dir[i][j] = "d"
} else if (j) {
mat[i][j] = mat[i][j-1] + (local ? 0 : indel)
dir[i][j] = "i"
} else {
mat[i][j] = 0
dir[i][j] = 0
}
}
# delete mat[i-1]
}
#
# adjust last line in local mode
#
if (local) {
m2 = n2
for (j = m2 ; j >= 0 ; j--) {
if (mat[n1][j] < mat[n1][m2])
m2 = j
}
mat[n1][n2] = mat[n1][m2]
for (j = m2 + 1 ; j <= n2 ; j++) {
dir[n1][j] = "i"
}
}
#
# traceback
#
_Traceback(dir, s1, s2, n1, n2, align)
if (rev) {
rev = align[1] ; align[1] = align[2]; align[2] = rev
}
return mat[n1][n2]
}