Former-commit-id: 8d15cb5175de1774a1cb366f7a92ef99f8517af5 Former-commit-id: 58421d7b8dd6855efe9770499e48a4cca6d9e1fd
203 lines
5.6 KiB
Awk
203 lines
5.6 KiB
Awk
#
|
|
# utilities library
|
|
#
|
|
|
|
END {
|
|
if (_ForceExit_) exit(_ForceExit_)
|
|
}
|
|
|
|
function Notify(key, msg) {
|
|
print "# " key " " msg >> "/dev/stderr"
|
|
}
|
|
|
|
function Info(msg) {
|
|
Notify("info", msg)
|
|
return 1
|
|
}
|
|
|
|
function Warning(msg) {
|
|
Notify("warning", msg)
|
|
return 0
|
|
}
|
|
|
|
function Exit(status) {
|
|
exit(_ForceExit_ = status)
|
|
}
|
|
|
|
function Error(msg, status) {
|
|
Notify("error", msg)
|
|
Exit(status ? status : 1)
|
|
}
|
|
|
|
function Assert(condition, msg, status) {
|
|
if (! condition) {
|
|
msg = FILENAME ":" FNR ": " msg
|
|
return status ? Error(msg, status) : Warning(msg)
|
|
}
|
|
}
|
|
|
|
#
|
|
|
|
function Min(a, b) {
|
|
return (a < b ? a : b)
|
|
}
|
|
|
|
function Max(a, b) {
|
|
return (a > b ? a : b)
|
|
}
|
|
|
|
#
|
|
|
|
function Trim(s, regexp) {
|
|
if (regexp == 0) regexp = "[ \t]+"
|
|
gsub("^" regexp "|" regexp "$", "", s)
|
|
return s
|
|
}
|
|
|
|
function ShieldPath(path) {
|
|
gsub(" ", "\\ ", path)
|
|
return path
|
|
}
|
|
|
|
function TestPath(path, test, _local_, stat) {
|
|
if (test == 0) test = "-f"
|
|
if (Trim(path) == "")
|
|
return 0 # because of a bug in 'test'
|
|
stat = system("test " test " " ShieldPath(path))
|
|
return stat ? 0 : 1
|
|
}
|
|
|
|
#
|
|
|
|
function Reverse(s, _local_, i, n, rs) {
|
|
rs = "";
|
|
n = length(s);
|
|
for (i = n ; i >= 1 ; i--)
|
|
rs = rs "" substr(s, i, 1)
|
|
return rs;
|
|
}
|
|
|
|
function RevComplement(seq, _local_, n, i, c, rs) {
|
|
n = length(seq)
|
|
rs = ""
|
|
for (i = n ; i >= 1 ; i--) {
|
|
c = substr(seq, i, 1)
|
|
rs = rs "" (_DnaC[c] ? _DnaC[c] : "X")
|
|
}
|
|
return rs
|
|
}
|
|
|
|
#
|
|
|
|
function _AssertCode(name, _local_, n, i1, i2, i3, b1, b2, b3) {
|
|
if (_InitCod[name] != 0) return 1
|
|
for (i1 = 1 ; i1 <= 4 ; i1++) {
|
|
b1 = substr(_NucOrder, i1, 1)
|
|
for (i2 = 1 ; i2 <= 4 ; i2++) {
|
|
b2 = substr(_NucOrder, i2, 1)
|
|
for (i3 = 1 ; i3 <= 4 ; i3++) {
|
|
b3 = substr(_NucOrder, i3, 1)
|
|
_GenCod[name][b1 "" b2 "" b3] = substr(_Cod2Aa[name], ++n, 1)
|
|
if (name in _Cod2Start) {
|
|
_GenStart[name][b1 "" b2 "" b3] = substr(_Cod2Start[name], n, 1)
|
|
if (_GenStart[name][b1 "" b2 "" b3] == "-") {
|
|
_GenStart[name][b1 "" b2 "" b3] = _GenCod[name][b1 "" b2 "" b3]
|
|
}
|
|
} else {
|
|
_GenStart[name][b1 "" b2 "" b3] = _GenCod[name][b1 "" b2 "" b3]
|
|
}
|
|
}
|
|
}
|
|
}
|
|
_InitCod[name] = 1
|
|
return 1
|
|
}
|
|
|
|
function Translate(seq, modif, codname, _local_, n, i, c, p) {
|
|
if (codname == 0) codname = "universal"
|
|
print "u codname " codname " " _Cod2Aa[name]
|
|
p= ""
|
|
_AssertCode(codname)
|
|
seq = toupper(seq)
|
|
|
|
n = length(seq)
|
|
for (i = 1 ; i <= n ; i += 3) {
|
|
c = substr(seq, i, 3)
|
|
if (i == 1 && modif == "=" )
|
|
aa = ((c in _GenStart[codname]) ? _GenStart[codname][c] : "X")
|
|
else
|
|
aa = ((c in _GenCod[codname]) ? _GenCod[codname][c] : "X")
|
|
|
|
p = p "" aa
|
|
}
|
|
|
|
return p
|
|
}
|
|
|
|
#
|
|
|
|
function SubSeq(seq, from, to, revstrand) {
|
|
seq = substr(seq, from, to-from+1)
|
|
if (revstrand) seq = RevComplement(seq)
|
|
return seq
|
|
}
|
|
|
|
#
|
|
|
|
function ReadFasta(file, _local_, seq, context) {
|
|
context = $0
|
|
seq = ""
|
|
getline < file
|
|
while(getline < file) seq = seq "" $0
|
|
$0 = context
|
|
return seq
|
|
}
|
|
|
|
#
|
|
|
|
function ReadModel(file, a, _local_, line, context) {
|
|
context = $0
|
|
delete a
|
|
while(getline < file)
|
|
if (! ($0 ~ "^#")) a[$1] = $2
|
|
$0 = context
|
|
}
|
|
|
|
#
|
|
# constants
|
|
#
|
|
|
|
BEGIN {
|
|
# complementary of _IupacDna
|
|
_DnaC["A"] = "T"; _DnaC["C"] = "G"; _DnaC["G"] = "C"; _DnaC["T"] = "A"
|
|
_DnaC["R"] = "Y"; _DnaC["Y"] = "R"; _DnaC["M"] = "K"; _DnaC["K"] = "M"
|
|
_DnaC["W"] = "W"; _DnaC["S"] = "S"; _DnaC["B"] = "V"; _DnaC["V"] = "B"
|
|
_DnaC["D"] = "H"; _DnaC["H"] = "D"; _DnaC["N"] = "N"; _DnaC["X"] = "X"
|
|
_DnaC["a"] = "t"; _DnaC["c"] = "g"; _DnaC["g"] = "c"; _DnaC["t"] = "a"
|
|
_DnaC["r"] = "y"; _DnaC["y"] = "r"; _DnaC["m"] = "k"; _DnaC["k"] = "m"
|
|
_DnaC["w"] = "w"; _DnaC["s"] = "s"; _DnaC["b"] = "v"; _DnaC["v"] = "b"
|
|
_DnaC["d"] = "h"; _DnaC["h"] = "d"; _DnaC["n"] = "n"; _DnaC["x"] = "x"
|
|
|
|
# genetic codes
|
|
_NucOrder = "ACGT"
|
|
#1 AAAAAAAAAAAAAAAACCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTT
|
|
#2 AAAACCCCGGGGTTTTAAAACCCCGGGGTTTTAAAACCCCGGGGTTTTAAAACCCCGGGGTTTT
|
|
#3 ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
|
|
_Cod2Aa["universal"] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSS*CWCLFLF"
|
|
_Cod2Aa["chloroplast"] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSS*CWCLFLF"
|
|
_Cod2Aa["mito-yeast"] = "KNKNTTTTRSRSMIMIQHQHPPPPRRRRTTTTEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF"
|
|
_Cod2Aa["mito-vertebrates"] = "KNKNTTTT*S*SMIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF"
|
|
_Cod2Aa["mito-insects"] = "KNKNTTTTSSSSMIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF"
|
|
_Cod2Aa["mito-echinoderms"] = "NNKNTTTTSSSSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF"
|
|
_Cod2Aa["mycoplasma"] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF"
|
|
_Cod2Aa["ciliata"] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVQYQYSSSS*CWCLFLF"
|
|
_Cod2Aa["euplotes"] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSCCWCLFLF"
|
|
_Cod2Aa["candida"] = "KNKNTTTTRSRSIIMIQHQHPPPPRRRRXLSLEDEDAAAAGGGGVVVV*Y*YSSSS*CWCLFLF"
|
|
|
|
#1 AAAAAAAAAAAAAAAACCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTT
|
|
#2 AAAACCCCGGGGTTTTAAAACCCCGGGGTTTTAAAACCCCGGGGTTTTAAAACCCCGGGGTTTT
|
|
#3 ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
|
|
_Cod2Start["universal"] = "------------M-----------------M-------------------------------M-"
|
|
_Cod2Start["chloroplast"] = "------------MMMM--------------M---------------M---------------M-"
|
|
}
|