Former-commit-id: 19b149eb57227e4ff3e7dda97f0328207fbc6373 Former-commit-id: ef94884d026004aa80d0fed85121c525cf5610b4
214 lines
4.0 KiB
Awk
214 lines
4.0 KiB
Awk
#
|
|
# iff -> embl
|
|
#
|
|
|
|
function FromLoc(i, _local_, s) {
|
|
s = substr(Exon[i]["modif"], 1, 1) "" Exon[i]["from"]
|
|
gsub("=", "", s)
|
|
return s
|
|
}
|
|
|
|
function ToLoc(i, _local_, s) {
|
|
s = substr(Exon[i]["modif"], 2, 1) "" Exon[i]["to"]
|
|
gsub("=", "", s)
|
|
return s
|
|
}
|
|
|
|
function GeneLocation(_local_, d, s) {
|
|
d = Exon[1]["strand"]
|
|
if (d == "+")
|
|
s = FromLoc(1) ".." ToLoc(Nexon)
|
|
else
|
|
s = FromLoc(Nexon) ".." ToLoc(1)
|
|
if (d == "-") s = "complement(" s ")"
|
|
return s
|
|
}
|
|
|
|
function CdsLocation(_local_, d, i, s) {
|
|
d = Exon[1]["strand"]
|
|
if (d == "+") {
|
|
for (i = 1 ; i <= Nexon ; i++)
|
|
s = s "," FromLoc(i) ".." ToLoc(i)
|
|
}
|
|
else {
|
|
for (i = Nexon ; i >= 1 ; i--)
|
|
s = s "," FromLoc(i) ".." ToLoc(i)
|
|
}
|
|
s = substr(s, 2)
|
|
if (Nexon > 1) s = "join(" s ")"
|
|
if (d == "-") s = "complement(" s ")"
|
|
return s
|
|
}
|
|
|
|
function ExonLocation(i, _local_, s) {
|
|
s = FromLoc(i) ".." ToLoc(i)
|
|
if (Exon[i]["strand"] == "-") s = "complement(" s ")"
|
|
return s
|
|
}
|
|
|
|
function Pad(s, len) {
|
|
while (length(s) < len)
|
|
s = s " "
|
|
return s
|
|
}
|
|
|
|
function Feature(feat, loc, _local_, s) {
|
|
s = Pad("FT " feat, 21)
|
|
print s "" loc
|
|
}
|
|
|
|
function SQualifier(qual, val, _local_, s) {
|
|
s = Pad("FT", 21)
|
|
print s "/" qual "=" val
|
|
}
|
|
|
|
function QQualifier(qual, val) {
|
|
gsub("\"", "''", val)
|
|
SQualifier(qual, "\"" val "\"")
|
|
}
|
|
|
|
function Unk(s) {
|
|
return (s =="" ? "none" : s)
|
|
}
|
|
|
|
#
|
|
# rules
|
|
#
|
|
|
|
/^c pass/ {
|
|
PassType = $3
|
|
PassInfo = $NF
|
|
next
|
|
}
|
|
|
|
/^c annot/ {
|
|
GeneName = $4
|
|
Product = $NF
|
|
gsub("_", " ", Product)
|
|
next
|
|
}
|
|
|
|
/^c nclust/ {
|
|
Ngene = $3
|
|
next
|
|
}
|
|
|
|
/^c begin_entry/ {
|
|
Nexon = 0
|
|
FrameShift=0
|
|
delete Exon
|
|
next
|
|
}
|
|
|
|
/^e exon/ {
|
|
Nexon++
|
|
Exon[Nexon]["from"] = $3
|
|
Exon[Nexon]["to"] = $4
|
|
Exon[Nexon]["strand"] = $6
|
|
|
|
match($0, / insertions +([0-9]+) +/, arr)
|
|
if (arr[1])
|
|
insertions=arr[1]
|
|
else
|
|
insertions=0
|
|
|
|
match($0, / insertions +([0-9]+) +/, arr)
|
|
if (arr[1])
|
|
deletions=arr[1]
|
|
else
|
|
deletions=0
|
|
|
|
Exon[Nexon]["indels"] = insertions "+" $deletions
|
|
|
|
match($0, / modifier +"([^"]*)"/, arr)
|
|
if (arr[1])
|
|
modif=substr(arr[1],1,1)
|
|
else
|
|
modif=""
|
|
if (modif == "=")
|
|
modif=""
|
|
|
|
Exon[Nexon]["modif"] = modif
|
|
|
|
match($0, / frameshifts +([-0-9]+)/, arr)
|
|
if (arr[1]) {
|
|
FrameShift=FrameShift+1
|
|
Exon[Nexon]["frameshift"] = arr[1]
|
|
}
|
|
else
|
|
Exon[Nexon]["frameshift"] = 0
|
|
|
|
next
|
|
}
|
|
|
|
/^e similarity/ {
|
|
split($12, a, "@")
|
|
Simil = a[1] ":" a[2]
|
|
next
|
|
}
|
|
|
|
/^e translate/ {
|
|
Translat = $3
|
|
next
|
|
}
|
|
|
|
/^c end_entry/ {
|
|
|
|
GeneName = Unk(GeneName)
|
|
PassType = Unk(PassType)
|
|
|
|
gname = (Ngene == 1 ? GeneName : GeneName "_" ++Igene)
|
|
locus = ""
|
|
|
|
# Feature("gene", GeneLocation())
|
|
#QQualifier("gene", gname)
|
|
#QQualifier("locus_tag", locus)
|
|
#if (FrameShift)
|
|
# QQualifier("pseudogene","unknown")
|
|
|
|
Feature("CDS", CdsLocation())
|
|
SQualifier("codon_start", 1)
|
|
SQualifier("transl_table", 11)
|
|
QQualifier("gene", gname)
|
|
QQualifier("locus_tag", locus)
|
|
if (FrameShift) {
|
|
QQualifier("pseudogene","unknown")
|
|
if (FrameShift > 1)
|
|
QQualifier("note","nonfunctional due to frameshifts in "FrameShift" exons")
|
|
else
|
|
QQualifier("note","nonfunctional due to a frameshift")
|
|
}
|
|
QQualifier("product", Product)
|
|
QQualifier("inference", "similar to DNA sequence:" Simil)
|
|
# QQualifier("inference", "org.annot -- detect pass:" PassType ":" PassInfo)
|
|
if (FrameShift==0) {
|
|
if (match(Translat,/\*/)>0) {
|
|
QQualifier("pseudogene","unknown")
|
|
QQualifier("note","nonfunctional due to stop codon")
|
|
}
|
|
|
|
QQualifier("translation", Translat)
|
|
}
|
|
|
|
|
|
# if (Nexon > 1) {
|
|
# for (i = 1 ; i <= Nexon ; i++) {
|
|
# Feature("exon", ExonLocation(i))
|
|
# QQualifier("gene", gname)
|
|
# QQualifier("locus_tag", locus)
|
|
|
|
# if (Exon[i]["frameshift"]) {
|
|
# QQualifier("pseudogene","unknown")
|
|
# if (Exon[i]["frameshift"] > 0)
|
|
# QQualifier("note","frameshifted by insertion of " Exon[i]["frameshift"] " bp")
|
|
# else
|
|
# QQualifier("note","frameshifted by deletion of " -Exon[i]["frameshift"] " bp")
|
|
# }
|
|
|
|
# SQualifier("number", Exon[1]["strand"] == "+" ? i : Nexon-i+1)
|
|
# }
|
|
# }
|
|
}
|
|
|
|
|