
Former-commit-id: 07fb256bf17db3f0ffc1730b0383f8255fbb9129 Former-commit-id: fd6a1fe72a39c5633c2f9fb6de09af979c2a48f3
81 lines
1.6 KiB
Awk
81 lines
1.6 KiB
Awk
#
|
|
# Extract CDS features from an EMBL file and print one summary line per CDS (short version)
|
|
#
|
|
|
|
# @include lib.embl.awk
|
|
|
|
|
|
# Start-up: give MAXSPAN its default of 10000 when it is still empty,
# then write the column header of the report.
BEGIN {
    if (MAXSPAN == "") {
        MAXSPAN = 10000
    }

    print "#genefam gene from to strand nexon length status protseq product"
}
|
|
|
|
# Opening line of a CDS feature: parse its location and reset the
# qualifier values that later qualifier lines may overwrite.
# NOTE(review): Nexons, SpanLocation, LenLocation and SeqLocation, and the
# variable seq, are not defined in the visible source; presumably they come
# from lib.embl.awk -- confirm.
# NOTE(review): the pattern expects a single blank after "FT" while the
# location is read from column 22; blank runs may have been collapsed in
# this copy -- confirm against the upstream file.
/^FT CDS/ {
    # non-zero when the third field begins with "complement"
    revstrand = match($3, "^complement")

    # location text runs from column 22 to the end of the line;
    # a leading "complement" keyword is dropped
    s = substr($0, 22)
    gsub("^complement", "", s)

    # a location that still mentions complement or order is flagged not ok
    ok = ! match(s, "complement|order")

    nexon = Nexons(s)
    SpanLocation(s, sloc)
    spanlen = sloc[2] - sloc[1] + 1
    len = LenLocation(s)

    # NOTE(review): the MAXSPAN limit is applied to len; spanlen is computed
    # above but not used in the visible source -- confirm this is intended.
    if (! (len < MAXSPAN)) {
        ok = 0
    }

    # placeholder sequence "XXX" when the location was rejected
    if (ok) {
        cdsseq = SeqLocation(seq, s, revstrand)
    } else {
        cdsseq = "XXX"
    }

    # first three and last three characters of the CDS sequence
    cstart = substr(cdsseq, 1, 3)
    cstop  = substr(cdsseq, length(cdsseq) - 2)

    # defaults used when the feature carries no matching qualifier
    translation = "X"
    product     = "none"
    locustag    = "none"
    gene        = "none"

    incds = 1
    next
}
|
|
|
|
# While a CDS is open (incds set), a line made of "FT", one blank and a
# non-blank character closes it: print the report row for that CDS and
# clear the flag.
# NOTE(review): as written, this pattern also matches qualifier lines that
# carry a single blank after "FT"; blank runs may have been collapsed in
# this copy -- confirm against the upstream file.
# NOTE(review): GeneFamily is not defined in the visible source.
incds && /^FT [^ ]/ {
    print GeneFamily(gene),
          gene,
          sloc[1],
          sloc[2],
          (! revstrand ? "D" : "R"),
          nexon,
          len,
          (! ok ? "Error" : "Ok"),
          translation,
          product

    incds = 0
    next
}
|
|
|
|
# /gene qualifier: store the gene name in the variable gene.
# The value is everything after the first "=" on the line, so a name that
# itself contains "=" is kept whole instead of being cut at the second "=".
/^FT \/gene=/ {
    gene = substr($0, index($0, "=") + 1)

    # strip leading characters up to the first letter (this removes the
    # opening quote); the "," inside the bracket expression is a literal
    # comma, so a leading comma stops the stripping
    gsub("^[^a-z,A-Z]+", "", gene)

    # drop remaining quotes, then make the name a single blank-free token
    gsub("\"", "", gene)
    gsub(" ", "_", gene)
    next
}
|
|
|
|
# /locus_tag qualifier: store the tag in the variable locustag.
# The value is everything after the first "=" on the line, so a tag that
# itself contains "=" is kept whole instead of being cut at the second "=".
/^FT \/locus_tag=/ {
    locustag = substr($0, index($0, "=") + 1)

    # drop the quotes, then make the tag a single blank-free token
    gsub("\"", "", locustag)
    gsub(" ", "_", locustag)
    next
}
|
|
|
|
# /product qualifier: store the product description in the variable product.
# The value is everything after the first "=" on the line, so a description
# that itself contains "=" is kept whole instead of being cut at the
# second "=".
# Only the text on the qualifier line itself is captured by this rule.
/^FT \/product=/ {
    product = substr($0, index($0, "=") + 1)

    # drop the quotes, then replace blanks so the description stays a
    # single token in the blank-separated report row
    gsub("\"", "", product)
    gsub(" ", "_", product)
    next
}
|
|
|
|
# /translation qualifier: store the protein sequence found on this line in
# the variable translation, with blanks and quotes removed.
# Only the text on the qualifier line itself is captured by this rule.
/^FT \/translation=/ {
    # the value is the piece between the first and second "="
    split($0, a, "=")
    translation = a[2]

    # the two clean-ups are independent of each other
    gsub(" ", "", translation)
    gsub("\"", "", translation)

    next
}
|
|
|
|
# End of input: if a CDS is still open (no later line closed it), print
# its report row now so the last feature is not lost.
# NOTE(review): GeneFamily is not defined in the visible source.
END {
    if (incds) {
        print GeneFamily(gene),
              gene,
              sloc[1],
              sloc[2],
              (! revstrand ? "D" : "R"),
              nexon,
              len,
              (! ok ? "Error" : "Ok"),
              translation,
              product
    }
}
|