
Former-commit-id: 0579e878a69b7c285ca71870e9ca5730649a2fda Former-commit-id: 7cced5b488441d87bf070a9a444317db0e048880
98 lines
1.9 KiB
Awk
98 lines
1.9 KiB
Awk
#
# get intron features from embl
#

# @include libembl.awk
|
|
|
|
# Startup: always emit the column header line. Unless HEADONLY is set,
# a FASTA file must be given and must exist; its sequence is loaded,
# lower-cased, into the global Seq used by the CDS rule.
# NOTE(review): Error(), TestPath() and ReadFasta() are defined outside
# this file; Error() presumably aborts with the given exit status - confirm.
BEGIN {
    print "#locus locustag genefam gene from to strand intron_num intron_nb acceptor-donor status"

    # header-only mode: nothing else to do
    if (HEADONLY != "") {
        exit(0)
    }

    if (FASTA == "") {
        Error("No FASTA file specified", 1)
    }

    if (! TestPath(FASTA)) {
        Error("Fasta file: '" FASTA "' not found", 1)
    }

    Seq = tolower(ReadFasta(FASTA))
}
|
|
|
|
# ID line: keep the second field, with every ';' removed, as the current locus.
/^ID / {
    locus = $2
    gsub(/;/, "", locus)
    next
}
|
|
|
|
# CDS feature line: buffer one record per intron of a multi-part CDS.
#
# Records are stored in SINfo[1..Ninfo]; they are printed later by the
# /translation= rule, once the gene and locus_tag qualifiers have been read.
# A CDS is skipped (nothing buffered) when:
#   - its location still contains "complement" or "order" after a leading
#     "complement" has been removed,
#   - ParseLocation() reports fewer than two parts,
#   - the first or the second coordinates of successive parts decrease.
# NOTE(review): ParseLocation() and SeqLocation() are defined outside this
# file; locs[i][1] / locs[i][2] are presumably the start / end of part i.
/^FT CDS/ {
    # Reset all per-CDS state before any early exit. Otherwise a skipped CDS
    # would leave the previous CDS's buffered introns in place, and they
    # would be printed under this CDS's gene / locus_tag if the previous CDS
    # had no /translation= qualifier.
    delete SINfo
    Ninfo = 0
    gene = "none"
    locustag = "none"

    revstrand = match($3, "^complement")

    # location text is taken from character 22 of the line onward
    s = substr($0, 22)
    gsub("^complement", "", s)

    ok = ! match(s, "complement|order")
    if (! ok) next

    na = ParseLocation(s, locs)
    if (na < 2) next

    # both coordinates must be non-decreasing from one part to the next
    for (i = 2 ; i <= na ; i++) {
        if (locs[i][1] < locs[i-1][1] || locs[i][2] < locs[i-1][2]) ok = 0
    }
    if (! ok) next

    # an intron spans from just after part i-1 to just before part i
    from = locs[1][2] + 1
    for (i = 2 ; i <= na ; i++) {
        to = locs[i][1] - 1

        # intron sequence extended by 4 positions on each side
        inseq = SeqLocation(Seq, (from - 4) ".." (to + 4), revstrand)

        # fields: from to strand intron_num intron_nb acceptor-donor status
        # acceptor-donor = 4 flanking . first 6 of intron - last 6 of intron . 4 flanking
        SINfo[++Ninfo] = from " " to " " (revstrand ? "R" : "D") " " \
                         (revstrand ? na-i+1 : i-1) " " (na-1) " " \
                         substr(inseq, 1, 4) "." \
                         substr(inseq, 5, 6) "-" \
                         substr(inseq, length(inseq)-9, 6) "." \
                         substr(inseq, length(inseq)-3, 4) " " \
                         "ok"

        from = locs[i][2] + 1
    }

    next
}
|
|
|
|
# /gene= qualifier: take the text between the first and second '=' of the
# line, drop any leading run of characters outside [a-z,A-Z], remove double
# quotes and turn spaces into underscores.
/^FT \/gene=/ {
    split($0, a, "=")
    gene = a[2]

    sub(/^[^a-z,A-Z]+/, "", gene)   # anchored: can only match once
    gsub(/"/, "", gene)
    gsub(/ /, "_", gene)

    next
}
|
|
|
|
# /locus_tag= qualifier: take the text between the first and second '=' of
# the line, remove double quotes and turn spaces into underscores.
/^FT \/locus_tag=/ {
    split($0, a, "=")
    locustag = a[2]

    gsub(/"/, "", locustag)
    gsub(/ /, "_", locustag)

    next
}
|
|
|
|
# /translation= qualifier: flush the intron records buffered by the CDS rule,
# prefixing each with locus, locus_tag, gene family and gene, then empty
# the buffer counter.
# NOTE(review): GeneFamily() is defined outside this file.
/^FT \/translation=/ {
    i = 1
    while (i <= Ninfo) {
        print locus, locustag, GeneFamily(gene), gene, SINfo[i]
        i++
    }

    Ninfo = 0
    next
}
|
|
|
|
# "//" line: mark the locus as unknown until the next ID line sets it.
/^\/\// { locus = "?" }
|