cds/tools/chlorodb added
Former-commit-id: 0579e878a69b7c285ca71870e9ca5730649a2fda Former-commit-id: 7cced5b488441d87bf070a9a444317db0e048880
This commit is contained in:
96
detectors/cds/tools/lib/gbk.intron.awk
Normal file
96
detectors/cds/tools/lib/gbk.intron.awk
Normal file
@ -0,0 +1,96 @@
|
||||
#
|
||||
# Extract the introns of multi-exon CDS features from a GenBank flat file
|
||||
#
|
||||
|
||||
# @include libgbk.awk
|
||||
|
||||
# Start-up: emit the column header, then load the reference sequence.
# HEADONLY and FASTA are expected to be set by the caller (e.g. with -v).
# Error(), TestPath() and ReadFasta() are not defined in this file; they
# presumably come from libgbk.awk -- confirm there.
BEGIN {
    print "#locus locustag genefam gene from to strand intron_num intron_nb acceptor-donor status"

    # Header-only mode: stop right after the header line.
    if (HEADONLY != "") {
        exit(0)
    }

    # A FASTA file is required for the intron boundary sequences.
    if (FASTA == "") {
        Error("No FASTA file specified", 1)
    }

    if (! TestPath(FASTA)) {
        Error("Fasta file: '" FASTA "' not found", 1)
    }

    # Keep the whole sequence in lower case for later extraction.
    Seq = tolower(ReadFasta(FASTA))
}
|
||||
|
||||
# LOCUS line: remember the record name (second field) for later output.
/^LOCUS/ { locus = $2; next }
|
||||
|
||||
# CDS feature line: when the location is a multi-segment join lying on a
# single strand, store one record per intron in SINfo[1..Ninfo].  The
# records are printed later, when the /translation= qualifier of this CDS
# is reached, so that /gene= and /locus_tag= have been seen by then.
#
# Feature keys start in column 6 of a GenBank flat file, whereas qualifier
# and continuation lines start in column 22; allowing 1 to 20 leading
# blanks therefore matches the key line but never a continuation line that
# happens to begin with the text "CDS".
#
# ParseLocation() and SeqLocation() are not defined in this file
# (presumably libgbk.awk).  From the way they are used here,
# ParseLocation() returns the number of segments and fills
# locs[i][1] / locs[i][2] with what is used as segment start / end --
# confirm against the library.
/^ {1,20}CDS/ {
    # Reset all per-CDS state first.  If this were done only after the
    # early exits below, intron records left over from a previous CDS
    # lacking /translation= would be printed under the next CDS's name.
    delete SINfo
    Ninfo = 0
    gene = "none"
    locustag = "none"

    revstrand = match($2, "^complement")

    # The location text starts in column 22; drop an outer "complement".
    s = substr($0, 22)
    gsub("^complement", "", s)

    # Nested complement() (mixed strands) and order() are not handled.
    if (match(s, "complement|order")) next

    na = ParseLocation(s, locs)

    # A single segment means there is no intron.
    if (na < 2) next

    # Both segment starts and segment ends must be non-decreasing;
    # otherwise the join is not a simple ordered list and is skipped.
    for (i = 2 ; i <= na ; i++) {
        if (locs[i][1] < locs[i-1][1] || locs[i][2] < locs[i-1][2]) next
    }

    # Each intron spans from the base after one segment to the base
    # before the next one.
    from = locs[1][2] + 1
    for (i = 2 ; i <= na ; i++) {
        to = locs[i][1] - 1

        # Intron plus 4 flanking bases on each side.
        inseq = SeqLocation(Seq, (from - 4) ".." (to + 4), revstrand)

        # Fields: from to strand intron_num intron_nb boundaries status.
        # Boundaries are written as
        #   <4 flanking>.<first 6 of intron>-<last 6 of intron>.<4 flanking>
        # Introns are numbered from the other end on the reverse strand.
        SINfo[++Ninfo] = from " " to " " (revstrand ? "R" : "D") " "\
                         (revstrand ? na-i+1 : i-1) " " (na-1) " "\
                         substr(inseq, 1, 4) "."\
                         substr(inseq, 5, 6) "-"\
                         substr(inseq, length(inseq)-9, 6) "."\
                         substr(inseq, length(inseq)-3, 4) " "\
                         "ok"

        from = locs[i][2] + 1
    }

    next
}
|
||||
|
||||
# /gene= qualifier: record the gene name of the current feature.
# Qualifiers are indented to column 22 in a GenBank flat file, so any
# number of leading blanks is accepted rather than exactly one.
/^ +\/gene=/ {
    # Keep everything after the first "=" so that a value which itself
    # contains "=" is not cut short.
    gene = $0
    sub(/^[^=]*=/, "", gene)

    # Strip leading characters excluded by the class below (e.g. the
    # opening quote), then remaining quotes; blanks become "_".
    gsub("^[^a-z,A-Z]+", "", gene)
    gsub("\"", "", gene)
    gsub(" ", "_", gene)
    next
}
|
||||
|
||||
# /locus_tag= qualifier: record the locus tag of the current feature.
# Qualifiers are indented to column 22 in a GenBank flat file, so any
# number of leading blanks is accepted rather than exactly one.
/^ +\/locus_tag=/ {
    # Keep everything after the first "=" so that a value which itself
    # contains "=" is not cut short.
    locustag = $0
    sub(/^[^=]*=/, "", locustag)

    # Remove the quotes and make the tag a single blank-free token.
    gsub("\"", "", locustag)
    gsub(" ", "_", locustag)
    next
}
|
||||
|
||||
# /translation= qualifier: by this point the /gene= and /locus_tag=
# qualifiers of the CDS have normally been read, so the intron records
# collected on the CDS line can be printed with their names attached.
# Qualifiers are indented to column 22 in a GenBank flat file, so any
# number of leading blanks is accepted rather than exactly one.
# GeneFamily() is not defined in this file (presumably libgbk.awk).
/^ +\/translation=/ {
    for (i = 1 ; i <= Ninfo ; i++)
        print locus, locustag, GeneFamily(gene), gene, SINfo[i]

    # Mark the records as emitted so they are never printed twice.
    Ninfo = 0
    next
}
|
||||
|
||||
# "//" ends a GenBank record: forget the locus name until the next LOCUS line.
/^\/\// { locus = "?" }
|
Reference in New Issue
Block a user