Files
annotate/detectors/cds/tools/lib/gbk.cds_long.awk
alain viari e4d6a8484d cds/tools/chlorodb added
Former-commit-id: 0579e878a69b7c285ca71870e9ca5730649a2fda
Former-commit-id: 7cced5b488441d87bf070a9a444317db0e048880
2015-11-13 17:41:18 +01:00

100 lines
2.0 KiB
Awk

#
# get cds features from genbank (long version)
#
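# variables (set with awk -v):
#   FASTA=<file>    sequence file; required unless HEADONLY is set
#   MAXSPAN=<n>     a CDS whose length is >= n is reported as "Error" (default 10000)
#   HEADONLY=<any>  if non-empty, print the header line only and exit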
# @include libgbk.awk
# Startup: print the column header, then load the sequence named by FASTA.
BEGIN {
    # header row: one name per output column
    print "#locus locustag genefam gene from to strand nexon length status start stop dnaseq protseq product"

    # HEADONLY set to any non-empty value: stop right after the header
    if (HEADONLY != "") {
        exit 0
    }

    # default for the length threshold tested in the CDS rule
    if (MAXSPAN == "") {
        MAXSPAN = 10000
    }

    # FASTA must be given and must pass TestPath()
    if (FASTA == "") {
        Error("No FASTA file specified", 1)
    }
    if (! TestPath(FASTA)) {
        Error("Fasta file: '" FASTA "' not found", 1)
    }

    # the sequence is kept in lower case for the whole run
    Seq = tolower(ReadFasta(FASTA))
}
# LOCUS line: a new record begins. Leave CDS state and remember the
# record name (second field) for the rows printed later.
/^LOCUS/ {
    incds = 0
    locus = $2
    next
}
# CDS feature line: open a new CDS record.
#
# Parses the location found on this line, extracts the matching DNA from Seq
# and resets the qualifier values (gene, locus_tag, product, translation) to
# their defaults; the qualifier rules fill them in from the following lines.
# The row itself is printed later, when the CDS is closed.
#
# Nexons, SpanLocation, LenLocation and SeqLocation are not defined in this
# file (presumably they come from libgbk.awk).
#
# NOTE(review): the location is read from column 22, which fits the standard
# GenBank layout where the feature key is indented by 5 blanks; the single
# blank in this pattern may be a whitespace-collapsing artifact - confirm
# against the original file.
/^ CDS/ {
    # This rule ends with 'next', so a CDS line never reaches the generic
    # rule that closes an open CDS. Print the pending record here, otherwise
    # the first of two back-to-back CDS features is silently overwritten.
    if (incds) {
        print locus, locustag, GeneFamily(gene), gene, sloc[1], sloc[2], (revstrand ? "R" : "D"),
              nexon, len, (ok ? "Ok" : "Error"), cstart, cstop, cdsseq, translation, product
    }

    # non-zero when the location (2nd field) starts with "complement"
    revstrand = match($2, "^complement")

    # location text starts at column 22; drop the leading "complement" keyword
    s = substr($0, 22)
    gsub("^complement", "", s)

    # a "complement" or "order" still present after that is not supported
    ok = ! match(s, "complement|order")

    nexon = Nexons(s)
    SpanLocation(s, sloc)                # fills sloc[1] and sloc[2], printed as from/to
    spanlen = sloc[2] - sloc[1] + 1      # not used anywhere else in this file
    len = LenLocation(s)

    # reject a CDS whose length reaches MAXSPAN
    ok = ok && (len < MAXSPAN)

    # rejected records get the placeholder "XXX" instead of a sequence
    cdsseq = ok ? SeqLocation(Seq, s, revstrand) : "XXX"
    cstart = substr(cdsseq, 1,3)                  # first three characters
    cstop = substr(cdsseq, length(cdsseq)-2)      # last three characters

    # defaults, overwritten by the qualifier rules when the qualifier is present
    gene = "none"
    locustag = "none"
    product = "none"
    translation = "X"

    incds = 1
    next
}
# While a CDS is open, a line made of one blank followed by a non-blank
# closes it: write the CDS row and leave CDS state.
incds && /^ [^ ]/ {
    _strandCode = revstrand ? "R" : "D"
    _statusText = ok ? "Ok" : "Error"
    print locus, locustag, GeneFamily(gene), gene, sloc[1], sloc[2], _strandCode,
          nexon, len, _statusText, cstart, cstop, cdsseq, translation, product
    incds = 0
    next
}
# /gene= qualifier: store the gene name.
# The value is the text between the first and second "=" of the line; leading
# characters outside a-z, A-Z and "," are dropped, double quotes are removed
# and blanks become underscores.
/^ \/gene=/ {
    split($0, kv, "=")
    gene = kv[2]
    gsub("^[^a-z,A-Z]+", "", gene)
    gsub(" ", "_", gene)
    gsub("\"", "", gene)
    next
}
# /locus_tag= qualifier: store the locus tag.
# The value is the text between the first and second "=" of the line, with
# double quotes removed and blanks turned into underscores.
/^ \/locus_tag=/ {
    split($0, kv, "=")
    locustag = kv[2]
    gsub(" ", "_", locustag)
    gsub("\"", "", locustag)
    next
}
# /product= qualifier: store the product description.
# Double quotes are removed and blanks become underscores so the value stays
# a single output column. Only the text present on this line is captured.
/^ \/product=/ {
    # Keep everything after the FIRST "=". Splitting the line on "=" and
    # taking the second piece would cut a description that itself contains
    # an "=" at that character.
    product = substr($0, index($0, "=") + 1)
    gsub("\"", "", product)
    gsub(" ", "_", product)
    next
}
# /translation= qualifier: store the protein sequence.
# The value is the text between the first and second "=" of the line, with
# double quotes and blanks removed. Only the text present on this line is
# captured by this rule.
/^ \/translation=/ {
    split($0, kv, "=")
    translation = kv[2]
    gsub(" ", "", translation)
    gsub("\"", "", translation)
    next
}
# "//" line: end of the current record.
/^\/\// {
    # A CDS can still be open here when no later line closed it. Print it
    # now, while 'locus' still holds the record name: otherwise the END rule
    # reports it with locus "?", or the next LOCUS line resets incds and the
    # record is never printed.
    if (incds) {
        print locus, locustag, GeneFamily(gene), gene, sloc[1], sloc[2], (revstrand ? "R" : "D"),
              nexon, len, (ok ? "Ok" : "Error"), cstart, cstop, cdsseq, translation, product
        incds = 0
    }

    # no record is current until the next LOCUS line
    locus = "?"
}
# End of input: if a CDS is still open, write its row.
END {
    if (incds) {
        _strandCode = revstrand ? "R" : "D"
        _statusText = ok ? "Ok" : "Error"
        print locus, locustag, GeneFamily(gene), gene, sloc[1], sloc[2], _strandCode,
              nexon, len, _statusText, cstart, cstop, cdsseq, translation, product
    }
}