cds/tools/chlorodb added
Former-commit-id: 0579e878a69b7c285ca71870e9ca5730649a2fda Former-commit-id: 7cced5b488441d87bf070a9a444317db0e048880
This commit is contained in:
99
detectors/cds/tools/lib/gbk.cds_long.awk
Normal file
99
detectors/cds/tools/lib/gbk.cds_long.awk
Normal file
@ -0,0 +1,99 @@
|
||||
#
|
||||
# Extract CDS features from a GenBank flat file (long version: one line per CDS, with DNA and protein sequences)
|
||||
#
|
||||
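# -v FASTA=<fasta file> (required); optional: -v MAXSPAN=<n> (default 10000), -v HEADONLY=<any non-empty value> (print the header line only)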
|
||||
|
||||
# @include libgbk.awk
|
||||
|
||||
BEGIN {
  # Column header; always the first line of output.
  print "#locus locustag genefam gene from to strand nexon length status start stop dnaseq protseq product"

  # Header-only mode: stop here, before the FASTA file is looked at.
  if (HEADONLY != "") {
    exit(0)
  }

  # Default upper bound used by the CDS rule (a CDS is accepted only if len < MAXSPAN).
  if (MAXSPAN == "") {
    MAXSPAN = 10000
  }

  # A FASTA file must be given and must exist.
  # Error(), TestPath() and ReadFasta() are not defined in this file
  # (presumably provided by libgbk.awk -- confirm).
  if (FASTA == "") {
    Error("No FASTA file specified", 1)
  }
  if (! TestPath(FASTA)) {
    Error("Fasta file: '" FASTA "' not found", 1)
  }

  # Keep the sequence in lower case; CDS sequences are extracted from it later.
  Seq = tolower(ReadFasta(FASTA))
}
|
||||
|
||||
/^LOCUS/ {
  # Start of a record: clear the "inside a CDS" flag and
  # remember the locus name (second field of the LOCUS line).
  incds = 0
  locus = $2
  next
}
|
||||
|
||||
/^ CDS/ {
  # NOTE(review): GenBank feature keys are normally indented by several spaces;
  # the single space in this pattern may be a whitespace-collapsing artifact
  # of how the file was rendered -- confirm against the original script.

  # Non-zero when the location field starts with "complement" (reverse strand).
  revstrand = match($2, "^complement")

  # Location text starts at column 22 of the line; drop a leading "complement".
  s = substr($0, 22)
  sub(/^complement/, "", s)

  # Any remaining "complement" or "order" keyword marks the CDS as not usable.
  ok = ! match(s, "complement|order")

  # Location helpers (Nexons, SpanLocation, LenLocation) are defined outside this file.
  nexon = Nexons(s)
  SpanLocation(s, sloc)
  spanlen = sloc[2] - sloc[1] + 1
  len = LenLocation(s)

  # NOTE(review): the MAXSPAN limit is applied to len; spanlen is computed
  # but not used anywhere in this file -- confirm which one was intended.
  if (ok) {
    ok = (len < MAXSPAN)
  }

  # Extract the DNA sequence only for usable CDS; "XXX" is the placeholder otherwise.
  if (ok) {
    cdsseq = SeqLocation(Seq, s, revstrand)
  } else {
    cdsseq = "XXX"
  }

  # First and last three characters of the CDS sequence (start / stop columns).
  cstart = substr(cdsseq, 1,3)
  cstop = substr(cdsseq, length(cdsseq)-2)

  # Defaults, overwritten by the qualifier rules if the qualifiers are present.
  gene = "none"
  locustag = "none"
  product = "none"
  translation = "X"

  incds = 1
  next
}
|
||||
|
||||
(incds && /^ [^ ]/) {
  # A new feature line was reached while a CDS was pending:
  # write the collected CDS record, then leave CDS mode.
  print locus, locustag, GeneFamily(gene), gene,
        sloc[1], sloc[2], (revstrand ? "R" : "D"),
        nexon, len, (ok ? "Ok" : "Error"),
        cstart, cstop, cdsseq, translation, product
  incds = 0
  next
}
|
||||
|
||||
/^ \/gene=/ {
  # The value is the text between the first and the second "=" of the line.
  split($0, a, "=")
  gene = a[2]

  # Drop the leading run of characters that are not listed in the bracket
  # expression (letters and ","), then remove quotes and replace spaces.
  # NOTE(review): the "," inside the bracket expression looks unintended -- confirm.
  sub(/^[^a-z,A-Z]+/, "", gene)
  gsub(/"/, "", gene)
  gsub(/ /, "_", gene)
  next
}
|
||||
|
||||
/^ \/locus_tag=/ {
  # The value is the text between the first and the second "=" of the line.
  split($0, a, "=")
  locustag = a[2]

  # Remove quotes and make the tag a single whitespace-free token.
  gsub(/"/, "", locustag)
  gsub(/ /, "_", locustag)
  next
}
|
||||
|
||||
/^ \/product=/ {
  # Take everything after the FIRST "=" of the line. Splitting on "=" and
  # keeping only the second piece would truncate product names that
  # themselves contain an "=" character.
  product = substr($0, index($0, "=") + 1)

  # Remove quotes and make the product a single whitespace-free token.
  gsub("\"", "", product)
  gsub(" ", "_", product)
  next
}
|
||||
|
||||
/^ \/translation=/ {
  # The value is the text between the first and the second "=" of the line
  # (only the text present on this line is captured).
  split($0, a, "=")
  translation = a[2]

  # Remove quotes and any spaces.
  gsub(/"/, "", translation)
  gsub(/ /, "", translation)
  next
}
|
||||
|
||||
/^\/\// {
  # End of record. If the last feature of the record was a CDS, no later
  # feature line has flushed it yet: write it now, while locus still holds
  # the record name. Otherwise it would be printed with locus "?" at END,
  # or lost when the next LOCUS line resets incds.
  if (incds) {
    print locus, locustag, GeneFamily(gene), gene, sloc[1], sloc[2], (revstrand ? "R" : "D"),
          nexon, len, (ok ? "Ok" : "Error"), cstart, cstop, cdsseq, translation, product
    incds = 0
  }

  # No current record until the next LOCUS line.
  locus = "?"
}
|
||||
|
||||
END {
  # Input exhausted while a CDS was still pending: write it out.
  if (incds) {
    print locus, locustag, GeneFamily(gene), gene,
          sloc[1], sloc[2], (revstrand ? "R" : "D"),
          nexon, len, (ok ? "Ok" : "Error"),
          cstart, cstop, cdsseq, translation, product
  }
}
|
Reference in New Issue
Block a user