Files
annotate/detectors/cds/tools/lib/embl.info.awk

98 lines
1.3 KiB
Awk
Raw Normal View History

#
# Extract per-entry feature statistics from an EMBL flat file
#
# @include libgbk.awk
# GC(s): percentage (0..100) of G and C characters in s, case-insensitive.
# An empty string yields 0 because the divisor falls back to 1.
# The parameters after _local_ are awk-style locals, not caller arguments.
function GC(s, _local_, i, len) {
    s = toupper(s)
    len = length(s)
    # gsub() returns the number of (single-character) matches it removed,
    # which is exactly the count of G/C characters.
    i = gsub(/[GC]/, "", s)
    if (len == 0)
        len = 1
    return i * 100 / len
}
#
# rules
#
# Print the header naming the columns of the per-entry report, and set the
# placeholders for fields an entry may never define.
BEGIN {
    print "#locus orga len oklen gc nbCds nbCds_int0 nbCds_int1 nbCds_intsup1 perCds_noex meanCdsSize nbtRNA nbrRNA nboRNA"
    # Start in the same state that the end-of-entry ("//") rule restores
    # after each record. Without this, a first entry lacking an ID, OS or
    # source line would print empty fields and shift the space-separated
    # columns, while later entries would print "?" / 0.
    orga = locus = "?"
    len = 0
}
# ID line: keep the second whitespace-separated field, with every
# semicolon removed, as the locus name of the current entry.
/^ID/ {
    locus = $2
    gsub(/;/, "", locus)
    next
}
# OS line: everything from column 6 onward is the organism name; spaces
# are turned into underscores so the name stays a single output field.
/^OS/ {
    orga = substr($0, 6)
    gsub(/ /, "_", orga)
    next
}
# source feature: record the declared entry length.
# GetLoc() is not defined in the lines shown; presumably it parses the
# location in $3 into loc[] with the end position in loc[2] -- TODO confirm.
/^FT source/ {
    GetLoc($3, loc)
    len = loc[2]
    next
}
# CDS feature: update the running mean CDS size and the per-class CDS counts.
# LenLocation() and Nexons() are not defined in the lines shown; presumably
# they return the length and the exon count of the location in $3.
/^FT CDS/ {
    # Incremental mean: (old mean * old count + new size) / new count.
    meanCds = (meanCds * nbCds + LenLocation($3)) / (nbCds + 1)
    nbCds++
    # Classes above 3 are folded into class 3. The report prints classes
    # 1, 2 and 3 under the nbCds_int0, nbCds_int1 and nbCds_intsup1 columns.
    n = Nexons($3)
    n = (n > 3 ? 3 : n)
    nbCdx[n]++
    next
}
# RNA feature counters. Order matters: the specific keys are handled first
# and the catch-all at the end counts whatever RNA key is left.

# tRNA features.
/^FT tRNA/ {
    nbTrna++
    next
}

# rRNA features.
/^FT rRNA/ {
    nbRrna++
    next
}

# mRNA features are deliberately skipped so the catch-all below does not
# count them as "other RNA".
/^FT mRNA/ {
    next
}

# Any other feature whose key ends in "RNA" (e.g. ncRNA, misc_RNA).
# Only the feature-key field is tested. The previous pattern /^FT .*RNA/
# also matched FT qualifier lines that merely mention RNA
# (e.g. /product="tRNA-Ala"), inflating the count.
$1 == "FT" && $2 ~ /^[A-Za-z_]*RNA$/ {
    nbOrna++
    next
}
# SQ line: the sequence section starts here; begin with an empty
# accumulator and flag that following indented lines are sequence data.
/^SQ / {
    seq = ""
    inseq = 1
    next
}
# Sequence data line (only inside the SQ section): drop every digit and
# space, then append what is left to the accumulated sequence.
inseq && /^ / {
    s = $0
    gsub(/[0-9 ]/, "", s)
    seq = seq s
    next
}
# End-of-entry marker ("//"): print one report line for the entry and reset
# all per-entry state.
# Max() is not defined in the lines shown; it is used here to avoid a
# division by zero when the entry has no CDS.
/^\/\// {
    # "ok" when the length taken from the source feature equals the number
    # of sequence characters actually read.
    oklen = (len == length(seq) ? "ok" : "wrong")
    gc = GC(seq)
    # "+0" forces unset counters to print as 0 instead of an empty string.
    print locus, orga, len, oklen, gc, nbCds+0, nbCdx[1]+0, \
          nbCdx[2]+0, nbCdx[3]+0, (nbCdx[1]+0)*100/Max(1, nbCds+0), \
          meanCds+0, nbTrna+0, nbRrna+0, nbOrna+0
    nbCds = nbTrna = nbRrna = nbOrna = len = inseq = meanCds = 0
    delete nbCdx
    orga = locus = "?"
    # Clear the sequence too: otherwise an entry with no SQ section would
    # be checked and GC-scored against the previous entry's sequence.
    seq = ""
}