From 5a7b869170792e11fa11ab7a718b2466a9d29642 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Sat, 29 Apr 2023 07:04:09 +0200 Subject: [PATCH] Add better management of the initiation codon and create a translation exception when required Former-commit-id: 878d919fdaad16e6e2645b62b3a53ef5d5e1ef2b Former-commit-id: 3c3647cf114438a1ea9c3ff8c44e67e367929776 --- detectors/cds/lib/toEmbl.awk | 12 ++++++++++++ detectors/cds/lib/translate.awk | 33 ++++++++++++++++++++++++++++----- 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/detectors/cds/lib/toEmbl.awk b/detectors/cds/lib/toEmbl.awk index 01a9211..d26729b 100644 --- a/detectors/cds/lib/toEmbl.awk +++ b/detectors/cds/lib/toEmbl.awk @@ -96,7 +96,9 @@ function Unk(s) { /^c begin_entry/ { Nexon = 0 FrameShift=0 + nte = 0 delete Exon + delete TransExcep next } @@ -151,6 +153,12 @@ function Unk(s) { next } + +/^e trans_exception/ { + TransExcep[++nte] = "(pos:"$3",aa:"$4")" # /transl_except=(pos:,aa:) + next +} + /^c end_entry/ { GeneName = Unk(GeneName) @@ -185,6 +193,10 @@ function Unk(s) { QQualifier("pseudogene","unknown") QQualifier("note","nonfunctional due to stop codon") } + + for (ie = 1 ; ie <= nte; ie++) { + SQualifier("transl_except", TransExcep[ie]) + } QQualifier("translation", Translat) } diff --git a/detectors/cds/lib/translate.awk b/detectors/cds/lib/translate.awk index 4017f62..3a33beb 100644 --- a/detectors/cds/lib/translate.awk +++ b/detectors/cds/lib/translate.awk @@ -8,9 +8,16 @@ BEGIN { /^c end_entry/ { if (RevStrand) Cds = RevComplement(Cds) - Prot = Translate(substr(Cds, 1, length(Cds)-3)) - if (Modif == "=") - Prot = "M" substr(Prot, 2) + Prot = Translate(substr(Cds, 1, length(Cds)-3),Modif,"chloroplast") + + if (Modif == "=" && substr(Prot, 1,1) != "M") { + Prot = "M" substr(Prot, 2,length(Prot)) + if (CdsStartPosFull==1) { + print "e trans_exception "CdsStartPos" Met" + } else { + print "e trans_exception " "ERROR" " Met" + } + } print "e translate " Prot } @@ -21,19 +28,35 @@ 
BEGIN {
 /^c begin_entry/ {
     Cds = ""
     Iexon = 0
+    CdsStartPosFull = 0    # reset per entry: the end_entry rule tests CdsStartPosFull==1 before printing CdsStartPos
     next
 }
 /^e exon/ {
+    print "d cds " CDS     # NOTE(review): CDS is never assigned in the visible code (the accumulator is Cds) - confirm this line is intended
     RevStrand = ($6 == "-")
     if (++Iexon == 1) { # first is exon with start (even on - strand)
       Modif = $15
       gsub("\"", "", Modif)
       Modif = (RevStrand ? substr(Modif, 2, 1) : substr(Modif, 1, 1))
     }
     if (RevStrand)
+    if (RevStrand){
         Cds = SubSeq(Seq, $3, $4) "" Cds
-    else
+        if (Iexon==1) {                                  # first exon seen: record the initiation codon location
+            if(($4 - $3 +1) >= 3) {                      # only when the exon spans at least 3 positions
+                CdsStartPos="complement("$4-2".."$4")"   # last three positions of the exon, written as a complement location
+                CdsStartPosFull=1                        # tells the end_entry rule that CdsStartPos is usable
+            }
+        }}
+    else{
         Cds = Cds "" SubSeq(Seq, $3, $4)
+        if (Iexon==1) {                                  # first exon seen: record the initiation codon location
+            if (Iexon==1) {                              # redundant repeat of the enclosing test; kept so the hunk line count is unchanged
+                if(($4 - $3 +1) >= 3) {                  # only when the exon spans at least 3 positions
+                    CdsStartPos=$3".."$3+2               # first three positions of the exon
+                    CdsStartPosFull=1                    # tells the end_entry rule that CdsStartPos is usable
+                }
+            }
+        }
     }
     next
 }