From 860cd217d4da0c98a899621b94b9519d160d7c74 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Thu, 6 Oct 2016 08:56:45 -0300 Subject: [PATCH] Add the management of pseudogenes Former-commit-id: 26d91366e483cf17c440b251ab1e8ac5390699fe Former-commit-id: 0d3d69ba351bd174fe08387a474fd1137559e38a --- detectors/cds/lib/toEmbl.awk | 65 ++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/detectors/cds/lib/toEmbl.awk b/detectors/cds/lib/toEmbl.awk index e2df51f..9f3cc2c 100644 --- a/detectors/cds/lib/toEmbl.awk +++ b/detectors/cds/lib/toEmbl.awk @@ -95,6 +95,7 @@ function Unk(s) { /^c begin_entry/ { Nexon = 0 + FrameShift=0 delete Exon next } @@ -107,6 +108,9 @@ function Unk(s) { Exon[Nexon]["indels"] = $9 "+" $12 modif = $15; gsub("\"", "", modif) Exon[Nexon]["modif"] = modif + + if ( $0 ~ /frameshifts +[0-9]+/) + FrameShift=1 next } @@ -123,34 +127,39 @@ function Unk(s) { /^c end_entry/ { - GeneName = Unk(GeneName) - PassType = Unk(PassType) - - gname = (Ngene == 1 ? GeneName : GeneName "_" ++Igene) - locus = "" - - Feature("gene", GeneLocation()) - QQualifier("gene", gname) - QQualifier("locus_tag", locus) - - Feature("CDS", CdsLocation()) - SQualifier("codon_start", 1) - SQualifier("transl_table", 11) - QQualifier("gene", gname) - QQualifier("locus_tag", locus) - QQualifier("product", Product) - QQualifier("inference", "similar to DNA sequence:" Simil) - QQualifier("inference", "org.annot -- detect pass:" PassType ":" PassInfo) - QQualifier("translation", Translat) - - if (Nexon > 1) { - for (i = 1 ; i <= Nexon ; i++) { - Feature("exon", ExonLocation(i)) - QQualifier("gene", gname) - QQualifier("locus_tag", locus) - SQualifier("number", Exon[1]["strand"] == "+" ? i : Nexon-i+1) - } - } + GeneName = Unk(GeneName) + PassType = Unk(PassType) + + gname = (Ngene == 1 ? GeneName : GeneName "_" ++Igene) + locus = "" + + Feature("gene", GeneLocation()) + QQualifier("gene", gname) + QQualifier("locus_tag", locus) + if (FrameShift) + QQualifier("pseudogene","unknown") + + Feature("CDS", CdsLocation()) + SQualifier("codon_start", 1) + SQualifier("transl_table", 11) + QQualifier("gene", gname) + QQualifier("locus_tag", locus) + if (FrameShift) + QQualifier("pseudogene","unknown") + QQualifier("product", Product) + QQualifier("inference", "similar to DNA sequence:" Simil) + QQualifier("inference", "org.annot -- detect pass:" PassType ":" PassInfo) + if (FrameShift==0) + QQualifier("translation", Translat) + + if (Nexon > 1) { + for (i = 1 ; i <= Nexon ; i++) { + Feature("exon", ExonLocation(i)) + QQualifier("gene", gname) + QQualifier("locus_tag", locus) + SQualifier("number", Exon[1]["strand"] == "+" ? i : Nexon-i+1) + } + } }