From 79843c25403c00d5a77443b90d2d61e8db8c34f7 Mon Sep 17 00:00:00 2001
From: Eric Coissac
Date: Sat, 29 Apr 2023 07:10:05 +0200
Subject: [PATCH] Assure unicity of gene names

Former-commit-id: fedeeda9825456a1946b19e13658b6ec7e53351d
Former-commit-id: 7c9a49d7084c4b6dcd1238a1595703dc2048217c
---
Review notes (this section is not part of the commit message):
  - Leading whitespace of the hunk lines was reconstructed; if context lines
    do not match the target file, apply with `git apply --ignore-whitespace`.
  - Hunk line counts are unchanged; only the content of added lines differs.

 org-annotate.sh | 40 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 38 insertions(+), 2 deletions(-)

diff --git a/org-annotate.sh b/org-annotate.sh
index 3efccff..b4a87d0 100755
--- a/org-annotate.sh
+++ b/org-annotate.sh
@@ -752,9 +752,44 @@ pushTmpDir ORG.organnot
         print line}' > "${RESULTS}.sorted.annot"
     loginfo "Done."
 
+    loginfo "Unifying gene names"
+    $AwkCmd '
+        (FNR==NR) && /^FT +\/gene="/ {            # pass 1 (first copy of the input): count each gene name
+            gene = substr($0,29,100)               # assumes fixed-width layout: name starts at column 29
+            gene = substr(gene,1,length(gene)-1)   # drop the last character (closing quote); awk is 1-based
+            occurrence[gene]++
+        }
+
+        (FNR==1) && (FNR!=NR) {                    # first line of the second copy: counting is complete
+            for(gene in occurrence){
+                if (occurrence[gene]==1) {
+                    delete occurrence[gene]        # name seen once: it will be left untouched
+                } else {
+                    occurrence[gene] = 1           # name seen several times: slot becomes the next suffix
+                }
+            }
+        }
+
+        (FNR!=NR) && /^FT +\/gene="/ {            # pass 2: append _1, _2, ... to repeated names
+            gene = substr($0,29,100)
+            gene = substr(gene,1,length(gene)-1)
+            n = occurrence[gene]
+            if (n > 0) {
+                $0 = substr($0,1,28) gene "_" n "\""   # reuse the line prefix up to the opening quote
+                occurrence[gene]++
+            }
+        }
+
+        (FNR!=NR) {                                # pass 2 prints every line, rewritten or not
+            print $0
+        }
+        ' "${RESULTS}.sorted.annot" "${RESULTS}.sorted.annot" \
+        > "${RESULTS}.uniq_gene.annot"
+    loginfo "Done."
+
     if [[ "$tagprefix" != "no" ]] ; then
         loginfo "Adding locus tags from number: $locusshift..."
-        cat "${RESULTS}.sorted.annot" \
+        cat "${RESULTS}.uniq_gene.annot" \
             | $AwkCmd -v tagprefix="$tagprefix" \
                       -v locusshift="$locusshift" '
             /^FT +\/locus_tag=""/ {
@@ -769,10 +804,11 @@ pushTmpDir ORG.organnot
     else
         loginfo "Clearing locus tags done."
         egrep -v '^FT +\/locus_tag=""' \
-            "${RESULTS}.sorted.annot"
+            "${RESULTS}.uniq_gene.annot"
         loginfo "Clearing of tags done."
     fi
 
+
     loginfo "Closing annotations table..."
     echo "XX"
     loginfo "Done."