Ensure uniqueness of gene names

Former-commit-id: fedeeda9825456a1946b19e13658b6ec7e53351d
Former-commit-id: 7c9a49d7084c4b6dcd1238a1595703dc2048217c
This commit is contained in:
2023-04-29 07:10:05 +02:00
parent f3a045f2ac
commit 79843c2540

View File

@ -752,9 +752,44 @@ pushTmpDir ORG.organnot
print line}' > "${RESULTS}.sorted.annot" print line}' > "${RESULTS}.sorted.annot"
loginfo "Done." loginfo "Done."
# Make gene names unique in the sorted annotation table.
#
# When the same /gene="..." value is carried by several feature lines,
# each occurrence is suffixed with _1, _2, ... in order of appearance.
# Names that occur only once are left untouched.
#
# Reads:  ${RESULTS}.sorted.annot
# Writes: ${RESULTS}.uniq_gene.annot
#
# The input file is given to awk twice: the first pass (FNR == NR) only
# counts how often each gene name occurs, the second pass rewrites the
# duplicated names and prints every line.
loginfo "Unifying gene names"
$AwkCmd '
    # Return the value of the /gene qualifier found on "line", without
    # its surrounding double quotes. awk strings are 1-based, so every
    # substr() call here starts at index 1 or above.
    function gene_name(line,    name) {
        name = substr(line, index(line, "/gene=\"") + 7)
        sub(/"[ \t]*$/, "", name)
        return name
    }

    # First pass: count the occurrences of each gene name, print nothing.
    (FNR == NR) {
        if ($0 ~ /^FT +\/gene="/) {
            total[gene_name($0)]++
        }
        next
    }

    # Second pass: number the names seen more than once. The columns in
    # front of the qualifier are copied from the input line so that the
    # layout of the table is preserved.
    /^FT +\/gene="/ {
        gene = gene_name($0)
        if (total[gene] > 1) {
            rank[gene]++
            prefix = substr($0, 1, index($0, "/gene=\"") - 1)
            $0 = prefix "/gene=\"" gene "_" rank[gene] "\""
        }
    }

    # Second pass: every line is emitted, modified or not.
    {
        print $0
    }
' "${RESULTS}.sorted.annot" "${RESULTS}.sorted.annot" \
    > "${RESULTS}.uniq_gene.annot"
loginfo "Done."
if [[ "$tagprefix" != "no" ]] ; then if [[ "$tagprefix" != "no" ]] ; then
loginfo "Adding locus tags from number: $locusshift..." loginfo "Adding locus tags from number: $locusshift..."
cat "${RESULTS}.sorted.annot" \ cat "${RESULTS}.uniq_gene.annot" \
| $AwkCmd -v tagprefix="$tagprefix" \ | $AwkCmd -v tagprefix="$tagprefix" \
-v locusshift="$locusshift" ' -v locusshift="$locusshift" '
/^FT +\/locus_tag=""/ { /^FT +\/locus_tag=""/ {
@ -769,10 +804,11 @@ pushTmpDir ORG.organnot
else else
loginfo "Clearing locus tags done." loginfo "Clearing locus tags done."
egrep -v '^FT +\/locus_tag=""' \ egrep -v '^FT +\/locus_tag=""' \
"${RESULTS}.sorted.annot" "${RESULTS}.uniq_gene.annot"
loginfo "Clearing of tags done." loginfo "Clearing of tags done."
fi fi
loginfo "Closing annotations table..." loginfo "Closing annotations table..."
echo "XX" echo "XX"
loginfo "Done." loginfo "Done."