Small fix in the EMBL and GenBank features parser

This commit is contained in:
Celine Mercier
2019-03-15 15:50:11 +01:00
parent c953f0cb00
commit 7737211ac2

View File

@@ -11,10 +11,6 @@ import logging
import re import re
from itertools import chain from itertools import chain
# TODO cython
# TODO import Location functions for Genbank stuff (src/obitools/location/__init__.py)
_featureMatcher = re.compile(b'^(FT| ) [^ ].+\n((FT| ) .+\n)+',re.M) _featureMatcher = re.compile(b'^(FT| ) [^ ].+\n((FT| ) .+\n)+',re.M)
_featureCleaner = re.compile(b'^FT',re.M) _featureCleaner = re.compile(b'^FT',re.M)
@@ -138,7 +134,7 @@ def extractTaxon(bytes text, dict tags):
s = [s] s = [s]
t = set(int(v[6:]) for v in chain(*tuple(f[b'db_xref'] for f in s if b'db_xref' in f)) t = set(int(v[6:]) for v in chain(*tuple(f[b'db_xref'] for f in s if b'db_xref' in f))
if v[0:6]=='taxon:') if v[0:6]==b'taxon:')
if len(t)==1 : if len(t)==1 :
taxid=t.pop() taxid=t.pop()
if taxid >=0: if taxid >=0:
@@ -147,5 +143,3 @@ def extractTaxon(bytes text, dict tags):
t = set(chain(*tuple(f[b'organism'] for f in s if b'organism' in f))) t = set(chain(*tuple(f[b'organism'] for f in s if b'organism' in f)))
if len(t)==1: if len(t)==1:
tags[b'organism']=t.pop() tags[b'organism']=t.pop()