diff --git a/python/obitools3/parsers/genbank.pyx b/python/obitools3/parsers/genbank.pyx index 2828b49..2ede28b 100755 --- a/python/obitools3/parsers/genbank.pyx +++ b/python/obitools3/parsers/genbank.pyx @@ -22,11 +22,11 @@ from libc.stdlib cimport free, malloc, realloc from libc.string cimport strcpy, strlen -_featureMatcher = re.compile(b'^FEATURES.+\n(?=ORIGIN )',re.DOTALL + re.M) +_featureMatcher = re.compile(b'^FEATURES.+\n(?=ORIGIN(\s*))',re.DOTALL + re.M) _headerMatcher = re.compile(b'^LOCUS.+(?=\nFEATURES)', re.DOTALL + re.M) -_seqMatcher = re.compile(b'^ORIGIN .+(?=//\n)', re.DOTALL + re.M) -_cleanSeq1 = re.compile(b'ORIGIN.+\n') +_seqMatcher = re.compile(b'^ORIGIN.+(?=//\n)', re.DOTALL + re.M) +_cleanSeq1 = re.compile(b'ORIGIN(\s*)\n') _cleanSeq2 = re.compile(b'[ \n0-9]+') _acMatcher = re.compile(b'(?<=^ACCESSION ).+',re.M) _deMatcher = re.compile(b'(?<=^DEFINITION ).+\n( .+\n)*',re.M) @@ -155,10 +155,10 @@ def genbankIterator_file(lineiterator, yield seq read+=1 - # Last sequence - seq = genbankParser(entry) - - yield seq + # Last sequence if not empty lines + if entry.strip(): + seq = genbankParser(entry) + yield seq free(entry)