From 2d66e0e965b8675aa172d8fdd79d9896d7fa0c68 Mon Sep 17 00:00:00 2001 From: mercierc Date: Mon, 13 Sep 2021 11:44:38 +1200 Subject: [PATCH] python: genbank parser: better handling of white spaces --- python/obitools3/parsers/genbank.pyx | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/python/obitools3/parsers/genbank.pyx b/python/obitools3/parsers/genbank.pyx index 2828b49..2ede28b 100755 --- a/python/obitools3/parsers/genbank.pyx +++ b/python/obitools3/parsers/genbank.pyx @@ -22,11 +22,11 @@ from libc.stdlib cimport free, malloc, realloc from libc.string cimport strcpy, strlen -_featureMatcher = re.compile(b'^FEATURES.+\n(?=ORIGIN )',re.DOTALL + re.M) +_featureMatcher = re.compile(b'^FEATURES.+\n(?=ORIGIN(\s*))',re.DOTALL + re.M) _headerMatcher = re.compile(b'^LOCUS.+(?=\nFEATURES)', re.DOTALL + re.M) -_seqMatcher = re.compile(b'^ORIGIN .+(?=//\n)', re.DOTALL + re.M) -_cleanSeq1 = re.compile(b'ORIGIN.+\n') +_seqMatcher = re.compile(b'^ORIGIN.+(?=//\n)', re.DOTALL + re.M) +_cleanSeq1 = re.compile(b'ORIGIN(\s*)\n') _cleanSeq2 = re.compile(b'[ \n0-9]+') _acMatcher = re.compile(b'(?<=^ACCESSION ).+',re.M) _deMatcher = re.compile(b'(?<=^DEFINITION ).+\n( .+\n)*',re.M) @@ -155,10 +155,10 @@ def genbankIterator_file(lineiterator, yield seq read+=1 - # Last sequence - seq = genbankParser(entry) - - yield seq + # Last sequence if not empty lines + if entry.strip(): + seq = genbankParser(entry) + yield seq free(entry)