|
|
@ -22,11 +22,11 @@ from libc.stdlib cimport free, malloc, realloc
|
|
|
|
from libc.string cimport strcpy, strlen
|
|
|
|
from libc.string cimport strcpy, strlen
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
_featureMatcher = re.compile(b'^FEATURES.+\n(?=ORIGIN )',re.DOTALL + re.M)
|
|
|
|
_featureMatcher = re.compile(b'^FEATURES.+\n(?=ORIGIN(\s*))',re.DOTALL + re.M)
|
|
|
|
|
|
|
|
|
|
|
|
_headerMatcher = re.compile(b'^LOCUS.+(?=\nFEATURES)', re.DOTALL + re.M)
|
|
|
|
_headerMatcher = re.compile(b'^LOCUS.+(?=\nFEATURES)', re.DOTALL + re.M)
|
|
|
|
_seqMatcher = re.compile(b'^ORIGIN .+(?=//\n)', re.DOTALL + re.M)
|
|
|
|
_seqMatcher = re.compile(b'^ORIGIN.+(?=//\n)', re.DOTALL + re.M)
|
|
|
|
_cleanSeq1 = re.compile(b'ORIGIN.+\n')
|
|
|
|
_cleanSeq1 = re.compile(b'ORIGIN(\s*)\n')
|
|
|
|
_cleanSeq2 = re.compile(b'[ \n0-9]+')
|
|
|
|
_cleanSeq2 = re.compile(b'[ \n0-9]+')
|
|
|
|
_acMatcher = re.compile(b'(?<=^ACCESSION ).+',re.M)
|
|
|
|
_acMatcher = re.compile(b'(?<=^ACCESSION ).+',re.M)
|
|
|
|
_deMatcher = re.compile(b'(?<=^DEFINITION ).+\n( .+\n)*',re.M)
|
|
|
|
_deMatcher = re.compile(b'(?<=^DEFINITION ).+\n( .+\n)*',re.M)
|
|
|
@ -155,9 +155,9 @@ def genbankIterator_file(lineiterator,
|
|
|
|
yield seq
|
|
|
|
yield seq
|
|
|
|
read+=1
|
|
|
|
read+=1
|
|
|
|
|
|
|
|
|
|
|
|
# Last sequence
|
|
|
|
# Last sequence if not empty lines
|
|
|
|
|
|
|
|
if entry.strip():
|
|
|
|
seq = genbankParser(entry)
|
|
|
|
seq = genbankParser(entry)
|
|
|
|
|
|
|
|
|
|
|
|
yield seq
|
|
|
|
yield seq
|
|
|
|
|
|
|
|
|
|
|
|
free(entry)
|
|
|
|
free(entry)
|
|
|
|