Cython API: obi import can now import ngsfilter files and tabular files

This commit is contained in:
Celine Mercier
2018-03-12 18:10:43 +01:00
parent 8a0b95c1d6
commit 15e43bb9a1
9 changed files with 168 additions and 142 deletions

View File

@ -31,7 +31,7 @@ default_config = { 'software' : "The OBITools",
'fileformat' : None, 'fileformat' : None,
'skiperror' : True, 'skiperror' : True,
'qualityformat' : b'sanger', 'qualityformat' : b'sanger',
'qualityoffset' : -1, 'offset' : -1,
'noquality' : False, 'noquality' : False,
'seqtype' : b'nuc', 'seqtype' : b'nuc',
"header" : False, "header" : False,

View File

@ -25,8 +25,8 @@ def __addInputOption(optionManager):
group.add_argument('--na-string', group.add_argument('--na-string',
action="store", dest="obi:nastring", action="store", dest="obi:nastring",
default=b"NA", default="NA",
type=bytes, type=str,
help="String associated to Non Available (NA) values") help="String associated to Non Available (NA) values")
@ -61,7 +61,7 @@ def __addSequenceInputOption(optionManager):
action="store_const", dest="obi:format", action="store_const", dest="obi:format",
default=None, default=None,
const=b'ngsfilter', const=b'ngsfilter',
help="Input file is a ngsfilter file") help="Input file is an ngsfilter file")
group.add_argument('--ecopcr-result', group.add_argument('--ecopcr-result',
action="store_const", dest="obi:format", action="store_const", dest="obi:format",
@ -75,6 +75,12 @@ def __addSequenceInputOption(optionManager):
const=b'ecoprimers', const=b'ecoprimers',
help="Input file is the result of an ecoprimers") help="Input file is the result of an ecoprimers")
group.add_argument('--tabular',
action="store_const", dest="obi:format",
default=None,
const=b'tabular',
help="Input file is a tabular file")
group.add_argument('--skip-on-error', group.add_argument('--skip-on-error',
action="store_true", dest="obi:skiperror", action="store_true", dest="obi:skiperror",
default=False, default=False,
@ -120,13 +126,13 @@ def __addTabularInputOption(optionManager):
group.add_argument('--sep', group.add_argument('--sep',
action="store", dest="obi:sep", action="store", dest="obi:sep",
default=None, default=None,
type=bytes, type=str,
help="Column separator") help="Column separator")
group.add_argument('--dec', group.add_argument('--dec',
action="store", dest="obi:dec", action="store", dest="obi:dec",
default=b".", default=".",
type=bytes, type=str,
help="Decimal separator") help="Decimal separator")
group.add_argument('--strip-white', group.add_argument('--strip-white',
@ -141,8 +147,8 @@ def __addTabularInputOption(optionManager):
group.add_argument('--comment-char', group.add_argument('--comment-char',
action="store", dest="obi:commentchar", action="store", dest="obi:commentchar",
default=b"#", default="#",
type=bytes, type=str,
help="Lines starting by this char are considered as comment") help="Lines starting by this char are considered as comment")
def __addTaxonomyInputOption(optionManager): def __addTaxonomyInputOption(optionManager):
@ -171,7 +177,7 @@ def addSequenceInputOption(optionManager):
__addSequenceInputOption(optionManager) __addSequenceInputOption(optionManager)
def addTabularInputOption(optionManager): def addTabularInputOption(optionManager):
__addInputOption(optionManager) #__addInputOption(optionManager) # TODO discuss conflict
__addTabularInputOption(optionManager) __addTabularInputOption(optionManager)
def addTaxonomyInputOption(optionManager): def addTaxonomyInputOption(optionManager):

View File

@ -1,13 +1,8 @@
#cython: language_level=3 #cython: language_level=3
# TODO cimport generate errors with argument numbers, but without them some variables can't be declared
import sys import sys
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.files.universalopener cimport uopen
from obitools3.parsers.fasta import fastaIterator
from obitools3.parsers.fastq import fastqIterator
from obitools3.dms.view.view cimport View from obitools3.dms.view.view cimport View
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
from obitools3.dms.column.column cimport Column from obitools3.dms.column.column cimport Column
@ -24,7 +19,7 @@ from obitools3.dms.capi.obitypes cimport obitype_t, \
from obitools3.dms.capi.obierrno cimport obi_errno from obitools3.dms.capi.obierrno cimport obi_errno
from obitools3.apps.optiongroups import addSequenceInputOption, addMinimalOutputOption from obitools3.apps.optiongroups import addSequenceInputOption, addTabularInputOption, addMinimalOutputOption
from obitools3.uri.decode import open_uri from obitools3.uri.decode import open_uri
@ -45,6 +40,7 @@ default_config = { 'destview' : None,
def addOptions(parser): def addOptions(parser):
addSequenceInputOption(parser) addSequenceInputOption(parser)
addTabularInputOption(parser)
addMinimalOutputOption(parser) addMinimalOutputOption(parser)
# addTaxdumpInputOption(parser) # addTaxdumpInputOption(parser)
@ -63,8 +59,8 @@ def run(config):
cdef int nb_elts cdef int nb_elts
cdef object d cdef object d
cdef View view cdef View view
cdef object iseq cdef object entries
cdef object seq cdef object entry
cdef Column id_col cdef Column id_col
cdef Column def_col cdef Column def_col
cdef Column seq_col cdef Column seq_col
@ -108,9 +104,9 @@ def run(config):
pb = ProgressBar(1000000, config, seconde=5) # TODO should be number of records in file pb = ProgressBar(1000000, config, seconde=5) # TODO should be number of records in file
iseq = input[1] entries = input[1]
NA_value = config['obi']['nastring'] NA_value = tobytes(config['obi']['nastring']) # TODO
NUC_SEQS_view = False NUC_SEQS_view = False
if isinstance(output[1], View) : if isinstance(output[1], View) :
@ -121,39 +117,39 @@ def run(config):
raise NotImplementedError() raise NotImplementedError()
# Save basic columns in variables for optimization # Save basic columns in variables for optimization
if NUC_SEQS_view : if NUC_SEQS_view :
id_col = view[b"ID"] id_col = view[b"ID"] # TODO use macros or globals for column names
def_col = view[b"DEFINITION"] def_col = view[b"DEFINITION"]
seq_col = view[b"NUC_SEQ"] seq_col = view[b"NUC_SEQ"]
dcols = {} dcols = {}
i = 0 i = 0
for seq in iseq : for entry in entries :
pb(i) pb(i)
if NUC_SEQS_view : if NUC_SEQS_view :
# Check if there is a sequencing quality associated # TODO # Check if there is a sequencing quality associated # TODO
if i == 0: if i == 0:
get_quality = b"QUALITY" in seq get_quality = b"QUALITY" in entry
if get_quality: if get_quality:
Column.new_column(view, b"QUALITY", OBI_QUAL) Column.new_column(view, b"QUALITY", OBI_QUAL)
qual_col = view[b"QUALITY"] qual_col = view[b"QUALITY"]
id_col[i] = seq.id id_col[i] = entry.id
def_col[i] = seq.definition def_col[i] = entry.definition
seq_col[i] = seq.seq seq_col[i] = entry.seq
if get_quality : if get_quality :
qual_col[i] = seq.quality qual_col[i] = entry.quality
for tag in seq : for tag in entry :
if tag != b"ID" and tag != b"DEFINITION" and tag != b"NUC_SEQ" and tag != b"QUALITY" : # TODO hmmm... if tag != b"ID" and tag != b"DEFINITION" and tag != b"NUC_SEQ" and tag != b"QUALITY" : # TODO hmmm...
value = seq[tag] value = entry[tag]
# Check NA value # Check NA value
if value == NA_value : if value == NA_value :

View File

@ -17,7 +17,6 @@ def fastaIterator(lineiterator,
firstline=None, firstline=None,
int buffersize=100000000 int buffersize=100000000
): ):
cdef LineBuffer lb
cdef str ident cdef str ident
cdef str definition cdef str definition
cdef dict tags cdef dict tags
@ -31,23 +30,26 @@ def fastaIterator(lineiterator,
else: else:
ionly=int(only) ionly=int(only)
if isinstance(lineiterator,(str,bytes)): if isinstance(lineiterator, (str, bytes)):
lineiterator=uopen(lineiterator) lineiterator=uopen(lineiterator)
if isinstance(lineiterator, LineBuffer): if isinstance(lineiterator, LineBuffer):
lb=lineiterator iterator = iter(lineiterator)
else: else:
lb=LineBuffer(lineiterator,buffersize) if hasattr(lineiterator, "readlines"):
iterator = iter(LineBuffer(lineiterator, buffersize))
elif hasattr(lineiterator, '__next__'):
iterator = lineiterator
else:
raise Exception("Invalid line iterator")
skipped = 0 skipped = 0
i = iter(lb) i = iterator
if firstline is None: if firstline is None:
line = next(i) line = next(i)
else: else:
line = firstline line = firstline
while True: while True:
if ionly >= 0 and read >= ionly: if ionly >= 0 and read >= ionly:
@ -81,7 +83,7 @@ def fastaIterator(lineiterator,
# definition, # definition,
# tags=tags, # tags=tags,
# ) # )
# TODO # TODO Seq object
yield { "id" : ident, yield { "id" : ident,
"definition" : definition, "definition" : definition,
"sequence" : sequence, "sequence" : sequence,
@ -100,7 +102,6 @@ def fastaNucIterator(lineiterator,
firstline=None, firstline=None,
int buffersize=100000000 int buffersize=100000000
): ):
cdef LineBuffer lb
cdef str ident cdef str ident
cdef str definition cdef str definition
cdef dict tags cdef dict tags
@ -115,14 +116,16 @@ def fastaNucIterator(lineiterator,
ionly = int(only) ionly = int(only)
if isinstance(lineiterator, (str, bytes)): if isinstance(lineiterator, (str, bytes)):
lineiterator=uopen(lineiterator) lineiterator=uopen(lineiterator)
if isinstance(lineiterator, types.GeneratorType):
iterator = lineiterator
if isinstance(lineiterator, LineBuffer): if isinstance(lineiterator, LineBuffer):
iterator = iter(lineiterator) iterator = iter(lineiterator)
else: else:
iterator = iter(LineBuffer(lineiterator, buffersize)) if hasattr(lineiterator, "readlines"):
iterator = iter(LineBuffer(lineiterator, buffersize))
elif hasattr(lineiterator, '__next__'):
iterator = lineiterator
else:
raise Exception("Invalid line iterator")
skipped = 0 skipped = 0
read = 0 read = 0

View File

@ -12,7 +12,7 @@ from obitools3.dms.obiseq cimport Nuc_Seq
def fastqIterator(lineiterator, def fastqIterator(lineiterator,
int skip=0, int skip=0,
only=None, only=None,
int qualityoffset=-1, int offset=-1,
bint noquality=False, bint noquality=False,
firstline=None, firstline=None,
int buffersize=100000000 int buffersize=100000000
@ -25,14 +25,14 @@ def fastqIterator(lineiterator,
else: else:
return fastqWithQualityIterator(lineiterator, return fastqWithQualityIterator(lineiterator,
skip,only, skip,only,
qualityoffset, offset,
firstline, firstline,
buffersize) buffersize)
def fastqWithQualityIterator(lineiterator, def fastqWithQualityIterator(lineiterator,
int skip=0, int skip=0,
only=None, only=None,
int qualityoffset=-1, int offset=-1,
firstline=None, firstline=None,
int buffersize=100000000 int buffersize=100000000
): ):
@ -49,21 +49,25 @@ def fastqWithQualityIterator(lineiterator,
ionly=-1 ionly=-1
else: else:
ionly=int(only) ionly=int(only)
if isinstance(lineiterator,(str,bytes)): if isinstance(lineiterator, (str, bytes)):
lineiterator=uopen(lineiterator) lineiterator=uopen(lineiterator)
if isinstance(lineiterator, LineBuffer): if isinstance(lineiterator, LineBuffer):
lb=lineiterator iterator = iter(lineiterator)
else: else:
lb=LineBuffer(lineiterator,buffersize) if hasattr(lineiterator, "readlines"):
iterator = iter(LineBuffer(lineiterator, buffersize))
i = iter(lb) elif hasattr(lineiterator, '__next__'):
iterator = lineiterator
else:
raise Exception("Invalid line iterator")
i = iterator
lines_to_skip = skip*4 - (firstline is not None) lines_to_skip = skip*4 - (firstline is not None)
for skipped in range(lines_to_skip): for skipped in range(lines_to_skip):
next(i) next(i)
if skip > 0: if skip > 0:
firstline=None firstline=None
@ -88,7 +92,7 @@ def fastqWithQualityIterator(lineiterator,
sequence, sequence,
definition=definition, definition=definition,
quality=quality, quality=quality,
offset=qualityoffset, offset=offset,
tags=tags) tags=tags)
yield seq yield seq
@ -97,7 +101,7 @@ def fastqWithQualityIterator(lineiterator,
# "definition" : definition, # "definition" : definition,
# "sequence" : sequence, # "sequence" : sequence,
# "quality" : quality, # "quality" : quality,
# "offset" : qualityoffset, # "offset" : offset,
# "tags" : tags, # "tags" : tags,
# "annotation" : {} # "annotation" : {}
# } # }
@ -112,7 +116,6 @@ def fastqWithoutQualityIterator(lineiterator,
firstline=None, firstline=None,
int buffersize=100000000 int buffersize=100000000
): ):
cdef LineBuffer lb
cdef str ident cdef str ident
cdef str definition cdef str definition
cdef dict tags cdef dict tags
@ -126,15 +129,19 @@ def fastqWithoutQualityIterator(lineiterator,
else: else:
ionly=int(only) ionly=int(only)
if isinstance(lineiterator,(str,bytes)): if isinstance(lineiterator, (str, bytes)):
lineiterator=uopen(lineiterator) lineiterator=uopen(lineiterator)
if isinstance(lineiterator, LineBuffer): if isinstance(lineiterator, LineBuffer):
lb=lineiterator iterator = iter(lineiterator)
else: else:
lb=LineBuffer(lineiterator,buffersize) if hasattr(lineiterator, "readlines"):
iterator = iter(LineBuffer(lineiterator, buffersize))
elif hasattr(lineiterator, '__next__'):
iterator = lineiterator
else:
raise Exception("Invalid line iterator")
i = iter(lb) i = iterator
lines_to_skip = skip*4 - (firstline is not None) lines_to_skip = skip*4 - (firstline is not None)
for skipped in range(lines_to_skip): for skipped in range(lines_to_skip):

View File

@ -1,5 +1,4 @@
#cython: language_level=3 #cython: language_level=3
cdef object __etag__(str x)
cpdef tuple parseHeader(str header) cpdef tuple parseHeader(str header)

View File

@ -6,54 +6,12 @@ Created on 25 mars 2016
@author: coissac @author: coissac
''' '''
from obitools3.utils cimport __etag__
import re import re
__ret__ = re.compile('''(([^ ]+)=('[^']*'|"[^"]*"|[^;]+); *)+?''')
__re_int__ = re.compile("^[+-]?[0-9]+$")
__re_float__ = re.compile("^[+-]?[0-9]+(\.[0-9]*)?([eE][+-]?[0-9]+)?$")
__re_str__ = re.compile("""^"[^"]*"|'[^']*'$""")
__re_dict__ = re.compile("""^\{\ *
(
("[^"]*"|'[^']*')
\ *:\ *
([^,}]+|
"[^"]*"|
'[^']*'
)
)?
(\ *,\ *
("[^"]*"|'[^']*')
\ *:\ *
([^,}]+|
"[^"]*"|
'[^']*'
)
)*\ *\}$""", re.VERBOSE)
__re_val__ = re.compile("""(("[^"]*"|'[^']*') *: *([^,}]+|"[^"]*"|'[^']*') *[,}] *)""") __ret__ = re.compile('''(([^ ]+)=('[^']*'|"[^"]*"|[^;]+); *)+?''')
cdef object __etag__(str x):
cdef list elements
cdef tuple i
if __re_int__.match(x):
v=int(x)
elif __re_float__.match(x):
v=float(x)
elif __re_str__.match(x):
v=x[1:-1]
elif x=='None':
v=None
elif x=='False':
v=False
elif x=='True':
v=True
elif __re_dict__.match(x):
elements=__re_val__.findall(x)
v=dict([(i[1][1:-1],__etag__(i[2])) for i in elements])
else:
v=x
return v
cpdef tuple parseHeader(str header): cpdef tuple parseHeader(str header):
cdef list m cdef list m

View File

@ -3,12 +3,14 @@
import re import re
from obitools3.parsers.fasta import fastaNucIterator from obitools3.parsers.fasta import fastaNucIterator
from obitools3.parsers.fastq import fastqIterator from obitools3.parsers.fastq import fastqIterator
from obitools3.parsers.tab import tabIterator
from obitools3.parsers.ngsfilter import ngsfilterIterator
oligore = re.compile("^[ACGTRYSWKMBDHVN]+$",re.I) oligore = re.compile("^[ACGTRYSWKMBDHVN]+$",re.I)
tagre = re.compile("^([ACGTRYSWKMBDHVN]+|-)(:([ACGTRYSWKMBDHVN]+)|-)?$",re.I) tagre = re.compile("^([ACGTRYSWKMBDHVN]+|-)(:([ACGTRYSWKMBDHVN]+)|-)?$",re.I)
def is_ngsfilter_line(line): def is_ngsfilter_line(line): # TODO doesn't work?
try: try:
parts = line.split() parts = line.split()
ok = tagre.match(parts[2]) ok = tagre.match(parts[2])
@ -23,7 +25,7 @@ def entryIteratorFactory(lineiterator,
int skip=0, int skip=0,
only=None, only=None,
bytes seqtype=b'nuc', bytes seqtype=b'nuc',
int qualityoffset=-1, int offset=-1,
bint noquality=False, bint noquality=False,
bint skiperror=True, bint skiperror=True,
bint header=False, bint header=False,
@ -35,15 +37,19 @@ def entryIteratorFactory(lineiterator,
bytes commentchar=b"#", bytes commentchar=b"#",
int buffersize=100000000): int buffersize=100000000):
if isinstance(lineiterator,(str,bytes)): if isinstance(lineiterator, (str, bytes)):
lineiterator=uopen(lineiterator) lineiterator=uopen(lineiterator)
if isinstance(lineiterator, LineBuffer): if isinstance(lineiterator, LineBuffer):
lb=lineiterator iterator = iter(lineiterator)
else: else:
lb=LineBuffer(lineiterator, buffersize) if hasattr(lineiterator, "readlines"):
iterator = iter(LineBuffer(lineiterator, buffersize))
elif hasattr(lineiterator, '__next__'):
iterator = lineiterator
else:
raise Exception("Invalid line iterator")
i = iter(lb) i = iterator
first=next(i) first=next(i)
@ -57,11 +63,11 @@ def entryIteratorFactory(lineiterator,
format=b"embl" format=b"embl"
elif first[0:6]=='LOCUS ': elif first[0:6]=='LOCUS ':
format=b"genbank" format=b"genbank"
elif first[0:11]=='#@ecopcr-v2': elif first[0:11]=='#@ecopcr-v2': # TODO v2????
format=b"ecopcrfile" format=b"ecopcrfile"
elif is_ngsfilter_line(first): elif is_ngsfilter_line(first):
format=b"ngsfilter" format=b"ngsfilter"
# TODO Temporary fix # TODO Temporary fix
first=None first=None
lineiterator.seek(0) lineiterator.seek(0)
@ -78,12 +84,36 @@ def entryIteratorFactory(lineiterator,
elif format==b'fastq': elif format==b'fastq':
return (fastqIterator(lineiterator, return (fastqIterator(lineiterator,
skip=skip,only=only, skip=skip,only=only,
qualityoffset=qualityoffset, offset=offset,
noquality=noquality, noquality=noquality,
firstline=first, firstline=first,
buffersize=buffersize), buffersize=buffersize),
Nuc_Seq) Nuc_Seq)
elif format==b'tabular':
return (tabIterator(lineiterator,
header = header,
sep = sep,
dec = dec,
stripwhite = stripwhite,
blanklineskip = blanklineskip,
commentchar = commentchar,
skip = skip,
only = only,
firstline=first,
buffersize=buffersize),
dict)
elif format==b'ngsfilter':
return (ngsfilterIterator(lineiterator,
sep = sep,
dec = dec,
stripwhite = stripwhite,
blanklineskip = blanklineskip,
commentchar = commentchar,
skip = skip,
only = only,
firstline=first,
buffersize=buffersize),
dict)
raise NotImplementedError('File format not yet implemented') raise NotImplementedError('File format not yet implemented')

View File

@ -7,11 +7,15 @@ from obitools3.dms.dms import DMS
from obitools3.parsers.fasta import fastaNucIterator from obitools3.parsers.fasta import fastaNucIterator
from obitools3.parsers.fastq import fastqIterator from obitools3.parsers.fastq import fastqIterator
from obitools3.parsers.tab import tabIterator
from obitools3.parsers.ngsfilter import ngsfilterIterator
from obitools3.parsers.universal import entryIteratorFactory from obitools3.parsers.universal import entryIteratorFactory
from obitools3.dms.obiseq import Nuc_Seq from obitools3.dms.obiseq import Nuc_Seq
from obitools3.apps.config import getConfiguration,logger from obitools3.apps.config import getConfiguration,logger
from obitools3.apps.temp import get_temp_dms from obitools3.apps.temp import get_temp_dms
from obitools3.utils cimport tobytes # TODO because can't read options as bytes
class MalformedURIException(RuntimeError): class MalformedURIException(RuntimeError):
pass pass
@ -210,22 +214,24 @@ def open_uri(uri,
if file is not None: if file is not None:
qualifiers=parse_qs(urip.query) qualifiers=parse_qs(urip.query)
if b'format' in qualifiers: if b'format' in qualifiers:
format = qualifiers[b'format'][0] format = qualifiers[b'format'][0]
else: else:
try: try:
format=config["obi"]["fileformat"] format=config["obi"]["format"]
except KeyError: except KeyError:
format=None format=None
if b'seqtype' in qualifiers: if b'seqtype' in qualifiers:
seqtype=qualifiers[b'seqtype'][0] seqtype=qualifiers[b'seqtype'][0]
else: else:
try: if format == b"ngsfilter": # TODO discuss
seqtype=config["obi"]["seqtype"] seqtype=None
except KeyError: else:
seqtype=b'nuc' try:
seqtype=config["obi"]["seqtype"]
except KeyError:
seqtype=b"nuc"
if b'skip' in qualifiers: if b'skip' in qualifiers:
skip=int(qualifiers[b"skip"][0]) skip=int(qualifiers[b"skip"][0])
@ -286,7 +292,7 @@ def open_uri(uri,
offset=33 offset=33
elif config["obi"]["qualityformat"][0]=="solexa": elif config["obi"]["qualityformat"][0]=="solexa":
offset=64 offset=64
#offset=config["obi"]["qualityoffset"] # TODO discuss #offset=config["obi"]["offset"] # TODO discuss
except KeyError: except KeyError:
offset=33 offset=33
@ -304,10 +310,10 @@ def open_uri(uri,
raise MalformedURIException('Malformed header argument in URI') raise MalformedURIException('Malformed header argument in URI')
if b"sep" in qualifiers: if b"sep" in qualifiers:
sep=qualifiers[b"sep"][0][0] sep=tobytes(qualifiers[b"sep"][0][0])
else: else:
try: try:
sep=config["obi"]["sep"] sep=tobytes(config["obi"]["sep"])
except KeyError: except KeyError:
sep=None sep=None
@ -315,18 +321,18 @@ def open_uri(uri,
# pass # pass
if b"dec" in qualifiers: if b"dec" in qualifiers:
dec=qualifiers[b"dec"][0][0] dec=tobytes(qualifiers[b"dec"][0][0])
else: else:
try: try:
dec=config["obi"]["dec"] dec=tobytes(config["obi"]["dec"])
except KeyError: except KeyError:
dec=b"." dec=b"."
if b"nastring" in qualifiers: if b"nastring" in qualifiers:
nastring=qualifiers[b"nastring"][0] nastring=tobytes(qualifiers[b"nastring"][0])
else: else:
try: try:
nastring=config["obi"]["nastring"] nastring=tobytes(config["obi"]["nastring"])
except KeyError: except KeyError:
nastring=b'NA' nastring=b'NA'
@ -357,15 +363,15 @@ def open_uri(uri,
raise MalformedURIException('Malformed blanklineskip argument in URI') raise MalformedURIException('Malformed blanklineskip argument in URI')
if b"commentchar" in qualifiers: if b"commentchar" in qualifiers:
commentchar=qualifiers[b"commentchar"][0][0] commentchar=tobytes(qualifiers[b"commentchar"][0][0])
else: else:
try: try:
commentchar=config["obi"]["commentchar"] commentchar=tobytes(config["obi"]["commentchar"])
except KeyError: except KeyError:
commentchar=b'#' commentchar=b'#'
if format is not None: if format is not None:
if qualifiers[b"seqtype"]==b"nuc": if seqtype==b"nuc":
objclass = Nuc_Seq objclass = Nuc_Seq
if format==b"fasta": if format==b"fasta":
iseq = fastaNucIterator(file, iseq = fastaNucIterator(file,
@ -379,8 +385,29 @@ def open_uri(uri,
noquality=noquality) noquality=noquality)
else: else:
raise NotImplementedError('Sequence file format not implemented') raise NotImplementedError('Sequence file format not implemented')
elif qualifiers[b"seqtype"]==b"prot": elif seqtype==b"prot":
raise NotImplementedError() raise NotImplementedError()
elif format==b"tabular":
objclass = dict
iseq = tabIterator(file,
header = header,
sep = sep,
dec = dec,
stripwhite = stripwhite,
blanklineskip = blanklineskip,
commentchar = commentchar,
skip = skip,
only = only)
elif format==b"ngsfilter":
objclass = dict
iseq = ngsfilterIterator(file,
sep = sep,
dec = dec,
stripwhite = stripwhite,
blanklineskip = blanklineskip,
commentchar = commentchar,
skip = skip,
only = only)
else: else:
iseq,objclass = entryIteratorFactory(file, iseq,objclass = entryIteratorFactory(file,
skip, only, skip, only,