Cython API: obi import can now import ngsfilter files and tabular files
This commit is contained in:
@ -31,7 +31,7 @@ default_config = { 'software' : "The OBITools",
|
||||
'fileformat' : None,
|
||||
'skiperror' : True,
|
||||
'qualityformat' : b'sanger',
|
||||
'qualityoffset' : -1,
|
||||
'offset' : -1,
|
||||
'noquality' : False,
|
||||
'seqtype' : b'nuc',
|
||||
"header" : False,
|
||||
|
@ -25,8 +25,8 @@ def __addInputOption(optionManager):
|
||||
|
||||
group.add_argument('--na-string',
|
||||
action="store", dest="obi:nastring",
|
||||
default=b"NA",
|
||||
type=bytes,
|
||||
default="NA",
|
||||
type=str,
|
||||
help="String associated to Non Available (NA) values")
|
||||
|
||||
|
||||
@ -61,7 +61,7 @@ def __addSequenceInputOption(optionManager):
|
||||
action="store_const", dest="obi:format",
|
||||
default=None,
|
||||
const=b'ngsfilter',
|
||||
help="Input file is a ngsfilter file")
|
||||
help="Input file is an ngsfilter file")
|
||||
|
||||
group.add_argument('--ecopcr-result',
|
||||
action="store_const", dest="obi:format",
|
||||
@ -75,6 +75,12 @@ def __addSequenceInputOption(optionManager):
|
||||
const=b'ecoprimers',
|
||||
help="Input file is the result of an ecoprimers")
|
||||
|
||||
group.add_argument('--tabular',
|
||||
action="store_const", dest="obi:format",
|
||||
default=None,
|
||||
const=b'tabular',
|
||||
help="Input file is a tabular file")
|
||||
|
||||
group.add_argument('--skip-on-error',
|
||||
action="store_true", dest="obi:skiperror",
|
||||
default=False,
|
||||
@ -120,13 +126,13 @@ def __addTabularInputOption(optionManager):
|
||||
group.add_argument('--sep',
|
||||
action="store", dest="obi:sep",
|
||||
default=None,
|
||||
type=bytes,
|
||||
type=str,
|
||||
help="Column separator")
|
||||
|
||||
group.add_argument('--dec',
|
||||
action="store", dest="obi:dec",
|
||||
default=b".",
|
||||
type=bytes,
|
||||
default=".",
|
||||
type=str,
|
||||
help="Decimal separator")
|
||||
|
||||
group.add_argument('--strip-white',
|
||||
@ -141,8 +147,8 @@ def __addTabularInputOption(optionManager):
|
||||
|
||||
group.add_argument('--comment-char',
|
||||
action="store", dest="obi:commentchar",
|
||||
default=b"#",
|
||||
type=bytes,
|
||||
default="#",
|
||||
type=str,
|
||||
help="Lines starting by this char are considered as comment")
|
||||
|
||||
def __addTaxonomyInputOption(optionManager):
|
||||
@ -171,7 +177,7 @@ def addSequenceInputOption(optionManager):
|
||||
__addSequenceInputOption(optionManager)
|
||||
|
||||
def addTabularInputOption(optionManager):
|
||||
__addInputOption(optionManager)
|
||||
#__addInputOption(optionManager) # TODO discuss conflict
|
||||
__addTabularInputOption(optionManager)
|
||||
|
||||
def addTaxonomyInputOption(optionManager):
|
||||
|
@ -1,13 +1,8 @@
|
||||
#cython: language_level=3
|
||||
|
||||
# TODO cimport generate errors with argument numbers, but without them some variables can't be declared
|
||||
|
||||
import sys
|
||||
|
||||
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.files.universalopener cimport uopen
|
||||
from obitools3.parsers.fasta import fastaIterator
|
||||
from obitools3.parsers.fastq import fastqIterator
|
||||
from obitools3.dms.view.view cimport View
|
||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||
from obitools3.dms.column.column cimport Column
|
||||
@ -24,7 +19,7 @@ from obitools3.dms.capi.obitypes cimport obitype_t, \
|
||||
|
||||
from obitools3.dms.capi.obierrno cimport obi_errno
|
||||
|
||||
from obitools3.apps.optiongroups import addSequenceInputOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addSequenceInputOption, addTabularInputOption, addMinimalOutputOption
|
||||
|
||||
from obitools3.uri.decode import open_uri
|
||||
|
||||
@ -45,6 +40,7 @@ default_config = { 'destview' : None,
|
||||
def addOptions(parser):
|
||||
|
||||
addSequenceInputOption(parser)
|
||||
addTabularInputOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
# addTaxdumpInputOption(parser)
|
||||
|
||||
@ -63,8 +59,8 @@ def run(config):
|
||||
cdef int nb_elts
|
||||
cdef object d
|
||||
cdef View view
|
||||
cdef object iseq
|
||||
cdef object seq
|
||||
cdef object entries
|
||||
cdef object entry
|
||||
cdef Column id_col
|
||||
cdef Column def_col
|
||||
cdef Column seq_col
|
||||
@ -108,9 +104,9 @@ def run(config):
|
||||
|
||||
pb = ProgressBar(1000000, config, seconde=5) # TODO should be number of records in file
|
||||
|
||||
iseq = input[1]
|
||||
entries = input[1]
|
||||
|
||||
NA_value = config['obi']['nastring']
|
||||
NA_value = tobytes(config['obi']['nastring']) # TODO
|
||||
|
||||
NUC_SEQS_view = False
|
||||
if isinstance(output[1], View) :
|
||||
@ -122,14 +118,14 @@ def run(config):
|
||||
|
||||
# Save basic columns in variables for optimization
|
||||
if NUC_SEQS_view :
|
||||
id_col = view[b"ID"]
|
||||
id_col = view[b"ID"] # TODO use macros or globals for column names
|
||||
def_col = view[b"DEFINITION"]
|
||||
seq_col = view[b"NUC_SEQ"]
|
||||
|
||||
dcols = {}
|
||||
|
||||
i = 0
|
||||
for seq in iseq :
|
||||
for entry in entries :
|
||||
|
||||
pb(i)
|
||||
|
||||
@ -137,23 +133,23 @@ def run(config):
|
||||
|
||||
# Check if there is a sequencing quality associated # TODO
|
||||
if i == 0:
|
||||
get_quality = b"QUALITY" in seq
|
||||
get_quality = b"QUALITY" in entry
|
||||
if get_quality:
|
||||
Column.new_column(view, b"QUALITY", OBI_QUAL)
|
||||
qual_col = view[b"QUALITY"]
|
||||
|
||||
id_col[i] = seq.id
|
||||
def_col[i] = seq.definition
|
||||
seq_col[i] = seq.seq
|
||||
id_col[i] = entry.id
|
||||
def_col[i] = entry.definition
|
||||
seq_col[i] = entry.seq
|
||||
|
||||
if get_quality :
|
||||
qual_col[i] = seq.quality
|
||||
qual_col[i] = entry.quality
|
||||
|
||||
for tag in seq :
|
||||
for tag in entry :
|
||||
|
||||
if tag != b"ID" and tag != b"DEFINITION" and tag != b"NUC_SEQ" and tag != b"QUALITY" : # TODO hmmm...
|
||||
|
||||
value = seq[tag]
|
||||
value = entry[tag]
|
||||
|
||||
# Check NA value
|
||||
if value == NA_value :
|
||||
|
@ -17,7 +17,6 @@ def fastaIterator(lineiterator,
|
||||
firstline=None,
|
||||
int buffersize=100000000
|
||||
):
|
||||
cdef LineBuffer lb
|
||||
cdef str ident
|
||||
cdef str definition
|
||||
cdef dict tags
|
||||
@ -33,21 +32,24 @@ def fastaIterator(lineiterator,
|
||||
|
||||
if isinstance(lineiterator, (str, bytes)):
|
||||
lineiterator=uopen(lineiterator)
|
||||
|
||||
if isinstance(lineiterator, LineBuffer):
|
||||
lb=lineiterator
|
||||
iterator = iter(lineiterator)
|
||||
else:
|
||||
lb=LineBuffer(lineiterator,buffersize)
|
||||
if hasattr(lineiterator, "readlines"):
|
||||
iterator = iter(LineBuffer(lineiterator, buffersize))
|
||||
elif hasattr(lineiterator, '__next__'):
|
||||
iterator = lineiterator
|
||||
else:
|
||||
raise Exception("Invalid line iterator")
|
||||
|
||||
skipped = 0
|
||||
i = iter(lb)
|
||||
i = iterator
|
||||
|
||||
if firstline is None:
|
||||
line = next(i)
|
||||
else:
|
||||
line = firstline
|
||||
|
||||
|
||||
while True:
|
||||
|
||||
if ionly >= 0 and read >= ionly:
|
||||
@ -81,7 +83,7 @@ def fastaIterator(lineiterator,
|
||||
# definition,
|
||||
# tags=tags,
|
||||
# )
|
||||
# TODO
|
||||
# TODO Seq object
|
||||
yield { "id" : ident,
|
||||
"definition" : definition,
|
||||
"sequence" : sequence,
|
||||
@ -100,7 +102,6 @@ def fastaNucIterator(lineiterator,
|
||||
firstline=None,
|
||||
int buffersize=100000000
|
||||
):
|
||||
cdef LineBuffer lb
|
||||
cdef str ident
|
||||
cdef str definition
|
||||
cdef dict tags
|
||||
@ -116,13 +117,15 @@ def fastaNucIterator(lineiterator,
|
||||
|
||||
if isinstance(lineiterator, (str, bytes)):
|
||||
lineiterator=uopen(lineiterator)
|
||||
|
||||
if isinstance(lineiterator, types.GeneratorType):
|
||||
iterator = lineiterator
|
||||
if isinstance(lineiterator, LineBuffer):
|
||||
iterator = iter(lineiterator)
|
||||
else:
|
||||
if hasattr(lineiterator, "readlines"):
|
||||
iterator = iter(LineBuffer(lineiterator, buffersize))
|
||||
elif hasattr(lineiterator, '__next__'):
|
||||
iterator = lineiterator
|
||||
else:
|
||||
raise Exception("Invalid line iterator")
|
||||
|
||||
skipped = 0
|
||||
read = 0
|
||||
|
@ -12,7 +12,7 @@ from obitools3.dms.obiseq cimport Nuc_Seq
|
||||
def fastqIterator(lineiterator,
|
||||
int skip=0,
|
||||
only=None,
|
||||
int qualityoffset=-1,
|
||||
int offset=-1,
|
||||
bint noquality=False,
|
||||
firstline=None,
|
||||
int buffersize=100000000
|
||||
@ -25,14 +25,14 @@ def fastqIterator(lineiterator,
|
||||
else:
|
||||
return fastqWithQualityIterator(lineiterator,
|
||||
skip,only,
|
||||
qualityoffset,
|
||||
offset,
|
||||
firstline,
|
||||
buffersize)
|
||||
|
||||
def fastqWithQualityIterator(lineiterator,
|
||||
int skip=0,
|
||||
only=None,
|
||||
int qualityoffset=-1,
|
||||
int offset=-1,
|
||||
firstline=None,
|
||||
int buffersize=100000000
|
||||
):
|
||||
@ -52,13 +52,17 @@ def fastqWithQualityIterator(lineiterator,
|
||||
|
||||
if isinstance(lineiterator, (str, bytes)):
|
||||
lineiterator=uopen(lineiterator)
|
||||
|
||||
if isinstance(lineiterator, LineBuffer):
|
||||
lb=lineiterator
|
||||
iterator = iter(lineiterator)
|
||||
else:
|
||||
lb=LineBuffer(lineiterator,buffersize)
|
||||
if hasattr(lineiterator, "readlines"):
|
||||
iterator = iter(LineBuffer(lineiterator, buffersize))
|
||||
elif hasattr(lineiterator, '__next__'):
|
||||
iterator = lineiterator
|
||||
else:
|
||||
raise Exception("Invalid line iterator")
|
||||
|
||||
i = iter(lb)
|
||||
i = iterator
|
||||
lines_to_skip = skip*4 - (firstline is not None)
|
||||
|
||||
for skipped in range(lines_to_skip):
|
||||
@ -88,7 +92,7 @@ def fastqWithQualityIterator(lineiterator,
|
||||
sequence,
|
||||
definition=definition,
|
||||
quality=quality,
|
||||
offset=qualityoffset,
|
||||
offset=offset,
|
||||
tags=tags)
|
||||
|
||||
yield seq
|
||||
@ -97,7 +101,7 @@ def fastqWithQualityIterator(lineiterator,
|
||||
# "definition" : definition,
|
||||
# "sequence" : sequence,
|
||||
# "quality" : quality,
|
||||
# "offset" : qualityoffset,
|
||||
# "offset" : offset,
|
||||
# "tags" : tags,
|
||||
# "annotation" : {}
|
||||
# }
|
||||
@ -112,7 +116,6 @@ def fastqWithoutQualityIterator(lineiterator,
|
||||
firstline=None,
|
||||
int buffersize=100000000
|
||||
):
|
||||
cdef LineBuffer lb
|
||||
cdef str ident
|
||||
cdef str definition
|
||||
cdef dict tags
|
||||
@ -128,13 +131,17 @@ def fastqWithoutQualityIterator(lineiterator,
|
||||
|
||||
if isinstance(lineiterator, (str, bytes)):
|
||||
lineiterator=uopen(lineiterator)
|
||||
|
||||
if isinstance(lineiterator, LineBuffer):
|
||||
lb=lineiterator
|
||||
iterator = iter(lineiterator)
|
||||
else:
|
||||
lb=LineBuffer(lineiterator,buffersize)
|
||||
if hasattr(lineiterator, "readlines"):
|
||||
iterator = iter(LineBuffer(lineiterator, buffersize))
|
||||
elif hasattr(lineiterator, '__next__'):
|
||||
iterator = lineiterator
|
||||
else:
|
||||
raise Exception("Invalid line iterator")
|
||||
|
||||
i = iter(lb)
|
||||
i = iterator
|
||||
lines_to_skip = skip*4 - (firstline is not None)
|
||||
|
||||
for skipped in range(lines_to_skip):
|
||||
|
@ -1,5 +1,4 @@
|
||||
#cython: language_level=3
|
||||
|
||||
cdef object __etag__(str x)
|
||||
|
||||
cpdef tuple parseHeader(str header)
|
||||
|
@ -6,54 +6,12 @@ Created on 25 mars 2016
|
||||
@author: coissac
|
||||
'''
|
||||
|
||||
from obitools3.utils cimport __etag__
|
||||
import re
|
||||
|
||||
|
||||
# Regular expressions used to parse ``key=value;`` tags in sequence headers.
#
# All patterns are raw strings so that regex escapes such as ``\.`` or ``\{``
# reach the ``re`` module untouched instead of being treated as (invalid)
# string escape sequences by the Python/Cython parser.

# One ``key=value;`` tag; the value is single-quoted, double-quoted or any
# run of characters up to the next ``;``.
__ret__ = re.compile(r'''(([^ ]+)=('[^']*'|"[^"]*"|[^;]+); *)+?''')

# Optionally signed integer literal.
__re_int__ = re.compile(r"^[+-]?[0-9]+$")

# Optionally signed decimal literal with optional fraction and exponent.
__re_float__ = re.compile(r"^[+-]?[0-9]+(\.[0-9]*)?([eE][+-]?[0-9]+)?$")

# A value entirely enclosed in double or single quotes.  The alternation is
# grouped so that both anchors apply to both alternatives; without the group
# the pattern reads as ``^"..."`` OR ``'...'$`` and accepts values such as
# ``"abc"def``.
__re_str__ = re.compile(r"""^(?:"[^"]*"|'[^']*')$""")

# A dict-like literal: ``{`` followed by zero or more comma-separated
# ``'key': value`` pairs and a closing ``}``.
__re_dict__ = re.compile(r"""^\{\ *
                                (
                                    ("[^"]*"|'[^']*')
                                    \ *:\ *
                                    ([^,}]+|
                                     "[^"]*"|
                                     '[^']*'
                                    )
                                )?
                                (\ *,\ *
                                    ("[^"]*"|'[^']*')
                                    \ *:\ *
                                    ([^,}]+|
                                     "[^"]*"|
                                     '[^']*'
                                    )
                                )*\ *\}$""", re.VERBOSE)

# One ``'key': value`` pair of a dict-like literal, including its trailing
# ``,`` or ``}`` delimiter; group 2 is the quoted key, group 3 the raw value.
__re_val__ = re.compile(r"""(("[^"]*"|'[^']*') *: *([^,}]+|"[^"]*"|'[^']*') *[,}] *)""")
|
||||
|
||||
cdef object __etag__(str x):
    """Convert the textual value of a header tag into a Python object.

    The checks are applied in this order and the first one that succeeds
    decides the result:

    * integer literal          -> ``int``
    * floating point literal   -> ``float``
    * quoted string            -> the string without its first and last
      character
    * ``None`` / ``False`` / ``True`` -> the corresponding Python constant
    * dict-like literal        -> ``dict`` whose keys are the unquoted key
      strings and whose values are converted recursively
    * anything else            -> ``x`` returned unchanged
    """
    cdef list elements
    cdef tuple i

    if __re_int__.match(x):
        return int(x)

    if __re_float__.match(x):
        return float(x)

    if __re_str__.match(x):
        # Drop the surrounding quote characters.
        return x[1:-1]

    if x == 'None':
        return None

    if x == 'False':
        return False

    if x == 'True':
        return True

    if __re_dict__.match(x):
        # Each element is (whole pair, quoted key, raw value).
        elements = __re_val__.findall(x)
        return {i[1][1:-1]: __etag__(i[2]) for i in elements}

    # Not a recognised literal: keep the raw text.
    return x
|
||||
|
||||
cpdef tuple parseHeader(str header):
|
||||
cdef list m
|
||||
|
@ -3,12 +3,14 @@
|
||||
import re
|
||||
from obitools3.parsers.fasta import fastaNucIterator
|
||||
from obitools3.parsers.fastq import fastqIterator
|
||||
from obitools3.parsers.tab import tabIterator
|
||||
from obitools3.parsers.ngsfilter import ngsfilterIterator
|
||||
|
||||
|
||||
oligore = re.compile("^[ACGTRYSWKMBDHVN]+$",re.I)
|
||||
tagre = re.compile("^([ACGTRYSWKMBDHVN]+|-)(:([ACGTRYSWKMBDHVN]+)|-)?$",re.I)
|
||||
|
||||
def is_ngsfilter_line(line):
|
||||
def is_ngsfilter_line(line): # TODO doesn't work?
|
||||
try:
|
||||
parts = line.split()
|
||||
ok = tagre.match(parts[2])
|
||||
@ -23,7 +25,7 @@ def entryIteratorFactory(lineiterator,
|
||||
int skip=0,
|
||||
only=None,
|
||||
bytes seqtype=b'nuc',
|
||||
int qualityoffset=-1,
|
||||
int offset=-1,
|
||||
bint noquality=False,
|
||||
bint skiperror=True,
|
||||
bint header=False,
|
||||
@ -37,13 +39,17 @@ def entryIteratorFactory(lineiterator,
|
||||
|
||||
if isinstance(lineiterator, (str, bytes)):
|
||||
lineiterator=uopen(lineiterator)
|
||||
|
||||
if isinstance(lineiterator, LineBuffer):
|
||||
lb=lineiterator
|
||||
iterator = iter(lineiterator)
|
||||
else:
|
||||
lb=LineBuffer(lineiterator, buffersize)
|
||||
if hasattr(lineiterator, "readlines"):
|
||||
iterator = iter(LineBuffer(lineiterator, buffersize))
|
||||
elif hasattr(lineiterator, '__next__'):
|
||||
iterator = lineiterator
|
||||
else:
|
||||
raise Exception("Invalid line iterator")
|
||||
|
||||
i = iter(lb)
|
||||
i = iterator
|
||||
|
||||
first=next(i)
|
||||
|
||||
@ -57,7 +63,7 @@ def entryIteratorFactory(lineiterator,
|
||||
format=b"embl"
|
||||
elif first[0:6]=='LOCUS ':
|
||||
format=b"genbank"
|
||||
elif first[0:11]=='#@ecopcr-v2':
|
||||
elif first[0:11]=='#@ecopcr-v2': # TODO v2????
|
||||
format=b"ecopcrfile"
|
||||
elif is_ngsfilter_line(first):
|
||||
format=b"ngsfilter"
|
||||
@ -78,12 +84,36 @@ def entryIteratorFactory(lineiterator,
|
||||
elif format==b'fastq':
|
||||
return (fastqIterator(lineiterator,
|
||||
skip=skip,only=only,
|
||||
qualityoffset=qualityoffset,
|
||||
offset=offset,
|
||||
noquality=noquality,
|
||||
firstline=first,
|
||||
buffersize=buffersize),
|
||||
Nuc_Seq)
|
||||
|
||||
elif format==b'tabular':
|
||||
return (tabIterator(lineiterator,
|
||||
header = header,
|
||||
sep = sep,
|
||||
dec = dec,
|
||||
stripwhite = stripwhite,
|
||||
blanklineskip = blanklineskip,
|
||||
commentchar = commentchar,
|
||||
skip = skip,
|
||||
only = only,
|
||||
firstline=first,
|
||||
buffersize=buffersize),
|
||||
dict)
|
||||
elif format==b'ngsfilter':
|
||||
return (ngsfilterIterator(lineiterator,
|
||||
sep = sep,
|
||||
dec = dec,
|
||||
stripwhite = stripwhite,
|
||||
blanklineskip = blanklineskip,
|
||||
commentchar = commentchar,
|
||||
skip = skip,
|
||||
only = only,
|
||||
firstline=first,
|
||||
buffersize=buffersize),
|
||||
dict)
|
||||
|
||||
raise NotImplementedError('File format not yet implemented')
|
||||
|
||||
|
@ -7,11 +7,15 @@ from obitools3.dms.dms import DMS
|
||||
|
||||
from obitools3.parsers.fasta import fastaNucIterator
|
||||
from obitools3.parsers.fastq import fastqIterator
|
||||
from obitools3.parsers.tab import tabIterator
|
||||
from obitools3.parsers.ngsfilter import ngsfilterIterator
|
||||
from obitools3.parsers.universal import entryIteratorFactory
|
||||
|
||||
from obitools3.dms.obiseq import Nuc_Seq
|
||||
from obitools3.apps.config import getConfiguration,logger
|
||||
from obitools3.apps.temp import get_temp_dms
|
||||
from obitools3.utils cimport tobytes # TODO because can't read options as bytes
|
||||
|
||||
|
||||
class MalformedURIException(RuntimeError):
|
||||
pass
|
||||
@ -210,22 +214,24 @@ def open_uri(uri,
|
||||
if file is not None:
|
||||
qualifiers=parse_qs(urip.query)
|
||||
|
||||
|
||||
if b'format' in qualifiers:
|
||||
format = qualifiers[b'format'][0]
|
||||
else:
|
||||
try:
|
||||
format=config["obi"]["fileformat"]
|
||||
format=config["obi"]["format"]
|
||||
except KeyError:
|
||||
format=None
|
||||
|
||||
if b'seqtype' in qualifiers:
|
||||
seqtype=qualifiers[b'seqtype'][0]
|
||||
else:
|
||||
if format == b"ngsfilter": # TODO discuss
|
||||
seqtype=None
|
||||
else:
|
||||
try:
|
||||
seqtype=config["obi"]["seqtype"]
|
||||
except KeyError:
|
||||
seqtype=b'nuc'
|
||||
seqtype=b"nuc"
|
||||
|
||||
if b'skip' in qualifiers:
|
||||
skip=int(qualifiers[b"skip"][0])
|
||||
@ -286,7 +292,7 @@ def open_uri(uri,
|
||||
offset=33
|
||||
elif config["obi"]["qualityformat"][0]=="solexa":
|
||||
offset=64
|
||||
#offset=config["obi"]["qualityoffset"] # TODO discuss
|
||||
#offset=config["obi"]["offset"] # TODO discuss
|
||||
except KeyError:
|
||||
offset=33
|
||||
|
||||
@ -304,10 +310,10 @@ def open_uri(uri,
|
||||
raise MalformedURIException('Malformed header argument in URI')
|
||||
|
||||
if b"sep" in qualifiers:
|
||||
sep=qualifiers[b"sep"][0][0]
|
||||
sep=tobytes(qualifiers[b"sep"][0][0])
|
||||
else:
|
||||
try:
|
||||
sep=config["obi"]["sep"]
|
||||
sep=tobytes(config["obi"]["sep"])
|
||||
except KeyError:
|
||||
sep=None
|
||||
|
||||
@ -315,18 +321,18 @@ def open_uri(uri,
|
||||
# pass
|
||||
|
||||
if b"dec" in qualifiers:
|
||||
dec=qualifiers[b"dec"][0][0]
|
||||
dec=tobytes(qualifiers[b"dec"][0][0])
|
||||
else:
|
||||
try:
|
||||
dec=config["obi"]["dec"]
|
||||
dec=tobytes(config["obi"]["dec"])
|
||||
except KeyError:
|
||||
dec=b"."
|
||||
|
||||
if b"nastring" in qualifiers:
|
||||
nastring=qualifiers[b"nastring"][0]
|
||||
nastring=tobytes(qualifiers[b"nastring"][0])
|
||||
else:
|
||||
try:
|
||||
nastring=config["obi"]["nastring"]
|
||||
nastring=tobytes(config["obi"]["nastring"])
|
||||
except KeyError:
|
||||
nastring=b'NA'
|
||||
|
||||
@ -357,15 +363,15 @@ def open_uri(uri,
|
||||
raise MalformedURIException('Malformed blanklineskip argument in URI')
|
||||
|
||||
if b"commentchar" in qualifiers:
|
||||
commentchar=qualifiers[b"commentchar"][0][0]
|
||||
commentchar=tobytes(qualifiers[b"commentchar"][0][0])
|
||||
else:
|
||||
try:
|
||||
commentchar=config["obi"]["commentchar"]
|
||||
commentchar=tobytes(config["obi"]["commentchar"])
|
||||
except KeyError:
|
||||
commentchar=b'#'
|
||||
|
||||
if format is not None:
|
||||
if qualifiers[b"seqtype"]==b"nuc":
|
||||
if seqtype==b"nuc":
|
||||
objclass = Nuc_Seq
|
||||
if format==b"fasta":
|
||||
iseq = fastaNucIterator(file,
|
||||
@ -379,8 +385,29 @@ def open_uri(uri,
|
||||
noquality=noquality)
|
||||
else:
|
||||
raise NotImplementedError('Sequence file format not implemented')
|
||||
elif qualifiers[b"seqtype"]==b"prot":
|
||||
elif seqtype==b"prot":
|
||||
raise NotImplementedError()
|
||||
elif format==b"tabular":
|
||||
objclass = dict
|
||||
iseq = tabIterator(file,
|
||||
header = header,
|
||||
sep = sep,
|
||||
dec = dec,
|
||||
stripwhite = stripwhite,
|
||||
blanklineskip = blanklineskip,
|
||||
commentchar = commentchar,
|
||||
skip = skip,
|
||||
only = only)
|
||||
elif format==b"ngsfilter":
|
||||
objclass = dict
|
||||
iseq = ngsfilterIterator(file,
|
||||
sep = sep,
|
||||
dec = dec,
|
||||
stripwhite = stripwhite,
|
||||
blanklineskip = blanklineskip,
|
||||
commentchar = commentchar,
|
||||
skip = skip,
|
||||
only = only)
|
||||
else:
|
||||
iseq,objclass = entryIteratorFactory(file,
|
||||
skip, only,
|
||||
|
Reference in New Issue
Block a user