Cython: updated the URI decoding to handle outputs other than DMS
This commit is contained in:
@ -12,6 +12,11 @@ from obitools3.parsers.ngsfilter import ngsfilterIterator
|
||||
from obitools3.parsers.embl import emblIterator
|
||||
from obitools3.parsers.universal import entryIteratorFactory
|
||||
|
||||
from obitools3.writers.fasta import FastaNucWriter
|
||||
from obitools3.writers.fastq import FastqWriter
|
||||
from obitools3.format.fasta import FastaFormat
|
||||
from obitools3.format.fastq import FastqFormat
|
||||
|
||||
from obitools3.dms.obiseq import Nuc_Seq
|
||||
from obitools3.apps.config import getConfiguration,logger
|
||||
from obitools3.apps.temp import get_temp_dms
|
||||
@ -56,7 +61,9 @@ cdef open_dms(bytes path, bint create=False):
|
||||
pos=pos+1
|
||||
return None
|
||||
|
||||
def open_dms_element(DMS dms, bytes path,
|
||||
|
||||
def open_dms_element(DMS dms,
|
||||
bytes path,
|
||||
bint create=False,
|
||||
type newviewtype=View):
|
||||
"""
|
||||
@ -139,12 +146,21 @@ def open_dms_element(DMS dms, bytes path,
|
||||
if len(path_parts) > 4:
|
||||
raise MalformedURIException('Malformed View URI')
|
||||
|
||||
return (dms,subsubpart)
|
||||
return (dms, subsubpart)
|
||||
|
||||
|
||||
'''
|
||||
#TODO discuss the returned object: return a dict, or some class instance?
|
||||
Reads a URI and returns a tuple containing:
|
||||
(1) The opened file or DMS, or the URI itself if nothing could be opened by the function
|
||||
(2) The opened view or iterator on the opened file or writer
|
||||
(3) The class of object returned or handled by (2)
|
||||
(4) The original URI in bytes
|
||||
'''
|
||||
def open_uri(uri,
|
||||
bint input=True,
|
||||
type newviewtype=View):
|
||||
|
||||
cdef bytes urib = tobytes(uri)
|
||||
cdef bytes scheme
|
||||
cdef tuple dms
|
||||
@ -153,7 +169,7 @@ def open_uri(uri,
|
||||
|
||||
config = getConfiguration()
|
||||
urip = urlparse(urib)
|
||||
|
||||
|
||||
if 'obi' not in config:
|
||||
config['obi']={}
|
||||
|
||||
@ -166,22 +182,26 @@ def open_uri(uri,
|
||||
create=(not input) and (not config["obi"]["nocreatedms"])
|
||||
except KeyError:
|
||||
create=not input
|
||||
|
||||
|
||||
scheme = urip.scheme
|
||||
|
||||
error = None
|
||||
|
||||
if scheme==b"" or scheme==b"dms" :
|
||||
dms = open_dms(urip.path,create)
|
||||
if scheme==b"dms" or \
|
||||
(scheme==b"" and \
|
||||
(((not input) and "outputformat" not in config["obi"]) or \
|
||||
(input and "inputformat" not in config["obi"]))): # TODO maybe not best way
|
||||
|
||||
dms = open_dms(urip.path, create)
|
||||
if dms is None and default_dms is not None:
|
||||
dms=(default_dms, urip.path)
|
||||
|
||||
if dms is not None:
|
||||
try:
|
||||
resource=open_dms_element(dms[0],dms[1],
|
||||
resource=open_dms_element(dms[0],
|
||||
dms[1],
|
||||
create,
|
||||
newviewtype
|
||||
)
|
||||
newviewtype)
|
||||
|
||||
scheme=b"dms"
|
||||
urip = ParseResultBytes(scheme=b"dms",
|
||||
@ -200,32 +220,42 @@ def open_uri(uri,
|
||||
urlunparse(urip))
|
||||
except Exception as e:
|
||||
error=e
|
||||
|
||||
|
||||
if scheme==b"dms" :
|
||||
logger('Error','cannot open DMS: %s', uri)
|
||||
raise FileNotFoundError('uri')
|
||||
|
||||
#if not urip.scheme: # TODO not sure what it was supposed to do but not working as intended
|
||||
# urib=b"file:"+urib
|
||||
|
||||
if not urip.scheme:
|
||||
urib=b"file:"+urib
|
||||
|
||||
try:
|
||||
file = uopen(urib)
|
||||
logger('info','Opened file: %s', tostr(urib))
|
||||
except Exception as e: # TODO discuss: if can't open file, return the character string itself
|
||||
file = urib
|
||||
iseq = urib
|
||||
objclass = bytes
|
||||
if input:
|
||||
try:
|
||||
file = uopen(urip.path, mode='rb')
|
||||
logger('info','Opened file: %s', urip.path)
|
||||
except Exception as e: # TODO discuss: if can't open file, return the character string itself
|
||||
file = tobytes(uri)
|
||||
iseq = urib
|
||||
objclass = bytes
|
||||
else: # TODO update uopen to be able to write?
|
||||
file = open(urip.path, 'wb')
|
||||
|
||||
if file is not None:
|
||||
qualifiers=parse_qs(urip.query)
|
||||
|
||||
if b'format' in qualifiers:
|
||||
if input and b'format' in qualifiers:
|
||||
format = qualifiers[b'format'][0]
|
||||
else:
|
||||
try:
|
||||
format=config["obi"]["format"]
|
||||
except KeyError:
|
||||
format=None
|
||||
else: # TODO discuss priorities
|
||||
if urip.scheme:
|
||||
format = urip.scheme
|
||||
else:
|
||||
try:
|
||||
if input:
|
||||
formatkey = "inputformat"
|
||||
else:
|
||||
formatkey = "outputformat"
|
||||
format=config["obi"][formatkey]
|
||||
except KeyError:
|
||||
format=None
|
||||
|
||||
if b'seqtype' in qualifiers:
|
||||
seqtype=qualifiers[b'seqtype'][0]
|
||||
@ -248,7 +278,6 @@ def open_uri(uri,
|
||||
if skip < 0:
|
||||
raise MalformedURIException('Malformed skip argument in URI')
|
||||
|
||||
|
||||
if b'only' in qualifiers:
|
||||
only=int(qualifiers[b"only"][0])
|
||||
else:
|
||||
@ -259,7 +288,6 @@ def open_uri(uri,
|
||||
if only is not None and only <= 0:
|
||||
raise MalformedURIException('Malformed only argument in URI')
|
||||
|
||||
|
||||
if b"skiperror" in qualifiers:
|
||||
try:
|
||||
skiperror=eval(qualifiers[b"skiperror"][0])
|
||||
@ -332,12 +360,27 @@ def open_uri(uri,
|
||||
dec=tobytes(config["obi"]["dec"])
|
||||
except KeyError:
|
||||
dec=b"."
|
||||
|
||||
if b"printna" in qualifiers:
|
||||
try:
|
||||
printna=eval(qualifiers[b"printna"][0])
|
||||
except Exception as e:
|
||||
raise MalformedURIException("Malformed 'print NA' argument in URI")
|
||||
else:
|
||||
try:
|
||||
printna=config["obi"]["printna"]
|
||||
except KeyError:
|
||||
printna=False
|
||||
|
||||
if b"nastring" in qualifiers:
|
||||
nastring=tobytes(qualifiers[b"nastring"][0])
|
||||
else:
|
||||
try:
|
||||
nastring=tobytes(config["obi"]["nastring"])
|
||||
if input:
|
||||
nakey = "inputnastring"
|
||||
else:
|
||||
nakey = "outputnastring"
|
||||
nastring=tobytes(config["obi"][nakey])
|
||||
except KeyError:
|
||||
nastring=b'NA'
|
||||
|
||||
@ -377,63 +420,91 @@ def open_uri(uri,
|
||||
|
||||
if format is not None:
|
||||
if seqtype==b"nuc":
|
||||
objclass = Nuc_Seq
|
||||
objclass = Nuc_Seq # Nuc_Seq_Stored? TODO
|
||||
if format==b"fasta":
|
||||
iseq = fastaNucIterator(file,
|
||||
if input:
|
||||
iseq = fastaNucIterator(file,
|
||||
skip=skip,
|
||||
only=only)
|
||||
else:
|
||||
iseq = FastaNucWriter(FastaFormat(printNAKeys=printna, NAString=nastring),
|
||||
file,
|
||||
skip=skip,
|
||||
only=only)
|
||||
elif format==b"fastq":
|
||||
if input:
|
||||
iseq = fastqIterator(file,
|
||||
skip=skip,
|
||||
only=only,
|
||||
offset=offset,
|
||||
noquality=noquality)
|
||||
else:
|
||||
iseq = FastqWriter(FastqFormat(printNAKeys=printna, NAString=nastring),
|
||||
file,
|
||||
skip=skip,
|
||||
only=only)
|
||||
elif format==b"embl":
|
||||
if input:
|
||||
iseq = emblIterator(file,
|
||||
skip=skip,
|
||||
only=only)
|
||||
elif format==b"fastq":
|
||||
iseq = fastqIterator(file,
|
||||
skip=skip,
|
||||
only=only,
|
||||
offset=offset,
|
||||
noquality=noquality)
|
||||
elif format==b"embl":
|
||||
iseq = emblIterator(file,
|
||||
skip=skip,
|
||||
only=only)
|
||||
else:
|
||||
raise NotImplementedError('Output sequence file format not implemented')
|
||||
else:
|
||||
raise NotImplementedError('Sequence file format not implemented')
|
||||
elif seqtype==b"prot":
|
||||
raise NotImplementedError()
|
||||
elif format==b"tabular":
|
||||
objclass = dict
|
||||
iseq = tabIterator(file,
|
||||
header = header,
|
||||
sep = sep,
|
||||
dec = dec,
|
||||
stripwhite = stripwhite,
|
||||
blanklineskip = blanklineskip,
|
||||
commentchar = commentchar,
|
||||
skip = skip,
|
||||
only = only)
|
||||
if input:
|
||||
iseq = tabIterator(file,
|
||||
header = header,
|
||||
sep = sep,
|
||||
dec = dec,
|
||||
stripwhite = stripwhite,
|
||||
blanklineskip = blanklineskip,
|
||||
commentchar = commentchar,
|
||||
skip = skip,
|
||||
only = only)
|
||||
else:
|
||||
raise NotImplementedError('Output sequence file format not implemented')
|
||||
elif format==b"ngsfilter":
|
||||
objclass = dict
|
||||
iseq = ngsfilterIterator(file,
|
||||
sep = sep,
|
||||
dec = dec,
|
||||
stripwhite = stripwhite,
|
||||
blanklineskip = blanklineskip,
|
||||
commentchar = commentchar,
|
||||
skip = skip,
|
||||
only = only)
|
||||
if input:
|
||||
iseq = ngsfilterIterator(file,
|
||||
sep = sep,
|
||||
dec = dec,
|
||||
stripwhite = stripwhite,
|
||||
blanklineskip = blanklineskip,
|
||||
commentchar = commentchar,
|
||||
skip = skip,
|
||||
only = only)
|
||||
else:
|
||||
raise NotImplementedError('Output sequence file format not implemented')
|
||||
else:
|
||||
iseq, objclass = entryIteratorFactory(file,
|
||||
skip, only,
|
||||
seqtype,
|
||||
offset,
|
||||
noquality,
|
||||
skiperror,
|
||||
header,
|
||||
sep,
|
||||
dec,
|
||||
nastring,
|
||||
stripwhite,
|
||||
blanklineskip,
|
||||
commentchar)
|
||||
if input:
|
||||
iseq, objclass = entryIteratorFactory(file,
|
||||
skip, only,
|
||||
seqtype,
|
||||
offset,
|
||||
noquality,
|
||||
skiperror,
|
||||
header,
|
||||
sep,
|
||||
dec,
|
||||
nastring,
|
||||
stripwhite,
|
||||
blanklineskip,
|
||||
commentchar)
|
||||
else: # default export is in fasta? or tab? TODO
|
||||
objclass = Nuc_Seq # Nuc_Seq_Stored? TODO
|
||||
iseq = FastaNucWriter(FastaFormat(printNAKeys=printna, NAString=nastring),
|
||||
file,
|
||||
skip=skip,
|
||||
only=only)
|
||||
|
||||
#tmpdms = get_temp_dms()
|
||||
|
||||
|
||||
return (file, iseq, objclass, urib)
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user