Make a first full version of the URI decoder

This commit is contained in:
2017-07-27 16:04:31 +02:00
parent 3c6a05be54
commit 949e5f9baf

View File

@ -1,13 +1,27 @@
from urllib.parse import urlparse, urlunparse, parse_qs, ParseResultBytes
from os.path import isdir, isfile, basename, join

from obitools3.utils import tobytes
from obitools3.dms.dms import DMS
from obitools3.dms.view.view import View
from obitools3.dms.column.column import Column
from obitools3.dms.taxo import Taxonomy
from obitools3.files.universalopener import uopen
from obitools3.parsers.fasta import fastaNucIterator
from obitools3.parsers.fastq import fastqIterator
from obitools3.parsers.universal import entryIteratorFactory
from obitools3.dms.obiseq import Nuc_Seq
cdef class MalformedURIException(RuntimeError):
    """
    Error raised by the URI decoder when a URI does not follow the
    expected structure or carries an invalid argument.
    """
cdef findDMS(bytes path,create=False):
cdef open_dms(bytes path,create=False):
"""
Opens a DMS from the path part of an URI
"""
cdef int pos=1
cdef bytes dmspath
cdef bytes dmsdirname
@ -39,28 +53,279 @@ cdef findDMS(bytes path,create=False):
pos=pos+1
return None
def open_dms_element(DMS dms, bytes path):
    """
    Opens the element of a DMS designated by the path part of an URI.

    Recognised layouts of `path` (components separated by ``/``):

        - empty path                       -> the DMS itself
        - taxonomy/taxoname[/taxid]        -> a taxonomy, or one of its taxa
        - viewname[/columnname|line|*[/line|columnname|*[/subcolumn]]]
                                           -> a view or one of its parts

    Leading and trailing ``/`` are ignored so that an empty path really
    yields an empty list of components (``b''.split(b'/')`` is ``[b'']``,
    never ``[]``).

    @param dms: the already opened DMS
    @param path: the part of the URI path located after the DMS name

    @return: a tuple ``(dms, element)``

    @raise MalformedURIException: when the path has too many components
    @raise NotImplementedError: for the ``*`` forms followed by a fourth
                                component, which are not supported yet

    Errors raised while indexing the taxonomy, the view or its parts
    (unknown name, non numeric line index, ...) are propagated unchanged.
    """

    cdef bytes trimmed = path.strip(b'/')
    cdef list path_parts

    if trimmed:
        path_parts = trimmed.split(b'/')
    else:
        path_parts = []

    # The URI is only composed of a DMS
    if not path_parts:
        return (dms, dms)

    # The URI targets a taxonomy
    # dms:dmspath/taxonomy/taxoname[/taxid]
    #
    # A lone `taxonomy` component (no taxonomy name) is not handled here
    # and is interpreted below as a view name, as it previously was.
    if path_parts[0] == b"taxonomy" and len(path_parts) > 1:
        if len(path_parts) > 3:
            raise MalformedURIException('Malformed Taxonomy URI')

        taxo = Taxonomy.open(dms, path_parts[1])

        # The taxid is the third component: it exists only when the
        # path has exactly three components.
        if len(path_parts) == 3:
            taxon = taxo[int(path_parts[2])]
            return (dms, taxon)

        return (dms, taxo)

    # The URI targets a view
    # dms:dmspath/viewname[/columnname|#line|*[/#line|columnname|*[/subcolumn]]]
    view = View.open(dms, path_parts[0])

    if len(path_parts) == 1:
        return (dms, view)

    # viewname/*[/columnname[/...]]
    if path_parts[1] == b'*':
        if len(path_parts) == 2:
            return (dms, view)

        column = view[path_parts[2]]

        if len(path_parts) == 3:
            return (dms, column)
        if len(path_parts) == 4:
            raise NotImplementedError()
        raise MalformedURIException('Malformed View * URI')

    # The second component is a line number when it is numeric,
    # a column name otherwise.
    try:
        key = int(path_parts[1])
    except ValueError:
        key = path_parts[1]

    part = view[key]

    if len(path_parts) == 2:
        return (dms, part)

    if isinstance(part, Column):
        # viewname/columnname/*[/...]
        if path_parts[2] == b"*":
            if len(path_parts) == 3:
                return (dms, part)
            if len(path_parts) == 4:
                raise NotImplementedError()
            raise MalformedURIException('Malformed View * URI')

        # A column is indexed by line number
        subpart = part[int(path_parts[2])]
    else:
        # A line is indexed by column name
        subpart = part[path_parts[2]]

    if len(path_parts) == 3:
        return (dms, subpart)

    # URI with too many sub-parts: checked before dereferencing the
    # fourth component.
    if len(path_parts) > 4:
        raise MalformedURIException('Malformed View URI')

    try:
        subkey = int(path_parts[3])
    except ValueError:
        subkey = path_parts[3]

    return (dms, subpart[subkey])
cpdef openURI(uri,defaultDMS=None,input=True):
def _as_bytes(value):
    """Encodes `value` to bytes when it is a str, returns it unchanged otherwise."""
    if isinstance(value, str):
        return value.encode()
    return value


def _qualifier(qualifiers, name, defaults, key):
    """Returns the first value of the URI qualifier `name`, or `defaults[key]` when absent."""
    if name in qualifiers:
        return qualifiers[name][0]
    return defaults[key]


def _integer_qualifier(qualifiers, name, defaults):
    """Returns the URI qualifier `name` as an int, or the default stored under the same name."""
    key = name.decode()
    if name in qualifiers:
        try:
            return int(qualifiers[name][0])
        except ValueError:
            raise MalformedURIException('Malformed %s argument in URI' % key)
    return defaults[key]


def _boolean_qualifier(qualifiers, name, defaults):
    """Returns the URI qualifier `name` as a bool, or the default stored under the same name."""
    key = name.decode()
    if name in qualifiers:
        # NOTE(review): the value comes from the URI, i.e. from the user.
        # It was previously passed to eval(); only the literals True and
        # False (any letter case) are accepted now.
        text = qualifiers[name][0].strip().lower()
        value = {b"true": True, b"false": False}.get(text)
    else:
        value = defaults[key]

    if not isinstance(value, bool):
        raise MalformedURIException('Malformed %s argument in URI' % key)

    return value


def _character_qualifier(qualifiers, name, defaults):
    """Returns the first character of the URI qualifier `name`, or the default stored under the same name."""
    if name in qualifiers:
        # Slicing keeps a one byte long `bytes`; indexing would give an int.
        # NOTE(review): assumes the parsers expect bytes here - confirm.
        return qualifiers[name][0][:1]
    return defaults[name.decode()]


def open_uri(uri,input=True,config={}):
    """
    Opens the resource designated by an URI.

    When the URI has no scheme, it is first interpreted as the path of
    a DMS element (see `open_dms` and `open_dms_element`), the default
    DMS of the configuration being used when no DMS is found in the
    path. If that fails for any reason, the URI is opened as a file with
    `uopen`, and a sequence or entry iterator is built according to the
    qualifiers of the query part of the URI, each missing qualifier
    being read from ``config["obi"]``.

    Recognised qualifiers: format, seqtype, skip, only, skiperror,
    noquality, qualityformat (``sanger`` or ``solexa``), header, sep,
    dec, nastring, stripwhite, blanklineskip and commentchar.

    @param uri: the URI to open (converted with `tobytes`)
    @param input: not used by the code below
    @param config: the configuration; ``config["obi"]`` must exist.
                   ``config["obi"]["defaultdms"]`` is set to the opened
                   DMS when it was None.

    @return: ``(dms, element, uri)`` for a DMS element,
             ``(file, iterator, objclass)`` for a file.
             When the file cannot be opened nothing is returned by this
             part of the function; the last exception met is kept in
             the local variable `error`.

    @raise MalformedURIException: when a qualifier has an invalid value
    @raise NotImplementedError: for file formats or sequence types not
                                supported yet
    """

    cdef bytes urib = tobytes(uri)
    cdef bytes scheme
    cdef tuple dms

    urip = urlparse(urib)
    obi = config["obi"]
    default_dms = obi["defaultdms"]

    scheme = urip.scheme
    error = None

    if scheme == b"":
        # Without an explicit scheme, the URI is considered as a file
        # unless it can be opened as a DMS element.
        scheme = b'file'

        dms = open_dms(urip.path)

        if dms is None and default_dms is not None:
            dms = (default_dms, urip.path)

        if dms is not None:
            try:
                resource = open_dms_element(*dms)
            except Exception as e:
                # Deliberate best effort: fall back on the file interpretation
                error = e
            else:
                urip = urip._replace(scheme=b"dms")

                # The first DMS opened becomes the default one
                if default_dms is None:
                    obi["defaultdms"] = resource[0]

                return (resource[0], resource[1], urlunparse(urip))

    # NOTE(review): with the `file` scheme, urlunparse() prefixes a relative
    # path with `///` - confirm that uopen() copes with relative paths.
    urip = urip._replace(scheme=scheme)
    uri = urlunparse(urip)

    try:
        file = uopen(uri)
    except Exception as e:
        file = None
        error = e

    if file is not None:
        qualifiers = parse_qs(urip.query)

        # Values read from the URI are bytes; configuration values are
        # normalised so that both can be compared to bytes literals.
        fileformat = _as_bytes(_qualifier(qualifiers, b'format', obi, "fileformat"))
        seqtype = _as_bytes(_qualifier(qualifiers, b'seqtype', obi, "seqtype"))

        skip = _integer_qualifier(qualifiers, b"skip", obi)
        if skip < 0:
            raise MalformedURIException('Malformed skip argument in URI')

        # NOTE(review): a None default is taken as "no limit" - confirm.
        only = _integer_qualifier(qualifiers, b"only", obi)
        if only is not None and only <= 0:
            raise MalformedURIException('Malformed only argument in URI')

        skiperror = _boolean_qualifier(qualifiers, b"skiperror", obi)
        noquality = _boolean_qualifier(qualifiers, b"noquality", obi)

        if b"qualityformat" in qualifiers:
            qualityformat = qualifiers[b"qualityformat"][0]
            if qualityformat == b"sanger":
                offset = 33
            elif qualityformat == b"solexa":
                offset = 64
            else:
                raise MalformedURIException('Malformed qualityformat argument in URI')
        else:
            offset = obi["qualityoffset"]

        header = _boolean_qualifier(qualifiers, b"header", obi)
        sep = _character_qualifier(qualifiers, b"sep", obi)

        # if b"quote" in qualifiers:
        #     pass

        dec = _character_qualifier(qualifiers, b"dec", obi)
        nastring = _qualifier(qualifiers, b"nastring", obi, "nastring")
        stripwhite = _boolean_qualifier(qualifiers, b"stripwhite", obi)
        blanklineskip = _boolean_qualifier(qualifiers, b"blanklineskip", obi)
        commentchar = _character_qualifier(qualifiers, b"commentchar", obi)

        if fileformat is not None:
            if seqtype == b"nuc":
                objclass = Nuc_Seq
                if fileformat == b"fasta":
                    iseq = fastaNucIterator(file, skip, only)
                elif fileformat == b"fastq":
                    iseq = fastqIterator(file,
                                         skip, only,
                                         offset,
                                         noquality)
                else:
                    raise NotImplementedError('Sequence file format not implemented')
            elif seqtype == b"prot":
                raise NotImplementedError()
            else:
                raise NotImplementedError('Sequence type not implemented')
        else:
            # No explicit format: the format is guessed by the universal
            # parser. `entryIteratorFactory` is the name imported from
            # obitools3.parsers.universal.
            iseq, objclass = entryIteratorFactory(file,
                                                  skip, only,
                                                  seqtype,
                                                  offset,
                                                  noquality,
                                                  skiperror,
                                                  header,
                                                  sep,
                                                  dec,
                                                  nastring,
                                                  stripwhite,
                                                  blanklineskip,
                                                  commentchar)

        return (file, iseq, objclass)
if scheme==b"dms" :
elif scheme==b"file" :