From 949e5f9baff64ce64e1a1808122f05392715493f Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Thu, 27 Jul 2017 16:04:31 +0200 Subject: [PATCH] Make a first full version of the URI decoder --- python/obitools3/uri/decode.pyx | 289 ++++++++++++++++++++++++++++++-- 1 file changed, 277 insertions(+), 12 deletions(-) diff --git a/python/obitools3/uri/decode.pyx b/python/obitools3/uri/decode.pyx index 8c88270..2ff1a1d 100644 --- a/python/obitools3/uri/decode.pyx +++ b/python/obitools3/uri/decode.pyx @@ -1,13 +1,27 @@ -from urllib.parse import urlparse +from urllib.parse import urlparse, parse_qs, ParseResultBytes from os.path import isdir, isfile, basename, join from obitools3.utils import tobytes + from obitools3.dms.dms import DMS +from obitools3.dms.view.view import View +from obitools3.dms.column.column import Column +from obitools3.dms.taxo import Taxonomy +from obitools3.files.universalopener import uopen +from obitools3.parsers.fasta import fastaNucIterator +from obitools3.parsers.fastq import fastqIterator +from obitools3.parsers.universal import entryIteratorFactory +from obitools3.dms.obiseq import Nuc_Seq +cdef class MalformedURIException(RuntimeError): + pass -cdef findDMS(bytes path,create=False): +cdef open_dms(bytes path,create=False): + """ + Opens a DMS from the path part of an URI + """ cdef int pos=1 cdef bytes dmspath cdef bytes dmsdirname @@ -39,28 +53,279 @@ cdef findDMS(bytes path,create=False): pos=pos+1 return None +def open_dms_element(DMS dms, bytes path): + """ + """ + cdef list path_parts = path.split(b'/') + + # The URI is only composed of a DMS + if not path_parts: + return (dms,dms) + + # The URI is target a taxonomy + # dms:dmspath/taxonomy/taxoname[/taxid] + if path_parts[0]==b"taxonomy": + if len(path_parts) > 1: + taxo = Taxonomy.open(dms,path_parts[1]) + if len(path_parts) == 2: + taxon=taxo[int(path_parts[2])] + return (dms,taxon) + elif (len(path_parts) > 2: + raise MalformedURIException('Malformed Taxonomy URI') + return (dms,taxo) + # The URI is target a view + # dms:dmspath/viewname[/columnname|#line|*[/#line|columnname|*[/subcolumn]]] + + view = View.open(dms,path_parts[0]) + + if len(path_parts) > 1: + if path_parts[1]==b'*': + if len(path_parts) == 2: + return (dms,view) + else: + column = view[path_parts[2]] + if len(path_parts) == 3: + return (dms,column) + elif len(path_parts) == 4: + raise NotImplementedError() + else: + raise MalformedURIException('Malformed View * URI') + try: + part = int(path_parts[1]) + except ValueError: + part = path_parts[1] + part = view[part] + else: + return (dms,view) + if len(path_parts) > 2: + if isinstance(part, Column): + if path_parts[2]==b"*": + if len(path_parts) == 4: + raise NotImplementedError() + elif len(path_parts) == 3: + return (dms,part) + else: + raise MalformedURIException('Malformed View * URI') + else: + subpart = part[int(path_parts[2])] + else: + subpart = part[path_parts[2]] + else: + return (dms,part) + + if len(path_parts) > 3: + try: + subsubpart = int(path_parts[3]) + except ValueError: + subsubpart = path_parts[3] + subsubpart = subpart[subsubpart] + else: + return (dms,subpart) + + # URI with too many sub-parts + if len(path_parts) > 4: + raise MalformedURIException('Malformed View URI') + + return (dms,subsubpart) + -cpdef openURI(uri,defaultDMS=None,input=True): +def open_uri(uri,input=True,config={}): cdef bytes urib = tobytes(uri) cdef bytes scheme + cdef tuple dms urip = urlparse(urib) + default_dms=config["obi"]["defaultdms"] + scheme = urip.scheme + + error = None if scheme==b"" : - if defaultDMS is not None: - scheme=b'file' - dms = findDMS(urip.path) - if dms is not None: - scheme=b"dms" - - - if scheme==b"dms" : + dms = open_dms(urip.path) + if dms is None and default_dms is not None: + dms=(default_dms,urip.path)) + - elif scheme==b"file" : + if dms is not None: + try: + resource=open_dms_element(*dms) + scheme=b"dms" + urip = ParseResultBytes(scheme=b"dms", + netloc=urip.netloc, + path=urip.path, + params=urip.params, + query=urip.query, + fragment=urip.fragment) + + if default_dms is None: + config["obi"]["defaultdms"]=resource[0] + + return (resource[0],resource[1],urlunparse(urip)) + except Exception as e: + error=e + + urip = ParseResultBytes(scheme=scheme, + netloc=urip.netloc, + path=urip.path, + params=urip.params, + query=urip.query, + fragment=urip.fragment) + uri=urlunparse(urip) + + try: + file = uopen(uri) + except Exception as e: + file = None + error=e + + if file is not None: + qualifiers=parse_qs(urip.query) + + + if b'format' in qualifiers: + format = qualifiers[b'format'][0] + else: + format=config["obi"]["fileformat"] + + if b'seqtype' in qualifiers: + seqtype=qualifiers[b'seqtype'][0] + else: + seqtype=config["obi"]["seqtype"] + + if b'skip' in qualifiers: + skip=int(qualifiers[b"skip"][0] + else: + skip=config["obi"]["skeep"] + if skip < 0: + raise MalformedURIException('Malformed skip argument in URI') + + + if b'only' in qualifiers: + only=int(qualifiers[b"only"][0]) + else: + only=config["obi"]["only"] + if only <= 0: + raise MalformedURIException('Malformed only argument in URI') + + + if b"skiperror" in qualifiers: + try: + skiperror=eval(qualifiers[b"skiperror"][0]) + except Exception as e: + raise MalformedURIException('Malformed skiperror argument in URI') + else: + skiperror=config["obi"]["skiperror"] + if not isinstance(skiperror, bool): + raise MalformedURIException('Malformed skiperror argument in URI') + + if b"noquality" in qualifiers: + try: + noquality=eval(qualifiers[b"noquality"][0]) + except Exception as e: + raise MalformedURIException('Malformed noquality argument in URI') + else: + noquality=config["obi"]["noquality"] + if not isinstance(noquality, bool): + raise MalformedURIException('Malformed noquality argument in URI') + + if b"qualityformat" in qualifiers: + if qualifiers[b"qualityformat"][0]=="sanger": + offset=33 + elif qualifiers[b"qualityformat"][0]=="solexa": + offset=64 + else: + offset=config["obi"]["qualityoffset"] + + if b"header" in qualifiers: + try: + header=eval(qualifiers[b"header"][0]) + except Exception as e: + raise MalformedURIException('Malformed header argument in URI') + else: + header=config["obi"]["header"] + if not isinstance(header, bool): + raise MalformedURIException('Malformed header argument in URI') + + if b"sep" in qualifiers: + sep=qualifiers[b"sep"][0][0] + else: + seq=config["obi"]["sep"] + +# if b"quote" in qualifiers: +# pass + + if b"dec" in qualifiers: + dec=qualifiers[b"dec"][0][0] + else: + dec=config["obi"]["dec"] + + if b"nastring" in qualifiers: + nastring=qualifiers[b"nastring"][0] + else: + nastring=config["obi"]["nastring"] + + if b"stripwhite" in qualifiers: + try: + stripwhite=eval(qualifiers[b"stripwhite"][0]) + except Exception as e: + raise MalformedURIException('Malformed stripwhite argument in URI') + else: + stripwhite=config["obi"]["stripwhite"] + if not isinstance(stripwhite, bool): + raise MalformedURIException('Malformed stripwhite argument in URI') + + if b"blanklineskip" in qualifiers: + try: + blanklineskip=eval(qualifiers[b"blanklineskip"][0]) + except Exception as e: + raise MalformedURIException('Malformed blanklineskip argument in URI') + else: + blanklineskip=config["obi"]["blanklineskip"] + if not isinstance(blanklineskip, bool): + raise MalformedURIException('Malformed blanklineskip argument in URI') + + if b"commentchar" in qualifiers: + nastring=qualifiers[b"commentchar"][0][0] + else: + nastring=config["obi"]["commentchar"] + + if format is not None: + if qualifiers[b"seqtype"]==b"nuc": + objclass = Nuc_Seq + if format="fasta": + iseq = fastaNucIterator(file,skip,only) + elif format="fastq": + iseq = fastqIterator(file, + skip,only, + offset, + noquality) + else: + raise NotImplementedError('Sequence file format not implemented') + elif qualifiers[b"seqtype"]==b"prot": + raise NotImplementedError() + else: + iseq,objclass = entryIterator(file, + skip,only, + seqtype, + offset, + noquality, + skiperror, + header, + sep, + dec, + nastring, + stripwhite, + blanklineskip, + commentchar) + + return (file,iseq,objclass) + + +