Make a first full version of the URI decoder
This commit is contained in:
@ -1,13 +1,27 @@
|
||||
from urllib.parse import urlparse
|
||||
from urllib.parse import urlparse, parse_qs, ParseResultBytes
|
||||
from os.path import isdir, isfile, basename, join
|
||||
|
||||
from obitools3.utils import tobytes
|
||||
|
||||
from obitools3.dms.dms import DMS
|
||||
from obitools3.dms.view.view import View
|
||||
from obitools3.dms.column.column import Column
|
||||
from obitools3.dms.taxo import Taxonomy
|
||||
|
||||
from obitools3.files.universalopener import uopen
|
||||
from obitools3.parsers.fasta import fastaNucIterator
|
||||
from obitools3.parsers.fastq import fastqIterator
|
||||
from obitools3.parsers.universal import entryIteratorFactory
|
||||
|
||||
from obitools3.dms.obiseq import Nuc_Seq
|
||||
|
||||
cdef class MalformedURIException(RuntimeError):
    """Error raised when a URI does not follow the expected syntax."""
|
||||
|
||||
cdef findDMS(bytes path,create=False):
|
||||
cdef open_dms(bytes path,create=False):
|
||||
"""
|
||||
Opens a DMS from the path part of an URI
|
||||
"""
|
||||
cdef int pos=1
|
||||
cdef bytes dmspath
|
||||
cdef bytes dmsdirname
|
||||
@ -39,28 +53,279 @@ cdef findDMS(bytes path,create=False):
|
||||
pos=pos+1
|
||||
return None
|
||||
|
||||
def open_dms_element(DMS dms, bytes path):
    """
    Resolve the element of a DMS designated by the path part of a URI.

    Recognised path layouts (relative to the DMS):

        <empty>                                  -> the DMS itself
        taxonomy/taxoname[/taxid]                -> a taxonomy or one taxon
        viewname                                 -> a view
        viewname/*[/columnname]                  -> a view or one column
        viewname/(columnname|#line)[/...]        -> a column, a line, or
                                                    a sub-element of them

    @param dms: the DMS the path is relative to
    @param path: the part of the URI path located after the DMS name

    @return: a tuple (dms, element) where element is the object
             designated by the path

    @raise MalformedURIException: when the path has too many components
    @raise NotImplementedError: for the `*` forms with four components
    """
    # Empty components are dropped: bytes.split() always returns at least
    # one element, so an empty path would otherwise yield [b''] and the
    # "DMS only" case below could never be detected.
    cdef list path_parts = [p for p in path.split(b'/') if p]

    nparts = len(path_parts)

    # The URI is only composed of a DMS
    if nparts == 0:
        return (dms, dms)

    # The URI targets a taxonomy
    #       dms:dmspath/taxonomy/taxoname[/taxid]
    # A bare `taxonomy` component (no taxonomy name) is not handled here
    # and is looked up as a view name below.
    if path_parts[0] == b"taxonomy" and nparts > 1:
        if nparts > 3:
            raise MalformedURIException('Malformed Taxonomy URI')
        taxo = Taxonomy.open(dms, path_parts[1])
        if nparts == 3:
            # The third component is the taxid of a single taxon
            return (dms, taxo[int(path_parts[2])])
        return (dms, taxo)

    # The URI targets a view
    #       dms:dmspath/viewname[/columnname|#line|*[/#line|columnname|*[/subcolumn]]]
    view = View.open(dms, path_parts[0])

    if nparts == 1:
        return (dms, view)

    if path_parts[1] == b'*':
        if nparts == 2:
            return (dms, view)
        if nparts == 3:
            return (dms, view[path_parts[2]])
        if nparts == 4:
            raise NotImplementedError()
        raise MalformedURIException('Malformed View * URI')

    # The second component is a line number when it parses as an integer,
    # otherwise it is used as a name
    try:
        selector = int(path_parts[1])
    except ValueError:
        selector = path_parts[1]
    part = view[selector]

    if nparts == 2:
        return (dms, part)

    if isinstance(part, Column):
        if path_parts[2] == b"*":
            if nparts == 3:
                return (dms, part)
            if nparts == 4:
                raise NotImplementedError()
            raise MalformedURIException('Malformed View * URI')
        # Elements of a column are addressed by an integer index
        subpart = part[int(path_parts[2])]
    else:
        subpart = part[path_parts[2]]

    if nparts == 3:
        return (dms, subpart)

    # URI with too many sub-parts: rejected before the last lookup so that
    # the caller gets the URI error rather than a lookup error
    if nparts > 4:
        raise MalformedURIException('Malformed View URI')

    try:
        selector = int(path_parts[3])
    except ValueError:
        selector = path_parts[3]

    return (dms, subpart[selector])
|
||||
|
||||
|
||||
|
||||
def _parse_uri_int(raw, label):
    """Convert a URI qualifier value to int, or raise MalformedURIException."""
    try:
        return int(raw)
    except ValueError:
        raise MalformedURIException('Malformed %s argument in URI' % label)


def _uri_bool_option(qualifiers, obi_config, key):
    """Read boolean option `key` from the URI qualifiers, else from the configuration."""
    name = key.encode('ascii')
    if name in qualifiers:
        try:
            # SECURITY: eval() executes arbitrary Python coming from the
            # URI. Kept for compatibility with the accepted syntax, but it
            # must not be exposed to untrusted URIs.
            value = eval(qualifiers[name][0])
        except Exception:
            raise MalformedURIException('Malformed %s argument in URI' % key)
    else:
        value = obi_config[key]

    if not isinstance(value, bool):
        raise MalformedURIException('Malformed %s argument in URI' % key)

    return value


def open_uri(uri,input=True,config=None):
    """
    Open the resource designated by a URI.

    A URI without scheme, or with the `dms` scheme, is first tried as an
    element of a DMS. A URI without scheme that cannot be opened that way,
    and any URI with another scheme, is opened as a file with `uopen`.

    File reading options are taken from the query part of the URI
    (format, seqtype, skip, only, skiperror, noquality, qualityformat,
    header, sep, dec, nastring, stripwhite, blanklineskip, commentchar).
    An option missing from the URI is read from config["obi"]; a KeyError
    is raised if it is missing there too.

    @param uri: the URI to open, converted with `tobytes`
    @param input: not used by the current implementation
    @param config: the configuration mapping. Its "obi" section is created
                   when missing, and its "defaultdms" entry is set to the
                   opened DMS when it was not already defined.

    @return: (dms, element, uri) for a DMS element, or
             (file, iterator, entry class) for a file

    @raise MalformedURIException: when a qualifier of the URI is invalid
    @raise NotImplementedError: for unsupported formats or sequence types
    @raise Exception: the last error met while trying to open the resource
    """
    cdef bytes urib = tobytes(uri)
    cdef bytes scheme
    cdef tuple dms = None

    # Imported locally because the module-level import only provides
    # urlparse, parse_qs and ParseResultBytes.
    from urllib.parse import urlunparse

    if config is None:
        config = {}
    if "obi" not in config:
        config["obi"] = {}
    obi_config = config["obi"]

    urip = urlparse(urib)
    scheme = urip.scheme

    try:
        default_dms = obi_config["defaultdms"]
    except KeyError:
        default_dms = None

    error = None
    explicit_dms = (scheme == b"dms")

    if scheme == b"" or explicit_dms:
        dms = open_dms(urip.path)
        if dms is None and default_dms is not None:
            dms = (default_dms, urip.path)
        if not explicit_dms:
            # Without scheme, the URI is a file when it is not a DMS element
            scheme = b'file'

    if dms is not None:
        try:
            resource = open_dms_element(*dms)
        except Exception as e:
            # Remembered, then the URI is tried as a file
            error = e
        else:
            if default_dms is None:
                obi_config["defaultdms"] = resource[0]
            return (resource[0],
                    resource[1],
                    urlunparse(urip._replace(scheme=b"dms")))

    if explicit_dms:
        # An explicit dms: URI is never opened as a file
        if error is not None:
            raise error
        raise MalformedURIException('Malformed DMS URI')

    urip = urip._replace(scheme=scheme)
    uri = urlunparse(urip)

    try:
        file = uopen(uri)
    except Exception as e:
        file = None
        error = e

    if file is None:
        if error is not None:
            raise error
        raise MalformedURIException('Cannot open URI')

    # parse_qs is given bytes, so qualifier names and values are bytes
    qualifiers = parse_qs(urip.query)

    if b'format' in qualifiers:
        fileformat = qualifiers[b'format'][0]
    else:
        fileformat = obi_config["fileformat"]

    if b'seqtype' in qualifiers:
        seqtype = qualifiers[b'seqtype'][0]
    else:
        seqtype = obi_config["seqtype"]

    if b'skip' in qualifiers:
        skip = _parse_uri_int(qualifiers[b"skip"][0], 'skip')
    elif "skip" in obi_config:
        skip = obi_config["skip"]
    else:
        # Misspelled key read by the first version of this function
        skip = obi_config["skeep"]
    if skip < 0:
        raise MalformedURIException('Malformed skip argument in URI')

    if b'only' in qualifiers:
        only = _parse_uri_int(qualifiers[b"only"][0], 'only')
    else:
        only = obi_config["only"]
    # NOTE(review): a configured None is assumed to mean "no limit" and is
    # passed through unchecked -- confirm against the parsers
    if only is not None and only <= 0:
        raise MalformedURIException('Malformed only argument in URI')

    skiperror = _uri_bool_option(qualifiers, obi_config, "skiperror")
    noquality = _uri_bool_option(qualifiers, obi_config, "noquality")

    if b"qualityformat" in qualifiers:
        qualityformat = qualifiers[b"qualityformat"][0]
        if qualityformat == b"sanger":
            offset = 33
        elif qualityformat == b"solexa":
            offset = 64
        else:
            raise MalformedURIException('Malformed qualityformat argument in URI')
    else:
        offset = obi_config["qualityoffset"]

    header = _uri_bool_option(qualifiers, obi_config, "header")

    if b"sep" in qualifiers:
        # NOTE(review): indexing bytes yields an int in Python 3; confirm
        # whether the parser expects an int or a one-byte bytes object
        sep = qualifiers[b"sep"][0][0]
    else:
        sep = obi_config["sep"]

    if b"dec" in qualifiers:
        dec = qualifiers[b"dec"][0][0]
    else:
        dec = obi_config["dec"]

    if b"nastring" in qualifiers:
        nastring = qualifiers[b"nastring"][0]
    else:
        nastring = obi_config["nastring"]

    stripwhite = _uri_bool_option(qualifiers, obi_config, "stripwhite")
    blanklineskip = _uri_bool_option(qualifiers, obi_config, "blanklineskip")

    if b"commentchar" in qualifiers:
        commentchar = qualifiers[b"commentchar"][0][0]
    else:
        commentchar = obi_config["commentchar"]

    if fileformat is not None:
        # URI values are bytes; configured values may be str
        if seqtype in (b"nuc", "nuc"):
            objclass = Nuc_Seq
            if fileformat in (b"fasta", "fasta"):
                iseq = fastaNucIterator(file, skip, only)
            elif fileformat in (b"fastq", "fastq"):
                iseq = fastqIterator(file,
                                     skip, only,
                                     offset,
                                     noquality)
            else:
                raise NotImplementedError('Sequence file format not implemented')
        elif seqtype in (b"prot", "prot"):
            raise NotImplementedError()
        else:
            raise NotImplementedError('Sequence type not implemented')
    else:
        # No explicit format: the format is left to the universal parser
        iseq, objclass = entryIteratorFactory(file,
                                              skip, only,
                                              seqtype,
                                              offset,
                                              noquality,
                                              skiperror,
                                              header,
                                              sep,
                                              dec,
                                              nastring,
                                              stripwhite,
                                              blanklineskip,
                                              commentchar)

    return (file, iseq, objclass)
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user