Patch decoding of URL

This commit is contained in:
2017-07-28 12:41:28 +02:00
parent 84bb93096f
commit b9c65a871f
9 changed files with 276 additions and 260 deletions

View File

@ -4,3 +4,7 @@ from obitools3.dms.dms cimport DMS
from obitools3.dms.view.view cimport View
from obitools3.dms.column.column cimport Column
from obitools3.dms.taxo.taxo cimport Taxonomy
from obitools3.utils cimport tobytes, tostr
from obitools3.files.universalopener cimport uopen

View File

@ -3,16 +3,14 @@
from urllib.parse import urlparse, urlunparse, parse_qs, ParseResultBytes
from os.path import isdir, isfile, basename, join
from obitools3.utils import tobytes
from obitools3.dms.dms import DMS
from obitools3.files.universalopener import uopen
from obitools3.parsers.fasta import fastaNucIterator
from obitools3.parsers.fastq import fastqIterator
from obitools3.parsers.universal import entryIteratorFactory
from obitools3.dms.obiseq import Nuc_Seq
from obitools3.apps.config import getConfiguration,logger
# Error type raised by the URI-opening code in this module when a qualifier
# in the URI (e.g. skip, only, skiperror, noquality, header, stripwhite,
# blanklineskip) cannot be interpreted as a valid value.
class MalformedURIException(RuntimeError):
pass
@ -130,21 +128,29 @@ def open_dms_element(DMS dms, bytes path):
return (dms,subsubpart)
def open_uri(uri,input=True,config={}):
def open_uri(uri,bint input=True):
cdef bytes urib = tobytes(uri)
cdef bytes scheme
cdef tuple dms
cdef dict qualifiers
cdef DMS default_dms
config = getConfiguration()
urip = urlparse(urib)
if 'obi' not in config:
config['obi']={}
default_dms=config["obi"]["defaultdms"]
try:
default_dms=config["obi"]["defaultdms"]
except KeyError:
default_dms=None
scheme = urip.scheme
error = None
if scheme==b"" :
scheme=b'file'
dms = open_dms(urip.path)
if dms is None and default_dms is not None:
dms=(default_dms,urip.path)
@ -167,17 +173,13 @@ def open_uri(uri,input=True,config={}):
return (resource[0],resource[1],urlunparse(urip))
except Exception as e:
error=e
urip = ParseResultBytes(scheme=scheme,
netloc=urip.netloc,
path=urip.path,
params=urip.params,
query=urip.query,
fragment=urip.fragment)
uri=urlunparse(urip)
if not urip.scheme:
urib=b"file:"+urib
try:
file = uopen(uri)
logger('info','Trying to open file : %s', tostr(urib))
file = uopen(tostr(urib))
except Exception as e:
file = None
error=e
@ -189,17 +191,26 @@ def open_uri(uri,input=True,config={}):
if b'format' in qualifiers:
format = qualifiers[b'format'][0]
else:
format=config["obi"]["fileformat"]
try:
format=config["obi"]["fileformat"]
except KeyError:
format=None
if b'seqtype' in qualifiers:
seqtype=qualifiers[b'seqtype'][0]
else:
seqtype=config["obi"]["seqtype"]
try:
seqtype=config["obi"]["seqtype"]
except KeyError:
seqtype=b'nuc'
if b'skip' in qualifiers:
skip=int(qualifiers[b"skip"][0])
else:
skip=config["obi"]["skeep"]
try:
skip=config["obi"]["skip"]
except KeyError:
skip=0
if skip < 0:
raise MalformedURIException('Malformed skip argument in URI')
@ -207,8 +218,11 @@ def open_uri(uri,input=True,config={}):
if b'only' in qualifiers:
only=int(qualifiers[b"only"][0])
else:
only=config["obi"]["only"]
if only <= 0:
try:
only=config["obi"]["only"]
except KeyError:
only=None
if only is not None and only <= 0:
raise MalformedURIException('Malformed only argument in URI')
@ -218,7 +232,10 @@ def open_uri(uri,input=True,config={}):
except Exception as e:
raise MalformedURIException('Malformed skiperror argument in URI')
else:
skiperror=config["obi"]["skiperror"]
try:
skiperror=config["obi"]["skiperror"]
except KeyError:
skiperror=True
if not isinstance(skiperror, bool):
raise MalformedURIException('Malformed skiperror argument in URI')
@ -228,7 +245,10 @@ def open_uri(uri,input=True,config={}):
except Exception as e:
raise MalformedURIException('Malformed noquality argument in URI')
else:
noquality=config["obi"]["noquality"]
try:
noquality=config["obi"]["noquality"]
except KeyError:
noquality=False
if not isinstance(noquality, bool):
raise MalformedURIException('Malformed noquality argument in URI')
@ -238,7 +258,10 @@ def open_uri(uri,input=True,config={}):
elif qualifiers[b"qualityformat"][0]=="solexa":
offset=64
else:
offset=config["obi"]["qualityoffset"]
try:
offset=config["obi"]["qualityoffset"]
except KeyError:
offset=33
if b"header" in qualifiers:
try:
@ -246,14 +269,20 @@ def open_uri(uri,input=True,config={}):
except Exception as e:
raise MalformedURIException('Malformed header argument in URI')
else:
header=config["obi"]["header"]
try:
header=config["obi"]["header"]
except KeyError:
header=False
if not isinstance(header, bool):
raise MalformedURIException('Malformed header argument in URI')
if b"sep" in qualifiers:
sep=qualifiers[b"sep"][0][0]
else:
seq=config["obi"]["sep"]
try:
sep=config["obi"]["sep"]
except KeyError:
sep=None
# if b"quote" in qualifiers:
# pass
@ -261,20 +290,29 @@ def open_uri(uri,input=True,config={}):
if b"dec" in qualifiers:
dec=qualifiers[b"dec"][0][0]
else:
dec=config["obi"]["dec"]
try:
dec=config["obi"]["dec"]
except KeyError:
dec=b"."
if b"nastring" in qualifiers:
nastring=qualifiers[b"nastring"][0]
else:
nastring=config["obi"]["nastring"]
try:
nastring=config["obi"]["nastring"]
except KeyError:
nastring=b'NA'
if b"stripwhite" in qualifiers:
try:
stripwhite=eval(qualifiers[b"stripwhite"][0])
except Exception as e:
raise MalformedURIException('Malformed stripwhite argument in URI')
else:
stripwhite=config["obi"]["stripwhite"]
try:
stripwhite=config["obi"]["stripwhite"]
except KeyError:
stripwhite=True
if not isinstance(stripwhite, bool):
raise MalformedURIException('Malformed stripwhite argument in URI')
@ -284,14 +322,20 @@ def open_uri(uri,input=True,config={}):
except Exception as e:
raise MalformedURIException('Malformed blanklineskip argument in URI')
else:
blanklineskip=config["obi"]["blanklineskip"]
try:
blanklineskip=config["obi"]["blanklineskip"]
except KeyError:
blanklineskip=True
if not isinstance(blanklineskip, bool):
raise MalformedURIException('Malformed blanklineskip argument in URI')
if b"commentchar" in qualifiers:
commentchar=qualifiers[b"commentchar"][0][0]
else:
commentchar=config["obi"]["commentchar"]
try:
commentchar=config["obi"]["commentchar"]
except KeyError:
commentchar=b'#'
if format is not None:
if qualifiers[b"seqtype"]==b"nuc":