407 lines
13 KiB
Cython
407 lines
13 KiB
Cython
#cython: language_level=3
|
|
|
|
from urllib.parse import urlparse, urlunparse, parse_qs, ParseResultBytes
|
|
from os.path import isdir, isfile, basename, join
|
|
|
|
from obitools3.dms.dms import DMS
|
|
|
|
from obitools3.parsers.fasta import fastaNucIterator
|
|
from obitools3.parsers.fastq import fastqIterator
|
|
from obitools3.parsers.universal import entryIteratorFactory
|
|
|
|
from obitools3.dms.obiseq import Nuc_Seq
|
|
from obitools3.apps.config import getConfiguration,logger
|
|
from obitools3.apps.temp import get_temp_dms
|
|
|
|
class MalformedURIException(RuntimeError):
    """
    Raised when a URI, or one of its query qualifiers, cannot be
    interpreted by the URI decoding functions of this module.
    """
|
|
|
|
cdef open_dms(bytes path, bint create=False):
    """
    Locates and opens the DMS named by the leading part of a URI path.

    Successively longer prefixes of `path` (cut at each b"/" found from
    index 1 onwards, then the whole path) are tried as DMS names.
    A prefix that is an existing directory is skipped. Otherwise:

      - if `<prefix>.obidms` is a directory containing the file
        `<basename(prefix)>_infos`, the DMS is opened with DMS.open;
      - if `<prefix>.obidms` is not a directory and `create` is true,
        a new DMS is created with DMS.new.

    @param path: the path part of the URI
    @param create: whether a missing DMS may be created

    @return: a tuple (dms, remaining path after the DMS prefix), the
             remaining path being b'' when the whole path was consumed,
             or None if no DMS could be opened or created
    """
    cdef int slash = 1
    cdef bytes candidate
    cdef bytes storage_dir
    cdef bytes remainder

    while slash > 0:
        slash = path.find(b"/", slash)

        # Split the path at the current separator; without any further
        # separator the whole path is the candidate DMS name.
        if slash > 0:
            candidate = path[:slash]
            remainder = path[slash+1:]
        else:
            candidate = path
            remainder = b''

        # A plain directory is only a container: look deeper in the path.
        if not isdir(candidate):
            storage_dir = candidate + b".obidms"
            if isdir(storage_dir):
                if isfile(join(storage_dir, basename(candidate) + b"_infos")):
                    return (DMS.open(candidate), remainder)
            elif create:
                return (DMS.new(candidate), remainder)

        # Restart the search after this separator. When no separator was
        # found, slash is -1 and becomes 0 here, which ends the loop.
        slash += 1

    return None
|
|
|
|
def open_dms_element(DMS dms, bytes path,
                     bint create=False,
                     type newviewtype=View):
    """
    Resolves the element of a DMS designated by the path part of a URI.

    Recognised path layouts (parts are separated by b'/'):

      - empty path: the DMS itself;
      - taxonomy/taxoname[/taxid]: a taxonomy, or one of its taxa;
      - viewname[/columnname|line|*[/line|columnname|*[/subcolumn]]]:
        a view, or an element reached by successive indexing of the view.

    @param dms: the DMS in which the element is looked up
    @param path: the part of the URI path following the DMS name
    @param create: if true the view is obtained with `newviewtype.new`,
                   otherwise with `newviewtype.open`
    @param newviewtype: the class used to open or create the view

    @return: a tuple (dms, element)

    @raise MalformedURIException: if the path has a wrong number of parts
    @raise NotImplementedError: for the four-part paths using b'*'
    """
    cdef list path_parts = path.split(b'/')

    # The URI is only composed of a DMS
    if not path:
        return (dms,dms)

    # The URI targets a taxonomy
    # dms:dmspath/taxonomy/taxoname[/taxid]
    if path_parts[0]==b"taxonomy":
        # A taxonomy name is mandatory and at most one taxid may follow.
        # Checking this first avoids returning an unassigned taxonomy for
        # the bare "taxonomy" path and opening one for an over-long path.
        if len(path_parts) < 2 or len(path_parts) > 3:
            raise MalformedURIException('Malformed Taxonomy URI')
        taxo = Taxonomy.open(dms,path_parts[1])
        if len(path_parts) == 3:
            taxon = taxo[int(path_parts[2])]
            return (dms,taxon)
        return (dms,taxo)

    # The URI targets a view
    # dms:dmspath/viewname[/columnname|#line|*[/#line|columnname|*[/subcolumn]]]

    if create:
        view = newviewtype.new(dms,path_parts[0])
    else:
        view = newviewtype.open(dms,path_parts[0])

    if len(path_parts) > 1:
        if path_parts[1]==b'*':
            # viewname/* designates the view itself,
            # viewname/*/name the element `name` of the view.
            if len(path_parts) == 2:
                return (dms,view)
            else:
                column = view[path_parts[2]]
                if len(path_parts) == 3:
                    return (dms,column)
                elif len(path_parts) == 4:
                    raise NotImplementedError()
                else:
                    raise MalformedURIException('Malformed View * URI')
        # A numeric second part is used as an integer index,
        # anything else as a bytes key.
        try:
            part = int(path_parts[1])
        except ValueError:
            part = path_parts[1]
        part = view[part]
    else:
        return (dms,view)

    if len(path_parts) > 2:
        if isinstance(part, Column):
            if path_parts[2]==b"*":
                if len(path_parts) == 4:
                    raise NotImplementedError()
                elif len(path_parts) == 3:
                    return (dms,part)
                else:
                    raise MalformedURIException('Malformed View * URI')
            else:
                # Elements of a column are addressed by integer index
                subpart = part[int(path_parts[2])]
        else:
            subpart = part[path_parts[2]]
    else:
        return (dms,part)

    if len(path_parts) > 3:
        try:
            subsubpart = int(path_parts[3])
        except ValueError:
            subsubpart = path_parts[3]
        subsubpart = subpart[subsubpart]
    else:
        return (dms,subpart)

    # URI with too many sub-parts
    if len(path_parts) > 4:
        raise MalformedURIException('Malformed View URI')

    return (dms,subsubpart)
|
|
|
|
|
|
def _config_value(config, name, default):
    """
    Returns config["obi"][name], or `default` when that key is missing.
    """
    try:
        return config["obi"][name]
    except KeyError:
        return default


def _int_qualifier(qualifiers, config, name, default):
    """
    Returns the URI qualifier `name` converted to int when it is present,
    otherwise the config["obi"][name] entry, otherwise `default`.
    """
    key = name.encode('ascii')
    if key in qualifiers:
        try:
            return int(qualifiers[key][0])
        except ValueError:
            raise MalformedURIException('Malformed %s argument in URI' % name)
    return _config_value(config, name, default)


def _bool_qualifier(qualifiers, config, name, default):
    """
    Returns the boolean URI qualifier `name` when it is present, otherwise
    the config["obi"][name] entry, otherwise `default`. Any value that is
    not a bool is rejected with a MalformedURIException.
    """
    key = name.encode('ascii')
    if key in qualifiers:
        try:
            # SECURITY NOTE(review): eval() executes whatever expression
            # the URI contains. It is kept so that every value accepted so
            # far still works, but it must not be fed untrusted URIs.
            value = eval(qualifiers[key][0])
        except Exception:
            raise MalformedURIException('Malformed %s argument in URI' % name)
    else:
        value = _config_value(config, name, default)
    if not isinstance(value, bool):
        raise MalformedURIException('Malformed %s argument in URI' % name)
    return value


def _char_qualifier(qualifiers, config, name, default):
    """
    Returns the first character of the URI qualifier `name` when it is
    present, otherwise the config["obi"][name] entry, otherwise `default`.
    """
    key = name.encode('ascii')
    if key in qualifiers:
        # Slicing keeps a one-byte bytes object, like the defaults;
        # indexing a bytes object with [0] would give an int instead.
        return qualifiers[key][0][0:1]
    return _config_value(config, name, default)


def _quality_offset(quality_format):
    """
    Returns the quality offset of a quality format name given as bytes or
    str (33 for sanger, 64 for solexa). None selects the default of 33.
    """
    # NOTE(review): the configuration entry was previously indexed with
    # [0]; presumably it can be a list of values - confirm.
    if isinstance(quality_format, (list, tuple)):
        quality_format = quality_format[0] if quality_format else None
    if quality_format is None:
        return 33
    if quality_format == b"sanger" or quality_format == "sanger":
        return 33
    if quality_format == b"solexa" or quality_format == "solexa":
        return 64
    raise MalformedURIException('Malformed qualityformat argument in URI')


def open_uri(uri,
             bint input=True,
             type newviewtype=View):
    """
    Opens the resource designated by a URI.

    A URI without scheme, or with the `dms` scheme, is first looked up as
    an element of a DMS: the DMS found in the URI path, or else the one
    stored in config["obi"]["defaultdms"]. If that fails, a `dms` URI is
    an error, whereas a URI without scheme is then opened as a file.
    Any other URI is opened as a file.

    For a file, the reading options are taken from the URI query
    (format, seqtype, skip, only, skiperror, noquality, qualityformat,
    header, sep, dec, nastring, stripwhite, blanklineskip, commentchar),
    then from the entries of config["obi"], then from built-in defaults.

    @param uri: the URI to open
    @param input: whether the resource is opened as an input; for an
                  output, a missing DMS or view is created unless
                  config["obi"]["nocreatedms"] is set
    @param newviewtype: the class used to open or create a view

    @return: for a DMS element, the tuple
             (dms, element, type of the element, URI with the dms scheme);
             for a file, the tuple
             (file, entry iterator, class of the entries, URI as bytes);
             None if the file cannot be opened

    @raise FileNotFoundError: if a URI with the `dms` scheme cannot be opened
    @raise MalformedURIException: if a query qualifier has an invalid value
    @raise NotImplementedError: for unsupported formats or sequence types
    """
    cdef bytes urib = tobytes(uri)
    cdef bytes scheme
    cdef tuple dms
    cdef dict qualifiers
    cdef DMS default_dms

    config = getConfiguration()
    urip = urlparse(urib)

    if 'obi' not in config:
        config['obi']={}

    default_dms = _config_value(config, "defaultdms", None)

    # Creation is only considered for outputs, and can be disabled
    create = (not input) and (not _config_value(config, "nocreatedms", False))

    scheme = urip.scheme

    error = None

    if scheme==b"" or scheme==b"dms":
        dms = open_dms(urip.path,create)
        if dms is None and default_dms is not None:
            dms = (default_dms, urip.path)

        if dms is not None:
            try:
                resource = open_dms_element(dms[0],dms[1],
                                            create,
                                            newviewtype
                                            )

                scheme = b"dms"
                urip = ParseResultBytes(scheme=b"dms",
                                        netloc=urip.netloc,
                                        path=urip.path,
                                        params=urip.params,
                                        query=urip.query,
                                        fragment=urip.fragment)

                # The first DMS successfully opened becomes the default one
                if default_dms is None:
                    config["obi"]["defaultdms"]=resource[0]

                return (resource[0],
                        resource[1],
                        type(resource[1]),
                        urlunparse(urip))
            except Exception as e:
                # Kept as the cause of the error raised below; a URI
                # without scheme is retried as a file instead.
                error = e

    if scheme==b"dms":
        logger('error','cannot open DMS: %s', uri)
        raise FileNotFoundError('cannot open DMS: %s' % tostr(urib)) from error

    if not urip.scheme:
        urib = b"file:"+urib

    try:
        file = uopen(tostr(urib))
        logger('info','Opened file : %s', tostr(urib))
    except Exception as e:
        file = None
        error = e

    # NOTE(review): the failure is reported by returning None, so callers
    # are presumably expected to test the result - confirm.
    if file is None:
        return None

    qualifiers = parse_qs(urip.query)

    if b'format' in qualifiers:
        fileformat = qualifiers[b'format'][0]
    else:
        fileformat = _config_value(config, "fileformat", None)

    if b'seqtype' in qualifiers:
        seqtype = qualifiers[b'seqtype'][0]
    else:
        seqtype = _config_value(config, "seqtype", b'nuc')

    skip = _int_qualifier(qualifiers, config, "skip", 0)
    if skip < 0:
        raise MalformedURIException('Malformed skip argument in URI')

    only = _int_qualifier(qualifiers, config, "only", None)
    if only is not None and only <= 0:
        raise MalformedURIException('Malformed only argument in URI')

    skiperror = _bool_qualifier(qualifiers, config, "skiperror", True)
    noquality = _bool_qualifier(qualifiers, config, "noquality", False)

    if b"qualityformat" in qualifiers:
        offset = _quality_offset(qualifiers[b"qualityformat"][0])
    else:
        #offset=config["obi"]["qualityoffset"] # TODO discuss
        offset = _quality_offset(_config_value(config, "qualityformat", None))

    header = _bool_qualifier(qualifiers, config, "header", False)

    sep = _char_qualifier(qualifiers, config, "sep", None)

#    if b"quote" in qualifiers:
#        pass

    dec = _char_qualifier(qualifiers, config, "dec", b".")

    if b"nastring" in qualifiers:
        nastring = qualifiers[b"nastring"][0]
    else:
        nastring = _config_value(config, "nastring", b'NA')

    stripwhite = _bool_qualifier(qualifiers, config, "stripwhite", True)
    blanklineskip = _bool_qualifier(qualifiers, config, "blanklineskip", True)

    commentchar = _char_qualifier(qualifiers, config, "commentchar", b'#')

    if fileformat is not None:
        # The resolved sequence type is used here, so that a URI without
        # seqtype qualifier falls back on the configuration or on b'nuc'.
        if seqtype==b"nuc":
            objclass = Nuc_Seq
            if fileformat==b"fasta":
                iseq = fastaNucIterator(file,
                                        skip=skip,
                                        only=only)
            elif fileformat==b"fastq":
                iseq = fastqIterator(file,
                                     skip=skip,
                                     only=only,
                                     offset=offset,
                                     noquality=noquality)
            else:
                raise NotImplementedError('Sequence file format not implemented')
        elif seqtype==b"prot":
            raise NotImplementedError()
        else:
            raise NotImplementedError('Sequence type not implemented')
    else:
        # No explicit format: the parser is selected by the factory
        iseq,objclass = entryIteratorFactory(file,
                                             skip, only,
                                             seqtype,
                                             offset,
                                             noquality,
                                             skiperror,
                                             header,
                                             sep,
                                             dec,
                                             nastring,
                                             stripwhite,
                                             blanklineskip,
                                             commentchar)

    #tmpdms = get_temp_dms()

    return (file, iseq, objclass, urib)
|
|
|
|
|
|
|
|
|
|
|