Patch decoding of URL
This commit is contained in:
@ -22,6 +22,7 @@ default_config = { 'software' : "The OBITools",
|
|||||||
'loglevel' : 'INFO',
|
'loglevel' : 'INFO',
|
||||||
'progress' : True,
|
'progress' : True,
|
||||||
'inputURI' : None,
|
'inputURI' : None,
|
||||||
|
'outputURI' : None,
|
||||||
'defaultdms' : None,
|
'defaultdms' : None,
|
||||||
'inputview' : None,
|
'inputview' : None,
|
||||||
'outputview' : None,
|
'outputview' : None,
|
||||||
|
@ -7,4 +7,4 @@ cdef dict buildDefaultConfiguration(str root_config_name,
|
|||||||
dict config)
|
dict config)
|
||||||
|
|
||||||
cpdef dict getConfiguration(str root_config_name=?,
|
cpdef dict getConfiguration(str root_config_name=?,
|
||||||
dict config=?)
|
dict config=?)
|
||||||
|
@ -101,3 +101,14 @@ cpdef dict getConfiguration(str root_config_name="__default__",
|
|||||||
config['__done__']=True
|
config['__done__']=True
|
||||||
|
|
||||||
return config
|
return config
|
||||||
|
|
||||||
|
def logger(level, *messages):
    """Log *messages* through the configured logger, or fall back to stderr.

    The current configuration is fetched with ``getConfiguration()``; the
    logger object is read from ``config[root]['logger']`` where ``root`` is
    ``config["__root_config__"]``.  The message is forwarded only when
    ``config[root]['verbose']`` is true.

    Parameters:
        level: name of the logger method to call (the caller in this commit
            passes ``'info'``); it is resolved with ``getattr`` on the logger.
        *messages: positional arguments handed unchanged to that logger
            method (typically a format string followed by its arguments).

    Best-effort contract: if the configuration, the logger entry or the
    requested method is unavailable, the raw messages are printed to
    ``sys.stderr`` instead of raising.
    """
    # Local import: the fallback path must work even when the enclosing
    # module has not imported ``sys`` at top level.
    import sys

    try:
        config = getConfiguration()
        root = config["__root_config__"]
        log = config[root]['logger']
        if config[root]['verbose']:
            getattr(log, level)(*messages)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate to the caller.
        print(*messages, file=sys.stderr)
|
||||||
|
|
||||||
|
@ -42,5 +42,7 @@ cpdef getLogger(dict config):
|
|||||||
rootlogger.setLevel(loglevel)
|
rootlogger.setLevel(loglevel)
|
||||||
|
|
||||||
config[root]['logger']=rootlogger
|
config[root]['logger']=rootlogger
|
||||||
|
config[root]['verbose']=True
|
||||||
|
|
||||||
return rootlogger
|
return rootlogger
|
||||||
|
|
||||||
|
@ -2,8 +2,8 @@ def __addInputOption(optionManager):
|
|||||||
|
|
||||||
optionManager.add_argument(
|
optionManager.add_argument(
|
||||||
dest='obi:inputURI',
|
dest='obi:inputURI',
|
||||||
metavar='index',
|
metavar='INPUT',
|
||||||
help='index root filename (produced by the oa index command)')
|
help='Data source URI')
|
||||||
|
|
||||||
|
|
||||||
group = optionManager.add_argument_group("Restriction to a sub-part options",
|
group = optionManager.add_argument_group("Restriction to a sub-part options",
|
||||||
@ -23,7 +23,12 @@ def __addInputOption(optionManager):
|
|||||||
type=int,
|
type=int,
|
||||||
help="treat only N sequences")
|
help="treat only N sequences")
|
||||||
|
|
||||||
|
group.add_argument('--na-string',
|
||||||
|
action="store", dest="obi:nastring",
|
||||||
|
default=b"NA",
|
||||||
|
type=bytes,
|
||||||
|
help="String associated to Non Available (NA) values")
|
||||||
|
|
||||||
|
|
||||||
def __addSequenceInputOption(optionManager):
|
def __addSequenceInputOption(optionManager):
|
||||||
group = optionManager.add_argument_group("Input format options for sequence files")
|
group = optionManager.add_argument_group("Input format options for sequence files")
|
||||||
@ -124,12 +129,6 @@ def __addTabularInputOption(optionManager):
|
|||||||
type=bytes,
|
type=bytes,
|
||||||
help="Decimal separator")
|
help="Decimal separator")
|
||||||
|
|
||||||
group.add_argument('--na-string',
|
|
||||||
action="store", dest="obi:nastring",
|
|
||||||
default=b"NA",
|
|
||||||
type=bytes,
|
|
||||||
help="String associated to Non Available (NA) values")
|
|
||||||
|
|
||||||
group.add_argument('--strip-white',
|
group.add_argument('--strip-white',
|
||||||
action="store_false", dest="obi:stripwhite",
|
action="store_false", dest="obi:stripwhite",
|
||||||
default=True,
|
default=True,
|
||||||
@ -161,3 +160,14 @@ def addAllInputOption(optionManager):
|
|||||||
__addInputOption(optionManager)
|
__addInputOption(optionManager)
|
||||||
__addSequenceInputOption(optionManager)
|
__addSequenceInputOption(optionManager)
|
||||||
__addTabularInputOption(optionManager)
|
__addTabularInputOption(optionManager)
|
||||||
|
|
||||||
|
|
||||||
|
def __addOutputOption(optionManager):
    """Register the positional argument holding the output destination.

    The parsed value is stored under the ``obi:outputURI`` destination and
    is shown as ``OUTPUT`` in the usage/help text.
    """
    output_argument = {
        'dest': 'obi:outputURI',
        'metavar': 'OUTPUT',
        'help': 'Data destination URI',
    }
    optionManager.add_argument(**output_argument)


def addMinimalOutputOption(optionManager):
    """Add the minimal set of output options (the output URI only)."""
    __addOutputOption(optionManager)
|
||||||
|
@ -2,6 +2,8 @@
|
|||||||
|
|
||||||
# TODO cimport generates errors with argument numbers, but without them some variables can't be declared
|
# TODO cimport generates errors with argument numbers, but without them some variables can't be declared
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||||
from obitools3.files.universalopener cimport uopen
|
from obitools3.files.universalopener cimport uopen
|
||||||
from obitools3.parsers.fasta import fastaIterator
|
from obitools3.parsers.fasta import fastaIterator
|
||||||
@ -20,6 +22,8 @@ from obitools3.dms.capi.obitypes cimport obitype_t, \
|
|||||||
|
|
||||||
from obitools3.dms.capi.obierrno cimport obi_errno
|
from obitools3.dms.capi.obierrno cimport obi_errno
|
||||||
|
|
||||||
|
from obitools3.apps.optiongroups import addSequenceInputOption, addMinimalOutputOption
|
||||||
|
from obitools3.uri.decode import open_uri
|
||||||
|
|
||||||
__title__="Imports sequences from different formats into a DMS"
|
__title__="Imports sequences from different formats into a DMS"
|
||||||
|
|
||||||
@ -30,83 +34,14 @@ default_config = { 'destview' : None,
|
|||||||
'skiperror' : False,
|
'skiperror' : False,
|
||||||
'seqinformat' : None,
|
'seqinformat' : None,
|
||||||
'moltype' : 'nuc',
|
'moltype' : 'nuc',
|
||||||
'filename' : None
|
'source' : None
|
||||||
}
|
}
|
||||||
|
|
||||||
def addOptions(parser):
|
def addOptions(parser):
|
||||||
parser.add_argument(dest='import:filename',
|
|
||||||
metavar='<FILENAME>',
|
addSequenceInputOption(parser)
|
||||||
nargs='?',
|
addMinimalOutputOption(parser)
|
||||||
default=None,
|
|
||||||
help='Name of the sequence file to import' )
|
|
||||||
|
|
||||||
group=parser.add_argument_group('obi import specific options')
|
|
||||||
|
|
||||||
group.add_argument('--default-dms','-d',
|
|
||||||
action="store", dest="obi:defaultdms",
|
|
||||||
metavar='<DMS NAME>',
|
|
||||||
default=None,
|
|
||||||
type=str,
|
|
||||||
help="Name of the default DMS for reading and writing data")
|
|
||||||
|
|
||||||
group.add_argument('--destination-view','-v',
|
|
||||||
action="store", dest="import:destview",
|
|
||||||
metavar='<VIEW NAME>',
|
|
||||||
default=None,
|
|
||||||
type=str,
|
|
||||||
required=True,
|
|
||||||
help="Name of the default DMS for reading and writing data")
|
|
||||||
|
|
||||||
group.add_argument('--skip',
|
|
||||||
action="store", dest="import:skip",
|
|
||||||
metavar='<N>',
|
|
||||||
default=0,
|
|
||||||
type=int,
|
|
||||||
help="Skip the N first sequences")
|
|
||||||
|
|
||||||
group.add_argument('--only',
|
|
||||||
action="store", dest="import:only",
|
|
||||||
metavar='<N>',
|
|
||||||
default=None,
|
|
||||||
type=int,
|
|
||||||
help="Treat only N sequences")
|
|
||||||
|
|
||||||
group.add_argument('--skip-on-error',
|
|
||||||
action="store_true", dest="import:skiperror",
|
|
||||||
default=None,
|
|
||||||
help="Skip sequence entries with parse error")
|
|
||||||
|
|
||||||
group.add_argument('--fasta',
|
|
||||||
action="store_const", dest="import:seqinformat",
|
|
||||||
default=None,
|
|
||||||
const='fasta',
|
|
||||||
help="Input file is in fasta nucleic format (including obitools fasta extentions)")
|
|
||||||
|
|
||||||
group.add_argument('--fastq',
|
|
||||||
action="store_const", dest="import:seqinformat",
|
|
||||||
default=None,
|
|
||||||
const='fastq',
|
|
||||||
help="Input file is in sanger fastq nucleic format (standard fastq)")
|
|
||||||
|
|
||||||
group.add_argument('--nuc',
|
|
||||||
action="store_const", dest="import:moltype",
|
|
||||||
default=None,
|
|
||||||
const='nuc',
|
|
||||||
help="Input file contains nucleic sequences")
|
|
||||||
|
|
||||||
group.add_argument('--prot',
|
|
||||||
action="store_const", dest="import:moltype",
|
|
||||||
default=None,
|
|
||||||
const='pep',
|
|
||||||
help="Input file contains protein sequences")
|
|
||||||
|
|
||||||
group.add_argument('--NA',
|
|
||||||
action="store", dest="import:NA",
|
|
||||||
metavar='<NA_value>',
|
|
||||||
default='NA',
|
|
||||||
type=str,
|
|
||||||
help="Character string for Not Available values in the input file "
|
|
||||||
"(default: 'NA'")
|
|
||||||
|
|
||||||
|
|
||||||
def run(config):
|
def run(config):
|
||||||
@ -142,147 +77,159 @@ def run(config):
|
|||||||
cdef ProgressBar pb
|
cdef ProgressBar pb
|
||||||
global obi_errno
|
global obi_errno
|
||||||
|
|
||||||
pb = ProgressBar(1000000, config, seconde=5) # TODO should be number of records in file
|
|
||||||
|
|
||||||
inputs = uopen(config['import']['filename'])
|
|
||||||
|
|
||||||
# Create or open DMS
|
|
||||||
d = DMS.open_or_new(config['obi']['defaultdms'])
|
|
||||||
|
|
||||||
get_quality = False
|
logger=config['obi']['logger']
|
||||||
NUC_SEQS_view = False
|
|
||||||
if config['import']['seqinformat']=='fasta':
|
|
||||||
get_quality = False
|
|
||||||
NUC_SEQS_view = True
|
|
||||||
iseq = fastaIterator(inputs, skip=config['import']['skip'])
|
|
||||||
view = View_NUC_SEQS.new(d, config['import']['destview'], quality=get_quality)
|
|
||||||
elif config['import']['seqinformat']=='fastq':
|
|
||||||
get_quality = True
|
|
||||||
NUC_SEQS_view = True
|
|
||||||
iseq = fastqIterator(inputs, skip=config['import']['skip'])
|
|
||||||
view = View_NUC_SEQS.new(d, config['import']['destview'], quality=get_quality)
|
|
||||||
else:
|
|
||||||
raise RuntimeError('File format not handled')
|
|
||||||
|
|
||||||
# Save basic columns in variables for optimization
|
|
||||||
if NUC_SEQS_view :
|
|
||||||
id_col = view["ID"]
|
|
||||||
def_col = view["DEFINITION"]
|
|
||||||
seq_col = view["NUC_SEQ"]
|
|
||||||
if get_quality :
|
|
||||||
qual_col = view["QUALITY"]
|
|
||||||
|
|
||||||
dcols = {}
|
|
||||||
|
|
||||||
i = 0
|
logger.info("obi import : imports file into an DMS")
|
||||||
for seq in iseq :
|
|
||||||
if i == config['import']['only'] :
|
|
||||||
break
|
|
||||||
else :
|
|
||||||
pb(i)
|
|
||||||
if NUC_SEQS_view :
|
|
||||||
id_col[i] = seq['id']
|
|
||||||
def_col[i] = seq['definition']
|
|
||||||
seq_col[i] = seq['sequence']
|
|
||||||
if get_quality :
|
|
||||||
qual_col[i] = seq['quality']
|
|
||||||
|
|
||||||
for tag in seq['tags'] :
|
|
||||||
|
|
||||||
value = seq['tags'][tag]
|
|
||||||
|
|
||||||
# Check NA value
|
|
||||||
if value == config['import']['NA'] :
|
|
||||||
value = None
|
|
||||||
|
|
||||||
if tag not in dcols :
|
|
||||||
|
|
||||||
value_type = type(value)
|
|
||||||
nb_elts = 1
|
|
||||||
value_obitype = OBI_VOID
|
|
||||||
|
|
||||||
if value_type == dict or value_type == list :
|
|
||||||
nb_elts = len(value)
|
|
||||||
elt_names = list(value)
|
|
||||||
else :
|
|
||||||
nb_elts = 1
|
|
||||||
elt_names = None
|
|
||||||
|
|
||||||
value_obitype = get_obitype(value)
|
|
||||||
|
|
||||||
if value_obitype != OBI_VOID :
|
|
||||||
dcols[tag] = (Column.new_column(view, tag, value_obitype, nb_elements_per_line=nb_elts, elements_names=elt_names), value_obitype)
|
|
||||||
|
|
||||||
# Fill value
|
|
||||||
dcols[tag][0][i] = value
|
|
||||||
|
|
||||||
# TODO else log error?
|
|
||||||
|
|
||||||
else :
|
|
||||||
|
|
||||||
rewrite = False
|
|
||||||
|
|
||||||
# Check type adequation
|
|
||||||
old_type = dcols[tag][1]
|
|
||||||
new_type = OBI_VOID
|
|
||||||
new_type = update_obitype(old_type, value)
|
|
||||||
if old_type != new_type :
|
|
||||||
rewrite = True
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Fill value
|
|
||||||
dcols[tag][0][i] = value
|
|
||||||
|
|
||||||
except IndexError :
|
|
||||||
|
|
||||||
value_type = type(value)
|
|
||||||
old_column = dcols[tag][0]
|
|
||||||
old_nb_elements_per_line = old_column.nb_elements_per_line
|
|
||||||
new_nb_elements_per_line = 0
|
|
||||||
old_elements_names = old_column.elements_names
|
|
||||||
new_elements_names = None
|
|
||||||
|
|
||||||
#####################################################################
|
inputs = open_uri(config['obi']['inputURI'])
|
||||||
|
|
||||||
# Check the length and keys of column lines if needed
|
print(inputs)
|
||||||
if value_type == dict : # Check dictionary keys
|
|
||||||
for k in value :
|
sys.exit()
|
||||||
if k not in old_elements_names :
|
|
||||||
new_elements_names = list(set(old_elements_names+[tobytes(k) for k in value]))
|
# pb = ProgressBar(1000000, config, seconde=5) # TODO should be number of records in file
|
||||||
rewrite = True
|
#
|
||||||
break
|
# inputs = uopen(config['import']['filename'])
|
||||||
|
#
|
||||||
elif value_type == list or value_type == tuple : # Check vector length
|
# # Create or open DMS
|
||||||
if old_nb_elements_per_line < len(value) :
|
# d = DMS.open_or_new(config['obi']['defaultdms'])
|
||||||
new_nb_elements_per_line = len(value)
|
#
|
||||||
rewrite = True
|
# get_quality = False
|
||||||
|
# NUC_SEQS_view = False
|
||||||
#####################################################################
|
# if config['import']['seqinformat']=='fasta':
|
||||||
|
# get_quality = False
|
||||||
if rewrite :
|
# NUC_SEQS_view = True
|
||||||
if new_nb_elements_per_line == 0 and new_elements_names is not None :
|
# iseq = fastaIterator(inputs, skip=config['import']['skip'])
|
||||||
new_nb_elements_per_line = len(new_elements_names)
|
# view = View_NUC_SEQS.new(d, config['import']['destview'], quality=get_quality)
|
||||||
|
# elif config['import']['seqinformat']=='fastq':
|
||||||
# Reset obierrno
|
# get_quality = True
|
||||||
obi_errno = 0
|
# NUC_SEQS_view = True
|
||||||
|
# iseq = fastqIterator(inputs, skip=config['import']['skip'])
|
||||||
dcols[tag] = (view.rewrite_column_with_diff_attributes(old_column.name,
|
# view = View_NUC_SEQS.new(d, config['import']['destview'], quality=get_quality)
|
||||||
new_data_type=new_type,
|
# else:
|
||||||
new_nb_elements_per_line=new_nb_elements_per_line,
|
# raise RuntimeError('File format not handled')
|
||||||
new_elements_names=new_elements_names),
|
#
|
||||||
value_obitype)
|
# # Save basic columns in variables for optimization
|
||||||
|
# if NUC_SEQS_view :
|
||||||
# Update the dictionary:
|
# id_col = view["ID"]
|
||||||
for t in dcols :
|
# def_col = view["DEFINITION"]
|
||||||
dcols[t] = (view[t], dcols[t][1])
|
# seq_col = view["NUC_SEQ"]
|
||||||
|
# if get_quality :
|
||||||
# Fill value
|
# qual_col = view["QUALITY"]
|
||||||
dcols[tag][0][i] = value
|
#
|
||||||
|
# dcols = {}
|
||||||
i+=1
|
#
|
||||||
|
# i = 0
|
||||||
print("\n")
|
# for seq in iseq :
|
||||||
print(view.__repr__())
|
# if i == config['import']['only'] :
|
||||||
|
# break
|
||||||
d.close()
|
# else :
|
||||||
|
# pb(i)
|
||||||
|
# if NUC_SEQS_view :
|
||||||
|
# id_col[i] = seq['id']
|
||||||
|
# def_col[i] = seq['definition']
|
||||||
|
# seq_col[i] = seq['sequence']
|
||||||
|
# if get_quality :
|
||||||
|
# qual_col[i] = seq['quality']
|
||||||
|
#
|
||||||
|
# for tag in seq['tags'] :
|
||||||
|
#
|
||||||
|
# value = seq['tags'][tag]
|
||||||
|
#
|
||||||
|
# # Check NA value
|
||||||
|
# if value == config['import']['NA'] :
|
||||||
|
# value = None
|
||||||
|
#
|
||||||
|
# if tag not in dcols :
|
||||||
|
#
|
||||||
|
# value_type = type(value)
|
||||||
|
# nb_elts = 1
|
||||||
|
# value_obitype = OBI_VOID
|
||||||
|
#
|
||||||
|
# if value_type == dict or value_type == list :
|
||||||
|
# nb_elts = len(value)
|
||||||
|
# elt_names = list(value)
|
||||||
|
# else :
|
||||||
|
# nb_elts = 1
|
||||||
|
# elt_names = None
|
||||||
|
#
|
||||||
|
# value_obitype = get_obitype(value)
|
||||||
|
#
|
||||||
|
# if value_obitype != OBI_VOID :
|
||||||
|
# dcols[tag] = (Column.new_column(view, tag, value_obitype, nb_elements_per_line=nb_elts, elements_names=elt_names), value_obitype)
|
||||||
|
#
|
||||||
|
# # Fill value
|
||||||
|
# dcols[tag][0][i] = value
|
||||||
|
#
|
||||||
|
# # TODO else log error?
|
||||||
|
#
|
||||||
|
# else :
|
||||||
|
#
|
||||||
|
# rewrite = False
|
||||||
|
#
|
||||||
|
# # Check type adequation
|
||||||
|
# old_type = dcols[tag][1]
|
||||||
|
# new_type = OBI_VOID
|
||||||
|
# new_type = update_obitype(old_type, value)
|
||||||
|
# if old_type != new_type :
|
||||||
|
# rewrite = True
|
||||||
|
#
|
||||||
|
# try:
|
||||||
|
# # Fill value
|
||||||
|
# dcols[tag][0][i] = value
|
||||||
|
#
|
||||||
|
# except IndexError :
|
||||||
|
#
|
||||||
|
# value_type = type(value)
|
||||||
|
# old_column = dcols[tag][0]
|
||||||
|
# old_nb_elements_per_line = old_column.nb_elements_per_line
|
||||||
|
# new_nb_elements_per_line = 0
|
||||||
|
# old_elements_names = old_column.elements_names
|
||||||
|
# new_elements_names = None
|
||||||
|
#
|
||||||
|
# #####################################################################
|
||||||
|
#
|
||||||
|
# # Check the length and keys of column lines if needed
|
||||||
|
# if value_type == dict : # Check dictionary keys
|
||||||
|
# for k in value :
|
||||||
|
# if k not in old_elements_names :
|
||||||
|
# new_elements_names = list(set(old_elements_names+[tobytes(k) for k in value]))
|
||||||
|
# rewrite = True
|
||||||
|
# break
|
||||||
|
#
|
||||||
|
# elif value_type == list or value_type == tuple : # Check vector length
|
||||||
|
# if old_nb_elements_per_line < len(value) :
|
||||||
|
# new_nb_elements_per_line = len(value)
|
||||||
|
# rewrite = True
|
||||||
|
#
|
||||||
|
# #####################################################################
|
||||||
|
#
|
||||||
|
# if rewrite :
|
||||||
|
# if new_nb_elements_per_line == 0 and new_elements_names is not None :
|
||||||
|
# new_nb_elements_per_line = len(new_elements_names)
|
||||||
|
#
|
||||||
|
# # Reset obierrno
|
||||||
|
# obi_errno = 0
|
||||||
|
#
|
||||||
|
# dcols[tag] = (view.rewrite_column_with_diff_attributes(old_column.name,
|
||||||
|
# new_data_type=new_type,
|
||||||
|
# new_nb_elements_per_line=new_nb_elements_per_line,
|
||||||
|
# new_elements_names=new_elements_names),
|
||||||
|
# value_obitype)
|
||||||
|
#
|
||||||
|
# # Update the dictionary:
|
||||||
|
# for t in dcols :
|
||||||
|
# dcols[t] = (view[t], dcols[t][1])
|
||||||
|
#
|
||||||
|
# # Fill value
|
||||||
|
# dcols[tag][0][i] = value
|
||||||
|
#
|
||||||
|
# i+=1
|
||||||
|
#
|
||||||
|
# print("\n")
|
||||||
|
# print(view.__repr__())
|
||||||
|
#
|
||||||
|
# d.close()
|
||||||
|
|
||||||
|
@ -46,8 +46,8 @@ def entryIteratorFactory(lineiterator,
|
|||||||
i = iter(lb)
|
i = iter(lb)
|
||||||
|
|
||||||
first=next(i)
|
first=next(i)
|
||||||
|
|
||||||
format="tab"
|
format=b"tabular"
|
||||||
|
|
||||||
if first[0]==">":
|
if first[0]==">":
|
||||||
format=b"fasta"
|
format=b"fasta"
|
||||||
@ -61,9 +61,6 @@ def entryIteratorFactory(lineiterator,
|
|||||||
format=b"ecopcrfile"
|
format=b"ecopcrfile"
|
||||||
elif is_ngsfilter_line(first):
|
elif is_ngsfilter_line(first):
|
||||||
format=b"ngsfilter"
|
format=b"ngsfilter"
|
||||||
else:
|
|
||||||
format=b"tabular"
|
|
||||||
|
|
||||||
|
|
||||||
if format==b'fasta':
|
if format==b'fasta':
|
||||||
if seqtype == b'nuc':
|
if seqtype == b'nuc':
|
||||||
|
@ -4,3 +4,7 @@ from obitools3.dms.dms cimport DMS
|
|||||||
from obitools3.dms.view.view cimport View
|
from obitools3.dms.view.view cimport View
|
||||||
from obitools3.dms.column.column cimport Column
|
from obitools3.dms.column.column cimport Column
|
||||||
from obitools3.dms.taxo.taxo cimport Taxonomy
|
from obitools3.dms.taxo.taxo cimport Taxonomy
|
||||||
|
|
||||||
|
from obitools3.utils cimport tobytes, tostr
|
||||||
|
from obitools3.files.universalopener cimport uopen
|
||||||
|
|
||||||
|
@ -3,16 +3,14 @@
|
|||||||
from urllib.parse import urlparse, urlunparse, parse_qs, ParseResultBytes
|
from urllib.parse import urlparse, urlunparse, parse_qs, ParseResultBytes
|
||||||
from os.path import isdir, isfile, basename, join
|
from os.path import isdir, isfile, basename, join
|
||||||
|
|
||||||
from obitools3.utils import tobytes
|
|
||||||
|
|
||||||
from obitools3.dms.dms import DMS
|
from obitools3.dms.dms import DMS
|
||||||
|
|
||||||
from obitools3.files.universalopener import uopen
|
|
||||||
from obitools3.parsers.fasta import fastaNucIterator
|
from obitools3.parsers.fasta import fastaNucIterator
|
||||||
from obitools3.parsers.fastq import fastqIterator
|
from obitools3.parsers.fastq import fastqIterator
|
||||||
from obitools3.parsers.universal import entryIteratorFactory
|
from obitools3.parsers.universal import entryIteratorFactory
|
||||||
|
|
||||||
from obitools3.dms.obiseq import Nuc_Seq
|
from obitools3.dms.obiseq import Nuc_Seq
|
||||||
|
from obitools3.apps.config import getConfiguration,logger
|
||||||
|
|
||||||
class MalformedURIException(RuntimeError):
|
class MalformedURIException(RuntimeError):
|
||||||
pass
|
pass
|
||||||
@ -130,21 +128,29 @@ def open_dms_element(DMS dms, bytes path):
|
|||||||
return (dms,subsubpart)
|
return (dms,subsubpart)
|
||||||
|
|
||||||
|
|
||||||
def open_uri(uri,input=True,config={}):
|
def open_uri(uri,bint input=True):
|
||||||
cdef bytes urib = tobytes(uri)
|
cdef bytes urib = tobytes(uri)
|
||||||
cdef bytes scheme
|
cdef bytes scheme
|
||||||
cdef tuple dms
|
cdef tuple dms
|
||||||
|
cdef dict qualifiers
|
||||||
|
cdef DMS default_dms
|
||||||
|
|
||||||
|
config = getConfiguration()
|
||||||
urip = urlparse(urib)
|
urip = urlparse(urib)
|
||||||
|
|
||||||
|
if 'obi' not in config:
|
||||||
|
config['obi']={}
|
||||||
|
|
||||||
default_dms=config["obi"]["defaultdms"]
|
try:
|
||||||
|
default_dms=config["obi"]["defaultdms"]
|
||||||
|
except KeyError:
|
||||||
|
default_dms=None
|
||||||
|
|
||||||
scheme = urip.scheme
|
scheme = urip.scheme
|
||||||
|
|
||||||
error = None
|
error = None
|
||||||
|
|
||||||
if scheme==b"" :
|
if scheme==b"" :
|
||||||
scheme=b'file'
|
|
||||||
dms = open_dms(urip.path)
|
dms = open_dms(urip.path)
|
||||||
if dms is None and default_dms is not None:
|
if dms is None and default_dms is not None:
|
||||||
dms=(default_dms,urip.path)
|
dms=(default_dms,urip.path)
|
||||||
@ -167,17 +173,13 @@ def open_uri(uri,input=True,config={}):
|
|||||||
return (resource[0],resource[1],urlunparse(urip))
|
return (resource[0],resource[1],urlunparse(urip))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error=e
|
error=e
|
||||||
|
|
||||||
urip = ParseResultBytes(scheme=scheme,
|
if not urip.scheme:
|
||||||
netloc=urip.netloc,
|
urib=b"file:"+urib
|
||||||
path=urip.path,
|
|
||||||
params=urip.params,
|
|
||||||
query=urip.query,
|
|
||||||
fragment=urip.fragment)
|
|
||||||
uri=urlunparse(urip)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
file = uopen(uri)
|
logger('info','Trying to open file : %s', tostr(urib))
|
||||||
|
file = uopen(tostr(urib))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
file = None
|
file = None
|
||||||
error=e
|
error=e
|
||||||
@ -189,17 +191,26 @@ def open_uri(uri,input=True,config={}):
|
|||||||
if b'format' in qualifiers:
|
if b'format' in qualifiers:
|
||||||
format = qualifiers[b'format'][0]
|
format = qualifiers[b'format'][0]
|
||||||
else:
|
else:
|
||||||
format=config["obi"]["fileformat"]
|
try:
|
||||||
|
format=config["obi"]["fileformat"]
|
||||||
|
except KeyError:
|
||||||
|
format=None
|
||||||
|
|
||||||
if b'seqtype' in qualifiers:
|
if b'seqtype' in qualifiers:
|
||||||
seqtype=qualifiers[b'seqtype'][0]
|
seqtype=qualifiers[b'seqtype'][0]
|
||||||
else:
|
else:
|
||||||
seqtype=config["obi"]["seqtype"]
|
try:
|
||||||
|
seqtype=config["obi"]["seqtype"]
|
||||||
|
except KeyError:
|
||||||
|
seqtype=b'nuc'
|
||||||
|
|
||||||
if b'skip' in qualifiers:
|
if b'skip' in qualifiers:
|
||||||
skip=int(qualifiers[b"skip"][0])
|
skip=int(qualifiers[b"skip"][0])
|
||||||
else:
|
else:
|
||||||
skip=config["obi"]["skeep"]
|
try:
|
||||||
|
skip=config["obi"]["skip"]
|
||||||
|
except KeyError:
|
||||||
|
skip=0
|
||||||
if skip < 0:
|
if skip < 0:
|
||||||
raise MalformedURIException('Malformed skip argument in URI')
|
raise MalformedURIException('Malformed skip argument in URI')
|
||||||
|
|
||||||
@ -207,8 +218,11 @@ def open_uri(uri,input=True,config={}):
|
|||||||
if b'only' in qualifiers:
|
if b'only' in qualifiers:
|
||||||
only=int(qualifiers[b"only"][0])
|
only=int(qualifiers[b"only"][0])
|
||||||
else:
|
else:
|
||||||
only=config["obi"]["only"]
|
try:
|
||||||
if only <= 0:
|
only=config["obi"]["only"]
|
||||||
|
except KeyError:
|
||||||
|
only=None
|
||||||
|
if only is not None and only <= 0:
|
||||||
raise MalformedURIException('Malformed only argument in URI')
|
raise MalformedURIException('Malformed only argument in URI')
|
||||||
|
|
||||||
|
|
||||||
@ -218,7 +232,10 @@ def open_uri(uri,input=True,config={}):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise MalformedURIException('Malformed skiperror argument in URI')
|
raise MalformedURIException('Malformed skiperror argument in URI')
|
||||||
else:
|
else:
|
||||||
skiperror=config["obi"]["skiperror"]
|
try:
|
||||||
|
skiperror=config["obi"]["skiperror"]
|
||||||
|
except KeyError:
|
||||||
|
skiperror=True
|
||||||
if not isinstance(skiperror, bool):
|
if not isinstance(skiperror, bool):
|
||||||
raise MalformedURIException('Malformed skiperror argument in URI')
|
raise MalformedURIException('Malformed skiperror argument in URI')
|
||||||
|
|
||||||
@ -228,7 +245,10 @@ def open_uri(uri,input=True,config={}):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise MalformedURIException('Malformed noquality argument in URI')
|
raise MalformedURIException('Malformed noquality argument in URI')
|
||||||
else:
|
else:
|
||||||
noquality=config["obi"]["noquality"]
|
try:
|
||||||
|
noquality=config["obi"]["noquality"]
|
||||||
|
except KeyError:
|
||||||
|
noquality=False
|
||||||
if not isinstance(noquality, bool):
|
if not isinstance(noquality, bool):
|
||||||
raise MalformedURIException('Malformed noquality argument in URI')
|
raise MalformedURIException('Malformed noquality argument in URI')
|
||||||
|
|
||||||
@ -238,7 +258,10 @@ def open_uri(uri,input=True,config={}):
|
|||||||
elif qualifiers[b"qualityformat"][0]=="solexa":
|
elif qualifiers[b"qualityformat"][0]=="solexa":
|
||||||
offset=64
|
offset=64
|
||||||
else:
|
else:
|
||||||
offset=config["obi"]["qualityoffset"]
|
try:
|
||||||
|
offset=config["obi"]["qualityoffset"]
|
||||||
|
except KeyError:
|
||||||
|
offset=33
|
||||||
|
|
||||||
if b"header" in qualifiers:
|
if b"header" in qualifiers:
|
||||||
try:
|
try:
|
||||||
@ -246,14 +269,20 @@ def open_uri(uri,input=True,config={}):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise MalformedURIException('Malformed header argument in URI')
|
raise MalformedURIException('Malformed header argument in URI')
|
||||||
else:
|
else:
|
||||||
header=config["obi"]["header"]
|
try:
|
||||||
|
header=config["obi"]["header"]
|
||||||
|
except KeyError:
|
||||||
|
header=False
|
||||||
if not isinstance(header, bool):
|
if not isinstance(header, bool):
|
||||||
raise MalformedURIException('Malformed header argument in URI')
|
raise MalformedURIException('Malformed header argument in URI')
|
||||||
|
|
||||||
if b"sep" in qualifiers:
|
if b"sep" in qualifiers:
|
||||||
sep=qualifiers[b"sep"][0][0]
|
sep=qualifiers[b"sep"][0][0]
|
||||||
else:
|
else:
|
||||||
seq=config["obi"]["sep"]
|
try:
|
||||||
|
sep=config["obi"]["sep"]
|
||||||
|
except KeyError:
|
||||||
|
sep=None
|
||||||
|
|
||||||
# if b"quote" in qualifiers:
|
# if b"quote" in qualifiers:
|
||||||
# pass
|
# pass
|
||||||
@ -261,20 +290,29 @@ def open_uri(uri,input=True,config={}):
|
|||||||
if b"dec" in qualifiers:
|
if b"dec" in qualifiers:
|
||||||
dec=qualifiers[b"dec"][0][0]
|
dec=qualifiers[b"dec"][0][0]
|
||||||
else:
|
else:
|
||||||
dec=config["obi"]["dec"]
|
try:
|
||||||
|
dec=config["obi"]["dec"]
|
||||||
|
except KeyError:
|
||||||
|
dec=b"."
|
||||||
|
|
||||||
if b"nastring" in qualifiers:
|
if b"nastring" in qualifiers:
|
||||||
nastring=qualifiers[b"nastring"][0]
|
nastring=qualifiers[b"nastring"][0]
|
||||||
else:
|
else:
|
||||||
nastring=config["obi"]["nastring"]
|
try:
|
||||||
|
nastring=config["obi"]["nastring"]
|
||||||
|
except KeyError:
|
||||||
|
nastring=b'NA'
|
||||||
|
|
||||||
if b"stripwhite" in qualifiers:
|
if b"stripwhite" in qualifiers:
|
||||||
try:
|
try:
|
||||||
stripwhite=eval(qualifiers[b"stripwhite"][0])
|
stripwhite=eval(qualifiers[b"stripwhite"][0])
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise MalformedURIException('Malformed stripwhite argument in URI')
|
raise MalformedURIException('Malformed stripwhite argument in URI')
|
||||||
else:
|
else:
|
||||||
stripwhite=config["obi"]["stripwhite"]
|
try:
|
||||||
|
stripwhite=config["obi"]["stripwhite"]
|
||||||
|
except KeyError:
|
||||||
|
stripwhite=True
|
||||||
if not isinstance(stripwhite, bool):
|
if not isinstance(stripwhite, bool):
|
||||||
raise MalformedURIException('Malformed stripwhite argument in URI')
|
raise MalformedURIException('Malformed stripwhite argument in URI')
|
||||||
|
|
||||||
@ -284,14 +322,20 @@ def open_uri(uri,input=True,config={}):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise MalformedURIException('Malformed blanklineskip argument in URI')
|
raise MalformedURIException('Malformed blanklineskip argument in URI')
|
||||||
else:
|
else:
|
||||||
blanklineskip=config["obi"]["blanklineskip"]
|
try:
|
||||||
|
blanklineskip=config["obi"]["blanklineskip"]
|
||||||
|
except KeyError:
|
||||||
|
blanklineskip=True
|
||||||
if not isinstance(blanklineskip, bool):
|
if not isinstance(blanklineskip, bool):
|
||||||
raise MalformedURIException('Malformed blanklineskip argument in URI')
|
raise MalformedURIException('Malformed blanklineskip argument in URI')
|
||||||
|
|
||||||
if b"commentchar" in qualifiers:
|
if b"commentchar" in qualifiers:
|
||||||
commentchar=qualifiers[b"commentchar"][0][0]
|
commentchar=qualifiers[b"commentchar"][0][0]
|
||||||
else:
|
else:
|
||||||
commentchar=config["obi"]["commentchar"]
|
try:
|
||||||
|
commentchar=config["obi"]["commentchar"]
|
||||||
|
except KeyError:
|
||||||
|
commentchar=b'#'
|
||||||
|
|
||||||
if format is not None:
|
if format is not None:
|
||||||
if qualifiers[b"seqtype"]==b"nuc":
|
if qualifiers[b"seqtype"]==b"nuc":
|
||||||
|
Reference in New Issue
Block a user