obi import with new input/output API
This commit is contained in:
@ -8,12 +8,10 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
|||||||
from obitools3.files.universalopener cimport uopen
|
from obitools3.files.universalopener cimport uopen
|
||||||
from obitools3.parsers.fasta import fastaIterator
|
from obitools3.parsers.fasta import fastaIterator
|
||||||
from obitools3.parsers.fastq import fastqIterator
|
from obitools3.parsers.fastq import fastqIterator
|
||||||
from obitools3.dms.dms import DMS # TODO cimport doesn't work
|
|
||||||
from obitools3.dms.view.view cimport View
|
from obitools3.dms.view.view cimport View
|
||||||
from obitools3.dms.view.typed_view.view_NUC_SEQS import View_NUC_SEQS # TODO cimport doesn't work
|
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||||
from obitools3.dms.column.column cimport Column
|
from obitools3.dms.column.column cimport Column
|
||||||
|
from obitools3.dms.obiseq cimport Nuc_Seq
|
||||||
from obitools3.dms.obiseq import Nuc_Seq
|
|
||||||
|
|
||||||
from obitools3.utils cimport tobytes, \
|
from obitools3.utils cimport tobytes, \
|
||||||
get_obitype, \
|
get_obitype, \
|
||||||
@ -25,6 +23,7 @@ from obitools3.dms.capi.obitypes cimport obitype_t, \
|
|||||||
from obitools3.dms.capi.obierrno cimport obi_errno
|
from obitools3.dms.capi.obierrno cimport obi_errno
|
||||||
|
|
||||||
from obitools3.apps.optiongroups import addSequenceInputOption, addMinimalOutputOption
|
from obitools3.apps.optiongroups import addSequenceInputOption, addMinimalOutputOption
|
||||||
|
|
||||||
from obitools3.uri.decode import open_uri
|
from obitools3.uri.decode import open_uri
|
||||||
|
|
||||||
from obitools3.apps.config import logger
|
from obitools3.apps.config import logger
|
||||||
@ -50,6 +49,8 @@ def addOptions(parser):
|
|||||||
|
|
||||||
def run(config):
|
def run(config):
|
||||||
|
|
||||||
|
cdef tuple input
|
||||||
|
cdef tuple output
|
||||||
cdef int i
|
cdef int i
|
||||||
cdef type value_type
|
cdef type value_type
|
||||||
cdef obitype_t value_obitype
|
cdef obitype_t value_obitype
|
||||||
@ -62,7 +63,6 @@ def run(config):
|
|||||||
cdef View view
|
cdef View view
|
||||||
cdef object iseq
|
cdef object iseq
|
||||||
cdef object seq
|
cdef object seq
|
||||||
cdef object inputs
|
|
||||||
cdef Column id_col
|
cdef Column id_col
|
||||||
cdef Column def_col
|
cdef Column def_col
|
||||||
cdef Column seq_col
|
cdef Column seq_col
|
||||||
@ -71,7 +71,7 @@ def run(config):
|
|||||||
cdef bint rewrite
|
cdef bint rewrite
|
||||||
cdef dict dcols
|
cdef dict dcols
|
||||||
cdef int skipping
|
cdef int skipping
|
||||||
cdef str tag
|
cdef bytes tag
|
||||||
cdef object value
|
cdef object value
|
||||||
cdef list elt_names
|
cdef list elt_names
|
||||||
cdef int old_nb_elements_per_line
|
cdef int old_nb_elements_per_line
|
||||||
@ -84,165 +84,157 @@ def run(config):
|
|||||||
|
|
||||||
logger("info","obi import : imports file into an DMS")
|
logger("info","obi import : imports file into an DMS")
|
||||||
|
|
||||||
inputs = open_uri(config['obi']['inputURI'])
|
input = open_uri(config['obi']['inputURI'])
|
||||||
|
|
||||||
if inputs[2]==Nuc_Seq:
|
if input[2]==Nuc_Seq:
|
||||||
v = View_NUC_SEQS
|
v = View_NUC_SEQS
|
||||||
else:
|
else:
|
||||||
v= View
|
v = View
|
||||||
|
|
||||||
print(v)
|
|
||||||
|
|
||||||
output = open_uri(config['obi']['outputURI'],
|
output = open_uri(config['obi']['outputURI'],
|
||||||
input=False,
|
input=False,
|
||||||
newviewtype=v)
|
newviewtype=v)
|
||||||
|
|
||||||
print(input)
|
#print(input)
|
||||||
print(output)
|
#print(output)
|
||||||
|
|
||||||
sys.exit()
|
pb = ProgressBar(1000000, config, seconde=5) # TODO should be number of records in file
|
||||||
|
|
||||||
# pb = ProgressBar(1000000, config, seconde=5) # TODO should be number of records in file
|
iseq = input[1]
|
||||||
#
|
|
||||||
# inputs = uopen(config['import']['filename'])
|
get_quality = False
|
||||||
#
|
NUC_SEQS_view = False
|
||||||
# # Create or open DMS
|
if isinstance(output[1], View) :
|
||||||
# d = DMS.open_or_new(config['obi']['defaultdms'])
|
view = output[1]
|
||||||
#
|
if output[2] == View_NUC_SEQS :
|
||||||
# get_quality = False
|
NUC_SEQS_view = True
|
||||||
# NUC_SEQS_view = False
|
if "QUALITY" in view : # TODO
|
||||||
# if config['import']['seqinformat']=='fasta':
|
get_quality = True
|
||||||
# get_quality = False
|
else:
|
||||||
# NUC_SEQS_view = True
|
raise NotImplementedError()
|
||||||
# iseq = fastaIterator(inputs, skip=config['import']['skip'])
|
|
||||||
# view = View_NUC_SEQS.new(d, config['import']['destview'], quality=get_quality)
|
# Save basic columns in variables for optimization
|
||||||
# elif config['import']['seqinformat']=='fastq':
|
if NUC_SEQS_view :
|
||||||
# get_quality = True
|
id_col = view[b"ID"]
|
||||||
# NUC_SEQS_view = True
|
def_col = view[b"DEFINITION"]
|
||||||
# iseq = fastqIterator(inputs, skip=config['import']['skip'])
|
seq_col = view[b"NUC_SEQ"]
|
||||||
# view = View_NUC_SEQS.new(d, config['import']['destview'], quality=get_quality)
|
if get_quality :
|
||||||
# else:
|
qual_col = view[b"QUALITY"]
|
||||||
# raise RuntimeError('File format not handled')
|
|
||||||
#
|
dcols = {}
|
||||||
# # Save basic columns in variables for optimization
|
|
||||||
# if NUC_SEQS_view :
|
i = 0
|
||||||
# id_col = view["ID"]
|
for seq in iseq :
|
||||||
# def_col = view["DEFINITION"]
|
|
||||||
# seq_col = view["NUC_SEQ"]
|
pb(i)
|
||||||
# if get_quality :
|
|
||||||
# qual_col = view["QUALITY"]
|
if NUC_SEQS_view :
|
||||||
#
|
id_col[i] = seq.id
|
||||||
# dcols = {}
|
def_col[i] = seq.definition
|
||||||
#
|
seq_col[i] = seq.seq
|
||||||
# i = 0
|
|
||||||
# for seq in iseq :
|
if get_quality :
|
||||||
# if i == config['import']['only'] :
|
qual_col[i] = seq.quality
|
||||||
# break
|
|
||||||
# else :
|
for tag in seq :
|
||||||
# pb(i)
|
|
||||||
# if NUC_SEQS_view :
|
if tag != b"ID" and tag != b"DEFINITION" and tag != b"NUC_SEQ" and tag != b"QUALITY" : # TODO hmmm...
|
||||||
# id_col[i] = seq['id']
|
|
||||||
# def_col[i] = seq['definition']
|
value = seq[tag]
|
||||||
# seq_col[i] = seq['sequence']
|
|
||||||
# if get_quality :
|
# Check NA value
|
||||||
# qual_col[i] = seq['quality']
|
if value == config['obi']['nastring'] :
|
||||||
#
|
value = None
|
||||||
# for tag in seq['tags'] :
|
|
||||||
#
|
if tag not in dcols :
|
||||||
# value = seq['tags'][tag]
|
|
||||||
#
|
value_type = type(value)
|
||||||
# # Check NA value
|
nb_elts = 1
|
||||||
# if value == config['import']['NA'] :
|
value_obitype = OBI_VOID
|
||||||
# value = None
|
|
||||||
#
|
if value_type == dict or value_type == list :
|
||||||
# if tag not in dcols :
|
nb_elts = len(value)
|
||||||
#
|
elt_names = list(value)
|
||||||
# value_type = type(value)
|
else :
|
||||||
# nb_elts = 1
|
nb_elts = 1
|
||||||
# value_obitype = OBI_VOID
|
elt_names = None
|
||||||
#
|
|
||||||
# if value_type == dict or value_type == list :
|
value_obitype = get_obitype(value)
|
||||||
# nb_elts = len(value)
|
|
||||||
# elt_names = list(value)
|
if value_obitype != OBI_VOID :
|
||||||
# else :
|
dcols[tag] = (Column.new_column(view, tag, value_obitype, nb_elements_per_line=nb_elts, elements_names=elt_names), value_obitype)
|
||||||
# nb_elts = 1
|
|
||||||
# elt_names = None
|
# Fill value
|
||||||
#
|
dcols[tag][0][i] = value
|
||||||
# value_obitype = get_obitype(value)
|
|
||||||
#
|
# TODO else log error?
|
||||||
# if value_obitype != OBI_VOID :
|
|
||||||
# dcols[tag] = (Column.new_column(view, tag, value_obitype, nb_elements_per_line=nb_elts, elements_names=elt_names), value_obitype)
|
else :
|
||||||
#
|
|
||||||
# # Fill value
|
rewrite = False
|
||||||
# dcols[tag][0][i] = value
|
|
||||||
#
|
# Check type adequation
|
||||||
# # TODO else log error?
|
old_type = dcols[tag][1]
|
||||||
#
|
new_type = OBI_VOID
|
||||||
# else :
|
new_type = update_obitype(old_type, value)
|
||||||
#
|
if old_type != new_type :
|
||||||
# rewrite = False
|
rewrite = True
|
||||||
#
|
|
||||||
# # Check type adequation
|
try:
|
||||||
# old_type = dcols[tag][1]
|
# Fill value
|
||||||
# new_type = OBI_VOID
|
dcols[tag][0][i] = value
|
||||||
# new_type = update_obitype(old_type, value)
|
|
||||||
# if old_type != new_type :
|
except IndexError :
|
||||||
# rewrite = True
|
|
||||||
#
|
value_type = type(value)
|
||||||
# try:
|
old_column = dcols[tag][0]
|
||||||
# # Fill value
|
old_nb_elements_per_line = old_column.nb_elements_per_line
|
||||||
# dcols[tag][0][i] = value
|
new_nb_elements_per_line = 0
|
||||||
#
|
old_elements_names = old_column.elements_names
|
||||||
# except IndexError :
|
new_elements_names = None
|
||||||
#
|
|
||||||
# value_type = type(value)
|
#####################################################################
|
||||||
# old_column = dcols[tag][0]
|
|
||||||
# old_nb_elements_per_line = old_column.nb_elements_per_line
|
# Check the length and keys of column lines if needed
|
||||||
# new_nb_elements_per_line = 0
|
if value_type == dict : # Check dictionary keys
|
||||||
# old_elements_names = old_column.elements_names
|
for k in value :
|
||||||
# new_elements_names = None
|
if k not in old_elements_names :
|
||||||
#
|
new_elements_names = list(set(old_elements_names+[tobytes(k) for k in value]))
|
||||||
# #####################################################################
|
rewrite = True
|
||||||
#
|
break
|
||||||
# # Check the length and keys of column lines if needed
|
|
||||||
# if value_type == dict : # Check dictionary keys
|
elif value_type == list or value_type == tuple : # Check vector length
|
||||||
# for k in value :
|
if old_nb_elements_per_line < len(value) :
|
||||||
# if k not in old_elements_names :
|
new_nb_elements_per_line = len(value)
|
||||||
# new_elements_names = list(set(old_elements_names+[tobytes(k) for k in value]))
|
rewrite = True
|
||||||
# rewrite = True
|
|
||||||
# break
|
#####################################################################
|
||||||
#
|
|
||||||
# elif value_type == list or value_type == tuple : # Check vector length
|
if rewrite :
|
||||||
# if old_nb_elements_per_line < len(value) :
|
if new_nb_elements_per_line == 0 and new_elements_names is not None :
|
||||||
# new_nb_elements_per_line = len(value)
|
new_nb_elements_per_line = len(new_elements_names)
|
||||||
# rewrite = True
|
|
||||||
#
|
# Reset obierrno
|
||||||
# #####################################################################
|
obi_errno = 0
|
||||||
#
|
|
||||||
# if rewrite :
|
dcols[tag] = (view.rewrite_column_with_diff_attributes(old_column.name,
|
||||||
# if new_nb_elements_per_line == 0 and new_elements_names is not None :
|
new_data_type=new_type,
|
||||||
# new_nb_elements_per_line = len(new_elements_names)
|
new_nb_elements_per_line=new_nb_elements_per_line,
|
||||||
#
|
new_elements_names=new_elements_names),
|
||||||
# # Reset obierrno
|
value_obitype)
|
||||||
# obi_errno = 0
|
|
||||||
#
|
# Update the dictionary:
|
||||||
# dcols[tag] = (view.rewrite_column_with_diff_attributes(old_column.name,
|
for t in dcols :
|
||||||
# new_data_type=new_type,
|
dcols[t] = (view[t], dcols[t][1])
|
||||||
# new_nb_elements_per_line=new_nb_elements_per_line,
|
|
||||||
# new_elements_names=new_elements_names),
|
# Fill value
|
||||||
# value_obitype)
|
dcols[tag][0][i] = value
|
||||||
#
|
|
||||||
# # Update the dictionary:
|
i+=1
|
||||||
# for t in dcols :
|
|
||||||
# dcols[t] = (view[t], dcols[t][1])
|
print("\n")
|
||||||
#
|
print(view.__repr__())
|
||||||
# # Fill value
|
|
||||||
# dcols[tag][0][i] = value
|
input[0].close() # TODO
|
||||||
#
|
output[0].close()
|
||||||
# i+=1
|
|
||||||
#
|
|
||||||
# print("\n")
|
|
||||||
# print(view.__repr__())
|
|
||||||
#
|
|
||||||
# d.close()
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user