obi import with new input/output API

2017-08-20 17:58:36 +02:00
parent 38029b1f77
commit 6a2759eee6
1 changed files with 149 additions and 157 deletions
--- a/python/obitools3/commands/import.pyx
+++ b/python/obitools3/commands/import.pyx
@ -8,12 +8,10 @@ from obitools3.apps.progress cimport ProgressBar  # @UnresolvedImport
 from obitools3.files.universalopener cimport uopen
 from obitools3.parsers.fasta import fastaIterator
 from obitools3.parsers.fastq import fastqIterator
 from obitools3.dms.dms import DMS       # TODO cimport doesn't work
 from obitools3.dms.view.view cimport View
-from obitools3.dms.view.typed_view.view_NUC_SEQS import View_NUC_SEQS    # TODO cimport doesn't work
+from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
 from obitools3.dms.column.column cimport Column
-
+from obitools3.dms.obiseq cimport Nuc_Seq
 from obitools3.dms.obiseq import Nuc_Seq
 from obitools3.utils cimport tobytes, \
                             get_obitype, \
@ -25,6 +23,7 @@ from obitools3.dms.capi.obitypes cimport obitype_t, \
 from obitools3.dms.capi.obierrno cimport obi_errno
 from obitools3.apps.optiongroups import addSequenceInputOption, addMinimalOutputOption
 from obitools3.uri.decode import open_uri
 from obitools3.apps.config import logger
@ -50,6 +49,8 @@ def addOptions(parser):
 def run(config):
    cdef   tuple       input
    cdef   tuple       output 
    cdef   int         i
    cdef   type        value_type
    cdef   obitype_t   value_obitype
@ -62,7 +63,6 @@ def run(config):
    cdef   View        view
    cdef   object      iseq
    cdef   object      seq
    cdef   object      inputs
    cdef   Column      id_col
    cdef   Column      def_col
    cdef   Column      seq_col
@ -71,7 +71,7 @@ def run(config):
    cdef   bint        rewrite
    cdef   dict        dcols
    cdef   int         skipping
-    cdef   str         tag
+    cdef   bytes       tag
    cdef   object      value
    cdef   list        elt_names
    cdef   int         old_nb_elements_per_line
@ -84,165 +84,157 @@ def run(config):
    logger("info","obi import : imports file into an DMS")
-    inputs = open_uri(config['obi']['inputURI'])
+    input = open_uri(config['obi']['inputURI'])
-    if inputs[2]==Nuc_Seq:
+    if input[2]==Nuc_Seq:
        v = View_NUC_SEQS
    else:
-        v= View 
+        v = View 
    print(v)
    output = open_uri(config['obi']['outputURI'],
                      input=False,
                      newviewtype=v)
-    print(input)
+    #print(input)
-    print(output)
+    #print(output)
-    sys.exit()
+    pb = ProgressBar(1000000, config, seconde=5)   # TODO should be number of records in file
-#     pb = ProgressBar(1000000, config, seconde=5)   # TODO should be number of records in file
+    iseq = input[1]
-#        
+    
-#     inputs = uopen(config['import']['filename'])
+    get_quality = False
-# 
+    NUC_SEQS_view = False
-#     # Create or open DMS
+    if isinstance(output[1], View) :
-#     d = DMS.open_or_new(config['obi']['defaultdms'])
+        view = output[1]
-#     
+        if output[2] == View_NUC_SEQS :
-#     get_quality = False
+            NUC_SEQS_view = True
-#     NUC_SEQS_view = False
+            if "QUALITY" in view :      # TODO
-#     if config['import']['seqinformat']=='fasta':
+                get_quality = True
-#         get_quality = False
+    else: 
-#         NUC_SEQS_view = True
+        raise NotImplementedError()
-#         iseq = fastaIterator(inputs, skip=config['import']['skip'])
+    
-#         view = View_NUC_SEQS.new(d, config['import']['destview'], quality=get_quality)
+    # Save basic columns in variables for optimization
-#     elif config['import']['seqinformat']=='fastq':
+    if NUC_SEQS_view :
-#         get_quality = True
+        id_col = view[b"ID"]
-#         NUC_SEQS_view = True
+        def_col = view[b"DEFINITION"]
-#         iseq = fastqIterator(inputs, skip=config['import']['skip'])
+        seq_col = view[b"NUC_SEQ"]
-#         view = View_NUC_SEQS.new(d, config['import']['destview'], quality=get_quality)
+        if get_quality :
-#     else:
+            qual_col = view[b"QUALITY"]
-#         raise RuntimeError('File format not handled')
+     
-#         
+    dcols = {}
-#     # Save basic columns in variables for optimization
+     
-#     if NUC_SEQS_view :
+    i = 0
-#         id_col = view["ID"]
+    for seq in iseq :
-#         def_col = view["DEFINITION"]
+        
-#         seq_col = view["NUC_SEQ"]
+        pb(i)
-#         if get_quality :
+        
-#             qual_col = view["QUALITY"]
+        if NUC_SEQS_view :
-#     
+            id_col[i] = seq.id
-#     dcols = {}
+            def_col[i] = seq.definition
-#     
+            seq_col[i] = seq.seq
-#     i = 0
+            
-#     for seq in iseq :
+            if get_quality :
-#         if i == config['import']['only'] :
+                qual_col[i] = seq.quality
-#             break
+         
-#         else :
+        for tag in seq :
-#             pb(i)
+            
-#             if NUC_SEQS_view :
+            if tag != b"ID" and tag != b"DEFINITION" and tag != b"NUC_SEQ" and tag != b"QUALITY" :  # TODO hmmm... 
-#                 id_col[i] = seq['id']
+                                
-#                 def_col[i] = seq['definition']
+                value = seq[tag]
-#                 seq_col[i] = seq['sequence']
+                 
-#                 if get_quality :
+                # Check NA value
-#                     qual_col[i] = seq['quality']
+                if value == config['obi']['nastring'] :
-#             
+                    value = None
-#             for tag in seq['tags'] :
+                 
-#                             
+                if tag not in dcols :
-#                 value = seq['tags'][tag]
+                     
-#                 
+                    value_type = type(value)
-#                 # Check NA value
+                    nb_elts = 1
-#                 if value == config['import']['NA'] :
+                    value_obitype = OBI_VOID
-#                     value = None
+                     
-#                 
+                    if value_type == dict or value_type == list :
-#                 if tag not in dcols :
+                        nb_elts = len(value)
-#                     
+                        elt_names = list(value)
-#                     value_type = type(value)
+                    else :
-#                     nb_elts = 1
+                        nb_elts = 1
-#                     value_obitype = OBI_VOID
+                        elt_names = None
-#                     
+                     
-#                     if value_type == dict or value_type == list :
+                    value_obitype = get_obitype(value)
-#                         nb_elts = len(value)
+                     
-#                         elt_names = list(value)
+                    if value_obitype != OBI_VOID :
-#                     else :
+                        dcols[tag] = (Column.new_column(view, tag, value_obitype, nb_elements_per_line=nb_elts, elements_names=elt_names), value_obitype)
-#                         nb_elts = 1
+                                                 
-#                         elt_names = None
+                        # Fill value
-#                     
+                        dcols[tag][0][i] = value
-#                     value_obitype = get_obitype(value)
+                     
-#                     
+                    # TODO else log error?
-#                     if value_obitype != OBI_VOID :
+ 
-#                         dcols[tag] = (Column.new_column(view, tag, value_obitype, nb_elements_per_line=nb_elts, elements_names=elt_names), value_obitype)
+                else :
-#                                                 
+         
-#                         # Fill value
+                    rewrite = False
-#                         dcols[tag][0][i] = value
+ 
-#                     
+                    # Check type adequation
-#                     # TODO else log error?
+                    old_type = dcols[tag][1]
-# 
+                    new_type = OBI_VOID
-#                 else :
+                    new_type = update_obitype(old_type, value)
-#         
+                    if old_type != new_type :
-#                     rewrite = False
+                        rewrite = True
-# 
+ 
-#                     # Check type adequation
+                    try:
-#                     old_type = dcols[tag][1]
+                        # Fill value
-#                     new_type = OBI_VOID
+                        dcols[tag][0][i] = value
-#                     new_type = update_obitype(old_type, value)
+                     
-#                     if old_type != new_type :
+                    except IndexError :
-#                         rewrite = True
+                                                 
-# 
+                        value_type = type(value)
-#                     try:
+                        old_column = dcols[tag][0]
-#                         # Fill value
+                        old_nb_elements_per_line = old_column.nb_elements_per_line
-#                         dcols[tag][0][i] = value
+                        new_nb_elements_per_line = 0
-#                     
+                        old_elements_names = old_column.elements_names
-#                     except IndexError :
+                        new_elements_names = None
-#                                                 
+     
-#                         value_type = type(value)
+                        #####################################################################
-#                         old_column = dcols[tag][0]
+                         
-#                         old_nb_elements_per_line = old_column.nb_elements_per_line
+                        # Check the length and keys of column lines if needed
-#                         new_nb_elements_per_line = 0
+                        if value_type == dict :    # Check dictionary keys
-#                         old_elements_names = old_column.elements_names
+                            for k in value :
-#                         new_elements_names = None
+                                if k not in old_elements_names :
-#     
+                                    new_elements_names = list(set(old_elements_names+[tobytes(k) for k in value]))
-#                         #####################################################################
+                                    rewrite = True
-#                         
+                                    break
-#                         # Check the length and keys of column lines if needed
+                         
-#                         if value_type == dict :    # Check dictionary keys
+                        elif value_type == list or value_type == tuple :  # Check vector length
-#                             for k in value :
+                            if old_nb_elements_per_line < len(value) :
-#                                 if k not in old_elements_names :
+                                new_nb_elements_per_line = len(value)
-#                                     new_elements_names = list(set(old_elements_names+[tobytes(k) for k in value]))
+                                rewrite = True
-#                                     rewrite = True
+                         
-#                                     break
+                        #####################################################################
-#                         
+                         
-#                         elif value_type == list or value_type == tuple :  # Check vector length
+                        if rewrite :
-#                             if old_nb_elements_per_line < len(value) :
+                            if new_nb_elements_per_line == 0 and new_elements_names is not None :
-#                                 new_nb_elements_per_line = len(value)
+                                new_nb_elements_per_line = len(new_elements_names)
-#                                 rewrite = True
+                             
-#                         
+                            # Reset obierrno 
-#                         #####################################################################
+                            obi_errno = 0
-#                         
+ 
-#                         if rewrite :
+                            dcols[tag] = (view.rewrite_column_with_diff_attributes(old_column.name, 
-#                             if new_nb_elements_per_line == 0 and new_elements_names is not None :
+                                                                                   new_data_type=new_type, 
-#                                 new_nb_elements_per_line = len(new_elements_names)
+                                                                                   new_nb_elements_per_line=new_nb_elements_per_line,
-#                             
+                                                                                   new_elements_names=new_elements_names), 
-#                             # Reset obierrno 
+                                          value_obitype)
-#                             obi_errno = 0
+                             
-# 
+                            # Update the dictionary:
-#                             dcols[tag] = (view.rewrite_column_with_diff_attributes(old_column.name, 
+                            for t in dcols :
-#                                                                                    new_data_type=new_type, 
+                                dcols[t] = (view[t], dcols[t][1])
-#                                                                                    new_nb_elements_per_line=new_nb_elements_per_line,
+                             
-#                                                                                    new_elements_names=new_elements_names), 
+                            # Fill value
-#                                           value_obitype)
+                            dcols[tag][0][i] = value
-#                             
+                                    
-#                             # Update the dictionary:
+        i+=1
-#                             for t in dcols :
+ 
-#                                 dcols[t] = (view[t], dcols[t][1])
+    print("\n")
-#                             
+    print(view.__repr__())
-#                             # Fill value
+  
-#                             dcols[tag][0][i] = value
+    input[0].close()    # TODO
-#                                     
+    output[0].close()
 #             i+=1
 # 
 #     print("\n")
 #     print(view.__repr__())
 #  
 #     d.close()