Cython API: obi import can now import ngsfilter files and tabular files
This commit is contained in:
@ -1,13 +1,8 @@
|
||||
#cython: language_level=3
|
||||
|
||||
# TODO cimport generates errors with argument numbers, but without them some variables can't be declared
|
||||
|
||||
import sys
|
||||
|
||||
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.files.universalopener cimport uopen
|
||||
from obitools3.parsers.fasta import fastaIterator
|
||||
from obitools3.parsers.fastq import fastqIterator
|
||||
from obitools3.dms.view.view cimport View
|
||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||
from obitools3.dms.column.column cimport Column
|
||||
@ -24,7 +19,7 @@ from obitools3.dms.capi.obitypes cimport obitype_t, \
|
||||
|
||||
from obitools3.dms.capi.obierrno cimport obi_errno
|
||||
|
||||
from obitools3.apps.optiongroups import addSequenceInputOption, addMinimalOutputOption
|
||||
from obitools3.apps.optiongroups import addSequenceInputOption, addTabularInputOption, addMinimalOutputOption
|
||||
|
||||
from obitools3.uri.decode import open_uri
|
||||
|
||||
@ -45,6 +40,7 @@ default_config = { 'destview' : None,
|
||||
def addOptions(parser):
|
||||
|
||||
addSequenceInputOption(parser)
|
||||
addTabularInputOption(parser)
|
||||
addMinimalOutputOption(parser)
|
||||
# addTaxdumpInputOption(parser)
|
||||
|
||||
@ -63,8 +59,8 @@ def run(config):
|
||||
cdef int nb_elts
|
||||
cdef object d
|
||||
cdef View view
|
||||
cdef object iseq
|
||||
cdef object seq
|
||||
cdef object entries
|
||||
cdef object entry
|
||||
cdef Column id_col
|
||||
cdef Column def_col
|
||||
cdef Column seq_col
|
||||
@ -108,9 +104,9 @@ def run(config):
|
||||
|
||||
pb = ProgressBar(1000000, config, seconde=5) # TODO should be number of records in file
|
||||
|
||||
iseq = input[1]
|
||||
entries = input[1]
|
||||
|
||||
NA_value = config['obi']['nastring']
|
||||
NA_value = tobytes(config['obi']['nastring']) # TODO
|
||||
|
||||
NUC_SEQS_view = False
|
||||
if isinstance(output[1], View) :
|
||||
@ -121,39 +117,39 @@ def run(config):
|
||||
raise NotImplementedError()
|
||||
|
||||
# Save basic columns in variables for optimization
|
||||
if NUC_SEQS_view :
|
||||
id_col = view[b"ID"]
|
||||
if NUC_SEQS_view :
|
||||
id_col = view[b"ID"] # TODO use macros or globals for column names
|
||||
def_col = view[b"DEFINITION"]
|
||||
seq_col = view[b"NUC_SEQ"]
|
||||
|
||||
dcols = {}
|
||||
|
||||
i = 0
|
||||
for seq in iseq :
|
||||
|
||||
for entry in entries :
|
||||
|
||||
pb(i)
|
||||
|
||||
if NUC_SEQS_view :
|
||||
|
||||
# Check if there is a sequencing quality associated # TODO
|
||||
if i == 0:
|
||||
get_quality = b"QUALITY" in seq
|
||||
get_quality = b"QUALITY" in entry
|
||||
if get_quality:
|
||||
Column.new_column(view, b"QUALITY", OBI_QUAL)
|
||||
qual_col = view[b"QUALITY"]
|
||||
|
||||
id_col[i] = seq.id
|
||||
def_col[i] = seq.definition
|
||||
seq_col[i] = seq.seq
|
||||
id_col[i] = entry.id
|
||||
def_col[i] = entry.definition
|
||||
seq_col[i] = entry.seq
|
||||
|
||||
if get_quality :
|
||||
qual_col[i] = seq.quality
|
||||
qual_col[i] = entry.quality
|
||||
|
||||
for tag in seq :
|
||||
for tag in entry :
|
||||
|
||||
if tag != b"ID" and tag != b"DEFINITION" and tag != b"NUC_SEQ" and tag != b"QUALITY" : # TODO hmmm...
|
||||
|
||||
value = seq[tag]
|
||||
value = entry[tag]
|
||||
|
||||
# Check NA value
|
||||
if value == NA_value :
|
||||
|
Reference in New Issue
Block a user