obi import: now properly uses macros for column names

This commit is contained in:
Celine Mercier
2019-03-15 11:39:21 +01:00
parent 24a63f8732
commit 2a4f1b8feb

View File

@@ -16,6 +16,14 @@ from obitools3.utils cimport tobytes, \
get_obitype, \ get_obitype, \
update_obitype update_obitype
from obitools3.dms.capi.obiview cimport VIEW_TYPE_NUC_SEQS, \
NUC_SEQUENCE_COLUMN, \
ID_COLUMN, \
DEFINITION_COLUMN, \
QUALITY_COLUMN, \
COUNT_COLUMN, \
TAXID_COLUMN
from obitools3.dms.capi.obitypes cimport obitype_t, \ from obitools3.dms.capi.obitypes cimport obitype_t, \
OBI_VOID, \ OBI_VOID, \
OBI_QUAL OBI_QUAL
@@ -99,7 +107,7 @@ def run(config):
entry_count = input[4] entry_count = input[4]
logger("info", "Importing %d entries", entry_count) logger("info", "Importing %d entries", entry_count)
# TODO a bit dirty # TODO a bit dirty?
if input[2]==Nuc_Seq: if input[2]==Nuc_Seq:
v = View_NUC_SEQS v = View_NUC_SEQS
else: else:
@@ -136,9 +144,9 @@ def run(config):
# Save basic columns in variables for optimization # Save basic columns in variables for optimization
if NUC_SEQS_view : if NUC_SEQS_view :
id_col = view[b"ID"] # TODO use macros or globals for column names id_col = view[ID_COLUMN]
def_col = view[b"DEFINITION"] def_col = view[DEFINITION_COLUMN]
seq_col = view[b"NUC_SEQ"] seq_col = view[NUC_SEQUENCE_COLUMN]
dcols = {} dcols = {}
@@ -153,21 +161,23 @@ def run(config):
seq_col[i] = entry.seq seq_col[i] = entry.seq
# Check if there is a sequencing quality associated by checking the first entry # TODO haven't found a more robust solution yet # Check if there is a sequencing quality associated by checking the first entry # TODO haven't found a more robust solution yet
if i == 0: if i == 0:
get_quality = b"QUALITY" in entry get_quality = QUALITY_COLUMN in entry
if get_quality: if get_quality:
Column.new_column(view, b"QUALITY", OBI_QUAL) Column.new_column(view, QUALITY_COLUMN, OBI_QUAL)
qual_col = view[b"QUALITY"] qual_col = view[QUALITY_COLUMN]
if get_quality: if get_quality:
qual_col[i] = entry.quality qual_col[i] = entry.quality
for tag in entry : for tag in entry :
if tag != b"ID" and tag != b"DEFINITION" and tag != b"NUC_SEQ" and tag != b"QUALITY" : # TODO hmmm... if tag != ID_COLUMN and tag != DEFINITION_COLUMN and tag != NUC_SEQUENCE_COLUMN and tag != QUALITY_COLUMN : # TODO dirty
value = entry[tag] value = entry[tag]
if tag == b"taxid": if tag == b"taxid":
tag = b"TAXID" tag = TAXID_COLUMN
if tag == b"count":
tag = COUNT_COLUMN
if tag not in dcols : if tag not in dcols :
value_type = type(value) value_type = type(value)
@@ -253,7 +263,7 @@ def run(config):
# Fill value # Fill value
dcols[tag][0][i] = value dcols[tag][0][i] = value
i+=1 i+=1 # TODO Not if None sequence
pb(i, force=True) pb(i, force=True)
print("", file=sys.stderr) print("", file=sys.stderr)