Compare commits
6 Commits
v3.0.0-bet
...
v3.0.0-bet
Author | SHA1 | Date | |
---|---|---|---|
0b4a234671 | |||
d32cfdcce5 | |||
219c0d6fdc | |||
dc9f897917 | |||
bb72682f7d | |||
52920c3c71 |
@ -86,7 +86,24 @@ def run(config):
|
|||||||
if not remove_rev_qual:
|
if not remove_rev_qual:
|
||||||
Column.new_column(o_view, REVERSE_SEQUENCE_COLUMN, OBI_SEQ)
|
Column.new_column(o_view, REVERSE_SEQUENCE_COLUMN, OBI_SEQ)
|
||||||
Column.new_column(o_view, REVERSE_QUALITY_COLUMN, OBI_QUAL, associated_column_name=REVERSE_SEQUENCE_COLUMN, associated_column_version=o_view[REVERSE_SEQUENCE_COLUMN].version)
|
Column.new_column(o_view, REVERSE_QUALITY_COLUMN, OBI_QUAL, associated_column_name=REVERSE_SEQUENCE_COLUMN, associated_column_version=o_view[REVERSE_SEQUENCE_COLUMN].version)
|
||||||
|
|
||||||
|
# Initialize multiple elements columns
|
||||||
|
dict_cols = {}
|
||||||
|
for v in iview_list:
|
||||||
|
for coln in v.keys():
|
||||||
|
if v[coln].nb_elements_per_line > 1:
|
||||||
|
if coln not in dict_cols:
|
||||||
|
dict_cols[coln] = {}
|
||||||
|
dict_cols[coln]['eltnames'] = set(v[coln].elements_names)
|
||||||
|
dict_cols[coln]['nbelts'] = v[coln].nb_elements_per_line
|
||||||
|
dict_cols[coln]['obitype'] = v[coln].data_type_int
|
||||||
|
else:
|
||||||
|
dict_cols[coln]['eltnames'] = set(v[coln].elements_names + list(dict_cols[coln]['eltnames']))
|
||||||
|
dict_cols[coln]['nbelts'] = len(dict_cols[coln]['eltnames'])
|
||||||
|
for coln in dict_cols:
|
||||||
|
Column.new_column(o_view, coln, dict_cols[coln]['obitype'],
|
||||||
|
nb_elements_per_line=dict_cols[coln]['nbelts'], elements_names=list(dict_cols[coln]['eltnames']))
|
||||||
|
|
||||||
# Initialize the progress bar
|
# Initialize the progress bar
|
||||||
pb = ProgressBar(total_len, config, seconde=5)
|
pb = ProgressBar(total_len, config, seconde=5)
|
||||||
|
|
||||||
|
@ -11,6 +11,7 @@ from obitools3.dms.column.column cimport Column
|
|||||||
from obitools3.dms.obiseq cimport Nuc_Seq
|
from obitools3.dms.obiseq cimport Nuc_Seq
|
||||||
from obitools3.dms import DMS
|
from obitools3.dms import DMS
|
||||||
from obitools3.dms.taxo.taxo cimport Taxonomy
|
from obitools3.dms.taxo.taxo cimport Taxonomy
|
||||||
|
from obitools3.files.uncompress cimport CompressedFile
|
||||||
|
|
||||||
|
|
||||||
from obitools3.utils cimport tobytes, \
|
from obitools3.utils cimport tobytes, \
|
||||||
@ -65,6 +66,14 @@ def addOptions(parser):
|
|||||||
addTaxdumpInputOption(parser)
|
addTaxdumpInputOption(parser)
|
||||||
addMinimalOutputOption(parser)
|
addMinimalOutputOption(parser)
|
||||||
|
|
||||||
|
group = parser.add_argument_group('obi import specific options')
|
||||||
|
|
||||||
|
group.add_argument('--preread',
|
||||||
|
action="store_true", dest="import:preread",
|
||||||
|
default=False,
|
||||||
|
help="Do a first readthrough of the dataset if it contains huge dictionaries (more than 100 keys) for "
|
||||||
|
"a much faster import.")
|
||||||
|
|
||||||
|
|
||||||
def run(config):
|
def run(config):
|
||||||
|
|
||||||
@ -169,8 +178,6 @@ def run(config):
|
|||||||
|
|
||||||
if entry_count >= 0:
|
if entry_count >= 0:
|
||||||
pb = ProgressBar(entry_count, config, seconde=5)
|
pb = ProgressBar(entry_count, config, seconde=5)
|
||||||
|
|
||||||
entries = input[1]
|
|
||||||
|
|
||||||
NUC_SEQS_view = False
|
NUC_SEQS_view = False
|
||||||
if isinstance(output[1], View) :
|
if isinstance(output[1], View) :
|
||||||
@ -188,6 +195,60 @@ def run(config):
|
|||||||
|
|
||||||
dcols = {}
|
dcols = {}
|
||||||
|
|
||||||
|
# First read through the entries to prepare columns with dictionaries as they are very time-expensive to rewrite
|
||||||
|
if config['import']['preread']:
|
||||||
|
logger("info", "First readthrough...")
|
||||||
|
entries = input[1]
|
||||||
|
i = 0
|
||||||
|
dict_dict = {}
|
||||||
|
for entry in entries:
|
||||||
|
PyErr_CheckSignals()
|
||||||
|
|
||||||
|
if entry is None: # error or exception handled at lower level, not raised because Python generators can't resume after any exception is raised
|
||||||
|
if config['obi']['skiperror']:
|
||||||
|
i-=1
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
raise Exception("obi import error in first readthrough")
|
||||||
|
|
||||||
|
if pb is not None:
|
||||||
|
pb(i)
|
||||||
|
elif not i%50000:
|
||||||
|
logger("info", "Read %d entries", i)
|
||||||
|
|
||||||
|
for tag in entry :
|
||||||
|
if type(entry[tag]) == dict :
|
||||||
|
if tag in dict_dict:
|
||||||
|
dict_dict[tag][0].update(entry[tag].keys())
|
||||||
|
else:
|
||||||
|
dict_dict[tag] = [set(entry[tag].keys()), get_obitype(entry[tag])]
|
||||||
|
i+=1
|
||||||
|
|
||||||
|
if pb is not None:
|
||||||
|
pb(i, force=True)
|
||||||
|
print("", file=sys.stderr)
|
||||||
|
|
||||||
|
for tag in dict_dict:
|
||||||
|
dcols[tag] = (Column.new_column(view, tag, dict_dict[tag][1], \
|
||||||
|
nb_elements_per_line=len(dict_dict[tag][0]), \
|
||||||
|
elements_names=list(dict_dict[tag][0])), \
|
||||||
|
value_obitype)
|
||||||
|
|
||||||
|
|
||||||
|
# Reinitialize the input
|
||||||
|
if isinstance(input[0], CompressedFile):
|
||||||
|
input_is_file = True
|
||||||
|
if entry_count >= 0:
|
||||||
|
pb = ProgressBar(entry_count, config, seconde=5)
|
||||||
|
try:
|
||||||
|
input[0].close()
|
||||||
|
except AttributeError:
|
||||||
|
pass
|
||||||
|
input = open_uri(config['obi']['inputURI'], force_file=input_is_file)
|
||||||
|
if input is None:
|
||||||
|
raise Exception("Could not open input URI")
|
||||||
|
|
||||||
|
entries = input[1]
|
||||||
i = 0
|
i = 0
|
||||||
for entry in entries :
|
for entry in entries :
|
||||||
|
|
||||||
|
7
python/obitools3/uri/decode.pyx
Executable file → Normal file
7
python/obitools3/uri/decode.pyx
Executable file → Normal file
@ -171,7 +171,8 @@ Reads an URI and returns a tuple containing:
|
|||||||
def open_uri(uri,
|
def open_uri(uri,
|
||||||
bint input=True,
|
bint input=True,
|
||||||
type newviewtype=View,
|
type newviewtype=View,
|
||||||
dms_only=False):
|
dms_only=False,
|
||||||
|
force_file=False):
|
||||||
|
|
||||||
cdef bytes urib = tobytes(uri)
|
cdef bytes urib = tobytes(uri)
|
||||||
cdef bytes scheme
|
cdef bytes scheme
|
||||||
@ -195,9 +196,9 @@ def open_uri(uri,
|
|||||||
if 'obi' not in config:
|
if 'obi' not in config:
|
||||||
config['obi']={}
|
config['obi']={}
|
||||||
|
|
||||||
try:
|
if not force_file and "defaultdms" in config["obi"]:
|
||||||
default_dms=config["obi"]["defaultdms"]
|
default_dms=config["obi"]["defaultdms"]
|
||||||
except KeyError:
|
else:
|
||||||
default_dms=None
|
default_dms=None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
major = 3
|
major = 3
|
||||||
minor = 0
|
minor = 0
|
||||||
serial= '0-beta9'
|
serial= '0-beta11'
|
||||||
|
|
||||||
version ="%d.%02d.%s" % (major,minor,serial)
|
version ="%d.%02d.%s" % (major,minor,serial)
|
||||||
|
@ -100,35 +100,35 @@ int print_assignment_result(Obiview_p output_view, index_t line,
|
|||||||
static int create_output_columns(Obiview_p o_view)
|
static int create_output_columns(Obiview_p o_view)
|
||||||
{
|
{
|
||||||
// Score column
|
// Score column
|
||||||
if (obi_view_add_column(o_view, ECOTAG_SCORE_COLUMN_NAME, -1, NULL, OBI_FLOAT, 0, 1, NULL, false, false, false, NULL, NULL, -1, ECOTAG_SCORE_COLUMN_NAME, true) < 0)
|
if (obi_view_add_column(o_view, ECOTAG_SCORE_COLUMN_NAME, -1, NULL, OBI_FLOAT, 0, 1, NULL, false, false, false, NULL, NULL, -1, "{}", true) < 0)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError creating the column for the score in ecotag");
|
obidebug(1, "\nError creating the column for the score in ecotag");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Assigned taxid column
|
// Assigned taxid column
|
||||||
if (obi_view_add_column(o_view, ECOTAG_TAXID_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, ECOTAG_TAXID_COLUMN_NAME, true) < 0)
|
if (obi_view_add_column(o_view, ECOTAG_TAXID_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, "{}", true) < 0)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError creating the column for the assigned taxid in ecotag");
|
obidebug(1, "\nError creating the column for the assigned taxid in ecotag");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Assigned scientific name column
|
// Assigned scientific name column
|
||||||
if (obi_view_add_column(o_view, ECOTAG_NAME_COLUMN_NAME, -1, NULL, OBI_STR, 0, 1, NULL, false, false, false, NULL, NULL, -1, ECOTAG_NAME_COLUMN_NAME, true) < 0)
|
if (obi_view_add_column(o_view, ECOTAG_NAME_COLUMN_NAME, -1, NULL, OBI_STR, 0, 1, NULL, false, false, false, NULL, NULL, -1, "{}", true) < 0)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError creating the column for the assigned scientific name in ecotag");
|
obidebug(1, "\nError creating the column for the assigned scientific name in ecotag");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Assignement status column
|
// Assignement status column
|
||||||
if (obi_view_add_column(o_view, ECOTAG_STATUS_COLUMN_NAME, -1, NULL, OBI_BOOL, 0, 1, NULL, false, false, false, NULL, NULL, -1, ECOTAG_STATUS_COLUMN_NAME, true) < 0)
|
if (obi_view_add_column(o_view, ECOTAG_STATUS_COLUMN_NAME, -1, NULL, OBI_BOOL, 0, 1, NULL, false, false, false, NULL, NULL, -1, "{}", true) < 0)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError creating the column for the assignment status in ecotag");
|
obidebug(1, "\nError creating the column for the assignment status in ecotag");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Column for array of best match ids
|
// Column for array of best match ids
|
||||||
if (obi_view_add_column(o_view, ECOTAG_BEST_MATCH_IDS_COLUMN_NAME, -1, NULL, OBI_STR, 0, 1, NULL, false, true, false, NULL, NULL, -1, ECOTAG_BEST_MATCH_IDS_COLUMN_NAME, true) < 0)
|
if (obi_view_add_column(o_view, ECOTAG_BEST_MATCH_IDS_COLUMN_NAME, -1, NULL, OBI_STR, 0, 1, NULL, false, true, false, NULL, NULL, -1, "{}", true) < 0)
|
||||||
{
|
{
|
||||||
obidebug(1, "\nError creating the column for the array of ids of the best match in ecotag");
|
obidebug(1, "\nError creating the column for the array of ids of the best match in ecotag");
|
||||||
return -1;
|
return -1;
|
||||||
|
Reference in New Issue
Block a user