obi import: 'taxid' columns are now imported as 'TAXID' to fit view
predicates; also fixes taxdump import and DMS closing
This commit is contained in:
@ -70,6 +70,7 @@ def run(config):
|
||||
cdef Column def_col
|
||||
cdef Column seq_col
|
||||
cdef Column qual_col
|
||||
cdef Column taxid_col
|
||||
cdef Column old_column
|
||||
cdef bint rewrite
|
||||
cdef dict dcols
|
||||
@ -87,40 +88,37 @@ def run(config):
|
||||
|
||||
DMS.obi_atexit()
|
||||
|
||||
logger("info","obi import : imports file into an DMS")
|
||||
logger("info", "obi import : imports file into a DMS")
|
||||
|
||||
input = open_uri(config['obi']['inputURI'])
|
||||
if input is None:
|
||||
raise Exception("Could not open input URI")
|
||||
if not config['obi']['taxdump']: # TODO discuss
|
||||
input = open_uri(config['obi']['inputURI'])
|
||||
if input is None: # TODO check for bytes instead now?
|
||||
raise Exception("Could not open input URI")
|
||||
|
||||
# TODO read taxdump with URI?
|
||||
if 'taxdump' in config['obi']:
|
||||
taxo = Taxonomy.open_taxdump(input[0], config['obi']['taxdump'])
|
||||
# Read prefix (temporary fix)
|
||||
uri = config['obi']['outputURI'].split('/')
|
||||
idx = uri.index('taxonomy') + 1
|
||||
taxo.write(uri[idx])
|
||||
taxo.close()
|
||||
input[0].close()
|
||||
return
|
||||
|
||||
if input[2]==Nuc_Seq:
|
||||
v = View_NUC_SEQS
|
||||
# TODO uuuuh
|
||||
if input[2]==Nuc_Seq:
|
||||
v = View_NUC_SEQS
|
||||
else:
|
||||
v = View
|
||||
else:
|
||||
v = View
|
||||
v = None
|
||||
|
||||
output = open_uri(config['obi']['outputURI'],
|
||||
input=False,
|
||||
newviewtype=v)
|
||||
#quality=get_quality) # TODO
|
||||
|
||||
if output is None:
|
||||
raise Exception("Could not create output view")
|
||||
|
||||
#print("input:", input)
|
||||
#print("output:", output)
|
||||
# Read taxdump
|
||||
if config['obi']['taxdump']: # The input is a taxdump to import in a DMS
|
||||
taxo = Taxonomy.open_taxdump(output[0], config['obi']['inputURI'])
|
||||
taxo.write(output[1])
|
||||
taxo.close()
|
||||
output[0].close()
|
||||
return
|
||||
|
||||
pb = ProgressBar(1000000, config, seconde=5) # TODO should be number of records in file
|
||||
pb = ProgressBar(10000000, config, seconde=5) # TODO should be number of records in file
|
||||
|
||||
entries = input[1]
|
||||
|
||||
@ -168,6 +166,8 @@ def run(config):
|
||||
if tag != b"ID" and tag != b"DEFINITION" and tag != b"NUC_SEQ" and tag != b"QUALITY" : # TODO hmmm...
|
||||
|
||||
value = entry[tag]
|
||||
if tag == b"taxid":
|
||||
tag = b"TAXID"
|
||||
|
||||
# Check NA value
|
||||
if value == NA_value :
|
||||
@ -262,6 +262,12 @@ def run(config):
|
||||
print("\n")
|
||||
print(view.__repr__())
|
||||
|
||||
input[0].close() # TODO ?
|
||||
output[0].close()
|
||||
try:
|
||||
input[0].close()
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
output[0].close()
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
|
Reference in New Issue
Block a user