#cython: language_level=3

# obi sort command module (python/obitools3/commands/sort.pyx).

from obitools3.apps.progress cimport ProgressBar  # @UnresolvedImport
from obitools3.dms import DMS
from obitools3.dms.view.view cimport View, Line_selection
from obitools3.uri.decode import open_uri
from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyInputOption, addMinimalOutputOption
from obitools3.dms.view import RollbackException
from functools import reduce
from obitools3.apps.config import logger
from obitools3.dms.capi.obitypes cimport OBI_BOOL, OBI_CHAR, OBI_FLOAT, \
                                         OBI_INT, OBI_QUAL, OBI_SEQ, \
                                         OBI_STR, \
                                         OBIBool_NA, OBIChar_NA, \
                                         OBIFloat_NA, OBIInt_NA

import time


# Stand-in sort values, indexed by column data type. line_cmp() returns the
# matching entry when a line holds None for the sort attribute: the type's
# NA constant for bool/char/float/int columns, an empty list for quality
# columns and empty bytes for sequence and string columns.
NULL_VALUE = {
    OBI_BOOL: OBIBool_NA,
    OBI_CHAR: OBIChar_NA,
    OBI_FLOAT: OBIFloat_NA,
    OBI_INT: OBIInt_NA,
    OBI_QUAL: [],
    OBI_SEQ: b"",
    OBI_STR: b"",
}


# One-line, human-readable description of this command.
__title__ = "Sort view lines according to the value of a given attribute."

def addOptions(parser):
    """Register the command-line options of ``obi sort`` on *parser*.

    Adds the minimal input and output option groups, then a dedicated
    argument group with:

    * ``--key`` / ``-k``: repeatable, each occurrence is appended to the
      list stored under ``sort:keys`` (empty list by default);
    * ``--reverse`` / ``-r``: flag stored under ``sort:reverse``
      (``False`` by default).
    """

    addMinimalInputOption(parser)
    addMinimalOutputOption(parser)

    group = parser.add_argument_group('obi sort specific options')

    # NOTE(review): the metavar is an empty string, so the help output shows
    # no placeholder after --key/-k -- confirm whether a name was intended.
    group.add_argument('--key', '-k',
                       action="append", dest="sort:keys",
                       metavar='',
                       default=[],
                       type=str,
                       help="Attribute used to sort the sequence records.")

    group.add_argument('--reverse', '-r',
                       action="store_true", dest="sort:reverse",
                       default=False,
                       help="Sort in reverse order.")


def line_cmp(line, key, pb):
    """Return the value used to order *line* when sorting on *key*.

    :param line: view line, indexable by attribute name; ``line.view[key]``
                 must expose ``data_type_int`` when the value is ``None``.
    :param key: name of the attribute to sort on.
    :param pb: ignored. The caller passes the result of a progress-bar call
               here so that the bar is advanced each time a sort key is
               computed.
    :return: ``line[key]``, or the ``NULL_VALUE`` entry matching the data
             type of the column when ``line[key]`` is ``None``.
    """
    # Read the attribute once instead of once for the test and once for
    # the return value.
    value = line[key]
    if value is None:
        return NULL_VALUE[line.view[key].data_type_int]
    return value


def run(config):
    """Entry point of ``obi sort``: write a sorted copy of the input view.

    Opens the view designated by ``config['obi']['inputURI']``, builds a
    line selection covering all its lines, sorts that selection once per
    key listed in ``config['sort']['keys']`` (honouring
    ``config['sort']['reverse']``), materializes the selection as a view
    named after ``config['obi']['outputURI']`` and prints its ``repr``.

    :param config: nested configuration mapping with the ``obi`` and
                   ``sort`` sections described above.
    :raises Exception: if the input cannot be opened, or if the output URI
                       has two ``/``-separated parts whose first part
                       differs from the first part of the input URI.
    :raises RollbackException: if materializing the output view fails.
    """

    DMS.obi_atexit()

    logger("info", "obi sort")

    # Open the input (named opened_input so the builtin input() is not shadowed)
    opened_input = open_uri(config['obi']['inputURI'])
    if opened_input is None:
        raise Exception("Could not read input view")
    i_view = opened_input[1]

    # Read the name of the output view
    uri = config['obi']['outputURI'].split('/')
    if len(uri) == 2:
        # Check that input and output DMS are the same (predicate, to discuss)
        if config['obi']['inputURI'].split('/')[0] != uri[0]:
            raise Exception("Input and output DMS must be the same")
        output_view_name = uri[1]
    else:
        output_view_name = uri[0]

    # Initialize the progress bar
    pb = ProgressBar(len(i_view), config, seconde=5)  # TODO

    keys = config['sort']['keys']

    selection = Line_selection(i_view)

    for i in range(len(i_view)):  # TODO special function?
        selection.append(i)

    # One sort pass per key, in command-line order.
    # NOTE(review): if Line_selection.sort is stable (as list.sort is), the
    # LAST key given ends up being the primary sort key -- confirm that this
    # is the intended precedence.
    for k in keys:  # TODO order?
        selection.sort(key=lambda line_idx: line_cmp(i_view[line_idx], k, pb(line_idx)),
                       reverse=config['sort']['reverse'])

    # Create output view with the sorted line selection
    o_view = None
    try:
        o_view = selection.materialize(output_view_name, comments="obi sort: "+str(config['sort']['keys'])+"\n")
    except Exception as e:
        # o_view is only bound if materialize() returned, so hand it to
        # RollbackException only in that case; referencing it unconditionally
        # would raise an unbound-name error and hide the real failure.
        # NOTE(review): assumes RollbackException accepts a message with no
        # view argument -- confirm against its definition.
        views_to_rollback = () if o_view is None else (o_view,)
        raise RollbackException("obi sort error, rollbacking view: "+str(e), *views_to_rollback) from e

    # TODO DISCUSS if output URI to different DMS, copy view?

    print("\n")
    print(repr(o_view))

    opened_input[0].close()
    #output[0].close()