obi grep: now able to convert str to bytes in predicate expressions

2019-09-22 16:44:45 +02:00
parent 7b4046c288
commit ba9ba7aa60
1 changed files with 33 additions and 14 deletions
--- a/python/obitools3/commands/grep.pyx
+++ b/python/obitools3/commands/grep.pyx
@ -13,6 +13,7 @@ from functools import reduce
 import time
 import re
 import sys
 import ast
 from cpython.exc cimport PyErr_CheckSignals
@ -33,7 +34,7 @@ def addOptions(parser):
    group.add_argument("--predicate", "-p",
                       action="append", dest="grep:grep_predicates",
                       metavar="<PREDICATE>",
-                       default=None,
+                       default=[],
                       type=str,
                       help="Warning: use bytes for character strings (b'text' instead of 'text'). "
                            "Python boolean expression to be evaluated in the "
@ -140,15 +141,36 @@ def addOptions(parser):
                          "the sequences having at least one of them are ignored.")
 def obi_compile_eval(str expr):
    """Compile a predicate expression, turning its str literals into bytes.

    The expression text is prefixed with ``obi_eval_result=`` so that
    executing the returned code object stores the value of the expression
    under the name ``obi_eval_result`` in the execution namespace.

    Every string literal found in the parsed expression is replaced by the
    UTF-8 encoded bytes literal with the same content, so 'text' behaves as
    if b'text' had been written.

    :param expr: the Python expression given as source text
    :return: a code object compiled in "exec" mode with filename "<ast>"
    :raises SyntaxError: if the prefixed expression is not valid Python
    """

    class MyVisitor(ast.NodeTransformer):

        def visit_Constant(self, node):
            # Python >= 3.8 parses every literal as ast.Constant; only the
            # str ones are rewritten, other constants are left untouched.
            if isinstance(node.value, str):
                result = ast.Constant(value=node.value.encode('utf-8'))
                return ast.copy_location(result, node)
            return node

        def visit_Str(self, node):
            # Python < 3.8 still produces ast.Str nodes. No annotation here:
            # ast.Str is deprecated and absent from recent interpreters, and
            # an annotation would be evaluated when the method is defined.
            result = ast.Bytes(s=node.s.encode('utf-8'))
            return ast.copy_location(result, node)

    expr = "obi_eval_result=" + expr
    tree = ast.parse(expr)
    optimizer = MyVisitor()
    tree = optimizer.visit(tree)
    return compile(tree, filename="<ast>", mode="exec")
 def obi_eval(compiled_expr, loc_env, line):
    """Execute a compiled predicate and return the value it produced.

    The code object is executed with an empty globals dictionary and with
    ``loc_env`` as its local namespace; the value found afterwards under the
    "obi_eval_result" key of ``loc_env`` is returned.

    :param compiled_expr: code object to execute
    :param loc_env: dictionary used as the local namespace (modified in place)
    :param line: not used by this function's body
    :return: the content of loc_env["obi_eval_result"] after execution
    """
    exec(compiled_expr, {}, loc_env)
    return loc_env["obi_eval_result"]
 def Filter_generator(options, tax_filter):
    #taxfilter = taxonomyFilterGenerator(options)
-    
+
    # Initialize conditions
    predicates = None
    if "grep_predicates" in options:
-        predicates = options["grep_predicates"]
+        predicates = [obi_compile_eval(p) for p in options["grep_predicates"]]
    attributes = None
-    if "attributes" in options:
+    if "attributes" in options and len(options["attributes"]) > 0:
        attributes = options["attributes"]
    lmax = None
    if "lmax" in options:
@ -172,7 +194,7 @@ def Filter_generator(options, tax_filter):
    if "def_pattern" in options:
        def_pattern = re.compile(tobytes(options["def_pattern"]))
    attribute_patterns={}
-    if "attribute_patterns" in options:
+    if "attribute_patterns" in options and len(options["attribute_patterns"]) > 0:
        for p in options["attribute_patterns"]:
            attribute, pattern = p.split(":", 1)
            attribute_patterns[tobytes(attribute)] = re.compile(tobytes(pattern))
@ -199,7 +221,7 @@ def Filter_generator(options, tax_filter):
        if good and attribute_patterns:
            good = (reduce(lambda bint x, bint y : x and y, 
-                        (line[attribute] is not None for attribute in attributes),
+                        (line[attribute] is not None for attribute in attribute_patterns),
                        True)
                    and
                    reduce(lambda bint x, bint y: x and y,
@ -207,14 +229,11 @@ def Filter_generator(options, tax_filter):
                        for attribute in attribute_patterns), 
                        True)
                   )
-             
+                     
        if good and predicates:
-            try:
+            good = (reduce(lambda bint x, bint y: x and y,
-                good = (reduce(lambda bint x, bint y: x and y,
+                    (bool(obi_eval(p, loc_env, line))
-                        (bool(eval(p, loc_env, line))
+                    for p in predicates), True))
-                        for p in predicates), True))
-            except TypeError:
-                raise Exception("Try replacing strings in python expression with bytes (by putting 'b' in front of strings, e.g. b'example')")
        if good and lmin:
            good = len(line) >= lmin
@ -313,7 +332,7 @@ def run(config):
        pb(i)
        line = i_view[i]
-        loc_env = {"sequence": line, "line": line, "taxonomy": taxo}
+        loc_env = {"sequence": line, "line": line, "taxonomy": taxo, "obi_eval_result": False}
        good = filter(line, loc_env)