tracing/filter: Use a tree instead of stack for filter_match_preds()
authorSteven Rostedt <srostedt@redhat.com>
Fri, 28 Jan 2011 03:54:33 +0000 (22:54 -0500)
committerSteven Rostedt <rostedt@goodmis.org>
Tue, 8 Feb 2011 01:56:19 +0000 (20:56 -0500)
Currently the filter_match_preds() requires a stack to push
and pop the preds to determine if the filter matches the record or not.
This has two drawbacks:

1) It requires a stack to store state information. As this is done
   in fast paths we can't allocate the storage for this stack, and
   we can't use a global as it must be re-entrant. The stack is stored
   on the kernel stack and this greatly limits how many preds we
   may allow.

2) All conditions are calculated even when a short circuit exists.
   a || b  will always calculate a and b even though a was determined
   to be true.

Using a tree we can walk a constant structure that will save
the state as we go. The algorithm is simply:

  pred = root;
  do {
switch (move) {
case MOVE_DOWN:
if (OR or AND) {
pred = left;
continue;
}
if (pred == root)
break;
match = pred->fn();
pred = pred->parent;
move = left child ? MOVE_UP_FROM_LEFT : MOVE_UP_FROM_RIGHT;
continue;

case MOVE_UP_FROM_LEFT:
/* Only OR or AND can be a parent */
if (match && OR || !match && AND) {
/* short circuit */
if (pred == root)
break;
pred = pred->parent;
move = left child ?
MOVE_UP_FROM_LEFT :
MOVE_UP_FROM_RIGHT;
continue;
}
pred = pred->right;
move = MOVE_DOWN;
continue;

case MOVE_UP_FROM_RIGHT:
if (pred == root)
break;
pred = pred->parent;
move = left child ? MOVE_UP_FROM_LEFT : MOVE_UP_FROM_RIGHT;
continue;
}
done = 1;
  } while (!done);

This way there's no strict limit to how many preds we allow
and it also will short circuit the logical operations when possible.

Cc: Tom Zanussi <tzanussi@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
kernel/trace/trace.h
kernel/trace/trace_events_filter.c

index 254d04a84ec30397ab39f23d21b83ea47a033f65..bba34a72c780c12327f51adc74d82b25e0c428d0 100644 (file)
@@ -664,6 +664,7 @@ struct event_filter {
        int                     n_preds;        /* Number assigned */
        int                     a_preds;        /* allocated */
        struct filter_pred      *preds;
+       struct filter_pred      *root;
        char                    *filter_string;
 };
 
@@ -675,6 +676,9 @@ struct event_subsystem {
        int                     nr_events;
 };
 
+#define FILTER_PRED_INVALID    ((unsigned short)-1)
+#define FILTER_PRED_IS_RIGHT   (1 << 15)
+
 struct filter_pred;
 struct regex;
 
@@ -704,7 +708,10 @@ struct filter_pred {
        int                     offset;
        int                     not;
        int                     op;
-       int                     pop_n;
+       unsigned short          index;
+       unsigned short          parent;
+       unsigned short          left;
+       unsigned short          right;
 };
 
 extern struct list_head ftrace_common_fields;
index 2f5458e244a3a9e2721475570d5fbc9e1b6bf32b..10390491b6d0f5c3c3acffc1e2ad2ade09903b6a 100644 (file)
@@ -123,6 +123,11 @@ struct filter_parse_state {
        } operand;
 };
 
+struct pred_stack {
+       struct filter_pred      **preds;
+       int                     index;
+};
+
 #define DEFINE_COMPARISON_PRED(type)                                   \
 static int filter_pred_##type(struct filter_pred *pred, void *event)   \
 {                                                                      \
@@ -357,52 +362,95 @@ static void filter_build_regex(struct filter_pred *pred)
        pred->not ^= not;
 }
 
+enum move_type {
+       MOVE_DOWN,
+       MOVE_UP_FROM_LEFT,
+       MOVE_UP_FROM_RIGHT
+};
+
+static struct filter_pred *
+get_pred_parent(struct filter_pred *pred, struct filter_pred *preds,
+               int index, enum move_type *move)
+{
+       if (pred->parent & FILTER_PRED_IS_RIGHT)
+               *move = MOVE_UP_FROM_RIGHT;
+       else
+               *move = MOVE_UP_FROM_LEFT;
+       pred = &preds[pred->parent & ~FILTER_PRED_IS_RIGHT];
+
+       return pred;
+}
+
 /* return 1 if event matches, 0 otherwise (discard) */
 int filter_match_preds(struct event_filter *filter, void *rec)
 {
-       int match = -1, top = 0, val1 = 0, val2 = 0;
-       int stack[MAX_FILTER_PRED];
+       int match = -1;
+       enum move_type move = MOVE_DOWN;
        struct filter_pred *preds;
        struct filter_pred *pred;
+       struct filter_pred *root;
        int n_preds = ACCESS_ONCE(filter->n_preds);
-       int i;
+       int done = 0;
 
        /* no filter is considered a match */
        if (!n_preds)
                return 1;
 
        /*
-        * n_preds and filter->preds is protect with preemption disabled.
+        * n_preds, root and filter->preds are protect with preemption disabled.
         */
        preds = rcu_dereference_sched(filter->preds);
+       root = rcu_dereference_sched(filter->root);
+       if (!root)
+               return 1;
 
-       for (i = 0; i < n_preds; i++) {
-               pred = &preds[i];
-               if (!pred->pop_n) {
+       pred = root;
+
+       /* match is currently meaningless */
+       match = -1;
+
+       do {
+               switch (move) {
+               case MOVE_DOWN:
+                       /* only AND and OR have children */
+                       if (pred->left != FILTER_PRED_INVALID) {
+                               /* keep going to leaf node */
+                               pred = &preds[pred->left];
+                               continue;
+                       }
                        match = pred->fn(pred, rec);
-                       stack[top++] = match;
+                       /* If this pred is the only pred */
+                       if (pred == root)
+                               break;
+                       pred = get_pred_parent(pred, preds,
+                                              pred->parent, &move);
+                       continue;
+               case MOVE_UP_FROM_LEFT:
+                       /* Check for short circuits */
+                       if ((match && pred->op == OP_OR) ||
+                           (!match && pred->op == OP_AND)) {
+                               if (pred == root)
+                                       break;
+                               pred = get_pred_parent(pred, preds,
+                                                      pred->parent, &move);
+                               continue;
+                       }
+                       /* now go down the right side of the tree. */
+                       pred = &preds[pred->right];
+                       move = MOVE_DOWN;
+                       continue;
+               case MOVE_UP_FROM_RIGHT:
+                       /* We finished this equation. */
+                       if (pred == root)
+                               break;
+                       pred = get_pred_parent(pred, preds,
+                                              pred->parent, &move);
                        continue;
                }
-               if (pred->pop_n > top) {
-                       WARN_ON_ONCE(1);
-                       return 0;
-               }
-               val1 = stack[--top];
-               val2 = stack[--top];
-               switch (pred->op) {
-               case OP_AND:
-                       match = val1 && val2;
-                       break;
-               case OP_OR:
-                       match = val1 || val2;
-                       break;
-               default:
-                       WARN_ONCE(1, "filter op is not AND or OR");
-               }
-               stack[top++] = match;
-       }
+               done = 1;
+       } while (!done);
 
-       return stack[--top];
+       return match;
 }
 EXPORT_SYMBOL_GPL(filter_match_preds);
 
@@ -539,10 +587,58 @@ static void filter_clear_pred(struct filter_pred *pred)
        pred->regex.len = 0;
 }
 
-static int filter_set_pred(struct filter_pred *dest,
+static int __alloc_pred_stack(struct pred_stack *stack, int n_preds)
+{
+       stack->preds = kzalloc(sizeof(*stack->preds)*(n_preds + 1), GFP_KERNEL);
+       if (!stack->preds)
+               return -ENOMEM;
+       stack->index = n_preds;
+       return 0;
+}
+
+static void __free_pred_stack(struct pred_stack *stack)
+{
+       kfree(stack->preds);
+       stack->index = 0;
+}
+
+static int __push_pred_stack(struct pred_stack *stack,
+                            struct filter_pred *pred)
+{
+       int index = stack->index;
+
+       if (WARN_ON(index == 0))
+               return -ENOSPC;
+
+       stack->preds[--index] = pred;
+       stack->index = index;
+       return 0;
+}
+
+static struct filter_pred *
+__pop_pred_stack(struct pred_stack *stack)
+{
+       struct filter_pred *pred;
+       int index = stack->index;
+
+       pred = stack->preds[index++];
+       if (!pred)
+               return NULL;
+
+       stack->index = index;
+       return pred;
+}
+
+static int filter_set_pred(struct event_filter *filter,
+                          int idx,
+                          struct pred_stack *stack,
                           struct filter_pred *src,
                           filter_pred_fn_t fn)
 {
+       struct filter_pred *dest = &filter->preds[idx];
+       struct filter_pred *left;
+       struct filter_pred *right;
+
        *dest = *src;
        if (src->field_name) {
                dest->field_name = kstrdup(src->field_name, GFP_KERNEL);
@@ -550,8 +646,25 @@ static int filter_set_pred(struct filter_pred *dest,
                        return -ENOMEM;
        }
        dest->fn = fn;
+       dest->index = idx;
 
-       return 0;
+       if (dest->op == OP_OR || dest->op == OP_AND) {
+               right = __pop_pred_stack(stack);
+               left = __pop_pred_stack(stack);
+               if (!left || !right)
+                       return -EINVAL;
+               dest->left = left->index;
+               dest->right = right->index;
+               left->parent = dest->index;
+               right->parent = dest->index | FILTER_PRED_IS_RIGHT;
+       } else
+               /*
+                * Make dest->left invalid to be used as a quick
+                * way to know this is a leaf node.
+                */
+               dest->left = FILTER_PRED_INVALID;
+
+       return __push_pred_stack(stack, dest);
 }
 
 static void __free_preds(struct event_filter *filter)
@@ -574,6 +687,7 @@ static void reset_preds(struct event_filter *filter)
        int i;
 
        filter->n_preds = 0;
+       filter->root = NULL;
        if (!filter->preds)
                return;
 
@@ -707,6 +821,7 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
                              struct ftrace_event_call *call,
                              struct event_filter *filter,
                              struct filter_pred *pred,
+                             struct pred_stack *stack,
                              filter_pred_fn_t fn)
 {
        int idx, err;
@@ -718,7 +833,7 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
 
        idx = filter->n_preds;
        filter_clear_pred(&filter->preds[idx]);
-       err = filter_set_pred(&filter->preds[idx], pred, fn);
+       err = filter_set_pred(filter, idx, stack, pred, fn);
        if (err)
                return err;
 
@@ -803,6 +918,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
                           struct ftrace_event_call *call,
                           struct event_filter *filter,
                           struct filter_pred *pred,
+                          struct pred_stack *stack,
                           bool dry_run)
 {
        struct ftrace_event_field *field;
@@ -812,13 +928,10 @@ static int filter_add_pred(struct filter_parse_state *ps,
 
        fn = pred->fn = filter_pred_none;
 
-       if (pred->op == OP_AND) {
-               pred->pop_n = 2;
+       if (pred->op == OP_AND)
                goto add_pred_fn;
-       } else if (pred->op == OP_OR) {
-               pred->pop_n = 2;
+       else if (pred->op == OP_OR)
                goto add_pred_fn;
-       }
 
        field = find_event_field(call, pred->field_name);
        if (!field) {
@@ -867,7 +980,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
 
 add_pred_fn:
        if (!dry_run)
-               return filter_add_pred_fn(ps, call, filter, pred, fn);
+               return filter_add_pred_fn(ps, call, filter, pred, stack, fn);
        return 0;
 }
 
@@ -1248,6 +1361,7 @@ static int replace_preds(struct ftrace_event_call *call,
        char *operand1 = NULL, *operand2 = NULL;
        struct filter_pred *pred;
        struct postfix_elt *elt;
+       struct pred_stack stack = { }; /* init to NULL */
        int err;
        int n_preds = 0;
 
@@ -1262,9 +1376,12 @@ static int replace_preds(struct ftrace_event_call *call,
                return err;
 
        if (!dry_run) {
-               err = __alloc_preds(filter, n_preds);
+               err = __alloc_pred_stack(&stack, n_preds);
                if (err)
                        return err;
+               err = __alloc_preds(filter, n_preds);
+               if (err)
+                       goto fail;
        }
 
        n_preds = 0;
@@ -1276,14 +1393,16 @@ static int replace_preds(struct ftrace_event_call *call,
                                operand2 = elt->operand;
                        else {
                                parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0);
-                               return -EINVAL;
+                               err = -EINVAL;
+                               goto fail;
                        }
                        continue;
                }
 
                if (WARN_ON(n_preds++ == MAX_FILTER_PRED)) {
                        parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
-                       return -ENOSPC;
+                       err = -ENOSPC;
+                       goto fail;
                }
 
                if (elt->op == OP_AND || elt->op == OP_OR) {
@@ -1293,22 +1412,44 @@ static int replace_preds(struct ftrace_event_call *call,
 
                if (!operand1 || !operand2) {
                        parse_error(ps, FILT_ERR_MISSING_FIELD, 0);
-                       return -EINVAL;
+                       err = -EINVAL;
+                       goto fail;
                }
 
                pred = create_pred(elt->op, operand1, operand2);
 add_pred:
-               if (!pred)
-                       return -ENOMEM;
-               err = filter_add_pred(ps, call, filter, pred, dry_run);
+               if (!pred) {
+                       err = -ENOMEM;
+                       goto fail;
+               }
+               err = filter_add_pred(ps, call, filter, pred, &stack, dry_run);
                filter_free_pred(pred);
                if (err)
-                       return err;
+                       goto fail;
 
                operand1 = operand2 = NULL;
        }
 
-       return 0;
+       if (!dry_run) {
+               /* We should have one item left on the stack */
+               pred = __pop_pred_stack(&stack);
+               if (!pred)
+                       return -EINVAL;
+               /* This item is where we start from in matching */
+               filter->root = pred;
+               /* Make sure the stack is empty */
+               pred = __pop_pred_stack(&stack);
+               if (WARN_ON(pred)) {
+                       err = -EINVAL;
+                       filter->root = NULL;
+                       goto fail;
+               }
+       }
+
+       err = 0;
+fail:
+       __free_pred_stack(&stack);
+       return err;
 }
 
 static int replace_system_preds(struct event_subsystem *system,