}
}
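+/*
+ * Take a counter off the CPU.  Only hardware counters occupy a slot
+ * in cpuctx->active_oncpu; the CPU stops being exclusively owned
+ * once an exclusive counter, or the last active hardware counter,
+ * is scheduled out.
+ */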
+static void
+counter_sched_out(struct perf_counter *counter,
+ struct perf_cpu_context *cpuctx,
+ struct perf_counter_context *ctx)
+{
+ if (counter->state != PERF_COUNTER_STATE_ACTIVE)
+ return;
+
+ counter->state = PERF_COUNTER_STATE_INACTIVE;
+ counter->hw_ops->disable(counter);
+ counter->oncpu = -1;
+
+ if (!is_software_counter(counter))
+ cpuctx->active_oncpu--;
+ ctx->nr_active--;
+ if (counter->hw_event.exclusive || !cpuctx->active_oncpu)
+ cpuctx->exclusive = 0;
+}
+
/*
* Cross CPU call to remove a performance counter
*
curr_rq_lock_irq_save(&flags);
spin_lock(&ctx->lock);
- if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
- counter->state = PERF_COUNTER_STATE_INACTIVE;
- counter->hw_ops->disable(counter);
- ctx->nr_active--;
- cpuctx->active_oncpu--;
- counter->task = NULL;
- counter->oncpu = -1;
- }
+ counter_sched_out(counter, cpuctx, ctx);
+
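+ /*
+ * The counter is being detached from its context, so clear
+ * counter->task whether or not the counter was active.
+ */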
+ counter->task = NULL;
ctx->nr_counters--;
/*
struct perf_counter_context *ctx,
int cpu)
{
- if (counter->state == PERF_COUNTER_STATE_OFF)
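+ /*
+ * Counters in OFF or ERROR state (both below INACTIVE) must not
+ * be put on the CPU.
+ */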
+ if (counter->state <= PERF_COUNTER_STATE_OFF)
return 0;
counter->state = PERF_COUNTER_STATE_ACTIVE;
return -EAGAIN;
}
- cpuctx->active_oncpu++;
+ if (!is_software_counter(counter))
+ cpuctx->active_oncpu++;
ctx->nr_active++;
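+ /*
+ * Once an exclusive counter is on, no other hardware counters
+ * may be scheduled on this CPU until it goes off again.
+ */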
+ if (counter->hw_event.exclusive)
+ cpuctx->exclusive = 1;
+
return 0;
}
+/*
+ * Return 1 for a group consisting entirely of software counters,
+ * 0 if the group contains any hardware counters.
+ */
+static int is_software_only_group(struct perf_counter *leader)
+{
+ struct perf_counter *counter;
+
+ if (!is_software_counter(leader))
+ return 0;
+ list_for_each_entry(counter, &leader->sibling_list, list_entry)
+ if (!is_software_counter(counter))
+ return 0;
+ return 1;
+}
+
+/*
+ * Work out whether we can put this counter group on the CPU now.
+ */
+static int group_can_go_on(struct perf_counter *counter,
+ struct perf_cpu_context *cpuctx,
+ int can_add_hw)
+{
+ /*
+ * Groups consisting entirely of software counters can always go on.
+ */
+ if (is_software_only_group(counter))
+ return 1;
+ /*
+ * If an exclusive group is already on, no other hardware
+ * counters can go on.
+ */
+ if (cpuctx->exclusive)
+ return 0;
+ /*
+ * If this group is exclusive and there are already
+ * hardware counters active on the CPU, it can't go on.
+ */
+ if (counter->hw_event.exclusive && cpuctx->active_oncpu)
+ return 0;
+ /*
+ * Otherwise, try to add it if all previous groups were able
+ * to go on.
+ */
+ return can_add_hw;
+}
+
/*
* Cross CPU call to install and enable a performance counter
*/
int cpu = smp_processor_id();
unsigned long flags;
u64 perf_flags;
+ int err;
/*
* If this is a task context, we need to check whether it is
list_add_counter(counter, ctx);
ctx->nr_counters++;
- counter_sched_in(counter, cpuctx, ctx, cpu);
+ /*
+ * An exclusive counter can't go on if there are already active
+ * hardware counters, and no hardware counter can go on if there
+ * is already an exclusive counter on.
+ */
+ if (counter->state == PERF_COUNTER_STATE_INACTIVE &&
+ !group_can_go_on(counter, cpuctx, 1))
+ err = -EEXIST;
+ else
+ err = counter_sched_in(counter, cpuctx, ctx, cpu);
+
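+ /*
+ * A pinned counter that failed to go on is put into error state;
+ * reads on it will then return end-of-file.
+ */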
+ if (err && counter->hw_event.pinned)
+ counter->state = PERF_COUNTER_STATE_ERROR;
- if (!ctx->task && cpuctx->max_pertask)
+ if (!err && !ctx->task && cpuctx->max_pertask)
cpuctx->max_pertask--;
hw_perf_restore(perf_flags);
spin_unlock_irq(&ctx->lock);
}
-static void
-counter_sched_out(struct perf_counter *counter,
- struct perf_cpu_context *cpuctx,
- struct perf_counter_context *ctx)
-{
- if (counter->state != PERF_COUNTER_STATE_ACTIVE)
- return;
-
- counter->state = PERF_COUNTER_STATE_INACTIVE;
- counter->hw_ops->disable(counter);
- counter->oncpu = -1;
-
- cpuctx->active_oncpu--;
- ctx->nr_active--;
-}
-
static void
group_sched_out(struct perf_counter *group_counter,
struct perf_cpu_context *cpuctx,
*/
list_for_each_entry(counter, &group_counter->sibling_list, list_entry)
counter_sched_out(counter, cpuctx, ctx);
+
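+ /*
+ * If this was an exclusive group, scheduling it out frees the
+ * CPU for other hardware counters.
+ */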
+ if (group_counter->hw_event.exclusive)
+ cpuctx->exclusive = 0;
}
void __perf_counter_sched_out(struct perf_counter_context *ctx,
return -EAGAIN;
}
-/*
- * Return 1 for a software counter, 0 for a hardware counter
- */
-static inline int is_software_counter(struct perf_counter *counter)
-{
- return !counter->hw_event.raw && counter->hw_event.type < 0;
-}
-
-/*
- * Return 1 for a group consisting entirely of software counters,
- * 0 if the group contains any hardware counters.
- */
-static int is_software_only_group(struct perf_counter *leader)
-{
- struct perf_counter *counter;
-
- if (!is_software_counter(leader))
- return 0;
- list_for_each_entry(counter, &leader->sibling_list, list_entry)
- if (!is_software_counter(counter))
- return 0;
- return 1;
-}
-
static void
__perf_counter_sched_in(struct perf_counter_context *ctx,
struct perf_cpu_context *cpuctx, int cpu)
spin_lock(&ctx->lock);
flags = hw_perf_save_disable();
+
+ /*
+ * First go through the list and put on any pinned groups
+ * in order to give them the best chance of going on.
+ */
+ list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+ if (counter->state <= PERF_COUNTER_STATE_OFF ||
+ !counter->hw_event.pinned)
+ continue;
+ if (counter->cpu != -1 && counter->cpu != cpu)
+ continue;
+
+ if (group_can_go_on(counter, cpuctx, 1))
+ group_sched_in(counter, cpuctx, ctx, cpu);
+
+ /*
+ * If this pinned group hasn't been scheduled,
+ * put it in error state.
+ */
+ if (counter->state == PERF_COUNTER_STATE_INACTIVE)
+ counter->state = PERF_COUNTER_STATE_ERROR;
+ }
+
list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+ /*
+ * Ignore counters in OFF or ERROR state, and skip
+ * pinned counters since they were handled in the first pass.
+ */
+ if (counter->state <= PERF_COUNTER_STATE_OFF ||
+ counter->hw_event.pinned)
+ continue;
+
/*
* Listen to the 'cpu' scheduling filter constraint
* of counters:
if (counter->cpu != -1 && counter->cpu != cpu)
continue;
- /*
- * If we scheduled in a group atomically and exclusively,
- * or if this group can't go on, don't add any more
- * hardware counters.
- */
- if (can_add_hw || is_software_only_group(counter))
+ if (group_can_go_on(counter, cpuctx, can_add_hw)) {
if (group_sched_in(counter, cpuctx, ctx, cpu))
can_add_hw = 0;
+ }
}
hw_perf_restore(flags);
spin_unlock(&ctx->lock);
*/
perf_flags = hw_perf_save_disable();
- list_for_each_entry(counter, &ctx->counter_list, list_entry)
- counter->state = PERF_COUNTER_STATE_OFF;
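+ /*
+ * Counters already in error state are left in ERROR rather
+ * than being turned off.
+ */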
+ list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+ if (counter->state != PERF_COUNTER_STATE_ERROR)
+ counter->state = PERF_COUNTER_STATE_OFF;
+ }
hw_perf_restore(perf_flags);
perf_flags = hw_perf_save_disable();
list_for_each_entry(counter, &ctx->counter_list, list_entry) {
- if (counter->state != PERF_COUNTER_STATE_OFF)
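+ /*
+ * Counters in OFF or ERROR state get (re-)enabled here;
+ * anything already INACTIVE or ACTIVE is left alone.
+ */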
+ if (counter->state > PERF_COUNTER_STATE_OFF)
continue;
counter->state = PERF_COUNTER_STATE_INACTIVE;
counter->hw_event.disabled = 0;
if (count != sizeof(cntval))
return -EINVAL;
+ /*
+ * Return end-of-file for a read on a counter that is in
+ * error state (i.e. because it was pinned but it couldn't be
+ * scheduled onto the CPU at some point).
+ */
+ if (counter->state == PERF_COUNTER_STATE_ERROR)
+ return 0;
+
mutex_lock(&counter->mutex);
cntval = perf_counter_read(counter);
mutex_unlock(&counter->mutex);
{
struct perf_data *irqdata, *usrdata;
DECLARE_WAITQUEUE(wait, current);
- ssize_t res;
+ ssize_t res, res2;
irqdata = counter->irqdata;
usrdata = counter->usrdata;
if (signal_pending(current))
break;
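+ /*
+ * A counter in error state isn't counting, so there is no
+ * point leaving the reader blocked on it.
+ */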
+ if (counter->state == PERF_COUNTER_STATE_ERROR)
+ break;
+
spin_unlock_irq(&counter->waitq.lock);
schedule();
spin_lock_irq(&counter->waitq.lock);
__set_current_state(TASK_RUNNING);
spin_unlock_irq(&counter->waitq.lock);
- if (usrdata->len + irqdata->len < count)
+ if (usrdata->len + irqdata->len < count &&
+ counter->state != PERF_COUNTER_STATE_ERROR)
return -ERESTARTSYS;
read_pending:
mutex_lock(&counter->mutex);
/* Switch irq buffer: */
usrdata = perf_switch_irq_data(counter);
- if (perf_copy_usrdata(usrdata, buf + res, count - res) < 0) {
+ res2 = perf_copy_usrdata(usrdata, buf + res, count - res);
+ if (res2 < 0) {
if (!res)
res = -EFAULT;
} else {
- res = count;
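+ /* Add however much of the irq data was actually copied. */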
+ res += res2;
}
out:
mutex_unlock(&counter->mutex);
*/
if (group_leader->ctx != ctx)
goto err_put_context;
+ /*
+ * Only a group leader can be exclusive or pinned
+ */
+ if (hw_event.exclusive || hw_event.pinned)
+ goto err_put_context;
}
ret = -EINVAL;
cpuctx = &__get_cpu_var(perf_cpu_context);
- if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) {
- child_counter->state = PERF_COUNTER_STATE_INACTIVE;
- child_counter->hw_ops->disable(child_counter);
- cpuctx->active_oncpu--;
- child_ctx->nr_active--;
- child_counter->oncpu = -1;
- }
+ counter_sched_out(child_counter, cpuctx, child_ctx);
list_del_init(&child_counter->list_entry);