forget_original_parent(tsk);
exit_task_namespaces(tsk);
+ /*
+ * Flush inherited counters to the parent - before the parent
+ * gets woken up by child-exit notifications.
+ */
+ perf_counter_exit_task(tsk);
+
write_lock_irq(&tasklist_lock);
if (group_dead)
kill_orphaned_pgrp(tsk->group_leader, NULL);
mpol_put(tsk->mempolicy);
tsk->mempolicy = NULL;
#endif
- /*
- * These must happen late, after the PID is not
- * hashed anymore, but still at a point that may sleep:
- */
- perf_counter_exit_task(tsk);
#ifdef CONFIG_FUTEX
if (unlikely(!list_empty(&tsk->pi_state_list)))
exit_pi_state_list(tsk);
if (tsk->splice_pipe)
__free_pipe_info(tsk->splice_pipe);
+ /*
+ * These must happen late, after the PID is not
+ * hashed anymore, but still at a point that may sleep:
+ */
+ perf_counter_exit_task(tsk);
+
preempt_disable();
/* causes final put_task_struct in finish_task_switch(). */
tsk->state = TASK_DEAD;
#include <linux/uaccess.h>
#include <linux/syscalls.h>
#include <linux/anon_inodes.h>
+#include <linux/kernel_stat.h>
#include <linux/perf_counter.h>
/*
if (ctx->task && cpuctx->task_ctx != ctx)
return;
- spin_lock_irqsave(&ctx->lock, flags);
+ curr_rq_lock_irq_save(&flags);
+ spin_lock(&ctx->lock);
if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
counter->hw_ops->disable(counter);
perf_max_counters - perf_reserved_percpu);
}
- spin_unlock_irqrestore(&ctx->lock, flags);
+ spin_unlock(&ctx->lock);
+ curr_rq_unlock_irq_restore(&flags);
}
if (ctx->task && cpuctx->task_ctx != ctx)
return;
- spin_lock_irqsave(&ctx->lock, flags);
+ curr_rq_lock_irq_save(&flags);
+ spin_lock(&ctx->lock);
/*
* Protect the list operation against NMI by disabling the
if (!ctx->task && cpuctx->max_pertask)
cpuctx->max_pertask--;
- spin_unlock_irqrestore(&ctx->lock, flags);
+ spin_unlock(&ctx->lock);
+ curr_rq_unlock_irq_restore(&flags);
}
/*
struct task_struct *curr = current;
struct perf_counter_context *ctx = &curr->perf_counter_ctx;
struct perf_counter *counter;
+ unsigned long flags;
u64 perf_flags;
int cpu;
if (likely(!ctx->nr_counters))
return 0;
- local_irq_disable();
+ curr_rq_lock_irq_save(&flags);
cpu = smp_processor_id();
+ /* force the update of the task clock: */
+ __task_delta_exec(curr, 1);
+
perf_counter_task_sched_out(curr, cpu);
spin_lock(&ctx->lock);
spin_unlock(&ctx->lock);
- local_irq_enable();
+ curr_rq_unlock_irq_restore(&flags);
return 0;
}
struct task_struct *curr = current;
struct perf_counter_context *ctx = &curr->perf_counter_ctx;
struct perf_counter *counter;
+ unsigned long flags;
u64 perf_flags;
int cpu;
if (likely(!ctx->nr_counters))
return 0;
- local_irq_disable();
+ curr_rq_lock_irq_save(&flags);
cpu = smp_processor_id();
+ /* force the update of the task clock: */
+ __task_delta_exec(curr, 1);
+
spin_lock(&ctx->lock);
/*
if (counter->state != PERF_COUNTER_STATE_OFF)
continue;
counter->state = PERF_COUNTER_STATE_INACTIVE;
+ counter->hw_event.disabled = 0;
}
hw_perf_restore(perf_flags);
perf_counter_task_sched_in(curr, cpu);
- local_irq_enable();
+ curr_rq_unlock_irq_restore(&flags);
return 0;
}
static void __read(void *info)
{
struct perf_counter *counter = info;
+ unsigned long flags;
+ /*
+ * Take the current CPU's runqueue lock with IRQs disabled around the
+ * hardware read: the task-clock read path ends up in
+ * __task_delta_exec(), which WARNs unless the runqueue is locked.
+ */
+ curr_rq_lock_irq_save(&flags);
counter->hw_ops->read(counter);
+ curr_rq_unlock_irq_restore(&flags);
}
static u64 perf_counter_read(struct perf_counter *counter)
.read = cpu_clock_perf_counter_read,
};
-static void task_clock_perf_counter_update(struct perf_counter *counter)
+/*
+ * Called from within the scheduler:
+ *
+ * Return the task-clock value for @counter's task: runtime already
+ * banked in se.sum_exec_runtime plus the not-yet-banked delta from
+ * __task_delta_exec().  @update != 0 forces a runqueue-clock refresh
+ * first.  The caller must hold the current runqueue lock — see the
+ * WARN_ON_ONCE() checks in __task_delta_exec().
+ */
+static u64 task_clock_perf_counter_val(struct perf_counter *counter, int update)
{
- u64 prev, now;
+ struct task_struct *curr = counter->task;
+ u64 delta;
+
+ /* Only valid for counters attached to the currently-running task. */
+ WARN_ON_ONCE(counter->task != current);
+
+ delta = __task_delta_exec(curr, update);
+
+ return curr->se.sum_exec_runtime + delta;
+}
+
+static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now)
+{
+ u64 prev;
s64 delta;
prev = atomic64_read(&counter->hw.prev_count);
- now = current->se.sum_exec_runtime;
atomic64_set(&counter->hw.prev_count, now);
static void task_clock_perf_counter_read(struct perf_counter *counter)
{
- task_clock_perf_counter_update(counter);
+ /* update=1: force a runqueue-clock refresh so the reading is current. */
+ u64 now = task_clock_perf_counter_val(counter, 1);
+
+ task_clock_perf_counter_update(counter, now);
}
static void task_clock_perf_counter_enable(struct perf_counter *counter)
{
- atomic64_set(&counter->hw.prev_count, current->se.sum_exec_runtime);
+ /*
+ * Prime prev_count with the task clock at enable time so later
+ * updates only accumulate runtime from this point on.  update=0:
+ * the rq clock is not refreshed here.
+ */
+ u64 now = task_clock_perf_counter_val(counter, 0);
+
+ atomic64_set(&counter->hw.prev_count, now);
}
static void task_clock_perf_counter_disable(struct perf_counter *counter)
{
- task_clock_perf_counter_update(counter);
+ /* Fold the final not-yet-banked delta into the counter (update=0). */
+ u64 now = task_clock_perf_counter_val(counter, 0);
+
+ task_clock_perf_counter_update(counter, now);
}
static const struct hw_perf_counter_ops perf_ops_task_clock = {
{
struct perf_counter *parent_counter;
u64 parent_val, child_val;
+ unsigned long flags;
u64 perf_flags;
/*
* Be careful about zapping the list - IRQ/NMI context
* could still be processing it:
*/
- local_irq_disable();
+ curr_rq_lock_irq_save(&flags);
perf_flags = hw_perf_save_disable();
if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) {
list_del_init(&child_counter->list_entry);
hw_perf_restore(perf_flags);
- local_irq_enable();
+ curr_rq_unlock_irq_restore(&flags);
parent_counter = child_counter->parent;
/*
#define task_rq(p) cpu_rq(task_cpu(p))
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
-static inline void update_rq_clock(struct rq *rq)
+/*
+ * NOTE(review): 'static' dropped — presumably so this can be referenced
+ * from outside sched.c; confirm a matching extern declaration exists in
+ * a shared header.
+ */
+inline void update_rq_clock(struct rq *rq)
{
rq->clock = sched_clock_cpu(cpu_of(rq));
}
}
}
+/*
+ * Disable local interrupts (saving the IRQ state in *flags) and lock
+ * the current CPU's runqueue.  With IRQs off the caller stays on this
+ * CPU, so curr_rq_unlock_irq_restore() will unlock the same runqueue.
+ */
+void curr_rq_lock_irq_save(unsigned long *flags)
+ __acquires(rq->lock)
+{
+ struct rq *rq;
+
+ local_irq_save(*flags);
+ rq = cpu_rq(smp_processor_id());
+ spin_lock(&rq->lock);
+}
+
+/*
+ * Counterpart of curr_rq_lock_irq_save(): unlock the current CPU's
+ * runqueue and restore the saved IRQ state.  Must run on the same CPU
+ * that took the lock (guaranteed while IRQs were kept disabled).
+ */
+void curr_rq_unlock_irq_restore(unsigned long *flags)
+ __releases(rq->lock)
+{
+ struct rq *rq;
+
+ rq = cpu_rq(smp_processor_id());
+ spin_unlock(&rq->lock);
+ local_irq_restore(*flags);
+}
+
void task_rq_unlock_wait(struct task_struct *p)
{
struct rq *rq = task_rq(p);
struct task_struct *next)
{
fire_sched_out_preempt_notifiers(prev, next);
- perf_counter_task_sched_out(prev, cpu_of(rq));
prepare_lock_switch(rq, next);
prepare_arch_switch(next);
}
EXPORT_PER_CPU_SYMBOL(kstat);
+/*
+ * Return any ns on the sched_clock that have not yet been banked in
+ * @p in case that task is currently running.
+ *
+ * @update != 0 refreshes the runqueue clock first, for a more precise
+ * result.  The caller must hold @p's runqueue lock, and @p must be the
+ * task currently running on that runqueue — both are asserted below.
+ */
+unsigned long long __task_delta_exec(struct task_struct *p, int update)
+{
+ s64 delta_exec;
+ struct rq *rq;
+
+ rq = task_rq(p);
+ WARN_ON_ONCE(!runqueue_is_locked());
+ WARN_ON_ONCE(!task_current(rq, p));
+
+ if (update)
+ update_rq_clock(rq);
+
+ /* ns accrued since @p's current exec slice began */
+ delta_exec = rq->clock - p->se.exec_start;
+
+ WARN_ON_ONCE(delta_exec < 0);
+
+ return delta_exec;
+}
+
/*
* Return any ns on the sched_clock that have not yet been banked in
* @p in case that task is currently running.
update_rq_clock(rq);
update_cpu_load(rq);
curr->sched_class->task_tick(rq, curr, 0);
+ perf_counter_task_tick(curr, cpu);
spin_unlock(&rq->lock);
#ifdef CONFIG_SMP
rq->idle_at_tick = idle_cpu(cpu);
trigger_load_balance(rq, cpu);
#endif
- perf_counter_task_tick(curr, cpu);
}
#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
if (likely(prev != next)) {
sched_info_switch(prev, next);
+ perf_counter_task_sched_out(prev, cpu);
rq->nr_switches++;
rq->curr = next;