static atomic_t nr_comm_tracking __read_mostly;
int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */
-int sysctl_perf_counter_mlock __read_mostly = 128; /* 'free' kb per counter */
+int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */
/*
* Lock for (sysadmin-configurable) counter reservations:
}
list_add_rcu(&counter->event_entry, &ctx->event_list);
+ ctx->nr_counters++;
}
static void
{
struct perf_counter *sibling, *tmp;
+ ctx->nr_counters--;
+
list_del_init(&counter->list_entry);
list_del_rcu(&counter->event_entry);
counter_sched_out(counter, cpuctx, ctx);
counter->task = NULL;
- ctx->nr_counters--;
/*
* Protect the list operation against NMI by disabling the
* succeed.
*/
if (!list_empty(&counter->list_entry)) {
- ctx->nr_counters--;
list_del_counter(counter, ctx);
counter->task = NULL;
}
struct perf_counter_context *ctx)
{
list_add_counter(counter, ctx);
- ctx->nr_counters++;
counter->prev_state = PERF_COUNTER_STATE_OFF;
counter->tstamp_enabled = ctx->time;
counter->tstamp_running = ctx->time;
return 0;
}
+void perf_adjust_freq(struct perf_counter_context *ctx)
+{ /*
+ * Re-tune the sampling period of every frequency-driven counter in @ctx.
+ *
+ * For each active counter that has both hw_event.freq and
+ * hw_event.irq_freq set, estimate the event rate from the interrupts
+ * counted since the last adjustment, derive the period that would
+ * give irq_freq interrupts, and move hw.irq_period halfway towards
+ * it. The interrupt count is then reset for the next interval.
+ *
+ * Takes and releases ctx->lock around the list walk.
+ */
+ struct perf_counter *counter;
+ u64 irq_period;
+ u64 events, period;
+ s64 delta;
+
+ spin_lock(&ctx->lock);
+ list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+ if (counter->state != PERF_COUNTER_STATE_ACTIVE) /* only running counters are tuned */
+ continue;
+
+ if (!counter->hw_event.freq || !counter->hw_event.irq_freq) /* not in frequency mode, or no target set */
+ continue;
+ /*
+ * Events observed, scaled by HZ: interrupts taken times the
+ * period each interrupt represents.
+ * NOTE(review): the HZ factor presumably extrapolates a
+ * per-tick interrupt count to one second - confirm that this
+ * function is invoked once per timer tick.
+ */
+ events = HZ * counter->hw.interrupts * counter->hw.irq_period;
+ period = div64_u64(events, counter->hw_event.irq_freq); /* period that would yield irq_freq interrupts */
+ /*
+ * Step only half of the (signed) distance to the computed
+ * period; the +1 is added before the halving shift.
+ */
+ delta = (s64)(1 + period - counter->hw.irq_period);
+ delta >>= 1;
+
+ irq_period = counter->hw.irq_period + delta;
+
+ if (!irq_period) /* never let the period drop to zero */
+ irq_period = 1;
+
+ counter->hw.irq_period = irq_period;
+ counter->hw.interrupts = 0; /* start a fresh measurement interval */
+ }
+ spin_unlock(&ctx->lock);
+}
+
/*
* Round-robin a context's counters:
*/
cpuctx = &per_cpu(perf_cpu_context, cpu);
ctx = &curr->perf_counter_ctx;
+ perf_adjust_freq(&cpuctx->ctx);
+ perf_adjust_freq(ctx);
+
perf_counter_cpu_sched_out(cpuctx);
__perf_counter_task_sched_out(ctx);
if (atomic_dec_and_mutex_lock(&counter->mmap_count,
&counter->mmap_mutex)) {
+ struct user_struct *user = current_user();
+
+ atomic_long_sub(counter->data->nr_pages + 1, &user->locked_vm);
vma->vm_mm->locked_vm -= counter->data->nr_locked;
perf_mmap_data_free(counter);
mutex_unlock(&counter->mmap_mutex);
static int perf_mmap(struct file *file, struct vm_area_struct *vma)
{
struct perf_counter *counter = file->private_data;
+ struct user_struct *user = current_user();
unsigned long vma_size;
unsigned long nr_pages;
+ unsigned long user_locked, user_lock_limit;
unsigned long locked, lock_limit;
+ long user_extra, extra;
int ret = 0;
- long extra;
if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE))
return -EINVAL;
goto unlock;
}
- extra = nr_pages /* + 1 only account the data pages */;
- extra -= sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10);
- if (extra < 0)
- extra = 0;
+ user_extra = nr_pages + 1;
+ user_lock_limit = sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10);
+ user_locked = atomic_long_read(&user->locked_vm) + user_extra;
- locked = vma->vm_mm->locked_vm + extra;
+ extra = 0;
+ if (user_locked > user_lock_limit)
+ extra = user_locked - user_lock_limit;
lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
lock_limit >>= PAGE_SHIFT;
+ locked = vma->vm_mm->locked_vm + extra;
if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
ret = -EPERM;
goto unlock;
atomic_set(&counter->mmap_count, 1);
+ atomic_long_add(user_extra, &user->locked_vm);
vma->vm_mm->locked_vm += extra;
counter->data->nr_locked = extra;
unlock:
header.size = sizeof(header);
header.misc = PERF_EVENT_MISC_OVERFLOW;
- header.misc |= user_mode(regs) ?
- PERF_EVENT_MISC_USER : PERF_EVENT_MISC_KERNEL;
+ header.misc |= perf_misc_flags(regs);
if (record_type & PERF_RECORD_IP) {
- ip = instruction_pointer(regs);
+ ip = perf_instruction_pointer(regs);
header.type |= PERF_RECORD_IP;
header.size += sizeof(ip);
}
int events = atomic_read(&counter->event_limit);
int ret = 0;
+ counter->hw.interrupts++;
+
/*
* XXX event_limit might not quite work as expected on inherited
* counters
enum hrtimer_restart ret = HRTIMER_RESTART;
struct perf_counter *counter;
struct pt_regs *regs;
+ u64 period;
counter = container_of(hrtimer, struct perf_counter, hw.hrtimer);
counter->pmu->read(counter);
ret = HRTIMER_NORESTART;
}
- hrtimer_forward_now(hrtimer, ns_to_ktime(counter->hw.irq_period));
+ period = max_t(u64, 10000, counter->hw.irq_period);
+ hrtimer_forward_now(hrtimer, ns_to_ktime(period));
return ret;
}
hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hwc->hrtimer.function = perf_swcounter_hrtimer;
if (hwc->irq_period) {
+ u64 period = max_t(u64, 10000, hwc->irq_period);
__hrtimer_start_range_ns(&hwc->hrtimer,
- ns_to_ktime(hwc->irq_period), 0,
+ ns_to_ktime(period), 0,
HRTIMER_MODE_REL, 0);
}
hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hwc->hrtimer.function = perf_swcounter_hrtimer;
if (hwc->irq_period) {
+ u64 period = max_t(u64, 10000, hwc->irq_period);
__hrtimer_start_range_ns(&hwc->hrtimer,
- ns_to_ktime(hwc->irq_period), 0,
+ ns_to_ktime(period), 0,
HRTIMER_MODE_REL, 0);
}
static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
{
- struct perf_counter_hw_event *hw_event = &counter->hw_event;
const struct pmu *pmu = NULL;
- struct hw_perf_counter *hwc = &counter->hw;
/*
* Software counters (currently) can't in general distinguish
case PERF_COUNT_CPU_CLOCK:
pmu = &perf_ops_cpu_clock;
- if (hw_event->irq_period && hw_event->irq_period < 10000)
- hw_event->irq_period = 10000;
break;
case PERF_COUNT_TASK_CLOCK:
/*
else
pmu = &perf_ops_cpu_clock;
- if (hw_event->irq_period && hw_event->irq_period < 10000)
- hw_event->irq_period = 10000;
break;
case PERF_COUNT_PAGE_FAULTS:
case PERF_COUNT_PAGE_FAULTS_MIN:
break;
}
- if (pmu)
- hwc->irq_period = hw_event->irq_period;
-
return pmu;
}
{
const struct pmu *pmu;
struct perf_counter *counter;
+ struct hw_perf_counter *hwc;
long err;
counter = kzalloc(sizeof(*counter), gfpflags);
pmu = NULL;
+ hwc = &counter->hw;
+ if (hw_event->freq && hw_event->irq_freq)
+ hwc->irq_period = div64_u64(TICK_NSEC, hw_event->irq_freq);
+ else
+ hwc->irq_period = hw_event->irq_period;
+
/*
* we currently do not support PERF_RECORD_GROUP on inherited counters
*/
static void sync_child_counter(struct perf_counter *child_counter,
struct perf_counter *parent_counter)
{
- u64 parent_val, child_val;
+ u64 child_val;
- parent_val = atomic64_read(&parent_counter->count);
child_val = atomic64_read(&child_counter->count);
/*
struct perf_counter_context *child_ctx)
{
struct perf_counter *parent_counter;
- struct perf_counter *sub, *tmp;
/*
* If we do not self-reap then we have to wait for the
*/
if (child != current) {
wait_task_inactive(child, 0);
- list_del_init(&child_counter->list_entry);
update_counter_times(child_counter);
+ list_del_counter(child_counter, child_ctx);
} else {
struct perf_cpu_context *cpuctx;
unsigned long flags;
group_sched_out(child_counter, cpuctx, child_ctx);
update_counter_times(child_counter);
- list_del_init(&child_counter->list_entry);
-
- child_ctx->nr_counters--;
+ list_del_counter(child_counter, child_ctx);
perf_enable();
local_irq_restore(flags);
*/
if (parent_counter) {
sync_child_counter(child_counter, parent_counter);
- list_for_each_entry_safe(sub, tmp, &child_counter->sibling_list,
- list_entry) {
- if (sub->parent) {
- sync_child_counter(sub, sub->parent);
- free_counter(sub);
- }
- }
free_counter(child_counter);
}
}
struct perf_counter *child_counter, *tmp;
struct perf_counter_context *child_ctx;
+ WARN_ON_ONCE(child != current);
+
child_ctx = &child->perf_counter_ctx;
if (likely(!child_ctx->nr_counters))
return;
+again:
list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list,
list_entry)
__perf_counter_exit_task(child, child_counter, child_ctx);
+
+ /*
+ * If the last counter was a group counter, it will have appended all
+ * its siblings to the list, but we obtained 'tmp' before that which
+ * will still point to the list head terminating the iteration.
+ */
+ if (!list_empty(&child_ctx->counter_list))
+ goto again;
}
/*