perf_counter: update mmap() counter read
authorPeter Zijlstra <a.p.zijlstra@chello.nl>
Thu, 2 Apr 2009 09:12:04 +0000 (11:12 +0200)
committerIngo Molnar <mingo@elte.hu>
Mon, 6 Apr 2009 07:30:47 +0000 (09:30 +0200)
Paul noted that we don't need SMP barriers for the mmap() counter read
because its always on the same cpu (otherwise you can't access the hw
counter anyway).

So remove the SMP barriers and replace them with regular compiler
barriers.

Further, update the comment to include a race free method of reading
said hardware counter. The primary change is putting the pmc_read
inside the seq-loop, otherwise we can still race and read rubbish.

Noticed-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Orig-LKML-Reference: <20090402091319.577951445@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
include/linux/perf_counter.h
kernel/perf_counter.c

index 90cce0c74a034664092cf510ec7a4cc57a6e7443..f2b914de3f0c784fa2f3a1363f5c8f5b85e30935 100644 (file)
@@ -167,30 +167,28 @@ struct perf_counter_mmap_page {
        /*
         * Bits needed to read the hw counters in user-space.
         *
-        * The index and offset should be read atomically using the seqlock:
-        *
-        *   __u32 seq, index;
-        *   __s64 offset;
+        *   u32 seq;
+        *   s64 count;
         *
         * again:
-        *   rmb();
         *   seq = pc->lock;
-        *
         *   if (unlikely(seq & 1)) {
         *     cpu_relax();
         *     goto again;
         *   }
         *
-        *   index = pc->index;
-        *   offset = pc->offset;
+        *   if (pc->index) {
+        *     count = pmc_read(pc->index - 1);
+        *     count += pc->offset;
+        *   } else
+        *     goto regular_read;
         *
-        *   rmb();
+        *   barrier();
         *   if (pc->lock != seq)
         *     goto again;
         *
-        * After this, index contains architecture specific counter index + 1,
-        * so that 0 means unavailable, offset contains the value to be added
-        * to the result of the raw timer read to obtain this counter's value.
+        * NOTE: for obvious reason this only works on self-monitoring
+        *       processes.
         */
        __u32   lock;                   /* seqlock for synchronization */
        __u32   index;                  /* hardware counter identifier */
index f105a6e696c20c178de06fa332c9407e6bb4ce0f..2a5d4f525567800eb52a8d280e02fe504e7b970d 100644 (file)
@@ -1340,13 +1340,13 @@ void perf_counter_update_userpage(struct perf_counter *counter)
         */
        preempt_disable();
        ++userpg->lock;
-       smp_wmb();
+       barrier();
        userpg->index = counter->hw.idx;
        userpg->offset = atomic64_read(&counter->count);
        if (counter->state == PERF_COUNTER_STATE_ACTIVE)
                userpg->offset -= atomic64_read(&counter->hw.prev_count);
 
-       smp_wmb();
+       barrier();
        ++userpg->lock;
        preempt_enable();
 unlock: