perf evlist: Refcount mmaps
author Arnaldo Carvalho de Melo <acme@redhat.com>
Mon, 8 Sep 2014 16:26:35 +0000 (13:26 -0300)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Thu, 25 Sep 2014 19:46:55 +0000 (16:46 -0300)
We need to know how many fds are using a perf mmap via
PERF_EVENT_IOC_SET_OUTPUT, so that we can know when to ditch an mmap:
refcount it.

v2: Automatically unmap it when the refcount hits one, which, in later
patches, will happen when all fds are filtered by
perf_evlist__filter_pollfd().
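
To make the scheme concrete, below is a minimal, self-contained sketch of
the intended lifecycle (hypothetical fake_* names over a faked ring
buffer, not the evlist code itself): the map starts with a refcount of 2,
one for the evlist and one reserved for the final consume, each extra fd
redirected with PERF_EVENT_IOC_SET_OUTPUT takes a reference, and the
munmap only happens once the count drops to zero with the buffer drained.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical standalone model of the refcount scheme in this patch;
 * the ring buffer is faked so the lifecycle can be traced. */
struct fake_mmap {
	bool mapped;
	int  refcnt;
	unsigned int head; /* kernel write position (faked) */
	unsigned int prev; /* last position consumed by the tool */
};

static void fake_munmap(struct fake_mmap *md)
{
	md->mapped = false;
	md->refcnt = 0;
	printf("unmapped\n");
}

static void fake_mmap_get(struct fake_mmap *md)
{
	++md->refcnt;
}

static void fake_mmap_put(struct fake_mmap *md)
{
	assert(md->refcnt != 0);
	if (--md->refcnt == 0)
		fake_munmap(md);
}

static bool fake_mmap_empty(struct fake_mmap *md)
{
	return md->head == md->prev;
}

/* Mirrors perf_evlist__mmap_consume(): drop the last reference only
 * once every event in the ring buffer has been consumed. */
static void fake_mmap_consume(struct fake_mmap *md)
{
	if (md->refcnt == 1 && fake_mmap_empty(md))
		fake_mmap_put(md);
}

int main(void)
{
	struct fake_mmap md = { .mapped = true, .refcnt = 2, .head = 1 };

	fake_mmap_get(&md);     /* a second fd shares it via SET_OUTPUT */
	fake_mmap_put(&md);     /* ... and is later filtered out */
	fake_mmap_put(&md);     /* the mmapped fd is filtered out too */

	fake_mmap_consume(&md); /* head != prev: events pending, keep it */
	md.prev = md.head;      /* the tool consumes the last event */
	fake_mmap_consume(&md); /* now empty: final put, munmap */

	assert(!md.mapped);
	return 0;
}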

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jean Pihet <jean.pihet@linaro.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20140908153824.GG2773@kernel.org
Link: http://lkml.kernel.org/n/tip-cpv7v2lw0g74ucmxa39xdpms@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/util/evlist.c
tools/perf/util/evlist.h

index 398dab1a08cc8ae1fdad4c3cafc7bbf2b8076a5c..efddee5a23e94d555c677d7ba1bc4b96b6f52322 100644
@@ -28,6 +28,8 @@
 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
 #define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
 
+static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx);
+
 void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
                       struct thread_map *threads)
 {
@@ -651,14 +653,36 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
        return event;
 }
 
+static bool perf_mmap__empty(struct perf_mmap *md)
+{
+       return perf_mmap__read_head(md) == md->prev;
+}
+
+static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx)
+{
+       ++evlist->mmap[idx].refcnt;
+}
+
+static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx)
+{
+       BUG_ON(evlist->mmap[idx].refcnt == 0);
+
+       if (--evlist->mmap[idx].refcnt == 0)
+               __perf_evlist__munmap(evlist, idx);
+}
+
 void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
 {
+       struct perf_mmap *md = &evlist->mmap[idx];
+
        if (!evlist->overwrite) {
-               struct perf_mmap *md = &evlist->mmap[idx];
                unsigned int old = md->prev;
 
                perf_mmap__write_tail(md, old);
        }
+
+       if (md->refcnt == 1 && perf_mmap__empty(md))
+               perf_evlist__mmap_put(evlist, idx);
 }
 
 static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
@@ -666,6 +690,7 @@ static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
        if (evlist->mmap[idx].base != NULL) {
                munmap(evlist->mmap[idx].base, evlist->mmap_len);
                evlist->mmap[idx].base = NULL;
+               evlist->mmap[idx].refcnt = 0;
        }
 }
 
@@ -699,6 +724,20 @@ struct mmap_params {
 static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
                               struct mmap_params *mp, int fd)
 {
+       /*
+        * The last one will be done at perf_evlist__mmap_consume(), so that we
+        * make sure we don't prevent tools from consuming every last event in
+        * the ring buffer.
+        *
+        * I.e. we can get the POLLHUP meaning that the fd doesn't exist
+        * anymore, but the last events for it are still in the ring buffer,
+        * waiting to be consumed.
+        *
+        * Tools can choose to ignore this at their own discretion, but the
+        * evlist layer can't just drop it when filtering events in
+        * perf_evlist__filter_pollfd().
+        */
+       evlist->mmap[idx].refcnt = 2;
        evlist->mmap[idx].prev = 0;
        evlist->mmap[idx].mask = mp->mask;
        evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, mp->prot,
@@ -734,10 +773,14 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
                } else {
                        if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
                                return -1;
+
+                       perf_evlist__mmap_get(evlist, idx);
                }
 
-               if (perf_evlist__add_pollfd(evlist, fd) < 0)
+               if (perf_evlist__add_pollfd(evlist, fd) < 0) {
+                       perf_evlist__mmap_put(evlist, idx);
                        return -1;
+               }
 
                if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
                    perf_evlist__id_add_fd(evlist, evsel, cpu, thread, fd) < 0)
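
For context, the consume path changed above is what tools drive from their
event loop. A sketch of such a loop against the tools/perf API of this era
(an in-tree fragment, not a standalone program; process_event() is a
hypothetical stand-in for the tool's real handler):

static void drain_mmap(struct perf_evlist *evlist, int idx)
{
	union perf_event *event;

	while ((event = perf_evlist__mmap_read(evlist, idx)) != NULL) {
		process_event(event);	/* hypothetical tool callback */
		/*
		 * Moves the tail forward (when not in overwrite mode) and,
		 * after this patch, performs the final put once the buffer
		 * is empty and only the evlist itself still holds the map.
		 */
		perf_evlist__mmap_consume(evlist, idx);
	}
}
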
index fc013704d9035f775d1c04557424de8d51121364..bd312b01e8766e97dd50f8661f3e4213f8513fde 100644
@@ -18,9 +18,15 @@ struct record_opts;
 #define PERF_EVLIST__HLIST_BITS 8
 #define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS)
 
+/**
+ * struct perf_mmap - perf's ring buffer mmap details
+ *
+ * @refcnt - number of users of this mmap, e.g. fds sharing it via PERF_EVENT_IOC_SET_OUTPUT
+ */
 struct perf_mmap {
        void             *base;
        int              mask;
+       int              refcnt;
        unsigned int     prev;
        char             event_copy[PERF_SAMPLE_MAX_SIZE];
 };
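
The perf_mmap__empty() test added in evlist.c compares @prev against the
kernel's write head for this buffer. As a reference for how that head is
obtained from the mmapped control page, a hedged sketch follows (the tree
carries its own perf_mmap__read_head() helper built on the tools' barrier
primitives; the GCC atomic builtin here is only a stand-in for those):

#include <linux/perf_event.h>

/*
 * Sketch: 'base' points at struct perf_event_mmap_page, whose data_head
 * field is the kernel's write position. It must be loaded with acquire
 * semantics so that the events it covers are visible before the head
 * value is trusted; only then can head == prev be read as "empty".
 */
static inline __u64 sketch_read_head(struct perf_mmap *mm)
{
	struct perf_event_mmap_page *pc = mm->base;

	return __atomic_load_n(&pc->data_head, __ATOMIC_ACQUIRE);
}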