+/*
+ * Curious locking construct.
+ *
+ * We need to ensure a later event doesn't publish a head when a former
+ * event isn't done writing. However since we need to deal with NMIs we
+ * cannot fully serialize things.
+ *
+ * What we do is serialize between CPUs so we only have to deal with NMI
+ * nesting on a single CPU.
+ *
+ * We only publish the head (and generate a wakeup) when the outer-most
+ * event completes.
+ */
+static void perf_output_lock(struct perf_output_handle *handle)
+{
+ struct perf_mmap_data *data = handle->data;
+ int cpu;
+
+ handle->locked = 0;
+
+ local_irq_save(handle->flags);
+ cpu = smp_processor_id();
+
+ if (in_nmi() && atomic_read(&data->lock) == cpu)
+ return;
+
+ while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
+ cpu_relax();
+
+ handle->locked = 1;
+}
+
+static void perf_output_unlock(struct perf_output_handle *handle)
+{
+ struct perf_mmap_data *data = handle->data;
+ int head, cpu;
+
+ data->done_head = data->head;
+
+ if (!handle->locked)
+ goto out;
+
+again:
+ /*
+ * The xchg implies a full barrier that ensures all writes are done
+ * before we publish the new head, matched by a rmb() in userspace when
+ * reading this position.
+ */
+ while ((head = atomic_xchg(&data->done_head, 0)))
+ data->user_page->data_head = head;
+
+ /*
+ * NMI can happen here, which means we can miss a done_head update.
+ */
+
+ cpu = atomic_xchg(&data->lock, -1);
+ WARN_ON_ONCE(cpu != smp_processor_id());
+
+ /*
+ * Therefore we have to validate we did not indeed do so.
+ */
+ if (unlikely(atomic_read(&data->done_head))) {
+ /*
+ * Since we had it locked, we can lock it again.
+ */
+ while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
+ cpu_relax();
+
+ goto again;
+ }
+
+ if (atomic_xchg(&data->wakeup, 0))
+ perf_output_wakeup(handle);
+out:
+ local_irq_restore(handle->flags);
+}
+