x86: mmiotrace full patch, preview 1
arch/x86/kernel/mmiotrace/kmmio.c
/* Support for MMIO probes.
 * Borrows much code from kprobes.
 * (C) 2002 Louis Zhuang <louis.zhuang@intel.com>.
 *     2007 Alexander Eichner
 *     2008 Pekka Paalanen <pq@iki.fi>
 */

#include <linux/version.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
#include <linux/ptrace.h>
#include <linux/preempt.h>
#include <linux/percpu.h>
#include <linux/kdebug.h>
#include <asm/io.h>
#include <asm/cacheflush.h>
#include <asm/errno.h>
#include <asm/tlbflush.h>
#include <asm/pgtable.h>

#include <linux/mmiotrace.h>

#define KMMIO_PAGE_HASH_BITS 4
#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS)

struct kmmio_fault_page {
        struct list_head list;
        struct kmmio_fault_page *release_next;
        unsigned long page; /* location of the fault page */

        /*
         * Number of times this page has been registered as a part
         * of a probe. If zero, page is disarmed and this may be freed.
         * Used only by writers (RCU).
         */
        int count;
};

struct kmmio_delayed_release {
        struct rcu_head rcu;
        struct kmmio_fault_page *release_list;
};

struct kmmio_context {
        struct kmmio_fault_page *fpage;
        struct kmmio_probe *probe;
        unsigned long saved_flags;
        unsigned long addr;
        int active;
};

static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
                                                                void *args);

static DECLARE_MUTEX(kmmio_init_mutex);
static DEFINE_SPINLOCK(kmmio_lock);

/* These are protected by kmmio_lock */
static int kmmio_initialized;
unsigned int kmmio_count;

/* Read-protected by RCU, write-protected by kmmio_lock. */
static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
static LIST_HEAD(kmmio_probes);

static struct list_head *kmmio_page_list(unsigned long page)
{
        return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
}

/* Accessed per-cpu */
static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx);

/* protected by kmmio_init_mutex */
static struct notifier_block nb_die = {
        .notifier_call = kmmio_die_notifier
};

/**
 * Makes sure kmmio is initialized and usable.
 * This must be called before any other kmmio function defined here.
 * May sleep.
 */
void reference_kmmio(void)
{
        down(&kmmio_init_mutex);
        spin_lock_irq(&kmmio_lock);
        if (!kmmio_initialized) {
                int i;
                for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
                        INIT_LIST_HEAD(&kmmio_page_table[i]);
                if (register_die_notifier(&nb_die))
                        BUG();
        }
        kmmio_initialized++;
        spin_unlock_irq(&kmmio_lock);
        up(&kmmio_init_mutex);
}
EXPORT_SYMBOL_GPL(reference_kmmio);

/**
 * Clean up kmmio after use. This must be called for every call to
 * reference_kmmio(). All probes registered after the corresponding
 * reference_kmmio() must have been unregistered when calling this.
 * May sleep.
 */
void unreference_kmmio(void)
{
        bool unreg = false;

        down(&kmmio_init_mutex);
        spin_lock_irq(&kmmio_lock);

        if (kmmio_initialized == 1) {
                BUG_ON(is_kmmio_active());
                unreg = true;
        }
        kmmio_initialized--;
        BUG_ON(kmmio_initialized < 0);
        spin_unlock_irq(&kmmio_lock);

        if (unreg)
                unregister_die_notifier(&nb_die); /* calls sync_rcu() */
        up(&kmmio_init_mutex);
}
EXPORT_SYMBOL(unreference_kmmio);
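
/*
 * Illustrative sketch only, not part of this patch: a hypothetical client
 * module would bracket its probe registrations with reference_kmmio() and
 * unreference_kmmio(), for example from its init and exit paths. Both may
 * sleep, so neither may be called from atomic context. The my_client_*
 * names are made up.
 */
#if 0
static int __init my_client_init(void)
{
        reference_kmmio();      /* registers the die notifier on first use */
        /* ... register_kmmio_probe() calls go here ... */
        return 0;
}

static void __exit my_client_exit(void)
{
        /* All probes must already be unregistered at this point. */
        unreference_kmmio();
}
#endif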

/*
 * this is basically a dynamic stabbing problem:
 * Could use the existing prio tree code or
 * Possible better implementations:
 * The Interval Skip List: A Data Structure for Finding All Intervals That
 * Overlap a Point (might be simple)
 * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup
 */
/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */
static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
{
        struct kmmio_probe *p;
        list_for_each_entry_rcu(p, &kmmio_probes, list) {
                if (addr >= p->addr && addr <= (p->addr + p->len))
                        return p;
        }
        return NULL;
}

/* You must be holding RCU read lock. */
static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
{
        struct list_head *head;
        struct kmmio_fault_page *p;

        page &= PAGE_MASK;
        head = kmmio_page_list(page);
        list_for_each_entry_rcu(p, head, list) {
                if (p->page == page)
                        return p;
        }
        return NULL;
}

/** Mark the given page as not present. Access to it will trigger a fault. */
static void arm_kmmio_fault_page(unsigned long page, int *page_level)
{
        unsigned long address = page & PAGE_MASK;
        int level;
        pte_t *pte = lookup_address(address, &level);

        if (!pte) {
                pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n",
                                                        __func__, page);
                return;
        }

        if (level == PG_LEVEL_2M) {
                pmd_t *pmd = (pmd_t *)pte;
                set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_PRESENT));
        } else {
                /* PG_LEVEL_4K */
                set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
        }

        if (page_level)
                *page_level = level;

        __flush_tlb_one(page);
}

/** Mark the given page as present. */
static void disarm_kmmio_fault_page(unsigned long page, int *page_level)
{
        unsigned long address = page & PAGE_MASK;
        int level;
        pte_t *pte = lookup_address(address, &level);

        if (!pte) {
                pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n",
                                                        __func__, page);
                return;
        }

        if (level == PG_LEVEL_2M) {
                pmd_t *pmd = (pmd_t *)pte;
                set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_PRESENT));
        } else {
                /* PG_LEVEL_4K */
                set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
        }

        if (page_level)
                *page_level = level;

        __flush_tlb_one(page);
}

/*
 * This is being called from do_page_fault().
 *
 * We may be in an interrupt or a critical section. Also prefetching may
 * trigger a page fault. We may be in the middle of a process switch.
 * We cannot take any locks, because we could be executing especially
 * within a kmmio critical section.
 *
 * Local interrupts are disabled, so preemption cannot happen.
 * Do not enable interrupts, do not sleep, and watch out for other CPUs.
 */
/*
 * Interrupts are disabled on entry as trap3 is an interrupt gate
 * and they remain disabled throughout this function.
 */
int kmmio_handler(struct pt_regs *regs, unsigned long addr)
{
        struct kmmio_context *ctx;
        struct kmmio_fault_page *faultpage;

        /*
         * Preemption is now disabled to prevent process switch during
         * single stepping. We can only handle one active kmmio trace
         * per cpu, so ensure that we finish it before something else
         * gets to run.
         *
         * XXX what if an interrupt occurs between returning from
         * do_page_fault() and entering the single-step exception handler?
         * And that interrupt triggers a kmmio trap?
         * XXX If we are tracing an interrupt service routine or whatever,
         * is this enough to keep it on the current cpu?
         */
        preempt_disable();

        rcu_read_lock();
        faultpage = get_kmmio_fault_page(addr);
        if (!faultpage) {
                /*
                 * Either this page fault is not caused by kmmio, or
                 * another CPU just pulled the kmmio probe from under
                 * our feet. In the latter case all hell breaks loose.
                 */
                goto no_kmmio;
        }

        ctx = &get_cpu_var(kmmio_ctx);
        if (ctx->active) {
                /*
                 * Prevent overwriting an already in-flight context.
                 * If this page fault really was due to a kmmio trap,
                 * all hell breaks loose.
                 */
                pr_emerg("kmmio: recursive probe hit on CPU %d, "
                                        "for address 0x%08lx. Ignoring.\n",
                                        smp_processor_id(), addr);
                goto no_kmmio_ctx;
        }
        ctx->active++;

        ctx->fpage = faultpage;
        ctx->probe = get_kmmio_probe(addr);
        ctx->saved_flags = (regs->flags & (TF_MASK|IF_MASK));
        ctx->addr = addr;

        if (ctx->probe && ctx->probe->pre_handler)
                ctx->probe->pre_handler(ctx->probe, regs, addr);

        regs->flags |= TF_MASK;
        regs->flags &= ~IF_MASK;

        /* Now we set the present bit in the PTE and single step. */
        disarm_kmmio_fault_page(ctx->fpage->page, NULL);

        put_cpu_var(kmmio_ctx);
        rcu_read_unlock();
        return 1;

no_kmmio_ctx:
        put_cpu_var(kmmio_ctx);
no_kmmio:
        rcu_read_unlock();
        preempt_enable_no_resched();
        return 0; /* page fault not handled by kmmio */
}

/*
 * Interrupts are disabled on entry as trap1 is an interrupt gate
 * and they remain disabled throughout this function.
 * This must always get called as the pair to kmmio_handler().
 */
static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
{
        int ret = 0;
        struct kmmio_probe *probe;
        struct kmmio_fault_page *faultpage;
        struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);

        if (!ctx->active)
                goto out;

        rcu_read_lock();

        faultpage = get_kmmio_fault_page(ctx->addr);
        probe = get_kmmio_probe(ctx->addr);
        if (faultpage != ctx->fpage || probe != ctx->probe) {
                /*
                 * The trace setup changed after kmmio_handler() and before
                 * running this respective post handler. User does not want
                 * the result anymore.
                 */
                ctx->probe = NULL;
                ctx->fpage = NULL;
        }

        if (ctx->probe && ctx->probe->post_handler)
                ctx->probe->post_handler(ctx->probe, condition, regs);

        if (ctx->fpage)
                arm_kmmio_fault_page(ctx->fpage->page, NULL);

        regs->flags &= ~TF_MASK;
        regs->flags |= ctx->saved_flags;

        /* These were acquired in kmmio_handler(). */
        ctx->active--;
        BUG_ON(ctx->active);
        preempt_enable_no_resched();

        /*
         * if somebody else is singlestepping across a probe point, flags
         * will have TF set, in which case, continue the remaining processing
         * of do_debug, as if this is not a probe hit.
         */
        if (!(regs->flags & TF_MASK))
                ret = 1;

        rcu_read_unlock();
out:
        put_cpu_var(kmmio_ctx);
        return ret;
}

/* You must be holding kmmio_lock. */
static int add_kmmio_fault_page(unsigned long page)
{
        struct kmmio_fault_page *f;

        page &= PAGE_MASK;
        f = get_kmmio_fault_page(page);
        if (f) {
                if (!f->count)
                        arm_kmmio_fault_page(f->page, NULL);
                f->count++;
                return 0;
        }

        f = kmalloc(sizeof(*f), GFP_ATOMIC);
        if (!f)
                return -1;

        f->count = 1;
        f->page = page;
        list_add_rcu(&f->list, kmmio_page_list(f->page));

        arm_kmmio_fault_page(f->page, NULL);

        return 0;
}

/* You must be holding kmmio_lock. */
static void release_kmmio_fault_page(unsigned long page,
                                struct kmmio_fault_page **release_list)
{
        struct kmmio_fault_page *f;

        page &= PAGE_MASK;
        f = get_kmmio_fault_page(page);
        if (!f)
                return;

        f->count--;
        BUG_ON(f->count < 0);
        if (!f->count) {
                disarm_kmmio_fault_page(f->page, NULL);
                f->release_next = *release_list;
                *release_list = f;
        }
}

int register_kmmio_probe(struct kmmio_probe *p)
{
        int ret = 0;
        unsigned long size = 0;

        spin_lock_irq(&kmmio_lock);
        kmmio_count++;
        if (get_kmmio_probe(p->addr)) {
                ret = -EEXIST;
                goto out;
        }
        list_add_rcu(&p->list, &kmmio_probes);
        while (size < p->len) {
                if (add_kmmio_fault_page(p->addr + size))
                        pr_err("kmmio: Unable to set page fault.\n");
                size += PAGE_SIZE;
        }
out:
        spin_unlock_irq(&kmmio_lock);
        /*
         * XXX: What should I do here?
         * Here was a call to global_flush_tlb(), but it does not exist
         * anymore. It seems it's not needed after all.
         */
        return ret;
}
EXPORT_SYMBOL(register_kmmio_probe);
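
/*
 * Illustrative sketch only, not part of this patch: registering a probe
 * over one page of an ioremap()'d region. The kmmio_probe fields and the
 * handler signatures are assumed from how they are used in this file
 * (see kmmio_handler() and post_kmmio_handler()); the my_* names are
 * made up.
 */
#if 0
static void my_pre(struct kmmio_probe *p, struct pt_regs *regs,
                                                unsigned long addr)
{
        /* Runs from the page fault, before the access is single-stepped. */
}

static void my_post(struct kmmio_probe *p, unsigned long condition,
                                                struct pt_regs *regs)
{
        /* Runs from the debug trap, after the access has completed. */
}

static struct kmmio_probe my_probe = {
        .len = PAGE_SIZE,
        .pre_handler = my_pre,
        .post_handler = my_post,
};

static int my_attach(void __iomem *io)
{
        my_probe.addr = (unsigned long)io;
        return register_kmmio_probe(&my_probe); /* -EEXIST if already probed */
}
#endif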

static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
{
        struct kmmio_delayed_release *dr = container_of(
                                                head,
                                                struct kmmio_delayed_release,
                                                rcu);
        struct kmmio_fault_page *p = dr->release_list;
        while (p) {
                struct kmmio_fault_page *next = p->release_next;
                BUG_ON(p->count);
                kfree(p);
                p = next;
        }
        kfree(dr);
}

static void remove_kmmio_fault_pages(struct rcu_head *head)
{
        struct kmmio_delayed_release *dr = container_of(
                                                head,
                                                struct kmmio_delayed_release,
                                                rcu);
        struct kmmio_fault_page *p = dr->release_list;
        struct kmmio_fault_page **prevp = &dr->release_list;
        unsigned long flags;
        spin_lock_irqsave(&kmmio_lock, flags);
        while (p) {
                if (!p->count)
                        list_del_rcu(&p->list);
                else
                        *prevp = p->release_next;
                prevp = &p->release_next;
                p = p->release_next;
        }
        spin_unlock_irqrestore(&kmmio_lock, flags);
        /* This is the real RCU destroy call. */
        call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
}

/*
 * Remove a kmmio probe. You have to synchronize_rcu() before you can be
 * sure that the callbacks will not be called anymore.
 *
 * Unregistering a kmmio fault page has three steps:
 * 1. release_kmmio_fault_page()
 *    Disarm the page, wait a grace period to let all faults finish.
 * 2. remove_kmmio_fault_pages()
 *    Remove the pages from kmmio_page_table.
 * 3. rcu_free_kmmio_fault_pages()
 *    Actually free the kmmio_fault_page structs via RCU.
 */
void unregister_kmmio_probe(struct kmmio_probe *p)
{
        unsigned long size = 0;
        struct kmmio_fault_page *release_list = NULL;
        struct kmmio_delayed_release *drelease;

        spin_lock_irq(&kmmio_lock);
        while (size < p->len) {
                release_kmmio_fault_page(p->addr + size, &release_list);
                size += PAGE_SIZE;
        }
        list_del_rcu(&p->list);
        kmmio_count--;
        spin_unlock_irq(&kmmio_lock);

        drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
        if (!drelease) {
                pr_crit("kmmio: leaking kmmio_fault_page objects.\n");
                return;
        }
        drelease->release_list = release_list;

        /*
         * This is not really RCU here. We have just disarmed a set of
         * pages so that they cannot trigger page faults anymore. However,
         * we cannot remove the pages from kmmio_page_table,
         * because a probe hit might be in flight on another CPU. The
         * pages are collected into a list, and they will be removed from
         * kmmio_page_table when it is certain that no probe hit related to
         * these pages can be in flight. RCU grace period sounds like a
         * good choice.
         *
         * If we removed the pages too early, the kmmio page fault handler
         * might not find the respective kmmio_fault_page and decide that
         * this is not a kmmio fault, when it actually is. This would lead
         * to madness.
         */
        call_rcu(&drelease->rcu, remove_kmmio_fault_pages);
}
EXPORT_SYMBOL(unregister_kmmio_probe);
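
/*
 * Illustrative sketch only, not part of this patch: tearing down the
 * hypothetical probe registered in the sketch above. synchronize_rcu()
 * is what guarantees that the pre/post handlers can no longer run, as
 * described in the comment above unregister_kmmio_probe().
 */
#if 0
static void my_detach(void)
{
        unregister_kmmio_probe(&my_probe);
        /* Wait until no handler for this probe can still be executing. */
        synchronize_rcu();
        unreference_kmmio();
}
#endif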

static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
                                                                void *args)
{
        struct die_args *arg = args;

        if (val == DIE_DEBUG)
                if (post_kmmio_handler(arg->err, arg->regs) == 1)
                        return NOTIFY_STOP;

        return NOTIFY_DONE;
}