/* Support for MMIO probes.
 * Borrows much code from kprobes.
 * (C) 2002 Louis Zhuang <louis.zhuang@intel.com>.
 *     2007 Alexander Eichner
 *     2008 Pekka Paalanen <pq@iki.fi>
 */

#include <linux/version.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/ptrace.h>
#include <linux/preempt.h>
#include <linux/percpu.h>
#include <linux/kdebug.h>

#include <asm/cacheflush.h>
#include <asm/errno.h>
#include <asm/tlbflush.h>
#include <asm/pgtable.h>

#include <linux/mmiotrace.h>
#define KMMIO_PAGE_HASH_BITS 4
#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS)
struct kmmio_fault_page {
	struct list_head list;
	struct kmmio_fault_page *release_next;
	unsigned long page; /* location of the fault page */

	/*
	 * Number of times this page has been registered as a part
	 * of a probe. If zero, page is disarmed and this may be freed.
	 * Used only by writers (RCU).
	 */
	int count;
};
struct kmmio_delayed_release {
	struct rcu_head rcu;
	struct kmmio_fault_page *release_list;
};
struct kmmio_context {
	struct kmmio_fault_page *fpage;
	struct kmmio_probe *probe;
	unsigned long saved_flags;
	unsigned long addr;
	int active;
};
static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
								void *args);
static DECLARE_MUTEX(kmmio_init_mutex);
static DEFINE_SPINLOCK(kmmio_lock);

/* These are protected by kmmio_lock */
static int kmmio_initialized;
unsigned int kmmio_count;

/* Read-protected by RCU, write-protected by kmmio_lock. */
static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
static LIST_HEAD(kmmio_probes);
static struct list_head *kmmio_page_list(unsigned long page)
{
	return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
}
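
/*
 * A minimal sketch of bucket lookup for a raw fault address;
 * kmmio_example_bucket() is a hypothetical helper for illustration only.
 * The hash key must be page-aligned, which is why callers such as
 * get_kmmio_fault_page() mask with PAGE_MASK before hashing.
 */
static inline struct list_head *kmmio_example_bucket(unsigned long addr)
{
	/* Drop the offset within the page before hashing. */
	return kmmio_page_list(addr & PAGE_MASK);
}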
/* Accessed per-cpu */
static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx);

/* protected by kmmio_init_mutex */
static struct notifier_block nb_die = {
	.notifier_call = kmmio_die_notifier
};
/*
 * Makes sure kmmio is initialized and usable.
 * This must be called before any other kmmio function defined here.
 */
void reference_kmmio(void)
{
	down(&kmmio_init_mutex);
	spin_lock_irq(&kmmio_lock);
	if (!kmmio_initialized) {
		int i;
		for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
			INIT_LIST_HEAD(&kmmio_page_table[i]);
		if (register_die_notifier(&nb_die))
			BUG();
	}
	kmmio_initialized++;
	spin_unlock_irq(&kmmio_lock);
	up(&kmmio_init_mutex);
}
EXPORT_SYMBOL_GPL(reference_kmmio);
/*
 * Clean up kmmio after use. This must be called for every call to
 * reference_kmmio(). All probes registered after the corresponding
 * reference_kmmio() must have been unregistered when calling this.
 */
void unreference_kmmio(void)
{
	bool unreg = false;

	down(&kmmio_init_mutex);
	spin_lock_irq(&kmmio_lock);

	if (kmmio_initialized == 1) {
		BUG_ON(is_kmmio_active());
		unreg = true;
	}
	kmmio_initialized--;
	BUG_ON(kmmio_initialized < 0);
	spin_unlock_irq(&kmmio_lock);

	if (unreg)
		unregister_die_notifier(&nb_die); /* calls sync_rcu() */
	up(&kmmio_init_mutex);
}
EXPORT_SYMBOL(unreference_kmmio);
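
/*
 * A minimal usage sketch of the reference pair, assuming a hypothetical
 * client module; my_trace_init()/my_trace_exit() are illustrative names.
 * Every reference_kmmio() must be balanced by one unreference_kmmio(),
 * and all probes registered in between must be unregistered first.
 */
static int __init my_trace_init(void)
{
	reference_kmmio();	/* first caller initializes kmmio */
	/* register_kmmio_probe() calls may follow */
	return 0;
}

static void __exit my_trace_exit(void)
{
	/* all unregister_kmmio_probe() calls must precede this */
	unreference_kmmio();	/* last caller tears kmmio down */
}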
/*
 * This is basically a dynamic stabbing problem:
 * could use the existing prio tree code, or one of the possibly
 * better implementations:
 * - The Interval Skip List: A Data Structure for Finding All Intervals
 *   That Overlap a Point (might be simple)
 * - Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup
 */
/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */
static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
{
	struct kmmio_probe *p;
	list_for_each_entry_rcu(p, &kmmio_probes, list) {
		/* [addr, addr + len) covers exactly len bytes. */
		if (addr >= p->addr && addr < (p->addr + p->len))
			return p;
	}
	return NULL;
}
/* You must be holding RCU read lock. */
static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
{
	struct list_head *head;
	struct kmmio_fault_page *p;

	page &= PAGE_MASK;
	head = kmmio_page_list(page);
	list_for_each_entry_rcu(p, head, list) {
		if (p->page == page)
			return p;
	}
	return NULL;
}
/** Mark the given page as not present. Access to it will trigger a fault. */
static void arm_kmmio_fault_page(unsigned long page, int *page_level)
{
	unsigned long address = page & PAGE_MASK;
	int level;
	pte_t *pte = lookup_address(address, &level);

	if (!pte) {
		pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n",
							__func__, page);
		return;
	}

	if (level == PG_LEVEL_2M) {
		pmd_t *pmd = (pmd_t *)pte;
		set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_PRESENT));
	} else {
		/* PG_LEVEL_4K */
		set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
	}

	if (page_level)
		*page_level = level;

	__flush_tlb_one(page);
}
/** Mark the given page as present. */
static void disarm_kmmio_fault_page(unsigned long page, int *page_level)
{
	unsigned long address = page & PAGE_MASK;
	int level;
	pte_t *pte = lookup_address(address, &level);

	if (!pte) {
		pr_err("kmmio: Error in %s: no pte for page 0x%08lx\n",
							__func__, page);
		return;
	}

	if (level == PG_LEVEL_2M) {
		pmd_t *pmd = (pmd_t *)pte;
		set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_PRESENT));
	} else {
		/* PG_LEVEL_4K */
		set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
	}

	if (page_level)
		*page_level = level;

	__flush_tlb_one(page);
}
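
/*
 * A short sketch of the arm/disarm cycle for one page, using the two
 * helpers above; kmmio_example_cycle() is hypothetical. While armed,
 * any access to the page faults into kmmio_handler(), which disarms
 * the page for the duration of one single step; post_kmmio_handler()
 * then re-arms it.
 */
static void kmmio_example_cycle(unsigned long page)
{
	int level;

	arm_kmmio_fault_page(page, &level);	/* clear _PAGE_PRESENT */
	/* an MMIO access to this page would now trap */
	disarm_kmmio_fault_page(page, &level);	/* restore _PAGE_PRESENT */
}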
/*
 * This is being called from do_page_fault().
 *
 * We may be in an interrupt or a critical section. Also prefetching may
 * trigger a page fault. We may be in the middle of a process switch.
 * We cannot take any locks, because we could be executing especially
 * within a kmmio critical section.
 *
 * Local interrupts are disabled, so preemption cannot happen.
 * Do not enable interrupts, do not sleep, and watch out for other CPUs.
 */
/*
 * Interrupts are disabled on entry as trap3 is an interrupt gate
 * and they remain disabled throughout this function.
 */
int kmmio_handler(struct pt_regs *regs, unsigned long addr)
{
	struct kmmio_context *ctx;
	struct kmmio_fault_page *faultpage;

	/*
	 * Preemption is now disabled to prevent process switch during
	 * single stepping. We can only handle one active kmmio trace
	 * per cpu, so ensure that we finish it before something else
	 * gets to run.
	 *
	 * XXX what if an interrupt occurs between returning from
	 * do_page_fault() and entering the single-step exception handler?
	 * And that interrupt triggers a kmmio trap?
	 * XXX If we are tracing an interrupt service routine or whatever, is
	 * this enough to keep it on the current cpu?
	 */
	preempt_disable();
	rcu_read_lock();

	faultpage = get_kmmio_fault_page(addr);
	if (!faultpage) {
		/*
		 * Either this page fault is not caused by kmmio, or
		 * another CPU just pulled the kmmio probe from under
		 * our feet. In the latter case all hell breaks loose.
		 */
		goto no_kmmio;
	}

	ctx = &get_cpu_var(kmmio_ctx);
	if (ctx->active) {
		/*
		 * Prevent overwriting already in-flight context.
		 * If this page fault really was due to kmmio trap,
		 * all hell breaks loose.
		 */
		pr_emerg("kmmio: recursive probe hit on CPU %d, "
					"for address 0x%08lx. Ignoring.\n",
					smp_processor_id(), addr);
		goto no_kmmio_ctx;
	}
	ctx->active++;

	ctx->fpage = faultpage;
	ctx->probe = get_kmmio_probe(addr);
	ctx->saved_flags = (regs->flags & (TF_MASK | IF_MASK));
	ctx->addr = addr;

	if (ctx->probe && ctx->probe->pre_handler)
		ctx->probe->pre_handler(ctx->probe, regs, addr);

	regs->flags |= TF_MASK;
	regs->flags &= ~IF_MASK;

	/* Now we set present bit in PTE and single step. */
	disarm_kmmio_fault_page(ctx->fpage->page, NULL);

	put_cpu_var(kmmio_ctx);
	return 1;

no_kmmio_ctx:
	put_cpu_var(kmmio_ctx);
no_kmmio:
	rcu_read_unlock();
	preempt_enable_no_resched();
	return 0; /* page fault not handled by kmmio */
}
/*
 * Interrupts are disabled on entry as trap1 is an interrupt gate
 * and they remain disabled throughout this function.
 * This must always get called as the pair to kmmio_handler().
 */
static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
{
	int ret = 0;
	struct kmmio_probe *probe;
	struct kmmio_fault_page *faultpage;
	struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);

	if (!ctx->active)
		goto out;

	faultpage = get_kmmio_fault_page(ctx->addr);
	probe = get_kmmio_probe(ctx->addr);
	if (faultpage != ctx->fpage || probe != ctx->probe) {
		/*
		 * The trace setup changed after kmmio_handler() and before
		 * running this respective post handler. User does not want
		 * the result anymore.
		 */
		ctx->probe = NULL;
		ctx->fpage = NULL;
	}

	if (ctx->probe && ctx->probe->post_handler)
		ctx->probe->post_handler(ctx->probe, condition, regs);

	if (ctx->fpage)
		arm_kmmio_fault_page(ctx->fpage->page, NULL);

	regs->flags &= ~TF_MASK;
	regs->flags |= ctx->saved_flags;

	/* These were acquired in kmmio_handler(). */
	ctx->active--;
	BUG_ON(ctx->active);
	rcu_read_unlock();
	preempt_enable_no_resched();

	/*
	 * If somebody else is single-stepping across a probe point, flags
	 * will have TF set, in which case, continue the remaining processing
	 * of do_debug, as if this is not a probe hit.
	 */
	if (!(regs->flags & TF_MASK))
		ret = 1;

out:
	put_cpu_var(kmmio_ctx);
	return ret;
}
/* You must be holding kmmio_lock. */
static int add_kmmio_fault_page(unsigned long page)
{
	struct kmmio_fault_page *f;

	page &= PAGE_MASK;
	f = get_kmmio_fault_page(page);
	if (f) {
		if (!f->count)
			arm_kmmio_fault_page(f->page, NULL);
		f->count++;
		return 0;
	}

	f = kmalloc(sizeof(*f), GFP_ATOMIC);
	if (!f)
		return -1;

	f->count = 1;
	f->page = page;
	list_add_rcu(&f->list, kmmio_page_list(f->page));

	arm_kmmio_fault_page(f->page, NULL);

	return 0;
}
/* You must be holding kmmio_lock. */
static void release_kmmio_fault_page(unsigned long page,
				struct kmmio_fault_page **release_list)
{
	struct kmmio_fault_page *f;

	page &= PAGE_MASK;
	f = get_kmmio_fault_page(page);
	if (!f)
		return;

	f->count--;
	BUG_ON(f->count < 0);
	if (!f->count) {
		disarm_kmmio_fault_page(f->page, NULL);
		f->release_next = *release_list;
		*release_list = f;
	}
}
int register_kmmio_probe(struct kmmio_probe *p)
{
	int ret = 0;
	unsigned long size = 0;

	spin_lock_irq(&kmmio_lock);
	kmmio_count++;
	if (get_kmmio_probe(p->addr)) {
		ret = -EEXIST;
		goto out;
	}
	list_add_rcu(&p->list, &kmmio_probes);
	while (size < p->len) {
		if (add_kmmio_fault_page(p->addr + size))
			pr_err("kmmio: Unable to set page fault.\n");
		size += PAGE_SIZE;
	}
out:
	spin_unlock_irq(&kmmio_lock);
	/*
	 * XXX: What should I do here?
	 * Here was a call to global_flush_tlb(), but it does not exist
	 * anymore. It seems it's not needed after all.
	 */
	return ret;
}
EXPORT_SYMBOL(register_kmmio_probe);
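
/*
 * A minimal registration sketch. The kmmio_probe fields and handler
 * signatures follow their use in this file; my_pre(), my_post(),
 * my_probe and the MY_MMIO_* placeholder values are hypothetical.
 */
#define MY_MMIO_BASE	0xfe000000UL	/* hypothetical ioremapped address */
#define MY_MMIO_LEN	PAGE_SIZE	/* length of the traced window */

static void my_pre(struct kmmio_probe *p, struct pt_regs *regs,
						unsigned long addr)
{
	/* called from kmmio_handler() before the access single-steps */
}

static void my_post(struct kmmio_probe *p, unsigned long condition,
						struct pt_regs *regs)
{
	/* called from post_kmmio_handler() after the single step */
}

static struct kmmio_probe my_probe = {
	.addr = MY_MMIO_BASE,
	.len = MY_MMIO_LEN,
	.pre_handler = my_pre,
	.post_handler = my_post,
};

/*
 * With reference_kmmio() already called:
 *	if (register_kmmio_probe(&my_probe))
 *		...a probe already covers that address...
 */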
static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
{
	struct kmmio_delayed_release *dr = container_of(
						head,
						struct kmmio_delayed_release,
						rcu);
	struct kmmio_fault_page *p = dr->release_list;
	while (p) {
		struct kmmio_fault_page *next = p->release_next;
		BUG_ON(p->count);
		kfree(p);
		p = next;
	}
	kfree(dr);
}
static void remove_kmmio_fault_pages(struct rcu_head *head)
{
	struct kmmio_delayed_release *dr = container_of(
						head,
						struct kmmio_delayed_release,
						rcu);
	struct kmmio_fault_page *p = dr->release_list;
	struct kmmio_fault_page **prevp = &dr->release_list;
	unsigned long flags;

	spin_lock_irqsave(&kmmio_lock, flags);
	while (p) {
		if (!p->count) {
			list_del_rcu(&p->list);
			prevp = &p->release_next;
		} else {
			/*
			 * The page was re-registered while we waited.
			 * Unlink it from the release list so it is not
			 * freed, and do not advance prevp past the
			 * unlinked entry.
			 */
			*prevp = p->release_next;
		}
		p = p->release_next;
	}
	spin_unlock_irqrestore(&kmmio_lock, flags);

	/* This is the real RCU destroy call. */
	call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
}
/*
 * Remove a kmmio probe. You have to synchronize_rcu() before you can be
 * sure that the callbacks will not be called anymore.
 *
 * Unregistering a kmmio fault page has three steps:
 * 1. release_kmmio_fault_page()
 *    Disarm the page, wait a grace period to let all faults finish.
 * 2. remove_kmmio_fault_pages()
 *    Remove the pages from kmmio_page_table.
 * 3. rcu_free_kmmio_fault_pages()
 *    Actually free the kmmio_fault_page structs, with RCU.
 */
void unregister_kmmio_probe(struct kmmio_probe *p)
{
	unsigned long size = 0;
	struct kmmio_fault_page *release_list = NULL;
	struct kmmio_delayed_release *drelease;

	spin_lock_irq(&kmmio_lock);
	while (size < p->len) {
		release_kmmio_fault_page(p->addr + size, &release_list);
		size += PAGE_SIZE;
	}
	list_del_rcu(&p->list);
	kmmio_count--;
	spin_unlock_irq(&kmmio_lock);

	drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
	if (!drelease) {
		pr_crit("kmmio: leaking kmmio_fault_page objects.\n");
		return;
	}
	drelease->release_list = release_list;

	/*
	 * This is not really RCU here. We have just disarmed a set of
	 * pages so that they cannot trigger page faults anymore. However,
	 * we cannot remove the pages from kmmio_page_table,
	 * because a probe hit might be in flight on another CPU. The
	 * pages are collected into a list, and they will be removed from
	 * kmmio_page_table when it is certain that no probe hit related to
	 * these pages can be in flight. RCU grace period sounds like a
	 * good choice.
	 *
	 * If we removed the pages too early, the kmmio page fault handler
	 * might not find the respective kmmio_fault_page and would decide
	 * it's not a kmmio fault, when it actually is. This would lead
	 * to madness.
	 */
	call_rcu(&drelease->rcu, remove_kmmio_fault_pages);
}
EXPORT_SYMBOL(unregister_kmmio_probe);
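
/*
 * A minimal teardown sketch matching the comment above: after
 * unregister_kmmio_probe() returns, the caller must synchronize_rcu()
 * before assuming the handlers can no longer run. my_remove_probe() is
 * hypothetical and uses my_probe from the registration sketch.
 */
static void my_remove_probe(void)
{
	unregister_kmmio_probe(&my_probe);
	synchronize_rcu();	/* wait out any in-flight probe hits */
	/* now safe to free data the handlers referenced */
}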
static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
								void *args)
{
	struct die_args *arg = args;

	if (val == DIE_DEBUG)
		if (post_kmmio_handler(arg->err, arg->regs) == 1)
			return NOTIFY_STOP;

	return NOTIFY_DONE;
}