Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /** |
2 | * @file op_model_xscale.c | |
3 | * XScale Performance Monitor Driver | |
4 | * | |
5 | * @remark Copyright 2000-2004 Deepak Saxena <dsaxena@mvista.com> | |
6 | * @remark Copyright 2000-2004 MontaVista Software Inc | |
7 | * @remark Copyright 2004 Dave Jiang <dave.jiang@intel.com> | |
8 | * @remark Copyright 2004 Intel Corporation | |
9 | * @remark Copyright 2004 Zwane Mwaikambo <zwane@arm.linux.org.uk> | |
10 | * @remark Copyright 2004 OProfile Authors | |
11 | * | |
12 | * @remark Read the file COPYING | |
13 | * | |
14 | * @author Zwane Mwaikambo | |
15 | */ | |
16 | ||
17 | /* #define DEBUG */ | |
18 | #include <linux/types.h> | |
19 | #include <linux/errno.h> | |
1618fdd9 | 20 | #include <linux/err.h> |
1da177e4 LT |
21 | #include <linux/sched.h> |
22 | #include <linux/oprofile.h> | |
23 | #include <linux/interrupt.h> | |
2326eb98 RK |
24 | #include <linux/irq.h> |
25 | ||
0ba8b9b2 | 26 | #include <asm/cputype.h> |
1618fdd9 | 27 | #include <asm/pmu.h> |
1da177e4 LT |
28 | |
29 | #include "op_counter.h" | |
30 | #include "op_arm_model.h" | |
31 | ||
/* Bits of the PMU control register value passed to write_pmnc(). */
#define PMU_ENABLE	(1 << 0)	/* Enable counters */
#define PMN_RESET	(1 << 1)	/* Reset event counters */
#define CCNT_RESET	(1 << 2)	/* Reset clock counter */
#define PMU_RESET	(CCNT_RESET | PMN_RESET)
#define PMU_CNT64	(1 << 3)	/* Make CCNT count every 64th cycle */
37 | ||
1da177e4 LT |
/*
 * Different types of events that can be counted by the XScale PMU
 * as used by Oprofile userspace. Here primarily for documentation
 * purposes.
 */

#define EVT_ICACHE_MISS			0x00
#define EVT_ICACHE_NO_DELIVER		0x01
#define EVT_DATA_STALL			0x02
#define EVT_ITLB_MISS			0x03
#define EVT_DTLB_MISS			0x04
#define EVT_BRANCH			0x05
#define EVT_BRANCH_MISS			0x06
#define EVT_INSTRUCTION			0x07
#define EVT_DCACHE_FULL_STALL		0x08
#define EVT_DCACHE_FULL_STALL_CONTIG	0x09
#define EVT_DCACHE_ACCESS		0x0A
#define EVT_DCACHE_MISS			0x0B
#define EVT_DCACHE_WRITE_BACK		0x0C
/* Historical misspelling, kept so existing references keep compiling. */
#define EVT_DCACE_WRITE_BACK		EVT_DCACHE_WRITE_BACK
#define EVT_PC_CHANGED			0x0D
#define EVT_BCU_REQUEST			0x10
#define EVT_BCU_FULL			0x11
#define EVT_BCU_DRAIN			0x12
#define EVT_BCU_ECC_NO_ELOG		0x14
#define EVT_BCU_1_BIT_ERR		0x15
#define EVT_RMW				0x16
/* EVT_CCNT is not hardware defined */
#define EVT_CCNT			0xFE
/* Marks a counter that is not enabled; see xscale_setup_ctrs(). */
#define EVT_UNUSED			0xFF
67 | ||
/* Per-counter bookkeeping used by counter setup and the interrupt path. */
struct pmu_counter {
	volatile unsigned long ovf;	/* overflows recorded, not yet sampled */
	unsigned long reset_counter;	/* count the counter is re-armed with */
};

/* Counter indices: CCNT is the clock counter, PMNx are event counters. */
enum {
	CCNT,
	PMN0,
	PMN1,
	PMN2,
	PMN3,
	MAX_COUNTERS
};

static struct pmu_counter results[MAX_COUNTERS];
76 | ||
/*
 * Current XScale processors come with one of two PMU versions, which
 * differ in register layout and in the number of performance counters
 * (e.g. IOP32x is xsc1 whilst IOP33x is xsc2).
 * xscale_detect_pmu() works out which layout applies; the values below
 * are also used as indices into pmu_parms[].
 */
enum {
	PMU_XSC1,
	PMU_XSC2
};
84 | ||
85 | struct pmu_type { | |
86 | int id; | |
87 | char *name; | |
88 | int num_counters; | |
89 | unsigned int int_enable; | |
90 | unsigned int cnt_ovf[MAX_COUNTERS]; | |
91 | unsigned int int_mask[MAX_COUNTERS]; | |
92 | }; | |
93 | ||
94 | static struct pmu_type pmu_parms[] = { | |
95 | { | |
96 | .id = PMU_XSC1, | |
97 | .name = "arm/xscale1", | |
98 | .num_counters = 3, | |
99 | .int_mask = { [PMN0] = 0x10, [PMN1] = 0x20, | |
100 | [CCNT] = 0x40 }, | |
101 | .cnt_ovf = { [CCNT] = 0x400, [PMN0] = 0x100, | |
102 | [PMN1] = 0x200}, | |
103 | }, | |
104 | { | |
105 | .id = PMU_XSC2, | |
106 | .name = "arm/xscale2", | |
107 | .num_counters = 5, | |
108 | .int_mask = { [CCNT] = 0x01, [PMN0] = 0x02, | |
109 | [PMN1] = 0x04, [PMN2] = 0x08, | |
110 | [PMN3] = 0x10 }, | |
111 | .cnt_ovf = { [CCNT] = 0x01, [PMN0] = 0x02, | |
112 | [PMN1] = 0x04, [PMN2] = 0x08, | |
113 | [PMN3] = 0x10 }, | |
114 | }, | |
115 | }; | |
116 | ||
117 | static struct pmu_type *pmu; | |
118 | ||
119 | static void write_pmnc(u32 val) | |
120 | { | |
121 | if (pmu->id == PMU_XSC1) { | |
122 | /* upper 4bits and 7, 11 are write-as-0 */ | |
123 | val &= 0xffff77f; | |
124 | __asm__ __volatile__ ("mcr p14, 0, %0, c0, c0, 0" : : "r" (val)); | |
125 | } else { | |
126 | /* bits 4-23 are write-as-0, 24-31 are write ignored */ | |
127 | val &= 0xf; | |
128 | __asm__ __volatile__ ("mcr p14, 0, %0, c0, c1, 0" : : "r" (val)); | |
129 | } | |
130 | } | |
131 | ||
132 | static u32 read_pmnc(void) | |
133 | { | |
134 | u32 val; | |
135 | ||
136 | if (pmu->id == PMU_XSC1) | |
137 | __asm__ __volatile__ ("mrc p14, 0, %0, c0, c0, 0" : "=r" (val)); | |
138 | else { | |
139 | __asm__ __volatile__ ("mrc p14, 0, %0, c0, c1, 0" : "=r" (val)); | |
140 | /* bits 1-2 and 4-23 are read-unpredictable */ | |
141 | val &= 0xff000009; | |
142 | } | |
143 | ||
144 | return val; | |
145 | } | |
146 | ||
147 | static u32 __xsc1_read_counter(int counter) | |
148 | { | |
149 | u32 val = 0; | |
150 | ||
151 | switch (counter) { | |
152 | case CCNT: | |
153 | __asm__ __volatile__ ("mrc p14, 0, %0, c1, c0, 0" : "=r" (val)); | |
154 | break; | |
155 | case PMN0: | |
156 | __asm__ __volatile__ ("mrc p14, 0, %0, c2, c0, 0" : "=r" (val)); | |
157 | break; | |
158 | case PMN1: | |
159 | __asm__ __volatile__ ("mrc p14, 0, %0, c3, c0, 0" : "=r" (val)); | |
160 | break; | |
161 | } | |
162 | return val; | |
163 | } | |
164 | ||
165 | static u32 __xsc2_read_counter(int counter) | |
166 | { | |
167 | u32 val = 0; | |
168 | ||
169 | switch (counter) { | |
170 | case CCNT: | |
171 | __asm__ __volatile__ ("mrc p14, 0, %0, c1, c1, 0" : "=r" (val)); | |
172 | break; | |
173 | case PMN0: | |
174 | __asm__ __volatile__ ("mrc p14, 0, %0, c0, c2, 0" : "=r" (val)); | |
175 | break; | |
176 | case PMN1: | |
177 | __asm__ __volatile__ ("mrc p14, 0, %0, c1, c2, 0" : "=r" (val)); | |
178 | break; | |
179 | case PMN2: | |
180 | __asm__ __volatile__ ("mrc p14, 0, %0, c2, c2, 0" : "=r" (val)); | |
181 | break; | |
182 | case PMN3: | |
183 | __asm__ __volatile__ ("mrc p14, 0, %0, c3, c2, 0" : "=r" (val)); | |
184 | break; | |
185 | } | |
186 | return val; | |
187 | } | |
188 | ||
189 | static u32 read_counter(int counter) | |
190 | { | |
191 | u32 val; | |
192 | ||
193 | if (pmu->id == PMU_XSC1) | |
194 | val = __xsc1_read_counter(counter); | |
195 | else | |
196 | val = __xsc2_read_counter(counter); | |
197 | ||
198 | return val; | |
199 | } | |
200 | ||
201 | static void __xsc1_write_counter(int counter, u32 val) | |
202 | { | |
203 | switch (counter) { | |
204 | case CCNT: | |
205 | __asm__ __volatile__ ("mcr p14, 0, %0, c1, c0, 0" : : "r" (val)); | |
206 | break; | |
207 | case PMN0: | |
208 | __asm__ __volatile__ ("mcr p14, 0, %0, c2, c0, 0" : : "r" (val)); | |
209 | break; | |
210 | case PMN1: | |
211 | __asm__ __volatile__ ("mcr p14, 0, %0, c3, c0, 0" : : "r" (val)); | |
212 | break; | |
213 | } | |
214 | } | |
215 | ||
216 | static void __xsc2_write_counter(int counter, u32 val) | |
217 | { | |
218 | switch (counter) { | |
219 | case CCNT: | |
220 | __asm__ __volatile__ ("mcr p14, 0, %0, c1, c1, 0" : : "r" (val)); | |
221 | break; | |
222 | case PMN0: | |
223 | __asm__ __volatile__ ("mcr p14, 0, %0, c0, c2, 0" : : "r" (val)); | |
224 | break; | |
225 | case PMN1: | |
226 | __asm__ __volatile__ ("mcr p14, 0, %0, c1, c2, 0" : : "r" (val)); | |
227 | break; | |
228 | case PMN2: | |
229 | __asm__ __volatile__ ("mcr p14, 0, %0, c2, c2, 0" : : "r" (val)); | |
230 | break; | |
231 | case PMN3: | |
232 | __asm__ __volatile__ ("mcr p14, 0, %0, c3, c2, 0" : : "r" (val)); | |
233 | break; | |
234 | } | |
235 | } | |
236 | ||
237 | static void write_counter(int counter, u32 val) | |
238 | { | |
239 | if (pmu->id == PMU_XSC1) | |
240 | __xsc1_write_counter(counter, val); | |
241 | else | |
242 | __xsc2_write_counter(counter, val); | |
243 | } | |
244 | ||
245 | static int xscale_setup_ctrs(void) | |
246 | { | |
247 | u32 evtsel, pmnc; | |
248 | int i; | |
249 | ||
250 | for (i = CCNT; i < MAX_COUNTERS; i++) { | |
251 | if (counter_config[i].enabled) | |
252 | continue; | |
253 | ||
254 | counter_config[i].event = EVT_UNUSED; | |
255 | } | |
256 | ||
257 | switch (pmu->id) { | |
258 | case PMU_XSC1: | |
259 | pmnc = (counter_config[PMN1].event << 20) | (counter_config[PMN0].event << 12); | |
260 | pr_debug("xscale_setup_ctrs: pmnc: %#08x\n", pmnc); | |
261 | write_pmnc(pmnc); | |
262 | break; | |
263 | ||
264 | case PMU_XSC2: | |
265 | evtsel = counter_config[PMN0].event | (counter_config[PMN1].event << 8) | | |
266 | (counter_config[PMN2].event << 16) | (counter_config[PMN3].event << 24); | |
267 | ||
268 | pr_debug("xscale_setup_ctrs: evtsel %#08x\n", evtsel); | |
269 | __asm__ __volatile__ ("mcr p14, 0, %0, c8, c1, 0" : : "r" (evtsel)); | |
270 | break; | |
271 | } | |
272 | ||
273 | for (i = CCNT; i < MAX_COUNTERS; i++) { | |
274 | if (counter_config[i].event == EVT_UNUSED) { | |
275 | counter_config[i].event = 0; | |
276 | pmu->int_enable &= ~pmu->int_mask[i]; | |
277 | continue; | |
278 | } | |
279 | ||
280 | results[i].reset_counter = counter_config[i].count; | |
281 | write_counter(i, -(u32)counter_config[i].count); | |
282 | pmu->int_enable |= pmu->int_mask[i]; | |
283 | pr_debug("xscale_setup_ctrs: counter%d %#08x from %#08lx\n", i, | |
284 | read_counter(i), counter_config[i].count); | |
285 | } | |
286 | ||
287 | return 0; | |
288 | } | |
289 | ||
290 | static void inline __xsc1_check_ctrs(void) | |
291 | { | |
292 | int i; | |
293 | u32 pmnc = read_pmnc(); | |
294 | ||
295 | /* NOTE: there's an A stepping errata that states if an overflow */ | |
296 | /* bit already exists and another occurs, the previous */ | |
297 | /* Overflow bit gets cleared. There's no workaround. */ | |
298 | /* Fixed in B stepping or later */ | |
299 | ||
300 | /* Write the value back to clear the overflow flags. Overflow */ | |
301 | /* flags remain in pmnc for use below */ | |
302 | write_pmnc(pmnc & ~PMU_ENABLE); | |
303 | ||
304 | for (i = CCNT; i <= PMN1; i++) { | |
305 | if (!(pmu->int_mask[i] & pmu->int_enable)) | |
306 | continue; | |
307 | ||
308 | if (pmnc & pmu->cnt_ovf[i]) | |
309 | results[i].ovf++; | |
310 | } | |
311 | } | |
312 | ||
313 | static void inline __xsc2_check_ctrs(void) | |
314 | { | |
315 | int i; | |
316 | u32 flag = 0, pmnc = read_pmnc(); | |
317 | ||
318 | pmnc &= ~PMU_ENABLE; | |
319 | write_pmnc(pmnc); | |
320 | ||
321 | /* read overflow flag register */ | |
322 | __asm__ __volatile__ ("mrc p14, 0, %0, c5, c1, 0" : "=r" (flag)); | |
323 | ||
324 | for (i = CCNT; i <= PMN3; i++) { | |
325 | if (!(pmu->int_mask[i] & pmu->int_enable)) | |
326 | continue; | |
327 | ||
328 | if (flag & pmu->cnt_ovf[i]) | |
329 | results[i].ovf++; | |
330 | } | |
331 | ||
332 | /* writeback clears overflow bits */ | |
333 | __asm__ __volatile__ ("mcr p14, 0, %0, c5, c1, 0" : : "r" (flag)); | |
334 | } | |
335 | ||
0cd61b68 | 336 | static irqreturn_t xscale_pmu_interrupt(int irq, void *arg) |
1da177e4 LT |
337 | { |
338 | int i; | |
339 | u32 pmnc; | |
340 | ||
341 | if (pmu->id == PMU_XSC1) | |
342 | __xsc1_check_ctrs(); | |
343 | else | |
344 | __xsc2_check_ctrs(); | |
345 | ||
346 | for (i = CCNT; i < MAX_COUNTERS; i++) { | |
347 | if (!results[i].ovf) | |
348 | continue; | |
349 | ||
350 | write_counter(i, -(u32)results[i].reset_counter); | |
0cd61b68 | 351 | oprofile_add_sample(get_irq_regs(), i); |
1da177e4 LT |
352 | results[i].ovf--; |
353 | } | |
354 | ||
355 | pmnc = read_pmnc() | PMU_ENABLE; | |
356 | write_pmnc(pmnc); | |
357 | ||
358 | return IRQ_HANDLED; | |
359 | } | |
360 | ||
1618fdd9 JI |
/* Handle from reserve_pmu(); held between xscale_pmu_start() and xscale_pmu_stop(). */
static const struct pmu_irqs *pmu_irqs;
362 | ||
1da177e4 LT |
363 | static void xscale_pmu_stop(void) |
364 | { | |
365 | u32 pmnc = read_pmnc(); | |
366 | ||
367 | pmnc &= ~PMU_ENABLE; | |
368 | write_pmnc(pmnc); | |
369 | ||
1618fdd9 JI |
370 | free_irq(pmu_irqs->irqs[0], results); |
371 | release_pmu(pmu_irqs); | |
372 | pmu_irqs = NULL; | |
1da177e4 LT |
373 | } |
374 | ||
375 | static int xscale_pmu_start(void) | |
376 | { | |
377 | int ret; | |
1618fdd9 JI |
378 | u32 pmnc; |
379 | ||
380 | pmu_irqs = reserve_pmu(); | |
381 | if (IS_ERR(pmu_irqs)) | |
382 | return PTR_ERR(pmu_irqs); | |
383 | ||
384 | pmnc = read_pmnc(); | |
1da177e4 | 385 | |
1618fdd9 JI |
386 | ret = request_irq(pmu_irqs->irqs[0], xscale_pmu_interrupt, |
387 | IRQF_DISABLED, "XScale PMU", (void *)results); | |
1da177e4 LT |
388 | |
389 | if (ret < 0) { | |
390 | printk(KERN_ERR "oprofile: unable to request IRQ%d for XScale PMU\n", | |
1618fdd9 JI |
391 | pmu_irqs->irqs[0]); |
392 | release_pmu(pmu_irqs); | |
393 | pmu_irqs = NULL; | |
1da177e4 LT |
394 | return ret; |
395 | } | |
396 | ||
397 | if (pmu->id == PMU_XSC1) | |
398 | pmnc |= pmu->int_enable; | |
399 | else { | |
400 | __asm__ __volatile__ ("mcr p14, 0, %0, c4, c1, 0" : : "r" (pmu->int_enable)); | |
401 | pmnc &= ~PMU_CNT64; | |
402 | } | |
403 | ||
404 | pmnc |= PMU_ENABLE; | |
405 | write_pmnc(pmnc); | |
406 | pr_debug("xscale_pmu_start: pmnc: %#08x mask: %08x\n", pmnc, pmu->int_enable); | |
407 | return 0; | |
408 | } | |
409 | ||
410 | static int xscale_detect_pmu(void) | |
411 | { | |
412 | int ret = 0; | |
413 | u32 id; | |
414 | ||
415 | id = (read_cpuid(CPUID_ID) >> 13) & 0x7; | |
416 | ||
417 | switch (id) { | |
418 | case 1: | |
419 | pmu = &pmu_parms[PMU_XSC1]; | |
420 | break; | |
421 | case 2: | |
422 | pmu = &pmu_parms[PMU_XSC2]; | |
423 | break; | |
424 | default: | |
425 | ret = -ENODEV; | |
426 | break; | |
427 | } | |
428 | ||
429 | if (!ret) { | |
430 | op_xscale_spec.name = pmu->name; | |
431 | op_xscale_spec.num_counters = pmu->num_counters; | |
432 | pr_debug("xscale_detect_pmu: detected %s PMU\n", pmu->name); | |
433 | } | |
434 | ||
435 | return ret; | |
436 | } | |
437 | ||
438 | struct op_arm_model_spec op_xscale_spec = { | |
439 | .init = xscale_detect_pmu, | |
440 | .setup_ctrs = xscale_setup_ctrs, | |
441 | .start = xscale_pmu_start, | |
442 | .stop = xscale_pmu_stop, | |
443 | }; | |
444 |