Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /** |
2 | * @file arch/alpha/oprofile/op_model_ev67.c | |
3 | * | |
4 | * @remark Copyright 2002 OProfile authors | |
5 | * @remark Read the file COPYING | |
6 | * | |
7 | * @author Richard Henderson <rth@twiddle.net> | |
8 | * @author Falk Hueffner <falk@debian.org> | |
9 | */ | |
10 | ||
11 | #include <linux/oprofile.h> | |
12 | #include <linux/init.h> | |
13 | #include <linux/smp.h> | |
14 | #include <asm/ptrace.h> | |
15 | #include <asm/system.h> | |
16 | ||
17 | #include "op_impl.h" | |
18 | ||
19 | ||
20 | /* Compute all of the registers in preparation for enabling profiling. */ | |
21 | ||
22 | static void | |
23 | ev67_reg_setup(struct op_register_config *reg, | |
24 | struct op_counter_config *ctr, | |
25 | struct op_system_config *sys) | |
26 | { | |
27 | unsigned long ctl, reset, need_reset, i; | |
28 | ||
29 | /* Select desired events. */ | |
30 | ctl = 1UL << 4; /* Enable ProfileMe mode. */ | |
31 | ||
32 | /* The event numbers are chosen so we can use them directly if | |
33 | PCTR1 is enabled. */ | |
34 | if (ctr[1].enabled) { | |
35 | ctl |= (ctr[1].event & 3) << 2; | |
36 | } else { | |
37 | if (ctr[0].event == 0) /* cycles */ | |
38 | ctl |= 1UL << 2; | |
39 | } | |
40 | reg->mux_select = ctl; | |
41 | ||
42 | /* Select logging options. */ | |
43 | /* ??? Need to come up with some mechanism to trace only | |
44 | selected processes. EV67 does not have a mechanism to | |
45 | select kernel or user mode only. For now, enable always. */ | |
46 | reg->proc_mode = 0; | |
47 | ||
48 | /* EV67 cannot change the width of the counters as with the | |
49 | other implementations. But fortunately, we can write to | |
50 | the counters and set the value such that it will overflow | |
51 | at the right time. */ | |
52 | reset = need_reset = 0; | |
53 | for (i = 0; i < 2; ++i) { | |
54 | unsigned long count = ctr[i].count; | |
55 | if (!ctr[i].enabled) | |
56 | continue; | |
57 | ||
58 | if (count > 0x100000) | |
59 | count = 0x100000; | |
60 | ctr[i].count = count; | |
61 | reset |= (0x100000 - count) << (i ? 6 : 28); | |
62 | if (count != 0x100000) | |
63 | need_reset |= 1 << i; | |
64 | } | |
65 | reg->reset_values = reset; | |
66 | reg->need_reset = need_reset; | |
67 | } | |
68 | ||
69 | /* Program all of the registers in preparation for enabling profiling. */ | |
70 | ||
71 | static void | |
72 | ev67_cpu_setup (void *x) | |
73 | { | |
74 | struct op_register_config *reg = x; | |
75 | ||
76 | wrperfmon(2, reg->mux_select); | |
77 | wrperfmon(3, reg->proc_mode); | |
78 | wrperfmon(6, reg->reset_values | 3); | |
79 | } | |
80 | ||
81 | /* CTR is a counter for which the user has requested an interrupt count | |
82 | in between one of the widths selectable in hardware. Reset the count | |
83 | for CTR to the value stored in REG->RESET_VALUES. */ | |
84 | ||
85 | static void | |
86 | ev67_reset_ctr(struct op_register_config *reg, unsigned long ctr) | |
87 | { | |
88 | wrperfmon(6, reg->reset_values | (1 << ctr)); | |
89 | } | |
90 | ||
/* ProfileMe conditions that are exposed as counters.

   The following conditions can also be detected, but it seems
   unlikely that anybody wants to count them:
    * Reset
    * MT_FPCR (write to floating point control register)
    * Arithmetic trap
    * Dstream Fault
    * Machine Check (ECC fault, etc.)
    * OPCDEC (illegal opcode)
    * Floating point disabled
    * Differentiate between DTB single/double misses and 3 or 4 level
      page tables
    * Istream access violation
    * Interrupt
    * Icache Parity Error.
    * Instruction killed (nop, trapb)

   Unfortunately there seems to be no way to detect Dcache and Bcache
   misses.  Bcache misses could be approximated by making the counter
   count them, but that is not precise.

   The whole thing is modelled as 20 counters:
    * PCTR0
    * PCTR1
    * 9 ProfileMe events, induced by PCTR0
    * 9 ProfileMe events, induced by PCTR1
 */

enum profileme_counters {
	PM_STALLED	= 0,	/* Stalled for at least one cycle
				   between the fetch and map stages */
	PM_TAKEN	= 1,	/* Conditional branch taken */
	PM_MISPREDICT	= 2,	/* Branch caused mispredict trap */
	PM_ITB_MISS	= 3,	/* ITB miss */
	PM_DTB_MISS	= 4,	/* DTB miss */
	PM_REPLAY	= 5,	/* Replay trap */
	PM_LOAD_STORE	= 6,	/* Load-store order trap */
	PM_ICACHE_MISS	= 7,	/* Icache miss */
	PM_UNALIGNED	= 8,	/* Unaligned Load/Store */
	PM_NUM_COUNTERS	= 9	/* Number of ProfileMe events above */
};
132 | ||
133 | static inline void | |
134 | op_add_pm(unsigned long pc, int kern, unsigned long counter, | |
135 | struct op_counter_config *ctr, unsigned long event) | |
136 | { | |
137 | unsigned long fake_counter = 2 + event; | |
138 | if (counter == 1) | |
139 | fake_counter += PM_NUM_COUNTERS; | |
140 | if (ctr[fake_counter].enabled) | |
141 | oprofile_add_pc(pc, kern, fake_counter); | |
142 | } | |
143 | ||
144 | static void | |
145 | ev67_handle_interrupt(unsigned long which, struct pt_regs *regs, | |
146 | struct op_counter_config *ctr) | |
147 | { | |
148 | unsigned long pmpc, pctr_ctl; | |
149 | int kern = !user_mode(regs); | |
150 | int mispredict = 0; | |
151 | union { | |
152 | unsigned long v; | |
153 | struct { | |
154 | unsigned reserved: 30; /* 0-29 */ | |
155 | unsigned overcount: 3; /* 30-32 */ | |
156 | unsigned icache_miss: 1; /* 33 */ | |
157 | unsigned trap_type: 4; /* 34-37 */ | |
158 | unsigned load_store: 1; /* 38 */ | |
159 | unsigned trap: 1; /* 39 */ | |
160 | unsigned mispredict: 1; /* 40 */ | |
161 | } fields; | |
162 | } i_stat; | |
163 | ||
164 | enum trap_types { | |
165 | TRAP_REPLAY, | |
166 | TRAP_INVALID0, | |
167 | TRAP_DTB_DOUBLE_MISS_3, | |
168 | TRAP_DTB_DOUBLE_MISS_4, | |
169 | TRAP_FP_DISABLED, | |
170 | TRAP_UNALIGNED, | |
171 | TRAP_DTB_SINGLE_MISS, | |
172 | TRAP_DSTREAM_FAULT, | |
173 | TRAP_OPCDEC, | |
174 | TRAP_INVALID1, | |
175 | TRAP_MACHINE_CHECK, | |
176 | TRAP_INVALID2, | |
177 | TRAP_ARITHMETIC, | |
178 | TRAP_INVALID3, | |
179 | TRAP_MT_FPCR, | |
180 | TRAP_RESET | |
181 | }; | |
182 | ||
183 | pmpc = wrperfmon(9, 0); | |
184 | /* ??? Don't know how to handle physical-mode PALcode address. */ | |
185 | if (pmpc & 1) | |
186 | return; | |
187 | pmpc &= ~2; /* clear reserved bit */ | |
188 | ||
189 | i_stat.v = wrperfmon(8, 0); | |
190 | if (i_stat.fields.trap) { | |
191 | switch (i_stat.fields.trap_type) { | |
192 | case TRAP_INVALID1: | |
193 | case TRAP_INVALID2: | |
194 | case TRAP_INVALID3: | |
25985edc | 195 | /* Pipeline redirection occurred. PMPC points |
1da177e4 LT |
196 | to PALcode. Recognize ITB miss by PALcode |
197 | offset address, and get actual PC from | |
198 | EXC_ADDR. */ | |
199 | oprofile_add_pc(regs->pc, kern, which); | |
200 | if ((pmpc & ((1 << 15) - 1)) == 581) | |
201 | op_add_pm(regs->pc, kern, which, | |
202 | ctr, PM_ITB_MISS); | |
203 | /* Most other bit and counter values will be | |
204 | those for the first instruction in the | |
205 | fault handler, so we're done. */ | |
206 | return; | |
207 | case TRAP_REPLAY: | |
208 | op_add_pm(pmpc, kern, which, ctr, | |
209 | (i_stat.fields.load_store | |
210 | ? PM_LOAD_STORE : PM_REPLAY)); | |
211 | break; | |
212 | case TRAP_DTB_DOUBLE_MISS_3: | |
213 | case TRAP_DTB_DOUBLE_MISS_4: | |
214 | case TRAP_DTB_SINGLE_MISS: | |
215 | op_add_pm(pmpc, kern, which, ctr, PM_DTB_MISS); | |
216 | break; | |
217 | case TRAP_UNALIGNED: | |
218 | op_add_pm(pmpc, kern, which, ctr, PM_UNALIGNED); | |
219 | break; | |
220 | case TRAP_INVALID0: | |
221 | case TRAP_FP_DISABLED: | |
222 | case TRAP_DSTREAM_FAULT: | |
223 | case TRAP_OPCDEC: | |
224 | case TRAP_MACHINE_CHECK: | |
225 | case TRAP_ARITHMETIC: | |
226 | case TRAP_MT_FPCR: | |
227 | case TRAP_RESET: | |
228 | break; | |
229 | } | |
230 | ||
231 | /* ??? JSR/JMP/RET/COR or HW_JSR/HW_JMP/HW_RET/HW_COR | |
232 | mispredicts do not set this bit but can be | |
233 | recognized by the presence of one of these | |
234 | instructions at the PMPC location with bit 39 | |
235 | set. */ | |
236 | if (i_stat.fields.mispredict) { | |
237 | mispredict = 1; | |
238 | op_add_pm(pmpc, kern, which, ctr, PM_MISPREDICT); | |
239 | } | |
240 | } | |
241 | ||
242 | oprofile_add_pc(pmpc, kern, which); | |
243 | ||
244 | pctr_ctl = wrperfmon(5, 0); | |
245 | if (pctr_ctl & (1UL << 27)) | |
246 | op_add_pm(pmpc, kern, which, ctr, PM_STALLED); | |
247 | ||
248 | /* Unfortunately, TAK is undefined on mispredicted branches. | |
249 | ??? It is also undefined for non-cbranch insns, should | |
250 | check that. */ | |
251 | if (!mispredict && pctr_ctl & (1UL << 0)) | |
252 | op_add_pm(pmpc, kern, which, ctr, PM_TAKEN); | |
253 | } | |
254 | ||
255 | struct op_axp_model op_model_ev67 = { | |
256 | .reg_setup = ev67_reg_setup, | |
257 | .cpu_setup = ev67_cpu_setup, | |
258 | .reset_ctr = ev67_reset_ctr, | |
259 | .handle_interrupt = ev67_handle_interrupt, | |
260 | .cpu_type = "alpha/ev67", | |
261 | .num_counters = 20, | |
262 | .can_set_proc_mode = 0, | |
263 | }; |