Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rkuo/linux...
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / drivers / gpu / drm / radeon / si.c
1 /*
2 * Copyright 2011 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
24 #include <linux/firmware.h>
25 #include <linux/platform_device.h>
26 #include <linux/slab.h>
27 #include <linux/module.h>
28 #include <drm/drmP.h>
29 #include "radeon.h"
30 #include "radeon_asic.h"
31 #include <drm/radeon_drm.h>
32 #include "sid.h"
33 #include "atom.h"
34 #include "si_blit_shaders.h"
35
36 #define SI_PFP_UCODE_SIZE 2144
37 #define SI_PM4_UCODE_SIZE 2144
38 #define SI_CE_UCODE_SIZE 2144
39 #define SI_RLC_UCODE_SIZE 2048
40 #define SI_MC_UCODE_SIZE 7769
41 #define OLAND_MC_UCODE_SIZE 7863
42
43 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
44 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
45 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
46 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
47 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
48 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
49 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
50 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
51 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
52 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
53 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
54 MODULE_FIRMWARE("radeon/VERDE_me.bin");
55 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
56 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
57 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
58 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
59 MODULE_FIRMWARE("radeon/OLAND_me.bin");
60 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
61 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
62 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
63
64 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
65 extern void r600_ih_ring_fini(struct radeon_device *rdev);
66 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
67 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
68 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
69 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
70 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
71 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
72
73 static const u32 tahiti_golden_rlc_registers[] =
74 {
75 0xc424, 0xffffffff, 0x00601005,
76 0xc47c, 0xffffffff, 0x10104040,
77 0xc488, 0xffffffff, 0x0100000a,
78 0xc314, 0xffffffff, 0x00000800,
79 0xc30c, 0xffffffff, 0x800000f4,
80 0xf4a8, 0xffffffff, 0x00000000
81 };
82
83 static const u32 tahiti_golden_registers[] =
84 {
85 0x9a10, 0x00010000, 0x00018208,
86 0x9830, 0xffffffff, 0x00000000,
87 0x9834, 0xf00fffff, 0x00000400,
88 0x9838, 0x0002021c, 0x00020200,
89 0xc78, 0x00000080, 0x00000000,
90 0xd030, 0x000300c0, 0x00800040,
91 0xd830, 0x000300c0, 0x00800040,
92 0x5bb0, 0x000000f0, 0x00000070,
93 0x5bc0, 0x00200000, 0x50100000,
94 0x7030, 0x31000311, 0x00000011,
95 0x277c, 0x00000003, 0x000007ff,
96 0x240c, 0x000007ff, 0x00000000,
97 0x8a14, 0xf000001f, 0x00000007,
98 0x8b24, 0xffffffff, 0x00ffffff,
99 0x8b10, 0x0000ff0f, 0x00000000,
100 0x28a4c, 0x07ffffff, 0x4e000000,
101 0x28350, 0x3f3f3fff, 0x2a00126a,
102 0x30, 0x000000ff, 0x0040,
103 0x34, 0x00000040, 0x00004040,
104 0x9100, 0x07ffffff, 0x03000000,
105 0x8e88, 0x01ff1f3f, 0x00000000,
106 0x8e84, 0x01ff1f3f, 0x00000000,
107 0x9060, 0x0000007f, 0x00000020,
108 0x9508, 0x00010000, 0x00010000,
109 0xac14, 0x00000200, 0x000002fb,
110 0xac10, 0xffffffff, 0x0000543b,
111 0xac0c, 0xffffffff, 0xa9210876,
112 0x88d0, 0xffffffff, 0x000fff40,
113 0x88d4, 0x0000001f, 0x00000010,
114 0x1410, 0x20000000, 0x20fffed8,
115 0x15c0, 0x000c0fc0, 0x000c0400
116 };
117
118 static const u32 tahiti_golden_registers2[] =
119 {
120 0xc64, 0x00000001, 0x00000001
121 };
122
123 static const u32 pitcairn_golden_rlc_registers[] =
124 {
125 0xc424, 0xffffffff, 0x00601004,
126 0xc47c, 0xffffffff, 0x10102020,
127 0xc488, 0xffffffff, 0x01000020,
128 0xc314, 0xffffffff, 0x00000800,
129 0xc30c, 0xffffffff, 0x800000a4
130 };
131
132 static const u32 pitcairn_golden_registers[] =
133 {
134 0x9a10, 0x00010000, 0x00018208,
135 0x9830, 0xffffffff, 0x00000000,
136 0x9834, 0xf00fffff, 0x00000400,
137 0x9838, 0x0002021c, 0x00020200,
138 0xc78, 0x00000080, 0x00000000,
139 0xd030, 0x000300c0, 0x00800040,
140 0xd830, 0x000300c0, 0x00800040,
141 0x5bb0, 0x000000f0, 0x00000070,
142 0x5bc0, 0x00200000, 0x50100000,
143 0x7030, 0x31000311, 0x00000011,
144 0x2ae4, 0x00073ffe, 0x000022a2,
145 0x240c, 0x000007ff, 0x00000000,
146 0x8a14, 0xf000001f, 0x00000007,
147 0x8b24, 0xffffffff, 0x00ffffff,
148 0x8b10, 0x0000ff0f, 0x00000000,
149 0x28a4c, 0x07ffffff, 0x4e000000,
150 0x28350, 0x3f3f3fff, 0x2a00126a,
151 0x30, 0x000000ff, 0x0040,
152 0x34, 0x00000040, 0x00004040,
153 0x9100, 0x07ffffff, 0x03000000,
154 0x9060, 0x0000007f, 0x00000020,
155 0x9508, 0x00010000, 0x00010000,
156 0xac14, 0x000003ff, 0x000000f7,
157 0xac10, 0xffffffff, 0x00000000,
158 0xac0c, 0xffffffff, 0x32761054,
159 0x88d4, 0x0000001f, 0x00000010,
160 0x15c0, 0x000c0fc0, 0x000c0400
161 };
162
163 static const u32 verde_golden_rlc_registers[] =
164 {
165 0xc424, 0xffffffff, 0x033f1005,
166 0xc47c, 0xffffffff, 0x10808020,
167 0xc488, 0xffffffff, 0x00800008,
168 0xc314, 0xffffffff, 0x00001000,
169 0xc30c, 0xffffffff, 0x80010014
170 };
171
172 static const u32 verde_golden_registers[] =
173 {
174 0x9a10, 0x00010000, 0x00018208,
175 0x9830, 0xffffffff, 0x00000000,
176 0x9834, 0xf00fffff, 0x00000400,
177 0x9838, 0x0002021c, 0x00020200,
178 0xc78, 0x00000080, 0x00000000,
179 0xd030, 0x000300c0, 0x00800040,
180 0xd030, 0x000300c0, 0x00800040,
181 0xd830, 0x000300c0, 0x00800040,
182 0xd830, 0x000300c0, 0x00800040,
183 0x5bb0, 0x000000f0, 0x00000070,
184 0x5bc0, 0x00200000, 0x50100000,
185 0x7030, 0x31000311, 0x00000011,
186 0x2ae4, 0x00073ffe, 0x000022a2,
187 0x2ae4, 0x00073ffe, 0x000022a2,
188 0x2ae4, 0x00073ffe, 0x000022a2,
189 0x240c, 0x000007ff, 0x00000000,
190 0x240c, 0x000007ff, 0x00000000,
191 0x240c, 0x000007ff, 0x00000000,
192 0x8a14, 0xf000001f, 0x00000007,
193 0x8a14, 0xf000001f, 0x00000007,
194 0x8a14, 0xf000001f, 0x00000007,
195 0x8b24, 0xffffffff, 0x00ffffff,
196 0x8b10, 0x0000ff0f, 0x00000000,
197 0x28a4c, 0x07ffffff, 0x4e000000,
198 0x28350, 0x3f3f3fff, 0x0000124a,
199 0x28350, 0x3f3f3fff, 0x0000124a,
200 0x28350, 0x3f3f3fff, 0x0000124a,
201 0x30, 0x000000ff, 0x0040,
202 0x34, 0x00000040, 0x00004040,
203 0x9100, 0x07ffffff, 0x03000000,
204 0x9100, 0x07ffffff, 0x03000000,
205 0x8e88, 0x01ff1f3f, 0x00000000,
206 0x8e88, 0x01ff1f3f, 0x00000000,
207 0x8e88, 0x01ff1f3f, 0x00000000,
208 0x8e84, 0x01ff1f3f, 0x00000000,
209 0x8e84, 0x01ff1f3f, 0x00000000,
210 0x8e84, 0x01ff1f3f, 0x00000000,
211 0x9060, 0x0000007f, 0x00000020,
212 0x9508, 0x00010000, 0x00010000,
213 0xac14, 0x000003ff, 0x00000003,
214 0xac14, 0x000003ff, 0x00000003,
215 0xac14, 0x000003ff, 0x00000003,
216 0xac10, 0xffffffff, 0x00000000,
217 0xac10, 0xffffffff, 0x00000000,
218 0xac10, 0xffffffff, 0x00000000,
219 0xac0c, 0xffffffff, 0x00001032,
220 0xac0c, 0xffffffff, 0x00001032,
221 0xac0c, 0xffffffff, 0x00001032,
222 0x88d4, 0x0000001f, 0x00000010,
223 0x88d4, 0x0000001f, 0x00000010,
224 0x88d4, 0x0000001f, 0x00000010,
225 0x15c0, 0x000c0fc0, 0x000c0400
226 };
227
228 static const u32 oland_golden_rlc_registers[] =
229 {
230 0xc424, 0xffffffff, 0x00601005,
231 0xc47c, 0xffffffff, 0x10104040,
232 0xc488, 0xffffffff, 0x0100000a,
233 0xc314, 0xffffffff, 0x00000800,
234 0xc30c, 0xffffffff, 0x800000f4
235 };
236
237 static const u32 oland_golden_registers[] =
238 {
239 0x9a10, 0x00010000, 0x00018208,
240 0x9830, 0xffffffff, 0x00000000,
241 0x9834, 0xf00fffff, 0x00000400,
242 0x9838, 0x0002021c, 0x00020200,
243 0xc78, 0x00000080, 0x00000000,
244 0xd030, 0x000300c0, 0x00800040,
245 0xd830, 0x000300c0, 0x00800040,
246 0x5bb0, 0x000000f0, 0x00000070,
247 0x5bc0, 0x00200000, 0x50100000,
248 0x7030, 0x31000311, 0x00000011,
249 0x2ae4, 0x00073ffe, 0x000022a2,
250 0x240c, 0x000007ff, 0x00000000,
251 0x8a14, 0xf000001f, 0x00000007,
252 0x8b24, 0xffffffff, 0x00ffffff,
253 0x8b10, 0x0000ff0f, 0x00000000,
254 0x28a4c, 0x07ffffff, 0x4e000000,
255 0x28350, 0x3f3f3fff, 0x00000082,
256 0x30, 0x000000ff, 0x0040,
257 0x34, 0x00000040, 0x00004040,
258 0x9100, 0x07ffffff, 0x03000000,
259 0x9060, 0x0000007f, 0x00000020,
260 0x9508, 0x00010000, 0x00010000,
261 0xac14, 0x000003ff, 0x000000f3,
262 0xac10, 0xffffffff, 0x00000000,
263 0xac0c, 0xffffffff, 0x00003210,
264 0x88d4, 0x0000001f, 0x00000010,
265 0x15c0, 0x000c0fc0, 0x000c0400
266 };
267
268 static const u32 tahiti_mgcg_cgcg_init[] =
269 {
270 0xc400, 0xffffffff, 0xfffffffc,
271 0x802c, 0xffffffff, 0xe0000000,
272 0x9a60, 0xffffffff, 0x00000100,
273 0x92a4, 0xffffffff, 0x00000100,
274 0xc164, 0xffffffff, 0x00000100,
275 0x9774, 0xffffffff, 0x00000100,
276 0x8984, 0xffffffff, 0x06000100,
277 0x8a18, 0xffffffff, 0x00000100,
278 0x92a0, 0xffffffff, 0x00000100,
279 0xc380, 0xffffffff, 0x00000100,
280 0x8b28, 0xffffffff, 0x00000100,
281 0x9144, 0xffffffff, 0x00000100,
282 0x8d88, 0xffffffff, 0x00000100,
283 0x8d8c, 0xffffffff, 0x00000100,
284 0x9030, 0xffffffff, 0x00000100,
285 0x9034, 0xffffffff, 0x00000100,
286 0x9038, 0xffffffff, 0x00000100,
287 0x903c, 0xffffffff, 0x00000100,
288 0xad80, 0xffffffff, 0x00000100,
289 0xac54, 0xffffffff, 0x00000100,
290 0x897c, 0xffffffff, 0x06000100,
291 0x9868, 0xffffffff, 0x00000100,
292 0x9510, 0xffffffff, 0x00000100,
293 0xaf04, 0xffffffff, 0x00000100,
294 0xae04, 0xffffffff, 0x00000100,
295 0x949c, 0xffffffff, 0x00000100,
296 0x802c, 0xffffffff, 0xe0000000,
297 0x9160, 0xffffffff, 0x00010000,
298 0x9164, 0xffffffff, 0x00030002,
299 0x9168, 0xffffffff, 0x00040007,
300 0x916c, 0xffffffff, 0x00060005,
301 0x9170, 0xffffffff, 0x00090008,
302 0x9174, 0xffffffff, 0x00020001,
303 0x9178, 0xffffffff, 0x00040003,
304 0x917c, 0xffffffff, 0x00000007,
305 0x9180, 0xffffffff, 0x00060005,
306 0x9184, 0xffffffff, 0x00090008,
307 0x9188, 0xffffffff, 0x00030002,
308 0x918c, 0xffffffff, 0x00050004,
309 0x9190, 0xffffffff, 0x00000008,
310 0x9194, 0xffffffff, 0x00070006,
311 0x9198, 0xffffffff, 0x000a0009,
312 0x919c, 0xffffffff, 0x00040003,
313 0x91a0, 0xffffffff, 0x00060005,
314 0x91a4, 0xffffffff, 0x00000009,
315 0x91a8, 0xffffffff, 0x00080007,
316 0x91ac, 0xffffffff, 0x000b000a,
317 0x91b0, 0xffffffff, 0x00050004,
318 0x91b4, 0xffffffff, 0x00070006,
319 0x91b8, 0xffffffff, 0x0008000b,
320 0x91bc, 0xffffffff, 0x000a0009,
321 0x91c0, 0xffffffff, 0x000d000c,
322 0x91c4, 0xffffffff, 0x00060005,
323 0x91c8, 0xffffffff, 0x00080007,
324 0x91cc, 0xffffffff, 0x0000000b,
325 0x91d0, 0xffffffff, 0x000a0009,
326 0x91d4, 0xffffffff, 0x000d000c,
327 0x91d8, 0xffffffff, 0x00070006,
328 0x91dc, 0xffffffff, 0x00090008,
329 0x91e0, 0xffffffff, 0x0000000c,
330 0x91e4, 0xffffffff, 0x000b000a,
331 0x91e8, 0xffffffff, 0x000e000d,
332 0x91ec, 0xffffffff, 0x00080007,
333 0x91f0, 0xffffffff, 0x000a0009,
334 0x91f4, 0xffffffff, 0x0000000d,
335 0x91f8, 0xffffffff, 0x000c000b,
336 0x91fc, 0xffffffff, 0x000f000e,
337 0x9200, 0xffffffff, 0x00090008,
338 0x9204, 0xffffffff, 0x000b000a,
339 0x9208, 0xffffffff, 0x000c000f,
340 0x920c, 0xffffffff, 0x000e000d,
341 0x9210, 0xffffffff, 0x00110010,
342 0x9214, 0xffffffff, 0x000a0009,
343 0x9218, 0xffffffff, 0x000c000b,
344 0x921c, 0xffffffff, 0x0000000f,
345 0x9220, 0xffffffff, 0x000e000d,
346 0x9224, 0xffffffff, 0x00110010,
347 0x9228, 0xffffffff, 0x000b000a,
348 0x922c, 0xffffffff, 0x000d000c,
349 0x9230, 0xffffffff, 0x00000010,
350 0x9234, 0xffffffff, 0x000f000e,
351 0x9238, 0xffffffff, 0x00120011,
352 0x923c, 0xffffffff, 0x000c000b,
353 0x9240, 0xffffffff, 0x000e000d,
354 0x9244, 0xffffffff, 0x00000011,
355 0x9248, 0xffffffff, 0x0010000f,
356 0x924c, 0xffffffff, 0x00130012,
357 0x9250, 0xffffffff, 0x000d000c,
358 0x9254, 0xffffffff, 0x000f000e,
359 0x9258, 0xffffffff, 0x00100013,
360 0x925c, 0xffffffff, 0x00120011,
361 0x9260, 0xffffffff, 0x00150014,
362 0x9264, 0xffffffff, 0x000e000d,
363 0x9268, 0xffffffff, 0x0010000f,
364 0x926c, 0xffffffff, 0x00000013,
365 0x9270, 0xffffffff, 0x00120011,
366 0x9274, 0xffffffff, 0x00150014,
367 0x9278, 0xffffffff, 0x000f000e,
368 0x927c, 0xffffffff, 0x00110010,
369 0x9280, 0xffffffff, 0x00000014,
370 0x9284, 0xffffffff, 0x00130012,
371 0x9288, 0xffffffff, 0x00160015,
372 0x928c, 0xffffffff, 0x0010000f,
373 0x9290, 0xffffffff, 0x00120011,
374 0x9294, 0xffffffff, 0x00000015,
375 0x9298, 0xffffffff, 0x00140013,
376 0x929c, 0xffffffff, 0x00170016,
377 0x9150, 0xffffffff, 0x96940200,
378 0x8708, 0xffffffff, 0x00900100,
379 0xc478, 0xffffffff, 0x00000080,
380 0xc404, 0xffffffff, 0x0020003f,
381 0x30, 0xffffffff, 0x0000001c,
382 0x34, 0x000f0000, 0x000f0000,
383 0x160c, 0xffffffff, 0x00000100,
384 0x1024, 0xffffffff, 0x00000100,
385 0x102c, 0x00000101, 0x00000000,
386 0x20a8, 0xffffffff, 0x00000104,
387 0x264c, 0x000c0000, 0x000c0000,
388 0x2648, 0x000c0000, 0x000c0000,
389 0x55e4, 0xff000fff, 0x00000100,
390 0x55e8, 0x00000001, 0x00000001,
391 0x2f50, 0x00000001, 0x00000001,
392 0x30cc, 0xc0000fff, 0x00000104,
393 0xc1e4, 0x00000001, 0x00000001,
394 0xd0c0, 0xfffffff0, 0x00000100,
395 0xd8c0, 0xfffffff0, 0x00000100
396 };
397
398 static const u32 pitcairn_mgcg_cgcg_init[] =
399 {
400 0xc400, 0xffffffff, 0xfffffffc,
401 0x802c, 0xffffffff, 0xe0000000,
402 0x9a60, 0xffffffff, 0x00000100,
403 0x92a4, 0xffffffff, 0x00000100,
404 0xc164, 0xffffffff, 0x00000100,
405 0x9774, 0xffffffff, 0x00000100,
406 0x8984, 0xffffffff, 0x06000100,
407 0x8a18, 0xffffffff, 0x00000100,
408 0x92a0, 0xffffffff, 0x00000100,
409 0xc380, 0xffffffff, 0x00000100,
410 0x8b28, 0xffffffff, 0x00000100,
411 0x9144, 0xffffffff, 0x00000100,
412 0x8d88, 0xffffffff, 0x00000100,
413 0x8d8c, 0xffffffff, 0x00000100,
414 0x9030, 0xffffffff, 0x00000100,
415 0x9034, 0xffffffff, 0x00000100,
416 0x9038, 0xffffffff, 0x00000100,
417 0x903c, 0xffffffff, 0x00000100,
418 0xad80, 0xffffffff, 0x00000100,
419 0xac54, 0xffffffff, 0x00000100,
420 0x897c, 0xffffffff, 0x06000100,
421 0x9868, 0xffffffff, 0x00000100,
422 0x9510, 0xffffffff, 0x00000100,
423 0xaf04, 0xffffffff, 0x00000100,
424 0xae04, 0xffffffff, 0x00000100,
425 0x949c, 0xffffffff, 0x00000100,
426 0x802c, 0xffffffff, 0xe0000000,
427 0x9160, 0xffffffff, 0x00010000,
428 0x9164, 0xffffffff, 0x00030002,
429 0x9168, 0xffffffff, 0x00040007,
430 0x916c, 0xffffffff, 0x00060005,
431 0x9170, 0xffffffff, 0x00090008,
432 0x9174, 0xffffffff, 0x00020001,
433 0x9178, 0xffffffff, 0x00040003,
434 0x917c, 0xffffffff, 0x00000007,
435 0x9180, 0xffffffff, 0x00060005,
436 0x9184, 0xffffffff, 0x00090008,
437 0x9188, 0xffffffff, 0x00030002,
438 0x918c, 0xffffffff, 0x00050004,
439 0x9190, 0xffffffff, 0x00000008,
440 0x9194, 0xffffffff, 0x00070006,
441 0x9198, 0xffffffff, 0x000a0009,
442 0x919c, 0xffffffff, 0x00040003,
443 0x91a0, 0xffffffff, 0x00060005,
444 0x91a4, 0xffffffff, 0x00000009,
445 0x91a8, 0xffffffff, 0x00080007,
446 0x91ac, 0xffffffff, 0x000b000a,
447 0x91b0, 0xffffffff, 0x00050004,
448 0x91b4, 0xffffffff, 0x00070006,
449 0x91b8, 0xffffffff, 0x0008000b,
450 0x91bc, 0xffffffff, 0x000a0009,
451 0x91c0, 0xffffffff, 0x000d000c,
452 0x9200, 0xffffffff, 0x00090008,
453 0x9204, 0xffffffff, 0x000b000a,
454 0x9208, 0xffffffff, 0x000c000f,
455 0x920c, 0xffffffff, 0x000e000d,
456 0x9210, 0xffffffff, 0x00110010,
457 0x9214, 0xffffffff, 0x000a0009,
458 0x9218, 0xffffffff, 0x000c000b,
459 0x921c, 0xffffffff, 0x0000000f,
460 0x9220, 0xffffffff, 0x000e000d,
461 0x9224, 0xffffffff, 0x00110010,
462 0x9228, 0xffffffff, 0x000b000a,
463 0x922c, 0xffffffff, 0x000d000c,
464 0x9230, 0xffffffff, 0x00000010,
465 0x9234, 0xffffffff, 0x000f000e,
466 0x9238, 0xffffffff, 0x00120011,
467 0x923c, 0xffffffff, 0x000c000b,
468 0x9240, 0xffffffff, 0x000e000d,
469 0x9244, 0xffffffff, 0x00000011,
470 0x9248, 0xffffffff, 0x0010000f,
471 0x924c, 0xffffffff, 0x00130012,
472 0x9250, 0xffffffff, 0x000d000c,
473 0x9254, 0xffffffff, 0x000f000e,
474 0x9258, 0xffffffff, 0x00100013,
475 0x925c, 0xffffffff, 0x00120011,
476 0x9260, 0xffffffff, 0x00150014,
477 0x9150, 0xffffffff, 0x96940200,
478 0x8708, 0xffffffff, 0x00900100,
479 0xc478, 0xffffffff, 0x00000080,
480 0xc404, 0xffffffff, 0x0020003f,
481 0x30, 0xffffffff, 0x0000001c,
482 0x34, 0x000f0000, 0x000f0000,
483 0x160c, 0xffffffff, 0x00000100,
484 0x1024, 0xffffffff, 0x00000100,
485 0x102c, 0x00000101, 0x00000000,
486 0x20a8, 0xffffffff, 0x00000104,
487 0x55e4, 0xff000fff, 0x00000100,
488 0x55e8, 0x00000001, 0x00000001,
489 0x2f50, 0x00000001, 0x00000001,
490 0x30cc, 0xc0000fff, 0x00000104,
491 0xc1e4, 0x00000001, 0x00000001,
492 0xd0c0, 0xfffffff0, 0x00000100,
493 0xd8c0, 0xfffffff0, 0x00000100
494 };
495
496 static const u32 verde_mgcg_cgcg_init[] =
497 {
498 0xc400, 0xffffffff, 0xfffffffc,
499 0x802c, 0xffffffff, 0xe0000000,
500 0x9a60, 0xffffffff, 0x00000100,
501 0x92a4, 0xffffffff, 0x00000100,
502 0xc164, 0xffffffff, 0x00000100,
503 0x9774, 0xffffffff, 0x00000100,
504 0x8984, 0xffffffff, 0x06000100,
505 0x8a18, 0xffffffff, 0x00000100,
506 0x92a0, 0xffffffff, 0x00000100,
507 0xc380, 0xffffffff, 0x00000100,
508 0x8b28, 0xffffffff, 0x00000100,
509 0x9144, 0xffffffff, 0x00000100,
510 0x8d88, 0xffffffff, 0x00000100,
511 0x8d8c, 0xffffffff, 0x00000100,
512 0x9030, 0xffffffff, 0x00000100,
513 0x9034, 0xffffffff, 0x00000100,
514 0x9038, 0xffffffff, 0x00000100,
515 0x903c, 0xffffffff, 0x00000100,
516 0xad80, 0xffffffff, 0x00000100,
517 0xac54, 0xffffffff, 0x00000100,
518 0x897c, 0xffffffff, 0x06000100,
519 0x9868, 0xffffffff, 0x00000100,
520 0x9510, 0xffffffff, 0x00000100,
521 0xaf04, 0xffffffff, 0x00000100,
522 0xae04, 0xffffffff, 0x00000100,
523 0x949c, 0xffffffff, 0x00000100,
524 0x802c, 0xffffffff, 0xe0000000,
525 0x9160, 0xffffffff, 0x00010000,
526 0x9164, 0xffffffff, 0x00030002,
527 0x9168, 0xffffffff, 0x00040007,
528 0x916c, 0xffffffff, 0x00060005,
529 0x9170, 0xffffffff, 0x00090008,
530 0x9174, 0xffffffff, 0x00020001,
531 0x9178, 0xffffffff, 0x00040003,
532 0x917c, 0xffffffff, 0x00000007,
533 0x9180, 0xffffffff, 0x00060005,
534 0x9184, 0xffffffff, 0x00090008,
535 0x9188, 0xffffffff, 0x00030002,
536 0x918c, 0xffffffff, 0x00050004,
537 0x9190, 0xffffffff, 0x00000008,
538 0x9194, 0xffffffff, 0x00070006,
539 0x9198, 0xffffffff, 0x000a0009,
540 0x919c, 0xffffffff, 0x00040003,
541 0x91a0, 0xffffffff, 0x00060005,
542 0x91a4, 0xffffffff, 0x00000009,
543 0x91a8, 0xffffffff, 0x00080007,
544 0x91ac, 0xffffffff, 0x000b000a,
545 0x91b0, 0xffffffff, 0x00050004,
546 0x91b4, 0xffffffff, 0x00070006,
547 0x91b8, 0xffffffff, 0x0008000b,
548 0x91bc, 0xffffffff, 0x000a0009,
549 0x91c0, 0xffffffff, 0x000d000c,
550 0x9200, 0xffffffff, 0x00090008,
551 0x9204, 0xffffffff, 0x000b000a,
552 0x9208, 0xffffffff, 0x000c000f,
553 0x920c, 0xffffffff, 0x000e000d,
554 0x9210, 0xffffffff, 0x00110010,
555 0x9214, 0xffffffff, 0x000a0009,
556 0x9218, 0xffffffff, 0x000c000b,
557 0x921c, 0xffffffff, 0x0000000f,
558 0x9220, 0xffffffff, 0x000e000d,
559 0x9224, 0xffffffff, 0x00110010,
560 0x9228, 0xffffffff, 0x000b000a,
561 0x922c, 0xffffffff, 0x000d000c,
562 0x9230, 0xffffffff, 0x00000010,
563 0x9234, 0xffffffff, 0x000f000e,
564 0x9238, 0xffffffff, 0x00120011,
565 0x923c, 0xffffffff, 0x000c000b,
566 0x9240, 0xffffffff, 0x000e000d,
567 0x9244, 0xffffffff, 0x00000011,
568 0x9248, 0xffffffff, 0x0010000f,
569 0x924c, 0xffffffff, 0x00130012,
570 0x9250, 0xffffffff, 0x000d000c,
571 0x9254, 0xffffffff, 0x000f000e,
572 0x9258, 0xffffffff, 0x00100013,
573 0x925c, 0xffffffff, 0x00120011,
574 0x9260, 0xffffffff, 0x00150014,
575 0x9150, 0xffffffff, 0x96940200,
576 0x8708, 0xffffffff, 0x00900100,
577 0xc478, 0xffffffff, 0x00000080,
578 0xc404, 0xffffffff, 0x0020003f,
579 0x30, 0xffffffff, 0x0000001c,
580 0x34, 0x000f0000, 0x000f0000,
581 0x160c, 0xffffffff, 0x00000100,
582 0x1024, 0xffffffff, 0x00000100,
583 0x102c, 0x00000101, 0x00000000,
584 0x20a8, 0xffffffff, 0x00000104,
585 0x264c, 0x000c0000, 0x000c0000,
586 0x2648, 0x000c0000, 0x000c0000,
587 0x55e4, 0xff000fff, 0x00000100,
588 0x55e8, 0x00000001, 0x00000001,
589 0x2f50, 0x00000001, 0x00000001,
590 0x30cc, 0xc0000fff, 0x00000104,
591 0xc1e4, 0x00000001, 0x00000001,
592 0xd0c0, 0xfffffff0, 0x00000100,
593 0xd8c0, 0xfffffff0, 0x00000100
594 };
595
596 static const u32 oland_mgcg_cgcg_init[] =
597 {
598 0xc400, 0xffffffff, 0xfffffffc,
599 0x802c, 0xffffffff, 0xe0000000,
600 0x9a60, 0xffffffff, 0x00000100,
601 0x92a4, 0xffffffff, 0x00000100,
602 0xc164, 0xffffffff, 0x00000100,
603 0x9774, 0xffffffff, 0x00000100,
604 0x8984, 0xffffffff, 0x06000100,
605 0x8a18, 0xffffffff, 0x00000100,
606 0x92a0, 0xffffffff, 0x00000100,
607 0xc380, 0xffffffff, 0x00000100,
608 0x8b28, 0xffffffff, 0x00000100,
609 0x9144, 0xffffffff, 0x00000100,
610 0x8d88, 0xffffffff, 0x00000100,
611 0x8d8c, 0xffffffff, 0x00000100,
612 0x9030, 0xffffffff, 0x00000100,
613 0x9034, 0xffffffff, 0x00000100,
614 0x9038, 0xffffffff, 0x00000100,
615 0x903c, 0xffffffff, 0x00000100,
616 0xad80, 0xffffffff, 0x00000100,
617 0xac54, 0xffffffff, 0x00000100,
618 0x897c, 0xffffffff, 0x06000100,
619 0x9868, 0xffffffff, 0x00000100,
620 0x9510, 0xffffffff, 0x00000100,
621 0xaf04, 0xffffffff, 0x00000100,
622 0xae04, 0xffffffff, 0x00000100,
623 0x949c, 0xffffffff, 0x00000100,
624 0x802c, 0xffffffff, 0xe0000000,
625 0x9160, 0xffffffff, 0x00010000,
626 0x9164, 0xffffffff, 0x00030002,
627 0x9168, 0xffffffff, 0x00040007,
628 0x916c, 0xffffffff, 0x00060005,
629 0x9170, 0xffffffff, 0x00090008,
630 0x9174, 0xffffffff, 0x00020001,
631 0x9178, 0xffffffff, 0x00040003,
632 0x917c, 0xffffffff, 0x00000007,
633 0x9180, 0xffffffff, 0x00060005,
634 0x9184, 0xffffffff, 0x00090008,
635 0x9188, 0xffffffff, 0x00030002,
636 0x918c, 0xffffffff, 0x00050004,
637 0x9190, 0xffffffff, 0x00000008,
638 0x9194, 0xffffffff, 0x00070006,
639 0x9198, 0xffffffff, 0x000a0009,
640 0x919c, 0xffffffff, 0x00040003,
641 0x91a0, 0xffffffff, 0x00060005,
642 0x91a4, 0xffffffff, 0x00000009,
643 0x91a8, 0xffffffff, 0x00080007,
644 0x91ac, 0xffffffff, 0x000b000a,
645 0x91b0, 0xffffffff, 0x00050004,
646 0x91b4, 0xffffffff, 0x00070006,
647 0x91b8, 0xffffffff, 0x0008000b,
648 0x91bc, 0xffffffff, 0x000a0009,
649 0x91c0, 0xffffffff, 0x000d000c,
650 0x91c4, 0xffffffff, 0x00060005,
651 0x91c8, 0xffffffff, 0x00080007,
652 0x91cc, 0xffffffff, 0x0000000b,
653 0x91d0, 0xffffffff, 0x000a0009,
654 0x91d4, 0xffffffff, 0x000d000c,
655 0x9150, 0xffffffff, 0x96940200,
656 0x8708, 0xffffffff, 0x00900100,
657 0xc478, 0xffffffff, 0x00000080,
658 0xc404, 0xffffffff, 0x0020003f,
659 0x30, 0xffffffff, 0x0000001c,
660 0x34, 0x000f0000, 0x000f0000,
661 0x160c, 0xffffffff, 0x00000100,
662 0x1024, 0xffffffff, 0x00000100,
663 0x102c, 0x00000101, 0x00000000,
664 0x20a8, 0xffffffff, 0x00000104,
665 0x264c, 0x000c0000, 0x000c0000,
666 0x2648, 0x000c0000, 0x000c0000,
667 0x55e4, 0xff000fff, 0x00000100,
668 0x55e8, 0x00000001, 0x00000001,
669 0x2f50, 0x00000001, 0x00000001,
670 0x30cc, 0xc0000fff, 0x00000104,
671 0xc1e4, 0x00000001, 0x00000001,
672 0xd0c0, 0xfffffff0, 0x00000100,
673 0xd8c0, 0xfffffff0, 0x00000100
674 };
675
676 static u32 verde_pg_init[] =
677 {
678 0x353c, 0xffffffff, 0x40000,
679 0x3538, 0xffffffff, 0x200010ff,
680 0x353c, 0xffffffff, 0x0,
681 0x353c, 0xffffffff, 0x0,
682 0x353c, 0xffffffff, 0x0,
683 0x353c, 0xffffffff, 0x0,
684 0x353c, 0xffffffff, 0x0,
685 0x353c, 0xffffffff, 0x7007,
686 0x3538, 0xffffffff, 0x300010ff,
687 0x353c, 0xffffffff, 0x0,
688 0x353c, 0xffffffff, 0x0,
689 0x353c, 0xffffffff, 0x0,
690 0x353c, 0xffffffff, 0x0,
691 0x353c, 0xffffffff, 0x0,
692 0x353c, 0xffffffff, 0x400000,
693 0x3538, 0xffffffff, 0x100010ff,
694 0x353c, 0xffffffff, 0x0,
695 0x353c, 0xffffffff, 0x0,
696 0x353c, 0xffffffff, 0x0,
697 0x353c, 0xffffffff, 0x0,
698 0x353c, 0xffffffff, 0x0,
699 0x353c, 0xffffffff, 0x120200,
700 0x3538, 0xffffffff, 0x500010ff,
701 0x353c, 0xffffffff, 0x0,
702 0x353c, 0xffffffff, 0x0,
703 0x353c, 0xffffffff, 0x0,
704 0x353c, 0xffffffff, 0x0,
705 0x353c, 0xffffffff, 0x0,
706 0x353c, 0xffffffff, 0x1e1e16,
707 0x3538, 0xffffffff, 0x600010ff,
708 0x353c, 0xffffffff, 0x0,
709 0x353c, 0xffffffff, 0x0,
710 0x353c, 0xffffffff, 0x0,
711 0x353c, 0xffffffff, 0x0,
712 0x353c, 0xffffffff, 0x0,
713 0x353c, 0xffffffff, 0x171f1e,
714 0x3538, 0xffffffff, 0x700010ff,
715 0x353c, 0xffffffff, 0x0,
716 0x353c, 0xffffffff, 0x0,
717 0x353c, 0xffffffff, 0x0,
718 0x353c, 0xffffffff, 0x0,
719 0x353c, 0xffffffff, 0x0,
720 0x353c, 0xffffffff, 0x0,
721 0x3538, 0xffffffff, 0x9ff,
722 0x3500, 0xffffffff, 0x0,
723 0x3504, 0xffffffff, 0x10000800,
724 0x3504, 0xffffffff, 0xf,
725 0x3504, 0xffffffff, 0xf,
726 0x3500, 0xffffffff, 0x4,
727 0x3504, 0xffffffff, 0x1000051e,
728 0x3504, 0xffffffff, 0xffff,
729 0x3504, 0xffffffff, 0xffff,
730 0x3500, 0xffffffff, 0x8,
731 0x3504, 0xffffffff, 0x80500,
732 0x3500, 0xffffffff, 0x12,
733 0x3504, 0xffffffff, 0x9050c,
734 0x3500, 0xffffffff, 0x1d,
735 0x3504, 0xffffffff, 0xb052c,
736 0x3500, 0xffffffff, 0x2a,
737 0x3504, 0xffffffff, 0x1053e,
738 0x3500, 0xffffffff, 0x2d,
739 0x3504, 0xffffffff, 0x10546,
740 0x3500, 0xffffffff, 0x30,
741 0x3504, 0xffffffff, 0xa054e,
742 0x3500, 0xffffffff, 0x3c,
743 0x3504, 0xffffffff, 0x1055f,
744 0x3500, 0xffffffff, 0x3f,
745 0x3504, 0xffffffff, 0x10567,
746 0x3500, 0xffffffff, 0x42,
747 0x3504, 0xffffffff, 0x1056f,
748 0x3500, 0xffffffff, 0x45,
749 0x3504, 0xffffffff, 0x10572,
750 0x3500, 0xffffffff, 0x48,
751 0x3504, 0xffffffff, 0x20575,
752 0x3500, 0xffffffff, 0x4c,
753 0x3504, 0xffffffff, 0x190801,
754 0x3500, 0xffffffff, 0x67,
755 0x3504, 0xffffffff, 0x1082a,
756 0x3500, 0xffffffff, 0x6a,
757 0x3504, 0xffffffff, 0x1b082d,
758 0x3500, 0xffffffff, 0x87,
759 0x3504, 0xffffffff, 0x310851,
760 0x3500, 0xffffffff, 0xba,
761 0x3504, 0xffffffff, 0x891,
762 0x3500, 0xffffffff, 0xbc,
763 0x3504, 0xffffffff, 0x893,
764 0x3500, 0xffffffff, 0xbe,
765 0x3504, 0xffffffff, 0x20895,
766 0x3500, 0xffffffff, 0xc2,
767 0x3504, 0xffffffff, 0x20899,
768 0x3500, 0xffffffff, 0xc6,
769 0x3504, 0xffffffff, 0x2089d,
770 0x3500, 0xffffffff, 0xca,
771 0x3504, 0xffffffff, 0x8a1,
772 0x3500, 0xffffffff, 0xcc,
773 0x3504, 0xffffffff, 0x8a3,
774 0x3500, 0xffffffff, 0xce,
775 0x3504, 0xffffffff, 0x308a5,
776 0x3500, 0xffffffff, 0xd3,
777 0x3504, 0xffffffff, 0x6d08cd,
778 0x3500, 0xffffffff, 0x142,
779 0x3504, 0xffffffff, 0x2000095a,
780 0x3504, 0xffffffff, 0x1,
781 0x3500, 0xffffffff, 0x144,
782 0x3504, 0xffffffff, 0x301f095b,
783 0x3500, 0xffffffff, 0x165,
784 0x3504, 0xffffffff, 0xc094d,
785 0x3500, 0xffffffff, 0x173,
786 0x3504, 0xffffffff, 0xf096d,
787 0x3500, 0xffffffff, 0x184,
788 0x3504, 0xffffffff, 0x15097f,
789 0x3500, 0xffffffff, 0x19b,
790 0x3504, 0xffffffff, 0xc0998,
791 0x3500, 0xffffffff, 0x1a9,
792 0x3504, 0xffffffff, 0x409a7,
793 0x3500, 0xffffffff, 0x1af,
794 0x3504, 0xffffffff, 0xcdc,
795 0x3500, 0xffffffff, 0x1b1,
796 0x3504, 0xffffffff, 0x800,
797 0x3508, 0xffffffff, 0x6c9b2000,
798 0x3510, 0xfc00, 0x2000,
799 0x3544, 0xffffffff, 0xfc0,
800 0x28d4, 0x00000100, 0x100
801 };
802
803 static void si_init_golden_registers(struct radeon_device *rdev)
804 {
805 switch (rdev->family) {
806 case CHIP_TAHITI:
807 radeon_program_register_sequence(rdev,
808 tahiti_golden_registers,
809 (const u32)ARRAY_SIZE(tahiti_golden_registers));
810 radeon_program_register_sequence(rdev,
811 tahiti_golden_rlc_registers,
812 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
813 radeon_program_register_sequence(rdev,
814 tahiti_mgcg_cgcg_init,
815 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
816 radeon_program_register_sequence(rdev,
817 tahiti_golden_registers2,
818 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
819 break;
820 case CHIP_PITCAIRN:
821 radeon_program_register_sequence(rdev,
822 pitcairn_golden_registers,
823 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
824 radeon_program_register_sequence(rdev,
825 pitcairn_golden_rlc_registers,
826 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
827 radeon_program_register_sequence(rdev,
828 pitcairn_mgcg_cgcg_init,
829 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
830 break;
831 case CHIP_VERDE:
832 radeon_program_register_sequence(rdev,
833 verde_golden_registers,
834 (const u32)ARRAY_SIZE(verde_golden_registers));
835 radeon_program_register_sequence(rdev,
836 verde_golden_rlc_registers,
837 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
838 radeon_program_register_sequence(rdev,
839 verde_mgcg_cgcg_init,
840 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
841 radeon_program_register_sequence(rdev,
842 verde_pg_init,
843 (const u32)ARRAY_SIZE(verde_pg_init));
844 break;
845 case CHIP_OLAND:
846 radeon_program_register_sequence(rdev,
847 oland_golden_registers,
848 (const u32)ARRAY_SIZE(oland_golden_registers));
849 radeon_program_register_sequence(rdev,
850 oland_golden_rlc_registers,
851 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
852 radeon_program_register_sequence(rdev,
853 oland_mgcg_cgcg_init,
854 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
855 break;
856 default:
857 break;
858 }
859 }
860
861 #define PCIE_BUS_CLK 10000
862 #define TCLK (PCIE_BUS_CLK / 10)
863
864 /**
865 * si_get_xclk - get the xclk
866 *
867 * @rdev: radeon_device pointer
868 *
869 * Returns the reference clock used by the gfx engine
870 * (SI).
871 */
872 u32 si_get_xclk(struct radeon_device *rdev)
873 {
874 u32 reference_clock = rdev->clock.spll.reference_freq;
875 u32 tmp;
876
877 tmp = RREG32(CG_CLKPIN_CNTL_2);
878 if (tmp & MUX_TCLK_TO_XCLK)
879 return TCLK;
880
881 tmp = RREG32(CG_CLKPIN_CNTL);
882 if (tmp & XTALIN_DIVIDE)
883 return reference_clock / 4;
884
885 return reference_clock;
886 }
887
888 /* get temperature in millidegrees */
889 int si_get_temp(struct radeon_device *rdev)
890 {
891 u32 temp;
892 int actual_temp = 0;
893
894 temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
895 CTF_TEMP_SHIFT;
896
897 if (temp & 0x200)
898 actual_temp = 255;
899 else
900 actual_temp = temp & 0x1ff;
901
902 actual_temp = (actual_temp * 1000);
903
904 return actual_temp;
905 }
906
907 #define TAHITI_IO_MC_REGS_SIZE 36
908
909 static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
910 {0x0000006f, 0x03044000},
911 {0x00000070, 0x0480c018},
912 {0x00000071, 0x00000040},
913 {0x00000072, 0x01000000},
914 {0x00000074, 0x000000ff},
915 {0x00000075, 0x00143400},
916 {0x00000076, 0x08ec0800},
917 {0x00000077, 0x040000cc},
918 {0x00000079, 0x00000000},
919 {0x0000007a, 0x21000409},
920 {0x0000007c, 0x00000000},
921 {0x0000007d, 0xe8000000},
922 {0x0000007e, 0x044408a8},
923 {0x0000007f, 0x00000003},
924 {0x00000080, 0x00000000},
925 {0x00000081, 0x01000000},
926 {0x00000082, 0x02000000},
927 {0x00000083, 0x00000000},
928 {0x00000084, 0xe3f3e4f4},
929 {0x00000085, 0x00052024},
930 {0x00000087, 0x00000000},
931 {0x00000088, 0x66036603},
932 {0x00000089, 0x01000000},
933 {0x0000008b, 0x1c0a0000},
934 {0x0000008c, 0xff010000},
935 {0x0000008e, 0xffffefff},
936 {0x0000008f, 0xfff3efff},
937 {0x00000090, 0xfff3efbf},
938 {0x00000094, 0x00101101},
939 {0x00000095, 0x00000fff},
940 {0x00000096, 0x00116fff},
941 {0x00000097, 0x60010000},
942 {0x00000098, 0x10010000},
943 {0x00000099, 0x00006000},
944 {0x0000009a, 0x00001000},
945 {0x0000009f, 0x00a77400}
946 };
947
948 static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
949 {0x0000006f, 0x03044000},
950 {0x00000070, 0x0480c018},
951 {0x00000071, 0x00000040},
952 {0x00000072, 0x01000000},
953 {0x00000074, 0x000000ff},
954 {0x00000075, 0x00143400},
955 {0x00000076, 0x08ec0800},
956 {0x00000077, 0x040000cc},
957 {0x00000079, 0x00000000},
958 {0x0000007a, 0x21000409},
959 {0x0000007c, 0x00000000},
960 {0x0000007d, 0xe8000000},
961 {0x0000007e, 0x044408a8},
962 {0x0000007f, 0x00000003},
963 {0x00000080, 0x00000000},
964 {0x00000081, 0x01000000},
965 {0x00000082, 0x02000000},
966 {0x00000083, 0x00000000},
967 {0x00000084, 0xe3f3e4f4},
968 {0x00000085, 0x00052024},
969 {0x00000087, 0x00000000},
970 {0x00000088, 0x66036603},
971 {0x00000089, 0x01000000},
972 {0x0000008b, 0x1c0a0000},
973 {0x0000008c, 0xff010000},
974 {0x0000008e, 0xffffefff},
975 {0x0000008f, 0xfff3efff},
976 {0x00000090, 0xfff3efbf},
977 {0x00000094, 0x00101101},
978 {0x00000095, 0x00000fff},
979 {0x00000096, 0x00116fff},
980 {0x00000097, 0x60010000},
981 {0x00000098, 0x10010000},
982 {0x00000099, 0x00006000},
983 {0x0000009a, 0x00001000},
984 {0x0000009f, 0x00a47400}
985 };
986
987 static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
988 {0x0000006f, 0x03044000},
989 {0x00000070, 0x0480c018},
990 {0x00000071, 0x00000040},
991 {0x00000072, 0x01000000},
992 {0x00000074, 0x000000ff},
993 {0x00000075, 0x00143400},
994 {0x00000076, 0x08ec0800},
995 {0x00000077, 0x040000cc},
996 {0x00000079, 0x00000000},
997 {0x0000007a, 0x21000409},
998 {0x0000007c, 0x00000000},
999 {0x0000007d, 0xe8000000},
1000 {0x0000007e, 0x044408a8},
1001 {0x0000007f, 0x00000003},
1002 {0x00000080, 0x00000000},
1003 {0x00000081, 0x01000000},
1004 {0x00000082, 0x02000000},
1005 {0x00000083, 0x00000000},
1006 {0x00000084, 0xe3f3e4f4},
1007 {0x00000085, 0x00052024},
1008 {0x00000087, 0x00000000},
1009 {0x00000088, 0x66036603},
1010 {0x00000089, 0x01000000},
1011 {0x0000008b, 0x1c0a0000},
1012 {0x0000008c, 0xff010000},
1013 {0x0000008e, 0xffffefff},
1014 {0x0000008f, 0xfff3efff},
1015 {0x00000090, 0xfff3efbf},
1016 {0x00000094, 0x00101101},
1017 {0x00000095, 0x00000fff},
1018 {0x00000096, 0x00116fff},
1019 {0x00000097, 0x60010000},
1020 {0x00000098, 0x10010000},
1021 {0x00000099, 0x00006000},
1022 {0x0000009a, 0x00001000},
1023 {0x0000009f, 0x00a37400}
1024 };
1025
1026 static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1027 {0x0000006f, 0x03044000},
1028 {0x00000070, 0x0480c018},
1029 {0x00000071, 0x00000040},
1030 {0x00000072, 0x01000000},
1031 {0x00000074, 0x000000ff},
1032 {0x00000075, 0x00143400},
1033 {0x00000076, 0x08ec0800},
1034 {0x00000077, 0x040000cc},
1035 {0x00000079, 0x00000000},
1036 {0x0000007a, 0x21000409},
1037 {0x0000007c, 0x00000000},
1038 {0x0000007d, 0xe8000000},
1039 {0x0000007e, 0x044408a8},
1040 {0x0000007f, 0x00000003},
1041 {0x00000080, 0x00000000},
1042 {0x00000081, 0x01000000},
1043 {0x00000082, 0x02000000},
1044 {0x00000083, 0x00000000},
1045 {0x00000084, 0xe3f3e4f4},
1046 {0x00000085, 0x00052024},
1047 {0x00000087, 0x00000000},
1048 {0x00000088, 0x66036603},
1049 {0x00000089, 0x01000000},
1050 {0x0000008b, 0x1c0a0000},
1051 {0x0000008c, 0xff010000},
1052 {0x0000008e, 0xffffefff},
1053 {0x0000008f, 0xfff3efff},
1054 {0x00000090, 0xfff3efbf},
1055 {0x00000094, 0x00101101},
1056 {0x00000095, 0x00000fff},
1057 {0x00000096, 0x00116fff},
1058 {0x00000097, 0x60010000},
1059 {0x00000098, 0x10010000},
1060 {0x00000099, 0x00006000},
1061 {0x0000009a, 0x00001000},
1062 {0x0000009f, 0x00a17730}
1063 };
1064
1065 /* ucode loading */
1066 static int si_mc_load_microcode(struct radeon_device *rdev)
1067 {
1068 const __be32 *fw_data;
1069 u32 running, blackout = 0;
1070 u32 *io_mc_regs;
1071 int i, ucode_size, regs_size;
1072
1073 if (!rdev->mc_fw)
1074 return -EINVAL;
1075
1076 switch (rdev->family) {
1077 case CHIP_TAHITI:
1078 io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1079 ucode_size = SI_MC_UCODE_SIZE;
1080 regs_size = TAHITI_IO_MC_REGS_SIZE;
1081 break;
1082 case CHIP_PITCAIRN:
1083 io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1084 ucode_size = SI_MC_UCODE_SIZE;
1085 regs_size = TAHITI_IO_MC_REGS_SIZE;
1086 break;
1087 case CHIP_VERDE:
1088 default:
1089 io_mc_regs = (u32 *)&verde_io_mc_regs;
1090 ucode_size = SI_MC_UCODE_SIZE;
1091 regs_size = TAHITI_IO_MC_REGS_SIZE;
1092 break;
1093 case CHIP_OLAND:
1094 io_mc_regs = (u32 *)&oland_io_mc_regs;
1095 ucode_size = OLAND_MC_UCODE_SIZE;
1096 regs_size = TAHITI_IO_MC_REGS_SIZE;
1097 break;
1098 }
1099
1100 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1101
1102 if (running == 0) {
1103 if (running) {
1104 blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1105 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1106 }
1107
1108 /* reset the engine and set to writable */
1109 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1110 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1111
1112 /* load mc io regs */
1113 for (i = 0; i < regs_size; i++) {
1114 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1115 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1116 }
1117 /* load the MC ucode */
1118 fw_data = (const __be32 *)rdev->mc_fw->data;
1119 for (i = 0; i < ucode_size; i++)
1120 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1121
1122 /* put the engine back into the active state */
1123 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1124 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1125 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1126
1127 /* wait for training to complete */
1128 for (i = 0; i < rdev->usec_timeout; i++) {
1129 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1130 break;
1131 udelay(1);
1132 }
1133 for (i = 0; i < rdev->usec_timeout; i++) {
1134 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1135 break;
1136 udelay(1);
1137 }
1138
1139 if (running)
1140 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1141 }
1142
1143 return 0;
1144 }
1145
1146 static int si_init_microcode(struct radeon_device *rdev)
1147 {
1148 struct platform_device *pdev;
1149 const char *chip_name;
1150 const char *rlc_chip_name;
1151 size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1152 char fw_name[30];
1153 int err;
1154
1155 DRM_DEBUG("\n");
1156
1157 pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
1158 err = IS_ERR(pdev);
1159 if (err) {
1160 printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
1161 return -EINVAL;
1162 }
1163
1164 switch (rdev->family) {
1165 case CHIP_TAHITI:
1166 chip_name = "TAHITI";
1167 rlc_chip_name = "TAHITI";
1168 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1169 me_req_size = SI_PM4_UCODE_SIZE * 4;
1170 ce_req_size = SI_CE_UCODE_SIZE * 4;
1171 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1172 mc_req_size = SI_MC_UCODE_SIZE * 4;
1173 break;
1174 case CHIP_PITCAIRN:
1175 chip_name = "PITCAIRN";
1176 rlc_chip_name = "PITCAIRN";
1177 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1178 me_req_size = SI_PM4_UCODE_SIZE * 4;
1179 ce_req_size = SI_CE_UCODE_SIZE * 4;
1180 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1181 mc_req_size = SI_MC_UCODE_SIZE * 4;
1182 break;
1183 case CHIP_VERDE:
1184 chip_name = "VERDE";
1185 rlc_chip_name = "VERDE";
1186 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1187 me_req_size = SI_PM4_UCODE_SIZE * 4;
1188 ce_req_size = SI_CE_UCODE_SIZE * 4;
1189 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1190 mc_req_size = SI_MC_UCODE_SIZE * 4;
1191 break;
1192 case CHIP_OLAND:
1193 chip_name = "OLAND";
1194 rlc_chip_name = "OLAND";
1195 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1196 me_req_size = SI_PM4_UCODE_SIZE * 4;
1197 ce_req_size = SI_CE_UCODE_SIZE * 4;
1198 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1199 mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1200 break;
1201 default: BUG();
1202 }
1203
1204 DRM_INFO("Loading %s Microcode\n", chip_name);
1205
1206 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1207 err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
1208 if (err)
1209 goto out;
1210 if (rdev->pfp_fw->size != pfp_req_size) {
1211 printk(KERN_ERR
1212 "si_cp: Bogus length %zu in firmware \"%s\"\n",
1213 rdev->pfp_fw->size, fw_name);
1214 err = -EINVAL;
1215 goto out;
1216 }
1217
1218 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1219 err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
1220 if (err)
1221 goto out;
1222 if (rdev->me_fw->size != me_req_size) {
1223 printk(KERN_ERR
1224 "si_cp: Bogus length %zu in firmware \"%s\"\n",
1225 rdev->me_fw->size, fw_name);
1226 err = -EINVAL;
1227 }
1228
1229 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1230 err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
1231 if (err)
1232 goto out;
1233 if (rdev->ce_fw->size != ce_req_size) {
1234 printk(KERN_ERR
1235 "si_cp: Bogus length %zu in firmware \"%s\"\n",
1236 rdev->ce_fw->size, fw_name);
1237 err = -EINVAL;
1238 }
1239
1240 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1241 err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
1242 if (err)
1243 goto out;
1244 if (rdev->rlc_fw->size != rlc_req_size) {
1245 printk(KERN_ERR
1246 "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1247 rdev->rlc_fw->size, fw_name);
1248 err = -EINVAL;
1249 }
1250
1251 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1252 err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
1253 if (err)
1254 goto out;
1255 if (rdev->mc_fw->size != mc_req_size) {
1256 printk(KERN_ERR
1257 "si_mc: Bogus length %zu in firmware \"%s\"\n",
1258 rdev->mc_fw->size, fw_name);
1259 err = -EINVAL;
1260 }
1261
1262 out:
1263 platform_device_unregister(pdev);
1264
1265 if (err) {
1266 if (err != -EINVAL)
1267 printk(KERN_ERR
1268 "si_cp: Failed to load firmware \"%s\"\n",
1269 fw_name);
1270 release_firmware(rdev->pfp_fw);
1271 rdev->pfp_fw = NULL;
1272 release_firmware(rdev->me_fw);
1273 rdev->me_fw = NULL;
1274 release_firmware(rdev->ce_fw);
1275 rdev->ce_fw = NULL;
1276 release_firmware(rdev->rlc_fw);
1277 rdev->rlc_fw = NULL;
1278 release_firmware(rdev->mc_fw);
1279 rdev->mc_fw = NULL;
1280 }
1281 return err;
1282 }
1283
1284 /* watermark setup */
1285 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1286 struct radeon_crtc *radeon_crtc,
1287 struct drm_display_mode *mode,
1288 struct drm_display_mode *other_mode)
1289 {
1290 u32 tmp;
1291 /*
1292 * Line Buffer Setup
1293 * There are 3 line buffers, each one shared by 2 display controllers.
1294 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1295 * the display controllers. The paritioning is done via one of four
1296 * preset allocations specified in bits 21:20:
1297 * 0 - half lb
1298 * 2 - whole lb, other crtc must be disabled
1299 */
1300 /* this can get tricky if we have two large displays on a paired group
1301 * of crtcs. Ideally for multiple large displays we'd assign them to
1302 * non-linked crtcs for maximum line buffer allocation.
1303 */
1304 if (radeon_crtc->base.enabled && mode) {
1305 if (other_mode)
1306 tmp = 0; /* 1/2 */
1307 else
1308 tmp = 2; /* whole */
1309 } else
1310 tmp = 0;
1311
1312 WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1313 DC_LB_MEMORY_CONFIG(tmp));
1314
1315 if (radeon_crtc->base.enabled && mode) {
1316 switch (tmp) {
1317 case 0:
1318 default:
1319 return 4096 * 2;
1320 case 2:
1321 return 8192 * 2;
1322 }
1323 }
1324
1325 /* controller not enabled, so no lb used */
1326 return 0;
1327 }
1328
1329 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1330 {
1331 u32 tmp = RREG32(MC_SHARED_CHMAP);
1332
1333 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1334 case 0:
1335 default:
1336 return 1;
1337 case 1:
1338 return 2;
1339 case 2:
1340 return 4;
1341 case 3:
1342 return 8;
1343 case 4:
1344 return 3;
1345 case 5:
1346 return 6;
1347 case 6:
1348 return 10;
1349 case 7:
1350 return 12;
1351 case 8:
1352 return 16;
1353 }
1354 }
1355
1356 struct dce6_wm_params {
1357 u32 dram_channels; /* number of dram channels */
1358 u32 yclk; /* bandwidth per dram data pin in kHz */
1359 u32 sclk; /* engine clock in kHz */
1360 u32 disp_clk; /* display clock in kHz */
1361 u32 src_width; /* viewport width */
1362 u32 active_time; /* active display time in ns */
1363 u32 blank_time; /* blank time in ns */
1364 bool interlaced; /* mode is interlaced */
1365 fixed20_12 vsc; /* vertical scale ratio */
1366 u32 num_heads; /* number of active crtcs */
1367 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
1368 u32 lb_size; /* line buffer allocated to pipe */
1369 u32 vtaps; /* vertical scaler taps */
1370 };
1371
1372 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1373 {
1374 /* Calculate raw DRAM Bandwidth */
1375 fixed20_12 dram_efficiency; /* 0.7 */
1376 fixed20_12 yclk, dram_channels, bandwidth;
1377 fixed20_12 a;
1378
1379 a.full = dfixed_const(1000);
1380 yclk.full = dfixed_const(wm->yclk);
1381 yclk.full = dfixed_div(yclk, a);
1382 dram_channels.full = dfixed_const(wm->dram_channels * 4);
1383 a.full = dfixed_const(10);
1384 dram_efficiency.full = dfixed_const(7);
1385 dram_efficiency.full = dfixed_div(dram_efficiency, a);
1386 bandwidth.full = dfixed_mul(dram_channels, yclk);
1387 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1388
1389 return dfixed_trunc(bandwidth);
1390 }
1391
1392 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1393 {
1394 /* Calculate DRAM Bandwidth and the part allocated to display. */
1395 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1396 fixed20_12 yclk, dram_channels, bandwidth;
1397 fixed20_12 a;
1398
1399 a.full = dfixed_const(1000);
1400 yclk.full = dfixed_const(wm->yclk);
1401 yclk.full = dfixed_div(yclk, a);
1402 dram_channels.full = dfixed_const(wm->dram_channels * 4);
1403 a.full = dfixed_const(10);
1404 disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1405 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1406 bandwidth.full = dfixed_mul(dram_channels, yclk);
1407 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1408
1409 return dfixed_trunc(bandwidth);
1410 }
1411
1412 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1413 {
1414 /* Calculate the display Data return Bandwidth */
1415 fixed20_12 return_efficiency; /* 0.8 */
1416 fixed20_12 sclk, bandwidth;
1417 fixed20_12 a;
1418
1419 a.full = dfixed_const(1000);
1420 sclk.full = dfixed_const(wm->sclk);
1421 sclk.full = dfixed_div(sclk, a);
1422 a.full = dfixed_const(10);
1423 return_efficiency.full = dfixed_const(8);
1424 return_efficiency.full = dfixed_div(return_efficiency, a);
1425 a.full = dfixed_const(32);
1426 bandwidth.full = dfixed_mul(a, sclk);
1427 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1428
1429 return dfixed_trunc(bandwidth);
1430 }
1431
1432 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
1433 {
1434 return 32;
1435 }
1436
1437 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1438 {
1439 /* Calculate the DMIF Request Bandwidth */
1440 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1441 fixed20_12 disp_clk, sclk, bandwidth;
1442 fixed20_12 a, b1, b2;
1443 u32 min_bandwidth;
1444
1445 a.full = dfixed_const(1000);
1446 disp_clk.full = dfixed_const(wm->disp_clk);
1447 disp_clk.full = dfixed_div(disp_clk, a);
1448 a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1449 b1.full = dfixed_mul(a, disp_clk);
1450
1451 a.full = dfixed_const(1000);
1452 sclk.full = dfixed_const(wm->sclk);
1453 sclk.full = dfixed_div(sclk, a);
1454 a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1455 b2.full = dfixed_mul(a, sclk);
1456
1457 a.full = dfixed_const(10);
1458 disp_clk_request_efficiency.full = dfixed_const(8);
1459 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1460
1461 min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1462
1463 a.full = dfixed_const(min_bandwidth);
1464 bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1465
1466 return dfixed_trunc(bandwidth);
1467 }
1468
1469 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1470 {
1471 /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1472 u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1473 u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1474 u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1475
1476 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1477 }
1478
1479 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1480 {
1481 /* Calculate the display mode Average Bandwidth
1482 * DisplayMode should contain the source and destination dimensions,
1483 * timing, etc.
1484 */
1485 fixed20_12 bpp;
1486 fixed20_12 line_time;
1487 fixed20_12 src_width;
1488 fixed20_12 bandwidth;
1489 fixed20_12 a;
1490
1491 a.full = dfixed_const(1000);
1492 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1493 line_time.full = dfixed_div(line_time, a);
1494 bpp.full = dfixed_const(wm->bytes_per_pixel);
1495 src_width.full = dfixed_const(wm->src_width);
1496 bandwidth.full = dfixed_mul(src_width, bpp);
1497 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1498 bandwidth.full = dfixed_div(bandwidth, line_time);
1499
1500 return dfixed_trunc(bandwidth);
1501 }
1502
1503 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
1504 {
1505 /* First calcualte the latency in ns */
1506 u32 mc_latency = 2000; /* 2000 ns. */
1507 u32 available_bandwidth = dce6_available_bandwidth(wm);
1508 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
1509 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
1510 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
1511 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
1512 (wm->num_heads * cursor_line_pair_return_time);
1513 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
1514 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
1515 u32 tmp, dmif_size = 12288;
1516 fixed20_12 a, b, c;
1517
1518 if (wm->num_heads == 0)
1519 return 0;
1520
1521 a.full = dfixed_const(2);
1522 b.full = dfixed_const(1);
1523 if ((wm->vsc.full > a.full) ||
1524 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
1525 (wm->vtaps >= 5) ||
1526 ((wm->vsc.full >= a.full) && wm->interlaced))
1527 max_src_lines_per_dst_line = 4;
1528 else
1529 max_src_lines_per_dst_line = 2;
1530
1531 a.full = dfixed_const(available_bandwidth);
1532 b.full = dfixed_const(wm->num_heads);
1533 a.full = dfixed_div(a, b);
1534
1535 b.full = dfixed_const(mc_latency + 512);
1536 c.full = dfixed_const(wm->disp_clk);
1537 b.full = dfixed_div(b, c);
1538
1539 c.full = dfixed_const(dmif_size);
1540 b.full = dfixed_div(c, b);
1541
1542 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
1543
1544 b.full = dfixed_const(1000);
1545 c.full = dfixed_const(wm->disp_clk);
1546 b.full = dfixed_div(c, b);
1547 c.full = dfixed_const(wm->bytes_per_pixel);
1548 b.full = dfixed_mul(b, c);
1549
1550 lb_fill_bw = min(tmp, dfixed_trunc(b));
1551
1552 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
1553 b.full = dfixed_const(1000);
1554 c.full = dfixed_const(lb_fill_bw);
1555 b.full = dfixed_div(c, b);
1556 a.full = dfixed_div(a, b);
1557 line_fill_time = dfixed_trunc(a);
1558
1559 if (line_fill_time < wm->active_time)
1560 return latency;
1561 else
1562 return latency + (line_fill_time - wm->active_time);
1563
1564 }
1565
1566 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1567 {
1568 if (dce6_average_bandwidth(wm) <=
1569 (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
1570 return true;
1571 else
1572 return false;
1573 };
1574
1575 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
1576 {
1577 if (dce6_average_bandwidth(wm) <=
1578 (dce6_available_bandwidth(wm) / wm->num_heads))
1579 return true;
1580 else
1581 return false;
1582 };
1583
1584 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
1585 {
1586 u32 lb_partitions = wm->lb_size / wm->src_width;
1587 u32 line_time = wm->active_time + wm->blank_time;
1588 u32 latency_tolerant_lines;
1589 u32 latency_hiding;
1590 fixed20_12 a;
1591
1592 a.full = dfixed_const(1);
1593 if (wm->vsc.full > a.full)
1594 latency_tolerant_lines = 1;
1595 else {
1596 if (lb_partitions <= (wm->vtaps + 1))
1597 latency_tolerant_lines = 1;
1598 else
1599 latency_tolerant_lines = 2;
1600 }
1601
1602 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
1603
1604 if (dce6_latency_watermark(wm) <= latency_hiding)
1605 return true;
1606 else
1607 return false;
1608 }
1609
1610 static void dce6_program_watermarks(struct radeon_device *rdev,
1611 struct radeon_crtc *radeon_crtc,
1612 u32 lb_size, u32 num_heads)
1613 {
1614 struct drm_display_mode *mode = &radeon_crtc->base.mode;
1615 struct dce6_wm_params wm;
1616 u32 pixel_period;
1617 u32 line_time = 0;
1618 u32 latency_watermark_a = 0, latency_watermark_b = 0;
1619 u32 priority_a_mark = 0, priority_b_mark = 0;
1620 u32 priority_a_cnt = PRIORITY_OFF;
1621 u32 priority_b_cnt = PRIORITY_OFF;
1622 u32 tmp, arb_control3;
1623 fixed20_12 a, b, c;
1624
1625 if (radeon_crtc->base.enabled && num_heads && mode) {
1626 pixel_period = 1000000 / (u32)mode->clock;
1627 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
1628 priority_a_cnt = 0;
1629 priority_b_cnt = 0;
1630
1631 wm.yclk = rdev->pm.current_mclk * 10;
1632 wm.sclk = rdev->pm.current_sclk * 10;
1633 wm.disp_clk = mode->clock;
1634 wm.src_width = mode->crtc_hdisplay;
1635 wm.active_time = mode->crtc_hdisplay * pixel_period;
1636 wm.blank_time = line_time - wm.active_time;
1637 wm.interlaced = false;
1638 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
1639 wm.interlaced = true;
1640 wm.vsc = radeon_crtc->vsc;
1641 wm.vtaps = 1;
1642 if (radeon_crtc->rmx_type != RMX_OFF)
1643 wm.vtaps = 2;
1644 wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
1645 wm.lb_size = lb_size;
1646 if (rdev->family == CHIP_ARUBA)
1647 wm.dram_channels = evergreen_get_number_of_dram_channels(rdev);
1648 else
1649 wm.dram_channels = si_get_number_of_dram_channels(rdev);
1650 wm.num_heads = num_heads;
1651
1652 /* set for high clocks */
1653 latency_watermark_a = min(dce6_latency_watermark(&wm), (u32)65535);
1654 /* set for low clocks */
1655 /* wm.yclk = low clk; wm.sclk = low clk */
1656 latency_watermark_b = min(dce6_latency_watermark(&wm), (u32)65535);
1657
1658 /* possibly force display priority to high */
1659 /* should really do this at mode validation time... */
1660 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
1661 !dce6_average_bandwidth_vs_available_bandwidth(&wm) ||
1662 !dce6_check_latency_hiding(&wm) ||
1663 (rdev->disp_priority == 2)) {
1664 DRM_DEBUG_KMS("force priority to high\n");
1665 priority_a_cnt |= PRIORITY_ALWAYS_ON;
1666 priority_b_cnt |= PRIORITY_ALWAYS_ON;
1667 }
1668
1669 a.full = dfixed_const(1000);
1670 b.full = dfixed_const(mode->clock);
1671 b.full = dfixed_div(b, a);
1672 c.full = dfixed_const(latency_watermark_a);
1673 c.full = dfixed_mul(c, b);
1674 c.full = dfixed_mul(c, radeon_crtc->hsc);
1675 c.full = dfixed_div(c, a);
1676 a.full = dfixed_const(16);
1677 c.full = dfixed_div(c, a);
1678 priority_a_mark = dfixed_trunc(c);
1679 priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
1680
1681 a.full = dfixed_const(1000);
1682 b.full = dfixed_const(mode->clock);
1683 b.full = dfixed_div(b, a);
1684 c.full = dfixed_const(latency_watermark_b);
1685 c.full = dfixed_mul(c, b);
1686 c.full = dfixed_mul(c, radeon_crtc->hsc);
1687 c.full = dfixed_div(c, a);
1688 a.full = dfixed_const(16);
1689 c.full = dfixed_div(c, a);
1690 priority_b_mark = dfixed_trunc(c);
1691 priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
1692 }
1693
1694 /* select wm A */
1695 arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
1696 tmp = arb_control3;
1697 tmp &= ~LATENCY_WATERMARK_MASK(3);
1698 tmp |= LATENCY_WATERMARK_MASK(1);
1699 WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
1700 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
1701 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
1702 LATENCY_HIGH_WATERMARK(line_time)));
1703 /* select wm B */
1704 tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
1705 tmp &= ~LATENCY_WATERMARK_MASK(3);
1706 tmp |= LATENCY_WATERMARK_MASK(2);
1707 WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
1708 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
1709 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
1710 LATENCY_HIGH_WATERMARK(line_time)));
1711 /* restore original selection */
1712 WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
1713
1714 /* write the priority marks */
1715 WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
1716 WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
1717
1718 }
1719
1720 void dce6_bandwidth_update(struct radeon_device *rdev)
1721 {
1722 struct drm_display_mode *mode0 = NULL;
1723 struct drm_display_mode *mode1 = NULL;
1724 u32 num_heads = 0, lb_size;
1725 int i;
1726
1727 radeon_update_display_priority(rdev);
1728
1729 for (i = 0; i < rdev->num_crtc; i++) {
1730 if (rdev->mode_info.crtcs[i]->base.enabled)
1731 num_heads++;
1732 }
1733 for (i = 0; i < rdev->num_crtc; i += 2) {
1734 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
1735 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
1736 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
1737 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
1738 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
1739 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
1740 }
1741 }
1742
1743 /*
1744 * Core functions
1745 */
1746 static void si_tiling_mode_table_init(struct radeon_device *rdev)
1747 {
1748 const u32 num_tile_mode_states = 32;
1749 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1750
1751 switch (rdev->config.si.mem_row_size_in_kb) {
1752 case 1:
1753 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1754 break;
1755 case 2:
1756 default:
1757 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1758 break;
1759 case 4:
1760 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1761 break;
1762 }
1763
1764 if ((rdev->family == CHIP_TAHITI) ||
1765 (rdev->family == CHIP_PITCAIRN)) {
1766 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1767 switch (reg_offset) {
1768 case 0: /* non-AA compressed depth or any compressed stencil */
1769 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1770 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1771 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1772 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1773 NUM_BANKS(ADDR_SURF_16_BANK) |
1774 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1775 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1776 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1777 break;
1778 case 1: /* 2xAA/4xAA compressed depth only */
1779 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1780 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1781 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1782 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1783 NUM_BANKS(ADDR_SURF_16_BANK) |
1784 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1785 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1786 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1787 break;
1788 case 2: /* 8xAA compressed depth only */
1789 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1790 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1791 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1792 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1793 NUM_BANKS(ADDR_SURF_16_BANK) |
1794 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1795 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1796 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1797 break;
1798 case 3: /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
1799 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1800 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1801 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1802 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1803 NUM_BANKS(ADDR_SURF_16_BANK) |
1804 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1805 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1806 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1807 break;
1808 case 4: /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
1809 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1810 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1811 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1812 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1813 NUM_BANKS(ADDR_SURF_16_BANK) |
1814 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1815 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1816 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1817 break;
1818 case 5: /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
1819 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1820 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1821 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1822 TILE_SPLIT(split_equal_to_row_size) |
1823 NUM_BANKS(ADDR_SURF_16_BANK) |
1824 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1825 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1826 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1827 break;
1828 case 6: /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
1829 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1830 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1831 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1832 TILE_SPLIT(split_equal_to_row_size) |
1833 NUM_BANKS(ADDR_SURF_16_BANK) |
1834 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1835 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1836 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1837 break;
1838 case 7: /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
1839 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1840 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
1841 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1842 TILE_SPLIT(split_equal_to_row_size) |
1843 NUM_BANKS(ADDR_SURF_16_BANK) |
1844 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1845 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1846 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1847 break;
1848 case 8: /* 1D and 1D Array Surfaces */
1849 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1850 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1851 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1852 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1853 NUM_BANKS(ADDR_SURF_16_BANK) |
1854 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1855 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1856 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1857 break;
1858 case 9: /* Displayable maps. */
1859 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1860 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1861 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1862 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1863 NUM_BANKS(ADDR_SURF_16_BANK) |
1864 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1865 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1866 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1867 break;
1868 case 10: /* Display 8bpp. */
1869 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1870 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1871 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1872 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1873 NUM_BANKS(ADDR_SURF_16_BANK) |
1874 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1875 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1876 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1877 break;
1878 case 11: /* Display 16bpp. */
1879 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1880 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1881 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1882 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1883 NUM_BANKS(ADDR_SURF_16_BANK) |
1884 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1885 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1886 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1887 break;
1888 case 12: /* Display 32bpp. */
1889 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1890 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
1891 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1892 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1893 NUM_BANKS(ADDR_SURF_16_BANK) |
1894 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1895 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1896 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1897 break;
1898 case 13: /* Thin. */
1899 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1900 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1901 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1902 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1903 NUM_BANKS(ADDR_SURF_16_BANK) |
1904 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1905 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1906 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1907 break;
1908 case 14: /* Thin 8 bpp. */
1909 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1910 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1911 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1912 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1913 NUM_BANKS(ADDR_SURF_16_BANK) |
1914 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1915 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1916 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1917 break;
1918 case 15: /* Thin 16 bpp. */
1919 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1920 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1921 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1922 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1923 NUM_BANKS(ADDR_SURF_16_BANK) |
1924 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1925 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1926 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1927 break;
1928 case 16: /* Thin 32 bpp. */
1929 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1930 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1931 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1932 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1933 NUM_BANKS(ADDR_SURF_16_BANK) |
1934 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1935 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1936 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1937 break;
1938 case 17: /* Thin 64 bpp. */
1939 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1940 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1941 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1942 TILE_SPLIT(split_equal_to_row_size) |
1943 NUM_BANKS(ADDR_SURF_16_BANK) |
1944 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1945 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1946 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1947 break;
1948 case 21: /* 8 bpp PRT. */
1949 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1950 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1951 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1952 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1953 NUM_BANKS(ADDR_SURF_16_BANK) |
1954 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1955 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1956 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1957 break;
1958 case 22: /* 16 bpp PRT */
1959 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1960 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1961 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1962 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1963 NUM_BANKS(ADDR_SURF_16_BANK) |
1964 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1965 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1966 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
1967 break;
1968 case 23: /* 32 bpp PRT */
1969 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1970 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1971 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1972 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1973 NUM_BANKS(ADDR_SURF_16_BANK) |
1974 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1975 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1976 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1977 break;
1978 case 24: /* 64 bpp PRT */
1979 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1980 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1981 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1982 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1983 NUM_BANKS(ADDR_SURF_16_BANK) |
1984 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1985 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1986 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
1987 break;
1988 case 25: /* 128 bpp PRT */
1989 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1990 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
1991 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1992 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
1993 NUM_BANKS(ADDR_SURF_8_BANK) |
1994 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1995 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1996 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
1997 break;
1998 default:
1999 gb_tile_moden = 0;
2000 break;
2001 }
2002 rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2003 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2004 }
2005 } else if ((rdev->family == CHIP_VERDE) ||
2006 (rdev->family == CHIP_OLAND)) {
2007 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2008 switch (reg_offset) {
2009 case 0: /* non-AA compressed depth or any compressed stencil */
2010 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2011 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2012 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2013 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2014 NUM_BANKS(ADDR_SURF_16_BANK) |
2015 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2016 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2017 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2018 break;
2019 case 1: /* 2xAA/4xAA compressed depth only */
2020 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2021 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2022 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2023 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2024 NUM_BANKS(ADDR_SURF_16_BANK) |
2025 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2026 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2027 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2028 break;
2029 case 2: /* 8xAA compressed depth only */
2030 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2031 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2032 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2033 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2034 NUM_BANKS(ADDR_SURF_16_BANK) |
2035 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2036 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2037 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2038 break;
2039 case 3: /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2040 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2041 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2042 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2043 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2044 NUM_BANKS(ADDR_SURF_16_BANK) |
2045 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2046 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2047 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2048 break;
2049 case 4: /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2050 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2051 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2052 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2053 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2054 NUM_BANKS(ADDR_SURF_16_BANK) |
2055 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2056 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2057 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2058 break;
2059 case 5: /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2060 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2061 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2062 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2063 TILE_SPLIT(split_equal_to_row_size) |
2064 NUM_BANKS(ADDR_SURF_16_BANK) |
2065 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2066 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2067 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2068 break;
2069 case 6: /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2070 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2071 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2072 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2073 TILE_SPLIT(split_equal_to_row_size) |
2074 NUM_BANKS(ADDR_SURF_16_BANK) |
2075 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2076 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2077 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2078 break;
2079 case 7: /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2080 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2081 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2082 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2083 TILE_SPLIT(split_equal_to_row_size) |
2084 NUM_BANKS(ADDR_SURF_16_BANK) |
2085 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2086 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2087 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2088 break;
2089 case 8: /* 1D and 1D Array Surfaces */
2090 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2091 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2092 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2093 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2094 NUM_BANKS(ADDR_SURF_16_BANK) |
2095 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2096 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2097 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2098 break;
2099 case 9: /* Displayable maps. */
2100 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2101 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2102 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2103 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2104 NUM_BANKS(ADDR_SURF_16_BANK) |
2105 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2106 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2107 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2108 break;
2109 case 10: /* Display 8bpp. */
2110 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2111 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2112 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2113 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2114 NUM_BANKS(ADDR_SURF_16_BANK) |
2115 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2116 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2117 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2118 break;
2119 case 11: /* Display 16bpp. */
2120 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2121 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2122 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2123 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2124 NUM_BANKS(ADDR_SURF_16_BANK) |
2125 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2126 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2127 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2128 break;
2129 case 12: /* Display 32bpp. */
2130 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2131 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2132 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2133 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2134 NUM_BANKS(ADDR_SURF_16_BANK) |
2135 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2136 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2137 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2138 break;
2139 case 13: /* Thin. */
2140 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2141 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2142 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2143 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2144 NUM_BANKS(ADDR_SURF_16_BANK) |
2145 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2146 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2147 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2148 break;
2149 case 14: /* Thin 8 bpp. */
2150 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2151 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2152 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2153 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2154 NUM_BANKS(ADDR_SURF_16_BANK) |
2155 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2156 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2157 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2158 break;
2159 case 15: /* Thin 16 bpp. */
2160 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2161 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2162 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2163 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2164 NUM_BANKS(ADDR_SURF_16_BANK) |
2165 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2166 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2167 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2168 break;
2169 case 16: /* Thin 32 bpp. */
2170 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2171 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2172 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2173 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2174 NUM_BANKS(ADDR_SURF_16_BANK) |
2175 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2176 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2177 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2178 break;
2179 case 17: /* Thin 64 bpp. */
2180 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2181 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2182 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2183 TILE_SPLIT(split_equal_to_row_size) |
2184 NUM_BANKS(ADDR_SURF_16_BANK) |
2185 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2186 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2187 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2188 break;
2189 case 21: /* 8 bpp PRT. */
2190 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2191 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2192 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2193 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2194 NUM_BANKS(ADDR_SURF_16_BANK) |
2195 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2196 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2197 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2198 break;
2199 case 22: /* 16 bpp PRT */
2200 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2201 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2202 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2203 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2204 NUM_BANKS(ADDR_SURF_16_BANK) |
2205 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2206 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2207 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2208 break;
2209 case 23: /* 32 bpp PRT */
2210 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2211 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2212 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2213 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2214 NUM_BANKS(ADDR_SURF_16_BANK) |
2215 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2216 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2217 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2218 break;
2219 case 24: /* 64 bpp PRT */
2220 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2221 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2222 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2223 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2224 NUM_BANKS(ADDR_SURF_16_BANK) |
2225 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2226 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2227 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2228 break;
2229 case 25: /* 128 bpp PRT */
2230 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2231 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2232 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2233 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2234 NUM_BANKS(ADDR_SURF_8_BANK) |
2235 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2236 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2237 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2238 break;
2239 default:
2240 gb_tile_moden = 0;
2241 break;
2242 }
2243 rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2244 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2245 }
2246 } else
2247 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2248 }
2249
2250 static void si_select_se_sh(struct radeon_device *rdev,
2251 u32 se_num, u32 sh_num)
2252 {
2253 u32 data = INSTANCE_BROADCAST_WRITES;
2254
2255 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2256 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2257 else if (se_num == 0xffffffff)
2258 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2259 else if (sh_num == 0xffffffff)
2260 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2261 else
2262 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2263 WREG32(GRBM_GFX_INDEX, data);
2264 }
2265
2266 static u32 si_create_bitmask(u32 bit_width)
2267 {
2268 u32 i, mask = 0;
2269
2270 for (i = 0; i < bit_width; i++) {
2271 mask <<= 1;
2272 mask |= 1;
2273 }
2274 return mask;
2275 }
2276
2277 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2278 {
2279 u32 data, mask;
2280
2281 data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2282 if (data & 1)
2283 data &= INACTIVE_CUS_MASK;
2284 else
2285 data = 0;
2286 data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2287
2288 data >>= INACTIVE_CUS_SHIFT;
2289
2290 mask = si_create_bitmask(cu_per_sh);
2291
2292 return ~data & mask;
2293 }
2294
2295 static void si_setup_spi(struct radeon_device *rdev,
2296 u32 se_num, u32 sh_per_se,
2297 u32 cu_per_sh)
2298 {
2299 int i, j, k;
2300 u32 data, mask, active_cu;
2301
2302 for (i = 0; i < se_num; i++) {
2303 for (j = 0; j < sh_per_se; j++) {
2304 si_select_se_sh(rdev, i, j);
2305 data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2306 active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2307
2308 mask = 1;
2309 for (k = 0; k < 16; k++) {
2310 mask <<= k;
2311 if (active_cu & mask) {
2312 data &= ~mask;
2313 WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2314 break;
2315 }
2316 }
2317 }
2318 }
2319 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2320 }
2321
2322 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2323 u32 max_rb_num, u32 se_num,
2324 u32 sh_per_se)
2325 {
2326 u32 data, mask;
2327
2328 data = RREG32(CC_RB_BACKEND_DISABLE);
2329 if (data & 1)
2330 data &= BACKEND_DISABLE_MASK;
2331 else
2332 data = 0;
2333 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2334
2335 data >>= BACKEND_DISABLE_SHIFT;
2336
2337 mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
2338
2339 return data & mask;
2340 }
2341
2342 static void si_setup_rb(struct radeon_device *rdev,
2343 u32 se_num, u32 sh_per_se,
2344 u32 max_rb_num)
2345 {
2346 int i, j;
2347 u32 data, mask;
2348 u32 disabled_rbs = 0;
2349 u32 enabled_rbs = 0;
2350
2351 for (i = 0; i < se_num; i++) {
2352 for (j = 0; j < sh_per_se; j++) {
2353 si_select_se_sh(rdev, i, j);
2354 data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
2355 disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
2356 }
2357 }
2358 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2359
2360 mask = 1;
2361 for (i = 0; i < max_rb_num; i++) {
2362 if (!(disabled_rbs & mask))
2363 enabled_rbs |= mask;
2364 mask <<= 1;
2365 }
2366
2367 for (i = 0; i < se_num; i++) {
2368 si_select_se_sh(rdev, i, 0xffffffff);
2369 data = 0;
2370 for (j = 0; j < sh_per_se; j++) {
2371 switch (enabled_rbs & 3) {
2372 case 1:
2373 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2374 break;
2375 case 2:
2376 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2377 break;
2378 case 3:
2379 default:
2380 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2381 break;
2382 }
2383 enabled_rbs >>= 2;
2384 }
2385 WREG32(PA_SC_RASTER_CONFIG, data);
2386 }
2387 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2388 }
2389
2390 static void si_gpu_init(struct radeon_device *rdev)
2391 {
2392 u32 gb_addr_config = 0;
2393 u32 mc_shared_chmap, mc_arb_ramcfg;
2394 u32 sx_debug_1;
2395 u32 hdp_host_path_cntl;
2396 u32 tmp;
2397 int i, j;
2398
2399 switch (rdev->family) {
2400 case CHIP_TAHITI:
2401 rdev->config.si.max_shader_engines = 2;
2402 rdev->config.si.max_tile_pipes = 12;
2403 rdev->config.si.max_cu_per_sh = 8;
2404 rdev->config.si.max_sh_per_se = 2;
2405 rdev->config.si.max_backends_per_se = 4;
2406 rdev->config.si.max_texture_channel_caches = 12;
2407 rdev->config.si.max_gprs = 256;
2408 rdev->config.si.max_gs_threads = 32;
2409 rdev->config.si.max_hw_contexts = 8;
2410
2411 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2412 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2413 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2414 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2415 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2416 break;
2417 case CHIP_PITCAIRN:
2418 rdev->config.si.max_shader_engines = 2;
2419 rdev->config.si.max_tile_pipes = 8;
2420 rdev->config.si.max_cu_per_sh = 5;
2421 rdev->config.si.max_sh_per_se = 2;
2422 rdev->config.si.max_backends_per_se = 4;
2423 rdev->config.si.max_texture_channel_caches = 8;
2424 rdev->config.si.max_gprs = 256;
2425 rdev->config.si.max_gs_threads = 32;
2426 rdev->config.si.max_hw_contexts = 8;
2427
2428 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2429 rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2430 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2431 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2432 gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2433 break;
2434 case CHIP_VERDE:
2435 default:
2436 rdev->config.si.max_shader_engines = 1;
2437 rdev->config.si.max_tile_pipes = 4;
2438 rdev->config.si.max_cu_per_sh = 2;
2439 rdev->config.si.max_sh_per_se = 2;
2440 rdev->config.si.max_backends_per_se = 4;
2441 rdev->config.si.max_texture_channel_caches = 4;
2442 rdev->config.si.max_gprs = 256;
2443 rdev->config.si.max_gs_threads = 32;
2444 rdev->config.si.max_hw_contexts = 8;
2445
2446 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2447 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2448 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2449 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2450 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2451 break;
2452 case CHIP_OLAND:
2453 rdev->config.si.max_shader_engines = 1;
2454 rdev->config.si.max_tile_pipes = 4;
2455 rdev->config.si.max_cu_per_sh = 6;
2456 rdev->config.si.max_sh_per_se = 1;
2457 rdev->config.si.max_backends_per_se = 2;
2458 rdev->config.si.max_texture_channel_caches = 4;
2459 rdev->config.si.max_gprs = 256;
2460 rdev->config.si.max_gs_threads = 16;
2461 rdev->config.si.max_hw_contexts = 8;
2462
2463 rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2464 rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2465 rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2466 rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2467 gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2468 break;
2469 }
2470
2471 /* Initialize HDP */
2472 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2473 WREG32((0x2c14 + j), 0x00000000);
2474 WREG32((0x2c18 + j), 0x00000000);
2475 WREG32((0x2c1c + j), 0x00000000);
2476 WREG32((0x2c20 + j), 0x00000000);
2477 WREG32((0x2c24 + j), 0x00000000);
2478 }
2479
2480 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2481
2482 evergreen_fix_pci_max_read_req_size(rdev);
2483
2484 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2485
2486 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2487 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2488
2489 rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
2490 rdev->config.si.mem_max_burst_length_bytes = 256;
2491 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2492 rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2493 if (rdev->config.si.mem_row_size_in_kb > 4)
2494 rdev->config.si.mem_row_size_in_kb = 4;
2495 /* XXX use MC settings? */
2496 rdev->config.si.shader_engine_tile_size = 32;
2497 rdev->config.si.num_gpus = 1;
2498 rdev->config.si.multi_gpu_tile_size = 64;
2499
2500 /* fix up row size */
2501 gb_addr_config &= ~ROW_SIZE_MASK;
2502 switch (rdev->config.si.mem_row_size_in_kb) {
2503 case 1:
2504 default:
2505 gb_addr_config |= ROW_SIZE(0);
2506 break;
2507 case 2:
2508 gb_addr_config |= ROW_SIZE(1);
2509 break;
2510 case 4:
2511 gb_addr_config |= ROW_SIZE(2);
2512 break;
2513 }
2514
2515 /* setup tiling info dword. gb_addr_config is not adequate since it does
2516 * not have bank info, so create a custom tiling dword.
2517 * bits 3:0 num_pipes
2518 * bits 7:4 num_banks
2519 * bits 11:8 group_size
2520 * bits 15:12 row_size
2521 */
2522 rdev->config.si.tile_config = 0;
2523 switch (rdev->config.si.num_tile_pipes) {
2524 case 1:
2525 rdev->config.si.tile_config |= (0 << 0);
2526 break;
2527 case 2:
2528 rdev->config.si.tile_config |= (1 << 0);
2529 break;
2530 case 4:
2531 rdev->config.si.tile_config |= (2 << 0);
2532 break;
2533 case 8:
2534 default:
2535 /* XXX what about 12? */
2536 rdev->config.si.tile_config |= (3 << 0);
2537 break;
2538 }
2539 switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
2540 case 0: /* four banks */
2541 rdev->config.si.tile_config |= 0 << 4;
2542 break;
2543 case 1: /* eight banks */
2544 rdev->config.si.tile_config |= 1 << 4;
2545 break;
2546 case 2: /* sixteen banks */
2547 default:
2548 rdev->config.si.tile_config |= 2 << 4;
2549 break;
2550 }
2551 rdev->config.si.tile_config |=
2552 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
2553 rdev->config.si.tile_config |=
2554 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
2555
2556 WREG32(GB_ADDR_CONFIG, gb_addr_config);
2557 WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
2558 WREG32(DMIF_ADDR_CALC, gb_addr_config);
2559 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2560 WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
2561 WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
2562 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2563 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2564 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2565
2566 si_tiling_mode_table_init(rdev);
2567
2568 si_setup_rb(rdev, rdev->config.si.max_shader_engines,
2569 rdev->config.si.max_sh_per_se,
2570 rdev->config.si.max_backends_per_se);
2571
2572 si_setup_spi(rdev, rdev->config.si.max_shader_engines,
2573 rdev->config.si.max_sh_per_se,
2574 rdev->config.si.max_cu_per_sh);
2575
2576
2577 /* set HW defaults for 3D engine */
2578 WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
2579 ROQ_IB2_START(0x2b)));
2580 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2581
2582 sx_debug_1 = RREG32(SX_DEBUG_1);
2583 WREG32(SX_DEBUG_1, sx_debug_1);
2584
2585 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2586
2587 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
2588 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
2589 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
2590 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
2591
2592 WREG32(VGT_NUM_INSTANCES, 1);
2593
2594 WREG32(CP_PERFMON_CNTL, 0);
2595
2596 WREG32(SQ_CONFIG, 0);
2597
2598 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2599 FORCE_EOV_MAX_REZ_CNT(255)));
2600
2601 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2602 AUTO_INVLD_EN(ES_AND_GS_AUTO));
2603
2604 WREG32(VGT_GS_VERTEX_REUSE, 16);
2605 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2606
2607 WREG32(CB_PERFCOUNTER0_SELECT0, 0);
2608 WREG32(CB_PERFCOUNTER0_SELECT1, 0);
2609 WREG32(CB_PERFCOUNTER1_SELECT0, 0);
2610 WREG32(CB_PERFCOUNTER1_SELECT1, 0);
2611 WREG32(CB_PERFCOUNTER2_SELECT0, 0);
2612 WREG32(CB_PERFCOUNTER2_SELECT1, 0);
2613 WREG32(CB_PERFCOUNTER3_SELECT0, 0);
2614 WREG32(CB_PERFCOUNTER3_SELECT1, 0);
2615
2616 tmp = RREG32(HDP_MISC_CNTL);
2617 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2618 WREG32(HDP_MISC_CNTL, tmp);
2619
2620 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2621 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2622
2623 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2624
2625 udelay(50);
2626 }
2627
2628 /*
2629 * GPU scratch registers helpers function.
2630 */
2631 static void si_scratch_init(struct radeon_device *rdev)
2632 {
2633 int i;
2634
2635 rdev->scratch.num_reg = 7;
2636 rdev->scratch.reg_base = SCRATCH_REG0;
2637 for (i = 0; i < rdev->scratch.num_reg; i++) {
2638 rdev->scratch.free[i] = true;
2639 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2640 }
2641 }
2642
2643 void si_fence_ring_emit(struct radeon_device *rdev,
2644 struct radeon_fence *fence)
2645 {
2646 struct radeon_ring *ring = &rdev->ring[fence->ring];
2647 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2648
2649 /* flush read cache over gart */
2650 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
2651 radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
2652 radeon_ring_write(ring, 0);
2653 radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
2654 radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
2655 PACKET3_TC_ACTION_ENA |
2656 PACKET3_SH_KCACHE_ACTION_ENA |
2657 PACKET3_SH_ICACHE_ACTION_ENA);
2658 radeon_ring_write(ring, 0xFFFFFFFF);
2659 radeon_ring_write(ring, 0);
2660 radeon_ring_write(ring, 10); /* poll interval */
2661 /* EVENT_WRITE_EOP - flush caches, send int */
2662 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2663 radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
2664 radeon_ring_write(ring, addr & 0xffffffff);
2665 radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
2666 radeon_ring_write(ring, fence->seq);
2667 radeon_ring_write(ring, 0);
2668 }
2669
2670 /*
2671 * IB stuff
2672 */
2673 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
2674 {
2675 struct radeon_ring *ring = &rdev->ring[ib->ring];
2676 u32 header;
2677
2678 if (ib->is_const_ib) {
2679 /* set switch buffer packet before const IB */
2680 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2681 radeon_ring_write(ring, 0);
2682
2683 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
2684 } else {
2685 u32 next_rptr;
2686 if (ring->rptr_save_reg) {
2687 next_rptr = ring->wptr + 3 + 4 + 8;
2688 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
2689 radeon_ring_write(ring, ((ring->rptr_save_reg -
2690 PACKET3_SET_CONFIG_REG_START) >> 2));
2691 radeon_ring_write(ring, next_rptr);
2692 } else if (rdev->wb.enabled) {
2693 next_rptr = ring->wptr + 5 + 4 + 8;
2694 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2695 radeon_ring_write(ring, (1 << 8));
2696 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2697 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2698 radeon_ring_write(ring, next_rptr);
2699 }
2700
2701 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2702 }
2703
2704 radeon_ring_write(ring, header);
2705 radeon_ring_write(ring,
2706 #ifdef __BIG_ENDIAN
2707 (2 << 0) |
2708 #endif
2709 (ib->gpu_addr & 0xFFFFFFFC));
2710 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2711 radeon_ring_write(ring, ib->length_dw |
2712 (ib->vm ? (ib->vm->id << 24) : 0));
2713
2714 if (!ib->is_const_ib) {
2715 /* flush read cache over gart for this vmid */
2716 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
2717 radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
2718 radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
2719 radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
2720 radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
2721 PACKET3_TC_ACTION_ENA |
2722 PACKET3_SH_KCACHE_ACTION_ENA |
2723 PACKET3_SH_ICACHE_ACTION_ENA);
2724 radeon_ring_write(ring, 0xFFFFFFFF);
2725 radeon_ring_write(ring, 0);
2726 radeon_ring_write(ring, 10); /* poll interval */
2727 }
2728 }
2729
2730 /*
2731 * CP.
2732 */
2733 static void si_cp_enable(struct radeon_device *rdev, bool enable)
2734 {
2735 if (enable)
2736 WREG32(CP_ME_CNTL, 0);
2737 else {
2738 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2739 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2740 WREG32(SCRATCH_UMSK, 0);
2741 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2742 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
2743 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2744 }
2745 udelay(50);
2746 }
2747
2748 static int si_cp_load_microcode(struct radeon_device *rdev)
2749 {
2750 const __be32 *fw_data;
2751 int i;
2752
2753 if (!rdev->me_fw || !rdev->pfp_fw)
2754 return -EINVAL;
2755
2756 si_cp_enable(rdev, false);
2757
2758 /* PFP */
2759 fw_data = (const __be32 *)rdev->pfp_fw->data;
2760 WREG32(CP_PFP_UCODE_ADDR, 0);
2761 for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
2762 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2763 WREG32(CP_PFP_UCODE_ADDR, 0);
2764
2765 /* CE */
2766 fw_data = (const __be32 *)rdev->ce_fw->data;
2767 WREG32(CP_CE_UCODE_ADDR, 0);
2768 for (i = 0; i < SI_CE_UCODE_SIZE; i++)
2769 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2770 WREG32(CP_CE_UCODE_ADDR, 0);
2771
2772 /* ME */
2773 fw_data = (const __be32 *)rdev->me_fw->data;
2774 WREG32(CP_ME_RAM_WADDR, 0);
2775 for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
2776 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2777 WREG32(CP_ME_RAM_WADDR, 0);
2778
2779 WREG32(CP_PFP_UCODE_ADDR, 0);
2780 WREG32(CP_CE_UCODE_ADDR, 0);
2781 WREG32(CP_ME_RAM_WADDR, 0);
2782 WREG32(CP_ME_RAM_RADDR, 0);
2783 return 0;
2784 }
2785
2786 static int si_cp_start(struct radeon_device *rdev)
2787 {
2788 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2789 int r, i;
2790
2791 r = radeon_ring_lock(rdev, ring, 7 + 4);
2792 if (r) {
2793 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2794 return r;
2795 }
2796 /* init the CP */
2797 radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
2798 radeon_ring_write(ring, 0x1);
2799 radeon_ring_write(ring, 0x0);
2800 radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
2801 radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
2802 radeon_ring_write(ring, 0);
2803 radeon_ring_write(ring, 0);
2804
2805 /* init the CE partitions */
2806 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2807 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2808 radeon_ring_write(ring, 0xc000);
2809 radeon_ring_write(ring, 0xe000);
2810 radeon_ring_unlock_commit(rdev, ring);
2811
2812 si_cp_enable(rdev, true);
2813
2814 r = radeon_ring_lock(rdev, ring, si_default_size + 10);
2815 if (r) {
2816 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2817 return r;
2818 }
2819
2820 /* setup clear context state */
2821 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2822 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2823
2824 for (i = 0; i < si_default_size; i++)
2825 radeon_ring_write(ring, si_default_state[i]);
2826
2827 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2828 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2829
2830 /* set clear context state */
2831 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2832 radeon_ring_write(ring, 0);
2833
2834 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2835 radeon_ring_write(ring, 0x00000316);
2836 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2837 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2838
2839 radeon_ring_unlock_commit(rdev, ring);
2840
2841 for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
2842 ring = &rdev->ring[i];
2843 r = radeon_ring_lock(rdev, ring, 2);
2844
2845 /* clear the compute context state */
2846 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
2847 radeon_ring_write(ring, 0);
2848
2849 radeon_ring_unlock_commit(rdev, ring);
2850 }
2851
2852 return 0;
2853 }
2854
2855 static void si_cp_fini(struct radeon_device *rdev)
2856 {
2857 struct radeon_ring *ring;
2858 si_cp_enable(rdev, false);
2859
2860 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2861 radeon_ring_fini(rdev, ring);
2862 radeon_scratch_free(rdev, ring->rptr_save_reg);
2863
2864 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
2865 radeon_ring_fini(rdev, ring);
2866 radeon_scratch_free(rdev, ring->rptr_save_reg);
2867
2868 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
2869 radeon_ring_fini(rdev, ring);
2870 radeon_scratch_free(rdev, ring->rptr_save_reg);
2871 }
2872
2873 static int si_cp_resume(struct radeon_device *rdev)
2874 {
2875 struct radeon_ring *ring;
2876 u32 tmp;
2877 u32 rb_bufsz;
2878 int r;
2879
2880 /* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
2881 WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
2882 SOFT_RESET_PA |
2883 SOFT_RESET_VGT |
2884 SOFT_RESET_SPI |
2885 SOFT_RESET_SX));
2886 RREG32(GRBM_SOFT_RESET);
2887 mdelay(15);
2888 WREG32(GRBM_SOFT_RESET, 0);
2889 RREG32(GRBM_SOFT_RESET);
2890
2891 WREG32(CP_SEM_WAIT_TIMER, 0x0);
2892 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2893
2894 /* Set the write pointer delay */
2895 WREG32(CP_RB_WPTR_DELAY, 0);
2896
2897 WREG32(CP_DEBUG, 0);
2898 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2899
2900 /* ring 0 - compute and gfx */
2901 /* Set ring buffer size */
2902 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2903 rb_bufsz = drm_order(ring->ring_size / 8);
2904 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2905 #ifdef __BIG_ENDIAN
2906 tmp |= BUF_SWAP_32BIT;
2907 #endif
2908 WREG32(CP_RB0_CNTL, tmp);
2909
2910 /* Initialize the ring buffer's read and write pointers */
2911 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2912 ring->wptr = 0;
2913 WREG32(CP_RB0_WPTR, ring->wptr);
2914
2915 /* set the wb address whether it's enabled or not */
2916 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2917 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2918
2919 if (rdev->wb.enabled)
2920 WREG32(SCRATCH_UMSK, 0xff);
2921 else {
2922 tmp |= RB_NO_UPDATE;
2923 WREG32(SCRATCH_UMSK, 0);
2924 }
2925
2926 mdelay(1);
2927 WREG32(CP_RB0_CNTL, tmp);
2928
2929 WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
2930
2931 ring->rptr = RREG32(CP_RB0_RPTR);
2932
2933 /* ring1 - compute only */
2934 /* Set ring buffer size */
2935 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
2936 rb_bufsz = drm_order(ring->ring_size / 8);
2937 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2938 #ifdef __BIG_ENDIAN
2939 tmp |= BUF_SWAP_32BIT;
2940 #endif
2941 WREG32(CP_RB1_CNTL, tmp);
2942
2943 /* Initialize the ring buffer's read and write pointers */
2944 WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
2945 ring->wptr = 0;
2946 WREG32(CP_RB1_WPTR, ring->wptr);
2947
2948 /* set the wb address whether it's enabled or not */
2949 WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
2950 WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
2951
2952 mdelay(1);
2953 WREG32(CP_RB1_CNTL, tmp);
2954
2955 WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
2956
2957 ring->rptr = RREG32(CP_RB1_RPTR);
2958
2959 /* ring2 - compute only */
2960 /* Set ring buffer size */
2961 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
2962 rb_bufsz = drm_order(ring->ring_size / 8);
2963 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2964 #ifdef __BIG_ENDIAN
2965 tmp |= BUF_SWAP_32BIT;
2966 #endif
2967 WREG32(CP_RB2_CNTL, tmp);
2968
2969 /* Initialize the ring buffer's read and write pointers */
2970 WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
2971 ring->wptr = 0;
2972 WREG32(CP_RB2_WPTR, ring->wptr);
2973
2974 /* set the wb address whether it's enabled or not */
2975 WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
2976 WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
2977
2978 mdelay(1);
2979 WREG32(CP_RB2_CNTL, tmp);
2980
2981 WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
2982
2983 ring->rptr = RREG32(CP_RB2_RPTR);
2984
2985 /* start the rings */
2986 si_cp_start(rdev);
2987 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2988 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
2989 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
2990 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2991 if (r) {
2992 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2993 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
2994 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
2995 return r;
2996 }
2997 r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
2998 if (r) {
2999 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3000 }
3001 r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3002 if (r) {
3003 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3004 }
3005
3006 return 0;
3007 }
3008
3009 static u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3010 {
3011 u32 reset_mask = 0;
3012 u32 tmp;
3013
3014 /* GRBM_STATUS */
3015 tmp = RREG32(GRBM_STATUS);
3016 if (tmp & (PA_BUSY | SC_BUSY |
3017 BCI_BUSY | SX_BUSY |
3018 TA_BUSY | VGT_BUSY |
3019 DB_BUSY | CB_BUSY |
3020 GDS_BUSY | SPI_BUSY |
3021 IA_BUSY | IA_BUSY_NO_DMA))
3022 reset_mask |= RADEON_RESET_GFX;
3023
3024 if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3025 CP_BUSY | CP_COHERENCY_BUSY))
3026 reset_mask |= RADEON_RESET_CP;
3027
3028 if (tmp & GRBM_EE_BUSY)
3029 reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3030
3031 /* GRBM_STATUS2 */
3032 tmp = RREG32(GRBM_STATUS2);
3033 if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3034 reset_mask |= RADEON_RESET_RLC;
3035
3036 /* DMA_STATUS_REG 0 */
3037 tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3038 if (!(tmp & DMA_IDLE))
3039 reset_mask |= RADEON_RESET_DMA;
3040
3041 /* DMA_STATUS_REG 1 */
3042 tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3043 if (!(tmp & DMA_IDLE))
3044 reset_mask |= RADEON_RESET_DMA1;
3045
3046 /* SRBM_STATUS2 */
3047 tmp = RREG32(SRBM_STATUS2);
3048 if (tmp & DMA_BUSY)
3049 reset_mask |= RADEON_RESET_DMA;
3050
3051 if (tmp & DMA1_BUSY)
3052 reset_mask |= RADEON_RESET_DMA1;
3053
3054 /* SRBM_STATUS */
3055 tmp = RREG32(SRBM_STATUS);
3056
3057 if (tmp & IH_BUSY)
3058 reset_mask |= RADEON_RESET_IH;
3059
3060 if (tmp & SEM_BUSY)
3061 reset_mask |= RADEON_RESET_SEM;
3062
3063 if (tmp & GRBM_RQ_PENDING)
3064 reset_mask |= RADEON_RESET_GRBM;
3065
3066 if (tmp & VMC_BUSY)
3067 reset_mask |= RADEON_RESET_VMC;
3068
3069 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3070 MCC_BUSY | MCD_BUSY))
3071 reset_mask |= RADEON_RESET_MC;
3072
3073 if (evergreen_is_display_hung(rdev))
3074 reset_mask |= RADEON_RESET_DISPLAY;
3075
3076 /* VM_L2_STATUS */
3077 tmp = RREG32(VM_L2_STATUS);
3078 if (tmp & L2_BUSY)
3079 reset_mask |= RADEON_RESET_VMC;
3080
3081 /* Skip MC reset as it's mostly likely not hung, just busy */
3082 if (reset_mask & RADEON_RESET_MC) {
3083 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3084 reset_mask &= ~RADEON_RESET_MC;
3085 }
3086
3087 return reset_mask;
3088 }
3089
3090 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3091 {
3092 struct evergreen_mc_save save;
3093 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3094 u32 tmp;
3095
3096 if (reset_mask == 0)
3097 return;
3098
3099 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3100
3101 evergreen_print_gpu_status_regs(rdev);
3102 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
3103 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3104 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3105 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3106
3107 /* Disable CP parsing/prefetching */
3108 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3109
3110 if (reset_mask & RADEON_RESET_DMA) {
3111 /* dma0 */
3112 tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3113 tmp &= ~DMA_RB_ENABLE;
3114 WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3115 }
3116 if (reset_mask & RADEON_RESET_DMA1) {
3117 /* dma1 */
3118 tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3119 tmp &= ~DMA_RB_ENABLE;
3120 WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3121 }
3122
3123 udelay(50);
3124
3125 evergreen_mc_stop(rdev, &save);
3126 if (evergreen_mc_wait_for_idle(rdev)) {
3127 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3128 }
3129
3130 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3131 grbm_soft_reset = SOFT_RESET_CB |
3132 SOFT_RESET_DB |
3133 SOFT_RESET_GDS |
3134 SOFT_RESET_PA |
3135 SOFT_RESET_SC |
3136 SOFT_RESET_BCI |
3137 SOFT_RESET_SPI |
3138 SOFT_RESET_SX |
3139 SOFT_RESET_TC |
3140 SOFT_RESET_TA |
3141 SOFT_RESET_VGT |
3142 SOFT_RESET_IA;
3143 }
3144
3145 if (reset_mask & RADEON_RESET_CP) {
3146 grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3147
3148 srbm_soft_reset |= SOFT_RESET_GRBM;
3149 }
3150
3151 if (reset_mask & RADEON_RESET_DMA)
3152 srbm_soft_reset |= SOFT_RESET_DMA;
3153
3154 if (reset_mask & RADEON_RESET_DMA1)
3155 srbm_soft_reset |= SOFT_RESET_DMA1;
3156
3157 if (reset_mask & RADEON_RESET_DISPLAY)
3158 srbm_soft_reset |= SOFT_RESET_DC;
3159
3160 if (reset_mask & RADEON_RESET_RLC)
3161 grbm_soft_reset |= SOFT_RESET_RLC;
3162
3163 if (reset_mask & RADEON_RESET_SEM)
3164 srbm_soft_reset |= SOFT_RESET_SEM;
3165
3166 if (reset_mask & RADEON_RESET_IH)
3167 srbm_soft_reset |= SOFT_RESET_IH;
3168
3169 if (reset_mask & RADEON_RESET_GRBM)
3170 srbm_soft_reset |= SOFT_RESET_GRBM;
3171
3172 if (reset_mask & RADEON_RESET_VMC)
3173 srbm_soft_reset |= SOFT_RESET_VMC;
3174
3175 if (reset_mask & RADEON_RESET_MC)
3176 srbm_soft_reset |= SOFT_RESET_MC;
3177
3178 if (grbm_soft_reset) {
3179 tmp = RREG32(GRBM_SOFT_RESET);
3180 tmp |= grbm_soft_reset;
3181 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3182 WREG32(GRBM_SOFT_RESET, tmp);
3183 tmp = RREG32(GRBM_SOFT_RESET);
3184
3185 udelay(50);
3186
3187 tmp &= ~grbm_soft_reset;
3188 WREG32(GRBM_SOFT_RESET, tmp);
3189 tmp = RREG32(GRBM_SOFT_RESET);
3190 }
3191
3192 if (srbm_soft_reset) {
3193 tmp = RREG32(SRBM_SOFT_RESET);
3194 tmp |= srbm_soft_reset;
3195 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3196 WREG32(SRBM_SOFT_RESET, tmp);
3197 tmp = RREG32(SRBM_SOFT_RESET);
3198
3199 udelay(50);
3200
3201 tmp &= ~srbm_soft_reset;
3202 WREG32(SRBM_SOFT_RESET, tmp);
3203 tmp = RREG32(SRBM_SOFT_RESET);
3204 }
3205
3206 /* Wait a little for things to settle down */
3207 udelay(50);
3208
3209 evergreen_mc_resume(rdev, &save);
3210 udelay(50);
3211
3212 evergreen_print_gpu_status_regs(rdev);
3213 }
3214
3215 int si_asic_reset(struct radeon_device *rdev)
3216 {
3217 u32 reset_mask;
3218
3219 reset_mask = si_gpu_check_soft_reset(rdev);
3220
3221 if (reset_mask)
3222 r600_set_bios_scratch_engine_hung(rdev, true);
3223
3224 si_gpu_soft_reset(rdev, reset_mask);
3225
3226 reset_mask = si_gpu_check_soft_reset(rdev);
3227
3228 if (!reset_mask)
3229 r600_set_bios_scratch_engine_hung(rdev, false);
3230
3231 return 0;
3232 }
3233
3234 /**
3235 * si_gfx_is_lockup - Check if the GFX engine is locked up
3236 *
3237 * @rdev: radeon_device pointer
3238 * @ring: radeon_ring structure holding ring information
3239 *
3240 * Check if the GFX engine is locked up.
3241 * Returns true if the engine appears to be locked up, false if not.
3242 */
3243 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3244 {
3245 u32 reset_mask = si_gpu_check_soft_reset(rdev);
3246
3247 if (!(reset_mask & (RADEON_RESET_GFX |
3248 RADEON_RESET_COMPUTE |
3249 RADEON_RESET_CP))) {
3250 radeon_ring_lockup_update(ring);
3251 return false;
3252 }
3253 /* force CP activities */
3254 radeon_ring_force_activity(rdev, ring);
3255 return radeon_ring_test_lockup(rdev, ring);
3256 }
3257
3258 /**
3259 * si_dma_is_lockup - Check if the DMA engine is locked up
3260 *
3261 * @rdev: radeon_device pointer
3262 * @ring: radeon_ring structure holding ring information
3263 *
3264 * Check if the async DMA engine is locked up.
3265 * Returns true if the engine appears to be locked up, false if not.
3266 */
3267 bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3268 {
3269 u32 reset_mask = si_gpu_check_soft_reset(rdev);
3270 u32 mask;
3271
3272 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
3273 mask = RADEON_RESET_DMA;
3274 else
3275 mask = RADEON_RESET_DMA1;
3276
3277 if (!(reset_mask & mask)) {
3278 radeon_ring_lockup_update(ring);
3279 return false;
3280 }
3281 /* force ring activities */
3282 radeon_ring_force_activity(rdev, ring);
3283 return radeon_ring_test_lockup(rdev, ring);
3284 }
3285
3286 /* MC */
3287 static void si_mc_program(struct radeon_device *rdev)
3288 {
3289 struct evergreen_mc_save save;
3290 u32 tmp;
3291 int i, j;
3292
3293 /* Initialize HDP */
3294 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3295 WREG32((0x2c14 + j), 0x00000000);
3296 WREG32((0x2c18 + j), 0x00000000);
3297 WREG32((0x2c1c + j), 0x00000000);
3298 WREG32((0x2c20 + j), 0x00000000);
3299 WREG32((0x2c24 + j), 0x00000000);
3300 }
3301 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
3302
3303 evergreen_mc_stop(rdev, &save);
3304 if (radeon_mc_wait_for_idle(rdev)) {
3305 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3306 }
3307 /* Lockout access through VGA aperture*/
3308 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
3309 /* Update configuration */
3310 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
3311 rdev->mc.vram_start >> 12);
3312 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
3313 rdev->mc.vram_end >> 12);
3314 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
3315 rdev->vram_scratch.gpu_addr >> 12);
3316 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
3317 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
3318 WREG32(MC_VM_FB_LOCATION, tmp);
3319 /* XXX double check these! */
3320 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
3321 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
3322 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
3323 WREG32(MC_VM_AGP_BASE, 0);
3324 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
3325 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
3326 if (radeon_mc_wait_for_idle(rdev)) {
3327 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3328 }
3329 evergreen_mc_resume(rdev, &save);
3330 /* we need to own VRAM, so turn off the VGA renderer here
3331 * to stop it overwriting our objects */
3332 rv515_vga_render_disable(rdev);
3333 }
3334
3335 static void si_vram_gtt_location(struct radeon_device *rdev,
3336 struct radeon_mc *mc)
3337 {
3338 if (mc->mc_vram_size > 0xFFC0000000ULL) {
3339 /* leave room for at least 1024M GTT */
3340 dev_warn(rdev->dev, "limiting VRAM\n");
3341 mc->real_vram_size = 0xFFC0000000ULL;
3342 mc->mc_vram_size = 0xFFC0000000ULL;
3343 }
3344 radeon_vram_location(rdev, &rdev->mc, 0);
3345 rdev->mc.gtt_base_align = 0;
3346 radeon_gtt_location(rdev, mc);
3347 }
3348
3349 static int si_mc_init(struct radeon_device *rdev)
3350 {
3351 u32 tmp;
3352 int chansize, numchan;
3353
3354 /* Get VRAM informations */
3355 rdev->mc.vram_is_ddr = true;
3356 tmp = RREG32(MC_ARB_RAMCFG);
3357 if (tmp & CHANSIZE_OVERRIDE) {
3358 chansize = 16;
3359 } else if (tmp & CHANSIZE_MASK) {
3360 chansize = 64;
3361 } else {
3362 chansize = 32;
3363 }
3364 tmp = RREG32(MC_SHARED_CHMAP);
3365 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3366 case 0:
3367 default:
3368 numchan = 1;
3369 break;
3370 case 1:
3371 numchan = 2;
3372 break;
3373 case 2:
3374 numchan = 4;
3375 break;
3376 case 3:
3377 numchan = 8;
3378 break;
3379 case 4:
3380 numchan = 3;
3381 break;
3382 case 5:
3383 numchan = 6;
3384 break;
3385 case 6:
3386 numchan = 10;
3387 break;
3388 case 7:
3389 numchan = 12;
3390 break;
3391 case 8:
3392 numchan = 16;
3393 break;
3394 }
3395 rdev->mc.vram_width = numchan * chansize;
3396 /* Could aper size report 0 ? */
3397 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3398 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3399 /* size in MB on si */
3400 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
3401 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
3402 rdev->mc.visible_vram_size = rdev->mc.aper_size;
3403 si_vram_gtt_location(rdev, &rdev->mc);
3404 radeon_update_bandwidth_info(rdev);
3405
3406 return 0;
3407 }
3408
3409 /*
3410 * GART
3411 */
3412 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
3413 {
3414 /* flush hdp cache */
3415 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
3416
3417 /* bits 0-15 are the VM contexts0-15 */
3418 WREG32(VM_INVALIDATE_REQUEST, 1);
3419 }
3420
3421 static int si_pcie_gart_enable(struct radeon_device *rdev)
3422 {
3423 int r, i;
3424
3425 if (rdev->gart.robj == NULL) {
3426 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
3427 return -EINVAL;
3428 }
3429 r = radeon_gart_table_vram_pin(rdev);
3430 if (r)
3431 return r;
3432 radeon_gart_restore(rdev);
3433 /* Setup TLB control */
3434 WREG32(MC_VM_MX_L1_TLB_CNTL,
3435 (0xA << 7) |
3436 ENABLE_L1_TLB |
3437 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3438 ENABLE_ADVANCED_DRIVER_MODEL |
3439 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3440 /* Setup L2 cache */
3441 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
3442 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3443 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3444 EFFECTIVE_L2_QUEUE_SIZE(7) |
3445 CONTEXT1_IDENTITY_ACCESS_MODE(1));
3446 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
3447 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3448 L2_CACHE_BIGK_FRAGMENT_SIZE(0));
3449 /* setup context0 */
3450 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
3451 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
3452 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
3453 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
3454 (u32)(rdev->dummy_page.addr >> 12));
3455 WREG32(VM_CONTEXT0_CNTL2, 0);
3456 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
3457 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
3458
3459 WREG32(0x15D4, 0);
3460 WREG32(0x15D8, 0);
3461 WREG32(0x15DC, 0);
3462
3463 /* empty context1-15 */
3464 /* set vm size, must be a multiple of 4 */
3465 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
3466 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
3467 /* Assign the pt base to something valid for now; the pts used for
3468 * the VMs are determined by the application and setup and assigned
3469 * on the fly in the vm part of radeon_gart.c
3470 */
3471 for (i = 1; i < 16; i++) {
3472 if (i < 8)
3473 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
3474 rdev->gart.table_addr >> 12);
3475 else
3476 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
3477 rdev->gart.table_addr >> 12);
3478 }
3479
3480 /* enable context1-15 */
3481 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
3482 (u32)(rdev->dummy_page.addr >> 12));
3483 WREG32(VM_CONTEXT1_CNTL2, 4);
3484 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
3485 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3486 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3487 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3488 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
3489 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
3490 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
3491 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
3492 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
3493 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
3494 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
3495 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
3496 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
3497
3498 si_pcie_gart_tlb_flush(rdev);
3499 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
3500 (unsigned)(rdev->mc.gtt_size >> 20),
3501 (unsigned long long)rdev->gart.table_addr);
3502 rdev->gart.ready = true;
3503 return 0;
3504 }
3505
3506 static void si_pcie_gart_disable(struct radeon_device *rdev)
3507 {
3508 /* Disable all tables */
3509 WREG32(VM_CONTEXT0_CNTL, 0);
3510 WREG32(VM_CONTEXT1_CNTL, 0);
3511 /* Setup TLB control */
3512 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3513 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3514 /* Setup L2 cache */
3515 WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3516 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3517 EFFECTIVE_L2_QUEUE_SIZE(7) |
3518 CONTEXT1_IDENTITY_ACCESS_MODE(1));
3519 WREG32(VM_L2_CNTL2, 0);
3520 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3521 L2_CACHE_BIGK_FRAGMENT_SIZE(0));
3522 radeon_gart_table_vram_unpin(rdev);
3523 }
3524
3525 static void si_pcie_gart_fini(struct radeon_device *rdev)
3526 {
3527 si_pcie_gart_disable(rdev);
3528 radeon_gart_table_vram_free(rdev);
3529 radeon_gart_fini(rdev);
3530 }
3531
3532 /* vm parser */
3533 static bool si_vm_reg_valid(u32 reg)
3534 {
3535 /* context regs are fine */
3536 if (reg >= 0x28000)
3537 return true;
3538
3539 /* check config regs */
3540 switch (reg) {
3541 case GRBM_GFX_INDEX:
3542 case CP_STRMOUT_CNTL:
3543 case VGT_VTX_VECT_EJECT_REG:
3544 case VGT_CACHE_INVALIDATION:
3545 case VGT_ESGS_RING_SIZE:
3546 case VGT_GSVS_RING_SIZE:
3547 case VGT_GS_VERTEX_REUSE:
3548 case VGT_PRIMITIVE_TYPE:
3549 case VGT_INDEX_TYPE:
3550 case VGT_NUM_INDICES:
3551 case VGT_NUM_INSTANCES:
3552 case VGT_TF_RING_SIZE:
3553 case VGT_HS_OFFCHIP_PARAM:
3554 case VGT_TF_MEMORY_BASE:
3555 case PA_CL_ENHANCE:
3556 case PA_SU_LINE_STIPPLE_VALUE:
3557 case PA_SC_LINE_STIPPLE_STATE:
3558 case PA_SC_ENHANCE:
3559 case SQC_CACHES:
3560 case SPI_STATIC_THREAD_MGMT_1:
3561 case SPI_STATIC_THREAD_MGMT_2:
3562 case SPI_STATIC_THREAD_MGMT_3:
3563 case SPI_PS_MAX_WAVE_ID:
3564 case SPI_CONFIG_CNTL:
3565 case SPI_CONFIG_CNTL_1:
3566 case TA_CNTL_AUX:
3567 return true;
3568 default:
3569 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
3570 return false;
3571 }
3572 }
3573
3574 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
3575 u32 *ib, struct radeon_cs_packet *pkt)
3576 {
3577 switch (pkt->opcode) {
3578 case PACKET3_NOP:
3579 case PACKET3_SET_BASE:
3580 case PACKET3_SET_CE_DE_COUNTERS:
3581 case PACKET3_LOAD_CONST_RAM:
3582 case PACKET3_WRITE_CONST_RAM:
3583 case PACKET3_WRITE_CONST_RAM_OFFSET:
3584 case PACKET3_DUMP_CONST_RAM:
3585 case PACKET3_INCREMENT_CE_COUNTER:
3586 case PACKET3_WAIT_ON_DE_COUNTER:
3587 case PACKET3_CE_WRITE:
3588 break;
3589 default:
3590 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
3591 return -EINVAL;
3592 }
3593 return 0;
3594 }
3595
3596 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
3597 u32 *ib, struct radeon_cs_packet *pkt)
3598 {
3599 u32 idx = pkt->idx + 1;
3600 u32 idx_value = ib[idx];
3601 u32 start_reg, end_reg, reg, i;
3602 u32 command, info;
3603
3604 switch (pkt->opcode) {
3605 case PACKET3_NOP:
3606 case PACKET3_SET_BASE:
3607 case PACKET3_CLEAR_STATE:
3608 case PACKET3_INDEX_BUFFER_SIZE:
3609 case PACKET3_DISPATCH_DIRECT:
3610 case PACKET3_DISPATCH_INDIRECT:
3611 case PACKET3_ALLOC_GDS:
3612 case PACKET3_WRITE_GDS_RAM:
3613 case PACKET3_ATOMIC_GDS:
3614 case PACKET3_ATOMIC:
3615 case PACKET3_OCCLUSION_QUERY:
3616 case PACKET3_SET_PREDICATION:
3617 case PACKET3_COND_EXEC:
3618 case PACKET3_PRED_EXEC:
3619 case PACKET3_DRAW_INDIRECT:
3620 case PACKET3_DRAW_INDEX_INDIRECT:
3621 case PACKET3_INDEX_BASE:
3622 case PACKET3_DRAW_INDEX_2:
3623 case PACKET3_CONTEXT_CONTROL:
3624 case PACKET3_INDEX_TYPE:
3625 case PACKET3_DRAW_INDIRECT_MULTI:
3626 case PACKET3_DRAW_INDEX_AUTO:
3627 case PACKET3_DRAW_INDEX_IMMD:
3628 case PACKET3_NUM_INSTANCES:
3629 case PACKET3_DRAW_INDEX_MULTI_AUTO:
3630 case PACKET3_STRMOUT_BUFFER_UPDATE:
3631 case PACKET3_DRAW_INDEX_OFFSET_2:
3632 case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
3633 case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
3634 case PACKET3_MPEG_INDEX:
3635 case PACKET3_WAIT_REG_MEM:
3636 case PACKET3_MEM_WRITE:
3637 case PACKET3_PFP_SYNC_ME:
3638 case PACKET3_SURFACE_SYNC:
3639 case PACKET3_EVENT_WRITE:
3640 case PACKET3_EVENT_WRITE_EOP:
3641 case PACKET3_EVENT_WRITE_EOS:
3642 case PACKET3_SET_CONTEXT_REG:
3643 case PACKET3_SET_CONTEXT_REG_INDIRECT:
3644 case PACKET3_SET_SH_REG:
3645 case PACKET3_SET_SH_REG_OFFSET:
3646 case PACKET3_INCREMENT_DE_COUNTER:
3647 case PACKET3_WAIT_ON_CE_COUNTER:
3648 case PACKET3_WAIT_ON_AVAIL_BUFFER:
3649 case PACKET3_ME_WRITE:
3650 break;
3651 case PACKET3_COPY_DATA:
3652 if ((idx_value & 0xf00) == 0) {
3653 reg = ib[idx + 3] * 4;
3654 if (!si_vm_reg_valid(reg))
3655 return -EINVAL;
3656 }
3657 break;
3658 case PACKET3_WRITE_DATA:
3659 if ((idx_value & 0xf00) == 0) {
3660 start_reg = ib[idx + 1] * 4;
3661 if (idx_value & 0x10000) {
3662 if (!si_vm_reg_valid(start_reg))
3663 return -EINVAL;
3664 } else {
3665 for (i = 0; i < (pkt->count - 2); i++) {
3666 reg = start_reg + (4 * i);
3667 if (!si_vm_reg_valid(reg))
3668 return -EINVAL;
3669 }
3670 }
3671 }
3672 break;
3673 case PACKET3_COND_WRITE:
3674 if (idx_value & 0x100) {
3675 reg = ib[idx + 5] * 4;
3676 if (!si_vm_reg_valid(reg))
3677 return -EINVAL;
3678 }
3679 break;
3680 case PACKET3_COPY_DW:
3681 if (idx_value & 0x2) {
3682 reg = ib[idx + 3] * 4;
3683 if (!si_vm_reg_valid(reg))
3684 return -EINVAL;
3685 }
3686 break;
3687 case PACKET3_SET_CONFIG_REG:
3688 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
3689 end_reg = 4 * pkt->count + start_reg - 4;
3690 if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
3691 (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
3692 (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
3693 DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
3694 return -EINVAL;
3695 }
3696 for (i = 0; i < pkt->count; i++) {
3697 reg = start_reg + (4 * i);
3698 if (!si_vm_reg_valid(reg))
3699 return -EINVAL;
3700 }
3701 break;
3702 case PACKET3_CP_DMA:
3703 command = ib[idx + 4];
3704 info = ib[idx + 1];
3705 if (command & PACKET3_CP_DMA_CMD_SAS) {
3706 /* src address space is register */
3707 if (((info & 0x60000000) >> 29) == 0) {
3708 start_reg = idx_value << 2;
3709 if (command & PACKET3_CP_DMA_CMD_SAIC) {
3710 reg = start_reg;
3711 if (!si_vm_reg_valid(reg)) {
3712 DRM_ERROR("CP DMA Bad SRC register\n");
3713 return -EINVAL;
3714 }
3715 } else {
3716 for (i = 0; i < (command & 0x1fffff); i++) {
3717 reg = start_reg + (4 * i);
3718 if (!si_vm_reg_valid(reg)) {
3719 DRM_ERROR("CP DMA Bad SRC register\n");
3720 return -EINVAL;
3721 }
3722 }
3723 }
3724 }
3725 }
3726 if (command & PACKET3_CP_DMA_CMD_DAS) {
3727 /* dst address space is register */
3728 if (((info & 0x00300000) >> 20) == 0) {
3729 start_reg = ib[idx + 2];
3730 if (command & PACKET3_CP_DMA_CMD_DAIC) {
3731 reg = start_reg;
3732 if (!si_vm_reg_valid(reg)) {
3733 DRM_ERROR("CP DMA Bad DST register\n");
3734 return -EINVAL;
3735 }
3736 } else {
3737 for (i = 0; i < (command & 0x1fffff); i++) {
3738 reg = start_reg + (4 * i);
3739 if (!si_vm_reg_valid(reg)) {
3740 DRM_ERROR("CP DMA Bad DST register\n");
3741 return -EINVAL;
3742 }
3743 }
3744 }
3745 }
3746 }
3747 break;
3748 default:
3749 DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
3750 return -EINVAL;
3751 }
3752 return 0;
3753 }
3754
3755 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
3756 u32 *ib, struct radeon_cs_packet *pkt)
3757 {
3758 u32 idx = pkt->idx + 1;
3759 u32 idx_value = ib[idx];
3760 u32 start_reg, reg, i;
3761
3762 switch (pkt->opcode) {
3763 case PACKET3_NOP:
3764 case PACKET3_SET_BASE:
3765 case PACKET3_CLEAR_STATE:
3766 case PACKET3_DISPATCH_DIRECT:
3767 case PACKET3_DISPATCH_INDIRECT:
3768 case PACKET3_ALLOC_GDS:
3769 case PACKET3_WRITE_GDS_RAM:
3770 case PACKET3_ATOMIC_GDS:
3771 case PACKET3_ATOMIC:
3772 case PACKET3_OCCLUSION_QUERY:
3773 case PACKET3_SET_PREDICATION:
3774 case PACKET3_COND_EXEC:
3775 case PACKET3_PRED_EXEC:
3776 case PACKET3_CONTEXT_CONTROL:
3777 case PACKET3_STRMOUT_BUFFER_UPDATE:
3778 case PACKET3_WAIT_REG_MEM:
3779 case PACKET3_MEM_WRITE:
3780 case PACKET3_PFP_SYNC_ME:
3781 case PACKET3_SURFACE_SYNC:
3782 case PACKET3_EVENT_WRITE:
3783 case PACKET3_EVENT_WRITE_EOP:
3784 case PACKET3_EVENT_WRITE_EOS:
3785 case PACKET3_SET_CONTEXT_REG:
3786 case PACKET3_SET_CONTEXT_REG_INDIRECT:
3787 case PACKET3_SET_SH_REG:
3788 case PACKET3_SET_SH_REG_OFFSET:
3789 case PACKET3_INCREMENT_DE_COUNTER:
3790 case PACKET3_WAIT_ON_CE_COUNTER:
3791 case PACKET3_WAIT_ON_AVAIL_BUFFER:
3792 case PACKET3_ME_WRITE:
3793 break;
3794 case PACKET3_COPY_DATA:
3795 if ((idx_value & 0xf00) == 0) {
3796 reg = ib[idx + 3] * 4;
3797 if (!si_vm_reg_valid(reg))
3798 return -EINVAL;
3799 }
3800 break;
3801 case PACKET3_WRITE_DATA:
3802 if ((idx_value & 0xf00) == 0) {
3803 start_reg = ib[idx + 1] * 4;
3804 if (idx_value & 0x10000) {
3805 if (!si_vm_reg_valid(start_reg))
3806 return -EINVAL;
3807 } else {
3808 for (i = 0; i < (pkt->count - 2); i++) {
3809 reg = start_reg + (4 * i);
3810 if (!si_vm_reg_valid(reg))
3811 return -EINVAL;
3812 }
3813 }
3814 }
3815 break;
3816 case PACKET3_COND_WRITE:
3817 if (idx_value & 0x100) {
3818 reg = ib[idx + 5] * 4;
3819 if (!si_vm_reg_valid(reg))
3820 return -EINVAL;
3821 }
3822 break;
3823 case PACKET3_COPY_DW:
3824 if (idx_value & 0x2) {
3825 reg = ib[idx + 3] * 4;
3826 if (!si_vm_reg_valid(reg))
3827 return -EINVAL;
3828 }
3829 break;
3830 default:
3831 DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
3832 return -EINVAL;
3833 }
3834 return 0;
3835 }
3836
3837 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3838 {
3839 int ret = 0;
3840 u32 idx = 0;
3841 struct radeon_cs_packet pkt;
3842
3843 do {
3844 pkt.idx = idx;
3845 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
3846 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
3847 pkt.one_reg_wr = 0;
3848 switch (pkt.type) {
3849 case RADEON_PACKET_TYPE0:
3850 dev_err(rdev->dev, "Packet0 not allowed!\n");
3851 ret = -EINVAL;
3852 break;
3853 case RADEON_PACKET_TYPE2:
3854 idx += 1;
3855 break;
3856 case RADEON_PACKET_TYPE3:
3857 pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
3858 if (ib->is_const_ib)
3859 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
3860 else {
3861 switch (ib->ring) {
3862 case RADEON_RING_TYPE_GFX_INDEX:
3863 ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
3864 break;
3865 case CAYMAN_RING_TYPE_CP1_INDEX:
3866 case CAYMAN_RING_TYPE_CP2_INDEX:
3867 ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
3868 break;
3869 default:
3870 dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
3871 ret = -EINVAL;
3872 break;
3873 }
3874 }
3875 idx += pkt.count + 2;
3876 break;
3877 default:
3878 dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
3879 ret = -EINVAL;
3880 break;
3881 }
3882 if (ret)
3883 break;
3884 } while (idx < ib->length_dw);
3885
3886 return ret;
3887 }
3888
3889 /*
3890 * vm
3891 */
3892 int si_vm_init(struct radeon_device *rdev)
3893 {
3894 /* number of VMs */
3895 rdev->vm_manager.nvm = 16;
3896 /* base offset of vram pages */
3897 rdev->vm_manager.vram_base_offset = 0;
3898
3899 return 0;
3900 }
3901
3902 void si_vm_fini(struct radeon_device *rdev)
3903 {
3904 }
3905
3906 /**
3907 * si_vm_set_page - update the page tables using the CP
3908 *
3909 * @rdev: radeon_device pointer
3910 * @ib: indirect buffer to fill with commands
3911 * @pe: addr of the page entry
3912 * @addr: dst addr to write into pe
3913 * @count: number of page entries to update
3914 * @incr: increase next addr by incr bytes
3915 * @flags: access flags
3916 *
3917 * Update the page tables using the CP (SI).
3918 */
3919 void si_vm_set_page(struct radeon_device *rdev,
3920 struct radeon_ib *ib,
3921 uint64_t pe,
3922 uint64_t addr, unsigned count,
3923 uint32_t incr, uint32_t flags)
3924 {
3925 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
3926 uint64_t value;
3927 unsigned ndw;
3928
3929 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
3930 while (count) {
3931 ndw = 2 + count * 2;
3932 if (ndw > 0x3FFE)
3933 ndw = 0x3FFE;
3934
3935 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
3936 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
3937 WRITE_DATA_DST_SEL(1));
3938 ib->ptr[ib->length_dw++] = pe;
3939 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
3940 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
3941 if (flags & RADEON_VM_PAGE_SYSTEM) {
3942 value = radeon_vm_map_gart(rdev, addr);
3943 value &= 0xFFFFFFFFFFFFF000ULL;
3944 } else if (flags & RADEON_VM_PAGE_VALID) {
3945 value = addr;
3946 } else {
3947 value = 0;
3948 }
3949 addr += incr;
3950 value |= r600_flags;
3951 ib->ptr[ib->length_dw++] = value;
3952 ib->ptr[ib->length_dw++] = upper_32_bits(value);
3953 }
3954 }
3955 } else {
3956 /* DMA */
3957 if (flags & RADEON_VM_PAGE_SYSTEM) {
3958 while (count) {
3959 ndw = count * 2;
3960 if (ndw > 0xFFFFE)
3961 ndw = 0xFFFFE;
3962
3963 /* for non-physically contiguous pages (system) */
3964 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
3965 ib->ptr[ib->length_dw++] = pe;
3966 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
3967 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
3968 if (flags & RADEON_VM_PAGE_SYSTEM) {
3969 value = radeon_vm_map_gart(rdev, addr);
3970 value &= 0xFFFFFFFFFFFFF000ULL;
3971 } else if (flags & RADEON_VM_PAGE_VALID) {
3972 value = addr;
3973 } else {
3974 value = 0;
3975 }
3976 addr += incr;
3977 value |= r600_flags;
3978 ib->ptr[ib->length_dw++] = value;
3979 ib->ptr[ib->length_dw++] = upper_32_bits(value);
3980 }
3981 }
3982 } else {
3983 while (count) {
3984 ndw = count * 2;
3985 if (ndw > 0xFFFFE)
3986 ndw = 0xFFFFE;
3987
3988 if (flags & RADEON_VM_PAGE_VALID)
3989 value = addr;
3990 else
3991 value = 0;
3992 /* for physically contiguous pages (vram) */
3993 ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
3994 ib->ptr[ib->length_dw++] = pe; /* dst addr */
3995 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
3996 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
3997 ib->ptr[ib->length_dw++] = 0;
3998 ib->ptr[ib->length_dw++] = value; /* value */
3999 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4000 ib->ptr[ib->length_dw++] = incr; /* increment size */
4001 ib->ptr[ib->length_dw++] = 0;
4002 pe += ndw * 4;
4003 addr += (ndw / 2) * incr;
4004 count -= ndw / 2;
4005 }
4006 }
4007 while (ib->length_dw & 0x7)
4008 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
4009 }
4010 }
4011
4012 void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4013 {
4014 struct radeon_ring *ring = &rdev->ring[ridx];
4015
4016 if (vm == NULL)
4017 return;
4018
4019 /* write new base address */
4020 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4021 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4022 WRITE_DATA_DST_SEL(0)));
4023
4024 if (vm->id < 8) {
4025 radeon_ring_write(ring,
4026 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4027 } else {
4028 radeon_ring_write(ring,
4029 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4030 }
4031 radeon_ring_write(ring, 0);
4032 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4033
4034 /* flush hdp cache */
4035 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4036 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4037 WRITE_DATA_DST_SEL(0)));
4038 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4039 radeon_ring_write(ring, 0);
4040 radeon_ring_write(ring, 0x1);
4041
4042 /* bits 0-15 are the VM contexts0-15 */
4043 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4044 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4045 WRITE_DATA_DST_SEL(0)));
4046 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4047 radeon_ring_write(ring, 0);
4048 radeon_ring_write(ring, 1 << vm->id);
4049
4050 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4051 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4052 radeon_ring_write(ring, 0x0);
4053 }
4054
4055 void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4056 {
4057 struct radeon_ring *ring = &rdev->ring[ridx];
4058
4059 if (vm == NULL)
4060 return;
4061
4062 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
4063 if (vm->id < 8) {
4064 radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
4065 } else {
4066 radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
4067 }
4068 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4069
4070 /* flush hdp cache */
4071 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
4072 radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
4073 radeon_ring_write(ring, 1);
4074
4075 /* bits 0-7 are the VM contexts0-7 */
4076 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
4077 radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
4078 radeon_ring_write(ring, 1 << vm->id);
4079 }
4080
4081 /*
4082 * RLC
4083 */
4084 void si_rlc_fini(struct radeon_device *rdev)
4085 {
4086 int r;
4087
4088 /* save restore block */
4089 if (rdev->rlc.save_restore_obj) {
4090 r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
4091 if (unlikely(r != 0))
4092 dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
4093 radeon_bo_unpin(rdev->rlc.save_restore_obj);
4094 radeon_bo_unreserve(rdev->rlc.save_restore_obj);
4095
4096 radeon_bo_unref(&rdev->rlc.save_restore_obj);
4097 rdev->rlc.save_restore_obj = NULL;
4098 }
4099
4100 /* clear state block */
4101 if (rdev->rlc.clear_state_obj) {
4102 r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
4103 if (unlikely(r != 0))
4104 dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
4105 radeon_bo_unpin(rdev->rlc.clear_state_obj);
4106 radeon_bo_unreserve(rdev->rlc.clear_state_obj);
4107
4108 radeon_bo_unref(&rdev->rlc.clear_state_obj);
4109 rdev->rlc.clear_state_obj = NULL;
4110 }
4111 }
4112
4113 int si_rlc_init(struct radeon_device *rdev)
4114 {
4115 int r;
4116
4117 /* save restore block */
4118 if (rdev->rlc.save_restore_obj == NULL) {
4119 r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
4120 RADEON_GEM_DOMAIN_VRAM, NULL,
4121 &rdev->rlc.save_restore_obj);
4122 if (r) {
4123 dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
4124 return r;
4125 }
4126 }
4127
4128 r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
4129 if (unlikely(r != 0)) {
4130 si_rlc_fini(rdev);
4131 return r;
4132 }
4133 r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
4134 &rdev->rlc.save_restore_gpu_addr);
4135 radeon_bo_unreserve(rdev->rlc.save_restore_obj);
4136 if (r) {
4137 dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
4138 si_rlc_fini(rdev);
4139 return r;
4140 }
4141
4142 /* clear state block */
4143 if (rdev->rlc.clear_state_obj == NULL) {
4144 r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
4145 RADEON_GEM_DOMAIN_VRAM, NULL,
4146 &rdev->rlc.clear_state_obj);
4147 if (r) {
4148 dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
4149 si_rlc_fini(rdev);
4150 return r;
4151 }
4152 }
4153 r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
4154 if (unlikely(r != 0)) {
4155 si_rlc_fini(rdev);
4156 return r;
4157 }
4158 r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
4159 &rdev->rlc.clear_state_gpu_addr);
4160 radeon_bo_unreserve(rdev->rlc.clear_state_obj);
4161 if (r) {
4162 dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
4163 si_rlc_fini(rdev);
4164 return r;
4165 }
4166
4167 return 0;
4168 }
4169
4170 static void si_rlc_stop(struct radeon_device *rdev)
4171 {
4172 WREG32(RLC_CNTL, 0);
4173 }
4174
4175 static void si_rlc_start(struct radeon_device *rdev)
4176 {
4177 WREG32(RLC_CNTL, RLC_ENABLE);
4178 }
4179
4180 static int si_rlc_resume(struct radeon_device *rdev)
4181 {
4182 u32 i;
4183 const __be32 *fw_data;
4184
4185 if (!rdev->rlc_fw)
4186 return -EINVAL;
4187
4188 si_rlc_stop(rdev);
4189
4190 WREG32(RLC_RL_BASE, 0);
4191 WREG32(RLC_RL_SIZE, 0);
4192 WREG32(RLC_LB_CNTL, 0);
4193 WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
4194 WREG32(RLC_LB_CNTR_INIT, 0);
4195
4196 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
4197 WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
4198
4199 WREG32(RLC_MC_CNTL, 0);
4200 WREG32(RLC_UCODE_CNTL, 0);
4201
4202 fw_data = (const __be32 *)rdev->rlc_fw->data;
4203 for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
4204 WREG32(RLC_UCODE_ADDR, i);
4205 WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
4206 }
4207 WREG32(RLC_UCODE_ADDR, 0);
4208
4209 si_rlc_start(rdev);
4210
4211 return 0;
4212 }
4213
4214 static void si_enable_interrupts(struct radeon_device *rdev)
4215 {
4216 u32 ih_cntl = RREG32(IH_CNTL);
4217 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4218
4219 ih_cntl |= ENABLE_INTR;
4220 ih_rb_cntl |= IH_RB_ENABLE;
4221 WREG32(IH_CNTL, ih_cntl);
4222 WREG32(IH_RB_CNTL, ih_rb_cntl);
4223 rdev->ih.enabled = true;
4224 }
4225
4226 static void si_disable_interrupts(struct radeon_device *rdev)
4227 {
4228 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4229 u32 ih_cntl = RREG32(IH_CNTL);
4230
4231 ih_rb_cntl &= ~IH_RB_ENABLE;
4232 ih_cntl &= ~ENABLE_INTR;
4233 WREG32(IH_RB_CNTL, ih_rb_cntl);
4234 WREG32(IH_CNTL, ih_cntl);
4235 /* set rptr, wptr to 0 */
4236 WREG32(IH_RB_RPTR, 0);
4237 WREG32(IH_RB_WPTR, 0);
4238 rdev->ih.enabled = false;
4239 rdev->ih.rptr = 0;
4240 }
4241
4242 static void si_disable_interrupt_state(struct radeon_device *rdev)
4243 {
4244 u32 tmp;
4245
4246 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4247 WREG32(CP_INT_CNTL_RING1, 0);
4248 WREG32(CP_INT_CNTL_RING2, 0);
4249 tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4250 WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
4251 tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4252 WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
4253 WREG32(GRBM_INT_CNTL, 0);
4254 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
4255 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
4256 if (rdev->num_crtc >= 4) {
4257 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
4258 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
4259 }
4260 if (rdev->num_crtc >= 6) {
4261 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
4262 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
4263 }
4264
4265 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
4266 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
4267 if (rdev->num_crtc >= 4) {
4268 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
4269 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
4270 }
4271 if (rdev->num_crtc >= 6) {
4272 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
4273 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
4274 }
4275
4276 WREG32(DACA_AUTODETECT_INT_CONTROL, 0);
4277
4278 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4279 WREG32(DC_HPD1_INT_CONTROL, tmp);
4280 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4281 WREG32(DC_HPD2_INT_CONTROL, tmp);
4282 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4283 WREG32(DC_HPD3_INT_CONTROL, tmp);
4284 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4285 WREG32(DC_HPD4_INT_CONTROL, tmp);
4286 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4287 WREG32(DC_HPD5_INT_CONTROL, tmp);
4288 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4289 WREG32(DC_HPD6_INT_CONTROL, tmp);
4290
4291 }
4292
4293 static int si_irq_init(struct radeon_device *rdev)
4294 {
4295 int ret = 0;
4296 int rb_bufsz;
4297 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
4298
4299 /* allocate ring */
4300 ret = r600_ih_ring_alloc(rdev);
4301 if (ret)
4302 return ret;
4303
4304 /* disable irqs */
4305 si_disable_interrupts(rdev);
4306
4307 /* init rlc */
4308 ret = si_rlc_resume(rdev);
4309 if (ret) {
4310 r600_ih_ring_fini(rdev);
4311 return ret;
4312 }
4313
4314 /* setup interrupt control */
4315 /* set dummy read address to ring address */
4316 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
4317 interrupt_cntl = RREG32(INTERRUPT_CNTL);
4318 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
4319 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
4320 */
4321 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
4322 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
4323 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
4324 WREG32(INTERRUPT_CNTL, interrupt_cntl);
4325
4326 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
4327 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
4328
4329 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
4330 IH_WPTR_OVERFLOW_CLEAR |
4331 (rb_bufsz << 1));
4332
4333 if (rdev->wb.enabled)
4334 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
4335
4336 /* set the writeback address whether it's enabled or not */
4337 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
4338 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
4339
4340 WREG32(IH_RB_CNTL, ih_rb_cntl);
4341
4342 /* set rptr, wptr to 0 */
4343 WREG32(IH_RB_RPTR, 0);
4344 WREG32(IH_RB_WPTR, 0);
4345
4346 /* Default settings for IH_CNTL (disabled at first) */
4347 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
4348 /* RPTR_REARM only works if msi's are enabled */
4349 if (rdev->msi_enabled)
4350 ih_cntl |= RPTR_REARM;
4351 WREG32(IH_CNTL, ih_cntl);
4352
4353 /* force the active interrupt state to all disabled */
4354 si_disable_interrupt_state(rdev);
4355
4356 pci_set_master(rdev->pdev);
4357
4358 /* enable irqs */
4359 si_enable_interrupts(rdev);
4360
4361 return ret;
4362 }
4363
4364 int si_irq_set(struct radeon_device *rdev)
4365 {
4366 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
4367 u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
4368 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
4369 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
4370 u32 grbm_int_cntl = 0;
4371 u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
4372 u32 dma_cntl, dma_cntl1;
4373
4374 if (!rdev->irq.installed) {
4375 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
4376 return -EINVAL;
4377 }
4378 /* don't enable anything if the ih is disabled */
4379 if (!rdev->ih.enabled) {
4380 si_disable_interrupts(rdev);
4381 /* force the active interrupt state to all disabled */
4382 si_disable_interrupt_state(rdev);
4383 return 0;
4384 }
4385
4386 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
4387 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
4388 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
4389 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
4390 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
4391 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
4392
4393 dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4394 dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4395
4396 /* enable CP interrupts on all rings */
4397 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
4398 DRM_DEBUG("si_irq_set: sw int gfx\n");
4399 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
4400 }
4401 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
4402 DRM_DEBUG("si_irq_set: sw int cp1\n");
4403 cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
4404 }
4405 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
4406 DRM_DEBUG("si_irq_set: sw int cp2\n");
4407 cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
4408 }
4409 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
4410 DRM_DEBUG("si_irq_set: sw int dma\n");
4411 dma_cntl |= TRAP_ENABLE;
4412 }
4413
4414 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
4415 DRM_DEBUG("si_irq_set: sw int dma1\n");
4416 dma_cntl1 |= TRAP_ENABLE;
4417 }
4418 if (rdev->irq.crtc_vblank_int[0] ||
4419 atomic_read(&rdev->irq.pflip[0])) {
4420 DRM_DEBUG("si_irq_set: vblank 0\n");
4421 crtc1 |= VBLANK_INT_MASK;
4422 }
4423 if (rdev->irq.crtc_vblank_int[1] ||
4424 atomic_read(&rdev->irq.pflip[1])) {
4425 DRM_DEBUG("si_irq_set: vblank 1\n");
4426 crtc2 |= VBLANK_INT_MASK;
4427 }
4428 if (rdev->irq.crtc_vblank_int[2] ||
4429 atomic_read(&rdev->irq.pflip[2])) {
4430 DRM_DEBUG("si_irq_set: vblank 2\n");
4431 crtc3 |= VBLANK_INT_MASK;
4432 }
4433 if (rdev->irq.crtc_vblank_int[3] ||
4434 atomic_read(&rdev->irq.pflip[3])) {
4435 DRM_DEBUG("si_irq_set: vblank 3\n");
4436 crtc4 |= VBLANK_INT_MASK;
4437 }
4438 if (rdev->irq.crtc_vblank_int[4] ||
4439 atomic_read(&rdev->irq.pflip[4])) {
4440 DRM_DEBUG("si_irq_set: vblank 4\n");
4441 crtc5 |= VBLANK_INT_MASK;
4442 }
4443 if (rdev->irq.crtc_vblank_int[5] ||
4444 atomic_read(&rdev->irq.pflip[5])) {
4445 DRM_DEBUG("si_irq_set: vblank 5\n");
4446 crtc6 |= VBLANK_INT_MASK;
4447 }
4448 if (rdev->irq.hpd[0]) {
4449 DRM_DEBUG("si_irq_set: hpd 1\n");
4450 hpd1 |= DC_HPDx_INT_EN;
4451 }
4452 if (rdev->irq.hpd[1]) {
4453 DRM_DEBUG("si_irq_set: hpd 2\n");
4454 hpd2 |= DC_HPDx_INT_EN;
4455 }
4456 if (rdev->irq.hpd[2]) {
4457 DRM_DEBUG("si_irq_set: hpd 3\n");
4458 hpd3 |= DC_HPDx_INT_EN;
4459 }
4460 if (rdev->irq.hpd[3]) {
4461 DRM_DEBUG("si_irq_set: hpd 4\n");
4462 hpd4 |= DC_HPDx_INT_EN;
4463 }
4464 if (rdev->irq.hpd[4]) {
4465 DRM_DEBUG("si_irq_set: hpd 5\n");
4466 hpd5 |= DC_HPDx_INT_EN;
4467 }
4468 if (rdev->irq.hpd[5]) {
4469 DRM_DEBUG("si_irq_set: hpd 6\n");
4470 hpd6 |= DC_HPDx_INT_EN;
4471 }
4472
4473 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
4474 WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
4475 WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
4476
4477 WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
4478 WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
4479
4480 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
4481
4482 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
4483 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
4484 if (rdev->num_crtc >= 4) {
4485 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
4486 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
4487 }
4488 if (rdev->num_crtc >= 6) {
4489 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
4490 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
4491 }
4492
4493 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
4494 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
4495 if (rdev->num_crtc >= 4) {
4496 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
4497 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
4498 }
4499 if (rdev->num_crtc >= 6) {
4500 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
4501 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
4502 }
4503
4504 WREG32(DC_HPD1_INT_CONTROL, hpd1);
4505 WREG32(DC_HPD2_INT_CONTROL, hpd2);
4506 WREG32(DC_HPD3_INT_CONTROL, hpd3);
4507 WREG32(DC_HPD4_INT_CONTROL, hpd4);
4508 WREG32(DC_HPD5_INT_CONTROL, hpd5);
4509 WREG32(DC_HPD6_INT_CONTROL, hpd6);
4510
4511 return 0;
4512 }
4513
4514 static inline void si_irq_ack(struct radeon_device *rdev)
4515 {
4516 u32 tmp;
4517
4518 rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
4519 rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
4520 rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
4521 rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
4522 rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
4523 rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
4524 rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
4525 rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
4526 if (rdev->num_crtc >= 4) {
4527 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
4528 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
4529 }
4530 if (rdev->num_crtc >= 6) {
4531 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
4532 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
4533 }
4534
4535 if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
4536 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4537 if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
4538 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4539 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
4540 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
4541 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
4542 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
4543 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
4544 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
4545 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
4546 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
4547
4548 if (rdev->num_crtc >= 4) {
4549 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
4550 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4551 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
4552 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4553 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
4554 WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
4555 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
4556 WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
4557 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
4558 WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
4559 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
4560 WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
4561 }
4562
4563 if (rdev->num_crtc >= 6) {
4564 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
4565 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4566 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
4567 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
4568 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
4569 WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
4570 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
4571 WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
4572 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
4573 WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
4574 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
4575 WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
4576 }
4577
4578 if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
4579 tmp = RREG32(DC_HPD1_INT_CONTROL);
4580 tmp |= DC_HPDx_INT_ACK;
4581 WREG32(DC_HPD1_INT_CONTROL, tmp);
4582 }
4583 if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
4584 tmp = RREG32(DC_HPD2_INT_CONTROL);
4585 tmp |= DC_HPDx_INT_ACK;
4586 WREG32(DC_HPD2_INT_CONTROL, tmp);
4587 }
4588 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4589 tmp = RREG32(DC_HPD3_INT_CONTROL);
4590 tmp |= DC_HPDx_INT_ACK;
4591 WREG32(DC_HPD3_INT_CONTROL, tmp);
4592 }
4593 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4594 tmp = RREG32(DC_HPD4_INT_CONTROL);
4595 tmp |= DC_HPDx_INT_ACK;
4596 WREG32(DC_HPD4_INT_CONTROL, tmp);
4597 }
4598 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4599 tmp = RREG32(DC_HPD5_INT_CONTROL);
4600 tmp |= DC_HPDx_INT_ACK;
4601 WREG32(DC_HPD5_INT_CONTROL, tmp);
4602 }
4603 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4604 tmp = RREG32(DC_HPD5_INT_CONTROL);
4605 tmp |= DC_HPDx_INT_ACK;
4606 WREG32(DC_HPD6_INT_CONTROL, tmp);
4607 }
4608 }
4609
4610 static void si_irq_disable(struct radeon_device *rdev)
4611 {
4612 si_disable_interrupts(rdev);
4613 /* Wait and acknowledge irq */
4614 mdelay(1);
4615 si_irq_ack(rdev);
4616 si_disable_interrupt_state(rdev);
4617 }
4618
4619 static void si_irq_suspend(struct radeon_device *rdev)
4620 {
4621 si_irq_disable(rdev);
4622 si_rlc_stop(rdev);
4623 }
4624
4625 static void si_irq_fini(struct radeon_device *rdev)
4626 {
4627 si_irq_suspend(rdev);
4628 r600_ih_ring_fini(rdev);
4629 }
4630
4631 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
4632 {
4633 u32 wptr, tmp;
4634
4635 if (rdev->wb.enabled)
4636 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
4637 else
4638 wptr = RREG32(IH_RB_WPTR);
4639
4640 if (wptr & RB_OVERFLOW) {
4641 /* When a ring buffer overflow happen start parsing interrupt
4642 * from the last not overwritten vector (wptr + 16). Hopefully
4643 * this should allow us to catchup.
4644 */
4645 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
4646 wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
4647 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
4648 tmp = RREG32(IH_RB_CNTL);
4649 tmp |= IH_WPTR_OVERFLOW_CLEAR;
4650 WREG32(IH_RB_CNTL, tmp);
4651 }
4652 return (wptr & rdev->ih.ptr_mask);
4653 }
4654
4655 /* SI IV Ring
4656 * Each IV ring entry is 128 bits:
4657 * [7:0] - interrupt source id
4658 * [31:8] - reserved
4659 * [59:32] - interrupt source data
4660 * [63:60] - reserved
4661 * [71:64] - RINGID
4662 * [79:72] - VMID
4663 * [127:80] - reserved
4664 */
4665 int si_irq_process(struct radeon_device *rdev)
4666 {
4667 u32 wptr;
4668 u32 rptr;
4669 u32 src_id, src_data, ring_id;
4670 u32 ring_index;
4671 bool queue_hotplug = false;
4672
4673 if (!rdev->ih.enabled || rdev->shutdown)
4674 return IRQ_NONE;
4675
4676 wptr = si_get_ih_wptr(rdev);
4677
4678 restart_ih:
4679 /* is somebody else already processing irqs? */
4680 if (atomic_xchg(&rdev->ih.lock, 1))
4681 return IRQ_NONE;
4682
4683 rptr = rdev->ih.rptr;
4684 DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
4685
4686 /* Order reading of wptr vs. reading of IH ring data */
4687 rmb();
4688
4689 /* display interrupts */
4690 si_irq_ack(rdev);
4691
4692 while (rptr != wptr) {
4693 /* wptr/rptr are in bytes! */
4694 ring_index = rptr / 4;
4695 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
4696 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
4697 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
4698
4699 switch (src_id) {
4700 case 1: /* D1 vblank/vline */
4701 switch (src_data) {
4702 case 0: /* D1 vblank */
4703 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
4704 if (rdev->irq.crtc_vblank_int[0]) {
4705 drm_handle_vblank(rdev->ddev, 0);
4706 rdev->pm.vblank_sync = true;
4707 wake_up(&rdev->irq.vblank_queue);
4708 }
4709 if (atomic_read(&rdev->irq.pflip[0]))
4710 radeon_crtc_handle_flip(rdev, 0);
4711 rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
4712 DRM_DEBUG("IH: D1 vblank\n");
4713 }
4714 break;
4715 case 1: /* D1 vline */
4716 if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
4717 rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
4718 DRM_DEBUG("IH: D1 vline\n");
4719 }
4720 break;
4721 default:
4722 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4723 break;
4724 }
4725 break;
4726 case 2: /* D2 vblank/vline */
4727 switch (src_data) {
4728 case 0: /* D2 vblank */
4729 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
4730 if (rdev->irq.crtc_vblank_int[1]) {
4731 drm_handle_vblank(rdev->ddev, 1);
4732 rdev->pm.vblank_sync = true;
4733 wake_up(&rdev->irq.vblank_queue);
4734 }
4735 if (atomic_read(&rdev->irq.pflip[1]))
4736 radeon_crtc_handle_flip(rdev, 1);
4737 rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
4738 DRM_DEBUG("IH: D2 vblank\n");
4739 }
4740 break;
4741 case 1: /* D2 vline */
4742 if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
4743 rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
4744 DRM_DEBUG("IH: D2 vline\n");
4745 }
4746 break;
4747 default:
4748 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4749 break;
4750 }
4751 break;
4752 case 3: /* D3 vblank/vline */
4753 switch (src_data) {
4754 case 0: /* D3 vblank */
4755 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
4756 if (rdev->irq.crtc_vblank_int[2]) {
4757 drm_handle_vblank(rdev->ddev, 2);
4758 rdev->pm.vblank_sync = true;
4759 wake_up(&rdev->irq.vblank_queue);
4760 }
4761 if (atomic_read(&rdev->irq.pflip[2]))
4762 radeon_crtc_handle_flip(rdev, 2);
4763 rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
4764 DRM_DEBUG("IH: D3 vblank\n");
4765 }
4766 break;
4767 case 1: /* D3 vline */
4768 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
4769 rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
4770 DRM_DEBUG("IH: D3 vline\n");
4771 }
4772 break;
4773 default:
4774 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4775 break;
4776 }
4777 break;
4778 case 4: /* D4 vblank/vline */
4779 switch (src_data) {
4780 case 0: /* D4 vblank */
4781 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
4782 if (rdev->irq.crtc_vblank_int[3]) {
4783 drm_handle_vblank(rdev->ddev, 3);
4784 rdev->pm.vblank_sync = true;
4785 wake_up(&rdev->irq.vblank_queue);
4786 }
4787 if (atomic_read(&rdev->irq.pflip[3]))
4788 radeon_crtc_handle_flip(rdev, 3);
4789 rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
4790 DRM_DEBUG("IH: D4 vblank\n");
4791 }
4792 break;
4793 case 1: /* D4 vline */
4794 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
4795 rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
4796 DRM_DEBUG("IH: D4 vline\n");
4797 }
4798 break;
4799 default:
4800 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4801 break;
4802 }
4803 break;
4804 case 5: /* D5 vblank/vline */
4805 switch (src_data) {
4806 case 0: /* D5 vblank */
4807 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
4808 if (rdev->irq.crtc_vblank_int[4]) {
4809 drm_handle_vblank(rdev->ddev, 4);
4810 rdev->pm.vblank_sync = true;
4811 wake_up(&rdev->irq.vblank_queue);
4812 }
4813 if (atomic_read(&rdev->irq.pflip[4]))
4814 radeon_crtc_handle_flip(rdev, 4);
4815 rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
4816 DRM_DEBUG("IH: D5 vblank\n");
4817 }
4818 break;
4819 case 1: /* D5 vline */
4820 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
4821 rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
4822 DRM_DEBUG("IH: D5 vline\n");
4823 }
4824 break;
4825 default:
4826 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4827 break;
4828 }
4829 break;
4830 case 6: /* D6 vblank/vline */
4831 switch (src_data) {
4832 case 0: /* D6 vblank */
4833 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
4834 if (rdev->irq.crtc_vblank_int[5]) {
4835 drm_handle_vblank(rdev->ddev, 5);
4836 rdev->pm.vblank_sync = true;
4837 wake_up(&rdev->irq.vblank_queue);
4838 }
4839 if (atomic_read(&rdev->irq.pflip[5]))
4840 radeon_crtc_handle_flip(rdev, 5);
4841 rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
4842 DRM_DEBUG("IH: D6 vblank\n");
4843 }
4844 break;
4845 case 1: /* D6 vline */
4846 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
4847 rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
4848 DRM_DEBUG("IH: D6 vline\n");
4849 }
4850 break;
4851 default:
4852 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4853 break;
4854 }
4855 break;
4856 case 42: /* HPD hotplug */
4857 switch (src_data) {
4858 case 0:
4859 if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
4860 rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
4861 queue_hotplug = true;
4862 DRM_DEBUG("IH: HPD1\n");
4863 }
4864 break;
4865 case 1:
4866 if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
4867 rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
4868 queue_hotplug = true;
4869 DRM_DEBUG("IH: HPD2\n");
4870 }
4871 break;
4872 case 2:
4873 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
4874 rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
4875 queue_hotplug = true;
4876 DRM_DEBUG("IH: HPD3\n");
4877 }
4878 break;
4879 case 3:
4880 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
4881 rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
4882 queue_hotplug = true;
4883 DRM_DEBUG("IH: HPD4\n");
4884 }
4885 break;
4886 case 4:
4887 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
4888 rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
4889 queue_hotplug = true;
4890 DRM_DEBUG("IH: HPD5\n");
4891 }
4892 break;
4893 case 5:
4894 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
4895 rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
4896 queue_hotplug = true;
4897 DRM_DEBUG("IH: HPD6\n");
4898 }
4899 break;
4900 default:
4901 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4902 break;
4903 }
4904 break;
4905 case 146:
4906 case 147:
4907 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
4908 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
4909 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4910 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4911 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4912 /* reset addr and status */
4913 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
4914 break;
4915 case 176: /* RINGID0 CP_INT */
4916 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
4917 break;
4918 case 177: /* RINGID1 CP_INT */
4919 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
4920 break;
4921 case 178: /* RINGID2 CP_INT */
4922 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
4923 break;
4924 case 181: /* CP EOP event */
4925 DRM_DEBUG("IH: CP EOP\n");
4926 switch (ring_id) {
4927 case 0:
4928 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
4929 break;
4930 case 1:
4931 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
4932 break;
4933 case 2:
4934 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
4935 break;
4936 }
4937 break;
4938 case 224: /* DMA trap event */
4939 DRM_DEBUG("IH: DMA trap\n");
4940 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
4941 break;
4942 case 233: /* GUI IDLE */
4943 DRM_DEBUG("IH: GUI idle\n");
4944 break;
4945 case 244: /* DMA trap event */
4946 DRM_DEBUG("IH: DMA1 trap\n");
4947 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
4948 break;
4949 default:
4950 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
4951 break;
4952 }
4953
4954 /* wptr/rptr are in bytes! */
4955 rptr += 16;
4956 rptr &= rdev->ih.ptr_mask;
4957 }
4958 if (queue_hotplug)
4959 schedule_work(&rdev->hotplug_work);
4960 rdev->ih.rptr = rptr;
4961 WREG32(IH_RB_RPTR, rdev->ih.rptr);
4962 atomic_set(&rdev->ih.lock, 0);
4963
4964 /* make sure wptr hasn't changed while processing */
4965 wptr = si_get_ih_wptr(rdev);
4966 if (wptr != rptr)
4967 goto restart_ih;
4968
4969 return IRQ_HANDLED;
4970 }
4971
4972 /**
4973 * si_copy_dma - copy pages using the DMA engine
4974 *
4975 * @rdev: radeon_device pointer
4976 * @src_offset: src GPU address
4977 * @dst_offset: dst GPU address
4978 * @num_gpu_pages: number of GPU pages to xfer
4979 * @fence: radeon fence object
4980 *
4981 * Copy GPU paging using the DMA engine (SI).
4982 * Used by the radeon ttm implementation to move pages if
4983 * registered as the asic copy callback.
4984 */
4985 int si_copy_dma(struct radeon_device *rdev,
4986 uint64_t src_offset, uint64_t dst_offset,
4987 unsigned num_gpu_pages,
4988 struct radeon_fence **fence)
4989 {
4990 struct radeon_semaphore *sem = NULL;
4991 int ring_index = rdev->asic->copy.dma_ring_index;
4992 struct radeon_ring *ring = &rdev->ring[ring_index];
4993 u32 size_in_bytes, cur_size_in_bytes;
4994 int i, num_loops;
4995 int r = 0;
4996
4997 r = radeon_semaphore_create(rdev, &sem);
4998 if (r) {
4999 DRM_ERROR("radeon: moving bo (%d).\n", r);
5000 return r;
5001 }
5002
5003 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
5004 num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
5005 r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
5006 if (r) {
5007 DRM_ERROR("radeon: moving bo (%d).\n", r);
5008 radeon_semaphore_free(rdev, &sem, NULL);
5009 return r;
5010 }
5011
5012 if (radeon_fence_need_sync(*fence, ring->idx)) {
5013 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
5014 ring->idx);
5015 radeon_fence_note_sync(*fence, ring->idx);
5016 } else {
5017 radeon_semaphore_free(rdev, &sem, NULL);
5018 }
5019
5020 for (i = 0; i < num_loops; i++) {
5021 cur_size_in_bytes = size_in_bytes;
5022 if (cur_size_in_bytes > 0xFFFFF)
5023 cur_size_in_bytes = 0xFFFFF;
5024 size_in_bytes -= cur_size_in_bytes;
5025 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
5026 radeon_ring_write(ring, dst_offset & 0xffffffff);
5027 radeon_ring_write(ring, src_offset & 0xffffffff);
5028 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
5029 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
5030 src_offset += cur_size_in_bytes;
5031 dst_offset += cur_size_in_bytes;
5032 }
5033
5034 r = radeon_fence_emit(rdev, fence, ring->idx);
5035 if (r) {
5036 radeon_ring_unlock_undo(rdev, ring);
5037 return r;
5038 }
5039
5040 radeon_ring_unlock_commit(rdev, ring);
5041 radeon_semaphore_free(rdev, &sem, *fence);
5042
5043 return r;
5044 }
5045
5046 /*
5047 * startup/shutdown callbacks
5048 */
5049 static int si_startup(struct radeon_device *rdev)
5050 {
5051 struct radeon_ring *ring;
5052 int r;
5053
5054 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5055 !rdev->rlc_fw || !rdev->mc_fw) {
5056 r = si_init_microcode(rdev);
5057 if (r) {
5058 DRM_ERROR("Failed to load firmware!\n");
5059 return r;
5060 }
5061 }
5062
5063 r = si_mc_load_microcode(rdev);
5064 if (r) {
5065 DRM_ERROR("Failed to load MC firmware!\n");
5066 return r;
5067 }
5068
5069 r = r600_vram_scratch_init(rdev);
5070 if (r)
5071 return r;
5072
5073 si_mc_program(rdev);
5074 r = si_pcie_gart_enable(rdev);
5075 if (r)
5076 return r;
5077 si_gpu_init(rdev);
5078
5079 /* allocate rlc buffers */
5080 r = si_rlc_init(rdev);
5081 if (r) {
5082 DRM_ERROR("Failed to init rlc BOs!\n");
5083 return r;
5084 }
5085
5086 /* allocate wb buffer */
5087 r = radeon_wb_init(rdev);
5088 if (r)
5089 return r;
5090
5091 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
5092 if (r) {
5093 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5094 return r;
5095 }
5096
5097 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5098 if (r) {
5099 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5100 return r;
5101 }
5102
5103 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
5104 if (r) {
5105 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5106 return r;
5107 }
5108
5109 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
5110 if (r) {
5111 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
5112 return r;
5113 }
5114
5115 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5116 if (r) {
5117 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
5118 return r;
5119 }
5120
5121 r = rv770_uvd_resume(rdev);
5122 if (!r) {
5123 r = radeon_fence_driver_start_ring(rdev,
5124 R600_RING_TYPE_UVD_INDEX);
5125 if (r)
5126 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
5127 }
5128 if (r)
5129 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
5130
5131 /* Enable IRQ */
5132 r = si_irq_init(rdev);
5133 if (r) {
5134 DRM_ERROR("radeon: IH init failed (%d).\n", r);
5135 radeon_irq_kms_fini(rdev);
5136 return r;
5137 }
5138 si_irq_set(rdev);
5139
5140 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5141 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
5142 CP_RB0_RPTR, CP_RB0_WPTR,
5143 0, 0xfffff, RADEON_CP_PACKET2);
5144 if (r)
5145 return r;
5146
5147 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5148 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
5149 CP_RB1_RPTR, CP_RB1_WPTR,
5150 0, 0xfffff, RADEON_CP_PACKET2);
5151 if (r)
5152 return r;
5153
5154 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5155 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
5156 CP_RB2_RPTR, CP_RB2_WPTR,
5157 0, 0xfffff, RADEON_CP_PACKET2);
5158 if (r)
5159 return r;
5160
5161 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
5162 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
5163 DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
5164 DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
5165 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
5166 if (r)
5167 return r;
5168
5169 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
5170 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
5171 DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
5172 DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
5173 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
5174 if (r)
5175 return r;
5176
5177 r = si_cp_load_microcode(rdev);
5178 if (r)
5179 return r;
5180 r = si_cp_resume(rdev);
5181 if (r)
5182 return r;
5183
5184 r = cayman_dma_resume(rdev);
5185 if (r)
5186 return r;
5187
5188 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
5189 if (ring->ring_size) {
5190 r = radeon_ring_init(rdev, ring, ring->ring_size,
5191 R600_WB_UVD_RPTR_OFFSET,
5192 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
5193 0, 0xfffff, RADEON_CP_PACKET2);
5194 if (!r)
5195 r = r600_uvd_init(rdev);
5196 if (r)
5197 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
5198 }
5199
5200 r = radeon_ib_pool_init(rdev);
5201 if (r) {
5202 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
5203 return r;
5204 }
5205
5206 r = radeon_vm_manager_init(rdev);
5207 if (r) {
5208 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
5209 return r;
5210 }
5211
5212 return 0;
5213 }
5214
5215 int si_resume(struct radeon_device *rdev)
5216 {
5217 int r;
5218
5219 /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
5220 * posting will perform necessary task to bring back GPU into good
5221 * shape.
5222 */
5223 /* post card */
5224 atom_asic_init(rdev->mode_info.atom_context);
5225
5226 /* init golden registers */
5227 si_init_golden_registers(rdev);
5228
5229 rdev->accel_working = true;
5230 r = si_startup(rdev);
5231 if (r) {
5232 DRM_ERROR("si startup failed on resume\n");
5233 rdev->accel_working = false;
5234 return r;
5235 }
5236
5237 return r;
5238
5239 }
5240
5241 int si_suspend(struct radeon_device *rdev)
5242 {
5243 radeon_vm_manager_fini(rdev);
5244 si_cp_enable(rdev, false);
5245 cayman_dma_stop(rdev);
5246 r600_uvd_rbc_stop(rdev);
5247 radeon_uvd_suspend(rdev);
5248 si_irq_suspend(rdev);
5249 radeon_wb_disable(rdev);
5250 si_pcie_gart_disable(rdev);
5251 return 0;
5252 }
5253
5254 /* Plan is to move initialization in that function and use
5255 * helper function so that radeon_device_init pretty much
5256 * do nothing more than calling asic specific function. This
5257 * should also allow to remove a bunch of callback function
5258 * like vram_info.
5259 */
5260 int si_init(struct radeon_device *rdev)
5261 {
5262 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5263 int r;
5264
5265 /* Read BIOS */
5266 if (!radeon_get_bios(rdev)) {
5267 if (ASIC_IS_AVIVO(rdev))
5268 return -EINVAL;
5269 }
5270 /* Must be an ATOMBIOS */
5271 if (!rdev->is_atom_bios) {
5272 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
5273 return -EINVAL;
5274 }
5275 r = radeon_atombios_init(rdev);
5276 if (r)
5277 return r;
5278
5279 /* Post card if necessary */
5280 if (!radeon_card_posted(rdev)) {
5281 if (!rdev->bios) {
5282 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
5283 return -EINVAL;
5284 }
5285 DRM_INFO("GPU not posted. posting now...\n");
5286 atom_asic_init(rdev->mode_info.atom_context);
5287 }
5288 /* init golden registers */
5289 si_init_golden_registers(rdev);
5290 /* Initialize scratch registers */
5291 si_scratch_init(rdev);
5292 /* Initialize surface registers */
5293 radeon_surface_init(rdev);
5294 /* Initialize clocks */
5295 radeon_get_clock_info(rdev->ddev);
5296
5297 /* Fence driver */
5298 r = radeon_fence_driver_init(rdev);
5299 if (r)
5300 return r;
5301
5302 /* initialize memory controller */
5303 r = si_mc_init(rdev);
5304 if (r)
5305 return r;
5306 /* Memory manager */
5307 r = radeon_bo_init(rdev);
5308 if (r)
5309 return r;
5310
5311 r = radeon_irq_kms_init(rdev);
5312 if (r)
5313 return r;
5314
5315 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
5316 ring->ring_obj = NULL;
5317 r600_ring_init(rdev, ring, 1024 * 1024);
5318
5319 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5320 ring->ring_obj = NULL;
5321 r600_ring_init(rdev, ring, 1024 * 1024);
5322
5323 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5324 ring->ring_obj = NULL;
5325 r600_ring_init(rdev, ring, 1024 * 1024);
5326
5327 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
5328 ring->ring_obj = NULL;
5329 r600_ring_init(rdev, ring, 64 * 1024);
5330
5331 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
5332 ring->ring_obj = NULL;
5333 r600_ring_init(rdev, ring, 64 * 1024);
5334
5335 r = radeon_uvd_init(rdev);
5336 if (!r) {
5337 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
5338 ring->ring_obj = NULL;
5339 r600_ring_init(rdev, ring, 4096);
5340 }
5341
5342 rdev->ih.ring_obj = NULL;
5343 r600_ih_ring_init(rdev, 64 * 1024);
5344
5345 r = r600_pcie_gart_init(rdev);
5346 if (r)
5347 return r;
5348
5349 rdev->accel_working = true;
5350 r = si_startup(rdev);
5351 if (r) {
5352 dev_err(rdev->dev, "disabling GPU acceleration\n");
5353 si_cp_fini(rdev);
5354 cayman_dma_fini(rdev);
5355 si_irq_fini(rdev);
5356 si_rlc_fini(rdev);
5357 radeon_wb_fini(rdev);
5358 radeon_ib_pool_fini(rdev);
5359 radeon_vm_manager_fini(rdev);
5360 radeon_irq_kms_fini(rdev);
5361 si_pcie_gart_fini(rdev);
5362 rdev->accel_working = false;
5363 }
5364
5365 /* Don't start up if the MC ucode is missing.
5366 * The default clocks and voltages before the MC ucode
5367 * is loaded are not suffient for advanced operations.
5368 */
5369 if (!rdev->mc_fw) {
5370 DRM_ERROR("radeon: MC ucode required for NI+.\n");
5371 return -EINVAL;
5372 }
5373
5374 return 0;
5375 }
5376
5377 void si_fini(struct radeon_device *rdev)
5378 {
5379 si_cp_fini(rdev);
5380 cayman_dma_fini(rdev);
5381 si_irq_fini(rdev);
5382 si_rlc_fini(rdev);
5383 radeon_wb_fini(rdev);
5384 radeon_vm_manager_fini(rdev);
5385 radeon_ib_pool_fini(rdev);
5386 radeon_irq_kms_fini(rdev);
5387 radeon_uvd_fini(rdev);
5388 si_pcie_gart_fini(rdev);
5389 r600_vram_scratch_fini(rdev);
5390 radeon_gem_fini(rdev);
5391 radeon_fence_driver_fini(rdev);
5392 radeon_bo_fini(rdev);
5393 radeon_atombios_fini(rdev);
5394 kfree(rdev->bios);
5395 rdev->bios = NULL;
5396 }
5397
5398 /**
5399 * si_get_gpu_clock_counter - return GPU clock counter snapshot
5400 *
5401 * @rdev: radeon_device pointer
5402 *
5403 * Fetches a GPU clock counter snapshot (SI).
5404 * Returns the 64 bit clock counter snapshot.
5405 */
5406 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
5407 {
5408 uint64_t clock;
5409
5410 mutex_lock(&rdev->gpu_clock_mutex);
5411 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5412 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
5413 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5414 mutex_unlock(&rdev->gpu_clock_mutex);
5415 return clock;
5416 }
5417
5418 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
5419 {
5420 unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
5421 int r;
5422
5423 /* bypass vclk and dclk with bclk */
5424 WREG32_P(CG_UPLL_FUNC_CNTL_2,
5425 VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
5426 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
5427
5428 /* put PLL in bypass mode */
5429 WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
5430
5431 if (!vclk || !dclk) {
5432 /* keep the Bypass mode, put PLL to sleep */
5433 WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
5434 return 0;
5435 }
5436
5437 r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
5438 16384, 0x03FFFFFF, 0, 128, 5,
5439 &fb_div, &vclk_div, &dclk_div);
5440 if (r)
5441 return r;
5442
5443 /* set RESET_ANTI_MUX to 0 */
5444 WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
5445
5446 /* set VCO_MODE to 1 */
5447 WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
5448
5449 /* toggle UPLL_SLEEP to 1 then back to 0 */
5450 WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
5451 WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
5452
5453 /* deassert UPLL_RESET */
5454 WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
5455
5456 mdelay(1);
5457
5458 r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
5459 if (r)
5460 return r;
5461
5462 /* assert UPLL_RESET again */
5463 WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
5464
5465 /* disable spread spectrum. */
5466 WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
5467
5468 /* set feedback divider */
5469 WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
5470
5471 /* set ref divider to 0 */
5472 WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
5473
5474 if (fb_div < 307200)
5475 WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
5476 else
5477 WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
5478
5479 /* set PDIV_A and PDIV_B */
5480 WREG32_P(CG_UPLL_FUNC_CNTL_2,
5481 UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
5482 ~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
5483
5484 /* give the PLL some time to settle */
5485 mdelay(15);
5486
5487 /* deassert PLL_RESET */
5488 WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
5489
5490 mdelay(15);
5491
5492 /* switch from bypass mode to normal mode */
5493 WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
5494
5495 r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
5496 if (r)
5497 return r;
5498
5499 /* switch VCLK and DCLK selection */
5500 WREG32_P(CG_UPLL_FUNC_CNTL_2,
5501 VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
5502 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
5503
5504 mdelay(100);
5505
5506 return 0;
5507 }