2 * Copyright 2010 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Alex Deucher
24 #include <linux/firmware.h>
25 #include <linux/platform_device.h>
26 #include <linux/slab.h>
27 #include <linux/module.h>
30 #include "radeon_asic.h"
31 #include <drm/radeon_drm.h>
35 #include "cayman_blit_shaders.h"
37 extern bool evergreen_is_display_hung(struct radeon_device
*rdev
);
38 extern void evergreen_print_gpu_status_regs(struct radeon_device
*rdev
);
39 extern void evergreen_mc_stop(struct radeon_device
*rdev
, struct evergreen_mc_save
*save
);
40 extern void evergreen_mc_resume(struct radeon_device
*rdev
, struct evergreen_mc_save
*save
);
41 extern int evergreen_mc_wait_for_idle(struct radeon_device
*rdev
);
42 extern void evergreen_mc_program(struct radeon_device
*rdev
);
43 extern void evergreen_irq_suspend(struct radeon_device
*rdev
);
44 extern int evergreen_mc_init(struct radeon_device
*rdev
);
45 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device
*rdev
);
46 extern void evergreen_pcie_gen2_enable(struct radeon_device
*rdev
);
47 extern void si_rlc_fini(struct radeon_device
*rdev
);
48 extern int si_rlc_init(struct radeon_device
*rdev
);
50 #define EVERGREEN_PFP_UCODE_SIZE 1120
51 #define EVERGREEN_PM4_UCODE_SIZE 1376
52 #define EVERGREEN_RLC_UCODE_SIZE 768
53 #define BTC_MC_UCODE_SIZE 6024
55 #define CAYMAN_PFP_UCODE_SIZE 2176
56 #define CAYMAN_PM4_UCODE_SIZE 2176
57 #define CAYMAN_RLC_UCODE_SIZE 1024
58 #define CAYMAN_MC_UCODE_SIZE 6037
60 #define ARUBA_RLC_UCODE_SIZE 1536
63 MODULE_FIRMWARE("radeon/BARTS_pfp.bin");
64 MODULE_FIRMWARE("radeon/BARTS_me.bin");
65 MODULE_FIRMWARE("radeon/BARTS_mc.bin");
66 MODULE_FIRMWARE("radeon/BTC_rlc.bin");
67 MODULE_FIRMWARE("radeon/TURKS_pfp.bin");
68 MODULE_FIRMWARE("radeon/TURKS_me.bin");
69 MODULE_FIRMWARE("radeon/TURKS_mc.bin");
70 MODULE_FIRMWARE("radeon/CAICOS_pfp.bin");
71 MODULE_FIRMWARE("radeon/CAICOS_me.bin");
72 MODULE_FIRMWARE("radeon/CAICOS_mc.bin");
73 MODULE_FIRMWARE("radeon/CAYMAN_pfp.bin");
74 MODULE_FIRMWARE("radeon/CAYMAN_me.bin");
75 MODULE_FIRMWARE("radeon/CAYMAN_mc.bin");
76 MODULE_FIRMWARE("radeon/CAYMAN_rlc.bin");
77 MODULE_FIRMWARE("radeon/ARUBA_pfp.bin");
78 MODULE_FIRMWARE("radeon/ARUBA_me.bin");
79 MODULE_FIRMWARE("radeon/ARUBA_rlc.bin");
81 #define BTC_IO_MC_REGS_SIZE 29
/* BARTS MC (memory controller) init: {MC io debug register index, value}
 * pairs written through MC_SEQ_IO_DEBUG_INDEX/DATA before the MC ucode
 * is loaded (see ni_mc_load_microcode()). */
static const u32 barts_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00946a00}
};
/* TURKS MC init register/value pairs (same layout as barts_io_mc_regs;
 * only the final pad-control value differs). */
static const u32 turks_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00936a00}
};
/* CAICOS MC init register/value pairs (same layout as barts_io_mc_regs). */
static const u32 caicos_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00916a00}
};
/* CAYMAN MC init register/value pairs (same layout as barts_io_mc_regs). */
static const u32 cayman_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00976b00}
};
/**
 * ni_mc_load_microcode - load memory-controller ucode into the MC sequencer
 * @rdev: radeon_device pointer
 *
 * Selects the per-family MC io-register table, programs it through the
 * MC_SEQ_IO_DEBUG window, streams the big-endian MC firmware words into
 * MC_SEQ_SUP_PGM, then restarts the sequencer and polls for memory
 * training to complete.  Only acts on GDDR5 boards whose sequencer is not
 * already running.
 *
 * Returns 0 on success (or when nothing needed doing), -EINVAL when no MC
 * firmware was loaded.
 *
 * NOTE(review): structural lines (braces, case labels, break/return) were
 * lost in extraction and have been reconstructed from the surrounding line
 * numbering -- confirm against the driver's git history.
 */
int ni_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 mem_type, running, blackout = 0;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	/* pick the register table and ucode size for this asic family */
	switch (rdev->family) {
	case CHIP_BARTS:
		io_mc_regs = (u32 *)&barts_io_mc_regs;
		ucode_size = BTC_MC_UCODE_SIZE;
		regs_size = BTC_IO_MC_REGS_SIZE;
		break;
	case CHIP_TURKS:
		io_mc_regs = (u32 *)&turks_io_mc_regs;
		ucode_size = BTC_MC_UCODE_SIZE;
		regs_size = BTC_IO_MC_REGS_SIZE;
		break;
	case CHIP_CAICOS:
	default:
		io_mc_regs = (u32 *)&caicos_io_mc_regs;
		ucode_size = BTC_MC_UCODE_SIZE;
		regs_size = BTC_IO_MC_REGS_SIZE;
		break;
	case CHIP_CAYMAN:
		io_mc_regs = (u32 *)&cayman_io_mc_regs;
		ucode_size = CAYMAN_MC_UCODE_SIZE;
		regs_size = BTC_IO_MC_REGS_SIZE;
		break;
	}

	mem_type = (RREG32(MC_SEQ_MISC0) & MC_SEQ_MISC0_GDDR5_MASK) >> MC_SEQ_MISC0_GDDR5_SHIFT;
	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	/* only load on GDDR5 boards with an idle sequencer */
	if ((mem_type == MC_SEQ_MISC0_GDDR5_VALUE) && (running == 0)) {
		if (running) {
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode (firmware is big-endian on disk) */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_IO_PAD_CNTL_D0) & MEM_FALL_OUT_CMD)
				break;
			udelay(1);
		}

		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}
/**
 * ni_init_microcode - fetch PFP/ME/RLC (and, on dGPUs, MC) firmware images
 * @rdev: radeon_device pointer
 *
 * Registers a transient platform device for the firmware loader, requests
 * the per-family firmware blobs from userspace, and validates each blob's
 * size against the expected ucode word counts.  On any failure every
 * firmware reference obtained so far is released.
 *
 * Returns 0 on success, a negative error code otherwise.
 *
 * NOTE(review): case labels and the goto-out error paths were lost in
 * extraction and have been reconstructed -- confirm against driver history.
 */
int ni_init_microcode(struct radeon_device *rdev)
{
	struct platform_device *pdev;
	const char *chip_name;
	const char *rlc_chip_name;
	size_t pfp_req_size, me_req_size, rlc_req_size, mc_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
	err = IS_ERR(pdev);
	if (err) {
		printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
		return -EINVAL;
	}

	/* firmware file names and expected sizes per family */
	switch (rdev->family) {
	case CHIP_BARTS:
		chip_name = "BARTS";
		rlc_chip_name = "BTC";
		pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
		me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
		rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
		mc_req_size = BTC_MC_UCODE_SIZE * 4;
		break;
	case CHIP_TURKS:
		chip_name = "TURKS";
		rlc_chip_name = "BTC";
		pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
		me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
		rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
		mc_req_size = BTC_MC_UCODE_SIZE * 4;
		break;
	case CHIP_CAICOS:
		chip_name = "CAICOS";
		rlc_chip_name = "BTC";
		pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
		me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
		rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
		mc_req_size = BTC_MC_UCODE_SIZE * 4;
		break;
	case CHIP_CAYMAN:
		chip_name = "CAYMAN";
		rlc_chip_name = "CAYMAN";
		pfp_req_size = CAYMAN_PFP_UCODE_SIZE * 4;
		me_req_size = CAYMAN_PM4_UCODE_SIZE * 4;
		rlc_req_size = CAYMAN_RLC_UCODE_SIZE * 4;
		mc_req_size = CAYMAN_MC_UCODE_SIZE * 4;
		break;
	case CHIP_ARUBA:
		chip_name = "ARUBA";
		rlc_chip_name = "ARUBA";
		/* pfp/me same size as CAYMAN */
		pfp_req_size = CAYMAN_PFP_UCODE_SIZE * 4;
		me_req_size = CAYMAN_PM4_UCODE_SIZE * 4;
		rlc_req_size = ARUBA_RLC_UCODE_SIZE * 4;
		mc_req_size = 0;
		break;
	default:
		BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "ni_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "ni_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "ni_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	/* no MC ucode on TN */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
		if (err)
			goto out;
		if (rdev->mc_fw->size != mc_req_size) {
			printk(KERN_ERR
			       "ni_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
	}
out:
	platform_device_unregister(pdev);

	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "ni_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
	}
	return err;
}
/**
 * cayman_gpu_init - one-time GFX block setup for Cayman/Aruba asics
 * @rdev: radeon_device pointer
 *
 * Fills in rdev->config.cayman with the per-asic shader/pipe/backend
 * limits, derives tiling parameters from GB_ADDR_CONFIG and MC_ARB_RAMCFG,
 * computes the disabled-render-backend mask, and programs the 3D engine's
 * HW defaults (SX/SC/SQ/VGT/CB/HDP registers).
 *
 * NOTE(review): declarations, case labels and breaks were lost in
 * extraction and have been reconstructed -- confirm against driver history.
 */
static void cayman_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 cgts_tcc_disable;
	u32 sx_debug_1;
	u32 smx_dc_ctl0;
	u32 cgts_sm_ctrl_reg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	u32 disabled_rb_mask;
	int i, j;

	switch (rdev->family) {
	case CHIP_CAYMAN:
		rdev->config.cayman.max_shader_engines = 2;
		rdev->config.cayman.max_pipes_per_simd = 4;
		rdev->config.cayman.max_tile_pipes = 8;
		rdev->config.cayman.max_simds_per_se = 12;
		rdev->config.cayman.max_backends_per_se = 4;
		rdev->config.cayman.max_texture_channel_caches = 8;
		rdev->config.cayman.max_gprs = 256;
		rdev->config.cayman.max_threads = 256;
		rdev->config.cayman.max_gs_threads = 32;
		rdev->config.cayman.max_stack_entries = 512;
		rdev->config.cayman.sx_num_of_sets = 8;
		rdev->config.cayman.sx_max_export_size = 256;
		rdev->config.cayman.sx_max_export_pos_size = 64;
		rdev->config.cayman.sx_max_export_smx_size = 192;
		rdev->config.cayman.max_hw_contexts = 8;
		rdev->config.cayman.sq_num_cf_insts = 2;

		rdev->config.cayman.sc_prim_fifo_size = 0x100;
		rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CAYMAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_ARUBA:
	default:
		rdev->config.cayman.max_shader_engines = 1;
		rdev->config.cayman.max_pipes_per_simd = 4;
		rdev->config.cayman.max_tile_pipes = 2;
		/* SIMD/backend count depends on the exact TN/RL PCI device id */
		if ((rdev->pdev->device == 0x9900) ||
		    (rdev->pdev->device == 0x9901) ||
		    (rdev->pdev->device == 0x9905) ||
		    (rdev->pdev->device == 0x9906) ||
		    (rdev->pdev->device == 0x9907) ||
		    (rdev->pdev->device == 0x9908) ||
		    (rdev->pdev->device == 0x9909) ||
		    (rdev->pdev->device == 0x990B) ||
		    (rdev->pdev->device == 0x990C) ||
		    (rdev->pdev->device == 0x990F) ||
		    (rdev->pdev->device == 0x9910) ||
		    (rdev->pdev->device == 0x9917) ||
		    (rdev->pdev->device == 0x9999)) {
			rdev->config.cayman.max_simds_per_se = 6;
			rdev->config.cayman.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x9903) ||
			   (rdev->pdev->device == 0x9904) ||
			   (rdev->pdev->device == 0x990A) ||
			   (rdev->pdev->device == 0x990D) ||
			   (rdev->pdev->device == 0x990E) ||
			   (rdev->pdev->device == 0x9913) ||
			   (rdev->pdev->device == 0x9918)) {
			rdev->config.cayman.max_simds_per_se = 4;
			rdev->config.cayman.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x9919) ||
			   (rdev->pdev->device == 0x9990) ||
			   (rdev->pdev->device == 0x9991) ||
			   (rdev->pdev->device == 0x9994) ||
			   (rdev->pdev->device == 0x9995) ||
			   (rdev->pdev->device == 0x9996) ||
			   (rdev->pdev->device == 0x999A) ||
			   (rdev->pdev->device == 0x99A0)) {
			rdev->config.cayman.max_simds_per_se = 3;
			rdev->config.cayman.max_backends_per_se = 1;
		} else {
			rdev->config.cayman.max_simds_per_se = 2;
			rdev->config.cayman.max_backends_per_se = 1;
		}
		rdev->config.cayman.max_texture_channel_caches = 2;
		rdev->config.cayman.max_gprs = 256;
		rdev->config.cayman.max_threads = 256;
		rdev->config.cayman.max_gs_threads = 32;
		rdev->config.cayman.max_stack_entries = 512;
		rdev->config.cayman.sx_num_of_sets = 8;
		rdev->config.cayman.sx_max_export_size = 256;
		rdev->config.cayman.sx_max_export_pos_size = 64;
		rdev->config.cayman.sx_max_export_smx_size = 192;
		rdev->config.cayman.max_hw_contexts = 8;
		rdev->config.cayman.sq_num_cf_insts = 2;

		rdev->config.cayman.sc_prim_fifo_size = 0x40;
		rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = ARUBA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cayman.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cayman.mem_row_size_in_kb > 4)
		rdev->config.cayman.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cayman.shader_engine_tile_size = 32;
	rdev->config.cayman.num_gpus = 1;
	rdev->config.cayman.multi_gpu_tile_size = 64;

	/* decode gb_addr_config into the config struct */
	tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT;
	rdev->config.cayman.num_tile_pipes = (1 << tmp);
	tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT;
	rdev->config.cayman.mem_max_burst_length_bytes = (tmp + 1) * 256;
	tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT;
	rdev->config.cayman.num_shader_engines = tmp + 1;
	tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT;
	rdev->config.cayman.num_gpus = tmp + 1;
	tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT;
	rdev->config.cayman.multi_gpu_tile_size = 1 << tmp;
	tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT;
	rdev->config.cayman.mem_row_size_in_kb = 1 << tmp;

	/* setup tiling info dword. gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cayman.tile_config = 0;
	switch (rdev->config.cayman.num_tile_pipes) {
	case 1:
	default:
		rdev->config.cayman.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cayman.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cayman.tile_config |= (2 << 0);
		break;
	case 8:
		rdev->config.cayman.tile_config |= (3 << 0);
		break;
	}

	/* num banks is 8 on all fusion asics. 0 = 4, 1 = 8, 2 = 16 */
	if (rdev->flags & RADEON_IS_IGP)
		rdev->config.cayman.tile_config |= 1 << 4;
	else {
		switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
		case 0: /* four banks */
			rdev->config.cayman.tile_config |= 0 << 4;
			break;
		case 1: /* eight banks */
			rdev->config.cayman.tile_config |= 1 << 4;
			break;
		case 2: /* sixteen banks */
		default:
			rdev->config.cayman.tile_config |= 2 << 4;
			break;
		}
	}
	rdev->config.cayman.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cayman.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* collect the per-SE render-backend disable bits into one mask */
	tmp = 0;
	for (i = (rdev->config.cayman.max_shader_engines - 1); i >= 0; i--) {
		u32 rb_disable_bitmap;

		WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
		WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
		rb_disable_bitmap = (RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000) >> 16;
		tmp <<= 4;
		tmp |= rb_disable_bitmap;
	}
	/* enabled rb are just the one not disabled :) */
	disabled_rb_mask = tmp;

	WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
	WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);

	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);

	if ((rdev->config.cayman.max_backends_per_se == 1) &&
	    (rdev->flags & RADEON_IS_IGP)) {
		if ((disabled_rb_mask & 3) == 1) {
			/* RB0 disabled, RB1 enabled */
			tmp = 0x11111111;
		} else {
			/* RB1 disabled, RB0 enabled */
			tmp = 0x00000000;
		}
	} else {
		tmp = gb_addr_config & NUM_PIPES_MASK;
		tmp = r6xx_remap_render_backend(rdev, tmp,
						rdev->config.cayman.max_backends_per_se *
						rdev->config.cayman.max_shader_engines,
						CAYMAN_MAX_BACKENDS, disabled_rb_mask);
	}
	WREG32(GB_BACKEND_MAP, tmp);

	/* enable all texture channel caches the asic actually has */
	cgts_tcc_disable = 0xffff0000;
	for (i = 0; i < rdev->config.cayman.max_texture_channel_caches; i++)
		cgts_tcc_disable &= ~(1 << (16 + i));
	WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable);
	WREG32(CGTS_SYS_TCC_DISABLE, cgts_tcc_disable);
	WREG32(CGTS_USER_SYS_TCC_DISABLE, cgts_tcc_disable);
	WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable);

	/* reprogram the shader complex */
	cgts_sm_ctrl_reg = RREG32(CGTS_SM_CTRL_REG);
	for (i = 0; i < 16; i++)
		WREG32(CGTS_SM_CTRL_REG, OVERRIDE);
	WREG32(CGTS_SM_CTRL_REG, cgts_sm_ctrl_reg);

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	sx_debug_1 = RREG32(SX_DEBUG_1);
	sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS;
	WREG32(SX_DEBUG_1, sx_debug_1);

	smx_dc_ctl0 = RREG32(SMX_DC_CTL0);
	smx_dc_ctl0 &= ~NUMBER_OF_SETS(0x1ff);
	smx_dc_ctl0 |= NUMBER_OF_SETS(rdev->config.cayman.sx_num_of_sets);
	WREG32(SMX_DC_CTL0, smx_dc_ctl0);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4) | CRC_SIMD_ID_WADDR_DISABLE);

	/* need to be explicitly zero-ed */
	WREG32(VGT_OFFCHIP_LDS_BASE, 0);
	WREG32(SQ_LSTMP_RING_BASE, 0);
	WREG32(SQ_HSTMP_RING_BASE, 0);
	WREG32(SQ_ESTMP_RING_BASE, 0);
	WREG32(SQ_GSTMP_RING_BASE, 0);
	WREG32(SQ_VSTMP_RING_BASE, 0);
	WREG32(SQ_PSTMP_RING_BASE, 0);

	WREG32(TA_CNTL_AUX, DISABLE_CUBE_ANISO);

	WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.cayman.sx_max_export_size / 4) - 1) |
					POSITION_BUFFER_SIZE((rdev->config.cayman.sx_max_export_pos_size / 4) - 1) |
					SMX_BUFFER_SIZE((rdev->config.cayman.sx_max_export_smx_size / 4) - 1)));

	WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.cayman.sc_prim_fifo_size) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_MS_FIFO_SIZES, (CACHE_FIFO_SIZE(16 * rdev->config.cayman.sq_num_cf_insts) |
				  FETCH_FIFO_HIWATER(0x4) |
				  DONE_FIFO_HIWATER(0xe0) |
				  ALU_UPDATE_FIFO_HIWATER(0x8)));

	WREG32(SQ_GPR_RESOURCE_MGMT_1, NUM_CLAUSE_TEMP_GPRS(4));
	WREG32(SQ_CONFIG, (VC_ENABLE |
			   EXPORT_SRC_C |
			   GFX_PRIO(0) |
			   CS1_PRIO(0) |
			   CS2_PRIO(0)));
	WREG32(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, DYN_GPR_ENABLE);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	WREG32(CB_PERF_CTR0_SEL_0, 0);
	WREG32(CB_PERF_CTR0_SEL_1, 0);
	WREG32(CB_PERF_CTR1_SEL_0, 0);
	WREG32(CB_PERF_CTR1_SEL_1, 0);
	WREG32(CB_PERF_CTR2_SEL_0, 0);
	WREG32(CB_PERF_CTR2_SEL_1, 0);
	WREG32(CB_PERF_CTR3_SEL_0, 0);
	WREG32(CB_PERF_CTR3_SEL_1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	udelay(50);
}
/**
 * cayman_pcie_gart_tlb_flush - flush the GART TLBs for all VM contexts
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP cache first so any pending page-table writes are
 * visible, then requests a TLB invalidate for VM context 0.
 */
void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-7 are the VM contexts0-7 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
/**
 * cayman_pcie_gart_enable - pin the GART table and program the VM hardware
 * @rdev: radeon_device pointer
 *
 * Pins the page table in VRAM, configures the L1 TLB and L2 cache, points
 * VM context 0 at the system GART range, programs contexts 1-7 for the
 * on-the-fly per-process VMs set up in radeon_gart.c, and flushes the TLBs.
 *
 * Returns 0 on success, negative error code on failure.
 *
 * NOTE(review): some continuation lines of the register-flag expressions
 * were lost in extraction and reconstructed -- confirm against history.
 */
static int cayman_pcie_gart_enable(struct radeon_device *rdev)
{
	int i, r;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0: covers the GTT aperture */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
	       RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);

	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-7 */
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 8; i++) {
		WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (i << 2), 0);
		WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (i << 2), rdev->vm_manager.max_pfn);
		WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
		       rdev->gart.table_addr >> 12);
	}

	/* enable context1-7 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	cayman_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
/**
 * cayman_pcie_gart_disable - tear down VM/GART hardware state
 * @rdev: radeon_device pointer
 *
 * Disables all VM contexts, puts the TLB/L2 back into pass-through
 * defaults, and unpins the GART page table from VRAM.
 */
static void cayman_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
/**
 * cayman_pcie_gart_fini - final GART teardown
 * @rdev: radeon_device pointer
 *
 * Disables the VM hardware, then frees the page-table BO and the
 * driver-side GART bookkeeping.
 */
static void cayman_pcie_gart_fini(struct radeon_device *rdev)
{
	cayman_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
/**
 * cayman_cp_int_cntl_setup - program CP_INT_CNTL for one CP ring
 * @rdev: radeon_device pointer
 * @ring: CP ring index (0-2); selected via the low bits of SRBM_GFX_CNTL
 * @cp_int_cntl: interrupt-control value to write for that ring
 */
void cayman_cp_int_cntl_setup(struct radeon_device *rdev,
			      int ring, u32 cp_int_cntl)
{
	/* preserve everything but the ring-select field */
	u32 srbm_gfx_cntl = RREG32(SRBM_GFX_CNTL) & ~3;

	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl | (ring & 3));
	WREG32(CP_INT_CNTL, cp_int_cntl);
}
/**
 * cayman_fence_ring_emit - emit a fence on the gfx ring
 * @rdev: radeon_device pointer
 * @fence: fence to emit
 *
 * Emits a SURFACE_SYNC to flush the TC/SH read caches for this VMID,
 * followed by an EVENT_WRITE_EOP that writes the fence sequence number to
 * the fence address and raises an interrupt.
 */
void cayman_fence_ring_emit(struct radeon_device *rdev,
			    struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart for this vmid */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT_TS) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
/**
 * cayman_ring_ib_execute - schedule an indirect buffer on the ring
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to schedule
 *
 * Switches the CP to DX10/11 mode, optionally records the predicted read
 * pointer into rptr_save_reg, emits the INDIRECT_BUFFER packet with the
 * IB's GPU address/length/VMID, then flushes the read caches for that VMID.
 *
 * NOTE(review): the BIG_ENDIAN conditional lines were lost in extraction
 * and reconstructed -- confirm against driver history.
 */
void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];

	/* set to DX10/11 mode */
	radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
	radeon_ring_write(ring, 1);

	if (ring->rptr_save_reg) {
		/* 3 dw above + 4 dw for the IB packet + 8 dw for the flush */
		uint32_t next_rptr = ring->wptr + 3 + 4 + 8;
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, ((ring->rptr_save_reg -
					  PACKET3_SET_CONFIG_REG_START) >> 2));
		radeon_ring_write(ring, next_rptr);
	}

	radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF);
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	/* flush read cache over gart for this vmid */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
}
/**
 * cayman_cp_enable - start or halt the command processor
 * @rdev: radeon_device pointer
 * @enable: true to run the CP, false to halt ME and PFP
 *
 * On halt, also restores the full visible VRAM size for TTM and marks the
 * gfx ring not ready.
 */
static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_ME_CNTL, 0);
	else {
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
		WREG32(SCRATCH_UMSK, 0);
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
	}
}
/**
 * cayman_cp_load_microcode - upload PFP and ME ucode into the CP
 * @rdev: radeon_device pointer
 *
 * Halts the CP, streams the big-endian PFP and ME firmware words into the
 * respective ucode RAMs, and resets the ucode address/read pointers.
 *
 * Returns 0 on success, -EINVAL when the firmware was never loaded.
 */
static int cayman_cp_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->me_fw || !rdev->pfp_fw)
		return -EINVAL;

	cayman_cp_enable(rdev, false);

	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < CAYMAN_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CAYMAN_PM4_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));

	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}
/**
 * cayman_cp_start - initialize the CP and emit the clear-state preamble
 * @rdev: radeon_device pointer
 *
 * Emits ME_INITIALIZE on the gfx ring, starts the CP, then plays back the
 * cayman_default_state clear-context sequence (from cayman_blit_shaders)
 * plus the SQ_VTX base / const-clear packets.
 *
 * Returns 0 on success, or the radeon_ring_lock() error.
 *
 * NOTE(review): error-return lines and local declarations were lost in
 * extraction and reconstructed -- confirm against driver history.
 */
static int cayman_cp_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	r = radeon_ring_lock(rdev, ring, 7);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}
	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
	radeon_ring_write(ring, 0x1);
	radeon_ring_write(ring, 0x0);
	radeon_ring_write(ring, rdev->config.cayman.max_hw_contexts - 1);
	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
	radeon_ring_unlock_commit(rdev, ring);

	cayman_cp_enable(rdev, true);

	r = radeon_ring_lock(rdev, ring, cayman_default_size + 19);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	for (i = 0; i < cayman_default_size; i++)
		radeon_ring_write(ring, cayman_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	/* SQ_VTX_BASE_VTX_LOC */
	radeon_ring_write(ring, 0xc0026f00);
	radeon_ring_write(ring, 0x00000000);
	radeon_ring_write(ring, 0x00000000);
	radeon_ring_write(ring, 0x00000000);

	/* Clear consts */
	radeon_ring_write(ring, 0xc0036f00);
	radeon_ring_write(ring, 0x00000bc4);
	radeon_ring_write(ring, 0xffffffff);
	radeon_ring_write(ring, 0xffffffff);
	radeon_ring_write(ring, 0xffffffff);

	radeon_ring_write(ring, 0xc0026900);
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* */

	radeon_ring_unlock_commit(rdev, ring);

	/* XXX init other rings */

	return 0;
}
/*
 * cayman_cp_fini() - disable the CP, tear down the GFX ring and free
 * its rptr-save scratch register.
 * NOTE(review): corrupted extraction — function braces are missing
 * from the visible text.
 */
1039 static void cayman_cp_fini(struct radeon_device
*rdev
)
1041 struct radeon_ring
*ring
= &rdev
->ring
[RADEON_RING_TYPE_GFX_INDEX
];
1042 cayman_cp_enable(rdev
, false);
1043 radeon_ring_fini(rdev
, ring
);
1044 radeon_scratch_free(rdev
, ring
->rptr_save_reg
);
/*
 * cayman_cp_resume() - soft-reset the CP block, program the ring
 * buffers of the three CP rings (GFX, CP1, CP2) from the per-ring
 * register tables below, start the CP and ring-test ring 0.
 * NOTE(review): corrupted extraction — several array initializer
 * entries, local declarations (i, r, addr, rb_cntl), braces and the
 * error/return paths are missing; restore from the original file.
 */
1047 static int cayman_cp_resume(struct radeon_device
*rdev
)
1049 static const int ridx
[] = {
1050 RADEON_RING_TYPE_GFX_INDEX
,
1051 CAYMAN_RING_TYPE_CP1_INDEX
,
1052 CAYMAN_RING_TYPE_CP2_INDEX
1054 static const unsigned cp_rb_cntl
[] = {
1059 static const unsigned cp_rb_rptr_addr
[] = {
1064 static const unsigned cp_rb_rptr_addr_hi
[] = {
1065 CP_RB0_RPTR_ADDR_HI
,
1066 CP_RB1_RPTR_ADDR_HI
,
1069 static const unsigned cp_rb_base
[] = {
1074 struct radeon_ring
*ring
;
1077 /* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
1078 WREG32(GRBM_SOFT_RESET
, (SOFT_RESET_CP
|
1084 RREG32(GRBM_SOFT_RESET
);
1086 WREG32(GRBM_SOFT_RESET
, 0);
1087 RREG32(GRBM_SOFT_RESET
);
1089 WREG32(CP_SEM_WAIT_TIMER
, 0x0);
1090 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL
, 0x0);
1092 /* Set the write pointer delay */
1093 WREG32(CP_RB_WPTR_DELAY
, 0);
1095 WREG32(CP_DEBUG
, (1 << 27));
1097 /* set the wb address whether it's enabled or not */
1098 WREG32(SCRATCH_ADDR
, ((rdev
->wb
.gpu_addr
+ RADEON_WB_SCRATCH_OFFSET
) >> 8) & 0xFFFFFFFF);
1099 WREG32(SCRATCH_UMSK
, 0xff);
1101 for (i
= 0; i
< 3; ++i
) {
1105 /* Set ring buffer size */
1106 ring
= &rdev
->ring
[ridx
[i
]];
1107 rb_cntl
= drm_order(ring
->ring_size
/ 8);
1108 rb_cntl
|= drm_order(RADEON_GPU_PAGE_SIZE
/8) << 8;
1110 rb_cntl
|= BUF_SWAP_32BIT
;
1112 WREG32(cp_rb_cntl
[i
], rb_cntl
);
1114 /* set the wb address whether it's enabled or not */
1115 addr
= rdev
->wb
.gpu_addr
+ RADEON_WB_CP_RPTR_OFFSET
;
1116 WREG32(cp_rb_rptr_addr
[i
], addr
& 0xFFFFFFFC);
1117 WREG32(cp_rb_rptr_addr_hi
[i
], upper_32_bits(addr
) & 0xFF);
1120 /* set the rb base addr, this causes an internal reset of ALL rings */
1121 for (i
= 0; i
< 3; ++i
) {
1122 ring
= &rdev
->ring
[ridx
[i
]];
1123 WREG32(cp_rb_base
[i
], ring
->gpu_addr
>> 8);
1126 for (i
= 0; i
< 3; ++i
) {
1127 /* Initialize the ring buffer's read and write pointers */
1128 ring
= &rdev
->ring
[ridx
[i
]];
1129 WREG32_P(cp_rb_cntl
[i
], RB_RPTR_WR_ENA
, ~RB_RPTR_WR_ENA
);
1131 ring
->rptr
= ring
->wptr
= 0;
1132 WREG32(ring
->rptr_reg
, ring
->rptr
);
1133 WREG32(ring
->wptr_reg
, ring
->wptr
);
1136 WREG32_P(cp_rb_cntl
[i
], 0, ~RB_RPTR_WR_ENA
);
1139 /* start the rings */
1140 cayman_cp_start(rdev
);
1141 rdev
->ring
[RADEON_RING_TYPE_GFX_INDEX
].ready
= true;
1142 rdev
->ring
[CAYMAN_RING_TYPE_CP1_INDEX
].ready
= false;
1143 rdev
->ring
[CAYMAN_RING_TYPE_CP2_INDEX
].ready
= false;
1144 /* this only tests cp0 */
1145 r
= radeon_ring_test(rdev
, RADEON_RING_TYPE_GFX_INDEX
, &rdev
->ring
[RADEON_RING_TYPE_GFX_INDEX
]);
1147 rdev
->ring
[RADEON_RING_TYPE_GFX_INDEX
].ready
= false;
1148 rdev
->ring
[CAYMAN_RING_TYPE_CP1_INDEX
].ready
= false;
1149 rdev
->ring
[CAYMAN_RING_TYPE_CP2_INDEX
].ready
= false;
1158 * Starting with R600, the GPU has an asynchronous
1159 * DMA engine. The programming model is very similar
1160 * to the 3D engine (ring buffer, IBs, etc.), but the
1161 * DMA controller has its own packet format that is
1162 * different from the PM4 format used by the 3D engine.
1163 * It supports copying data, writing embedded data,
1164 * solid fills, and a number of other things. It also
1165 * has support for tiling/detiling of buffers.
1166 * Cayman and newer support two asynchronous DMA engines.
/*
 * cayman_dma_ring_ib_execute() - emit a DMA INDIRECT_BUFFER packet for
 * 'ib' on its DMA ring; when writeback is enabled, first emit a WRITE
 * packet updating next_rptr, and NOP-pad so the IB packet lands on an
 * 8-dword boundary (hardware requirement, see comment below).
 * NOTE(review): corrupted extraction — braces and the next_rptr
 * alignment increment inside the first while-loop are missing.
 */
1169 * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
1171 * @rdev: radeon_device pointer
1172 * @ib: IB object to schedule
1174 * Schedule an IB in the DMA ring (cayman-SI).
1176 void cayman_dma_ring_ib_execute(struct radeon_device
*rdev
,
1177 struct radeon_ib
*ib
)
1179 struct radeon_ring
*ring
= &rdev
->ring
[ib
->ring
];
1181 if (rdev
->wb
.enabled
) {
1182 u32 next_rptr
= ring
->wptr
+ 4;
1183 while ((next_rptr
& 7) != 5)
1186 radeon_ring_write(ring
, DMA_PACKET(DMA_PACKET_WRITE
, 0, 0, 1));
1187 radeon_ring_write(ring
, ring
->next_rptr_gpu_addr
& 0xfffffffc);
1188 radeon_ring_write(ring
, upper_32_bits(ring
->next_rptr_gpu_addr
) & 0xff);
1189 radeon_ring_write(ring
, next_rptr
);
1192 /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
1193 * Pad as necessary with NOPs.
1195 while ((ring
->wptr
& 7) != 5)
1196 radeon_ring_write(ring
, DMA_PACKET(DMA_PACKET_NOP
, 0, 0, 0));
1197 radeon_ring_write(ring
, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER
, ib
->vm
? ib
->vm
->id
: 0, 0));
1198 radeon_ring_write(ring
, (ib
->gpu_addr
& 0xFFFFFFE0));
1199 radeon_ring_write(ring
, (ib
->length_dw
<< 12) | (upper_32_bits(ib
->gpu_addr
) & 0xFF));
/*
 * cayman_dma_stop() - clear DMA_RB_ENABLE on both DMA engines and mark
 * their rings not ready; restores active VRAM size first.
 * NOTE(review): corrupted extraction — the 'u32 rb_cntl;' declaration,
 * braces and per-engine section comments are missing.
 */
1204 * cayman_dma_stop - stop the async dma engines
1206 * @rdev: radeon_device pointer
1208 * Stop the async dma engines (cayman-SI).
1210 void cayman_dma_stop(struct radeon_device
*rdev
)
1214 radeon_ttm_set_active_vram_size(rdev
, rdev
->mc
.visible_vram_size
);
1217 rb_cntl
= RREG32(DMA_RB_CNTL
+ DMA0_REGISTER_OFFSET
);
1218 rb_cntl
&= ~DMA_RB_ENABLE
;
1219 WREG32(DMA_RB_CNTL
+ DMA0_REGISTER_OFFSET
, rb_cntl
);
1222 rb_cntl
= RREG32(DMA_RB_CNTL
+ DMA1_REGISTER_OFFSET
);
1223 rb_cntl
&= ~DMA_RB_ENABLE
;
1224 WREG32(DMA_RB_CNTL
+ DMA1_REGISTER_OFFSET
, rb_cntl
);
1226 rdev
->ring
[R600_RING_TYPE_DMA_INDEX
].ready
= false;
1227 rdev
->ring
[CAYMAN_RING_TYPE_DMA1_INDEX
].ready
= false;
/*
 * cayman_dma_resume() - soft-reset both async DMA engines, program each
 * engine's ring buffer (size, rptr writeback address, base), enable
 * IBs and the ring, then ring-test each engine.
 * NOTE(review): corrupted extraction — local declarations (i, r,
 * rb_bufsz), if/else keywords around the DMA0/DMA1 selection, braces
 * and the return paths are missing; restore from the original file.
 */
1231 * cayman_dma_resume - setup and start the async dma engines
1233 * @rdev: radeon_device pointer
1235 * Set up the DMA ring buffers and enable them. (cayman-SI).
1236 * Returns 0 for success, error for failure.
1238 int cayman_dma_resume(struct radeon_device
*rdev
)
1240 struct radeon_ring
*ring
;
1241 u32 rb_cntl
, dma_cntl
, ib_cntl
;
1243 u32 reg_offset
, wb_offset
;
1247 WREG32(SRBM_SOFT_RESET
, SOFT_RESET_DMA
| SOFT_RESET_DMA1
);
1248 RREG32(SRBM_SOFT_RESET
);
1250 WREG32(SRBM_SOFT_RESET
, 0);
1252 for (i
= 0; i
< 2; i
++) {
1254 ring
= &rdev
->ring
[R600_RING_TYPE_DMA_INDEX
];
1255 reg_offset
= DMA0_REGISTER_OFFSET
;
1256 wb_offset
= R600_WB_DMA_RPTR_OFFSET
;
1258 ring
= &rdev
->ring
[CAYMAN_RING_TYPE_DMA1_INDEX
];
1259 reg_offset
= DMA1_REGISTER_OFFSET
;
1260 wb_offset
= CAYMAN_WB_DMA1_RPTR_OFFSET
;
1263 WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL
+ reg_offset
, 0);
1264 WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL
+ reg_offset
, 0);
1266 /* Set ring buffer size in dwords */
1267 rb_bufsz
= drm_order(ring
->ring_size
/ 4);
1268 rb_cntl
= rb_bufsz
<< 1;
1270 rb_cntl
|= DMA_RB_SWAP_ENABLE
| DMA_RPTR_WRITEBACK_SWAP_ENABLE
;
1272 WREG32(DMA_RB_CNTL
+ reg_offset
, rb_cntl
);
1274 /* Initialize the ring buffer's read and write pointers */
1275 WREG32(DMA_RB_RPTR
+ reg_offset
, 0);
1276 WREG32(DMA_RB_WPTR
+ reg_offset
, 0);
1278 /* set the wb address whether it's enabled or not */
1279 WREG32(DMA_RB_RPTR_ADDR_HI
+ reg_offset
,
1280 upper_32_bits(rdev
->wb
.gpu_addr
+ wb_offset
) & 0xFF);
1281 WREG32(DMA_RB_RPTR_ADDR_LO
+ reg_offset
,
1282 ((rdev
->wb
.gpu_addr
+ wb_offset
) & 0xFFFFFFFC));
1284 if (rdev
->wb
.enabled
)
1285 rb_cntl
|= DMA_RPTR_WRITEBACK_ENABLE
;
1287 WREG32(DMA_RB_BASE
+ reg_offset
, ring
->gpu_addr
>> 8);
1289 /* enable DMA IBs */
1290 ib_cntl
= DMA_IB_ENABLE
| CMD_VMID_FORCE
;
1292 ib_cntl
|= DMA_IB_SWAP_ENABLE
;
1294 WREG32(DMA_IB_CNTL
+ reg_offset
, ib_cntl
);
1296 dma_cntl
= RREG32(DMA_CNTL
+ reg_offset
);
1297 dma_cntl
&= ~CTXEMPTY_INT_ENABLE
;
1298 WREG32(DMA_CNTL
+ reg_offset
, dma_cntl
);
1301 WREG32(DMA_RB_WPTR
+ reg_offset
, ring
->wptr
<< 2);
1303 ring
->rptr
= RREG32(DMA_RB_RPTR
+ reg_offset
) >> 2;
1305 WREG32(DMA_RB_CNTL
+ reg_offset
, rb_cntl
| DMA_RB_ENABLE
);
1309 r
= radeon_ring_test(rdev
, ring
->idx
, ring
);
1311 ring
->ready
= false;
1316 radeon_ttm_set_active_vram_size(rdev
, rdev
->mc
.real_vram_size
);
/*
 * cayman_dma_fini() - stop both async DMA engines and free their rings.
 * NOTE(review): corrupted extraction — function braces are missing
 * from the visible text.
 */
1322 * cayman_dma_fini - tear down the async dma engines
1324 * @rdev: radeon_device pointer
1326 * Stop the async dma engines and free the rings (cayman-SI).
1328 void cayman_dma_fini(struct radeon_device
*rdev
)
1330 cayman_dma_stop(rdev
);
1331 radeon_ring_fini(rdev
, &rdev
->ring
[R600_RING_TYPE_DMA_INDEX
]);
1332 radeon_ring_fini(rdev
, &rdev
->ring
[CAYMAN_RING_TYPE_DMA1_INDEX
]);
/*
 * cayman_gpu_check_soft_reset() - read GRBM/SRBM/DMA/VM status
 * registers and build a RADEON_RESET_* bitmask of the engines that
 * appear hung; MC busy is logged but cleared from the mask.
 * NOTE(review): corrupted extraction — local declarations (tmp,
 * reset_mask), several status-bit tests (e.g. the conditions guarding
 * DMA/IH/SEM/VMC bits), braces and the final 'return reset_mask;' are
 * missing; restore from the original file.
 */
1335 static u32
cayman_gpu_check_soft_reset(struct radeon_device
*rdev
)
1341 tmp
= RREG32(GRBM_STATUS
);
1342 if (tmp
& (PA_BUSY
| SC_BUSY
|
1344 TA_BUSY
| VGT_BUSY
|
1346 GDS_BUSY
| SPI_BUSY
|
1347 IA_BUSY
| IA_BUSY_NO_DMA
))
1348 reset_mask
|= RADEON_RESET_GFX
;
1350 if (tmp
& (CF_RQ_PENDING
| PF_RQ_PENDING
|
1351 CP_BUSY
| CP_COHERENCY_BUSY
))
1352 reset_mask
|= RADEON_RESET_CP
;
1354 if (tmp
& GRBM_EE_BUSY
)
1355 reset_mask
|= RADEON_RESET_GRBM
| RADEON_RESET_GFX
| RADEON_RESET_CP
;
1357 /* DMA_STATUS_REG 0 */
1358 tmp
= RREG32(DMA_STATUS_REG
+ DMA0_REGISTER_OFFSET
);
1359 if (!(tmp
& DMA_IDLE
))
1360 reset_mask
|= RADEON_RESET_DMA
;
1362 /* DMA_STATUS_REG 1 */
1363 tmp
= RREG32(DMA_STATUS_REG
+ DMA1_REGISTER_OFFSET
);
1364 if (!(tmp
& DMA_IDLE
))
1365 reset_mask
|= RADEON_RESET_DMA1
;
1368 tmp
= RREG32(SRBM_STATUS2
);
1370 reset_mask
|= RADEON_RESET_DMA
;
1372 if (tmp
& DMA1_BUSY
)
1373 reset_mask
|= RADEON_RESET_DMA1
;
1376 tmp
= RREG32(SRBM_STATUS
);
1377 if (tmp
& (RLC_RQ_PENDING
| RLC_BUSY
))
1378 reset_mask
|= RADEON_RESET_RLC
;
1381 reset_mask
|= RADEON_RESET_IH
;
1384 reset_mask
|= RADEON_RESET_SEM
;
1386 if (tmp
& GRBM_RQ_PENDING
)
1387 reset_mask
|= RADEON_RESET_GRBM
;
1390 reset_mask
|= RADEON_RESET_VMC
;
1392 if (tmp
& (MCB_BUSY
| MCB_NON_DISPLAY_BUSY
|
1393 MCC_BUSY
| MCD_BUSY
))
1394 reset_mask
|= RADEON_RESET_MC
;
1396 if (evergreen_is_display_hung(rdev
))
1397 reset_mask
|= RADEON_RESET_DISPLAY
;
1400 tmp
= RREG32(VM_L2_STATUS
);
1402 reset_mask
|= RADEON_RESET_VMC
;
1404 /* Skip MC reset as it's most likely not hung, just busy */
1405 if (reset_mask
& RADEON_RESET_MC
) {
1406 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask
);
1407 reset_mask
&= ~RADEON_RESET_MC
;
/*
 * cayman_gpu_soft_reset() - perform a targeted soft reset: halt the CP
 * and any hung DMA engines, stop the MC, translate reset_mask into
 * GRBM/SRBM soft-reset bits, pulse those registers, then resume the MC
 * and dump status registers.
 * NOTE(review): corrupted extraction — local 'u32 tmp', the register
 * reads inside the dev_info calls, udelay() settle delays, braces and
 * several GRBM reset bits in the GFX branch are missing; restore from
 * the original file.
 */
1413 static void cayman_gpu_soft_reset(struct radeon_device
*rdev
, u32 reset_mask
)
1415 struct evergreen_mc_save save
;
1416 u32 grbm_soft_reset
= 0, srbm_soft_reset
= 0;
1419 if (reset_mask
== 0)
1422 dev_info(rdev
->dev
, "GPU softreset: 0x%08X\n", reset_mask
);
1424 evergreen_print_gpu_status_regs(rdev
);
1425 dev_info(rdev
->dev
, " VM_CONTEXT0_PROTECTION_FAULT_ADDR 0x%08X\n",
1427 dev_info(rdev
->dev
, " VM_CONTEXT0_PROTECTION_FAULT_STATUS 0x%08X\n",
1429 dev_info(rdev
->dev
, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
1431 dev_info(rdev
->dev
, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
1434 /* Disable CP parsing/prefetching */
1435 WREG32(CP_ME_CNTL
, CP_ME_HALT
| CP_PFP_HALT
);
1437 if (reset_mask
& RADEON_RESET_DMA
) {
1439 tmp
= RREG32(DMA_RB_CNTL
+ DMA0_REGISTER_OFFSET
);
1440 tmp
&= ~DMA_RB_ENABLE
;
1441 WREG32(DMA_RB_CNTL
+ DMA0_REGISTER_OFFSET
, tmp
);
1444 if (reset_mask
& RADEON_RESET_DMA1
) {
1446 tmp
= RREG32(DMA_RB_CNTL
+ DMA1_REGISTER_OFFSET
);
1447 tmp
&= ~DMA_RB_ENABLE
;
1448 WREG32(DMA_RB_CNTL
+ DMA1_REGISTER_OFFSET
, tmp
);
1453 evergreen_mc_stop(rdev
, &save
);
1454 if (evergreen_mc_wait_for_idle(rdev
)) {
1455 dev_warn(rdev
->dev
, "Wait for MC idle timedout !\n");
1458 if (reset_mask
& (RADEON_RESET_GFX
| RADEON_RESET_COMPUTE
)) {
1459 grbm_soft_reset
= SOFT_RESET_CB
|
1473 if (reset_mask
& RADEON_RESET_CP
) {
1474 grbm_soft_reset
|= SOFT_RESET_CP
| SOFT_RESET_VGT
;
1476 srbm_soft_reset
|= SOFT_RESET_GRBM
;
1479 if (reset_mask
& RADEON_RESET_DMA
)
1480 srbm_soft_reset
|= SOFT_RESET_DMA
;
1482 if (reset_mask
& RADEON_RESET_DMA1
)
1483 srbm_soft_reset
|= SOFT_RESET_DMA1
;
1485 if (reset_mask
& RADEON_RESET_DISPLAY
)
1486 srbm_soft_reset
|= SOFT_RESET_DC
;
1488 if (reset_mask
& RADEON_RESET_RLC
)
1489 srbm_soft_reset
|= SOFT_RESET_RLC
;
1491 if (reset_mask
& RADEON_RESET_SEM
)
1492 srbm_soft_reset
|= SOFT_RESET_SEM
;
1494 if (reset_mask
& RADEON_RESET_IH
)
1495 srbm_soft_reset
|= SOFT_RESET_IH
;
1497 if (reset_mask
& RADEON_RESET_GRBM
)
1498 srbm_soft_reset
|= SOFT_RESET_GRBM
;
1500 if (reset_mask
& RADEON_RESET_VMC
)
1501 srbm_soft_reset
|= SOFT_RESET_VMC
;
1503 if (!(rdev
->flags
& RADEON_IS_IGP
)) {
1504 if (reset_mask
& RADEON_RESET_MC
)
1505 srbm_soft_reset
|= SOFT_RESET_MC
;
1508 if (grbm_soft_reset
) {
1509 tmp
= RREG32(GRBM_SOFT_RESET
);
1510 tmp
|= grbm_soft_reset
;
1511 dev_info(rdev
->dev
, "GRBM_SOFT_RESET=0x%08X\n", tmp
);
1512 WREG32(GRBM_SOFT_RESET
, tmp
);
1513 tmp
= RREG32(GRBM_SOFT_RESET
);
1517 tmp
&= ~grbm_soft_reset
;
1518 WREG32(GRBM_SOFT_RESET
, tmp
);
1519 tmp
= RREG32(GRBM_SOFT_RESET
);
1522 if (srbm_soft_reset
) {
1523 tmp
= RREG32(SRBM_SOFT_RESET
);
1524 tmp
|= srbm_soft_reset
;
1525 dev_info(rdev
->dev
, "SRBM_SOFT_RESET=0x%08X\n", tmp
);
1526 WREG32(SRBM_SOFT_RESET
, tmp
);
1527 tmp
= RREG32(SRBM_SOFT_RESET
);
1531 tmp
&= ~srbm_soft_reset
;
1532 WREG32(SRBM_SOFT_RESET
, tmp
);
1533 tmp
= RREG32(SRBM_SOFT_RESET
);
1536 /* Wait a little for things to settle down */
1539 evergreen_mc_resume(rdev
, &save
);
1542 evergreen_print_gpu_status_regs(rdev
);
/*
 * cayman_asic_reset() - check which engines are hung, soft-reset them,
 * re-check, and update the BIOS scratch "engine hung" flag accordingly.
 * NOTE(review): corrupted extraction — the reset_mask declaration,
 * surrounding if-conditions, braces and 'return 0;' are missing.
 */
1545 int cayman_asic_reset(struct radeon_device
*rdev
)
1549 reset_mask
= cayman_gpu_check_soft_reset(rdev
);
1552 r600_set_bios_scratch_engine_hung(rdev
, true);
1554 cayman_gpu_soft_reset(rdev
, reset_mask
);
1556 reset_mask
= cayman_gpu_check_soft_reset(rdev
);
1559 r600_set_bios_scratch_engine_hung(rdev
, false);
/*
 * cayman_gfx_is_lockup() - lockup detector for the GFX ring: if no
 * GFX/COMPUTE/CP reset bit is pending, refresh the lockup timer and
 * report not hung; otherwise force CP activity and run the ring
 * lockup test.
 * NOTE(review): corrupted extraction — braces and the 'return false;'
 * in the not-hung branch are missing from the visible text.
 */
1565 * cayman_gfx_is_lockup - Check if the GFX engine is locked up
1567 * @rdev: radeon_device pointer
1568 * @ring: radeon_ring structure holding ring information
1570 * Check if the GFX engine is locked up.
1571 * Returns true if the engine appears to be locked up, false if not.
1573 bool cayman_gfx_is_lockup(struct radeon_device
*rdev
, struct radeon_ring
*ring
)
1575 u32 reset_mask
= cayman_gpu_check_soft_reset(rdev
);
1577 if (!(reset_mask
& (RADEON_RESET_GFX
|
1578 RADEON_RESET_COMPUTE
|
1579 RADEON_RESET_CP
))) {
1580 radeon_ring_lockup_update(ring
);
1583 /* force CP activities */
1584 radeon_ring_force_activity(rdev
, ring
);
1585 return radeon_ring_test_lockup(rdev
, ring
);
/*
 * cayman_dma_is_lockup() - lockup detector for a DMA ring; chooses the
 * DMA0 or DMA1 reset bit based on ring->idx, then follows the same
 * pattern as cayman_gfx_is_lockup().
 * NOTE(review): corrupted extraction — the 'u32 mask' declaration, the
 * if/else keywords around the mask selection, braces and the
 * 'return false;' in the not-hung branch are missing.
 */
1589 * cayman_dma_is_lockup - Check if the DMA engine is locked up
1591 * @rdev: radeon_device pointer
1592 * @ring: radeon_ring structure holding ring information
1594 * Check if the async DMA engine is locked up.
1595 * Returns true if the engine appears to be locked up, false if not.
1597 bool cayman_dma_is_lockup(struct radeon_device
*rdev
, struct radeon_ring
*ring
)
1599 u32 reset_mask
= cayman_gpu_check_soft_reset(rdev
);
1602 if (ring
->idx
== R600_RING_TYPE_DMA_INDEX
)
1603 mask
= RADEON_RESET_DMA
;
1605 mask
= RADEON_RESET_DMA1
;
1607 if (!(reset_mask
& mask
)) {
1608 radeon_ring_lockup_update(ring
);
1611 /* force ring activities */
1612 radeon_ring_force_activity(rdev
, ring
);
1613 return radeon_ring_test_lockup(rdev
, ring
);
/*
 * cayman_startup() - bring the ASIC up: load microcode (MC ucode only
 * on discrete parts, i.e. !RADEON_IS_IGP), program the MC, enable GART,
 * init the GPU/blitter/RLC/writeback, start fences and IRQs, init and
 * resume the CP and DMA rings, then the IB pool, VM manager and audio.
 * NOTE(review): corrupted extraction — nearly every 'if (r) return r;'
 * error check, the 'int r;' declaration, braces and the final
 * 'return 0;' are missing from the visible text; restore from the
 * original file.
 */
1616 static int cayman_startup(struct radeon_device
*rdev
)
1618 struct radeon_ring
*ring
= &rdev
->ring
[RADEON_RING_TYPE_GFX_INDEX
];
1621 /* enable pcie gen2 link */
1622 evergreen_pcie_gen2_enable(rdev
);
1624 if (rdev
->flags
& RADEON_IS_IGP
) {
1625 if (!rdev
->me_fw
|| !rdev
->pfp_fw
|| !rdev
->rlc_fw
) {
1626 r
= ni_init_microcode(rdev
);
1628 DRM_ERROR("Failed to load firmware!\n");
1633 if (!rdev
->me_fw
|| !rdev
->pfp_fw
|| !rdev
->rlc_fw
|| !rdev
->mc_fw
) {
1634 r
= ni_init_microcode(rdev
);
1636 DRM_ERROR("Failed to load firmware!\n");
1641 r
= ni_mc_load_microcode(rdev
);
1643 DRM_ERROR("Failed to load MC firmware!\n");
1648 r
= r600_vram_scratch_init(rdev
);
1652 evergreen_mc_program(rdev
);
1653 r
= cayman_pcie_gart_enable(rdev
);
1656 cayman_gpu_init(rdev
);
1658 r
= evergreen_blit_init(rdev
);
1660 r600_blit_fini(rdev
);
1661 rdev
->asic
->copy
.copy
= NULL
;
1662 dev_warn(rdev
->dev
, "failed blitter (%d) falling back to memcpy\n", r
);
1665 /* allocate rlc buffers */
1666 if (rdev
->flags
& RADEON_IS_IGP
) {
1667 r
= si_rlc_init(rdev
);
1669 DRM_ERROR("Failed to init rlc BOs!\n");
1674 /* allocate wb buffer */
1675 r
= radeon_wb_init(rdev
);
1679 r
= radeon_fence_driver_start_ring(rdev
, RADEON_RING_TYPE_GFX_INDEX
);
1681 dev_err(rdev
->dev
, "failed initializing CP fences (%d).\n", r
);
1685 r
= radeon_fence_driver_start_ring(rdev
, CAYMAN_RING_TYPE_CP1_INDEX
);
1687 dev_err(rdev
->dev
, "failed initializing CP fences (%d).\n", r
);
1691 r
= radeon_fence_driver_start_ring(rdev
, CAYMAN_RING_TYPE_CP2_INDEX
);
1693 dev_err(rdev
->dev
, "failed initializing CP fences (%d).\n", r
);
1697 r
= radeon_fence_driver_start_ring(rdev
, R600_RING_TYPE_DMA_INDEX
);
1699 dev_err(rdev
->dev
, "failed initializing DMA fences (%d).\n", r
);
1703 r
= radeon_fence_driver_start_ring(rdev
, CAYMAN_RING_TYPE_DMA1_INDEX
);
1705 dev_err(rdev
->dev
, "failed initializing DMA fences (%d).\n", r
);
1710 r
= r600_irq_init(rdev
);
1712 DRM_ERROR("radeon: IH init failed (%d).\n", r
);
1713 radeon_irq_kms_fini(rdev
);
1716 evergreen_irq_set(rdev
);
1718 r
= radeon_ring_init(rdev
, ring
, ring
->ring_size
, RADEON_WB_CP_RPTR_OFFSET
,
1719 CP_RB0_RPTR
, CP_RB0_WPTR
,
1720 0, 0xfffff, RADEON_CP_PACKET2
);
1724 ring
= &rdev
->ring
[R600_RING_TYPE_DMA_INDEX
];
1725 r
= radeon_ring_init(rdev
, ring
, ring
->ring_size
, R600_WB_DMA_RPTR_OFFSET
,
1726 DMA_RB_RPTR
+ DMA0_REGISTER_OFFSET
,
1727 DMA_RB_WPTR
+ DMA0_REGISTER_OFFSET
,
1728 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP
, 0, 0, 0));
1732 ring
= &rdev
->ring
[CAYMAN_RING_TYPE_DMA1_INDEX
];
1733 r
= radeon_ring_init(rdev
, ring
, ring
->ring_size
, CAYMAN_WB_DMA1_RPTR_OFFSET
,
1734 DMA_RB_RPTR
+ DMA1_REGISTER_OFFSET
,
1735 DMA_RB_WPTR
+ DMA1_REGISTER_OFFSET
,
1736 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP
, 0, 0, 0));
1740 r
= cayman_cp_load_microcode(rdev
);
1743 r
= cayman_cp_resume(rdev
);
1747 r
= cayman_dma_resume(rdev
);
1751 r
= radeon_ib_pool_init(rdev
);
1753 dev_err(rdev
->dev
, "IB initialization failed (%d).\n", r
);
1757 r
= radeon_vm_manager_init(rdev
);
1759 dev_err(rdev
->dev
, "vm manager initialization failed (%d).\n", r
);
1763 r
= r600_audio_init(rdev
);
/*
 * cayman_resume() - resume-from-suspend path: re-run the ATOM ASIC
 * init tables, then cayman_startup(); clears accel_working on failure.
 * NOTE(review): corrupted extraction — braces, part of the comment,
 * and the 'return r;' lines are missing from the visible text.
 */
1770 int cayman_resume(struct radeon_device
*rdev
)
1774 /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
1775 * posting will perform necessary task to bring back GPU into good
1779 atom_asic_init(rdev
->mode_info
.atom_context
);
1781 rdev
->accel_working
= true;
1782 r
= cayman_startup(rdev
);
1784 DRM_ERROR("cayman startup failed on resume\n");
1785 rdev
->accel_working
= false;
/*
 * cayman_suspend() - quiesce the ASIC for suspend: stop audio, the VM
 * manager, CP and DMA engines, suspend IRQs, disable writeback and
 * tear down GART mappings.
 * NOTE(review): corrupted extraction — braces and 'return 0;' are
 * missing from the visible text.
 */
1791 int cayman_suspend(struct radeon_device
*rdev
)
1793 r600_audio_fini(rdev
);
1794 radeon_vm_manager_fini(rdev
);
1795 cayman_cp_enable(rdev
, false);
1796 cayman_dma_stop(rdev
);
1797 evergreen_irq_suspend(rdev
);
1798 radeon_wb_disable(rdev
);
1799 cayman_pcie_gart_disable(rdev
);
/*
 * cayman_init() - one-time driver init: read and validate the (ATOM)
 * BIOS, post the card if needed, init scratch/surface registers,
 * clocks, fences, MC, BO/TTM, IRQs and the GFX/DMA rings, set up GART,
 * then attempt cayman_startup(), unwinding everything on failure.
 * NOTE(review): corrupted extraction — the 'int r;' declaration, most
 * 'if (r) return r;' checks, several return statements and braces are
 * missing from the visible text; restore from the original file.
 */
1803 /* Plan is to move initialization in that function and use
1804 * helper function so that radeon_device_init pretty much
1805 * do nothing more than calling asic specific function. This
1806 * should also allow to remove a bunch of callback function
1809 int cayman_init(struct radeon_device
*rdev
)
1811 struct radeon_ring
*ring
= &rdev
->ring
[RADEON_RING_TYPE_GFX_INDEX
];
1815 if (!radeon_get_bios(rdev
)) {
1816 if (ASIC_IS_AVIVO(rdev
))
1819 /* Must be an ATOMBIOS */
1820 if (!rdev
->is_atom_bios
) {
1821 dev_err(rdev
->dev
, "Expecting atombios for cayman GPU\n");
1824 r
= radeon_atombios_init(rdev
);
1828 /* Post card if necessary */
1829 if (!radeon_card_posted(rdev
)) {
1831 dev_err(rdev
->dev
, "Card not posted and no BIOS - ignoring\n");
1834 DRM_INFO("GPU not posted. posting now...\n");
1835 atom_asic_init(rdev
->mode_info
.atom_context
);
1837 /* Initialize scratch registers */
1838 r600_scratch_init(rdev
);
1839 /* Initialize surface registers */
1840 radeon_surface_init(rdev
);
1841 /* Initialize clocks */
1842 radeon_get_clock_info(rdev
->ddev
);
1844 r
= radeon_fence_driver_init(rdev
);
1847 /* initialize memory controller */
1848 r
= evergreen_mc_init(rdev
);
1851 /* Memory manager */
1852 r
= radeon_bo_init(rdev
);
1856 r
= radeon_irq_kms_init(rdev
);
1860 ring
->ring_obj
= NULL
;
1861 r600_ring_init(rdev
, ring
, 1024 * 1024);
1863 ring
= &rdev
->ring
[R600_RING_TYPE_DMA_INDEX
];
1864 ring
->ring_obj
= NULL
;
1865 r600_ring_init(rdev
, ring
, 64 * 1024);
1867 ring
= &rdev
->ring
[CAYMAN_RING_TYPE_DMA1_INDEX
];
1868 ring
->ring_obj
= NULL
;
1869 r600_ring_init(rdev
, ring
, 64 * 1024);
1871 rdev
->ih
.ring_obj
= NULL
;
1872 r600_ih_ring_init(rdev
, 64 * 1024);
1874 r
= r600_pcie_gart_init(rdev
);
1878 rdev
->accel_working
= true;
1879 r
= cayman_startup(rdev
);
1881 dev_err(rdev
->dev
, "disabling GPU acceleration\n");
1882 cayman_cp_fini(rdev
);
1883 cayman_dma_fini(rdev
);
1884 r600_irq_fini(rdev
);
1885 if (rdev
->flags
& RADEON_IS_IGP
)
1887 radeon_wb_fini(rdev
);
1888 radeon_ib_pool_fini(rdev
);
1889 radeon_vm_manager_fini(rdev
);
1890 radeon_irq_kms_fini(rdev
);
1891 cayman_pcie_gart_fini(rdev
);
1892 rdev
->accel_working
= false;
1895 /* Don't start up if the MC ucode is missing.
1896 * The default clocks and voltages before the MC ucode
1897 * is loaded are not sufficient for advanced operations.
1899 * We can skip this check for TN, because there is no MC
1902 if (!rdev
->mc_fw
&& !(rdev
->flags
& RADEON_IS_IGP
)) {
1903 DRM_ERROR("radeon: MC ucode required for NI+.\n");
/*
 * cayman_fini() - full teardown mirroring cayman_init(): blitter, CP,
 * DMA, IRQs, RLC (IGP only), writeback, VM manager, IB pool, GART,
 * VRAM scratch, GEM, fences, BO/TTM and ATOM BIOS state.
 * NOTE(review): corrupted extraction — braces and the si_rlc_fini()
 * call under the RADEON_IS_IGP check are missing from the visible
 * text.
 */
1910 void cayman_fini(struct radeon_device
*rdev
)
1912 r600_blit_fini(rdev
);
1913 cayman_cp_fini(rdev
);
1914 cayman_dma_fini(rdev
);
1915 r600_irq_fini(rdev
);
1916 if (rdev
->flags
& RADEON_IS_IGP
)
1918 radeon_wb_fini(rdev
);
1919 radeon_vm_manager_fini(rdev
);
1920 radeon_ib_pool_fini(rdev
);
1921 radeon_irq_kms_fini(rdev
);
1922 cayman_pcie_gart_fini(rdev
);
1923 r600_vram_scratch_fini(rdev
);
1924 radeon_gem_fini(rdev
);
1925 radeon_fence_driver_fini(rdev
);
1926 radeon_bo_fini(rdev
);
1927 radeon_atombios_fini(rdev
);
/*
 * cayman_vm_init() - set up VM manager parameters: 8 VM contexts, and
 * a VRAM base offset taken from FUS_MC_VM_FB_OFFSET on IGP (fusion)
 * parts, 0 on discrete parts.
 * NOTE(review): corrupted extraction — the shift applied to 'tmp',
 * braces and 'return 0;' are missing from the visible text.
 */
1935 int cayman_vm_init(struct radeon_device
*rdev
)
1938 rdev
->vm_manager
.nvm
= 8;
1939 /* base offset of vram pages */
1940 if (rdev
->flags
& RADEON_IS_IGP
) {
1941 u64 tmp
= RREG32(FUS_MC_VM_FB_OFFSET
);
1943 rdev
->vm_manager
.vram_base_offset
= tmp
;
1945 rdev
->vm_manager
.vram_base_offset
= 0;
/*
 * cayman_vm_fini() - VM manager teardown hook; the body is empty in
 * the original source (nothing hardware-side to undo).
 * NOTE(review): only the signature is visible here — braces are
 * missing from this extraction.
 */
1949 void cayman_vm_fini(struct radeon_device
*rdev
)
/*
 * R600-family page-table entry bits, and
 * cayman_vm_page_flags() - translate generic RADEON_VM_PAGE_* flags
 * into the hardware R600_PTE_* / R600_ENTRY_VALID bit encoding.
 * SNOOPED is only meaningful for SYSTEM pages, hence the nesting.
 * NOTE(review): corrupted extraction — braces and
 * 'return r600_flags;' are missing from the visible text.
 */
1953 #define R600_ENTRY_VALID (1 << 0)
1954 #define R600_PTE_SYSTEM (1 << 1)
1955 #define R600_PTE_SNOOPED (1 << 2)
1956 #define R600_PTE_READABLE (1 << 5)
1957 #define R600_PTE_WRITEABLE (1 << 6)
1959 uint32_t cayman_vm_page_flags(struct radeon_device
*rdev
, uint32_t flags
)
1961 uint32_t r600_flags
= 0;
1962 r600_flags
|= (flags
& RADEON_VM_PAGE_VALID
) ? R600_ENTRY_VALID
: 0;
1963 r600_flags
|= (flags
& RADEON_VM_PAGE_READABLE
) ? R600_PTE_READABLE
: 0;
1964 r600_flags
|= (flags
& RADEON_VM_PAGE_WRITEABLE
) ? R600_PTE_WRITEABLE
: 0;
1965 if (flags
& RADEON_VM_PAGE_SYSTEM
) {
1966 r600_flags
|= R600_PTE_SYSTEM
;
1967 r600_flags
|= (flags
& RADEON_VM_PAGE_SNOOPED
) ? R600_PTE_SNOOPED
: 0;
/*
 * cayman_vm_set_page() - fill 'ib' with page-table update commands,
 * using PACKET3_ME_WRITE on the CP path or DMA_PACKET_WRITE on the DMA
 * path (selected by rdev->asic->vm.pt_ring_index); for system pages
 * each entry goes through radeon_vm_map_gart(). The DMA variant is
 * NOP-padded to an 8-dword boundary.
 * NOTE(review): corrupted extraction — the 'pe' parameter, local
 * declarations (ndw, value), the ndw clamping/looping around the
 * count, the addr advancement, the VALID-page branch bodies and
 * braces are missing from the visible text; restore from the original
 * file.
 */
1973 * cayman_vm_set_page - update the page tables using the CP
1975 * @rdev: radeon_device pointer
1976 * @ib: indirect buffer to fill with commands
1977 * @pe: addr of the page entry
1978 * @addr: dst addr to write into pe
1979 * @count: number of page entries to update
1980 * @incr: increase next addr by incr bytes
1981 * @flags: access flags
1983 * Update the page tables using the CP (cayman/TN).
1985 void cayman_vm_set_page(struct radeon_device
*rdev
,
1986 struct radeon_ib
*ib
,
1988 uint64_t addr
, unsigned count
,
1989 uint32_t incr
, uint32_t flags
)
1991 uint32_t r600_flags
= cayman_vm_page_flags(rdev
, flags
);
1995 if (rdev
->asic
->vm
.pt_ring_index
== RADEON_RING_TYPE_GFX_INDEX
) {
1997 ndw
= 1 + count
* 2;
2001 ib
->ptr
[ib
->length_dw
++] = PACKET3(PACKET3_ME_WRITE
, ndw
);
2002 ib
->ptr
[ib
->length_dw
++] = pe
;
2003 ib
->ptr
[ib
->length_dw
++] = upper_32_bits(pe
) & 0xff;
2004 for (; ndw
> 1; ndw
-= 2, --count
, pe
+= 8) {
2005 if (flags
& RADEON_VM_PAGE_SYSTEM
) {
2006 value
= radeon_vm_map_gart(rdev
, addr
);
2007 value
&= 0xFFFFFFFFFFFFF000ULL
;
2008 } else if (flags
& RADEON_VM_PAGE_VALID
) {
2014 value
|= r600_flags
;
2015 ib
->ptr
[ib
->length_dw
++] = value
;
2016 ib
->ptr
[ib
->length_dw
++] = upper_32_bits(value
);
2025 /* for non-physically contiguous pages (system) */
2026 ib
->ptr
[ib
->length_dw
++] = DMA_PACKET(DMA_PACKET_WRITE
, 0, 0, ndw
);
2027 ib
->ptr
[ib
->length_dw
++] = pe
;
2028 ib
->ptr
[ib
->length_dw
++] = upper_32_bits(pe
) & 0xff;
2029 for (; ndw
> 0; ndw
-= 2, --count
, pe
+= 8) {
2030 if (flags
& RADEON_VM_PAGE_SYSTEM
) {
2031 value
= radeon_vm_map_gart(rdev
, addr
);
2032 value
&= 0xFFFFFFFFFFFFF000ULL
;
2033 } else if (flags
& RADEON_VM_PAGE_VALID
) {
2039 value
|= r600_flags
;
2040 ib
->ptr
[ib
->length_dw
++] = value
;
2041 ib
->ptr
[ib
->length_dw
++] = upper_32_bits(value
);
2044 while (ib
->length_dw
& 0x7)
2045 ib
->ptr
[ib
->length_dw
++] = DMA_PACKET(DMA_PACKET_NOP
, 0, 0, 0);
/*
 * cayman_vm_flush() - on a CP ring: update the VM context's page-table
 * base register, flush the HDP cache, invalidate the VM context's TLB,
 * and PFP_SYNC_ME so the prefetcher does not read stale PTEs.
 * NOTE(review): corrupted extraction — the 'if (vm == NULL) return;'
 * guard and braces are missing from the visible text.
 */
2050 * cayman_vm_flush - vm flush using the CP
2052 * @rdev: radeon_device pointer
2054 * Update the page table base and flush the VM TLB
2055 * using the CP (cayman-si).
2057 void cayman_vm_flush(struct radeon_device
*rdev
, int ridx
, struct radeon_vm
*vm
)
2059 struct radeon_ring
*ring
= &rdev
->ring
[ridx
];
2064 radeon_ring_write(ring
, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR
+ (vm
->id
<< 2), 0));
2065 radeon_ring_write(ring
, vm
->pd_gpu_addr
>> 12);
2067 /* flush hdp cache */
2068 radeon_ring_write(ring
, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL
, 0));
2069 radeon_ring_write(ring
, 0x1);
2071 /* bits 0-7 are the VM contexts0-7 */
2072 radeon_ring_write(ring
, PACKET0(VM_INVALIDATE_REQUEST
, 0));
2073 radeon_ring_write(ring
, 1 << vm
->id
);
2075 /* sync PFP to ME, otherwise we might get invalid PFP reads */
2076 radeon_ring_write(ring
, PACKET3(PACKET3_PFP_SYNC_ME
, 0));
2077 radeon_ring_write(ring
, 0x0);
2080 void cayman_dma_vm_flush(struct radeon_device
*rdev
, int ridx
, struct radeon_vm
*vm
)
2082 struct radeon_ring
*ring
= &rdev
->ring
[ridx
];
2087 radeon_ring_write(ring
, DMA_PACKET(DMA_PACKET_SRBM_WRITE
, 0, 0, 0));
2088 radeon_ring_write(ring
, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR
+ (vm
->id
<< 2)) >> 2));
2089 radeon_ring_write(ring
, vm
->pd_gpu_addr
>> 12);
2091 /* flush hdp cache */
2092 radeon_ring_write(ring
, DMA_PACKET(DMA_PACKET_SRBM_WRITE
, 0, 0, 0));
2093 radeon_ring_write(ring
, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL
>> 2));
2094 radeon_ring_write(ring
, 1);
2096 /* bits 0-7 are the VM contexts0-7 */
2097 radeon_ring_write(ring
, DMA_PACKET(DMA_PACKET_SRBM_WRITE
, 0, 0, 0));
2098 radeon_ring_write(ring
, (0xf << 16) | (VM_INVALIDATE_REQUEST
>> 2));
2099 radeon_ring_write(ring
, 1 << vm
->id
);