/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include <drm/radeon_drm.h>
#include "sid.h"
#include "atom.h"
#include "si_blit_shaders.h"
#define SI_PFP_UCODE_SIZE 2144
#define SI_PM4_UCODE_SIZE 2144
#define SI_CE_UCODE_SIZE 2144
#define SI_RLC_UCODE_SIZE 2048
#define SI_MC_UCODE_SIZE 7769
#define OLAND_MC_UCODE_SIZE 7863
MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
#define PCIE_BUS_CLK 10000
#define TCLK (PCIE_BUS_CLK / 10)
/**
 * si_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (SI).
 */
u32 si_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;
	u32 tmp;

	tmp = RREG32(CG_CLKPIN_CNTL_2);
	if (tmp & MUX_TCLK_TO_XCLK)
		return TCLK;

	tmp = RREG32(CG_CLKPIN_CNTL);
	if (tmp & XTALIN_DIVIDE)
		return reference_clock / 4;

	return reference_clock;
}
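
/* A note on the decoding below: CG_MULT_THERMAL_STATUS reports the die
 * temperature as a 9-bit value in degrees C; a reading with bit 9 set is
 * out of range and is clamped to 255 C before scaling to millidegrees.
 */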
/* get temperature in millidegrees */
int si_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = (actual_temp * 1000);

	return actual_temp;
}
#define TAHITI_IO_MC_REGS_SIZE 36

static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
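
/* The MC ucode upload below follows the same sequence on every SI part:
 * halt the MC sequencer and make it writable, program the per-ASIC IO debug
 * register pairs from the tables above, stream the big-endian ucode image
 * into MC_SEQ_SUP_PGM, then restart the engine and poll for the D0/D1
 * memory-training-done flags.
 */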
static int si_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running, blackout = 0;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_TAHITI:
		io_mc_regs = (u32 *)&tahiti_io_mc_regs;
		ucode_size = SI_MC_UCODE_SIZE;
		regs_size = TAHITI_IO_MC_REGS_SIZE;
		break;
	case CHIP_PITCAIRN:
		io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
		ucode_size = SI_MC_UCODE_SIZE;
		regs_size = TAHITI_IO_MC_REGS_SIZE;
		break;
	case CHIP_VERDE:
	default:
		io_mc_regs = (u32 *)&verde_io_mc_regs;
		ucode_size = SI_MC_UCODE_SIZE;
		regs_size = TAHITI_IO_MC_REGS_SIZE;
		break;
	case CHIP_OLAND:
		io_mc_regs = (u32 *)&oland_io_mc_regs;
		ucode_size = OLAND_MC_UCODE_SIZE;
		regs_size = TAHITI_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		if (running) {
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}

		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}
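
/* Firmware images are requested by family name, e.g. radeon/TAHITI_pfp.bin;
 * each blob must match the exact size implied by the word counts defined at
 * the top of the file (SI_*_UCODE_SIZE * 4 bytes) or it is rejected as bogus.
 */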
static int si_init_microcode(struct radeon_device *rdev)
{
	struct platform_device *pdev;
	const char *chip_name;
	const char *rlc_chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
	err = IS_ERR(pdev);
	if (err) {
		printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
		return -EINVAL;
	}

	switch (rdev->family) {
	case CHIP_TAHITI:
		chip_name = "TAHITI";
		rlc_chip_name = "TAHITI";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		break;
	case CHIP_PITCAIRN:
		chip_name = "PITCAIRN";
		rlc_chip_name = "PITCAIRN";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		break;
	case CHIP_VERDE:
		chip_name = "VERDE";
		rlc_chip_name = "VERDE";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		break;
	case CHIP_OLAND:
		chip_name = "OLAND";
		rlc_chip_name = "OLAND";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = OLAND_MC_UCODE_SIZE * 4;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
	err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->mc_fw->size != mc_req_size) {
		printk(KERN_ERR
		       "si_mc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mc_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

out:
	platform_device_unregister(pdev);

	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "si_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
	}
	return err;
}
/* watermark setup */
static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode,
				   struct drm_display_mode *other_mode)
{
	u32 tmp;
	/*
	 * Line Buffer Setup
	 * There are 3 line buffers, each one shared by 2 display controllers.
	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
	 * the display controllers.  The partitioning is done via one of four
	 * preset allocations specified in bits 21:20:
	 *  0 - half lb
	 *  2 - whole lb, other crtc must be disabled
	 */
	/* this can get tricky if we have two large displays on a paired group
	 * of crtcs.  Ideally for multiple large displays we'd assign them to
	 * non-linked crtcs for maximum line buffer allocation.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (other_mode)
			tmp = 0; /* 1/2 */
		else
			tmp = 2; /* whole */
	} else
		tmp = 0;

	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
	       DC_LB_MEMORY_CONFIG(tmp));

	if (radeon_crtc->base.enabled && mode) {
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 2:
			return 8192 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
{
	u32 tmp = RREG32(MC_SHARED_CHMAP);

	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		return 1;
	case 1:
		return 2;
	case 2:
		return 4;
	case 3:
		return 8;
	case 4:
		return 3;
	case 5:
		return 6;
	case 6:
		return 10;
	case 7:
		return 12;
	case 8:
		return 16;
	}
}
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;   /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
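
/* The dce6_* helpers below do their arithmetic in fixed20_12 (20.12 fixed
 * point, from drm_fixed.h).  As a worked example of the next function, the
 * raw DRAM bandwidth is
 *   bandwidth = (yclk / 1000) * (dram_channels * 4 bytes) * 0.7
 * in MB/s, so 4 channels at yclk = 1000000 kHz give 1000 * 16 * 0.7 = 11200.
 */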
static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
{
	/* Calculate raw DRAM Bandwidth */
	fixed20_12 dram_efficiency; /* 0.7 */
	fixed20_12 yclk, dram_channels, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	yclk.full = dfixed_const(wm->yclk);
	yclk.full = dfixed_div(yclk, a);
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
	a.full = dfixed_const(10);
	dram_efficiency.full = dfixed_const(7);
	dram_efficiency.full = dfixed_div(dram_efficiency, a);
	bandwidth.full = dfixed_mul(dram_channels, yclk);
	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);

	return dfixed_trunc(bandwidth);
}
static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
{
	/* Calculate DRAM Bandwidth and the part allocated to display. */
	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
	fixed20_12 yclk, dram_channels, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	yclk.full = dfixed_const(wm->yclk);
	yclk.full = dfixed_div(yclk, a);
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
	a.full = dfixed_const(10);
	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
	bandwidth.full = dfixed_mul(dram_channels, yclk);
	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);

	return dfixed_trunc(bandwidth);
}
static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
{
	/* Calculate the display Data return Bandwidth */
	fixed20_12 return_efficiency; /* 0.8 */
	fixed20_12 sclk, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	sclk.full = dfixed_const(wm->sclk);
	sclk.full = dfixed_div(sclk, a);
	a.full = dfixed_const(10);
	return_efficiency.full = dfixed_const(8);
	return_efficiency.full = dfixed_div(return_efficiency, a);
	a.full = dfixed_const(32);
	bandwidth.full = dfixed_mul(a, sclk);
	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);

	return dfixed_trunc(bandwidth);
}
static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
{
	return 32;
}

static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
{
	/* Calculate the DMIF Request Bandwidth */
	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
	fixed20_12 disp_clk, sclk, bandwidth;
	fixed20_12 a, b1, b2;
	u32 min_bandwidth;

	a.full = dfixed_const(1000);
	disp_clk.full = dfixed_const(wm->disp_clk);
	disp_clk.full = dfixed_div(disp_clk, a);
	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
	b1.full = dfixed_mul(a, disp_clk);

	a.full = dfixed_const(1000);
	sclk.full = dfixed_const(wm->sclk);
	sclk.full = dfixed_div(sclk, a);
	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
	b2.full = dfixed_mul(a, sclk);

	a.full = dfixed_const(10);
	disp_clk_request_efficiency.full = dfixed_const(8);
	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);

	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));

	a.full = dfixed_const(min_bandwidth);
	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);

	return dfixed_trunc(bandwidth);
}
static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
{
	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);

	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
}
static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
{
	/* Calculate the display mode Average Bandwidth
	 * DisplayMode should contain the source and destination dimensions,
	 * timing, etc.
	 */
	fixed20_12 bpp;
	fixed20_12 line_time;
	fixed20_12 src_width;
	fixed20_12 bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
	line_time.full = dfixed_div(line_time, a);
	bpp.full = dfixed_const(wm->bytes_per_pixel);
	src_width.full = dfixed_const(wm->src_width);
	bandwidth.full = dfixed_mul(src_width, bpp);
	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
	bandwidth.full = dfixed_div(bandwidth, line_time);

	return dfixed_trunc(bandwidth);
}
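
/* The latency watermark computed below is the sum of the assumed MC latency
 * (2000 ns), the dc pipe latency (40000000 / disp_clk) and the time the other
 * heads spend returning chunk and cursor data; if the line buffer cannot
 * refill a line within the active display time, the fill deficit is added.
 */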
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
{
	if (dce6_average_bandwidth(wm) <=
	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
		return true;
	else
		return false;
}
static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
{
	if (dce6_average_bandwidth(wm) <=
	    (dce6_available_bandwidth(wm) / wm->num_heads))
		return true;
	else
		return false;
}
static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
{
	u32 lb_partitions = wm->lb_size / wm->src_width;
	u32 line_time = wm->active_time + wm->blank_time;
	u32 latency_tolerant_lines;
	u32 latency_hiding;
	fixed20_12 a;

	a.full = dfixed_const(1);
	if (wm->vsc.full > a.full)
		latency_tolerant_lines = 1;
	else {
		if (lb_partitions <= (wm->vtaps + 1))
			latency_tolerant_lines = 1;
		else
			latency_tolerant_lines = 2;
	}

	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);

	if (dce6_latency_watermark(wm) <= latency_hiding)
		return true;
	else
		return false;
}
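
/* Watermark sets A and B are programmed through the same
 * DPG_PIPE_LATENCY_CONTROL register below: DPG_PIPE_ARBITRATION_CONTROL3
 * selects which set is being written, and the original selection is
 * restored afterwards.
 */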
static void dce6_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		wm.yclk = rdev->pm.current_mclk * 10;
		wm.sclk = rdev->pm.current_sclk * 10;
		wm.disp_clk = mode->clock;
		wm.src_width = mode->crtc_hdisplay;
		wm.active_time = mode->crtc_hdisplay * pixel_period;
		wm.blank_time = line_time - wm.active_time;
		wm.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm.interlaced = true;
		wm.vsc = radeon_crtc->vsc;
		wm.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm.vtaps = 2;
		wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm.lb_size = lb_size;
		if (rdev->family == CHIP_ARUBA)
			wm.dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			wm.dram_channels = si_get_number_of_dram_channels(rdev);
		wm.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm), (u32)65535);
		/* set for low clocks */
		/* wm.yclk = low clk; wm.sclk = low clk */
		latency_watermark_b = min(dce6_latency_watermark(&wm), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm) ||
		    !dce6_check_latency_hiding(&wm) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
}
void dce6_bandwidth_update(struct radeon_device *rdev)
{
	struct drm_display_mode *mode0 = NULL;
	struct drm_display_mode *mode1 = NULL;
	u32 num_heads = 0, lb_size;
	int i;

	radeon_update_display_priority(rdev);

	for (i = 0; i < rdev->num_crtc; i++) {
		if (rdev->mode_info.crtcs[i]->base.enabled)
			num_heads++;
	}
	for (i = 0; i < rdev->num_crtc; i += 2) {
		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
	}
}
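
/* Each GB_TILE_MODEn register written below packs one tiling description:
 * array mode, micro tile mode, pipe config, tile split, bank geometry and
 * macro tile aspect.  Only the tile-split field varies with the probed DRAM
 * row size; everything else is a per-family constant.
 */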
/*
 * Core functions
 */
static void si_tiling_mode_table_init(struct radeon_device *rdev)
{
	const u32 num_tile_mode_states = 32;
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;

	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
		break;
	case 2:
	default:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
		break;
	case 4:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
		break;
	}
	if ((rdev->family == CHIP_TAHITI) ||
	    (rdev->family == CHIP_PITCAIRN)) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:  /* non-AA compressed depth or any compressed stencil */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 1:  /* 2xAA/4xAA compressed depth only */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 2:  /* 8xAA compressed depth only */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 8:  /* 1D and 1D Array Surfaces */
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 9:  /* Displayable maps. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 10:  /* Display 8bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 11:  /* Display 16bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 12:  /* Display 32bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			case 13:  /* Thin. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 14:  /* Thin 8 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			case 15:  /* Thin 16 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			case 16:  /* Thin 32 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			case 17:  /* Thin 64 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			case 21:  /* 8 bpp PRT. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 22:  /* 16 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 23:  /* 32 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 24:  /* 64 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 25:  /* 128 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
						 NUM_BANKS(ADDR_SURF_8_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if ((rdev->family == CHIP_VERDE) ||
		   (rdev->family == CHIP_OLAND)) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:  /* non-AA compressed depth or any compressed stencil */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 1:  /* 2xAA/4xAA compressed depth only */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 2:  /* 8xAA compressed depth only */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 8:  /* 1D and 1D Array Surfaces */
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 9:  /* Displayable maps. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 10:  /* Display 8bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 11:  /* Display 16bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 12:  /* Display 32bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 13:  /* Thin. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 14:  /* Thin 8 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 15:  /* Thin 16 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 16:  /* Thin 32 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 17:  /* Thin 64 bpp. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
						 TILE_SPLIT(split_equal_to_row_size) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 21:  /* 8 bpp PRT. */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 22:  /* 16 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
				break;
			case 23:  /* 32 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 24:  /* 64 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
						 NUM_BANKS(ADDR_SURF_16_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
				break;
			case 25:  /* 128 bpp PRT */
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
						 NUM_BANKS(ADDR_SURF_8_BANK) |
						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else
		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
}
static void si_select_se_sh(struct radeon_device *rdev,
			    u32 se_num, u32 sh_num)
{
	u32 data = INSTANCE_BROADCAST_WRITES;

	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
		data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
	else if (se_num == 0xffffffff)
		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
	else if (sh_num == 0xffffffff)
		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
	else
		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
	WREG32(GRBM_GFX_INDEX, data);
}
static u32 si_create_bitmask(u32 bit_width)
{
	u32 i, mask = 0;

	for (i = 0; i < bit_width; i++) {
		mask <<= 1;
		mask |= 1;
	}
	return mask;
}
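
/* si_create_bitmask(n) yields the low n bits set (e.g. n = 4 gives 0xf).
 * si_get_cu_enabled() below inverts the combined hw and user inactive-CU
 * fields and masks the result down to cu_per_sh valid bits, giving the
 * bitmap of active compute units in the currently selected shader array.
 */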
static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
{
	u32 data, mask;

	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	if (data & 1)
		data &= INACTIVE_CUS_MASK;
	else
		data = 0;
	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);

	data >>= INACTIVE_CUS_SHIFT;

	mask = si_create_bitmask(cu_per_sh);

	return ~data & mask;
}
static void si_setup_spi(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 cu_per_sh)
{
	int i, j, k;
	u32 data, mask, active_cu;

	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
			active_cu = si_get_cu_enabled(rdev, cu_per_sh);

			mask = 1;
			for (k = 0; k < 16; k++) {
				mask <<= 1;
				if (active_cu & mask) {
					data &= ~mask;
					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
					break;
				}
			}
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
static u32 si_get_rb_disabled(struct radeon_device *rdev,
			      u32 max_rb_num, u32 se_num,
			      u32 sh_per_se)
{
	u32 data, mask;

	data = RREG32(CC_RB_BACKEND_DISABLE);
	if (data & 1)
		data &= BACKEND_DISABLE_MASK;
	else
		data = 0;
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

	mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);

	return data & mask;
}
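
/* si_setup_rb() below gathers the per-SH disabled-RB bits into one bitmap,
 * derives the enabled set, and encodes a 2-bit render-backend map per shader
 * array into PA_SC_RASTER_CONFIG for each shader engine.
 */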
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 2;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}
	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);
1783 /* set HW defaults for 3D engine */
1784 WREG32(CP_QUEUE_THRESHOLDS
, (ROQ_IB1_START(0x16) |
1785 ROQ_IB2_START(0x2b)));
1786 WREG32(CP_MEQ_THRESHOLDS
, MEQ1_START(0x30) | MEQ2_START(0x60));
1788 sx_debug_1
= RREG32(SX_DEBUG_1
);
1789 WREG32(SX_DEBUG_1
, sx_debug_1
);
1791 WREG32(SPI_CONFIG_CNTL_1
, VTX_DONE_DELAY(4));
1793 WREG32(PA_SC_FIFO_SIZE
, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev
->config
.si
.sc_prim_fifo_size_frontend
) |
1794 SC_BACKEND_PRIM_FIFO_SIZE(rdev
->config
.si
.sc_prim_fifo_size_backend
) |
1795 SC_HIZ_TILE_FIFO_SIZE(rdev
->config
.si
.sc_hiz_tile_fifo_size
) |
1796 SC_EARLYZ_TILE_FIFO_SIZE(rdev
->config
.si
.sc_earlyz_tile_fifo_size
)));
1798 WREG32(VGT_NUM_INSTANCES
, 1);
1800 WREG32(CP_PERFMON_CNTL
, 0);
1802 WREG32(SQ_CONFIG
, 0);
1804 WREG32(PA_SC_FORCE_EOV_MAX_CNTS
, (FORCE_EOV_MAX_CLK_CNT(4095) |
1805 FORCE_EOV_MAX_REZ_CNT(255)));
1807 WREG32(VGT_CACHE_INVALIDATION
, CACHE_INVALIDATION(VC_AND_TC
) |
1808 AUTO_INVLD_EN(ES_AND_GS_AUTO
));
1810 WREG32(VGT_GS_VERTEX_REUSE
, 16);
1811 WREG32(PA_SC_LINE_STIPPLE_STATE
, 0);
1813 WREG32(CB_PERFCOUNTER0_SELECT0
, 0);
1814 WREG32(CB_PERFCOUNTER0_SELECT1
, 0);
1815 WREG32(CB_PERFCOUNTER1_SELECT0
, 0);
1816 WREG32(CB_PERFCOUNTER1_SELECT1
, 0);
1817 WREG32(CB_PERFCOUNTER2_SELECT0
, 0);
1818 WREG32(CB_PERFCOUNTER2_SELECT1
, 0);
1819 WREG32(CB_PERFCOUNTER3_SELECT0
, 0);
1820 WREG32(CB_PERFCOUNTER3_SELECT1
, 0);
1822 tmp
= RREG32(HDP_MISC_CNTL
);
1823 tmp
|= HDP_FLUSH_INVALIDATE_CACHE
;
1824 WREG32(HDP_MISC_CNTL
, tmp
);
1826 hdp_host_path_cntl
= RREG32(HDP_HOST_PATH_CNTL
);
1827 WREG32(HDP_HOST_PATH_CNTL
, hdp_host_path_cntl
);
1829 WREG32(PA_CL_ENHANCE
, CLIP_VTX_REORDER_ENA
| NUM_CLIP_SEQ(3));
/*
 * GPU scratch registers helpers function.
 */
static void si_scratch_init(struct radeon_device *rdev)
{
	int i;

	rdev->scratch.num_reg = 7;
	rdev->scratch.reg_base = SCRATCH_REG0;
	for (i = 0; i < rdev->scratch.num_reg; i++) {
		rdev->scratch.free[i] = true;
		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
	}
}
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
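/* Fence emission note (added for clarity): the EVENT_WRITE_EOP payload above
 * splits the 64-bit fence address across two dwords; DATA_SEL(1) selects a
 * 32-bit data write (the fence sequence number) and INT_SEL(2) requests an
 * interrupt once the write completes, per the usual radeon EOP encoding.
 */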
/*
 * IB stuff
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
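/* Sizing note for the next_rptr bookkeeping above (added for clarity):
 * 3 + 4 + 8 counts the dwords emitted after the rptr update -- 3 for the
 * SET_CONFIG_REG write itself, 4 for the INDIRECT_BUFFER packet, 8 for the
 * cache-flush sequence; the write-back path uses 5 + 4 + 8 because its
 * WRITE_DATA packet occupies five dwords.
 */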
static void si_cp_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_ME_CNTL, 0);
	else {
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
		WREG32(SCRATCH_UMSK, 0);
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}
	udelay(50);
}
static int si_cp_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->me_fw || !rdev->pfp_fw)
		return -EINVAL;

	si_cp_enable(rdev, false);

	/* PFP */
	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	/* CE */
	fw_data = (const __be32 *)rdev->ce_fw->data;
	WREG32(CP_CE_UCODE_ADDR, 0);
	for (i = 0; i < SI_CE_UCODE_SIZE; i++)
		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_CE_UCODE_ADDR, 0);

	/* ME */
	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_ME_RAM_WADDR, 0);

	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_CE_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
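	/* Upload note (added for clarity): each *_UCODE_ADDR write of 0 resets
	 * the upload pointer before (and after) streaming SI_*_UCODE_SIZE
	 * big-endian dwords through the matching *_UCODE_DATA register, which
	 * is assumed to auto-increment the address on every data write.
	 */
	return 0;
}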
static int si_cp_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	r = radeon_ring_lock(rdev, ring, 7 + 4);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}
	/* init the CP */
	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
	radeon_ring_write(ring, 0x1);
	radeon_ring_write(ring, 0x0);
	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);

	/* init the CE partitions */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0xc000);
	radeon_ring_write(ring, 0xe000);
	radeon_ring_unlock_commit(rdev, ring);

	si_cp_enable(rdev, true);

	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	for (i = 0; i < si_default_size; i++)
		radeon_ring_write(ring, si_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring);

	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
		ring = &rdev->ring[i];
		r = radeon_ring_lock(rdev, ring, 2);

		/* clear the compute context state */
		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
		radeon_ring_write(ring, 0);

		radeon_ring_unlock_commit(rdev, ring);
	}

	return 0;
}
static void si_cp_fini(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	si_cp_enable(rdev, false);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	radeon_ring_fini(rdev, ring);
	radeon_scratch_free(rdev, ring->rptr_save_reg);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	radeon_ring_fini(rdev, ring);
	radeon_scratch_free(rdev, ring->rptr_save_reg);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	radeon_ring_fini(rdev, ring);
	radeon_scratch_free(rdev, ring->rptr_save_reg);
}
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
				 SOFT_RESET_PA |
				 SOFT_RESET_VGT |
				 SOFT_RESET_SPI |
				 SOFT_RESET_SX));
	RREG32(GRBM_SOFT_RESET);
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB1_RPTR);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	ring->rptr = RREG32(CP_RB2_RPTR);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	return 0;
}
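/* Ring sizing note (added for clarity): rb_bufsz is the log2 of the ring
 * size in qwords; for example a 1MB ring gives drm_order(1048576 / 8) = 17.
 * The value placed in bits 15:8, drm_order(RADEON_GPU_PAGE_SIZE / 8) = 9
 * for 4KB GPU pages, is assumed to set the block size used for read-pointer
 * reporting.
 */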
static u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
int si_asic_reset(struct radeon_device *rdev)
{
	u32 reset_mask;

	reset_mask = si_gpu_check_soft_reset(rdev);

	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	si_gpu_soft_reset(rdev, reset_mask);

	reset_mask = si_gpu_check_soft_reset(rdev);

	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
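/* Reset flow summary (added for clarity): sample the busy bits, flag the
 * engine as hung in the BIOS scratch register while the reset is attempted,
 * then re-sample and clear the flag only if the offending blocks went idle.
 */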
/**
 * si_gfx_is_lockup - Check if the GFX engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the GFX engine is locked up.
 * Returns true if the engine appears to be locked up, false if not.
 */
bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = si_gpu_check_soft_reset(rdev);

	if (!(reset_mask & (RADEON_RESET_GFX |
			    RADEON_RESET_COMPUTE |
			    RADEON_RESET_CP))) {
		radeon_ring_lockup_update(ring);
		return false;
	}
	/* force CP activities */
	radeon_ring_force_activity(rdev, ring);
	return radeon_ring_test_lockup(rdev, ring);
}
/**
 * si_dma_is_lockup - Check if the DMA engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the async DMA engine is locked up.
 * Returns true if the engine appears to be locked up, false if not.
 */
bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = si_gpu_check_soft_reset(rdev);
	u32 mask;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		mask = RADEON_RESET_DMA;
	else
		mask = RADEON_RESET_DMA1;

	if (!(reset_mask & mask)) {
		radeon_ring_lockup_update(ring);
		return false;
	}
	/* force ring activities */
	radeon_ring_force_activity(rdev, ring);
	return radeon_ring_test_lockup(rdev, ring);
}
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
/* SI MC address space is 40 bits */
static void si_vram_location(struct radeon_device *rdev,
			     struct radeon_mc *mc, u64 base)
{
	mc->vram_start = base;
	if (mc->mc_vram_size > (0xFFFFFFFFFFULL - base + 1)) {
		dev_warn(rdev->dev, "limiting VRAM to PCI aperture size\n");
		mc->real_vram_size = mc->aper_size;
		mc->mc_vram_size = mc->aper_size;
	}
	mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
	dev_info(rdev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
		 mc->mc_vram_size >> 20, mc->vram_start,
		 mc->vram_end, mc->real_vram_size >> 20);
}
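/* Address-space note (added for clarity): 0xFFFFFFFFFFULL is 2^40 - 1, so
 * the check above simply refuses to place VRAM past the top of the 40-bit
 * MC window (1TB) and falls back to the PCI aperture size instead.
 */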
static void si_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
{
	u64 size_af, size_bf;

	size_af = ((0xFFFFFFFFFFULL - mc->vram_end) + mc->gtt_base_align) & ~mc->gtt_base_align;
	size_bf = mc->vram_start & ~mc->gtt_base_align;
	if (size_bf > size_af) {
		if (mc->gtt_size > size_bf) {
			dev_warn(rdev->dev, "limiting GTT\n");
			mc->gtt_size = size_bf;
		}
		mc->gtt_start = (mc->vram_start & ~mc->gtt_base_align) - mc->gtt_size;
	} else {
		if (mc->gtt_size > size_af) {
			dev_warn(rdev->dev, "limiting GTT\n");
			mc->gtt_size = size_af;
		}
		mc->gtt_start = (mc->vram_end + 1 + mc->gtt_base_align) & ~mc->gtt_base_align;
	}
	mc->gtt_end = mc->gtt_start + mc->gtt_size - 1;
	dev_info(rdev->dev, "GTT: %lluM 0x%016llX - 0x%016llX\n",
		 mc->gtt_size >> 20, mc->gtt_start, mc->gtt_end);
}
static void si_vram_gtt_location(struct radeon_device *rdev,
				 struct radeon_mc *mc)
{
	if (mc->mc_vram_size > 0xFFC0000000ULL) {
		/* leave room for at least 1024M GTT */
		dev_warn(rdev->dev, "limiting VRAM\n");
		mc->real_vram_size = 0xFFC0000000ULL;
		mc->mc_vram_size = 0xFFC0000000ULL;
	}
	si_vram_location(rdev, &rdev->mc, 0);
	rdev->mc.gtt_base_align = 0;
	si_gtt_location(rdev, mc);
}
static int si_mc_init(struct radeon_device *rdev)
{
	u32 tmp;
	int chansize, numchan;

	/* Get VRAM informations */
	rdev->mc.vram_is_ddr = true;
	tmp = RREG32(MC_ARB_RAMCFG);
	if (tmp & CHANSIZE_OVERRIDE) {
		chansize = 16;
	} else if (tmp & CHANSIZE_MASK) {
		chansize = 64;
	} else {
		chansize = 32;
	}
	tmp = RREG32(MC_SHARED_CHMAP);
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		numchan = 1;
		break;
	case 1:
		numchan = 2;
		break;
	case 2:
		numchan = 4;
		break;
	case 3:
		numchan = 8;
		break;
	case 4:
		numchan = 3;
		break;
	case 5:
		numchan = 6;
		break;
	case 6:
		numchan = 10;
		break;
	case 7:
		numchan = 12;
		break;
	case 8:
		numchan = 16;
		break;
	}
	rdev->mc.vram_width = numchan * chansize;
	/* Could aper size report 0 ? */
	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
	/* size in MB on si */
	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
	rdev->mc.visible_vram_size = rdev->mc.aper_size;
	si_vram_gtt_location(rdev, &rdev->mc);
	radeon_update_bandwidth_info(rdev);
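	/* Width example (illustrative, not from the original source): with
	 * CHANSIZE_MASK set (64-bit channels) and a NOOFCHAN field decoding
	 * to 4 channels, vram_width becomes 4 * 64 = 256 bits.
	 */
	return 0;
}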
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
	       RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
	       RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
	       DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
	       DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
	       PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
	       PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
	       VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
	       VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
	       READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
	       READ_PROTECTION_FAULT_ENABLE_DEFAULT |
	       WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
	       WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
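/* VM layout note (added for clarity): context 0 covers the shared GART
 * range, while contexts 1-15 are handed out to userspace VMs; the
 * PAGE_TABLE_DEPTH(1) setting presumably selects a two-level page table for
 * those, with the per-VM directories filled in later by radeon_gart.c as
 * the comment above describes.
 */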
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}

static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
static bool si_vm_reg_valid(u32 reg)
{
	/* context regs are fine */
	if (reg >= 0x28000)
		return true;

	/* check config regs */
	switch (reg) {
	case GRBM_GFX_INDEX:
	case CP_STRMOUT_CNTL:
	case VGT_VTX_VECT_EJECT_REG:
	case VGT_CACHE_INVALIDATION:
	case VGT_ESGS_RING_SIZE:
	case VGT_GSVS_RING_SIZE:
	case VGT_GS_VERTEX_REUSE:
	case VGT_PRIMITIVE_TYPE:
	case VGT_INDEX_TYPE:
	case VGT_NUM_INDICES:
	case VGT_NUM_INSTANCES:
	case VGT_TF_RING_SIZE:
	case VGT_HS_OFFCHIP_PARAM:
	case VGT_TF_MEMORY_BASE:
	case PA_CL_ENHANCE:
	case PA_SU_LINE_STIPPLE_VALUE:
	case PA_SC_LINE_STIPPLE_STATE:
	case PA_SC_ENHANCE:
	case SQC_CACHES:
	case SPI_STATIC_THREAD_MGMT_1:
	case SPI_STATIC_THREAD_MGMT_2:
	case SPI_STATIC_THREAD_MGMT_3:
	case SPI_PS_MAX_WAVE_ID:
	case SPI_CONFIG_CNTL:
	case SPI_CONFIG_CNTL_1:
	case TA_CNTL_AUX:
		return true;
	default:
		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
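		return false;
	}
}

/* Whitelist rationale (added for clarity): anything at or above the
 * context-register range is allowed outright, a handful of harmless config
 * registers are allowed by name, and every other register write coming from
 * a VM indirect buffer is rejected here before it reaches the hardware.
 */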
static int si_vm_packet3_ce_check(struct radeon_device *rdev,
				  u32 *ib, struct radeon_cs_packet *pkt)
{
	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_SET_CE_DE_COUNTERS:
	case PACKET3_LOAD_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM_OFFSET:
	case PACKET3_DUMP_CONST_RAM:
	case PACKET3_INCREMENT_CE_COUNTER:
	case PACKET3_WAIT_ON_DE_COUNTER:
	case PACKET3_CE_WRITE:
		break;
	default:
		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;
	u32 command, info;

	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		command = ib[idx + 4];
		info = ib[idx + 1];
		if (command & PACKET3_CP_DMA_CMD_SAS) {
			/* src address space is register */
			if (((info & 0x60000000) >> 29) == 0) {
				start_reg = idx_value << 2;
				if (command & PACKET3_CP_DMA_CMD_SAIC) {
					reg = start_reg;
					if (!si_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad SRC register\n");
						return -EINVAL;
					}
				} else {
					for (i = 0; i < (command & 0x1fffff); i++) {
						reg = start_reg + (4 * i);
						if (!si_vm_reg_valid(reg)) {
							DRM_ERROR("CP DMA Bad SRC register\n");
							return -EINVAL;
						}
					}
				}
			}
		}
		if (command & PACKET3_CP_DMA_CMD_DAS) {
			/* dst address space is register */
			if (((info & 0x00300000) >> 20) == 0) {
				start_reg = ib[idx + 2];
				if (command & PACKET3_CP_DMA_CMD_DAIC) {
					reg = start_reg;
					if (!si_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad DST register\n");
						return -EINVAL;
					}
				} else {
					for (i = 0; i < (command & 0x1fffff); i++) {
						reg = start_reg + (4 * i);
						if (!si_vm_reg_valid(reg)) {
							DRM_ERROR("CP DMA Bad DST register\n");
							return -EINVAL;
						}
					}
				}
			}
		}
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int ret = 0;
	u32 idx = 0;
	struct radeon_cs_packet pkt;

	do {
		pkt.idx = idx;
		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
		pkt.one_reg_wr = 0;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			dev_err(rdev->dev, "Packet0 not allowed!\n");
			ret = -EINVAL;
			break;
		case RADEON_PACKET_TYPE2:
			idx += 1;
			break;
		case RADEON_PACKET_TYPE3:
			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
			if (ib->is_const_ib)
				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
			else {
				switch (ib->ring) {
				case RADEON_RING_TYPE_GFX_INDEX:
					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
					break;
				case CAYMAN_RING_TYPE_CP1_INDEX:
				case CAYMAN_RING_TYPE_CP2_INDEX:
					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
					break;
				default:
					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
					ret = -EINVAL;
					break;
				}
			}
			idx += pkt.count + 2;
			break;
		default:
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
	} while (idx < ib->length_dw);

	return ret;
}
int si_vm_init(struct radeon_device *rdev)
{
	/* number of VMs */
	rdev->vm_manager.nvm = 16;
	/* base offset of vram pages */
	rdev->vm_manager.vram_base_offset = 0;

	return 0;
}

void si_vm_fini(struct radeon_device *rdev)
{
}
/**
 * si_vm_set_page - update the page tables using the CP
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using the CP (SI).
 */
void si_vm_set_page(struct radeon_device *rdev,
		    struct radeon_ib *ib,
		    uint64_t pe,
		    uint64_t addr, unsigned count,
		    uint32_t incr, uint32_t flags)
{
	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
	uint64_t value;
	unsigned ndw;

	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
		while (count) {
			ndw = 2 + count * 2;
			if (ndw > 0x3FFE)
				ndw = 0x3FFE;

			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
						    WRITE_DATA_DST_SEL(1));
			ib->ptr[ib->length_dw++] = pe;
			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
				if (flags & RADEON_VM_PAGE_SYSTEM) {
					value = radeon_vm_map_gart(rdev, addr);
					value &= 0xFFFFFFFFFFFFF000ULL;
				} else if (flags & RADEON_VM_PAGE_VALID) {
					value = addr;
				} else {
					value = 0;
				}
				addr += incr;
				value |= r600_flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
			}
		}
	} else {
		/* DMA */
		if (flags & RADEON_VM_PAGE_SYSTEM) {
			while (count) {
				ndw = count * 2;
				if (ndw > 0xFFFFE)
					ndw = 0xFFFFE;

				/* for non-physically contiguous pages (system) */
				ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
				ib->ptr[ib->length_dw++] = pe;
				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
					if (flags & RADEON_VM_PAGE_SYSTEM) {
						value = radeon_vm_map_gart(rdev, addr);
						value &= 0xFFFFFFFFFFFFF000ULL;
					} else if (flags & RADEON_VM_PAGE_VALID) {
						value = addr;
					} else {
						value = 0;
					}
					addr += incr;
					value |= r600_flags;
					ib->ptr[ib->length_dw++] = value;
					ib->ptr[ib->length_dw++] = upper_32_bits(value);
				}
			}
		} else {
			while (count) {
				ndw = count * 2;
				if (ndw > 0xFFFFE)
					ndw = 0xFFFFE;

				if (flags & RADEON_VM_PAGE_VALID)
					value = addr;
				else
					value = 0;
				/* for physically contiguous pages (vram) */
				ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
				ib->ptr[ib->length_dw++] = pe; /* dst addr */
				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
				ib->ptr[ib->length_dw++] = 0;
				ib->ptr[ib->length_dw++] = value; /* value */
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
				ib->ptr[ib->length_dw++] = incr; /* increment size */
				ib->ptr[ib->length_dw++] = 0;
				pe += ndw * 4;
				addr += (ndw / 2) * incr;
				count -= ndw / 2;
			}
		}
		while (ib->length_dw & 0x7)
			ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
	}
}
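/* Dword accounting example for the CP path above (illustrative, not from
 * the original source): updating count = 3 entries gives
 * ndw = 2 + 3 * 2 = 8, i.e. one WRITE_DATA header followed by ndw + 1 body
 * dwords (control word, two address dwords and six PTE value dwords), ten
 * ring dwords in total.
 */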
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));

	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	if (vm->id < 8) {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
	} else {
		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
	}
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-7 are the VM contexts0-7 */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm->id);
}
void si_rlc_fini(struct radeon_device *rdev)
{
	int r;

	/* save restore block */
	if (rdev->rlc.save_restore_obj) {
		r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
		radeon_bo_unpin(rdev->rlc.save_restore_obj);
		radeon_bo_unreserve(rdev->rlc.save_restore_obj);

		radeon_bo_unref(&rdev->rlc.save_restore_obj);
		rdev->rlc.save_restore_obj = NULL;
	}

	/* clear state block */
	if (rdev->rlc.clear_state_obj) {
		r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
		radeon_bo_unpin(rdev->rlc.clear_state_obj);
		radeon_bo_unreserve(rdev->rlc.clear_state_obj);

		radeon_bo_unref(&rdev->rlc.clear_state_obj);
		rdev->rlc.clear_state_obj = NULL;
	}
}
int si_rlc_init(struct radeon_device *rdev)
{
	int r;

	/* save restore block */
	if (rdev->rlc.save_restore_obj == NULL) {
		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_VRAM, NULL,
				     &rdev->rlc.save_restore_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
	if (unlikely(r != 0)) {
		si_rlc_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->rlc.save_restore_gpu_addr);
	radeon_bo_unreserve(rdev->rlc.save_restore_obj);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
		si_rlc_fini(rdev);
		return r;
	}

	/* clear state block */
	if (rdev->rlc.clear_state_obj == NULL) {
		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_VRAM, NULL,
				     &rdev->rlc.clear_state_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
			si_rlc_fini(rdev);
			return r;
		}
	}
	r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
	if (unlikely(r != 0)) {
		si_rlc_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->rlc.clear_state_gpu_addr);
	radeon_bo_unreserve(rdev->rlc.clear_state_obj);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
		si_rlc_fini(rdev);
		return r;
	}

	return 0;
}
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);
}

static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);
}

static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	fw_data = (const __be32 *)rdev->rlc_fw->data;
	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
		WREG32(RLC_UCODE_ADDR, i);
		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_rlc_start(rdev);

	return 0;
}
static void si_enable_interrupts(struct radeon_device *rdev)
{
	u32 ih_cntl = RREG32(IH_CNTL);
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);

	ih_cntl |= ENABLE_INTR;
	ih_rb_cntl |= IH_RB_ENABLE;
	WREG32(IH_CNTL, ih_cntl);
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	rdev->ih.enabled = true;
}

static void si_disable_interrupts(struct radeon_device *rdev)
{
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
	u32 ih_cntl = RREG32(IH_CNTL);

	ih_rb_cntl &= ~IH_RB_ENABLE;
	ih_cntl &= ~ENABLE_INTR;
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	WREG32(IH_CNTL, ih_cntl);
	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);
	rdev->ih.enabled = false;
}
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	WREG32(DACA_AUTODETECT_INT_CONTROL, 0);

	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);
}
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	rb_bufsz = drm_order(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
int si_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
	u32 grbm_int_cntl = 0;
	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
	u32 dma_cntl, dma_cntl1;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;

	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("si_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("si_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("si_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("si_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("si_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("si_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("si_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("si_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("si_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("si_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("si_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("si_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
	WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
	WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
	}

	WREG32(DC_HPD1_INT_CONTROL, hpd1);
	WREG32(DC_HPD2_INT_CONTROL, hpd2);
	WREG32(DC_HPD3_INT_CONTROL, hpd3);
	WREG32(DC_HPD4_INT_CONTROL, hpd4);
	WREG32(DC_HPD5_INT_CONTROL, hpd5);
	WREG32(DC_HPD6_INT_CONTROL, hpd6);

	return 0;
}

static inline void si_irq_ack(struct radeon_device *rdev)
{
	u32 tmp;

	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
	if (rdev->num_crtc >= 4) {
		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
	}
	if (rdev->num_crtc >= 6) {
		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
	}

	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);

	if (rdev->num_crtc >= 4) {
		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
	}

	if (rdev->num_crtc >= 6) {
		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
	}

	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}

static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}

static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}

static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}

static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		/* When a ring buffer overflow happens, start parsing the
		 * interrupts from the last not-overwritten vector (wptr + 16).
		 * Hopefully this should allow us to catch up.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	return (wptr & rdev->ih.ptr_mask);
}
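
/*
 * Illustrative note on the overflow recovery above (not part of the
 * original code): each IV ring entry is 16 bytes (128 bits, see the
 * layout below), so setting rptr to (wptr + 16) & ptr_mask skips exactly
 * one entry -- the oldest one, which the wrapped-around write pointer may
 * have just overwritten.
 */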

/*
 * Each IV ring entry is 128 bits:
 * [7:0]    - interrupt source id
 * [31:8]   - reserved
 * [59:32]  - interrupt source data
 * [63:60]  - reserved
 * [71:64]  - RINGID
 * [79:72]  - VMID
 * [127:80] - reserved
 */
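
/*
 * Illustrative decode (not in the original source): a D1 vblank
 * interrupt arrives as an IV entry whose first dwords decode to
 * src_id = 1 and src_data = 0, matching the "case 1:" / "case 0:"
 * pair handled below:
 *
 *	src_id   = le32_to_cpu(rdev->ih.ring[ring_index])     & 0xff;      -> 1 (D1)
 *	src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff; -> 0 (vblank)
 *	ring_id  = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
 */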

int si_irq_process(struct radeon_device *rdev)
{
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;
		src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[0]) {
						drm_handle_vblank(rdev->ddev, 0);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[0]))
						radeon_crtc_handle_flip(rdev, 0);
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D1 vblank\n");
				}
				break;
			case 1: /* D1 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D1 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[1]) {
						drm_handle_vblank(rdev->ddev, 1);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[1]))
						radeon_crtc_handle_flip(rdev, 1);
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D2 vblank\n");
				}
				break;
			case 1: /* D2 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D2 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[2]) {
						drm_handle_vblank(rdev->ddev, 2);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[2]))
						radeon_crtc_handle_flip(rdev, 2);
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D3 vblank\n");
				}
				break;
			case 1: /* D3 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D3 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[3]) {
						drm_handle_vblank(rdev->ddev, 3);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[3]))
						radeon_crtc_handle_flip(rdev, 3);
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D4 vblank\n");
				}
				break;
			case 1: /* D4 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D4 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[4]) {
						drm_handle_vblank(rdev->ddev, 4);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[4]))
						radeon_crtc_handle_flip(rdev, 4);
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D5 vblank\n");
				}
				break;
			case 1: /* D5 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D5 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[5]) {
						drm_handle_vblank(rdev->ddev, 5);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[5]))
						radeon_crtc_handle_flip(rdev, 5);
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D6 vblank\n");
				}
				break;
			case 1: /* D6 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D6 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 42: /* HPD hotplug */
			switch (src_data) {
			case 0:
				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD1\n");
				}
				break;
			case 1:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD2\n");
				}
				break;
			case 2:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD3\n");
				}
				break;
			case 3:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD4\n");
				}
				break;
			case 4:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD5\n");
				}
				break;
			case 5:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD6\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 146:
		case 147:
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA1 trap event */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
	}
	if (queue_hotplug)
		schedule_work(&rdev->hotplug_work);
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rdev->ih.rptr);
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
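
/*
 * Note on the locking protocol above (illustrative, not original code):
 * the atomic_xchg() on rdev->ih.lock keeps si_irq_process() single
 * threaded, and the final si_get_ih_wptr() re-check catches entries that
 * arrived while the loop drained the ring; if wptr moved, processing
 * jumps back to restart_ih rather than dropping the new interrupts.
 */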

/**
 * si_copy_dma - copy pages using the DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @fence: radeon fence object
 *
 * Copy GPU pages using the DMA engine (SI).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 */
int si_copy_dma(struct radeon_device *rdev,
		uint64_t src_offset, uint64_t dst_offset,
		unsigned num_gpu_pages,
		struct radeon_fence **fence)
{
	struct radeon_semaphore *sem = NULL;
	int ring_index = rdev->asic->copy.dma_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes;
	int i, num_loops;
	int r = 0;

	r = radeon_semaphore_create(rdev, &sem);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		return r;
	}

	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_semaphore_free(rdev, &sem, NULL);
		return r;
	}

	if (radeon_fence_need_sync(*fence, ring->idx)) {
		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
					    ring->idx);
		radeon_fence_note_sync(*fence, ring->idx);
	} else {
		radeon_semaphore_free(rdev, &sem, NULL);
	}

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0xFFFFF)
			cur_size_in_bytes = 0xFFFFF;
		size_in_bytes -= cur_size_in_bytes;
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
		radeon_ring_write(ring, dst_offset & 0xffffffff);
		radeon_ring_write(ring, src_offset & 0xffffffff);
		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		return r;
	}

	radeon_ring_unlock_commit(rdev, ring);
	radeon_semaphore_free(rdev, &sem, *fence);

	return r;
}
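
/*
 * Illustrative note on the ring sizing above (not original code): each
 * copy packet moves at most 0xFFFFF bytes and occupies 5 dwords, so a
 * request is split into DIV_ROUND_UP(size, 0xfffff) loops, plus up to 11
 * dwords of overhead for the optional semaphore sync and the fence --
 * hence radeon_ring_lock(rdev, ring, num_loops * 5 + 11).
 */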

/*
 * startup/shutdown callbacks
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
	    !rdev->rlc_fw || !rdev->mc_fw) {
		r = si_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	r = si_mc_load_microcode(rdev);
	if (r) {
		DRM_ERROR("Failed to load MC firmware!\n");
		return r;
	}

	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);
	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

#if 0
	r = evergreen_blit_init(rdev);
	if (r) {
		r600_blit_fini(rdev);
		rdev->asic->copy = NULL;
		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
	}
#endif

	/* allocate rlc buffers */
	r = si_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* Enable IRQ */
	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_RB1_RPTR, CP_RB1_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_RB2_RPTR, CP_RB2_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	return 0;
}
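
/*
 * Bring-up order used by si_startup() above, summarized for reference
 * (derived from the code, not part of the original file): microcode ->
 * MC firmware -> VRAM scratch -> MC/GART -> RLC and writeback buffers ->
 * fence rings -> IH/IRQ -> CP and DMA rings -> IB pool -> VM manager.
 */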

int si_resume(struct radeon_device *rdev)
{
	int r;

	/* Do not reset the GPU before posting: on rv770 hw, unlike r500 hw,
	 * posting performs the tasks needed to bring the GPU back into
	 * good shape.
	 */
	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	rdev->accel_working = true;
	r = si_startup(rdev);
	if (r) {
		DRM_ERROR("si startup failed on resume\n");
		rdev->accel_working = false;
		return r;
	}

	return r;
}

int si_suspend(struct radeon_device *rdev)
{
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);

	return 0;
}

/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does little more than
 * call ASIC-specific functions. This should also allow us to remove
 * a bunch of callback functions like vram_info.
 */
int si_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* Initialize scratch registers */
	si_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = si_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	r = radeon_irq_kms_init(rdev);
	if (r)
		return r;

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = si_startup(rdev);
	if (r) {
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		si_cp_fini(rdev);
		cayman_dma_fini(rdev);
		si_irq_fini(rdev);
		si_rlc_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		si_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 */
	if (!rdev->mc_fw) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}

void si_fini(struct radeon_device *rdev)
{
#if 0
	r600_blit_fini(rdev);
#endif
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_irq_fini(rdev);
	si_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}

/**
 * si_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @rdev: radeon_device pointer
 *
 * Fetches a GPU clock counter snapshot (SI).
 * Returns the 64 bit clock counter snapshot.
 */
uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
{
	uint64_t clock;

	mutex_lock(&rdev->gpu_clock_mutex);
	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&rdev->gpu_clock_mutex);
	return clock;
}