From: Alex Deucher <alexdeucher@gmail.com>
Date: Wed, 25 May 2011 20:39:00 +0000 (-0400)
Subject: drm/radeon/kms: add blit support for cayman (v2)
X-Git-Url: https://git.stricted.de/?a=commitdiff_plain;h=cb92d452ba665205ad6bfb424c0ef009cf26587d;p=GitHub%2FLineageOS%2FG12%2Fandroid_kernel_amlogic_linux-4.9.git

drm/radeon/kms: add blit support for cayman (v2)

Allows us to use the 3D engine for memory management
and allows us to use vram beyond the BAR aperture.

v2: fix copy paste typo
Reported-by: Nils Wallménius <nils.wallmenius@gmail.com>

Signed-off-by: Alex Deucher <alexdeucher@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---

diff --git a/drivers/gpu/drm/radeon/cayman_blit_shaders.c b/drivers/gpu/drm/radeon/cayman_blit_shaders.c
index e148ab04b80b..7b4eeb7b4a8c 100644
--- a/drivers/gpu/drm/radeon/cayman_blit_shaders.c
+++ b/drivers/gpu/drm/radeon/cayman_blit_shaders.c
@@ -39,17 +39,335 @@
 
 const u32 cayman_default_state[] =
 {
-	/* XXX fill in additional blit state */
+	0xc0066900,
+	0x00000000,
+	0x00000060, /* DB_RENDER_CONTROL */
+	0x00000000, /* DB_COUNT_CONTROL */
+	0x00000000, /* DB_DEPTH_VIEW */
+	0x0000002a, /* DB_RENDER_OVERRIDE */
+	0x00000000, /* DB_RENDER_OVERRIDE2 */
+	0x00000000, /* DB_HTILE_DATA_BASE */
 
 	0xc0026900,
-	0x00000316,
-	0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */
-	0x00000010, /*  */
+	0x0000000a,
+	0x00000000, /* DB_STENCIL_CLEAR */
+	0x00000000, /* DB_DEPTH_CLEAR */
+
+	0xc0036900,
+	0x0000000f,
+	0x00000000, /* DB_DEPTH_INFO */
+	0x00000000, /* DB_Z_INFO */
+	0x00000000, /* DB_STENCIL_INFO */
+
+	0xc0016900,
+	0x00000080,
+	0x00000000, /* PA_SC_WINDOW_OFFSET */
+
+	0xc00d6900,
+	0x00000083,
+	0x0000ffff, /* PA_SC_CLIPRECT_RULE */
+	0x00000000, /* PA_SC_CLIPRECT_0_TL */
+	0x20002000, /* PA_SC_CLIPRECT_0_BR */
+	0x00000000,
+	0x20002000,
+	0x00000000,
+	0x20002000,
+	0x00000000,
+	0x20002000,
+	0xaaaaaaaa, /* PA_SC_EDGERULE */
+	0x00000000, /* PA_SU_HARDWARE_SCREEN_OFFSET */
+	0x0000000f, /* CB_TARGET_MASK */
+	0x0000000f, /* CB_SHADER_MASK */
+
+	0xc0226900,
+	0x00000094,
+	0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */
+	0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x80000000,
+	0x20002000,
+	0x00000000, /* PA_SC_VPORT_ZMIN_0 */
+	0x3f800000, /* PA_SC_VPORT_ZMAX_0 */
+
+	0xc0016900,
+	0x000000d4,
+	0x00000000, /* SX_MISC */
 
 	0xc0026900,
 	0x000000d9,
 	0x00000000, /* CP_RINGID */
 	0x00000000, /* CP_VMID */
+
+	0xc0096900,
+	0x00000100,
+	0x00ffffff, /* VGT_MAX_VTX_INDX */
+	0x00000000, /* VGT_MIN_VTX_INDX */
+	0x00000000, /* VGT_INDX_OFFSET */
+	0x00000000, /* VGT_MULTI_PRIM_IB_RESET_INDX */
+	0x00000000, /* SX_ALPHA_TEST_CONTROL */
+	0x00000000, /* CB_BLEND_RED */
+	0x00000000, /* CB_BLEND_GREEN */
+	0x00000000, /* CB_BLEND_BLUE */
+	0x00000000, /* CB_BLEND_ALPHA */
+
+	0xc0016900,
+	0x00000187,
+	0x00000100, /* SPI_VS_OUT_ID_0 */
+
+	0xc0026900,
+	0x00000191,
+	0x00000100, /* SPI_PS_INPUT_CNTL_0 */
+	0x00000101, /* SPI_PS_INPUT_CNTL_1 */
+
+	0xc0016900,
+	0x000001b1,
+	0x00000000, /* SPI_VS_OUT_CONFIG */
+
+	0xc0106900,
+	0x000001b3,
+	0x20000001, /* SPI_PS_IN_CONTROL_0 */
+	0x00000000, /* SPI_PS_IN_CONTROL_1 */
+	0x00000000, /* SPI_INTERP_CONTROL_0 */
+	0x00000000, /* SPI_INPUT_Z */
+	0x00000000, /* SPI_FOG_CNTL */
+	0x00100000, /* SPI_BARYC_CNTL */
+	0x00000000, /* SPI_PS_IN_CONTROL_2 */
+	0x00000000, /* SPI_COMPUTE_INPUT_CNTL */
+	0x00000000, /* SPI_COMPUTE_NUM_THREAD_X */
+	0x00000000, /* SPI_COMPUTE_NUM_THREAD_Y */
+	0x00000000, /* SPI_COMPUTE_NUM_THREAD_Z */
+	0x00000000, /* SPI_GPR_MGMT */
+	0x00000000, /* SPI_LDS_MGMT */
+	0x00000000, /* SPI_STACK_MGMT */
+	0x00000000, /* SPI_WAVE_MGMT_1 */
+	0x00000000, /* SPI_WAVE_MGMT_2 */
+
+	0xc0016900,
+	0x000001e0,
+	0x00000000, /* CB_BLEND0_CONTROL */
+
+	0xc00e6900,
+	0x00000200,
+	0x00000000, /* DB_DEPTH_CONTROL */
+	0x00000000, /* DB_EQAA */
+	0x00cc0010, /* CB_COLOR_CONTROL */
+	0x00000210, /* DB_SHADER_CONTROL */
+	0x00010000, /* PA_CL_CLIP_CNTL */
+	0x00000004, /* PA_SU_SC_MODE_CNTL */
+	0x00000100, /* PA_CL_VTE_CNTL */
+	0x00000000, /* PA_CL_VS_OUT_CNTL */
+	0x00000000, /* PA_CL_NANINF_CNTL */
+	0x00000000, /* PA_SU_LINE_STIPPLE_CNTL */
+	0x00000000, /* PA_SU_LINE_STIPPLE_SCALE */
+	0x00000000, /* PA_SU_PRIM_FILTER_CNTL */
+	0x00000000, /*  */
+	0x00000000, /*  */
+
+	0xc0026900,
+	0x00000229,
+	0x00000000, /* SQ_PGM_START_FS */
+	0x00000000,
+
+	0xc0016900,
+	0x0000023b,
+	0x00000000, /* SQ_LDS_ALLOC_PS */
+
+	0xc0066900,
+	0x00000240,
+	0x00000000, /* SQ_ESGS_RING_ITEMSIZE */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+
+	0xc0046900,
+	0x00000247,
+	0x00000000, /* SQ_GS_VERT_ITEMSIZE */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+
+	0xc0116900,
+	0x00000280,
+	0x00000000, /* PA_SU_POINT_SIZE */
+	0x00000000, /* PA_SU_POINT_MINMAX */
+	0x00000008, /* PA_SU_LINE_CNTL */
+	0x00000000, /* PA_SC_LINE_STIPPLE */
+	0x00000000, /* VGT_OUTPUT_PATH_CNTL */
+	0x00000000, /* VGT_HOS_CNTL */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000, /* VGT_GS_MODE */
+
+	0xc0026900,
+	0x00000292,
+	0x00000000, /* PA_SC_MODE_CNTL_0 */
+	0x00000000, /* PA_SC_MODE_CNTL_1 */
+
+	0xc0016900,
+	0x000002a1,
+	0x00000000, /* VGT_PRIMITIVEID_EN */
+
+	0xc0016900,
+	0x000002a5,
+	0x00000000, /* VGT_MULTI_PRIM_IB_RESET_EN */
+
+	0xc0026900,
+	0x000002a8,
+	0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */
+	0x00000000,
+
+	0xc0026900,
+	0x000002ad,
+	0x00000000, /* VGT_REUSE_OFF */
+	0x00000000,
+
+	0xc0016900,
+	0x000002d5,
+	0x00000000, /* VGT_SHADER_STAGES_EN */
+
+	0xc0016900,
+	0x000002dc,
+	0x0000aa00, /* DB_ALPHA_TO_MASK */
+
+	0xc0066900,
+	0x000002de,
+	0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+
+	0xc0026900,
+	0x000002e5,
+	0x00000000, /* VGT_STRMOUT_CONFIG */
+	0x00000000,
+
+	0xc01b6900,
+	0x000002f5,
+	0x76543210, /* PA_SC_CENTROID_PRIORITY_0 */
+	0xfedcba98, /* PA_SC_CENTROID_PRIORITY_1 */
+	0x00000000, /* PA_SC_LINE_CNTL */
+	0x00000000, /* PA_SC_AA_CONFIG */
+	0x00000005, /* PA_SU_VTX_CNTL */
+	0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */
+	0x3f800000, /* PA_CL_GB_VERT_DISC_ADJ */
+	0x3f800000, /* PA_CL_GB_HORZ_CLIP_ADJ */
+	0x3f800000, /* PA_CL_GB_HORZ_DISC_ADJ */
+	0x00000000, /* PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0xffffffff, /* PA_SC_AA_MASK_X0Y0_X1Y0 */
+	0xffffffff,
+
+	0xc0026900,
+	0x00000316,
+	0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */
+	0x00000010, /*  */
+};
+
+const u32 cayman_vs[] =
+{
+	0x00000004,
+	0x80400400,
+	0x0000a03c,
+	0x95000688,
+	0x00004000,
+	0x15000688,
+	0x00000000,
+	0x88000000,
+	0x04000000,
+	0x67961001,
+#ifdef __BIG_ENDIAN
+	0x00020000,
+#else
+	0x00000000,
+#endif
+	0x00000000,
+	0x04000000,
+	0x67961000,
+#ifdef __BIG_ENDIAN
+	0x00020008,
+#else
+	0x00000008,
+#endif
+	0x00000000,
+};
+
+const u32 cayman_ps[] =
+{
+	0x00000004,
+	0xa00c0000,
+	0x00000008,
+	0x80400000,
+	0x00000000,
+	0x95000688,
+	0x00000000,
+	0x88000000,
+	0x00380400,
+	0x00146b10,
+	0x00380000,
+	0x20146b10,
+	0x00380400,
+	0x40146b00,
+	0x80380000,
+	0x60146b00,
+	0x00000010,
+	0x000d1000,
+	0xb0800000,
+	0x00000000,
 };
 
+const u32 cayman_ps_size = ARRAY_SIZE(cayman_ps);
+const u32 cayman_vs_size = ARRAY_SIZE(cayman_vs);
 const u32 cayman_default_size = ARRAY_SIZE(cayman_default_state);
diff --git a/drivers/gpu/drm/radeon/cayman_blit_shaders.h b/drivers/gpu/drm/radeon/cayman_blit_shaders.h
index 33b75e5d0fa4..f5d0e9a60267 100644
--- a/drivers/gpu/drm/radeon/cayman_blit_shaders.h
+++ b/drivers/gpu/drm/radeon/cayman_blit_shaders.h
@@ -25,8 +25,11 @@
 #ifndef CAYMAN_BLIT_SHADERS_H
 #define CAYMAN_BLIT_SHADERS_H
 
+extern const u32 cayman_ps[];
+extern const u32 cayman_vs[];
 extern const u32 cayman_default_state[];
 
+extern const u32 cayman_ps_size, cayman_vs_size;
 extern const u32 cayman_default_size;
 
 #endif
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
index 40867290863c..a60ad28b0389 100644
--- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c
+++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
@@ -31,6 +31,7 @@
 
 #include "evergreend.h"
 #include "evergreen_blit_shaders.h"
+#include "cayman_blit_shaders.h"
 
 #define DI_PT_RECTLIST        0x11
 #define DI_INDEX_SIZE_16_BIT  0x0
@@ -265,238 +266,240 @@ set_default_state(struct radeon_device *rdev)
 	u64 gpu_addr;
 	int dwords;
 
-	switch (rdev->family) {
-	case CHIP_CEDAR:
-	default:
-		num_ps_gprs = 93;
-		num_vs_gprs = 46;
-		num_temp_gprs = 4;
-		num_gs_gprs = 31;
-		num_es_gprs = 31;
-		num_hs_gprs = 23;
-		num_ls_gprs = 23;
-		num_ps_threads = 96;
-		num_vs_threads = 16;
-		num_gs_threads = 16;
-		num_es_threads = 16;
-		num_hs_threads = 16;
-		num_ls_threads = 16;
-		num_ps_stack_entries = 42;
-		num_vs_stack_entries = 42;
-		num_gs_stack_entries = 42;
-		num_es_stack_entries = 42;
-		num_hs_stack_entries = 42;
-		num_ls_stack_entries = 42;
-		break;
-	case CHIP_REDWOOD:
-		num_ps_gprs = 93;
-		num_vs_gprs = 46;
-		num_temp_gprs = 4;
-		num_gs_gprs = 31;
-		num_es_gprs = 31;
-		num_hs_gprs = 23;
-		num_ls_gprs = 23;
-		num_ps_threads = 128;
-		num_vs_threads = 20;
-		num_gs_threads = 20;
-		num_es_threads = 20;
-		num_hs_threads = 20;
-		num_ls_threads = 20;
-		num_ps_stack_entries = 42;
-		num_vs_stack_entries = 42;
-		num_gs_stack_entries = 42;
-		num_es_stack_entries = 42;
-		num_hs_stack_entries = 42;
-		num_ls_stack_entries = 42;
-		break;
-	case CHIP_JUNIPER:
-		num_ps_gprs = 93;
-		num_vs_gprs = 46;
-		num_temp_gprs = 4;
-		num_gs_gprs = 31;
-		num_es_gprs = 31;
-		num_hs_gprs = 23;
-		num_ls_gprs = 23;
-		num_ps_threads = 128;
-		num_vs_threads = 20;
-		num_gs_threads = 20;
-		num_es_threads = 20;
-		num_hs_threads = 20;
-		num_ls_threads = 20;
-		num_ps_stack_entries = 85;
-		num_vs_stack_entries = 85;
-		num_gs_stack_entries = 85;
-		num_es_stack_entries = 85;
-		num_hs_stack_entries = 85;
-		num_ls_stack_entries = 85;
-		break;
-	case CHIP_CYPRESS:
-	case CHIP_HEMLOCK:
-		num_ps_gprs = 93;
-		num_vs_gprs = 46;
-		num_temp_gprs = 4;
-		num_gs_gprs = 31;
-		num_es_gprs = 31;
-		num_hs_gprs = 23;
-		num_ls_gprs = 23;
-		num_ps_threads = 128;
-		num_vs_threads = 20;
-		num_gs_threads = 20;
-		num_es_threads = 20;
-		num_hs_threads = 20;
-		num_ls_threads = 20;
-		num_ps_stack_entries = 85;
-		num_vs_stack_entries = 85;
-		num_gs_stack_entries = 85;
-		num_es_stack_entries = 85;
-		num_hs_stack_entries = 85;
-		num_ls_stack_entries = 85;
-		break;
-	case CHIP_PALM:
-		num_ps_gprs = 93;
-		num_vs_gprs = 46;
-		num_temp_gprs = 4;
-		num_gs_gprs = 31;
-		num_es_gprs = 31;
-		num_hs_gprs = 23;
-		num_ls_gprs = 23;
-		num_ps_threads = 96;
-		num_vs_threads = 16;
-		num_gs_threads = 16;
-		num_es_threads = 16;
-		num_hs_threads = 16;
-		num_ls_threads = 16;
-		num_ps_stack_entries = 42;
-		num_vs_stack_entries = 42;
-		num_gs_stack_entries = 42;
-		num_es_stack_entries = 42;
-		num_hs_stack_entries = 42;
-		num_ls_stack_entries = 42;
-		break;
-	case CHIP_BARTS:
-		num_ps_gprs = 93;
-		num_vs_gprs = 46;
-		num_temp_gprs = 4;
-		num_gs_gprs = 31;
-		num_es_gprs = 31;
-		num_hs_gprs = 23;
-		num_ls_gprs = 23;
-		num_ps_threads = 128;
-		num_vs_threads = 20;
-		num_gs_threads = 20;
-		num_es_threads = 20;
-		num_hs_threads = 20;
-		num_ls_threads = 20;
-		num_ps_stack_entries = 85;
-		num_vs_stack_entries = 85;
-		num_gs_stack_entries = 85;
-		num_es_stack_entries = 85;
-		num_hs_stack_entries = 85;
-		num_ls_stack_entries = 85;
-		break;
-	case CHIP_TURKS:
-		num_ps_gprs = 93;
-		num_vs_gprs = 46;
-		num_temp_gprs = 4;
-		num_gs_gprs = 31;
-		num_es_gprs = 31;
-		num_hs_gprs = 23;
-		num_ls_gprs = 23;
-		num_ps_threads = 128;
-		num_vs_threads = 20;
-		num_gs_threads = 20;
-		num_es_threads = 20;
-		num_hs_threads = 20;
-		num_ls_threads = 20;
-		num_ps_stack_entries = 42;
-		num_vs_stack_entries = 42;
-		num_gs_stack_entries = 42;
-		num_es_stack_entries = 42;
-		num_hs_stack_entries = 42;
-		num_ls_stack_entries = 42;
-		break;
-	case CHIP_CAICOS:
-		num_ps_gprs = 93;
-		num_vs_gprs = 46;
-		num_temp_gprs = 4;
-		num_gs_gprs = 31;
-		num_es_gprs = 31;
-		num_hs_gprs = 23;
-		num_ls_gprs = 23;
-		num_ps_threads = 128;
-		num_vs_threads = 10;
-		num_gs_threads = 10;
-		num_es_threads = 10;
-		num_hs_threads = 10;
-		num_ls_threads = 10;
-		num_ps_stack_entries = 42;
-		num_vs_stack_entries = 42;
-		num_gs_stack_entries = 42;
-		num_es_stack_entries = 42;
-		num_hs_stack_entries = 42;
-		num_ls_stack_entries = 42;
-		break;
-	}
-
-	if ((rdev->family == CHIP_CEDAR) ||
-	    (rdev->family == CHIP_PALM) ||
-	    (rdev->family == CHIP_CAICOS))
-		sq_config = 0;
-	else
-		sq_config = VC_ENABLE;
-
-	sq_config |= (EXPORT_SRC_C |
-		      CS_PRIO(0) |
-		      LS_PRIO(0) |
-		      HS_PRIO(0) |
-		      PS_PRIO(0) |
-		      VS_PRIO(1) |
-		      GS_PRIO(2) |
-		      ES_PRIO(3));
-
-	sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |
-				  NUM_VS_GPRS(num_vs_gprs) |
-				  NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
-	sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |
-				  NUM_ES_GPRS(num_es_gprs));
-	sq_gpr_resource_mgmt_3 = (NUM_HS_GPRS(num_hs_gprs) |
-				  NUM_LS_GPRS(num_ls_gprs));
-	sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |
-				   NUM_VS_THREADS(num_vs_threads) |
-				   NUM_GS_THREADS(num_gs_threads) |
-				   NUM_ES_THREADS(num_es_threads));
-	sq_thread_resource_mgmt_2 = (NUM_HS_THREADS(num_hs_threads) |
-				     NUM_LS_THREADS(num_ls_threads));
-	sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
-				    NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
-	sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
-				    NUM_ES_STACK_ENTRIES(num_es_stack_entries));
-	sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) |
-				    NUM_LS_STACK_ENTRIES(num_ls_stack_entries));
-
 	/* set clear context state */
 	radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0));
 	radeon_ring_write(rdev, 0);
 
-	/* disable dyn gprs */
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
-	radeon_ring_write(rdev, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2);
-	radeon_ring_write(rdev, 0);
+	if (rdev->family < CHIP_CAYMAN) {
+		switch (rdev->family) {
+		case CHIP_CEDAR:
+		default:
+			num_ps_gprs = 93;
+			num_vs_gprs = 46;
+			num_temp_gprs = 4;
+			num_gs_gprs = 31;
+			num_es_gprs = 31;
+			num_hs_gprs = 23;
+			num_ls_gprs = 23;
+			num_ps_threads = 96;
+			num_vs_threads = 16;
+			num_gs_threads = 16;
+			num_es_threads = 16;
+			num_hs_threads = 16;
+			num_ls_threads = 16;
+			num_ps_stack_entries = 42;
+			num_vs_stack_entries = 42;
+			num_gs_stack_entries = 42;
+			num_es_stack_entries = 42;
+			num_hs_stack_entries = 42;
+			num_ls_stack_entries = 42;
+			break;
+		case CHIP_REDWOOD:
+			num_ps_gprs = 93;
+			num_vs_gprs = 46;
+			num_temp_gprs = 4;
+			num_gs_gprs = 31;
+			num_es_gprs = 31;
+			num_hs_gprs = 23;
+			num_ls_gprs = 23;
+			num_ps_threads = 128;
+			num_vs_threads = 20;
+			num_gs_threads = 20;
+			num_es_threads = 20;
+			num_hs_threads = 20;
+			num_ls_threads = 20;
+			num_ps_stack_entries = 42;
+			num_vs_stack_entries = 42;
+			num_gs_stack_entries = 42;
+			num_es_stack_entries = 42;
+			num_hs_stack_entries = 42;
+			num_ls_stack_entries = 42;
+			break;
+		case CHIP_JUNIPER:
+			num_ps_gprs = 93;
+			num_vs_gprs = 46;
+			num_temp_gprs = 4;
+			num_gs_gprs = 31;
+			num_es_gprs = 31;
+			num_hs_gprs = 23;
+			num_ls_gprs = 23;
+			num_ps_threads = 128;
+			num_vs_threads = 20;
+			num_gs_threads = 20;
+			num_es_threads = 20;
+			num_hs_threads = 20;
+			num_ls_threads = 20;
+			num_ps_stack_entries = 85;
+			num_vs_stack_entries = 85;
+			num_gs_stack_entries = 85;
+			num_es_stack_entries = 85;
+			num_hs_stack_entries = 85;
+			num_ls_stack_entries = 85;
+			break;
+		case CHIP_CYPRESS:
+		case CHIP_HEMLOCK:
+			num_ps_gprs = 93;
+			num_vs_gprs = 46;
+			num_temp_gprs = 4;
+			num_gs_gprs = 31;
+			num_es_gprs = 31;
+			num_hs_gprs = 23;
+			num_ls_gprs = 23;
+			num_ps_threads = 128;
+			num_vs_threads = 20;
+			num_gs_threads = 20;
+			num_es_threads = 20;
+			num_hs_threads = 20;
+			num_ls_threads = 20;
+			num_ps_stack_entries = 85;
+			num_vs_stack_entries = 85;
+			num_gs_stack_entries = 85;
+			num_es_stack_entries = 85;
+			num_hs_stack_entries = 85;
+			num_ls_stack_entries = 85;
+			break;
+		case CHIP_PALM:
+			num_ps_gprs = 93;
+			num_vs_gprs = 46;
+			num_temp_gprs = 4;
+			num_gs_gprs = 31;
+			num_es_gprs = 31;
+			num_hs_gprs = 23;
+			num_ls_gprs = 23;
+			num_ps_threads = 96;
+			num_vs_threads = 16;
+			num_gs_threads = 16;
+			num_es_threads = 16;
+			num_hs_threads = 16;
+			num_ls_threads = 16;
+			num_ps_stack_entries = 42;
+			num_vs_stack_entries = 42;
+			num_gs_stack_entries = 42;
+			num_es_stack_entries = 42;
+			num_hs_stack_entries = 42;
+			num_ls_stack_entries = 42;
+			break;
+		case CHIP_BARTS:
+			num_ps_gprs = 93;
+			num_vs_gprs = 46;
+			num_temp_gprs = 4;
+			num_gs_gprs = 31;
+			num_es_gprs = 31;
+			num_hs_gprs = 23;
+			num_ls_gprs = 23;
+			num_ps_threads = 128;
+			num_vs_threads = 20;
+			num_gs_threads = 20;
+			num_es_threads = 20;
+			num_hs_threads = 20;
+			num_ls_threads = 20;
+			num_ps_stack_entries = 85;
+			num_vs_stack_entries = 85;
+			num_gs_stack_entries = 85;
+			num_es_stack_entries = 85;
+			num_hs_stack_entries = 85;
+			num_ls_stack_entries = 85;
+			break;
+		case CHIP_TURKS:
+			num_ps_gprs = 93;
+			num_vs_gprs = 46;
+			num_temp_gprs = 4;
+			num_gs_gprs = 31;
+			num_es_gprs = 31;
+			num_hs_gprs = 23;
+			num_ls_gprs = 23;
+			num_ps_threads = 128;
+			num_vs_threads = 20;
+			num_gs_threads = 20;
+			num_es_threads = 20;
+			num_hs_threads = 20;
+			num_ls_threads = 20;
+			num_ps_stack_entries = 42;
+			num_vs_stack_entries = 42;
+			num_gs_stack_entries = 42;
+			num_es_stack_entries = 42;
+			num_hs_stack_entries = 42;
+			num_ls_stack_entries = 42;
+			break;
+		case CHIP_CAICOS:
+			num_ps_gprs = 93;
+			num_vs_gprs = 46;
+			num_temp_gprs = 4;
+			num_gs_gprs = 31;
+			num_es_gprs = 31;
+			num_hs_gprs = 23;
+			num_ls_gprs = 23;
+			num_ps_threads = 128;
+			num_vs_threads = 10;
+			num_gs_threads = 10;
+			num_es_threads = 10;
+			num_hs_threads = 10;
+			num_ls_threads = 10;
+			num_ps_stack_entries = 42;
+			num_vs_stack_entries = 42;
+			num_gs_stack_entries = 42;
+			num_es_stack_entries = 42;
+			num_hs_stack_entries = 42;
+			num_ls_stack_entries = 42;
+			break;
+		}
 
-	/* SQ config */
-	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 11));
-	radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2);
-	radeon_ring_write(rdev, sq_config);
-	radeon_ring_write(rdev, sq_gpr_resource_mgmt_1);
-	radeon_ring_write(rdev, sq_gpr_resource_mgmt_2);
-	radeon_ring_write(rdev, sq_gpr_resource_mgmt_3);
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, sq_thread_resource_mgmt);
-	radeon_ring_write(rdev, sq_thread_resource_mgmt_2);
-	radeon_ring_write(rdev, sq_stack_resource_mgmt_1);
-	radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
-	radeon_ring_write(rdev, sq_stack_resource_mgmt_3);
+		if ((rdev->family == CHIP_CEDAR) ||
+		    (rdev->family == CHIP_PALM) ||
+		    (rdev->family == CHIP_CAICOS))
+			sq_config = 0;
+		else
+			sq_config = VC_ENABLE;
+
+		sq_config |= (EXPORT_SRC_C |
+			      CS_PRIO(0) |
+			      LS_PRIO(0) |
+			      HS_PRIO(0) |
+			      PS_PRIO(0) |
+			      VS_PRIO(1) |
+			      GS_PRIO(2) |
+			      ES_PRIO(3));
+
+		sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |
+					  NUM_VS_GPRS(num_vs_gprs) |
+					  NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
+		sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |
+					  NUM_ES_GPRS(num_es_gprs));
+		sq_gpr_resource_mgmt_3 = (NUM_HS_GPRS(num_hs_gprs) |
+					  NUM_LS_GPRS(num_ls_gprs));
+		sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |
+					   NUM_VS_THREADS(num_vs_threads) |
+					   NUM_GS_THREADS(num_gs_threads) |
+					   NUM_ES_THREADS(num_es_threads));
+		sq_thread_resource_mgmt_2 = (NUM_HS_THREADS(num_hs_threads) |
+					     NUM_LS_THREADS(num_ls_threads));
+		sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
+					    NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
+		sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
+					    NUM_ES_STACK_ENTRIES(num_es_stack_entries));
+		sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) |
+					    NUM_LS_STACK_ENTRIES(num_ls_stack_entries));
+
+		/* disable dyn gprs */
+		radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+		radeon_ring_write(rdev, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2);
+		radeon_ring_write(rdev, 0);
+
+		/* SQ config */
+		radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 11));
+		radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2);
+		radeon_ring_write(rdev, sq_config);
+		radeon_ring_write(rdev, sq_gpr_resource_mgmt_1);
+		radeon_ring_write(rdev, sq_gpr_resource_mgmt_2);
+		radeon_ring_write(rdev, sq_gpr_resource_mgmt_3);
+		radeon_ring_write(rdev, 0);
+		radeon_ring_write(rdev, 0);
+		radeon_ring_write(rdev, sq_thread_resource_mgmt);
+		radeon_ring_write(rdev, sq_thread_resource_mgmt_2);
+		radeon_ring_write(rdev, sq_stack_resource_mgmt_1);
+		radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
+		radeon_ring_write(rdev, sq_stack_resource_mgmt_3);
+	}
 
 	/* CONTEXT_CONTROL */
 	radeon_ring_write(rdev, 0xc0012800);
@@ -570,7 +573,10 @@ int evergreen_blit_init(struct radeon_device *rdev)
 	mutex_init(&rdev->r600_blit.mutex);
 	rdev->r600_blit.state_offset = 0;
 
-	rdev->r600_blit.state_len = evergreen_default_size;
+	if (rdev->family < CHIP_CAYMAN)
+		rdev->r600_blit.state_len = evergreen_default_size;
+	else
+		rdev->r600_blit.state_len = cayman_default_size;
 
 	dwords = rdev->r600_blit.state_len;
 	while (dwords & 0xf) {
@@ -582,11 +588,17 @@ int evergreen_blit_init(struct radeon_device *rdev)
 	obj_size = ALIGN(obj_size, 256);
 
 	rdev->r600_blit.vs_offset = obj_size;
-	obj_size += evergreen_vs_size * 4;
+	if (rdev->family < CHIP_CAYMAN)
+		obj_size += evergreen_vs_size * 4;
+	else
+		obj_size += cayman_vs_size * 4;
 	obj_size = ALIGN(obj_size, 256);
 
 	rdev->r600_blit.ps_offset = obj_size;
-	obj_size += evergreen_ps_size * 4;
+	if (rdev->family < CHIP_CAYMAN)
+		obj_size += evergreen_ps_size * 4;
+	else
+		obj_size += cayman_ps_size * 4;
 	obj_size = ALIGN(obj_size, 256);
 
 	r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
@@ -609,16 +621,29 @@ int evergreen_blit_init(struct radeon_device *rdev)
 		return r;
 	}
 
-	memcpy_toio(ptr + rdev->r600_blit.state_offset,
-		    evergreen_default_state, rdev->r600_blit.state_len * 4);
-
-	if (num_packet2s)
-		memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
-			    packet2s, num_packet2s * 4);
-	for (i = 0; i < evergreen_vs_size; i++)
-		*(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(evergreen_vs[i]);
-	for (i = 0; i < evergreen_ps_size; i++)
-		*(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(evergreen_ps[i]);
+	if (rdev->family < CHIP_CAYMAN) {
+		memcpy_toio(ptr + rdev->r600_blit.state_offset,
+			    evergreen_default_state, rdev->r600_blit.state_len * 4);
+
+		if (num_packet2s)
+			memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
+				    packet2s, num_packet2s * 4);
+		for (i = 0; i < evergreen_vs_size; i++)
+			*(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(evergreen_vs[i]);
+		for (i = 0; i < evergreen_ps_size; i++)
+			*(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(evergreen_ps[i]);
+	} else {
+		memcpy_toio(ptr + rdev->r600_blit.state_offset,
+			    cayman_default_state, rdev->r600_blit.state_len * 4);
+
+		if (num_packet2s)
+			memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
+				    packet2s, num_packet2s * 4);
+		for (i = 0; i < cayman_vs_size; i++)
+			*(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(cayman_vs[i]);
+		for (i = 0; i < cayman_ps_size; i++)
+			*(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(cayman_ps[i]);
+	}
 	radeon_bo_kunmap(rdev->r600_blit.shader_obj);
 	radeon_bo_unreserve(rdev->r600_blit.shader_obj);
 
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index a0cc7a5ff031..c023b0ad89f4 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1387,14 +1387,12 @@ static int cayman_startup(struct radeon_device *rdev)
 		return r;
 	cayman_gpu_init(rdev);
 
-#if 0
-	r = cayman_blit_init(rdev);
+	r = evergreen_blit_init(rdev);
 	if (r) {
-		cayman_blit_fini(rdev);
+		evergreen_blit_fini(rdev);
 		rdev->asic->copy = NULL;
 		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
 	}
-#endif
 
 	/* allocate wb buffer */
 	r = radeon_wb_init(rdev);
@@ -1452,7 +1450,7 @@ int cayman_resume(struct radeon_device *rdev)
 
 int cayman_suspend(struct radeon_device *rdev)
 {
-	/* int r; */
+	int r;
 
 	/* FIXME: we should wait for ring to be empty */
 	cayman_cp_enable(rdev, false);
@@ -1461,14 +1459,13 @@ int cayman_suspend(struct radeon_device *rdev)
 	radeon_wb_disable(rdev);
 	cayman_pcie_gart_disable(rdev);
 
-#if 0
 	/* unpin shaders bo */
 	r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
 	if (likely(r == 0)) {
 		radeon_bo_unpin(rdev->r600_blit.shader_obj);
 		radeon_bo_unreserve(rdev->r600_blit.shader_obj);
 	}
-#endif
+
 	return 0;
 }
 
@@ -1580,7 +1577,7 @@ int cayman_init(struct radeon_device *rdev)
 
 void cayman_fini(struct radeon_device *rdev)
 {
-	/* cayman_blit_fini(rdev); */
+	evergreen_blit_fini(rdev);
 	cayman_cp_fini(rdev);
 	r600_irq_fini(rdev);
 	radeon_wb_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index d948265db87e..b9b3c2a2b119 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -906,9 +906,9 @@ static struct radeon_asic cayman_asic = {
 	.get_vblank_counter = &evergreen_get_vblank_counter,
 	.fence_ring_emit = &r600_fence_ring_emit,
 	.cs_parse = &evergreen_cs_parse,
-	.copy_blit = NULL,
-	.copy_dma = NULL,
-	.copy = NULL,
+	.copy_blit = &evergreen_copy_blit,
+	.copy_dma = &evergreen_copy_blit,
+	.copy = &evergreen_copy_blit,
 	.get_engine_clock = &radeon_atom_get_engine_clock,
 	.set_engine_clock = &radeon_atom_set_engine_clock,
 	.get_memory_clock = &radeon_atom_get_memory_clock,