From 7dbf41db327ebcbc0d28a2b39afcbd60664094e6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 17 May 2011 05:09:43 -0400 Subject: [PATCH] drm/radeon/kms: simplify r6xx blit code Covert 4k pages to multiples of 64x64x4 tiles. This is also more efficient than a scanline based approach from the MC's perspective. Signed-off-by: Alex Deucher Signed-off-by: Ilija Hadzic Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r600.c | 4 +- drivers/gpu/drm/radeon/r600_blit_kms.c | 280 ++++++++++--------------- drivers/gpu/drm/radeon/radeon_asic.h | 4 +- 3 files changed, 111 insertions(+), 177 deletions(-) diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index c4de1610088c..e106f30787fd 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2363,14 +2363,14 @@ int r600_copy_blit(struct radeon_device *rdev, mutex_lock(&rdev->r600_blit.mutex); rdev->r600_blit.vb_ib = NULL; - r = r600_blit_prepare_copy(rdev, num_pages * RADEON_GPU_PAGE_SIZE); + r = r600_blit_prepare_copy(rdev, num_pages); if (r) { if (rdev->r600_blit.vb_ib) radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); mutex_unlock(&rdev->r600_blit.mutex); return r; } - r600_kms_blit_copy(rdev, src_offset, dst_offset, num_pages * RADEON_GPU_PAGE_SIZE); + r600_kms_blit_copy(rdev, src_offset, dst_offset, num_pages); r600_blit_done_copy(rdev, fence); mutex_unlock(&rdev->r600_blit.mutex); return 0; diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c index bbbafe685543..3940be619af7 100644 --- a/drivers/gpu/drm/radeon/r600_blit_kms.c +++ b/drivers/gpu/drm/radeon/r600_blit_kms.c @@ -42,6 +42,10 @@ #define COLOR_5_6_5 0x8 #define COLOR_8_8_8_8 0x1a +#define RECT_UNIT_H 32 +#define RECT_UNIT_W (RADEON_GPU_PAGE_SIZE / 4 / RECT_UNIT_H) +#define MAX_RECT_DIM 8192 + /* emits 21 on rv770+, 23 on r600 */ static void set_render_target(struct radeon_device *rdev, int format, @@ -600,13 +604,59 @@ static void r600_vb_ib_put(struct radeon_device *rdev) radeon_ib_free(rdev, &rdev->r600_blit.vb_ib); } -int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes) +/* FIXME: the function is very similar to evergreen_blit_create_rect, except + that it different predefined constants; consider commonizing */ +static unsigned r600_blit_create_rect(unsigned num_pages, int *width, int *height) +{ + unsigned max_pages; + unsigned pages = num_pages; + int w, h; + + if (num_pages == 0) { + /* not supposed to be called with no pages, but just in case */ + h = 0; + w = 0; + pages = 0; + WARN_ON(1); + } else { + int rect_order = 2; + h = RECT_UNIT_H; + while (num_pages / rect_order) { + h *= 2; + rect_order *= 4; + if (h >= MAX_RECT_DIM) { + h = MAX_RECT_DIM; + break; + } + } + max_pages = (MAX_RECT_DIM * h) / (RECT_UNIT_W * RECT_UNIT_H); + if (pages > max_pages) + pages = max_pages; + w = (pages * RECT_UNIT_W * RECT_UNIT_H) / h; + w = (w / RECT_UNIT_W) * RECT_UNIT_W; + pages = (w * h) / (RECT_UNIT_W * RECT_UNIT_H); + BUG_ON(pages == 0); + } + + + DRM_DEBUG("blit_rectangle: h=%d, w=%d, pages=%d\n", h, w, pages); + + /* return width and height only of the caller wants it */ + if (height) + *height = h; + if (width) + *width = w; + + return pages; +} + + +int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_pages) { int r; - int ring_size, line_size; - int max_size; + int ring_size; /* loops of emits 64 + fence emit possible */ - int dwords_per_loop = 76, num_loops; + int dwords_per_loop = 76, num_loops = 0; r = r600_vb_ib_get(rdev); if (r) @@ -616,18 +666,12 @@ int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes) if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770) dwords_per_loop += 2; - /* 8 bpp vs 32 bpp for xfer unit */ - if (size_bytes & 3) - line_size = 8192; - else - line_size = 8192*4; - - max_size = 8192 * line_size; + /* num loops */ + while (num_pages) { + num_pages -= r600_blit_create_rect(num_pages, NULL, NULL); + num_loops++; + } - /* major loops cover the max size transfer */ - num_loops = ((size_bytes + max_size) / max_size); - /* minor loops cover the extra non aligned bits */ - num_loops += ((size_bytes % line_size) ? 1 : 0); /* calculate number of loops correctly */ ring_size = num_loops * dwords_per_loop; /* set default + shaders */ @@ -659,182 +703,72 @@ void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence) void r600_kms_blit_copy(struct radeon_device *rdev, u64 src_gpu_addr, u64 dst_gpu_addr, - int size_bytes) + unsigned num_pages) { - int max_bytes; u64 vb_gpu_addr; u32 *vb; DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr, - size_bytes, rdev->r600_blit.vb_used); + num_pages, rdev->r600_blit.vb_used); vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used); - if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) { - max_bytes = 8192; - - while (size_bytes) { - int cur_size = size_bytes; - int src_x = src_gpu_addr & 255; - int dst_x = dst_gpu_addr & 255; - int h = 1; - src_gpu_addr = src_gpu_addr & ~255ULL; - dst_gpu_addr = dst_gpu_addr & ~255ULL; - - if (!src_x && !dst_x) { - h = (cur_size / max_bytes); - if (h > 8192) - h = 8192; - if (h == 0) - h = 1; - else - cur_size = max_bytes; - } else { - if (cur_size > max_bytes) - cur_size = max_bytes; - if (cur_size > (max_bytes - dst_x)) - cur_size = (max_bytes - dst_x); - if (cur_size > (max_bytes - src_x)) - cur_size = (max_bytes - src_x); - } - if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { - WARN_ON(1); - } + while (num_pages) { + int w, h; + unsigned size_in_bytes; + unsigned pages_per_loop = r600_blit_create_rect(num_pages, &w, &h); - vb[0] = i2f(dst_x); - vb[1] = 0; - vb[2] = i2f(src_x); - vb[3] = 0; + size_in_bytes = pages_per_loop * RADEON_GPU_PAGE_SIZE; + DRM_DEBUG("rectangle w=%d h=%d\n", w, h); - vb[4] = i2f(dst_x); - vb[5] = i2f(h); - vb[6] = i2f(src_x); - vb[7] = i2f(h); + if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { + WARN_ON(1); + } - vb[8] = i2f(dst_x + cur_size); - vb[9] = i2f(h); - vb[10] = i2f(src_x + cur_size); - vb[11] = i2f(h); + vb[0] = 0; + vb[1] = 0; + vb[2] = 0; + vb[3] = 0; - /* src 9 */ - set_tex_resource(rdev, FMT_8, - src_x + cur_size, h, src_x + cur_size, - src_gpu_addr); + vb[4] = 0; + vb[5] = i2f(h); + vb[6] = 0; + vb[7] = i2f(h); - /* 5 */ - cp_set_surface_sync(rdev, - PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); + vb[8] = i2f(w); + vb[9] = i2f(h); + vb[10] = i2f(w); + vb[11] = i2f(h); - /* dst 23 */ - set_render_target(rdev, COLOR_8, - dst_x + cur_size, h, - dst_gpu_addr); + /* src 9 */ + set_tex_resource(rdev, FMT_8_8_8_8, w, h, w, src_gpu_addr); - /* scissors 12 */ - set_scissors(rdev, dst_x, 0, dst_x + cur_size, h); + /* 5 */ + cp_set_surface_sync(rdev, + PACKET3_TC_ACTION_ENA, size_in_bytes, src_gpu_addr); - /* 14 */ - vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; - set_vtx_resource(rdev, vb_gpu_addr); + /* dst 23 */ + set_render_target(rdev, COLOR_8_8_8_8, w, h, dst_gpu_addr); - /* draw 10 */ - draw_auto(rdev); + /* scissors 12 */ + set_scissors(rdev, 0, 0, w, h); - /* 5 */ - cp_set_surface_sync(rdev, - PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, - cur_size * h, dst_gpu_addr); + /* Vertex buffer setup 14 */ + vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; + set_vtx_resource(rdev, vb_gpu_addr); - vb += 12; - rdev->r600_blit.vb_used += 12 * 4; + /* draw 10 */ + draw_auto(rdev); - src_gpu_addr += cur_size * h; - dst_gpu_addr += cur_size * h; - size_bytes -= cur_size * h; - } - } else { - max_bytes = 8192 * 4; - - while (size_bytes) { - int cur_size = size_bytes; - int src_x = (src_gpu_addr & 255); - int dst_x = (dst_gpu_addr & 255); - int h = 1; - src_gpu_addr = src_gpu_addr & ~255ULL; - dst_gpu_addr = dst_gpu_addr & ~255ULL; - - if (!src_x && !dst_x) { - h = (cur_size / max_bytes); - if (h > 8192) - h = 8192; - if (h == 0) - h = 1; - else - cur_size = max_bytes; - } else { - if (cur_size > max_bytes) - cur_size = max_bytes; - if (cur_size > (max_bytes - dst_x)) - cur_size = (max_bytes - dst_x); - if (cur_size > (max_bytes - src_x)) - cur_size = (max_bytes - src_x); - } - - if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) { - WARN_ON(1); - } - - vb[0] = i2f(dst_x / 4); - vb[1] = 0; - vb[2] = i2f(src_x / 4); - vb[3] = 0; - - vb[4] = i2f(dst_x / 4); - vb[5] = i2f(h); - vb[6] = i2f(src_x / 4); - vb[7] = i2f(h); - - vb[8] = i2f((dst_x + cur_size) / 4); - vb[9] = i2f(h); - vb[10] = i2f((src_x + cur_size) / 4); - vb[11] = i2f(h); - - /* src 9 */ - set_tex_resource(rdev, FMT_8_8_8_8, - (src_x + cur_size) / 4, - h, (src_x + cur_size) / 4, - src_gpu_addr); - /* 5 */ - cp_set_surface_sync(rdev, - PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr); - - /* dst 23 */ - set_render_target(rdev, COLOR_8_8_8_8, - (dst_x + cur_size) / 4, h, - dst_gpu_addr); - - /* scissors 12 */ - set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size / 4), h); - - /* Vertex buffer setup 14 */ - vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used; - set_vtx_resource(rdev, vb_gpu_addr); - - /* draw 10 */ - draw_auto(rdev); - - /* 5 */ - cp_set_surface_sync(rdev, - PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, - cur_size * h, dst_gpu_addr); - - /* 78 ring dwords per loop */ - vb += 12; - rdev->r600_blit.vb_used += 12 * 4; - - src_gpu_addr += cur_size * h; - dst_gpu_addr += cur_size * h; - size_bytes -= cur_size * h; - } + /* 5 */ + cp_set_surface_sync(rdev, + PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA, + size_in_bytes, dst_gpu_addr); + + /* 78 ring dwords per loop */ + vb += 12; + rdev->r600_blit.vb_used += 4*12; + src_gpu_addr += size_in_bytes; + dst_gpu_addr += size_in_bytes; + num_pages -= pages_per_loop; } } - diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 62877ef7e0c6..24402e94d815 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -364,11 +364,11 @@ void r600_hdmi_init(struct drm_encoder *encoder); int r600_hdmi_buffer_status_changed(struct drm_encoder *encoder); void r600_hdmi_update_audio_settings(struct drm_encoder *encoder); /* r600 blit */ -int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes); +int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_pages); void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence); void r600_kms_blit_copy(struct radeon_device *rdev, u64 src_gpu_addr, u64 dst_gpu_addr, - int size_bytes); + unsigned num_pages); /* * rv770,rv730,rv710,rv740 -- 2.20.1