drm/radeon: Use direct mapping for fast fb access on RS690
author Samuel Li <samuel.li@amd.com>
Mon, 8 Apr 2013 21:25:47 +0000 (17:25 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
Tue, 9 Apr 2013 14:31:31 +0000 (10:31 -0400)
This patch allows the CPU to map the stolen VRAM segment
directly rather than going through the PCI BAR.  This
significantly improves performance for certain workloads when
used with a properly patched DDX (the userspace X.Org video driver).

Use radeon.fastfb=1 to enable it (disabled by default).
Currently only supported on RS690, but support for RS780/880
and newer APUs may be added eventually.

Signed-off-by: Samuel Li <samuel.li@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/radeon/radeon.h
drivers/gpu/drm/radeon/radeon_drv.c
drivers/gpu/drm/radeon/radeon_kms.c
drivers/gpu/drm/radeon/radeon_object.c
drivers/gpu/drm/radeon/rs690.c
drivers/gpu/drm/radeon/rs690d.h
include/uapi/drm/radeon_drm.h

index 8bd875304441ef8f3894af179d031b7046def47f..730d3359af60e0270fb7b5959a91e0d5ceec27ca 100644 (file)
@@ -95,6 +95,7 @@ extern int radeon_hw_i2c;
 extern int radeon_pcie_gen2;
 extern int radeon_msi;
 extern int radeon_lockup_timeout;
+extern int radeon_fastfb;
 
 /*
  * Copy from radeon_drv.h so we don't have to include both and have conflicting
@@ -1616,6 +1617,7 @@ struct radeon_device {
        bool                            suspend;
        bool                            need_dma32;
        bool                            accel_working;
+       bool                            fastfb_working; /* IGP feature*/
        struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES];
        const struct firmware *me_fw;   /* all family ME firmware */
        const struct firmware *pfp_fw;  /* r6/700 PFP firmware */
index 66a7f0fd96201f09a7f540057d62c9bd89d6b23e..b500bbc3e4112fedefca69da5cb8dc0573d94729 100644 (file)
  *   2.28.0 - r600-eg: Add MEM_WRITE packet support
  *   2.29.0 - R500 FP16 color clear registers
  *   2.30.0 - fix for FMASK texturing
+ *   2.31.0 - Add fastfb support for rs690
  */
 #define KMS_DRIVER_MAJOR       2
-#define KMS_DRIVER_MINOR       30
+#define KMS_DRIVER_MINOR       31
 #define KMS_DRIVER_PATCHLEVEL  0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
@@ -160,6 +161,7 @@ int radeon_hw_i2c = 0;
 int radeon_pcie_gen2 = -1;
 int radeon_msi = -1;
 int radeon_lockup_timeout = 10000;
+int radeon_fastfb = 0;
 
 MODULE_PARM_DESC(no_wb, "Disable AGP writeback for scratch registers");
 module_param_named(no_wb, radeon_no_wb, int, 0444);
@@ -212,6 +214,9 @@ module_param_named(msi, radeon_msi, int, 0444);
 MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (defaul 10000 = 10 seconds, 0 = disable)");
 module_param_named(lockup_timeout, radeon_lockup_timeout, int, 0444);
 
+MODULE_PARM_DESC(fastfb, "Direct FB access for IGP chips (0 = disable, 1 = enable)");
+module_param_named(fastfb, radeon_fastfb, int, 0444);
+
 static struct pci_device_id pciidlist[] = {
        radeon_PCI_IDS
 };
index c75cb2c6ba71164e7429a5be99dc5938c40248fe..f5464482dee8b5ee63845a9b1522577279fc2afe 100644 (file)
@@ -376,6 +376,9 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
                else
                        return -EINVAL;
                break;
+       case RADEON_INFO_FASTFB_WORKING:
+               value = rdev->fastfb_working;
+               break;
        default:
                DRM_DEBUG_KMS("Invalid request %d\n", info->request);
                return -EINVAL;
index d3aface2d12d6f3a03ee5f5a2667b42caf52d7d5..58e026afec171dd6ca5caa0afadee63f2979c4ea 100644 (file)
@@ -321,8 +321,10 @@ void radeon_bo_force_delete(struct radeon_device *rdev)
 int radeon_bo_init(struct radeon_device *rdev)
 {
        /* Add an MTRR for the VRAM */
-       rdev->mc.vram_mtrr = mtrr_add(rdev->mc.aper_base, rdev->mc.aper_size,
+       if (!rdev->fastfb_working) {
+               rdev->mc.vram_mtrr = mtrr_add(rdev->mc.aper_base, rdev->mc.aper_size,
                        MTRR_TYPE_WRCOMB, 1);
+       }
        DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
                rdev->mc.mc_vram_size >> 20,
                (unsigned long long)rdev->mc.aper_size >> 20);
index 5706d2ac75ab1c4498e810f10edcfcb7e3649a2a..ab4c86cfd5526f1884a4a7575cd29f0341e0b59b 100644 (file)
@@ -148,6 +148,8 @@ void rs690_pm_info(struct radeon_device *rdev)
 static void rs690_mc_init(struct radeon_device *rdev)
 {
        u64 base;
+       uint32_t h_addr, l_addr;
+       unsigned long long k8_addr;
 
        rs400_gart_adjust_size(rdev);
        rdev->mc.vram_is_ddr = true;
@@ -160,6 +162,27 @@ static void rs690_mc_init(struct radeon_device *rdev)
        base = RREG32_MC(R_000100_MCCFG_FB_LOCATION);
        base = G_000100_MC_FB_START(base) << 16;
        rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev);
+
+       /* Use K8 direct mapping for fast fb access. */ 
+       rdev->fastfb_working = false;
+       h_addr = G_00005F_K8_ADDR_EXT(RREG32_MC(R_00005F_MC_MISC_UMA_CNTL));
+       l_addr = RREG32_MC(R_00001E_K8_FB_LOCATION);
+       k8_addr = ((unsigned long long)h_addr) << 32 | l_addr;
+#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
+       if (k8_addr + rdev->mc.visible_vram_size < 0x100000000ULL)      
+#endif
+       {
+               /* FastFB shall be used with UMA memory. Here it is simply disabled when sideport 
+                * memory is present.
+                */
+               if (rdev->mc.igp_sideport_enabled == false && radeon_fastfb == 1) {
+                       DRM_INFO("Direct mapping: aper base at 0x%llx, replaced by direct mapping base 0x%llx.\n", 
+                                       (unsigned long long)rdev->mc.aper_base, k8_addr);
+                       rdev->mc.aper_base = (resource_size_t)k8_addr;
+                       rdev->fastfb_working = true;
+               }
+       }  
+
        rs690_pm_info(rdev);
        radeon_vram_location(rdev, &rdev->mc, base);
        rdev->mc.gtt_base_align = rdev->mc.gtt_size - 1;
index 36e6398a98aea242520f5772bd556ea9293af48a..8af3ccf20cc07891cbd4b8e71e43ddbb2a96ca82 100644 (file)
@@ -29,6 +29,9 @@
 #define __RS690D_H__
 
 /* Registers */
+#define R_00001E_K8_FB_LOCATION                      0x00001E
+#define R_00005F_MC_MISC_UMA_CNTL                    0x00005F
+#define   G_00005F_K8_ADDR_EXT(x)                      (((x) >> 0) & 0xFF)
 #define R_000078_MC_INDEX                            0x000078
 #define   S_000078_MC_IND_ADDR(x)                      (((x) & 0x1FF) << 0)
 #define   G_000078_MC_IND_ADDR(x)                      (((x) >> 0) & 0x1FF)
index eeda91774c8ac24f9dd95b0d2a45e61ab0da622a..6fd25563f3017453543ca387e89023638174f0b2 100644 (file)
@@ -972,6 +972,9 @@ struct drm_radeon_cs {
 #define RADEON_INFO_MAX_SE             0x12
 /* max SH per SE */
 #define RADEON_INFO_MAX_SH_PER_SE      0x13
+/* fast fb access is enabled */
+#define RADEON_INFO_FASTFB_WORKING     0x14
+
 
 struct drm_radeon_info {
        uint32_t                request;