drm/amdgpu: write PTEs directly into the IB.
authorChristian König <christian.koenig@amd.com>
Thu, 11 Aug 2016 12:06:54 +0000 (14:06 +0200)
committerAlex Deucher <alexander.deucher@amd.com>
Tue, 16 Aug 2016 14:43:44 +0000 (10:43 -0400)
Write the PTEs at the end of the IB instead of directly into the SDMA commands.
This can save quite some CPU cycles building the entries.

This doesn't change the DW estimation because PTEs where embedded into the IB
before as well. It just moves them to the end of the IB.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 1a474fa1f441c69c875f40ceebb9286d47840605..e5095b5e10e53ed5326191234d9159cc491dc013 100644 (file)
@@ -911,15 +911,15 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
        /* padding, etc. */
        ndw = 64;
 
-       if (params.src) {
+       if (src) {
                /* only copy commands needed */
                ndw += ncmds * 7;
 
-       } else if (params.pages_addr) {
-               /* header for write data commands */
-               ndw += ncmds * 4;
+       } else if (pages_addr) {
+               /* copy commands needed */
+               ndw += ncmds * 7;
 
-               /* body of write data command */
+               /* and also PTEs */
                ndw += nptes * 2;
 
        } else {
@@ -936,6 +936,22 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
        params.ib = &job->ibs[0];
 
+       if (!src && pages_addr) {
+               uint64_t *pte;
+               unsigned i;
+
+               /* Put the PTEs at the end of the IB. */
+               i = ndw - nptes * 2;
+               pte= (uint64_t *)&(job->ibs->ptr[i]);
+               params.src = job->ibs->gpu_addr + i * 4;
+
+               for (i = 0; i < nptes; ++i) {
+                       pte[i] = amdgpu_vm_map_gart(pages_addr, addr + i *
+                                                   AMDGPU_GPU_PAGE_SIZE);
+                       pte[i] |= flags;
+               }
+       }
+
        r = amdgpu_sync_fence(adev, &job->sync, exclusive);
        if (r)
                goto error_free;