drm/radeon: re-enable PTE/PDE packet for set_page on cayman/TN
authorAlex Deucher <alexander.deucher@amd.com>
Tue, 16 Apr 2013 14:42:15 +0000 (10:42 -0400)
committerAlex Deucher <alexander.deucher@amd.com>
Tue, 16 Apr 2013 20:15:27 +0000 (16:15 -0400)
The PTE/PDE packet doesn't support a single update (count = 1).  We had
previously disabled it since we were hitting that case, which
led to hangs.  The PTE/PDE packet is much more efficient for VM
updates where it can be used.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/radeon/ni.c
drivers/gpu/drm/radeon/nid.h

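The hunk below makes cayman_vm_set_page() choose between the two async DMA
packet formats: system pages and single-entry updates (count == 1) keep the
WRITE packet, which carries one dword pair per PTE, while longer runs of
physically contiguous (vram) pages use the new nine-dword PTE/PDE packet.
As a rough illustration only, the standalone sketch below mirrors that
selection and the header encodings visible in the diff; the helper names
(build_write_header, build_pte_pde_header, use_pte_pde_packet) and the
simplified flag handling are made up for the example and are not part of
the radeon driver.

#include <stdint.h>
#include <stdio.h>

/* Simplified header builders based on the macros in nid.h.  DMA_PACKET_WRITE
 * is opcode 0x2; build_write_header() assumes the t/s arguments of
 * DMA_PACKET() are zero, as in the call sites in this hunk, and
 * build_pte_pde_header() mirrors the new DMA_PTE_PDE_PACKET(n) definition.
 */
static uint32_t build_write_header(unsigned int ndw)
{
	return (0x2u << 28) | (ndw & 0xFFFFF);
}

static uint32_t build_pte_pde_header(unsigned int ndw)
{
	return (0x2u << 28) | (1u << 26) | (1u << 21) | (ndw & 0xFFFFF);
}

/* Path selection added by this patch: only non-system (vram) runs with more
 * than one entry may use the PTE/PDE packet.
 */
static int use_pte_pde_packet(int page_system, unsigned int count)
{
	return !page_system && count > 1;
}

int main(void)
{
	unsigned int count = 512;	/* PTEs to update */
	unsigned int ndw = count * 2;	/* dword count field, capped at 0xFFFFE in the driver */

	if (use_pte_pde_packet(0, count))
		printf("PTE/PDE header: 0x%08x\n", build_pte_pde_header(ndw));
	else
		printf("WRITE header:   0x%08x\n", build_write_header(ndw));
	return 0;
}
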
index 78aead0ada7b13ab5befc758aac285b6599c5503..fd03f318cc1cc7cd29892a2d05c9235e86e9c394 100644 (file)
@@ -2079,28 +2079,57 @@ void cayman_vm_set_page(struct radeon_device *rdev,
                        }
                }
        } else {
-               while (count) {
-                       ndw = count * 2;
-                       if (ndw > 0xFFFFE)
-                               ndw = 0xFFFFE;
+               if ((flags & RADEON_VM_PAGE_SYSTEM) ||
+                   (count == 1)) {
+                       while (count) {
+                               ndw = count * 2;
+                               if (ndw > 0xFFFFE)
+                                       ndw = 0xFFFFE;
+
+                               /* for non-physically contiguous pages (system) */
+                               ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw);
+                               ib->ptr[ib->length_dw++] = pe;
+                               ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
+                               for (; ndw > 0; ndw -= 2, --count, pe += 8) {
+                                       if (flags & RADEON_VM_PAGE_SYSTEM) {
+                                               value = radeon_vm_map_gart(rdev, addr);
+                                               value &= 0xFFFFFFFFFFFFF000ULL;
+                                       } else if (flags & RADEON_VM_PAGE_VALID) {
+                                               value = addr;
+                                       } else {
+                                               value = 0;
+                                       }
+                                       addr += incr;
+                                       value |= r600_flags;
+                                       ib->ptr[ib->length_dw++] = value;
+                                       ib->ptr[ib->length_dw++] = upper_32_bits(value);
+                               }
+                       }
+                       while (ib->length_dw & 0x7)
+                               ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
+               } else {
+                       while (count) {
+                               ndw = count * 2;
+                               if (ndw > 0xFFFFE)
+                                       ndw = 0xFFFFE;
 
-                       /* for non-physically contiguous pages (system) */
-                       ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw);
-                       ib->ptr[ib->length_dw++] = pe;
-                       ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
-                       for (; ndw > 0; ndw -= 2, --count, pe += 8) {
-                               if (flags & RADEON_VM_PAGE_SYSTEM) {
-                                       value = radeon_vm_map_gart(rdev, addr);
-                                       value &= 0xFFFFFFFFFFFFF000ULL;
-                               } else if (flags & RADEON_VM_PAGE_VALID) {
+                               if (flags & RADEON_VM_PAGE_VALID)
                                        value = addr;
-                               } else {
+                               else
                                        value = 0;
-                               }
-                               addr += incr;
-                               value |= r600_flags;
-                               ib->ptr[ib->length_dw++] = value;
+                               /* for physically contiguous pages (vram) */
+                               ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
+                               ib->ptr[ib->length_dw++] = pe; /* dst addr */
+                               ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
+                               ib->ptr[ib->length_dw++] = r600_flags; /* mask */
+                               ib->ptr[ib->length_dw++] = 0;
+                               ib->ptr[ib->length_dw++] = value; /* value */
                                ib->ptr[ib->length_dw++] = upper_32_bits(value);
+                               ib->ptr[ib->length_dw++] = incr; /* increment size */
+                               ib->ptr[ib->length_dw++] = 0;
+                               pe += ndw * 4;
+                               addr += (ndw / 2) * incr;
+                               count -= ndw / 2;
                        }
                }
                while (ib->length_dw & 0x7)
index 71a1709de1546f63a5c83efd594fc2862e2c40d5..e226faf16fea807f72fba17b7a7e4febc1bbe1ae 100644 (file)
                                         (((vmid) & 0xF) << 20) |       \
                                         (((n) & 0xFFFFF) << 0))
 
+#define DMA_PTE_PDE_PACKET(n)          ((2 << 28) |                    \
+                                        (1 << 26) |                    \
+                                        (1 << 21) |                    \
+                                        (((n) & 0xFFFFF) << 0))
+
 /* async DMA Packet types */
 #define        DMA_PACKET_WRITE                                  0x2
 #define        DMA_PACKET_COPY                                   0x3
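For reference, the nine-dword layout that the new ni.c path emits per PTE/PDE
packet can be sketched outside the driver as follows.  This is only an
illustration of the hunk above; emit_pte_pde_packet() and its arguments are
hypothetical helpers, not radeon code, and upper_32_bits() is redefined
locally to keep the snippet self-contained.

#include <stdint.h>

#define upper_32_bits(n)	((uint32_t)((n) >> 32))

#define DMA_PTE_PDE_PACKET(n)	((2u << 28) |			\
				 (1u << 26) |			\
				 (1u << 21) |			\
				 (((n) & 0xFFFFF) << 0))

/* Lay out one PTE/PDE packet exactly as cayman_vm_set_page() does for a run
 * of 'count' physically contiguous entries starting at GPU address 'pe'.
 * Returns the number of dwords written (always 9 here).
 */
unsigned int emit_pte_pde_packet(uint32_t *ib, uint64_t pe, uint64_t value,
				 uint32_t r600_flags, uint32_t incr,
				 unsigned int count)
{
	unsigned int ndw = count * 2;	/* capped at 0xFFFFE in the driver */
	unsigned int dw = 0;

	ib[dw++] = DMA_PTE_PDE_PACKET(ndw);
	ib[dw++] = (uint32_t)pe;		/* dst addr, low 32 bits */
	ib[dw++] = upper_32_bits(pe) & 0xff;	/* dst addr, bits 39:32 */
	ib[dw++] = r600_flags;			/* mask */
	ib[dw++] = 0;
	ib[dw++] = (uint32_t)value;		/* base PTE value, low */
	ib[dw++] = upper_32_bits(value);	/* base PTE value, high */
	ib[dw++] = incr;			/* per-entry increment */
	ib[dw++] = 0;
	return dw;
}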