return pte;
}
-static void unmap_and_free_pt(struct i915_page_table_entry *pt, struct drm_device *dev)
+#define i915_dma_unmap_single(px, dev) \
+ __i915_dma_unmap_single((px)->daddr, dev)
+
+static inline void __i915_dma_unmap_single(dma_addr_t daddr,
+ struct drm_device *dev)
+{
+ struct device *device = &dev->pdev->dev;
+
+ dma_unmap_page(device, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
+}
+
+/**
+ * i915_dma_map_single() - Create a dma mapping for a page table/dir/etc.
+ * @px: Page table/dir/etc to get a DMA map for
+ * @dev: drm device
+ *
+ * Page table allocations are unified across all gens. They always require a
+ * single 4k allocation, as well as a DMA mapping. If we keep the structs
+ * symmetric here, the simple macro covers us for every page table type.
+ *
+ * Return: 0 if success.
+ */
+#define i915_dma_map_single(px, dev) \
+ i915_dma_map_page_single((px)->page, (dev), &(px)->daddr)
+
+static inline int i915_dma_map_page_single(struct page *page,
+ struct drm_device *dev,
+ dma_addr_t *daddr)
+{
+ struct device *device = &dev->pdev->dev;
+
+ *daddr = dma_map_page(device, page, 0, 4096, PCI_DMA_BIDIRECTIONAL);
+ return dma_mapping_error(device, *daddr);
+}
+
+static void unmap_and_free_pt(struct i915_page_table_entry *pt,
+ struct drm_device *dev)
{
if (WARN_ON(!pt->page))
return;
+
+ i915_dma_unmap_single(pt, dev);
__free_page(pt->page);
+ kfree(pt->used_ptes);
kfree(pt);
}
static struct i915_page_table_entry *alloc_pt_single(struct drm_device *dev)
{
struct i915_page_table_entry *pt;
+ const size_t count = INTEL_INFO(dev)->gen >= 8 ?
+ GEN8_PTES : GEN6_PTES;
+ int ret = -ENOMEM;
pt = kzalloc(sizeof(*pt), GFP_KERNEL);
if (!pt)
return ERR_PTR(-ENOMEM);
+ pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
+ GFP_KERNEL);
+
+ if (!pt->used_ptes)
+ goto fail_bitmap;
+
pt->page = alloc_page(GFP_KERNEL | __GFP_ZERO);
- if (!pt->page) {
- kfree(pt);
- return ERR_PTR(-ENOMEM);
- }
+ if (!pt->page)
+ goto fail_page;
+
+ ret = i915_dma_map_single(pt, dev);
+ if (ret)
+ goto fail_dma;
return pt;
+
+fail_dma:
+ __free_page(pt->page);
+fail_page:
+ kfree(pt->used_ptes);
+fail_bitmap:
+ kfree(pt);
+
+ return ERR_PTR(ret);
}
/**
}
}
-static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
+/* Write pde (index) from the page directory @pd to the page table @pt */
+static void gen6_write_pde(struct i915_page_directory_entry *pd,
+ const int pde, struct i915_page_table_entry *pt)
{
- struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
- gen6_pte_t __iomem *pd_addr;
- uint32_t pd_entry;
- int i;
+ /* Caller needs to make sure the write completes if necessary */
+ struct i915_hw_ppgtt *ppgtt =
+ container_of(pd, struct i915_hw_ppgtt, pd);
+ u32 pd_entry;
- WARN_ON(ppgtt->pd.pd_offset & 0x3f);
- pd_addr = (gen6_pte_t __iomem*)dev_priv->gtt.gsm +
- ppgtt->pd.pd_offset / sizeof(gen6_pte_t);
- for (i = 0; i < ppgtt->num_pd_entries; i++) {
- dma_addr_t pt_addr;
+ pd_entry = GEN6_PDE_ADDR_ENCODE(pt->daddr);
+ pd_entry |= GEN6_PDE_VALID;
- pt_addr = ppgtt->pd.page_table[i]->daddr;
- pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
- pd_entry |= GEN6_PDE_VALID;
+ writel(pd_entry, ppgtt->pd_addr + pde);
+}
- writel(pd_entry, pd_addr + i);
- }
- readl(pd_addr);
+/* Write all the page tables found in the ppgtt structure to incrementing page
+ * directories. */
+static void gen6_write_page_range(struct drm_i915_private *dev_priv,
+ struct i915_page_directory_entry *pd,
+ uint32_t start, uint32_t length)
+{
+ struct i915_page_table_entry *pt;
+ uint32_t pde, temp;
+
+ gen6_for_each_pde(pt, pd, start, length, temp, pde)
+ gen6_write_pde(pd, pde, pt);
+
+ /* Make sure write is complete before other code can use this page
+ * table. Also require for WC mapped PTEs */
+ readl(dev_priv->gtt.gsm);
}
static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
4096, PCI_DMA_BIDIRECTIONAL);
}
+static int gen6_alloc_va_range(struct i915_address_space *vm,
+ uint64_t start, uint64_t length)
+{
+ struct i915_hw_ppgtt *ppgtt =
+ container_of(vm, struct i915_hw_ppgtt, base);
+ struct i915_page_table_entry *pt;
+ uint32_t pde, temp;
+
+ gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
+ DECLARE_BITMAP(tmp_bitmap, GEN6_PTES);
+
+ bitmap_zero(tmp_bitmap, GEN6_PTES);
+ bitmap_set(tmp_bitmap, gen6_pte_index(start),
+ gen6_pte_count(start, length));
+
+ bitmap_or(pt->used_ptes, pt->used_ptes, tmp_bitmap,
+ GEN6_PTES);
+ }
+
+ return 0;
+}
+
static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
{
int i;
0, dev_priv->gtt.base.total,
0);
if (ret)
- return ret;
+ goto err_out;
retried = true;
goto alloc;
}
if (ret)
- return ret;
+ goto err_out;
+
if (ppgtt->node.start < dev_priv->gtt.mappable_end)
DRM_DEBUG("Forced to use aperture for PDEs\n");
ppgtt->num_pd_entries = I915_PDES;
return 0;
+
+err_out:
+ return ret;
}
static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
return 0;
}
-static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt)
-{
- struct drm_device *dev = ppgtt->base.dev;
- int i;
-
- for (i = 0; i < ppgtt->num_pd_entries; i++) {
- struct page *page;
- dma_addr_t pt_addr;
-
- page = ppgtt->pd.page_table[i]->page;
- pt_addr = pci_map_page(dev->pdev, page, 0, 4096,
- PCI_DMA_BIDIRECTIONAL);
-
- if (pci_dma_mapping_error(dev->pdev, pt_addr)) {
- gen6_ppgtt_unmap_pages(ppgtt);
- return -EIO;
- }
-
- ppgtt->pd.page_table[i]->daddr = pt_addr;
- }
-
- return 0;
-}
-
static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
{
struct drm_device *dev = ppgtt->base.dev;
if (ret)
return ret;
- ret = gen6_ppgtt_setup_page_tables(ppgtt);
- if (ret) {
- gen6_ppgtt_free(ppgtt);
- return ret;
- }
-
+ ppgtt->base.allocate_va_range = gen6_alloc_va_range;
ppgtt->base.clear_range = gen6_ppgtt_clear_range;
ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
ppgtt->base.cleanup = gen6_ppgtt_cleanup;
ppgtt->pd.pd_offset =
ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
+ ppgtt->pd_addr = (gen6_pte_t __iomem *)dev_priv->gtt.gsm +
+ ppgtt->pd.pd_offset / sizeof(gen6_pte_t);
+
ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
+ gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
+
DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n",
ppgtt->node.size >> 20,
ppgtt->node.start / PAGE_SIZE);
- gen6_write_pdes(ppgtt);
DRM_DEBUG("Adding PPGTT at offset %x\n",
ppgtt->pd.pd_offset << 10);
return;
}
- list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
- /* TODO: Perhaps it shouldn't be gen6 specific */
- if (i915_is_ggtt(vm)) {
- if (dev_priv->mm.aliasing_ppgtt)
- gen6_write_pdes(dev_priv->mm.aliasing_ppgtt);
- continue;
- }
+ if (USES_PPGTT(dev)) {
+ list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
+ /* TODO: Perhaps it shouldn't be gen6 specific */
+
+ struct i915_hw_ppgtt *ppgtt =
+ container_of(vm, struct i915_hw_ppgtt,
+ base);
- gen6_write_pdes(container_of(vm, struct i915_hw_ppgtt, base));
+ if (i915_is_ggtt(vm))
+ ppgtt = dev_priv->mm.aliasing_ppgtt;
+
+ gen6_write_page_range(dev_priv, &ppgtt->pd,
+ 0, ppgtt->base.total);
+ }
}
i915_ggtt_flush(dev_priv);
#define I915_PTE_MASK(pte_len) (I915_PTES(pte_len) - 1)
#define I915_PDES 512
#define I915_PDE_MASK (I915_PDES - 1)
+#define NUM_PTE(pde_shift) (1 << (pde_shift - PAGE_SHIFT))
#define GEN6_PTES I915_PTES(sizeof(gen6_pte_t))
#define GEN6_PD_SIZE (I915_PDES * PAGE_SIZE)
#define GEN6_PD_ALIGN (PAGE_SIZE * 16)
+#define GEN6_PDE_SHIFT 22
#define GEN6_PDE_VALID (1 << 0)
#define GEN7_PTE_CACHE_L3_LLC (3 << 1)
struct i915_page_table_entry {
struct page *page;
dma_addr_t daddr;
+
+ unsigned long *used_ptes;
};
struct i915_page_directory_entry {
gen6_pte_t (*pte_encode)(dma_addr_t addr,
enum i915_cache_level level,
bool valid, u32 flags); /* Create a valid PTE */
+ int (*allocate_va_range)(struct i915_address_space *vm,
+ uint64_t start,
+ uint64_t length);
void (*clear_range)(struct i915_address_space *vm,
uint64_t start,
uint64_t length,
struct drm_i915_file_private *file_priv;
+ gen6_pte_t __iomem *pd_addr;
+
int (*enable)(struct i915_hw_ppgtt *ppgtt);
int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
struct intel_engine_cs *ring);
void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m);
};
+/* For each pde iterates over every pde between from start until start + length.
+ * If start, and start+length are not perfectly divisible, the macro will round
+ * down, and up as needed. The macro modifies pde, start, and length. Dev is
+ * only used to differentiate shift values. Temp is temp. On gen6/7, start = 0,
+ * and length = 2G effectively iterates over every PDE in the system.
+ *
+ * XXX: temp is not actually needed, but it saves doing the ALIGN operation.
+ */
+#define gen6_for_each_pde(pt, pd, start, length, temp, iter) \
+ for (iter = gen6_pde_index(start), pt = (pd)->page_table[iter]; \
+ length > 0 && iter < I915_PDES; \
+ pt = (pd)->page_table[++iter], \
+ temp = ALIGN(start+1, 1 << GEN6_PDE_SHIFT) - start, \
+ temp = min_t(unsigned, temp, length), \
+ start += temp, length -= temp)
+
+static inline uint32_t i915_pte_index(uint64_t address, uint32_t pde_shift)
+{
+ const uint32_t mask = NUM_PTE(pde_shift) - 1;
+
+ return (address >> PAGE_SHIFT) & mask;
+}
+
+/* Helper to counts the number of PTEs within the given length. This count
+ * does not cross a page table boundary, so the max value would be
+ * GEN6_PTES for GEN6, and GEN8_PTES for GEN8.
+*/
+static inline uint32_t i915_pte_count(uint64_t addr, size_t length,
+ uint32_t pde_shift)
+{
+ const uint64_t mask = ~((1 << pde_shift) - 1);
+ uint64_t end;
+
+ WARN_ON(length == 0);
+ WARN_ON(offset_in_page(addr|length));
+
+ end = addr + length;
+
+ if ((addr & mask) != (end & mask))
+ return NUM_PTE(pde_shift) - i915_pte_index(addr, pde_shift);
+
+ return i915_pte_index(end, pde_shift) - i915_pte_index(addr, pde_shift);
+}
+
+static inline uint32_t i915_pde_index(uint64_t addr, uint32_t shift)
+{
+ return (addr >> shift) & I915_PDE_MASK;
+}
+
+static inline uint32_t gen6_pte_index(uint32_t addr)
+{
+ return i915_pte_index(addr, GEN6_PDE_SHIFT);
+}
+
+static inline size_t gen6_pte_count(uint32_t addr, uint32_t length)
+{
+ return i915_pte_count(addr, length, GEN6_PDE_SHIFT);
+}
+
+static inline uint32_t gen6_pde_index(uint32_t addr)
+{
+ return i915_pde_index(addr, GEN6_PDE_SHIFT);
+}
+
int i915_gem_gtt_init(struct drm_device *dev);
void i915_gem_init_global_gtt(struct drm_device *dev);
void i915_global_gtt_cleanup(struct drm_device *dev);