drm/i915: Track active vma requests
[GitHub/LineageOS/android_kernel_motorola_exynos9610.git] / drivers / gpu / drm / i915 / i915_gem.c
CommitLineData
673a394b 1/*
be6a0376 2 * Copyright © 2008-2015 Intel Corporation
673a394b
EA
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 *
26 */
27
760285e7 28#include <drm/drmP.h>
0de23977 29#include <drm/drm_vma_manager.h>
760285e7 30#include <drm/i915_drm.h>
673a394b 31#include "i915_drv.h"
c13d87ea 32#include "i915_gem_dmabuf.h"
eb82289a 33#include "i915_vgpu.h"
1c5d22f7 34#include "i915_trace.h"
652c393a 35#include "intel_drv.h"
0ccdacf6 36#include "intel_mocs.h"
c13d87ea 37#include <linux/reservation.h>
5949eac4 38#include <linux/shmem_fs.h>
5a0e3ad6 39#include <linux/slab.h>
673a394b 40#include <linux/swap.h>
79e53945 41#include <linux/pci.h>
1286ff73 42#include <linux/dma-buf.h>
673a394b 43
05394f39 44static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
e62b59e4 45static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
61050808 46
c76ce038
CW
47static bool cpu_cache_is_coherent(struct drm_device *dev,
48 enum i915_cache_level level)
49{
50 return HAS_LLC(dev) || level != I915_CACHE_NONE;
51}
52
2c22569b
CW
53static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
54{
b50a5371
AS
55 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
56 return false;
57
2c22569b
CW
58 if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
59 return true;
60
61 return obj->pin_display;
62}
63
4f1959ee
AS
64static int
65insert_mappable_node(struct drm_i915_private *i915,
66 struct drm_mm_node *node, u32 size)
67{
68 memset(node, 0, sizeof(*node));
69 return drm_mm_insert_node_in_range_generic(&i915->ggtt.base.mm, node,
70 size, 0, 0, 0,
71 i915->ggtt.mappable_end,
72 DRM_MM_SEARCH_DEFAULT,
73 DRM_MM_CREATE_DEFAULT);
74}
75
76static void
77remove_mappable_node(struct drm_mm_node *node)
78{
79 drm_mm_remove_node(node);
80}
81
73aa808f
CW
82/* some bookkeeping */
83static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
84 size_t size)
85{
c20e8355 86 spin_lock(&dev_priv->mm.object_stat_lock);
73aa808f
CW
87 dev_priv->mm.object_count++;
88 dev_priv->mm.object_memory += size;
c20e8355 89 spin_unlock(&dev_priv->mm.object_stat_lock);
73aa808f
CW
90}
91
92static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
93 size_t size)
94{
c20e8355 95 spin_lock(&dev_priv->mm.object_stat_lock);
73aa808f
CW
96 dev_priv->mm.object_count--;
97 dev_priv->mm.object_memory -= size;
c20e8355 98 spin_unlock(&dev_priv->mm.object_stat_lock);
73aa808f
CW
99}
100
21dd3734 101static int
33196ded 102i915_gem_wait_for_error(struct i915_gpu_error *error)
30dbf0c0 103{
30dbf0c0
CW
104 int ret;
105
d98c52cf 106 if (!i915_reset_in_progress(error))
30dbf0c0
CW
107 return 0;
108
0a6759c6
DV
109 /*
110 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
111 * userspace. If it takes that long something really bad is going on and
112 * we should simply try to bail out and fail as gracefully as possible.
113 */
1f83fee0 114 ret = wait_event_interruptible_timeout(error->reset_queue,
d98c52cf 115 !i915_reset_in_progress(error),
1f83fee0 116 10*HZ);
0a6759c6
DV
117 if (ret == 0) {
118 DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
119 return -EIO;
120 } else if (ret < 0) {
30dbf0c0 121 return ret;
d98c52cf
CW
122 } else {
123 return 0;
0a6759c6 124 }
30dbf0c0
CW
125}
126
54cf91dc 127int i915_mutex_lock_interruptible(struct drm_device *dev)
76c1dec1 128{
fac5e23e 129 struct drm_i915_private *dev_priv = to_i915(dev);
76c1dec1
CW
130 int ret;
131
33196ded 132 ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
76c1dec1
CW
133 if (ret)
134 return ret;
135
136 ret = mutex_lock_interruptible(&dev->struct_mutex);
137 if (ret)
138 return ret;
139
76c1dec1
CW
140 return 0;
141}
30dbf0c0 142
5a125c3c
EA
143int
144i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
05394f39 145 struct drm_file *file)
5a125c3c 146{
72e96d64 147 struct drm_i915_private *dev_priv = to_i915(dev);
62106b4f 148 struct i915_ggtt *ggtt = &dev_priv->ggtt;
72e96d64 149 struct drm_i915_gem_get_aperture *args = data;
ca1543be 150 struct i915_vma *vma;
6299f992 151 size_t pinned;
5a125c3c 152
6299f992 153 pinned = 0;
73aa808f 154 mutex_lock(&dev->struct_mutex);
1c7f4bca 155 list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
ca1543be
TU
156 if (vma->pin_count)
157 pinned += vma->node.size;
1c7f4bca 158 list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
ca1543be
TU
159 if (vma->pin_count)
160 pinned += vma->node.size;
73aa808f 161 mutex_unlock(&dev->struct_mutex);
5a125c3c 162
72e96d64 163 args->aper_size = ggtt->base.total;
0206e353 164 args->aper_available_size = args->aper_size - pinned;
6299f992 165
5a125c3c
EA
166 return 0;
167}
168
6a2c4232
CW
169static int
170i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
00731155 171{
6a2c4232
CW
172 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
173 char *vaddr = obj->phys_handle->vaddr;
174 struct sg_table *st;
175 struct scatterlist *sg;
176 int i;
00731155 177
6a2c4232
CW
178 if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
179 return -EINVAL;
180
181 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
182 struct page *page;
183 char *src;
184
185 page = shmem_read_mapping_page(mapping, i);
186 if (IS_ERR(page))
187 return PTR_ERR(page);
188
189 src = kmap_atomic(page);
190 memcpy(vaddr, src, PAGE_SIZE);
191 drm_clflush_virt_range(vaddr, PAGE_SIZE);
192 kunmap_atomic(src);
193
09cbfeaf 194 put_page(page);
6a2c4232
CW
195 vaddr += PAGE_SIZE;
196 }
197
c033666a 198 i915_gem_chipset_flush(to_i915(obj->base.dev));
6a2c4232
CW
199
200 st = kmalloc(sizeof(*st), GFP_KERNEL);
201 if (st == NULL)
202 return -ENOMEM;
203
204 if (sg_alloc_table(st, 1, GFP_KERNEL)) {
205 kfree(st);
206 return -ENOMEM;
207 }
208
209 sg = st->sgl;
210 sg->offset = 0;
211 sg->length = obj->base.size;
00731155 212
6a2c4232
CW
213 sg_dma_address(sg) = obj->phys_handle->busaddr;
214 sg_dma_len(sg) = obj->base.size;
215
216 obj->pages = st;
6a2c4232
CW
217 return 0;
218}
219
220static void
221i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
222{
223 int ret;
224
225 BUG_ON(obj->madv == __I915_MADV_PURGED);
00731155 226
6a2c4232 227 ret = i915_gem_object_set_to_cpu_domain(obj, true);
f4457ae7 228 if (WARN_ON(ret)) {
6a2c4232
CW
229 /* In the event of a disaster, abandon all caches and
230 * hope for the best.
231 */
6a2c4232
CW
232 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
233 }
234
235 if (obj->madv == I915_MADV_DONTNEED)
236 obj->dirty = 0;
237
238 if (obj->dirty) {
00731155 239 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
6a2c4232 240 char *vaddr = obj->phys_handle->vaddr;
00731155
CW
241 int i;
242
243 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
6a2c4232
CW
244 struct page *page;
245 char *dst;
246
247 page = shmem_read_mapping_page(mapping, i);
248 if (IS_ERR(page))
249 continue;
250
251 dst = kmap_atomic(page);
252 drm_clflush_virt_range(vaddr, PAGE_SIZE);
253 memcpy(dst, vaddr, PAGE_SIZE);
254 kunmap_atomic(dst);
255
256 set_page_dirty(page);
257 if (obj->madv == I915_MADV_WILLNEED)
00731155 258 mark_page_accessed(page);
09cbfeaf 259 put_page(page);
00731155
CW
260 vaddr += PAGE_SIZE;
261 }
6a2c4232 262 obj->dirty = 0;
00731155
CW
263 }
264
6a2c4232
CW
265 sg_free_table(obj->pages);
266 kfree(obj->pages);
6a2c4232
CW
267}
268
269static void
270i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
271{
272 drm_pci_free(obj->base.dev, obj->phys_handle);
273}
274
275static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
276 .get_pages = i915_gem_object_get_pages_phys,
277 .put_pages = i915_gem_object_put_pages_phys,
278 .release = i915_gem_object_release_phys,
279};
280
aa653a68
CW
281int
282i915_gem_object_unbind(struct drm_i915_gem_object *obj)
283{
284 struct i915_vma *vma;
285 LIST_HEAD(still_in_list);
286 int ret;
287
288 /* The vma will only be freed if it is marked as closed, and if we wait
289 * upon rendering to the vma, we may unbind anything in the list.
290 */
291 while ((vma = list_first_entry_or_null(&obj->vma_list,
292 struct i915_vma,
293 obj_link))) {
294 list_move_tail(&vma->obj_link, &still_in_list);
295 ret = i915_vma_unbind(vma);
296 if (ret)
297 break;
298 }
299 list_splice(&still_in_list, &obj->vma_list);
300
301 return ret;
302}
303
00731155
CW
304int
305i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
306 int align)
307{
308 drm_dma_handle_t *phys;
6a2c4232 309 int ret;
00731155
CW
310
311 if (obj->phys_handle) {
312 if ((unsigned long)obj->phys_handle->vaddr & (align -1))
313 return -EBUSY;
314
315 return 0;
316 }
317
318 if (obj->madv != I915_MADV_WILLNEED)
319 return -EFAULT;
320
321 if (obj->base.filp == NULL)
322 return -EINVAL;
323
4717ca9e
CW
324 ret = i915_gem_object_unbind(obj);
325 if (ret)
326 return ret;
327
328 ret = i915_gem_object_put_pages(obj);
6a2c4232
CW
329 if (ret)
330 return ret;
331
00731155
CW
332 /* create a new object */
333 phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
334 if (!phys)
335 return -ENOMEM;
336
00731155 337 obj->phys_handle = phys;
6a2c4232
CW
338 obj->ops = &i915_gem_phys_ops;
339
340 return i915_gem_object_get_pages(obj);
00731155
CW
341}
342
343static int
344i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
345 struct drm_i915_gem_pwrite *args,
346 struct drm_file *file_priv)
347{
348 struct drm_device *dev = obj->base.dev;
349 void *vaddr = obj->phys_handle->vaddr + args->offset;
3ed605bc 350 char __user *user_data = u64_to_user_ptr(args->data_ptr);
063e4e6b 351 int ret = 0;
6a2c4232
CW
352
353 /* We manually control the domain here and pretend that it
354 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
355 */
356 ret = i915_gem_object_wait_rendering(obj, false);
357 if (ret)
358 return ret;
00731155 359
77a0d1ca 360 intel_fb_obj_invalidate(obj, ORIGIN_CPU);
00731155
CW
361 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
362 unsigned long unwritten;
363
364 /* The physical object once assigned is fixed for the lifetime
365 * of the obj, so we can safely drop the lock and continue
366 * to access vaddr.
367 */
368 mutex_unlock(&dev->struct_mutex);
369 unwritten = copy_from_user(vaddr, user_data, args->size);
370 mutex_lock(&dev->struct_mutex);
063e4e6b
PZ
371 if (unwritten) {
372 ret = -EFAULT;
373 goto out;
374 }
00731155
CW
375 }
376
6a2c4232 377 drm_clflush_virt_range(vaddr, args->size);
c033666a 378 i915_gem_chipset_flush(to_i915(dev));
063e4e6b
PZ
379
380out:
de152b62 381 intel_fb_obj_flush(obj, false, ORIGIN_CPU);
063e4e6b 382 return ret;
00731155
CW
383}
384
42dcedd4
CW
385void *i915_gem_object_alloc(struct drm_device *dev)
386{
fac5e23e 387 struct drm_i915_private *dev_priv = to_i915(dev);
efab6d8d 388 return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
42dcedd4
CW
389}
390
391void i915_gem_object_free(struct drm_i915_gem_object *obj)
392{
fac5e23e 393 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
efab6d8d 394 kmem_cache_free(dev_priv->objects, obj);
42dcedd4
CW
395}
396
ff72145b
DA
397static int
398i915_gem_create(struct drm_file *file,
399 struct drm_device *dev,
400 uint64_t size,
401 uint32_t *handle_p)
673a394b 402{
05394f39 403 struct drm_i915_gem_object *obj;
a1a2d1d3
PP
404 int ret;
405 u32 handle;
673a394b 406
ff72145b 407 size = roundup(size, PAGE_SIZE);
8ffc0246
CW
408 if (size == 0)
409 return -EINVAL;
673a394b
EA
410
411 /* Allocate the new object */
d37cd8a8 412 obj = i915_gem_object_create(dev, size);
fe3db79b
CW
413 if (IS_ERR(obj))
414 return PTR_ERR(obj);
673a394b 415
05394f39 416 ret = drm_gem_handle_create(file, &obj->base, &handle);
202f2fef 417 /* drop reference from allocate - handle holds it now */
34911fd3 418 i915_gem_object_put_unlocked(obj);
d861e338
DV
419 if (ret)
420 return ret;
202f2fef 421
ff72145b 422 *handle_p = handle;
673a394b
EA
423 return 0;
424}
425
ff72145b
DA
426int
427i915_gem_dumb_create(struct drm_file *file,
428 struct drm_device *dev,
429 struct drm_mode_create_dumb *args)
430{
431 /* have to work out size/pitch and return them */
de45eaf7 432 args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
ff72145b
DA
433 args->size = args->pitch * args->height;
434 return i915_gem_create(file, dev,
da6b51d0 435 args->size, &args->handle);
ff72145b
DA
436}
437
ff72145b
DA
438/**
439 * Creates a new mm object and returns a handle to it.
14bb2c11
TU
440 * @dev: drm device pointer
441 * @data: ioctl data blob
442 * @file: drm file pointer
ff72145b
DA
443 */
444int
445i915_gem_create_ioctl(struct drm_device *dev, void *data,
446 struct drm_file *file)
447{
448 struct drm_i915_gem_create *args = data;
63ed2cb2 449
ff72145b 450 return i915_gem_create(file, dev,
da6b51d0 451 args->size, &args->handle);
ff72145b
DA
452}
453
8461d226
DV
454static inline int
455__copy_to_user_swizzled(char __user *cpu_vaddr,
456 const char *gpu_vaddr, int gpu_offset,
457 int length)
458{
459 int ret, cpu_offset = 0;
460
461 while (length > 0) {
462 int cacheline_end = ALIGN(gpu_offset + 1, 64);
463 int this_length = min(cacheline_end - gpu_offset, length);
464 int swizzled_gpu_offset = gpu_offset ^ 64;
465
466 ret = __copy_to_user(cpu_vaddr + cpu_offset,
467 gpu_vaddr + swizzled_gpu_offset,
468 this_length);
469 if (ret)
470 return ret + length;
471
472 cpu_offset += this_length;
473 gpu_offset += this_length;
474 length -= this_length;
475 }
476
477 return 0;
478}
479
8c59967c 480static inline int
4f0c7cfb
BW
481__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
482 const char __user *cpu_vaddr,
8c59967c
DV
483 int length)
484{
485 int ret, cpu_offset = 0;
486
487 while (length > 0) {
488 int cacheline_end = ALIGN(gpu_offset + 1, 64);
489 int this_length = min(cacheline_end - gpu_offset, length);
490 int swizzled_gpu_offset = gpu_offset ^ 64;
491
492 ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
493 cpu_vaddr + cpu_offset,
494 this_length);
495 if (ret)
496 return ret + length;
497
498 cpu_offset += this_length;
499 gpu_offset += this_length;
500 length -= this_length;
501 }
502
503 return 0;
504}
505
4c914c0c
BV
506/*
507 * Pins the specified object's pages and synchronizes the object with
508 * GPU accesses. Sets needs_clflush to non-zero if the caller should
509 * flush the object from the CPU cache.
510 */
511int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
512 int *needs_clflush)
513{
514 int ret;
515
516 *needs_clflush = 0;
517
b9bcd14a 518 if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
4c914c0c
BV
519 return -EINVAL;
520
c13d87ea
CW
521 ret = i915_gem_object_wait_rendering(obj, true);
522 if (ret)
523 return ret;
524
4c914c0c
BV
525 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
526 /* If we're not in the cpu read domain, set ourself into the gtt
527 * read domain and manually flush cachelines (if required). This
528 * optimizes for the case when the gpu will dirty the data
529 * anyway again before the next pread happens. */
530 *needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
531 obj->cache_level);
4c914c0c
BV
532 }
533
534 ret = i915_gem_object_get_pages(obj);
535 if (ret)
536 return ret;
537
538 i915_gem_object_pin_pages(obj);
539
540 return ret;
541}
542
d174bd64
DV
543/* Per-page copy function for the shmem pread fastpath.
544 * Flushes invalid cachelines before reading the target if
545 * needs_clflush is set. */
eb01459f 546static int
d174bd64
DV
547shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
548 char __user *user_data,
549 bool page_do_bit17_swizzling, bool needs_clflush)
550{
551 char *vaddr;
552 int ret;
553
e7e58eb5 554 if (unlikely(page_do_bit17_swizzling))
d174bd64
DV
555 return -EINVAL;
556
557 vaddr = kmap_atomic(page);
558 if (needs_clflush)
559 drm_clflush_virt_range(vaddr + shmem_page_offset,
560 page_length);
561 ret = __copy_to_user_inatomic(user_data,
562 vaddr + shmem_page_offset,
563 page_length);
564 kunmap_atomic(vaddr);
565
f60d7f0c 566 return ret ? -EFAULT : 0;
d174bd64
DV
567}
568
23c18c71
DV
569static void
570shmem_clflush_swizzled_range(char *addr, unsigned long length,
571 bool swizzled)
572{
e7e58eb5 573 if (unlikely(swizzled)) {
23c18c71
DV
574 unsigned long start = (unsigned long) addr;
575 unsigned long end = (unsigned long) addr + length;
576
577 /* For swizzling simply ensure that we always flush both
578 * channels. Lame, but simple and it works. Swizzled
579 * pwrite/pread is far from a hotpath - current userspace
580 * doesn't use it at all. */
581 start = round_down(start, 128);
582 end = round_up(end, 128);
583
584 drm_clflush_virt_range((void *)start, end - start);
585 } else {
586 drm_clflush_virt_range(addr, length);
587 }
588
589}
590
d174bd64
DV
591/* Only difference to the fast-path function is that this can handle bit17
592 * and uses non-atomic copy and kmap functions. */
593static int
594shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
595 char __user *user_data,
596 bool page_do_bit17_swizzling, bool needs_clflush)
597{
598 char *vaddr;
599 int ret;
600
601 vaddr = kmap(page);
602 if (needs_clflush)
23c18c71
DV
603 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
604 page_length,
605 page_do_bit17_swizzling);
d174bd64
DV
606
607 if (page_do_bit17_swizzling)
608 ret = __copy_to_user_swizzled(user_data,
609 vaddr, shmem_page_offset,
610 page_length);
611 else
612 ret = __copy_to_user(user_data,
613 vaddr + shmem_page_offset,
614 page_length);
615 kunmap(page);
616
f60d7f0c 617 return ret ? - EFAULT : 0;
d174bd64
DV
618}
619
b50a5371
AS
620static inline unsigned long
621slow_user_access(struct io_mapping *mapping,
622 uint64_t page_base, int page_offset,
623 char __user *user_data,
624 unsigned long length, bool pwrite)
625{
626 void __iomem *ioaddr;
627 void *vaddr;
628 uint64_t unwritten;
629
630 ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE);
631 /* We can use the cpu mem copy function because this is X86. */
632 vaddr = (void __force *)ioaddr + page_offset;
633 if (pwrite)
634 unwritten = __copy_from_user(vaddr, user_data, length);
635 else
636 unwritten = __copy_to_user(user_data, vaddr, length);
637
638 io_mapping_unmap(ioaddr);
639 return unwritten;
640}
641
642static int
643i915_gem_gtt_pread(struct drm_device *dev,
644 struct drm_i915_gem_object *obj, uint64_t size,
645 uint64_t data_offset, uint64_t data_ptr)
646{
fac5e23e 647 struct drm_i915_private *dev_priv = to_i915(dev);
b50a5371
AS
648 struct i915_ggtt *ggtt = &dev_priv->ggtt;
649 struct drm_mm_node node;
650 char __user *user_data;
651 uint64_t remain;
652 uint64_t offset;
653 int ret;
654
655 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
656 if (ret) {
657 ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE);
658 if (ret)
659 goto out;
660
661 ret = i915_gem_object_get_pages(obj);
662 if (ret) {
663 remove_mappable_node(&node);
664 goto out;
665 }
666
667 i915_gem_object_pin_pages(obj);
668 } else {
669 node.start = i915_gem_obj_ggtt_offset(obj);
670 node.allocated = false;
671 ret = i915_gem_object_put_fence(obj);
672 if (ret)
673 goto out_unpin;
674 }
675
676 ret = i915_gem_object_set_to_gtt_domain(obj, false);
677 if (ret)
678 goto out_unpin;
679
680 user_data = u64_to_user_ptr(data_ptr);
681 remain = size;
682 offset = data_offset;
683
684 mutex_unlock(&dev->struct_mutex);
685 if (likely(!i915.prefault_disable)) {
686 ret = fault_in_multipages_writeable(user_data, remain);
687 if (ret) {
688 mutex_lock(&dev->struct_mutex);
689 goto out_unpin;
690 }
691 }
692
693 while (remain > 0) {
694 /* Operation in this page
695 *
696 * page_base = page offset within aperture
697 * page_offset = offset within page
698 * page_length = bytes to copy for this page
699 */
700 u32 page_base = node.start;
701 unsigned page_offset = offset_in_page(offset);
702 unsigned page_length = PAGE_SIZE - page_offset;
703 page_length = remain < page_length ? remain : page_length;
704 if (node.allocated) {
705 wmb();
706 ggtt->base.insert_page(&ggtt->base,
707 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
708 node.start,
709 I915_CACHE_NONE, 0);
710 wmb();
711 } else {
712 page_base += offset & PAGE_MASK;
713 }
714 /* This is a slow read/write as it tries to read from
715 * and write to user memory which may result into page
716 * faults, and so we cannot perform this under struct_mutex.
717 */
718 if (slow_user_access(ggtt->mappable, page_base,
719 page_offset, user_data,
720 page_length, false)) {
721 ret = -EFAULT;
722 break;
723 }
724
725 remain -= page_length;
726 user_data += page_length;
727 offset += page_length;
728 }
729
730 mutex_lock(&dev->struct_mutex);
731 if (ret == 0 && (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
732 /* The user has modified the object whilst we tried
733 * reading from it, and we now have no idea what domain
734 * the pages should be in. As we have just been touching
735 * them directly, flush everything back to the GTT
736 * domain.
737 */
738 ret = i915_gem_object_set_to_gtt_domain(obj, false);
739 }
740
741out_unpin:
742 if (node.allocated) {
743 wmb();
744 ggtt->base.clear_range(&ggtt->base,
745 node.start, node.size,
746 true);
747 i915_gem_object_unpin_pages(obj);
748 remove_mappable_node(&node);
749 } else {
750 i915_gem_object_ggtt_unpin(obj);
751 }
752out:
753 return ret;
754}
755
eb01459f 756static int
dbf7bff0
DV
757i915_gem_shmem_pread(struct drm_device *dev,
758 struct drm_i915_gem_object *obj,
759 struct drm_i915_gem_pread *args,
760 struct drm_file *file)
eb01459f 761{
8461d226 762 char __user *user_data;
eb01459f 763 ssize_t remain;
8461d226 764 loff_t offset;
eb2c0c81 765 int shmem_page_offset, page_length, ret = 0;
8461d226 766 int obj_do_bit17_swizzling, page_do_bit17_swizzling;
96d79b52 767 int prefaulted = 0;
8489731c 768 int needs_clflush = 0;
67d5a50c 769 struct sg_page_iter sg_iter;
eb01459f 770
6eae0059 771 if (!i915_gem_object_has_struct_page(obj))
b50a5371
AS
772 return -ENODEV;
773
3ed605bc 774 user_data = u64_to_user_ptr(args->data_ptr);
eb01459f
EA
775 remain = args->size;
776
8461d226 777 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
eb01459f 778
4c914c0c 779 ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
f60d7f0c
CW
780 if (ret)
781 return ret;
782
8461d226 783 offset = args->offset;
eb01459f 784
67d5a50c
ID
785 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
786 offset >> PAGE_SHIFT) {
2db76d7c 787 struct page *page = sg_page_iter_page(&sg_iter);
9da3da66
CW
788
789 if (remain <= 0)
790 break;
791
eb01459f
EA
792 /* Operation in this page
793 *
eb01459f 794 * shmem_page_offset = offset within page in shmem file
eb01459f
EA
795 * page_length = bytes to copy for this page
796 */
c8cbbb8b 797 shmem_page_offset = offset_in_page(offset);
eb01459f
EA
798 page_length = remain;
799 if ((shmem_page_offset + page_length) > PAGE_SIZE)
800 page_length = PAGE_SIZE - shmem_page_offset;
eb01459f 801
8461d226
DV
802 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
803 (page_to_phys(page) & (1 << 17)) != 0;
804
d174bd64
DV
805 ret = shmem_pread_fast(page, shmem_page_offset, page_length,
806 user_data, page_do_bit17_swizzling,
807 needs_clflush);
808 if (ret == 0)
809 goto next_page;
dbf7bff0 810
dbf7bff0
DV
811 mutex_unlock(&dev->struct_mutex);
812
d330a953 813 if (likely(!i915.prefault_disable) && !prefaulted) {
f56f821f 814 ret = fault_in_multipages_writeable(user_data, remain);
96d79b52
DV
815 /* Userspace is tricking us, but we've already clobbered
816 * its pages with the prefault and promised to write the
817 * data up to the first fault. Hence ignore any errors
818 * and just continue. */
819 (void)ret;
820 prefaulted = 1;
821 }
eb01459f 822
d174bd64
DV
823 ret = shmem_pread_slow(page, shmem_page_offset, page_length,
824 user_data, page_do_bit17_swizzling,
825 needs_clflush);
eb01459f 826
dbf7bff0 827 mutex_lock(&dev->struct_mutex);
f60d7f0c 828
f60d7f0c 829 if (ret)
8461d226 830 goto out;
8461d226 831
17793c9a 832next_page:
eb01459f 833 remain -= page_length;
8461d226 834 user_data += page_length;
eb01459f
EA
835 offset += page_length;
836 }
837
4f27b75d 838out:
f60d7f0c
CW
839 i915_gem_object_unpin_pages(obj);
840
eb01459f
EA
841 return ret;
842}
843
673a394b
EA
844/**
845 * Reads data from the object referenced by handle.
14bb2c11
TU
846 * @dev: drm device pointer
847 * @data: ioctl data blob
848 * @file: drm file pointer
673a394b
EA
849 *
850 * On error, the contents of *data are undefined.
851 */
852int
853i915_gem_pread_ioctl(struct drm_device *dev, void *data,
05394f39 854 struct drm_file *file)
673a394b
EA
855{
856 struct drm_i915_gem_pread *args = data;
05394f39 857 struct drm_i915_gem_object *obj;
35b62a89 858 int ret = 0;
673a394b 859
51311d0a
CW
860 if (args->size == 0)
861 return 0;
862
863 if (!access_ok(VERIFY_WRITE,
3ed605bc 864 u64_to_user_ptr(args->data_ptr),
51311d0a
CW
865 args->size))
866 return -EFAULT;
867
4f27b75d 868 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 869 if (ret)
4f27b75d 870 return ret;
673a394b 871
03ac0642
CW
872 obj = i915_gem_object_lookup(file, args->handle);
873 if (!obj) {
1d7cfea1
CW
874 ret = -ENOENT;
875 goto unlock;
4f27b75d 876 }
673a394b 877
7dcd2499 878 /* Bounds check source. */
05394f39
CW
879 if (args->offset > obj->base.size ||
880 args->size > obj->base.size - args->offset) {
ce9d419d 881 ret = -EINVAL;
35b62a89 882 goto out;
ce9d419d
CW
883 }
884
db53a302
CW
885 trace_i915_gem_object_pread(obj, args->offset, args->size);
886
dbf7bff0 887 ret = i915_gem_shmem_pread(dev, obj, args, file);
673a394b 888
b50a5371
AS
889 /* pread for non shmem backed objects */
890 if (ret == -EFAULT || ret == -ENODEV)
891 ret = i915_gem_gtt_pread(dev, obj, args->size,
892 args->offset, args->data_ptr);
893
35b62a89 894out:
f8c417cd 895 i915_gem_object_put(obj);
1d7cfea1 896unlock:
4f27b75d 897 mutex_unlock(&dev->struct_mutex);
eb01459f 898 return ret;
673a394b
EA
899}
900
0839ccb8
KP
901/* This is the fast write path which cannot handle
902 * page faults in the source data
9b7530cc 903 */
0839ccb8
KP
904
905static inline int
906fast_user_write(struct io_mapping *mapping,
907 loff_t page_base, int page_offset,
908 char __user *user_data,
909 int length)
9b7530cc 910{
4f0c7cfb
BW
911 void __iomem *vaddr_atomic;
912 void *vaddr;
0839ccb8 913 unsigned long unwritten;
9b7530cc 914
3e4d3af5 915 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
4f0c7cfb
BW
916 /* We can use the cpu mem copy function because this is X86. */
917 vaddr = (void __force*)vaddr_atomic + page_offset;
918 unwritten = __copy_from_user_inatomic_nocache(vaddr,
0839ccb8 919 user_data, length);
3e4d3af5 920 io_mapping_unmap_atomic(vaddr_atomic);
fbd5a26d 921 return unwritten;
0839ccb8
KP
922}
923
3de09aa3
EA
924/**
925 * This is the fast pwrite path, where we copy the data directly from the
926 * user into the GTT, uncached.
62f90b38 927 * @i915: i915 device private data
14bb2c11
TU
928 * @obj: i915 gem object
929 * @args: pwrite arguments structure
930 * @file: drm file pointer
3de09aa3 931 */
673a394b 932static int
4f1959ee 933i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
05394f39 934 struct drm_i915_gem_object *obj,
3de09aa3 935 struct drm_i915_gem_pwrite *args,
05394f39 936 struct drm_file *file)
673a394b 937{
4f1959ee 938 struct i915_ggtt *ggtt = &i915->ggtt;
b50a5371 939 struct drm_device *dev = obj->base.dev;
4f1959ee
AS
940 struct drm_mm_node node;
941 uint64_t remain, offset;
673a394b 942 char __user *user_data;
4f1959ee 943 int ret;
b50a5371
AS
944 bool hit_slow_path = false;
945
946 if (obj->tiling_mode != I915_TILING_NONE)
947 return -EFAULT;
935aaa69 948
1ec9e26d 949 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
4f1959ee
AS
950 if (ret) {
951 ret = insert_mappable_node(i915, &node, PAGE_SIZE);
952 if (ret)
953 goto out;
954
955 ret = i915_gem_object_get_pages(obj);
956 if (ret) {
957 remove_mappable_node(&node);
958 goto out;
959 }
960
961 i915_gem_object_pin_pages(obj);
962 } else {
963 node.start = i915_gem_obj_ggtt_offset(obj);
964 node.allocated = false;
b50a5371
AS
965 ret = i915_gem_object_put_fence(obj);
966 if (ret)
967 goto out_unpin;
4f1959ee 968 }
935aaa69
DV
969
970 ret = i915_gem_object_set_to_gtt_domain(obj, true);
971 if (ret)
972 goto out_unpin;
973
77a0d1ca 974 intel_fb_obj_invalidate(obj, ORIGIN_GTT);
4f1959ee 975 obj->dirty = true;
063e4e6b 976
4f1959ee
AS
977 user_data = u64_to_user_ptr(args->data_ptr);
978 offset = args->offset;
979 remain = args->size;
980 while (remain) {
673a394b
EA
981 /* Operation in this page
982 *
0839ccb8
KP
983 * page_base = page offset within aperture
984 * page_offset = offset within page
985 * page_length = bytes to copy for this page
673a394b 986 */
4f1959ee
AS
987 u32 page_base = node.start;
988 unsigned page_offset = offset_in_page(offset);
989 unsigned page_length = PAGE_SIZE - page_offset;
990 page_length = remain < page_length ? remain : page_length;
991 if (node.allocated) {
992 wmb(); /* flush the write before we modify the GGTT */
993 ggtt->base.insert_page(&ggtt->base,
994 i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
995 node.start, I915_CACHE_NONE, 0);
996 wmb(); /* flush modifications to the GGTT (insert_page) */
997 } else {
998 page_base += offset & PAGE_MASK;
999 }
0839ccb8 1000 /* If we get a fault while copying data, then (presumably) our
3de09aa3
EA
1001 * source page isn't available. Return the error and we'll
1002 * retry in the slow path.
b50a5371
AS
1003 * If the object is non-shmem backed, we retry again with the
1004 * path that handles page fault.
0839ccb8 1005 */
72e96d64 1006 if (fast_user_write(ggtt->mappable, page_base,
935aaa69 1007 page_offset, user_data, page_length)) {
b50a5371
AS
1008 hit_slow_path = true;
1009 mutex_unlock(&dev->struct_mutex);
1010 if (slow_user_access(ggtt->mappable,
1011 page_base,
1012 page_offset, user_data,
1013 page_length, true)) {
1014 ret = -EFAULT;
1015 mutex_lock(&dev->struct_mutex);
1016 goto out_flush;
1017 }
1018
1019 mutex_lock(&dev->struct_mutex);
935aaa69 1020 }
673a394b 1021
0839ccb8
KP
1022 remain -= page_length;
1023 user_data += page_length;
1024 offset += page_length;
673a394b 1025 }
673a394b 1026
063e4e6b 1027out_flush:
b50a5371
AS
1028 if (hit_slow_path) {
1029 if (ret == 0 &&
1030 (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
1031 /* The user has modified the object whilst we tried
1032 * reading from it, and we now have no idea what domain
1033 * the pages should be in. As we have just been touching
1034 * them directly, flush everything back to the GTT
1035 * domain.
1036 */
1037 ret = i915_gem_object_set_to_gtt_domain(obj, false);
1038 }
1039 }
1040
de152b62 1041 intel_fb_obj_flush(obj, false, ORIGIN_GTT);
935aaa69 1042out_unpin:
4f1959ee
AS
1043 if (node.allocated) {
1044 wmb();
1045 ggtt->base.clear_range(&ggtt->base,
1046 node.start, node.size,
1047 true);
1048 i915_gem_object_unpin_pages(obj);
1049 remove_mappable_node(&node);
1050 } else {
1051 i915_gem_object_ggtt_unpin(obj);
1052 }
935aaa69 1053out:
3de09aa3 1054 return ret;
673a394b
EA
1055}
1056
d174bd64
DV
1057/* Per-page copy function for the shmem pwrite fastpath.
1058 * Flushes invalid cachelines before writing to the target if
1059 * needs_clflush_before is set and flushes out any written cachelines after
1060 * writing if needs_clflush is set. */
3043c60c 1061static int
d174bd64
DV
1062shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
1063 char __user *user_data,
1064 bool page_do_bit17_swizzling,
1065 bool needs_clflush_before,
1066 bool needs_clflush_after)
673a394b 1067{
d174bd64 1068 char *vaddr;
673a394b 1069 int ret;
3de09aa3 1070
e7e58eb5 1071 if (unlikely(page_do_bit17_swizzling))
d174bd64 1072 return -EINVAL;
3de09aa3 1073
d174bd64
DV
1074 vaddr = kmap_atomic(page);
1075 if (needs_clflush_before)
1076 drm_clflush_virt_range(vaddr + shmem_page_offset,
1077 page_length);
c2831a94
CW
1078 ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
1079 user_data, page_length);
d174bd64
DV
1080 if (needs_clflush_after)
1081 drm_clflush_virt_range(vaddr + shmem_page_offset,
1082 page_length);
1083 kunmap_atomic(vaddr);
3de09aa3 1084
755d2218 1085 return ret ? -EFAULT : 0;
3de09aa3
EA
1086}
1087
d174bd64
DV
1088/* Only difference to the fast-path function is that this can handle bit17
1089 * and uses non-atomic copy and kmap functions. */
3043c60c 1090static int
d174bd64
DV
1091shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
1092 char __user *user_data,
1093 bool page_do_bit17_swizzling,
1094 bool needs_clflush_before,
1095 bool needs_clflush_after)
673a394b 1096{
d174bd64
DV
1097 char *vaddr;
1098 int ret;
e5281ccd 1099
d174bd64 1100 vaddr = kmap(page);
e7e58eb5 1101 if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
23c18c71
DV
1102 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
1103 page_length,
1104 page_do_bit17_swizzling);
d174bd64
DV
1105 if (page_do_bit17_swizzling)
1106 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
e5281ccd
CW
1107 user_data,
1108 page_length);
d174bd64
DV
1109 else
1110 ret = __copy_from_user(vaddr + shmem_page_offset,
1111 user_data,
1112 page_length);
1113 if (needs_clflush_after)
23c18c71
DV
1114 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
1115 page_length,
1116 page_do_bit17_swizzling);
d174bd64 1117 kunmap(page);
40123c1f 1118
755d2218 1119 return ret ? -EFAULT : 0;
40123c1f
EA
1120}
1121
40123c1f 1122static int
e244a443
DV
1123i915_gem_shmem_pwrite(struct drm_device *dev,
1124 struct drm_i915_gem_object *obj,
1125 struct drm_i915_gem_pwrite *args,
1126 struct drm_file *file)
40123c1f 1127{
40123c1f 1128 ssize_t remain;
8c59967c
DV
1129 loff_t offset;
1130 char __user *user_data;
eb2c0c81 1131 int shmem_page_offset, page_length, ret = 0;
8c59967c 1132 int obj_do_bit17_swizzling, page_do_bit17_swizzling;
e244a443 1133 int hit_slowpath = 0;
58642885
DV
1134 int needs_clflush_after = 0;
1135 int needs_clflush_before = 0;
67d5a50c 1136 struct sg_page_iter sg_iter;
40123c1f 1137
3ed605bc 1138 user_data = u64_to_user_ptr(args->data_ptr);
40123c1f
EA
1139 remain = args->size;
1140
8c59967c 1141 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
40123c1f 1142
c13d87ea
CW
1143 ret = i915_gem_object_wait_rendering(obj, false);
1144 if (ret)
1145 return ret;
1146
58642885
DV
1147 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1148 /* If we're not in the cpu write domain, set ourself into the gtt
1149 * write domain and manually flush cachelines (if required). This
1150 * optimizes for the case when the gpu will use the data
1151 * right away and we therefore have to clflush anyway. */
2c22569b 1152 needs_clflush_after = cpu_write_needs_clflush(obj);
58642885 1153 }
c76ce038
CW
1154 /* Same trick applies to invalidate partially written cachelines read
1155 * before writing. */
1156 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
1157 needs_clflush_before =
1158 !cpu_cache_is_coherent(dev, obj->cache_level);
58642885 1159
755d2218
CW
1160 ret = i915_gem_object_get_pages(obj);
1161 if (ret)
1162 return ret;
1163
77a0d1ca 1164 intel_fb_obj_invalidate(obj, ORIGIN_CPU);
063e4e6b 1165
755d2218
CW
1166 i915_gem_object_pin_pages(obj);
1167
673a394b 1168 offset = args->offset;
05394f39 1169 obj->dirty = 1;
673a394b 1170
67d5a50c
ID
1171 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
1172 offset >> PAGE_SHIFT) {
2db76d7c 1173 struct page *page = sg_page_iter_page(&sg_iter);
58642885 1174 int partial_cacheline_write;
e5281ccd 1175
9da3da66
CW
1176 if (remain <= 0)
1177 break;
1178
40123c1f
EA
1179 /* Operation in this page
1180 *
40123c1f 1181 * shmem_page_offset = offset within page in shmem file
40123c1f
EA
1182 * page_length = bytes to copy for this page
1183 */
c8cbbb8b 1184 shmem_page_offset = offset_in_page(offset);
40123c1f
EA
1185
1186 page_length = remain;
1187 if ((shmem_page_offset + page_length) > PAGE_SIZE)
1188 page_length = PAGE_SIZE - shmem_page_offset;
40123c1f 1189
58642885
DV
1190 /* If we don't overwrite a cacheline completely we need to be
1191 * careful to have up-to-date data by first clflushing. Don't
1192 * overcomplicate things and flush the entire patch. */
1193 partial_cacheline_write = needs_clflush_before &&
1194 ((shmem_page_offset | page_length)
1195 & (boot_cpu_data.x86_clflush_size - 1));
1196
8c59967c
DV
1197 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
1198 (page_to_phys(page) & (1 << 17)) != 0;
1199
d174bd64
DV
1200 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
1201 user_data, page_do_bit17_swizzling,
1202 partial_cacheline_write,
1203 needs_clflush_after);
1204 if (ret == 0)
1205 goto next_page;
e244a443
DV
1206
1207 hit_slowpath = 1;
e244a443 1208 mutex_unlock(&dev->struct_mutex);
d174bd64
DV
1209 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
1210 user_data, page_do_bit17_swizzling,
1211 partial_cacheline_write,
1212 needs_clflush_after);
40123c1f 1213
e244a443 1214 mutex_lock(&dev->struct_mutex);
755d2218 1215
755d2218 1216 if (ret)
8c59967c 1217 goto out;
8c59967c 1218
17793c9a 1219next_page:
40123c1f 1220 remain -= page_length;
8c59967c 1221 user_data += page_length;
40123c1f 1222 offset += page_length;
673a394b
EA
1223 }
1224
fbd5a26d 1225out:
755d2218
CW
1226 i915_gem_object_unpin_pages(obj);
1227
e244a443 1228 if (hit_slowpath) {
8dcf015e
DV
1229 /*
1230 * Fixup: Flush cpu caches in case we didn't flush the dirty
1231 * cachelines in-line while writing and the object moved
1232 * out of the cpu write domain while we've dropped the lock.
1233 */
1234 if (!needs_clflush_after &&
1235 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
000433b6 1236 if (i915_gem_clflush_object(obj, obj->pin_display))
ed75a55b 1237 needs_clflush_after = true;
e244a443 1238 }
8c59967c 1239 }
673a394b 1240
58642885 1241 if (needs_clflush_after)
c033666a 1242 i915_gem_chipset_flush(to_i915(dev));
ed75a55b
VS
1243 else
1244 obj->cache_dirty = true;
58642885 1245
de152b62 1246 intel_fb_obj_flush(obj, false, ORIGIN_CPU);
40123c1f 1247 return ret;
673a394b
EA
1248}
1249
1250/**
1251 * Writes data to the object referenced by handle.
14bb2c11
TU
1252 * @dev: drm device
1253 * @data: ioctl data blob
1254 * @file: drm file
673a394b
EA
1255 *
1256 * On error, the contents of the buffer that were to be modified are undefined.
1257 */
1258int
1259i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
fbd5a26d 1260 struct drm_file *file)
673a394b 1261{
fac5e23e 1262 struct drm_i915_private *dev_priv = to_i915(dev);
673a394b 1263 struct drm_i915_gem_pwrite *args = data;
05394f39 1264 struct drm_i915_gem_object *obj;
51311d0a
CW
1265 int ret;
1266
1267 if (args->size == 0)
1268 return 0;
1269
1270 if (!access_ok(VERIFY_READ,
3ed605bc 1271 u64_to_user_ptr(args->data_ptr),
51311d0a
CW
1272 args->size))
1273 return -EFAULT;
1274
d330a953 1275 if (likely(!i915.prefault_disable)) {
3ed605bc 1276 ret = fault_in_multipages_readable(u64_to_user_ptr(args->data_ptr),
0b74b508
XZ
1277 args->size);
1278 if (ret)
1279 return -EFAULT;
1280 }
673a394b 1281
5d77d9c5
ID
1282 intel_runtime_pm_get(dev_priv);
1283
fbd5a26d 1284 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 1285 if (ret)
5d77d9c5 1286 goto put_rpm;
1d7cfea1 1287
03ac0642
CW
1288 obj = i915_gem_object_lookup(file, args->handle);
1289 if (!obj) {
1d7cfea1
CW
1290 ret = -ENOENT;
1291 goto unlock;
fbd5a26d 1292 }
673a394b 1293
7dcd2499 1294 /* Bounds check destination. */
05394f39
CW
1295 if (args->offset > obj->base.size ||
1296 args->size > obj->base.size - args->offset) {
ce9d419d 1297 ret = -EINVAL;
35b62a89 1298 goto out;
ce9d419d
CW
1299 }
1300
db53a302
CW
1301 trace_i915_gem_object_pwrite(obj, args->offset, args->size);
1302
935aaa69 1303 ret = -EFAULT;
673a394b
EA
1304 /* We can only do the GTT pwrite on untiled buffers, as otherwise
1305 * it would end up going through the fenced access, and we'll get
1306 * different detiling behavior between reading and writing.
1307 * pread/pwrite currently are reading and writing from the CPU
1308 * perspective, requiring manual detiling by the client.
1309 */
6eae0059
CW
1310 if (!i915_gem_object_has_struct_page(obj) ||
1311 cpu_write_needs_clflush(obj)) {
4f1959ee 1312 ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
935aaa69
DV
1313 /* Note that the gtt paths might fail with non-page-backed user
1314 * pointers (e.g. gtt mappings when moving data between
1315 * textures). Fallback to the shmem path in that case. */
fbd5a26d 1316 }
673a394b 1317
d1054ee4 1318 if (ret == -EFAULT || ret == -ENOSPC) {
6a2c4232
CW
1319 if (obj->phys_handle)
1320 ret = i915_gem_phys_pwrite(obj, args, file);
6eae0059 1321 else if (i915_gem_object_has_struct_page(obj))
6a2c4232 1322 ret = i915_gem_shmem_pwrite(dev, obj, args, file);
b50a5371
AS
1323 else
1324 ret = -ENODEV;
6a2c4232 1325 }
5c0480f2 1326
35b62a89 1327out:
f8c417cd 1328 i915_gem_object_put(obj);
1d7cfea1 1329unlock:
fbd5a26d 1330 mutex_unlock(&dev->struct_mutex);
5d77d9c5
ID
1331put_rpm:
1332 intel_runtime_pm_put(dev_priv);
1333
673a394b
EA
1334 return ret;
1335}
1336
b361237b
CW
1337/**
1338 * Ensures that all rendering to the object has completed and the object is
1339 * safe to unbind from the GTT or access from the CPU.
14bb2c11
TU
1340 * @obj: i915 gem object
1341 * @readonly: waiting for read access or write
b361237b 1342 */
2e2f351d 1343int
b361237b
CW
1344i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
1345 bool readonly)
1346{
c13d87ea 1347 struct reservation_object *resv;
8cac6f6c
CW
1348 struct i915_gem_active *active;
1349 unsigned long active_mask;
1350 int idx, ret;
b361237b 1351
8cac6f6c
CW
1352 lockdep_assert_held(&obj->base.dev->struct_mutex);
1353
1354 if (!readonly) {
1355 active = obj->last_read;
1356 active_mask = obj->active;
b4716185 1357 } else {
8cac6f6c
CW
1358 active_mask = 1;
1359 active = &obj->last_write;
1360 }
b4716185 1361
8cac6f6c 1362 for_each_active(active_mask, idx) {
fa545cbf
CW
1363 ret = i915_gem_active_wait(&active[idx],
1364 &obj->base.dev->struct_mutex);
8cac6f6c
CW
1365 if (ret)
1366 return ret;
b4716185
CW
1367 }
1368
c13d87ea
CW
1369 resv = i915_gem_object_get_dmabuf_resv(obj);
1370 if (resv) {
1371 long err;
1372
1373 err = reservation_object_wait_timeout_rcu(resv, !readonly, true,
1374 MAX_SCHEDULE_TIMEOUT);
1375 if (err < 0)
1376 return err;
1377 }
1378
b4716185
CW
1379 return 0;
1380}
1381
3236f57a
CW
1382/* A nonblocking variant of the above wait. This is a highly dangerous routine
1383 * as the object state may change during this call.
1384 */
1385static __must_check int
1386i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
2e1b8730 1387 struct intel_rps_client *rps,
3236f57a
CW
1388 bool readonly)
1389{
1390 struct drm_device *dev = obj->base.dev;
fac5e23e 1391 struct drm_i915_private *dev_priv = to_i915(dev);
666796da 1392 struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
8cac6f6c
CW
1393 struct i915_gem_active *active;
1394 unsigned long active_mask;
b4716185 1395 int ret, i, n = 0;
3236f57a
CW
1396
1397 BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1398 BUG_ON(!dev_priv->mm.interruptible);
1399
8cac6f6c
CW
1400 active_mask = obj->active;
1401 if (!active_mask)
3236f57a
CW
1402 return 0;
1403
8cac6f6c
CW
1404 if (!readonly) {
1405 active = obj->last_read;
b4716185 1406 } else {
8cac6f6c
CW
1407 active_mask = 1;
1408 active = &obj->last_write;
1409 }
b4716185 1410
8cac6f6c
CW
1411 for_each_active(active_mask, i) {
1412 struct drm_i915_gem_request *req;
b4716185 1413
8cac6f6c
CW
1414 req = i915_gem_active_get(&active[i],
1415 &obj->base.dev->struct_mutex);
1416 if (req)
27c01aae 1417 requests[n++] = req;
b4716185
CW
1418 }
1419
3236f57a 1420 mutex_unlock(&dev->struct_mutex);
299259a3 1421 ret = 0;
b4716185 1422 for (i = 0; ret == 0 && i < n; i++)
776f3236 1423 ret = i915_wait_request(requests[i], true, NULL, rps);
3236f57a
CW
1424 mutex_lock(&dev->struct_mutex);
1425
fa545cbf 1426 for (i = 0; i < n; i++)
e8a261ea 1427 i915_gem_request_put(requests[i]);
b4716185
CW
1428
1429 return ret;
3236f57a
CW
1430}
1431
2e1b8730
CW
1432static struct intel_rps_client *to_rps_client(struct drm_file *file)
1433{
1434 struct drm_i915_file_private *fpriv = file->driver_priv;
1435 return &fpriv->rps;
1436}
1437
aeecc969
CW
1438static enum fb_op_origin
1439write_origin(struct drm_i915_gem_object *obj, unsigned domain)
1440{
1441 return domain == I915_GEM_DOMAIN_GTT && !obj->has_wc_mmap ?
1442 ORIGIN_GTT : ORIGIN_CPU;
1443}
1444
673a394b 1445/**
2ef7eeaa
EA
1446 * Called when user space prepares to use an object with the CPU, either
1447 * through the mmap ioctl's mapping or a GTT mapping.
14bb2c11
TU
1448 * @dev: drm device
1449 * @data: ioctl data blob
1450 * @file: drm file
673a394b
EA
1451 */
1452int
1453i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
05394f39 1454 struct drm_file *file)
673a394b
EA
1455{
1456 struct drm_i915_gem_set_domain *args = data;
05394f39 1457 struct drm_i915_gem_object *obj;
2ef7eeaa
EA
1458 uint32_t read_domains = args->read_domains;
1459 uint32_t write_domain = args->write_domain;
673a394b
EA
1460 int ret;
1461
2ef7eeaa 1462 /* Only handle setting domains to types used by the CPU. */
21d509e3 1463 if (write_domain & I915_GEM_GPU_DOMAINS)
2ef7eeaa
EA
1464 return -EINVAL;
1465
21d509e3 1466 if (read_domains & I915_GEM_GPU_DOMAINS)
2ef7eeaa
EA
1467 return -EINVAL;
1468
1469 /* Having something in the write domain implies it's in the read
1470 * domain, and only that read domain. Enforce that in the request.
1471 */
1472 if (write_domain != 0 && read_domains != write_domain)
1473 return -EINVAL;
1474
76c1dec1 1475 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 1476 if (ret)
76c1dec1 1477 return ret;
1d7cfea1 1478
03ac0642
CW
1479 obj = i915_gem_object_lookup(file, args->handle);
1480 if (!obj) {
1d7cfea1
CW
1481 ret = -ENOENT;
1482 goto unlock;
76c1dec1 1483 }
673a394b 1484
3236f57a
CW
1485 /* Try to flush the object off the GPU without holding the lock.
1486 * We will repeat the flush holding the lock in the normal manner
1487 * to catch cases where we are gazumped.
1488 */
6e4930f6 1489 ret = i915_gem_object_wait_rendering__nonblocking(obj,
2e1b8730 1490 to_rps_client(file),
6e4930f6 1491 !write_domain);
3236f57a
CW
1492 if (ret)
1493 goto unref;
1494
43566ded 1495 if (read_domains & I915_GEM_DOMAIN_GTT)
2ef7eeaa 1496 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
43566ded 1497 else
e47c68e9 1498 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
2ef7eeaa 1499
031b698a 1500 if (write_domain != 0)
aeecc969 1501 intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
031b698a 1502
3236f57a 1503unref:
f8c417cd 1504 i915_gem_object_put(obj);
1d7cfea1 1505unlock:
673a394b
EA
1506 mutex_unlock(&dev->struct_mutex);
1507 return ret;
1508}
1509
1510/**
1511 * Called when user space has done writes to this buffer
14bb2c11
TU
1512 * @dev: drm device
1513 * @data: ioctl data blob
1514 * @file: drm file
673a394b
EA
1515 */
1516int
1517i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
05394f39 1518 struct drm_file *file)
673a394b
EA
1519{
1520 struct drm_i915_gem_sw_finish *args = data;
05394f39 1521 struct drm_i915_gem_object *obj;
673a394b
EA
1522 int ret = 0;
1523
76c1dec1 1524 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 1525 if (ret)
76c1dec1 1526 return ret;
1d7cfea1 1527
03ac0642
CW
1528 obj = i915_gem_object_lookup(file, args->handle);
1529 if (!obj) {
1d7cfea1
CW
1530 ret = -ENOENT;
1531 goto unlock;
673a394b
EA
1532 }
1533
673a394b 1534 /* Pinned buffers may be scanout, so flush the cache */
2c22569b 1535 if (obj->pin_display)
e62b59e4 1536 i915_gem_object_flush_cpu_write_domain(obj);
e47c68e9 1537
f8c417cd 1538 i915_gem_object_put(obj);
1d7cfea1 1539unlock:
673a394b
EA
1540 mutex_unlock(&dev->struct_mutex);
1541 return ret;
1542}
1543
1544/**
14bb2c11
TU
1545 * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
1546 * it is mapped to.
1547 * @dev: drm device
1548 * @data: ioctl data blob
1549 * @file: drm file
673a394b
EA
1550 *
1551 * While the mapping holds a reference on the contents of the object, it doesn't
1552 * imply a ref on the object itself.
34367381
DV
1553 *
1554 * IMPORTANT:
1555 *
1556 * DRM driver writers who look a this function as an example for how to do GEM
1557 * mmap support, please don't implement mmap support like here. The modern way
1558 * to implement DRM mmap support is with an mmap offset ioctl (like
1559 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1560 * That way debug tooling like valgrind will understand what's going on, hiding
1561 * the mmap call in a driver private ioctl will break that. The i915 driver only
1562 * does cpu mmaps this way because we didn't know better.
673a394b
EA
1563 */
1564int
1565i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
05394f39 1566 struct drm_file *file)
673a394b
EA
1567{
1568 struct drm_i915_gem_mmap *args = data;
03ac0642 1569 struct drm_i915_gem_object *obj;
673a394b
EA
1570 unsigned long addr;
1571
1816f923
AG
1572 if (args->flags & ~(I915_MMAP_WC))
1573 return -EINVAL;
1574
568a58e5 1575 if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
1816f923
AG
1576 return -ENODEV;
1577
03ac0642
CW
1578 obj = i915_gem_object_lookup(file, args->handle);
1579 if (!obj)
bf79cb91 1580 return -ENOENT;
673a394b 1581
1286ff73
DV
1582 /* prime objects have no backing filp to GEM mmap
1583 * pages from.
1584 */
03ac0642 1585 if (!obj->base.filp) {
34911fd3 1586 i915_gem_object_put_unlocked(obj);
1286ff73
DV
1587 return -EINVAL;
1588 }
1589
03ac0642 1590 addr = vm_mmap(obj->base.filp, 0, args->size,
673a394b
EA
1591 PROT_READ | PROT_WRITE, MAP_SHARED,
1592 args->offset);
1816f923
AG
1593 if (args->flags & I915_MMAP_WC) {
1594 struct mm_struct *mm = current->mm;
1595 struct vm_area_struct *vma;
1596
80a89a5e 1597 if (down_write_killable(&mm->mmap_sem)) {
34911fd3 1598 i915_gem_object_put_unlocked(obj);
80a89a5e
MH
1599 return -EINTR;
1600 }
1816f923
AG
1601 vma = find_vma(mm, addr);
1602 if (vma)
1603 vma->vm_page_prot =
1604 pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
1605 else
1606 addr = -ENOMEM;
1607 up_write(&mm->mmap_sem);
aeecc969
CW
1608
1609 /* This may race, but that's ok, it only gets set */
03ac0642 1610 WRITE_ONCE(obj->has_wc_mmap, true);
1816f923 1611 }
34911fd3 1612 i915_gem_object_put_unlocked(obj);
673a394b
EA
1613 if (IS_ERR((void *)addr))
1614 return addr;
1615
1616 args->addr_ptr = (uint64_t) addr;
1617
1618 return 0;
1619}
1620
de151cf6
JB
1621/**
1622 * i915_gem_fault - fault a page into the GTT
d9072a3e
GT
1623 * @vma: VMA in question
1624 * @vmf: fault info
de151cf6
JB
1625 *
1626 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
1627 * from userspace. The fault handler takes care of binding the object to
1628 * the GTT (if needed), allocating and programming a fence register (again,
1629 * only if needed based on whether the old reg is still valid or the object
1630 * is tiled) and inserting a new PTE into the faulting process.
1631 *
1632 * Note that the faulting process may involve evicting existing objects
1633 * from the GTT and/or fence registers to make room. So performance may
1634 * suffer if the GTT working set is large or there are few fence registers
1635 * left.
1636 */
1637int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1638{
05394f39
CW
1639 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
1640 struct drm_device *dev = obj->base.dev;
72e96d64
JL
1641 struct drm_i915_private *dev_priv = to_i915(dev);
1642 struct i915_ggtt *ggtt = &dev_priv->ggtt;
c5ad54cf 1643 struct i915_ggtt_view view = i915_ggtt_view_normal;
de151cf6
JB
1644 pgoff_t page_offset;
1645 unsigned long pfn;
1646 int ret = 0;
0f973f27 1647 bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
de151cf6 1648
f65c9168
PZ
1649 intel_runtime_pm_get(dev_priv);
1650
de151cf6
JB
1651 /* We don't use vmf->pgoff since that has the fake offset */
1652 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
1653 PAGE_SHIFT;
1654
d9bc7e9f
CW
1655 ret = i915_mutex_lock_interruptible(dev);
1656 if (ret)
1657 goto out;
a00b10c3 1658
db53a302
CW
1659 trace_i915_gem_object_fault(obj, page_offset, true, write);
1660
6e4930f6
CW
1661 /* Try to flush the object off the GPU first without holding the lock.
1662 * Upon reacquiring the lock, we will perform our sanity checks and then
1663 * repeat the flush holding the lock in the normal manner to catch cases
1664 * where we are gazumped.
1665 */
1666 ret = i915_gem_object_wait_rendering__nonblocking(obj, NULL, !write);
1667 if (ret)
1668 goto unlock;
1669
eb119bd6
CW
1670 /* Access to snoopable pages through the GTT is incoherent. */
1671 if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
ddeff6ee 1672 ret = -EFAULT;
eb119bd6
CW
1673 goto unlock;
1674 }
1675
c5ad54cf 1676 /* Use a partial view if the object is bigger than the aperture. */
72e96d64 1677 if (obj->base.size >= ggtt->mappable_end &&
e7ded2d7 1678 obj->tiling_mode == I915_TILING_NONE) {
c5ad54cf 1679 static const unsigned int chunk_size = 256; // 1 MiB
e7ded2d7 1680
c5ad54cf
JL
1681 memset(&view, 0, sizeof(view));
1682 view.type = I915_GGTT_VIEW_PARTIAL;
1683 view.params.partial.offset = rounddown(page_offset, chunk_size);
1684 view.params.partial.size =
1685 min_t(unsigned int,
1686 chunk_size,
1687 (vma->vm_end - vma->vm_start)/PAGE_SIZE -
1688 view.params.partial.offset);
1689 }
1690
1691 /* Now pin it into the GTT if needed */
1692 ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
c9839303
CW
1693 if (ret)
1694 goto unlock;
4a684a41 1695
c9839303
CW
1696 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1697 if (ret)
1698 goto unpin;
74898d7e 1699
06d98131 1700 ret = i915_gem_object_get_fence(obj);
d9e86c0e 1701 if (ret)
c9839303 1702 goto unpin;
7d1c4804 1703
b90b91d8 1704 /* Finally, remap it using the new GTT offset */
72e96d64 1705 pfn = ggtt->mappable_base +
c5ad54cf 1706 i915_gem_obj_ggtt_offset_view(obj, &view);
f343c5f6 1707 pfn >>= PAGE_SHIFT;
de151cf6 1708
c5ad54cf
JL
1709 if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) {
1710 /* Overriding existing pages in partial view does not cause
1711 * us any trouble as TLBs are still valid because the fault
1712 * is due to userspace losing part of the mapping or never
1713 * having accessed it before (at this partials' range).
1714 */
1715 unsigned long base = vma->vm_start +
1716 (view.params.partial.offset << PAGE_SHIFT);
1717 unsigned int i;
b90b91d8 1718
c5ad54cf
JL
1719 for (i = 0; i < view.params.partial.size; i++) {
1720 ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i);
b90b91d8
CW
1721 if (ret)
1722 break;
1723 }
1724
1725 obj->fault_mappable = true;
c5ad54cf
JL
1726 } else {
1727 if (!obj->fault_mappable) {
1728 unsigned long size = min_t(unsigned long,
1729 vma->vm_end - vma->vm_start,
1730 obj->base.size);
1731 int i;
1732
1733 for (i = 0; i < size >> PAGE_SHIFT; i++) {
1734 ret = vm_insert_pfn(vma,
1735 (unsigned long)vma->vm_start + i * PAGE_SIZE,
1736 pfn + i);
1737 if (ret)
1738 break;
1739 }
1740
1741 obj->fault_mappable = true;
1742 } else
1743 ret = vm_insert_pfn(vma,
1744 (unsigned long)vmf->virtual_address,
1745 pfn + page_offset);
1746 }
c9839303 1747unpin:
c5ad54cf 1748 i915_gem_object_ggtt_unpin_view(obj, &view);
c715089f 1749unlock:
de151cf6 1750 mutex_unlock(&dev->struct_mutex);
d9bc7e9f 1751out:
de151cf6 1752 switch (ret) {
d9bc7e9f 1753 case -EIO:
2232f031
DV
1754 /*
1755 * We eat errors when the gpu is terminally wedged to avoid
1756 * userspace unduly crashing (gl has no provisions for mmaps to
1757 * fail). But any other -EIO isn't ours (e.g. swap in failure)
1758 * and so needs to be reported.
1759 */
1760 if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
f65c9168
PZ
1761 ret = VM_FAULT_SIGBUS;
1762 break;
1763 }
045e769a 1764 case -EAGAIN:
571c608d
DV
1765 /*
1766 * EAGAIN means the gpu is hung and we'll wait for the error
1767 * handler to reset everything when re-faulting in
1768 * i915_mutex_lock_interruptible.
d9bc7e9f 1769 */
c715089f
CW
1770 case 0:
1771 case -ERESTARTSYS:
bed636ab 1772 case -EINTR:
e79e0fe3
DR
1773 case -EBUSY:
1774 /*
1775 * EBUSY is ok: this just means that another thread
1776 * already did the job.
1777 */
f65c9168
PZ
1778 ret = VM_FAULT_NOPAGE;
1779 break;
de151cf6 1780 case -ENOMEM:
f65c9168
PZ
1781 ret = VM_FAULT_OOM;
1782 break;
a7c2e1aa 1783 case -ENOSPC:
45d67817 1784 case -EFAULT:
f65c9168
PZ
1785 ret = VM_FAULT_SIGBUS;
1786 break;
de151cf6 1787 default:
a7c2e1aa 1788 WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
f65c9168
PZ
1789 ret = VM_FAULT_SIGBUS;
1790 break;
de151cf6 1791 }
f65c9168
PZ
1792
1793 intel_runtime_pm_put(dev_priv);
1794 return ret;
de151cf6
JB
1795}
1796
901782b2
CW
1797/**
1798 * i915_gem_release_mmap - remove physical page mappings
1799 * @obj: obj in question
1800 *
af901ca1 1801 * Preserve the reservation of the mmapping with the DRM core code, but
901782b2
CW
1802 * relinquish ownership of the pages back to the system.
1803 *
1804 * It is vital that we remove the page mapping if we have mapped a tiled
1805 * object through the GTT and then lose the fence register due to
1806 * resource pressure. Similarly if the object has been moved out of the
1807 * aperture, then pages mapped into userspace must be revoked. Removing the
1808 * mapping will then trigger a page fault on the next user access, allowing
1809 * fixup by i915_gem_fault().
1810 */
d05ca301 1811void
05394f39 1812i915_gem_release_mmap(struct drm_i915_gem_object *obj)
901782b2 1813{
349f2ccf
CW
1814 /* Serialisation between user GTT access and our code depends upon
1815 * revoking the CPU's PTE whilst the mutex is held. The next user
1816 * pagefault then has to wait until we release the mutex.
1817 */
1818 lockdep_assert_held(&obj->base.dev->struct_mutex);
1819
6299f992
CW
1820 if (!obj->fault_mappable)
1821 return;
901782b2 1822
6796cb16
DH
1823 drm_vma_node_unmap(&obj->base.vma_node,
1824 obj->base.dev->anon_inode->i_mapping);
349f2ccf
CW
1825
1826 /* Ensure that the CPU's PTEs are revoked and there are no outstanding
1827 * memory transactions from userspace before we return. The TLB
1828 * flushing implied by changing the PTE above *should* be
1829 * sufficient; an extra barrier here just provides us with a bit
1830 * of paranoid documentation about our requirement to serialise
1831 * memory writes before touching registers / GSM.
1832 */
1833 wmb();
1834
6299f992 1835 obj->fault_mappable = false;
901782b2
CW
1836}
1837
eedd10f4
CW
1838void
1839i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
1840{
1841 struct drm_i915_gem_object *obj;
1842
1843 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
1844 i915_gem_release_mmap(obj);
1845}
1846
0fa87796 1847uint32_t
e28f8711 1848i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
92b88aeb 1849{
e28f8711 1850 uint32_t gtt_size;
92b88aeb
CW
1851
1852 if (INTEL_INFO(dev)->gen >= 4 ||
e28f8711
CW
1853 tiling_mode == I915_TILING_NONE)
1854 return size;
92b88aeb
CW
1855
1856 /* Previous chips need a power-of-two fence region when tiling */
7e22dbbb 1857 if (IS_GEN3(dev))
e28f8711 1858 gtt_size = 1024*1024;
92b88aeb 1859 else
e28f8711 1860 gtt_size = 512*1024;
92b88aeb 1861
e28f8711
CW
1862 while (gtt_size < size)
1863 gtt_size <<= 1;
92b88aeb 1864
e28f8711 1865 return gtt_size;
92b88aeb
CW
1866}
1867
de151cf6
JB
1868/**
1869 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
14bb2c11
TU
1870 * @dev: drm device
1871 * @size: object size
1872 * @tiling_mode: tiling mode
1873 * @fenced: is fenced alignment required or not
de151cf6
JB
1874 *
1875 * Return the required GTT alignment for an object, taking into account
5e783301 1876 * potential fence register mapping.
de151cf6 1877 */
d865110c
ID
1878uint32_t
1879i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
1880 int tiling_mode, bool fenced)
de151cf6 1881{
de151cf6
JB
1882 /*
1883 * Minimum alignment is 4k (GTT page size), but might be greater
1884 * if a fence register is needed for the object.
1885 */
d865110c 1886 if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
e28f8711 1887 tiling_mode == I915_TILING_NONE)
de151cf6
JB
1888 return 4096;
1889
a00b10c3
CW
1890 /*
1891 * Previous chips need to be aligned to the size of the smallest
1892 * fence register that can contain the object.
1893 */
e28f8711 1894 return i915_gem_get_gtt_size(dev, size, tiling_mode);
a00b10c3
CW
1895}
1896
d8cb5086
CW
1897static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
1898{
fac5e23e 1899 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
d8cb5086
CW
1900 int ret;
1901
da494d7c
DV
1902 dev_priv->mm.shrinker_no_lock_stealing = true;
1903
d8cb5086
CW
1904 ret = drm_gem_create_mmap_offset(&obj->base);
1905 if (ret != -ENOSPC)
da494d7c 1906 goto out;
d8cb5086
CW
1907
1908 /* Badly fragmented mmap space? The only way we can recover
1909 * space is by destroying unwanted objects. We can't randomly release
1910 * mmap_offsets as userspace expects them to be persistent for the
1911 * lifetime of the objects. The closest we can do is to release the
1912 * offsets on purgeable objects by truncating the object and marking it purged,
1913 * which prevents userspace from ever using that object again.
1914 */
21ab4e74
CW
1915 i915_gem_shrink(dev_priv,
1916 obj->base.size >> PAGE_SHIFT,
1917 I915_SHRINK_BOUND |
1918 I915_SHRINK_UNBOUND |
1919 I915_SHRINK_PURGEABLE);
d8cb5086
CW
1920 ret = drm_gem_create_mmap_offset(&obj->base);
1921 if (ret != -ENOSPC)
da494d7c 1922 goto out;
d8cb5086
CW
1923
1924 i915_gem_shrink_all(dev_priv);
da494d7c
DV
1925 ret = drm_gem_create_mmap_offset(&obj->base);
1926out:
1927 dev_priv->mm.shrinker_no_lock_stealing = false;
1928
1929 return ret;
d8cb5086
CW
1930}
1931
1932static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
1933{
d8cb5086
CW
1934 drm_gem_free_mmap_offset(&obj->base);
1935}
1936
da6b51d0 1937int
ff72145b
DA
1938i915_gem_mmap_gtt(struct drm_file *file,
1939 struct drm_device *dev,
da6b51d0 1940 uint32_t handle,
ff72145b 1941 uint64_t *offset)
de151cf6 1942{
05394f39 1943 struct drm_i915_gem_object *obj;
de151cf6
JB
1944 int ret;
1945
76c1dec1 1946 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 1947 if (ret)
76c1dec1 1948 return ret;
de151cf6 1949
03ac0642
CW
1950 obj = i915_gem_object_lookup(file, handle);
1951 if (!obj) {
1d7cfea1
CW
1952 ret = -ENOENT;
1953 goto unlock;
1954 }
de151cf6 1955
05394f39 1956 if (obj->madv != I915_MADV_WILLNEED) {
bd9b6a4e 1957 DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
8c99e57d 1958 ret = -EFAULT;
1d7cfea1 1959 goto out;
ab18282d
CW
1960 }
1961
d8cb5086
CW
1962 ret = i915_gem_object_create_mmap_offset(obj);
1963 if (ret)
1964 goto out;
de151cf6 1965
0de23977 1966 *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
de151cf6 1967
1d7cfea1 1968out:
f8c417cd 1969 i915_gem_object_put(obj);
1d7cfea1 1970unlock:
de151cf6 1971 mutex_unlock(&dev->struct_mutex);
1d7cfea1 1972 return ret;
de151cf6
JB
1973}
1974
ff72145b
DA
1975/**
1976 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1977 * @dev: DRM device
1978 * @data: GTT mapping ioctl data
1979 * @file: DRM file pointer
1980 *
1981 * Simply returns the fake offset to userspace so it can mmap it.
1982 * The mmap call will end up in drm_gem_mmap(), which will set things
1983 * up so we can get faults in the handler above.
1984 *
1985 * The fault handler will take care of binding the object into the GTT
1986 * (since it may have been evicted to make room for something), allocating
1987 * a fence register, and mapping the appropriate aperture address into
1988 * userspace.
1989 */
1990int
1991i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1992 struct drm_file *file)
1993{
1994 struct drm_i915_gem_mmap_gtt *args = data;
1995
da6b51d0 1996 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
ff72145b
DA
1997}
1998
225067ee
DV
1999/* Immediately discard the backing storage */
2000static void
2001i915_gem_object_truncate(struct drm_i915_gem_object *obj)
e5281ccd 2002{
4d6294bf 2003 i915_gem_object_free_mmap_offset(obj);
1286ff73 2004
4d6294bf
CW
2005 if (obj->base.filp == NULL)
2006 return;
e5281ccd 2007
225067ee
DV
2008 /* Our goal here is to return as much of the memory as
2009 * is possible back to the system as we are called from OOM.
2010 * To do this we must instruct the shmfs to drop all of its
2011 * backing pages, *now*.
2012 */
5537252b 2013 shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
225067ee
DV
2014 obj->madv = __I915_MADV_PURGED;
2015}
e5281ccd 2016
5537252b
CW
2017/* Try to discard unwanted pages */
2018static void
2019i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
225067ee 2020{
5537252b
CW
2021 struct address_space *mapping;
2022
2023 switch (obj->madv) {
2024 case I915_MADV_DONTNEED:
2025 i915_gem_object_truncate(obj);
2026 case __I915_MADV_PURGED:
2027 return;
2028 }
2029
2030 if (obj->base.filp == NULL)
2031 return;
2032
2033 mapping = file_inode(obj->base.filp)->i_mapping;
2034 invalidate_mapping_pages(mapping, 0, (loff_t)-1);
e5281ccd
CW
2035}
2036
5cdf5881 2037static void
05394f39 2038i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
673a394b 2039{
85d1225e
DG
2040 struct sgt_iter sgt_iter;
2041 struct page *page;
90797e6d 2042 int ret;
1286ff73 2043
05394f39 2044 BUG_ON(obj->madv == __I915_MADV_PURGED);
673a394b 2045
6c085a72 2046 ret = i915_gem_object_set_to_cpu_domain(obj, true);
f4457ae7 2047 if (WARN_ON(ret)) {
6c085a72
CW
2048 /* In the event of a disaster, abandon all caches and
2049 * hope for the best.
2050 */
2c22569b 2051 i915_gem_clflush_object(obj, true);
6c085a72
CW
2052 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2053 }
2054
e2273302
ID
2055 i915_gem_gtt_finish_object(obj);
2056
6dacfd2f 2057 if (i915_gem_object_needs_bit17_swizzle(obj))
280b713b
EA
2058 i915_gem_object_save_bit_17_swizzle(obj);
2059
05394f39
CW
2060 if (obj->madv == I915_MADV_DONTNEED)
2061 obj->dirty = 0;
3ef94daa 2062
85d1225e 2063 for_each_sgt_page(page, sgt_iter, obj->pages) {
05394f39 2064 if (obj->dirty)
9da3da66 2065 set_page_dirty(page);
3ef94daa 2066
05394f39 2067 if (obj->madv == I915_MADV_WILLNEED)
9da3da66 2068 mark_page_accessed(page);
3ef94daa 2069
09cbfeaf 2070 put_page(page);
3ef94daa 2071 }
05394f39 2072 obj->dirty = 0;
673a394b 2073
9da3da66
CW
2074 sg_free_table(obj->pages);
2075 kfree(obj->pages);
37e680a1 2076}
6c085a72 2077
dd624afd 2078int
37e680a1
CW
2079i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
2080{
2081 const struct drm_i915_gem_object_ops *ops = obj->ops;
2082
2f745ad3 2083 if (obj->pages == NULL)
37e680a1
CW
2084 return 0;
2085
a5570178
CW
2086 if (obj->pages_pin_count)
2087 return -EBUSY;
2088
15717de2 2089 GEM_BUG_ON(obj->bind_count);
3e123027 2090
a2165e31
CW
2091 /* ->put_pages might need to allocate memory for the bit17 swizzle
2092 * array, hence protect them from being reaped by removing them from gtt
2093 * lists early. */
35c20a60 2094 list_del(&obj->global_list);
a2165e31 2095
0a798eb9 2096 if (obj->mapping) {
fb8621d3
CW
2097 if (is_vmalloc_addr(obj->mapping))
2098 vunmap(obj->mapping);
2099 else
2100 kunmap(kmap_to_page(obj->mapping));
0a798eb9
CW
2101 obj->mapping = NULL;
2102 }
2103
37e680a1 2104 ops->put_pages(obj);
05394f39 2105 obj->pages = NULL;
37e680a1 2106
5537252b 2107 i915_gem_object_invalidate(obj);
6c085a72
CW
2108
2109 return 0;
2110}
2111
37e680a1 2112static int
6c085a72 2113i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
e5281ccd 2114{
fac5e23e 2115 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
e5281ccd
CW
2116 int page_count, i;
2117 struct address_space *mapping;
9da3da66
CW
2118 struct sg_table *st;
2119 struct scatterlist *sg;
85d1225e 2120 struct sgt_iter sgt_iter;
e5281ccd 2121 struct page *page;
90797e6d 2122 unsigned long last_pfn = 0; /* suppress gcc warning */
e2273302 2123 int ret;
6c085a72 2124 gfp_t gfp;
e5281ccd 2125
6c085a72
CW
2126 /* Assert that the object is not currently in any GPU domain. As it
2127 * wasn't in the GTT, there shouldn't be any way it could have been in
2128 * a GPU cache
2129 */
2130 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2131 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
2132
9da3da66
CW
2133 st = kmalloc(sizeof(*st), GFP_KERNEL);
2134 if (st == NULL)
2135 return -ENOMEM;
2136
05394f39 2137 page_count = obj->base.size / PAGE_SIZE;
9da3da66 2138 if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
9da3da66 2139 kfree(st);
e5281ccd 2140 return -ENOMEM;
9da3da66 2141 }
e5281ccd 2142
9da3da66
CW
2143 /* Get the list of pages out of our struct file. They'll be pinned
2144 * at this point until we release them.
2145 *
2146 * Fail silently without starting the shrinker
2147 */
496ad9aa 2148 mapping = file_inode(obj->base.filp)->i_mapping;
c62d2555 2149 gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM));
d0164adc 2150 gfp |= __GFP_NORETRY | __GFP_NOWARN;
90797e6d
ID
2151 sg = st->sgl;
2152 st->nents = 0;
2153 for (i = 0; i < page_count; i++) {
6c085a72
CW
2154 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2155 if (IS_ERR(page)) {
21ab4e74
CW
2156 i915_gem_shrink(dev_priv,
2157 page_count,
2158 I915_SHRINK_BOUND |
2159 I915_SHRINK_UNBOUND |
2160 I915_SHRINK_PURGEABLE);
6c085a72
CW
2161 page = shmem_read_mapping_page_gfp(mapping, i, gfp);
2162 }
2163 if (IS_ERR(page)) {
2164 /* We've tried hard to allocate the memory by reaping
2165 * our own buffer, now let the real VM do its job and
2166 * go down in flames if truly OOM.
2167 */
6c085a72 2168 i915_gem_shrink_all(dev_priv);
f461d1be 2169 page = shmem_read_mapping_page(mapping, i);
e2273302
ID
2170 if (IS_ERR(page)) {
2171 ret = PTR_ERR(page);
6c085a72 2172 goto err_pages;
e2273302 2173 }
6c085a72 2174 }
426729dc
KRW
2175#ifdef CONFIG_SWIOTLB
2176 if (swiotlb_nr_tbl()) {
2177 st->nents++;
2178 sg_set_page(sg, page, PAGE_SIZE, 0);
2179 sg = sg_next(sg);
2180 continue;
2181 }
2182#endif
90797e6d
ID
2183 if (!i || page_to_pfn(page) != last_pfn + 1) {
2184 if (i)
2185 sg = sg_next(sg);
2186 st->nents++;
2187 sg_set_page(sg, page, PAGE_SIZE, 0);
2188 } else {
2189 sg->length += PAGE_SIZE;
2190 }
2191 last_pfn = page_to_pfn(page);
3bbbe706
DV
2192
2193 /* Check that the i965g/gm workaround works. */
2194 WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
e5281ccd 2195 }
426729dc
KRW
2196#ifdef CONFIG_SWIOTLB
2197 if (!swiotlb_nr_tbl())
2198#endif
2199 sg_mark_end(sg);
74ce6b6c
CW
2200 obj->pages = st;
2201
e2273302
ID
2202 ret = i915_gem_gtt_prepare_object(obj);
2203 if (ret)
2204 goto err_pages;
2205
6dacfd2f 2206 if (i915_gem_object_needs_bit17_swizzle(obj))
e5281ccd
CW
2207 i915_gem_object_do_bit_17_swizzle(obj);
2208
656bfa3a
DV
2209 if (obj->tiling_mode != I915_TILING_NONE &&
2210 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
2211 i915_gem_object_pin_pages(obj);
2212
e5281ccd
CW
2213 return 0;
2214
2215err_pages:
90797e6d 2216 sg_mark_end(sg);
85d1225e
DG
2217 for_each_sgt_page(page, sgt_iter, st)
2218 put_page(page);
9da3da66
CW
2219 sg_free_table(st);
2220 kfree(st);
0820baf3
CW
2221
2222 /* shmemfs first checks if there is enough memory to allocate the page
2223 * and reports ENOSPC should there be insufficient, along with the usual
2224 * ENOMEM for a genuine allocation failure.
2225 *
2226 * We use ENOSPC in our driver to mean that we have run out of aperture
2227 * space and so want to translate the error from shmemfs back to our
2228 * usual understanding of ENOMEM.
2229 */
e2273302
ID
2230 if (ret == -ENOSPC)
2231 ret = -ENOMEM;
2232
2233 return ret;
673a394b
EA
2234}
2235
37e680a1
CW
2236/* Ensure that the associated pages are gathered from the backing storage
2237 * and pinned into our object. i915_gem_object_get_pages() may be called
2238 * multiple times before they are released by a single call to
2239 * i915_gem_object_put_pages() - once the pages are no longer referenced
2240 * either as a result of memory pressure (reaping pages under the shrinker)
2241 * or as the object is itself released.
2242 */
2243int
2244i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2245{
fac5e23e 2246 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
37e680a1
CW
2247 const struct drm_i915_gem_object_ops *ops = obj->ops;
2248 int ret;
2249
2f745ad3 2250 if (obj->pages)
37e680a1
CW
2251 return 0;
2252
43e28f09 2253 if (obj->madv != I915_MADV_WILLNEED) {
bd9b6a4e 2254 DRM_DEBUG("Attempting to obtain a purgeable object\n");
8c99e57d 2255 return -EFAULT;
43e28f09
CW
2256 }
2257
a5570178
CW
2258 BUG_ON(obj->pages_pin_count);
2259
37e680a1
CW
2260 ret = ops->get_pages(obj);
2261 if (ret)
2262 return ret;
2263
35c20a60 2264 list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
ee286370
CW
2265
2266 obj->get_page.sg = obj->pages->sgl;
2267 obj->get_page.last = 0;
2268
37e680a1 2269 return 0;
673a394b
EA
2270}
2271
dd6034c6
DG
2272/* The 'mapping' part of i915_gem_object_pin_map() below */
2273static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
2274{
2275 unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
2276 struct sg_table *sgt = obj->pages;
85d1225e
DG
2277 struct sgt_iter sgt_iter;
2278 struct page *page;
b338fa47
DG
2279 struct page *stack_pages[32];
2280 struct page **pages = stack_pages;
dd6034c6
DG
2281 unsigned long i = 0;
2282 void *addr;
2283
2284 /* A single page can always be kmapped */
2285 if (n_pages == 1)
2286 return kmap(sg_page(sgt->sgl));
2287
b338fa47
DG
2288 if (n_pages > ARRAY_SIZE(stack_pages)) {
2289 /* Too big for stack -- allocate temporary array instead */
2290 pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
2291 if (!pages)
2292 return NULL;
2293 }
dd6034c6 2294
85d1225e
DG
2295 for_each_sgt_page(page, sgt_iter, sgt)
2296 pages[i++] = page;
dd6034c6
DG
2297
2298 /* Check that we have the expected number of pages */
2299 GEM_BUG_ON(i != n_pages);
2300
2301 addr = vmap(pages, n_pages, 0, PAGE_KERNEL);
2302
b338fa47
DG
2303 if (pages != stack_pages)
2304 drm_free_large(pages);
dd6034c6
DG
2305
2306 return addr;
2307}
2308
2309/* get, pin, and map the pages of the object into kernel space */
0a798eb9
CW
2310void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
2311{
2312 int ret;
2313
2314 lockdep_assert_held(&obj->base.dev->struct_mutex);
2315
2316 ret = i915_gem_object_get_pages(obj);
2317 if (ret)
2318 return ERR_PTR(ret);
2319
2320 i915_gem_object_pin_pages(obj);
2321
dd6034c6
DG
2322 if (!obj->mapping) {
2323 obj->mapping = i915_gem_object_map(obj);
2324 if (!obj->mapping) {
0a798eb9
CW
2325 i915_gem_object_unpin_pages(obj);
2326 return ERR_PTR(-ENOMEM);
2327 }
2328 }
2329
2330 return obj->mapping;
2331}
2332
b4716185 2333static void
fa545cbf
CW
2334i915_gem_object_retire__write(struct i915_gem_active *active,
2335 struct drm_i915_gem_request *request)
e2d05a8b 2336{
fa545cbf
CW
2337 struct drm_i915_gem_object *obj =
2338 container_of(active, struct drm_i915_gem_object, last_write);
b4716185 2339
de152b62 2340 intel_fb_obj_flush(obj, true, ORIGIN_CS);
e2d05a8b
BW
2341}
2342
caea7476 2343static void
fa545cbf
CW
2344i915_gem_object_retire__read(struct i915_gem_active *active,
2345 struct drm_i915_gem_request *request)
ce44b0ea 2346{
fa545cbf
CW
2347 int idx = request->engine->id;
2348 struct drm_i915_gem_object *obj =
2349 container_of(active, struct drm_i915_gem_object, last_read[idx]);
ce44b0ea 2350
fa545cbf 2351 GEM_BUG_ON((obj->active & (1 << idx)) == 0);
b4716185 2352
7e21d648 2353 obj->active &= ~(1 << idx);
b4716185
CW
2354 if (obj->active)
2355 return;
caea7476 2356
6c246959
CW
2357 /* Bump our place on the bound list to keep it roughly in LRU order
2358 * so that we don't steal from recently used but inactive objects
2359 * (unless we are forced to ofc!)
2360 */
b0decaf7
CW
2361 if (obj->bind_count)
2362 list_move_tail(&obj->global_list,
2363 &request->i915->mm.bound_list);
caea7476 2364
f8c417cd 2365 i915_gem_object_put(obj);
c8725f3d
CW
2366}
2367
7b4d3a16 2368static bool i915_context_is_banned(const struct i915_gem_context *ctx)
be62acb4 2369{
44e2c070 2370 unsigned long elapsed;
be62acb4 2371
44e2c070 2372 if (ctx->hang_stats.banned)
be62acb4
MK
2373 return true;
2374
7b4d3a16 2375 elapsed = get_seconds() - ctx->hang_stats.guilty_ts;
676fa572
CW
2376 if (ctx->hang_stats.ban_period_seconds &&
2377 elapsed <= ctx->hang_stats.ban_period_seconds) {
7b4d3a16
CW
2378 DRM_DEBUG("context hanging too fast, banning!\n");
2379 return true;
be62acb4
MK
2380 }
2381
2382 return false;
2383}
2384
7b4d3a16 2385static void i915_set_reset_status(struct i915_gem_context *ctx,
b6b0fac0 2386 const bool guilty)
aa60c664 2387{
7b4d3a16 2388 struct i915_ctx_hang_stats *hs = &ctx->hang_stats;
44e2c070
MK
2389
2390 if (guilty) {
7b4d3a16 2391 hs->banned = i915_context_is_banned(ctx);
44e2c070
MK
2392 hs->batch_active++;
2393 hs->guilty_ts = get_seconds();
2394 } else {
2395 hs->batch_pending++;
aa60c664
MK
2396 }
2397}
2398
8d9fc7fd 2399struct drm_i915_gem_request *
0bc40be8 2400i915_gem_find_active_request(struct intel_engine_cs *engine)
9375e446 2401{
4db080f9
CW
2402 struct drm_i915_gem_request *request;
2403
f69a02c9
CW
2404 /* We are called by the error capture and reset at a random
2405 * point in time. In particular, note that neither is crucially
2406 * ordered with an interrupt. After a hang, the GPU is dead and we
2407 * assume that no more writes can happen (we waited long enough for
2408 * all writes that were in transit to be flushed) - adding an
2409 * extra delay for a recent interrupt is pointless. Hence, we do
2410 * not need an engine->irq_seqno_barrier() before the seqno reads.
2411 */
efdf7c06 2412 list_for_each_entry(request, &engine->request_list, link) {
f69a02c9 2413 if (i915_gem_request_completed(request))
4db080f9 2414 continue;
aa60c664 2415
b6b0fac0 2416 return request;
4db080f9 2417 }
b6b0fac0
MK
2418
2419 return NULL;
2420}
2421
7b4d3a16 2422static void i915_gem_reset_engine_status(struct intel_engine_cs *engine)
b6b0fac0
MK
2423{
2424 struct drm_i915_gem_request *request;
2425 bool ring_hung;
2426
0bc40be8 2427 request = i915_gem_find_active_request(engine);
b6b0fac0
MK
2428 if (request == NULL)
2429 return;
2430
0bc40be8 2431 ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
b6b0fac0 2432
7b4d3a16 2433 i915_set_reset_status(request->ctx, ring_hung);
efdf7c06 2434 list_for_each_entry_continue(request, &engine->request_list, link)
7b4d3a16 2435 i915_set_reset_status(request->ctx, false);
4db080f9 2436}
aa60c664 2437
7b4d3a16 2438static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
4db080f9 2439{
7e37f889 2440 struct intel_ring *ring;
608c1a52 2441
c4b0930b
CW
2442 /* Mark all pending requests as complete so that any concurrent
2443 * (lockless) lookup doesn't try and wait upon the request as we
2444 * reset it.
2445 */
7e37f889 2446 intel_engine_init_seqno(engine, engine->last_submitted_seqno);
c4b0930b 2447
dcb4c12a
OM
2448 /*
2449 * Clear out the execlists queue before freeing the requests, as those
2450 * are the ones that keep the context and ringbuffer backing objects
2451 * pinned in place.
2452 */
dcb4c12a 2453
7de1691a 2454 if (i915.enable_execlists) {
27af5eea
TU
2455 /* Ensure irq handler finishes or is cancelled. */
2456 tasklet_kill(&engine->irq_tasklet);
1197b4f2 2457
e39d42fa 2458 intel_execlists_cancel_requests(engine);
dcb4c12a
OM
2459 }
2460
1d62beea
BW
2461 /*
2462 * We must free the requests after all the corresponding objects have
2463 * been moved off the active lists, which is the same order in which the
2464 * normal retire_requests function operates. This is important if objects hold
2465 * implicit references on things like e.g. ppgtt address spaces through
2466 * the request.
2467 */
05235c53 2468 if (!list_empty(&engine->request_list)) {
1d62beea
BW
2469 struct drm_i915_gem_request *request;
2470
05235c53
CW
2471 request = list_last_entry(&engine->request_list,
2472 struct drm_i915_gem_request,
efdf7c06 2473 link);
1d62beea 2474
05235c53 2475 i915_gem_request_retire_upto(request);
1d62beea 2476 }
608c1a52
CW
2477
2478 /* Having flushed all requests from all queues, we know that all
2479 * ringbuffers must now be empty. However, since we do not reclaim
2480 * all space when retiring the request (to prevent HEADs colliding
2481 * with rapid ringbuffer wraparound) the amount of available space
2482 * upon reset is less than when we start. Do one more pass over
2483 * all the ringbuffers to reset last_retired_head.
2484 */
7e37f889
CW
2485 list_for_each_entry(ring, &engine->buffers, link) {
2486 ring->last_retired_head = ring->tail;
2487 intel_ring_update_space(ring);
608c1a52 2488 }
2ed53a94 2489
b913b33c 2490 engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
673a394b
EA
2491}
2492
069efc1d 2493void i915_gem_reset(struct drm_device *dev)
673a394b 2494{
fac5e23e 2495 struct drm_i915_private *dev_priv = to_i915(dev);
e2f80391 2496 struct intel_engine_cs *engine;
673a394b 2497
4db080f9
CW
2498 /*
2499 * Before we free the objects from the requests, we need to inspect
2500 * them for finding the guilty party. As the requests only borrow
2501 * their reference to the objects, the inspection must be done first.
2502 */
b4ac5afc 2503 for_each_engine(engine, dev_priv)
7b4d3a16 2504 i915_gem_reset_engine_status(engine);
4db080f9 2505
b4ac5afc 2506 for_each_engine(engine, dev_priv)
7b4d3a16 2507 i915_gem_reset_engine_cleanup(engine);
b913b33c 2508 mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
dfaae392 2509
acce9ffa
BW
2510 i915_gem_context_reset(dev);
2511
19b2dbde 2512 i915_gem_restore_fences(dev);
673a394b
EA
2513}
2514
75ef9da2 2515static void
673a394b
EA
2516i915_gem_retire_work_handler(struct work_struct *work)
2517{
b29c19b6 2518 struct drm_i915_private *dev_priv =
67d97da3 2519 container_of(work, typeof(*dev_priv), gt.retire_work.work);
91c8a326 2520 struct drm_device *dev = &dev_priv->drm;
673a394b 2521
891b48cf 2522 /* Come back later if the device is busy... */
b29c19b6 2523 if (mutex_trylock(&dev->struct_mutex)) {
67d97da3 2524 i915_gem_retire_requests(dev_priv);
b29c19b6 2525 mutex_unlock(&dev->struct_mutex);
673a394b 2526 }
67d97da3
CW
2527
2528 /* Keep the retire handler running until we are finally idle.
2529 * We do not need to do this test under locking as in the worst-case
2530 * we queue the retire worker once too often.
2531 */
c9615613
CW
2532 if (READ_ONCE(dev_priv->gt.awake)) {
2533 i915_queue_hangcheck(dev_priv);
67d97da3
CW
2534 queue_delayed_work(dev_priv->wq,
2535 &dev_priv->gt.retire_work,
bcb45086 2536 round_jiffies_up_relative(HZ));
c9615613 2537 }
b29c19b6 2538}
0a58705b 2539
b29c19b6
CW
2540static void
2541i915_gem_idle_work_handler(struct work_struct *work)
2542{
2543 struct drm_i915_private *dev_priv =
67d97da3 2544 container_of(work, typeof(*dev_priv), gt.idle_work.work);
91c8a326 2545 struct drm_device *dev = &dev_priv->drm;
b4ac5afc 2546 struct intel_engine_cs *engine;
67d97da3
CW
2547 unsigned int stuck_engines;
2548 bool rearm_hangcheck;
2549
2550 if (!READ_ONCE(dev_priv->gt.awake))
2551 return;
2552
2553 if (READ_ONCE(dev_priv->gt.active_engines))
2554 return;
2555
2556 rearm_hangcheck =
2557 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
2558
2559 if (!mutex_trylock(&dev->struct_mutex)) {
2560 /* Currently busy, come back later */
2561 mod_delayed_work(dev_priv->wq,
2562 &dev_priv->gt.idle_work,
2563 msecs_to_jiffies(50));
2564 goto out_rearm;
2565 }
2566
2567 if (dev_priv->gt.active_engines)
2568 goto out_unlock;
b29c19b6 2569
b4ac5afc 2570 for_each_engine(engine, dev_priv)
67d97da3 2571 i915_gem_batch_pool_fini(&engine->batch_pool);
35c94185 2572
67d97da3
CW
2573 GEM_BUG_ON(!dev_priv->gt.awake);
2574 dev_priv->gt.awake = false;
2575 rearm_hangcheck = false;
30ecad77 2576
2529d570
CW
2577 /* As we have disabled hangcheck, we need to unstick any waiters still
2578 * hanging around. However, as we may be racing against the interrupt
2579 * handler or the waiters themselves, we skip enabling the fake-irq.
2580 */
67d97da3 2581 stuck_engines = intel_kick_waiters(dev_priv);
2529d570
CW
2582 if (unlikely(stuck_engines))
2583 DRM_DEBUG_DRIVER("kicked stuck waiters (%x)...missed irq?\n",
2584 stuck_engines);
35c94185 2585
67d97da3
CW
2586 if (INTEL_GEN(dev_priv) >= 6)
2587 gen6_rps_idle(dev_priv);
2588 intel_runtime_pm_put(dev_priv);
2589out_unlock:
2590 mutex_unlock(&dev->struct_mutex);
b29c19b6 2591
67d97da3
CW
2592out_rearm:
2593 if (rearm_hangcheck) {
2594 GEM_BUG_ON(!dev_priv->gt.awake);
2595 i915_queue_hangcheck(dev_priv);
35c94185 2596 }
673a394b
EA
2597}
2598
23ba4fd0
BW
2599/**
2600 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
14bb2c11
TU
2601 * @dev: drm device pointer
2602 * @data: ioctl data blob
2603 * @file: drm file pointer
23ba4fd0
BW
2604 *
2605 * Returns 0 if successful, else an error is returned with the remaining time in
2606 * the timeout parameter.
2607 * -ETIME: object is still busy after timeout
2608 * -ERESTARTSYS: signal interrupted the wait
2609 * -ENOENT: object doesn't exist
2610 * Also possible, but rare:
2611 * -EAGAIN: GPU wedged
2612 * -ENOMEM: damn
2613 * -ENODEV: Internal IRQ fail
2614 * -E?: The add request failed
2615 *
2616 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
2617 * non-zero timeout parameter the wait ioctl will wait for the given number of
2618 * nanoseconds on an object becoming unbusy. Since the wait itself does so
2619 * without holding struct_mutex the object may become re-busied before this
2620 * function completes. A similar but shorter race condition exists in the busy
2621 * ioctl.
2622 */
2623int
2624i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2625{
2626 struct drm_i915_gem_wait *args = data;
2627 struct drm_i915_gem_object *obj;
27c01aae 2628 struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
b4716185
CW
2629 int i, n = 0;
2630 int ret;
23ba4fd0 2631
11b5d511
DV
2632 if (args->flags != 0)
2633 return -EINVAL;
2634
23ba4fd0
BW
2635 ret = i915_mutex_lock_interruptible(dev);
2636 if (ret)
2637 return ret;
2638
03ac0642
CW
2639 obj = i915_gem_object_lookup(file, args->bo_handle);
2640 if (!obj) {
23ba4fd0
BW
2641 mutex_unlock(&dev->struct_mutex);
2642 return -ENOENT;
2643 }
2644
b4716185 2645 if (!obj->active)
97b2a6a1 2646 goto out;
23ba4fd0 2647
666796da 2648 for (i = 0; i < I915_NUM_ENGINES; i++) {
27c01aae 2649 struct drm_i915_gem_request *req;
b4716185 2650
d72d908b
CW
2651 req = i915_gem_active_get(&obj->last_read[i],
2652 &obj->base.dev->struct_mutex);
27c01aae
CW
2653 if (req)
2654 requests[n++] = req;
b4716185
CW
2655 }
2656
21c310f2
CW
2657out:
2658 i915_gem_object_put(obj);
23ba4fd0
BW
2659 mutex_unlock(&dev->struct_mutex);
2660
b4716185
CW
2661 for (i = 0; i < n; i++) {
2662 if (ret == 0)
776f3236
CW
2663 ret = i915_wait_request(requests[i], true,
2664 args->timeout_ns > 0 ? &args->timeout_ns : NULL,
2665 to_rps_client(file));
27c01aae 2666 i915_gem_request_put(requests[i]);
b4716185 2667 }
ff865885 2668 return ret;
23ba4fd0
BW
2669}
2670
b4716185 2671static int
fa545cbf 2672__i915_gem_object_sync(struct drm_i915_gem_request *to,
8e637178 2673 struct drm_i915_gem_request *from)
b4716185 2674{
b4716185
CW
2675 int ret;
2676
8e637178 2677 if (to->engine == from->engine)
b4716185
CW
2678 return 0;
2679
39df9190 2680 if (!i915.semaphores) {
776f3236
CW
2681 ret = i915_wait_request(from,
2682 from->i915->mm.interruptible,
2683 NULL,
2684 NO_WAITBOOST);
b4716185
CW
2685 if (ret)
2686 return ret;
b4716185 2687 } else {
8e637178 2688 int idx = intel_engine_sync_index(from->engine, to->engine);
ddf07be7 2689 if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx])
b4716185
CW
2690 return 0;
2691
8e637178 2692 trace_i915_gem_ring_sync_to(to, from);
ddf07be7 2693 ret = to->engine->semaphore.sync_to(to, from);
b4716185
CW
2694 if (ret)
2695 return ret;
2696
ddf07be7 2697 from->engine->semaphore.sync_seqno[idx] = from->fence.seqno;
b4716185
CW
2698 }
2699
2700 return 0;
2701}
2702
5816d648
BW
2703/**
2704 * i915_gem_object_sync - sync an object to a ring.
2705 *
2706 * @obj: object which may be in use on another ring.
8e637178 2707 * @to: request we are wishing to use
5816d648
BW
2708 *
2709 * This code is meant to abstract object synchronization with the GPU.
8e637178
CW
2710 * Conceptually we serialise writes between engines inside the GPU.
2711 * We only allow one engine to write into a buffer at any time, but
2712 * multiple readers. To ensure each has a coherent view of memory, we must:
b4716185
CW
2713 *
2714 * - If there is an outstanding write request to the object, the new
2715 * request must wait for it to complete (either CPU or in hw, requests
2716 * on the same ring will be naturally ordered).
2717 *
2718 * - If we are a write request (pending_write_domain is set), the new
2719 * request must wait for outstanding read requests to complete.
5816d648
BW
2720 *
2721 * Returns 0 if successful, else propagates up the lower layer error.
2722 */
2911a35b
BW
2723int
2724i915_gem_object_sync(struct drm_i915_gem_object *obj,
8e637178 2725 struct drm_i915_gem_request *to)
2911a35b 2726{
8cac6f6c
CW
2727 struct i915_gem_active *active;
2728 unsigned long active_mask;
2729 int idx;
41c52415 2730
8cac6f6c 2731 lockdep_assert_held(&obj->base.dev->struct_mutex);
2911a35b 2732
8cac6f6c
CW
2733 active_mask = obj->active;
2734 if (!active_mask)
2735 return 0;
27c01aae 2736
8cac6f6c
CW
2737 if (obj->base.pending_write_domain) {
2738 active = obj->last_read;
b4716185 2739 } else {
8cac6f6c
CW
2740 active_mask = 1;
2741 active = &obj->last_write;
b4716185 2742 }
8cac6f6c
CW
2743
2744 for_each_active(active_mask, idx) {
2745 struct drm_i915_gem_request *request;
2746 int ret;
2747
2748 request = i915_gem_active_peek(&active[idx],
2749 &obj->base.dev->struct_mutex);
2750 if (!request)
2751 continue;
2752
fa545cbf 2753 ret = __i915_gem_object_sync(to, request);
b4716185
CW
2754 if (ret)
2755 return ret;
2756 }
2911a35b 2757
b4716185 2758 return 0;
2911a35b
BW
2759}
2760
b5ffc9bc
CW
2761static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
2762{
2763 u32 old_write_domain, old_read_domains;
2764
b5ffc9bc
CW
2765 /* Force a pagefault for domain tracking on next user access */
2766 i915_gem_release_mmap(obj);
2767
b97c3d9c
KP
2768 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
2769 return;
2770
b5ffc9bc
CW
2771 old_read_domains = obj->base.read_domains;
2772 old_write_domain = obj->base.write_domain;
2773
2774 obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
2775 obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
2776
2777 trace_i915_gem_object_change_domain(obj,
2778 old_read_domains,
2779 old_write_domain);
2780}
2781
8ef8561f
CW
2782static void __i915_vma_iounmap(struct i915_vma *vma)
2783{
2784 GEM_BUG_ON(vma->pin_count);
2785
2786 if (vma->iomap == NULL)
2787 return;
2788
2789 io_mapping_unmap(vma->iomap);
2790 vma->iomap = NULL;
2791}
2792
e9f24d5f 2793static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
673a394b 2794{
07fe0b12 2795 struct drm_i915_gem_object *obj = vma->obj;
b0decaf7 2796 unsigned long active;
43e28f09 2797 int ret;
673a394b 2798
b0decaf7
CW
2799 /* First wait upon any activity as retiring the request may
2800 * have side-effects such as unpinning or even unbinding this vma.
2801 */
2802 active = i915_vma_get_active(vma);
2803 if (active && wait) {
2804 int idx;
2805
2806 for_each_active(active, idx) {
2807 ret = i915_gem_active_retire(&vma->last_read[idx],
2808 &vma->vm->dev->struct_mutex);
2809 if (ret)
2810 return ret;
2811 }
2812
2813 GEM_BUG_ON(i915_vma_is_active(vma));
2814 }
2815
2816 if (vma->pin_count)
2817 return -EBUSY;
2818
1c7f4bca 2819 if (list_empty(&vma->obj_link))
673a394b
EA
2820 return 0;
2821
0ff501cb
DV
2822 if (!drm_mm_node_allocated(&vma->node)) {
2823 i915_gem_vma_destroy(vma);
0ff501cb
DV
2824 return 0;
2825 }
433544bd 2826
15717de2
CW
2827 GEM_BUG_ON(obj->bind_count == 0);
2828 GEM_BUG_ON(!obj->pages);
c4670ad0 2829
596c5923 2830 if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
8b1bc9b4 2831 i915_gem_object_finish_gtt(obj);
5323fd04 2832
8b1bc9b4
DV
2833 /* release the fence reg _after_ flushing */
2834 ret = i915_gem_object_put_fence(obj);
2835 if (ret)
2836 return ret;
8ef8561f
CW
2837
2838 __i915_vma_iounmap(vma);
8b1bc9b4 2839 }
96b47b65 2840
07fe0b12 2841 trace_i915_vma_unbind(vma);
db53a302 2842
777dc5bb 2843 vma->vm->unbind_vma(vma);
5e562f1d 2844 vma->bound = 0;
6f65e29a 2845
1c7f4bca 2846 list_del_init(&vma->vm_link);
596c5923 2847 if (vma->is_ggtt) {
fe14d5f4
TU
2848 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
2849 obj->map_and_fenceable = false;
2850 } else if (vma->ggtt_view.pages) {
2851 sg_free_table(vma->ggtt_view.pages);
2852 kfree(vma->ggtt_view.pages);
fe14d5f4 2853 }
016a65a3 2854 vma->ggtt_view.pages = NULL;
fe14d5f4 2855 }
673a394b 2856
2f633156
BW
2857 drm_mm_remove_node(&vma->node);
2858 i915_gem_vma_destroy(vma);
2859
2860 /* Since the unbound list is global, only move to that list if
b93dab6e 2861 * no more VMAs exist. */
15717de2
CW
2862 if (--obj->bind_count == 0)
2863 list_move_tail(&obj->global_list,
2864 &to_i915(obj->base.dev)->mm.unbound_list);
673a394b 2865
70903c3b
CW
2866 /* And finally now the object is completely decoupled from this vma,
2867 * we can drop its hold on the backing storage and allow it to be
2868 * reaped by the shrinker.
2869 */
2870 i915_gem_object_unpin_pages(obj);
2871
88241785 2872 return 0;
54cf91dc
CW
2873}
2874
e9f24d5f
TU
2875int i915_vma_unbind(struct i915_vma *vma)
2876{
2877 return __i915_vma_unbind(vma, true);
2878}
2879
2880int __i915_vma_unbind_no_wait(struct i915_vma *vma)
2881{
2882 return __i915_vma_unbind(vma, false);
2883}
2884
6e5a5beb 2885int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv)
4df2faf4 2886{
e2f80391 2887 struct intel_engine_cs *engine;
b4ac5afc 2888 int ret;
4df2faf4 2889
91c8a326 2890 lockdep_assert_held(&dev_priv->drm.struct_mutex);
6e5a5beb 2891
b4ac5afc 2892 for_each_engine(engine, dev_priv) {
62e63007
CW
2893 if (engine->last_context == NULL)
2894 continue;
2895
666796da 2896 ret = intel_engine_idle(engine);
1ec14ad3
CW
2897 if (ret)
2898 return ret;
2899 }
4df2faf4 2900
8a1a49f9 2901 return 0;
4df2faf4
DV
2902}
2903
4144f9b5 2904static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
42d6ab48
CW
2905 unsigned long cache_level)
2906{
4144f9b5 2907 struct drm_mm_node *gtt_space = &vma->node;
42d6ab48
CW
2908 struct drm_mm_node *other;
2909
4144f9b5
CW
2910 /*
2911 * On some machines we have to be careful when putting differing types
2912 * of snoopable memory together to avoid the prefetcher crossing memory
2913 * domains and dying. During vm initialisation, we decide whether or not
2914 * these constraints apply and set the drm_mm.color_adjust
2915 * appropriately.
42d6ab48 2916 */
4144f9b5 2917 if (vma->vm->mm.color_adjust == NULL)
42d6ab48
CW
2918 return true;
2919
c6cfb325 2920 if (!drm_mm_node_allocated(gtt_space))
42d6ab48
CW
2921 return true;
2922
2923 if (list_empty(&gtt_space->node_list))
2924 return true;
2925
2926 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
2927 if (other->allocated && !other->hole_follows && other->color != cache_level)
2928 return false;
2929
2930 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
2931 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
2932 return false;
2933
2934 return true;
2935}
2936
673a394b 2937/**
91e6711e
JL
2938 * Finds free space in the GTT aperture and binds the object or a view of it
2939 * there.
14bb2c11
TU
2940 * @obj: object to bind
2941 * @vm: address space to bind into
2942 * @ggtt_view: global gtt view if applicable
2943 * @alignment: requested alignment
2944 * @flags: mask of PIN_* flags to use
673a394b 2945 */
262de145 2946static struct i915_vma *
07fe0b12
BW
2947i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
2948 struct i915_address_space *vm,
ec7adb6e 2949 const struct i915_ggtt_view *ggtt_view,
07fe0b12 2950 unsigned alignment,
ec7adb6e 2951 uint64_t flags)
673a394b 2952{
05394f39 2953 struct drm_device *dev = obj->base.dev;
72e96d64
JL
2954 struct drm_i915_private *dev_priv = to_i915(dev);
2955 struct i915_ggtt *ggtt = &dev_priv->ggtt;
65bd342f 2956 u32 fence_alignment, unfenced_alignment;
101b506a
MT
2957 u32 search_flag, alloc_flag;
2958 u64 start, end;
65bd342f 2959 u64 size, fence_size;
2f633156 2960 struct i915_vma *vma;
07f73f69 2961 int ret;
673a394b 2962
91e6711e
JL
2963 if (i915_is_ggtt(vm)) {
2964 u32 view_size;
2965
2966 if (WARN_ON(!ggtt_view))
2967 return ERR_PTR(-EINVAL);
ec7adb6e 2968
91e6711e
JL
2969 view_size = i915_ggtt_view_size(obj, ggtt_view);
2970
2971 fence_size = i915_gem_get_gtt_size(dev,
2972 view_size,
2973 obj->tiling_mode);
2974 fence_alignment = i915_gem_get_gtt_alignment(dev,
2975 view_size,
2976 obj->tiling_mode,
2977 true);
2978 unfenced_alignment = i915_gem_get_gtt_alignment(dev,
2979 view_size,
2980 obj->tiling_mode,
2981 false);
2982 size = flags & PIN_MAPPABLE ? fence_size : view_size;
2983 } else {
2984 fence_size = i915_gem_get_gtt_size(dev,
2985 obj->base.size,
2986 obj->tiling_mode);
2987 fence_alignment = i915_gem_get_gtt_alignment(dev,
2988 obj->base.size,
2989 obj->tiling_mode,
2990 true);
2991 unfenced_alignment =
2992 i915_gem_get_gtt_alignment(dev,
2993 obj->base.size,
2994 obj->tiling_mode,
2995 false);
2996 size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
2997 }
a00b10c3 2998
101b506a
MT
2999 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
3000 end = vm->total;
3001 if (flags & PIN_MAPPABLE)
72e96d64 3002 end = min_t(u64, end, ggtt->mappable_end);
101b506a 3003 if (flags & PIN_ZONE_4G)
48ea1e32 3004 end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
101b506a 3005
673a394b 3006 if (alignment == 0)
1ec9e26d 3007 alignment = flags & PIN_MAPPABLE ? fence_alignment :
5e783301 3008 unfenced_alignment;
1ec9e26d 3009 if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
91e6711e
JL
3010 DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
3011 ggtt_view ? ggtt_view->type : 0,
3012 alignment);
262de145 3013 return ERR_PTR(-EINVAL);
673a394b
EA
3014 }
3015
91e6711e
JL
3016 /* If binding the object/GGTT view requires more space than the entire
3017 * aperture has, reject it early before evicting everything in a vain
3018 * attempt to find space.
654fc607 3019 */
91e6711e 3020 if (size > end) {
65bd342f 3021 DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n",
91e6711e
JL
3022 ggtt_view ? ggtt_view->type : 0,
3023 size,
1ec9e26d 3024 flags & PIN_MAPPABLE ? "mappable" : "total",
d23db88c 3025 end);
262de145 3026 return ERR_PTR(-E2BIG);
654fc607
CW
3027 }
3028
37e680a1 3029 ret = i915_gem_object_get_pages(obj);
6c085a72 3030 if (ret)
262de145 3031 return ERR_PTR(ret);
6c085a72 3032
fbdda6fb
CW
3033 i915_gem_object_pin_pages(obj);
3034
ec7adb6e
JL
3035 vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
3036 i915_gem_obj_lookup_or_create_vma(obj, vm);
3037
262de145 3038 if (IS_ERR(vma))
bc6bc15b 3039 goto err_unpin;
2f633156 3040
506a8e87
CW
3041 if (flags & PIN_OFFSET_FIXED) {
3042 uint64_t offset = flags & PIN_OFFSET_MASK;
3043
3044 if (offset & (alignment - 1) || offset + size > end) {
3045 ret = -EINVAL;
3046 goto err_free_vma;
3047 }
3048 vma->node.start = offset;
3049 vma->node.size = size;
3050 vma->node.color = obj->cache_level;
3051 ret = drm_mm_reserve_node(&vm->mm, &vma->node);
3052 if (ret) {
3053 ret = i915_gem_evict_for_vma(vma);
3054 if (ret == 0)
3055 ret = drm_mm_reserve_node(&vm->mm, &vma->node);
3056 }
3057 if (ret)
3058 goto err_free_vma;
101b506a 3059 } else {
506a8e87
CW
3060 if (flags & PIN_HIGH) {
3061 search_flag = DRM_MM_SEARCH_BELOW;
3062 alloc_flag = DRM_MM_CREATE_TOP;
3063 } else {
3064 search_flag = DRM_MM_SEARCH_DEFAULT;
3065 alloc_flag = DRM_MM_CREATE_DEFAULT;
3066 }
101b506a 3067
0a9ae0d7 3068search_free:
506a8e87
CW
3069 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
3070 size, alignment,
3071 obj->cache_level,
3072 start, end,
3073 search_flag,
3074 alloc_flag);
3075 if (ret) {
3076 ret = i915_gem_evict_something(dev, vm, size, alignment,
3077 obj->cache_level,
3078 start, end,
3079 flags);
3080 if (ret == 0)
3081 goto search_free;
9731129c 3082
506a8e87
CW
3083 goto err_free_vma;
3084 }
673a394b 3085 }
4144f9b5 3086 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
2f633156 3087 ret = -EINVAL;
bc6bc15b 3088 goto err_remove_node;
673a394b
EA
3089 }
3090
fe14d5f4 3091 trace_i915_vma_bind(vma, flags);
0875546c 3092 ret = i915_vma_bind(vma, obj->cache_level, flags);
fe14d5f4 3093 if (ret)
e2273302 3094 goto err_remove_node;
fe14d5f4 3095
35c20a60 3096 list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
1c7f4bca 3097 list_add_tail(&vma->vm_link, &vm->inactive_list);
15717de2 3098 obj->bind_count++;
bf1a1092 3099
262de145 3100 return vma;
2f633156 3101
bc6bc15b 3102err_remove_node:
6286ef9b 3103 drm_mm_remove_node(&vma->node);
bc6bc15b 3104err_free_vma:
2f633156 3105 i915_gem_vma_destroy(vma);
262de145 3106 vma = ERR_PTR(ret);
bc6bc15b 3107err_unpin:
2f633156 3108 i915_gem_object_unpin_pages(obj);
262de145 3109 return vma;
673a394b
EA
3110}
3111
000433b6 3112bool
2c22569b
CW
3113i915_gem_clflush_object(struct drm_i915_gem_object *obj,
3114 bool force)
673a394b 3115{
673a394b
EA
3116 /* If we don't have a page list set up, then we're not pinned
3117 * to GPU, and we can ignore the cache flush because it'll happen
3118 * again at bind time.
3119 */
05394f39 3120 if (obj->pages == NULL)
000433b6 3121 return false;
673a394b 3122
769ce464
ID
3123 /*
3124 * Stolen memory is always coherent with the GPU as it is explicitly
3125 * marked as wc by the system, or the system is cache-coherent.
3126 */
6a2c4232 3127 if (obj->stolen || obj->phys_handle)
000433b6 3128 return false;
769ce464 3129
9c23f7fc
CW
3130 /* If the GPU is snooping the contents of the CPU cache,
3131 * we do not need to manually clear the CPU cache lines. However,
3132 * the caches are only snooped when the render cache is
3133 * flushed/invalidated. As we always have to emit invalidations
3134 * and flushes when moving into and out of the RENDER domain, correct
3135 * snooping behaviour occurs naturally as the result of our domain
3136 * tracking.
3137 */
0f71979a
CW
3138 if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
3139 obj->cache_dirty = true;
000433b6 3140 return false;
0f71979a 3141 }
9c23f7fc 3142
1c5d22f7 3143 trace_i915_gem_object_clflush(obj);
9da3da66 3144 drm_clflush_sg(obj->pages);
0f71979a 3145 obj->cache_dirty = false;
000433b6
CW
3146
3147 return true;
e47c68e9
EA
3148}
3149
3150/** Flushes the GTT write domain for the object if it's dirty. */
3151static void
05394f39 3152i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
e47c68e9 3153{
1c5d22f7
CW
3154 uint32_t old_write_domain;
3155
05394f39 3156 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
e47c68e9
EA
3157 return;
3158
63256ec5 3159 /* No actual flushing is required for the GTT write domain. Writes
e47c68e9
EA
3160 * to it immediately go to main memory as far as we know, so there's
3161 * no chipset flush. It also doesn't land in render cache.
63256ec5
CW
3162 *
3163 * However, we do have to enforce the order so that all writes through
3164 * the GTT land before any writes to the device, such as updates to
3165 * the GATT itself.
e47c68e9 3166 */
63256ec5
CW
3167 wmb();
3168
05394f39
CW
3169 old_write_domain = obj->base.write_domain;
3170 obj->base.write_domain = 0;
1c5d22f7 3171
de152b62 3172 intel_fb_obj_flush(obj, false, ORIGIN_GTT);
f99d7069 3173
1c5d22f7 3174 trace_i915_gem_object_change_domain(obj,
05394f39 3175 obj->base.read_domains,
1c5d22f7 3176 old_write_domain);
e47c68e9
EA
3177}
3178
3179/** Flushes the CPU write domain for the object if it's dirty. */
3180static void
e62b59e4 3181i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
e47c68e9 3182{
1c5d22f7 3183 uint32_t old_write_domain;
e47c68e9 3184
05394f39 3185 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
e47c68e9
EA
3186 return;
3187
e62b59e4 3188 if (i915_gem_clflush_object(obj, obj->pin_display))
c033666a 3189 i915_gem_chipset_flush(to_i915(obj->base.dev));
000433b6 3190
05394f39
CW
3191 old_write_domain = obj->base.write_domain;
3192 obj->base.write_domain = 0;
1c5d22f7 3193
de152b62 3194 intel_fb_obj_flush(obj, false, ORIGIN_CPU);
f99d7069 3195
1c5d22f7 3196 trace_i915_gem_object_change_domain(obj,
05394f39 3197 obj->base.read_domains,
1c5d22f7 3198 old_write_domain);
e47c68e9
EA
3199}
3200
2ef7eeaa
EA
3201/**
3202 * Moves a single object to the GTT read, and possibly write domain.
14bb2c11
TU
3203 * @obj: object to act on
3204 * @write: ask for write access or read only
2ef7eeaa
EA
3205 *
3206 * This function returns when the move is complete, including waiting on
3207 * flushes to occur.
3208 */
79e53945 3209int
2021746e 3210i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
2ef7eeaa 3211{
1c5d22f7 3212 uint32_t old_write_domain, old_read_domains;
43566ded 3213 struct i915_vma *vma;
e47c68e9 3214 int ret;
2ef7eeaa 3215
0201f1ec 3216 ret = i915_gem_object_wait_rendering(obj, !write);
88241785
CW
3217 if (ret)
3218 return ret;
3219
c13d87ea
CW
3220 if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
3221 return 0;
3222
43566ded
CW
3223 /* Flush and acquire obj->pages so that we are coherent through
3224 * direct access in memory with previous cached writes through
3225 * shmemfs and that our cache domain tracking remains valid.
3226 * For example, if the obj->filp was moved to swap without us
3227 * being notified and releasing the pages, we would mistakenly
3228 * continue to assume that the obj remained out of the CPU cached
3229 * domain.
3230 */
3231 ret = i915_gem_object_get_pages(obj);
3232 if (ret)
3233 return ret;
3234
e62b59e4 3235 i915_gem_object_flush_cpu_write_domain(obj);
1c5d22f7 3236
d0a57789
CW
3237 /* Serialise direct access to this object with the barriers for
3238 * coherent writes from the GPU, by effectively invalidating the
3239 * GTT domain upon first access.
3240 */
3241 if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3242 mb();
3243
05394f39
CW
3244 old_write_domain = obj->base.write_domain;
3245 old_read_domains = obj->base.read_domains;
1c5d22f7 3246
e47c68e9
EA
3247 /* It should now be out of any other write domains, and we can update
3248 * the domain values for our changes.
3249 */
05394f39
CW
3250 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3251 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
e47c68e9 3252 if (write) {
05394f39
CW
3253 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
3254 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
3255 obj->dirty = 1;
2ef7eeaa
EA
3256 }
3257
1c5d22f7
CW
3258 trace_i915_gem_object_change_domain(obj,
3259 old_read_domains,
3260 old_write_domain);
3261
8325a09d 3262 /* And bump the LRU for this access */
43566ded 3263 vma = i915_gem_obj_to_ggtt(obj);
b0decaf7
CW
3264 if (vma &&
3265 drm_mm_node_allocated(&vma->node) &&
3266 !i915_vma_is_active(vma))
3267 list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
8325a09d 3268
e47c68e9
EA
3269 return 0;
3270}
3271
ef55f92a
CW
3272/**
3273 * Changes the cache-level of an object across all VMA.
14bb2c11
TU
3274 * @obj: object to act on
3275 * @cache_level: new cache level to set for the object
ef55f92a
CW
3276 *
3277 * After this function returns, the object will be in the new cache-level
3278 * across all GTT and the contents of the backing storage will be coherent,
3279 * with respect to the new cache-level. In order to keep the backing storage
3280 * coherent for all users, we only allow a single cache level to be set
3281 * globally on the object and prevent it from being changed whilst the
3282 * hardware is reading from the object. That is if the object is currently
3283 * on the scanout it will be set to uncached (or equivalent display
3284 * cache coherency) and all non-MOCS GPU access will also be uncached so
3285 * that all direct access to the scanout remains coherent.
3286 */
e4ffd173
CW
3287int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3288 enum i915_cache_level cache_level)
3289{
aa653a68 3290 struct i915_vma *vma;
ed75a55b 3291 int ret = 0;
e4ffd173
CW
3292
3293 if (obj->cache_level == cache_level)
ed75a55b 3294 goto out;
e4ffd173 3295
ef55f92a
CW
3296 /* Inspect the list of currently bound VMA and unbind any that would
3297 * be invalid given the new cache-level. This is principally to
3298 * catch the issue of the CS prefetch crossing page boundaries and
3299 * reading an invalid PTE on older architectures.
3300 */
aa653a68
CW
3301restart:
3302 list_for_each_entry(vma, &obj->vma_list, obj_link) {
ef55f92a
CW
3303 if (!drm_mm_node_allocated(&vma->node))
3304 continue;
3305
3306 if (vma->pin_count) {
3307 DRM_DEBUG("can not change the cache level of pinned objects\n");
3308 return -EBUSY;
3309 }
3310
aa653a68
CW
3311 if (i915_gem_valid_gtt_space(vma, cache_level))
3312 continue;
3313
3314 ret = i915_vma_unbind(vma);
3315 if (ret)
3316 return ret;
3317
3318 /* As unbinding may affect other elements in the
3319 * obj->vma_list (due to side-effects from retiring
3320 * an active vma), play safe and restart the iterator.
3321 */
3322 goto restart;
42d6ab48
CW
3323 }
3324
ef55f92a
CW
3325 /* We can reuse the existing drm_mm nodes but need to change the
3326 * cache-level on the PTE. We could simply unbind them all and
3327 * rebind with the correct cache-level on next use. However since
3328 * we already have a valid slot, dma mapping, pages etc, we may as well
3329 * rewrite the PTE in the belief that doing so tramples upon less
3330 * state and so involves less work.
3331 */
15717de2 3332 if (obj->bind_count) {
ef55f92a
CW
3333 /* Before we change the PTE, the GPU must not be accessing it.
3334 * If we wait upon the object, we know that all the bound
3335 * VMA are no longer active.
3336 */
2e2f351d 3337 ret = i915_gem_object_wait_rendering(obj, false);
e4ffd173
CW
3338 if (ret)
3339 return ret;
3340
aa653a68 3341 if (!HAS_LLC(obj->base.dev) && cache_level != I915_CACHE_NONE) {
ef55f92a
CW
3342 /* Access to snoopable pages through the GTT is
3343 * incoherent and on some machines causes a hard
3344 * lockup. Relinquish the CPU mmapping to force
3345 * userspace to refault in the pages and we can
3346 * then double check if the GTT mapping is still
3347 * valid for that pointer access.
3348 */
3349 i915_gem_release_mmap(obj);
3350
3351 /* As we no longer need a fence for GTT access,
3352 * we can relinquish it now (and so prevent having
3353 * to steal a fence from someone else on the next
3354 * fence request). Note GPU activity would have
3355 * dropped the fence as all snoopable access is
3356 * supposed to be linear.
3357 */
e4ffd173
CW
3358 ret = i915_gem_object_put_fence(obj);
3359 if (ret)
3360 return ret;
ef55f92a
CW
3361 } else {
3362 /* We either have incoherent backing store and
3363 * so no GTT access or the architecture is fully
3364 * coherent. In such cases, existing GTT mmaps
3365 * ignore the cache bit in the PTE and we can
3366 * rewrite it without confusing the GPU or having
3367 * to force userspace to fault back in its mmaps.
3368 */
e4ffd173
CW
3369 }
3370
1c7f4bca 3371 list_for_each_entry(vma, &obj->vma_list, obj_link) {
ef55f92a
CW
3372 if (!drm_mm_node_allocated(&vma->node))
3373 continue;
3374
3375 ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
3376 if (ret)
3377 return ret;
3378 }
e4ffd173
CW
3379 }
3380
1c7f4bca 3381 list_for_each_entry(vma, &obj->vma_list, obj_link)
2c22569b
CW
3382 vma->node.color = cache_level;
3383 obj->cache_level = cache_level;
3384
ed75a55b 3385out:
ef55f92a
CW
3386 /* Flush the dirty CPU caches to the backing storage so that the
3387 * object is now coherent at its new cache level (with respect
3388 * to the access domain).
3389 */
b50a5371 3390 if (obj->cache_dirty && cpu_write_needs_clflush(obj)) {
0f71979a 3391 if (i915_gem_clflush_object(obj, true))
c033666a 3392 i915_gem_chipset_flush(to_i915(obj->base.dev));
e4ffd173
CW
3393 }
3394
e4ffd173
CW
3395 return 0;
3396}
3397
199adf40
BW
3398int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3399 struct drm_file *file)
e6994aee 3400{
199adf40 3401 struct drm_i915_gem_caching *args = data;
e6994aee 3402 struct drm_i915_gem_object *obj;
e6994aee 3403
03ac0642
CW
3404 obj = i915_gem_object_lookup(file, args->handle);
3405 if (!obj)
432be69d 3406 return -ENOENT;
e6994aee 3407
651d794f
CW
3408 switch (obj->cache_level) {
3409 case I915_CACHE_LLC:
3410 case I915_CACHE_L3_LLC:
3411 args->caching = I915_CACHING_CACHED;
3412 break;
3413
4257d3ba
CW
3414 case I915_CACHE_WT:
3415 args->caching = I915_CACHING_DISPLAY;
3416 break;
3417
651d794f
CW
3418 default:
3419 args->caching = I915_CACHING_NONE;
3420 break;
3421 }
e6994aee 3422
34911fd3 3423 i915_gem_object_put_unlocked(obj);
432be69d 3424 return 0;
e6994aee
CW
3425}
3426
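A hedged userspace sketch of the ioctl above: query a GEM object's caching mode with libdrm's drmIoctl(). The DRM fd and GEM handle are assumed to have been obtained elsewhere (e.g. via drmOpen() and DRM_IOCTL_I915_GEM_CREATE).

#include <stdio.h>
#include <xf86drm.h>	/* drmIoctl(), from libdrm */
#include <i915_drm.h>	/* struct drm_i915_gem_caching, I915_CACHING_* */

static int query_caching(int fd, unsigned int handle)
{
	struct drm_i915_gem_caching arg = { .handle = handle };

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_CACHING, &arg))
		return -1;

	/* arg.caching is I915_CACHING_NONE, _CACHED or _DISPLAY */
	printf("handle %u caching mode %u\n", handle, arg.caching);
	return 0;
}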
199adf40
BW
3427int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
3428 struct drm_file *file)
e6994aee 3429{
fac5e23e 3430 struct drm_i915_private *dev_priv = to_i915(dev);
199adf40 3431 struct drm_i915_gem_caching *args = data;
e6994aee
CW
3432 struct drm_i915_gem_object *obj;
3433 enum i915_cache_level level;
3434 int ret;
3435
199adf40
BW
3436 switch (args->caching) {
3437 case I915_CACHING_NONE:
e6994aee
CW
3438 level = I915_CACHE_NONE;
3439 break;
199adf40 3440 case I915_CACHING_CACHED:
e5756c10
ID
3441 /*
3442 * Due to a HW issue on BXT A stepping, GPU stores via a
3443 * snooped mapping may leave stale data in a corresponding CPU
3444 * cacheline, whereas normally such cachelines would get
3445 * invalidated.
3446 */
ca377809 3447 if (!HAS_LLC(dev) && !HAS_SNOOP(dev))
e5756c10
ID
3448 return -ENODEV;
3449
e6994aee
CW
3450 level = I915_CACHE_LLC;
3451 break;
4257d3ba
CW
3452 case I915_CACHING_DISPLAY:
3453 level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE;
3454 break;
e6994aee
CW
3455 default:
3456 return -EINVAL;
3457 }
3458
fd0fe6ac
ID
3459 intel_runtime_pm_get(dev_priv);
3460
3bc2913e
BW
3461 ret = i915_mutex_lock_interruptible(dev);
3462 if (ret)
fd0fe6ac 3463 goto rpm_put;
3bc2913e 3464
03ac0642
CW
3465 obj = i915_gem_object_lookup(file, args->handle);
3466 if (!obj) {
e6994aee
CW
3467 ret = -ENOENT;
3468 goto unlock;
3469 }
3470
3471 ret = i915_gem_object_set_cache_level(obj, level);
3472
f8c417cd 3473 i915_gem_object_put(obj);
e6994aee
CW
3474unlock:
3475 mutex_unlock(&dev->struct_mutex);
fd0fe6ac
ID
3476rpm_put:
3477 intel_runtime_pm_put(dev_priv);
3478
e6994aee
CW
3479 return ret;
3480}
3481
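Similarly, a hedged userspace sketch of the set-caching path handled above: request snooped (cached) PTEs for an object, which fails with ENODEV on parts without LLC or snooping, as the handler shows.

#include <errno.h>
#include <xf86drm.h>
#include <i915_drm.h>

static int make_object_snooped(int fd, unsigned int handle)
{
	struct drm_i915_gem_caching arg = {
		.handle = handle,
		.caching = I915_CACHING_CACHED,
	};

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg))
		return -errno;	/* e.g. -ENODEV on LLC-less, non-snooping parts */

	return 0;
}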
b9241ea3 3482/*
2da3b9b9
CW
3483 * Prepare buffer for display plane (scanout, cursors, etc).
3484 * Can be called from an uninterruptible phase (modesetting) and allows
3485 * any flushes to be pipelined (for pageflips).
b9241ea3
ZW
3486 */
3487int
2da3b9b9
CW
3488i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3489 u32 alignment,
e6617330 3490 const struct i915_ggtt_view *view)
b9241ea3 3491{
2da3b9b9 3492 u32 old_read_domains, old_write_domain;
b9241ea3
ZW
3493 int ret;
3494
cc98b413
CW
3495 /* Mark the pin_display early so that we account for the
3496 * display coherency whilst setting up the cache domains.
3497 */
8a0c39b1 3498 obj->pin_display++;
cc98b413 3499
a7ef0640
EA
3500 /* The display engine is not coherent with the LLC cache on gen6. As
3501 * a result, we make sure that the pinning that is about to occur is
3502 * done with uncached PTEs. This is the lowest common denominator for all
3503 * chipsets.
3504 *
3505 * However for gen6+, we could do better by using the GFDT bit instead
3506 * of uncaching, which would allow us to flush all the LLC-cached data
3507 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3508 */
651d794f
CW
3509 ret = i915_gem_object_set_cache_level(obj,
3510 HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
a7ef0640 3511 if (ret)
cc98b413 3512 goto err_unpin_display;
a7ef0640 3513
2da3b9b9
CW
3514 /* As the user may map the buffer once pinned in the display plane
3515 * (e.g. libkms for the bootup splash), we have to ensure that we
3516 * always use map_and_fenceable for all scanout buffers.
3517 */
50470bb0
TU
3518 ret = i915_gem_object_ggtt_pin(obj, view, alignment,
3519 view->type == I915_GGTT_VIEW_NORMAL ?
3520 PIN_MAPPABLE : 0);
2da3b9b9 3521 if (ret)
cc98b413 3522 goto err_unpin_display;
2da3b9b9 3523
e62b59e4 3524 i915_gem_object_flush_cpu_write_domain(obj);
b118c1e3 3525
2da3b9b9 3526 old_write_domain = obj->base.write_domain;
05394f39 3527 old_read_domains = obj->base.read_domains;
2da3b9b9
CW
3528
3529 /* It should now be out of any other write domains, and we can update
3530 * the domain values for our changes.
3531 */
e5f1d962 3532 obj->base.write_domain = 0;
05394f39 3533 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
b9241ea3
ZW
3534
3535 trace_i915_gem_object_change_domain(obj,
3536 old_read_domains,
2da3b9b9 3537 old_write_domain);
b9241ea3
ZW
3538
3539 return 0;
cc98b413
CW
3540
3541err_unpin_display:
8a0c39b1 3542 obj->pin_display--;
cc98b413
CW
3543 return ret;
3544}
3545
3546void
e6617330
TU
3547i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
3548 const struct i915_ggtt_view *view)
cc98b413 3549{
8a0c39b1
TU
3550 if (WARN_ON(obj->pin_display == 0))
3551 return;
3552
e6617330
TU
3553 i915_gem_object_ggtt_unpin_view(obj, view);
3554
8a0c39b1 3555 obj->pin_display--;
b9241ea3
ZW
3556}
3557
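Editor's sketch of how the two helpers above pair up in a display path: pin the object into the GGTT for scanout, program the plane, and drop the display pin when the framebuffer is switched away. The 4096-byte alignment and the empty plane-programming step are placeholders, not taken from this file.

static int sketch_scanout_use(struct drm_i915_gem_object *obj,
			      const struct i915_ggtt_view *view)
{
	int ret;

	ret = i915_gem_object_pin_to_display_plane(obj, 4096, view);
	if (ret)
		return ret;

	/* ... program plane registers with the object's GGTT offset ... */

	i915_gem_object_unpin_from_display_plane(obj, view);
	return 0;
}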
e47c68e9
EA
3558/**
3559 * Moves a single object to the CPU read, and possibly write, domain.
14bb2c11
TU
3560 * @obj: object to act on
3561 * @write: requesting write or read-only access
e47c68e9
EA
3562 *
3563 * This function returns when the move is complete, including waiting on
3564 * flushes to occur.
3565 */
dabdfe02 3566int
919926ae 3567i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
e47c68e9 3568{
1c5d22f7 3569 uint32_t old_write_domain, old_read_domains;
e47c68e9
EA
3570 int ret;
3571
0201f1ec 3572 ret = i915_gem_object_wait_rendering(obj, !write);
88241785
CW
3573 if (ret)
3574 return ret;
3575
c13d87ea
CW
3576 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
3577 return 0;
3578
e47c68e9 3579 i915_gem_object_flush_gtt_write_domain(obj);
2ef7eeaa 3580
05394f39
CW
3581 old_write_domain = obj->base.write_domain;
3582 old_read_domains = obj->base.read_domains;
1c5d22f7 3583
e47c68e9 3584 /* Flush the CPU cache if it's still invalid. */
05394f39 3585 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
2c22569b 3586 i915_gem_clflush_object(obj, false);
2ef7eeaa 3587
05394f39 3588 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
2ef7eeaa
EA
3589 }
3590
3591 /* It should now be out of any other write domains, and we can update
3592 * the domain values for our changes.
3593 */
05394f39 3594 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
e47c68e9
EA
3595
3596 /* If we're writing through the CPU, then the GPU read domains will
3597 * need to be invalidated at next use.
3598 */
3599 if (write) {
05394f39
CW
3600 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3601 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
e47c68e9 3602 }
2ef7eeaa 3603
1c5d22f7
CW
3604 trace_i915_gem_object_change_domain(obj,
3605 old_read_domains,
3606 old_write_domain);
3607
2ef7eeaa
EA
3608 return 0;
3609}
3610
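A minimal sketch of the usual pattern around the helper above: move the object into the CPU domain (read-only here), then access its pages through a kernel mapping. It assumes the caller already holds struct_mutex and has pinned obj->pages; the single-page copy is illustrative only.

static int sketch_cpu_read_first_page(struct drm_i915_gem_object *obj,
				      void *dst, size_t len)
{
	struct page *page;
	void *vaddr;
	int ret;

	ret = i915_gem_object_set_to_cpu_domain(obj, false);
	if (ret)
		return ret;

	page = i915_gem_object_get_page(obj, 0);
	vaddr = kmap(page);
	memcpy(dst, vaddr, min_t(size_t, len, PAGE_SIZE));
	kunmap(page);

	return 0;
}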
673a394b
EA
3611/* Throttle our rendering by waiting until the ring has completed our requests
3612 * emitted over 20 msec ago.
3613 *
b962442e
EA
3614 * Note that if we were to use the current jiffies each time around the loop,
3615 * we wouldn't escape the function with any frames outstanding if the time to
3616 * render a frame was over 20ms.
3617 *
673a394b
EA
3618 * This should get us reasonable parallelism between CPU and GPU but also
3619 * relatively low latency when blocking on a particular request to finish.
3620 */
40a5f0de 3621static int
f787a5f5 3622i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
40a5f0de 3623{
fac5e23e 3624 struct drm_i915_private *dev_priv = to_i915(dev);
f787a5f5 3625 struct drm_i915_file_private *file_priv = file->driver_priv;
d0bc54f2 3626 unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
54fb2411 3627 struct drm_i915_gem_request *request, *target = NULL;
f787a5f5 3628 int ret;
93533c29 3629
308887aa
DV
3630 ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
3631 if (ret)
3632 return ret;
3633
f4457ae7
CW
3634 /* ABI: return -EIO if already wedged */
3635 if (i915_terminally_wedged(&dev_priv->gpu_error))
3636 return -EIO;
e110e8d6 3637
1c25595f 3638 spin_lock(&file_priv->mm.lock);
f787a5f5 3639 list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
b962442e
EA
3640 if (time_after_eq(request->emitted_jiffies, recent_enough))
3641 break;
40a5f0de 3642
fcfa423c
JH
3643 /*
3644 * Note that the request might not have been submitted yet,
3645 * in which case emitted_jiffies will be zero.
3646 */
3647 if (!request->emitted_jiffies)
3648 continue;
3649
54fb2411 3650 target = request;
b962442e 3651 }
ff865885 3652 if (target)
e8a261ea 3653 i915_gem_request_get(target);
1c25595f 3654 spin_unlock(&file_priv->mm.lock);
40a5f0de 3655
54fb2411 3656 if (target == NULL)
f787a5f5 3657 return 0;
2bc43b5c 3658
776f3236 3659 ret = i915_wait_request(target, true, NULL, NULL);
e8a261ea 3660 i915_gem_request_put(target);
ff865885 3661
40a5f0de
EA
3662 return ret;
3663}
3664
d23db88c
CW
3665static bool
3666i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
3667{
3668 struct drm_i915_gem_object *obj = vma->obj;
3669
3670 if (alignment &&
3671 vma->node.start & (alignment - 1))
3672 return true;
3673
3674 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
3675 return true;
3676
3677 if (flags & PIN_OFFSET_BIAS &&
3678 vma->node.start < (flags & PIN_OFFSET_MASK))
3679 return true;
3680
506a8e87
CW
3681 if (flags & PIN_OFFSET_FIXED &&
3682 vma->node.start != (flags & PIN_OFFSET_MASK))
3683 return true;
3684
d23db88c
CW
3685 return false;
3686}
3687
d0710abb
CW
3688void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
3689{
3690 struct drm_i915_gem_object *obj = vma->obj;
3691 bool mappable, fenceable;
3692 u32 fence_size, fence_alignment;
3693
3694 fence_size = i915_gem_get_gtt_size(obj->base.dev,
3695 obj->base.size,
3696 obj->tiling_mode);
3697 fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
3698 obj->base.size,
3699 obj->tiling_mode,
3700 true);
3701
3702 fenceable = (vma->node.size == fence_size &&
3703 (vma->node.start & (fence_alignment - 1)) == 0);
3704
3705 mappable = (vma->node.start + fence_size <=
62106b4f 3706 to_i915(obj->base.dev)->ggtt.mappable_end);
d0710abb
CW
3707
3708 obj->map_and_fenceable = mappable && fenceable;
3709}
3710
ec7adb6e
JL
3711static int
3712i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
3713 struct i915_address_space *vm,
3714 const struct i915_ggtt_view *ggtt_view,
3715 uint32_t alignment,
3716 uint64_t flags)
673a394b 3717{
fac5e23e 3718 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
07fe0b12 3719 struct i915_vma *vma;
ef79e17c 3720 unsigned bound;
673a394b
EA
3721 int ret;
3722
6e7186af
BW
3723 if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base))
3724 return -ENODEV;
3725
bf3d149b 3726 if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
1ec9e26d 3727 return -EINVAL;
07fe0b12 3728
c826c449
CW
3729 if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE))
3730 return -EINVAL;
3731
ec7adb6e
JL
3732 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
3733 return -EINVAL;
3734
3735 vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) :
3736 i915_gem_obj_to_vma(obj, vm);
3737
07fe0b12 3738 if (vma) {
d7f46fc4
BW
3739 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
3740 return -EBUSY;
3741
d23db88c 3742 if (i915_vma_misplaced(vma, alignment, flags)) {
d7f46fc4 3743 WARN(vma->pin_count,
ec7adb6e 3744 "bo is already pinned in %s with incorrect alignment:"
088e0df4 3745 " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
75e9e915 3746 " obj->map_and_fenceable=%d\n",
ec7adb6e 3747 ggtt_view ? "ggtt" : "ppgtt",
088e0df4
MT
3748 upper_32_bits(vma->node.start),
3749 lower_32_bits(vma->node.start),
fe14d5f4 3750 alignment,
d23db88c 3751 !!(flags & PIN_MAPPABLE),
05394f39 3752 obj->map_and_fenceable);
07fe0b12 3753 ret = i915_vma_unbind(vma);
ac0c6b5a
CW
3754 if (ret)
3755 return ret;
8ea99c92
DV
3756
3757 vma = NULL;
ac0c6b5a
CW
3758 }
3759 }
3760
ef79e17c 3761 bound = vma ? vma->bound : 0;
8ea99c92 3762 if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
ec7adb6e
JL
3763 vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
3764 flags);
262de145
DV
3765 if (IS_ERR(vma))
3766 return PTR_ERR(vma);
0875546c
DV
3767 } else {
3768 ret = i915_vma_bind(vma, obj->cache_level, flags);
fe14d5f4
TU
3769 if (ret)
3770 return ret;
3771 }
74898d7e 3772
91e6711e
JL
3773 if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
3774 (bound ^ vma->bound) & GLOBAL_BIND) {
d0710abb 3775 __i915_vma_set_map_and_fenceable(vma);
91e6711e
JL
3776 WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
3777 }
ef79e17c 3778
8ea99c92 3779 vma->pin_count++;
673a394b
EA
3780 return 0;
3781}
3782
ec7adb6e
JL
3783int
3784i915_gem_object_pin(struct drm_i915_gem_object *obj,
3785 struct i915_address_space *vm,
3786 uint32_t alignment,
3787 uint64_t flags)
3788{
3789 return i915_gem_object_do_pin(obj, vm,
3790 i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
3791 alignment, flags);
3792}
3793
3794int
3795i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
3796 const struct i915_ggtt_view *view,
3797 uint32_t alignment,
3798 uint64_t flags)
3799{
72e96d64
JL
3800 struct drm_device *dev = obj->base.dev;
3801 struct drm_i915_private *dev_priv = to_i915(dev);
3802 struct i915_ggtt *ggtt = &dev_priv->ggtt;
3803
ade7daa1 3804 BUG_ON(!view);
ec7adb6e 3805
72e96d64 3806 return i915_gem_object_do_pin(obj, &ggtt->base, view,
6fafab76 3807 alignment, flags | PIN_GLOBAL);
ec7adb6e
JL
3808}
3809
673a394b 3810void
e6617330
TU
3811i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
3812 const struct i915_ggtt_view *view)
673a394b 3813{
e6617330 3814 struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
673a394b 3815
e6617330 3816 WARN_ON(vma->pin_count == 0);
9abc4648 3817 WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view));
d7f46fc4 3818
30154650 3819 --vma->pin_count;
673a394b
EA
3820}
3821
673a394b
EA
3822int
3823i915_gem_busy_ioctl(struct drm_device *dev, void *data,
05394f39 3824 struct drm_file *file)
673a394b
EA
3825{
3826 struct drm_i915_gem_busy *args = data;
05394f39 3827 struct drm_i915_gem_object *obj;
30dbf0c0
CW
3828 int ret;
3829
76c1dec1 3830 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 3831 if (ret)
76c1dec1 3832 return ret;
673a394b 3833
03ac0642
CW
3834 obj = i915_gem_object_lookup(file, args->handle);
3835 if (!obj) {
1d7cfea1
CW
3836 ret = -ENOENT;
3837 goto unlock;
673a394b 3838 }
d1b851fc 3839
0be555b6
CW
3840 /* Count all active objects as busy, even if they are currently not used
3841 * by the gpu. Users of this interface expect objects to eventually
21c310f2 3842 * become non-busy without any further actions.
c4de0a5d 3843 */
426960be
CW
3844 args->busy = 0;
3845 if (obj->active) {
27c01aae 3846 struct drm_i915_gem_request *req;
426960be
CW
3847 int i;
3848
666796da 3849 for (i = 0; i < I915_NUM_ENGINES; i++) {
d72d908b
CW
3850 req = i915_gem_active_peek(&obj->last_read[i],
3851 &obj->base.dev->struct_mutex);
426960be 3852 if (req)
4a570db5 3853 args->busy |= 1 << (16 + req->engine->exec_id);
426960be 3854 }
d72d908b
CW
3855 req = i915_gem_active_peek(&obj->last_write,
3856 &obj->base.dev->struct_mutex);
27c01aae
CW
3857 if (req)
3858 args->busy |= req->engine->exec_id;
426960be 3859 }
673a394b 3860
f8c417cd 3861 i915_gem_object_put(obj);
1d7cfea1 3862unlock:
673a394b 3863 mutex_unlock(&dev->struct_mutex);
1d7cfea1 3864 return ret;
673a394b
EA
3865}
3866
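A hedged userspace sketch of the busy ioctl above, decoding the result exactly as this kernel encodes it: the writing engine's exec_id in the low 16 bits, one bit per reading engine in the upper 16 bits.

#include <xf86drm.h>
#include <i915_drm.h>

static int object_busy(int fd, unsigned int handle,
		       unsigned int *write_engine, unsigned int *read_mask)
{
	struct drm_i915_gem_busy arg = { .handle = handle };

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &arg))
		return -1;

	*write_engine = arg.busy & 0xffff;	/* 0 when there is no writer */
	*read_mask = arg.busy >> 16;		/* engines with outstanding reads */
	return arg.busy != 0;
}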
3867int
3868i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
3869 struct drm_file *file_priv)
3870{
0206e353 3871 return i915_gem_ring_throttle(dev, file_priv);
673a394b
EA
3872}
3873
3ef94daa
CW
3874int
3875i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3876 struct drm_file *file_priv)
3877{
fac5e23e 3878 struct drm_i915_private *dev_priv = to_i915(dev);
3ef94daa 3879 struct drm_i915_gem_madvise *args = data;
05394f39 3880 struct drm_i915_gem_object *obj;
76c1dec1 3881 int ret;
3ef94daa
CW
3882
3883 switch (args->madv) {
3884 case I915_MADV_DONTNEED:
3885 case I915_MADV_WILLNEED:
3886 break;
3887 default:
3888 return -EINVAL;
3889 }
3890
1d7cfea1
CW
3891 ret = i915_mutex_lock_interruptible(dev);
3892 if (ret)
3893 return ret;
3894
03ac0642
CW
3895 obj = i915_gem_object_lookup(file_priv, args->handle);
3896 if (!obj) {
1d7cfea1
CW
3897 ret = -ENOENT;
3898 goto unlock;
3ef94daa 3899 }
3ef94daa 3900
d7f46fc4 3901 if (i915_gem_obj_is_pinned(obj)) {
1d7cfea1
CW
3902 ret = -EINVAL;
3903 goto out;
3ef94daa
CW
3904 }
3905
656bfa3a
DV
3906 if (obj->pages &&
3907 obj->tiling_mode != I915_TILING_NONE &&
3908 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
3909 if (obj->madv == I915_MADV_WILLNEED)
3910 i915_gem_object_unpin_pages(obj);
3911 if (args->madv == I915_MADV_WILLNEED)
3912 i915_gem_object_pin_pages(obj);
3913 }
3914
05394f39
CW
3915 if (obj->madv != __I915_MADV_PURGED)
3916 obj->madv = args->madv;
3ef94daa 3917
6c085a72 3918 /* if the object is no longer attached, discard its backing storage */
be6a0376 3919 if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL)
2d7ef395
CW
3920 i915_gem_object_truncate(obj);
3921
05394f39 3922 args->retained = obj->madv != __I915_MADV_PURGED;
bb6baf76 3923
1d7cfea1 3924out:
f8c417cd 3925 i915_gem_object_put(obj);
1d7cfea1 3926unlock:
3ef94daa 3927 mutex_unlock(&dev->struct_mutex);
1d7cfea1 3928 return ret;
3ef94daa
CW
3929}
3930
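A hedged userspace sketch of the madvise ioctl above: mark an idle, unpinned object purgeable so the shrinker may drop its backing store, and read back retained when re-marking it needed.

#include <xf86drm.h>
#include <i915_drm.h>

static int set_madvise(int fd, unsigned int handle, unsigned int madv,
		       unsigned int *retained)
{
	struct drm_i915_gem_madvise arg = {
		.handle = handle,
		.madv = madv,	/* I915_MADV_DONTNEED or I915_MADV_WILLNEED */
	};

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &arg))
		return -1;

	/* retained == 0 means the pages were purged and the contents lost */
	*retained = arg.retained;
	return 0;
}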
37e680a1
CW
3931void i915_gem_object_init(struct drm_i915_gem_object *obj,
3932 const struct drm_i915_gem_object_ops *ops)
0327d6ba 3933{
b4716185
CW
3934 int i;
3935
35c20a60 3936 INIT_LIST_HEAD(&obj->global_list);
666796da 3937 for (i = 0; i < I915_NUM_ENGINES; i++)
fa545cbf
CW
3938 init_request_active(&obj->last_read[i],
3939 i915_gem_object_retire__read);
3940 init_request_active(&obj->last_write,
3941 i915_gem_object_retire__write);
3942 init_request_active(&obj->last_fence, NULL);
b25cb2f8 3943 INIT_LIST_HEAD(&obj->obj_exec_link);
2f633156 3944 INIT_LIST_HEAD(&obj->vma_list);
8d9d5744 3945 INIT_LIST_HEAD(&obj->batch_pool_link);
0327d6ba 3946
37e680a1
CW
3947 obj->ops = ops;
3948
0327d6ba
CW
3949 obj->fence_reg = I915_FENCE_REG_NONE;
3950 obj->madv = I915_MADV_WILLNEED;
0327d6ba 3951
f19ec8cb 3952 i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
0327d6ba
CW
3953}
3954
37e680a1 3955static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
de472664 3956 .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
37e680a1
CW
3957 .get_pages = i915_gem_object_get_pages_gtt,
3958 .put_pages = i915_gem_object_put_pages_gtt,
3959};
3960
d37cd8a8 3961struct drm_i915_gem_object *i915_gem_object_create(struct drm_device *dev,
05394f39 3962 size_t size)
ac52bc56 3963{
c397b908 3964 struct drm_i915_gem_object *obj;
5949eac4 3965 struct address_space *mapping;
1a240d4d 3966 gfp_t mask;
fe3db79b 3967 int ret;
ac52bc56 3968
42dcedd4 3969 obj = i915_gem_object_alloc(dev);
c397b908 3970 if (obj == NULL)
fe3db79b 3971 return ERR_PTR(-ENOMEM);
673a394b 3972
fe3db79b
CW
3973 ret = drm_gem_object_init(dev, &obj->base, size);
3974 if (ret)
3975 goto fail;
673a394b 3976
bed1ea95
CW
3977 mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
3978 if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
3979 /* 965gm cannot relocate objects above 4GiB. */
3980 mask &= ~__GFP_HIGHMEM;
3981 mask |= __GFP_DMA32;
3982 }
3983
496ad9aa 3984 mapping = file_inode(obj->base.filp)->i_mapping;
bed1ea95 3985 mapping_set_gfp_mask(mapping, mask);
5949eac4 3986
37e680a1 3987 i915_gem_object_init(obj, &i915_gem_object_ops);
73aa808f 3988
c397b908
DV
3989 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3990 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
673a394b 3991
3d29b842
ED
3992 if (HAS_LLC(dev)) {
3993 /* On some devices, we can have the GPU use the LLC (the CPU
a1871112
EA
3994 * cache) for about a 10% performance improvement
3995 * compared to uncached. Graphics requests other than
3996 * display scanout are coherent with the CPU in
3997 * accessing this cache. This means in this mode we
3998 * don't need to clflush on the CPU side, and on the
3999 * GPU side we only need to flush internal caches to
4000 * get data visible to the CPU.
4001 *
4002 * However, we maintain the display planes as UC, and so
4003 * need to rebind when first used as such.
4004 */
4005 obj->cache_level = I915_CACHE_LLC;
4006 } else
4007 obj->cache_level = I915_CACHE_NONE;
4008
d861e338
DV
4009 trace_i915_gem_object_create(obj);
4010
05394f39 4011 return obj;
fe3db79b
CW
4012
4013fail:
4014 i915_gem_object_free(obj);
4015
4016 return ERR_PTR(ret);
c397b908
DV
4017}
4018
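Since i915_gem_object_create() above returns an ERR_PTR rather than NULL on failure, a caller sketch (illustrative only) follows the IS_ERR()/PTR_ERR() convention and releases the object with i915_gem_object_put() on later errors.

static struct drm_i915_gem_object *
sketch_alloc_object(struct drm_device *dev, size_t size)
{
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create(dev, round_up(size, PAGE_SIZE));
	if (IS_ERR(obj))
		return obj;	/* propagate the encoded errno */

	/* ... further setup; on failure: i915_gem_object_put(obj)
	 * and return ERR_PTR(ret) ...
	 */
	return obj;
}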
340fbd8c
CW
4019static bool discard_backing_storage(struct drm_i915_gem_object *obj)
4020{
4021 /* If we are the last user of the backing storage (be it shmemfs
4022 * pages or stolen etc), we know that the pages are going to be
4023 * immediately released. In this case, we can then skip copying
4024 * back the contents from the GPU.
4025 */
4026
4027 if (obj->madv != I915_MADV_WILLNEED)
4028 return false;
4029
4030 if (obj->base.filp == NULL)
4031 return true;
4032
4033 /* At first glance, this looks racy, but then again so would be
4034 * userspace racing mmap against close. However, the first external
4035 * reference to the filp can only be obtained through the
4036 * i915_gem_mmap_ioctl() which safeguards us against the user
4037 * acquiring such a reference whilst we are in the middle of
4038 * freeing the object.
4039 */
4040 return atomic_long_read(&obj->base.filp->f_count) == 1;
4041}
4042
1488fc08 4043void i915_gem_free_object(struct drm_gem_object *gem_obj)
673a394b 4044{
1488fc08 4045 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
05394f39 4046 struct drm_device *dev = obj->base.dev;
fac5e23e 4047 struct drm_i915_private *dev_priv = to_i915(dev);
07fe0b12 4048 struct i915_vma *vma, *next;
673a394b 4049
f65c9168
PZ
4050 intel_runtime_pm_get(dev_priv);
4051
26e12f89
CW
4052 trace_i915_gem_object_destroy(obj);
4053
1c7f4bca 4054 list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
d7f46fc4
BW
4055 int ret;
4056
4057 vma->pin_count = 0;
c13d87ea 4058 ret = __i915_vma_unbind_no_wait(vma);
07fe0b12
BW
4059 if (WARN_ON(ret == -ERESTARTSYS)) {
4060 bool was_interruptible;
1488fc08 4061
07fe0b12
BW
4062 was_interruptible = dev_priv->mm.interruptible;
4063 dev_priv->mm.interruptible = false;
1488fc08 4064
07fe0b12 4065 WARN_ON(i915_vma_unbind(vma));
1488fc08 4066
07fe0b12
BW
4067 dev_priv->mm.interruptible = was_interruptible;
4068 }
1488fc08 4069 }
15717de2 4070 GEM_BUG_ON(obj->bind_count);
1488fc08 4071
1d64ae71
BW
4072 /* Stolen objects don't hold a ref, but do hold pin count. Fix that up
4073 * before progressing. */
4074 if (obj->stolen)
4075 i915_gem_object_unpin_pages(obj);
4076
a071fa00
DV
4077 WARN_ON(obj->frontbuffer_bits);
4078
656bfa3a
DV
4079 if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
4080 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
4081 obj->tiling_mode != I915_TILING_NONE)
4082 i915_gem_object_unpin_pages(obj);
4083
401c29f6
BW
4084 if (WARN_ON(obj->pages_pin_count))
4085 obj->pages_pin_count = 0;
340fbd8c 4086 if (discard_backing_storage(obj))
5537252b 4087 obj->madv = I915_MADV_DONTNEED;
37e680a1 4088 i915_gem_object_put_pages(obj);
de151cf6 4089
9da3da66
CW
4090 BUG_ON(obj->pages);
4091
2f745ad3
CW
4092 if (obj->base.import_attach)
4093 drm_prime_gem_destroy(&obj->base, NULL);
de151cf6 4094
5cc9ed4b
CW
4095 if (obj->ops->release)
4096 obj->ops->release(obj);
4097
05394f39
CW
4098 drm_gem_object_release(&obj->base);
4099 i915_gem_info_remove_obj(dev_priv, obj->base.size);
c397b908 4100
05394f39 4101 kfree(obj->bit_17);
42dcedd4 4102 i915_gem_object_free(obj);
f65c9168
PZ
4103
4104 intel_runtime_pm_put(dev_priv);
673a394b
EA
4105}
4106
ec7adb6e
JL
4107struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
4108 struct i915_address_space *vm)
e656a6cb
DV
4109{
4110 struct i915_vma *vma;
1c7f4bca 4111 list_for_each_entry(vma, &obj->vma_list, obj_link) {
1b683729
TU
4112 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL &&
4113 vma->vm == vm)
e656a6cb 4114 return vma;
ec7adb6e
JL
4115 }
4116 return NULL;
4117}
4118
4119struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
4120 const struct i915_ggtt_view *view)
4121{
ec7adb6e 4122 struct i915_vma *vma;
e656a6cb 4123
598b9ec8 4124 GEM_BUG_ON(!view);
ec7adb6e 4125
1c7f4bca 4126 list_for_each_entry(vma, &obj->vma_list, obj_link)
598b9ec8 4127 if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view))
ec7adb6e 4128 return vma;
e656a6cb
DV
4129 return NULL;
4130}
4131
2f633156
BW
4132void i915_gem_vma_destroy(struct i915_vma *vma)
4133{
4134 WARN_ON(vma->node.allocated);
aaa05667
CW
4135
4136 /* Keep the vma as a placeholder in the execbuffer reservation lists */
4137 if (!list_empty(&vma->exec_list))
4138 return;
4139
596c5923
CW
4140 if (!vma->is_ggtt)
4141 i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
b9d06dd9 4142
1c7f4bca 4143 list_del(&vma->obj_link);
b93dab6e 4144
e20d2ab7 4145 kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
2f633156
BW
4146}
4147
e3efda49 4148static void
117897f4 4149i915_gem_stop_engines(struct drm_device *dev)
e3efda49 4150{
fac5e23e 4151 struct drm_i915_private *dev_priv = to_i915(dev);
e2f80391 4152 struct intel_engine_cs *engine;
e3efda49 4153
b4ac5afc 4154 for_each_engine(engine, dev_priv)
117897f4 4155 dev_priv->gt.stop_engine(engine);
e3efda49
CW
4156}
4157
29105ccc 4158int
45c5f202 4159i915_gem_suspend(struct drm_device *dev)
29105ccc 4160{
fac5e23e 4161 struct drm_i915_private *dev_priv = to_i915(dev);
45c5f202 4162 int ret = 0;
28dfe52a 4163
54b4f68f
CW
4164 intel_suspend_gt_powersave(dev_priv);
4165
45c5f202 4166 mutex_lock(&dev->struct_mutex);
5ab57c70
CW
4167
4168 /* We have to flush all the executing contexts to main memory so
4169 * that they can be saved in the hibernation image. To ensure the last
4170 * context image is coherent, we have to switch away from it. That
4171 * leaves the dev_priv->kernel_context still active when
4172 * we actually suspend, and its image in memory may not match the GPU
4173 * state. Fortunately, the kernel_context is disposable and we do
4174 * not rely on its state.
4175 */
4176 ret = i915_gem_switch_to_kernel_context(dev_priv);
4177 if (ret)
4178 goto err;
4179
6e5a5beb 4180 ret = i915_gem_wait_for_idle(dev_priv);
f7403347 4181 if (ret)
45c5f202 4182 goto err;
f7403347 4183
c033666a 4184 i915_gem_retire_requests(dev_priv);
673a394b 4185
5ab57c70
CW
4186 /* Note that rather than stopping the engines, all we have to do
4187 * is assert that every RING_HEAD == RING_TAIL (all execution complete)
4188 * and similar for all logical context images (to ensure they are
4189 * all ready for hibernation).
4190 */
117897f4 4191 i915_gem_stop_engines(dev);
b2e862d0 4192 i915_gem_context_lost(dev_priv);
45c5f202
CW
4193 mutex_unlock(&dev->struct_mutex);
4194
737b1506 4195 cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
67d97da3
CW
4196 cancel_delayed_work_sync(&dev_priv->gt.retire_work);
4197 flush_delayed_work(&dev_priv->gt.idle_work);
29105ccc 4198
bdcf120b
CW
4199 /* Assert that we successfully flushed all the work and
4200 * reset the GPU back to its idle, low power state.
4201 */
67d97da3 4202 WARN_ON(dev_priv->gt.awake);
bdcf120b 4203
673a394b 4204 return 0;
45c5f202
CW
4205
4206err:
4207 mutex_unlock(&dev->struct_mutex);
4208 return ret;
673a394b
EA
4209}
4210
5ab57c70
CW
4211void i915_gem_resume(struct drm_device *dev)
4212{
4213 struct drm_i915_private *dev_priv = to_i915(dev);
4214
4215 mutex_lock(&dev->struct_mutex);
4216 i915_gem_restore_gtt_mappings(dev);
4217
4218 /* As we didn't flush the kernel context before suspend, we cannot
4219 * guarantee that the context image is complete. So let's just reset
4220 * it and start again.
4221 */
4222 if (i915.enable_execlists)
4223 intel_lr_context_reset(dev_priv, dev_priv->kernel_context);
4224
4225 mutex_unlock(&dev->struct_mutex);
4226}
4227
f691e2f4
DV
4228void i915_gem_init_swizzling(struct drm_device *dev)
4229{
fac5e23e 4230 struct drm_i915_private *dev_priv = to_i915(dev);
f691e2f4 4231
11782b02 4232 if (INTEL_INFO(dev)->gen < 5 ||
f691e2f4
DV
4233 dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
4234 return;
4235
4236 I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
4237 DISP_TILE_SURFACE_SWIZZLING);
4238
11782b02
DV
4239 if (IS_GEN5(dev))
4240 return;
4241
f691e2f4
DV
4242 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
4243 if (IS_GEN6(dev))
6b26c86d 4244 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
8782e26c 4245 else if (IS_GEN7(dev))
6b26c86d 4246 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
31a5336e
BW
4247 else if (IS_GEN8(dev))
4248 I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
8782e26c
BW
4249 else
4250 BUG();
f691e2f4 4251}
e21af88d 4252
81e7f200
VS
4253static void init_unused_ring(struct drm_device *dev, u32 base)
4254{
fac5e23e 4255 struct drm_i915_private *dev_priv = to_i915(dev);
81e7f200
VS
4256
4257 I915_WRITE(RING_CTL(base), 0);
4258 I915_WRITE(RING_HEAD(base), 0);
4259 I915_WRITE(RING_TAIL(base), 0);
4260 I915_WRITE(RING_START(base), 0);
4261}
4262
4263static void init_unused_rings(struct drm_device *dev)
4264{
4265 if (IS_I830(dev)) {
4266 init_unused_ring(dev, PRB1_BASE);
4267 init_unused_ring(dev, SRB0_BASE);
4268 init_unused_ring(dev, SRB1_BASE);
4269 init_unused_ring(dev, SRB2_BASE);
4270 init_unused_ring(dev, SRB3_BASE);
4271 } else if (IS_GEN2(dev)) {
4272 init_unused_ring(dev, SRB0_BASE);
4273 init_unused_ring(dev, SRB1_BASE);
4274 } else if (IS_GEN3(dev)) {
4275 init_unused_ring(dev, PRB1_BASE);
4276 init_unused_ring(dev, PRB2_BASE);
4277 }
4278}
4279
4fc7c971
BW
4280int
4281i915_gem_init_hw(struct drm_device *dev)
4282{
fac5e23e 4283 struct drm_i915_private *dev_priv = to_i915(dev);
e2f80391 4284 struct intel_engine_cs *engine;
d200cda6 4285 int ret;
4fc7c971 4286
5e4f5189
CW
4287 /* Double layer security blanket, see i915_gem_init() */
4288 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4289
3accaf7e 4290 if (HAS_EDRAM(dev) && INTEL_GEN(dev_priv) < 9)
05e21cc4 4291 I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
4fc7c971 4292
0bf21347
VS
4293 if (IS_HASWELL(dev))
4294 I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ?
4295 LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
9435373e 4296
88a2b2a3 4297 if (HAS_PCH_NOP(dev)) {
6ba844b0
DV
4298 if (IS_IVYBRIDGE(dev)) {
4299 u32 temp = I915_READ(GEN7_MSG_CTL);
4300 temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
4301 I915_WRITE(GEN7_MSG_CTL, temp);
4302 } else if (INTEL_INFO(dev)->gen >= 7) {
4303 u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
4304 temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
4305 I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
4306 }
88a2b2a3
BW
4307 }
4308
4fc7c971
BW
4309 i915_gem_init_swizzling(dev);
4310
d5abdfda
DV
4311 /*
4312 * At least 830 can leave some of the unused rings
4313 * "active" (ie. head != tail) after resume which
4314 * will prevent c3 entry. Makes sure all unused rings
4315 * are totally idle.
4316 */
4317 init_unused_rings(dev);
4318
ed54c1a1 4319 BUG_ON(!dev_priv->kernel_context);
90638cc1 4320
4ad2fd88
JH
4321 ret = i915_ppgtt_init_hw(dev);
4322 if (ret) {
4323 DRM_ERROR("PPGTT enable HW failed %d\n", ret);
4324 goto out;
4325 }
4326
4327 /* Need to do basic initialisation of all rings first: */
b4ac5afc 4328 for_each_engine(engine, dev_priv) {
e2f80391 4329 ret = engine->init_hw(engine);
35a57ffb 4330 if (ret)
5e4f5189 4331 goto out;
35a57ffb 4332 }
99433931 4333
0ccdacf6
PA
4334 intel_mocs_init_l3cc_table(dev);
4335
33a732f4 4336 /* We can't enable contexts until all firmware is loaded */
e556f7c1
DG
4337 ret = intel_guc_setup(dev);
4338 if (ret)
4339 goto out;
33a732f4 4340
5e4f5189
CW
4341out:
4342 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
2fa48d8d 4343 return ret;
8187a2b7
ZN
4344}
4345
39df9190
CW
4346bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value)
4347{
4348 if (INTEL_INFO(dev_priv)->gen < 6)
4349 return false;
4350
4351 /* TODO: make semaphores and Execlists play nicely together */
4352 if (i915.enable_execlists)
4353 return false;
4354
4355 if (value >= 0)
4356 return value;
4357
4358#ifdef CONFIG_INTEL_IOMMU
4359 /* Enable semaphores on SNB when IO remapping is off */
4360 if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped)
4361 return false;
4362#endif
4363
4364 return true;
4365}
4366
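Editor's sketch of how a sanitiser like the helper above is typically consumed at driver load: feed it the raw module parameter and store the sanitised result back. The exact call site is an assumption, not taken from this file.

static void sketch_sanitize_options(struct drm_i915_private *dev_priv)
{
	i915.semaphores = intel_sanitize_semaphores(dev_priv, i915.semaphores);
	DRM_DEBUG_DRIVER("use GPU semaphores? %s\n",
			 i915.semaphores ? "yes" : "no");
}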
1070a42b
CW
4367int i915_gem_init(struct drm_device *dev)
4368{
fac5e23e 4369 struct drm_i915_private *dev_priv = to_i915(dev);
1070a42b
CW
4370 int ret;
4371
1070a42b 4372 mutex_lock(&dev->struct_mutex);
d62b4892 4373
a83014d3 4374 if (!i915.enable_execlists) {
7e37f889
CW
4375 dev_priv->gt.cleanup_engine = intel_engine_cleanup;
4376 dev_priv->gt.stop_engine = intel_engine_stop;
454afebd 4377 } else {
117897f4
TU
4378 dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
4379 dev_priv->gt.stop_engine = intel_logical_ring_stop;
a83014d3
OM
4380 }
4381
5e4f5189
CW
4382 /* This is just a security blanket to placate dragons.
4383 * On some systems, we very sporadically observe that the first TLBs
4384 * used by the CS may be stale, despite us poking the TLB reset. If
4385 * we hold the forcewake during initialisation these problems
4386 * just magically go away.
4387 */
4388 intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4389
72778cb2 4390 i915_gem_init_userptr(dev_priv);
f6b9d5ca
CW
4391
4392 ret = i915_gem_init_ggtt(dev_priv);
4393 if (ret)
4394 goto out_unlock;
d62b4892 4395
2fa48d8d 4396 ret = i915_gem_context_init(dev);
7bcc3777
JN
4397 if (ret)
4398 goto out_unlock;
2fa48d8d 4399
8b3e2d36 4400 ret = intel_engines_init(dev);
35a57ffb 4401 if (ret)
7bcc3777 4402 goto out_unlock;
2fa48d8d 4403
1070a42b 4404 ret = i915_gem_init_hw(dev);
60990320 4405 if (ret == -EIO) {
7e21d648 4406 /* Allow engine initialisation to fail by marking the GPU as
60990320
CW
4407 * wedged. But we only want to do this where the GPU is angry;
4408 * for all other failures, such as an allocation failure, bail.
4409 */
4410 DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
805de8f4 4411 atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
60990320 4412 ret = 0;
1070a42b 4413 }
7bcc3777
JN
4414
4415out_unlock:
5e4f5189 4416 intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
60990320 4417 mutex_unlock(&dev->struct_mutex);
1070a42b 4418
60990320 4419 return ret;
1070a42b
CW
4420}
4421
8187a2b7 4422void
117897f4 4423i915_gem_cleanup_engines(struct drm_device *dev)
8187a2b7 4424{
fac5e23e 4425 struct drm_i915_private *dev_priv = to_i915(dev);
e2f80391 4426 struct intel_engine_cs *engine;
8187a2b7 4427
b4ac5afc 4428 for_each_engine(engine, dev_priv)
117897f4 4429 dev_priv->gt.cleanup_engine(engine);
8187a2b7
ZN
4430}
4431
64193406 4432static void
666796da 4433init_engine_lists(struct intel_engine_cs *engine)
64193406 4434{
0bc40be8 4435 INIT_LIST_HEAD(&engine->request_list);
64193406
CW
4436}
4437
40ae4e16
ID
4438void
4439i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
4440{
91c8a326 4441 struct drm_device *dev = &dev_priv->drm;
40ae4e16
ID
4442
4443 if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) &&
4444 !IS_CHERRYVIEW(dev_priv))
4445 dev_priv->num_fence_regs = 32;
4446 else if (INTEL_INFO(dev_priv)->gen >= 4 || IS_I945G(dev_priv) ||
4447 IS_I945GM(dev_priv) || IS_G33(dev_priv))
4448 dev_priv->num_fence_regs = 16;
4449 else
4450 dev_priv->num_fence_regs = 8;
4451
c033666a 4452 if (intel_vgpu_active(dev_priv))
40ae4e16
ID
4453 dev_priv->num_fence_regs =
4454 I915_READ(vgtif_reg(avail_rs.fence_num));
4455
4456 /* Initialize fence registers to zero */
4457 i915_gem_restore_fences(dev);
4458
4459 i915_gem_detect_bit_6_swizzle(dev);
4460}
4461
673a394b 4462void
d64aa096 4463i915_gem_load_init(struct drm_device *dev)
673a394b 4464{
fac5e23e 4465 struct drm_i915_private *dev_priv = to_i915(dev);
42dcedd4
CW
4466 int i;
4467
efab6d8d 4468 dev_priv->objects =
42dcedd4
CW
4469 kmem_cache_create("i915_gem_object",
4470 sizeof(struct drm_i915_gem_object), 0,
4471 SLAB_HWCACHE_ALIGN,
4472 NULL);
e20d2ab7
CW
4473 dev_priv->vmas =
4474 kmem_cache_create("i915_gem_vma",
4475 sizeof(struct i915_vma), 0,
4476 SLAB_HWCACHE_ALIGN,
4477 NULL);
efab6d8d
CW
4478 dev_priv->requests =
4479 kmem_cache_create("i915_gem_request",
4480 sizeof(struct drm_i915_gem_request), 0,
4481 SLAB_HWCACHE_ALIGN,
4482 NULL);
673a394b 4483
a33afea5 4484 INIT_LIST_HEAD(&dev_priv->context_list);
6c085a72
CW
4485 INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
4486 INIT_LIST_HEAD(&dev_priv->mm.bound_list);
a09ba7fa 4487 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
666796da
TU
4488 for (i = 0; i < I915_NUM_ENGINES; i++)
4489 init_engine_lists(&dev_priv->engine[i]);
4b9de737 4490 for (i = 0; i < I915_MAX_NUM_FENCES; i++)
007cc8ac 4491 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
67d97da3 4492 INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
673a394b 4493 i915_gem_retire_work_handler);
67d97da3 4494 INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
b29c19b6 4495 i915_gem_idle_work_handler);
1f15b76f 4496 init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
1f83fee0 4497 init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
31169714 4498
72bfa19c
CW
4499 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
4500
19b2dbde 4501 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
10ed13e4 4502
6b95a207 4503 init_waitqueue_head(&dev_priv->pending_flip_queue);
17250b71 4504
ce453d81
CW
4505 dev_priv->mm.interruptible = true;
4506
f99d7069 4507 mutex_init(&dev_priv->fb_tracking.lock);
673a394b 4508}
71acb5eb 4509
d64aa096
ID
4510void i915_gem_load_cleanup(struct drm_device *dev)
4511{
4512 struct drm_i915_private *dev_priv = to_i915(dev);
4513
4514 kmem_cache_destroy(dev_priv->requests);
4515 kmem_cache_destroy(dev_priv->vmas);
4516 kmem_cache_destroy(dev_priv->objects);
4517}
4518
461fb99c
CW
4519int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
4520{
4521 struct drm_i915_gem_object *obj;
4522
4523 /* Called just before we write the hibernation image.
4524 *
4525 * We need to update the domain tracking to reflect that the CPU
4526 * will be accessing all the pages to create and restore from the
4527 * hibernation, and so upon restoration those pages will be in the
4528 * CPU domain.
4529 *
4530 * To make sure the hibernation image contains the latest state,
4531 * we update that state just before writing out the image.
4532 */
4533
4534 list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
4535 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4536 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4537 }
4538
4539 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
4540 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4541 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4542 }
4543
4544 return 0;
4545}
4546
f787a5f5 4547void i915_gem_release(struct drm_device *dev, struct drm_file *file)
b962442e 4548{
f787a5f5 4549 struct drm_i915_file_private *file_priv = file->driver_priv;
15f7bbc7 4550 struct drm_i915_gem_request *request;
b962442e
EA
4551
4552 /* Clean up our request list when the client is going away, so that
4553 * later retire_requests won't dereference our soon-to-be-gone
4554 * file_priv.
4555 */
1c25595f 4556 spin_lock(&file_priv->mm.lock);
15f7bbc7 4557 list_for_each_entry(request, &file_priv->mm.request_list, client_list)
f787a5f5 4558 request->file_priv = NULL;
1c25595f 4559 spin_unlock(&file_priv->mm.lock);
b29c19b6 4560
2e1b8730 4561 if (!list_empty(&file_priv->rps.link)) {
8d3afd7d 4562 spin_lock(&to_i915(dev)->rps.client_lock);
2e1b8730 4563 list_del(&file_priv->rps.link);
8d3afd7d 4564 spin_unlock(&to_i915(dev)->rps.client_lock);
1854d5ca 4565 }
b29c19b6
CW
4566}
4567
4568int i915_gem_open(struct drm_device *dev, struct drm_file *file)
4569{
4570 struct drm_i915_file_private *file_priv;
e422b888 4571 int ret;
b29c19b6
CW
4572
4573 DRM_DEBUG_DRIVER("\n");
4574
4575 file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
4576 if (!file_priv)
4577 return -ENOMEM;
4578
4579 file->driver_priv = file_priv;
f19ec8cb 4580 file_priv->dev_priv = to_i915(dev);
ab0e7ff9 4581 file_priv->file = file;
2e1b8730 4582 INIT_LIST_HEAD(&file_priv->rps.link);
b29c19b6
CW
4583
4584 spin_lock_init(&file_priv->mm.lock);
4585 INIT_LIST_HEAD(&file_priv->mm.request_list);
b29c19b6 4586
c80ff16e 4587 file_priv->bsd_engine = -1;
de1add36 4588
e422b888
BW
4589 ret = i915_gem_context_open(dev, file);
4590 if (ret)
4591 kfree(file_priv);
b29c19b6 4592
e422b888 4593 return ret;
b29c19b6
CW
4594}
4595
b680c37a
DV
4596/**
4597 * i915_gem_track_fb - update frontbuffer tracking
d9072a3e
GT
4598 * @old: current GEM buffer for the frontbuffer slots
4599 * @new: new GEM buffer for the frontbuffer slots
4600 * @frontbuffer_bits: bitmask of frontbuffer slots
b680c37a
DV
4601 *
4602 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
4603 * from @old and setting them in @new. Both @old and @new can be NULL.
4604 */
a071fa00
DV
4605void i915_gem_track_fb(struct drm_i915_gem_object *old,
4606 struct drm_i915_gem_object *new,
4607 unsigned frontbuffer_bits)
4608{
4609 if (old) {
4610 WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex));
4611 WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits));
4612 old->frontbuffer_bits &= ~frontbuffer_bits;
4613 }
4614
4615 if (new) {
4616 WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex));
4617 WARN_ON(new->frontbuffer_bits & frontbuffer_bits);
4618 new->frontbuffer_bits |= frontbuffer_bits;
4619 }
4620}
4621
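A minimal sketch of a caller of i915_gem_track_fb() above, as a plane update would use it: move the plane's frontbuffer bits from the outgoing object to the incoming one under struct_mutex. Either object may be NULL when the plane is being enabled or disabled.

static void sketch_plane_swap_fb(struct drm_i915_gem_object *old_obj,
				 struct drm_i915_gem_object *new_obj,
				 unsigned int frontbuffer_bits)
{
	/* The helper tolerates either side being NULL, so the same call
	 * covers plane enable (old_obj == NULL) and disable (new_obj == NULL).
	 */
	i915_gem_track_fb(old_obj, new_obj, frontbuffer_bits);
}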
a70a3148 4622/* All the new VM stuff */
088e0df4
MT
4623u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
4624 struct i915_address_space *vm)
a70a3148 4625{
fac5e23e 4626 struct drm_i915_private *dev_priv = to_i915(o->base.dev);
a70a3148
BW
4627 struct i915_vma *vma;
4628
896ab1a5 4629 WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
a70a3148 4630
1c7f4bca 4631 list_for_each_entry(vma, &o->vma_list, obj_link) {
596c5923 4632 if (vma->is_ggtt &&
ec7adb6e
JL
4633 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
4634 continue;
4635 if (vma->vm == vm)
a70a3148 4636 return vma->node.start;
a70a3148 4637 }
ec7adb6e 4638
f25748ea
DV
4639 WARN(1, "%s vma for this object not found.\n",
4640 i915_is_ggtt(vm) ? "global" : "ppgtt");
a70a3148
BW
4641 return -1;
4642}
4643
088e0df4
MT
4644u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
4645 const struct i915_ggtt_view *view)
a70a3148
BW
4646{
4647 struct i915_vma *vma;
4648
1c7f4bca 4649 list_for_each_entry(vma, &o->vma_list, obj_link)
8aac2220 4650 if (vma->is_ggtt && i915_ggtt_view_equal(&vma->ggtt_view, view))
ec7adb6e
JL
4651 return vma->node.start;
4652
5678ad73 4653 WARN(1, "global vma for this object not found. (view=%u)\n", view->type);
ec7adb6e
JL
4654 return -1;
4655}
4656
4657bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
4658 struct i915_address_space *vm)
4659{
4660 struct i915_vma *vma;
4661
1c7f4bca 4662 list_for_each_entry(vma, &o->vma_list, obj_link) {
596c5923 4663 if (vma->is_ggtt &&
ec7adb6e
JL
4664 vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
4665 continue;
4666 if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
4667 return true;
4668 }
4669
4670 return false;
4671}
4672
4673bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
9abc4648 4674 const struct i915_ggtt_view *view)
ec7adb6e 4675{
ec7adb6e
JL
4676 struct i915_vma *vma;
4677
1c7f4bca 4678 list_for_each_entry(vma, &o->vma_list, obj_link)
ff5ec22d 4679 if (vma->is_ggtt &&
9abc4648 4680 i915_ggtt_view_equal(&vma->ggtt_view, view) &&
fe14d5f4 4681 drm_mm_node_allocated(&vma->node))
a70a3148
BW
4682 return true;
4683
4684 return false;
4685}
4686
8da32727 4687unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *o)
a70a3148 4688{
a70a3148
BW
4689 struct i915_vma *vma;
4690
8da32727 4691 GEM_BUG_ON(list_empty(&o->vma_list));
a70a3148 4692
1c7f4bca 4693 list_for_each_entry(vma, &o->vma_list, obj_link) {
596c5923 4694 if (vma->is_ggtt &&
8da32727 4695 vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
a70a3148 4696 return vma->node.size;
ec7adb6e 4697 }
8da32727 4698
a70a3148
BW
4699 return 0;
4700}
4701
ec7adb6e 4702bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
5c2abbea
BW
4703{
4704 struct i915_vma *vma;
1c7f4bca 4705 list_for_each_entry(vma, &obj->vma_list, obj_link)
ec7adb6e
JL
4706 if (vma->pin_count > 0)
4707 return true;
a6631ae1 4708
ec7adb6e 4709 return false;
5c2abbea 4710}
ea70299d 4711
033908ae
DG
4712/* Like i915_gem_object_get_page(), but mark the returned page dirty */
4713struct page *
4714i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n)
4715{
4716 struct page *page;
4717
4718 /* Only default objects have per-page dirty tracking */
b9bcd14a 4719 if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
033908ae
DG
4720 return NULL;
4721
4722 page = i915_gem_object_get_page(obj, n);
4723 set_page_dirty(page);
4724 return page;
4725}
4726
ea70299d
DG
4727/* Allocate a new GEM object and fill it with the supplied data */
4728struct drm_i915_gem_object *
4729i915_gem_object_create_from_data(struct drm_device *dev,
4730 const void *data, size_t size)
4731{
4732 struct drm_i915_gem_object *obj;
4733 struct sg_table *sg;
4734 size_t bytes;
4735 int ret;
4736
d37cd8a8 4737 obj = i915_gem_object_create(dev, round_up(size, PAGE_SIZE));
fe3db79b 4738 if (IS_ERR(obj))
ea70299d
DG
4739 return obj;
4740
4741 ret = i915_gem_object_set_to_cpu_domain(obj, true);
4742 if (ret)
4743 goto fail;
4744
4745 ret = i915_gem_object_get_pages(obj);
4746 if (ret)
4747 goto fail;
4748
4749 i915_gem_object_pin_pages(obj);
4750 sg = obj->pages;
4751 bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
9e7d18c0 4752 obj->dirty = 1; /* Backing store is now out of date */
ea70299d
DG
4753 i915_gem_object_unpin_pages(obj);
4754
4755 if (WARN_ON(bytes != size)) {
4756 DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size);
4757 ret = -EFAULT;
4758 goto fail;
4759 }
4760
4761 return obj;
4762
4763fail:
f8c417cd 4764 i915_gem_object_put(obj);
ea70299d
DG
4765 return ERR_PTR(ret);
4766}
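Finally, a hedged sketch of the kind of caller i915_gem_object_create_from_data() above is written for: wrapping a firmware-style blob in a GEM object. The surrounding flow is illustrative only; the caller releases the object with i915_gem_object_put() when done with it.

static struct drm_i915_gem_object *
sketch_wrap_blob(struct drm_device *dev, const void *blob, size_t size)
{
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create_from_data(dev, blob, size);
	if (IS_ERR(obj))
		DRM_DEBUG_DRIVER("wrapping blob failed: %ld\n", PTR_ERR(obj));

	return obj;
}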