drm/i915: Defer accounting until read from debugfs
[GitHub/LineageOS/android_kernel_motorola_exynos9610.git] / drivers / gpu / drm / i915 / i915_gem.c
CommitLineData
673a394b
EA
1/*
2 * Copyright © 2008 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 *
26 */
27
28#include "drmP.h"
29#include "drm.h"
30#include "i915_drm.h"
31#include "i915_drv.h"
1c5d22f7 32#include "i915_trace.h"
652c393a 33#include "intel_drv.h"
5a0e3ad6 34#include <linux/slab.h>
673a394b 35#include <linux/swap.h>
79e53945 36#include <linux/pci.h>
673a394b 37
0f8c6d7c
CW
38struct change_domains {
39 uint32_t invalidate_domains;
40 uint32_t flush_domains;
41 uint32_t flush_rings;
42};
43
05394f39 44static int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj,
919926ae 45 struct intel_ring_buffer *pipelined);
05394f39
CW
46static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
47static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
48static int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj,
919926ae 49 bool write);
05394f39 50static int i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj,
e47c68e9
EA
51 uint64_t offset,
52 uint64_t size);
05394f39
CW
53static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj);
54static int i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
2cf34d7b 55 bool interruptible);
05394f39 56static int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
a00b10c3 57 unsigned alignment,
75e9e915 58 bool map_and_fenceable);
05394f39
CW
59static void i915_gem_clear_fence_reg(struct drm_i915_gem_object *obj);
60static int i915_gem_phys_pwrite(struct drm_device *dev,
61 struct drm_i915_gem_object *obj,
71acb5eb 62 struct drm_i915_gem_pwrite *args,
05394f39
CW
63 struct drm_file *file);
64static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj);
673a394b 65
17250b71
CW
66static int i915_gem_inactive_shrink(struct shrinker *shrinker,
67 int nr_to_scan,
68 gfp_t gfp_mask);
69
31169714 70
73aa808f
CW
71/* some bookkeeping */
72static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
73 size_t size)
74{
75 dev_priv->mm.object_count++;
76 dev_priv->mm.object_memory += size;
77}
78
79static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
80 size_t size)
81{
82 dev_priv->mm.object_count--;
83 dev_priv->mm.object_memory -= size;
84}
85
30dbf0c0
CW
86int
87i915_gem_check_is_wedged(struct drm_device *dev)
88{
89 struct drm_i915_private *dev_priv = dev->dev_private;
90 struct completion *x = &dev_priv->error_completion;
91 unsigned long flags;
92 int ret;
93
94 if (!atomic_read(&dev_priv->mm.wedged))
95 return 0;
96
97 ret = wait_for_completion_interruptible(x);
98 if (ret)
99 return ret;
100
101 /* Success, we reset the GPU! */
102 if (!atomic_read(&dev_priv->mm.wedged))
103 return 0;
104
105 /* GPU is hung, bump the completion count to account for
106 * the token we just consumed so that we never hit zero and
107 * end up waiting upon a subsequent completion event that
108 * will never happen.
109 */
110 spin_lock_irqsave(&x->wait.lock, flags);
111 x->done++;
112 spin_unlock_irqrestore(&x->wait.lock, flags);
113 return -EIO;
114}
115
76c1dec1
CW
116static int i915_mutex_lock_interruptible(struct drm_device *dev)
117{
118 struct drm_i915_private *dev_priv = dev->dev_private;
119 int ret;
120
121 ret = i915_gem_check_is_wedged(dev);
122 if (ret)
123 return ret;
124
125 ret = mutex_lock_interruptible(&dev->struct_mutex);
126 if (ret)
127 return ret;
128
129 if (atomic_read(&dev_priv->mm.wedged)) {
130 mutex_unlock(&dev->struct_mutex);
131 return -EAGAIN;
132 }
133
23bc5982 134 WARN_ON(i915_verify_lists(dev));
76c1dec1
CW
135 return 0;
136}
30dbf0c0 137
7d1c4804 138static inline bool
05394f39 139i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
7d1c4804 140{
05394f39 141 return obj->gtt_space && !obj->active && obj->pin_count == 0;
7d1c4804
CW
142}
143
2021746e
CW
144void i915_gem_do_init(struct drm_device *dev,
145 unsigned long start,
146 unsigned long mappable_end,
147 unsigned long end)
673a394b
EA
148{
149 drm_i915_private_t *dev_priv = dev->dev_private;
673a394b 150
79e53945
JB
151 drm_mm_init(&dev_priv->mm.gtt_space, start,
152 end - start);
673a394b 153
73aa808f 154 dev_priv->mm.gtt_total = end - start;
fb7d516a 155 dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start;
53984635 156 dev_priv->mm.gtt_mappable_end = mappable_end;
79e53945 157}
673a394b 158
79e53945
JB
159int
160i915_gem_init_ioctl(struct drm_device *dev, void *data,
05394f39 161 struct drm_file *file)
79e53945
JB
162{
163 struct drm_i915_gem_init *args = data;
2021746e
CW
164
165 if (args->gtt_start >= args->gtt_end ||
166 (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
167 return -EINVAL;
79e53945
JB
168
169 mutex_lock(&dev->struct_mutex);
2021746e 170 i915_gem_do_init(dev, args->gtt_start, args->gtt_end, args->gtt_end);
673a394b
EA
171 mutex_unlock(&dev->struct_mutex);
172
2021746e 173 return 0;
673a394b
EA
174}
175
5a125c3c
EA
176int
177i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
05394f39 178 struct drm_file *file)
5a125c3c 179{
73aa808f 180 struct drm_i915_private *dev_priv = dev->dev_private;
5a125c3c 181 struct drm_i915_gem_get_aperture *args = data;
6299f992
CW
182 struct drm_i915_gem_object *obj;
183 size_t pinned;
5a125c3c
EA
184
185 if (!(dev->driver->driver_features & DRIVER_GEM))
186 return -ENODEV;
187
6299f992 188 pinned = 0;
73aa808f 189 mutex_lock(&dev->struct_mutex);
6299f992
CW
190 list_for_each_entry(obj, &dev_priv->mm.pinned_list, mm_list)
191 pinned += obj->gtt_space->size;
73aa808f 192 mutex_unlock(&dev->struct_mutex);
5a125c3c 193
6299f992
CW
194 args->aper_size = dev_priv->mm.gtt_total;
195 args->aper_available_size = args->aper_size -pinned;
196
5a125c3c
EA
197 return 0;
198}
199
673a394b
EA
200/**
201 * Creates a new mm object and returns a handle to it.
202 */
203int
204i915_gem_create_ioctl(struct drm_device *dev, void *data,
05394f39 205 struct drm_file *file)
673a394b
EA
206{
207 struct drm_i915_gem_create *args = data;
05394f39 208 struct drm_i915_gem_object *obj;
a1a2d1d3
PP
209 int ret;
210 u32 handle;
673a394b
EA
211
212 args->size = roundup(args->size, PAGE_SIZE);
213
214 /* Allocate the new object */
ac52bc56 215 obj = i915_gem_alloc_object(dev, args->size);
673a394b
EA
216 if (obj == NULL)
217 return -ENOMEM;
218
05394f39 219 ret = drm_gem_handle_create(file, &obj->base, &handle);
1dfd9754 220 if (ret) {
05394f39
CW
221 drm_gem_object_release(&obj->base);
222 i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
202f2fef 223 kfree(obj);
673a394b 224 return ret;
1dfd9754 225 }
673a394b 226
202f2fef 227 /* drop reference from allocate - handle holds it now */
05394f39 228 drm_gem_object_unreference(&obj->base);
202f2fef
CW
229 trace_i915_gem_object_create(obj);
230
1dfd9754 231 args->handle = handle;
673a394b
EA
232 return 0;
233}
234
05394f39 235static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
280b713b 236{
05394f39 237 drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
280b713b
EA
238
239 return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
05394f39 240 obj->tiling_mode != I915_TILING_NONE;
280b713b
EA
241}
242
99a03df5 243static inline void
40123c1f
EA
244slow_shmem_copy(struct page *dst_page,
245 int dst_offset,
246 struct page *src_page,
247 int src_offset,
248 int length)
249{
250 char *dst_vaddr, *src_vaddr;
251
99a03df5
CW
252 dst_vaddr = kmap(dst_page);
253 src_vaddr = kmap(src_page);
40123c1f
EA
254
255 memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);
256
99a03df5
CW
257 kunmap(src_page);
258 kunmap(dst_page);
40123c1f
EA
259}
260
99a03df5 261static inline void
280b713b
EA
262slow_shmem_bit17_copy(struct page *gpu_page,
263 int gpu_offset,
264 struct page *cpu_page,
265 int cpu_offset,
266 int length,
267 int is_read)
268{
269 char *gpu_vaddr, *cpu_vaddr;
270
271 /* Use the unswizzled path if this page isn't affected. */
272 if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
273 if (is_read)
274 return slow_shmem_copy(cpu_page, cpu_offset,
275 gpu_page, gpu_offset, length);
276 else
277 return slow_shmem_copy(gpu_page, gpu_offset,
278 cpu_page, cpu_offset, length);
279 }
280
99a03df5
CW
281 gpu_vaddr = kmap(gpu_page);
282 cpu_vaddr = kmap(cpu_page);
280b713b
EA
283
284 /* Copy the data, XORing A6 with A17 (1). The user already knows he's
285 * XORing with the other bits (A9 for Y, A9 and A10 for X)
286 */
287 while (length > 0) {
288 int cacheline_end = ALIGN(gpu_offset + 1, 64);
289 int this_length = min(cacheline_end - gpu_offset, length);
290 int swizzled_gpu_offset = gpu_offset ^ 64;
291
292 if (is_read) {
293 memcpy(cpu_vaddr + cpu_offset,
294 gpu_vaddr + swizzled_gpu_offset,
295 this_length);
296 } else {
297 memcpy(gpu_vaddr + swizzled_gpu_offset,
298 cpu_vaddr + cpu_offset,
299 this_length);
300 }
301 cpu_offset += this_length;
302 gpu_offset += this_length;
303 length -= this_length;
304 }
305
99a03df5
CW
306 kunmap(cpu_page);
307 kunmap(gpu_page);
280b713b
EA
308}
309
eb01459f
EA
310/**
311 * This is the fast shmem pread path, which attempts to copy_from_user directly
312 * from the backing pages of the object to the user's address space. On a
313 * fault, it fails so we can fall back to i915_gem_shmem_pwrite_slow().
314 */
315static int
05394f39
CW
316i915_gem_shmem_pread_fast(struct drm_device *dev,
317 struct drm_i915_gem_object *obj,
eb01459f 318 struct drm_i915_gem_pread *args,
05394f39 319 struct drm_file *file)
eb01459f 320{
05394f39 321 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
eb01459f 322 ssize_t remain;
e5281ccd 323 loff_t offset;
eb01459f
EA
324 char __user *user_data;
325 int page_offset, page_length;
eb01459f
EA
326
327 user_data = (char __user *) (uintptr_t) args->data_ptr;
328 remain = args->size;
329
eb01459f
EA
330 offset = args->offset;
331
332 while (remain > 0) {
e5281ccd
CW
333 struct page *page;
334 char *vaddr;
335 int ret;
336
eb01459f
EA
337 /* Operation in this page
338 *
eb01459f
EA
339 * page_offset = offset within page
340 * page_length = bytes to copy for this page
341 */
eb01459f
EA
342 page_offset = offset & (PAGE_SIZE-1);
343 page_length = remain;
344 if ((page_offset + remain) > PAGE_SIZE)
345 page_length = PAGE_SIZE - page_offset;
346
e5281ccd
CW
347 page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
348 GFP_HIGHUSER | __GFP_RECLAIMABLE);
349 if (IS_ERR(page))
350 return PTR_ERR(page);
351
352 vaddr = kmap_atomic(page);
353 ret = __copy_to_user_inatomic(user_data,
354 vaddr + page_offset,
355 page_length);
356 kunmap_atomic(vaddr);
357
358 mark_page_accessed(page);
359 page_cache_release(page);
360 if (ret)
4f27b75d 361 return -EFAULT;
eb01459f
EA
362
363 remain -= page_length;
364 user_data += page_length;
365 offset += page_length;
366 }
367
4f27b75d 368 return 0;
eb01459f
EA
369}
370
371/**
372 * This is the fallback shmem pread path, which allocates temporary storage
373 * in kernel space to copy_to_user into outside of the struct_mutex, so we
374 * can copy out of the object's backing pages while holding the struct mutex
375 * and not take page faults.
376 */
377static int
05394f39
CW
378i915_gem_shmem_pread_slow(struct drm_device *dev,
379 struct drm_i915_gem_object *obj,
eb01459f 380 struct drm_i915_gem_pread *args,
05394f39 381 struct drm_file *file)
eb01459f 382{
05394f39 383 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
eb01459f
EA
384 struct mm_struct *mm = current->mm;
385 struct page **user_pages;
386 ssize_t remain;
387 loff_t offset, pinned_pages, i;
388 loff_t first_data_page, last_data_page, num_pages;
e5281ccd
CW
389 int shmem_page_offset;
390 int data_page_index, data_page_offset;
eb01459f
EA
391 int page_length;
392 int ret;
393 uint64_t data_ptr = args->data_ptr;
280b713b 394 int do_bit17_swizzling;
eb01459f
EA
395
396 remain = args->size;
397
398 /* Pin the user pages containing the data. We can't fault while
399 * holding the struct mutex, yet we want to hold it while
400 * dereferencing the user data.
401 */
402 first_data_page = data_ptr / PAGE_SIZE;
403 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
404 num_pages = last_data_page - first_data_page + 1;
405
4f27b75d 406 user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
eb01459f
EA
407 if (user_pages == NULL)
408 return -ENOMEM;
409
4f27b75d 410 mutex_unlock(&dev->struct_mutex);
eb01459f
EA
411 down_read(&mm->mmap_sem);
412 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
e5e9ecde 413 num_pages, 1, 0, user_pages, NULL);
eb01459f 414 up_read(&mm->mmap_sem);
4f27b75d 415 mutex_lock(&dev->struct_mutex);
eb01459f
EA
416 if (pinned_pages < num_pages) {
417 ret = -EFAULT;
4f27b75d 418 goto out;
eb01459f
EA
419 }
420
4f27b75d
CW
421 ret = i915_gem_object_set_cpu_read_domain_range(obj,
422 args->offset,
423 args->size);
07f73f69 424 if (ret)
4f27b75d 425 goto out;
eb01459f 426
4f27b75d 427 do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
eb01459f 428
eb01459f
EA
429 offset = args->offset;
430
431 while (remain > 0) {
e5281ccd
CW
432 struct page *page;
433
eb01459f
EA
434 /* Operation in this page
435 *
eb01459f
EA
436 * shmem_page_offset = offset within page in shmem file
437 * data_page_index = page number in get_user_pages return
438 * data_page_offset = offset with data_page_index page.
439 * page_length = bytes to copy for this page
440 */
eb01459f
EA
441 shmem_page_offset = offset & ~PAGE_MASK;
442 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
443 data_page_offset = data_ptr & ~PAGE_MASK;
444
445 page_length = remain;
446 if ((shmem_page_offset + page_length) > PAGE_SIZE)
447 page_length = PAGE_SIZE - shmem_page_offset;
448 if ((data_page_offset + page_length) > PAGE_SIZE)
449 page_length = PAGE_SIZE - data_page_offset;
450
e5281ccd
CW
451 page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
452 GFP_HIGHUSER | __GFP_RECLAIMABLE);
453 if (IS_ERR(page))
454 return PTR_ERR(page);
455
280b713b 456 if (do_bit17_swizzling) {
e5281ccd 457 slow_shmem_bit17_copy(page,
280b713b 458 shmem_page_offset,
99a03df5
CW
459 user_pages[data_page_index],
460 data_page_offset,
461 page_length,
462 1);
463 } else {
464 slow_shmem_copy(user_pages[data_page_index],
465 data_page_offset,
e5281ccd 466 page,
99a03df5
CW
467 shmem_page_offset,
468 page_length);
280b713b 469 }
eb01459f 470
e5281ccd
CW
471 mark_page_accessed(page);
472 page_cache_release(page);
473
eb01459f
EA
474 remain -= page_length;
475 data_ptr += page_length;
476 offset += page_length;
477 }
478
4f27b75d 479out:
eb01459f
EA
480 for (i = 0; i < pinned_pages; i++) {
481 SetPageDirty(user_pages[i]);
e5281ccd 482 mark_page_accessed(user_pages[i]);
eb01459f
EA
483 page_cache_release(user_pages[i]);
484 }
8e7d2b2c 485 drm_free_large(user_pages);
eb01459f
EA
486
487 return ret;
488}
489
673a394b
EA
490/**
491 * Reads data from the object referenced by handle.
492 *
493 * On error, the contents of *data are undefined.
494 */
495int
496i915_gem_pread_ioctl(struct drm_device *dev, void *data,
05394f39 497 struct drm_file *file)
673a394b
EA
498{
499 struct drm_i915_gem_pread *args = data;
05394f39 500 struct drm_i915_gem_object *obj;
35b62a89 501 int ret = 0;
673a394b 502
51311d0a
CW
503 if (args->size == 0)
504 return 0;
505
506 if (!access_ok(VERIFY_WRITE,
507 (char __user *)(uintptr_t)args->data_ptr,
508 args->size))
509 return -EFAULT;
510
511 ret = fault_in_pages_writeable((char __user *)(uintptr_t)args->data_ptr,
512 args->size);
513 if (ret)
514 return -EFAULT;
515
4f27b75d 516 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 517 if (ret)
4f27b75d 518 return ret;
673a394b 519
05394f39 520 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1d7cfea1
CW
521 if (obj == NULL) {
522 ret = -ENOENT;
523 goto unlock;
4f27b75d 524 }
673a394b 525
7dcd2499 526 /* Bounds check source. */
05394f39
CW
527 if (args->offset > obj->base.size ||
528 args->size > obj->base.size - args->offset) {
ce9d419d 529 ret = -EINVAL;
35b62a89 530 goto out;
ce9d419d
CW
531 }
532
4f27b75d
CW
533 ret = i915_gem_object_set_cpu_read_domain_range(obj,
534 args->offset,
535 args->size);
536 if (ret)
e5281ccd 537 goto out;
4f27b75d
CW
538
539 ret = -EFAULT;
540 if (!i915_gem_object_needs_bit17_swizzle(obj))
05394f39 541 ret = i915_gem_shmem_pread_fast(dev, obj, args, file);
4f27b75d 542 if (ret == -EFAULT)
05394f39 543 ret = i915_gem_shmem_pread_slow(dev, obj, args, file);
673a394b 544
35b62a89 545out:
05394f39 546 drm_gem_object_unreference(&obj->base);
1d7cfea1 547unlock:
4f27b75d 548 mutex_unlock(&dev->struct_mutex);
eb01459f 549 return ret;
673a394b
EA
550}
551
0839ccb8
KP
552/* This is the fast write path which cannot handle
553 * page faults in the source data
9b7530cc 554 */
0839ccb8
KP
555
556static inline int
557fast_user_write(struct io_mapping *mapping,
558 loff_t page_base, int page_offset,
559 char __user *user_data,
560 int length)
9b7530cc 561{
9b7530cc 562 char *vaddr_atomic;
0839ccb8 563 unsigned long unwritten;
9b7530cc 564
3e4d3af5 565 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
0839ccb8
KP
566 unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
567 user_data, length);
3e4d3af5 568 io_mapping_unmap_atomic(vaddr_atomic);
fbd5a26d 569 return unwritten;
0839ccb8
KP
570}
571
572/* Here's the write path which can sleep for
573 * page faults
574 */
575
ab34c226 576static inline void
3de09aa3
EA
577slow_kernel_write(struct io_mapping *mapping,
578 loff_t gtt_base, int gtt_offset,
579 struct page *user_page, int user_offset,
580 int length)
0839ccb8 581{
ab34c226
CW
582 char __iomem *dst_vaddr;
583 char *src_vaddr;
0839ccb8 584
ab34c226
CW
585 dst_vaddr = io_mapping_map_wc(mapping, gtt_base);
586 src_vaddr = kmap(user_page);
587
588 memcpy_toio(dst_vaddr + gtt_offset,
589 src_vaddr + user_offset,
590 length);
591
592 kunmap(user_page);
593 io_mapping_unmap(dst_vaddr);
9b7530cc
LT
594}
595
3de09aa3
EA
596/**
597 * This is the fast pwrite path, where we copy the data directly from the
598 * user into the GTT, uncached.
599 */
673a394b 600static int
05394f39
CW
601i915_gem_gtt_pwrite_fast(struct drm_device *dev,
602 struct drm_i915_gem_object *obj,
3de09aa3 603 struct drm_i915_gem_pwrite *args,
05394f39 604 struct drm_file *file)
673a394b 605{
0839ccb8 606 drm_i915_private_t *dev_priv = dev->dev_private;
673a394b 607 ssize_t remain;
0839ccb8 608 loff_t offset, page_base;
673a394b 609 char __user *user_data;
0839ccb8 610 int page_offset, page_length;
673a394b
EA
611
612 user_data = (char __user *) (uintptr_t) args->data_ptr;
613 remain = args->size;
673a394b 614
05394f39 615 offset = obj->gtt_offset + args->offset;
673a394b
EA
616
617 while (remain > 0) {
618 /* Operation in this page
619 *
0839ccb8
KP
620 * page_base = page offset within aperture
621 * page_offset = offset within page
622 * page_length = bytes to copy for this page
673a394b 623 */
0839ccb8
KP
624 page_base = (offset & ~(PAGE_SIZE-1));
625 page_offset = offset & (PAGE_SIZE-1);
626 page_length = remain;
627 if ((page_offset + remain) > PAGE_SIZE)
628 page_length = PAGE_SIZE - page_offset;
629
0839ccb8 630 /* If we get a fault while copying data, then (presumably) our
3de09aa3
EA
631 * source page isn't available. Return the error and we'll
632 * retry in the slow path.
0839ccb8 633 */
fbd5a26d
CW
634 if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
635 page_offset, user_data, page_length))
636
637 return -EFAULT;
673a394b 638
0839ccb8
KP
639 remain -= page_length;
640 user_data += page_length;
641 offset += page_length;
673a394b 642 }
673a394b 643
fbd5a26d 644 return 0;
673a394b
EA
645}
646
3de09aa3
EA
647/**
648 * This is the fallback GTT pwrite path, which uses get_user_pages to pin
649 * the memory and maps it using kmap_atomic for copying.
650 *
651 * This code resulted in x11perf -rgb10text consuming about 10% more CPU
652 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
653 */
3043c60c 654static int
05394f39
CW
655i915_gem_gtt_pwrite_slow(struct drm_device *dev,
656 struct drm_i915_gem_object *obj,
3de09aa3 657 struct drm_i915_gem_pwrite *args,
05394f39 658 struct drm_file *file)
673a394b 659{
3de09aa3
EA
660 drm_i915_private_t *dev_priv = dev->dev_private;
661 ssize_t remain;
662 loff_t gtt_page_base, offset;
663 loff_t first_data_page, last_data_page, num_pages;
664 loff_t pinned_pages, i;
665 struct page **user_pages;
666 struct mm_struct *mm = current->mm;
667 int gtt_page_offset, data_page_offset, data_page_index, page_length;
673a394b 668 int ret;
3de09aa3
EA
669 uint64_t data_ptr = args->data_ptr;
670
671 remain = args->size;
672
673 /* Pin the user pages containing the data. We can't fault while
674 * holding the struct mutex, and all of the pwrite implementations
675 * want to hold it while dereferencing the user data.
676 */
677 first_data_page = data_ptr / PAGE_SIZE;
678 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
679 num_pages = last_data_page - first_data_page + 1;
680
fbd5a26d 681 user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
3de09aa3
EA
682 if (user_pages == NULL)
683 return -ENOMEM;
684
fbd5a26d 685 mutex_unlock(&dev->struct_mutex);
3de09aa3
EA
686 down_read(&mm->mmap_sem);
687 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
688 num_pages, 0, 0, user_pages, NULL);
689 up_read(&mm->mmap_sem);
fbd5a26d 690 mutex_lock(&dev->struct_mutex);
3de09aa3
EA
691 if (pinned_pages < num_pages) {
692 ret = -EFAULT;
693 goto out_unpin_pages;
694 }
673a394b 695
3de09aa3
EA
696 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
697 if (ret)
fbd5a26d 698 goto out_unpin_pages;
3de09aa3 699
05394f39 700 offset = obj->gtt_offset + args->offset;
3de09aa3
EA
701
702 while (remain > 0) {
703 /* Operation in this page
704 *
705 * gtt_page_base = page offset within aperture
706 * gtt_page_offset = offset within page in aperture
707 * data_page_index = page number in get_user_pages return
708 * data_page_offset = offset with data_page_index page.
709 * page_length = bytes to copy for this page
710 */
711 gtt_page_base = offset & PAGE_MASK;
712 gtt_page_offset = offset & ~PAGE_MASK;
713 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
714 data_page_offset = data_ptr & ~PAGE_MASK;
715
716 page_length = remain;
717 if ((gtt_page_offset + page_length) > PAGE_SIZE)
718 page_length = PAGE_SIZE - gtt_page_offset;
719 if ((data_page_offset + page_length) > PAGE_SIZE)
720 page_length = PAGE_SIZE - data_page_offset;
721
ab34c226
CW
722 slow_kernel_write(dev_priv->mm.gtt_mapping,
723 gtt_page_base, gtt_page_offset,
724 user_pages[data_page_index],
725 data_page_offset,
726 page_length);
3de09aa3
EA
727
728 remain -= page_length;
729 offset += page_length;
730 data_ptr += page_length;
731 }
732
3de09aa3
EA
733out_unpin_pages:
734 for (i = 0; i < pinned_pages; i++)
735 page_cache_release(user_pages[i]);
8e7d2b2c 736 drm_free_large(user_pages);
3de09aa3
EA
737
738 return ret;
739}
740
40123c1f
EA
741/**
742 * This is the fast shmem pwrite path, which attempts to directly
743 * copy_from_user into the kmapped pages backing the object.
744 */
3043c60c 745static int
05394f39
CW
746i915_gem_shmem_pwrite_fast(struct drm_device *dev,
747 struct drm_i915_gem_object *obj,
40123c1f 748 struct drm_i915_gem_pwrite *args,
05394f39 749 struct drm_file *file)
673a394b 750{
05394f39 751 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
40123c1f 752 ssize_t remain;
e5281ccd 753 loff_t offset;
40123c1f
EA
754 char __user *user_data;
755 int page_offset, page_length;
40123c1f
EA
756
757 user_data = (char __user *) (uintptr_t) args->data_ptr;
758 remain = args->size;
673a394b 759
40123c1f 760 offset = args->offset;
05394f39 761 obj->dirty = 1;
40123c1f
EA
762
763 while (remain > 0) {
e5281ccd
CW
764 struct page *page;
765 char *vaddr;
766 int ret;
767
40123c1f
EA
768 /* Operation in this page
769 *
40123c1f
EA
770 * page_offset = offset within page
771 * page_length = bytes to copy for this page
772 */
40123c1f
EA
773 page_offset = offset & (PAGE_SIZE-1);
774 page_length = remain;
775 if ((page_offset + remain) > PAGE_SIZE)
776 page_length = PAGE_SIZE - page_offset;
777
e5281ccd
CW
778 page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
779 GFP_HIGHUSER | __GFP_RECLAIMABLE);
780 if (IS_ERR(page))
781 return PTR_ERR(page);
782
783 vaddr = kmap_atomic(page, KM_USER0);
784 ret = __copy_from_user_inatomic(vaddr + page_offset,
785 user_data,
786 page_length);
787 kunmap_atomic(vaddr, KM_USER0);
788
789 set_page_dirty(page);
790 mark_page_accessed(page);
791 page_cache_release(page);
792
793 /* If we get a fault while copying data, then (presumably) our
794 * source page isn't available. Return the error and we'll
795 * retry in the slow path.
796 */
797 if (ret)
fbd5a26d 798 return -EFAULT;
40123c1f
EA
799
800 remain -= page_length;
801 user_data += page_length;
802 offset += page_length;
803 }
804
fbd5a26d 805 return 0;
40123c1f
EA
806}
807
808/**
809 * This is the fallback shmem pwrite path, which uses get_user_pages to pin
810 * the memory and maps it using kmap_atomic for copying.
811 *
812 * This avoids taking mmap_sem for faulting on the user's address while the
813 * struct_mutex is held.
814 */
815static int
05394f39
CW
816i915_gem_shmem_pwrite_slow(struct drm_device *dev,
817 struct drm_i915_gem_object *obj,
40123c1f 818 struct drm_i915_gem_pwrite *args,
05394f39 819 struct drm_file *file)
40123c1f 820{
05394f39 821 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
40123c1f
EA
822 struct mm_struct *mm = current->mm;
823 struct page **user_pages;
824 ssize_t remain;
825 loff_t offset, pinned_pages, i;
826 loff_t first_data_page, last_data_page, num_pages;
e5281ccd 827 int shmem_page_offset;
40123c1f
EA
828 int data_page_index, data_page_offset;
829 int page_length;
830 int ret;
831 uint64_t data_ptr = args->data_ptr;
280b713b 832 int do_bit17_swizzling;
40123c1f
EA
833
834 remain = args->size;
835
836 /* Pin the user pages containing the data. We can't fault while
837 * holding the struct mutex, and all of the pwrite implementations
838 * want to hold it while dereferencing the user data.
839 */
840 first_data_page = data_ptr / PAGE_SIZE;
841 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
842 num_pages = last_data_page - first_data_page + 1;
843
4f27b75d 844 user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
40123c1f
EA
845 if (user_pages == NULL)
846 return -ENOMEM;
847
fbd5a26d 848 mutex_unlock(&dev->struct_mutex);
40123c1f
EA
849 down_read(&mm->mmap_sem);
850 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
851 num_pages, 0, 0, user_pages, NULL);
852 up_read(&mm->mmap_sem);
fbd5a26d 853 mutex_lock(&dev->struct_mutex);
40123c1f
EA
854 if (pinned_pages < num_pages) {
855 ret = -EFAULT;
fbd5a26d 856 goto out;
673a394b
EA
857 }
858
fbd5a26d 859 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
07f73f69 860 if (ret)
fbd5a26d 861 goto out;
40123c1f 862
fbd5a26d 863 do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
40123c1f 864
673a394b 865 offset = args->offset;
05394f39 866 obj->dirty = 1;
673a394b 867
40123c1f 868 while (remain > 0) {
e5281ccd
CW
869 struct page *page;
870
40123c1f
EA
871 /* Operation in this page
872 *
40123c1f
EA
873 * shmem_page_offset = offset within page in shmem file
874 * data_page_index = page number in get_user_pages return
875 * data_page_offset = offset with data_page_index page.
876 * page_length = bytes to copy for this page
877 */
40123c1f
EA
878 shmem_page_offset = offset & ~PAGE_MASK;
879 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
880 data_page_offset = data_ptr & ~PAGE_MASK;
881
882 page_length = remain;
883 if ((shmem_page_offset + page_length) > PAGE_SIZE)
884 page_length = PAGE_SIZE - shmem_page_offset;
885 if ((data_page_offset + page_length) > PAGE_SIZE)
886 page_length = PAGE_SIZE - data_page_offset;
887
e5281ccd
CW
888 page = read_cache_page_gfp(mapping, offset >> PAGE_SHIFT,
889 GFP_HIGHUSER | __GFP_RECLAIMABLE);
890 if (IS_ERR(page)) {
891 ret = PTR_ERR(page);
892 goto out;
893 }
894
280b713b 895 if (do_bit17_swizzling) {
e5281ccd 896 slow_shmem_bit17_copy(page,
280b713b
EA
897 shmem_page_offset,
898 user_pages[data_page_index],
899 data_page_offset,
99a03df5
CW
900 page_length,
901 0);
902 } else {
e5281ccd 903 slow_shmem_copy(page,
99a03df5
CW
904 shmem_page_offset,
905 user_pages[data_page_index],
906 data_page_offset,
907 page_length);
280b713b 908 }
40123c1f 909
e5281ccd
CW
910 set_page_dirty(page);
911 mark_page_accessed(page);
912 page_cache_release(page);
913
40123c1f
EA
914 remain -= page_length;
915 data_ptr += page_length;
916 offset += page_length;
673a394b
EA
917 }
918
fbd5a26d 919out:
40123c1f
EA
920 for (i = 0; i < pinned_pages; i++)
921 page_cache_release(user_pages[i]);
8e7d2b2c 922 drm_free_large(user_pages);
673a394b 923
40123c1f 924 return ret;
673a394b
EA
925}
926
927/**
928 * Writes data to the object referenced by handle.
929 *
930 * On error, the contents of the buffer that were to be modified are undefined.
931 */
932int
933i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
fbd5a26d 934 struct drm_file *file)
673a394b
EA
935{
936 struct drm_i915_gem_pwrite *args = data;
05394f39 937 struct drm_i915_gem_object *obj;
51311d0a
CW
938 int ret;
939
940 if (args->size == 0)
941 return 0;
942
943 if (!access_ok(VERIFY_READ,
944 (char __user *)(uintptr_t)args->data_ptr,
945 args->size))
946 return -EFAULT;
947
948 ret = fault_in_pages_readable((char __user *)(uintptr_t)args->data_ptr,
949 args->size);
950 if (ret)
951 return -EFAULT;
673a394b 952
fbd5a26d 953 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 954 if (ret)
fbd5a26d 955 return ret;
1d7cfea1 956
05394f39 957 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1d7cfea1
CW
958 if (obj == NULL) {
959 ret = -ENOENT;
960 goto unlock;
fbd5a26d 961 }
673a394b 962
7dcd2499 963 /* Bounds check destination. */
05394f39
CW
964 if (args->offset > obj->base.size ||
965 args->size > obj->base.size - args->offset) {
ce9d419d 966 ret = -EINVAL;
35b62a89 967 goto out;
ce9d419d
CW
968 }
969
673a394b
EA
970 /* We can only do the GTT pwrite on untiled buffers, as otherwise
971 * it would end up going through the fenced access, and we'll get
972 * different detiling behavior between reading and writing.
973 * pread/pwrite currently are reading and writing from the CPU
974 * perspective, requiring manual detiling by the client.
975 */
05394f39 976 if (obj->phys_obj)
fbd5a26d 977 ret = i915_gem_phys_pwrite(dev, obj, args, file);
05394f39
CW
978 else if (obj->tiling_mode == I915_TILING_NONE &&
979 obj->gtt_space &&
980 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
75e9e915 981 ret = i915_gem_object_pin(obj, 0, true);
fbd5a26d
CW
982 if (ret)
983 goto out;
984
985 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
986 if (ret)
987 goto out_unpin;
988
989 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
990 if (ret == -EFAULT)
991 ret = i915_gem_gtt_pwrite_slow(dev, obj, args, file);
992
993out_unpin:
994 i915_gem_object_unpin(obj);
40123c1f 995 } else {
fbd5a26d
CW
996 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
997 if (ret)
e5281ccd 998 goto out;
673a394b 999
fbd5a26d
CW
1000 ret = -EFAULT;
1001 if (!i915_gem_object_needs_bit17_swizzle(obj))
1002 ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file);
1003 if (ret == -EFAULT)
1004 ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file);
fbd5a26d 1005 }
673a394b 1006
35b62a89 1007out:
05394f39 1008 drm_gem_object_unreference(&obj->base);
1d7cfea1 1009unlock:
fbd5a26d 1010 mutex_unlock(&dev->struct_mutex);
673a394b
EA
1011 return ret;
1012}
1013
1014/**
2ef7eeaa
EA
1015 * Called when user space prepares to use an object with the CPU, either
1016 * through the mmap ioctl's mapping or a GTT mapping.
673a394b
EA
1017 */
1018int
1019i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
05394f39 1020 struct drm_file *file)
673a394b 1021{
a09ba7fa 1022 struct drm_i915_private *dev_priv = dev->dev_private;
673a394b 1023 struct drm_i915_gem_set_domain *args = data;
05394f39 1024 struct drm_i915_gem_object *obj;
2ef7eeaa
EA
1025 uint32_t read_domains = args->read_domains;
1026 uint32_t write_domain = args->write_domain;
673a394b
EA
1027 int ret;
1028
1029 if (!(dev->driver->driver_features & DRIVER_GEM))
1030 return -ENODEV;
1031
2ef7eeaa 1032 /* Only handle setting domains to types used by the CPU. */
21d509e3 1033 if (write_domain & I915_GEM_GPU_DOMAINS)
2ef7eeaa
EA
1034 return -EINVAL;
1035
21d509e3 1036 if (read_domains & I915_GEM_GPU_DOMAINS)
2ef7eeaa
EA
1037 return -EINVAL;
1038
1039 /* Having something in the write domain implies it's in the read
1040 * domain, and only that read domain. Enforce that in the request.
1041 */
1042 if (write_domain != 0 && read_domains != write_domain)
1043 return -EINVAL;
1044
76c1dec1 1045 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 1046 if (ret)
76c1dec1 1047 return ret;
1d7cfea1 1048
05394f39 1049 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1d7cfea1
CW
1050 if (obj == NULL) {
1051 ret = -ENOENT;
1052 goto unlock;
76c1dec1 1053 }
673a394b 1054
652c393a
JB
1055 intel_mark_busy(dev, obj);
1056
2ef7eeaa
EA
1057 if (read_domains & I915_GEM_DOMAIN_GTT) {
1058 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
02354392 1059
a09ba7fa
EA
1060 /* Update the LRU on the fence for the CPU access that's
1061 * about to occur.
1062 */
05394f39 1063 if (obj->fence_reg != I915_FENCE_REG_NONE) {
007cc8ac 1064 struct drm_i915_fence_reg *reg =
05394f39 1065 &dev_priv->fence_regs[obj->fence_reg];
007cc8ac 1066 list_move_tail(&reg->lru_list,
a09ba7fa
EA
1067 &dev_priv->mm.fence_list);
1068 }
1069
02354392
EA
1070 /* Silently promote "you're not bound, there was nothing to do"
1071 * to success, since the client was just asking us to
1072 * make sure everything was done.
1073 */
1074 if (ret == -EINVAL)
1075 ret = 0;
2ef7eeaa 1076 } else {
e47c68e9 1077 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
2ef7eeaa
EA
1078 }
1079
7d1c4804 1080 /* Maintain LRU order of "inactive" objects */
05394f39
CW
1081 if (ret == 0 && i915_gem_object_is_inactive(obj))
1082 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
7d1c4804 1083
05394f39 1084 drm_gem_object_unreference(&obj->base);
1d7cfea1 1085unlock:
673a394b
EA
1086 mutex_unlock(&dev->struct_mutex);
1087 return ret;
1088}
1089
1090/**
1091 * Called when user space has done writes to this buffer
1092 */
1093int
1094i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
05394f39 1095 struct drm_file *file)
673a394b
EA
1096{
1097 struct drm_i915_gem_sw_finish *args = data;
05394f39 1098 struct drm_i915_gem_object *obj;
673a394b
EA
1099 int ret = 0;
1100
1101 if (!(dev->driver->driver_features & DRIVER_GEM))
1102 return -ENODEV;
1103
76c1dec1 1104 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 1105 if (ret)
76c1dec1 1106 return ret;
1d7cfea1 1107
05394f39 1108 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
673a394b 1109 if (obj == NULL) {
1d7cfea1
CW
1110 ret = -ENOENT;
1111 goto unlock;
673a394b
EA
1112 }
1113
673a394b 1114 /* Pinned buffers may be scanout, so flush the cache */
05394f39 1115 if (obj->pin_count)
e47c68e9
EA
1116 i915_gem_object_flush_cpu_write_domain(obj);
1117
05394f39 1118 drm_gem_object_unreference(&obj->base);
1d7cfea1 1119unlock:
673a394b
EA
1120 mutex_unlock(&dev->struct_mutex);
1121 return ret;
1122}
1123
1124/**
1125 * Maps the contents of an object, returning the address it is mapped
1126 * into.
1127 *
1128 * While the mapping holds a reference on the contents of the object, it doesn't
1129 * imply a ref on the object itself.
1130 */
1131int
1132i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
05394f39 1133 struct drm_file *file)
673a394b 1134{
da761a6e 1135 struct drm_i915_private *dev_priv = dev->dev_private;
673a394b
EA
1136 struct drm_i915_gem_mmap *args = data;
1137 struct drm_gem_object *obj;
1138 loff_t offset;
1139 unsigned long addr;
1140
1141 if (!(dev->driver->driver_features & DRIVER_GEM))
1142 return -ENODEV;
1143
05394f39 1144 obj = drm_gem_object_lookup(dev, file, args->handle);
673a394b 1145 if (obj == NULL)
bf79cb91 1146 return -ENOENT;
673a394b 1147
da761a6e
CW
1148 if (obj->size > dev_priv->mm.gtt_mappable_end) {
1149 drm_gem_object_unreference_unlocked(obj);
1150 return -E2BIG;
1151 }
1152
673a394b
EA
1153 offset = args->offset;
1154
1155 down_write(&current->mm->mmap_sem);
1156 addr = do_mmap(obj->filp, 0, args->size,
1157 PROT_READ | PROT_WRITE, MAP_SHARED,
1158 args->offset);
1159 up_write(&current->mm->mmap_sem);
bc9025bd 1160 drm_gem_object_unreference_unlocked(obj);
673a394b
EA
1161 if (IS_ERR((void *)addr))
1162 return addr;
1163
1164 args->addr_ptr = (uint64_t) addr;
1165
1166 return 0;
1167}
1168
de151cf6
JB
1169/**
1170 * i915_gem_fault - fault a page into the GTT
1171 * vma: VMA in question
1172 * vmf: fault info
1173 *
1174 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
1175 * from userspace. The fault handler takes care of binding the object to
1176 * the GTT (if needed), allocating and programming a fence register (again,
1177 * only if needed based on whether the old reg is still valid or the object
1178 * is tiled) and inserting a new PTE into the faulting process.
1179 *
1180 * Note that the faulting process may involve evicting existing objects
1181 * from the GTT and/or fence registers to make room. So performance may
1182 * suffer if the GTT working set is large or there are few fence registers
1183 * left.
1184 */
1185int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1186{
05394f39
CW
1187 struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
1188 struct drm_device *dev = obj->base.dev;
7d1c4804 1189 drm_i915_private_t *dev_priv = dev->dev_private;
de151cf6
JB
1190 pgoff_t page_offset;
1191 unsigned long pfn;
1192 int ret = 0;
0f973f27 1193 bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
de151cf6
JB
1194
1195 /* We don't use vmf->pgoff since that has the fake offset */
1196 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
1197 PAGE_SHIFT;
1198
1199 /* Now bind it into the GTT if needed */
1200 mutex_lock(&dev->struct_mutex);
a00b10c3 1201
919926ae
CW
1202 if (!obj->map_and_fenceable) {
1203 ret = i915_gem_object_unbind(obj);
1204 if (ret)
1205 goto unlock;
a00b10c3 1206 }
05394f39 1207 if (!obj->gtt_space) {
75e9e915 1208 ret = i915_gem_object_bind_to_gtt(obj, 0, true);
c715089f
CW
1209 if (ret)
1210 goto unlock;
de151cf6
JB
1211 }
1212
4a684a41
CW
1213 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1214 if (ret)
1215 goto unlock;
1216
de151cf6 1217 /* Need a new fence register? */
05394f39 1218 if (obj->tiling_mode != I915_TILING_NONE) {
2cf34d7b 1219 ret = i915_gem_object_get_fence_reg(obj, true);
c715089f
CW
1220 if (ret)
1221 goto unlock;
d9ddcb96 1222 }
de151cf6 1223
05394f39
CW
1224 if (i915_gem_object_is_inactive(obj))
1225 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
7d1c4804 1226
6299f992
CW
1227 obj->fault_mappable = true;
1228
05394f39 1229 pfn = ((dev->agp->base + obj->gtt_offset) >> PAGE_SHIFT) +
de151cf6
JB
1230 page_offset;
1231
1232 /* Finally, remap it using the new GTT offset */
1233 ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
c715089f 1234unlock:
de151cf6
JB
1235 mutex_unlock(&dev->struct_mutex);
1236
1237 switch (ret) {
045e769a
CW
1238 case -EAGAIN:
1239 set_need_resched();
c715089f
CW
1240 case 0:
1241 case -ERESTARTSYS:
1242 return VM_FAULT_NOPAGE;
de151cf6 1243 case -ENOMEM:
de151cf6 1244 return VM_FAULT_OOM;
de151cf6 1245 default:
c715089f 1246 return VM_FAULT_SIGBUS;
de151cf6
JB
1247 }
1248}
1249
1250/**
1251 * i915_gem_create_mmap_offset - create a fake mmap offset for an object
1252 * @obj: obj in question
1253 *
1254 * GEM memory mapping works by handing back to userspace a fake mmap offset
1255 * it can use in a subsequent mmap(2) call. The DRM core code then looks
1256 * up the object based on the offset and sets up the various memory mapping
1257 * structures.
1258 *
1259 * This routine allocates and attaches a fake offset for @obj.
1260 */
1261static int
05394f39 1262i915_gem_create_mmap_offset(struct drm_i915_gem_object *obj)
de151cf6 1263{
05394f39 1264 struct drm_device *dev = obj->base.dev;
de151cf6 1265 struct drm_gem_mm *mm = dev->mm_private;
de151cf6 1266 struct drm_map_list *list;
f77d390c 1267 struct drm_local_map *map;
de151cf6
JB
1268 int ret = 0;
1269
1270 /* Set the object up for mmap'ing */
05394f39 1271 list = &obj->base.map_list;
9a298b2a 1272 list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
de151cf6
JB
1273 if (!list->map)
1274 return -ENOMEM;
1275
1276 map = list->map;
1277 map->type = _DRM_GEM;
05394f39 1278 map->size = obj->base.size;
de151cf6
JB
1279 map->handle = obj;
1280
1281 /* Get a DRM GEM mmap offset allocated... */
1282 list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
05394f39
CW
1283 obj->base.size / PAGE_SIZE,
1284 0, 0);
de151cf6 1285 if (!list->file_offset_node) {
05394f39
CW
1286 DRM_ERROR("failed to allocate offset for bo %d\n",
1287 obj->base.name);
9e0ae534 1288 ret = -ENOSPC;
de151cf6
JB
1289 goto out_free_list;
1290 }
1291
1292 list->file_offset_node = drm_mm_get_block(list->file_offset_node,
05394f39
CW
1293 obj->base.size / PAGE_SIZE,
1294 0);
de151cf6
JB
1295 if (!list->file_offset_node) {
1296 ret = -ENOMEM;
1297 goto out_free_list;
1298 }
1299
1300 list->hash.key = list->file_offset_node->start;
9e0ae534
CW
1301 ret = drm_ht_insert_item(&mm->offset_hash, &list->hash);
1302 if (ret) {
de151cf6
JB
1303 DRM_ERROR("failed to add to map hash\n");
1304 goto out_free_mm;
1305 }
1306
de151cf6
JB
1307 return 0;
1308
1309out_free_mm:
1310 drm_mm_put_block(list->file_offset_node);
1311out_free_list:
9a298b2a 1312 kfree(list->map);
39a01d1f 1313 list->map = NULL;
de151cf6
JB
1314
1315 return ret;
1316}
1317
901782b2
CW
1318/**
1319 * i915_gem_release_mmap - remove physical page mappings
1320 * @obj: obj in question
1321 *
af901ca1 1322 * Preserve the reservation of the mmapping with the DRM core code, but
901782b2
CW
1323 * relinquish ownership of the pages back to the system.
1324 *
1325 * It is vital that we remove the page mapping if we have mapped a tiled
1326 * object through the GTT and then lose the fence register due to
1327 * resource pressure. Similarly if the object has been moved out of the
1328 * aperture, than pages mapped into userspace must be revoked. Removing the
1329 * mapping will then trigger a page fault on the next user access, allowing
1330 * fixup by i915_gem_fault().
1331 */
d05ca301 1332void
05394f39 1333i915_gem_release_mmap(struct drm_i915_gem_object *obj)
901782b2 1334{
6299f992
CW
1335 if (!obj->fault_mappable)
1336 return;
901782b2 1337
6299f992
CW
1338 unmap_mapping_range(obj->base.dev->dev_mapping,
1339 (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT,
1340 obj->base.size, 1);
fb7d516a 1341
6299f992 1342 obj->fault_mappable = false;
901782b2
CW
1343}
1344
ab00b3e5 1345static void
05394f39 1346i915_gem_free_mmap_offset(struct drm_i915_gem_object *obj)
ab00b3e5 1347{
05394f39 1348 struct drm_device *dev = obj->base.dev;
ab00b3e5 1349 struct drm_gem_mm *mm = dev->mm_private;
05394f39 1350 struct drm_map_list *list = &obj->base.map_list;
ab00b3e5 1351
ab00b3e5 1352 drm_ht_remove_item(&mm->offset_hash, &list->hash);
39a01d1f
CW
1353 drm_mm_put_block(list->file_offset_node);
1354 kfree(list->map);
1355 list->map = NULL;
ab00b3e5
JB
1356}
1357
92b88aeb
CW
1358static uint32_t
1359i915_gem_get_gtt_size(struct drm_i915_gem_object *obj)
1360{
1361 struct drm_device *dev = obj->base.dev;
1362 uint32_t size;
1363
1364 if (INTEL_INFO(dev)->gen >= 4 ||
1365 obj->tiling_mode == I915_TILING_NONE)
1366 return obj->base.size;
1367
1368 /* Previous chips need a power-of-two fence region when tiling */
1369 if (INTEL_INFO(dev)->gen == 3)
1370 size = 1024*1024;
1371 else
1372 size = 512*1024;
1373
1374 while (size < obj->base.size)
1375 size <<= 1;
1376
1377 return size;
1378}
1379
de151cf6
JB
1380/**
1381 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1382 * @obj: object to check
1383 *
1384 * Return the required GTT alignment for an object, taking into account
5e783301 1385 * potential fence register mapping.
de151cf6
JB
1386 */
1387static uint32_t
05394f39 1388i915_gem_get_gtt_alignment(struct drm_i915_gem_object *obj)
de151cf6 1389{
05394f39 1390 struct drm_device *dev = obj->base.dev;
de151cf6
JB
1391
1392 /*
1393 * Minimum alignment is 4k (GTT page size), but might be greater
1394 * if a fence register is needed for the object.
1395 */
a00b10c3 1396 if (INTEL_INFO(dev)->gen >= 4 ||
05394f39 1397 obj->tiling_mode == I915_TILING_NONE)
de151cf6
JB
1398 return 4096;
1399
a00b10c3
CW
1400 /*
1401 * Previous chips need to be aligned to the size of the smallest
1402 * fence register that can contain the object.
1403 */
05394f39 1404 return i915_gem_get_gtt_size(obj);
a00b10c3
CW
1405}
1406
5e783301
DV
1407/**
1408 * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
1409 * unfenced object
1410 * @obj: object to check
1411 *
1412 * Return the required GTT alignment for an object, only taking into account
1413 * unfenced tiled surface requirements.
1414 */
1415static uint32_t
05394f39 1416i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj)
5e783301 1417{
05394f39 1418 struct drm_device *dev = obj->base.dev;
5e783301
DV
1419 int tile_height;
1420
1421 /*
1422 * Minimum alignment is 4k (GTT page size) for sane hw.
1423 */
1424 if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
05394f39 1425 obj->tiling_mode == I915_TILING_NONE)
5e783301
DV
1426 return 4096;
1427
1428 /*
1429 * Older chips need unfenced tiled buffers to be aligned to the left
1430 * edge of an even tile row (where tile rows are counted as if the bo is
1431 * placed in a fenced gtt region).
1432 */
1433 if (IS_GEN2(dev) ||
05394f39 1434 (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)))
5e783301
DV
1435 tile_height = 32;
1436 else
1437 tile_height = 8;
1438
05394f39 1439 return tile_height * obj->stride * 2;
5e783301
DV
1440}
1441
de151cf6
JB
1442/**
1443 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1444 * @dev: DRM device
1445 * @data: GTT mapping ioctl data
05394f39 1446 * @file: GEM object info
de151cf6
JB
1447 *
1448 * Simply returns the fake offset to userspace so it can mmap it.
1449 * The mmap call will end up in drm_gem_mmap(), which will set things
1450 * up so we can get faults in the handler above.
1451 *
1452 * The fault handler will take care of binding the object into the GTT
1453 * (since it may have been evicted to make room for something), allocating
1454 * a fence register, and mapping the appropriate aperture address into
1455 * userspace.
1456 */
1457int
1458i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
05394f39 1459 struct drm_file *file)
de151cf6 1460{
da761a6e 1461 struct drm_i915_private *dev_priv = dev->dev_private;
de151cf6 1462 struct drm_i915_gem_mmap_gtt *args = data;
05394f39 1463 struct drm_i915_gem_object *obj;
de151cf6
JB
1464 int ret;
1465
1466 if (!(dev->driver->driver_features & DRIVER_GEM))
1467 return -ENODEV;
1468
76c1dec1 1469 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 1470 if (ret)
76c1dec1 1471 return ret;
de151cf6 1472
05394f39 1473 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1d7cfea1
CW
1474 if (obj == NULL) {
1475 ret = -ENOENT;
1476 goto unlock;
1477 }
de151cf6 1478
05394f39 1479 if (obj->base.size > dev_priv->mm.gtt_mappable_end) {
da761a6e
CW
1480 ret = -E2BIG;
1481 goto unlock;
1482 }
1483
05394f39 1484 if (obj->madv != I915_MADV_WILLNEED) {
ab18282d 1485 DRM_ERROR("Attempting to mmap a purgeable buffer\n");
1d7cfea1
CW
1486 ret = -EINVAL;
1487 goto out;
ab18282d
CW
1488 }
1489
05394f39 1490 if (!obj->base.map_list.map) {
de151cf6 1491 ret = i915_gem_create_mmap_offset(obj);
1d7cfea1
CW
1492 if (ret)
1493 goto out;
de151cf6
JB
1494 }
1495
05394f39 1496 args->offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT;
de151cf6 1497
1d7cfea1 1498out:
05394f39 1499 drm_gem_object_unreference(&obj->base);
1d7cfea1 1500unlock:
de151cf6 1501 mutex_unlock(&dev->struct_mutex);
1d7cfea1 1502 return ret;
de151cf6
JB
1503}
1504
e5281ccd 1505static int
05394f39 1506i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
e5281ccd
CW
1507 gfp_t gfpmask)
1508{
e5281ccd
CW
1509 int page_count, i;
1510 struct address_space *mapping;
1511 struct inode *inode;
1512 struct page *page;
1513
1514 /* Get the list of pages out of our struct file. They'll be pinned
1515 * at this point until we release them.
1516 */
05394f39
CW
1517 page_count = obj->base.size / PAGE_SIZE;
1518 BUG_ON(obj->pages != NULL);
1519 obj->pages = drm_malloc_ab(page_count, sizeof(struct page *));
1520 if (obj->pages == NULL)
e5281ccd
CW
1521 return -ENOMEM;
1522
05394f39 1523 inode = obj->base.filp->f_path.dentry->d_inode;
e5281ccd
CW
1524 mapping = inode->i_mapping;
1525 for (i = 0; i < page_count; i++) {
1526 page = read_cache_page_gfp(mapping, i,
1527 GFP_HIGHUSER |
1528 __GFP_COLD |
1529 __GFP_RECLAIMABLE |
1530 gfpmask);
1531 if (IS_ERR(page))
1532 goto err_pages;
1533
05394f39 1534 obj->pages[i] = page;
e5281ccd
CW
1535 }
1536
05394f39 1537 if (obj->tiling_mode != I915_TILING_NONE)
e5281ccd
CW
1538 i915_gem_object_do_bit_17_swizzle(obj);
1539
1540 return 0;
1541
1542err_pages:
1543 while (i--)
05394f39 1544 page_cache_release(obj->pages[i]);
e5281ccd 1545
05394f39
CW
1546 drm_free_large(obj->pages);
1547 obj->pages = NULL;
e5281ccd
CW
1548 return PTR_ERR(page);
1549}
1550
5cdf5881 1551static void
05394f39 1552i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
673a394b 1553{
05394f39 1554 int page_count = obj->base.size / PAGE_SIZE;
673a394b
EA
1555 int i;
1556
05394f39 1557 BUG_ON(obj->madv == __I915_MADV_PURGED);
673a394b 1558
05394f39 1559 if (obj->tiling_mode != I915_TILING_NONE)
280b713b
EA
1560 i915_gem_object_save_bit_17_swizzle(obj);
1561
05394f39
CW
1562 if (obj->madv == I915_MADV_DONTNEED)
1563 obj->dirty = 0;
3ef94daa
CW
1564
1565 for (i = 0; i < page_count; i++) {
05394f39
CW
1566 if (obj->dirty)
1567 set_page_dirty(obj->pages[i]);
3ef94daa 1568
05394f39
CW
1569 if (obj->madv == I915_MADV_WILLNEED)
1570 mark_page_accessed(obj->pages[i]);
3ef94daa 1571
05394f39 1572 page_cache_release(obj->pages[i]);
3ef94daa 1573 }
05394f39 1574 obj->dirty = 0;
673a394b 1575
05394f39
CW
1576 drm_free_large(obj->pages);
1577 obj->pages = NULL;
673a394b
EA
1578}
1579
a56ba56c
CW
1580static uint32_t
1581i915_gem_next_request_seqno(struct drm_device *dev,
1582 struct intel_ring_buffer *ring)
1583{
1584 drm_i915_private_t *dev_priv = dev->dev_private;
5d97eb69 1585 return ring->outstanding_lazy_request = dev_priv->next_seqno;
a56ba56c
CW
1586}
1587
673a394b 1588static void
05394f39 1589i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
852835f3 1590 struct intel_ring_buffer *ring)
673a394b 1591{
05394f39 1592 struct drm_device *dev = obj->base.dev;
69dc4987 1593 struct drm_i915_private *dev_priv = dev->dev_private;
a56ba56c 1594 uint32_t seqno = i915_gem_next_request_seqno(dev, ring);
617dbe27 1595
852835f3 1596 BUG_ON(ring == NULL);
05394f39 1597 obj->ring = ring;
673a394b
EA
1598
1599 /* Add a reference if we're newly entering the active list. */
05394f39
CW
1600 if (!obj->active) {
1601 drm_gem_object_reference(&obj->base);
1602 obj->active = 1;
673a394b 1603 }
e35a41de 1604
673a394b 1605 /* Move from whatever list we were on to the tail of execution. */
05394f39
CW
1606 list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
1607 list_move_tail(&obj->ring_list, &ring->active_list);
caea7476 1608
05394f39 1609 obj->last_rendering_seqno = seqno;
caea7476
CW
1610 if (obj->fenced_gpu_access) {
1611 struct drm_i915_fence_reg *reg;
1612
1613 BUG_ON(obj->fence_reg == I915_FENCE_REG_NONE);
1614
1615 obj->last_fenced_seqno = seqno;
1616 obj->last_fenced_ring = ring;
1617
1618 reg = &dev_priv->fence_regs[obj->fence_reg];
1619 list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
1620 }
1621}
1622
1623static void
1624i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
1625{
1626 list_del_init(&obj->ring_list);
1627 obj->last_rendering_seqno = 0;
1628 obj->last_fenced_seqno = 0;
673a394b
EA
1629}
1630
ce44b0ea 1631static void
05394f39 1632i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj)
ce44b0ea 1633{
05394f39 1634 struct drm_device *dev = obj->base.dev;
ce44b0ea 1635 drm_i915_private_t *dev_priv = dev->dev_private;
ce44b0ea 1636
05394f39
CW
1637 BUG_ON(!obj->active);
1638 list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list);
caea7476
CW
1639
1640 i915_gem_object_move_off_active(obj);
1641}
1642
1643static void
1644i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
1645{
1646 struct drm_device *dev = obj->base.dev;
1647 struct drm_i915_private *dev_priv = dev->dev_private;
1648
1649 if (obj->pin_count != 0)
1650 list_move_tail(&obj->mm_list, &dev_priv->mm.pinned_list);
1651 else
1652 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
1653
1654 BUG_ON(!list_empty(&obj->gpu_write_list));
1655 BUG_ON(!obj->active);
1656 obj->ring = NULL;
1657
1658 i915_gem_object_move_off_active(obj);
1659 obj->fenced_gpu_access = false;
1660 obj->last_fenced_ring = NULL;
1661
1662 obj->active = 0;
1663 drm_gem_object_unreference(&obj->base);
1664
1665 WARN_ON(i915_verify_lists(dev));
ce44b0ea 1666}
673a394b 1667
963b4836
CW
1668/* Immediately discard the backing storage */
1669static void
05394f39 1670i915_gem_object_truncate(struct drm_i915_gem_object *obj)
963b4836 1671{
bb6baf76 1672 struct inode *inode;
963b4836 1673
ae9fed6b
CW
1674 /* Our goal here is to return as much of the memory as
1675 * is possible back to the system as we are called from OOM.
1676 * To do this we must instruct the shmfs to drop all of its
1677 * backing pages, *now*. Here we mirror the actions taken
1678 * when by shmem_delete_inode() to release the backing store.
1679 */
05394f39 1680 inode = obj->base.filp->f_path.dentry->d_inode;
ae9fed6b
CW
1681 truncate_inode_pages(inode->i_mapping, 0);
1682 if (inode->i_op->truncate_range)
1683 inode->i_op->truncate_range(inode, 0, (loff_t)-1);
bb6baf76 1684
05394f39 1685 obj->madv = __I915_MADV_PURGED;
963b4836
CW
1686}
1687
1688static inline int
05394f39 1689i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
963b4836 1690{
05394f39 1691 return obj->madv == I915_MADV_DONTNEED;
963b4836
CW
1692}
1693
63560396
DV
1694static void
1695i915_gem_process_flushing_list(struct drm_device *dev,
8a1a49f9 1696 uint32_t flush_domains,
852835f3 1697 struct intel_ring_buffer *ring)
63560396 1698{
05394f39 1699 struct drm_i915_gem_object *obj, *next;
63560396 1700
05394f39 1701 list_for_each_entry_safe(obj, next,
64193406 1702 &ring->gpu_write_list,
63560396 1703 gpu_write_list) {
05394f39
CW
1704 if (obj->base.write_domain & flush_domains) {
1705 uint32_t old_write_domain = obj->base.write_domain;
63560396 1706
05394f39
CW
1707 obj->base.write_domain = 0;
1708 list_del_init(&obj->gpu_write_list);
617dbe27 1709 i915_gem_object_move_to_active(obj, ring);
63560396 1710
63560396 1711 trace_i915_gem_object_change_domain(obj,
05394f39 1712 obj->base.read_domains,
63560396
DV
1713 old_write_domain);
1714 }
1715 }
1716}
8187a2b7 1717
3cce469c 1718int
8a1a49f9 1719i915_add_request(struct drm_device *dev,
f787a5f5 1720 struct drm_file *file,
8dc5d147 1721 struct drm_i915_gem_request *request,
8a1a49f9 1722 struct intel_ring_buffer *ring)
673a394b
EA
1723{
1724 drm_i915_private_t *dev_priv = dev->dev_private;
f787a5f5 1725 struct drm_i915_file_private *file_priv = NULL;
673a394b
EA
1726 uint32_t seqno;
1727 int was_empty;
3cce469c
CW
1728 int ret;
1729
1730 BUG_ON(request == NULL);
673a394b 1731
f787a5f5
CW
1732 if (file != NULL)
1733 file_priv = file->driver_priv;
b962442e 1734
3cce469c
CW
1735 ret = ring->add_request(ring, &seqno);
1736 if (ret)
1737 return ret;
673a394b 1738
a56ba56c 1739 ring->outstanding_lazy_request = false;
673a394b
EA
1740
1741 request->seqno = seqno;
852835f3 1742 request->ring = ring;
673a394b 1743 request->emitted_jiffies = jiffies;
852835f3
ZN
1744 was_empty = list_empty(&ring->request_list);
1745 list_add_tail(&request->list, &ring->request_list);
1746
f787a5f5 1747 if (file_priv) {
1c25595f 1748 spin_lock(&file_priv->mm.lock);
f787a5f5 1749 request->file_priv = file_priv;
b962442e 1750 list_add_tail(&request->client_list,
f787a5f5 1751 &file_priv->mm.request_list);
1c25595f 1752 spin_unlock(&file_priv->mm.lock);
b962442e 1753 }
673a394b 1754
f65d9421 1755 if (!dev_priv->mm.suspended) {
b3b079db
CW
1756 mod_timer(&dev_priv->hangcheck_timer,
1757 jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
f65d9421 1758 if (was_empty)
b3b079db
CW
1759 queue_delayed_work(dev_priv->wq,
1760 &dev_priv->mm.retire_work, HZ);
f65d9421 1761 }
3cce469c 1762 return 0;
673a394b
EA
1763}
1764
1765/**
1766 * Command execution barrier
1767 *
1768 * Ensures that all commands in the ring are finished
1769 * before signalling the CPU
1770 */
8a1a49f9 1771static void
852835f3 1772i915_retire_commands(struct drm_device *dev, struct intel_ring_buffer *ring)
673a394b 1773{
673a394b 1774 uint32_t flush_domains = 0;
673a394b
EA
1775
1776 /* The sampler always gets flushed on i965 (sigh) */
a6c45cf0 1777 if (INTEL_INFO(dev)->gen >= 4)
673a394b 1778 flush_domains |= I915_GEM_DOMAIN_SAMPLER;
852835f3 1779
78501eac 1780 ring->flush(ring, I915_GEM_DOMAIN_COMMAND, flush_domains);
673a394b
EA
1781}
1782
f787a5f5
CW
1783static inline void
1784i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
673a394b 1785{
1c25595f 1786 struct drm_i915_file_private *file_priv = request->file_priv;
673a394b 1787
1c25595f
CW
1788 if (!file_priv)
1789 return;
1c5d22f7 1790
1c25595f
CW
1791 spin_lock(&file_priv->mm.lock);
1792 list_del(&request->client_list);
1793 request->file_priv = NULL;
1794 spin_unlock(&file_priv->mm.lock);
673a394b 1795}
673a394b 1796
dfaae392
CW
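/* On a GPU reset the requests queued on this ring will never complete,
 * so drop them all and move their objects straight to the inactive
 * list.
 */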
1797static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
1798 struct intel_ring_buffer *ring)
9375e446 1799{
dfaae392
CW
1800 while (!list_empty(&ring->request_list)) {
1801 struct drm_i915_gem_request *request;
673a394b 1802
dfaae392
CW
1803 request = list_first_entry(&ring->request_list,
1804 struct drm_i915_gem_request,
1805 list);
de151cf6 1806
dfaae392 1807 list_del(&request->list);
f787a5f5 1808 i915_gem_request_remove_from_client(request);
dfaae392
CW
1809 kfree(request);
1810 }
673a394b 1811
dfaae392 1812 while (!list_empty(&ring->active_list)) {
05394f39 1813 struct drm_i915_gem_object *obj;
9375e446 1814
05394f39
CW
1815 obj = list_first_entry(&ring->active_list,
1816 struct drm_i915_gem_object,
1817 ring_list);
9375e446 1818
05394f39
CW
1819 obj->base.write_domain = 0;
1820 list_del_init(&obj->gpu_write_list);
1821 i915_gem_object_move_to_inactive(obj);
673a394b
EA
1822 }
1823}
1824
312817a3
CW
1825static void i915_gem_reset_fences(struct drm_device *dev)
1826{
1827 struct drm_i915_private *dev_priv = dev->dev_private;
1828 int i;
1829
1830 for (i = 0; i < 16; i++) {
1831 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
1832 if (reg->obj)
1833 i915_gem_clear_fence_reg(reg->obj);
1834 }
1835}
1836
069efc1d 1837void i915_gem_reset(struct drm_device *dev)
673a394b 1838{
77f01230 1839 struct drm_i915_private *dev_priv = dev->dev_private;
05394f39 1840 struct drm_i915_gem_object *obj;
673a394b 1841
dfaae392 1842 i915_gem_reset_ring_lists(dev_priv, &dev_priv->render_ring);
87acb0a5 1843 i915_gem_reset_ring_lists(dev_priv, &dev_priv->bsd_ring);
549f7365 1844 i915_gem_reset_ring_lists(dev_priv, &dev_priv->blt_ring);
dfaae392
CW
1845
1846 /* Remove anything from the flushing lists. The GPU cache is likely
1847 * to be lost on reset along with the data, so simply move the
1848 * lost bo to the inactive list.
1849 */
1850 while (!list_empty(&dev_priv->mm.flushing_list)) {
05394f39
CW
1851 obj = list_first_entry(&dev_priv->mm.flushing_list,
1852 struct drm_i915_gem_object,
1853 mm_list);
dfaae392 1854
05394f39
CW
1855 obj->base.write_domain = 0;
1856 list_del_init(&obj->gpu_write_list);
1857 i915_gem_object_move_to_inactive(obj);
dfaae392
CW
1858 }
1859
1860 /* Move everything out of the GPU domains to ensure we do any
1861 * necessary invalidation upon reuse.
1862 */
05394f39 1863 list_for_each_entry(obj,
77f01230 1864 &dev_priv->mm.inactive_list,
69dc4987 1865 mm_list)
77f01230 1866 {
05394f39 1867 obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
77f01230 1868 }
069efc1d
CW
1869
1870 /* The fence registers are invalidated so clear them out */
312817a3 1871 i915_gem_reset_fences(dev);
673a394b
EA
1872}
1873
1874/**
1875 * This function clears the request list as sequence numbers are passed.
1876 */
b09a1fec
CW
1877static void
1878i915_gem_retire_requests_ring(struct drm_device *dev,
1879 struct intel_ring_buffer *ring)
673a394b
EA
1880{
1881 drm_i915_private_t *dev_priv = dev->dev_private;
1882 uint32_t seqno;
1883
b84d5f0c
CW
1884 if (!ring->status_page.page_addr ||
1885 list_empty(&ring->request_list))
6c0594a3
KW
1886 return;
1887
23bc5982 1888 WARN_ON(i915_verify_lists(dev));
673a394b 1889
78501eac 1890 seqno = ring->get_seqno(ring);
852835f3 1891 while (!list_empty(&ring->request_list)) {
673a394b 1892 struct drm_i915_gem_request *request;
673a394b 1893
852835f3 1894 request = list_first_entry(&ring->request_list,
673a394b
EA
1895 struct drm_i915_gem_request,
1896 list);
673a394b 1897
dfaae392 1898 if (!i915_seqno_passed(seqno, request->seqno))
b84d5f0c
CW
1899 break;
1900
1901 trace_i915_gem_request_retire(dev, request->seqno);
1902
1903 list_del(&request->list);
f787a5f5 1904 i915_gem_request_remove_from_client(request);
b84d5f0c
CW
1905 kfree(request);
1906 }
673a394b 1907
b84d5f0c
CW
1908 /* Move any buffers on the active list that are no longer referenced
1909 * by the ringbuffer to the flushing/inactive lists as appropriate.
1910 */
1911 while (!list_empty(&ring->active_list)) {
05394f39 1912 struct drm_i915_gem_object *obj;
b84d5f0c 1913
05394f39
CW
1914 obj = list_first_entry(&ring->active_list,
1915 struct drm_i915_gem_object,
1916 ring_list);
673a394b 1917
05394f39 1918 if (!i915_seqno_passed(seqno, obj->last_rendering_seqno))
673a394b 1919 break;
b84d5f0c 1920
05394f39 1921 if (obj->base.write_domain != 0)
b84d5f0c
CW
1922 i915_gem_object_move_to_flushing(obj);
1923 else
1924 i915_gem_object_move_to_inactive(obj);
673a394b 1925 }
9d34e5db
CW
1926
1927 if (unlikely(dev_priv->trace_irq_seqno &&
1928 i915_seqno_passed(dev_priv->trace_irq_seqno, seqno))) {
78501eac 1929 ring->user_irq_put(ring);
9d34e5db
CW
1930 dev_priv->trace_irq_seqno = 0;
1931 }
23bc5982
CW
1932
1933 WARN_ON(i915_verify_lists(dev));
673a394b
EA
1934}
1935
b09a1fec
CW
1936void
1937i915_gem_retire_requests(struct drm_device *dev)
1938{
1939 drm_i915_private_t *dev_priv = dev->dev_private;
1940
be72615b 1941 if (!list_empty(&dev_priv->mm.deferred_free_list)) {
05394f39 1942 struct drm_i915_gem_object *obj, *next;
be72615b
CW
1943
1944 /* We must be careful that during unbind() we do not
1945 * accidentally infinitely recurse into retire requests.
1946 * Currently:
1947 * retire -> free -> unbind -> wait -> retire_ring
1948 */
05394f39 1949 list_for_each_entry_safe(obj, next,
be72615b 1950 &dev_priv->mm.deferred_free_list,
69dc4987 1951 mm_list)
05394f39 1952 i915_gem_free_object_tail(obj);
be72615b
CW
1953 }
1954
b09a1fec 1955 i915_gem_retire_requests_ring(dev, &dev_priv->render_ring);
87acb0a5 1956 i915_gem_retire_requests_ring(dev, &dev_priv->bsd_ring);
549f7365 1957 i915_gem_retire_requests_ring(dev, &dev_priv->blt_ring);
b09a1fec
CW
1958}
1959
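/* Delayed work that periodically retires completed requests. It
 * re-queues itself (roughly once per second) while any ring still has
 * outstanding requests, and backs off if struct_mutex is contended.
 */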
75ef9da2 1960static void
673a394b
EA
1961i915_gem_retire_work_handler(struct work_struct *work)
1962{
1963 drm_i915_private_t *dev_priv;
1964 struct drm_device *dev;
1965
1966 dev_priv = container_of(work, drm_i915_private_t,
1967 mm.retire_work.work);
1968 dev = dev_priv->dev;
1969
891b48cf
CW
1970 /* Come back later if the device is busy... */
1971 if (!mutex_trylock(&dev->struct_mutex)) {
1972 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1973 return;
1974 }
1975
b09a1fec 1976 i915_gem_retire_requests(dev);
d1b851fc 1977
6dbe2772 1978 if (!dev_priv->mm.suspended &&
d1b851fc 1979 (!list_empty(&dev_priv->render_ring.request_list) ||
549f7365
CW
1980 !list_empty(&dev_priv->bsd_ring.request_list) ||
1981 !list_empty(&dev_priv->blt_ring.request_list)))
9c9fe1f8 1982 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
673a394b
EA
1983 mutex_unlock(&dev->struct_mutex);
1984}
1985
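/* Wait for the given seqno to be passed by the ring. If the seqno only
 * refers to an outstanding lazy request, a real request is emitted
 * first. Returns -EAGAIN if the GPU is wedged; on success the ring's
 * completed requests are retired immediately.
 */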
5a5a0c64 1986int
852835f3 1987i915_do_wait_request(struct drm_device *dev, uint32_t seqno,
8a1a49f9 1988 bool interruptible, struct intel_ring_buffer *ring)
673a394b
EA
1989{
1990 drm_i915_private_t *dev_priv = dev->dev_private;
802c7eb6 1991 u32 ier;
673a394b
EA
1992 int ret = 0;
1993
1994 BUG_ON(seqno == 0);
1995
ba1234d1 1996 if (atomic_read(&dev_priv->mm.wedged))
30dbf0c0
CW
1997 return -EAGAIN;
1998
5d97eb69 1999 if (seqno == ring->outstanding_lazy_request) {
3cce469c
CW
2000 struct drm_i915_gem_request *request;
2001
2002 request = kzalloc(sizeof(*request), GFP_KERNEL);
2003 if (request == NULL)
e35a41de 2004 return -ENOMEM;
3cce469c
CW
2005
2006 ret = i915_add_request(dev, NULL, request, ring);
2007 if (ret) {
2008 kfree(request);
2009 return ret;
2010 }
2011
2012 seqno = request->seqno;
e35a41de 2013 }
ffed1d09 2014
78501eac 2015 if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
bad720ff 2016 if (HAS_PCH_SPLIT(dev))
036a4a7d
ZW
2017 ier = I915_READ(DEIER) | I915_READ(GTIER);
2018 else
2019 ier = I915_READ(IER);
802c7eb6
JB
2020 if (!ier) {
2021 DRM_ERROR("something (likely vbetool) disabled "
2022 "interrupts, re-enabling\n");
2023 i915_driver_irq_preinstall(dev);
2024 i915_driver_irq_postinstall(dev);
2025 }
2026
1c5d22f7
CW
2027 trace_i915_gem_request_wait_begin(dev, seqno);
2028
b2223497 2029 ring->waiting_seqno = seqno;
78501eac 2030 ring->user_irq_get(ring);
48764bf4 2031 if (interruptible)
852835f3 2032 ret = wait_event_interruptible(ring->irq_queue,
78501eac 2033 i915_seqno_passed(ring->get_seqno(ring), seqno)
852835f3 2034 || atomic_read(&dev_priv->mm.wedged));
48764bf4 2035 else
852835f3 2036 wait_event(ring->irq_queue,
78501eac 2037 i915_seqno_passed(ring->get_seqno(ring), seqno)
852835f3 2038 || atomic_read(&dev_priv->mm.wedged));
48764bf4 2039
78501eac 2040 ring->user_irq_put(ring);
b2223497 2041 ring->waiting_seqno = 0;
1c5d22f7
CW
2042
2043 trace_i915_gem_request_wait_end(dev, seqno);
673a394b 2044 }
ba1234d1 2045 if (atomic_read(&dev_priv->mm.wedged))
30dbf0c0 2046 ret = -EAGAIN;
673a394b
EA
2047
2048 if (ret && ret != -ERESTARTSYS)
8bff917c 2049 DRM_ERROR("%s returns %d (awaiting %d at %d, next %d)\n",
78501eac 2050 __func__, ret, seqno, ring->get_seqno(ring),
8bff917c 2051 dev_priv->next_seqno);
673a394b
EA
2052
2053 /* Directly dispatch request retiring. While we have the work queue
2054 * to handle this, the waiter on a request often wants an associated
2055 * buffer to have made it to the inactive list, and we would need
2056 * a separate wait queue to handle that.
2057 */
2058 if (ret == 0)
b09a1fec 2059 i915_gem_retire_requests_ring(dev, ring);
673a394b
EA
2060
2061 return ret;
2062}
2063
48764bf4
DV
2064/**
2065 * Waits for a sequence number to be signaled, and cleans up the
2066 * request and object lists appropriately for that event.
2067 */
2068static int
852835f3 2069i915_wait_request(struct drm_device *dev, uint32_t seqno,
a56ba56c 2070 struct intel_ring_buffer *ring)
48764bf4 2071{
852835f3 2072 return i915_do_wait_request(dev, seqno, 1, ring);
48764bf4
DV
2073}
2074
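/* Flush a single ring and then move any objects whose pending GPU
 * writes were covered by the flush onto that ring's active list.
 */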
20f0cd55 2075static void
9220434a
CW
2076i915_gem_flush_ring(struct drm_device *dev,
2077 struct intel_ring_buffer *ring,
2078 uint32_t invalidate_domains,
2079 uint32_t flush_domains)
2080{
78501eac 2081 ring->flush(ring, invalidate_domains, flush_domains);
9220434a
CW
2082 i915_gem_process_flushing_list(dev, flush_domains, ring);
2083}
2084
8187a2b7
ZN
2085static void
2086i915_gem_flush(struct drm_device *dev,
2087 uint32_t invalidate_domains,
9220434a
CW
2088 uint32_t flush_domains,
2089 uint32_t flush_rings)
8187a2b7
ZN
2090{
2091 drm_i915_private_t *dev_priv = dev->dev_private;
8bff917c 2092
8187a2b7 2093 if (flush_domains & I915_GEM_DOMAIN_CPU)
40ce6575 2094 intel_gtt_chipset_flush();
8bff917c 2095
9220434a
CW
2096 if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
2097 if (flush_rings & RING_RENDER)
05394f39 2098 i915_gem_flush_ring(dev, &dev_priv->render_ring,
9220434a
CW
2099 invalidate_domains, flush_domains);
2100 if (flush_rings & RING_BSD)
05394f39 2101 i915_gem_flush_ring(dev, &dev_priv->bsd_ring,
9220434a 2102 invalidate_domains, flush_domains);
549f7365 2103 if (flush_rings & RING_BLT)
05394f39 2104 i915_gem_flush_ring(dev, &dev_priv->blt_ring,
549f7365 2105 invalidate_domains, flush_domains);
9220434a 2106 }
8187a2b7
ZN
2107}
2108
673a394b
EA
2109/**
2110 * Ensures that all rendering to the object has completed and the object is
2111 * safe to unbind from the GTT or access from the CPU.
2112 */
2113static int
05394f39 2114i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
2cf34d7b 2115 bool interruptible)
673a394b 2116{
05394f39 2117 struct drm_device *dev = obj->base.dev;
673a394b
EA
2118 int ret;
2119
e47c68e9
EA
2120 /* This function only exists to support waiting for existing rendering,
2121 * not for emitting required flushes.
673a394b 2122 */
05394f39 2123 BUG_ON((obj->base.write_domain & I915_GEM_GPU_DOMAINS) != 0);
673a394b
EA
2124
2125 /* If there is rendering queued on the buffer being evicted, wait for
2126 * it.
2127 */
05394f39 2128 if (obj->active) {
2cf34d7b 2129 ret = i915_do_wait_request(dev,
05394f39 2130 obj->last_rendering_seqno,
2cf34d7b 2131 interruptible,
05394f39 2132 obj->ring);
2cf34d7b 2133 if (ret)
673a394b
EA
2134 return ret;
2135 }
2136
2137 return 0;
2138}
2139
2140/**
2141 * Unbinds an object from the GTT aperture.
2142 */
0f973f27 2143int
05394f39 2144i915_gem_object_unbind(struct drm_i915_gem_object *obj)
673a394b 2145{
673a394b
EA
2146 int ret = 0;
2147
05394f39 2148 if (obj->gtt_space == NULL)
673a394b
EA
2149 return 0;
2150
05394f39 2151 if (obj->pin_count != 0) {
673a394b
EA
2152 DRM_ERROR("Attempting to unbind pinned buffer\n");
2153 return -EINVAL;
2154 }
2155
5323fd04
EA
2156 /* blow away mappings if mapped through GTT */
2157 i915_gem_release_mmap(obj);
2158
673a394b
EA
2159 /* Move the object to the CPU domain to ensure that
2160 * any possible CPU writes while it's not in the GTT
2161 * are flushed when we go to remap it. This will
2162 * also ensure that all pending GPU writes are finished
2163 * before we unbind.
2164 */
e47c68e9 2165 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
8dc1775d 2166 if (ret == -ERESTARTSYS)
673a394b 2167 return ret;
8dc1775d
CW
2168 /* Continue on if we fail due to EIO, the GPU is hung so we
2169 * should be safe and we need to cleanup or else we might
2170 * cause memory corruption through use-after-free.
2171 */
812ed492
CW
2172 if (ret) {
2173 i915_gem_clflush_object(obj);
05394f39 2174 obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
812ed492 2175 }
673a394b 2176
96b47b65 2177 /* release the fence reg _after_ flushing */
05394f39 2178 if (obj->fence_reg != I915_FENCE_REG_NONE)
96b47b65
DV
2179 i915_gem_clear_fence_reg(obj);
2180
7c2e6fdf 2181 i915_gem_gtt_unbind_object(obj);
e5281ccd 2182 i915_gem_object_put_pages_gtt(obj);
673a394b 2183
6299f992 2184 list_del_init(&obj->gtt_list);
05394f39 2185 list_del_init(&obj->mm_list);
75e9e915 2186 /* Avoid an unnecessary call to unbind on rebind. */
05394f39 2187 obj->map_and_fenceable = true;
673a394b 2188
05394f39
CW
2189 drm_mm_put_block(obj->gtt_space);
2190 obj->gtt_space = NULL;
2191 obj->gtt_offset = 0;
673a394b 2192
05394f39 2193 if (i915_gem_object_is_purgeable(obj))
963b4836
CW
2194 i915_gem_object_truncate(obj);
2195
1c5d22f7
CW
2196 trace_i915_gem_object_unbind(obj);
2197
8dc1775d 2198 return ret;
673a394b
EA
2199}
2200
a56ba56c
CW
2201static int i915_ring_idle(struct drm_device *dev,
2202 struct intel_ring_buffer *ring)
2203{
395b70be 2204 if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list))
64193406
CW
2205 return 0;
2206
05394f39 2207 i915_gem_flush_ring(dev, ring,
a56ba56c
CW
2208 I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
2209 return i915_wait_request(dev,
2210 i915_gem_next_request_seqno(dev, ring),
2211 ring);
2212}
2213
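/* Flush and drain every ring so that nothing remains on the flushing or
 * active lists and all objects end up inactive.
 */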
b47eb4a2 2214int
4df2faf4
DV
2215i915_gpu_idle(struct drm_device *dev)
2216{
2217 drm_i915_private_t *dev_priv = dev->dev_private;
2218 bool lists_empty;
852835f3 2219 int ret;
4df2faf4 2220
d1b851fc 2221 lists_empty = (list_empty(&dev_priv->mm.flushing_list) &&
395b70be 2222 list_empty(&dev_priv->mm.active_list));
4df2faf4
DV
2223 if (lists_empty)
2224 return 0;
2225
2226 /* Flush everything onto the inactive list. */
a56ba56c 2227 ret = i915_ring_idle(dev, &dev_priv->render_ring);
8a1a49f9
DV
2228 if (ret)
2229 return ret;
d1b851fc 2230
87acb0a5
CW
2231 ret = i915_ring_idle(dev, &dev_priv->bsd_ring);
2232 if (ret)
2233 return ret;
d1b851fc 2234
549f7365
CW
2235 ret = i915_ring_idle(dev, &dev_priv->blt_ring);
2236 if (ret)
2237 return ret;
4df2faf4 2238
8a1a49f9 2239 return 0;
4df2faf4
DV
2240}
2241
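/* Per-generation fence register writers. Each packs the object's GTT
 * range, stride and tiling mode into the hardware fence layout and
 * either writes it directly via MMIO or, when a pipelined ring is
 * supplied, emits it as an MI_LOAD_REGISTER_IMM on that ring.
 *
 * Rough shape of the 64-bit value built below for gen4+ (a sketch only;
 * the code that follows is authoritative for the bit positions):
 *
 *	val  = ((gtt_offset + size - 4096) & 0xfffff000) << 32;
 *	val |= gtt_offset & 0xfffff000;
 *	val |= ((stride / 128) - 1) << per-gen FENCE_PITCH_SHIFT;
 *	val |= tiling and REG_VALID bits;
 */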
c6642782
DV
2242static int sandybridge_write_fence_reg(struct drm_i915_gem_object *obj,
2243 struct intel_ring_buffer *pipelined)
4e901fdc 2244{
05394f39 2245 struct drm_device *dev = obj->base.dev;
4e901fdc 2246 drm_i915_private_t *dev_priv = dev->dev_private;
05394f39
CW
2247 u32 size = obj->gtt_space->size;
2248 int regnum = obj->fence_reg;
4e901fdc
EA
2249 uint64_t val;
2250
05394f39 2251 val = (uint64_t)((obj->gtt_offset + size - 4096) &
c6642782 2252 0xfffff000) << 32;
05394f39
CW
2253 val |= obj->gtt_offset & 0xfffff000;
2254 val |= (uint64_t)((obj->stride / 128) - 1) <<
4e901fdc
EA
2255 SANDYBRIDGE_FENCE_PITCH_SHIFT;
2256
05394f39 2257 if (obj->tiling_mode == I915_TILING_Y)
4e901fdc
EA
2258 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2259 val |= I965_FENCE_REG_VALID;
2260
c6642782
DV
2261 if (pipelined) {
2262 int ret = intel_ring_begin(pipelined, 6);
2263 if (ret)
2264 return ret;
2265
2266 intel_ring_emit(pipelined, MI_NOOP);
2267 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2));
2268 intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8);
2269 intel_ring_emit(pipelined, (u32)val);
2270 intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8 + 4);
2271 intel_ring_emit(pipelined, (u32)(val >> 32));
2272 intel_ring_advance(pipelined);
2273 } else
2274 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + regnum * 8, val);
2275
2276 return 0;
4e901fdc
EA
2277}
2278
c6642782
DV
2279static int i965_write_fence_reg(struct drm_i915_gem_object *obj,
2280 struct intel_ring_buffer *pipelined)
de151cf6 2281{
05394f39 2282 struct drm_device *dev = obj->base.dev;
de151cf6 2283 drm_i915_private_t *dev_priv = dev->dev_private;
05394f39
CW
2284 u32 size = obj->gtt_space->size;
2285 int regnum = obj->fence_reg;
de151cf6
JB
2286 uint64_t val;
2287
05394f39 2288 val = (uint64_t)((obj->gtt_offset + size - 4096) &
de151cf6 2289 0xfffff000) << 32;
05394f39
CW
2290 val |= obj->gtt_offset & 0xfffff000;
2291 val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
2292 if (obj->tiling_mode == I915_TILING_Y)
de151cf6
JB
2293 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2294 val |= I965_FENCE_REG_VALID;
2295
c6642782
DV
2296 if (pipelined) {
2297 int ret = intel_ring_begin(pipelined, 6);
2298 if (ret)
2299 return ret;
2300
2301 intel_ring_emit(pipelined, MI_NOOP);
2302 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2));
2303 intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8);
2304 intel_ring_emit(pipelined, (u32)val);
2305 intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8 + 4);
2306 intel_ring_emit(pipelined, (u32)(val >> 32));
2307 intel_ring_advance(pipelined);
2308 } else
2309 I915_WRITE64(FENCE_REG_965_0 + regnum * 8, val);
2310
2311 return 0;
de151cf6
JB
2312}
2313
c6642782
DV
2314static int i915_write_fence_reg(struct drm_i915_gem_object *obj,
2315 struct intel_ring_buffer *pipelined)
de151cf6 2316{
05394f39 2317 struct drm_device *dev = obj->base.dev;
de151cf6 2318 drm_i915_private_t *dev_priv = dev->dev_private;
05394f39 2319 u32 size = obj->gtt_space->size;
c6642782 2320 u32 fence_reg, val, pitch_val;
0f973f27 2321 int tile_width;
de151cf6 2322
c6642782
DV
2323 if (WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
2324 (size & -size) != size ||
2325 (obj->gtt_offset & (size - 1)),
2326 "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
2327 obj->gtt_offset, obj->map_and_fenceable, size))
2328 return -EINVAL;
de151cf6 2329
c6642782 2330 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
0f973f27 2331 tile_width = 128;
de151cf6 2332 else
0f973f27
JB
2333 tile_width = 512;
2334
2335 /* Note: the pitch must be a power-of-two number of tile widths */
05394f39 2336 pitch_val = obj->stride / tile_width;
0f973f27 2337 pitch_val = ffs(pitch_val) - 1;
de151cf6 2338
05394f39
CW
2339 val = obj->gtt_offset;
2340 if (obj->tiling_mode == I915_TILING_Y)
de151cf6 2341 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
a00b10c3 2342 val |= I915_FENCE_SIZE_BITS(size);
de151cf6
JB
2343 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2344 val |= I830_FENCE_REG_VALID;
2345
05394f39 2346 fence_reg = obj->fence_reg;
a00b10c3
CW
2347 if (fence_reg < 8)
2348 fence_reg = FENCE_REG_830_0 + fence_reg * 4;
dc529a4f 2349 else
a00b10c3 2350 fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4;
c6642782
DV
2351
2352 if (pipelined) {
2353 int ret = intel_ring_begin(pipelined, 4);
2354 if (ret)
2355 return ret;
2356
2357 intel_ring_emit(pipelined, MI_NOOP);
2358 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1));
2359 intel_ring_emit(pipelined, fence_reg);
2360 intel_ring_emit(pipelined, val);
2361 intel_ring_advance(pipelined);
2362 } else
2363 I915_WRITE(fence_reg, val);
2364
2365 return 0;
de151cf6
JB
2366}
2367
c6642782
DV
2368static int i830_write_fence_reg(struct drm_i915_gem_object *obj,
2369 struct intel_ring_buffer *pipelined)
de151cf6 2370{
05394f39 2371 struct drm_device *dev = obj->base.dev;
de151cf6 2372 drm_i915_private_t *dev_priv = dev->dev_private;
05394f39
CW
2373 u32 size = obj->gtt_space->size;
2374 int regnum = obj->fence_reg;
de151cf6
JB
2375 uint32_t val;
2376 uint32_t pitch_val;
2377
c6642782
DV
2378 if (WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
2379 (size & -size) != size ||
2380 (obj->gtt_offset & (size - 1)),
2381 "object 0x%08x not 512K or pot-size 0x%08x aligned\n",
2382 obj->gtt_offset, size))
2383 return -EINVAL;
de151cf6 2384
05394f39 2385 pitch_val = obj->stride / 128;
e76a16de 2386 pitch_val = ffs(pitch_val) - 1;
e76a16de 2387
05394f39
CW
2388 val = obj->gtt_offset;
2389 if (obj->tiling_mode == I915_TILING_Y)
de151cf6 2390 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
c6642782 2391 val |= I830_FENCE_SIZE_BITS(size);
de151cf6
JB
2392 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2393 val |= I830_FENCE_REG_VALID;
2394
c6642782
DV
2395 if (pipelined) {
2396 int ret = intel_ring_begin(pipelined, 4);
2397 if (ret)
2398 return ret;
2399
2400 intel_ring_emit(pipelined, MI_NOOP);
2401 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1));
2402 intel_ring_emit(pipelined, FENCE_REG_830_0 + regnum*4);
2403 intel_ring_emit(pipelined, val);
2404 intel_ring_advance(pipelined);
2405 } else
2406 I915_WRITE(FENCE_REG_830_0 + regnum * 4, val);
2407
2408 return 0;
de151cf6
JB
2409}
2410
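/* Find a fence register for reuse: prefer a completely free one,
 * otherwise steal the least-recently-used register whose object is not
 * pinned, waiting for that object's outstanding fenced access first.
 */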
2cf34d7b
CW
2411static int i915_find_fence_reg(struct drm_device *dev,
2412 bool interruptible)
ae3db24a 2413{
ae3db24a 2414 struct drm_i915_private *dev_priv = dev->dev_private;
a00b10c3 2415 struct drm_i915_fence_reg *reg;
05394f39 2416 struct drm_i915_gem_object *obj = NULL;
ae3db24a
DV
2417 int i, avail, ret;
2418
2419 /* First try to find a free reg */
2420 avail = 0;
2421 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2422 reg = &dev_priv->fence_regs[i];
2423 if (!reg->obj)
2424 return i;
2425
05394f39
CW
2426 if (!reg->obj->pin_count)
2427 avail++;
ae3db24a
DV
2428 }
2429
2430 if (avail == 0)
2431 return -ENOSPC;
2432
2433 /* None available, try to steal one or wait for a user to finish */
a00b10c3 2434 avail = I915_FENCE_REG_NONE;
007cc8ac
DV
2435 list_for_each_entry(reg, &dev_priv->mm.fence_list,
2436 lru_list) {
05394f39
CW
2437 obj = reg->obj;
2438 if (obj->pin_count)
ae3db24a
DV
2439 continue;
2440
2441 /* found one! */
05394f39 2442 avail = obj->fence_reg;
ae3db24a
DV
2443 break;
2444 }
2445
a00b10c3 2446 BUG_ON(avail == I915_FENCE_REG_NONE);
ae3db24a
DV
2447
2448 /* We only have a reference on obj from the active list. put_fence_reg
2449 * might drop that one, causing a use-after-free in it. So hold a
2450 * private reference to obj like the other callers of put_fence_reg
2451 * (set_tiling ioctl) do. */
05394f39
CW
2452 drm_gem_object_reference(&obj->base);
2453 ret = i915_gem_object_put_fence_reg(obj, interruptible);
2454 drm_gem_object_unreference(&obj->base);
ae3db24a
DV
2455 if (ret != 0)
2456 return ret;
2457
a00b10c3 2458 return avail;
ae3db24a
DV
2459}
2460
de151cf6
JB
2461/**
2462 * i915_gem_object_get_fence_reg - set up a fence reg for an object
2463 * @obj: object to map through a fence reg
2464 *
2465 * When mapping objects through the GTT, userspace wants to be able to write
2466 * to them without having to worry about swizzling if the object is tiled.
2467 *
2468 * This function walks the fence regs looking for a free one for @obj,
2469 * stealing one if it can't find any.
2470 *
2471 * It then sets up the reg based on the object's properties: address, pitch
2472 * and tiling format.
2473 */
8c4b8c3f 2474int
05394f39 2475i915_gem_object_get_fence_reg(struct drm_i915_gem_object *obj,
2cf34d7b 2476 bool interruptible)
de151cf6 2477{
05394f39 2478 struct drm_device *dev = obj->base.dev;
79e53945 2479 struct drm_i915_private *dev_priv = dev->dev_private;
de151cf6 2480 struct drm_i915_fence_reg *reg = NULL;
c6642782 2481 struct intel_ring_buffer *pipelined = NULL;
ae3db24a 2482 int ret;
de151cf6 2483
a09ba7fa 2484 /* Just update our place in the LRU if our fence is getting used. */
05394f39
CW
2485 if (obj->fence_reg != I915_FENCE_REG_NONE) {
2486 reg = &dev_priv->fence_regs[obj->fence_reg];
007cc8ac 2487 list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
a09ba7fa
EA
2488 return 0;
2489 }
2490
05394f39 2491 switch (obj->tiling_mode) {
de151cf6
JB
2492 case I915_TILING_NONE:
2493 WARN(1, "allocating a fence for non-tiled object?\n");
2494 break;
2495 case I915_TILING_X:
05394f39 2496 if (!obj->stride)
0f973f27 2497 return -EINVAL;
05394f39 2498 WARN((obj->stride & (512 - 1)),
0f973f27 2499 "object 0x%08x is X tiled but has non-512B pitch\n",
05394f39 2500 obj->gtt_offset);
de151cf6
JB
2501 break;
2502 case I915_TILING_Y:
05394f39 2503 if (!obj->stride)
0f973f27 2504 return -EINVAL;
05394f39 2505 WARN((obj->stride & (128 - 1)),
0f973f27 2506 "object 0x%08x is Y tiled but has non-128B pitch\n",
05394f39 2507 obj->gtt_offset);
de151cf6
JB
2508 break;
2509 }
2510
2cf34d7b 2511 ret = i915_find_fence_reg(dev, interruptible);
ae3db24a
DV
2512 if (ret < 0)
2513 return ret;
de151cf6 2514
05394f39
CW
2515 obj->fence_reg = ret;
2516 reg = &dev_priv->fence_regs[obj->fence_reg];
007cc8ac 2517 list_add_tail(&reg->lru_list, &dev_priv->mm.fence_list);
a09ba7fa 2518
de151cf6
JB
2519 reg->obj = obj;
2520
e259befd
CW
2521 switch (INTEL_INFO(dev)->gen) {
2522 case 6:
c6642782 2523 ret = sandybridge_write_fence_reg(obj, pipelined);
e259befd
CW
2524 break;
2525 case 5:
2526 case 4:
c6642782 2527 ret = i965_write_fence_reg(obj, pipelined);
e259befd
CW
2528 break;
2529 case 3:
c6642782 2530 ret = i915_write_fence_reg(obj, pipelined);
e259befd
CW
2531 break;
2532 case 2:
c6642782 2533 ret = i830_write_fence_reg(obj, pipelined);
e259befd
CW
2534 break;
2535 }
d9ddcb96 2536
a00b10c3 2537 trace_i915_gem_object_get_fence(obj,
05394f39
CW
2538 obj->fence_reg,
2539 obj->tiling_mode);
c6642782 2540 return ret;
de151cf6
JB
2541}
2542
2543/**
2544 * i915_gem_clear_fence_reg - clear out fence register info
2545 * @obj: object to clear
2546 *
2547 * Zeroes out the fence register itself and clears out the associated
05394f39 2548 * data structures in dev_priv and obj.
de151cf6
JB
2549 */
2550static void
05394f39 2551i915_gem_clear_fence_reg(struct drm_i915_gem_object *obj)
de151cf6 2552{
05394f39 2553 struct drm_device *dev = obj->base.dev;
79e53945 2554 drm_i915_private_t *dev_priv = dev->dev_private;
05394f39 2555 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[obj->fence_reg];
e259befd 2556 uint32_t fence_reg;
de151cf6 2557
e259befd
CW
2558 switch (INTEL_INFO(dev)->gen) {
2559 case 6:
4e901fdc 2560 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 +
05394f39 2561 (obj->fence_reg * 8), 0);
e259befd
CW
2562 break;
2563 case 5:
2564 case 4:
05394f39 2565 I915_WRITE64(FENCE_REG_965_0 + (obj->fence_reg * 8), 0);
e259befd
CW
2566 break;
2567 case 3:
05394f39
CW
2568 if (obj->fence_reg >= 8)
2569 fence_reg = FENCE_REG_945_8 + (obj->fence_reg - 8) * 4;
dc529a4f 2570 else
e259befd 2571 case 2:
05394f39 2572 fence_reg = FENCE_REG_830_0 + obj->fence_reg * 4;
dc529a4f
EA
2573
2574 I915_WRITE(fence_reg, 0);
e259befd 2575 break;
dc529a4f 2576 }
de151cf6 2577
007cc8ac 2578 reg->obj = NULL;
05394f39 2579 obj->fence_reg = I915_FENCE_REG_NONE;
007cc8ac 2580 list_del_init(&reg->lru_list);
de151cf6
JB
2581}
2582
52dc7d32
CW
2583/**
2584 * i915_gem_object_put_fence_reg - waits on outstanding fenced access
2585 * to the buffer to finish, and then resets the fence register.
2586 * @obj: tiled object holding a fence register.
2cf34d7b 2587 * @bool: whether the wait upon the fence is interruptible
52dc7d32
CW
2588 *
2589 * Zeroes out the fence register itself and clears out the associated
05394f39 2590 * data structures in dev_priv and obj.
52dc7d32
CW
2591 */
2592int
05394f39 2593i915_gem_object_put_fence_reg(struct drm_i915_gem_object *obj,
2cf34d7b 2594 bool interruptible)
52dc7d32 2595{
05394f39 2596 struct drm_device *dev = obj->base.dev;
caea7476 2597 int ret;
52dc7d32 2598
05394f39 2599 if (obj->fence_reg == I915_FENCE_REG_NONE)
52dc7d32
CW
2600 return 0;
2601
10ae9bd2
DV
2602 /* If we've changed tiling, GTT-mappings of the object
2603 * need to re-fault to ensure that the correct fence register
2604 * setup is in place.
2605 */
2606 i915_gem_release_mmap(obj);
2607
52dc7d32
CW
2608 /* On the i915, GPU access to tiled buffers is via a fence,
2609 * therefore we must wait for any outstanding access to complete
2610 * before clearing the fence.
2611 */
caea7476 2612 if (obj->fenced_gpu_access) {
919926ae 2613 ret = i915_gem_object_flush_gpu_write_domain(obj, NULL);
0bc23aad 2614 if (ret)
2dafb1e0
CW
2615 return ret;
2616
caea7476
CW
2617 obj->fenced_gpu_access = false;
2618 }
2619
2620 if (obj->last_fenced_seqno) {
2621 ret = i915_do_wait_request(dev,
2622 obj->last_fenced_seqno,
2623 interruptible,
2624 obj->last_fenced_ring);
0bc23aad 2625 if (ret)
52dc7d32 2626 return ret;
53640e1d 2627
caea7476 2628 obj->last_fenced_seqno = 0;
52dc7d32
CW
2629 }
2630
4a726612 2631 i915_gem_object_flush_gtt_write_domain(obj);
0bc23aad 2632 i915_gem_clear_fence_reg(obj);
52dc7d32
CW
2633
2634 return 0;
2635}
2636
673a394b
EA
2637/**
2638 * Finds free space in the GTT aperture and binds the object there.
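 * With map_and_fenceable set, the node is restricted to the mappable
 * part of the GTT and sized/aligned for a fence register; otherwise the
 * smaller unfenced alignment and the object's own size are used.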
2639 */
2640static int
05394f39 2641i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
920afa77 2642 unsigned alignment,
75e9e915 2643 bool map_and_fenceable)
673a394b 2644{
05394f39 2645 struct drm_device *dev = obj->base.dev;
673a394b 2646 drm_i915_private_t *dev_priv = dev->dev_private;
673a394b 2647 struct drm_mm_node *free_space;
a00b10c3 2648 gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN;
5e783301 2649 u32 size, fence_size, fence_alignment, unfenced_alignment;
75e9e915 2650 bool mappable, fenceable;
07f73f69 2651 int ret;
673a394b 2652
05394f39 2653 if (obj->madv != I915_MADV_WILLNEED) {
3ef94daa
CW
2654 DRM_ERROR("Attempting to bind a purgeable object\n");
2655 return -EINVAL;
2656 }
2657
05394f39
CW
2658 fence_size = i915_gem_get_gtt_size(obj);
2659 fence_alignment = i915_gem_get_gtt_alignment(obj);
2660 unfenced_alignment = i915_gem_get_unfenced_gtt_alignment(obj);
a00b10c3 2661
673a394b 2662 if (alignment == 0)
5e783301
DV
2663 alignment = map_and_fenceable ? fence_alignment :
2664 unfenced_alignment;
75e9e915 2665 if (map_and_fenceable && alignment & (fence_alignment - 1)) {
673a394b
EA
2666 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
2667 return -EINVAL;
2668 }
2669
05394f39 2670 size = map_and_fenceable ? fence_size : obj->base.size;
a00b10c3 2671
654fc607
CW
2672 /* If the object is bigger than the entire aperture, reject it early
2673 * before evicting everything in a vain attempt to find space.
2674 */
05394f39 2675 if (obj->base.size >
75e9e915 2676 (map_and_fenceable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) {
654fc607
CW
2677 DRM_ERROR("Attempting to bind an object larger than the aperture\n");
2678 return -E2BIG;
2679 }
2680
673a394b 2681 search_free:
75e9e915 2682 if (map_and_fenceable)
920afa77
DV
2683 free_space =
2684 drm_mm_search_free_in_range(&dev_priv->mm.gtt_space,
a00b10c3 2685 size, alignment, 0,
920afa77
DV
2686 dev_priv->mm.gtt_mappable_end,
2687 0);
2688 else
2689 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
a00b10c3 2690 size, alignment, 0);
920afa77
DV
2691
2692 if (free_space != NULL) {
75e9e915 2693 if (map_and_fenceable)
05394f39 2694 obj->gtt_space =
920afa77 2695 drm_mm_get_block_range_generic(free_space,
a00b10c3 2696 size, alignment, 0,
920afa77
DV
2697 dev_priv->mm.gtt_mappable_end,
2698 0);
2699 else
05394f39 2700 obj->gtt_space =
a00b10c3 2701 drm_mm_get_block(free_space, size, alignment);
920afa77 2702 }
05394f39 2703 if (obj->gtt_space == NULL) {
673a394b
EA
2704 /* If the gtt is empty and we're still having trouble
2705 * fitting our object in, we're out of memory.
2706 */
75e9e915
DV
2707 ret = i915_gem_evict_something(dev, size, alignment,
2708 map_and_fenceable);
9731129c 2709 if (ret)
673a394b 2710 return ret;
9731129c 2711
673a394b
EA
2712 goto search_free;
2713 }
2714
e5281ccd 2715 ret = i915_gem_object_get_pages_gtt(obj, gfpmask);
673a394b 2716 if (ret) {
05394f39
CW
2717 drm_mm_put_block(obj->gtt_space);
2718 obj->gtt_space = NULL;
07f73f69
CW
2719
2720 if (ret == -ENOMEM) {
2721 /* first try to clear up some space from the GTT */
a00b10c3 2722 ret = i915_gem_evict_something(dev, size,
75e9e915
DV
2723 alignment,
2724 map_and_fenceable);
07f73f69 2725 if (ret) {
07f73f69 2726 /* now try to shrink everyone else */
4bdadb97
CW
2727 if (gfpmask) {
2728 gfpmask = 0;
2729 goto search_free;
07f73f69
CW
2730 }
2731
2732 return ret;
2733 }
2734
2735 goto search_free;
2736 }
2737
673a394b
EA
2738 return ret;
2739 }
2740
7c2e6fdf
DV
2741 ret = i915_gem_gtt_bind_object(obj);
2742 if (ret) {
e5281ccd 2743 i915_gem_object_put_pages_gtt(obj);
05394f39
CW
2744 drm_mm_put_block(obj->gtt_space);
2745 obj->gtt_space = NULL;
07f73f69 2746
a00b10c3 2747 ret = i915_gem_evict_something(dev, size,
75e9e915 2748 alignment, map_and_fenceable);
9731129c 2749 if (ret)
07f73f69 2750 return ret;
07f73f69
CW
2751
2752 goto search_free;
673a394b 2753 }
673a394b 2754
6299f992 2755 list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list);
05394f39 2756 list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
bf1a1092 2757
673a394b
EA
2758 /* Assert that the object is not currently in any GPU domain. As it
2759 * wasn't in the GTT, there shouldn't be any way it could have been in
2760 * a GPU cache
2761 */
05394f39
CW
2762 BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
2763 BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
673a394b 2764
6299f992 2765 obj->gtt_offset = obj->gtt_space->start;
1c5d22f7 2766
75e9e915 2767 fenceable =
05394f39
CW
2768 obj->gtt_space->size == fence_size &&
2769 (obj->gtt_space->start & (fence_alignment - 1)) == 0;
a00b10c3 2770
75e9e915 2771 mappable =
05394f39 2772 obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
a00b10c3 2773
05394f39 2774 obj->map_and_fenceable = mappable && fenceable;
75e9e915 2775
6299f992 2776 trace_i915_gem_object_bind(obj, obj->gtt_offset, map_and_fenceable);
673a394b
EA
2777 return 0;
2778}
2779
2780void
05394f39 2781i915_gem_clflush_object(struct drm_i915_gem_object *obj)
673a394b 2782{
673a394b
EA
2783 /* If we don't have a page list set up, then we're not pinned
2784 * to GPU, and we can ignore the cache flush because it'll happen
2785 * again at bind time.
2786 */
05394f39 2787 if (obj->pages == NULL)
673a394b
EA
2788 return;
2789
1c5d22f7 2790 trace_i915_gem_object_clflush(obj);
cfa16a0d 2791
05394f39 2792 drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
673a394b
EA
2793}
2794
e47c68e9 2795/** Flushes any GPU write domain for the object if it's dirty. */
2dafb1e0 2796static int
05394f39 2797i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj,
919926ae 2798 struct intel_ring_buffer *pipelined)
e47c68e9 2799{
05394f39 2800 struct drm_device *dev = obj->base.dev;
e47c68e9 2801
05394f39 2802 if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0)
2dafb1e0 2803 return 0;
e47c68e9
EA
2804
2805 /* Queue the GPU write cache flushing we need. */
05394f39
CW
2806 i915_gem_flush_ring(dev, obj->ring, 0, obj->base.write_domain);
2807 BUG_ON(obj->base.write_domain);
1c5d22f7 2808
919926ae 2809 if (pipelined && pipelined == obj->ring)
ba3d8d74
DV
2810 return 0;
2811
2cf34d7b 2812 return i915_gem_object_wait_rendering(obj, true);
e47c68e9
EA
2813}
2814
2815/** Flushes the GTT write domain for the object if it's dirty. */
2816static void
05394f39 2817i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
e47c68e9 2818{
1c5d22f7
CW
2819 uint32_t old_write_domain;
2820
05394f39 2821 if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
e47c68e9
EA
2822 return;
2823
2824 /* No actual flushing is required for the GTT write domain. Writes
2825 * to it immediately go to main memory as far as we know, so there's
2826 * no chipset flush. It also doesn't land in render cache.
2827 */
4a684a41
CW
2828 i915_gem_release_mmap(obj);
2829
05394f39
CW
2830 old_write_domain = obj->base.write_domain;
2831 obj->base.write_domain = 0;
1c5d22f7
CW
2832
2833 trace_i915_gem_object_change_domain(obj,
05394f39 2834 obj->base.read_domains,
1c5d22f7 2835 old_write_domain);
e47c68e9
EA
2836}
2837
2838/** Flushes the CPU write domain for the object if it's dirty. */
2839static void
05394f39 2840i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
e47c68e9 2841{
1c5d22f7 2842 uint32_t old_write_domain;
e47c68e9 2843
05394f39 2844 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
e47c68e9
EA
2845 return;
2846
2847 i915_gem_clflush_object(obj);
40ce6575 2848 intel_gtt_chipset_flush();
05394f39
CW
2849 old_write_domain = obj->base.write_domain;
2850 obj->base.write_domain = 0;
1c5d22f7
CW
2851
2852 trace_i915_gem_object_change_domain(obj,
05394f39 2853 obj->base.read_domains,
1c5d22f7 2854 old_write_domain);
e47c68e9
EA
2855}
2856
2ef7eeaa
EA
2857/**
2858 * Moves a single object to the GTT read, and possibly write domain.
2859 *
2860 * This function returns when the move is complete, including waiting on
2861 * flushes to occur.
2862 */
79e53945 2863int
2021746e 2864i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
2ef7eeaa 2865{
1c5d22f7 2866 uint32_t old_write_domain, old_read_domains;
e47c68e9 2867 int ret;
2ef7eeaa 2868
02354392 2869 /* Not valid to be called on unbound objects. */
05394f39 2870 if (obj->gtt_space == NULL)
02354392
EA
2871 return -EINVAL;
2872
919926ae 2873 ret = i915_gem_object_flush_gpu_write_domain(obj, NULL);
2dafb1e0
CW
2874 if (ret != 0)
2875 return ret;
2876
7213342d 2877 i915_gem_object_flush_cpu_write_domain(obj);
1c5d22f7 2878
ba3d8d74 2879 if (write) {
2cf34d7b 2880 ret = i915_gem_object_wait_rendering(obj, true);
ba3d8d74
DV
2881 if (ret)
2882 return ret;
ba3d8d74 2883 }
e47c68e9 2884
05394f39
CW
2885 old_write_domain = obj->base.write_domain;
2886 old_read_domains = obj->base.read_domains;
1c5d22f7 2887
e47c68e9
EA
2888 /* It should now be out of any other write domains, and we can update
2889 * the domain values for our changes.
2890 */
05394f39
CW
2891 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2892 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
e47c68e9 2893 if (write) {
05394f39
CW
2894 obj->base.read_domains = I915_GEM_DOMAIN_GTT;
2895 obj->base.write_domain = I915_GEM_DOMAIN_GTT;
2896 obj->dirty = 1;
2ef7eeaa
EA
2897 }
2898
1c5d22f7
CW
2899 trace_i915_gem_object_change_domain(obj,
2900 old_read_domains,
2901 old_write_domain);
2902
e47c68e9
EA
2903 return 0;
2904}
2905
b9241ea3
ZW
2906/*
2907 * Prepare buffer for display plane. Use uninterruptible for possible flush
2908 * wait, as in the modesetting process we're not supposed to be interrupted.
2909 */
2910int
05394f39 2911i915_gem_object_set_to_display_plane(struct drm_i915_gem_object *obj,
919926ae 2912 struct intel_ring_buffer *pipelined)
b9241ea3 2913{
ba3d8d74 2914 uint32_t old_read_domains;
b9241ea3
ZW
2915 int ret;
2916
2917 /* Not valid to be called on unbound objects. */
05394f39 2918 if (obj->gtt_space == NULL)
b9241ea3
ZW
2919 return -EINVAL;
2920
919926ae 2921 ret = i915_gem_object_flush_gpu_write_domain(obj, pipelined);
2dafb1e0
CW
2922 if (ret)
2923 return ret;
b9241ea3 2924
ced270fa
CW
2925 /* Currently, we are always called from a non-interruptible context. */
2926 if (!pipelined) {
2927 ret = i915_gem_object_wait_rendering(obj, false);
2928 if (ret)
b9241ea3
ZW
2929 return ret;
2930 }
2931
b118c1e3
CW
2932 i915_gem_object_flush_cpu_write_domain(obj);
2933
05394f39
CW
2934 old_read_domains = obj->base.read_domains;
2935 obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
b9241ea3
ZW
2936
2937 trace_i915_gem_object_change_domain(obj,
2938 old_read_domains,
05394f39 2939 obj->base.write_domain);
b9241ea3
ZW
2940
2941 return 0;
2942}
2943
85345517
CW
2944int
2945i915_gem_object_flush_gpu(struct drm_i915_gem_object *obj,
2946 bool interruptible)
2947{
2948 if (!obj->active)
2949 return 0;
2950
2951 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS)
05394f39 2952 i915_gem_flush_ring(obj->base.dev, obj->ring,
85345517
CW
2953 0, obj->base.write_domain);
2954
05394f39 2955 return i915_gem_object_wait_rendering(obj, interruptible);
85345517
CW
2956}
2957
e47c68e9
EA
2958/**
2959 * Moves a single object to the CPU read, and possibly write domain.
2960 *
2961 * This function returns when the move is complete, including waiting on
2962 * flushes to occur.
2963 */
2964static int
919926ae 2965i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
e47c68e9 2966{
1c5d22f7 2967 uint32_t old_write_domain, old_read_domains;
e47c68e9
EA
2968 int ret;
2969
ba3d8d74 2970 ret = i915_gem_object_flush_gpu_write_domain(obj, false);
e47c68e9
EA
2971 if (ret != 0)
2972 return ret;
2ef7eeaa 2973
e47c68e9 2974 i915_gem_object_flush_gtt_write_domain(obj);
2ef7eeaa 2975
e47c68e9
EA
2976 /* If we have a partially-valid cache of the object in the CPU,
2977 * finish invalidating it and free the per-page flags.
2ef7eeaa 2978 */
e47c68e9 2979 i915_gem_object_set_to_full_cpu_read_domain(obj);
2ef7eeaa 2980
7213342d 2981 if (write) {
2cf34d7b 2982 ret = i915_gem_object_wait_rendering(obj, true);
7213342d
CW
2983 if (ret)
2984 return ret;
2985 }
2986
05394f39
CW
2987 old_write_domain = obj->base.write_domain;
2988 old_read_domains = obj->base.read_domains;
1c5d22f7 2989
e47c68e9 2990 /* Flush the CPU cache if it's still invalid. */
05394f39 2991 if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
2ef7eeaa 2992 i915_gem_clflush_object(obj);
2ef7eeaa 2993
05394f39 2994 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
2ef7eeaa
EA
2995 }
2996
2997 /* It should now be out of any other write domains, and we can update
2998 * the domain values for our changes.
2999 */
05394f39 3000 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
e47c68e9
EA
3001
3002 /* If we're writing through the CPU, then the GPU read domains will
3003 * need to be invalidated at next use.
3004 */
3005 if (write) {
05394f39
CW
3006 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3007 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
e47c68e9 3008 }
2ef7eeaa 3009
1c5d22f7
CW
3010 trace_i915_gem_object_change_domain(obj,
3011 old_read_domains,
3012 old_write_domain);
3013
2ef7eeaa
EA
3014 return 0;
3015}
3016
673a394b
EA
3017/*
3018 * Set the next domain for the specified object. This
3019 * may not actually perform the necessary flushing/invaliding though,
3020 * as that may want to be batched with other set_domain operations
3021 *
3022 * This is (we hope) the only really tricky part of gem. The goal
3023 * is fairly simple -- track which caches hold bits of the object
3024 * and make sure they remain coherent. A few concrete examples may
3025 * help to explain how it works. For shorthand, we use the notation
3026 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate the
3027 * a pair of read and write domain masks.
3028 *
3029 * Case 1: the batch buffer
3030 *
3031 * 1. Allocated
3032 * 2. Written by CPU
3033 * 3. Mapped to GTT
3034 * 4. Read by GPU
3035 * 5. Unmapped from GTT
3036 * 6. Freed
3037 *
3038 * Let's take these a step at a time
3039 *
3040 * 1. Allocated
3041 * Pages allocated from the kernel may still have
3042 * cache contents, so we set them to (CPU, CPU) always.
3043 * 2. Written by CPU (using pwrite)
3044 * The pwrite function calls set_domain (CPU, CPU) and
3045 * this function does nothing (as nothing changes)
3046 * 3. Mapped by GTT
3047 * This function asserts that the object is not
3048 * currently in any GPU-based read or write domains
3049 * 4. Read by GPU
3050 * i915_gem_execbuffer calls set_domain (COMMAND, 0).
3051 * As write_domain is zero, this function adds in the
3052 * current read domains (CPU+COMMAND, 0).
3053 * flush_domains is set to CPU.
3054 * invalidate_domains is set to COMMAND
3055 * clflush is run to get data out of the CPU caches
3056 * then i915_dev_set_domain calls i915_gem_flush to
3057 * emit an MI_FLUSH and drm_agp_chipset_flush
3058 * 5. Unmapped from GTT
3059 * i915_gem_object_unbind calls set_domain (CPU, CPU)
3060 * flush_domains and invalidate_domains end up both zero
3061 * so no flushing/invalidating happens
3062 * 6. Freed
3063 * yay, done
3064 *
3065 * Case 2: The shared render buffer
3066 *
3067 * 1. Allocated
3068 * 2. Mapped to GTT
3069 * 3. Read/written by GPU
3070 * 4. set_domain to (CPU,CPU)
3071 * 5. Read/written by CPU
3072 * 6. Read/written by GPU
3073 *
3074 * 1. Allocated
3075 * Same as last example, (CPU, CPU)
3076 * 2. Mapped to GTT
3077 * Nothing changes (assertions find that it is not in the GPU)
3078 * 3. Read/written by GPU
3079 * execbuffer calls set_domain (RENDER, RENDER)
3080 * flush_domains gets CPU
3081 * invalidate_domains gets GPU
3082 * clflush (obj)
3083 * MI_FLUSH and drm_agp_chipset_flush
3084 * 4. set_domain (CPU, CPU)
3085 * flush_domains gets GPU
3086 * invalidate_domains gets CPU
3087 * wait_rendering (obj) to make sure all drawing is complete.
3088 * This will include an MI_FLUSH to get the data from GPU
3089 * to memory
3090 * clflush (obj) to invalidate the CPU cache
3091 * Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
3092 * 5. Read/written by CPU
3093 * cache lines are loaded and dirtied
3095 * 6. Read/written by GPU
3095 * Same as last GPU access
3096 *
3097 * Case 3: The constant buffer
3098 *
3099 * 1. Allocated
3100 * 2. Written by CPU
3101 * 3. Read by GPU
3102 * 4. Updated (written) by CPU again
3103 * 5. Read by GPU
3104 *
3105 * 1. Allocated
3106 * (CPU, CPU)
3107 * 2. Written by CPU
3108 * (CPU, CPU)
3109 * 3. Read by GPU
3110 * (CPU+RENDER, 0)
3111 * flush_domains = CPU
3112 * invalidate_domains = RENDER
3113 * clflush (obj)
3114 * MI_FLUSH
3115 * drm_agp_chipset_flush
3116 * 4. Updated (written) by CPU again
3117 * (CPU, CPU)
3118 * flush_domains = 0 (no previous write domain)
3119 * invalidate_domains = 0 (no new read domains)
3120 * 5. Read by GPU
3121 * (CPU+RENDER, 0)
3122 * flush_domains = CPU
3123 * invalidate_domains = RENDER
3124 * clflush (obj)
3125 * MI_FLUSH
3126 * drm_agp_chipset_flush
3127 */
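/* Accumulate the invalidate/flush domains (and the rings that need
 * flushing) for one object into the change_domains tracker used by the
 * execbuffer path; the actual flushes are emitted later in one go.
 */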
c0d90829 3128static void
05394f39 3129i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
0f8c6d7c
CW
3130 struct intel_ring_buffer *ring,
3131 struct change_domains *cd)
673a394b 3132{
05394f39 3133 uint32_t invalidate_domains = 0, flush_domains = 0;
652c393a 3134
673a394b
EA
3135 /*
3136 * If the object isn't moving to a new write domain,
3137 * let the object stay in multiple read domains
3138 */
05394f39
CW
3139 if (obj->base.pending_write_domain == 0)
3140 obj->base.pending_read_domains |= obj->base.read_domains;
673a394b
EA
3141
3142 /*
3143 * Flush the current write domain if
3144 * the new read domains don't match. Invalidate
3145 * any read domains which differ from the old
3146 * write domain
3147 */
05394f39 3148 if (obj->base.write_domain &&
caea7476
CW
3149 (((obj->base.write_domain != obj->base.pending_read_domains ||
3150 obj->ring != ring)) ||
3151 (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
05394f39 3152 flush_domains |= obj->base.write_domain;
8b0e378a 3153 invalidate_domains |=
05394f39 3154 obj->base.pending_read_domains & ~obj->base.write_domain;
673a394b
EA
3155 }
3156 /*
3157 * Invalidate any read caches which may have
3158 * stale data. That is, any new read domains.
3159 */
05394f39 3160 invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
3d2a812a 3161 if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
673a394b 3162 i915_gem_clflush_object(obj);
673a394b 3163
4a684a41
CW
3164 /* blow away mappings if mapped through GTT */
3165 if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_GTT)
3166 i915_gem_release_mmap(obj);
3167
efbeed96
EA
3168 /* The actual obj->write_domain will be updated with
3169 * pending_write_domain after we emit the accumulated flush for all
3170 * of our domain changes in execbuffers (which clears objects'
3171 * write_domains). So if we have a current write domain that we
3172 * aren't changing, set pending_write_domain to that.
3173 */
05394f39
CW
3174 if (flush_domains == 0 && obj->base.pending_write_domain == 0)
3175 obj->base.pending_write_domain = obj->base.write_domain;
673a394b 3176
0f8c6d7c
CW
3177 cd->invalidate_domains |= invalidate_domains;
3178 cd->flush_domains |= flush_domains;
b6651458 3179 if (flush_domains & I915_GEM_GPU_DOMAINS)
05394f39 3180 cd->flush_rings |= obj->ring->id;
b6651458 3181 if (invalidate_domains & I915_GEM_GPU_DOMAINS)
0f8c6d7c 3182 cd->flush_rings |= ring->id;
673a394b
EA
3183}
3184
3185/**
e47c68e9 3186 * Moves the object from a partially valid CPU read domain to a fully valid one.
673a394b 3187 *
e47c68e9
EA
3188 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(),
3189 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU).
673a394b 3190 */
e47c68e9 3191static void
05394f39 3192i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj)
673a394b 3193{
05394f39 3194 if (!obj->page_cpu_valid)
e47c68e9
EA
3195 return;
3196
3197 /* If we're partially in the CPU read domain, finish moving it in.
3198 */
05394f39 3199 if (obj->base.read_domains & I915_GEM_DOMAIN_CPU) {
e47c68e9
EA
3200 int i;
3201
05394f39
CW
3202 for (i = 0; i <= (obj->base.size - 1) / PAGE_SIZE; i++) {
3203 if (obj->page_cpu_valid[i])
e47c68e9 3204 continue;
05394f39 3205 drm_clflush_pages(obj->pages + i, 1);
e47c68e9 3206 }
e47c68e9
EA
3207 }
3208
3209 /* Free the page_cpu_valid mappings which are now stale, whether
3210 * or not we've got I915_GEM_DOMAIN_CPU.
3211 */
05394f39
CW
3212 kfree(obj->page_cpu_valid);
3213 obj->page_cpu_valid = NULL;
e47c68e9
EA
3214}
3215
3216/**
3217 * Set the CPU read domain on a range of the object.
3218 *
3219 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's
3220 * not entirely valid. The page_cpu_valid member of the object flags which
3221 * pages have been flushed, and will be respected by
3222 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping
3223 * of the whole object.
3224 *
3225 * This function returns when the move is complete, including waiting on
3226 * flushes to occur.
3227 */
3228static int
05394f39 3229i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj,
e47c68e9
EA
3230 uint64_t offset, uint64_t size)
3231{
1c5d22f7 3232 uint32_t old_read_domains;
e47c68e9 3233 int i, ret;
673a394b 3234
05394f39 3235 if (offset == 0 && size == obj->base.size)
e47c68e9 3236 return i915_gem_object_set_to_cpu_domain(obj, 0);
673a394b 3237
ba3d8d74 3238 ret = i915_gem_object_flush_gpu_write_domain(obj, false);
e47c68e9 3239 if (ret != 0)
6a47baa6 3240 return ret;
e47c68e9
EA
3241 i915_gem_object_flush_gtt_write_domain(obj);
3242
3243 /* If we're already fully in the CPU read domain, we're done. */
05394f39
CW
3244 if (obj->page_cpu_valid == NULL &&
3245 (obj->base.read_domains & I915_GEM_DOMAIN_CPU) != 0)
e47c68e9 3246 return 0;
673a394b 3247
e47c68e9
EA
3248 /* Otherwise, create/clear the per-page CPU read domain flag if we're
3249 * newly adding I915_GEM_DOMAIN_CPU
3250 */
05394f39
CW
3251 if (obj->page_cpu_valid == NULL) {
3252 obj->page_cpu_valid = kzalloc(obj->base.size / PAGE_SIZE,
3253 GFP_KERNEL);
3254 if (obj->page_cpu_valid == NULL)
e47c68e9 3255 return -ENOMEM;
05394f39
CW
3256 } else if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
3257 memset(obj->page_cpu_valid, 0, obj->base.size / PAGE_SIZE);
673a394b
EA
3258
3259 /* Flush the cache on any pages that are still invalid from the CPU's
3260 * perspective.
3261 */
e47c68e9
EA
3262 for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE;
3263 i++) {
05394f39 3264 if (obj->page_cpu_valid[i])
673a394b
EA
3265 continue;
3266
05394f39 3267 drm_clflush_pages(obj->pages + i, 1);
673a394b 3268
05394f39 3269 obj->page_cpu_valid[i] = 1;
673a394b
EA
3270 }
3271
e47c68e9
EA
3272 /* It should now be out of any other write domains, and we can update
3273 * the domain values for our changes.
3274 */
05394f39 3275 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
e47c68e9 3276
05394f39
CW
3277 old_read_domains = obj->base.read_domains;
3278 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
e47c68e9 3279
1c5d22f7
CW
3280 trace_i915_gem_object_change_domain(obj,
3281 old_read_domains,
05394f39 3282 obj->base.write_domain);
1c5d22f7 3283
673a394b
EA
3284 return 0;
3285}
3286
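/* Apply a single relocation: look up the target object, sanity-check
 * the requested read/write domains and offsets, then patch the dword
 * either through a kmap of the backing page (CPU write domain) or
 * through an atomic GTT iomapping, and record the new presumed offset
 * for userspace.
 */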
673a394b 3287static int
bcf50e27
CW
3288i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
3289 struct drm_file *file_priv,
3290 struct drm_i915_gem_exec_object2 *entry,
3291 struct drm_i915_gem_relocation_entry *reloc)
673a394b 3292{
9af90d19 3293 struct drm_device *dev = obj->base.dev;
bcf50e27
CW
3294 struct drm_gem_object *target_obj;
3295 uint32_t target_offset;
3296 int ret = -EINVAL;
673a394b 3297
bcf50e27
CW
3298 target_obj = drm_gem_object_lookup(dev, file_priv,
3299 reloc->target_handle);
3300 if (target_obj == NULL)
3301 return -ENOENT;
673a394b 3302
bcf50e27 3303 target_offset = to_intel_bo(target_obj)->gtt_offset;
76446cac 3304
bcf50e27
CW
3305#if WATCH_RELOC
3306 DRM_INFO("%s: obj %p offset %08x target %d "
3307 "read %08x write %08x gtt %08x "
3308 "presumed %08x delta %08x\n",
3309 __func__,
3310 obj,
3311 (int) reloc->offset,
3312 (int) reloc->target_handle,
3313 (int) reloc->read_domains,
3314 (int) reloc->write_domain,
3315 (int) target_offset,
3316 (int) reloc->presumed_offset,
3317 reloc->delta);
3318#endif
673a394b 3319
bcf50e27
CW
3320 /* The target buffer should have appeared before us in the
3321 * exec_object list, so it should have a GTT space bound by now.
3322 */
3323 if (target_offset == 0) {
3324 DRM_ERROR("No GTT space found for object %d\n",
3325 reloc->target_handle);
3326 goto err;
3327 }
9af90d19 3328
bcf50e27
CW
3329 /* Validate that the target is in a valid r/w GPU domain */
3330 if (reloc->write_domain & (reloc->write_domain - 1)) {
3331 DRM_ERROR("reloc with multiple write domains: "
3332 "obj %p target %d offset %d "
3333 "read %08x write %08x",
3334 obj, reloc->target_handle,
3335 (int) reloc->offset,
3336 reloc->read_domains,
3337 reloc->write_domain);
3338 goto err;
3339 }
3340 if (reloc->write_domain & I915_GEM_DOMAIN_CPU ||
3341 reloc->read_domains & I915_GEM_DOMAIN_CPU) {
3342 DRM_ERROR("reloc with read/write CPU domains: "
3343 "obj %p target %d offset %d "
3344 "read %08x write %08x",
3345 obj, reloc->target_handle,
3346 (int) reloc->offset,
3347 reloc->read_domains,
3348 reloc->write_domain);
3349 goto err;
3350 }
3351 if (reloc->write_domain && target_obj->pending_write_domain &&
3352 reloc->write_domain != target_obj->pending_write_domain) {
3353 DRM_ERROR("Write domain conflict: "
3354 "obj %p target %d offset %d "
3355 "new %08x old %08x\n",
3356 obj, reloc->target_handle,
3357 (int) reloc->offset,
3358 reloc->write_domain,
3359 target_obj->pending_write_domain);
3360 goto err;
3361 }
673a394b 3362
bcf50e27
CW
3363 target_obj->pending_read_domains |= reloc->read_domains;
3364 target_obj->pending_write_domain |= reloc->write_domain;
8542a0bb 3365
bcf50e27
CW
3366 /* If the relocation already has the right value in it, no
3367 * more work needs to be done.
3368 */
3369 if (target_offset == reloc->presumed_offset)
3370 goto out;
673a394b 3371
bcf50e27
CW
3372 /* Check that the relocation address is valid... */
3373 if (reloc->offset > obj->base.size - 4) {
3374 DRM_ERROR("Relocation beyond object bounds: "
3375 "obj %p target %d offset %d size %d.\n",
3376 obj, reloc->target_handle,
3377 (int) reloc->offset,
3378 (int) obj->base.size);
3379 goto err;
3380 }
3381 if (reloc->offset & 3) {
3382 DRM_ERROR("Relocation not 4-byte aligned: "
3383 "obj %p target %d offset %d.\n",
3384 obj, reloc->target_handle,
3385 (int) reloc->offset);
3386 goto err;
3387 }
673a394b 3388
bcf50e27
CW
3389 /* and points to somewhere within the target object. */
3390 if (reloc->delta >= target_obj->size) {
3391 DRM_ERROR("Relocation beyond target object bounds: "
3392 "obj %p target %d delta %d size %d.\n",
3393 obj, reloc->target_handle,
3394 (int) reloc->delta,
3395 (int) target_obj->size);
3396 goto err;
3397 }
673a394b 3398
bcf50e27
CW
3399 reloc->delta += target_offset;
3400 if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
3401 uint32_t page_offset = reloc->offset & ~PAGE_MASK;
3402 char *vaddr;
673a394b 3403
bcf50e27
CW
3404 vaddr = kmap_atomic(obj->pages[reloc->offset >> PAGE_SHIFT]);
3405 *(uint32_t *)(vaddr + page_offset) = reloc->delta;
3406 kunmap_atomic(vaddr);
3407 } else {
3408 struct drm_i915_private *dev_priv = dev->dev_private;
3409 uint32_t __iomem *reloc_entry;
3410 void __iomem *reloc_page;
8542a0bb 3411
05394f39 3412 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
bcf50e27
CW
3413 if (ret)
3414 goto err;
673a394b 3415
bcf50e27
CW
3416 /* Map the page containing the relocation we're going to perform. */
3417 reloc->offset += obj->gtt_offset;
3418 reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
3419 reloc->offset & PAGE_MASK);
3420 reloc_entry = (uint32_t __iomem *)
3421 (reloc_page + (reloc->offset & ~PAGE_MASK));
3422 iowrite32(reloc->delta, reloc_entry);
3423 io_mapping_unmap_atomic(reloc_page);
3424 }
673a394b 3425
bcf50e27
CW
3426 /* and update the user's relocation entry */
3427 reloc->presumed_offset = target_offset;
b962442e 3428
bcf50e27
CW
3429out:
3430 ret = 0;
3431err:
3432 drm_gem_object_unreference(target_obj);
3433 return ret;
3434}
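
/*
 * Editor's note -- illustrative sketch, not part of i915_gem.c: what the
 * userspace half of a relocation entry looks like.  When presumed_offset
 * still matches the target's real GTT offset, the fast path above
 * ("target_offset == reloc->presumed_offset") skips the rewrite entirely.
 * Assumes only the uapi structures from drm/i915_drm.h; the helper name is
 * hypothetical.
 */
#include <stdint.h>
#include <string.h>
#include <drm/i915_drm.h>

static void sketch_fill_reloc(struct drm_i915_gem_relocation_entry *reloc,
			      uint32_t target_handle,
			      uint64_t offset_in_batch,
			      uint32_t delta_into_target,
			      uint64_t last_known_gtt_offset)
{
	memset(reloc, 0, sizeof(*reloc));
	reloc->target_handle = target_handle;
	reloc->offset = offset_in_batch;	/* byte offset in the batch to patch, 4-byte aligned */
	reloc->delta = delta_into_target;	/* byte offset inside the target object */
	reloc->presumed_offset = last_known_gtt_offset;	/* enables the fast path */
	reloc->read_domains = I915_GEM_DOMAIN_RENDER;
	reloc->write_domain = 0;		/* read-only reference */
}
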
b962442e 3435
bcf50e27
CW
3436static int
3437i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
3438 struct drm_file *file_priv,
3439 struct drm_i915_gem_exec_object2 *entry)
3440{
3441 struct drm_i915_gem_relocation_entry __user *user_relocs;
3442 int i, ret;
3443
3444 user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;
3445 for (i = 0; i < entry->relocation_count; i++) {
3446 struct drm_i915_gem_relocation_entry reloc;
3447
3448 if (__copy_from_user_inatomic(&reloc,
3449 user_relocs+i,
3450 sizeof(reloc)))
3451 return -EFAULT;
3452
3453 ret = i915_gem_execbuffer_relocate_entry(obj, file_priv, entry, &reloc);
3454 if (ret)
3455 return ret;
b962442e 3456
b5dc608c 3457 if (__copy_to_user_inatomic(&user_relocs[i].presumed_offset,
bcf50e27
CW
3458 &reloc.presumed_offset,
3459 sizeof(reloc.presumed_offset)))
3460 return -EFAULT;
b962442e 3461 }
b962442e 3462
bcf50e27
CW
3463 return 0;
3464}
3465
3466static int
3467i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
3468 struct drm_file *file_priv,
3469 struct drm_i915_gem_exec_object2 *entry,
3470 struct drm_i915_gem_relocation_entry *relocs)
3471{
3472 int i, ret;
3473
3474 for (i = 0; i < entry->relocation_count; i++) {
3475 ret = i915_gem_execbuffer_relocate_entry(obj, file_priv, entry, &relocs[i]);
3476 if (ret)
3477 return ret;
3478 }
3479
3480 return 0;
673a394b
EA
3481}
3482
40a5f0de 3483static int
bcf50e27
CW
3484i915_gem_execbuffer_relocate(struct drm_device *dev,
3485 struct drm_file *file,
05394f39 3486 struct drm_i915_gem_object **object_list,
bcf50e27
CW
3487 struct drm_i915_gem_exec_object2 *exec_list,
3488 int count)
3489{
3490 int i, ret;
3491
3492 for (i = 0; i < count; i++) {
05394f39 3493 struct drm_i915_gem_object *obj = object_list[i];
bcf50e27
CW
3494 obj->base.pending_read_domains = 0;
3495 obj->base.pending_write_domain = 0;
3496 ret = i915_gem_execbuffer_relocate_object(obj, file,
3497 &exec_list[i]);
3498 if (ret)
3499 return ret;
3500 }
3501
3502 return 0;
673a394b
EA
3503}
3504
40a5f0de 3505static int
bcf50e27
CW
3506i915_gem_execbuffer_reserve(struct drm_device *dev,
3507 struct drm_file *file,
05394f39 3508 struct drm_i915_gem_object **object_list,
bcf50e27
CW
3509 struct drm_i915_gem_exec_object2 *exec_list,
3510 int count)
40a5f0de 3511{
9af90d19 3512 int ret, i, retry;
40a5f0de 3513
a7a09aeb
CW
3514 /* Attempt to pin all of the buffers into the GTT.
3515 * This is done in 3 phases:
3516 *
3517 * 1a. Unbind all objects that do not match the GTT constraints for
3518 * the execbuffer (fenceable, mappable, alignment etc).
3519 * 1b. Increment pin count for already bound objects.
3520 * 2. Bind new objects.
3521 * 3. Decrement pin count.
3522 *
3523 * This avoids unnecessary unbinding of later objects in order to make
3524 * room for the earlier objects *unless* we need to defragment.
3525 */
5eac3ab4
CW
3526 retry = 0;
3527 do {
9af90d19 3528 ret = 0;
a7a09aeb
CW
3529
3530 /* Unbind any ill-fitting objects or pin. */
9af90d19 3531 for (i = 0; i < count; i++) {
05394f39 3532 struct drm_i915_gem_object *obj = object_list[i];
a7a09aeb
CW
3533 struct drm_i915_gem_exec_object2 *entry = &exec_list[i];
3534 bool need_fence, need_mappable;
3535
3536 if (!obj->gtt_space)
3537 continue;
3538
3539 need_fence =
9af90d19
CW
3540 entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
3541 obj->tiling_mode != I915_TILING_NONE;
a7a09aeb 3542 need_mappable =
16e809ac
DV
3543 entry->relocation_count ? true : need_fence;
3544
a7a09aeb
CW
3545 if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
3546 (need_mappable && !obj->map_and_fenceable))
05394f39 3547 ret = i915_gem_object_unbind(obj);
a7a09aeb
CW
3548 else
3549 ret = i915_gem_object_pin(obj,
3550 entry->alignment,
3551 need_mappable);
3552 if (ret) {
3553 count = i;
3554 goto err;
3555 }
3556 }
3557
3558 /* Bind fresh objects */
3559 for (i = 0; i < count; i++) {
3560 struct drm_i915_gem_exec_object2 *entry = &exec_list[i];
3561 struct drm_i915_gem_object *obj = object_list[i];
3562 bool need_fence;
3563
3564 need_fence =
3565 entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
3566 obj->tiling_mode != I915_TILING_NONE;
3567
3568 if (!obj->gtt_space) {
3569 bool need_mappable =
3570 entry->relocation_count ? true : need_fence;
3571
3572 ret = i915_gem_object_pin(obj,
3573 entry->alignment,
3574 need_mappable);
9af90d19
CW
3575 if (ret)
3576 break;
3577 }
40a5f0de 3578
9af90d19 3579 if (need_fence) {
05394f39 3580 ret = i915_gem_object_get_fence_reg(obj, true);
a7a09aeb 3581 if (ret)
9af90d19 3582 break;
40a5f0de 3583
caea7476 3584 obj->pending_fenced_gpu_access = true;
9af90d19 3585 }
40a5f0de 3586
9af90d19 3587 entry->offset = obj->gtt_offset;
40a5f0de
EA
3588 }
3589
a7a09aeb
CW
3590err: /* Decrement pin count for bound objects */
3591 for (i = 0; i < count; i++) {
3592 struct drm_i915_gem_object *obj = object_list[i];
3593 if (obj->gtt_space)
3594 i915_gem_object_unpin(obj);
3595 }
9af90d19 3596
5eac3ab4 3597 if (ret != -ENOSPC || retry > 1)
9af90d19
CW
3598 return ret;
3599
5eac3ab4
CW
3600 /* First attempt, just clear anything that is purgeable.
3601 * Second attempt, clear the entire GTT.
3602 */
3603 ret = i915_gem_evict_everything(dev, retry == 0);
9af90d19
CW
3604 if (ret)
3605 return ret;
40a5f0de 3606
5eac3ab4
CW
3607 retry++;
3608 } while (1);
40a5f0de
EA
3609}
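
/*
 * Editor's note -- illustrative sketch, not part of i915_gem.c: the control
 * flow of the reserve loop above, reduced to its retry policy.  Pin
 * everything; on -ENOSPC evict only purgeable objects first, then the whole
 * GTT, then give up.  The callback structure and names are hypothetical.
 */
#include <errno.h>

struct sketch_reserve_ops {
	int (*pin_all)(void *ctx);			/* phases 1-3 above   */
	int (*evict)(void *ctx, int purgeable_only);	/* escalating evictor */
};

static int sketch_reserve(const struct sketch_reserve_ops *ops, void *ctx)
{
	int retry, ret;

	for (retry = 0; ; retry++) {
		ret = ops->pin_all(ctx);
		if (ret != -ENOSPC || retry > 1)
			return ret;

		/* First attempt: purgeable only.  Second: everything. */
		ret = ops->evict(ctx, retry == 0);
		if (ret)
			return ret;
	}
}
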
3610
bcf50e27
CW
3611static int
3612i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
3613 struct drm_file *file,
05394f39 3614 struct drm_i915_gem_object **object_list,
bcf50e27
CW
3615 struct drm_i915_gem_exec_object2 *exec_list,
3616 int count)
3617{
3618 struct drm_i915_gem_relocation_entry *reloc;
3619 int i, total, ret;
3620
05394f39
CW
3621 for (i = 0; i < count; i++)
3622 object_list[i]->in_execbuffer = false;
bcf50e27
CW
3623
3624 mutex_unlock(&dev->struct_mutex);
3625
3626 total = 0;
3627 for (i = 0; i < count; i++)
3628 total += exec_list[i].relocation_count;
3629
3630 reloc = drm_malloc_ab(total, sizeof(*reloc));
3631 if (reloc == NULL) {
3632 mutex_lock(&dev->struct_mutex);
3633 return -ENOMEM;
3634 }
3635
3636 total = 0;
3637 for (i = 0; i < count; i++) {
3638 struct drm_i915_gem_relocation_entry __user *user_relocs;
3639
3640 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
3641
3642 if (copy_from_user(reloc+total, user_relocs,
3643 exec_list[i].relocation_count *
3644 sizeof(*reloc))) {
3645 ret = -EFAULT;
3646 mutex_lock(&dev->struct_mutex);
3647 goto err;
3648 }
3649
3650 total += exec_list[i].relocation_count;
3651 }
3652
3653 ret = i915_mutex_lock_interruptible(dev);
3654 if (ret) {
3655 mutex_lock(&dev->struct_mutex);
3656 goto err;
3657 }
3658
3659 ret = i915_gem_execbuffer_reserve(dev, file,
3660 object_list, exec_list,
3661 count);
3662 if (ret)
3663 goto err;
3664
3665 total = 0;
3666 for (i = 0; i < count; i++) {
05394f39 3667 struct drm_i915_gem_object *obj = object_list[i];
bcf50e27
CW
3668 obj->base.pending_read_domains = 0;
3669 obj->base.pending_write_domain = 0;
3670 ret = i915_gem_execbuffer_relocate_object_slow(obj, file,
3671 &exec_list[i],
3672 reloc + total);
3673 if (ret)
3674 goto err;
3675
3676 total += exec_list[i].relocation_count;
3677 }
3678
3679 /* Leave the user relocations as they are; this is the painfully slow path,
3680 * and we want to avoid the complication of dropping the lock whilst
3681 * having buffers reserved in the aperture and so causing spurious
3682 * ENOSPC for random operations.
3683 */
3684
3685err:
3686 drm_free_large(reloc);
3687 return ret;
3688}
3689
13b29289
CW
3690static int
3691i915_gem_execbuffer_move_to_gpu(struct drm_device *dev,
3692 struct drm_file *file,
3693 struct intel_ring_buffer *ring,
05394f39 3694 struct drm_i915_gem_object **objects,
13b29289
CW
3695 int count)
3696{
0f8c6d7c 3697 struct change_domains cd;
13b29289
CW
3698 int ret, i;
3699
0f8c6d7c
CW
3700 cd.invalidate_domains = 0;
3701 cd.flush_domains = 0;
3702 cd.flush_rings = 0;
13b29289 3703 for (i = 0; i < count; i++)
0f8c6d7c 3704 i915_gem_object_set_to_gpu_domain(objects[i], ring, &cd);
13b29289 3705
0f8c6d7c 3706 if (cd.invalidate_domains | cd.flush_domains) {
13b29289
CW
3707#if WATCH_EXEC
3708 DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
3709 __func__,
0f8c6d7c
CW
3710 cd.invalidate_domains,
3711 cd.flush_domains);
13b29289 3712#endif
05394f39 3713 i915_gem_flush(dev,
0f8c6d7c
CW
3714 cd.invalidate_domains,
3715 cd.flush_domains,
3716 cd.flush_rings);
13b29289
CW
3717 }
3718
3719 for (i = 0; i < count; i++) {
05394f39 3720 struct drm_i915_gem_object *obj = objects[i];
13b29289
CW
3721 /* XXX replace with semaphores */
3722 if (obj->ring && ring != obj->ring) {
05394f39 3723 ret = i915_gem_object_wait_rendering(obj, true);
13b29289
CW
3724 if (ret)
3725 return ret;
3726 }
3727 }
3728
3729 return 0;
3730}
3731
673a394b
EA
3732/* Throttle our rendering by waiting until the ring has completed our requests
3733 * emitted over 20 msec ago.
3734 *
b962442e
EA
3735 * Note that if we were to use the current jiffies each time around the loop,
3736 * we wouldn't escape the function with any frames outstanding if the time to
3737 * render a frame was over 20ms.
3738 *
673a394b
EA
3739 * This should get us reasonable parallelism between CPU and GPU but also
3740 * relatively low latency when blocking on a particular request to finish.
3741 */
40a5f0de 3742static int
f787a5f5 3743i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
40a5f0de 3744{
f787a5f5
CW
3745 struct drm_i915_private *dev_priv = dev->dev_private;
3746 struct drm_i915_file_private *file_priv = file->driver_priv;
b962442e 3747 unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
f787a5f5
CW
3748 struct drm_i915_gem_request *request;
3749 struct intel_ring_buffer *ring = NULL;
3750 u32 seqno = 0;
3751 int ret;
93533c29 3752
1c25595f 3753 spin_lock(&file_priv->mm.lock);
f787a5f5 3754 list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
b962442e
EA
3755 if (time_after_eq(request->emitted_jiffies, recent_enough))
3756 break;
40a5f0de 3757
f787a5f5
CW
3758 ring = request->ring;
3759 seqno = request->seqno;
b962442e 3760 }
1c25595f 3761 spin_unlock(&file_priv->mm.lock);
40a5f0de 3762
f787a5f5
CW
3763 if (seqno == 0)
3764 return 0;
2bc43b5c 3765
f787a5f5 3766 ret = 0;
78501eac 3767 if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
f787a5f5
CW
3768 /* And wait for the seqno passing without holding any locks and
3769 * causing extra latency for others. This is safe as the irq
3770 * generation is designed to be run atomically and so is
3771 * lockless.
3772 */
78501eac 3773 ring->user_irq_get(ring);
f787a5f5 3774 ret = wait_event_interruptible(ring->irq_queue,
78501eac 3775 i915_seqno_passed(ring->get_seqno(ring), seqno)
f787a5f5 3776 || atomic_read(&dev_priv->mm.wedged));
78501eac 3777 ring->user_irq_put(ring);
40a5f0de 3778
f787a5f5
CW
3779 if (ret == 0 && atomic_read(&dev_priv->mm.wedged))
3780 ret = -EIO;
40a5f0de
EA
3781 }
3782
f787a5f5
CW
3783 if (ret == 0)
3784 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
40a5f0de
EA
3785
3786 return ret;
3787}
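
/*
 * Editor's note -- illustrative sketch, not part of i915_gem.c: a stand-alone
 * model of the throttle above.  The 20 ms cutoff is computed once, and the
 * caller then waits on the newest request emitted before that cutoff, if any.
 * The wrap-safe comparison mirrors time_after_eq(); all names are
 * hypothetical, and 'emitted' is assumed oldest-first like the per-file
 * request list.
 */
#include <stdbool.h>
#include <stdint.h>

static bool sketch_after_eq(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) >= 0;
}

static int sketch_pick_throttle_target(const uint32_t *emitted, int count,
				       uint32_t now, uint32_t hz)
{
	uint32_t recent_enough = now - (20 * hz) / 1000;
	int target = -1;
	int i;

	for (i = 0; i < count; i++) {
		if (sketch_after_eq(emitted[i], recent_enough))
			break;		/* emitted within the last 20 ms */
		target = i;		/* older than 20 ms: wait on this one */
	}
	return target;			/* -1: nothing old enough to wait for */
}
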
3788
83d60795 3789static int
2549d6c2
CW
3790i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec,
3791 uint64_t exec_offset)
83d60795
CW
3792{
3793 uint32_t exec_start, exec_len;
3794
3795 exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
3796 exec_len = (uint32_t) exec->batch_len;
3797
3798 if ((exec_start | exec_len) & 0x7)
3799 return -EINVAL;
3800
3801 if (!exec_start)
3802 return -EINVAL;
3803
3804 return 0;
3805}
3806
6b95a207 3807static int
2549d6c2
CW
3808validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
3809 int count)
6b95a207 3810{
2549d6c2 3811 int i;
6b95a207 3812
2549d6c2
CW
3813 for (i = 0; i < count; i++) {
3814 char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr;
d1d78830 3815 int length; /* limited by fault_in_pages_readable() */
6b95a207 3816
d1d78830
CW
3817 /* First check for malicious input causing overflow */
3818 if (exec[i].relocation_count >
3819 INT_MAX / sizeof(struct drm_i915_gem_relocation_entry))
3820 return -EINVAL;
6b95a207 3821
d1d78830
CW
3822 length = exec[i].relocation_count *
3823 sizeof(struct drm_i915_gem_relocation_entry);
2549d6c2
CW
3824 if (!access_ok(VERIFY_READ, ptr, length))
3825 return -EFAULT;
40a5f0de 3826
b5dc608c
CW
3827 /* we may also need to update the presumed offsets */
3828 if (!access_ok(VERIFY_WRITE, ptr, length))
3829 return -EFAULT;
3830
2549d6c2
CW
3831 if (fault_in_pages_readable(ptr, length))
3832 return -EFAULT;
6b95a207 3833 }
6b95a207 3834
83d60795 3835 return 0;
6b95a207
KH
3836}
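
/*
 * Editor's note -- illustrative sketch, not part of i915_gem.c: the essence
 * of the overflow guard above.  Reject a relocation count whose byte length
 * would overflow an int before ever multiplying; names are hypothetical.
 */
#include <limits.h>
#include <stdbool.h>
#include <stddef.h>

struct sketch_entry { unsigned long long q0, q1; unsigned int d0, d1, d2, d3; };

static bool sketch_reloc_length(unsigned int count, size_t *length)
{
	if (count > INT_MAX / sizeof(struct sketch_entry))
		return false;		/* malicious or corrupt input */

	*length = count * sizeof(struct sketch_entry);	/* now known to fit */
	return true;
}
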
3837
8dc5d147 3838static int
76446cac 3839i915_gem_do_execbuffer(struct drm_device *dev, void *data,
9af90d19 3840 struct drm_file *file,
76446cac
JB
3841 struct drm_i915_gem_execbuffer2 *args,
3842 struct drm_i915_gem_exec_object2 *exec_list)
673a394b
EA
3843{
3844 drm_i915_private_t *dev_priv = dev->dev_private;
05394f39
CW
3845 struct drm_i915_gem_object **object_list = NULL;
3846 struct drm_i915_gem_object *batch_obj;
201361a5 3847 struct drm_clip_rect *cliprects = NULL;
8dc5d147 3848 struct drm_i915_gem_request *request = NULL;
9af90d19 3849 int ret, i, flips;
673a394b 3850 uint64_t exec_offset;
673a394b 3851
852835f3
ZN
3852 struct intel_ring_buffer *ring = NULL;
3853
30dbf0c0
CW
3854 ret = i915_gem_check_is_wedged(dev);
3855 if (ret)
3856 return ret;
3857
2549d6c2
CW
3858 ret = validate_exec_list(exec_list, args->buffer_count);
3859 if (ret)
3860 return ret;
3861
673a394b
EA
3862#if WATCH_EXEC
3863 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
3864 (int) args->buffers_ptr, args->buffer_count, args->batch_len);
3865#endif
549f7365
CW
3866 switch (args->flags & I915_EXEC_RING_MASK) {
3867 case I915_EXEC_DEFAULT:
3868 case I915_EXEC_RENDER:
3869 ring = &dev_priv->render_ring;
3870 break;
3871 case I915_EXEC_BSD:
d1b851fc 3872 if (!HAS_BSD(dev)) {
549f7365 3873 DRM_ERROR("execbuf with invalid ring (BSD)\n");
d1b851fc
ZN
3874 return -EINVAL;
3875 }
3876 ring = &dev_priv->bsd_ring;
549f7365
CW
3877 break;
3878 case I915_EXEC_BLT:
3879 if (!HAS_BLT(dev)) {
3880 DRM_ERROR("execbuf with invalid ring (BLT)\n");
3881 return -EINVAL;
3882 }
3883 ring = &dev_priv->blt_ring;
3884 break;
3885 default:
3886 DRM_ERROR("execbuf with unknown ring: %d\n",
3887 (int)(args->flags & I915_EXEC_RING_MASK));
3888 return -EINVAL;
d1b851fc
ZN
3889 }
3890
4f481ed2
EA
3891 if (args->buffer_count < 1) {
3892 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
3893 return -EINVAL;
3894 }
c8e0f93a 3895 object_list = drm_malloc_ab(sizeof(*object_list), args->buffer_count);
76446cac
JB
3896 if (object_list == NULL) {
3897 DRM_ERROR("Failed to allocate object list for %d buffers\n",
673a394b
EA
3898 args->buffer_count);
3899 ret = -ENOMEM;
3900 goto pre_mutex_err;
3901 }
673a394b 3902
201361a5 3903 if (args->num_cliprects != 0) {
9a298b2a
EA
3904 cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects),
3905 GFP_KERNEL);
a40e8d31
OA
3906 if (cliprects == NULL) {
3907 ret = -ENOMEM;
201361a5 3908 goto pre_mutex_err;
a40e8d31 3909 }
201361a5
EA
3910
3911 ret = copy_from_user(cliprects,
3912 (struct drm_clip_rect __user *)
3913 (uintptr_t) args->cliprects_ptr,
3914 sizeof(*cliprects) * args->num_cliprects);
3915 if (ret != 0) {
3916 DRM_ERROR("copy %d cliprects failed: %d\n",
3917 args->num_cliprects, ret);
c877cdce 3918 ret = -EFAULT;
201361a5
EA
3919 goto pre_mutex_err;
3920 }
3921 }
3922
8dc5d147
CW
3923 request = kzalloc(sizeof(*request), GFP_KERNEL);
3924 if (request == NULL) {
3925 ret = -ENOMEM;
40a5f0de 3926 goto pre_mutex_err;
8dc5d147 3927 }
40a5f0de 3928
76c1dec1
CW
3929 ret = i915_mutex_lock_interruptible(dev);
3930 if (ret)
a198bc80 3931 goto pre_mutex_err;
673a394b
EA
3932
3933 if (dev_priv->mm.suspended) {
673a394b 3934 mutex_unlock(&dev->struct_mutex);
a198bc80
CW
3935 ret = -EBUSY;
3936 goto pre_mutex_err;
673a394b
EA
3937 }
3938
ac94a962 3939 /* Look up object handles */
673a394b 3940 for (i = 0; i < args->buffer_count; i++) {
05394f39 3941 struct drm_i915_gem_object *obj;
7e318e18 3942
05394f39
CW
3943 obj = to_intel_bo(drm_gem_object_lookup(dev, file,
3944 exec_list[i].handle));
3945 if (obj == NULL) {
673a394b
EA
3946 DRM_ERROR("Invalid object handle %d at index %d\n",
3947 exec_list[i].handle, i);
0ce907f8 3948 /* prevent error path from reading uninitialized data */
05394f39 3949 args->buffer_count = i;
bf79cb91 3950 ret = -ENOENT;
673a394b
EA
3951 goto err;
3952 }
05394f39 3953 object_list[i] = obj;
b70d11da 3954
05394f39 3955 if (obj->in_execbuffer) {
b70d11da 3956 DRM_ERROR("Object %p appears more than once in object list\n",
05394f39 3957 obj);
0ce907f8
CW
3958 /* prevent error path from reading uninitialized data */
3959 args->buffer_count = i + 1;
bf79cb91 3960 ret = -EINVAL;
b70d11da
KH
3961 goto err;
3962 }
05394f39 3963 obj->in_execbuffer = true;
caea7476 3964 obj->pending_fenced_gpu_access = false;
ac94a962 3965 }
673a394b 3966
9af90d19 3967 /* Move the objects en-masse into the GTT, evicting if necessary. */
bcf50e27
CW
3968 ret = i915_gem_execbuffer_reserve(dev, file,
3969 object_list, exec_list,
3970 args->buffer_count);
9af90d19
CW
3971 if (ret)
3972 goto err;
ac94a962 3973
9af90d19 3974 /* The objects are in their final locations, apply the relocations. */
bcf50e27
CW
3975 ret = i915_gem_execbuffer_relocate(dev, file,
3976 object_list, exec_list,
3977 args->buffer_count);
3978 if (ret) {
3979 if (ret == -EFAULT) {
3980 ret = i915_gem_execbuffer_relocate_slow(dev, file,
3981 object_list,
3982 exec_list,
3983 args->buffer_count);
3984 BUG_ON(!mutex_is_locked(&dev->struct_mutex));
3985 }
9af90d19 3986 if (ret)
ac94a962 3987 goto err;
673a394b
EA
3988 }
3989
3990 /* Set the pending read domains for the batch buffer to COMMAND */
3991 batch_obj = object_list[args->buffer_count-1];
05394f39 3992 if (batch_obj->base.pending_write_domain) {
5f26a2c7
CW
3993 DRM_ERROR("Attempting to use self-modifying batch buffer\n");
3994 ret = -EINVAL;
3995 goto err;
3996 }
05394f39 3997 batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
673a394b 3998
9af90d19 3999 /* Sanity check the batch buffer */
05394f39 4000 exec_offset = batch_obj->gtt_offset;
9af90d19 4001 ret = i915_gem_check_execbuffer(args, exec_offset);
83d60795
CW
4002 if (ret != 0) {
4003 DRM_ERROR("execbuf with invalid offset/length\n");
4004 goto err;
4005 }
4006
13b29289
CW
4007 ret = i915_gem_execbuffer_move_to_gpu(dev, file, ring,
4008 object_list, args->buffer_count);
4009 if (ret)
4010 goto err;
673a394b 4011
673a394b
EA
4012#if WATCH_COHERENCY
4013 for (i = 0; i < args->buffer_count; i++) {
4014 i915_gem_object_check_coherency(object_list[i],
4015 exec_list[i].handle);
4016 }
4017#endif
4018
673a394b 4019#if WATCH_EXEC
6911a9b8 4020 i915_gem_dump_object(batch_obj,
673a394b
EA
4021 args->batch_len,
4022 __func__,
4023 ~0);
4024#endif
4025
e59f2bac
CW
4026 /* Check for any pending flips. As we only maintain a flip queue depth
4027 * of 1, we can simply insert a WAIT for the next display flip prior
4028 * to executing the batch and avoid stalling the CPU.
4029 */
4030 flips = 0;
4031 for (i = 0; i < args->buffer_count; i++) {
05394f39
CW
4032 if (object_list[i]->base.write_domain)
4033 flips |= atomic_read(&object_list[i]->pending_flip);
e59f2bac
CW
4034 }
4035 if (flips) {
4036 int plane, flip_mask;
4037
4038 for (plane = 0; flips >> plane; plane++) {
4039 if (((flips >> plane) & 1) == 0)
4040 continue;
4041
4042 if (plane)
4043 flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
4044 else
4045 flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
4046
e1f99ce6
CW
4047 ret = intel_ring_begin(ring, 2);
4048 if (ret)
4049 goto err;
4050
78501eac
CW
4051 intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
4052 intel_ring_emit(ring, MI_NOOP);
4053 intel_ring_advance(ring);
e59f2bac
CW
4054 }
4055 }
4056
673a394b 4057 /* Exec the batchbuffer */
78501eac 4058 ret = ring->dispatch_execbuffer(ring, args, cliprects, exec_offset);
673a394b
EA
4059 if (ret) {
4060 DRM_ERROR("dispatch failed %d\n", ret);
4061 goto err;
4062 }
4063
673a394b 4064 for (i = 0; i < args->buffer_count; i++) {
05394f39 4065 struct drm_i915_gem_object *obj = object_list[i];
673a394b 4066
05394f39
CW
4067 obj->base.read_domains = obj->base.pending_read_domains;
4068 obj->base.write_domain = obj->base.pending_write_domain;
caea7476 4069 obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
7e318e18 4070
617dbe27 4071 i915_gem_object_move_to_active(obj, ring);
05394f39
CW
4072 if (obj->base.write_domain) {
4073 obj->dirty = 1;
4074 list_move_tail(&obj->gpu_write_list,
64193406 4075 &ring->gpu_write_list);
7e318e18
CW
4076 intel_mark_busy(dev, obj);
4077 }
4078
4079 trace_i915_gem_object_change_domain(obj,
05394f39
CW
4080 obj->base.read_domains,
4081 obj->base.write_domain);
673a394b 4082 }
673a394b 4083
7e318e18
CW
4084 /*
4085 * Ensure that the commands in the batch buffer are
4086 * finished before the interrupt fires
4087 */
4088 i915_retire_commands(dev, ring);
4089
3cce469c 4090 if (i915_add_request(dev, file, request, ring))
5d97eb69 4091 i915_gem_next_request_seqno(dev, ring);
3cce469c
CW
4092 else
4093 request = NULL;
673a394b 4094
673a394b 4095err:
b70d11da 4096 for (i = 0; i < args->buffer_count; i++) {
05394f39
CW
4097 object_list[i]->in_execbuffer = false;
4098 drm_gem_object_unreference(&object_list[i]->base);
b70d11da 4099 }
673a394b 4100
673a394b
EA
4101 mutex_unlock(&dev->struct_mutex);
4102
93533c29 4103pre_mutex_err:
8e7d2b2c 4104 drm_free_large(object_list);
9a298b2a 4105 kfree(cliprects);
8dc5d147 4106 kfree(request);
673a394b
EA
4107
4108 return ret;
4109}
4110
76446cac
JB
4111/*
4112 * Legacy execbuffer just creates an exec2 list from the original exec object
4113 * list array and passes it to the real function.
4114 */
4115int
4116i915_gem_execbuffer(struct drm_device *dev, void *data,
05394f39 4117 struct drm_file *file)
76446cac
JB
4118{
4119 struct drm_i915_gem_execbuffer *args = data;
4120 struct drm_i915_gem_execbuffer2 exec2;
4121 struct drm_i915_gem_exec_object *exec_list = NULL;
4122 struct drm_i915_gem_exec_object2 *exec2_list = NULL;
4123 int ret, i;
4124
4125#if WATCH_EXEC
4126 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
4127 (int) args->buffers_ptr, args->buffer_count, args->batch_len);
4128#endif
4129
4130 if (args->buffer_count < 1) {
4131 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
4132 return -EINVAL;
4133 }
4134
4135 /* Copy in the exec list from userland */
4136 exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
4137 exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
4138 if (exec_list == NULL || exec2_list == NULL) {
4139 DRM_ERROR("Failed to allocate exec list for %d buffers\n",
4140 args->buffer_count);
4141 drm_free_large(exec_list);
4142 drm_free_large(exec2_list);
4143 return -ENOMEM;
4144 }
4145 ret = copy_from_user(exec_list,
4146 (struct drm_i915_relocation_entry __user *)
4147 (uintptr_t) args->buffers_ptr,
4148 sizeof(*exec_list) * args->buffer_count);
4149 if (ret != 0) {
4150 DRM_ERROR("copy %d exec entries failed %d\n",
4151 args->buffer_count, ret);
4152 drm_free_large(exec_list);
4153 drm_free_large(exec2_list);
4154 return -EFAULT;
4155 }
4156
4157 for (i = 0; i < args->buffer_count; i++) {
4158 exec2_list[i].handle = exec_list[i].handle;
4159 exec2_list[i].relocation_count = exec_list[i].relocation_count;
4160 exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
4161 exec2_list[i].alignment = exec_list[i].alignment;
4162 exec2_list[i].offset = exec_list[i].offset;
a6c45cf0 4163 if (INTEL_INFO(dev)->gen < 4)
76446cac
JB
4164 exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
4165 else
4166 exec2_list[i].flags = 0;
4167 }
4168
4169 exec2.buffers_ptr = args->buffers_ptr;
4170 exec2.buffer_count = args->buffer_count;
4171 exec2.batch_start_offset = args->batch_start_offset;
4172 exec2.batch_len = args->batch_len;
4173 exec2.DR1 = args->DR1;
4174 exec2.DR4 = args->DR4;
4175 exec2.num_cliprects = args->num_cliprects;
4176 exec2.cliprects_ptr = args->cliprects_ptr;
852835f3 4177 exec2.flags = I915_EXEC_RENDER;
76446cac 4178
05394f39 4179 ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
76446cac
JB
4180 if (!ret) {
4181 /* Copy the new buffer offsets back to the user's exec list. */
4182 for (i = 0; i < args->buffer_count; i++)
4183 exec_list[i].offset = exec2_list[i].offset;
4184 /* ... and back out to userspace */
4185 ret = copy_to_user((struct drm_i915_relocation_entry __user *)
4186 (uintptr_t) args->buffers_ptr,
4187 exec_list,
4188 sizeof(*exec_list) * args->buffer_count);
4189 if (ret) {
4190 ret = -EFAULT;
4191 DRM_ERROR("failed to copy %d exec entries "
4192 "back to user (%d)\n",
4193 args->buffer_count, ret);
4194 }
76446cac
JB
4195 }
4196
4197 drm_free_large(exec_list);
4198 drm_free_large(exec2_list);
4199 return ret;
4200}
4201
4202int
4203i915_gem_execbuffer2(struct drm_device *dev, void *data,
05394f39 4204 struct drm_file *file)
76446cac
JB
4205{
4206 struct drm_i915_gem_execbuffer2 *args = data;
4207 struct drm_i915_gem_exec_object2 *exec2_list = NULL;
4208 int ret;
4209
4210#if WATCH_EXEC
4211 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
4212 (int) args->buffers_ptr, args->buffer_count, args->batch_len);
4213#endif
4214
4215 if (args->buffer_count < 1) {
4216 DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count);
4217 return -EINVAL;
4218 }
4219
4220 exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
4221 if (exec2_list == NULL) {
4222 DRM_ERROR("Failed to allocate exec list for %d buffers\n",
4223 args->buffer_count);
4224 return -ENOMEM;
4225 }
4226 ret = copy_from_user(exec2_list,
4227 (struct drm_i915_relocation_entry __user *)
4228 (uintptr_t) args->buffers_ptr,
4229 sizeof(*exec2_list) * args->buffer_count);
4230 if (ret != 0) {
4231 DRM_ERROR("copy %d exec entries failed %d\n",
4232 args->buffer_count, ret);
4233 drm_free_large(exec2_list);
4234 return -EFAULT;
4235 }
4236
05394f39 4237 ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
76446cac
JB
4238 if (!ret) {
4239 /* Copy the new buffer offsets back to the user's exec list. */
4240 ret = copy_to_user((struct drm_i915_relocation_entry __user *)
4241 (uintptr_t) args->buffers_ptr,
4242 exec2_list,
4243 sizeof(*exec2_list) * args->buffer_count);
4244 if (ret) {
4245 ret = -EFAULT;
4246 DRM_ERROR("failed to copy %d exec entries "
4247 "back to user (%d)\n",
4248 args->buffer_count, ret);
4249 }
4250 }
4251
4252 drm_free_large(exec2_list);
4253 return ret;
4254}
4255
673a394b 4256int
05394f39
CW
4257i915_gem_object_pin(struct drm_i915_gem_object *obj,
4258 uint32_t alignment,
75e9e915 4259 bool map_and_fenceable)
673a394b 4260{
05394f39 4261 struct drm_device *dev = obj->base.dev;
f13d3f73 4262 struct drm_i915_private *dev_priv = dev->dev_private;
673a394b
EA
4263 int ret;
4264
05394f39 4265 BUG_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT);
23bc5982 4266 WARN_ON(i915_verify_lists(dev));
ac0c6b5a 4267
05394f39
CW
4268 if (obj->gtt_space != NULL) {
4269 if ((alignment && obj->gtt_offset & (alignment - 1)) ||
4270 (map_and_fenceable && !obj->map_and_fenceable)) {
4271 WARN(obj->pin_count,
ae7d49d8 4272 "bo is already pinned with incorrect alignment:"
75e9e915
DV
4273 " offset=%x, req.alignment=%x, req.map_and_fenceable=%d,"
4274 " obj->map_and_fenceable=%d\n",
05394f39 4275 obj->gtt_offset, alignment,
75e9e915 4276 map_and_fenceable,
05394f39 4277 obj->map_and_fenceable);
ac0c6b5a
CW
4278 ret = i915_gem_object_unbind(obj);
4279 if (ret)
4280 return ret;
4281 }
4282 }
4283
05394f39 4284 if (obj->gtt_space == NULL) {
a00b10c3 4285 ret = i915_gem_object_bind_to_gtt(obj, alignment,
75e9e915 4286 map_and_fenceable);
9731129c 4287 if (ret)
673a394b 4288 return ret;
22c344e9 4289 }
76446cac 4290
05394f39 4291 if (obj->pin_count++ == 0) {
05394f39
CW
4292 if (!obj->active)
4293 list_move_tail(&obj->mm_list,
f13d3f73 4294 &dev_priv->mm.pinned_list);
673a394b 4295 }
6299f992 4296 obj->pin_mappable |= map_and_fenceable;
673a394b 4297
23bc5982 4298 WARN_ON(i915_verify_lists(dev));
673a394b
EA
4299 return 0;
4300}
4301
4302void
05394f39 4303i915_gem_object_unpin(struct drm_i915_gem_object *obj)
673a394b 4304{
05394f39 4305 struct drm_device *dev = obj->base.dev;
673a394b 4306 drm_i915_private_t *dev_priv = dev->dev_private;
673a394b 4307
23bc5982 4308 WARN_ON(i915_verify_lists(dev));
05394f39
CW
4309 BUG_ON(obj->pin_count == 0);
4310 BUG_ON(obj->gtt_space == NULL);
673a394b 4311
05394f39
CW
4312 if (--obj->pin_count == 0) {
4313 if (!obj->active)
4314 list_move_tail(&obj->mm_list,
673a394b 4315 &dev_priv->mm.inactive_list);
6299f992 4316 obj->pin_mappable = false;
673a394b 4317 }
23bc5982 4318 WARN_ON(i915_verify_lists(dev));
673a394b
EA
4319}
4320
4321int
4322i915_gem_pin_ioctl(struct drm_device *dev, void *data,
05394f39 4323 struct drm_file *file)
673a394b
EA
4324{
4325 struct drm_i915_gem_pin *args = data;
05394f39 4326 struct drm_i915_gem_object *obj;
673a394b
EA
4327 int ret;
4328
1d7cfea1
CW
4329 ret = i915_mutex_lock_interruptible(dev);
4330 if (ret)
4331 return ret;
673a394b 4332
05394f39 4333 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
673a394b 4334 if (obj == NULL) {
1d7cfea1
CW
4335 ret = -ENOENT;
4336 goto unlock;
673a394b 4337 }
673a394b 4338
05394f39 4339 if (obj->madv != I915_MADV_WILLNEED) {
bb6baf76 4340 DRM_ERROR("Attempting to pin a purgeable buffer\n");
1d7cfea1
CW
4341 ret = -EINVAL;
4342 goto out;
3ef94daa
CW
4343 }
4344
05394f39 4345 if (obj->pin_filp != NULL && obj->pin_filp != file) {
79e53945
JB
4346 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
4347 args->handle);
1d7cfea1
CW
4348 ret = -EINVAL;
4349 goto out;
79e53945
JB
4350 }
4351
05394f39
CW
4352 obj->user_pin_count++;
4353 obj->pin_filp = file;
4354 if (obj->user_pin_count == 1) {
75e9e915 4355 ret = i915_gem_object_pin(obj, args->alignment, true);
1d7cfea1
CW
4356 if (ret)
4357 goto out;
673a394b
EA
4358 }
4359
4360 /* XXX - flush the CPU caches for pinned objects
4361 * as the X server doesn't manage domains yet
4362 */
e47c68e9 4363 i915_gem_object_flush_cpu_write_domain(obj);
05394f39 4364 args->offset = obj->gtt_offset;
1d7cfea1 4365out:
05394f39 4366 drm_gem_object_unreference(&obj->base);
1d7cfea1 4367unlock:
673a394b 4368 mutex_unlock(&dev->struct_mutex);
1d7cfea1 4369 return ret;
673a394b
EA
4370}
4371
4372int
4373i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
05394f39 4374 struct drm_file *file)
673a394b
EA
4375{
4376 struct drm_i915_gem_pin *args = data;
05394f39 4377 struct drm_i915_gem_object *obj;
76c1dec1 4378 int ret;
673a394b 4379
1d7cfea1
CW
4380 ret = i915_mutex_lock_interruptible(dev);
4381 if (ret)
4382 return ret;
673a394b 4383
05394f39 4384 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
673a394b 4385 if (obj == NULL) {
1d7cfea1
CW
4386 ret = -ENOENT;
4387 goto unlock;
673a394b 4388 }
76c1dec1 4389
05394f39 4390 if (obj->pin_filp != file) {
79e53945
JB
4391 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
4392 args->handle);
1d7cfea1
CW
4393 ret = -EINVAL;
4394 goto out;
79e53945 4395 }
05394f39
CW
4396 obj->user_pin_count--;
4397 if (obj->user_pin_count == 0) {
4398 obj->pin_filp = NULL;
79e53945
JB
4399 i915_gem_object_unpin(obj);
4400 }
673a394b 4401
1d7cfea1 4402out:
05394f39 4403 drm_gem_object_unreference(&obj->base);
1d7cfea1 4404unlock:
673a394b 4405 mutex_unlock(&dev->struct_mutex);
1d7cfea1 4406 return ret;
673a394b
EA
4407}
4408
4409int
4410i915_gem_busy_ioctl(struct drm_device *dev, void *data,
05394f39 4411 struct drm_file *file)
673a394b
EA
4412{
4413 struct drm_i915_gem_busy *args = data;
05394f39 4414 struct drm_i915_gem_object *obj;
30dbf0c0
CW
4415 int ret;
4416
76c1dec1 4417 ret = i915_mutex_lock_interruptible(dev);
1d7cfea1 4418 if (ret)
76c1dec1 4419 return ret;
673a394b 4420
05394f39 4421 obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
673a394b 4422 if (obj == NULL) {
1d7cfea1
CW
4423 ret = -ENOENT;
4424 goto unlock;
673a394b 4425 }
d1b851fc 4426
0be555b6
CW
4427 /* Count all active objects as busy, even if they are currently not used
4428 * by the gpu. Users of this interface expect objects to eventually
4429 * become non-busy without any further actions, therefore emit any
4430 * necessary flushes here.
c4de0a5d 4431 */
05394f39 4432 args->busy = obj->active;
0be555b6
CW
4433 if (args->busy) {
4434 /* Unconditionally flush objects, even when the gpu still uses this
4435 * object. Userspace calling this function indicates that it wants to
4436 * use this buffer sooner rather than later, so issuing the required
4437 * flush earlier is beneficial.
4438 */
05394f39
CW
4439 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS)
4440 i915_gem_flush_ring(dev, obj->ring,
4441 0, obj->base.write_domain);
0be555b6
CW
4442
4443 /* Update the active list for the hardware's current position.
4444 * Otherwise this only updates on a delayed timer or when irqs
4445 * are actually unmasked, and our working set ends up being
4446 * larger than required.
4447 */
05394f39 4448 i915_gem_retire_requests_ring(dev, obj->ring);
0be555b6 4449
05394f39 4450 args->busy = obj->active;
0be555b6 4451 }
673a394b 4452
05394f39 4453 drm_gem_object_unreference(&obj->base);
1d7cfea1 4454unlock:
673a394b 4455 mutex_unlock(&dev->struct_mutex);
1d7cfea1 4456 return ret;
673a394b
EA
4457}
4458
4459int
4460i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4461 struct drm_file *file_priv)
4462{
4463 return i915_gem_ring_throttle(dev, file_priv);
4464}
4465
3ef94daa
CW
4466int
4467i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4468 struct drm_file *file_priv)
4469{
4470 struct drm_i915_gem_madvise *args = data;
05394f39 4471 struct drm_i915_gem_object *obj;
76c1dec1 4472 int ret;
3ef94daa
CW
4473
4474 switch (args->madv) {
4475 case I915_MADV_DONTNEED:
4476 case I915_MADV_WILLNEED:
4477 break;
4478 default:
4479 return -EINVAL;
4480 }
4481
1d7cfea1
CW
4482 ret = i915_mutex_lock_interruptible(dev);
4483 if (ret)
4484 return ret;
4485
05394f39 4486 obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
3ef94daa 4487 if (obj == NULL) {
1d7cfea1
CW
4488 ret = -ENOENT;
4489 goto unlock;
3ef94daa 4490 }
3ef94daa 4491
05394f39 4492 if (obj->pin_count) {
1d7cfea1
CW
4493 ret = -EINVAL;
4494 goto out;
3ef94daa
CW
4495 }
4496
05394f39
CW
4497 if (obj->madv != __I915_MADV_PURGED)
4498 obj->madv = args->madv;
3ef94daa 4499
2d7ef395 4500 /* if the object is no longer bound, discard its backing storage */
05394f39
CW
4501 if (i915_gem_object_is_purgeable(obj) &&
4502 obj->gtt_space == NULL)
2d7ef395
CW
4503 i915_gem_object_truncate(obj);
4504
05394f39 4505 args->retained = obj->madv != __I915_MADV_PURGED;
bb6baf76 4506
1d7cfea1 4507out:
05394f39 4508 drm_gem_object_unreference(&obj->base);
1d7cfea1 4509unlock:
3ef94daa 4510 mutex_unlock(&dev->struct_mutex);
1d7cfea1 4511 return ret;
3ef94daa
CW
4512}
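
/*
 * Editor's note -- illustrative sketch, not part of i915_gem.c: how userspace
 * would drive the madvise ioctl handled above.  Assumes libdrm's drmIoctl()
 * and the uapi definitions from drm/i915_drm.h; error handling is trimmed and
 * the helper name is hypothetical.
 */
#include <errno.h>
#include <stdint.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

/* Returns 1 if the backing pages are still there, 0 if the kernel already
 * purged them, negative errno on error.  Pass willneed=0 to mark the buffer
 * purgeable while it is unpinned and idle. */
static int sketch_gem_madvise(int fd, uint32_t handle, int willneed)
{
	struct drm_i915_gem_madvise madv = {
		.handle = handle,
		.madv = willneed ? I915_MADV_WILLNEED : I915_MADV_DONTNEED,
	};

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv))
		return -errno;

	return madv.retained;
}
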
4513
05394f39
CW
4514struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
4515 size_t size)
ac52bc56 4516{
73aa808f 4517 struct drm_i915_private *dev_priv = dev->dev_private;
c397b908 4518 struct drm_i915_gem_object *obj;
ac52bc56 4519
c397b908
DV
4520 obj = kzalloc(sizeof(*obj), GFP_KERNEL);
4521 if (obj == NULL)
4522 return NULL;
673a394b 4523
c397b908
DV
4524 if (drm_gem_object_init(dev, &obj->base, size) != 0) {
4525 kfree(obj);
4526 return NULL;
4527 }
673a394b 4528
73aa808f
CW
4529 i915_gem_info_add_obj(dev_priv, size);
4530
c397b908
DV
4531 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4532 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
673a394b 4533
c397b908 4534 obj->agp_type = AGP_USER_MEMORY;
62b8b215 4535 obj->base.driver_private = NULL;
c397b908 4536 obj->fence_reg = I915_FENCE_REG_NONE;
69dc4987 4537 INIT_LIST_HEAD(&obj->mm_list);
93a37f20 4538 INIT_LIST_HEAD(&obj->gtt_list);
69dc4987 4539 INIT_LIST_HEAD(&obj->ring_list);
c397b908 4540 INIT_LIST_HEAD(&obj->gpu_write_list);
c397b908 4541 obj->madv = I915_MADV_WILLNEED;
75e9e915
DV
4542 /* Avoid an unnecessary call to unbind on the first bind. */
4543 obj->map_and_fenceable = true;
de151cf6 4544
05394f39 4545 return obj;
c397b908
DV
4546}
4547
4548int i915_gem_init_object(struct drm_gem_object *obj)
4549{
4550 BUG();
de151cf6 4551
673a394b
EA
4552 return 0;
4553}
4554
05394f39 4555static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj)
673a394b 4556{
05394f39 4557 struct drm_device *dev = obj->base.dev;
be72615b 4558 drm_i915_private_t *dev_priv = dev->dev_private;
be72615b 4559 int ret;
673a394b 4560
be72615b
CW
4561 ret = i915_gem_object_unbind(obj);
4562 if (ret == -ERESTARTSYS) {
05394f39 4563 list_move(&obj->mm_list,
be72615b
CW
4564 &dev_priv->mm.deferred_free_list);
4565 return;
4566 }
673a394b 4567
05394f39 4568 if (obj->base.map_list.map)
7e616158 4569 i915_gem_free_mmap_offset(obj);
de151cf6 4570
05394f39
CW
4571 drm_gem_object_release(&obj->base);
4572 i915_gem_info_remove_obj(dev_priv, obj->base.size);
c397b908 4573
05394f39
CW
4574 kfree(obj->page_cpu_valid);
4575 kfree(obj->bit_17);
4576 kfree(obj);
673a394b
EA
4577}
4578
05394f39 4579void i915_gem_free_object(struct drm_gem_object *gem_obj)
be72615b 4580{
05394f39
CW
4581 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
4582 struct drm_device *dev = obj->base.dev;
be72615b
CW
4583
4584 trace_i915_gem_object_destroy(obj);
4585
05394f39 4586 while (obj->pin_count > 0)
be72615b
CW
4587 i915_gem_object_unpin(obj);
4588
05394f39 4589 if (obj->phys_obj)
be72615b
CW
4590 i915_gem_detach_phys_object(dev, obj);
4591
4592 i915_gem_free_object_tail(obj);
4593}
4594
29105ccc
CW
4595int
4596i915_gem_idle(struct drm_device *dev)
4597{
4598 drm_i915_private_t *dev_priv = dev->dev_private;
4599 int ret;
28dfe52a 4600
29105ccc 4601 mutex_lock(&dev->struct_mutex);
1c5d22f7 4602
87acb0a5 4603 if (dev_priv->mm.suspended) {
29105ccc
CW
4604 mutex_unlock(&dev->struct_mutex);
4605 return 0;
28dfe52a
EA
4606 }
4607
29105ccc 4608 ret = i915_gpu_idle(dev);
6dbe2772
KP
4609 if (ret) {
4610 mutex_unlock(&dev->struct_mutex);
673a394b 4611 return ret;
6dbe2772 4612 }
673a394b 4613
29105ccc
CW
4614 /* Under UMS, be paranoid and evict. */
4615 if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
5eac3ab4 4616 ret = i915_gem_evict_inactive(dev, false);
29105ccc
CW
4617 if (ret) {
4618 mutex_unlock(&dev->struct_mutex);
4619 return ret;
4620 }
4621 }
4622
312817a3
CW
4623 i915_gem_reset_fences(dev);
4624
29105ccc
CW
4625 /* Hack! Don't let anybody do execbuf while we don't control the chip.
4626 * We need to replace this with a semaphore, or something.
4627 * And not confound mm.suspended!
4628 */
4629 dev_priv->mm.suspended = 1;
bc0c7f14 4630 del_timer_sync(&dev_priv->hangcheck_timer);
29105ccc
CW
4631
4632 i915_kernel_lost_context(dev);
6dbe2772 4633 i915_gem_cleanup_ringbuffer(dev);
29105ccc 4634
6dbe2772
KP
4635 mutex_unlock(&dev->struct_mutex);
4636
29105ccc
CW
4637 /* Cancel the retire work handler, which should be idle now. */
4638 cancel_delayed_work_sync(&dev_priv->mm.retire_work);
4639
673a394b
EA
4640 return 0;
4641}
4642
8187a2b7
ZN
4643int
4644i915_gem_init_ringbuffer(struct drm_device *dev)
4645{
4646 drm_i915_private_t *dev_priv = dev->dev_private;
4647 int ret;
68f95ba9 4648
5c1143bb 4649 ret = intel_init_render_ring_buffer(dev);
68f95ba9 4650 if (ret)
b6913e4b 4651 return ret;
68f95ba9
CW
4652
4653 if (HAS_BSD(dev)) {
5c1143bb 4654 ret = intel_init_bsd_ring_buffer(dev);
68f95ba9
CW
4655 if (ret)
4656 goto cleanup_render_ring;
d1b851fc 4657 }
68f95ba9 4658
549f7365
CW
4659 if (HAS_BLT(dev)) {
4660 ret = intel_init_blt_ring_buffer(dev);
4661 if (ret)
4662 goto cleanup_bsd_ring;
4663 }
4664
6f392d54
CW
4665 dev_priv->next_seqno = 1;
4666
68f95ba9
CW
4667 return 0;
4668
549f7365 4669cleanup_bsd_ring:
78501eac 4670 intel_cleanup_ring_buffer(&dev_priv->bsd_ring);
68f95ba9 4671cleanup_render_ring:
78501eac 4672 intel_cleanup_ring_buffer(&dev_priv->render_ring);
8187a2b7
ZN
4673 return ret;
4674}
4675
4676void
4677i915_gem_cleanup_ringbuffer(struct drm_device *dev)
4678{
4679 drm_i915_private_t *dev_priv = dev->dev_private;
4680
78501eac
CW
4681 intel_cleanup_ring_buffer(&dev_priv->render_ring);
4682 intel_cleanup_ring_buffer(&dev_priv->bsd_ring);
4683 intel_cleanup_ring_buffer(&dev_priv->blt_ring);
8187a2b7
ZN
4684}
4685
673a394b
EA
4686int
4687i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
4688 struct drm_file *file_priv)
4689{
4690 drm_i915_private_t *dev_priv = dev->dev_private;
4691 int ret;
4692
79e53945
JB
4693 if (drm_core_check_feature(dev, DRIVER_MODESET))
4694 return 0;
4695
ba1234d1 4696 if (atomic_read(&dev_priv->mm.wedged)) {
673a394b 4697 DRM_ERROR("Reenabling wedged hardware, good luck\n");
ba1234d1 4698 atomic_set(&dev_priv->mm.wedged, 0);
673a394b
EA
4699 }
4700
673a394b 4701 mutex_lock(&dev->struct_mutex);
9bb2d6f9
EA
4702 dev_priv->mm.suspended = 0;
4703
4704 ret = i915_gem_init_ringbuffer(dev);
d816f6ac
WF
4705 if (ret != 0) {
4706 mutex_unlock(&dev->struct_mutex);
9bb2d6f9 4707 return ret;
d816f6ac 4708 }
9bb2d6f9 4709
69dc4987 4710 BUG_ON(!list_empty(&dev_priv->mm.active_list));
852835f3 4711 BUG_ON(!list_empty(&dev_priv->render_ring.active_list));
87acb0a5 4712 BUG_ON(!list_empty(&dev_priv->bsd_ring.active_list));
549f7365 4713 BUG_ON(!list_empty(&dev_priv->blt_ring.active_list));
673a394b
EA
4714 BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
4715 BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
852835f3 4716 BUG_ON(!list_empty(&dev_priv->render_ring.request_list));
87acb0a5 4717 BUG_ON(!list_empty(&dev_priv->bsd_ring.request_list));
549f7365 4718 BUG_ON(!list_empty(&dev_priv->blt_ring.request_list));
673a394b 4719 mutex_unlock(&dev->struct_mutex);
dbb19d30 4720
5f35308b
CW
4721 ret = drm_irq_install(dev);
4722 if (ret)
4723 goto cleanup_ringbuffer;
dbb19d30 4724
673a394b 4725 return 0;
5f35308b
CW
4726
4727cleanup_ringbuffer:
4728 mutex_lock(&dev->struct_mutex);
4729 i915_gem_cleanup_ringbuffer(dev);
4730 dev_priv->mm.suspended = 1;
4731 mutex_unlock(&dev->struct_mutex);
4732
4733 return ret;
673a394b
EA
4734}
4735
4736int
4737i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
4738 struct drm_file *file_priv)
4739{
79e53945
JB
4740 if (drm_core_check_feature(dev, DRIVER_MODESET))
4741 return 0;
4742
dbb19d30 4743 drm_irq_uninstall(dev);
e6890f6f 4744 return i915_gem_idle(dev);
673a394b
EA
4745}
4746
4747void
4748i915_gem_lastclose(struct drm_device *dev)
4749{
4750 int ret;
673a394b 4751
e806b495
EA
4752 if (drm_core_check_feature(dev, DRIVER_MODESET))
4753 return;
4754
6dbe2772
KP
4755 ret = i915_gem_idle(dev);
4756 if (ret)
4757 DRM_ERROR("failed to idle hardware: %d\n", ret);
673a394b
EA
4758}
4759
64193406
CW
4760static void
4761init_ring_lists(struct intel_ring_buffer *ring)
4762{
4763 INIT_LIST_HEAD(&ring->active_list);
4764 INIT_LIST_HEAD(&ring->request_list);
4765 INIT_LIST_HEAD(&ring->gpu_write_list);
4766}
4767
673a394b
EA
4768void
4769i915_gem_load(struct drm_device *dev)
4770{
b5aa8a0f 4771 int i;
673a394b
EA
4772 drm_i915_private_t *dev_priv = dev->dev_private;
4773
69dc4987 4774 INIT_LIST_HEAD(&dev_priv->mm.active_list);
673a394b
EA
4775 INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
4776 INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
f13d3f73 4777 INIT_LIST_HEAD(&dev_priv->mm.pinned_list);
a09ba7fa 4778 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
be72615b 4779 INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list);
93a37f20 4780 INIT_LIST_HEAD(&dev_priv->mm.gtt_list);
64193406
CW
4781 init_ring_lists(&dev_priv->render_ring);
4782 init_ring_lists(&dev_priv->bsd_ring);
4783 init_ring_lists(&dev_priv->blt_ring);
007cc8ac
DV
4784 for (i = 0; i < 16; i++)
4785 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
673a394b
EA
4786 INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
4787 i915_gem_retire_work_handler);
30dbf0c0 4788 init_completion(&dev_priv->error_completion);
31169714 4789
94400120
DA
4790 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
4791 if (IS_GEN3(dev)) {
4792 u32 tmp = I915_READ(MI_ARB_STATE);
4793 if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) {
4794 /* arb state is a masked write, so set bit + bit in mask */
4795 tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT);
4796 I915_WRITE(MI_ARB_STATE, tmp);
4797 }
4798 }
4799
de151cf6 4800 /* Old X drivers will take 0-2 for front, back, depth buffers */
b397c836
EA
4801 if (!drm_core_check_feature(dev, DRIVER_MODESET))
4802 dev_priv->fence_reg_start = 3;
de151cf6 4803
a6c45cf0 4804 if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
de151cf6
JB
4805 dev_priv->num_fence_regs = 16;
4806 else
4807 dev_priv->num_fence_regs = 8;
4808
b5aa8a0f 4809 /* Initialize fence registers to zero */
a6c45cf0
CW
4810 switch (INTEL_INFO(dev)->gen) {
4811 case 6:
4812 for (i = 0; i < 16; i++)
4813 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (i * 8), 0);
4814 break;
4815 case 5:
4816 case 4:
b5aa8a0f
GH
4817 for (i = 0; i < 16; i++)
4818 I915_WRITE64(FENCE_REG_965_0 + (i * 8), 0);
a6c45cf0
CW
4819 break;
4820 case 3:
b5aa8a0f
GH
4821 if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4822 for (i = 0; i < 8; i++)
4823 I915_WRITE(FENCE_REG_945_8 + (i * 4), 0);
a6c45cf0
CW
4824 case 2:
4825 for (i = 0; i < 8; i++)
4826 I915_WRITE(FENCE_REG_830_0 + (i * 4), 0);
4827 break;
b5aa8a0f 4828 }
673a394b 4829 i915_gem_detect_bit_6_swizzle(dev);
6b95a207 4830 init_waitqueue_head(&dev_priv->pending_flip_queue);
17250b71
CW
4831
4832 dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink;
4833 dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
4834 register_shrinker(&dev_priv->mm.inactive_shrinker);
673a394b 4835}
71acb5eb
DA
4836
4837/*
4838 * Create a physically contiguous memory object for this object
4839 * e.g. for cursor + overlay regs
4840 */
995b6762
CW
4841static int i915_gem_init_phys_object(struct drm_device *dev,
4842 int id, int size, int align)
71acb5eb
DA
4843{
4844 drm_i915_private_t *dev_priv = dev->dev_private;
4845 struct drm_i915_gem_phys_object *phys_obj;
4846 int ret;
4847
4848 if (dev_priv->mm.phys_objs[id - 1] || !size)
4849 return 0;
4850
9a298b2a 4851 phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
71acb5eb
DA
4852 if (!phys_obj)
4853 return -ENOMEM;
4854
4855 phys_obj->id = id;
4856
6eeefaf3 4857 phys_obj->handle = drm_pci_alloc(dev, size, align);
71acb5eb
DA
4858 if (!phys_obj->handle) {
4859 ret = -ENOMEM;
4860 goto kfree_obj;
4861 }
4862#ifdef CONFIG_X86
4863 set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4864#endif
4865
4866 dev_priv->mm.phys_objs[id - 1] = phys_obj;
4867
4868 return 0;
4869kfree_obj:
9a298b2a 4870 kfree(phys_obj);
71acb5eb
DA
4871 return ret;
4872}
4873
995b6762 4874static void i915_gem_free_phys_object(struct drm_device *dev, int id)
71acb5eb
DA
4875{
4876 drm_i915_private_t *dev_priv = dev->dev_private;
4877 struct drm_i915_gem_phys_object *phys_obj;
4878
4879 if (!dev_priv->mm.phys_objs[id - 1])
4880 return;
4881
4882 phys_obj = dev_priv->mm.phys_objs[id - 1];
4883 if (phys_obj->cur_obj) {
4884 i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
4885 }
4886
4887#ifdef CONFIG_X86
4888 set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4889#endif
4890 drm_pci_free(dev, phys_obj->handle);
4891 kfree(phys_obj);
4892 dev_priv->mm.phys_objs[id - 1] = NULL;
4893}
4894
4895void i915_gem_free_all_phys_object(struct drm_device *dev)
4896{
4897 int i;
4898
260883c8 4899 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
71acb5eb
DA
4900 i915_gem_free_phys_object(dev, i);
4901}
4902
4903void i915_gem_detach_phys_object(struct drm_device *dev,
05394f39 4904 struct drm_i915_gem_object *obj)
71acb5eb 4905{
05394f39 4906 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
e5281ccd 4907 char *vaddr;
71acb5eb 4908 int i;
71acb5eb
DA
4909 int page_count;
4910
05394f39 4911 if (!obj->phys_obj)
71acb5eb 4912 return;
05394f39 4913 vaddr = obj->phys_obj->handle->vaddr;
71acb5eb 4914
05394f39 4915 page_count = obj->base.size / PAGE_SIZE;
71acb5eb 4916 for (i = 0; i < page_count; i++) {
e5281ccd
CW
4917 struct page *page = read_cache_page_gfp(mapping, i,
4918 GFP_HIGHUSER | __GFP_RECLAIMABLE);
4919 if (!IS_ERR(page)) {
4920 char *dst = kmap_atomic(page);
4921 memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE);
4922 kunmap_atomic(dst);
4923
4924 drm_clflush_pages(&page, 1);
4925
4926 set_page_dirty(page);
4927 mark_page_accessed(page);
4928 page_cache_release(page);
4929 }
71acb5eb 4930 }
40ce6575 4931 intel_gtt_chipset_flush();
d78b47b9 4932
05394f39
CW
4933 obj->phys_obj->cur_obj = NULL;
4934 obj->phys_obj = NULL;
71acb5eb
DA
4935}
4936
4937int
4938i915_gem_attach_phys_object(struct drm_device *dev,
05394f39 4939 struct drm_i915_gem_object *obj,
6eeefaf3
CW
4940 int id,
4941 int align)
71acb5eb 4942{
05394f39 4943 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
71acb5eb 4944 drm_i915_private_t *dev_priv = dev->dev_private;
71acb5eb
DA
4945 int ret = 0;
4946 int page_count;
4947 int i;
4948
4949 if (id > I915_MAX_PHYS_OBJECT)
4950 return -EINVAL;
4951
05394f39
CW
4952 if (obj->phys_obj) {
4953 if (obj->phys_obj->id == id)
71acb5eb
DA
4954 return 0;
4955 i915_gem_detach_phys_object(dev, obj);
4956 }
4957
71acb5eb
DA
4958 /* create a new object */
4959 if (!dev_priv->mm.phys_objs[id - 1]) {
4960 ret = i915_gem_init_phys_object(dev, id,
05394f39 4961 obj->base.size, align);
71acb5eb 4962 if (ret) {
05394f39
CW
4963 DRM_ERROR("failed to init phys object %d size: %zu\n",
4964 id, obj->base.size);
e5281ccd 4965 return ret;
71acb5eb
DA
4966 }
4967 }
4968
4969 /* bind to the object */
05394f39
CW
4970 obj->phys_obj = dev_priv->mm.phys_objs[id - 1];
4971 obj->phys_obj->cur_obj = obj;
71acb5eb 4972
05394f39 4973 page_count = obj->base.size / PAGE_SIZE;
71acb5eb
DA
4974
4975 for (i = 0; i < page_count; i++) {
e5281ccd
CW
4976 struct page *page;
4977 char *dst, *src;
4978
4979 page = read_cache_page_gfp(mapping, i,
4980 GFP_HIGHUSER | __GFP_RECLAIMABLE);
4981 if (IS_ERR(page))
4982 return PTR_ERR(page);
71acb5eb 4983
ff75b9bc 4984 src = kmap_atomic(page);
05394f39 4985 dst = obj->phys_obj->handle->vaddr + (i * PAGE_SIZE);
71acb5eb 4986 memcpy(dst, src, PAGE_SIZE);
3e4d3af5 4987 kunmap_atomic(src);
71acb5eb 4988
e5281ccd
CW
4989 mark_page_accessed(page);
4990 page_cache_release(page);
4991 }
d78b47b9 4992
71acb5eb 4993 return 0;
71acb5eb
DA
4994}
4995
4996static int
05394f39
CW
4997i915_gem_phys_pwrite(struct drm_device *dev,
4998 struct drm_i915_gem_object *obj,
71acb5eb
DA
4999 struct drm_i915_gem_pwrite *args,
5000 struct drm_file *file_priv)
5001{
05394f39 5002 void *vaddr = obj->phys_obj->handle->vaddr + args->offset;
b47b30cc 5003 char __user *user_data = (char __user *) (uintptr_t) args->data_ptr;
71acb5eb 5004
b47b30cc
CW
5005 if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
5006 unsigned long unwritten;
5007
5008 /* The physical object once assigned is fixed for the lifetime
5009 * of the obj, so we can safely drop the lock and continue
5010 * to access vaddr.
5011 */
5012 mutex_unlock(&dev->struct_mutex);
5013 unwritten = copy_from_user(vaddr, user_data, args->size);
5014 mutex_lock(&dev->struct_mutex);
5015 if (unwritten)
5016 return -EFAULT;
5017 }
71acb5eb 5018
40ce6575 5019 intel_gtt_chipset_flush();
71acb5eb
DA
5020 return 0;
5021}
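
/*
 * Editor's note -- illustrative sketch, not part of i915_gem.c: the locking
 * pattern used by i915_gem_phys_pwrite() above -- try a non-faulting copy
 * while holding the lock, and only if that fails drop the lock for the copy
 * that may sleep.  This is safe only because the destination (the phys
 * object's vaddr) is fixed for the object's lifetime.  Userspace model with
 * hypothetical names; the fast copy's failure is simulated by a flag.
 */
#include <pthread.h>
#include <string.h>

static pthread_mutex_t sketch_mutex = PTHREAD_MUTEX_INITIALIZER;

static int sketch_copy_fast(void *dst, const void *src, size_t n, int faults)
{
	if (faults)
		return -1;	/* stand-in for a faulting user pointer */
	memcpy(dst, src, n);
	return 0;
}

static int sketch_phys_pwrite(void *dst, const void *src, size_t n, int faults)
{
	pthread_mutex_lock(&sketch_mutex);
	if (sketch_copy_fast(dst, src, n, faults)) {
		/* dst stays valid, so dropping the lock here is fine. */
		pthread_mutex_unlock(&sketch_mutex);
		memcpy(dst, src, n);	/* the copy that may block or fault */
		pthread_mutex_lock(&sketch_mutex);
	}
	pthread_mutex_unlock(&sketch_mutex);
	return 0;
}
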
b962442e 5022
f787a5f5 5023void i915_gem_release(struct drm_device *dev, struct drm_file *file)
b962442e 5024{
f787a5f5 5025 struct drm_i915_file_private *file_priv = file->driver_priv;
b962442e
EA
5026
5027 /* Clean up our request list when the client is going away, so that
5028 * later retire_requests won't dereference our soon-to-be-gone
5029 * file_priv.
5030 */
1c25595f 5031 spin_lock(&file_priv->mm.lock);
f787a5f5
CW
5032 while (!list_empty(&file_priv->mm.request_list)) {
5033 struct drm_i915_gem_request *request;
5034
5035 request = list_first_entry(&file_priv->mm.request_list,
5036 struct drm_i915_gem_request,
5037 client_list);
5038 list_del(&request->client_list);
5039 request->file_priv = NULL;
5040 }
1c25595f 5041 spin_unlock(&file_priv->mm.lock);
b962442e 5042}
31169714 5043
1637ef41
CW
5044static int
5045i915_gpu_is_active(struct drm_device *dev)
5046{
5047 drm_i915_private_t *dev_priv = dev->dev_private;
5048 int lists_empty;
5049
1637ef41 5050 lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
17250b71 5051 list_empty(&dev_priv->mm.active_list);
1637ef41
CW
5052
5053 return !lists_empty;
5054}
5055
31169714 5056static int
17250b71
CW
5057i915_gem_inactive_shrink(struct shrinker *shrinker,
5058 int nr_to_scan,
5059 gfp_t gfp_mask)
31169714 5060{
17250b71
CW
5061 struct drm_i915_private *dev_priv =
5062 container_of(shrinker,
5063 struct drm_i915_private,
5064 mm.inactive_shrinker);
5065 struct drm_device *dev = dev_priv->dev;
5066 struct drm_i915_gem_object *obj, *next;
5067 int cnt;
5068
5069 if (!mutex_trylock(&dev->struct_mutex))
bbe2e11a 5070 return 0;
31169714
CW
5071
5072 /* "fast-path" to count number of available objects */
5073 if (nr_to_scan == 0) {
17250b71
CW
5074 cnt = 0;
5075 list_for_each_entry(obj,
5076 &dev_priv->mm.inactive_list,
5077 mm_list)
5078 cnt++;
5079 mutex_unlock(&dev->struct_mutex);
5080 return cnt / 100 * sysctl_vfs_cache_pressure;
31169714
CW
5081 }
5082
1637ef41 5083rescan:
31169714 5084 /* first scan for clean buffers */
17250b71 5085 i915_gem_retire_requests(dev);
31169714 5086
17250b71
CW
5087 list_for_each_entry_safe(obj, next,
5088 &dev_priv->mm.inactive_list,
5089 mm_list) {
5090 if (i915_gem_object_is_purgeable(obj)) {
2021746e
CW
5091 if (i915_gem_object_unbind(obj) == 0 &&
5092 --nr_to_scan == 0)
17250b71 5093 break;
31169714 5094 }
31169714
CW
5095 }
5096
5097 /* second pass, evict/count anything still on the inactive list */
17250b71
CW
5098 cnt = 0;
5099 list_for_each_entry_safe(obj, next,
5100 &dev_priv->mm.inactive_list,
5101 mm_list) {
2021746e
CW
5102 if (nr_to_scan &&
5103 i915_gem_object_unbind(obj) == 0)
17250b71 5104 nr_to_scan--;
2021746e 5105 else
17250b71
CW
5106 cnt++;
5107 }
5108
5109 if (nr_to_scan && i915_gpu_is_active(dev)) {
1637ef41
CW
5110 /*
5111 * We are desperate for pages, so as a last resort, wait
5112 * for the GPU to finish and discard whatever we can.
5113 * This dramatically reduces the number of OOM-killer
5114 * events whilst running the GPU aggressively.
5115 */
17250b71 5116 if (i915_gpu_idle(dev) == 0)
1637ef41
CW
5117 goto rescan;
5118 }
17250b71
CW
5119 mutex_unlock(&dev->struct_mutex);
5120 return cnt / 100 * sysctl_vfs_cache_pressure;
31169714 5121}
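
/*
 * Editor's note -- illustrative sketch, not part of i915_gem.c: the old
 * shrinker contract used above.  One callback doubles as "count" when
 * nr_to_scan == 0 and as "scan, then report what is left" otherwise; the
 * cache structure and names below are hypothetical.
 */
#include <stddef.h>

struct sketch_cache {
	size_t purgeable;	/* objects we may drop on request */
};

static size_t sketch_shrink(struct sketch_cache *c, size_t nr_to_scan)
{
	if (nr_to_scan == 0)
		return c->purgeable;	/* fast path: just report */

	while (nr_to_scan-- && c->purgeable)
		c->purgeable--;		/* "evict" one object */

	return c->purgeable;		/* what the core may ask for next */
}
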