drm/i915: Implement fair lru eviction across both rings. (v2)
[GitHub/LineageOS/android_kernel_motorola_exynos9610.git] / drivers / gpu / drm / i915 / i915_gem.c
CommitLineData
673a394b
EA
1/*
2 * Copyright © 2008 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 *
26 */
27
28#include "drmP.h"
29#include "drm.h"
30#include "i915_drm.h"
31#include "i915_drv.h"
1c5d22f7 32#include "i915_trace.h"
652c393a 33#include "intel_drv.h"
5a0e3ad6 34#include <linux/slab.h>
673a394b 35#include <linux/swap.h>
79e53945 36#include <linux/pci.h>
673a394b 37
0108a3ed 38static uint32_t i915_gem_get_gtt_alignment(struct drm_gem_object *obj);
2dafb1e0 39static int i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj);
e47c68e9
EA
40static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj);
41static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj);
e47c68e9
EA
42static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj,
43 int write);
44static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
45 uint64_t offset,
46 uint64_t size);
47static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj);
673a394b 48static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
de151cf6
JB
49static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
50 unsigned alignment);
de151cf6 51static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
71acb5eb
DA
52static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
53 struct drm_i915_gem_pwrite *args,
54 struct drm_file *file_priv);
be72615b 55static void i915_gem_free_object_tail(struct drm_gem_object *obj);
673a394b 56
31169714
CW
57static LIST_HEAD(shrink_list);
58static DEFINE_SPINLOCK(shrink_list_lock);
59
79e53945
JB
60int i915_gem_do_init(struct drm_device *dev, unsigned long start,
61 unsigned long end)
673a394b
EA
62{
63 drm_i915_private_t *dev_priv = dev->dev_private;
673a394b 64
79e53945
JB
65 if (start >= end ||
66 (start & (PAGE_SIZE - 1)) != 0 ||
67 (end & (PAGE_SIZE - 1)) != 0) {
673a394b
EA
68 return -EINVAL;
69 }
70
79e53945
JB
71 drm_mm_init(&dev_priv->mm.gtt_space, start,
72 end - start);
673a394b 73
79e53945
JB
74 dev->gtt_total = (uint32_t) (end - start);
75
76 return 0;
77}
673a394b 78
79e53945
JB
79int
80i915_gem_init_ioctl(struct drm_device *dev, void *data,
81 struct drm_file *file_priv)
82{
83 struct drm_i915_gem_init *args = data;
84 int ret;
85
86 mutex_lock(&dev->struct_mutex);
87 ret = i915_gem_do_init(dev, args->gtt_start, args->gtt_end);
673a394b
EA
88 mutex_unlock(&dev->struct_mutex);
89
79e53945 90 return ret;
673a394b
EA
91}
92
5a125c3c
EA
93int
94i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
95 struct drm_file *file_priv)
96{
5a125c3c 97 struct drm_i915_gem_get_aperture *args = data;
5a125c3c
EA
98
99 if (!(dev->driver->driver_features & DRIVER_GEM))
100 return -ENODEV;
101
102 args->aper_size = dev->gtt_total;
2678d9d6
KP
103 args->aper_available_size = (args->aper_size -
104 atomic_read(&dev->pin_memory));
5a125c3c
EA
105
106 return 0;
107}
108
673a394b
EA
109
110/**
111 * Creates a new mm object and returns a handle to it.
112 */
113int
114i915_gem_create_ioctl(struct drm_device *dev, void *data,
115 struct drm_file *file_priv)
116{
117 struct drm_i915_gem_create *args = data;
118 struct drm_gem_object *obj;
a1a2d1d3
PP
119 int ret;
120 u32 handle;
673a394b
EA
121
122 args->size = roundup(args->size, PAGE_SIZE);
123
124 /* Allocate the new object */
ac52bc56 125 obj = i915_gem_alloc_object(dev, args->size);
673a394b
EA
126 if (obj == NULL)
127 return -ENOMEM;
128
129 ret = drm_gem_handle_create(file_priv, obj, &handle);
86f100b1 130 drm_gem_object_unreference_unlocked(obj);
673a394b
EA
131 if (ret)
132 return ret;
133
134 args->handle = handle;
135
136 return 0;
137}
138
eb01459f
EA
139static inline int
140fast_shmem_read(struct page **pages,
141 loff_t page_base, int page_offset,
142 char __user *data,
143 int length)
144{
145 char __iomem *vaddr;
2bc43b5c 146 int unwritten;
eb01459f
EA
147
148 vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
149 if (vaddr == NULL)
150 return -ENOMEM;
2bc43b5c 151 unwritten = __copy_to_user_inatomic(data, vaddr + page_offset, length);
eb01459f
EA
152 kunmap_atomic(vaddr, KM_USER0);
153
2bc43b5c
FM
154 if (unwritten)
155 return -EFAULT;
156
157 return 0;
eb01459f
EA
158}
159
280b713b
EA
160static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj)
161{
162 drm_i915_private_t *dev_priv = obj->dev->dev_private;
23010e43 163 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
280b713b
EA
164
165 return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
166 obj_priv->tiling_mode != I915_TILING_NONE;
167}
168
/**
 * Copies @length bytes from @src_page + @src_offset to
 * @dst_page + @dst_offset, mapping both pages with kmap() for the duration
 * of the copy.
 */
static inline void
slow_shmem_copy(struct page *dst_page,
		int dst_offset,
		struct page *src_page,
		int src_offset,
		int length)
{
	char *to = kmap(dst_page);
	char *from = kmap(src_page);

	memcpy(to + dst_offset, from + src_offset, length);

	/* Unmap in the reverse order of mapping. */
	kunmap(src_page);
	kunmap(dst_page);
}
186
/**
 * Copies @length bytes between a GPU backing page and a CPU page, applying
 * the bit-17 swizzle when the GPU page requires it.
 *
 * @is_read non-zero copies GPU -> CPU, zero copies CPU -> GPU.  When bit 17
 * of the GPU page's physical address is clear the plain slow_shmem_copy()
 * path is used.  Otherwise the data is moved one 64-byte cacheline at a
 * time with bit 6 of the GPU offset flipped.
 */
static inline void
slow_shmem_bit17_copy(struct page *gpu_page,
		      int gpu_offset,
		      struct page *cpu_page,
		      int cpu_offset,
		      int length,
		      int is_read)
{
	char *gpu_vaddr, *cpu_vaddr;

	/* Use the unswizzled path if this page isn't affected. */
	if (!(page_to_phys(gpu_page) & (1 << 17))) {
		if (is_read)
			slow_shmem_copy(cpu_page, cpu_offset,
					gpu_page, gpu_offset, length);
		else
			slow_shmem_copy(gpu_page, gpu_offset,
					cpu_page, cpu_offset, length);
		return;
	}

	gpu_vaddr = kmap(gpu_page);
	cpu_vaddr = kmap(cpu_page);

	/* Copy the data, XORing A6 with A17 (1). The user already knows he's
	 * XORing with the other bits (A9 for Y, A9 and A10 for X)
	 */
	while (length > 0) {
		char *gpu_ptr = gpu_vaddr + (gpu_offset ^ 64);
		char *cpu_ptr = cpu_vaddr + cpu_offset;
		/* Bytes left before the next 64-byte cacheline boundary. */
		int chunk = 64 - (gpu_offset & 63);

		if (chunk > length)
			chunk = length;

		if (is_read)
			memcpy(cpu_ptr, gpu_ptr, chunk);
		else
			memcpy(gpu_ptr, cpu_ptr, chunk);

		cpu_offset += chunk;
		gpu_offset += chunk;
		length -= chunk;
	}

	kunmap(cpu_page);
	kunmap(gpu_page);
}
235
eb01459f
EA
236/**
237 * This is the fast shmem pread path, which attempts to copy_from_user directly
238 * from the backing pages of the object to the user's address space. On a
239 * fault, it fails so we can fall back to i915_gem_shmem_pwrite_slow().
240 */
241static int
242i915_gem_shmem_pread_fast(struct drm_device *dev, struct drm_gem_object *obj,
243 struct drm_i915_gem_pread *args,
244 struct drm_file *file_priv)
245{
23010e43 246 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
eb01459f
EA
247 ssize_t remain;
248 loff_t offset, page_base;
249 char __user *user_data;
250 int page_offset, page_length;
251 int ret;
252
253 user_data = (char __user *) (uintptr_t) args->data_ptr;
254 remain = args->size;
255
256 mutex_lock(&dev->struct_mutex);
257
4bdadb97 258 ret = i915_gem_object_get_pages(obj, 0);
eb01459f
EA
259 if (ret != 0)
260 goto fail_unlock;
261
262 ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
263 args->size);
264 if (ret != 0)
265 goto fail_put_pages;
266
23010e43 267 obj_priv = to_intel_bo(obj);
eb01459f
EA
268 offset = args->offset;
269
270 while (remain > 0) {
271 /* Operation in this page
272 *
273 * page_base = page offset within aperture
274 * page_offset = offset within page
275 * page_length = bytes to copy for this page
276 */
277 page_base = (offset & ~(PAGE_SIZE-1));
278 page_offset = offset & (PAGE_SIZE-1);
279 page_length = remain;
280 if ((page_offset + remain) > PAGE_SIZE)
281 page_length = PAGE_SIZE - page_offset;
282
283 ret = fast_shmem_read(obj_priv->pages,
284 page_base, page_offset,
285 user_data, page_length);
286 if (ret)
287 goto fail_put_pages;
288
289 remain -= page_length;
290 user_data += page_length;
291 offset += page_length;
292 }
293
294fail_put_pages:
295 i915_gem_object_put_pages(obj);
296fail_unlock:
297 mutex_unlock(&dev->struct_mutex);
298
299 return ret;
300}
301
07f73f69
CW
302static int
303i915_gem_object_get_pages_or_evict(struct drm_gem_object *obj)
304{
305 int ret;
306
4bdadb97 307 ret = i915_gem_object_get_pages(obj, __GFP_NORETRY | __GFP_NOWARN);
07f73f69
CW
308
309 /* If we've insufficient memory to map in the pages, attempt
310 * to make some space by throwing out some old buffers.
311 */
312 if (ret == -ENOMEM) {
313 struct drm_device *dev = obj->dev;
07f73f69 314
0108a3ed
DV
315 ret = i915_gem_evict_something(dev, obj->size,
316 i915_gem_get_gtt_alignment(obj));
07f73f69
CW
317 if (ret)
318 return ret;
319
4bdadb97 320 ret = i915_gem_object_get_pages(obj, 0);
07f73f69
CW
321 }
322
323 return ret;
324}
325
eb01459f
EA
326/**
327 * This is the fallback shmem pread path, which allocates temporary storage
328 * in kernel space to copy_to_user into outside of the struct_mutex, so we
329 * can copy out of the object's backing pages while holding the struct mutex
330 * and not take page faults.
331 */
332static int
333i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
334 struct drm_i915_gem_pread *args,
335 struct drm_file *file_priv)
336{
23010e43 337 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
eb01459f
EA
338 struct mm_struct *mm = current->mm;
339 struct page **user_pages;
340 ssize_t remain;
341 loff_t offset, pinned_pages, i;
342 loff_t first_data_page, last_data_page, num_pages;
343 int shmem_page_index, shmem_page_offset;
344 int data_page_index, data_page_offset;
345 int page_length;
346 int ret;
347 uint64_t data_ptr = args->data_ptr;
280b713b 348 int do_bit17_swizzling;
eb01459f
EA
349
350 remain = args->size;
351
352 /* Pin the user pages containing the data. We can't fault while
353 * holding the struct mutex, yet we want to hold it while
354 * dereferencing the user data.
355 */
356 first_data_page = data_ptr / PAGE_SIZE;
357 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
358 num_pages = last_data_page - first_data_page + 1;
359
8e7d2b2c 360 user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
eb01459f
EA
361 if (user_pages == NULL)
362 return -ENOMEM;
363
364 down_read(&mm->mmap_sem);
365 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
e5e9ecde 366 num_pages, 1, 0, user_pages, NULL);
eb01459f
EA
367 up_read(&mm->mmap_sem);
368 if (pinned_pages < num_pages) {
369 ret = -EFAULT;
370 goto fail_put_user_pages;
371 }
372
280b713b
EA
373 do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
374
eb01459f
EA
375 mutex_lock(&dev->struct_mutex);
376
07f73f69
CW
377 ret = i915_gem_object_get_pages_or_evict(obj);
378 if (ret)
eb01459f
EA
379 goto fail_unlock;
380
381 ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
382 args->size);
383 if (ret != 0)
384 goto fail_put_pages;
385
23010e43 386 obj_priv = to_intel_bo(obj);
eb01459f
EA
387 offset = args->offset;
388
389 while (remain > 0) {
390 /* Operation in this page
391 *
392 * shmem_page_index = page number within shmem file
393 * shmem_page_offset = offset within page in shmem file
394 * data_page_index = page number in get_user_pages return
395 * data_page_offset = offset with data_page_index page.
396 * page_length = bytes to copy for this page
397 */
398 shmem_page_index = offset / PAGE_SIZE;
399 shmem_page_offset = offset & ~PAGE_MASK;
400 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
401 data_page_offset = data_ptr & ~PAGE_MASK;
402
403 page_length = remain;
404 if ((shmem_page_offset + page_length) > PAGE_SIZE)
405 page_length = PAGE_SIZE - shmem_page_offset;
406 if ((data_page_offset + page_length) > PAGE_SIZE)
407 page_length = PAGE_SIZE - data_page_offset;
408
280b713b 409 if (do_bit17_swizzling) {
99a03df5 410 slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
280b713b 411 shmem_page_offset,
99a03df5
CW
412 user_pages[data_page_index],
413 data_page_offset,
414 page_length,
415 1);
416 } else {
417 slow_shmem_copy(user_pages[data_page_index],
418 data_page_offset,
419 obj_priv->pages[shmem_page_index],
420 shmem_page_offset,
421 page_length);
280b713b 422 }
eb01459f
EA
423
424 remain -= page_length;
425 data_ptr += page_length;
426 offset += page_length;
427 }
428
429fail_put_pages:
430 i915_gem_object_put_pages(obj);
431fail_unlock:
432 mutex_unlock(&dev->struct_mutex);
433fail_put_user_pages:
434 for (i = 0; i < pinned_pages; i++) {
435 SetPageDirty(user_pages[i]);
436 page_cache_release(user_pages[i]);
437 }
8e7d2b2c 438 drm_free_large(user_pages);
eb01459f
EA
439
440 return ret;
441}
442
673a394b
EA
443/**
444 * Reads data from the object referenced by handle.
445 *
446 * On error, the contents of *data are undefined.
447 */
448int
449i915_gem_pread_ioctl(struct drm_device *dev, void *data,
450 struct drm_file *file_priv)
451{
452 struct drm_i915_gem_pread *args = data;
453 struct drm_gem_object *obj;
454 struct drm_i915_gem_object *obj_priv;
673a394b
EA
455 int ret;
456
457 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
458 if (obj == NULL)
459 return -EBADF;
23010e43 460 obj_priv = to_intel_bo(obj);
673a394b
EA
461
462 /* Bounds check source.
463 *
464 * XXX: This could use review for overflow issues...
465 */
466 if (args->offset > obj->size || args->size > obj->size ||
467 args->offset + args->size > obj->size) {
bc9025bd 468 drm_gem_object_unreference_unlocked(obj);
673a394b
EA
469 return -EINVAL;
470 }
471
280b713b 472 if (i915_gem_object_needs_bit17_swizzle(obj)) {
eb01459f 473 ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv);
280b713b
EA
474 } else {
475 ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv);
476 if (ret != 0)
477 ret = i915_gem_shmem_pread_slow(dev, obj, args,
478 file_priv);
479 }
673a394b 480
bc9025bd 481 drm_gem_object_unreference_unlocked(obj);
673a394b 482
eb01459f 483 return ret;
673a394b
EA
484}
485
0839ccb8
KP
486/* This is the fast write path which cannot handle
487 * page faults in the source data
9b7530cc 488 */
0839ccb8
KP
489
490static inline int
491fast_user_write(struct io_mapping *mapping,
492 loff_t page_base, int page_offset,
493 char __user *user_data,
494 int length)
9b7530cc 495{
9b7530cc 496 char *vaddr_atomic;
0839ccb8 497 unsigned long unwritten;
9b7530cc 498
fca3ec01 499 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base, KM_USER0);
0839ccb8
KP
500 unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
501 user_data, length);
fca3ec01 502 io_mapping_unmap_atomic(vaddr_atomic, KM_USER0);
0839ccb8
KP
503 if (unwritten)
504 return -EFAULT;
505 return 0;
506}
507
508/* Here's the write path which can sleep for
509 * page faults
510 */
511
ab34c226 512static inline void
3de09aa3
EA
513slow_kernel_write(struct io_mapping *mapping,
514 loff_t gtt_base, int gtt_offset,
515 struct page *user_page, int user_offset,
516 int length)
0839ccb8 517{
ab34c226
CW
518 char __iomem *dst_vaddr;
519 char *src_vaddr;
0839ccb8 520
ab34c226
CW
521 dst_vaddr = io_mapping_map_wc(mapping, gtt_base);
522 src_vaddr = kmap(user_page);
523
524 memcpy_toio(dst_vaddr + gtt_offset,
525 src_vaddr + user_offset,
526 length);
527
528 kunmap(user_page);
529 io_mapping_unmap(dst_vaddr);
9b7530cc
LT
530}
531
40123c1f
EA
532static inline int
533fast_shmem_write(struct page **pages,
534 loff_t page_base, int page_offset,
535 char __user *data,
536 int length)
537{
538 char __iomem *vaddr;
d0088775 539 unsigned long unwritten;
40123c1f
EA
540
541 vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
542 if (vaddr == NULL)
543 return -ENOMEM;
d0088775 544 unwritten = __copy_from_user_inatomic(vaddr + page_offset, data, length);
40123c1f
EA
545 kunmap_atomic(vaddr, KM_USER0);
546
d0088775
DA
547 if (unwritten)
548 return -EFAULT;
40123c1f
EA
549 return 0;
550}
551
3de09aa3
EA
552/**
553 * This is the fast pwrite path, where we copy the data directly from the
554 * user into the GTT, uncached.
555 */
673a394b 556static int
3de09aa3
EA
557i915_gem_gtt_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
558 struct drm_i915_gem_pwrite *args,
559 struct drm_file *file_priv)
673a394b 560{
23010e43 561 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
0839ccb8 562 drm_i915_private_t *dev_priv = dev->dev_private;
673a394b 563 ssize_t remain;
0839ccb8 564 loff_t offset, page_base;
673a394b 565 char __user *user_data;
0839ccb8
KP
566 int page_offset, page_length;
567 int ret;
673a394b
EA
568
569 user_data = (char __user *) (uintptr_t) args->data_ptr;
570 remain = args->size;
571 if (!access_ok(VERIFY_READ, user_data, remain))
572 return -EFAULT;
573
574
575 mutex_lock(&dev->struct_mutex);
576 ret = i915_gem_object_pin(obj, 0);
577 if (ret) {
578 mutex_unlock(&dev->struct_mutex);
579 return ret;
580 }
2ef7eeaa 581 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
673a394b
EA
582 if (ret)
583 goto fail;
584
23010e43 585 obj_priv = to_intel_bo(obj);
673a394b 586 offset = obj_priv->gtt_offset + args->offset;
673a394b
EA
587
588 while (remain > 0) {
589 /* Operation in this page
590 *
0839ccb8
KP
591 * page_base = page offset within aperture
592 * page_offset = offset within page
593 * page_length = bytes to copy for this page
673a394b 594 */
0839ccb8
KP
595 page_base = (offset & ~(PAGE_SIZE-1));
596 page_offset = offset & (PAGE_SIZE-1);
597 page_length = remain;
598 if ((page_offset + remain) > PAGE_SIZE)
599 page_length = PAGE_SIZE - page_offset;
600
601 ret = fast_user_write (dev_priv->mm.gtt_mapping, page_base,
602 page_offset, user_data, page_length);
603
604 /* If we get a fault while copying data, then (presumably) our
3de09aa3
EA
605 * source page isn't available. Return the error and we'll
606 * retry in the slow path.
0839ccb8 607 */
3de09aa3
EA
608 if (ret)
609 goto fail;
673a394b 610
0839ccb8
KP
611 remain -= page_length;
612 user_data += page_length;
613 offset += page_length;
673a394b 614 }
673a394b
EA
615
616fail:
617 i915_gem_object_unpin(obj);
618 mutex_unlock(&dev->struct_mutex);
619
620 return ret;
621}
622
3de09aa3
EA
623/**
624 * This is the fallback GTT pwrite path, which uses get_user_pages to pin
625 * the memory and maps it using kmap_atomic for copying.
626 *
627 * This code resulted in x11perf -rgb10text consuming about 10% more CPU
628 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
629 */
3043c60c 630static int
3de09aa3
EA
631i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
632 struct drm_i915_gem_pwrite *args,
633 struct drm_file *file_priv)
673a394b 634{
23010e43 635 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3de09aa3
EA
636 drm_i915_private_t *dev_priv = dev->dev_private;
637 ssize_t remain;
638 loff_t gtt_page_base, offset;
639 loff_t first_data_page, last_data_page, num_pages;
640 loff_t pinned_pages, i;
641 struct page **user_pages;
642 struct mm_struct *mm = current->mm;
643 int gtt_page_offset, data_page_offset, data_page_index, page_length;
673a394b 644 int ret;
3de09aa3
EA
645 uint64_t data_ptr = args->data_ptr;
646
647 remain = args->size;
648
649 /* Pin the user pages containing the data. We can't fault while
650 * holding the struct mutex, and all of the pwrite implementations
651 * want to hold it while dereferencing the user data.
652 */
653 first_data_page = data_ptr / PAGE_SIZE;
654 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
655 num_pages = last_data_page - first_data_page + 1;
656
8e7d2b2c 657 user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
3de09aa3
EA
658 if (user_pages == NULL)
659 return -ENOMEM;
660
661 down_read(&mm->mmap_sem);
662 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
663 num_pages, 0, 0, user_pages, NULL);
664 up_read(&mm->mmap_sem);
665 if (pinned_pages < num_pages) {
666 ret = -EFAULT;
667 goto out_unpin_pages;
668 }
673a394b
EA
669
670 mutex_lock(&dev->struct_mutex);
3de09aa3
EA
671 ret = i915_gem_object_pin(obj, 0);
672 if (ret)
673 goto out_unlock;
674
675 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
676 if (ret)
677 goto out_unpin_object;
678
23010e43 679 obj_priv = to_intel_bo(obj);
3de09aa3
EA
680 offset = obj_priv->gtt_offset + args->offset;
681
682 while (remain > 0) {
683 /* Operation in this page
684 *
685 * gtt_page_base = page offset within aperture
686 * gtt_page_offset = offset within page in aperture
687 * data_page_index = page number in get_user_pages return
688 * data_page_offset = offset with data_page_index page.
689 * page_length = bytes to copy for this page
690 */
691 gtt_page_base = offset & PAGE_MASK;
692 gtt_page_offset = offset & ~PAGE_MASK;
693 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
694 data_page_offset = data_ptr & ~PAGE_MASK;
695
696 page_length = remain;
697 if ((gtt_page_offset + page_length) > PAGE_SIZE)
698 page_length = PAGE_SIZE - gtt_page_offset;
699 if ((data_page_offset + page_length) > PAGE_SIZE)
700 page_length = PAGE_SIZE - data_page_offset;
701
ab34c226
CW
702 slow_kernel_write(dev_priv->mm.gtt_mapping,
703 gtt_page_base, gtt_page_offset,
704 user_pages[data_page_index],
705 data_page_offset,
706 page_length);
3de09aa3
EA
707
708 remain -= page_length;
709 offset += page_length;
710 data_ptr += page_length;
711 }
712
713out_unpin_object:
714 i915_gem_object_unpin(obj);
715out_unlock:
716 mutex_unlock(&dev->struct_mutex);
717out_unpin_pages:
718 for (i = 0; i < pinned_pages; i++)
719 page_cache_release(user_pages[i]);
8e7d2b2c 720 drm_free_large(user_pages);
3de09aa3
EA
721
722 return ret;
723}
724
40123c1f
EA
725/**
726 * This is the fast shmem pwrite path, which attempts to directly
727 * copy_from_user into the kmapped pages backing the object.
728 */
3043c60c 729static int
40123c1f
EA
730i915_gem_shmem_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
731 struct drm_i915_gem_pwrite *args,
732 struct drm_file *file_priv)
673a394b 733{
23010e43 734 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
40123c1f
EA
735 ssize_t remain;
736 loff_t offset, page_base;
737 char __user *user_data;
738 int page_offset, page_length;
673a394b 739 int ret;
40123c1f
EA
740
741 user_data = (char __user *) (uintptr_t) args->data_ptr;
742 remain = args->size;
673a394b
EA
743
744 mutex_lock(&dev->struct_mutex);
745
4bdadb97 746 ret = i915_gem_object_get_pages(obj, 0);
40123c1f
EA
747 if (ret != 0)
748 goto fail_unlock;
673a394b 749
e47c68e9 750 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
40123c1f
EA
751 if (ret != 0)
752 goto fail_put_pages;
753
23010e43 754 obj_priv = to_intel_bo(obj);
40123c1f
EA
755 offset = args->offset;
756 obj_priv->dirty = 1;
757
758 while (remain > 0) {
759 /* Operation in this page
760 *
761 * page_base = page offset within aperture
762 * page_offset = offset within page
763 * page_length = bytes to copy for this page
764 */
765 page_base = (offset & ~(PAGE_SIZE-1));
766 page_offset = offset & (PAGE_SIZE-1);
767 page_length = remain;
768 if ((page_offset + remain) > PAGE_SIZE)
769 page_length = PAGE_SIZE - page_offset;
770
771 ret = fast_shmem_write(obj_priv->pages,
772 page_base, page_offset,
773 user_data, page_length);
774 if (ret)
775 goto fail_put_pages;
776
777 remain -= page_length;
778 user_data += page_length;
779 offset += page_length;
780 }
781
782fail_put_pages:
783 i915_gem_object_put_pages(obj);
784fail_unlock:
785 mutex_unlock(&dev->struct_mutex);
786
787 return ret;
788}
789
790/**
791 * This is the fallback shmem pwrite path, which uses get_user_pages to pin
792 * the memory and maps it using kmap_atomic for copying.
793 *
794 * This avoids taking mmap_sem for faulting on the user's address while the
795 * struct_mutex is held.
796 */
797static int
798i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
799 struct drm_i915_gem_pwrite *args,
800 struct drm_file *file_priv)
801{
23010e43 802 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
40123c1f
EA
803 struct mm_struct *mm = current->mm;
804 struct page **user_pages;
805 ssize_t remain;
806 loff_t offset, pinned_pages, i;
807 loff_t first_data_page, last_data_page, num_pages;
808 int shmem_page_index, shmem_page_offset;
809 int data_page_index, data_page_offset;
810 int page_length;
811 int ret;
812 uint64_t data_ptr = args->data_ptr;
280b713b 813 int do_bit17_swizzling;
40123c1f
EA
814
815 remain = args->size;
816
817 /* Pin the user pages containing the data. We can't fault while
818 * holding the struct mutex, and all of the pwrite implementations
819 * want to hold it while dereferencing the user data.
820 */
821 first_data_page = data_ptr / PAGE_SIZE;
822 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
823 num_pages = last_data_page - first_data_page + 1;
824
8e7d2b2c 825 user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
40123c1f
EA
826 if (user_pages == NULL)
827 return -ENOMEM;
828
829 down_read(&mm->mmap_sem);
830 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
831 num_pages, 0, 0, user_pages, NULL);
832 up_read(&mm->mmap_sem);
833 if (pinned_pages < num_pages) {
834 ret = -EFAULT;
835 goto fail_put_user_pages;
673a394b
EA
836 }
837
280b713b
EA
838 do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
839
40123c1f
EA
840 mutex_lock(&dev->struct_mutex);
841
07f73f69
CW
842 ret = i915_gem_object_get_pages_or_evict(obj);
843 if (ret)
40123c1f
EA
844 goto fail_unlock;
845
846 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
847 if (ret != 0)
848 goto fail_put_pages;
849
23010e43 850 obj_priv = to_intel_bo(obj);
673a394b 851 offset = args->offset;
40123c1f 852 obj_priv->dirty = 1;
673a394b 853
40123c1f
EA
854 while (remain > 0) {
855 /* Operation in this page
856 *
857 * shmem_page_index = page number within shmem file
858 * shmem_page_offset = offset within page in shmem file
859 * data_page_index = page number in get_user_pages return
860 * data_page_offset = offset with data_page_index page.
861 * page_length = bytes to copy for this page
862 */
863 shmem_page_index = offset / PAGE_SIZE;
864 shmem_page_offset = offset & ~PAGE_MASK;
865 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
866 data_page_offset = data_ptr & ~PAGE_MASK;
867
868 page_length = remain;
869 if ((shmem_page_offset + page_length) > PAGE_SIZE)
870 page_length = PAGE_SIZE - shmem_page_offset;
871 if ((data_page_offset + page_length) > PAGE_SIZE)
872 page_length = PAGE_SIZE - data_page_offset;
873
280b713b 874 if (do_bit17_swizzling) {
99a03df5 875 slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
280b713b
EA
876 shmem_page_offset,
877 user_pages[data_page_index],
878 data_page_offset,
99a03df5
CW
879 page_length,
880 0);
881 } else {
882 slow_shmem_copy(obj_priv->pages[shmem_page_index],
883 shmem_page_offset,
884 user_pages[data_page_index],
885 data_page_offset,
886 page_length);
280b713b 887 }
40123c1f
EA
888
889 remain -= page_length;
890 data_ptr += page_length;
891 offset += page_length;
673a394b
EA
892 }
893
40123c1f
EA
894fail_put_pages:
895 i915_gem_object_put_pages(obj);
896fail_unlock:
673a394b 897 mutex_unlock(&dev->struct_mutex);
40123c1f
EA
898fail_put_user_pages:
899 for (i = 0; i < pinned_pages; i++)
900 page_cache_release(user_pages[i]);
8e7d2b2c 901 drm_free_large(user_pages);
673a394b 902
40123c1f 903 return ret;
673a394b
EA
904}
905
906/**
907 * Writes data to the object referenced by handle.
908 *
909 * On error, the contents of the buffer that were to be modified are undefined.
910 */
911int
912i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
913 struct drm_file *file_priv)
914{
915 struct drm_i915_gem_pwrite *args = data;
916 struct drm_gem_object *obj;
917 struct drm_i915_gem_object *obj_priv;
918 int ret = 0;
919
920 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
921 if (obj == NULL)
922 return -EBADF;
23010e43 923 obj_priv = to_intel_bo(obj);
673a394b
EA
924
925 /* Bounds check destination.
926 *
927 * XXX: This could use review for overflow issues...
928 */
929 if (args->offset > obj->size || args->size > obj->size ||
930 args->offset + args->size > obj->size) {
bc9025bd 931 drm_gem_object_unreference_unlocked(obj);
673a394b
EA
932 return -EINVAL;
933 }
934
935 /* We can only do the GTT pwrite on untiled buffers, as otherwise
936 * it would end up going through the fenced access, and we'll get
937 * different detiling behavior between reading and writing.
938 * pread/pwrite currently are reading and writing from the CPU
939 * perspective, requiring manual detiling by the client.
940 */
71acb5eb
DA
941 if (obj_priv->phys_obj)
942 ret = i915_gem_phys_pwrite(dev, obj, args, file_priv);
943 else if (obj_priv->tiling_mode == I915_TILING_NONE &&
9b8c4a0b
CW
944 dev->gtt_total != 0 &&
945 obj->write_domain != I915_GEM_DOMAIN_CPU) {
3de09aa3
EA
946 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file_priv);
947 if (ret == -EFAULT) {
948 ret = i915_gem_gtt_pwrite_slow(dev, obj, args,
949 file_priv);
950 }
280b713b
EA
951 } else if (i915_gem_object_needs_bit17_swizzle(obj)) {
952 ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file_priv);
40123c1f
EA
953 } else {
954 ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file_priv);
955 if (ret == -EFAULT) {
956 ret = i915_gem_shmem_pwrite_slow(dev, obj, args,
957 file_priv);
958 }
959 }
673a394b
EA
960
961#if WATCH_PWRITE
962 if (ret)
963 DRM_INFO("pwrite failed %d\n", ret);
964#endif
965
bc9025bd 966 drm_gem_object_unreference_unlocked(obj);
673a394b
EA
967
968 return ret;
969}
970
971/**
2ef7eeaa
EA
972 * Called when user space prepares to use an object with the CPU, either
973 * through the mmap ioctl's mapping or a GTT mapping.
673a394b
EA
974 */
975int
976i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
977 struct drm_file *file_priv)
978{
a09ba7fa 979 struct drm_i915_private *dev_priv = dev->dev_private;
673a394b
EA
980 struct drm_i915_gem_set_domain *args = data;
981 struct drm_gem_object *obj;
652c393a 982 struct drm_i915_gem_object *obj_priv;
2ef7eeaa
EA
983 uint32_t read_domains = args->read_domains;
984 uint32_t write_domain = args->write_domain;
673a394b
EA
985 int ret;
986
987 if (!(dev->driver->driver_features & DRIVER_GEM))
988 return -ENODEV;
989
2ef7eeaa 990 /* Only handle setting domains to types used by the CPU. */
21d509e3 991 if (write_domain & I915_GEM_GPU_DOMAINS)
2ef7eeaa
EA
992 return -EINVAL;
993
21d509e3 994 if (read_domains & I915_GEM_GPU_DOMAINS)
2ef7eeaa
EA
995 return -EINVAL;
996
997 /* Having something in the write domain implies it's in the read
998 * domain, and only that read domain. Enforce that in the request.
999 */
1000 if (write_domain != 0 && read_domains != write_domain)
1001 return -EINVAL;
1002
673a394b
EA
1003 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1004 if (obj == NULL)
1005 return -EBADF;
23010e43 1006 obj_priv = to_intel_bo(obj);
673a394b
EA
1007
1008 mutex_lock(&dev->struct_mutex);
652c393a
JB
1009
1010 intel_mark_busy(dev, obj);
1011
673a394b 1012#if WATCH_BUF
cfd43c02 1013 DRM_INFO("set_domain_ioctl %p(%zd), %08x %08x\n",
2ef7eeaa 1014 obj, obj->size, read_domains, write_domain);
673a394b 1015#endif
2ef7eeaa
EA
1016 if (read_domains & I915_GEM_DOMAIN_GTT) {
1017 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
02354392 1018
a09ba7fa
EA
1019 /* Update the LRU on the fence for the CPU access that's
1020 * about to occur.
1021 */
1022 if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
007cc8ac
DV
1023 struct drm_i915_fence_reg *reg =
1024 &dev_priv->fence_regs[obj_priv->fence_reg];
1025 list_move_tail(&reg->lru_list,
a09ba7fa
EA
1026 &dev_priv->mm.fence_list);
1027 }
1028
02354392
EA
1029 /* Silently promote "you're not bound, there was nothing to do"
1030 * to success, since the client was just asking us to
1031 * make sure everything was done.
1032 */
1033 if (ret == -EINVAL)
1034 ret = 0;
2ef7eeaa 1035 } else {
e47c68e9 1036 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
2ef7eeaa
EA
1037 }
1038
673a394b
EA
1039 drm_gem_object_unreference(obj);
1040 mutex_unlock(&dev->struct_mutex);
1041 return ret;
1042}
1043
1044/**
1045 * Called when user space has done writes to this buffer
1046 */
1047int
1048i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1049 struct drm_file *file_priv)
1050{
1051 struct drm_i915_gem_sw_finish *args = data;
1052 struct drm_gem_object *obj;
1053 struct drm_i915_gem_object *obj_priv;
1054 int ret = 0;
1055
1056 if (!(dev->driver->driver_features & DRIVER_GEM))
1057 return -ENODEV;
1058
1059 mutex_lock(&dev->struct_mutex);
1060 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1061 if (obj == NULL) {
1062 mutex_unlock(&dev->struct_mutex);
1063 return -EBADF;
1064 }
1065
1066#if WATCH_BUF
cfd43c02 1067 DRM_INFO("%s: sw_finish %d (%p %zd)\n",
673a394b
EA
1068 __func__, args->handle, obj, obj->size);
1069#endif
23010e43 1070 obj_priv = to_intel_bo(obj);
673a394b
EA
1071
1072 /* Pinned buffers may be scanout, so flush the cache */
e47c68e9
EA
1073 if (obj_priv->pin_count)
1074 i915_gem_object_flush_cpu_write_domain(obj);
1075
673a394b
EA
1076 drm_gem_object_unreference(obj);
1077 mutex_unlock(&dev->struct_mutex);
1078 return ret;
1079}
1080
1081/**
1082 * Maps the contents of an object, returning the address it is mapped
1083 * into.
1084 *
1085 * While the mapping holds a reference on the contents of the object, it doesn't
1086 * imply a ref on the object itself.
1087 */
1088int
1089i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1090 struct drm_file *file_priv)
1091{
1092 struct drm_i915_gem_mmap *args = data;
1093 struct drm_gem_object *obj;
1094 loff_t offset;
1095 unsigned long addr;
1096
1097 if (!(dev->driver->driver_features & DRIVER_GEM))
1098 return -ENODEV;
1099
1100 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1101 if (obj == NULL)
1102 return -EBADF;
1103
1104 offset = args->offset;
1105
1106 down_write(&current->mm->mmap_sem);
1107 addr = do_mmap(obj->filp, 0, args->size,
1108 PROT_READ | PROT_WRITE, MAP_SHARED,
1109 args->offset);
1110 up_write(&current->mm->mmap_sem);
bc9025bd 1111 drm_gem_object_unreference_unlocked(obj);
673a394b
EA
1112 if (IS_ERR((void *)addr))
1113 return addr;
1114
1115 args->addr_ptr = (uint64_t) addr;
1116
1117 return 0;
1118}
1119
de151cf6
JB
1120/**
1121 * i915_gem_fault - fault a page into the GTT
1122 * vma: VMA in question
1123 * vmf: fault info
1124 *
1125 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
1126 * from userspace. The fault handler takes care of binding the object to
1127 * the GTT (if needed), allocating and programming a fence register (again,
1128 * only if needed based on whether the old reg is still valid or the object
1129 * is tiled) and inserting a new PTE into the faulting process.
1130 *
1131 * Note that the faulting process may involve evicting existing objects
1132 * from the GTT and/or fence registers to make room. So performance may
1133 * suffer if the GTT working set is large or there are few fence registers
1134 * left.
1135 */
1136int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1137{
1138 struct drm_gem_object *obj = vma->vm_private_data;
1139 struct drm_device *dev = obj->dev;
23010e43 1140 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
de151cf6
JB
1141 pgoff_t page_offset;
1142 unsigned long pfn;
1143 int ret = 0;
0f973f27 1144 bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
de151cf6
JB
1145
1146 /* We don't use vmf->pgoff since that has the fake offset */
1147 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
1148 PAGE_SHIFT;
1149
1150 /* Now bind it into the GTT if needed */
1151 mutex_lock(&dev->struct_mutex);
1152 if (!obj_priv->gtt_space) {
e67b8ce1 1153 ret = i915_gem_object_bind_to_gtt(obj, 0);
c715089f
CW
1154 if (ret)
1155 goto unlock;
07f4f3e8 1156
07f4f3e8 1157 ret = i915_gem_object_set_to_gtt_domain(obj, write);
c715089f
CW
1158 if (ret)
1159 goto unlock;
de151cf6
JB
1160 }
1161
1162 /* Need a new fence register? */
a09ba7fa 1163 if (obj_priv->tiling_mode != I915_TILING_NONE) {
8c4b8c3f 1164 ret = i915_gem_object_get_fence_reg(obj);
c715089f
CW
1165 if (ret)
1166 goto unlock;
d9ddcb96 1167 }
de151cf6
JB
1168
1169 pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) +
1170 page_offset;
1171
1172 /* Finally, remap it using the new GTT offset */
1173 ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
c715089f 1174unlock:
de151cf6
JB
1175 mutex_unlock(&dev->struct_mutex);
1176
1177 switch (ret) {
c715089f
CW
1178 case 0:
1179 case -ERESTARTSYS:
1180 return VM_FAULT_NOPAGE;
de151cf6
JB
1181 case -ENOMEM:
1182 case -EAGAIN:
1183 return VM_FAULT_OOM;
de151cf6 1184 default:
c715089f 1185 return VM_FAULT_SIGBUS;
de151cf6
JB
1186 }
1187}
1188
1189/**
1190 * i915_gem_create_mmap_offset - create a fake mmap offset for an object
1191 * @obj: obj in question
1192 *
1193 * GEM memory mapping works by handing back to userspace a fake mmap offset
1194 * it can use in a subsequent mmap(2) call. The DRM core code then looks
1195 * up the object based on the offset and sets up the various memory mapping
1196 * structures.
1197 *
1198 * This routine allocates and attaches a fake offset for @obj.
1199 */
1200static int
1201i915_gem_create_mmap_offset(struct drm_gem_object *obj)
1202{
1203 struct drm_device *dev = obj->dev;
1204 struct drm_gem_mm *mm = dev->mm_private;
23010e43 1205 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
de151cf6 1206 struct drm_map_list *list;
f77d390c 1207 struct drm_local_map *map;
de151cf6
JB
1208 int ret = 0;
1209
1210 /* Set the object up for mmap'ing */
1211 list = &obj->map_list;
9a298b2a 1212 list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
de151cf6
JB
1213 if (!list->map)
1214 return -ENOMEM;
1215
1216 map = list->map;
1217 map->type = _DRM_GEM;
1218 map->size = obj->size;
1219 map->handle = obj;
1220
1221 /* Get a DRM GEM mmap offset allocated... */
1222 list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
1223 obj->size / PAGE_SIZE, 0, 0);
1224 if (!list->file_offset_node) {
1225 DRM_ERROR("failed to allocate offset for bo %d\n", obj->name);
1226 ret = -ENOMEM;
1227 goto out_free_list;
1228 }
1229
1230 list->file_offset_node = drm_mm_get_block(list->file_offset_node,
1231 obj->size / PAGE_SIZE, 0);
1232 if (!list->file_offset_node) {
1233 ret = -ENOMEM;
1234 goto out_free_list;
1235 }
1236
1237 list->hash.key = list->file_offset_node->start;
1238 if (drm_ht_insert_item(&mm->offset_hash, &list->hash)) {
1239 DRM_ERROR("failed to add to map hash\n");
5618ca6a 1240 ret = -ENOMEM;
de151cf6
JB
1241 goto out_free_mm;
1242 }
1243
1244 /* By now we should be all set, any drm_mmap request on the offset
1245 * below will get to our mmap & fault handler */
1246 obj_priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT;
1247
1248 return 0;
1249
1250out_free_mm:
1251 drm_mm_put_block(list->file_offset_node);
1252out_free_list:
9a298b2a 1253 kfree(list->map);
de151cf6
JB
1254
1255 return ret;
1256}
1257
901782b2
CW
1258/**
1259 * i915_gem_release_mmap - remove physical page mappings
1260 * @obj: obj in question
1261 *
af901ca1 1262 * Preserve the reservation of the mmapping with the DRM core code, but
901782b2
CW
1263 * relinquish ownership of the pages back to the system.
1264 *
1265 * It is vital that we remove the page mapping if we have mapped a tiled
1266 * object through the GTT and then lose the fence register due to
1267 * resource pressure. Similarly if the object has been moved out of the
1268 * aperture, than pages mapped into userspace must be revoked. Removing the
1269 * mapping will then trigger a page fault on the next user access, allowing
1270 * fixup by i915_gem_fault().
1271 */
d05ca301 1272void
901782b2
CW
1273i915_gem_release_mmap(struct drm_gem_object *obj)
1274{
1275 struct drm_device *dev = obj->dev;
23010e43 1276 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
901782b2
CW
1277
1278 if (dev->dev_mapping)
1279 unmap_mapping_range(dev->dev_mapping,
1280 obj_priv->mmap_offset, obj->size, 1);
1281}
1282
ab00b3e5
JB
1283static void
1284i915_gem_free_mmap_offset(struct drm_gem_object *obj)
1285{
1286 struct drm_device *dev = obj->dev;
23010e43 1287 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
ab00b3e5
JB
1288 struct drm_gem_mm *mm = dev->mm_private;
1289 struct drm_map_list *list;
1290
1291 list = &obj->map_list;
1292 drm_ht_remove_item(&mm->offset_hash, &list->hash);
1293
1294 if (list->file_offset_node) {
1295 drm_mm_put_block(list->file_offset_node);
1296 list->file_offset_node = NULL;
1297 }
1298
1299 if (list->map) {
9a298b2a 1300 kfree(list->map);
ab00b3e5
JB
1301 list->map = NULL;
1302 }
1303
1304 obj_priv->mmap_offset = 0;
1305}
1306
de151cf6
JB
1307/**
1308 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1309 * @obj: object to check
1310 *
1311 * Return the required GTT alignment for an object, taking into account
1312 * potential fence register mapping if needed.
1313 */
1314static uint32_t
1315i915_gem_get_gtt_alignment(struct drm_gem_object *obj)
1316{
1317 struct drm_device *dev = obj->dev;
23010e43 1318 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
de151cf6
JB
1319 int start, i;
1320
1321 /*
1322 * Minimum alignment is 4k (GTT page size), but might be greater
1323 * if a fence register is needed for the object.
1324 */
1325 if (IS_I965G(dev) || obj_priv->tiling_mode == I915_TILING_NONE)
1326 return 4096;
1327
1328 /*
1329 * Previous chips need to be aligned to the size of the smallest
1330 * fence register that can contain the object.
1331 */
1332 if (IS_I9XX(dev))
1333 start = 1024*1024;
1334 else
1335 start = 512*1024;
1336
1337 for (i = start; i < obj->size; i <<= 1)
1338 ;
1339
1340 return i;
1341}
1342
1343/**
1344 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1345 * @dev: DRM device
1346 * @data: GTT mapping ioctl data
1347 * @file_priv: GEM object info
1348 *
1349 * Simply returns the fake offset to userspace so it can mmap it.
1350 * The mmap call will end up in drm_gem_mmap(), which will set things
1351 * up so we can get faults in the handler above.
1352 *
1353 * The fault handler will take care of binding the object into the GTT
1354 * (since it may have been evicted to make room for something), allocating
1355 * a fence register, and mapping the appropriate aperture address into
1356 * userspace.
1357 */
1358int
1359i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1360 struct drm_file *file_priv)
1361{
1362 struct drm_i915_gem_mmap_gtt *args = data;
de151cf6
JB
1363 struct drm_gem_object *obj;
1364 struct drm_i915_gem_object *obj_priv;
1365 int ret;
1366
1367 if (!(dev->driver->driver_features & DRIVER_GEM))
1368 return -ENODEV;
1369
1370 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1371 if (obj == NULL)
1372 return -EBADF;
1373
1374 mutex_lock(&dev->struct_mutex);
1375
23010e43 1376 obj_priv = to_intel_bo(obj);
de151cf6 1377
ab18282d
CW
1378 if (obj_priv->madv != I915_MADV_WILLNEED) {
1379 DRM_ERROR("Attempting to mmap a purgeable buffer\n");
1380 drm_gem_object_unreference(obj);
1381 mutex_unlock(&dev->struct_mutex);
1382 return -EINVAL;
1383 }
1384
1385
de151cf6
JB
1386 if (!obj_priv->mmap_offset) {
1387 ret = i915_gem_create_mmap_offset(obj);
13af1062
CW
1388 if (ret) {
1389 drm_gem_object_unreference(obj);
1390 mutex_unlock(&dev->struct_mutex);
de151cf6 1391 return ret;
13af1062 1392 }
de151cf6
JB
1393 }
1394
1395 args->offset = obj_priv->mmap_offset;
1396
de151cf6
JB
1397 /*
1398 * Pull it into the GTT so that we have a page list (makes the
1399 * initial fault faster and any subsequent flushing possible).
1400 */
1401 if (!obj_priv->agp_mem) {
e67b8ce1 1402 ret = i915_gem_object_bind_to_gtt(obj, 0);
de151cf6
JB
1403 if (ret) {
1404 drm_gem_object_unreference(obj);
1405 mutex_unlock(&dev->struct_mutex);
1406 return ret;
1407 }
de151cf6
JB
1408 }
1409
1410 drm_gem_object_unreference(obj);
1411 mutex_unlock(&dev->struct_mutex);
1412
1413 return 0;
1414}
1415
6911a9b8 1416void
856fa198 1417i915_gem_object_put_pages(struct drm_gem_object *obj)
673a394b 1418{
23010e43 1419 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
673a394b
EA
1420 int page_count = obj->size / PAGE_SIZE;
1421 int i;
1422
856fa198 1423 BUG_ON(obj_priv->pages_refcount == 0);
bb6baf76 1424 BUG_ON(obj_priv->madv == __I915_MADV_PURGED);
673a394b 1425
856fa198
EA
1426 if (--obj_priv->pages_refcount != 0)
1427 return;
673a394b 1428
280b713b
EA
1429 if (obj_priv->tiling_mode != I915_TILING_NONE)
1430 i915_gem_object_save_bit_17_swizzle(obj);
1431
3ef94daa 1432 if (obj_priv->madv == I915_MADV_DONTNEED)
13a05fd9 1433 obj_priv->dirty = 0;
3ef94daa
CW
1434
1435 for (i = 0; i < page_count; i++) {
3ef94daa
CW
1436 if (obj_priv->dirty)
1437 set_page_dirty(obj_priv->pages[i]);
1438
1439 if (obj_priv->madv == I915_MADV_WILLNEED)
856fa198 1440 mark_page_accessed(obj_priv->pages[i]);
3ef94daa
CW
1441
1442 page_cache_release(obj_priv->pages[i]);
1443 }
673a394b
EA
1444 obj_priv->dirty = 0;
1445
8e7d2b2c 1446 drm_free_large(obj_priv->pages);
856fa198 1447 obj_priv->pages = NULL;
673a394b
EA
1448}
1449
1450static void
852835f3
ZN
1451i915_gem_object_move_to_active(struct drm_gem_object *obj, uint32_t seqno,
1452 struct intel_ring_buffer *ring)
673a394b
EA
1453{
1454 struct drm_device *dev = obj->dev;
1455 drm_i915_private_t *dev_priv = dev->dev_private;
23010e43 1456 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
852835f3
ZN
1457 BUG_ON(ring == NULL);
1458 obj_priv->ring = ring;
673a394b
EA
1459
1460 /* Add a reference if we're newly entering the active list. */
1461 if (!obj_priv->active) {
1462 drm_gem_object_reference(obj);
1463 obj_priv->active = 1;
1464 }
1465 /* Move from whatever list we were on to the tail of execution. */
5e118f41 1466 spin_lock(&dev_priv->mm.active_list_lock);
852835f3 1467 list_move_tail(&obj_priv->list, &ring->active_list);
5e118f41 1468 spin_unlock(&dev_priv->mm.active_list_lock);
ce44b0ea 1469 obj_priv->last_rendering_seqno = seqno;
673a394b
EA
1470}
1471
ce44b0ea
EA
1472static void
1473i915_gem_object_move_to_flushing(struct drm_gem_object *obj)
1474{
1475 struct drm_device *dev = obj->dev;
1476 drm_i915_private_t *dev_priv = dev->dev_private;
23010e43 1477 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
ce44b0ea
EA
1478
1479 BUG_ON(!obj_priv->active);
1480 list_move_tail(&obj_priv->list, &dev_priv->mm.flushing_list);
1481 obj_priv->last_rendering_seqno = 0;
1482}
673a394b 1483
963b4836
CW
1484/* Immediately discard the backing storage */
1485static void
1486i915_gem_object_truncate(struct drm_gem_object *obj)
1487{
23010e43 1488 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
bb6baf76 1489 struct inode *inode;
963b4836 1490
bb6baf76
CW
1491 inode = obj->filp->f_path.dentry->d_inode;
1492 if (inode->i_op->truncate)
1493 inode->i_op->truncate (inode);
1494
1495 obj_priv->madv = __I915_MADV_PURGED;
963b4836
CW
1496}
1497
1498static inline int
1499i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj_priv)
1500{
1501 return obj_priv->madv == I915_MADV_DONTNEED;
1502}
1503
673a394b
EA
1504static void
1505i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
1506{
1507 struct drm_device *dev = obj->dev;
1508 drm_i915_private_t *dev_priv = dev->dev_private;
23010e43 1509 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
673a394b
EA
1510
1511 i915_verify_inactive(dev, __FILE__, __LINE__);
1512 if (obj_priv->pin_count != 0)
1513 list_del_init(&obj_priv->list);
1514 else
1515 list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
1516
99fcb766
DV
1517 BUG_ON(!list_empty(&obj_priv->gpu_write_list));
1518
ce44b0ea 1519 obj_priv->last_rendering_seqno = 0;
852835f3 1520 obj_priv->ring = NULL;
673a394b
EA
1521 if (obj_priv->active) {
1522 obj_priv->active = 0;
1523 drm_gem_object_unreference(obj);
1524 }
1525 i915_verify_inactive(dev, __FILE__, __LINE__);
1526}
1527
63560396
DV
1528static void
1529i915_gem_process_flushing_list(struct drm_device *dev,
852835f3
ZN
1530 uint32_t flush_domains, uint32_t seqno,
1531 struct intel_ring_buffer *ring)
63560396
DV
1532{
1533 drm_i915_private_t *dev_priv = dev->dev_private;
1534 struct drm_i915_gem_object *obj_priv, *next;
1535
1536 list_for_each_entry_safe(obj_priv, next,
1537 &dev_priv->mm.gpu_write_list,
1538 gpu_write_list) {
a8089e84 1539 struct drm_gem_object *obj = &obj_priv->base;
63560396
DV
1540
1541 if ((obj->write_domain & flush_domains) ==
852835f3
ZN
1542 obj->write_domain &&
1543 obj_priv->ring->ring_flag == ring->ring_flag) {
63560396
DV
1544 uint32_t old_write_domain = obj->write_domain;
1545
1546 obj->write_domain = 0;
1547 list_del_init(&obj_priv->gpu_write_list);
852835f3 1548 i915_gem_object_move_to_active(obj, seqno, ring);
63560396
DV
1549
1550 /* update the fence lru list */
007cc8ac
DV
1551 if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
1552 struct drm_i915_fence_reg *reg =
1553 &dev_priv->fence_regs[obj_priv->fence_reg];
1554 list_move_tail(&reg->lru_list,
63560396 1555 &dev_priv->mm.fence_list);
007cc8ac 1556 }
63560396
DV
1557
1558 trace_i915_gem_object_change_domain(obj,
1559 obj->read_domains,
1560 old_write_domain);
1561 }
1562 }
1563}
8187a2b7 1564
5a5a0c64 1565uint32_t
b962442e 1566i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
852835f3 1567 uint32_t flush_domains, struct intel_ring_buffer *ring)
673a394b
EA
1568{
1569 drm_i915_private_t *dev_priv = dev->dev_private;
b962442e 1570 struct drm_i915_file_private *i915_file_priv = NULL;
673a394b
EA
1571 struct drm_i915_gem_request *request;
1572 uint32_t seqno;
1573 int was_empty;
673a394b 1574
b962442e
EA
1575 if (file_priv != NULL)
1576 i915_file_priv = file_priv->driver_priv;
1577
9a298b2a 1578 request = kzalloc(sizeof(*request), GFP_KERNEL);
673a394b
EA
1579 if (request == NULL)
1580 return 0;
1581
852835f3 1582 seqno = ring->add_request(dev, ring, file_priv, flush_domains);
673a394b
EA
1583
1584 request->seqno = seqno;
852835f3 1585 request->ring = ring;
673a394b 1586 request->emitted_jiffies = jiffies;
852835f3
ZN
1587 was_empty = list_empty(&ring->request_list);
1588 list_add_tail(&request->list, &ring->request_list);
1589
b962442e
EA
1590 if (i915_file_priv) {
1591 list_add_tail(&request->client_list,
1592 &i915_file_priv->mm.request_list);
1593 } else {
1594 INIT_LIST_HEAD(&request->client_list);
1595 }
673a394b 1596
ce44b0ea
EA
1597 /* Associate any objects on the flushing list matching the write
1598 * domain we're flushing with our flush.
1599 */
63560396 1600 if (flush_domains != 0)
852835f3 1601 i915_gem_process_flushing_list(dev, flush_domains, seqno, ring);
ce44b0ea 1602
f65d9421
BG
1603 if (!dev_priv->mm.suspended) {
1604 mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
1605 if (was_empty)
1606 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1607 }
673a394b
EA
1608 return seqno;
1609}
1610
1611/**
1612 * Command execution barrier
1613 *
1614 * Ensures that all commands in the ring are finished
1615 * before signalling the CPU
1616 */
3043c60c 1617static uint32_t
852835f3 1618i915_retire_commands(struct drm_device *dev, struct intel_ring_buffer *ring)
673a394b 1619{
673a394b 1620 uint32_t flush_domains = 0;
673a394b
EA
1621
1622 /* The sampler always gets flushed on i965 (sigh) */
1623 if (IS_I965G(dev))
1624 flush_domains |= I915_GEM_DOMAIN_SAMPLER;
852835f3
ZN
1625
1626 ring->flush(dev, ring,
1627 I915_GEM_DOMAIN_COMMAND, flush_domains);
673a394b
EA
1628 return flush_domains;
1629}
1630
1631/**
1632 * Moves buffers associated only with the given active seqno from the active
1633 * to inactive list, potentially freeing them.
1634 */
1635static void
1636i915_gem_retire_request(struct drm_device *dev,
1637 struct drm_i915_gem_request *request)
1638{
1639 drm_i915_private_t *dev_priv = dev->dev_private;
1640
1c5d22f7
CW
1641 trace_i915_gem_request_retire(dev, request->seqno);
1642
673a394b
EA
1643 /* Move any buffers on the active list that are no longer referenced
1644 * by the ringbuffer to the flushing/inactive lists as appropriate.
1645 */
5e118f41 1646 spin_lock(&dev_priv->mm.active_list_lock);
852835f3 1647 while (!list_empty(&request->ring->active_list)) {
673a394b
EA
1648 struct drm_gem_object *obj;
1649 struct drm_i915_gem_object *obj_priv;
1650
852835f3 1651 obj_priv = list_first_entry(&request->ring->active_list,
673a394b
EA
1652 struct drm_i915_gem_object,
1653 list);
a8089e84 1654 obj = &obj_priv->base;
673a394b
EA
1655
1656 /* If the seqno being retired doesn't match the oldest in the
1657 * list, then the oldest in the list must still be newer than
1658 * this seqno.
1659 */
1660 if (obj_priv->last_rendering_seqno != request->seqno)
5e118f41 1661 goto out;
de151cf6 1662
673a394b
EA
1663#if WATCH_LRU
1664 DRM_INFO("%s: retire %d moves to inactive list %p\n",
1665 __func__, request->seqno, obj);
1666#endif
1667
ce44b0ea
EA
1668 if (obj->write_domain != 0)
1669 i915_gem_object_move_to_flushing(obj);
68c84342
SL
1670 else {
1671 /* Take a reference on the object so it won't be
1672 * freed while the spinlock is held. The list
1673 * protection for this spinlock is safe when breaking
1674 * the lock like this since the next thing we do
1675 * is just get the head of the list again.
1676 */
1677 drm_gem_object_reference(obj);
673a394b 1678 i915_gem_object_move_to_inactive(obj);
68c84342
SL
1679 spin_unlock(&dev_priv->mm.active_list_lock);
1680 drm_gem_object_unreference(obj);
1681 spin_lock(&dev_priv->mm.active_list_lock);
1682 }
673a394b 1683 }
5e118f41
CW
1684out:
1685 spin_unlock(&dev_priv->mm.active_list_lock);
673a394b
EA
1686}
1687
/**
 * Returns true if seq1 is at or after seq2.
 *
 * The comparison is done on the signed 32-bit difference, so it keeps
 * working when the sequence counter wraps around.
 */
bool
i915_seqno_passed(uint32_t seq1, uint32_t seq2)
{
	int32_t delta = (int32_t)(seq1 - seq2);

	return delta >= 0;
}
1696
1697uint32_t
852835f3 1698i915_get_gem_seqno(struct drm_device *dev,
d1b851fc 1699 struct intel_ring_buffer *ring)
673a394b 1700{
852835f3 1701 return ring->get_gem_seqno(dev, ring);
673a394b
EA
1702}
1703
1704/**
1705 * This function clears the request list as sequence numbers are passed.
1706 */
b09a1fec
CW
1707static void
1708i915_gem_retire_requests_ring(struct drm_device *dev,
1709 struct intel_ring_buffer *ring)
673a394b
EA
1710{
1711 drm_i915_private_t *dev_priv = dev->dev_private;
1712 uint32_t seqno;
1713
8187a2b7 1714 if (!ring->status_page.page_addr
852835f3 1715 || list_empty(&ring->request_list))
6c0594a3
KW
1716 return;
1717
852835f3 1718 seqno = i915_get_gem_seqno(dev, ring);
673a394b 1719
852835f3 1720 while (!list_empty(&ring->request_list)) {
673a394b
EA
1721 struct drm_i915_gem_request *request;
1722 uint32_t retiring_seqno;
1723
852835f3 1724 request = list_first_entry(&ring->request_list,
673a394b
EA
1725 struct drm_i915_gem_request,
1726 list);
1727 retiring_seqno = request->seqno;
1728
1729 if (i915_seqno_passed(seqno, retiring_seqno) ||
ba1234d1 1730 atomic_read(&dev_priv->mm.wedged)) {
673a394b
EA
1731 i915_gem_retire_request(dev, request);
1732
1733 list_del(&request->list);
b962442e 1734 list_del(&request->client_list);
9a298b2a 1735 kfree(request);
673a394b
EA
1736 } else
1737 break;
1738 }
9d34e5db
CW
1739
1740 if (unlikely (dev_priv->trace_irq_seqno &&
1741 i915_seqno_passed(dev_priv->trace_irq_seqno, seqno))) {
8187a2b7
ZN
1742
1743 ring->user_irq_put(dev, ring);
9d34e5db
CW
1744 dev_priv->trace_irq_seqno = 0;
1745 }
673a394b
EA
1746}
1747
b09a1fec
CW
1748void
1749i915_gem_retire_requests(struct drm_device *dev)
1750{
1751 drm_i915_private_t *dev_priv = dev->dev_private;
1752
be72615b
CW
1753 if (!list_empty(&dev_priv->mm.deferred_free_list)) {
1754 struct drm_i915_gem_object *obj_priv, *tmp;
1755
1756 /* We must be careful that during unbind() we do not
1757 * accidentally infinitely recurse into retire requests.
1758 * Currently:
1759 * retire -> free -> unbind -> wait -> retire_ring
1760 */
1761 list_for_each_entry_safe(obj_priv, tmp,
1762 &dev_priv->mm.deferred_free_list,
1763 list)
1764 i915_gem_free_object_tail(&obj_priv->base);
1765 }
1766
b09a1fec
CW
1767 i915_gem_retire_requests_ring(dev, &dev_priv->render_ring);
1768 if (HAS_BSD(dev))
1769 i915_gem_retire_requests_ring(dev, &dev_priv->bsd_ring);
1770}
1771
673a394b
EA
1772void
1773i915_gem_retire_work_handler(struct work_struct *work)
1774{
1775 drm_i915_private_t *dev_priv;
1776 struct drm_device *dev;
1777
1778 dev_priv = container_of(work, drm_i915_private_t,
1779 mm.retire_work.work);
1780 dev = dev_priv->dev;
1781
1782 mutex_lock(&dev->struct_mutex);
b09a1fec 1783 i915_gem_retire_requests(dev);
d1b851fc 1784
6dbe2772 1785 if (!dev_priv->mm.suspended &&
d1b851fc
ZN
1786 (!list_empty(&dev_priv->render_ring.request_list) ||
1787 (HAS_BSD(dev) &&
1788 !list_empty(&dev_priv->bsd_ring.request_list))))
9c9fe1f8 1789 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
673a394b
EA
1790 mutex_unlock(&dev->struct_mutex);
1791}
1792
5a5a0c64 1793int
852835f3
ZN
1794i915_do_wait_request(struct drm_device *dev, uint32_t seqno,
1795 int interruptible, struct intel_ring_buffer *ring)
673a394b
EA
1796{
1797 drm_i915_private_t *dev_priv = dev->dev_private;
802c7eb6 1798 u32 ier;
673a394b
EA
1799 int ret = 0;
1800
1801 BUG_ON(seqno == 0);
1802
ba1234d1 1803 if (atomic_read(&dev_priv->mm.wedged))
ffed1d09
BG
1804 return -EIO;
1805
852835f3 1806 if (!i915_seqno_passed(ring->get_gem_seqno(dev, ring), seqno)) {
bad720ff 1807 if (HAS_PCH_SPLIT(dev))
036a4a7d
ZW
1808 ier = I915_READ(DEIER) | I915_READ(GTIER);
1809 else
1810 ier = I915_READ(IER);
802c7eb6
JB
1811 if (!ier) {
1812 DRM_ERROR("something (likely vbetool) disabled "
1813 "interrupts, re-enabling\n");
1814 i915_driver_irq_preinstall(dev);
1815 i915_driver_irq_postinstall(dev);
1816 }
1817
1c5d22f7
CW
1818 trace_i915_gem_request_wait_begin(dev, seqno);
1819
852835f3 1820 ring->waiting_gem_seqno = seqno;
8187a2b7 1821 ring->user_irq_get(dev, ring);
48764bf4 1822 if (interruptible)
852835f3
ZN
1823 ret = wait_event_interruptible(ring->irq_queue,
1824 i915_seqno_passed(
1825 ring->get_gem_seqno(dev, ring), seqno)
1826 || atomic_read(&dev_priv->mm.wedged));
48764bf4 1827 else
852835f3
ZN
1828 wait_event(ring->irq_queue,
1829 i915_seqno_passed(
1830 ring->get_gem_seqno(dev, ring), seqno)
1831 || atomic_read(&dev_priv->mm.wedged));
48764bf4 1832
8187a2b7 1833 ring->user_irq_put(dev, ring);
852835f3 1834 ring->waiting_gem_seqno = 0;
1c5d22f7
CW
1835
1836 trace_i915_gem_request_wait_end(dev, seqno);
673a394b 1837 }
ba1234d1 1838 if (atomic_read(&dev_priv->mm.wedged))
673a394b
EA
1839 ret = -EIO;
1840
1841 if (ret && ret != -ERESTARTSYS)
1842 DRM_ERROR("%s returns %d (awaiting %d at %d)\n",
852835f3 1843 __func__, ret, seqno, ring->get_gem_seqno(dev, ring));
673a394b
EA
1844
1845 /* Directly dispatch request retiring. While we have the work queue
1846 * to handle this, the waiter on a request often wants an associated
1847 * buffer to have made it to the inactive list, and we would need
1848 * a separate wait queue to handle that.
1849 */
1850 if (ret == 0)
b09a1fec 1851 i915_gem_retire_requests_ring(dev, ring);
673a394b
EA
1852
1853 return ret;
1854}
1855
48764bf4
DV
1856/**
1857 * Waits for a sequence number to be signaled, and cleans up the
1858 * request and object lists appropriately for that event.
1859 */
1860static int
852835f3
ZN
1861i915_wait_request(struct drm_device *dev, uint32_t seqno,
1862 struct intel_ring_buffer *ring)
48764bf4 1863{
852835f3 1864 return i915_do_wait_request(dev, seqno, 1, ring);
48764bf4
DV
1865}
1866
8187a2b7
ZN
1867static void
1868i915_gem_flush(struct drm_device *dev,
1869 uint32_t invalidate_domains,
1870 uint32_t flush_domains)
1871{
1872 drm_i915_private_t *dev_priv = dev->dev_private;
1873 if (flush_domains & I915_GEM_DOMAIN_CPU)
1874 drm_agp_chipset_flush(dev);
1875 dev_priv->render_ring.flush(dev, &dev_priv->render_ring,
1876 invalidate_domains,
1877 flush_domains);
d1b851fc
ZN
1878
1879 if (HAS_BSD(dev))
1880 dev_priv->bsd_ring.flush(dev, &dev_priv->bsd_ring,
1881 invalidate_domains,
1882 flush_domains);
8187a2b7
ZN
1883}
1884
673a394b
EA
1885/**
1886 * Ensures that all rendering to the object has completed and the object is
1887 * safe to unbind from the GTT or access from the CPU.
1888 */
1889static int
1890i915_gem_object_wait_rendering(struct drm_gem_object *obj)
1891{
1892 struct drm_device *dev = obj->dev;
23010e43 1893 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
673a394b
EA
1894 int ret;
1895
e47c68e9
EA
1896 /* This function only exists to support waiting for existing rendering,
1897 * not for emitting required flushes.
673a394b 1898 */
e47c68e9 1899 BUG_ON((obj->write_domain & I915_GEM_GPU_DOMAINS) != 0);
673a394b
EA
1900
1901 /* If there is rendering queued on the buffer being evicted, wait for
1902 * it.
1903 */
1904 if (obj_priv->active) {
1905#if WATCH_BUF
1906 DRM_INFO("%s: object %p wait for seqno %08x\n",
1907 __func__, obj, obj_priv->last_rendering_seqno);
1908#endif
852835f3
ZN
1909 ret = i915_wait_request(dev,
1910 obj_priv->last_rendering_seqno, obj_priv->ring);
673a394b
EA
1911 if (ret != 0)
1912 return ret;
1913 }
1914
1915 return 0;
1916}
1917
1918/**
1919 * Unbinds an object from the GTT aperture.
1920 */
0f973f27 1921int
673a394b
EA
1922i915_gem_object_unbind(struct drm_gem_object *obj)
1923{
1924 struct drm_device *dev = obj->dev;
4a87b8ca 1925 drm_i915_private_t *dev_priv = dev->dev_private;
23010e43 1926 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
673a394b
EA
1927 int ret = 0;
1928
1929#if WATCH_BUF
1930 DRM_INFO("%s:%d %p\n", __func__, __LINE__, obj);
1931 DRM_INFO("gtt_space %p\n", obj_priv->gtt_space);
1932#endif
1933 if (obj_priv->gtt_space == NULL)
1934 return 0;
1935
1936 if (obj_priv->pin_count != 0) {
1937 DRM_ERROR("Attempting to unbind pinned buffer\n");
1938 return -EINVAL;
1939 }
1940
5323fd04
EA
1941 /* blow away mappings if mapped through GTT */
1942 i915_gem_release_mmap(obj);
1943
673a394b
EA
1944 /* Move the object to the CPU domain to ensure that
1945 * any possible CPU writes while it's not in the GTT
1946 * are flushed when we go to remap it. This will
1947 * also ensure that all pending GPU writes are finished
1948 * before we unbind.
1949 */
e47c68e9 1950 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
8dc1775d 1951 if (ret == -ERESTARTSYS)
673a394b 1952 return ret;
8dc1775d
CW
1953 /* Continue on if we fail due to EIO, the GPU is hung so we
1954 * should be safe and we need to cleanup or else we might
1955 * cause memory corruption through use-after-free.
1956 */
673a394b 1957
5323fd04
EA
1958 BUG_ON(obj_priv->active);
1959
96b47b65
DV
1960 /* release the fence reg _after_ flushing */
1961 if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
1962 i915_gem_clear_fence_reg(obj);
1963
673a394b
EA
1964 if (obj_priv->agp_mem != NULL) {
1965 drm_unbind_agp(obj_priv->agp_mem);
1966 drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE);
1967 obj_priv->agp_mem = NULL;
1968 }
1969
856fa198 1970 i915_gem_object_put_pages(obj);
a32808c0 1971 BUG_ON(obj_priv->pages_refcount);
673a394b
EA
1972
1973 if (obj_priv->gtt_space) {
1974 atomic_dec(&dev->gtt_count);
1975 atomic_sub(obj->size, &dev->gtt_memory);
1976
1977 drm_mm_put_block(obj_priv->gtt_space);
1978 obj_priv->gtt_space = NULL;
1979 }
1980
1981 /* Remove ourselves from the LRU list if present. */
4a87b8ca 1982 spin_lock(&dev_priv->mm.active_list_lock);
673a394b
EA
1983 if (!list_empty(&obj_priv->list))
1984 list_del_init(&obj_priv->list);
4a87b8ca 1985 spin_unlock(&dev_priv->mm.active_list_lock);
673a394b 1986
963b4836
CW
1987 if (i915_gem_object_is_purgeable(obj_priv))
1988 i915_gem_object_truncate(obj);
1989
1c5d22f7
CW
1990 trace_i915_gem_object_unbind(obj);
1991
8dc1775d 1992 return ret;
673a394b
EA
1993}
1994
b47eb4a2 1995int
4df2faf4
DV
1996i915_gpu_idle(struct drm_device *dev)
1997{
1998 drm_i915_private_t *dev_priv = dev->dev_private;
1999 bool lists_empty;
d1b851fc 2000 uint32_t seqno1, seqno2;
852835f3 2001 int ret;
4df2faf4
DV
2002
2003 spin_lock(&dev_priv->mm.active_list_lock);
d1b851fc
ZN
2004 lists_empty = (list_empty(&dev_priv->mm.flushing_list) &&
2005 list_empty(&dev_priv->render_ring.active_list) &&
2006 (!HAS_BSD(dev) ||
2007 list_empty(&dev_priv->bsd_ring.active_list)));
4df2faf4
DV
2008 spin_unlock(&dev_priv->mm.active_list_lock);
2009
2010 if (lists_empty)
2011 return 0;
2012
2013 /* Flush everything onto the inactive list. */
2014 i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
d1b851fc 2015 seqno1 = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS,
852835f3 2016 &dev_priv->render_ring);
d1b851fc 2017 if (seqno1 == 0)
4df2faf4 2018 return -ENOMEM;
d1b851fc
ZN
2019 ret = i915_wait_request(dev, seqno1, &dev_priv->render_ring);
2020
2021 if (HAS_BSD(dev)) {
2022 seqno2 = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS,
2023 &dev_priv->bsd_ring);
2024 if (seqno2 == 0)
2025 return -ENOMEM;
2026
2027 ret = i915_wait_request(dev, seqno2, &dev_priv->bsd_ring);
2028 if (ret)
2029 return ret;
2030 }
2031
4df2faf4 2032
852835f3 2033 return ret;
4df2faf4
DV
2034}
2035
6911a9b8 2036int
4bdadb97
CW
2037i915_gem_object_get_pages(struct drm_gem_object *obj,
2038 gfp_t gfpmask)
673a394b 2039{
23010e43 2040 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
673a394b
EA
2041 int page_count, i;
2042 struct address_space *mapping;
2043 struct inode *inode;
2044 struct page *page;
673a394b 2045
778c3544
DV
2046 BUG_ON(obj_priv->pages_refcount
2047 == DRM_I915_GEM_OBJECT_MAX_PAGES_REFCOUNT);
2048
856fa198 2049 if (obj_priv->pages_refcount++ != 0)
673a394b
EA
2050 return 0;
2051
2052 /* Get the list of pages out of our struct file. They'll be pinned
2053 * at this point until we release them.
2054 */
2055 page_count = obj->size / PAGE_SIZE;
856fa198 2056 BUG_ON(obj_priv->pages != NULL);
8e7d2b2c 2057 obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *));
856fa198 2058 if (obj_priv->pages == NULL) {
856fa198 2059 obj_priv->pages_refcount--;
673a394b
EA
2060 return -ENOMEM;
2061 }
2062
2063 inode = obj->filp->f_path.dentry->d_inode;
2064 mapping = inode->i_mapping;
2065 for (i = 0; i < page_count; i++) {
4bdadb97 2066 page = read_cache_page_gfp(mapping, i,
985b823b 2067 GFP_HIGHUSER |
4bdadb97 2068 __GFP_COLD |
cd9f040d 2069 __GFP_RECLAIMABLE |
4bdadb97 2070 gfpmask);
1f2b1013
CW
2071 if (IS_ERR(page))
2072 goto err_pages;
2073
856fa198 2074 obj_priv->pages[i] = page;
673a394b 2075 }
280b713b
EA
2076
2077 if (obj_priv->tiling_mode != I915_TILING_NONE)
2078 i915_gem_object_do_bit_17_swizzle(obj);
2079
673a394b 2080 return 0;
1f2b1013
CW
2081
2082err_pages:
2083 while (i--)
2084 page_cache_release(obj_priv->pages[i]);
2085
2086 drm_free_large(obj_priv->pages);
2087 obj_priv->pages = NULL;
2088 obj_priv->pages_refcount--;
2089 return PTR_ERR(page);
673a394b
EA
2090}
2091
4e901fdc
EA
2092static void sandybridge_write_fence_reg(struct drm_i915_fence_reg *reg)
2093{
2094 struct drm_gem_object *obj = reg->obj;
2095 struct drm_device *dev = obj->dev;
2096 drm_i915_private_t *dev_priv = dev->dev_private;
23010e43 2097 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4e901fdc
EA
2098 int regnum = obj_priv->fence_reg;
2099 uint64_t val;
2100
2101 val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
2102 0xfffff000) << 32;
2103 val |= obj_priv->gtt_offset & 0xfffff000;
2104 val |= (uint64_t)((obj_priv->stride / 128) - 1) <<
2105 SANDYBRIDGE_FENCE_PITCH_SHIFT;
2106
2107 if (obj_priv->tiling_mode == I915_TILING_Y)
2108 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2109 val |= I965_FENCE_REG_VALID;
2110
2111 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (regnum * 8), val);
2112}
2113
de151cf6
JB
2114static void i965_write_fence_reg(struct drm_i915_fence_reg *reg)
2115{
2116 struct drm_gem_object *obj = reg->obj;
2117 struct drm_device *dev = obj->dev;
2118 drm_i915_private_t *dev_priv = dev->dev_private;
23010e43 2119 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
de151cf6
JB
2120 int regnum = obj_priv->fence_reg;
2121 uint64_t val;
2122
2123 val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
2124 0xfffff000) << 32;
2125 val |= obj_priv->gtt_offset & 0xfffff000;
2126 val |= ((obj_priv->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
2127 if (obj_priv->tiling_mode == I915_TILING_Y)
2128 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2129 val |= I965_FENCE_REG_VALID;
2130
2131 I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val);
2132}
2133
2134static void i915_write_fence_reg(struct drm_i915_fence_reg *reg)
2135{
2136 struct drm_gem_object *obj = reg->obj;
2137 struct drm_device *dev = obj->dev;
2138 drm_i915_private_t *dev_priv = dev->dev_private;
23010e43 2139 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
de151cf6 2140 int regnum = obj_priv->fence_reg;
0f973f27 2141 int tile_width;
dc529a4f 2142 uint32_t fence_reg, val;
de151cf6
JB
2143 uint32_t pitch_val;
2144
2145 if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
2146 (obj_priv->gtt_offset & (obj->size - 1))) {
f06da264 2147 WARN(1, "%s: object 0x%08x not 1M or size (0x%zx) aligned\n",
0f973f27 2148 __func__, obj_priv->gtt_offset, obj->size);
de151cf6
JB
2149 return;
2150 }
2151
0f973f27
JB
2152 if (obj_priv->tiling_mode == I915_TILING_Y &&
2153 HAS_128_BYTE_Y_TILING(dev))
2154 tile_width = 128;
de151cf6 2155 else
0f973f27
JB
2156 tile_width = 512;
2157
2158 /* Note: pitch better be a power of two tile widths */
2159 pitch_val = obj_priv->stride / tile_width;
2160 pitch_val = ffs(pitch_val) - 1;
de151cf6 2161
c36a2a6d
DV
2162 if (obj_priv->tiling_mode == I915_TILING_Y &&
2163 HAS_128_BYTE_Y_TILING(dev))
2164 WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
2165 else
2166 WARN_ON(pitch_val > I915_FENCE_MAX_PITCH_VAL);
2167
de151cf6
JB
2168 val = obj_priv->gtt_offset;
2169 if (obj_priv->tiling_mode == I915_TILING_Y)
2170 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2171 val |= I915_FENCE_SIZE_BITS(obj->size);
2172 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2173 val |= I830_FENCE_REG_VALID;
2174
dc529a4f
EA
2175 if (regnum < 8)
2176 fence_reg = FENCE_REG_830_0 + (regnum * 4);
2177 else
2178 fence_reg = FENCE_REG_945_8 + ((regnum - 8) * 4);
2179 I915_WRITE(fence_reg, val);
de151cf6
JB
2180}
2181
2182static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
2183{
2184 struct drm_gem_object *obj = reg->obj;
2185 struct drm_device *dev = obj->dev;
2186 drm_i915_private_t *dev_priv = dev->dev_private;
23010e43 2187 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
de151cf6
JB
2188 int regnum = obj_priv->fence_reg;
2189 uint32_t val;
2190 uint32_t pitch_val;
8d7773a3 2191 uint32_t fence_size_bits;
de151cf6 2192
8d7773a3 2193 if ((obj_priv->gtt_offset & ~I830_FENCE_START_MASK) ||
de151cf6 2194 (obj_priv->gtt_offset & (obj->size - 1))) {
8d7773a3 2195 WARN(1, "%s: object 0x%08x not 512K or size aligned\n",
0f973f27 2196 __func__, obj_priv->gtt_offset);
de151cf6
JB
2197 return;
2198 }
2199
e76a16de
EA
2200 pitch_val = obj_priv->stride / 128;
2201 pitch_val = ffs(pitch_val) - 1;
2202 WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
2203
de151cf6
JB
2204 val = obj_priv->gtt_offset;
2205 if (obj_priv->tiling_mode == I915_TILING_Y)
2206 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
8d7773a3
DV
2207 fence_size_bits = I830_FENCE_SIZE_BITS(obj->size);
2208 WARN_ON(fence_size_bits & ~0x00000f00);
2209 val |= fence_size_bits;
de151cf6
JB
2210 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2211 val |= I830_FENCE_REG_VALID;
2212
2213 I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val);
de151cf6
JB
2214}
2215
ae3db24a
DV
2216static int i915_find_fence_reg(struct drm_device *dev)
2217{
2218 struct drm_i915_fence_reg *reg = NULL;
2219 struct drm_i915_gem_object *obj_priv = NULL;
2220 struct drm_i915_private *dev_priv = dev->dev_private;
2221 struct drm_gem_object *obj = NULL;
2222 int i, avail, ret;
2223
2224 /* First try to find a free reg */
2225 avail = 0;
2226 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2227 reg = &dev_priv->fence_regs[i];
2228 if (!reg->obj)
2229 return i;
2230
23010e43 2231 obj_priv = to_intel_bo(reg->obj);
ae3db24a
DV
2232 if (!obj_priv->pin_count)
2233 avail++;
2234 }
2235
2236 if (avail == 0)
2237 return -ENOSPC;
2238
2239 /* None available, try to steal one or wait for a user to finish */
2240 i = I915_FENCE_REG_NONE;
007cc8ac
DV
2241 list_for_each_entry(reg, &dev_priv->mm.fence_list,
2242 lru_list) {
2243 obj = reg->obj;
2244 obj_priv = to_intel_bo(obj);
ae3db24a
DV
2245
2246 if (obj_priv->pin_count)
2247 continue;
2248
2249 /* found one! */
2250 i = obj_priv->fence_reg;
2251 break;
2252 }
2253
2254 BUG_ON(i == I915_FENCE_REG_NONE);
2255
2256 /* We only have a reference on obj from the active list. put_fence_reg
2257 * might drop that one, causing a use-after-free in it. So hold a
2258 * private reference to obj like the other callers of put_fence_reg
2259 * (set_tiling ioctl) do. */
2260 drm_gem_object_reference(obj);
2261 ret = i915_gem_object_put_fence_reg(obj);
2262 drm_gem_object_unreference(obj);
2263 if (ret != 0)
2264 return ret;
2265
2266 return i;
2267}
2268
de151cf6
JB
2269/**
2270 * i915_gem_object_get_fence_reg - set up a fence reg for an object
2271 * @obj: object to map through a fence reg
2272 *
2273 * When mapping objects through the GTT, userspace wants to be able to write
2274 * to them without having to worry about swizzling if the object is tiled.
2275 *
2276 * This function walks the fence regs looking for a free one for @obj,
2277 * stealing one if it can't find any.
2278 *
2279 * It then sets up the reg based on the object's properties: address, pitch
2280 * and tiling format.
2281 */
8c4b8c3f
CW
2282int
2283i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
de151cf6
JB
2284{
2285 struct drm_device *dev = obj->dev;
79e53945 2286 struct drm_i915_private *dev_priv = dev->dev_private;
23010e43 2287 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
de151cf6 2288 struct drm_i915_fence_reg *reg = NULL;
ae3db24a 2289 int ret;
de151cf6 2290
a09ba7fa
EA
2291 /* Just update our place in the LRU if our fence is getting used. */
2292 if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
007cc8ac
DV
2293 reg = &dev_priv->fence_regs[obj_priv->fence_reg];
2294 list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
a09ba7fa
EA
2295 return 0;
2296 }
2297
de151cf6
JB
2298 switch (obj_priv->tiling_mode) {
2299 case I915_TILING_NONE:
2300 WARN(1, "allocating a fence for non-tiled object?\n");
2301 break;
2302 case I915_TILING_X:
0f973f27
JB
2303 if (!obj_priv->stride)
2304 return -EINVAL;
2305 WARN((obj_priv->stride & (512 - 1)),
2306 "object 0x%08x is X tiled but has non-512B pitch\n",
2307 obj_priv->gtt_offset);
de151cf6
JB
2308 break;
2309 case I915_TILING_Y:
0f973f27
JB
2310 if (!obj_priv->stride)
2311 return -EINVAL;
2312 WARN((obj_priv->stride & (128 - 1)),
2313 "object 0x%08x is Y tiled but has non-128B pitch\n",
2314 obj_priv->gtt_offset);
de151cf6
JB
2315 break;
2316 }
2317
ae3db24a
DV
2318 ret = i915_find_fence_reg(dev);
2319 if (ret < 0)
2320 return ret;
de151cf6 2321
ae3db24a
DV
2322 obj_priv->fence_reg = ret;
2323 reg = &dev_priv->fence_regs[obj_priv->fence_reg];
007cc8ac 2324 list_add_tail(&reg->lru_list, &dev_priv->mm.fence_list);
a09ba7fa 2325
de151cf6
JB
2326 reg->obj = obj;
2327
4e901fdc
EA
2328 if (IS_GEN6(dev))
2329 sandybridge_write_fence_reg(reg);
2330 else if (IS_I965G(dev))
de151cf6
JB
2331 i965_write_fence_reg(reg);
2332 else if (IS_I9XX(dev))
2333 i915_write_fence_reg(reg);
2334 else
2335 i830_write_fence_reg(reg);
d9ddcb96 2336
ae3db24a
DV
2337 trace_i915_gem_object_get_fence(obj, obj_priv->fence_reg,
2338 obj_priv->tiling_mode);
1c5d22f7 2339
d9ddcb96 2340 return 0;
de151cf6
JB
2341}
2342
2343/**
2344 * i915_gem_clear_fence_reg - clear out fence register info
2345 * @obj: object to clear
2346 *
2347 * Zeroes out the fence register itself and clears out the associated
2348 * data structures in dev_priv and obj_priv.
2349 */
2350static void
2351i915_gem_clear_fence_reg(struct drm_gem_object *obj)
2352{
2353 struct drm_device *dev = obj->dev;
79e53945 2354 drm_i915_private_t *dev_priv = dev->dev_private;
23010e43 2355 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
007cc8ac
DV
2356 struct drm_i915_fence_reg *reg =
2357 &dev_priv->fence_regs[obj_priv->fence_reg];
de151cf6 2358
4e901fdc
EA
2359 if (IS_GEN6(dev)) {
2360 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 +
2361 (obj_priv->fence_reg * 8), 0);
2362 } else if (IS_I965G(dev)) {
de151cf6 2363 I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0);
4e901fdc 2364 } else {
dc529a4f
EA
2365 uint32_t fence_reg;
2366
2367 if (obj_priv->fence_reg < 8)
2368 fence_reg = FENCE_REG_830_0 + obj_priv->fence_reg * 4;
2369 else
2370 fence_reg = FENCE_REG_945_8 + (obj_priv->fence_reg -
2371 8) * 4;
2372
2373 I915_WRITE(fence_reg, 0);
2374 }
de151cf6 2375
007cc8ac 2376 reg->obj = NULL;
de151cf6 2377 obj_priv->fence_reg = I915_FENCE_REG_NONE;
007cc8ac 2378 list_del_init(&reg->lru_list);
de151cf6
JB
2379}
2380
52dc7d32
CW
2381/**
2382 * i915_gem_object_put_fence_reg - waits on outstanding fenced access
2383 * to the buffer to finish, and then resets the fence register.
2384 * @obj: tiled object holding a fence register.
2385 *
2386 * Zeroes out the fence register itself and clears out the associated
2387 * data structures in dev_priv and obj_priv.
2388 */
2389int
2390i915_gem_object_put_fence_reg(struct drm_gem_object *obj)
2391{
2392 struct drm_device *dev = obj->dev;
23010e43 2393 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
52dc7d32
CW
2394
2395 if (obj_priv->fence_reg == I915_FENCE_REG_NONE)
2396 return 0;
2397
10ae9bd2
DV
2398 /* If we've changed tiling, GTT-mappings of the object
2399 * need to re-fault to ensure that the correct fence register
2400 * setup is in place.
2401 */
2402 i915_gem_release_mmap(obj);
2403
52dc7d32
CW
2404 /* On the i915, GPU access to tiled buffers is via a fence,
2405 * therefore we must wait for any outstanding access to complete
2406 * before clearing the fence.
2407 */
2408 if (!IS_I965G(dev)) {
2409 int ret;
2410
2dafb1e0
CW
2411 ret = i915_gem_object_flush_gpu_write_domain(obj);
2412 if (ret != 0)
2413 return ret;
2414
52dc7d32
CW
2415 ret = i915_gem_object_wait_rendering(obj);
2416 if (ret != 0)
2417 return ret;
2418 }
2419
4a726612 2420 i915_gem_object_flush_gtt_write_domain(obj);
52dc7d32
CW
2421 i915_gem_clear_fence_reg (obj);
2422
2423 return 0;
2424}
2425
673a394b
EA
2426/**
2427 * Finds free space in the GTT aperture and binds the object there.
2428 */
2429static int
2430i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
2431{
2432 struct drm_device *dev = obj->dev;
2433 drm_i915_private_t *dev_priv = dev->dev_private;
23010e43 2434 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
673a394b 2435 struct drm_mm_node *free_space;
4bdadb97 2436 gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN;
07f73f69 2437 int ret;
673a394b 2438
bb6baf76 2439 if (obj_priv->madv != I915_MADV_WILLNEED) {
3ef94daa
CW
2440 DRM_ERROR("Attempting to bind a purgeable object\n");
2441 return -EINVAL;
2442 }
2443
673a394b 2444 if (alignment == 0)
0f973f27 2445 alignment = i915_gem_get_gtt_alignment(obj);
8d7773a3 2446 if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) {
673a394b
EA
2447 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
2448 return -EINVAL;
2449 }
2450
654fc607
CW
2451 /* If the object is bigger than the entire aperture, reject it early
2452 * before evicting everything in a vain attempt to find space.
2453 */
2454 if (obj->size > dev->gtt_total) {
2455 DRM_ERROR("Attempting to bind an object larger than the aperture\n");
2456 return -E2BIG;
2457 }
2458
673a394b
EA
2459 search_free:
2460 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
2461 obj->size, alignment, 0);
2462 if (free_space != NULL) {
2463 obj_priv->gtt_space = drm_mm_get_block(free_space, obj->size,
2464 alignment);
db3307a9 2465 if (obj_priv->gtt_space != NULL)
673a394b 2466 obj_priv->gtt_offset = obj_priv->gtt_space->start;
673a394b
EA
2467 }
2468 if (obj_priv->gtt_space == NULL) {
2469 /* If the gtt is empty and we're still having trouble
2470 * fitting our object in, we're out of memory.
2471 */
2472#if WATCH_LRU
2473 DRM_INFO("%s: GTT full, evicting something\n", __func__);
2474#endif
0108a3ed 2475 ret = i915_gem_evict_something(dev, obj->size, alignment);
9731129c 2476 if (ret)
673a394b 2477 return ret;
9731129c 2478
673a394b
EA
2479 goto search_free;
2480 }
2481
2482#if WATCH_BUF
cfd43c02 2483 DRM_INFO("Binding object of size %zd at 0x%08x\n",
673a394b
EA
2484 obj->size, obj_priv->gtt_offset);
2485#endif
4bdadb97 2486 ret = i915_gem_object_get_pages(obj, gfpmask);
673a394b
EA
2487 if (ret) {
2488 drm_mm_put_block(obj_priv->gtt_space);
2489 obj_priv->gtt_space = NULL;
07f73f69
CW
2490
2491 if (ret == -ENOMEM) {
2492 /* first try to clear up some space from the GTT */
0108a3ed
DV
2493 ret = i915_gem_evict_something(dev, obj->size,
2494 alignment);
07f73f69 2495 if (ret) {
07f73f69 2496 /* now try to shrink everyone else */
4bdadb97
CW
2497 if (gfpmask) {
2498 gfpmask = 0;
2499 goto search_free;
07f73f69
CW
2500 }
2501
2502 return ret;
2503 }
2504
2505 goto search_free;
2506 }
2507
673a394b
EA
2508 return ret;
2509 }
2510
673a394b
EA
2511 /* Create an AGP memory structure pointing at our pages, and bind it
2512 * into the GTT.
2513 */
2514 obj_priv->agp_mem = drm_agp_bind_pages(dev,
856fa198 2515 obj_priv->pages,
07f73f69 2516 obj->size >> PAGE_SHIFT,
ba1eb1d8
KP
2517 obj_priv->gtt_offset,
2518 obj_priv->agp_type);
673a394b 2519 if (obj_priv->agp_mem == NULL) {
856fa198 2520 i915_gem_object_put_pages(obj);
673a394b
EA
2521 drm_mm_put_block(obj_priv->gtt_space);
2522 obj_priv->gtt_space = NULL;
07f73f69 2523
0108a3ed 2524 ret = i915_gem_evict_something(dev, obj->size, alignment);
9731129c 2525 if (ret)
07f73f69 2526 return ret;
07f73f69
CW
2527
2528 goto search_free;
673a394b
EA
2529 }
2530 atomic_inc(&dev->gtt_count);
2531 atomic_add(obj->size, &dev->gtt_memory);
2532
bf1a1092
CW
2533 /* keep track of bounds object by adding it to the inactive list */
2534 list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
2535
673a394b
EA
2536 /* Assert that the object is not currently in any GPU domain. As it
2537 * wasn't in the GTT, there shouldn't be any way it could have been in
2538 * a GPU cache
2539 */
21d509e3
CW
2540 BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
2541 BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
673a394b 2542
1c5d22f7
CW
2543 trace_i915_gem_object_bind(obj, obj_priv->gtt_offset);
2544
673a394b
EA
2545 return 0;
2546}
2547
2548void
2549i915_gem_clflush_object(struct drm_gem_object *obj)
2550{
23010e43 2551 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
673a394b
EA
2552
2553 /* If we don't have a page list set up, then we're not pinned
2554 * to GPU, and we can ignore the cache flush because it'll happen
2555 * again at bind time.
2556 */
856fa198 2557 if (obj_priv->pages == NULL)
673a394b
EA
2558 return;
2559
1c5d22f7 2560 trace_i915_gem_object_clflush(obj);
cfa16a0d 2561
856fa198 2562 drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE);
673a394b
EA
2563}
2564
e47c68e9 2565/** Flushes any GPU write domain for the object if it's dirty. */
2dafb1e0 2566static int
e47c68e9
EA
2567i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
2568{
2569 struct drm_device *dev = obj->dev;
1c5d22f7 2570 uint32_t old_write_domain;
852835f3 2571 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
e47c68e9
EA
2572
2573 if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
2dafb1e0 2574 return 0;
e47c68e9
EA
2575
2576 /* Queue the GPU write cache flushing we need. */
1c5d22f7 2577 old_write_domain = obj->write_domain;
e47c68e9 2578 i915_gem_flush(dev, 0, obj->write_domain);
2dafb1e0
CW
2579 if (i915_add_request(dev, NULL, obj->write_domain, obj_priv->ring) == 0)
2580 return -ENOMEM;
1c5d22f7
CW
2581
2582 trace_i915_gem_object_change_domain(obj,
2583 obj->read_domains,
2584 old_write_domain);
2dafb1e0 2585 return 0;
e47c68e9
EA
2586}
2587
2588/** Flushes the GTT write domain for the object if it's dirty. */
2589static void
2590i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj)
2591{
1c5d22f7
CW
2592 uint32_t old_write_domain;
2593
e47c68e9
EA
2594 if (obj->write_domain != I915_GEM_DOMAIN_GTT)
2595 return;
2596
2597 /* No actual flushing is required for the GTT write domain. Writes
2598 * to it immediately go to main memory as far as we know, so there's
2599 * no chipset flush. It also doesn't land in render cache.
2600 */
1c5d22f7 2601 old_write_domain = obj->write_domain;
e47c68e9 2602 obj->write_domain = 0;
1c5d22f7
CW
2603
2604 trace_i915_gem_object_change_domain(obj,
2605 obj->read_domains,
2606 old_write_domain);
e47c68e9
EA
2607}
2608
2609/** Flushes the CPU write domain for the object if it's dirty. */
2610static void
2611i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj)
2612{
2613 struct drm_device *dev = obj->dev;
1c5d22f7 2614 uint32_t old_write_domain;
e47c68e9
EA
2615
2616 if (obj->write_domain != I915_GEM_DOMAIN_CPU)
2617 return;
2618
2619 i915_gem_clflush_object(obj);
2620 drm_agp_chipset_flush(dev);
1c5d22f7 2621 old_write_domain = obj->write_domain;
e47c68e9 2622 obj->write_domain = 0;
1c5d22f7
CW
2623
2624 trace_i915_gem_object_change_domain(obj,
2625 obj->read_domains,
2626 old_write_domain);
e47c68e9
EA
2627}
2628
2dafb1e0 2629int
6b95a207
KH
2630i915_gem_object_flush_write_domain(struct drm_gem_object *obj)
2631{
2dafb1e0
CW
2632 int ret = 0;
2633
6b95a207
KH
2634 switch (obj->write_domain) {
2635 case I915_GEM_DOMAIN_GTT:
2636 i915_gem_object_flush_gtt_write_domain(obj);
2637 break;
2638 case I915_GEM_DOMAIN_CPU:
2639 i915_gem_object_flush_cpu_write_domain(obj);
2640 break;
2641 default:
2dafb1e0 2642 ret = i915_gem_object_flush_gpu_write_domain(obj);
6b95a207
KH
2643 break;
2644 }
2dafb1e0
CW
2645
2646 return ret;
6b95a207
KH
2647}
2648
2ef7eeaa
EA
2649/**
2650 * Moves a single object to the GTT read, and possibly write domain.
2651 *
2652 * This function returns when the move is complete, including waiting on
2653 * flushes to occur.
2654 */
79e53945 2655int
2ef7eeaa
EA
2656i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
2657{
23010e43 2658 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1c5d22f7 2659 uint32_t old_write_domain, old_read_domains;
e47c68e9 2660 int ret;
2ef7eeaa 2661
02354392
EA
2662 /* Not valid to be called on unbound objects. */
2663 if (obj_priv->gtt_space == NULL)
2664 return -EINVAL;
2665
2dafb1e0
CW
2666 ret = i915_gem_object_flush_gpu_write_domain(obj);
2667 if (ret != 0)
2668 return ret;
2669
e47c68e9
EA
2670 /* Wait on any GPU rendering and flushing to occur. */
2671 ret = i915_gem_object_wait_rendering(obj);
2672 if (ret != 0)
2673 return ret;
2674
1c5d22f7
CW
2675 old_write_domain = obj->write_domain;
2676 old_read_domains = obj->read_domains;
2677
e47c68e9
EA
2678 /* If we're writing through the GTT domain, then CPU and GPU caches
2679 * will need to be invalidated at next use.
2ef7eeaa 2680 */
e47c68e9
EA
2681 if (write)
2682 obj->read_domains &= I915_GEM_DOMAIN_GTT;
2ef7eeaa 2683
e47c68e9 2684 i915_gem_object_flush_cpu_write_domain(obj);
2ef7eeaa 2685
e47c68e9
EA
2686 /* It should now be out of any other write domains, and we can update
2687 * the domain values for our changes.
2688 */
2689 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2690 obj->read_domains |= I915_GEM_DOMAIN_GTT;
2691 if (write) {
2692 obj->write_domain = I915_GEM_DOMAIN_GTT;
2693 obj_priv->dirty = 1;
2ef7eeaa
EA
2694 }
2695
1c5d22f7
CW
2696 trace_i915_gem_object_change_domain(obj,
2697 old_read_domains,
2698 old_write_domain);
2699
e47c68e9
EA
2700 return 0;
2701}
2702
b9241ea3
ZW
2703/*
2704 * Prepare buffer for display plane. Use uninterruptible for possible flush
2705 * wait, as in modesetting process we're not supposed to be interrupted.
2706 */
2707int
2708i915_gem_object_set_to_display_plane(struct drm_gem_object *obj)
2709{
2710 struct drm_device *dev = obj->dev;
23010e43 2711 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
b9241ea3
ZW
2712 uint32_t old_write_domain, old_read_domains;
2713 int ret;
2714
2715 /* Not valid to be called on unbound objects. */
2716 if (obj_priv->gtt_space == NULL)
2717 return -EINVAL;
2718
2dafb1e0
CW
2719 ret = i915_gem_object_flush_gpu_write_domain(obj);
2720 if (ret)
2721 return ret;
b9241ea3
ZW
2722
2723 /* Wait on any GPU rendering and flushing to occur. */
2724 if (obj_priv->active) {
2725#if WATCH_BUF
2726 DRM_INFO("%s: object %p wait for seqno %08x\n",
2727 __func__, obj, obj_priv->last_rendering_seqno);
2728#endif
852835f3
ZN
2729 ret = i915_do_wait_request(dev,
2730 obj_priv->last_rendering_seqno,
2731 0,
2732 obj_priv->ring);
b9241ea3
ZW
2733 if (ret != 0)
2734 return ret;
2735 }
2736
b118c1e3
CW
2737 i915_gem_object_flush_cpu_write_domain(obj);
2738
b9241ea3
ZW
2739 old_write_domain = obj->write_domain;
2740 old_read_domains = obj->read_domains;
2741
b9241ea3
ZW
2742 /* It should now be out of any other write domains, and we can update
2743 * the domain values for our changes.
2744 */
2745 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
b118c1e3 2746 obj->read_domains = I915_GEM_DOMAIN_GTT;
b9241ea3
ZW
2747 obj->write_domain = I915_GEM_DOMAIN_GTT;
2748 obj_priv->dirty = 1;
2749
2750 trace_i915_gem_object_change_domain(obj,
2751 old_read_domains,
2752 old_write_domain);
2753
2754 return 0;
2755}
2756
e47c68e9
EA
2757/**
2758 * Moves a single object to the CPU read, and possibly write domain.
2759 *
2760 * This function returns when the move is complete, including waiting on
2761 * flushes to occur.
2762 */
2763static int
2764i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
2765{
1c5d22f7 2766 uint32_t old_write_domain, old_read_domains;
e47c68e9
EA
2767 int ret;
2768
2dafb1e0
CW
2769 ret = i915_gem_object_flush_gpu_write_domain(obj);
2770 if (ret)
2771 return ret;
2772
2ef7eeaa 2773 /* Wait on any GPU rendering and flushing to occur. */
e47c68e9
EA
2774 ret = i915_gem_object_wait_rendering(obj);
2775 if (ret != 0)
2776 return ret;
2ef7eeaa 2777
e47c68e9 2778 i915_gem_object_flush_gtt_write_domain(obj);
2ef7eeaa 2779
e47c68e9
EA
2780 /* If we have a partially-valid cache of the object in the CPU,
2781 * finish invalidating it and free the per-page flags.
2ef7eeaa 2782 */
e47c68e9 2783 i915_gem_object_set_to_full_cpu_read_domain(obj);
2ef7eeaa 2784
1c5d22f7
CW
2785 old_write_domain = obj->write_domain;
2786 old_read_domains = obj->read_domains;
2787
e47c68e9
EA
2788 /* Flush the CPU cache if it's still invalid. */
2789 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
2ef7eeaa 2790 i915_gem_clflush_object(obj);
2ef7eeaa 2791
e47c68e9 2792 obj->read_domains |= I915_GEM_DOMAIN_CPU;
2ef7eeaa
EA
2793 }
2794
2795 /* It should now be out of any other write domains, and we can update
2796 * the domain values for our changes.
2797 */
e47c68e9
EA
2798 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
2799
2800 /* If we're writing through the CPU, then the GPU read domains will
2801 * need to be invalidated at next use.
2802 */
2803 if (write) {
2804 obj->read_domains &= I915_GEM_DOMAIN_CPU;
2805 obj->write_domain = I915_GEM_DOMAIN_CPU;
2806 }
2ef7eeaa 2807
1c5d22f7
CW
2808 trace_i915_gem_object_change_domain(obj,
2809 old_read_domains,
2810 old_write_domain);
2811
2ef7eeaa
EA
2812 return 0;
2813}
2814
673a394b
EA
2815/*
2816 * Set the next domain for the specified object. This
2817 * may not actually perform the necessary flushing/invalidating though,
2818 * as that may want to be batched with other set_domain operations
2819 *
2820 * This is (we hope) the only really tricky part of gem. The goal
2821 * is fairly simple -- track which caches hold bits of the object
2822 * and make sure they remain coherent. A few concrete examples may
2823 * help to explain how it works. For shorthand, we use the notation
2824 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
2825 * a pair of read and write domain masks.
2826 *
2827 * Case 1: the batch buffer
2828 *
2829 * 1. Allocated
2830 * 2. Written by CPU
2831 * 3. Mapped to GTT
2832 * 4. Read by GPU
2833 * 5. Unmapped from GTT
2834 * 6. Freed
2835 *
2836 * Let's take these a step at a time
2837 *
2838 * 1. Allocated
2839 * Pages allocated from the kernel may still have
2840 * cache contents, so we set them to (CPU, CPU) always.
2841 * 2. Written by CPU (using pwrite)
2842 * The pwrite function calls set_domain (CPU, CPU) and
2843 * this function does nothing (as nothing changes)
2844 * 3. Mapped by GTT
2845 * This function asserts that the object is not
2846 * currently in any GPU-based read or write domains
2847 * 4. Read by GPU
2848 * i915_gem_execbuffer calls set_domain (COMMAND, 0).
2849 * As write_domain is zero, this function adds in the
2850 * current read domains (CPU+COMMAND, 0).
2851 * flush_domains is set to CPU.
2852 * invalidate_domains is set to COMMAND
2853 * clflush is run to get data out of the CPU caches
2854 * then i915_dev_set_domain calls i915_gem_flush to
2855 * emit an MI_FLUSH and drm_agp_chipset_flush
2856 * 5. Unmapped from GTT
2857 * i915_gem_object_unbind calls set_domain (CPU, CPU)
2858 * flush_domains and invalidate_domains end up both zero
2859 * so no flushing/invalidating happens
2860 * 6. Freed
2861 * yay, done
2862 *
2863 * Case 2: The shared render buffer
2864 *
2865 * 1. Allocated
2866 * 2. Mapped to GTT
2867 * 3. Read/written by GPU
2868 * 4. set_domain to (CPU,CPU)
2869 * 5. Read/written by CPU
2870 * 6. Read/written by GPU
2871 *
2872 * 1. Allocated
2873 * Same as last example, (CPU, CPU)
2874 * 2. Mapped to GTT
2875 * Nothing changes (assertions find that it is not in the GPU)
2876 * 3. Read/written by GPU
2877 * execbuffer calls set_domain (RENDER, RENDER)
2878 * flush_domains gets CPU
2879 * invalidate_domains gets GPU
2880 * clflush (obj)
2881 * MI_FLUSH and drm_agp_chipset_flush
2882 * 4. set_domain (CPU, CPU)
2883 * flush_domains gets GPU
2884 * invalidate_domains gets CPU
2885 * wait_rendering (obj) to make sure all drawing is complete.
2886 * This will include an MI_FLUSH to get the data from GPU
2887 * to memory
2888 * clflush (obj) to invalidate the CPU cache
2889 * Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
2890 * 5. Read/written by CPU
2891 * cache lines are loaded and dirtied
2892 * 6. Read/written by GPU
2893 * Same as last GPU access
2894 *
2895 * Case 3: The constant buffer
2896 *
2897 * 1. Allocated
2898 * 2. Written by CPU
2899 * 3. Read by GPU
2900 * 4. Updated (written) by CPU again
2901 * 5. Read by GPU
2902 *
2903 * 1. Allocated
2904 * (CPU, CPU)
2905 * 2. Written by CPU
2906 * (CPU, CPU)
2907 * 3. Read by GPU
2908 * (CPU+RENDER, 0)
2909 * flush_domains = CPU
2910 * invalidate_domains = RENDER
2911 * clflush (obj)
2912 * MI_FLUSH
2913 * drm_agp_chipset_flush
2914 * 4. Updated (written) by CPU again
2915 * (CPU, CPU)
2916 * flush_domains = 0 (no previous write domain)
2917 * invalidate_domains = 0 (no new read domains)
2918 * 5. Read by GPU
2919 * (CPU+RENDER, 0)
2920 * flush_domains = CPU
2921 * invalidate_domains = RENDER
2922 * clflush (obj)
2923 * MI_FLUSH
2924 * drm_agp_chipset_flush
2925 */
c0d90829 2926static void
8b0e378a 2927i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
673a394b
EA
2928{
2929 struct drm_device *dev = obj->dev;
88f356b7 2930 drm_i915_private_t *dev_priv = dev->dev_private;
23010e43 2931 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
673a394b
EA
2932 uint32_t invalidate_domains = 0;
2933 uint32_t flush_domains = 0;
1c5d22f7 2934 uint32_t old_read_domains;
e47c68e9 2935
8b0e378a
EA
2936 BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU);
2937 BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU);
673a394b 2938
652c393a
JB
2939 intel_mark_busy(dev, obj);
2940
673a394b
EA
2941#if WATCH_BUF
2942 DRM_INFO("%s: object %p read %08x -> %08x write %08x -> %08x\n",
2943 __func__, obj,
8b0e378a
EA
2944 obj->read_domains, obj->pending_read_domains,
2945 obj->write_domain, obj->pending_write_domain);
673a394b
EA
2946#endif
2947 /*
2948 * If the object isn't moving to a new write domain,
2949 * let the object stay in multiple read domains
2950 */
8b0e378a
EA
2951 if (obj->pending_write_domain == 0)
2952 obj->pending_read_domains |= obj->read_domains;
673a394b
EA
2953 else
2954 obj_priv->dirty = 1;
2955
2956 /*
2957 * Flush the current write domain if
2958 * the new read domains don't match. Invalidate
2959 * any read domains which differ from the old
2960 * write domain
2961 */
8b0e378a
EA
2962 if (obj->write_domain &&
2963 obj->write_domain != obj->pending_read_domains) {
673a394b 2964 flush_domains |= obj->write_domain;
8b0e378a
EA
2965 invalidate_domains |=
2966 obj->pending_read_domains & ~obj->write_domain;
673a394b
EA
2967 }
2968 /*
2969 * Invalidate any read caches which may have
2970 * stale data. That is, any new read domains.
2971 */
8b0e378a 2972 invalidate_domains |= obj->pending_read_domains & ~obj->read_domains;
673a394b
EA
2973 if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) {
2974#if WATCH_BUF
2975 DRM_INFO("%s: CPU domain flush %08x invalidate %08x\n",
2976 __func__, flush_domains, invalidate_domains);
2977#endif
673a394b
EA
2978 i915_gem_clflush_object(obj);
2979 }
2980
1c5d22f7
CW
2981 old_read_domains = obj->read_domains;
2982
efbeed96
EA
2983 /* The actual obj->write_domain will be updated with
2984 * pending_write_domain after we emit the accumulated flush for all
2985 * of our domain changes in execbuffers (which clears objects'
2986 * write_domains). So if we have a current write domain that we
2987 * aren't changing, set pending_write_domain to that.
2988 */
2989 if (flush_domains == 0 && obj->pending_write_domain == 0)
2990 obj->pending_write_domain = obj->write_domain;
8b0e378a 2991 obj->read_domains = obj->pending_read_domains;
673a394b 2992
88f356b7
CW
2993 if (flush_domains & I915_GEM_GPU_DOMAINS) {
2994 if (obj_priv->ring == &dev_priv->render_ring)
2995 dev_priv->flush_rings |= FLUSH_RENDER_RING;
2996 else if (obj_priv->ring == &dev_priv->bsd_ring)
2997 dev_priv->flush_rings |= FLUSH_BSD_RING;
2998 }
2999
673a394b
EA
3000 dev->invalidate_domains |= invalidate_domains;
3001 dev->flush_domains |= flush_domains;
3002#if WATCH_BUF
3003 DRM_INFO("%s: read %08x write %08x invalidate %08x flush %08x\n",
3004 __func__,
3005 obj->read_domains, obj->write_domain,
3006 dev->invalidate_domains, dev->flush_domains);
3007#endif
1c5d22f7
CW
3008
3009 trace_i915_gem_object_change_domain(obj,
3010 old_read_domains,
3011 obj->write_domain);
673a394b
EA
3012}
3013
3014/**
e47c68e9 3015 * Moves the object from a partially CPU read to a full one.
673a394b 3016 *
e47c68e9
EA
3017 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(),
3018 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU).
673a394b 3019 */
e47c68e9
EA
3020static void
3021i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj)
673a394b 3022{
23010e43 3023 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
673a394b 3024
e47c68e9
EA
3025 if (!obj_priv->page_cpu_valid)
3026 return;
3027
3028 /* If we're partially in the CPU read domain, finish moving it in.
3029 */
3030 if (obj->read_domains & I915_GEM_DOMAIN_CPU) {
3031 int i;
3032
3033 for (i = 0; i <= (obj->size - 1) / PAGE_SIZE; i++) {
3034 if (obj_priv->page_cpu_valid[i])
3035 continue;
856fa198 3036 drm_clflush_pages(obj_priv->pages + i, 1);
e47c68e9 3037 }
e47c68e9
EA
3038 }
3039
3040 /* Free the page_cpu_valid mappings which are now stale, whether
3041 * or not we've got I915_GEM_DOMAIN_CPU.
3042 */
9a298b2a 3043 kfree(obj_priv->page_cpu_valid);
e47c68e9
EA
3044 obj_priv->page_cpu_valid = NULL;
3045}
3046
3047/**
3048 * Set the CPU read domain on a range of the object.
3049 *
3050 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's
3051 * not entirely valid. The page_cpu_valid member of the object flags which
3052 * pages have been flushed, and will be respected by
3053 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping
3054 * of the whole object.
3055 *
3056 * This function returns when the move is complete, including waiting on
3057 * flushes to occur.
3058 */
3059static int
3060i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
3061 uint64_t offset, uint64_t size)
3062{
23010e43 3063 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1c5d22f7 3064 uint32_t old_read_domains;
e47c68e9 3065 int i, ret;
673a394b 3066
e47c68e9
EA
3067 if (offset == 0 && size == obj->size)
3068 return i915_gem_object_set_to_cpu_domain(obj, 0);
673a394b 3069
2dafb1e0
CW
3070 ret = i915_gem_object_flush_gpu_write_domain(obj);
3071 if (ret)
3072 return ret;
3073
e47c68e9 3074 /* Wait on any GPU rendering and flushing to occur. */
6a47baa6 3075 ret = i915_gem_object_wait_rendering(obj);
e47c68e9 3076 if (ret != 0)
6a47baa6 3077 return ret;
e47c68e9
EA
3078 i915_gem_object_flush_gtt_write_domain(obj);
3079
3080 /* If we're already fully in the CPU read domain, we're done. */
3081 if (obj_priv->page_cpu_valid == NULL &&
3082 (obj->read_domains & I915_GEM_DOMAIN_CPU) != 0)
3083 return 0;
673a394b 3084
e47c68e9
EA
3085 /* Otherwise, create/clear the per-page CPU read domain flag if we're
3086 * newly adding I915_GEM_DOMAIN_CPU
3087 */
673a394b 3088 if (obj_priv->page_cpu_valid == NULL) {
9a298b2a
EA
3089 obj_priv->page_cpu_valid = kzalloc(obj->size / PAGE_SIZE,
3090 GFP_KERNEL);
e47c68e9
EA
3091 if (obj_priv->page_cpu_valid == NULL)
3092 return -ENOMEM;
3093 } else if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0)
3094 memset(obj_priv->page_cpu_valid, 0, obj->size / PAGE_SIZE);
673a394b
EA
3095
3096 /* Flush the cache on any pages that are still invalid from the CPU's
3097 * perspective.
3098 */
e47c68e9
EA
3099 for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE;
3100 i++) {
673a394b
EA
3101 if (obj_priv->page_cpu_valid[i])
3102 continue;
3103
856fa198 3104 drm_clflush_pages(obj_priv->pages + i, 1);
673a394b
EA
3105
3106 obj_priv->page_cpu_valid[i] = 1;
3107 }
3108
e47c68e9
EA
3109 /* It should now be out of any other write domains, and we can update
3110 * the domain values for our changes.
3111 */
3112 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3113
1c5d22f7 3114 old_read_domains = obj->read_domains;
e47c68e9
EA
3115 obj->read_domains |= I915_GEM_DOMAIN_CPU;
3116
1c5d22f7
CW
3117 trace_i915_gem_object_change_domain(obj,
3118 old_read_domains,
3119 obj->write_domain);
3120
673a394b
EA
3121 return 0;
3122}
3123
673a394b
EA
3124/**
3125 * Pin an object to the GTT and evaluate the relocations landing in it.
3126 */
3127static int
3128i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
3129 struct drm_file *file_priv,
76446cac 3130 struct drm_i915_gem_exec_object2 *entry,
40a5f0de 3131 struct drm_i915_gem_relocation_entry *relocs)
673a394b
EA
3132{
3133 struct drm_device *dev = obj->dev;
0839ccb8 3134 drm_i915_private_t *dev_priv = dev->dev_private;
23010e43 3135 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
673a394b 3136 int i, ret;
0839ccb8 3137 void __iomem *reloc_page;
76446cac
JB
3138 bool need_fence;
3139
3140 need_fence = entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
3141 obj_priv->tiling_mode != I915_TILING_NONE;
3142
3143 /* Check fence reg constraints and rebind if necessary */
808b24d6
CW
3144 if (need_fence &&
3145 !i915_gem_object_fence_offset_ok(obj,
3146 obj_priv->tiling_mode)) {
3147 ret = i915_gem_object_unbind(obj);
3148 if (ret)
3149 return ret;
3150 }
673a394b
EA
3151
3152 /* Choose the GTT offset for our buffer and put it there. */
3153 ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment);
3154 if (ret)
3155 return ret;
3156
76446cac
JB
3157 /*
3158 * Pre-965 chips need a fence register set up in order to
3159 * properly handle blits to/from tiled surfaces.
3160 */
3161 if (need_fence) {
3162 ret = i915_gem_object_get_fence_reg(obj);
3163 if (ret != 0) {
76446cac
JB
3164 i915_gem_object_unpin(obj);
3165 return ret;
3166 }
3167 }
3168
673a394b
EA
3169 entry->offset = obj_priv->gtt_offset;
3170
673a394b
EA
3171 /* Apply the relocations, using the GTT aperture to avoid cache
3172 * flushing requirements.
3173 */
3174 for (i = 0; i < entry->relocation_count; i++) {
40a5f0de 3175 struct drm_i915_gem_relocation_entry *reloc= &relocs[i];
673a394b
EA
3176 struct drm_gem_object *target_obj;
3177 struct drm_i915_gem_object *target_obj_priv;
3043c60c
EA
3178 uint32_t reloc_val, reloc_offset;
3179 uint32_t __iomem *reloc_entry;
673a394b 3180
673a394b 3181 target_obj = drm_gem_object_lookup(obj->dev, file_priv,
40a5f0de 3182 reloc->target_handle);
673a394b
EA
3183 if (target_obj == NULL) {
3184 i915_gem_object_unpin(obj);
3185 return -EBADF;
3186 }
23010e43 3187 target_obj_priv = to_intel_bo(target_obj);
673a394b 3188
8542a0bb
CW
3189#if WATCH_RELOC
3190 DRM_INFO("%s: obj %p offset %08x target %d "
3191 "read %08x write %08x gtt %08x "
3192 "presumed %08x delta %08x\n",
3193 __func__,
3194 obj,
3195 (int) reloc->offset,
3196 (int) reloc->target_handle,
3197 (int) reloc->read_domains,
3198 (int) reloc->write_domain,
3199 (int) target_obj_priv->gtt_offset,
3200 (int) reloc->presumed_offset,
3201 reloc->delta);
3202#endif
3203
673a394b
EA
3204 /* The target buffer should have appeared before us in the
3205 * exec_object list, so it should have a GTT space bound by now.
3206 */
3207 if (target_obj_priv->gtt_space == NULL) {
3208 DRM_ERROR("No GTT space found for object %d\n",
40a5f0de 3209 reloc->target_handle);
673a394b
EA
3210 drm_gem_object_unreference(target_obj);
3211 i915_gem_object_unpin(obj);
3212 return -EINVAL;
3213 }
3214
8542a0bb 3215 /* Validate that the target is in a valid r/w GPU domain */
16edd550
DV
3216 if (reloc->write_domain & (reloc->write_domain - 1)) {
3217 DRM_ERROR("reloc with multiple write domains: "
3218 "obj %p target %d offset %d "
3219 "read %08x write %08x",
3220 obj, reloc->target_handle,
3221 (int) reloc->offset,
3222 reloc->read_domains,
3223 reloc->write_domain);
3224 return -EINVAL;
3225 }
40a5f0de
EA
3226 if (reloc->write_domain & I915_GEM_DOMAIN_CPU ||
3227 reloc->read_domains & I915_GEM_DOMAIN_CPU) {
e47c68e9
EA
3228 DRM_ERROR("reloc with read/write CPU domains: "
3229 "obj %p target %d offset %d "
3230 "read %08x write %08x",
40a5f0de
EA
3231 obj, reloc->target_handle,
3232 (int) reloc->offset,
3233 reloc->read_domains,
3234 reloc->write_domain);
491152b8
CW
3235 drm_gem_object_unreference(target_obj);
3236 i915_gem_object_unpin(obj);
e47c68e9
EA
3237 return -EINVAL;
3238 }
40a5f0de
EA
3239 if (reloc->write_domain && target_obj->pending_write_domain &&
3240 reloc->write_domain != target_obj->pending_write_domain) {
673a394b
EA
3241 DRM_ERROR("Write domain conflict: "
3242 "obj %p target %d offset %d "
3243 "new %08x old %08x\n",
40a5f0de
EA
3244 obj, reloc->target_handle,
3245 (int) reloc->offset,
3246 reloc->write_domain,
673a394b
EA
3247 target_obj->pending_write_domain);
3248 drm_gem_object_unreference(target_obj);
3249 i915_gem_object_unpin(obj);
3250 return -EINVAL;
3251 }
3252
40a5f0de
EA
3253 target_obj->pending_read_domains |= reloc->read_domains;
3254 target_obj->pending_write_domain |= reloc->write_domain;
673a394b
EA
3255
3256 /* If the relocation already has the right value in it, no
3257 * more work needs to be done.
3258 */
40a5f0de 3259 if (target_obj_priv->gtt_offset == reloc->presumed_offset) {
673a394b
EA
3260 drm_gem_object_unreference(target_obj);
3261 continue;
3262 }
3263
8542a0bb
CW
3264 /* Check that the relocation address is valid... */
3265 if (reloc->offset > obj->size - 4) {
3266 DRM_ERROR("Relocation beyond object bounds: "
3267 "obj %p target %d offset %d size %d.\n",
3268 obj, reloc->target_handle,
3269 (int) reloc->offset, (int) obj->size);
3270 drm_gem_object_unreference(target_obj);
3271 i915_gem_object_unpin(obj);
3272 return -EINVAL;
3273 }
3274 if (reloc->offset & 3) {
3275 DRM_ERROR("Relocation not 4-byte aligned: "
3276 "obj %p target %d offset %d.\n",
3277 obj, reloc->target_handle,
3278 (int) reloc->offset);
3279 drm_gem_object_unreference(target_obj);
3280 i915_gem_object_unpin(obj);
3281 return -EINVAL;
3282 }
3283
3284 /* and points to somewhere within the target object. */
3285 if (reloc->delta >= target_obj->size) {
3286 DRM_ERROR("Relocation beyond target object bounds: "
3287 "obj %p target %d delta %d size %d.\n",
3288 obj, reloc->target_handle,
3289 (int) reloc->delta, (int) target_obj->size);
3290 drm_gem_object_unreference(target_obj);
3291 i915_gem_object_unpin(obj);
3292 return -EINVAL;
3293 }
3294
2ef7eeaa
EA
3295 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
3296 if (ret != 0) {
3297 drm_gem_object_unreference(target_obj);
3298 i915_gem_object_unpin(obj);
3299 return -EINVAL;
673a394b
EA
3300 }
3301
3302 /* Map the page containing the relocation we're going to
3303 * perform.
3304 */
40a5f0de 3305 reloc_offset = obj_priv->gtt_offset + reloc->offset;
0839ccb8
KP
3306 reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
3307 (reloc_offset &
fca3ec01
CW
3308 ~(PAGE_SIZE - 1)),
3309 KM_USER0);
3043c60c 3310 reloc_entry = (uint32_t __iomem *)(reloc_page +
0839ccb8 3311 (reloc_offset & (PAGE_SIZE - 1)));
40a5f0de 3312 reloc_val = target_obj_priv->gtt_offset + reloc->delta;
673a394b
EA
3313
3314#if WATCH_BUF
3315 DRM_INFO("Applied relocation: %p@0x%08x %08x -> %08x\n",
40a5f0de 3316 obj, (unsigned int) reloc->offset,
673a394b
EA
3317 readl(reloc_entry), reloc_val);
3318#endif
3319 writel(reloc_val, reloc_entry);
fca3ec01 3320 io_mapping_unmap_atomic(reloc_page, KM_USER0);
673a394b 3321
40a5f0de
EA
3322 /* The updated presumed offset for this entry will be
3323 * copied back out to the user.
673a394b 3324 */
40a5f0de 3325 reloc->presumed_offset = target_obj_priv->gtt_offset;
673a394b
EA
3326
3327 drm_gem_object_unreference(target_obj);
3328 }
3329
673a394b
EA
3330#if WATCH_BUF
3331 if (0)
3332 i915_gem_dump_object(obj, 128, __func__, ~0);
3333#endif
3334 return 0;
3335}
3336
673a394b
EA
3337/* Throttle our rendering by waiting until the ring has completed our requests
3338 * emitted over 20 msec ago.
3339 *
b962442e
EA
3340 * Note that if we were to use the current jiffies each time around the loop,
3341 * we wouldn't escape the function with any frames outstanding if the time to
3342 * render a frame was over 20ms.
3343 *
673a394b
EA
3344 * This should get us reasonable parallelism between CPU and GPU but also
3345 * relatively low latency when blocking on a particular request to finish.
3346 */
3347static int
3348i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv)
3349{
3350 struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
3351 int ret = 0;
b962442e 3352 unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
673a394b
EA
3353
3354 mutex_lock(&dev->struct_mutex);
b962442e
EA
3355 while (!list_empty(&i915_file_priv->mm.request_list)) {
3356 struct drm_i915_gem_request *request;
3357
3358 request = list_first_entry(&i915_file_priv->mm.request_list,
3359 struct drm_i915_gem_request,
3360 client_list);
3361
3362 if (time_after_eq(request->emitted_jiffies, recent_enough))
3363 break;
3364
852835f3 3365 ret = i915_wait_request(dev, request->seqno, request->ring);
b962442e
EA
3366 if (ret != 0)
3367 break;
3368 }
673a394b 3369 mutex_unlock(&dev->struct_mutex);
b962442e 3370
673a394b
EA
3371 return ret;
3372}
3373
40a5f0de 3374static int
76446cac 3375i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object2 *exec_list,
40a5f0de
EA
3376 uint32_t buffer_count,
3377 struct drm_i915_gem_relocation_entry **relocs)
3378{
3379 uint32_t reloc_count = 0, reloc_index = 0, i;
3380 int ret;
3381
3382 *relocs = NULL;
3383 for (i = 0; i < buffer_count; i++) {
3384 if (reloc_count + exec_list[i].relocation_count < reloc_count)
3385 return -EINVAL;
3386 reloc_count += exec_list[i].relocation_count;
3387 }
3388
8e7d2b2c 3389 *relocs = drm_calloc_large(reloc_count, sizeof(**relocs));
76446cac
JB
3390 if (*relocs == NULL) {
3391 DRM_ERROR("failed to alloc relocs, count %d\n", reloc_count);
40a5f0de 3392 return -ENOMEM;
76446cac 3393 }
40a5f0de
EA
3394
3395 for (i = 0; i < buffer_count; i++) {
3396 struct drm_i915_gem_relocation_entry __user *user_relocs;
3397
3398 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
3399
3400 ret = copy_from_user(&(*relocs)[reloc_index],
3401 user_relocs,
3402 exec_list[i].relocation_count *
3403 sizeof(**relocs));
3404 if (ret != 0) {
8e7d2b2c 3405 drm_free_large(*relocs);
40a5f0de 3406 *relocs = NULL;
2bc43b5c 3407 return -EFAULT;
40a5f0de
EA
3408 }
3409
3410 reloc_index += exec_list[i].relocation_count;
3411 }
3412
2bc43b5c 3413 return 0;
40a5f0de
EA
3414}
3415
3416static int
76446cac 3417i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object2 *exec_list,
40a5f0de
EA
3418 uint32_t buffer_count,
3419 struct drm_i915_gem_relocation_entry *relocs)
3420{
3421 uint32_t reloc_count = 0, i;
2bc43b5c 3422 int ret = 0;
40a5f0de 3423
93533c29
CW
3424 if (relocs == NULL)
3425 return 0;
3426
40a5f0de
EA
3427 for (i = 0; i < buffer_count; i++) {
3428 struct drm_i915_gem_relocation_entry __user *user_relocs;
2bc43b5c 3429 int unwritten;
40a5f0de
EA
3430
3431 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
3432
2bc43b5c
FM
3433 unwritten = copy_to_user(user_relocs,
3434 &relocs[reloc_count],
3435 exec_list[i].relocation_count *
3436 sizeof(*relocs));
3437
3438 if (unwritten) {
3439 ret = -EFAULT;
3440 goto err;
40a5f0de
EA
3441 }
3442
3443 reloc_count += exec_list[i].relocation_count;
3444 }
3445
2bc43b5c 3446err:
8e7d2b2c 3447 drm_free_large(relocs);
40a5f0de
EA
3448
3449 return ret;
3450}
3451
83d60795 3452static int
76446cac 3453i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer2 *exec,
83d60795
CW
3454 uint64_t exec_offset)
3455{
3456 uint32_t exec_start, exec_len;
3457
3458 exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
3459 exec_len = (uint32_t) exec->batch_len;
3460
3461 if ((exec_start | exec_len) & 0x7)
3462 return -EINVAL;
3463
3464 if (!exec_start)
3465 return -EINVAL;
3466
3467 return 0;
3468}
3469
6b95a207
KH
3470static int
3471i915_gem_wait_for_pending_flip(struct drm_device *dev,
3472 struct drm_gem_object **object_list,
3473 int count)
3474{
3475 drm_i915_private_t *dev_priv = dev->dev_private;
3476 struct drm_i915_gem_object *obj_priv;
3477 DEFINE_WAIT(wait);
3478 int i, ret = 0;
3479
3480 for (;;) {
3481 prepare_to_wait(&dev_priv->pending_flip_queue,
3482 &wait, TASK_INTERRUPTIBLE);
3483 for (i = 0; i < count; i++) {
23010e43 3484 obj_priv = to_intel_bo(object_list[i]);
6b95a207
KH
3485 if (atomic_read(&obj_priv->pending_flip) > 0)
3486 break;
3487 }
3488 if (i == count)
3489 break;
3490
3491 if (!signal_pending(current)) {
3492 mutex_unlock(&dev->struct_mutex);
3493 schedule();
3494 mutex_lock(&dev->struct_mutex);
3495 continue;
3496 }
3497 ret = -ERESTARTSYS;
3498 break;
3499 }
3500 finish_wait(&dev_priv->pending_flip_queue, &wait);
3501
3502 return ret;
3503}
3504
43b27f40 3505
673a394b 3506int
76446cac
JB
3507i915_gem_do_execbuffer(struct drm_device *dev, void *data,
3508 struct drm_file *file_priv,
3509 struct drm_i915_gem_execbuffer2 *args,
3510 struct drm_i915_gem_exec_object2 *exec_list)
673a394b
EA
3511{
3512 drm_i915_private_t *dev_priv = dev->dev_private;
673a394b
EA
3513 struct drm_gem_object **object_list = NULL;
3514 struct drm_gem_object *batch_obj;
b70d11da 3515 struct drm_i915_gem_object *obj_priv;
201361a5 3516 struct drm_clip_rect *cliprects = NULL;
93533c29 3517 struct drm_i915_gem_relocation_entry *relocs = NULL;
76446cac 3518 int ret = 0, ret2, i, pinned = 0;
673a394b 3519 uint64_t exec_offset;
40a5f0de 3520 uint32_t seqno, flush_domains, reloc_index;
6b95a207 3521 int pin_tries, flips;
673a394b 3522
852835f3
ZN
3523 struct intel_ring_buffer *ring = NULL;
3524
673a394b
EA
3525#if WATCH_EXEC
3526 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
3527 (int) args->buffers_ptr, args->buffer_count, args->batch_len);
3528#endif
d1b851fc
ZN
3529 if (args->flags & I915_EXEC_BSD) {
3530 if (!HAS_BSD(dev)) {
3531 DRM_ERROR("execbuf with wrong flag\n");
3532 return -EINVAL;
3533 }
3534 ring = &dev_priv->bsd_ring;
3535 } else {
3536 ring = &dev_priv->render_ring;
3537 }
3538
4f481ed2
EA
3539 if (args->buffer_count < 1) {
3540 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
3541 return -EINVAL;
3542 }
c8e0f93a 3543 object_list = drm_malloc_ab(sizeof(*object_list), args->buffer_count);
76446cac
JB
3544 if (object_list == NULL) {
3545 DRM_ERROR("Failed to allocate object list for %d buffers\n",
673a394b
EA
3546 args->buffer_count);
3547 ret = -ENOMEM;
3548 goto pre_mutex_err;
3549 }
673a394b 3550
201361a5 3551 if (args->num_cliprects != 0) {
9a298b2a
EA
3552 cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects),
3553 GFP_KERNEL);
a40e8d31
OA
3554 if (cliprects == NULL) {
3555 ret = -ENOMEM;
201361a5 3556 goto pre_mutex_err;
a40e8d31 3557 }
201361a5
EA
3558
3559 ret = copy_from_user(cliprects,
3560 (struct drm_clip_rect __user *)
3561 (uintptr_t) args->cliprects_ptr,
3562 sizeof(*cliprects) * args->num_cliprects);
3563 if (ret != 0) {
3564 DRM_ERROR("copy %d cliprects failed: %d\n",
3565 args->num_cliprects, ret);
3566 goto pre_mutex_err;
3567 }
3568 }
3569
40a5f0de
EA
3570 ret = i915_gem_get_relocs_from_user(exec_list, args->buffer_count,
3571 &relocs);
3572 if (ret != 0)
3573 goto pre_mutex_err;
3574
673a394b
EA
3575 mutex_lock(&dev->struct_mutex);
3576
3577 i915_verify_inactive(dev, __FILE__, __LINE__);
3578
ba1234d1 3579 if (atomic_read(&dev_priv->mm.wedged)) {
673a394b 3580 mutex_unlock(&dev->struct_mutex);
a198bc80
CW
3581 ret = -EIO;
3582 goto pre_mutex_err;
673a394b
EA
3583 }
3584
3585 if (dev_priv->mm.suspended) {
673a394b 3586 mutex_unlock(&dev->struct_mutex);
a198bc80
CW
3587 ret = -EBUSY;
3588 goto pre_mutex_err;
673a394b
EA
3589 }
3590
ac94a962 3591 /* Look up object handles */
6b95a207 3592 flips = 0;
673a394b
EA
3593 for (i = 0; i < args->buffer_count; i++) {
3594 object_list[i] = drm_gem_object_lookup(dev, file_priv,
3595 exec_list[i].handle);
3596 if (object_list[i] == NULL) {
3597 DRM_ERROR("Invalid object handle %d at index %d\n",
3598 exec_list[i].handle, i);
0ce907f8
CW
3599 /* prevent error path from reading uninitialized data */
3600 args->buffer_count = i + 1;
673a394b
EA
3601 ret = -EBADF;
3602 goto err;
3603 }
b70d11da 3604
23010e43 3605 obj_priv = to_intel_bo(object_list[i]);
b70d11da
KH
3606 if (obj_priv->in_execbuffer) {
3607 DRM_ERROR("Object %p appears more than once in object list\n",
3608 object_list[i]);
0ce907f8
CW
3609 /* prevent error path from reading uninitialized data */
3610 args->buffer_count = i + 1;
b70d11da
KH
3611 ret = -EBADF;
3612 goto err;
3613 }
3614 obj_priv->in_execbuffer = true;
6b95a207
KH
3615 flips += atomic_read(&obj_priv->pending_flip);
3616 }
3617
3618 if (flips > 0) {
3619 ret = i915_gem_wait_for_pending_flip(dev, object_list,
3620 args->buffer_count);
3621 if (ret)
3622 goto err;
ac94a962 3623 }
673a394b 3624
ac94a962
KP
3625 /* Pin and relocate */
3626 for (pin_tries = 0; ; pin_tries++) {
3627 ret = 0;
40a5f0de
EA
3628 reloc_index = 0;
3629
ac94a962
KP
3630 for (i = 0; i < args->buffer_count; i++) {
3631 object_list[i]->pending_read_domains = 0;
3632 object_list[i]->pending_write_domain = 0;
3633 ret = i915_gem_object_pin_and_relocate(object_list[i],
3634 file_priv,
40a5f0de
EA
3635 &exec_list[i],
3636 &relocs[reloc_index]);
ac94a962
KP
3637 if (ret)
3638 break;
3639 pinned = i + 1;
40a5f0de 3640 reloc_index += exec_list[i].relocation_count;
ac94a962
KP
3641 }
3642 /* success */
3643 if (ret == 0)
3644 break;
3645
3646 /* error other than GTT full, or we've already tried again */
2939e1f5 3647 if (ret != -ENOSPC || pin_tries >= 1) {
07f73f69
CW
3648 if (ret != -ERESTARTSYS) {
3649 unsigned long long total_size = 0;
3d1cc470
CW
3650 int num_fences = 0;
3651 for (i = 0; i < args->buffer_count; i++) {
43b27f40 3652 obj_priv = to_intel_bo(object_list[i]);
3d1cc470 3653
07f73f69 3654 total_size += object_list[i]->size;
3d1cc470
CW
3655 num_fences +=
3656 exec_list[i].flags & EXEC_OBJECT_NEEDS_FENCE &&
3657 obj_priv->tiling_mode != I915_TILING_NONE;
3658 }
3659 DRM_ERROR("Failed to pin buffer %d of %d, total %llu bytes, %d fences: %d\n",
07f73f69 3660 pinned+1, args->buffer_count,
3d1cc470
CW
3661 total_size, num_fences,
3662 ret);
07f73f69
CW
3663 DRM_ERROR("%d objects [%d pinned], "
3664 "%d object bytes [%d pinned], "
3665 "%d/%d gtt bytes\n",
3666 atomic_read(&dev->object_count),
3667 atomic_read(&dev->pin_count),
3668 atomic_read(&dev->object_memory),
3669 atomic_read(&dev->pin_memory),
3670 atomic_read(&dev->gtt_memory),
3671 dev->gtt_total);
3672 }
673a394b
EA
3673 goto err;
3674 }
ac94a962
KP
3675
3676 /* unpin all of our buffers */
3677 for (i = 0; i < pinned; i++)
3678 i915_gem_object_unpin(object_list[i]);
b1177636 3679 pinned = 0;
ac94a962
KP
3680
3681 /* evict everyone we can from the aperture */
3682 ret = i915_gem_evict_everything(dev);
07f73f69 3683 if (ret && ret != -ENOSPC)
ac94a962 3684 goto err;
673a394b
EA
3685 }
3686
3687 /* Set the pending read domains for the batch buffer to COMMAND */
3688 batch_obj = object_list[args->buffer_count-1];
5f26a2c7
CW
3689 if (batch_obj->pending_write_domain) {
3690 DRM_ERROR("Attempting to use self-modifying batch buffer\n");
3691 ret = -EINVAL;
3692 goto err;
3693 }
3694 batch_obj->pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
673a394b 3695
83d60795
CW
3696 /* Sanity check the batch buffer, prior to moving objects */
3697 exec_offset = exec_list[args->buffer_count - 1].offset;
3698 ret = i915_gem_check_execbuffer (args, exec_offset);
3699 if (ret != 0) {
3700 DRM_ERROR("execbuf with invalid offset/length\n");
3701 goto err;
3702 }
3703
673a394b
EA
3704 i915_verify_inactive(dev, __FILE__, __LINE__);
3705
646f0f6e
KP
3706 /* Zero the global flush/invalidate flags. These
3707 * will be modified as new domains are computed
3708 * for each object
3709 */
3710 dev->invalidate_domains = 0;
3711 dev->flush_domains = 0;
88f356b7 3712 dev_priv->flush_rings = 0;
646f0f6e 3713
673a394b
EA
3714 for (i = 0; i < args->buffer_count; i++) {
3715 struct drm_gem_object *obj = object_list[i];
673a394b 3716
646f0f6e 3717 /* Compute new gpu domains and update invalidate/flush */
8b0e378a 3718 i915_gem_object_set_to_gpu_domain(obj);
673a394b
EA
3719 }
3720
3721 i915_verify_inactive(dev, __FILE__, __LINE__);
3722
646f0f6e
KP
3723 if (dev->invalidate_domains | dev->flush_domains) {
3724#if WATCH_EXEC
3725 DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
3726 __func__,
3727 dev->invalidate_domains,
3728 dev->flush_domains);
3729#endif
3730 i915_gem_flush(dev,
3731 dev->invalidate_domains,
3732 dev->flush_domains);
88f356b7 3733 if (dev_priv->flush_rings & FLUSH_RENDER_RING)
b962442e 3734 (void)i915_add_request(dev, file_priv,
88f356b7
CW
3735 dev->flush_domains,
3736 &dev_priv->render_ring);
3737 if (dev_priv->flush_rings & FLUSH_BSD_RING)
3738 (void)i915_add_request(dev, file_priv,
3739 dev->flush_domains,
3740 &dev_priv->bsd_ring);
646f0f6e 3741 }
673a394b 3742
efbeed96
EA
3743 for (i = 0; i < args->buffer_count; i++) {
3744 struct drm_gem_object *obj = object_list[i];
23010e43 3745 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1c5d22f7 3746 uint32_t old_write_domain = obj->write_domain;
efbeed96
EA
3747
3748 obj->write_domain = obj->pending_write_domain;
99fcb766
DV
3749 if (obj->write_domain)
3750 list_move_tail(&obj_priv->gpu_write_list,
3751 &dev_priv->mm.gpu_write_list);
3752 else
3753 list_del_init(&obj_priv->gpu_write_list);
3754
1c5d22f7
CW
3755 trace_i915_gem_object_change_domain(obj,
3756 obj->read_domains,
3757 old_write_domain);
efbeed96
EA
3758 }
3759
673a394b
EA
3760 i915_verify_inactive(dev, __FILE__, __LINE__);
3761
3762#if WATCH_COHERENCY
3763 for (i = 0; i < args->buffer_count; i++) {
3764 i915_gem_object_check_coherency(object_list[i],
3765 exec_list[i].handle);
3766 }
3767#endif
3768
673a394b 3769#if WATCH_EXEC
6911a9b8 3770 i915_gem_dump_object(batch_obj,
673a394b
EA
3771 args->batch_len,
3772 __func__,
3773 ~0);
3774#endif
3775
673a394b 3776 /* Exec the batchbuffer */
852835f3
ZN
3777 ret = ring->dispatch_gem_execbuffer(dev, ring, args,
3778 cliprects, exec_offset);
673a394b
EA
3779 if (ret) {
3780 DRM_ERROR("dispatch failed %d\n", ret);
3781 goto err;
3782 }
3783
3784 /*
3785 * Ensure that the commands in the batch buffer are
3786 * finished before the interrupt fires
3787 */
852835f3 3788 flush_domains = i915_retire_commands(dev, ring);
673a394b
EA
3789
3790 i915_verify_inactive(dev, __FILE__, __LINE__);
3791
3792 /*
3793 * Get a seqno representing the execution of the current buffer,
3794 * which we can wait on. We would like to mitigate these interrupts,
3795 * likely by only creating seqnos occasionally (so that we have
3796 * *some* interrupts representing completion of buffers that we can
3797 * wait on when trying to clear up gtt space).
3798 */
852835f3 3799 seqno = i915_add_request(dev, file_priv, flush_domains, ring);
673a394b 3800 BUG_ON(seqno == 0);
673a394b
EA
3801 for (i = 0; i < args->buffer_count; i++) {
3802 struct drm_gem_object *obj = object_list[i];
852835f3 3803 obj_priv = to_intel_bo(obj);
673a394b 3804
852835f3 3805 i915_gem_object_move_to_active(obj, seqno, ring);
673a394b
EA
3806#if WATCH_LRU
3807 DRM_INFO("%s: move to exec list %p\n", __func__, obj);
3808#endif
3809 }
3810#if WATCH_LRU
3811 i915_dump_lru(dev, __func__);
3812#endif
3813
3814 i915_verify_inactive(dev, __FILE__, __LINE__);
3815
673a394b 3816err:
aad87dff
JL
3817 for (i = 0; i < pinned; i++)
3818 i915_gem_object_unpin(object_list[i]);
3819
b70d11da
KH
3820 for (i = 0; i < args->buffer_count; i++) {
3821 if (object_list[i]) {
23010e43 3822 obj_priv = to_intel_bo(object_list[i]);
b70d11da
KH
3823 obj_priv->in_execbuffer = false;
3824 }
aad87dff 3825 drm_gem_object_unreference(object_list[i]);
b70d11da 3826 }
673a394b 3827
673a394b
EA
3828 mutex_unlock(&dev->struct_mutex);
3829
93533c29 3830pre_mutex_err:
40a5f0de
EA
3831 /* Copy the updated relocations out regardless of current error
3832 * state. Failure to update the relocs would mean that the next
3833 * time userland calls execbuf, it would do so with presumed offset
3834 * state that didn't match the actual object state.
3835 */
3836 ret2 = i915_gem_put_relocs_to_user(exec_list, args->buffer_count,
3837 relocs);
3838 if (ret2 != 0) {
3839 DRM_ERROR("Failed to copy relocations back out: %d\n", ret2);
3840
3841 if (ret == 0)
3842 ret = ret2;
3843 }
3844
8e7d2b2c 3845 drm_free_large(object_list);
9a298b2a 3846 kfree(cliprects);
673a394b
EA
3847
3848 return ret;
3849}
3850
76446cac
JB
3851/*
3852 * Legacy execbuffer just creates an exec2 list from the original exec object
3853 * list array and passes it to the real function.
3854 */
3855int
3856i915_gem_execbuffer(struct drm_device *dev, void *data,
3857 struct drm_file *file_priv)
3858{
3859 struct drm_i915_gem_execbuffer *args = data;
3860 struct drm_i915_gem_execbuffer2 exec2;
3861 struct drm_i915_gem_exec_object *exec_list = NULL;
3862 struct drm_i915_gem_exec_object2 *exec2_list = NULL;
3863 int ret, i;
3864
3865#if WATCH_EXEC
3866 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
3867 (int) args->buffers_ptr, args->buffer_count, args->batch_len);
3868#endif
3869
3870 if (args->buffer_count < 1) {
3871 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
3872 return -EINVAL;
3873 }
3874
3875 /* Copy in the exec list from userland */
3876 exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
3877 exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
3878 if (exec_list == NULL || exec2_list == NULL) {
3879 DRM_ERROR("Failed to allocate exec list for %d buffers\n",
3880 args->buffer_count);
3881 drm_free_large(exec_list);
3882 drm_free_large(exec2_list);
3883 return -ENOMEM;
3884 }
3885 ret = copy_from_user(exec_list,
3886 (struct drm_i915_relocation_entry __user *)
3887 (uintptr_t) args->buffers_ptr,
3888 sizeof(*exec_list) * args->buffer_count);
3889 if (ret != 0) {
3890 DRM_ERROR("copy %d exec entries failed %d\n",
3891 args->buffer_count, ret);
3892 drm_free_large(exec_list);
3893 drm_free_large(exec2_list);
3894 return -EFAULT;
3895 }
3896
3897 for (i = 0; i < args->buffer_count; i++) {
3898 exec2_list[i].handle = exec_list[i].handle;
3899 exec2_list[i].relocation_count = exec_list[i].relocation_count;
3900 exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
3901 exec2_list[i].alignment = exec_list[i].alignment;
3902 exec2_list[i].offset = exec_list[i].offset;
3903 if (!IS_I965G(dev))
3904 exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
3905 else
3906 exec2_list[i].flags = 0;
3907 }
3908
3909 exec2.buffers_ptr = args->buffers_ptr;
3910 exec2.buffer_count = args->buffer_count;
3911 exec2.batch_start_offset = args->batch_start_offset;
3912 exec2.batch_len = args->batch_len;
3913 exec2.DR1 = args->DR1;
3914 exec2.DR4 = args->DR4;
3915 exec2.num_cliprects = args->num_cliprects;
3916 exec2.cliprects_ptr = args->cliprects_ptr;
852835f3 3917 exec2.flags = I915_EXEC_RENDER;
76446cac
JB
3918
3919 ret = i915_gem_do_execbuffer(dev, data, file_priv, &exec2, exec2_list);
3920 if (!ret) {
3921 /* Copy the new buffer offsets back to the user's exec list. */
3922 for (i = 0; i < args->buffer_count; i++)
3923 exec_list[i].offset = exec2_list[i].offset;
3924 /* ... and back out to userspace */
3925 ret = copy_to_user((struct drm_i915_relocation_entry __user *)
3926 (uintptr_t) args->buffers_ptr,
3927 exec_list,
3928 sizeof(*exec_list) * args->buffer_count);
3929 if (ret) {
3930 ret = -EFAULT;
3931 DRM_ERROR("failed to copy %d exec entries "
3932 "back to user (%d)\n",
3933 args->buffer_count, ret);
3934 }
76446cac
JB
3935 }
3936
3937 drm_free_large(exec_list);
3938 drm_free_large(exec2_list);
3939 return ret;
3940}
3941
3942int
3943i915_gem_execbuffer2(struct drm_device *dev, void *data,
3944 struct drm_file *file_priv)
3945{
3946 struct drm_i915_gem_execbuffer2 *args = data;
3947 struct drm_i915_gem_exec_object2 *exec2_list = NULL;
3948 int ret;
3949
3950#if WATCH_EXEC
3951 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
3952 (int) args->buffers_ptr, args->buffer_count, args->batch_len);
3953#endif
3954
3955 if (args->buffer_count < 1) {
3956 DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count);
3957 return -EINVAL;
3958 }
3959
3960 exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
3961 if (exec2_list == NULL) {
3962 DRM_ERROR("Failed to allocate exec list for %d buffers\n",
3963 args->buffer_count);
3964 return -ENOMEM;
3965 }
3966 ret = copy_from_user(exec2_list,
3967 (struct drm_i915_relocation_entry __user *)
3968 (uintptr_t) args->buffers_ptr,
3969 sizeof(*exec2_list) * args->buffer_count);
3970 if (ret != 0) {
3971 DRM_ERROR("copy %d exec entries failed %d\n",
3972 args->buffer_count, ret);
3973 drm_free_large(exec2_list);
3974 return -EFAULT;
3975 }
3976
3977 ret = i915_gem_do_execbuffer(dev, data, file_priv, args, exec2_list);
3978 if (!ret) {
3979 /* Copy the new buffer offsets back to the user's exec list. */
3980 ret = copy_to_user((struct drm_i915_relocation_entry __user *)
3981 (uintptr_t) args->buffers_ptr,
3982 exec2_list,
3983 sizeof(*exec2_list) * args->buffer_count);
3984 if (ret) {
3985 ret = -EFAULT;
3986 DRM_ERROR("failed to copy %d exec entries "
3987 "back to user (%d)\n",
3988 args->buffer_count, ret);
3989 }
3990 }
3991
3992 drm_free_large(exec2_list);
3993 return ret;
3994}
3995
673a394b
EA
3996int
3997i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
3998{
3999 struct drm_device *dev = obj->dev;
23010e43 4000 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
673a394b
EA
4001 int ret;
4002
778c3544
DV
4003 BUG_ON(obj_priv->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT);
4004
673a394b 4005 i915_verify_inactive(dev, __FILE__, __LINE__);
ac0c6b5a
CW
4006
4007 if (obj_priv->gtt_space != NULL) {
4008 if (alignment == 0)
4009 alignment = i915_gem_get_gtt_alignment(obj);
4010 if (obj_priv->gtt_offset & (alignment - 1)) {
ae7d49d8
CW
4011 WARN(obj_priv->pin_count,
4012 "bo is already pinned with incorrect alignment:"
4013 " offset=%x, req.alignment=%x\n",
4014 obj_priv->gtt_offset, alignment);
ac0c6b5a
CW
4015 ret = i915_gem_object_unbind(obj);
4016 if (ret)
4017 return ret;
4018 }
4019 }
4020
673a394b
EA
4021 if (obj_priv->gtt_space == NULL) {
4022 ret = i915_gem_object_bind_to_gtt(obj, alignment);
9731129c 4023 if (ret)
673a394b 4024 return ret;
22c344e9 4025 }
76446cac 4026
673a394b
EA
4027 obj_priv->pin_count++;
4028
4029 /* If the object is not active and not pending a flush,
4030 * remove it from the inactive list
4031 */
4032 if (obj_priv->pin_count == 1) {
4033 atomic_inc(&dev->pin_count);
4034 atomic_add(obj->size, &dev->pin_memory);
4035 if (!obj_priv->active &&
bf1a1092 4036 (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
673a394b
EA
4037 list_del_init(&obj_priv->list);
4038 }
4039 i915_verify_inactive(dev, __FILE__, __LINE__);
4040
4041 return 0;
4042}
4043
4044void
4045i915_gem_object_unpin(struct drm_gem_object *obj)
4046{
4047 struct drm_device *dev = obj->dev;
4048 drm_i915_private_t *dev_priv = dev->dev_private;
23010e43 4049 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
673a394b
EA
4050
4051 i915_verify_inactive(dev, __FILE__, __LINE__);
4052 obj_priv->pin_count--;
4053 BUG_ON(obj_priv->pin_count < 0);
4054 BUG_ON(obj_priv->gtt_space == NULL);
4055
4056 /* If the object is no longer pinned, and is
4057 * neither active nor being flushed, then stick it on
4058 * the inactive list
4059 */
4060 if (obj_priv->pin_count == 0) {
4061 if (!obj_priv->active &&
21d509e3 4062 (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
673a394b
EA
4063 list_move_tail(&obj_priv->list,
4064 &dev_priv->mm.inactive_list);
4065 atomic_dec(&dev->pin_count);
4066 atomic_sub(obj->size, &dev->pin_memory);
4067 }
4068 i915_verify_inactive(dev, __FILE__, __LINE__);
4069}
4070
4071int
4072i915_gem_pin_ioctl(struct drm_device *dev, void *data,
4073 struct drm_file *file_priv)
4074{
4075 struct drm_i915_gem_pin *args = data;
4076 struct drm_gem_object *obj;
4077 struct drm_i915_gem_object *obj_priv;
4078 int ret;
4079
4080 mutex_lock(&dev->struct_mutex);
4081
4082 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4083 if (obj == NULL) {
4084 DRM_ERROR("Bad handle in i915_gem_pin_ioctl(): %d\n",
4085 args->handle);
4086 mutex_unlock(&dev->struct_mutex);
4087 return -EBADF;
4088 }
23010e43 4089 obj_priv = to_intel_bo(obj);
673a394b 4090
bb6baf76
CW
4091 if (obj_priv->madv != I915_MADV_WILLNEED) {
4092 DRM_ERROR("Attempting to pin a purgeable buffer\n");
3ef94daa
CW
4093 drm_gem_object_unreference(obj);
4094 mutex_unlock(&dev->struct_mutex);
4095 return -EINVAL;
4096 }
4097
79e53945
JB
4098 if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) {
4099 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
4100 args->handle);
96dec61d 4101 drm_gem_object_unreference(obj);
673a394b 4102 mutex_unlock(&dev->struct_mutex);
79e53945
JB
4103 return -EINVAL;
4104 }
4105
4106 obj_priv->user_pin_count++;
4107 obj_priv->pin_filp = file_priv;
4108 if (obj_priv->user_pin_count == 1) {
4109 ret = i915_gem_object_pin(obj, args->alignment);
4110 if (ret != 0) {
4111 drm_gem_object_unreference(obj);
4112 mutex_unlock(&dev->struct_mutex);
4113 return ret;
4114 }
673a394b
EA
4115 }
4116
4117 /* XXX - flush the CPU caches for pinned objects
4118 * as the X server doesn't manage domains yet
4119 */
e47c68e9 4120 i915_gem_object_flush_cpu_write_domain(obj);
673a394b
EA
4121 args->offset = obj_priv->gtt_offset;
4122 drm_gem_object_unreference(obj);
4123 mutex_unlock(&dev->struct_mutex);
4124
4125 return 0;
4126}
4127
4128int
4129i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
4130 struct drm_file *file_priv)
4131{
4132 struct drm_i915_gem_pin *args = data;
4133 struct drm_gem_object *obj;
79e53945 4134 struct drm_i915_gem_object *obj_priv;
673a394b
EA
4135
4136 mutex_lock(&dev->struct_mutex);
4137
4138 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4139 if (obj == NULL) {
4140 DRM_ERROR("Bad handle in i915_gem_unpin_ioctl(): %d\n",
4141 args->handle);
4142 mutex_unlock(&dev->struct_mutex);
4143 return -EBADF;
4144 }
4145
23010e43 4146 obj_priv = to_intel_bo(obj);
79e53945
JB
4147 if (obj_priv->pin_filp != file_priv) {
4148 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
4149 args->handle);
4150 drm_gem_object_unreference(obj);
4151 mutex_unlock(&dev->struct_mutex);
4152 return -EINVAL;
4153 }
4154 obj_priv->user_pin_count--;
4155 if (obj_priv->user_pin_count == 0) {
4156 obj_priv->pin_filp = NULL;
4157 i915_gem_object_unpin(obj);
4158 }
673a394b
EA
4159
4160 drm_gem_object_unreference(obj);
4161 mutex_unlock(&dev->struct_mutex);
4162 return 0;
4163}
4164
4165int
4166i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4167 struct drm_file *file_priv)
4168{
4169 struct drm_i915_gem_busy *args = data;
4170 struct drm_gem_object *obj;
4171 struct drm_i915_gem_object *obj_priv;
4172
673a394b
EA
4173 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4174 if (obj == NULL) {
4175 DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n",
4176 args->handle);
673a394b
EA
4177 return -EBADF;
4178 }
4179
b1ce786c 4180 mutex_lock(&dev->struct_mutex);
d1b851fc 4181
0be555b6
CW
4182 /* Count all active objects as busy, even if they are currently not used
4183 * by the gpu. Users of this interface expect objects to eventually
4184 * become non-busy without any further actions, therefore emit any
4185 * necessary flushes here.
c4de0a5d 4186 */
0be555b6
CW
4187 obj_priv = to_intel_bo(obj);
4188 args->busy = obj_priv->active;
4189 if (args->busy) {
4190 /* Unconditionally flush objects, even when the gpu still uses this
4191 * object. Userspace calling this function indicates that it wants to
4192 * use this buffer rather sooner than later, so issuing the required
4193 * flush earlier is beneficial.
4194 */
4195 if (obj->write_domain) {
4196 i915_gem_flush(dev, 0, obj->write_domain);
4197 (void)i915_add_request(dev, file_priv, obj->write_domain, obj_priv->ring);
4198 }
4199
4200 /* Update the active list for the hardware's current position.
4201 * Otherwise this only updates on a delayed timer or when irqs
4202 * are actually unmasked, and our working set ends up being
4203 * larger than required.
4204 */
4205 i915_gem_retire_requests_ring(dev, obj_priv->ring);
4206
4207 args->busy = obj_priv->active;
4208 }
673a394b
EA
4209
4210 drm_gem_object_unreference(obj);
4211 mutex_unlock(&dev->struct_mutex);
4212 return 0;
4213}
4214
/*
 * Throttle ioctl: forwards to i915_gem_ring_throttle() for the calling
 * file and returns its result.  The ioctl payload is not used.
 */
int
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
{
	int ret = i915_gem_ring_throttle(dev, file_priv);

	return ret;
}
4221
3ef94daa
CW
4222int
4223i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4224 struct drm_file *file_priv)
4225{
4226 struct drm_i915_gem_madvise *args = data;
4227 struct drm_gem_object *obj;
4228 struct drm_i915_gem_object *obj_priv;
4229
4230 switch (args->madv) {
4231 case I915_MADV_DONTNEED:
4232 case I915_MADV_WILLNEED:
4233 break;
4234 default:
4235 return -EINVAL;
4236 }
4237
4238 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4239 if (obj == NULL) {
4240 DRM_ERROR("Bad handle in i915_gem_madvise_ioctl(): %d\n",
4241 args->handle);
4242 return -EBADF;
4243 }
4244
4245 mutex_lock(&dev->struct_mutex);
23010e43 4246 obj_priv = to_intel_bo(obj);
3ef94daa
CW
4247
4248 if (obj_priv->pin_count) {
4249 drm_gem_object_unreference(obj);
4250 mutex_unlock(&dev->struct_mutex);
4251
4252 DRM_ERROR("Attempted i915_gem_madvise_ioctl() on a pinned object\n");
4253 return -EINVAL;
4254 }
4255
bb6baf76
CW
4256 if (obj_priv->madv != __I915_MADV_PURGED)
4257 obj_priv->madv = args->madv;
3ef94daa 4258
2d7ef395
CW
4259 /* if the object is no longer bound, discard its backing storage */
4260 if (i915_gem_object_is_purgeable(obj_priv) &&
4261 obj_priv->gtt_space == NULL)
4262 i915_gem_object_truncate(obj);
4263
bb6baf76
CW
4264 args->retained = obj_priv->madv != __I915_MADV_PURGED;
4265
3ef94daa
CW
4266 drm_gem_object_unreference(obj);
4267 mutex_unlock(&dev->struct_mutex);
4268
4269 return 0;
4270}
4271
ac52bc56
DV
4272struct drm_gem_object * i915_gem_alloc_object(struct drm_device *dev,
4273 size_t size)
4274{
c397b908 4275 struct drm_i915_gem_object *obj;
ac52bc56 4276
c397b908
DV
4277 obj = kzalloc(sizeof(*obj), GFP_KERNEL);
4278 if (obj == NULL)
4279 return NULL;
673a394b 4280
c397b908
DV
4281 if (drm_gem_object_init(dev, &obj->base, size) != 0) {
4282 kfree(obj);
4283 return NULL;
4284 }
673a394b 4285
c397b908
DV
4286 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4287 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
673a394b 4288
c397b908 4289 obj->agp_type = AGP_USER_MEMORY;
62b8b215 4290 obj->base.driver_private = NULL;
c397b908
DV
4291 obj->fence_reg = I915_FENCE_REG_NONE;
4292 INIT_LIST_HEAD(&obj->list);
4293 INIT_LIST_HEAD(&obj->gpu_write_list);
c397b908 4294 obj->madv = I915_MADV_WILLNEED;
de151cf6 4295
c397b908
DV
4296 trace_i915_gem_object_create(&obj->base);
4297
4298 return &obj->base;
4299}
4300
/*
 * Hits BUG() unconditionally; the return statement only follows it to
 * give the function a value.
 */
int
i915_gem_init_object(struct drm_gem_object *obj)
{
	BUG();

	return 0;
}
4307
be72615b 4308static void i915_gem_free_object_tail(struct drm_gem_object *obj)
673a394b 4309{
de151cf6 4310 struct drm_device *dev = obj->dev;
be72615b 4311 drm_i915_private_t *dev_priv = dev->dev_private;
23010e43 4312 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
be72615b 4313 int ret;
673a394b 4314
be72615b
CW
4315 ret = i915_gem_object_unbind(obj);
4316 if (ret == -ERESTARTSYS) {
4317 list_move(&obj_priv->list,
4318 &dev_priv->mm.deferred_free_list);
4319 return;
4320 }
673a394b 4321
7e616158
CW
4322 if (obj_priv->mmap_offset)
4323 i915_gem_free_mmap_offset(obj);
de151cf6 4324
c397b908
DV
4325 drm_gem_object_release(obj);
4326
9a298b2a 4327 kfree(obj_priv->page_cpu_valid);
280b713b 4328 kfree(obj_priv->bit_17);
c397b908 4329 kfree(obj_priv);
673a394b
EA
4330}
4331
be72615b
CW
4332void i915_gem_free_object(struct drm_gem_object *obj)
4333{
4334 struct drm_device *dev = obj->dev;
4335 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4336
4337 trace_i915_gem_object_destroy(obj);
4338
4339 while (obj_priv->pin_count > 0)
4340 i915_gem_object_unpin(obj);
4341
4342 if (obj_priv->phys_obj)
4343 i915_gem_detach_phys_object(dev, obj);
4344
4345 i915_gem_free_object_tail(obj);
4346}
4347
29105ccc
CW
4348int
4349i915_gem_idle(struct drm_device *dev)
4350{
4351 drm_i915_private_t *dev_priv = dev->dev_private;
4352 int ret;
28dfe52a 4353
29105ccc 4354 mutex_lock(&dev->struct_mutex);
1c5d22f7 4355
8187a2b7 4356 if (dev_priv->mm.suspended ||
d1b851fc
ZN
4357 (dev_priv->render_ring.gem_object == NULL) ||
4358 (HAS_BSD(dev) &&
4359 dev_priv->bsd_ring.gem_object == NULL)) {
29105ccc
CW
4360 mutex_unlock(&dev->struct_mutex);
4361 return 0;
28dfe52a
EA
4362 }
4363
29105ccc 4364 ret = i915_gpu_idle(dev);
6dbe2772
KP
4365 if (ret) {
4366 mutex_unlock(&dev->struct_mutex);
673a394b 4367 return ret;
6dbe2772 4368 }
673a394b 4369
29105ccc
CW
4370 /* Under UMS, be paranoid and evict. */
4371 if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
b47eb4a2 4372 ret = i915_gem_evict_inactive(dev);
29105ccc
CW
4373 if (ret) {
4374 mutex_unlock(&dev->struct_mutex);
4375 return ret;
4376 }
4377 }
4378
4379 /* Hack! Don't let anybody do execbuf while we don't control the chip.
4380 * We need to replace this with a semaphore, or something.
4381 * And not confound mm.suspended!
4382 */
4383 dev_priv->mm.suspended = 1;
4384 del_timer(&dev_priv->hangcheck_timer);
4385
4386 i915_kernel_lost_context(dev);
6dbe2772 4387 i915_gem_cleanup_ringbuffer(dev);
29105ccc 4388
6dbe2772
KP
4389 mutex_unlock(&dev->struct_mutex);
4390
29105ccc
CW
4391 /* Cancel the retire work handler, which should be idle now. */
4392 cancel_delayed_work_sync(&dev_priv->mm.retire_work);
4393
673a394b
EA
4394 return 0;
4395}
4396
e552eb70
JB
4397/*
4398 * 965+ support PIPE_CONTROL commands, which provide finer grained control
4399 * over cache flushing.
4400 */
8187a2b7 4401static int
e552eb70
JB
4402i915_gem_init_pipe_control(struct drm_device *dev)
4403{
4404 drm_i915_private_t *dev_priv = dev->dev_private;
4405 struct drm_gem_object *obj;
4406 struct drm_i915_gem_object *obj_priv;
4407 int ret;
4408
34dc4d44 4409 obj = i915_gem_alloc_object(dev, 4096);
e552eb70
JB
4410 if (obj == NULL) {
4411 DRM_ERROR("Failed to allocate seqno page\n");
4412 ret = -ENOMEM;
4413 goto err;
4414 }
4415 obj_priv = to_intel_bo(obj);
4416 obj_priv->agp_type = AGP_USER_CACHED_MEMORY;
4417
4418 ret = i915_gem_object_pin(obj, 4096);
4419 if (ret)
4420 goto err_unref;
4421
4422 dev_priv->seqno_gfx_addr = obj_priv->gtt_offset;
4423 dev_priv->seqno_page = kmap(obj_priv->pages[0]);
4424 if (dev_priv->seqno_page == NULL)
4425 goto err_unpin;
4426
4427 dev_priv->seqno_obj = obj;
4428 memset(dev_priv->seqno_page, 0, PAGE_SIZE);
4429
4430 return 0;
4431
4432err_unpin:
4433 i915_gem_object_unpin(obj);
4434err_unref:
4435 drm_gem_object_unreference(obj);
4436err:
4437 return ret;
4438}
4439
8187a2b7
ZN
4440
4441static void
e552eb70
JB
4442i915_gem_cleanup_pipe_control(struct drm_device *dev)
4443{
4444 drm_i915_private_t *dev_priv = dev->dev_private;
4445 struct drm_gem_object *obj;
4446 struct drm_i915_gem_object *obj_priv;
4447
4448 obj = dev_priv->seqno_obj;
4449 obj_priv = to_intel_bo(obj);
4450 kunmap(obj_priv->pages[0]);
4451 i915_gem_object_unpin(obj);
4452 drm_gem_object_unreference(obj);
4453 dev_priv->seqno_obj = NULL;
4454
4455 dev_priv->seqno_page = NULL;
673a394b
EA
4456}
4457
8187a2b7
ZN
4458int
4459i915_gem_init_ringbuffer(struct drm_device *dev)
4460{
4461 drm_i915_private_t *dev_priv = dev->dev_private;
4462 int ret;
68f95ba9 4463
8187a2b7 4464 dev_priv->render_ring = render_ring;
68f95ba9 4465
8187a2b7
ZN
4466 if (!I915_NEED_GFX_HWS(dev)) {
4467 dev_priv->render_ring.status_page.page_addr
4468 = dev_priv->status_page_dmah->vaddr;
4469 memset(dev_priv->render_ring.status_page.page_addr,
4470 0, PAGE_SIZE);
4471 }
68f95ba9 4472
8187a2b7
ZN
4473 if (HAS_PIPE_CONTROL(dev)) {
4474 ret = i915_gem_init_pipe_control(dev);
4475 if (ret)
4476 return ret;
4477 }
68f95ba9 4478
8187a2b7 4479 ret = intel_init_ring_buffer(dev, &dev_priv->render_ring);
68f95ba9
CW
4480 if (ret)
4481 goto cleanup_pipe_control;
4482
4483 if (HAS_BSD(dev)) {
d1b851fc
ZN
4484 dev_priv->bsd_ring = bsd_ring;
4485 ret = intel_init_ring_buffer(dev, &dev_priv->bsd_ring);
68f95ba9
CW
4486 if (ret)
4487 goto cleanup_render_ring;
d1b851fc 4488 }
68f95ba9 4489
6f392d54
CW
4490 dev_priv->next_seqno = 1;
4491
68f95ba9
CW
4492 return 0;
4493
4494cleanup_render_ring:
4495 intel_cleanup_ring_buffer(dev, &dev_priv->render_ring);
4496cleanup_pipe_control:
4497 if (HAS_PIPE_CONTROL(dev))
4498 i915_gem_cleanup_pipe_control(dev);
8187a2b7
ZN
4499 return ret;
4500}
4501
4502void
4503i915_gem_cleanup_ringbuffer(struct drm_device *dev)
4504{
4505 drm_i915_private_t *dev_priv = dev->dev_private;
4506
4507 intel_cleanup_ring_buffer(dev, &dev_priv->render_ring);
d1b851fc
ZN
4508 if (HAS_BSD(dev))
4509 intel_cleanup_ring_buffer(dev, &dev_priv->bsd_ring);
8187a2b7
ZN
4510 if (HAS_PIPE_CONTROL(dev))
4511 i915_gem_cleanup_pipe_control(dev);
4512}
4513
673a394b
EA
4514int
4515i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
4516 struct drm_file *file_priv)
4517{
4518 drm_i915_private_t *dev_priv = dev->dev_private;
4519 int ret;
4520
79e53945
JB
4521 if (drm_core_check_feature(dev, DRIVER_MODESET))
4522 return 0;
4523
ba1234d1 4524 if (atomic_read(&dev_priv->mm.wedged)) {
673a394b 4525 DRM_ERROR("Reenabling wedged hardware, good luck\n");
ba1234d1 4526 atomic_set(&dev_priv->mm.wedged, 0);
673a394b
EA
4527 }
4528
673a394b 4529 mutex_lock(&dev->struct_mutex);
9bb2d6f9
EA
4530 dev_priv->mm.suspended = 0;
4531
4532 ret = i915_gem_init_ringbuffer(dev);
d816f6ac
WF
4533 if (ret != 0) {
4534 mutex_unlock(&dev->struct_mutex);
9bb2d6f9 4535 return ret;
d816f6ac 4536 }
9bb2d6f9 4537
5e118f41 4538 spin_lock(&dev_priv->mm.active_list_lock);
852835f3 4539 BUG_ON(!list_empty(&dev_priv->render_ring.active_list));
d1b851fc 4540 BUG_ON(HAS_BSD(dev) && !list_empty(&dev_priv->bsd_ring.active_list));
5e118f41
CW
4541 spin_unlock(&dev_priv->mm.active_list_lock);
4542
673a394b
EA
4543 BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
4544 BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
852835f3 4545 BUG_ON(!list_empty(&dev_priv->render_ring.request_list));
d1b851fc 4546 BUG_ON(HAS_BSD(dev) && !list_empty(&dev_priv->bsd_ring.request_list));
673a394b 4547 mutex_unlock(&dev->struct_mutex);
dbb19d30 4548
5f35308b
CW
4549 ret = drm_irq_install(dev);
4550 if (ret)
4551 goto cleanup_ringbuffer;
dbb19d30 4552
673a394b 4553 return 0;
5f35308b
CW
4554
4555cleanup_ringbuffer:
4556 mutex_lock(&dev->struct_mutex);
4557 i915_gem_cleanup_ringbuffer(dev);
4558 dev_priv->mm.suspended = 1;
4559 mutex_unlock(&dev->struct_mutex);
4560
4561 return ret;
673a394b
EA
4562}
4563
4564int
4565i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
4566 struct drm_file *file_priv)
4567{
79e53945
JB
4568 if (drm_core_check_feature(dev, DRIVER_MODESET))
4569 return 0;
4570
dbb19d30 4571 drm_irq_uninstall(dev);
e6890f6f 4572 return i915_gem_idle(dev);
673a394b
EA
4573}
4574
4575void
4576i915_gem_lastclose(struct drm_device *dev)
4577{
4578 int ret;
673a394b 4579
e806b495
EA
4580 if (drm_core_check_feature(dev, DRIVER_MODESET))
4581 return;
4582
6dbe2772
KP
4583 ret = i915_gem_idle(dev);
4584 if (ret)
4585 DRM_ERROR("failed to idle hardware: %d\n", ret);
673a394b
EA
4586}
4587
4588void
4589i915_gem_load(struct drm_device *dev)
4590{
b5aa8a0f 4591 int i;
673a394b
EA
4592 drm_i915_private_t *dev_priv = dev->dev_private;
4593
5e118f41 4594 spin_lock_init(&dev_priv->mm.active_list_lock);
673a394b 4595 INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
99fcb766 4596 INIT_LIST_HEAD(&dev_priv->mm.gpu_write_list);
673a394b 4597 INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
a09ba7fa 4598 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
be72615b 4599 INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list);
852835f3
ZN
4600 INIT_LIST_HEAD(&dev_priv->render_ring.active_list);
4601 INIT_LIST_HEAD(&dev_priv->render_ring.request_list);
d1b851fc
ZN
4602 if (HAS_BSD(dev)) {
4603 INIT_LIST_HEAD(&dev_priv->bsd_ring.active_list);
4604 INIT_LIST_HEAD(&dev_priv->bsd_ring.request_list);
4605 }
007cc8ac
DV
4606 for (i = 0; i < 16; i++)
4607 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
673a394b
EA
4608 INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
4609 i915_gem_retire_work_handler);
31169714
CW
4610 spin_lock(&shrink_list_lock);
4611 list_add(&dev_priv->mm.shrink_list, &shrink_list);
4612 spin_unlock(&shrink_list_lock);
4613
94400120
DA
4614 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
4615 if (IS_GEN3(dev)) {
4616 u32 tmp = I915_READ(MI_ARB_STATE);
4617 if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) {
4618 /* arb state is a masked write, so set bit + bit in mask */
4619 tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT);
4620 I915_WRITE(MI_ARB_STATE, tmp);
4621 }
4622 }
4623
de151cf6 4624 /* Old X drivers will take 0-2 for front, back, depth buffers */
b397c836
EA
4625 if (!drm_core_check_feature(dev, DRIVER_MODESET))
4626 dev_priv->fence_reg_start = 3;
de151cf6 4627
0f973f27 4628 if (IS_I965G(dev) || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
de151cf6
JB
4629 dev_priv->num_fence_regs = 16;
4630 else
4631 dev_priv->num_fence_regs = 8;
4632
b5aa8a0f
GH
4633 /* Initialize fence registers to zero */
4634 if (IS_I965G(dev)) {
4635 for (i = 0; i < 16; i++)
4636 I915_WRITE64(FENCE_REG_965_0 + (i * 8), 0);
4637 } else {
4638 for (i = 0; i < 8; i++)
4639 I915_WRITE(FENCE_REG_830_0 + (i * 4), 0);
4640 if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4641 for (i = 0; i < 8; i++)
4642 I915_WRITE(FENCE_REG_945_8 + (i * 4), 0);
4643 }
673a394b 4644 i915_gem_detect_bit_6_swizzle(dev);
6b95a207 4645 init_waitqueue_head(&dev_priv->pending_flip_queue);
673a394b 4646}
71acb5eb
DA
4647
4648/*
4649 * Create a physically contiguous memory object for this object
4650 * e.g. for cursor + overlay regs
4651 */
4652int i915_gem_init_phys_object(struct drm_device *dev,
4653 int id, int size)
4654{
4655 drm_i915_private_t *dev_priv = dev->dev_private;
4656 struct drm_i915_gem_phys_object *phys_obj;
4657 int ret;
4658
4659 if (dev_priv->mm.phys_objs[id - 1] || !size)
4660 return 0;
4661
9a298b2a 4662 phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
71acb5eb
DA
4663 if (!phys_obj)
4664 return -ENOMEM;
4665
4666 phys_obj->id = id;
4667
e6be8d9d 4668 phys_obj->handle = drm_pci_alloc(dev, size, 0);
71acb5eb
DA
4669 if (!phys_obj->handle) {
4670 ret = -ENOMEM;
4671 goto kfree_obj;
4672 }
4673#ifdef CONFIG_X86
4674 set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4675#endif
4676
4677 dev_priv->mm.phys_objs[id - 1] = phys_obj;
4678
4679 return 0;
4680kfree_obj:
9a298b2a 4681 kfree(phys_obj);
71acb5eb
DA
4682 return ret;
4683}
4684
4685void i915_gem_free_phys_object(struct drm_device *dev, int id)
4686{
4687 drm_i915_private_t *dev_priv = dev->dev_private;
4688 struct drm_i915_gem_phys_object *phys_obj;
4689
4690 if (!dev_priv->mm.phys_objs[id - 1])
4691 return;
4692
4693 phys_obj = dev_priv->mm.phys_objs[id - 1];
4694 if (phys_obj->cur_obj) {
4695 i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
4696 }
4697
4698#ifdef CONFIG_X86
4699 set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4700#endif
4701 drm_pci_free(dev, phys_obj->handle);
4702 kfree(phys_obj);
4703 dev_priv->mm.phys_objs[id - 1] = NULL;
4704}
4705
4706void i915_gem_free_all_phys_object(struct drm_device *dev)
4707{
4708 int i;
4709
260883c8 4710 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
71acb5eb
DA
4711 i915_gem_free_phys_object(dev, i);
4712}
4713
4714void i915_gem_detach_phys_object(struct drm_device *dev,
4715 struct drm_gem_object *obj)
4716{
4717 struct drm_i915_gem_object *obj_priv;
4718 int i;
4719 int ret;
4720 int page_count;
4721
23010e43 4722 obj_priv = to_intel_bo(obj);
71acb5eb
DA
4723 if (!obj_priv->phys_obj)
4724 return;
4725
4bdadb97 4726 ret = i915_gem_object_get_pages(obj, 0);
71acb5eb
DA
4727 if (ret)
4728 goto out;
4729
4730 page_count = obj->size / PAGE_SIZE;
4731
4732 for (i = 0; i < page_count; i++) {
856fa198 4733 char *dst = kmap_atomic(obj_priv->pages[i], KM_USER0);
71acb5eb
DA
4734 char *src = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);
4735
4736 memcpy(dst, src, PAGE_SIZE);
4737 kunmap_atomic(dst, KM_USER0);
4738 }
856fa198 4739 drm_clflush_pages(obj_priv->pages, page_count);
71acb5eb 4740 drm_agp_chipset_flush(dev);
d78b47b9
CW
4741
4742 i915_gem_object_put_pages(obj);
71acb5eb
DA
4743out:
4744 obj_priv->phys_obj->cur_obj = NULL;
4745 obj_priv->phys_obj = NULL;
4746}
4747
4748int
4749i915_gem_attach_phys_object(struct drm_device *dev,
4750 struct drm_gem_object *obj, int id)
4751{
4752 drm_i915_private_t *dev_priv = dev->dev_private;
4753 struct drm_i915_gem_object *obj_priv;
4754 int ret = 0;
4755 int page_count;
4756 int i;
4757
4758 if (id > I915_MAX_PHYS_OBJECT)
4759 return -EINVAL;
4760
23010e43 4761 obj_priv = to_intel_bo(obj);
71acb5eb
DA
4762
4763 if (obj_priv->phys_obj) {
4764 if (obj_priv->phys_obj->id == id)
4765 return 0;
4766 i915_gem_detach_phys_object(dev, obj);
4767 }
4768
4769
4770 /* create a new object */
4771 if (!dev_priv->mm.phys_objs[id - 1]) {
4772 ret = i915_gem_init_phys_object(dev, id,
4773 obj->size);
4774 if (ret) {
aeb565df 4775 DRM_ERROR("failed to init phys object %d size: %zu\n", id, obj->size);
71acb5eb
DA
4776 goto out;
4777 }
4778 }
4779
4780 /* bind to the object */
4781 obj_priv->phys_obj = dev_priv->mm.phys_objs[id - 1];
4782 obj_priv->phys_obj->cur_obj = obj;
4783
4bdadb97 4784 ret = i915_gem_object_get_pages(obj, 0);
71acb5eb
DA
4785 if (ret) {
4786 DRM_ERROR("failed to get page list\n");
4787 goto out;
4788 }
4789
4790 page_count = obj->size / PAGE_SIZE;
4791
4792 for (i = 0; i < page_count; i++) {
856fa198 4793 char *src = kmap_atomic(obj_priv->pages[i], KM_USER0);
71acb5eb
DA
4794 char *dst = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);
4795
4796 memcpy(dst, src, PAGE_SIZE);
4797 kunmap_atomic(src, KM_USER0);
4798 }
4799
d78b47b9
CW
4800 i915_gem_object_put_pages(obj);
4801
71acb5eb
DA
4802 return 0;
4803out:
4804 return ret;
4805}
4806
4807static int
4808i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
4809 struct drm_i915_gem_pwrite *args,
4810 struct drm_file *file_priv)
4811{
23010e43 4812 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
71acb5eb
DA
4813 void *obj_addr;
4814 int ret;
4815 char __user *user_data;
4816
4817 user_data = (char __user *) (uintptr_t) args->data_ptr;
4818 obj_addr = obj_priv->phys_obj->handle->vaddr + args->offset;
4819
44d98a61 4820 DRM_DEBUG_DRIVER("obj_addr %p, %lld\n", obj_addr, args->size);
71acb5eb
DA
4821 ret = copy_from_user(obj_addr, user_data, args->size);
4822 if (ret)
4823 return -EFAULT;
4824
4825 drm_agp_chipset_flush(dev);
4826 return 0;
4827}
b962442e
EA
4828
4829void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv)
4830{
4831 struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
4832
4833 /* Clean up our request list when the client is going away, so that
4834 * later retire_requests won't dereference our soon-to-be-gone
4835 * file_priv.
4836 */
4837 mutex_lock(&dev->struct_mutex);
4838 while (!list_empty(&i915_file_priv->mm.request_list))
4839 list_del_init(i915_file_priv->mm.request_list.next);
4840 mutex_unlock(&dev->struct_mutex);
4841}
31169714 4842
1637ef41
CW
4843static int
4844i915_gpu_is_active(struct drm_device *dev)
4845{
4846 drm_i915_private_t *dev_priv = dev->dev_private;
4847 int lists_empty;
4848
4849 spin_lock(&dev_priv->mm.active_list_lock);
4850 lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
852835f3 4851 list_empty(&dev_priv->render_ring.active_list);
d1b851fc
ZN
4852 if (HAS_BSD(dev))
4853 lists_empty &= list_empty(&dev_priv->bsd_ring.active_list);
1637ef41
CW
4854 spin_unlock(&dev_priv->mm.active_list_lock);
4855
4856 return !lists_empty;
4857}
4858
31169714 4859static int
7f8275d0 4860i915_gem_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
31169714
CW
4861{
4862 drm_i915_private_t *dev_priv, *next_dev;
4863 struct drm_i915_gem_object *obj_priv, *next_obj;
4864 int cnt = 0;
4865 int would_deadlock = 1;
4866
4867 /* "fast-path" to count number of available objects */
4868 if (nr_to_scan == 0) {
4869 spin_lock(&shrink_list_lock);
4870 list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) {
4871 struct drm_device *dev = dev_priv->dev;
4872
4873 if (mutex_trylock(&dev->struct_mutex)) {
4874 list_for_each_entry(obj_priv,
4875 &dev_priv->mm.inactive_list,
4876 list)
4877 cnt++;
4878 mutex_unlock(&dev->struct_mutex);
4879 }
4880 }
4881 spin_unlock(&shrink_list_lock);
4882
4883 return (cnt / 100) * sysctl_vfs_cache_pressure;
4884 }
4885
4886 spin_lock(&shrink_list_lock);
4887
1637ef41 4888rescan:
31169714
CW
4889 /* first scan for clean buffers */
4890 list_for_each_entry_safe(dev_priv, next_dev,
4891 &shrink_list, mm.shrink_list) {
4892 struct drm_device *dev = dev_priv->dev;
4893
4894 if (! mutex_trylock(&dev->struct_mutex))
4895 continue;
4896
4897 spin_unlock(&shrink_list_lock);
b09a1fec 4898 i915_gem_retire_requests(dev);
31169714
CW
4899
4900 list_for_each_entry_safe(obj_priv, next_obj,
4901 &dev_priv->mm.inactive_list,
4902 list) {
4903 if (i915_gem_object_is_purgeable(obj_priv)) {
a8089e84 4904 i915_gem_object_unbind(&obj_priv->base);
31169714
CW
4905 if (--nr_to_scan <= 0)
4906 break;
4907 }
4908 }
4909
4910 spin_lock(&shrink_list_lock);
4911 mutex_unlock(&dev->struct_mutex);
4912
963b4836
CW
4913 would_deadlock = 0;
4914
31169714
CW
4915 if (nr_to_scan <= 0)
4916 break;
4917 }
4918
4919 /* second pass, evict/count anything still on the inactive list */
4920 list_for_each_entry_safe(dev_priv, next_dev,
4921 &shrink_list, mm.shrink_list) {
4922 struct drm_device *dev = dev_priv->dev;
4923
4924 if (! mutex_trylock(&dev->struct_mutex))
4925 continue;
4926
4927 spin_unlock(&shrink_list_lock);
4928
4929 list_for_each_entry_safe(obj_priv, next_obj,
4930 &dev_priv->mm.inactive_list,
4931 list) {
4932 if (nr_to_scan > 0) {
a8089e84 4933 i915_gem_object_unbind(&obj_priv->base);
31169714
CW
4934 nr_to_scan--;
4935 } else
4936 cnt++;
4937 }
4938
4939 spin_lock(&shrink_list_lock);
4940 mutex_unlock(&dev->struct_mutex);
4941
4942 would_deadlock = 0;
4943 }
4944
1637ef41
CW
4945 if (nr_to_scan) {
4946 int active = 0;
4947
4948 /*
4949 * We are desperate for pages, so as a last resort, wait
4950 * for the GPU to finish and discard whatever we can.
4951 * This has a dramatic impact to reduce the number of
4952 * OOM-killer events whilst running the GPU aggressively.
4953 */
4954 list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) {
4955 struct drm_device *dev = dev_priv->dev;
4956
4957 if (!mutex_trylock(&dev->struct_mutex))
4958 continue;
4959
4960 spin_unlock(&shrink_list_lock);
4961
4962 if (i915_gpu_is_active(dev)) {
4963 i915_gpu_idle(dev);
4964 active++;
4965 }
4966
4967 spin_lock(&shrink_list_lock);
4968 mutex_unlock(&dev->struct_mutex);
4969 }
4970
4971 if (active)
4972 goto rescan;
4973 }
4974
31169714
CW
4975 spin_unlock(&shrink_list_lock);
4976
4977 if (would_deadlock)
4978 return -1;
4979 else if (cnt > 0)
4980 return (cnt / 100) * sysctl_vfs_cache_pressure;
4981 else
4982 return 0;
4983}
4984
4985static struct shrinker shrinker = {
4986 .shrink = i915_gem_shrink,
4987 .seeks = DEFAULT_SEEKS,
4988};
4989
4990__init void
4991i915_gem_shrinker_init(void)
4992{
4993 register_shrinker(&shrinker);
4994}
4995
4996__exit void
4997i915_gem_shrinker_exit(void)
4998{
4999 unregister_shrinker(&shrinker);
5000}