drm/i915: Fix infinite loop regression from 21dd3734
author Chris Wilson <chris@chris-wilson.co.uk>
Mon, 7 Feb 2011 13:09:31 +0000 (13:09 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
Mon, 7 Feb 2011 14:33:55 +0000 (14:33 +0000)
By returning EAGAIN upon a wedged GPU before attempting to wait, we
would hit an infinite loop, repeating the operation without ever
progressing. Instead this needs to be EIO so that userspace knows that
the GPU is truly wedged and not in the process of error recovery.

Similarly, we need to handle the error recovery during i915_gem_fault.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
drivers/gpu/drm/i915/i915_gem.c

index 52dd77b1bb7ceddaf34bd1a145aa51d6520de953..a41c0e7168057a71a7282e739e661888d4cbaaa3 100644 (file)
@@ -1171,9 +1171,11 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
                PAGE_SHIFT;
 
-       /* Now bind it into the GTT if needed */
-       mutex_lock(&dev->struct_mutex);
+       ret = i915_mutex_lock_interruptible(dev);
+       if (ret)
+               goto out;
 
+       /* Now bind it into the GTT if needed */
        if (!obj->map_and_fenceable) {
                ret = i915_gem_object_unbind(obj);
                if (ret)
@@ -1208,9 +1210,17 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
 unlock:
        mutex_unlock(&dev->struct_mutex);
-
+out:
        switch (ret) {
+       case -EIO:
        case -EAGAIN:
+               /* Give the error handler a chance to run and move the
+                * objects off the GPU active list. Next time we service the
+                * fault, we should be able to transition the page into the
+                * GTT without touching the GPU (and so avoid further
+                * EIO/EAGAIN). If the GPU is wedged, then there is no issue
+                * with coherency, just lost writes.
+                */
                set_need_resched();
        case 0:
        case -ERESTARTSYS:
@@ -1981,8 +1991,18 @@ i915_do_wait_request(struct drm_device *dev, uint32_t seqno,
 
        BUG_ON(seqno == 0);
 
-       if (atomic_read(&dev_priv->mm.wedged))
-               return -EAGAIN;
+       if (atomic_read(&dev_priv->mm.wedged)) {
+               struct completion *x = &dev_priv->error_completion;
+               bool recovery_complete;
+               unsigned long flags;
+
+               /* Give the error handler a chance to run. */
+               spin_lock_irqsave(&x->wait.lock, flags);
+               recovery_complete = x->done > 0;
+               spin_unlock_irqrestore(&x->wait.lock, flags);
+
+               return recovery_complete ? -EIO : -EAGAIN;
+       }
 
        if (seqno == ring->outstanding_lazy_request) {
                struct drm_i915_gem_request *request;