drm/amdgpu: return -ENODEV to user space when vram is lost v2
author Chunming Zhou <David1.Zhou@amd.com>
Mon, 15 May 2017 08:48:27 +0000 (16:48 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 24 May 2017 22:11:52 +0000 (18:11 -0400)
The following ioctls will return -ENODEV when VRAM is lost:
amdgpu_cs_ioctl
amdgpu_cs_wait_ioctl
amdgpu_cs_wait_fences_ioctl
amdgpu_gem_va_ioctl
amdgpu_info_ioctl

v2: in amdgpu_gem_va_ioctl, return -ENODEV only for the map and replace operations

Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

index ec9774c0d1d95fa8845981afe848b3b4bb5d16fa..333413f481654d474def35654d2cadb8136f10a1 100644 (file)
@@ -824,6 +824,7 @@ struct amdgpu_fpriv {
        struct mutex            bo_list_lock;
        struct idr              bo_list_handles;
        struct amdgpu_ctx_mgr   ctx_mgr;
+       u32                     vram_lost_counter;
 };
 
 /*
@@ -1528,6 +1529,7 @@ struct amdgpu_device {
        atomic64_t                      num_bytes_moved;
        atomic64_t                      num_evictions;
        atomic_t                        gpu_reset_counter;
+       atomic_t                        vram_lost_counter;
 
        /* data for buffer migration throttling */
        struct {
@@ -1914,6 +1916,8 @@ static inline bool amdgpu_has_atpx(void) { return false; }
 extern const struct drm_ioctl_desc amdgpu_ioctls_kms[];
 extern const int amdgpu_max_kms_ioctl;
 
+bool amdgpu_kms_vram_lost(struct amdgpu_device *adev,
+                         struct amdgpu_fpriv *fpriv);
 int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags);
 void amdgpu_driver_unload_kms(struct drm_device *dev);
 void amdgpu_driver_lastclose_kms(struct drm_device *dev);
index 1375a896e87b95dc48746bb8ef6363d71d6f97b7..9a0b2e5f3c116fc2feeb9e42ba155c0b696f2e3e 100644 (file)
@@ -1097,6 +1097,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
        struct amdgpu_device *adev = dev->dev_private;
+       struct amdgpu_fpriv *fpriv = filp->driver_priv;
        union drm_amdgpu_cs *cs = data;
        struct amdgpu_cs_parser parser = {};
        bool reserved_buffers = false;
@@ -1104,6 +1105,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 
        if (!adev->accel_working)
                return -EBUSY;
+       if (amdgpu_kms_vram_lost(adev, fpriv))
+               return -ENODEV;
 
        parser.adev = adev;
        parser.filp = filp;
@@ -1165,12 +1168,15 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 {
        union drm_amdgpu_wait_cs *wait = data;
        struct amdgpu_device *adev = dev->dev_private;
+       struct amdgpu_fpriv *fpriv = filp->driver_priv;
        unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
        struct amdgpu_ring *ring = NULL;
        struct amdgpu_ctx *ctx;
        struct dma_fence *fence;
        long r;
 
+       if (amdgpu_kms_vram_lost(adev, fpriv))
+               return -ENODEV;
        r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
                               wait->in.ring, &ring);
        if (r)
@@ -1344,12 +1350,15 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
                                struct drm_file *filp)
 {
        struct amdgpu_device *adev = dev->dev_private;
+       struct amdgpu_fpriv *fpriv = filp->driver_priv;
        union drm_amdgpu_wait_fences *wait = data;
        uint32_t fence_count = wait->in.fence_count;
        struct drm_amdgpu_fence *fences_user;
        struct drm_amdgpu_fence *fences;
        int r;
 
+       if (amdgpu_kms_vram_lost(adev, fpriv))
+               return -ENODEV;
        /* Get the fences from userspace */
        fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
                        GFP_KERNEL);
index 5a170071702a3d205879e48ed80e1f21c4ed12ae..794e14d8e906408136e203fa9192b1f82cfb7a8c 100644 (file)
@@ -2839,8 +2839,10 @@ retry:
                        if (r)
                                goto out;
                        vram_lost = amdgpu_check_vram_lost(adev);
-                       if (vram_lost)
+                       if (vram_lost) {
                                DRM_ERROR("VRAM is lost!\n");
+                               atomic_inc(&adev->vram_lost_counter);
+                       }
                        r = amdgpu_ttm_recover_gart(adev);
                        if (r)
                                goto out;
index 92e9248ea8b1d640f04a6bfb0ada0bc5e58b2998..7d1bb44c013636580595c888d6899860e9d2b52e 100644 (file)
@@ -597,6 +597,11 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
                        args->operation);
                return -EINVAL;
        }
+       if ((args->operation == AMDGPU_VA_OP_MAP) ||
+           (args->operation == AMDGPU_VA_OP_REPLACE)) {
+               if (amdgpu_kms_vram_lost(adev, fpriv))
+                       return -ENODEV;
+       }
 
        INIT_LIST_HEAD(&list);
        if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
index 62f0a9591bb5932f32a929bf7912fb93ba6a11b3..b324f07f137a2d3396072bb598b15826f3f3d093 100644 (file)
@@ -235,6 +235,7 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
 static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
        struct amdgpu_device *adev = dev->dev_private;
+       struct amdgpu_fpriv *fpriv = filp->driver_priv;
        struct drm_amdgpu_info *info = data;
        struct amdgpu_mode_info *minfo = &adev->mode_info;
        void __user *out = (void __user *)(uintptr_t)info->return_pointer;
@@ -247,6 +248,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 
        if (!info->return_size || !info->return_pointer)
                return -EINVAL;
+       if (amdgpu_kms_vram_lost(adev, fpriv))
+               return -ENODEV;
 
        switch (info->query) {
        case AMDGPU_INFO_ACCEL_WORKING:
@@ -747,6 +750,12 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev)
        vga_switcheroo_process_delayed_switch();
 }
 
+bool amdgpu_kms_vram_lost(struct amdgpu_device *adev,
+                         struct amdgpu_fpriv *fpriv)
+{ /* true when the device's vram_lost_counter differs from the value saved in fpriv */
+       return fpriv->vram_lost_counter != atomic_read(&adev->vram_lost_counter);
+}
+
 /**
  * amdgpu_driver_open_kms - drm callback for open
  *
@@ -799,6 +808,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 
        amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
 
+       fpriv->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
        file_priv->driver_priv = fpriv;
 
 out_suspend: