drm/amdgpu: add initial vcn support and decode tests
author Leo Liu <leo.liu@amd.com>
Wed, 21 Dec 2016 18:21:52 +0000 (13:21 -0500)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 24 May 2017 21:41:22 +0000 (17:41 -0400)
VCN is the new media block on Raven. Add core support
and the ring and ib tests for decode.

Signed-off-by: Leo Liu <leo.liu@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/Makefile
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c [new file with mode: 0644]
drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h [new file with mode: 0644]

index 684626ba286cf87d2edbba58b865298fc8d5269d..105141486a612f4b8daca1963f384ec179a3db18 100644 (file)
@@ -93,6 +93,10 @@ amdgpu-y += \
        vce_v3_0.o \
        vce_v4_0.o
 
+# add VCN block (the media block on Raven)
+amdgpu-y += \
+       amdgpu_vcn.o
+
 # add amdkfd interfaces
 amdgpu-y += \
         amdgpu_amdkfd.o \
index a2dd218e35b9e07dcbe3ce3c20c94907a642b878..68817e60e7870e37741db0eec7ea336c56a14052 100644 (file)
@@ -1180,6 +1180,31 @@ void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb);
 
 void amdgpu_get_pcie_info(struct amdgpu_device *adev);
 
+/*
+ * VCN
+ *
+ * Per-device state of the VCN media block: the firmware image, the VCPU
+ * buffer object the firmware is copied into, and the decode/encode rings.
+ */
+/* Added on top of the firmware size when amdgpu_vcn_sw_init() sizes the VCPU BO */
+#define AMDGPU_VCN_STACK_SIZE          (200*1024)
+#define AMDGPU_VCN_HEAP_SIZE           (256*1024)
+/* amdgpu_vcn_sw_init() adds 40 of these to the VCPU BO size */
+#define AMDGPU_VCN_SESSION_SIZE                (50*1024)
+/* NOTE(review): not referenced in amdgpu_vcn.c; presumably for the IP-specific code - confirm */
+#define AMDGPU_VCN_FIRMWARE_OFFSET     256
+/* Number of entries in amdgpu_vcn.ring_enc[] */
+#define AMDGPU_VCN_MAX_ENC_RINGS       3
+
+struct amdgpu_vcn {
+       /* VRAM buffer object created by amdgpu_vcn_sw_init() */
+       struct amdgpu_bo        *vcpu_bo;
+       /* CPU and GPU addresses of vcpu_bo, filled in when it is created */
+       void                    *cpu_addr;
+       uint64_t                gpu_addr;
+       /* NOTE(review): never written in amdgpu_vcn.c - confirm who sets it */
+       unsigned                fw_version;
+       /* copy of vcpu_bo taken by amdgpu_vcn_suspend(), consumed by amdgpu_vcn_resume() */
+       void                    *saved_bo;
+       /* delayed work that runs amdgpu_vcn_idle_work_handler() */
+       struct delayed_work     idle_work;
+       const struct firmware   *fw;    /* VCN firmware */
+       /* decode ring and encode rings */
+       struct amdgpu_ring      ring_dec;
+       struct amdgpu_ring      ring_enc[AMDGPU_VCN_MAX_ENC_RINGS];
+       /* NOTE(review): not used in amdgpu_vcn.c; presumably set up by the IP-specific code */
+       struct amdgpu_irq_src   irq;
+       /* scheduler entity for ring_dec, initialised in amdgpu_vcn_sw_init() */
+       struct amd_sched_entity entity_dec;
+       /* NOTE(review): not initialised in amdgpu_vcn.c - confirm */
+       struct amd_sched_entity entity_enc;
+       /* NOTE(review): not referenced in amdgpu_vcn.c - confirm */
+       uint32_t                srbm_soft_reset;
+};
+
 /*
  * SDMA
  */
@@ -1572,11 +1597,18 @@ struct amdgpu_device {
        /* sdma */
        struct amdgpu_sdma              sdma;
 
-       /* uvd */
-       struct amdgpu_uvd               uvd;
+       union {
+               struct {
+                       /* uvd */
+                       struct amdgpu_uvd               uvd;
+
+                       /* vce */
+                       struct amdgpu_vce               vce;
+               };
 
-       /* vce */
-       struct amdgpu_vce               vce;
+               /* vcn */
+               struct amdgpu_vcn               vcn;
+       };
 
        /* firmwares */
        struct amdgpu_firmware          firmware;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
new file mode 100644 (file)
index 0000000..97b09b6
--- /dev/null
@@ -0,0 +1,425 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm.h>
+
+#include "amdgpu.h"
+#include "amdgpu_pm.h"
+#include "amdgpu_vcn.h"
+#include "soc15d.h"
+#include "soc15_common.h"
+
+#include "vega10/soc15ip.h"
+#include "raven1/VCN/vcn_1_0_offset.h"
+
+/* Delay used whenever the idle work is (re)scheduled: 1 second */
+#define VCN_IDLE_TIMEOUT       msecs_to_jiffies(1000)
+
+/* Firmware Names */
+#define FIRMWARE_RAVEN         "amdgpu/raven_vcn.bin"
+
+MODULE_FIRMWARE(FIRMWARE_RAVEN);
+
+/* Defined further down; declared here because amdgpu_vcn_sw_init() registers it */
+static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
+
+/**
+ * amdgpu_vcn_sw_init - software-side setup of the VCN block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Registers the idle work handler, loads and validates the VCN firmware,
+ * allocates the VCPU buffer object in VRAM and initialises the scheduler
+ * entity used for decode submissions.
+ *
+ * Anything acquired here is released again when a later step fails, so a
+ * failed call leaves neither a firmware reference nor a buffer object behind.
+ *
+ * Returns 0 on success, -EINVAL for an ASIC type without a VCN firmware
+ * name, or the error code of the step that failed.
+ */
+int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
+{
+       struct amdgpu_ring *ring;
+       struct amd_sched_rq *rq;
+       unsigned long bo_size;
+       const char *fw_name;
+       const struct common_firmware_header *hdr;
+       unsigned version_major, version_minor, family_id;
+       int r;
+
+       INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
+
+       switch (adev->asic_type) {
+       case CHIP_RAVEN:
+               fw_name = FIRMWARE_RAVEN;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       r = request_firmware(&adev->vcn.fw, fw_name, adev->dev);
+       if (r) {
+               dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n",
+                       fw_name);
+               return r;
+       }
+
+       r = amdgpu_ucode_validate(adev->vcn.fw);
+       if (r) {
+               dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n",
+                       fw_name);
+               goto err_release_fw;
+       }
+
+       /* ucode_version packs major in bits 31:24, minor in 15:8, family in 7:0 */
+       hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+       family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
+       version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
+       version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
+       /* the arguments are unsigned int, so they are printed with %u */
+       DRM_INFO("Found VCN firmware Version: %u.%u Family ID: %u\n",
+               version_major, version_minor, family_id);
+
+       /*
+        * Page-aligned firmware image (plus 8 bytes), followed by the stack,
+        * the heap and 40 session areas.
+        */
+       bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8)
+                 +  AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE
+                 +  AMDGPU_VCN_SESSION_SIZE * 40;
+       r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
+                                   AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo,
+                                   &adev->vcn.gpu_addr, &adev->vcn.cpu_addr);
+       if (r) {
+               dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
+               goto err_release_fw;
+       }
+
+       ring = &adev->vcn.ring_dec;
+       rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
+       r = amd_sched_entity_init(&ring->sched, &adev->vcn.entity_dec,
+                                 rq, amdgpu_sched_jobs);
+       if (r != 0) {
+               DRM_ERROR("Failed setting up VCN dec run queue.\n");
+               goto err_free_bo;
+       }
+
+       return 0;
+
+err_free_bo:
+       amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo,
+                             &adev->vcn.gpu_addr,
+                             (void **)&adev->vcn.cpu_addr);
+
+err_release_fw:
+       release_firmware(adev->vcn.fw);
+       adev->vcn.fw = NULL;
+       return r;
+}
+
+/**
+ * amdgpu_vcn_sw_fini - release what amdgpu_vcn_sw_init() set up
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Frees a leftover suspend snapshot, tears down the decode scheduler entity,
+ * frees the VCPU buffer object, finalises the decode ring and drops the
+ * firmware reference. Always returns 0.
+ */
+int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
+{
+       struct amdgpu_vcn *vcn = &adev->vcn;
+
+       kfree(vcn->saved_bo);
+       amd_sched_entity_fini(&vcn->ring_dec.sched, &vcn->entity_dec);
+       amdgpu_bo_free_kernel(&vcn->vcpu_bo, &vcn->gpu_addr,
+                             (void **)&vcn->cpu_addr);
+       amdgpu_ring_fini(&vcn->ring_dec);
+       release_firmware(vcn->fw);
+
+       return 0;
+}
+
+/**
+ * amdgpu_vcn_suspend - snapshot the VCPU buffer object
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Cancels the idle work and copies the whole VCPU buffer object into a
+ * kmalloc'ed buffer stored in adev->vcn.saved_bo.
+ *
+ * Returns 0 on success (also when no VCPU buffer object exists) or -ENOMEM
+ * when the snapshot buffer cannot be allocated.
+ */
+int amdgpu_vcn_suspend(struct amdgpu_device *adev)
+{
+       struct amdgpu_vcn *vcn = &adev->vcn;
+       unsigned bytes;
+
+       /* no VCPU buffer object, nothing to save */
+       if (!vcn->vcpu_bo)
+               return 0;
+
+       cancel_delayed_work_sync(&vcn->idle_work);
+
+       bytes = amdgpu_bo_size(vcn->vcpu_bo);
+
+       vcn->saved_bo = kmalloc(bytes, GFP_KERNEL);
+       if (vcn->saved_bo == NULL)
+               return -ENOMEM;
+
+       memcpy_fromio(vcn->saved_bo, vcn->cpu_addr, bytes);
+
+       return 0;
+}
+
+/**
+ * amdgpu_vcn_resume - (re)fill the VCPU buffer object
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * If amdgpu_vcn_suspend() left a snapshot, it is copied back and freed.
+ * Otherwise the firmware image is copied to the start of the buffer and
+ * the remainder is zeroed.
+ *
+ * Returns 0 on success or -EINVAL when no VCPU buffer object exists.
+ */
+int amdgpu_vcn_resume(struct amdgpu_device *adev)
+{
+       struct amdgpu_vcn *vcn = &adev->vcn;
+       const struct common_firmware_header *hdr;
+       unsigned bo_bytes, fw_bytes, fw_offset;
+       void *dst;
+
+       if (!vcn->vcpu_bo)
+               return -EINVAL;
+
+       bo_bytes = amdgpu_bo_size(vcn->vcpu_bo);
+       dst = vcn->cpu_addr;
+
+       if (vcn->saved_bo) {
+               /* restore the suspend snapshot and drop it */
+               memcpy_toio(dst, vcn->saved_bo, bo_bytes);
+               kfree(vcn->saved_bo);
+               vcn->saved_bo = NULL;
+               return 0;
+       }
+
+       /* no snapshot: load the firmware image and clear what follows it */
+       hdr = (const struct common_firmware_header *)vcn->fw->data;
+       fw_offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
+       fw_bytes = le32_to_cpu(hdr->ucode_size_bytes);
+
+       memcpy_toio(dst, vcn->fw->data + fw_offset, fw_bytes);
+       memset_io(dst + fw_bytes, 0, bo_bytes - fw_bytes);
+
+       return 0;
+}
+
+/**
+ * amdgpu_vcn_dec_send_msg - submit a decoder message buffer
+ *
+ * @ring: ring to submit on
+ * @bo: buffer object holding the message; the caller's reference is consumed
+ * @direct: true to schedule the IB directly with amdgpu_ib_schedule(),
+ *          false to submit it with amdgpu_job_submit() on entity_dec
+ * @fence: optional; on success receives a reference to the submission fence
+ *
+ * Reserves and validates @bo, builds a 16-dword IB that writes the GPU
+ * address of @bo to the GPCOM data registers and 0 to the command register,
+ * submits it and fences @bo with the resulting fence.
+ *
+ * The reference on @bo is dropped on every path, success or failure, so
+ * callers must not touch @bo after this call.
+ *
+ * Returns 0 on success or the error code of the step that failed.
+ */
+static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
+                              bool direct, struct dma_fence **fence)
+{
+       struct ttm_validate_buffer tv;
+       struct ww_acquire_ctx ticket;
+       struct list_head head;
+       struct amdgpu_job *job;
+       struct amdgpu_ib *ib;
+       struct dma_fence *f = NULL;
+       struct amdgpu_device *adev = ring->adev;
+       uint64_t addr;
+       int i, r;
+
+       memset(&tv, 0, sizeof(tv));
+       tv.bo = &bo->tbo;
+
+       INIT_LIST_HEAD(&head);
+       list_add(&tv.head, &head);
+
+       r = ttm_eu_reserve_buffers(&ticket, &head, true, NULL);
+       if (r)
+               goto err_unref;
+
+       r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
+       if (r)
+               goto err;
+
+       r = amdgpu_job_alloc_with_ib(adev, 64, &job);
+       if (r)
+               goto err;
+
+       ib = &job->ibs[0];
+       addr = amdgpu_bo_gpu_offset(bo);
+       /* low and high half of the message address, then command 0 */
+       ib->ptr[0] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0);
+       ib->ptr[1] = addr;
+       ib->ptr[2] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0);
+       ib->ptr[3] = addr >> 32;
+       ib->ptr[4] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0);
+       ib->ptr[5] = 0;
+       /* pad the IB to 16 dwords with NO_OP register writes */
+       for (i = 6; i < 16; i += 2) {
+               ib->ptr[i] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0);
+               ib->ptr[i+1] = 0;
+       }
+       ib->length_dw = 16;
+
+       if (direct) {
+               r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
+               job->fence = dma_fence_get(f);
+               if (r)
+                       goto err_free;
+
+               amdgpu_job_free(job);
+       } else {
+               r = amdgpu_job_submit(job, ring, &adev->vcn.entity_dec,
+                                     AMDGPU_FENCE_OWNER_UNDEFINED, &f);
+               if (r)
+                       goto err_free;
+       }
+
+       ttm_eu_fence_buffer_objects(&ticket, &head, f);
+
+       if (fence)
+               *fence = dma_fence_get(f);
+       amdgpu_bo_unref(&bo);
+       dma_fence_put(f);
+
+       return 0;
+
+err_free:
+       amdgpu_job_free(job);
+
+err:
+       ttm_eu_backoff_reservation(&ticket, &head);
+
+err_unref:
+       /*
+        * The success path drops the caller's reference; do the same on
+        * failure, otherwise the message buffer is leaked because the
+        * callers simply return the error.
+        */
+       amdgpu_bo_unref(&bo);
+       return r;
+}
+
+/**
+ * amdgpu_vcn_dec_get_create_msg - build and send a decoder create message
+ *
+ * @ring: ring to submit on
+ * @handle: session handle stored in dword 2 of the message
+ * @fence: optional fence pointer, passed through to amdgpu_vcn_dec_send_msg()
+ *
+ * Allocates a VRAM buffer object, fills it with the create message and
+ * hands it to amdgpu_vcn_dec_send_msg() for direct submission.
+ *
+ * Returns 0 on success or the error code of the step that failed.
+ */
+static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+                             struct dma_fence **fence)
+{
+       /* leading dwords of the create message; everything after is zero */
+       const uint32_t header[] = {
+               0x00000de4, 0x00000000, handle,     0x00000000,
+               0x00000000, 0x00000000, 0x00000000, 0x00000780,
+               0x00000440, 0x00000000, 0x01b37000,
+       };
+       struct amdgpu_device *adev = ring->adev;
+       struct amdgpu_bo *bo;
+       uint32_t *msg;
+       int r, i;
+
+       /*
+        * NOTE(review): the BO is requested with size 1024 while 1024 dwords
+        * are written below; presumably the allocation is rounded up to
+        * PAGE_SIZE - confirm.
+        */
+       r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
+                            AMDGPU_GEM_DOMAIN_VRAM,
+                            AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+                            AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
+                            NULL, NULL, &bo);
+       if (r)
+               return r;
+
+       r = amdgpu_bo_reserve(bo, false);
+       if (r)
+               goto err_unref;
+
+       r = amdgpu_bo_kmap(bo, (void **)&msg);
+       if (r)
+               goto err_unreserve;
+
+       /* stitch together a vcn create msg */
+       for (i = 0; i < ARRAY_SIZE(header); ++i)
+               msg[i] = cpu_to_le32(header[i]);
+       for (; i < 1024; ++i)
+               msg[i] = cpu_to_le32(0x0);
+
+       amdgpu_bo_kunmap(bo);
+       amdgpu_bo_unreserve(bo);
+
+       return amdgpu_vcn_dec_send_msg(ring, bo, true, fence);
+
+err_unreserve:
+       amdgpu_bo_unreserve(bo);
+
+err_unref:
+       amdgpu_bo_unref(&bo);
+       return r;
+}
+
+/**
+ * amdgpu_vcn_dec_get_destroy_msg - build and send a decoder destroy message
+ *
+ * @ring: ring to submit on
+ * @handle: session handle stored in dword 2 of the message
+ * @direct: passed through to amdgpu_vcn_dec_send_msg()
+ * @fence: optional fence pointer, passed through to amdgpu_vcn_dec_send_msg()
+ *
+ * Allocates a VRAM buffer object, fills it with the destroy message and
+ * hands it to amdgpu_vcn_dec_send_msg().
+ *
+ * Returns 0 on success or the error code of the step that failed.
+ */
+static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
+                              bool direct, struct dma_fence **fence)
+{
+       /* leading dwords of the destroy message; everything after is zero */
+       const uint32_t header[] = {
+               0x00000de4, 0x00000002, handle, 0x00000000,
+       };
+       struct amdgpu_device *adev = ring->adev;
+       struct amdgpu_bo *bo;
+       uint32_t *msg;
+       int r, i;
+
+       /*
+        * NOTE(review): the BO is requested with size 1024 while 1024 dwords
+        * are written below; presumably the allocation is rounded up to
+        * PAGE_SIZE - confirm.
+        */
+       r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
+                            AMDGPU_GEM_DOMAIN_VRAM,
+                            AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+                            AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
+                            NULL, NULL, &bo);
+       if (r)
+               return r;
+
+       r = amdgpu_bo_reserve(bo, false);
+       if (r)
+               goto err_unref;
+
+       r = amdgpu_bo_kmap(bo, (void **)&msg);
+       if (r)
+               goto err_unreserve;
+
+       /* stitch together a vcn destroy msg */
+       for (i = 0; i < ARRAY_SIZE(header); ++i)
+               msg[i] = cpu_to_le32(header[i]);
+       for (; i < 1024; ++i)
+               msg[i] = cpu_to_le32(0x0);
+
+       amdgpu_bo_kunmap(bo);
+       amdgpu_bo_unreserve(bo);
+
+       return amdgpu_vcn_dec_send_msg(ring, bo, direct, fence);
+
+err_unreserve:
+       amdgpu_bo_unreserve(bo);
+
+err_unref:
+       amdgpu_bo_unref(&bo);
+       return r;
+}
+
+/**
+ * amdgpu_vcn_idle_work_handler - turn the block off once the decode ring is idle
+ *
+ * @work: the work member of adev->vcn.idle_work
+ *
+ * While the decode ring still has emitted fences the work re-arms itself
+ * with VCN_IDLE_TIMEOUT. Once there are none it disables UVD through DPM
+ * when DPM is enabled, and otherwise sets the UVD clocks to 0.
+ */
+static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
+{
+       struct amdgpu_device *adev =
+               container_of(work, struct amdgpu_device, vcn.idle_work.work);
+
+       /* still busy: look again after another timeout */
+       if (amdgpu_fence_count_emitted(&adev->vcn.ring_dec) != 0) {
+               schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+               return;
+       }
+
+       if (adev->pm.dpm_enabled) {
+               amdgpu_dpm_enable_uvd(adev, false);
+       } else {
+               amdgpu_asic_set_uvd_clocks(adev, 0, 0);
+       }
+}
+
+/**
+ * amdgpu_vcn_ring_begin_use - make sure the block is powered before ring use
+ *
+ * @ring: ring about to be used
+ *
+ * Cancels the idle work. If none was pending, UVD is enabled through DPM
+ * when DPM is enabled; otherwise the UVD clocks are set to 53300/40000.
+ */
+void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
+{
+       struct amdgpu_device *adev = ring->adev;
+
+       /* idle work was still pending, so the handler has not switched anything off */
+       if (cancel_delayed_work_sync(&adev->vcn.idle_work))
+               return;
+
+       if (adev->pm.dpm_enabled) {
+               amdgpu_dpm_enable_uvd(adev, true);
+       } else {
+               amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
+       }
+}
+
+/**
+ * amdgpu_vcn_ring_end_use - arm the idle work after ring use
+ *
+ * @ring: ring that was just used
+ *
+ * Schedules the idle work to run after VCN_IDLE_TIMEOUT.
+ */
+void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
+{
+       struct amdgpu_device *adev = ring->adev;
+
+       schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
+}
+
+/**
+ * amdgpu_vcn_dec_ring_test_ib - IB test for the decode ring
+ *
+ * @ring: ring to test
+ * @timeout: timeout handed to dma_fence_wait_timeout()
+ *
+ * Sends a create message followed by a destroy message for handle 1 and
+ * waits for the fence of the destroy message.
+ *
+ * Returns 0 on success, -ETIMEDOUT when the fence did not signal in time,
+ * or the error code of the step that failed.
+ */
+int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+       struct dma_fence *fence;
+       long r;
+
+       r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL);
+       if (r) {
+               DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
+               return r;
+       }
+
+       r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, true, &fence);
+       if (r) {
+               DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
+               return r;
+       }
+
+       /* > 0: signalled in time, 0: timed out, < 0: wait failed */
+       r = dma_fence_wait_timeout(fence, false, timeout);
+       if (r > 0) {
+               DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+               r = 0;
+       } else if (r == 0) {
+               DRM_ERROR("amdgpu: IB test timed out.\n");
+               r = -ETIMEDOUT;
+       } else {
+               DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+       }
+
+       dma_fence_put(fence);
+
+       return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
new file mode 100644 (file)
index 0000000..a32182c
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_VCN_H__
+#define __AMDGPU_VCN_H__
+
+/* Load the VCN firmware, allocate the VCPU buffer object, set up the decode scheduler entity */
+int amdgpu_vcn_sw_init(struct amdgpu_device *adev);
+/* Release the scheduler entity, VCPU buffer object, decode ring and firmware */
+int amdgpu_vcn_sw_fini(struct amdgpu_device *adev);
+/* Copy the VCPU buffer object into a kmalloc'ed snapshot */
+int amdgpu_vcn_suspend(struct amdgpu_device *adev);
+/* Restore the snapshot, or load the firmware image when there is none */
+int amdgpu_vcn_resume(struct amdgpu_device *adev);
+/* Cancel the idle work and enable the block if no idle work was pending */
+void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring);
+/* Schedule the idle work */
+void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring);
+/* Send a create and a destroy message and wait up to timeout for the fence */
+int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+
+#endif