/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include <drm/drm.h>

#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_vcn.h"
#include "soc15d.h"
#include "soc15_common.h"

#include "vega10/soc15ip.h"
#include "raven1/VCN/vcn_1_0_offset.h"

/* 1 second timeout */
#define VCN_IDLE_TIMEOUT	msecs_to_jiffies(1000)

#define FIRMWARE_RAVEN		"amdgpu/raven_vcn.bin"

MODULE_FIRMWARE(FIRMWARE_RAVEN);

static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
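
/**
 * amdgpu_vcn_sw_init - initialize the VCN block
 * @adev: amdgpu device pointer
 *
 * Requests and validates the VCN firmware, allocates the VCPU buffer
 * object that backs it, and sets up the decode and encode scheduler
 * entities. Returns 0 on success or a negative error code.
 */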
int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	struct amd_sched_rq *rq;
	unsigned long bo_size;
	const char *fw_name;
	const struct common_firmware_header *hdr;
	unsigned version_major, version_minor, family_id;
	int r;

	INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		fw_name = FIRMWARE_RAVEN;
		break;
	default:
		return -EINVAL;
	}

	r = request_firmware(&adev->vcn.fw, fw_name, adev->dev);
	if (r) {
		dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n",
			fw_name);
		return r;
	}

	r = amdgpu_ucode_validate(adev->vcn.fw);
	if (r) {
		dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->vcn.fw);
		adev->vcn.fw = NULL;
		return r;
	}

	hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
	adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);
	family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
	version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
	version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
	DRM_INFO("Found VCN firmware Version: %hu.%hu Family ID: %hu\n",
		 version_major, version_minor, family_id);

	bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8)
		  + AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE
		  + AMDGPU_VCN_SESSION_SIZE * 40;
	r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo,
				    &adev->vcn.gpu_addr, &adev->vcn.cpu_addr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
		return r;
	}

	ring = &adev->vcn.ring_dec;
	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
	r = amd_sched_entity_init(&ring->sched, &adev->vcn.entity_dec,
				  rq, amdgpu_sched_jobs);
	if (r != 0) {
		DRM_ERROR("Failed setting up VCN dec run queue.\n");
		return r;
	}

	ring = &adev->vcn.ring_enc[0];
	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
	r = amd_sched_entity_init(&ring->sched, &adev->vcn.entity_enc,
				  rq, amdgpu_sched_jobs);
	if (r != 0) {
		DRM_ERROR("Failed setting up VCN enc run queue.\n");
		return r;
	}

	return 0;
}
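
/**
 * amdgpu_vcn_sw_fini - tear down the VCN block
 * @adev: amdgpu device pointer
 *
 * Releases the scheduler entities, the VCPU buffer object, the rings
 * and the firmware image acquired in amdgpu_vcn_sw_init().
 */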
int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
{
	int i;

	kfree(adev->vcn.saved_bo);

	amd_sched_entity_fini(&adev->vcn.ring_dec.sched, &adev->vcn.entity_dec);

	amd_sched_entity_fini(&adev->vcn.ring_enc[0].sched, &adev->vcn.entity_enc);

	amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo,
			      &adev->vcn.gpu_addr,
			      (void **)&adev->vcn.cpu_addr);

	amdgpu_ring_fini(&adev->vcn.ring_dec);

	for (i = 0; i < adev->vcn.num_enc_rings; ++i)
		amdgpu_ring_fini(&adev->vcn.ring_enc[i]);

	release_firmware(adev->vcn.fw);

	return 0;
}
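
/**
 * amdgpu_vcn_suspend - save VCN VCPU buffer state
 * @adev: amdgpu device pointer
 *
 * Copies the contents of the VCPU buffer object into a kernel allocation
 * so it can be restored by amdgpu_vcn_resume() after power down.
 */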
int amdgpu_vcn_suspend(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;

	if (adev->vcn.vcpu_bo == NULL)
		return 0;

	cancel_delayed_work_sync(&adev->vcn.idle_work);

	size = amdgpu_bo_size(adev->vcn.vcpu_bo);
	ptr = adev->vcn.cpu_addr;

	adev->vcn.saved_bo = kmalloc(size, GFP_KERNEL);
	if (!adev->vcn.saved_bo)
		return -ENOMEM;

	memcpy_fromio(adev->vcn.saved_bo, ptr, size);

	return 0;
}
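
/**
 * amdgpu_vcn_resume - restore VCN VCPU buffer state
 * @adev: amdgpu device pointer
 *
 * Restores the saved VCPU buffer contents, or reloads the firmware image
 * into the buffer and clears the remainder if no saved copy exists.
 */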
int amdgpu_vcn_resume(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;

	if (adev->vcn.vcpu_bo == NULL)
		return -EINVAL;

	size = amdgpu_bo_size(adev->vcn.vcpu_bo);
	ptr = adev->vcn.cpu_addr;

	if (adev->vcn.saved_bo != NULL) {
		memcpy_toio(ptr, adev->vcn.saved_bo, size);
		kfree(adev->vcn.saved_bo);
		adev->vcn.saved_bo = NULL;
	} else {
		const struct common_firmware_header *hdr;
		unsigned offset;

		hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
		offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
		memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset,
			    le32_to_cpu(hdr->ucode_size_bytes));
		size -= le32_to_cpu(hdr->ucode_size_bytes);
		ptr += le32_to_cpu(hdr->ucode_size_bytes);
		memset_io(ptr, 0, size);
	}

	return 0;
}
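
/**
 * amdgpu_vcn_idle_work_handler - delayed work for VCN idle handling
 * @work: the embedded delayed work item
 *
 * Reschedules itself while decode fences are still outstanding; once the
 * ring is idle, this is where power/clock gating would be engaged (the
 * call is kept commented out until PG/CG is supported).
 */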
static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, vcn.idle_work.work);
	unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec);

	if (fences == 0) {
		if (adev->pm.dpm_enabled) {
			/* might be used once pg/cg is enabled:
			amdgpu_dpm_enable_uvd(adev, false);
			*/
		}
	} else {
		schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
	}
}
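
/**
 * amdgpu_vcn_ring_begin_use - notify that a VCN ring is about to be used
 * @ring: the ring being used
 *
 * Cancels the pending idle work so the block is not powered down while
 * commands are being submitted.
 */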
void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);

	if (set_clocks && adev->pm.dpm_enabled) {
		/* might be used once pg/cg is enabled:
		amdgpu_dpm_enable_uvd(adev, true);
		*/
	}
}
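
/**
 * amdgpu_vcn_ring_end_use - notify that a VCN ring submission finished
 * @ring: the ring that was used
 *
 * Re-arms the idle work with the one second timeout.
 */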
void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
{
	schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
}
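
/**
 * amdgpu_vcn_dec_ring_test_ring - basic decode ring sanity test
 * @ring: the decode ring to test
 *
 * Writes a known value through the ring to the UVD_CONTEXT_ID scratch
 * register and polls until it reads back, or until the timeout expires.
 */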
int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		return r;
	}
	amdgpu_ring_write(ring,
		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID));
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
			  ring->idx, tmp);
		r = -EINVAL;
	}
	return r;
}
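
/**
 * amdgpu_vcn_dec_send_msg - submit a decoder message buffer to the VCPU
 * @ring: the decode ring to submit on
 * @bo: buffer object containing the message
 * @direct: submit directly to the ring instead of through the scheduler
 * @fence: optional return fence for the submission
 *
 * Reserves and validates the message buffer, builds a small IB that
 * points the VCPU at it, and submits the IB either directly or via the
 * decode scheduler entity.
 */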
static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
				   bool direct, struct dma_fence **fence)
{
	struct ttm_validate_buffer tv;
	struct ww_acquire_ctx ticket;
	struct list_head head;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	struct amdgpu_device *adev = ring->adev;
	uint64_t addr;
	int i, r;

	memset(&tv, 0, sizeof(tv));
	tv.bo = &bo->tbo;

	INIT_LIST_HEAD(&head);
	list_add(&tv.head, &head);

	r = ttm_eu_reserve_buffers(&ticket, &head, true, NULL);
	if (r)
		return r;

	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
	if (r)
		goto err;

	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
	if (r)
		goto err;

	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);
	ib->ptr[0] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0);
	ib->ptr[1] = addr;
	ib->ptr[2] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0);
	ib->ptr[3] = addr >> 32;
	ib->ptr[4] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0);
	ib->ptr[5] = 0;
	for (i = 6; i < 16; i += 2) {
		ib->ptr[i] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0);
		ib->ptr[i+1] = 0;
	}
	ib->length_dw = 16;

	if (direct) {
		r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
		job->fence = dma_fence_get(f);
		if (r)
			goto err_free;

		amdgpu_job_free(job);
	} else {
		r = amdgpu_job_submit(job, ring, &adev->vcn.entity_dec,
				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
		if (r)
			goto err_free;
	}

	ttm_eu_fence_buffer_objects(&ticket, &head, f);

	if (fence)
		*fence = dma_fence_get(f);
	amdgpu_bo_unref(&bo);
	dma_fence_put(f);

	return 0;

err_free:
	amdgpu_job_free(job);

err:
	ttm_eu_backoff_reservation(&ticket, &head);
	return r;
}
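
/**
 * amdgpu_vcn_dec_get_create_msg - build and send a decoder create message
 * @ring: the decode ring to submit on
 * @handle: session handle to create
 * @fence: optional return fence
 *
 * Fills a buffer object with a session create message and hands it to
 * amdgpu_vcn_dec_send_msg() for direct submission.
 */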
static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
					 struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_bo *bo;
	uint32_t *msg;
	int r, i;

	r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
			     AMDGPU_GEM_DOMAIN_VRAM,
			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
			     NULL, NULL, 0, &bo);
	if (r)
		return r;

	r = amdgpu_bo_reserve(bo, false);
	if (r) {
		amdgpu_bo_unref(&bo);
		return r;
	}

	r = amdgpu_bo_kmap(bo, (void **)&msg);
	if (r) {
		amdgpu_bo_unreserve(bo);
		amdgpu_bo_unref(&bo);
		return r;
	}

	msg[0] = cpu_to_le32(0x00000028);
	msg[1] = cpu_to_le32(0x00000038);
	msg[2] = cpu_to_le32(0x00000001);
	msg[3] = cpu_to_le32(0x00000000);
	msg[4] = cpu_to_le32(handle);
	msg[5] = cpu_to_le32(0x00000000);
	msg[6] = cpu_to_le32(0x00000001);
	msg[7] = cpu_to_le32(0x00000028);
	msg[8] = cpu_to_le32(0x00000010);
	msg[9] = cpu_to_le32(0x00000000);
	msg[10] = cpu_to_le32(0x00000007);
	msg[11] = cpu_to_le32(0x00000000);
	msg[12] = cpu_to_le32(0x00000780);
	msg[13] = cpu_to_le32(0x00000440);
	for (i = 14; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	amdgpu_bo_kunmap(bo);
	amdgpu_bo_unreserve(bo);

	return amdgpu_vcn_dec_send_msg(ring, bo, true, fence);
}
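
/**
 * amdgpu_vcn_dec_get_destroy_msg - build and send a decoder destroy message
 * @ring: the decode ring to submit on
 * @handle: session handle to destroy
 * @direct: submit directly to the ring instead of through the scheduler
 * @fence: optional return fence
 *
 * Fills a buffer object with a session destroy message and hands it to
 * amdgpu_vcn_dec_send_msg().
 */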
static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
					  bool direct, struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_bo *bo;
	uint32_t *msg;
	int r, i;

	r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
			     AMDGPU_GEM_DOMAIN_VRAM,
			     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
			     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
			     NULL, NULL, 0, &bo);
	if (r)
		return r;

	r = amdgpu_bo_reserve(bo, false);
	if (r) {
		amdgpu_bo_unref(&bo);
		return r;
	}

	r = amdgpu_bo_kmap(bo, (void **)&msg);
	if (r) {
		amdgpu_bo_unreserve(bo);
		amdgpu_bo_unref(&bo);
		return r;
	}

	msg[0] = cpu_to_le32(0x00000028);
	msg[1] = cpu_to_le32(0x00000018);
	msg[2] = cpu_to_le32(0x00000000);
	msg[3] = cpu_to_le32(0x00000002);
	msg[4] = cpu_to_le32(handle);
	msg[5] = cpu_to_le32(0x00000000);
	for (i = 6; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	amdgpu_bo_kunmap(bo);
	amdgpu_bo_unreserve(bo);

	return amdgpu_vcn_dec_send_msg(ring, bo, direct, fence);
}
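
/**
 * amdgpu_vcn_dec_ring_test_ib - decode IB test
 * @ring: the decode ring to test
 * @timeout: how long to wait for the fence, in jiffies
 *
 * Sends a session create followed by a session destroy message and waits
 * for the resulting fence to signal.
 */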
int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence;
	long r;

	r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL);
	if (r) {
		DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
		goto error;
	}

	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, true, &fence);
	if (r) {
		DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
		goto error;
	}

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
	} else {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	}

	dma_fence_put(fence);

error:
	return r;
}
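
/**
 * amdgpu_vcn_enc_ring_test_ring - basic encode ring sanity test
 * @ring: the encode ring to test
 *
 * Commits an END command and polls the read pointer until it advances,
 * or until the timeout expires.
 */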
int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr = amdgpu_ring_get_rptr(ring);
	unsigned i;
	int r;

	r = amdgpu_ring_alloc(ring, 16);
	if (r) {
		DRM_ERROR("amdgpu: vcn enc failed to lock ring %d (%d).\n",
			  ring->idx, r);
		return r;
	}
	amdgpu_ring_write(ring, VCN_ENC_CMD_END);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		DRM_UDELAY(1);
	}

	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed\n",
			  ring->idx);
		r = -ETIMEDOUT;
	}

	return r;
}
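
/**
 * amdgpu_vcn_enc_get_create_msg - build and submit an encoder create IB
 * @ring: the encode ring to submit on
 * @handle: session handle to create
 * @fence: optional return fence
 *
 * Builds a small encoder IB carrying what the inline comments label as
 * session info, task info and an initialize op, then schedules it
 * directly on the ring.
 */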
static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
					 struct dma_fence **fence)
{
	const unsigned ib_size_dw = 16;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t dummy;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	dummy = ib->gpu_addr + 1024;

	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x00000018;
	ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
	ib->ptr[ib->length_dw++] = handle;
	ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
	ib->ptr[ib->length_dw++] = dummy;
	ib->ptr[ib->length_dw++] = 0x0000000b;

	ib->ptr[ib->length_dw++] = 0x00000014;
	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
	ib->ptr[ib->length_dw++] = 0x0000001c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008;
	ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
	job->fence = dma_fence_get(f);
	if (r)
		goto err;

	amdgpu_job_free(job);
	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err:
	amdgpu_job_free(job);
	return r;
}
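
/**
 * amdgpu_vcn_enc_get_destroy_msg - build and submit an encoder destroy IB
 * @ring: the encode ring to submit on
 * @handle: session handle to destroy
 * @fence: optional return fence
 *
 * Mirrors amdgpu_vcn_enc_get_create_msg() but ends with a close session
 * op instead of an initialize op.
 */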
static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
					  struct dma_fence **fence)
{
	const unsigned ib_size_dw = 16;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t dummy;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	dummy = ib->gpu_addr + 1024;

	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x00000018;
	ib->ptr[ib->length_dw++] = 0x00000001;
	ib->ptr[ib->length_dw++] = handle;
	ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
	ib->ptr[ib->length_dw++] = dummy;
	ib->ptr[ib->length_dw++] = 0x0000000b;

	ib->ptr[ib->length_dw++] = 0x00000014;
	ib->ptr[ib->length_dw++] = 0x00000002;
	ib->ptr[ib->length_dw++] = 0x0000001c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008;
	ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
	job->fence = dma_fence_get(f);
	if (r)
		goto err;

	amdgpu_job_free(job);
	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err:
	amdgpu_job_free(job);
	return r;
}
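
/**
 * amdgpu_vcn_enc_ring_test_ib - encode IB test
 * @ring: the encode ring to test
 * @timeout: how long to wait for the fence, in jiffies
 *
 * Sends an encoder create followed by a destroy IB and waits for the
 * resulting fence to signal.
 */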
int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	long r;

	r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL);
	if (r) {
		DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
		goto error;
	}

	r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence);
	if (r) {
		DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
		goto error;
	}

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
	} else {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	}
error:
	dma_fence_put(fence);
	return r;
}